From: Olivier Sallou Date: Wed, 11 Sep 2019 07:42:44 +0000 (+0000) Subject: New upstream version 2.9.0 X-Git-Tag: archive/raspbian/2.16.0+ds-7+rpi1~1^2~45^2~7 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=777af4037326cc3a73f30190a1eb7bc591a27905;p=ncbi-blast%2B.git New upstream version 2.9.0 --- diff --git a/c++/compilers/unix/GCC.sh b/c++/compilers/unix/GCC.sh index c1af4dc2..4f445f18 100755 --- a/c++/compilers/unix/GCC.sh +++ b/c++/compilers/unix/GCC.sh @@ -3,7 +3,7 @@ # Setup the local working environment for the "configure" script # Compiler: GCC # -# $Revision: 491630 $ // by Denis Vakatov, NCBI (vakatov@ncbi.nlm.nih.gov) +# $Revision: 578209 $ // by Denis Vakatov, NCBI (vakatov@ncbi.nlm.nih.gov) ############################################################################# @@ -51,11 +51,16 @@ case "$1" in elif test "`$CXX -V$1 -dumpversion 2>/dev/null`" = "$1"; then CXX="$CXX -V$1" CC="$CC -V$1" - elif test "`$CXX -dumpversion 2>/dev/null`" \!= "$1"; then - cat </dev/null`" in + "$1" | "$1".* ) ;; + * ) + cat < "%input_def_path%" if exist "%input_def_path%" for /f %%a in ('xcopy "%input_def_path%" "%dest_spec%" /q /d /y') do (set copied_def=%%a) -if not %copied_asn%==0 goto DOGENERATE -if not %copied_def%==0 goto DOGENERATE +if not "%copied_asn%"=="0" goto DOGENERATE +if not "%copied_def%"=="0" goto DOGENERATE if not exist "%src_subtree%%input_asn_name%.files" goto DOGENERATE if not exist "%src_subtree%%input_asn_name%__.cpp" goto DOGENERATE if not exist "%src_subtree%%input_asn_name%___.cpp" goto DOGENERATE diff --git a/c++/compilers/vs2015/install.sh b/c++/compilers/vs2015/install.sh index 8a9d625e..5d3866fc 100644 --- a/c++/compilers/vs2015/install.sh +++ b/c++/compilers/vs2015/install.sh @@ -1,5 +1,5 @@ #! 
/bin/sh -# $Id: install.sh 561966 2018-04-16 13:07:35Z ivanov $ +# $Id: install.sh 577506 2019-01-04 18:39:23Z ivanov $ # Authors: Denis Vakatov (vakatov@ncbi.nlm.nih.gov) # Anton Lavrentiev (lavr@ncbi.nlm.nih.gov) # @@ -34,7 +34,6 @@ echo "[`basename $script`] NCBI C++: \"$builddir\" to \"$target\"..." # Derive the destination dirs -docdir="$target"/doc scriptdir="$target"/scripts incdir="$target"/include srcdir="$target"/src @@ -67,10 +66,6 @@ makedir "$target" -p makedir "$tmpdir" -p -# Documentation -echo "[`basename $script`] Installing documentation..." -install "$builddir/doc" "$docdir" - # Scripts echo "[`basename $script`] Installing scripts..." install "$builddir/scripts" "$scriptdir" diff --git a/c++/compilers/vs2017/datatool.bat b/c++/compilers/vs2017/datatool.bat index a8085a76..b86a8346 100644 --- a/c++/compilers/vs2017/datatool.bat +++ b/c++/compilers/vs2017/datatool.bat @@ -1,5 +1,5 @@ @echo off -REM $Id: datatool.bat 555778 2018-01-23 16:47:32Z gouriano $ +REM $Id: datatool.bat 577623 2019-01-07 18:13:55Z ivanov $ REM =========================================================================== REM REM PUBLIC DOMAIN NOTICE @@ -66,8 +66,8 @@ for /f %%a in ('xcopy "%input_asn_path%" "%dest_spec%" /q /d /y') do (set copied set copied_def=0 if not exist "%input_def_path%" echo [-] > "%input_def_path%" if exist "%input_def_path%" for /f %%a in ('xcopy "%input_def_path%" "%dest_spec%" /q /d /y') do (set copied_def=%%a) -if not %copied_asn%==0 goto DOGENERATE -if not %copied_def%==0 goto DOGENERATE +if not "%copied_asn%"=="0" goto DOGENERATE +if not "%copied_def%"=="0" goto DOGENERATE if not exist "%src_subtree%%input_asn_name%.files" goto DOGENERATE if not exist "%src_subtree%%input_asn_name%__.cpp" goto DOGENERATE if not exist "%src_subtree%%input_asn_name%___.cpp" goto DOGENERATE diff --git a/c++/compilers/vs2017/install.sh b/c++/compilers/vs2017/install.sh index 9ef475cd..89c9d1e2 100644 --- a/c++/compilers/vs2017/install.sh +++ 
b/c++/compilers/vs2017/install.sh @@ -1,5 +1,5 @@ #! /bin/sh -# $Id: install.sh 561966 2018-04-16 13:07:35Z ivanov $ +# $Id: install.sh 577506 2019-01-04 18:39:23Z ivanov $ # Authors: Denis Vakatov (vakatov@ncbi.nlm.nih.gov) # Anton Lavrentiev (lavr@ncbi.nlm.nih.gov) # @@ -34,7 +34,6 @@ echo "[`basename $script`] NCBI C++: \"$builddir\" to \"$target\"..." # Derive the destination dirs -docdir="$target"/doc scriptdir="$target"/scripts incdir="$target"/include srcdir="$target"/src @@ -67,10 +66,6 @@ makedir "$target" -p makedir "$tmpdir" -p -# Documentation -echo "[`basename $script`] Installing documentation..." -install "$builddir/doc" "$docdir" - # Scripts echo "[`basename $script`] Installing scripts..." install "$builddir/scripts" "$scriptdir" diff --git a/c++/compilers/vs2017/make.bat b/c++/compilers/vs2017/make.bat index ef930a7a..2c44ce42 100644 --- a/c++/compilers/vs2017/make.bat +++ b/c++/compilers/vs2017/make.bat @@ -1,5 +1,5 @@ @ECHO OFF -REM $Id: make.bat 556795 2018-02-05 16:14:31Z ivanov $ +REM $Id: make.bat 579874 2019-02-07 14:28:45Z ivanov $ REM =========================================================================== REM REM PUBLIC DOMAIN NOTICE @@ -58,6 +58,7 @@ rem --- Required parameters set cmd=%~1% set solution=%~2 +set solution_name=%solution:/=_% set libdll=%~3 set arch=%~4 @@ -154,7 +155,10 @@ if not "%with_openmp%" == "" ( time /t echo INFO: Configure "%libdll%\%solution% [ReleaseDLL|%arch%]" -%DEVENV% %libdll%\build\%solution%.sln /build "ReleaseDLL|%archwc%" /project "_CONFIGURE_" +set log=__%libdll%_%solution_name%.configure.log +echo %DEVENV% %libdll%\build\%solution%.sln /build "ReleaseDLL|%archwc%" /project "_CONFIGURE_" /out %log% +%DEVENV% %libdll%\build\%solution%.sln /build "ReleaseDLL|%archwc%" /project "_CONFIGURE_" /out %log% +type %log% if errorlevel 1 goto ABORT if not _%cmd% == _make goto COMPLETE @@ -186,7 +190,9 @@ rem Subroutines :build echo INFO: Building "%libdll%\%solution% [%1|%arch%]" - %DEVENV% 
%libdll%\build\%solution%.sln /build "%1|%archw%" /project "_BUILD_ALL_" + set log=__%libdll%_%solution_name%.build.%1.log + %DEVENV% %libdll%\build\%solution%.sln /build "%1|%archw%" /project "_BUILD_ALL_" /out %log% + type %log% exit /b %errorlevel% :check diff --git a/c++/compilers/xcode30_prj/copybin.sh b/c++/compilers/xcode30_prj/copybin.sh index 12a33f1b..993d61a1 100755 --- a/c++/compilers/xcode30_prj/copybin.sh +++ b/c++/compilers/xcode30_prj/copybin.sh @@ -1,5 +1,5 @@ #!/bin/sh -# $Id: copybin.sh 190015 2010-04-27 17:14:13Z gouriano $ +# $Id: copybin.sh 576271 2018-12-12 17:35:52Z ivanov $ # =========================================================================== # # PUBLIC DOMAIN NOTICE @@ -42,8 +42,8 @@ done test -d ${BUILD_TREE_BIN} || mkdir -p ${BUILD_TREE_BIN} echo ${CONFIGURATION} | grep elease > /dev/null if test $? -eq 0 -o "$PTB_NOCOPYBIN" != ""; then - mv -f ${TARGET_BUILD_DIR}/${TARGET_NAME}* ${BUILD_TREE_BIN} + mv -f ${TARGET_BUILD_DIR}/${PRODUCT_NAME}* ${BUILD_TREE_BIN} else - cp -f -p -R ${TARGET_BUILD_DIR}/${TARGET_NAME}* ${BUILD_TREE_BIN} + cp -f -p -R ${TARGET_BUILD_DIR}/${PRODUCT_NAME}* ${BUILD_TREE_BIN} fi exit 0 diff --git a/c++/include/algo/blast/api/rps_aux.hpp b/c++/include/algo/blast/api/rps_aux.hpp index 18847169..9cbcb4f2 100644 --- a/c++/include/algo/blast/api/rps_aux.hpp +++ b/c++/include/algo/blast/api/rps_aux.hpp @@ -1,4 +1,4 @@ -/* $Id: rps_aux.hpp 369420 2012-07-19 13:41:19Z boratyng $ +/* $Id: rps_aux.hpp 579190 2019-01-31 13:23:44Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -164,7 +164,7 @@ private: /// Pointer which contains pointers to data managed by the data members /// above - BlastRPSInfo* m_RpsInfo; + unique_ptr m_RpsInfo; }; END_SCOPE(blast) diff --git a/c++/include/algo/blast/blastinput/blast_args.hpp b/c++/include/algo/blast/blastinput/blast_args.hpp index 78fd0c2b..b7117baf 100644 --- a/c++/include/algo/blast/blastinput/blast_args.hpp +++ 
b/c++/include/algo/blast/blastinput/blast_args.hpp @@ -1,4 +1,4 @@ -/* $Id: blast_args.hpp 565102 2018-06-06 17:25:02Z rackerst $ +/* $Id: blast_args.hpp 579216 2019-01-31 16:18:17Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -942,6 +942,10 @@ public: return m_Subjects; } + void SetIPGFilteringSupport(bool val) { + m_SupportIPGFiltering = val; + } + protected: CRef m_SearchDb;/**< Description of the BLAST database */ bool m_RequestMoleculeType; /**< Determines whether the database's @@ -958,6 +962,7 @@ protected: CRef m_Scope; /**< CScope object in which all subject sequences read are kept */ bool m_SupportsDatabaseMasking; /**< true if it's supported */ + bool m_SupportIPGFiltering; /**< true if IPG filtering is supported */ }; /// Argument class to collect options specific to igBLAST @@ -1066,14 +1071,16 @@ public: m_Html(false), m_IsIgBlast(isIgblast), m_LineLength(align_format::kDfltLineLength), - m_FormatFlags(flag) + m_FormatFlags(flag), + m_HitsSortOption(-1), + m_HspsSortOption(-1) { if (m_IsIgBlast) { m_DfltNumAlignments = m_DfltNumDescriptions = 10; } else { m_DfltNumAlignments = align_format::kDfltArgNumAlignments; m_DfltNumDescriptions = align_format::kDfltArgNumDescriptions; - } + } }; /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ @@ -1093,7 +1100,8 @@ public: virtual void ParseFormattingString(const CArgs& args, EOutputFormat& fmt_type, - string& custom_fmt_spec) const; + string& custom_fmt_spec, + string& custom_delim) const; /// Get the choice of formatted output EOutputFormat GetFormattedOutputChoice() const { @@ -1143,6 +1151,14 @@ public: size_t GetLineLength() const { return m_LineLength; } + int GetHitsSortOption() const { + return m_HitsSortOption; + } + int GetHspsSortOption() const { + return m_HspsSortOption; + } + string GetCustomDelimiter(){return m_CustomDelim;} + protected: EOutputFormat m_OutputFormat; ///< Choice of formatting output bool 
m_ShowGis; ///< Display NCBI GIs? @@ -1157,6 +1173,9 @@ protected: string m_CustomOutputFormatSpec; size_t m_LineLength; EFormatFlags m_FormatFlags; + int m_HitsSortOption; + int m_HspsSortOption; + string m_CustomDelim; }; /// Formatting args for magicblast advertising only SAM and fast tabular @@ -1242,7 +1261,7 @@ public: /// Get the number of threads to spawn size_t GetNumThreads() const { return m_NumThreads; } -private: +protected: size_t m_NumThreads; ///< Number of threads to spawn }; diff --git a/c++/include/algo/blast/blastinput/cmdline_flags.hpp b/c++/include/algo/blast/blastinput/cmdline_flags.hpp index f0f3771a..60f63495 100644 --- a/c++/include/algo/blast/blastinput/cmdline_flags.hpp +++ b/c++/include/algo/blast/blastinput/cmdline_flags.hpp @@ -1,4 +1,4 @@ -/* $Id: cmdline_flags.hpp 562577 2018-04-24 15:51:23Z fongah2 $ +/* $Id: cmdline_flags.hpp 579216 2019-01-31 16:18:17Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -77,6 +77,11 @@ NCBI_BLASTINPUT_EXPORT extern const string kArgNegativeGiList; /// argument for gi list to exclude from a BLAST database search NCBI_BLASTINPUT_EXPORT extern const string kArgNegativeSeqidList; +/// IPG list file name to restrict BLAST database +NCBI_BLASTINPUT_EXPORT extern const string kArgIpgList; + +/// argument for IPG list to exclude from a BLAST database search +NCBI_BLASTINPUT_EXPORT extern const string kArgNegativeIpgList; /// List of filtering algorithms to apply to subjects as soft masking extern const string kArgDbSoftMask; diff --git a/c++/include/algo/blast/core/blast_options.h b/c++/include/algo/blast/core/blast_options.h index 9a6c9a4c..7d0c5ae9 100644 --- a/c++/include/algo/blast/core/blast_options.h +++ b/c++/include/algo/blast/core/blast_options.h @@ -1,4 +1,4 @@ -/* $Id: blast_options.h 562577 2018-04-24 15:51:23Z fongah2 $ +/* $Id: blast_options.h 577682 2019-01-08 12:49:31Z ivanov $ * 
=========================================================================== * * PUBLIC DOMAIN NOTICE @@ -167,7 +167,13 @@ extern "C" { /** Default max frequency for a database word. Words with higher frequency will be masked in the lookup table. */ -#define MAX_DB_WORD_COUNT_MAPPER 60 +#define MAX_DB_WORD_COUNT_MAPPER 30 + +/** Default maximum insert size: distance on the subject between reads that + belong to a pair, for spliced and non-spliced alignments */ +#define MAGICBLAST_MAX_INSERT_SIZE_SPLICED 1000000 +#define MAGICBLAST_MAX_INSERT_SIZE_NONSPLICED 100000 + /** Value used to indicate that no IMPALA-style scaling should be performed * when scaling a PSSM */ diff --git a/c++/include/algo/blast/format/blast_format.hpp b/c++/include/algo/blast/format/blast_format.hpp index b36c0945..009655b1 100644 --- a/c++/include/algo/blast/format/blast_format.hpp +++ b/c++/include/algo/blast/format/blast_format.hpp @@ -1,4 +1,4 @@ -/* $Id: blast_format.hpp 556289 2018-01-29 17:33:51Z jianye $ +/* $Id: blast_format.hpp 577762 2019-01-08 18:10:19Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -78,14 +78,20 @@ public: double identity; string productive; }; - - + enum { /// The line length of pairwise blast output kFormatLineLength = 68, kMinTaxFormatLineLength = 100 }; + ///Display options for blast_report + enum DisplayOption { + eDescriptions, + eAlignments, + eMetadata, + eDescriptionsWithTemplates // this option is not implemented in blast_report + }; /// Constructor /// @param opts BLAST options used in the search [in] /// @param db_adapter Adapter object representing a BLAST database or @@ -249,6 +255,15 @@ public: blast::CPsiBlastIterationState::TSeqIds prev_seqids = blast::CPsiBlastIterationState::TSeqIds()); + ///Print Metadata in json format or descriptions in html format or alignments in html format + ///app.ini file with template info must be present + /// @param results Object containing 
alignments, mask regions, and + /// ancillary data to be output [in] + /// @param displayOption indicates what info to display + void PrintReport(const blast::CSearchResults& results, + CBlastFormat::DisplayOption displayOption); + + /// Writes out the query and results as an "archive" format /// @param queries Query factory to provide queries /// @param options_handle BLAST options @@ -298,7 +313,13 @@ public: void SetBaseFile(string base) {m_BaseFile = base;} /// Set Alignment Length - void SetLineLength(size_t len) {m_LineLength = len;} + void SetLineLength(size_t len) {m_LineLength = len;} + void SetAlignSeqList(string alignSeqList) {m_AlignSeqList = alignSeqList;} + void SetHitsSortOption(int hitsSortOption) {m_HitsSortOption = hitsSortOption;} + void SetHspsSortOption(int hspsSortOption) {m_HspsSortOption = hspsSortOption;} + void SetCustomDelimiter(string customDelim) {m_CustomDelim = customDelim;} + + static void PrintArchive(CRef archive, CNcbiOstream& out); @@ -389,6 +410,15 @@ private: /// If true, print long sequence ids (database|accession) bool m_LongSeqId; + + + CShowBlastDefline::SDeflineTemplates *m_DeflineTemplates; + CDisplaySeqalign::SAlignTemplates *m_AlignTemplates; + string m_AlignSeqList; + + int m_HitsSortOption; + int m_HspsSortOption; + string m_CustomDelim; /// Output the ancillary data for one query that was searched /// @param summary The ancillary data to report [in] @@ -492,6 +522,13 @@ private: void x_InitSAMFormatter(); void x_PrintTaxReport(const blast::CSearchResults& results); + void x_InitDeflineTemplates(void); + void x_InitAlignTemplates(void); + void x_DisplayDeflinesWithTemplates(CConstRef aln_set); + void x_SetAlignParameters(CDisplaySeqalign& cds); + void x_DisplayAlignsWithTemplates(CConstRef aln_set,const blast::CSearchResults& results); + //Creates json object array of info for each defline to display + void x_CreateDeflinesJson(CConstRef aln_set); }; END_NCBI_SCOPE diff --git a/c++/include/cgi/cgi_exception.hpp 
b/c++/include/cgi/cgi_exception.hpp index a001be72..cf0feadb 100644 --- a/c++/include/cgi/cgi_exception.hpp +++ b/c++/include/cgi/cgi_exception.hpp @@ -1,7 +1,7 @@ #ifndef CGI___CGI_EXCEPTION__HPP #define CGI___CGI_EXCEPTION__HPP -/* $Id: cgi_exception.hpp 540924 2017-07-12 15:06:18Z grichenk $ +/* $Id: cgi_exception.hpp 578930 2019-01-28 18:18:04Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -132,10 +132,10 @@ protected: virtual void x_Init(const CDiagCompileInfo& info, const string& message, const CException* prev_exception, - EDiagSev severity); + EDiagSev severity) override; /// Override method for copying exception data. - virtual void x_Assign(const CException& src); + virtual void x_Assign(const CException& src) override; private: EStatusCode m_StatusCode; @@ -165,7 +165,7 @@ public: eValue, //< Bad cookie value eString //< Bad cookie string (Set-Cookie:) format }; - virtual const char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { switch (GetErrCode()) { case eValue: return "Bad cookie"; @@ -205,7 +205,7 @@ public: eData //< Syntaxically correct but contains odd data (from the //< point of view of particular CGI application) }; - virtual const char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { switch ( GetErrCode() ) { case eCookie: return "Malformed HTTP Cookie"; @@ -265,7 +265,7 @@ public: eErrno, //< Generic system call failure eModTime //< File modification time cannot be obtained }; - virtual const char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { switch (GetErrCode()) { case eErrno: return "System error"; @@ -292,7 +292,7 @@ public: eDoubleHeader, ///< Header has already been written eBadHeaderValue ///< Invalid header value }; - virtual const char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { switch ( 
GetErrCode() ) { case eDoubleHeader: return "Header has already been written"; @@ -320,7 +320,7 @@ public: eHeaderSent ///< Header has been written }; - virtual const char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { switch ( GetErrCode() ) { case eHeaderSent: return "Header has been written"; @@ -350,7 +350,7 @@ public: eAttrNotFound, ///< Attribute not found eNotLoaded ///< Session not loaded }; - virtual const char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { switch ( GetErrCode() ) { case eSessionId: return "SessionId not specified"; @@ -381,7 +381,7 @@ public: eApp //< Other error }; - virtual const char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { switch (GetErrCode()) { case eApp: return "CGI application error"; diff --git a/c++/include/common/ncbi_package_ver.h b/c++/include/common/ncbi_package_ver.h index 8ee7f78a..81e2bcd8 100644 --- a/c++/include/common/ncbi_package_ver.h +++ b/c++/include/common/ncbi_package_ver.h @@ -7,8 +7,8 @@ #define NCBI_PACKAGE 1 #define NCBI_PACKAGE_NAME "blast" #define NCBI_PACKAGE_VERSION_MAJOR 2 -#define NCBI_PACKAGE_VERSION_MINOR 8 -#define NCBI_PACKAGE_VERSION_PATCH 1 +#define NCBI_PACKAGE_VERSION_MINOR 9 +#define NCBI_PACKAGE_VERSION_PATCH 0 #define NCBI_PACKAGE_CONFIG "" #define NCBI_PACKAGE_VERSION_STRINGIFY(x) #x diff --git a/c++/include/common/ncbi_source_ver.h b/c++/include/common/ncbi_source_ver.h index f4dbbe9f..cb5cfebe 100644 --- a/c++/include/common/ncbi_source_ver.h +++ b/c++/include/common/ncbi_source_ver.h @@ -1,4 +1,4 @@ -/* $Id: ncbi_source_ver.h 569379 2018-08-21 04:03:03Z syncbot $ +/* $Id: ncbi_source_ver.h 577239 2019-01-03 05:03:08Z syncbot $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -27,7 +27,7 @@ #include #include -/* #undef NCBI_PRODUCTION_VER */ +#define NCBI_PRODUCTION_VER 20190103 #define 
NCBI_DEVELOPMENT_VER 20180821 #if defined(NCBI_PRODUCTION_VER) diff --git a/c++/include/connect/ncbi_http_session.hpp b/c++/include/connect/ncbi_http_session.hpp index d5dfac21..38fb560e 100644 --- a/c++/include/connect/ncbi_http_session.hpp +++ b/c++/include/connect/ncbi_http_session.hpp @@ -1,7 +1,7 @@ #ifndef CONNECT___NCBI_HTTP_SESSION__HPP #define CONNECT___NCBI_HTTP_SESSION__HPP -/* $Id: ncbi_http_session.hpp 567636 2018-07-21 15:53:54Z mcelhany $ +/* $Id: ncbi_http_session.hpp 576759 2018-12-20 14:37:49Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -586,6 +586,7 @@ public: eGet = eReqMethod_Get, ePost = eReqMethod_Post, ePut = eReqMethod_Put, + ePatch = eReqMethod_Patch, eDelete = eReqMethod_Delete }; diff --git a/c++/include/corelib/ncbiapp.hpp b/c++/include/corelib/ncbiapp.hpp index bde714fc..c3d0373a 100644 --- a/c++/include/corelib/ncbiapp.hpp +++ b/c++/include/corelib/ncbiapp.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___NCBIAPP__HPP #define CORELIB___NCBIAPP__HPP -/* $Id: ncbiapp.hpp 561402 2018-04-06 16:45:41Z gouriano $ +/* $Id: ncbiapp.hpp 576975 2018-12-27 12:53:02Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -416,14 +416,16 @@ protected: /// Flags to adjust standard I/O streams' behaviour. enum EStdioSetup { - fDefault_SyncWithStdio = 0x01, - ///< Use compiler-specific default as pertains to the synchronizing - ///< of "C++" Cin/Cout/Cerr streams with their "C" counterparts. + fNoSyncWithStdio = 0x01, + ///< Turn off synchronizing of "C++" cin/cout/cerr streams with + ///< their "C" counterparts, possibly making the former not thread-safe. fDefault_CinBufferSize = 0x02, ///< Use compiler-specific default of Cin buffer size. 
fBinaryCin = 0x04, ///< treat standard input as binary - fBinaryCout = 0x08 ///< treat standard output as binary + fBinaryCout = 0x08, ///< treat standard output as binary + + fDefault_SyncWithStdio = 0x00, ///< @deprecated @sa fNoSyncWithStdio }; typedef int TStdioSetupFlags; ///< Binary OR of "EStdioSetup" diff --git a/c++/include/corelib/ncbimisc.hpp b/c++/include/corelib/ncbimisc.hpp index 4575f198..212a7667 100644 --- a/c++/include/corelib/ncbimisc.hpp +++ b/c++/include/corelib/ncbimisc.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___NCBIMISC__HPP #define CORELIB___NCBIMISC__HPP -/* $Id: ncbimisc.hpp 563123 2018-05-02 17:08:47Z lavr $ +/* $Id: ncbimisc.hpp 580817 2019-02-21 14:01:24Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1025,6 +1025,10 @@ public: #define GI_TO(T, gi) (static_cast(TIntId(gi))) #define GI_FROM(T, value) (TGi(static_cast(value))) +/// Temporary macros to convert TEntrezId to other types (int, unsigned etc.). +#define ENTREZ_ID_TO(T, entrez_id) (static_cast(TIntId(entrez_id))) +#define ENTREZ_ID_FROM(T, value) (TGi(static_cast(value))) + /// Convert gi-compatible int to/from other types. 
#define INT_ID_TO(T, id) (static_cast(id)) #define INT_ID_FROM(T, value) (static_cast(value)) diff --git a/c++/include/corelib/ncbithr.hpp b/c++/include/corelib/ncbithr.hpp index 6cf09366..31466456 100644 --- a/c++/include/corelib/ncbithr.hpp +++ b/c++/include/corelib/ncbithr.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___NCBITHR__HPP #define CORELIB___NCBITHR__HPP -/* $Id: ncbithr.hpp 545414 2017-09-06 15:40:16Z grichenk $ +/* $Id: ncbithr.hpp 579979 2019-02-08 14:16:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/include/dbapi/driver/exception.hpp b/c++/include/dbapi/driver/exception.hpp index c593fad5..2a80f257 100644 --- a/c++/include/dbapi/driver/exception.hpp +++ b/c++/include/dbapi/driver/exception.hpp @@ -1,7 +1,7 @@ #ifndef DBAPI_DRIVER___EXCEPTION__HPP #define DBAPI_DRIVER___EXCEPTION__HPP -/* $Id: exception.hpp 563975 2018-05-17 18:33:33Z ucko $ +/* $Id: exception.hpp 578927 2019-01-28 18:17:52Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -69,21 +69,21 @@ END_SCOPE(impl) } \ public: \ virtual ~exception_class(void) throw() {} \ - virtual const char* GetType(void) const {return #exception_class;} \ + const char* GetType(void) const override {return #exception_class;} \ typedef int TErrCode; \ TErrCode GetErrCode(void) const \ { \ return typeid(*this) == typeid(exception_class) ? \ (TErrCode)x_GetErrCode() : (TErrCode)CException::eInvalid; \ } \ - virtual CDB_Exception* Clone(void) const \ + virtual CDB_Exception* Clone(void) const override \ { \ return new exception_class(*this); \ } \ NCBI_EXCEPTION_DEFAULT_THROW(exception_class) \ protected: \ exception_class(void) {} \ - virtual const CException* x_Clone(void) const \ + virtual const CException* x_Clone(void) const override \ { \ return new exception_class(*this); \ } \ @@ -187,7 +187,7 @@ public: // DEPRECATED, Will be removed soon. 
NCBI_DEPRECATED static const char* SeverityString(EDB_Severity sev); - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; public: // Duplicate methods. We need them to support the old interface. @@ -231,7 +231,7 @@ public: int GetSybaseSeverity(void) const { return m_SybaseSeverity; } public: - virtual void ReportExtra(ostream& out) const; + virtual void ReportExtra(ostream& out) const override; virtual CDB_Exception* Clone(void) const; public: @@ -261,8 +261,8 @@ protected: void x_StartOfWhat(ostream& out) const; void x_EndOfWhat (ostream& out) const; void x_Init(const CDiagCompileInfo& info, const string& message, - const CException* prev_exception, EDiagSev severity); - virtual void x_Assign(const CException& src); + const CException* prev_exception, EDiagSev severity) override; + virtual void x_Assign(const CException& src) override; void x_InitCDB(int db_error_code) { m_DBErrCode = db_error_code; } SContext& x_SetContext(void); @@ -338,10 +338,10 @@ public: const string& ProcName() const { return m_ProcName; } int ProcLine() const { return m_ProcLine; } - virtual void ReportExtra(ostream& out) const; + virtual void ReportExtra(ostream& out) const override; protected: - virtual void x_Assign(const CException& src); + virtual void x_Assign(const CException& src) override; private: string m_ProcName; @@ -376,10 +376,10 @@ public: const string& SqlState() const { return m_SqlState; } int BatchLine() const { return m_BatchLine; } - virtual void ReportExtra(ostream& out) const; + virtual void ReportExtra(ostream& out) const override; protected: - virtual void x_Assign(const CException& src); + virtual void x_Assign(const CException& src) override; private: string m_SqlState; @@ -516,11 +516,11 @@ public: string WhatThis(void) const; - virtual void ReportExtra(ostream& out) const; + virtual void ReportExtra(ostream& out) const override; protected: void ReportErrorStack(ostream& out) const; - virtual void 
x_Assign(const CException& src); + virtual void x_Assign(const CException& src) override; private: // We use "deque" instead of "stack" here we need to iterate over all diff --git a/c++/include/dbapi/variant.hpp b/c++/include/dbapi/variant.hpp index 673da72c..caf2ed73 100644 --- a/c++/include/dbapi/variant.hpp +++ b/c++/include/dbapi/variant.hpp @@ -1,7 +1,7 @@ #ifndef DBAPI___VARIANT__HPP #define DBAPI___VARIANT__HPP -/* $Id: variant.hpp 563801 2018-05-15 15:55:00Z ucko $ +/* $Id: variant.hpp 578927 2019-01-28 18:17:52Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -65,7 +65,7 @@ public: CVariantException(const string& message); - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CVariantException, CException); }; diff --git a/c++/include/misc/jsonwrapp/jsonwrapp.hpp b/c++/include/misc/jsonwrapp/jsonwrapp.hpp new file mode 100644 index 00000000..599e4692 --- /dev/null +++ b/c++/include/misc/jsonwrapp/jsonwrapp.hpp @@ -0,0 +1,33 @@ +#ifndef MISC_JSONWRAPP___JSONWRAPP__HPP +#define MISC_JSONWRAPP___JSONWRAPP__HPP + +/* $Id: jsonwrapp.hpp 528470 2017-02-23 14:33:44Z gouriano $ +* =========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. 
+* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. +* +*/ + +#include + +#endif /* MISC_JSONWRAPP___JSONWRAPP__HPP */ + + diff --git a/c++/include/misc/jsonwrapp/jsonwrapp10.hpp b/c++/include/misc/jsonwrapp/jsonwrapp10.hpp new file mode 100644 index 00000000..44b8f96e --- /dev/null +++ b/c++/include/misc/jsonwrapp/jsonwrapp10.hpp @@ -0,0 +1,2236 @@ +#ifndef MISC_JSONWRAPP___JSONWRAPP10__HPP +#define MISC_JSONWRAPP___JSONWRAPP10__HPP + +/* $Id: jsonwrapp10.hpp 528470 2017-02-23 14:33:44Z gouriano $ +* =========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. 
+* +* =========================================================================== +* +* Author: Andrei Gourianov +* +* File Description: +* Wrapper API to work with JSON data +* http://www.ietf.org/rfc/rfc4627.txt +* +* Internally, data of any type is stored in a universal container. +* We define different API classes here for the sake of semantics only. +* +* This implementation uses object adapter pattern, in which adapter contains +* a pointer to an instance of a class it wraps. +* Please note, objects of classes defined here act like pointers, +* ie, creating, or copying them does not create any data, their destruction +* does not destroy any data either. +* +* We prohibit creation of standalone JSON value object; +* only document objects can be created and copied. That is, +* all values are associated with a specific document only. +* When a document is destroyed, all its values are destroyed as well. +* So, to create a value, one should add it into a document (or into JSON +* array or object) and get a proper adapter object. +* +* Classes that store JSON value: +* CJson_ConstNode, CJson_Node -- base class; +* CJson_ConstValue, CJson_Value -- primitive type data +* (string, number, boolean, null); +* CJson_ConstArray, CJson_Array -- JSON array; +* CJson_ConstObject, CJson_Object -- JSON object; +* CJson_Document -- serializable JSON data container - array or object. +* +* Sequential access parsing event listener: +* CJson_WalkHandler -- define your own class derived from this one. 
+*/ + +#include +#include +#include + +#define RAPIDJSON_NOMEMBERITERATORCLASS +#include "rapidjson10/rapidjson.h" +#include "rapidjson10/document.h" +#include "rapidjson10/prettywriter.h" +#include "rapidjson10/filereadstream.h" +#include "rapidjson10/filewritestream.h" +#include "rapidjson10/error/en.h" + + +BEGIN_NCBI_SCOPE + +class CJson_Document; +class CJson_ConstValue; +class CJson_Value; +class CJson_ConstArray; +class CJson_Array; +class CJson_ConstObject; +class CJson_Object; + +enum EJson_Write_Flags { + fJson_Write_IndentWithSpace = 0, ///< use space (' ') symbol for indentation; value is 0, so it is the default and cannot be tested with '&' + fJson_Write_NoIndentation = (1 << 0), ///< do not use indentation + fJson_Write_NoEol = (1 << 1), ///< do not write end-of-line symbol + fJson_Write_IndentWithTab = (1 << 2) ///< use tab ('\t') symbol for indentation +}; +typedef unsigned int TJson_Write_Flags; ///< bitwise OR of EJson_Write_Flags values + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Node +/// +/// Container for JSON value. +/// A JSON value must be an object, array, number, or string, or one of +/// the following three literal names: false null true. +/// The class provides basic access methods only. +/// To get access to value data, one should "get" (cast) it as +/// Value, Array or Object and use an appropriate API. 
+ +class CJson_ConstNode +{ +protected: + typedef rapidjson::Value _Impl; + +public: + typedef char TCharType; + typedef ncbi::CStringUTF8 TStringType; + typedef ncbi::CStringUTF8 TKeyType; + +public: + /// Value type + enum EJsonType { + eNull, ///< null + eBool, ///< bool + eNumber, ///< number + eString, ///< string + eArray, ///< array + eObject ///< object + }; + /// Get value type + EJsonType GetType(void) const; + + bool IsNull( void) const; ///< true if the node holds JSON null + bool IsValue( void) const; ///< true if the node is neither an array nor an object (null included) + bool IsArray( void) const; ///< true if the node holds a JSON array + bool IsObject( void) const; ///< true if the node holds a JSON object + + /// Get JSON value contents of the node (the node must satisfy IsValue(); checked with _ASSERT) + CJson_ConstValue GetValue(void) const; + + /// Get JSON array contents of the node (the node must satisfy IsArray(); checked with _ASSERT) + CJson_ConstArray GetArray(void) const; + + /// Get JSON object contents of the node (the node must satisfy IsObject(); checked with _ASSERT) + CJson_ConstObject GetObject(void) const; + + /// Convert the contents of the node into string; flags is a combination of EJson_Write_Flags, indent_char_count is the number of indent characters given to the writer + std::string ToString(TJson_Write_Flags flags = fJson_Write_IndentWithSpace, + unsigned int indent_char_count = 4) const; + + ~CJson_ConstNode(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstNode(const CJson_ConstNode& n); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstNode& operator=(const CJson_ConstNode& n); + + bool operator!=(const CJson_ConstNode& n) const; ///< delegates to operator!= of the wrapped rapidjson values + bool operator==(const CJson_ConstNode& n) const; ///< delegates to operator== of the wrapped rapidjson values + +protected: + CJson_ConstNode(void) : m_Impl(0) { // detached adapter: wraps no value yet + } + CJson_ConstNode(_Impl* impl) : m_Impl(impl) { + } + _Impl* m_Impl; ///< wrapped rapidjson value; the empty destructor never frees it + static _Impl*& x_Impl(CJson_ConstNode& v){ // modifiable reference to the implementation pointer of v + return v.m_Impl; + } + friend class CJson_Node; + friend class CJson_ConstValue; + friend class CJson_Array; + friend class CJson_ConstArray; + friend class CJson_ConstObject; + friend class CJson_Object; + friend class CJson_WalkHandler; + friend class CJson_Document; + friend class CJson_ConstObject_pair; + friend class CJson_Object_pair; +}; + 
+///////////////////////////////////////////////////////////////////////////// + +class CJson_Node : virtual public CJson_ConstNode +{ +public: + /// Erase node data and convert it into JSON NULL value + CJson_Node& SetNull(void); + + /// Erase node data and convert it into JSON value (the node is reset to JSON null) + CJson_Value ResetValue(void); + + /// Get JSON value contents of the node (the node must satisfy IsValue(); checked with _ASSERT) + CJson_Value SetValue(void); + + /// Erase node data and convert it into JSON array + CJson_Array ResetArray(void); + + /// Get JSON array contents of the node (the node must satisfy IsArray(); checked with _ASSERT) + CJson_Array SetArray(void); + + /// Erase node data and convert it into JSON object + CJson_Object ResetObject(void); + + /// Get JSON object contents of the node (the node must satisfy IsObject(); checked with _ASSERT) + CJson_Object SetObject(void); + + ~CJson_Node(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Node(const CJson_Node& n); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Node& operator=(const CJson_Node& n); + + /// Copy Node contents data into this node (unlike operator=, which only rebinds the adapter) + CJson_Node& AssignCopy(const CJson_ConstNode& n); + +protected: + CJson_Node(void) { + } + CJson_Node(_Impl* impl) : CJson_ConstNode(impl) { + m_Impl = impl; // required: the virtual-base initializer above is ignored unless CJson_Node is the most derived class + } + friend class CJson_Value; + friend class CJson_Array; + friend class CJson_ConstArray; + friend class CJson_Object; + friend class CJson_ConstObject; + friend class CJson_Document; + friend class CJson_ConstObject_pair; + friend class CJson_Object_pair; +}; + + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Value +/// +/// Standard type JSON value. 
+ +class CJson_ConstValue : virtual public CJson_ConstNode +{ +public: + /// Test if value type is compatible with C++ type + bool IsBool( void) const; + bool IsNumber( void) const; + bool IsInt4( void) const; + bool IsUint4( void) const; + bool IsInt8( void) const; + bool IsUint8( void) const; + bool IsDouble( void) const; + bool IsString( void) const; + + /// Get primitive value data + bool GetBool( void) const; + Int4 GetInt4( void) const; + Uint4 GetUint4( void) const; + Int8 GetInt8( void) const; + Uint8 GetUint8( void) const; + double GetDouble( void) const; + TStringType GetString(void) const; + size_t GetStringLength(void) const; + + ~CJson_ConstValue(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstValue(const CJson_ConstValue& n); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstValue& operator=(const CJson_ConstValue& n); + +protected: + CJson_ConstValue(void) {} + CJson_ConstValue(_Impl* impl) : CJson_ConstNode(impl) { + m_Impl = impl; // required when this class is a base of CJson_Value: the virtual-base initializer above is ignored unless this is the most derived class + } + friend class CJson_ConstNode; + friend class CJson_WalkHandler; +}; + +///////////////////////////////////////////////////////////////////////////// + +class CJson_Value : public CJson_ConstValue, public CJson_Node +{ +public: + /// Set primitive value data + CJson_Value& SetBool( bool value); + CJson_Value& SetInt4( Int4 value); + CJson_Value& SetUint4( Uint4 value); + CJson_Value& SetInt8( Int8 value); + CJson_Value& SetUint8( Uint8 value); + CJson_Value& SetDouble(double value); + CJson_Value& SetString(const TStringType& value); + + ~CJson_Value(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Value(const CJson_Value& n); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Value& operator=(const CJson_Value& n); + +protected: + CJson_Value(void) { 
+ } + CJson_Value( _Impl* impl) : CJson_ConstValue(impl), CJson_Node(impl) { // each base constructor stores impl in the shared virtual base + } + friend class CJson_Node; +}; + + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Array +/// +/// JSON array is an ordered sequence of zero or more values. +/// The class provides API to populate and explore a JSON array + +class CJson_ConstArray : virtual public CJson_ConstNode +{ +protected: + typedef rapidjson::Value::ValueIterator _ImplIterator; + typedef rapidjson::Value::ConstValueIterator _ImplCIterator; + +public: + typedef CJson_ConstNode value_type; + typedef CJson_ConstNode& const_reference; + + /// Random-access iterator to access const JSON array element. + /// It is designed to resemble std::vector::const_iterator class. + class const_iterator { + public: + typedef std::random_access_iterator_tag iterator_category; + typedef CJson_ConstNode value_type; + typedef std::ptrdiff_t difference_type; + typedef std::ptrdiff_t distance_type; + typedef CJson_ConstNode* pointer; + typedef CJson_ConstNode& reference; + + ~const_iterator(void) {} + const_iterator(void); + const_iterator(const const_iterator& vi); + const_iterator& operator=(const const_iterator& vi); + + /// Comparison + bool operator!=(const const_iterator& vi) const; + bool operator==(const const_iterator& vi) const; + bool operator<( const const_iterator& vi) const; + bool operator<=(const const_iterator& vi) const; + bool operator>( const const_iterator& vi) const; + bool operator>=(const const_iterator& vi) const; + + /// Increment and decrement + const_iterator& operator++(void); + const_iterator operator++(int); + const_iterator& operator+=(int); + const_iterator operator+(int) const; + const_iterator& operator--(void); + const_iterator operator--(int); + const_iterator& operator-=(int); + const_iterator operator-(int) const; + + /// Dereference + const CJson_ConstNode& operator*( void) const; + const CJson_ConstNode* operator->(void) const; + + /// Distance + 
distance_type operator-(const_iterator vi) const; + protected: + const_iterator(const _ImplCIterator vi); + const_iterator(const _ImplIterator vi); + _ImplIterator m_vi; // underlying rapidjson value iterator + mutable CJson_Node m_v; // NOTE(review): presumably the node adapter handed out by operator* / operator-> - confirm against the inline definitions + friend class CJson_ConstArray; + friend class CJson_Array; + }; + + /// Random-access iterator to access non-const JSON array element. + /// It is designed to resemble std::vector::iterator class. + class iterator : public const_iterator { + public: + typedef std::random_access_iterator_tag iterator_category; + typedef CJson_Node value_type; + typedef std::ptrdiff_t difference_type; + typedef std::ptrdiff_t distance_type; + typedef CJson_Node* pointer; + typedef CJson_Node& reference; + + ~iterator(void) {} + iterator(void); + iterator(const iterator& i); + iterator& operator=(const iterator& vi); + + /// Increment and decrement + iterator& operator++(void); + iterator operator++(int); + iterator operator+(int) const; + iterator& operator--(void); + iterator operator--(int); + iterator operator-(int) const; + + /// Dereference + CJson_Node& operator*(void) const; + CJson_Node* operator->(void) const; + + /// Distance + distance_type operator-(iterator vi) const; + + private: + iterator(const _ImplIterator vi); + friend class CJson_Array; + }; + +public: + + /// Return the number of elements in the array + size_t size(void) const; + + /// Return the number of elements that the array could contain without + /// allocating more storage. 
+ size_t capacity(void) const; + + /// Test if the array is empty + bool empty(void) const; + + /// Return a reference to the element at a specified location in the array + /// If index is greater than or equal to the size of the array, + /// the function throws std::out_of_range exception + CJson_ConstNode at(size_t index) const; + + /// Return a reference to the element at a specified location in the array + /// If index is greater than or equal to the size of the array, + /// the result is undefined + CJson_ConstNode operator[](size_t index) const; + + /// Return a reference to the first element in the array + /// If the array is empty, the result is undefined + CJson_ConstNode front(void) const; + + /// Return a reference to the last element of the array. + /// If the array is empty, the result is undefined + CJson_ConstNode back(void) const; + + /// Return a random-access iterator to the first element in the array + const_iterator begin(void) const; + + /// Return a random-access iterator that points just beyond the end of + /// the array + const_iterator end(void) const; + + ~CJson_ConstArray(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstArray(const CJson_ConstArray& n); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstArray& operator=(const CJson_ConstArray& n); + +protected: + CJson_ConstArray(void) { + } + CJson_ConstArray(_Impl* impl) : CJson_ConstNode(impl) { + m_Impl = impl; // required when this class is a base of CJson_Array: the virtual-base initializer above is ignored unless this is the most derived class + } + friend class CJson_ConstNode; +}; + +///////////////////////////////////////////////////////////////////////////// + +class CJson_Array : public CJson_ConstArray, public CJson_Node +{ +public: + typedef CJson_Node value_type; + typedef CJson_Node& reference; + typedef CJson_ConstArray::const_iterator const_iterator; + typedef CJson_ConstArray::iterator iterator; + + /// Reserve a minimum length of storage for the array object + void 
reserve(size_t count); + + /// Erase all elements of the array + void clear(void); + + /// Remove an element + iterator erase(const_iterator _where); + + /// Remove a range of elements + iterator erase(const_iterator _first, const_iterator _last); + + /// Return a reference to the element at a specified location in the array + /// If index is greater than or equal to the size of the array, + /// the function throws std::out_of_range exception + CJson_Node at(size_t index); + + /// Return a reference to the element at a specified location in the array + /// If index is greater than or equal to the size of the array, + /// the result is undefined + CJson_Node operator[](size_t index); + + /// Return a reference to the first element in the array + /// If the array is empty, the result is undefined + CJson_Node front(void); + + /// Return a reference to the last element of the array. + /// If the array is empty, the result is undefined + CJson_Node back(void); + + /// Add null element to the end of the array. + void push_back(void); //null value + + /// Add primitive type element to the end of the array. +#ifndef NCBI_COMPILER_WORKSHOP + template void push_back(const T&); // primitive and string + template void push_back(const T*); +#else + void push_back(const bool& v); + void push_back(const Int4& v); + void push_back(const Uint4& v); + void push_back(const Int8& v); + void push_back(const Uint8& v); + void push_back(const float& v); + void push_back(const double& v); + void push_back(const CJson_Node::TCharType* v); + void push_back(const CJson_Node::TStringType& v); + void push_back(const CJson_ConstNode& v); +#endif + + /// Add array type element to the end of the array. + CJson_Array push_back_array(void); + + /// Add object type element to the end of the array. 
+ CJson_Object push_back_object(void); + + /// Delete the element at the end of the array + void pop_back(void); + + /// Return a random-access iterator to the first element in the array + iterator begin(void) const; + + /// Return a random-access iterator that points just beyond the end of + /// the array + iterator end(void) const; + + ~CJson_Array(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Array(const CJson_Array& n); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Array& operator=(const CJson_Array& n); + +protected: + CJson_Array(void) { + } + CJson_Array(_Impl* impl) : CJson_ConstArray(impl), CJson_Node(impl) { // each base constructor stores impl in the shared virtual base + } + friend class CJson_Node; + friend class CJson_Object; + template class CProhibited {}; +}; + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_ConstObject value type - [name,value] pair. + +class CJson_Object_pair; +class CJson_ConstObject_pair { +protected: + typedef rapidjson::Value _Impl; +public: + const CJson_Node::TCharType* name; + const CJson_ConstNode value; + + ~CJson_ConstObject_pair(void) {} + CJson_ConstObject_pair(void); + CJson_ConstObject_pair(const CJson_Node::TCharType* _name, const _Impl& _value); + CJson_ConstObject_pair(const CJson_Object_pair& p); + CJson_ConstObject_pair& assign( + const CJson_Node::TCharType* _name, const _Impl& _value); +}; + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Object value type - [name,value] pair. 
+ +class CJson_Object_pair { +protected: + typedef rapidjson::Value _Impl; +public: + const CJson_Node::TCharType* name; + CJson_Node value; + + ~CJson_Object_pair(void) {} + CJson_Object_pair(void); + CJson_Object_pair(const CJson_Node::TCharType* _name, _Impl& _value); + CJson_Object_pair& assign(const CJson_Node::TCharType* _name, _Impl& _value); +}; + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Object +/// +/// A JSON object is an unordered collection of name/value pairs. +/// The class provides API to populate and explore a JSON object + +class CJson_ConstObject : virtual public CJson_ConstNode +{ +protected: + typedef rapidjson::Value::MemberIterator _ImplIterator; + typedef rapidjson::Value::ConstMemberIterator _ImplCIterator; + +public: + typedef CJson_ConstObject_pair value_type; + class iterator; + + /// Bidirectional iterator to access const JSON object element. + /// It is designed to resemble std::map::const_iterator class. + /// Dereferencing the iterator returns [name,value] pair. 
+ class const_iterator { + public: + typedef CJson_ConstObject_pair pair; + + typedef std::bidirectional_iterator_tag iterator_category; + typedef CJson_ConstObject::const_iterator::pair value_type; + typedef std::ptrdiff_t difference_type; + typedef std::ptrdiff_t distance_type; + typedef CJson_ConstObject::const_iterator::pair* pointer; + typedef CJson_ConstObject::const_iterator::pair& reference; + + ~const_iterator(void) {} + const_iterator(void); + const_iterator(const const_iterator& vi); + const_iterator& operator=(const const_iterator& vi); + const_iterator(const iterator& vi); + const_iterator& operator=(const iterator& vi); + + /// Comparison + bool operator!=(const const_iterator& vi) const; + bool operator==(const const_iterator& vi) const; + bool operator!=(const iterator& vi) const; + bool operator==(const iterator& vi) const; + + /// Increment and decrement + const_iterator& operator++(void); + const_iterator operator++(int); + const_iterator& operator--(void); + const_iterator operator--(int); + + /// Dereference + const pair& operator*(void) const; + const pair* operator->(void) const; + + protected: + const_iterator(const _ImplCIterator vi); + const_iterator(const _ImplIterator vi); + _ImplIterator m_vi; // underlying rapidjson member iterator + mutable pair m_pvi; // NOTE(review): presumably the [name,value] pair handed out by operator* / operator-> - confirm against the inline definitions + friend class CJson_ConstObject; + friend class CJson_Object; + friend class iterator; + }; + + /// Bidirectional iterator to access non-const JSON object element. + /// It is designed to resemble std::map::iterator class. + /// Dereferencing the iterator returns [name,value] pair. 
+ class iterator { + public: + typedef CJson_Object_pair pair; + + typedef std::bidirectional_iterator_tag iterator_category; + typedef CJson_ConstObject::iterator::pair value_type; + typedef std::ptrdiff_t difference_type; + typedef std::ptrdiff_t distance_type; + typedef CJson_ConstObject::iterator::pair* pointer; + typedef CJson_ConstObject::iterator::pair& reference; + + ~iterator(void) {} + iterator(void); + iterator(const iterator& i); + iterator& operator=(const iterator& vi); + + /// Comparison + bool operator!=(const iterator& vi) const; + bool operator==(const iterator& vi) const; + bool operator!=(const const_iterator& vi) const; + bool operator==(const const_iterator& vi) const; + + /// Increment and decrement + iterator& operator++(void); + iterator operator++(int); + iterator& operator--(void); + iterator operator--(int); + + /// Dereference + pair& operator*(void) const; + pair* operator->(void) const; + + private: + iterator(const _ImplIterator vi); + _ImplIterator m_vi; // underlying rapidjson member iterator + mutable pair m_pvi; + friend class CJson_ConstObject; + friend class CJson_Object; + friend class const_iterator; + }; + + /// Return the number of elements in the object + size_t size(void) const; + + /// Test if the object is empty + bool empty(void) const; + + /// Access an element with a given name. + /// If such element was not found, the function throws std::out_of_range exception + CJson_ConstNode at(const CJson_Node::TKeyType& name) const; + + /// Access an element with a given name. + /// If such element does not exist in this object, the result is undefined. + CJson_ConstNode operator[](const CJson_Node::TKeyType& name) const; + + /// Return an iterator that points to the first element in the object + const_iterator begin(void) const; + + /// Return an iterator that points to the location after the last element. + const_iterator end(void) const; + + /// Return an iterator that points to the location of the element with the given name. 
+ const_iterator find(const CJson_Node::TKeyType& name) const; + + /// Test if an element with this name exists in the object + bool has(const CJson_Node::TKeyType& name) const; + + ~CJson_ConstObject(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstObject(const CJson_ConstObject& v); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstObject& operator=(const CJson_ConstObject& v); + +protected: + CJson_ConstObject(void) { + } + CJson_ConstObject( _Impl* impl) : CJson_ConstNode(impl) { + m_Impl = impl; // required when this class is a base of CJson_Object: the virtual-base initializer above is ignored unless this is the most derived class + } + friend class CJson_ConstNode; +}; + +///////////////////////////////////////////////////////////////////////////// + +class CJson_Object : public CJson_ConstObject, public CJson_Node +{ +public: + typedef CJson_ConstObject::const_iterator const_iterator; + typedef CJson_ConstObject::iterator iterator; + typedef CJson_Object_pair value_type; + + /// Erase all elements of the object + void clear(void); + + /// Remove an element with a given name from the object + /// Returns the number of elements that have been removed + size_t erase(const CJson_Node::TKeyType& name); + + /// Remove an element + iterator erase(const_iterator _where); + + /// Remove a range of elements + iterator erase(const_iterator _first, const_iterator _last); + + /// Access an element with a given name. + /// If such element was not found, the function throws std::out_of_range exception + CJson_Node at(const CJson_Node::TKeyType& name); + + /// Access an element with a given name. + /// If such element does not exist in this object, it will be added. 
+ CJson_Node operator[](const CJson_Node::TKeyType& name); + + /// Insert null element into the object + void insert(const CJson_Node::TKeyType& name); + + /// Insert primitive type element into the object + template void insert(const CJson_Node::TKeyType& name, const T&); + template void insert(const CJson_Node::TKeyType& name, const T*); + +#ifdef NCBI_COMPILER_WORKSHOP + void insert(const CJson_Node::TKeyType& name, + const CJson_Node::TStringType& value); +#endif + + /// Insert array type element into the object + CJson_Array insert_array( const CJson_Node::TKeyType& name); + + /// Insert object type element into the object + CJson_Object insert_object(const CJson_Node::TKeyType& name); + + /// Return an iterator that points to the first element in the object + iterator begin(void) const; + + /// Return an iterator that points to the location after the last element. + iterator end(void) const; + + /// Return an iterator that points to the location of the element with the given name. + iterator find(const CJson_Node::TKeyType& name) const; + + ~CJson_Object(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Object(const CJson_Object& v); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Object& operator=(const CJson_Object& v); + +protected: + CJson_Object(void) { + } + CJson_Object(_Impl* impl) : CJson_ConstObject(impl), CJson_Node(impl) { // each base constructor stores impl in the shared virtual base + } + friend class CJson_Node; + friend class CJson_Array; + template class CProhibited {}; +}; + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_WalkHandler +/// +/// Sequential access parsing event listener. +/// Provides a mechanism for reading data from a JSON file or document +/// as a series of events. 
+ +class CJson_WalkHandler : public rapidjson::BaseReaderHandler<> +{ +public: + CJson_WalkHandler(void); + virtual ~CJson_WalkHandler(void) {} + + /// Begin reading object contents (the default implementation returns true) + /// + /// @param name + /// Name of this object in the parent object, or empty string + /// if this object has no parent. + virtual bool BeginObject(const CJson_Node::TKeyType& /*name*/) { + return true; + } + + /// Begin reading object member + /// + /// Right after this event, there can be one of the following only: + /// EndObject, BeginObject, BeginArray, or PlainMemberValue. + /// + /// @param name + /// Name of this object in the parent object, or empty string + /// if this object has no parent. + /// @param member + /// Member name + virtual bool BeginObjectMember(const CJson_Node::TKeyType& /*name*/, + const CJson_Node::TKeyType& /*member*/) { + return true; + } + + /// Primitive type data has been read + /// + /// @param name + /// Name of this object in the parent object, or empty string + /// if this object has no parent. + /// @param member + /// Member name + /// @param value + /// JSON value + virtual bool PlainMemberValue(const CJson_Node::TKeyType& /*name*/, + const CJson_Node::TKeyType& /*member*/, + const CJson_ConstValue& /*value*/) { + return true; + } + + /// End reading object contents + /// + /// @param name + /// Name of this object in the parent object, or empty string + /// if this object has no parent. + virtual bool EndObject(const CJson_Node::TKeyType& /*name*/) { + return true; + } + + + /// Begin reading array contents + /// + /// @param name + /// Name of this array in the parent object, or empty string + /// if this array has no parent. + virtual bool BeginArray(const CJson_Node::TKeyType& /*name*/) { + return true; + } + + /// Begin reading array element + /// + /// Right after this event, there can be one of the following only: + /// EndArray, BeginObject, BeginArray, or PlainElementValue. 
+ /// + /// @param name + /// Name of this array in the parent object, or empty string + /// if this array has no parent. + /// @param index + /// Index of the array element + virtual bool BeginArrayElement(const CJson_Node::TKeyType& /*name*/, + size_t /*index*/) { + return true; + } + + /// Primitive type data has been read + /// + /// @param name + /// Name of this array in the parent object, or empty string + /// if this array has no parent. + /// @param index + /// Index of the array element + /// @param value + /// JSON value + virtual bool PlainElementValue(const CJson_Node::TKeyType& /*name*/, + size_t /*index*/, + const CJson_ConstValue& /*value*/) { + return true; + } + + /// End reading array contents + /// + /// @param name + /// Name of this array in the parent object, or empty string + /// if this array has no parent. + virtual bool EndArray(const CJson_Node::TKeyType& /*name*/) { + return true; + } + + /// Return current stack path as string + /// For example: "/root/obj2/arr[3]" + CJson_Node::TKeyType GetCurrentJPath(void) const; + + /// Convert data, starting at the current parsing position, into + /// a document object. + /// This method may be called from BeginObject or BeginArray only. 
+ bool Read(CJson_Document& doc); + +private: + bool x_Notify(const rapidjson::Value& v); + bool x_BeginObjectOrArray(bool object_type); + void x_EndObjectOrArray(void); + +public: + // The following functions are named this way because rapidjson requires so + bool Null(); + bool Bool(bool v); + bool Int(int v); + bool Uint(unsigned v); + bool Int64(int64_t v); + bool Uint64(uint64_t v); + bool Double(double v); + bool String(const Ch* buf, rapidjson::SizeType sz, bool c); + bool Key( const Ch* buf, rapidjson::SizeType sz, bool c); + bool StartObject(); + bool EndObject(rapidjson::SizeType sz); + bool StartArray(); + bool EndArray(rapidjson::SizeType sz); + +private: + void x_SetSource(std::istream* in) {m_in=in;} + std::istream* m_in; // Input stream (set through x_SetSource) + std::vector m_object_type; // Object (true), or array (false) + std::vector m_index; // array element index + std::vector m_name; // object member name + + friend class CJson_Document; +}; + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Document +/// +/// Serializable, copyable container for JSON data. 
+ +class CJson_Document : public CJson_Node +{ + typedef rapidjson::Document _DocImpl; + +public: + CJson_Document(CJson_Node::EJsonType type = CJson_Node::eObject); + /// Create document by parsing UTF8 string + CJson_Document(const TStringType& v); + /// Copy another document contents into this document + CJson_Document(const CJson_Document& v); + /// Copy another document contents into this document + CJson_Document& operator=(const CJson_Document& v); + /// Copy another Node contents into this document + CJson_Document(const CJson_ConstNode& v); + /// Copy another Node contents into this document + CJson_Document& operator=(const CJson_ConstNode& v); + + ~CJson_Document(void) { + } + + /// Read JSON data from a UTF8 string + bool ParseString(const TStringType& v); + + /// Read JSON data from a stream + bool Read(std::istream& in); + + /// Read JSON data from a file + bool Read(const std::string& filename) { + std::ifstream in(filename.c_str()); + return Read(in); + } + + /// Test if the most recent read was successful + bool ReadSucceeded(void); + + /// Get most recent read error + std::string GetReadError(void) const; + + /// Write JSON data into a stream + void Write(std::ostream& out, TJson_Write_Flags flags = fJson_Write_IndentWithSpace, + unsigned int indent_char_count = 4) const; + + /// Write JSON data into a file + void Write(const std::string& filename, TJson_Write_Flags flags = fJson_Write_IndentWithSpace, + unsigned int indent_char_count = 4) const { + std::ofstream out(filename.c_str()); + Write(out, flags, indent_char_count); + } + + /// Traverse the document contents + void Walk(CJson_WalkHandler& walk) const; + + /// Traverse the JSON data stream contents + static void Walk(std::istream& in, CJson_WalkHandler& walk); + + +private: + _DocImpl m_DocImpl; +}; + + +///////////////////////////////////////////////////////////////////////////// + +/// Extraction operator for JSON document +inline std::istream& operator>>(std::istream& is, 
CJson_Document& d) { + if (!d.Read(is)) { + is.setstate(std::ios::failbit); + } + return is; +} + +/// Insertion operator for JSON document +inline std::ostream& operator<<(std::ostream& os, const CJson_Document& d) +{ + d.Write(os); + return os; +} + +/// Insertion operator for JSON node +inline std::ostream& operator<<(std::ostream& os, const CJson_ConstNode& v) +{ + return operator<<(os, CJson_Document(v)); +} + + + +///////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////// +// inline implementations + +#if defined(NCBI_COMPILER_GCC) && !defined(__clang__) +#if (NCBI_COMPILER_VERSION == 442) || (NCBI_COMPILER_VERSION == 443) +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif +#endif + +#define JSONWRAPP_TO_NCBIUTF8(v) (v) + +// workarounds to make it compile +#if defined(NCBI_COMPILER_GCC) && (NCBI_COMPILER_VERSION < 442) && !defined(__clang__) +# define JSONWRAPP_MAKENODE(v) CJson_Node(v) +#else +# define JSONWRAPP_MAKENODE(v) (v) +#endif + +// -------------------------------------------------------------------------- +// CJson_Node methods + +inline CJson_ConstNode::CJson_ConstNode(const CJson_ConstNode& n) + : m_Impl(n.m_Impl) { +} +inline CJson_ConstNode& +CJson_ConstNode::operator=(const CJson_ConstNode& n) { + m_Impl = n.m_Impl; return *this; +} +inline bool +CJson_ConstNode::operator==(const CJson_ConstNode& n) const +{ + return m_Impl->operator==(*n.m_Impl); +} +inline bool +CJson_ConstNode::operator!=(const CJson_ConstNode& n) const +{ + return m_Impl->operator!=(*n.m_Impl); +} + +inline CJson_Node::CJson_Node(const CJson_Node& n) + : CJson_ConstNode(n) { +} +inline CJson_Node& +CJson_Node::operator=(const CJson_Node& n) { + CJson_ConstNode::operator=(n); return *this; +} +inline CJson_Node& +CJson_Node::AssignCopy(const CJson_ConstNode& n) { + m_Impl->CopyFrom( *n.m_Impl, *m_Impl->GetValueAllocator()); + return *this; +} + + +inline 
CJson_Node::EJsonType +CJson_ConstNode::GetType(void) const { + switch (m_Impl->GetType()) { + default: + case rapidjson::kNullType: break; + case rapidjson::kFalseType: + case rapidjson::kTrueType: return eBool; + case rapidjson::kObjectType: return eObject; + case rapidjson::kArrayType: return eArray; + case rapidjson::kStringType: return eString; + case rapidjson::kNumberType: return eNumber; + } + return eNull; +} + +inline bool CJson_ConstNode::IsNull(void) const { + return m_Impl->IsNull(); +} +inline bool CJson_ConstNode::IsValue(void) const { + return !IsObject() && !IsArray(); +} +inline bool CJson_ConstNode::IsArray(void) const { + return m_Impl->IsArray(); +} +inline bool CJson_ConstNode::IsObject(void) const { + return m_Impl->IsObject(); +} +inline CJson_Node& CJson_Node::SetNull(void) { + m_Impl->SetNull( ); return *this; +} + +inline CJson_Value CJson_Node::ResetValue(void) { + m_Impl->SetNull(); + return CJson_Value(m_Impl); +} +inline CJson_Value CJson_Node::SetValue(void) { + _ASSERT(IsValue()); + return CJson_Value(m_Impl); +} +inline CJson_ConstValue CJson_ConstNode::GetValue(void) const { + _ASSERT(IsValue()); + return CJson_ConstValue(m_Impl); +} +inline CJson_Array CJson_Node::ResetArray(void) { + m_Impl->SetArray(); + return CJson_Array(m_Impl); +} +inline CJson_Array CJson_Node::SetArray(void) { + _ASSERT(IsArray()); + return CJson_Array(m_Impl); +} +inline CJson_ConstArray CJson_ConstNode::GetArray(void) const { + _ASSERT(IsArray()); + return CJson_ConstArray(m_Impl); +} +inline CJson_Object CJson_Node::ResetObject(void) { + m_Impl->SetObject(); + return CJson_Object(m_Impl); +} +inline CJson_Object CJson_Node::SetObject(void) { + _ASSERT(IsObject()); + return CJson_Object(m_Impl); +} +inline CJson_ConstObject CJson_ConstNode::GetObject(void) const { + _ASSERT(IsObject()); + return CJson_ConstObject(m_Impl); +} +inline std::string +CJson_ConstNode::ToString(TJson_Write_Flags flags, unsigned int indent_char_count) const { + 
ncbi::CNcbiOstrstream os; + rapidjson::CppOStream ofs(os); + rapidjson::PrettyWriter writer(ofs); + if (flags & fJson_Write_NoIndentation) { + writer.SetIndent(' ', 0); + } else { + writer.SetIndent( (flags & fJson_Write_IndentWithTab) ? '\t' : ' ', indent_char_count); + } + if (flags & fJson_Write_NoEol) { + writer.SetWriteEol(false); + } + m_Impl->Accept(writer); + return std::string( ncbi::CNcbiOstrstreamToString(os) ); +} + +// -------------------------------------------------------------------------- +// CJson_Value methods + +inline CJson_ConstValue::CJson_ConstValue( const CJson_ConstValue& n) + : CJson_ConstNode(n) { +} +inline CJson_ConstValue& +CJson_ConstValue::operator=(const CJson_ConstValue& n) { + CJson_ConstNode::operator=(n); return *this; +} +inline CJson_Value::CJson_Value( const CJson_Value& n) + : CJson_ConstValue(n), CJson_Node(n) { +} +inline CJson_Value& +CJson_Value::operator=(const CJson_Value& n) { + CJson_Node::operator=(n); return *this; +} + +inline bool CJson_ConstValue::IsBool(void) const { + return m_Impl->IsBool(); +} +inline bool CJson_ConstValue::IsNumber(void) const { + return m_Impl->IsNumber(); +} +inline bool CJson_ConstValue::IsInt4(void) const { + return m_Impl->IsInt(); +} +inline bool CJson_ConstValue::IsUint4(void) const { + return m_Impl->IsUint(); +} +inline bool CJson_ConstValue::IsInt8(void) const { + return m_Impl->IsInt64(); +} +inline bool CJson_ConstValue::IsUint8(void) const { + return m_Impl->IsUint64(); +} +inline bool CJson_ConstValue::IsDouble(void) const { + return m_Impl->IsDouble(); +} +inline bool CJson_ConstValue::IsString(void) const { + return m_Impl->IsString(); +} + +inline bool CJson_ConstValue::GetBool(void) const { + return m_Impl->GetBool(); +} +inline Int4 CJson_ConstValue::GetInt4(void) const { + return m_Impl->GetInt(); +} +inline Uint4 CJson_ConstValue::GetUint4(void) const { + return m_Impl->GetUint(); +} +inline Int8 CJson_ConstValue::GetInt8(void) const { + return m_Impl->GetInt64(); +} 
+inline Uint8 CJson_ConstValue::GetUint8(void) const { + return m_Impl->GetUint64(); +} +inline double CJson_ConstValue::GetDouble(void) const { + return m_Impl->GetDouble(); +} +inline CJson_Node::TStringType +CJson_ConstValue::GetString(void) const { + return JSONWRAPP_TO_NCBIUTF8(m_Impl->GetString()); +} +inline size_t CJson_ConstValue::GetStringLength(void) const { + return m_Impl->GetStringLength(); +} + +inline CJson_Value& CJson_Value::SetBool(bool value) { + m_Impl->SetBool( value); return *this; +} +inline CJson_Value& CJson_Value::SetInt4(Int4 value) { + m_Impl->SetInt( value); return *this; +} +inline CJson_Value& CJson_Value::SetUint4(Uint4 value) { + m_Impl->SetUint( value); return *this; +} +inline CJson_Value& CJson_Value::SetInt8(Int8 value) { + m_Impl->SetInt64( value); return *this; +} +inline CJson_Value& CJson_Value::SetUint8(Uint8 value) { + m_Impl->SetUint64(value); return *this; +} +inline CJson_Value& CJson_Value::SetDouble(double value) { + m_Impl->SetDouble(value); return *this; +} +inline CJson_Value& CJson_Value::SetString(const CJson_Node::TStringType& value) { + m_Impl->SetString(value.c_str(), *(m_Impl->GetValueAllocator())); return *this; +} + +// -------------------------------------------------------------------------- +// CJson_Array methods +inline CJson_ConstArray::CJson_ConstArray( const CJson_ConstArray& n) + : CJson_ConstNode(n) { +} +inline CJson_ConstArray& +CJson_ConstArray::operator=(const CJson_ConstArray& n) { + CJson_ConstNode::operator=(n); return *this; +} +inline CJson_Array::CJson_Array( const CJson_Array& n) + : CJson_ConstArray(n), CJson_Node(n) { +} +inline CJson_Array& +CJson_Array::operator=(const CJson_Array& n) { + CJson_Node::operator=(n); return *this; +} +inline void CJson_Array::reserve(size_t count) { + m_Impl->Reserve(rapidjson::SizeType(count), *(m_Impl->GetValueAllocator())); +} +inline void CJson_Array::clear(void) { + m_Impl->Clear(); +} +inline CJson_Array::iterator 
CJson_Array::erase(CJson_ConstArray::const_iterator _where) { + return CJson_Array::iterator( m_Impl->Erase( _where.m_vi)); +} +inline CJson_Array::iterator +CJson_Array::erase(CJson_ConstArray::const_iterator _first, CJson_ConstArray::const_iterator _last) { + return CJson_Array::iterator( m_Impl->Erase( _first.m_vi, _last.m_vi)); +} +inline size_t CJson_ConstArray::size(void) const { + return m_Impl->Size(); +} +inline size_t CJson_ConstArray::capacity(void) const { + return m_Impl->Capacity(); +} +inline bool CJson_ConstArray::empty(void) const { + return m_Impl->Empty(); +} +inline CJson_ConstNode CJson_ConstArray::at(size_t index) const { + if (index >= size()) { + throw std::out_of_range("array index out of range"); + } + return operator[](index); +} +inline CJson_Node CJson_Array::at(size_t index) { + if (index >= size()) { + throw std::out_of_range("array index out of range"); + } + return operator[](index); +} +inline CJson_ConstNode CJson_ConstArray::operator[](size_t index) const { + return CJson_ConstNode(&(m_Impl->operator[](rapidjson::SizeType(index)))); +} +inline CJson_Node CJson_Array::operator[](size_t index) { + return CJson_Node(&(m_Impl->operator[](rapidjson::SizeType(index)))); +} +inline CJson_ConstNode CJson_ConstArray::front(void) const { + return operator[](0); +} +inline CJson_Node CJson_Array::front(void) { + return operator[](0); +} +inline CJson_ConstNode CJson_ConstArray::back(void) const { + return operator[](size()-1); +} +inline CJson_Node CJson_Array::back(void) { + return operator[](size()-1); +} + +// Implicit conversions are prohibited +#ifndef NCBI_COMPILER_WORKSHOP +// this may fail to compile +//template void CJson_Array::push_back(T) =delete; +// this will compile: +template inline void CJson_Array::push_back(const T&) { + CProhibited::Implicit_conversions_are_prohibited(); +} +template inline void CJson_Array::push_back(const T*) { + CProhibited::Implicit_conversions_are_prohibited(); +} +#define JSW_EMPTY_TEMPLATE 
template<> +#else +#define JSW_EMPTY_TEMPLATE +#endif +inline void CJson_Array::push_back(void) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const bool& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetBool(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const Int4& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetInt(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const Uint4& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetUint(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const Int8& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetInt64(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const Uint8& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetUint64(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const float& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetDouble(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const double& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetDouble(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back( + const CJson_Node::TCharType* v) { + rapidjson::Value sv(v, *(m_Impl->GetValueAllocator())); + m_Impl->PushBack( sv, *(m_Impl->GetValueAllocator())); +} 
+JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back( + const CJson_Node::TStringType& value) { + push_back(value.c_str()); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const CJson_ConstNode& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetValueAllocator(a).CopyFrom( *v.m_Impl, *a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const CJson_Node& v) { + push_back(v); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const CJson_ConstArray& v) { + push_back(v); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const CJson_Array& v) { + push_back(v); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const CJson_ConstObject& v) { + push_back(v); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const CJson_Object& v) { + push_back(v); +} +#undef JSW_EMPTY_TEMPLATE + +inline CJson_Array CJson_Array::push_back_array(void) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetArray().SetValueAllocator(a), *a); + return back().SetArray(); +} + +inline CJson_Object CJson_Array::push_back_object(void) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetObject().SetValueAllocator(a), *a); + return back().SetObject(); +} + +inline void CJson_Array::pop_back(void) { + m_Impl->PopBack(); +} + +inline CJson_ConstArray::const_iterator +CJson_ConstArray::begin(void) const { + return const_iterator(m_Impl->Begin()); +} +inline CJson_ConstArray::const_iterator +CJson_ConstArray::end(void) const { + return const_iterator(m_Impl->End()); +} +inline CJson_Array::iterator CJson_Array::begin(void) const { + return iterator(m_Impl->Begin()); +} +inline CJson_Array::iterator CJson_Array::end(void) const { + return iterator(m_Impl->End()); +} + +// -------------------------------------------------------------------------- +// 
CJson_Array::const_iterator + +inline CJson_ConstArray::const_iterator::const_iterator(void) + : m_vi(0), m_v(JSONWRAPP_MAKENODE(0)) { +} +inline CJson_ConstArray::const_iterator::const_iterator( + const CJson_ConstArray::const_iterator& i) : m_vi(i.m_vi), m_v(i.m_v) { +} +inline CJson_ConstArray::const_iterator& +CJson_ConstArray::const_iterator::operator=( + const CJson_ConstArray::const_iterator& vi) { + m_vi = vi.m_vi; return *this; +} +inline bool +CJson_ConstArray::const_iterator::operator!=( + const CJson_ConstArray::const_iterator& vi) const { + return m_vi != vi.m_vi; +} +inline bool +CJson_ConstArray::const_iterator::operator==( + const CJson_ConstArray::const_iterator& vi) const { + return m_vi == vi.m_vi; +} +inline bool +CJson_ConstArray::const_iterator::operator<( + const CJson_ConstArray::const_iterator& vi) const { + return m_vi < vi.m_vi; +} +inline bool +CJson_ConstArray::const_iterator::operator<=( + const CJson_ConstArray::const_iterator& vi) const { + return m_vi <= vi.m_vi; +} +inline bool +CJson_ConstArray::const_iterator::operator>( + const CJson_ConstArray::const_iterator& vi) const { + return m_vi > vi.m_vi; +} +inline bool +CJson_ConstArray::const_iterator::operator>=( + const CJson_ConstArray::const_iterator& vi) const { + return m_vi >= vi.m_vi; +} +inline CJson_ConstArray::const_iterator& +CJson_ConstArray::const_iterator::operator++(void) { + ++m_vi; return *this; +} +inline CJson_ConstArray::const_iterator +CJson_ConstArray::const_iterator::operator++(int) { + const_iterator tmp(*this); ++m_vi; return tmp; +} +inline CJson_ConstArray::const_iterator& +CJson_ConstArray::const_iterator::operator+=(int i) { + m_vi += i; return *this; +} +inline CJson_ConstArray::const_iterator +CJson_ConstArray::const_iterator::operator+(int i) const { + return const_iterator(m_vi + i); +} +inline CJson_ConstArray::const_iterator& +CJson_ConstArray::const_iterator::operator--(void) { + --m_vi; return *this; +} +inline CJson_ConstArray::const_iterator 
+CJson_ConstArray::const_iterator::operator--(int) { + const_iterator tmp(*this); --m_vi; return tmp; +} +inline CJson_ConstArray::const_iterator& +CJson_ConstArray::const_iterator::operator-=(int i) { + m_vi -= i; return *this; +} +inline CJson_ConstArray::const_iterator +CJson_ConstArray::const_iterator::operator-(int i) const { + return const_iterator(m_vi - i); +} +inline const CJson_ConstNode& +CJson_ConstArray::const_iterator::operator*(void) const { + x_Impl(m_v) = m_vi; return m_v; +} +inline const CJson_ConstNode* +CJson_Array::const_iterator::operator->(void) const { + x_Impl(m_v) = m_vi; return &m_v; +} +inline CJson_ConstArray::const_iterator::distance_type +CJson_ConstArray::const_iterator::operator-(CJson_ConstArray::const_iterator vi) const { + return m_vi - vi.m_vi; +} + +inline CJson_ConstArray::const_iterator::const_iterator( + const CJson_ConstArray::_ImplCIterator vi) + : m_vi(const_cast(vi)), + m_v(JSONWRAPP_MAKENODE(0)) { +} +inline CJson_ConstArray::const_iterator::const_iterator( + const CJson_ConstArray::_ImplIterator vi) + : m_vi(vi), m_v(JSONWRAPP_MAKENODE(0)) { +} +// -------------------------------------------------------------------------- +// CJson_Array::iterator + +inline CJson_ConstArray::iterator::iterator(void) { +} +inline CJson_ConstArray::iterator::iterator(const CJson_Array::iterator& i) + : const_iterator(i) { +} +inline CJson_ConstArray::iterator& +CJson_ConstArray::iterator::operator=(const CJson_Array::iterator& vi) { + const_iterator::operator=(vi); return *this; +} +inline CJson_ConstArray::iterator +CJson_Array::iterator::operator+(int i) const { + return iterator(m_vi + i); +} +inline CJson_ConstArray::iterator +CJson_Array::iterator::operator-(int i) const { + return iterator(m_vi - i); +} +inline CJson_ConstArray::iterator& +CJson_ConstArray::iterator::operator++(void) { + ++m_vi; return *this; +} +inline CJson_ConstArray::iterator +CJson_ConstArray::iterator::operator++(int) { + iterator tmp(*this); ++m_vi; return 
tmp; +} +inline CJson_ConstArray::iterator& +CJson_ConstArray::iterator::operator--(void) { + --m_vi; return *this; +} +inline CJson_ConstArray::iterator +CJson_ConstArray::iterator::operator--(int) { + iterator tmp(*this); --m_vi; return tmp; +} +inline CJson_Node& +CJson_ConstArray::iterator::operator*(void) const { + x_Impl(m_v) = m_vi; return m_v; +} +inline CJson_Node* +CJson_ConstArray::iterator::operator->(void) const { + x_Impl(m_v) = m_vi; return &m_v; +} +inline CJson_ConstArray::iterator::distance_type +CJson_ConstArray::iterator::operator-(CJson_ConstArray::iterator vi) const { + return m_vi - vi.m_vi; +} +inline CJson_ConstArray::iterator::iterator( + const CJson_ConstArray::_ImplIterator vi) : const_iterator(vi) { +} + +// -------------------------------------------------------------------------- +// CJson_Object methods + +inline CJson_ConstObject::CJson_ConstObject( const CJson_ConstObject& n) + : CJson_ConstNode(n) { +} +inline CJson_ConstObject& +CJson_ConstObject::operator=(const CJson_ConstObject& n) { + CJson_ConstNode::operator=(n); return *this; +} +inline CJson_Object::CJson_Object( const CJson_Object& n) + : CJson_ConstObject(n), CJson_Node(n) { +} +inline CJson_Object& +CJson_Object::operator=(const CJson_Object& n) { + CJson_Node::operator=(n); return *this; +} +inline void CJson_Object::clear(void) { + m_Impl->RemoveAllMembers(); +} +inline size_t CJson_Object::erase(const CJson_Node::TKeyType& name) { + return m_Impl->RemoveMember(name.c_str()) ? 
1 : 0; +} +inline CJson_Object::iterator CJson_Object::erase(CJson_ConstObject::const_iterator _where) { + return CJson_Object::iterator( m_Impl->EraseMember( _where.m_vi)); +} +inline CJson_Object::iterator +CJson_Object::erase(CJson_ConstObject::const_iterator _first, CJson_ConstObject::const_iterator _last) { + return CJson_Object::iterator( m_Impl->EraseMember( _first.m_vi, _last.m_vi)); +} +inline size_t CJson_ConstObject::size(void) const { + return m_Impl->MemberCount(); +} +inline bool CJson_ConstObject::empty(void) const { + return m_Impl->ObjectEmpty(); +} +inline CJson_ConstNode +CJson_ConstObject::at(const CJson_Node::TKeyType& name) const { + if (!has(name)) { + throw std::out_of_range(name + " object member not found"); + } + return CJson_ConstNode(&(m_Impl->operator[](name.c_str()))); +} +inline CJson_Node +CJson_Object::at(const CJson_Node::TKeyType& name) { + if (!has(name)) { + throw std::out_of_range(name + " object member not found"); + } + return CJson_Object(&(m_Impl->operator[](name.c_str()))); +} +inline CJson_ConstNode +CJson_ConstObject::operator[](const CJson_Node::TKeyType& name) const { + return CJson_ConstNode(&(m_Impl->operator[](name.c_str()))); +} +inline CJson_Node +CJson_Object::operator[](const CJson_Node::TKeyType& name) { + if (!has(name)) { + insert(name); + } + return CJson_Node(&(m_Impl->operator[](name.c_str()))); +} + +inline void CJson_Object::insert(const CJson_Node::TKeyType& name) { + rapidjson::Value::AllocatorType& a = *(m_Impl->GetValueAllocator()); + rapidjson::Value sv_name(name.c_str(), a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetValueAllocator(&a), a); +} +// Implicit conversions are prohibited +// this may fail to compile +//template void CJson_Object::insert(const std::string& , T) =delete; +// this will compile: +template inline void CJson_Object::insert( + const CJson_Node::TKeyType& , const T&) { + CProhibited::Implicit_conversions_are_prohibited(); +} +template inline void CJson_Object::insert( 
+ const CJson_Node::TKeyType& , const T*) { + CProhibited::Implicit_conversions_are_prohibited(); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const bool& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetBool(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const Int4& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetInt(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const Uint4& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetUint(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const Int8& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetInt64(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const Uint8& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetUint64(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const float& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetDouble(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const 
double& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetDouble(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, + const CJson_Node::TCharType* value) { + rapidjson::Value::AllocatorType& a = *(m_Impl->GetValueAllocator()); + rapidjson::Value sv_name(name.c_str(), a); + rapidjson::Value sv_value(value, a); + m_Impl->AddMember( sv_name, sv_value, a); +} +#ifndef NCBI_COMPILER_WORKSHOP +template<> +#endif +inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, + const CJson_Node::TStringType& value) { + insert(name, value.c_str()); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const CJson_ConstNode& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetValueAllocator(a).CopyFrom( *v.m_Impl, *a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const CJson_Node& v) { + insert(name, v); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const CJson_ConstArray& v) { + insert(name, v); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const CJson_Array& v) { + insert(name, v); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const CJson_ConstObject& v) { + insert(name, v); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const CJson_Object& v) { + insert(name, v); +} +inline CJson_Array +CJson_Object::insert_array(const CJson_Node::TKeyType& name) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetArray().SetValueAllocator(a), *a); + return 
operator[](name).SetArray(); +} +inline CJson_Object +CJson_Object::insert_object(const CJson_Node::TKeyType& name) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetObject().SetValueAllocator(a), *a); + return operator[](name).SetObject(); +} +inline bool +CJson_ConstObject::has(const CJson_Node::TKeyType& name) const { + return m_Impl->HasMember(name.c_str()); +} + + +inline CJson_ConstObject::const_iterator +CJson_ConstObject::begin(void) const { + return const_iterator(m_Impl->MemberBegin()); +} +inline CJson_ConstObject::const_iterator +CJson_ConstObject::end(void) const { + return const_iterator(m_Impl->MemberEnd()); +} +inline CJson_ConstObject::const_iterator +CJson_ConstObject::find(const CJson_Node::TKeyType& name) const { + _ImplCIterator m = m_Impl->FindMember(name.c_str()); + return m ? const_iterator(m) : end(); +} + +inline CJson_Object::iterator +CJson_Object::begin(void) const { + return iterator(m_Impl->MemberBegin()); +} +inline CJson_Object::iterator +CJson_Object::end(void) const { + return iterator(m_Impl->MemberEnd()); +} +inline CJson_Object::iterator +CJson_Object::find(const CJson_Node::TKeyType& name) const { + _ImplIterator m = m_Impl->FindMember(name.c_str()); + return m ? 
iterator(m) : end(); +} + + +// -------------------------------------------------------------------------- +// CJson_Object::const_iterator + +inline CJson_ConstObject::const_iterator::const_iterator(void) + : m_vi(0) { +} +inline CJson_ConstObject::const_iterator::const_iterator( + const CJson_ConstObject::const_iterator& i) + : m_vi(i.m_vi) { +} +inline CJson_ConstObject::const_iterator::const_iterator( + const CJson_ConstObject::iterator& i) + : m_vi(i.m_vi) { +} +inline CJson_ConstObject::const_iterator& +CJson_ConstObject::const_iterator::operator++(void) { + ++m_vi; return *this; +} +inline CJson_ConstObject::const_iterator +CJson_ConstObject::const_iterator::operator++(int) { + const_iterator tmp(*this); ++m_vi; return tmp; +} +inline CJson_ConstObject::const_iterator& +CJson_ConstObject::const_iterator::operator--(void) { + --m_vi; return *this; +} +inline CJson_ConstObject::const_iterator +CJson_ConstObject::const_iterator::operator--(int) { + const_iterator tmp(*this); --m_vi; return tmp; +} +inline CJson_ConstObject::const_iterator& +CJson_ConstObject::const_iterator::operator=( + const CJson_ConstObject::const_iterator& vi) { + m_vi = vi.m_vi; return *this; +} +inline CJson_ConstObject::const_iterator& +CJson_ConstObject::const_iterator::operator=( + const CJson_ConstObject::iterator& vi) { + m_vi = vi.m_vi; return *this; +} +inline bool +CJson_ConstObject::const_iterator::operator!=( + const CJson_ConstObject::const_iterator& vi) const { + return m_vi != vi.m_vi; +} +inline bool +CJson_ConstObject::const_iterator::operator==( + const CJson_ConstObject::const_iterator& vi) const { + return m_vi == vi.m_vi; +} +inline bool +CJson_ConstObject::const_iterator::operator!=( + const CJson_ConstObject::iterator& vi) const { + return m_vi != vi.m_vi; +} +inline bool +CJson_ConstObject::const_iterator::operator==( + const CJson_ConstObject::iterator& vi) const { + return m_vi == vi.m_vi; +} + +inline CJson_ConstObject_pair::CJson_ConstObject_pair(void) + : 
name(0), value(JSONWRAPP_MAKENODE(0)) { +} +inline CJson_ConstObject_pair::CJson_ConstObject_pair( + const CJson_Node::TCharType* _name, const _Impl& _value) + : name(_name), value(JSONWRAPP_MAKENODE(const_cast<_Impl*>(&_value))) { +} +inline CJson_ConstObject_pair::CJson_ConstObject_pair(const CJson_Object_pair& p) + : name(p.name), value(JSONWRAPP_MAKENODE(const_cast<_Impl*>(p.value.m_Impl))) { +} +inline CJson_ConstObject_pair& +CJson_ConstObject_pair::assign( + const CJson_Node::TCharType* _name, const _Impl& _value) { + this->~CJson_ConstObject_pair(); + new (this) CJson_ConstObject_pair(_name, _value); + return *this; +} + +inline const CJson_ConstObject::const_iterator::pair& +CJson_ConstObject::const_iterator::operator*(void) const { + return m_pvi.assign(m_vi->name.GetString(), m_vi->value); +} +inline const CJson_ConstObject::const_iterator::pair* +CJson_ConstObject::const_iterator::operator->(void) const { + return &(m_pvi.assign(m_vi->name.GetString(), m_vi->value)); +} + +inline CJson_ConstObject::const_iterator::const_iterator( + const CJson_ConstObject::_ImplCIterator vi) + : m_vi(const_cast(vi)) { +} +inline CJson_ConstObject::const_iterator::const_iterator( + const CJson_ConstObject::_ImplIterator vi) + : m_vi(vi) { +} + +// -------------------------------------------------------------------------- +// CJson_Object::iterator + +inline CJson_ConstObject::iterator::iterator(void) { +} +inline CJson_ConstObject::iterator::iterator( + const CJson_ConstObject::iterator& i) : m_vi(i.m_vi) { +} +inline CJson_ConstObject::iterator& +CJson_ConstObject::iterator::operator=(const CJson_ConstObject::iterator& vi) { + m_vi = vi.m_vi; return *this; +} +inline bool +CJson_ConstObject::iterator::operator!=( + const CJson_ConstObject::iterator& vi) const { + return m_vi != vi.m_vi; +} +inline bool +CJson_ConstObject::iterator::operator==( + const CJson_ConstObject::iterator& vi) const { + return m_vi == vi.m_vi; +} +inline bool 
+CJson_ConstObject::iterator::operator!=( + const CJson_ConstObject::const_iterator& vi) const { + return m_vi != vi.m_vi; +} +inline bool +CJson_ConstObject::iterator::operator==( + const CJson_ConstObject::const_iterator& vi) const { + return m_vi == vi.m_vi; +} +inline CJson_ConstObject::iterator& +CJson_ConstObject::iterator::operator++(void) { + ++m_vi; return *this; +} +inline CJson_ConstObject::iterator +CJson_ConstObject::iterator::operator++(int) { + iterator tmp(*this); ++m_vi; return tmp; +} +inline CJson_ConstObject::iterator& +CJson_ConstObject::iterator::operator--(void) { + --m_vi; return *this; +} +inline CJson_ConstObject::iterator +CJson_ConstObject::iterator::operator--(int) { + iterator tmp(*this); --m_vi; return tmp; +} +inline CJson_ConstObject::iterator::pair& +CJson_ConstObject::iterator::operator*(void) const { + return m_pvi.assign(m_vi->name.GetString(), m_vi->value); +} +inline CJson_ConstObject::iterator::pair* +CJson_ConstObject::iterator::operator->(void) const { + return &(m_pvi.assign(m_vi->name.GetString(), m_vi->value)); +} + +inline CJson_Object_pair::CJson_Object_pair(void) + : name(0), value(JSONWRAPP_MAKENODE(0)) { +} +inline CJson_Object_pair::CJson_Object_pair( + const CJson_Node::TCharType* _name, _Impl& _value) + : name(_name), value(JSONWRAPP_MAKENODE(&_value)) { +} +inline CJson_Object_pair& +CJson_Object_pair::assign( + const CJson_Node::TCharType* _name, _Impl& _value) { + this->~CJson_Object_pair(); + new (this) CJson_Object_pair(_name, _value); + return *this; +} + +inline CJson_ConstObject::iterator::iterator( + const CJson_Object::_ImplIterator vi) + : m_vi(vi) { +} + +// -------------------------------------------------------------------------- +// CJson_WalkHandler methods + +inline CJson_WalkHandler::CJson_WalkHandler(void) + : m_in(0) { + m_object_type.push_back(true); m_index.push_back(size_t(-1)); + m_name.push_back(kEmptyStr); +} + +inline bool CJson_WalkHandler::x_Notify(const rapidjson::Value& v) { + bool 
ret = true; + if (m_object_type.back()) { + ret = BeginObjectMember(m_name[m_name.size()-2], m_name.back()); + if (ret) { + ret = PlainMemberValue( m_name[m_name.size()-2], m_name.back(), + const_cast(&v)); + } + return ret; + } + ret = BeginArrayElement(m_name[m_name.size()-2], m_index.back()); + if (ret) { + PlainElementValue(m_name[m_name.size()-2], m_index.back(), + const_cast(&v)); + } + ++(m_index.back()); + return ret; +} +inline bool CJson_WalkHandler::x_BeginObjectOrArray(bool object_type) { + bool ret = true; + if (m_object_type.size() > 1) { + if (m_object_type.back()) { + ret = BeginObjectMember(m_name[m_name.size()-2], m_name.back()); + } else { + ret = BeginArrayElement(m_name[m_name.size()-2], m_index.back()); + } + } + if (ret) { + m_object_type.push_back(object_type); m_index.push_back(size_t(-1)); + m_name.push_back(kEmptyStr); + } + return ret; +} +inline void CJson_WalkHandler::x_EndObjectOrArray(void) { + m_object_type.pop_back(); m_index.pop_back(); m_name.pop_back(); + if (!m_object_type.back()) { + ++(m_index.back()); + } +} +inline bool CJson_WalkHandler::Null() { + return x_Notify( rapidjson::Value().SetNull()); +} +inline bool CJson_WalkHandler::Bool(bool v) { + rapidjson::Value jv(v); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::Int(int v) { + rapidjson::Value jv(v); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::Uint(unsigned v) { + rapidjson::Value jv(v); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::Int64(int64_t v) { + rapidjson::Value jv(v); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::Uint64(uint64_t v) { + rapidjson::Value jv(v); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::Double(double v) { + rapidjson::Value jv(v); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::String(const Ch* buf, + rapidjson::SizeType sz, bool) { + rapidjson::Value jv(buf,sz); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::Key(const Ch* buf, + rapidjson::SizeType sz, bool c) 
{ + m_name.back().assign(buf, sz); + return true; +} + +inline bool CJson_WalkHandler::StartObject() { + if (!x_BeginObjectOrArray(true)) { + return false; + } + return BeginObject(m_name[m_name.size()-2]); +} +inline bool CJson_WalkHandler::EndObject(rapidjson::SizeType) { + m_name.back().clear(); + bool ret = EndObject(m_name[m_name.size()-2]); + x_EndObjectOrArray(); + return ret; +} +inline bool CJson_WalkHandler::StartArray() { + if (!x_BeginObjectOrArray(false)) { + return false; + } + bool ret = BeginArray(m_name[m_name.size()-2]); + m_index.back() = 0; + return ret; +} +inline bool CJson_WalkHandler::EndArray(rapidjson::SizeType) { + m_index.back() = size_t(-1); + bool ret = EndArray(m_name[m_name.size()-2]); + x_EndObjectOrArray(); + return ret; +} + +inline CJson_Node::TKeyType +CJson_WalkHandler::GetCurrentJPath(void) const { + std::vector::const_iterator t = m_object_type.begin(); + std::vector::const_iterator te = m_object_type.end(); + std::vector::const_iterator i = m_index.begin(); + std::vector::const_iterator n = m_name.begin(); + CJson_Node::TKeyType path; + for ( ++t, ++i, ++n; t != te; ++t, ++i, ++n) { + if (*t) { + path += JSONWRAPP_TO_NCBIUTF8("/"); + path += JSONWRAPP_TO_NCBIUTF8(*n); + } else if (*i != size_t(-1)) { + path += JSONWRAPP_TO_NCBIUTF8("["); + path += JSONWRAPP_TO_NCBIUTF8(ncbi::NStr::NumericToString(*i)); + path += JSONWRAPP_TO_NCBIUTF8("]"); + } + } + return path; +} + +inline bool CJson_WalkHandler::Read(CJson_Document& doc) { + bool b = false; + if (m_in) { + m_in->unget(); + b = doc.Read(*m_in); + m_in->unget(); + } + return b; +} + +// -------------------------------------------------------------------------- +// CJson_Document methods + +inline CJson_Document::CJson_Document( CJson_Value::EJsonType type) { + switch (type) { + default: + case CJson_Node::eObject: m_DocImpl.SetObject(); break; + case CJson_Node::eArray: m_DocImpl.SetArray(); break; + case CJson_Node::eNull: m_DocImpl.SetNull(); break; + case 
CJson_Node::eBool: m_DocImpl.SetBool(false); break; + case CJson_Node::eNumber: m_DocImpl.SetInt(0); break; + case CJson_Node::eString: m_DocImpl.SetString(kEmptyCStr,0); break; + } + m_Impl = &m_DocImpl; +} +inline CJson_Document::CJson_Document(const CJson_ConstNode::TStringType& v) { + m_DocImpl.Parse >(v.c_str()); + m_Impl = &m_DocImpl; +} +inline CJson_Document::CJson_Document(const CJson_Document& v) { + m_DocImpl.CopyFrom(*v.m_Impl, m_DocImpl.GetAllocator()); + m_Impl = &m_DocImpl; +} +inline CJson_Document& CJson_Document::operator=(const CJson_Document& v) { + m_DocImpl.CopyFrom(*v.m_Impl, m_DocImpl.GetAllocator()); + return *this; +} +inline CJson_Document::CJson_Document(const CJson_ConstNode& v) { + m_DocImpl.CopyFrom(*v.m_Impl, m_DocImpl.GetAllocator()); + m_Impl = &m_DocImpl; +} +inline CJson_Document& CJson_Document::operator=(const CJson_ConstNode& v) { + m_DocImpl.CopyFrom(*v.m_Impl, m_DocImpl.GetAllocator()); + return *this; +} + +inline bool CJson_Document::ParseString(const CJson_ConstNode::TStringType& v) { + m_DocImpl.Parse >(v.c_str()); + return !m_DocImpl.HasParseError(); +} + +inline bool CJson_Document::Read(std::istream& in) { + rapidjson::CppIStream ifs(in); + m_DocImpl.ParseStream(ifs); + return !m_DocImpl.HasParseError(); +} + +inline bool CJson_Document::ReadSucceeded(void) { + return !m_DocImpl.HasParseError(); +} +inline std::string CJson_Document::GetReadError() const { + return rapidjson::GetParseError_En(m_DocImpl.GetParseError()); +} + +inline void CJson_Document::Write(std::ostream& out, + TJson_Write_Flags flags, unsigned int indent_char_count) const { + rapidjson::CppOStream ofs(out); + rapidjson::PrettyWriter writer(ofs); + if (flags & fJson_Write_NoIndentation) { + writer.SetIndent(' ', 0); + } else { + writer.SetIndent( (flags & fJson_Write_IndentWithTab) ? 
'\t' : ' ', indent_char_count); + } + if (flags & fJson_Write_NoEol) { + writer.SetWriteEol(false); + } + m_DocImpl.Accept(writer); +} + +inline void CJson_Document::Walk(CJson_WalkHandler& walk) const { + walk.x_SetSource(0); + m_DocImpl.Accept(walk); +} + +inline void CJson_Document::Walk(std::istream& in, + CJson_WalkHandler& walk) { + walk.x_SetSource(&in); + rapidjson::CppIStream ifs(in); + rapidjson::Reader rdr; + rdr.Parse(ifs,walk); +} + +END_NCBI_SCOPE + +#endif /* MISC_JSONWRAPP___JSONWRAPP10__HPP */ + + diff --git a/c++/include/misc/jsonwrapp/jsonwrapp11.hpp b/c++/include/misc/jsonwrapp/jsonwrapp11.hpp new file mode 100644 index 00000000..f09cce5f --- /dev/null +++ b/c++/include/misc/jsonwrapp/jsonwrapp11.hpp @@ -0,0 +1,2450 @@ +#ifndef MISC_JSONWRAPP___JSONWRAPP11__HPP +#define MISC_JSONWRAPP___JSONWRAPP11__HPP + +/* $Id: jsonwrapp11.hpp 547979 2017-10-06 16:50:25Z gouriano $ +* =========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. 
+* +* Please cite the author in any work or product based on this material. +* +* =========================================================================== +* +* Author: Andrei Gourianov +* +* File Description: +* Wrapper API to work with JSON data +* JSON format: http://www.ietf.org/rfc/rfc7159.txt +* JSON pointer: https://tools.ietf.org/html/rfc6901 +* JSON schema: http://json-schema.org/documentation.html +* +* Internally, data of any type is stored in a universal container. +* We define different API classes here for the sake of semantics only. +* +* This implementation uses object adapter pattern, in which adapter contains +* a pointer to an instance of a class it wraps. +* Please note, objects of classes defined here act like pointers, +* ie, creating, or copying them does not create any data, their destruction +* does not destroy any data either. +* +* We prohibit creation of standalone JSON value object; +* only document objects can be created and copied. That is, +* all values are associated with a specific document only. +* When a document is destroyed, all its values are destroyed as well. +* So, to create a value, one should add it into a document (or into JSON +* array or object) and get a proper adapter object. +* +* Classes that store JSON value: +* CJson_ConstNode, CJson_Node -- base class; +* CJson_ConstValue, CJson_Value -- primitive type data +* (string, number, boolean, null); +* CJson_ConstArray, CJson_Array -- JSON array; +* CJson_ConstObject, CJson_Object -- JSON object; +* CJson_Document -- serializable JSON data container - array or object. +* +* Sequential access parsing event listener: +* CJson_WalkHandler -- define your own class derived from this one. +* +* Internally, the wrapper uses RapidJSON library, v1.1.0 +* https://github.com/miloyip/rapidjson +* Please, DO NOT USE RapidJSON directly, as this may change. 
+*/ + +#include +#include +#include + +#define RAPIDJSON_NOMEMBERITERATORCLASS +#include "rapidjson11/rapidjson.h" +#include "rapidjson11/document.h" +#include "rapidjson11/prettywriter.h" +#include "rapidjson11/filereadstream.h" +#include "rapidjson11/istreamwrapper.h" +#include "rapidjson11/filewritestream.h" +#include "rapidjson11/ostreamwrapper.h" +#include "rapidjson11/error/en.h" +#include "rapidjson11/schema.h" + + +BEGIN_NCBI_SCOPE + +class CJson_Document; +class CJson_ConstValue; +class CJson_Value; +class CJson_ConstArray; +class CJson_Array; +class CJson_ConstObject; +class CJson_Object; + +enum EJson_Write_Flags { + fJson_Write_IndentWithSpace = 0, ///< use space (' ') symbol for indentation + fJson_Write_NoIndentation = (1 << 0), ///< do not use indentation + fJson_Write_NoEol = (1 << 1), ///< do not write end-of-line symbol + fJson_Write_IndentWithTab = (1 << 2) ///< use tab ('\t') symbol for indentation +}; +typedef unsigned int TJson_Write_Flags; + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Node +/// +/// Container for JSON value. +/// A JSON value must be an object, array, number, or string, or one of +/// the following three literal names: false null true. +/// The class provides basic access methods only. +/// To get access to value data, one should "get" (cast) it as +/// Value, Array or Object and use an appropriate API. 
+ +class CJson_ConstNode +{ +protected: + typedef rapidjson::Value _Impl; + +public: + typedef char TCharType; + typedef ncbi::CStringUTF8 TStringType; + typedef ncbi::CStringUTF8 TKeyType; + +public: + /// Value type + enum EJsonType { + eNull, ///< null + eBool, ///< bool + eNumber, ///< number + eString, ///< string + eArray, ///< array + eObject ///< object + }; + /// Get value type + EJsonType GetType(void) const; + + bool IsNull( void) const; + bool IsValue( void) const; + bool IsArray( void) const; + bool IsObject( void) const; + + /// Get JSON value contents of the node + CJson_ConstValue GetValue(void) const; + + /// Get JSON array contents of the node + CJson_ConstArray GetArray(void) const; + + /// Get JSON object contents of the node + CJson_ConstObject GetObject(void) const; + + /// Check if there is a node for JSON pointer + /// + bool HasNode(const TKeyType& value) const; + + /// Get node by JSON pointer + /// If node not found, method throws std::out_of_range exception + CJson_ConstNode GetNode(const TKeyType& value) const; + + /// Convert the contents of the node into string + /// + /// @note Method returns JSON text representation of the node. + /// When the node is a string, this differs from the node value, + /// because JSON representation of a string includes quotation marks. + /// To get string value of such node, use GetValue().GetString(). 
+ /// For example: + /// node.ToString() will return "value" + /// node.GetValue().GetString() will return value + std::string ToString(TJson_Write_Flags flags = fJson_Write_IndentWithSpace, + unsigned int indent_char_count = 4) const; + + ~CJson_ConstNode(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstNode(const CJson_ConstNode& n); + /// Note: this does not copy Node data + /// Instead, both Node object will point to the same data + CJson_ConstNode& operator=(const CJson_ConstNode& n); + + bool operator!=(const CJson_ConstNode& n) const; + bool operator==(const CJson_ConstNode& n) const; + +protected: + CJson_ConstNode(void) : m_Impl(0) { + } + CJson_ConstNode(_Impl* impl) : m_Impl(impl) { + } + _Impl* m_Impl; + static _Impl*& x_Impl(CJson_ConstNode& v){ + return v.m_Impl; + } + friend class CJson_Node; + friend class CJson_ConstValue; + friend class CJson_Array; + friend class CJson_ConstArray; + friend class CJson_ConstObject; + friend class CJson_Object; + friend class CJson_WalkHandler; + friend class CJson_Document; + friend class CJson_ConstObject_pair; + friend class CJson_Object_pair; +}; + +///////////////////////////////////////////////////////////////////////////// + +class CJson_Node : virtual public CJson_ConstNode +{ +public: + /// Erase node data and convert it into JSON NULL value + CJson_Node& SetNull(void); + + /// Erase node data and convert it into JSON value + CJson_Value ResetValue(void); + + /// Get JSON value contents of the node + CJson_Value SetValue(void); + + /// Erase node data and convert it into JSON array + CJson_Array ResetArray(void); + + /// Get JSON array contents of the node + CJson_Array SetArray(void); + + /// Erase node data and convert it into JSON object + CJson_Object ResetObject(void); + + /// Get JSON object contents of the node + CJson_Object SetObject(void); + + /// Get node by JSON pointer + /// If node not found, it will be created + 
CJson_Node SetNode(const TKeyType& value); + + ~CJson_Node(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Node(const CJson_Node& n); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Node& operator=(const CJson_Node& n); + + /// Copy Node contents data into this node + CJson_Node& AssignCopy(const CJson_ConstNode& n); + +protected: + CJson_Node(void) { + } + CJson_Node(_Impl* impl) : CJson_ConstNode(impl) { + m_Impl = impl; + } + friend class CJson_Value; + friend class CJson_Array; + friend class CJson_ConstArray; + friend class CJson_Object; + friend class CJson_ConstObject; + friend class CJson_Document; + friend class CJson_ConstObject_pair; + friend class CJson_Object_pair; +}; + + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Value +/// +/// Standard type JSON value. + +class CJson_ConstValue : virtual public CJson_ConstNode +{ +public: + /// Test if value type is compatible with C++ type + bool IsBool( void) const; + bool IsNumber( void) const; + bool IsInt4( void) const; + bool IsUint4( void) const; + bool IsInt8( void) const; + bool IsUint8( void) const; + bool IsDouble( void) const; + bool IsString( void) const; + + /// Get primitive value data + bool GetBool( void) const; + Int4 GetInt4( void) const; + Uint4 GetUint4( void) const; + Int8 GetInt8( void) const; + Uint8 GetUint8( void) const; + double GetDouble( void) const; + TStringType GetString(void) const; + size_t GetStringLength(void) const; + + ~CJson_ConstValue(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstValue(const CJson_ConstValue& n); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstValue& operator=(const CJson_ConstValue& n); + +protected: + CJson_ConstValue(void) {} + 
CJson_ConstValue(_Impl* impl) : CJson_ConstNode(impl) { + m_Impl = impl; + } + friend class CJson_ConstNode; + friend class CJson_WalkHandler; +}; + +///////////////////////////////////////////////////////////////////////////// + +class CJson_Value : public CJson_ConstValue, public CJson_Node +{ +public: + /// Set primitive value data + CJson_Value& SetBool( bool value); + CJson_Value& SetInt4( Int4 value); + CJson_Value& SetUint4( Uint4 value); + CJson_Value& SetInt8( Int8 value); + CJson_Value& SetUint8( Uint8 value); + CJson_Value& SetDouble(double value); + CJson_Value& SetString(const TStringType& value); + + ~CJson_Value(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Value(const CJson_Value& n); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Value& operator=(const CJson_Value& n); + +protected: + CJson_Value(void) { + } + CJson_Value( _Impl* impl) : CJson_ConstValue(impl), CJson_Node(impl) { + } + friend class CJson_Node; +}; + + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Array +/// +/// JSON array is an ordered sequence of zero or more values. +/// The class provides API to populate and explore a JSON array + +class CJson_ConstArray : virtual public CJson_ConstNode +{ +protected: + typedef rapidjson::Value::ValueIterator _ImplIterator; + typedef rapidjson::Value::ConstValueIterator _ImplCIterator; + +public: + typedef CJson_ConstNode value_type; + typedef CJson_ConstNode& const_reference; + + /// Random-access iterator to access const JSON array element. + /// It is designed to resemble std::vector::const_iterator class. 
+ class const_iterator { + public: + typedef std::random_access_iterator_tag iterator_category; + typedef CJson_ConstNode value_type; + typedef std::ptrdiff_t difference_type; + typedef std::ptrdiff_t distance_type; + typedef CJson_ConstNode* pointer; + typedef CJson_ConstNode& reference; + + ~const_iterator(void) {} + const_iterator(void); + const_iterator(const const_iterator& vi); + const_iterator& operator=(const const_iterator& vi); + + /// Comparison + bool operator!=(const const_iterator& vi) const; + bool operator==(const const_iterator& vi) const; + bool operator<( const const_iterator& vi) const; + bool operator<=(const const_iterator& vi) const; + bool operator>( const const_iterator& vi) const; + bool operator>=(const const_iterator& vi) const; + + /// Increment and decrement + const_iterator& operator++(void); + const_iterator operator++(int); + const_iterator& operator+=(int); + const_iterator operator+(int) const; + const_iterator& operator--(void); + const_iterator operator--(int); + const_iterator& operator-=(int); + const_iterator operator-(int) const; + + /// Dereference + const CJson_ConstNode& operator*( void) const; + const CJson_ConstNode* operator->(void) const; + + /// Distance + distance_type operator-(const_iterator vi) const; + protected: + const_iterator(const _ImplCIterator vi); + const_iterator(const _ImplIterator vi); + _ImplIterator m_vi; + mutable CJson_Node m_v; + friend class CJson_ConstArray; + friend class CJson_Array; + }; + + /// Random-access iterator to access non-const JSON array element. + /// It is designed to resemble std::vector::iterator class. 
+ class iterator : public const_iterator { + public: + typedef std::random_access_iterator_tag iterator_category; + typedef CJson_Node value_type; + typedef std::ptrdiff_t difference_type; + typedef std::ptrdiff_t distance_type; + typedef CJson_Node* pointer; + typedef CJson_Node& reference; + + ~iterator(void) {} + iterator(void); + iterator(const iterator& i); + iterator& operator=(const iterator& vi); + + /// Increment and decrement + iterator& operator++(void); + iterator operator++(int); + iterator operator+(int) const; + iterator& operator--(void); + iterator operator--(int); + iterator operator-(int) const; + + /// Dereference + CJson_Node& operator*(void) const; + CJson_Node* operator->(void) const; + + /// Distance + distance_type operator-(iterator vi) const; + + private: + iterator(const _ImplIterator vi); + friend class CJson_Array; + }; + +public: + + /// Return the number of elements in the array + size_t size(void) const; + + /// Return the number of elements that the array could contain without + /// allocating more storage. + size_t capacity(void) const; + + /// Test if the array is empty + bool empty(void) const; + + /// Return a reference to the element at a specified location in the array + /// If index is greater than or equal to the size of the array, + /// the function throws std::out_of_range exception + CJson_ConstNode at(size_t index) const; + + /// Return a reference to the element at a specified location in the array + /// If index is greater than or equal to the size of the array, + /// the result is undefined + CJson_ConstNode operator[](size_t index) const; + + /// Return a reference to the first element in the array + /// If the array is empty, the result is undefined + CJson_ConstNode front(void) const; + + /// Return a reference to the last element of the array. 
+ /// If the array is empty, the result is undefined + CJson_ConstNode back(void) const; + + /// Return a random-access iterator to the first element in the array + const_iterator begin(void) const; + + /// Return a random-access iterator that points just beyond the end of + /// the array + const_iterator end(void) const; + + ~CJson_ConstArray(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstArray(const CJson_ConstArray& n); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstArray& operator=(const CJson_ConstArray& n); + +protected: + CJson_ConstArray(void) { + } + CJson_ConstArray(_Impl* impl) : CJson_ConstNode(impl) { + m_Impl = impl; + } + friend class CJson_ConstNode; +}; + +///////////////////////////////////////////////////////////////////////////// + +class CJson_Array : public CJson_ConstArray, public CJson_Node +{ +public: + typedef CJson_Node value_type; + typedef CJson_Node& reference; + typedef CJson_ConstArray::const_iterator const_iterator; + typedef CJson_ConstArray::iterator iterator; + + /// Reserve a minimum length of storage for the array object + void reserve(size_t count); + + /// Erase all elements of the array + void clear(void); + + /// Remove an element + iterator erase(const_iterator _where); + + /// Remove a range of elements + iterator erase(const_iterator _first, const_iterator _last); + + /// Return a reference to the element at a specified location in the array + /// If index is greater than or equal to the size of the array, + /// the function throws std::out_of_range exception + CJson_Node at(size_t index); + + /// Return a reference to the element at a specified location in the array + /// If index is greater than or equal to the size of the array, + /// the result is undefined + CJson_Node operator[](size_t index); + + /// Return a reference to the first element in the array + /// If the array 
is empty, the result is undefined + CJson_Node front(void); + + /// Return a reference to the last element of the array. + /// If the array is empty, the result is undefined + CJson_Node back(void); + + /// Add null element to the end of the array. + void push_back(void); //null value + + /// Add primitive type element to the end of the array. +#ifndef NCBI_COMPILER_WORKSHOP + template void push_back(const T&); // primitive and string + template void push_back(const T*); +#else + void push_back(const bool& v); + void push_back(const Int4& v); + void push_back(const Uint4& v); + void push_back(const Int8& v); + void push_back(const Uint8& v); + void push_back(const float& v); + void push_back(const double& v); + void push_back(const CJson_Node::TCharType* v); + void push_back(const CJson_Node::TStringType& v); + void push_back(const CJson_ConstNode& v); +#endif + + /// Add array type element to the end of the array. + CJson_Array push_back_array(void); + + /// Add object type element to the end of the array. 
+ CJson_Object push_back_object(void); + + /// Delete the element at the end of the array + void pop_back(void); + + /// Return a random-access iterator to the first element in the array + iterator begin(void) const; + + /// Return a random-access iterator that points just beyond the end of + /// the array + iterator end(void) const; + + ~CJson_Array(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Array(const CJson_Array& n); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Array& operator=(const CJson_Array& n); + +protected: + CJson_Array(void) { + } + CJson_Array(_Impl* impl) : CJson_ConstArray(impl), CJson_Node(impl) { + } + friend class CJson_Node; + friend class CJson_Object; + template class CProhibited {}; +}; + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_ConstObject value type - [name,value] pair. + +class CJson_Object_pair; +class CJson_ConstObject_pair { +protected: + typedef rapidjson::Value _Impl; +public: + const CJson_Node::TCharType* name; + const CJson_ConstNode value; + + ~CJson_ConstObject_pair(void) {} + CJson_ConstObject_pair(void); + CJson_ConstObject_pair(const CJson_Node::TCharType* _name, const _Impl& _value); + CJson_ConstObject_pair(const CJson_Object_pair& p); + CJson_ConstObject_pair& assign( + const CJson_Node::TCharType* _name, const _Impl& _value); +}; + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Object value type - [name,value] pair. 
+ +class CJson_Object_pair { +protected: + typedef rapidjson::Value _Impl; +public: + const CJson_Node::TCharType* name; + CJson_Node value; + + ~CJson_Object_pair(void) {} + CJson_Object_pair(void); + CJson_Object_pair(const CJson_Node::TCharType* _name, _Impl& _value); + CJson_Object_pair& assign(const CJson_Node::TCharType* _name, _Impl& _value); +}; + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Object +/// +/// A JSON object is an unordered collection of name/value pairs. +/// The class provides API to populate and explore a JSON object + +class CJson_ConstObject : virtual public CJson_ConstNode +{ +protected: + typedef rapidjson::Value::MemberIterator _ImplIterator; + typedef rapidjson::Value::ConstMemberIterator _ImplCIterator; + +public: + typedef CJson_ConstObject_pair value_type; + class iterator; + + /// Bidirectional iterator to access const JSON object element. + /// It is designed to resemble std::map::const_iterator class. + /// Dereferencing the iterator returns [name,value] pair. 
+ class const_iterator { + public: + typedef CJson_ConstObject_pair pair; + + typedef std::bidirectional_iterator_tag iterator_category; + typedef CJson_ConstObject::const_iterator::pair value_type; + typedef std::ptrdiff_t difference_type; + typedef std::ptrdiff_t distance_type; + typedef CJson_ConstObject::const_iterator::pair* pointer; + typedef CJson_ConstObject::const_iterator::pair& reference; + + ~const_iterator(void) {} + const_iterator(void); + const_iterator(const const_iterator& vi); + const_iterator& operator=(const const_iterator& vi); + const_iterator(const iterator& vi); + const_iterator& operator=(const iterator& vi); + + /// Comparison + bool operator!=(const const_iterator& vi) const; + bool operator==(const const_iterator& vi) const; + bool operator!=(const iterator& vi) const; + bool operator==(const iterator& vi) const; + + /// Increment and decrement + const_iterator& operator++(void); + const_iterator operator++(int); + const_iterator& operator--(void); + const_iterator operator--(int); + + /// Dereference + const pair& operator*(void) const; + const pair* operator->(void) const; + + protected: + const_iterator(const _ImplCIterator vi); + const_iterator(const _ImplIterator vi); + _ImplIterator m_vi; + mutable pair m_pvi; + friend class CJson_ConstObject; + friend class CJson_Object; + friend class iterator; + }; + + /// Bidirectional iterator to access non-const JSON object element. + /// It is designed to resemble std::map::iterator class. + /// Dereferencing the iterator returns [name,value] pair. 
+ class iterator { + public: + typedef CJson_Object_pair pair; + + typedef std::bidirectional_iterator_tag iterator_category; + typedef CJson_ConstObject::iterator::pair value_type; + typedef std::ptrdiff_t difference_type; + typedef std::ptrdiff_t distance_type; + typedef CJson_ConstObject::iterator::pair* pointer; + typedef CJson_ConstObject::iterator::pair& reference; + + ~iterator(void) {} + iterator(void); + iterator(const iterator& i); + iterator& operator=(const iterator& vi); + + /// Comparison + bool operator!=(const iterator& vi) const; + bool operator==(const iterator& vi) const; + bool operator!=(const const_iterator& vi) const; + bool operator==(const const_iterator& vi) const; + + /// Increment and decrement + iterator& operator++(void); + iterator operator++(int); + iterator& operator--(void); + iterator operator--(int); + + /// Dereference + pair& operator*(void) const; + pair* operator->(void) const; + + private: + iterator(const _ImplIterator vi); + _ImplIterator m_vi; + mutable pair m_pvi; + friend class CJson_ConstObject; + friend class CJson_Object; + friend class const_iterator; + }; + + /// Return the number of elements in the object + size_t size(void) const; + + /// Test if the object is empty + bool empty(void) const; + + /// Access an element with a given name. + /// If such element was not found, the function throws std::out_of_range exception + CJson_ConstNode at(const CJson_Node::TKeyType& name) const; + + /// Access an element with a given name. + /// If such element does not exist in this object, the result is undefined. + CJson_ConstNode operator[](const CJson_Node::TKeyType& name) const; + + /// Return an iterator that points to the first element in the object + const_iterator begin(void) const; + + /// Return an iterator that points to the location after the last element. + const_iterator end(void) const; + + /// Return an iterator that points to the location of the element. 
+ const_iterator find(const CJson_Node::TKeyType& name) const; + + /// Test if an element with this name exists in the object + bool has(const CJson_Node::TKeyType& name) const; + + ~CJson_ConstObject(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstObject(const CJson_ConstObject& v); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_ConstObject& operator=(const CJson_ConstObject& v); + +protected: + CJson_ConstObject(void) { + } + CJson_ConstObject( _Impl* impl) : CJson_ConstNode(impl) { + m_Impl = impl; + } + friend class CJson_ConstNode; +}; + +///////////////////////////////////////////////////////////////////////////// + +class CJson_Object : public CJson_ConstObject, public CJson_Node +{ +public: + typedef CJson_ConstObject::const_iterator const_iterator; + typedef CJson_ConstObject::iterator iterator; + typedef CJson_Object_pair value_type; + + /// Erase all elements of the object + void clear(void); + + /// Remove an element with a given name from the object + /// Returns the number of elements that have been removed + size_t erase(const CJson_Node::TKeyType& name); + + /// Remove an element + iterator erase(const_iterator _where); + + /// Remove a range of elements + iterator erase(const_iterator _first, const_iterator _last); + + /// Access an element with a given name. + /// If such element was not found, the function throws std::out_of_range exception + CJson_Node at(const CJson_Node::TKeyType& name); + + /// Access an element with a given name. + /// If such element does not exist in this object, it will be added. 
+ CJson_Node operator[](const CJson_Node::TKeyType& name); + + /// Insert null element into the object + void insert(const CJson_Node::TKeyType& name); + + /// Insert primitive type element into the object + template void insert(const CJson_Node::TKeyType& name, const T&); + template void insert(const CJson_Node::TKeyType& name, const T*); + +#ifdef NCBI_COMPILER_WORKSHOP + void insert(const CJson_Node::TKeyType& name, + const CJson_Node::TStringType& value); +#endif + + /// Insert array type element into the object + CJson_Array insert_array( const CJson_Node::TKeyType& name); + + /// Insert object type element into the object + CJson_Object insert_object(const CJson_Node::TKeyType& name); + + /// Return an iterator that points to the first element in the object + iterator begin(void) const; + + /// Return an iterator that points to the location after the last element. + iterator end(void) const; + + /// Return an iterator that points to the location of the element. + iterator find(const CJson_Node::TKeyType& name) const; + + ~CJson_Object(void) {} + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Object(const CJson_Object& v); + /// Note: this does not copy Node data + /// Instead, both Node objects will point to the same data + CJson_Object& operator=(const CJson_Object& v); + +protected: + CJson_Object(void) { + } + CJson_Object(_Impl* impl) : CJson_ConstObject(impl), CJson_Node(impl) { + } + friend class CJson_Node; + friend class CJson_Array; + template class CProhibited {}; +}; + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_WalkHandler +/// +/// Sequential access parsing event listener. +/// Provides a mechanism for reading data from a JSON file or document +/// as a series of events. 
+ +class CJson_WalkHandler : public rapidjson::BaseReaderHandler<> +{ +public: + CJson_WalkHandler(void); + virtual ~CJson_WalkHandler(void) {} + + /// Begin reading object contents + /// + /// @param name + /// Name of this object in the parent object, or empty string + /// if this object has no parent. + virtual bool BeginObject(const CJson_Node::TKeyType& /*name*/) { + return true; + } + + /// Begin reading object member + /// + /// Right after this event, there can be one of the following only: + /// EndObject, BeginObject, BeginArray, or PlainMemberValue. + /// + /// @param name + /// Name of this object in the parent object, or empty string + /// if this object has no parent. + /// @param member + /// Member name + virtual bool BeginObjectMember(const CJson_Node::TKeyType& /*name*/, + const CJson_Node::TKeyType& /*member*/) { + return true; + } + + /// Primitive type data has been read + /// + /// @param name + /// Name of this object in the parent object, or empty string + /// if this object has no parent. + /// @param member + /// Member name + /// @param value + /// JSON value + virtual bool PlainMemberValue(const CJson_Node::TKeyType& /*name*/, + const CJson_Node::TKeyType& /*member*/, + const CJson_ConstValue& /*value*/) { + return true; + } + + /// End reading object contents + /// + /// @param name + /// Name of this object in the parent object, or empty string + /// if this object has no parent. + virtual bool EndObject(const CJson_Node::TKeyType& /*name*/) { + return true; + } + + + /// Begin reading array contents + /// + /// @param name + /// Name of this array in the parent object, or empty string + /// if this array has no parent. + virtual bool BeginArray(const CJson_Node::TKeyType& /*name*/) { + return true; + } + + /// Begin reading array element + /// + /// Right after this event, there can be one of the following only: + /// EndArray, BeginObject, BeginArray, or PlainElementValue. 
+ /// + /// @param name + /// Name of this array in the parent object, or empty string + /// if this array has no parent. + /// @param index + /// Index of the array element + virtual bool BeginArrayElement(const CJson_Node::TKeyType& /*name*/, + size_t /*index*/) { + return true; + } + + /// Primitive type data has been read + /// + /// @param name + /// Name of this array in the parent object, or empty string + /// if this array has no parent. + /// @param member + /// Index of the array element + /// @param value + /// JSON value + virtual bool PlainElementValue(const CJson_Node::TKeyType& /*name*/, + size_t /*index*/, + const CJson_ConstValue& /*value*/) { + return true; + } + + /// End reading array contents + /// + /// @param name + /// Name of this array in the parent object, or empty string + /// if this array has no parent. + virtual bool EndArray(const CJson_Node::TKeyType& /*name*/) { + return true; + } + + /// Return current stack path as string + /// For example: "/root/obj2/arr[3]" + CJson_Node::TKeyType GetCurrentJPath(void) const; + + /// Return current stack path as JSON pointer + /// For example: "/root/obj2/arr/3" + CJson_Node::TKeyType GetCurrentJPointer(void) const; + + /// Convert data, starting at the current parsing position, into + /// a document object. + /// This method may be called from BeginObject or BeginArray only. 
+ bool Read(CJson_Document& doc); + +private: + bool x_Notify(const rapidjson::Value& v); + bool x_BeginObjectOrArray(bool object_type); + void x_EndObjectOrArray(void); + +public: + // The following functions are named this way because rapidjson requires so + bool Null(); + bool Bool(bool v); + bool Int(int v); + bool Uint(unsigned v); + bool Int64(int64_t v); + bool Uint64(uint64_t v); + bool Double(double v); + bool String(const Ch* buf, rapidjson::SizeType sz, bool c); + bool Key( const Ch* buf, rapidjson::SizeType sz, bool c); + bool StartObject(); + bool EndObject(rapidjson::SizeType sz); + bool StartArray(); + bool EndArray(rapidjson::SizeType sz); + +private: + void x_SetSource(std::istream* in) {m_in=in;} + std::istream* m_in; // Input stream + std::vector m_object_type; // Object (true), or array (false) + std::vector m_index; // array element index + std::vector m_name; // object member name + + friend class CJson_Document; +}; + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Document +/// +/// Serializable, copyable container for JSON data. 
+class CJson_Schema; + +class CJson_Document : public CJson_Node +{ + typedef rapidjson::Document _DocImpl; + +public: + CJson_Document(CJson_Node::EJsonType type = CJson_Node::eObject); + /// Create document by parsing UTF8 string + CJson_Document(const TStringType& v); + /// Copy another document contents into this document + CJson_Document(const CJson_Document& v); + /// Copy another document contents into this document + CJson_Document& operator=(const CJson_Document& v); + /// Copy another Node contents into this document + CJson_Document(const CJson_ConstNode& v); + /// Copy another Node contents into this document + CJson_Document& operator=(const CJson_ConstNode& v); + + ~CJson_Document(void) { + } + + /// Read JSON data from a UTF8 string + bool ParseString(const TStringType& v); + + /// Read JSON data from a stream + bool Read(std::istream& in); + + /// Read JSON data from a file + bool Read(const std::string& filename) { + std::ifstream in(filename.c_str()); + return Read(in); + } + + /// Read JSON data from a stream, validating against schema + bool Read(std::istream& in, CJson_Schema& schema); + + /// Read JSON data from a file, validating against schema + bool Read(const std::string& filename, CJson_Schema& schema) { + std::ifstream in(filename.c_str()); + return Read(in, schema); + } + + /// Test if the most recent read was successful + bool ReadSucceeded(void); + + /// Get most recent read error + std::string GetReadError(void) const; + + /// Write JSON data into a stream + bool Write(std::ostream& out, TJson_Write_Flags flags = fJson_Write_IndentWithSpace, + unsigned int indent_char_count = 4) const; + + /// Write JSON data into a file + bool Write(const std::string& filename, TJson_Write_Flags flags = fJson_Write_IndentWithSpace, + unsigned int indent_char_count = 4) const { + std::ofstream out(filename.c_str()); + return Write(out, flags, indent_char_count); + } + + /// Write JSON data into a stream, validating against schema + bool 
Write(std::ostream& out, CJson_Schema& schema, + TJson_Write_Flags flags = fJson_Write_IndentWithSpace, + unsigned int indent_char_count = 4) const; + + /// Write JSON data into a file, validating against schema + bool Write(const std::string& filename, CJson_Schema& schema, + TJson_Write_Flags flags = fJson_Write_IndentWithSpace, + unsigned int indent_char_count = 4) const { + std::ofstream out(filename.c_str()); + return Write(out, schema, flags, indent_char_count); + } + + /// Traverse the document contents + void Walk(CJson_WalkHandler& walk) const; + + /// Traverse the JSON data stream contents + static void Walk(std::istream& in, CJson_WalkHandler& walk); + + +private: + _DocImpl m_DocImpl; + friend class CJson_Schema; +}; + +///////////////////////////////////////////////////////////////////////////// +/// +/// CJson_Schema +/// + +class CJson_Schema +{ +public: + /// Construct JSON schema from JSON document + CJson_Schema(const CJson_Document& schema); + + /// Validate JSON document against schema + bool Validate(const CJson_Document& v); + /// Validate JSON data from a stream + bool Validate(std::istream& in); + /// Validate JSON data from a file + bool Validate(const std::string& filename) { + std::ifstream in(filename.c_str()); + return Validate(in); + } + + /// Return result of the most recent validation + bool IsValid(void) const; + + /// Return name of the property which does not conform to schema + std::string GetInvalidValueProperty(void) const; + /// Return nonconforming value URI in the document + std::string GetInvalidValueDocumentUri(void) const; + /// Return nonconforming value URI in schema + std::string GetInvalidValueSchemaUri(void) const; + /// Return validation error + std::string GetValidationError() const; + +private: + rapidjson::SchemaDocument m_SchemaDocument; + rapidjson::SchemaValidator m_SchemaValidator; + friend class CJson_Document; +}; + + +///////////////////////////////////////////////////////////////////////////// + +/// 
Extraction operator for JSON document +inline std::istream& operator>>(std::istream& is, CJson_Document& d) { + if (!d.Read(is)) { + is.setstate(std::ios::failbit); + } + return is; +} + +/// Insertion operator for JSON document +inline std::ostream& operator<<(std::ostream& os, const CJson_Document& d) +{ + d.Write(os); + return os; +} + +/// Insertion operator for JSON node +inline std::ostream& operator<<(std::ostream& os, const CJson_ConstNode& v) +{ + return operator<<(os, CJson_Document(v)); +} + + + +///////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////// +// inline implementations + +#if defined(NCBI_COMPILER_GCC) && !defined(__clang__) +#if (NCBI_COMPILER_VERSION == 442) || (NCBI_COMPILER_VERSION == 443) +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif +#endif + +#define JSONWRAPP_TO_NCBIUTF8(v) (v) + +// workarounds to make it compile +#if defined(NCBI_COMPILER_GCC) && (NCBI_COMPILER_VERSION < 442) && !defined(__clang__) +# define JSONWRAPP_MAKENODE(v) CJson_Node(v) +#else +# define JSONWRAPP_MAKENODE(v) (v) +#endif + +// -------------------------------------------------------------------------- +// CJson_Node methods + +inline CJson_ConstNode::CJson_ConstNode(const CJson_ConstNode& n) + : m_Impl(n.m_Impl) { +} +inline CJson_ConstNode& +CJson_ConstNode::operator=(const CJson_ConstNode& n) { + m_Impl = n.m_Impl; return *this; +} +inline bool +CJson_ConstNode::operator==(const CJson_ConstNode& n) const +{ + return m_Impl->operator==(*n.m_Impl); +} +inline bool +CJson_ConstNode::operator!=(const CJson_ConstNode& n) const +{ + return m_Impl->operator!=(*n.m_Impl); +} + +inline CJson_Node::CJson_Node(const CJson_Node& n) + : CJson_ConstNode(n) { +} +inline CJson_Node& +CJson_Node::operator=(const CJson_Node& n) { + CJson_ConstNode::operator=(n); return *this; +} +inline CJson_Node& +CJson_Node::AssignCopy(const CJson_ConstNode& n) { + 
m_Impl->CopyFrom( *n.m_Impl, *m_Impl->GetValueAllocator()); + return *this; +} + + +inline CJson_Node::EJsonType +CJson_ConstNode::GetType(void) const { + switch (m_Impl->GetType()) { + default: + case rapidjson::kNullType: break; + case rapidjson::kFalseType: + case rapidjson::kTrueType: return eBool; + case rapidjson::kObjectType: return eObject; + case rapidjson::kArrayType: return eArray; + case rapidjson::kStringType: return eString; + case rapidjson::kNumberType: return eNumber; + } + return eNull; +} + +inline bool CJson_ConstNode::IsNull(void) const { + return m_Impl->IsNull(); +} +inline bool CJson_ConstNode::IsValue(void) const { + return !IsObject() && !IsArray(); +} +inline bool CJson_ConstNode::IsArray(void) const { + return m_Impl->IsArray(); +} +inline bool CJson_ConstNode::IsObject(void) const { + return m_Impl->IsObject(); +} +inline bool CJson_ConstNode::HasNode(const TKeyType& value) const { + return rapidjson::Pointer(value.c_str()).Get(*m_Impl) != nullptr; +} +inline CJson_ConstNode CJson_ConstNode::GetNode(const TKeyType& value) const { + _Impl *v = rapidjson::Pointer(value.c_str()).Get(*m_Impl); + if (!v) { + throw std::out_of_range("node not found"); + } + return CJson_ConstNode(v); +} + +inline CJson_Node CJson_Node::SetNode(const TKeyType& value) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + return CJson_Node(&rapidjson::Pointer(value.c_str()).Create(*m_Impl, *a)); +} + +inline CJson_Node& CJson_Node::SetNull(void) { + m_Impl->SetNull( ); return *this; +} + +inline CJson_Value CJson_Node::ResetValue(void) { + m_Impl->SetNull(); + return CJson_Value(m_Impl); +} +inline CJson_Value CJson_Node::SetValue(void) { + _ASSERT(IsValue()); + return CJson_Value(m_Impl); +} +inline CJson_ConstValue CJson_ConstNode::GetValue(void) const { + _ASSERT(IsValue()); + return CJson_ConstValue(m_Impl); +} +inline CJson_Array CJson_Node::ResetArray(void) { + m_Impl->SetArray(); + return CJson_Array(m_Impl); +} +inline CJson_Array 
CJson_Node::SetArray(void) { + _ASSERT(IsArray()); + return CJson_Array(m_Impl); +} +inline CJson_ConstArray CJson_ConstNode::GetArray(void) const { + _ASSERT(IsArray()); + return CJson_ConstArray(m_Impl); +} +inline CJson_Object CJson_Node::ResetObject(void) { + m_Impl->SetObject(); + return CJson_Object(m_Impl); +} +inline CJson_Object CJson_Node::SetObject(void) { + _ASSERT(IsObject()); + return CJson_Object(m_Impl); +} +inline CJson_ConstObject CJson_ConstNode::GetObject(void) const { + _ASSERT(IsObject()); + return CJson_ConstObject(m_Impl); +} +inline std::string +CJson_ConstNode::ToString(TJson_Write_Flags flags, unsigned int indent_char_count) const { + ncbi::CNcbiOstrstream os; + rapidjson::OStreamWrapper ofs(os); + rapidjson::PrettyWriter writer(ofs); + if (flags & fJson_Write_NoIndentation) { + writer.SetIndent(' ', 0); + } else { + writer.SetIndent( (flags & fJson_Write_IndentWithTab) ? '\t' : ' ', indent_char_count); + } + if (flags & fJson_Write_NoEol) { + writer.SetWriteEol(false); + } + m_Impl->Accept(writer); + return std::string( ncbi::CNcbiOstrstreamToString(os) ); +} + +// -------------------------------------------------------------------------- +// CJson_Value methods + +inline CJson_ConstValue::CJson_ConstValue( const CJson_ConstValue& n) + : CJson_ConstNode(n) { +} +inline CJson_ConstValue& +CJson_ConstValue::operator=(const CJson_ConstValue& n) { + CJson_ConstNode::operator=(n); return *this; +} +inline CJson_Value::CJson_Value( const CJson_Value& n) + : CJson_ConstNode(), CJson_ConstValue(n), CJson_Node(n) { +} +inline CJson_Value& +CJson_Value::operator=(const CJson_Value& n) { + CJson_Node::operator=(n); return *this; +} + +inline bool CJson_ConstValue::IsBool(void) const { + return m_Impl->IsBool(); +} +inline bool CJson_ConstValue::IsNumber(void) const { + return m_Impl->IsNumber(); +} +inline bool CJson_ConstValue::IsInt4(void) const { + return m_Impl->IsInt(); +} +inline bool CJson_ConstValue::IsUint4(void) const { + return 
m_Impl->IsUint(); +} +inline bool CJson_ConstValue::IsInt8(void) const { + return m_Impl->IsInt64(); +} +inline bool CJson_ConstValue::IsUint8(void) const { + return m_Impl->IsUint64(); +} +inline bool CJson_ConstValue::IsDouble(void) const { + return m_Impl->IsDouble(); +} +inline bool CJson_ConstValue::IsString(void) const { + return m_Impl->IsString(); +} + +inline bool CJson_ConstValue::GetBool(void) const { + return m_Impl->GetBool(); +} +inline Int4 CJson_ConstValue::GetInt4(void) const { + return m_Impl->GetInt(); +} +inline Uint4 CJson_ConstValue::GetUint4(void) const { + return m_Impl->GetUint(); +} +inline Int8 CJson_ConstValue::GetInt8(void) const { + return m_Impl->GetInt64(); +} +inline Uint8 CJson_ConstValue::GetUint8(void) const { + return m_Impl->GetUint64(); +} +inline double CJson_ConstValue::GetDouble(void) const { + return m_Impl->GetDouble(); +} +inline CJson_Node::TStringType +CJson_ConstValue::GetString(void) const { + return JSONWRAPP_TO_NCBIUTF8(m_Impl->GetString()); +} +inline size_t CJson_ConstValue::GetStringLength(void) const { + return m_Impl->GetStringLength(); +} + +inline CJson_Value& CJson_Value::SetBool(bool value) { + m_Impl->SetBool( value); return *this; +} +inline CJson_Value& CJson_Value::SetInt4(Int4 value) { + m_Impl->SetInt( value); return *this; +} +inline CJson_Value& CJson_Value::SetUint4(Uint4 value) { + m_Impl->SetUint( value); return *this; +} +inline CJson_Value& CJson_Value::SetInt8(Int8 value) { + m_Impl->SetInt64( value); return *this; +} +inline CJson_Value& CJson_Value::SetUint8(Uint8 value) { + m_Impl->SetUint64(value); return *this; +} +inline CJson_Value& CJson_Value::SetDouble(double value) { + m_Impl->SetDouble(value); return *this; +} +inline CJson_Value& CJson_Value::SetString(const CJson_Node::TStringType& value) { + m_Impl->SetString(value.c_str(), *(m_Impl->GetValueAllocator())); return *this; +} + +// -------------------------------------------------------------------------- +// CJson_Array methods 
+inline CJson_ConstArray::CJson_ConstArray( const CJson_ConstArray& n) + : CJson_ConstNode(n) { +} +inline CJson_ConstArray& +CJson_ConstArray::operator=(const CJson_ConstArray& n) { + CJson_ConstNode::operator=(n); return *this; +} +inline CJson_Array::CJson_Array( const CJson_Array& n) + : CJson_ConstNode(), CJson_ConstArray(n), CJson_Node(n) { +} +inline CJson_Array& +CJson_Array::operator=(const CJson_Array& n) { + CJson_Node::operator=(n); return *this; +} +inline void CJson_Array::reserve(size_t count) { + m_Impl->Reserve(rapidjson::SizeType(count), *(m_Impl->GetValueAllocator())); +} +inline void CJson_Array::clear(void) { + m_Impl->Clear(); +} +inline CJson_Array::iterator CJson_Array::erase(CJson_ConstArray::const_iterator _where) { + return CJson_Array::iterator( m_Impl->Erase( _where.m_vi)); +} +inline CJson_Array::iterator +CJson_Array::erase(CJson_ConstArray::const_iterator _first, CJson_ConstArray::const_iterator _last) { + return CJson_Array::iterator( m_Impl->Erase( _first.m_vi, _last.m_vi)); +} +inline size_t CJson_ConstArray::size(void) const { + return m_Impl->Size(); +} +inline size_t CJson_ConstArray::capacity(void) const { + return m_Impl->Capacity(); +} +inline bool CJson_ConstArray::empty(void) const { + return m_Impl->Empty(); +} +inline CJson_ConstNode CJson_ConstArray::at(size_t index) const { + if (index >= size()) { + throw std::out_of_range("array index out of range"); + } + return operator[](index); +} +inline CJson_Node CJson_Array::at(size_t index) { + if (index >= size()) { + throw std::out_of_range("array index out of range"); + } + return operator[](index); +} +inline CJson_ConstNode CJson_ConstArray::operator[](size_t index) const { + return CJson_ConstNode(&(m_Impl->operator[](rapidjson::SizeType(index)))); +} +inline CJson_Node CJson_Array::operator[](size_t index) { + return CJson_Node(&(m_Impl->operator[](rapidjson::SizeType(index)))); +} +inline CJson_ConstNode CJson_ConstArray::front(void) const { + return operator[](0); 
+} +inline CJson_Node CJson_Array::front(void) { + return operator[](0); +} +inline CJson_ConstNode CJson_ConstArray::back(void) const { + return operator[](size()-1); +} +inline CJson_Node CJson_Array::back(void) { + return operator[](size()-1); +} + +// Implicit conversions are prohibited +#ifndef NCBI_COMPILER_WORKSHOP +// this may fail to compile +//template void CJson_Array::push_back(T) =delete; +// this will compile: +template inline void CJson_Array::push_back(const T&) { + CProhibited::Implicit_conversions_are_prohibited(); +} +template inline void CJson_Array::push_back(const T*) { + CProhibited::Implicit_conversions_are_prohibited(); +} +#define JSW_EMPTY_TEMPLATE template<> +#else +#define JSW_EMPTY_TEMPLATE +#endif +inline void CJson_Array::push_back(void) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const bool& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetBool(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const Int4& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetInt(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const Uint4& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetUint(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const Int8& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetInt64(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const Uint8& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( 
rapidjson::Value().SetUint64(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const float& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetDouble(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const double& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetDouble(v).SetValueAllocator(a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back( + const CJson_Node::TCharType* v) { + rapidjson::Value sv(v, *(m_Impl->GetValueAllocator())); + m_Impl->PushBack( sv, *(m_Impl->GetValueAllocator())); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back( + const CJson_Node::TStringType& value) { + push_back(value.c_str()); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const CJson_ConstNode& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetValueAllocator(a).CopyFrom( *v.m_Impl, *a), *a); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const CJson_Node& v) { + push_back(v); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const CJson_ConstArray& v) { + push_back(v); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const CJson_Array& v) { + push_back(v); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const CJson_ConstObject& v) { + push_back(v); +} +JSW_EMPTY_TEMPLATE inline void CJson_Array::push_back(const CJson_Object& v) { + push_back(v); +} +#undef JSW_EMPTY_TEMPLATE + +inline CJson_Array CJson_Array::push_back_array(void) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + m_Impl->PushBack( rapidjson::Value().SetArray().SetValueAllocator(a), *a); + return back().SetArray(); +} + +inline CJson_Object CJson_Array::push_back_object(void) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + 
m_Impl->PushBack( rapidjson::Value().SetObject().SetValueAllocator(a), *a); + return back().SetObject(); +} + +inline void CJson_Array::pop_back(void) { + m_Impl->PopBack(); +} + +inline CJson_ConstArray::const_iterator +CJson_ConstArray::begin(void) const { + return const_iterator(m_Impl->Begin()); +} +inline CJson_ConstArray::const_iterator +CJson_ConstArray::end(void) const { + return const_iterator(m_Impl->End()); +} +inline CJson_Array::iterator CJson_Array::begin(void) const { + return iterator(m_Impl->Begin()); +} +inline CJson_Array::iterator CJson_Array::end(void) const { + return iterator(m_Impl->End()); +} + +// -------------------------------------------------------------------------- +// CJson_Array::const_iterator + +inline CJson_ConstArray::const_iterator::const_iterator(void) + : m_vi(0), m_v(JSONWRAPP_MAKENODE(0)) { +} +inline CJson_ConstArray::const_iterator::const_iterator( + const CJson_ConstArray::const_iterator& i) : m_vi(i.m_vi), m_v(i.m_v) { +} +inline CJson_ConstArray::const_iterator& +CJson_ConstArray::const_iterator::operator=( + const CJson_ConstArray::const_iterator& vi) { + m_vi = vi.m_vi; return *this; +} +inline bool +CJson_ConstArray::const_iterator::operator!=( + const CJson_ConstArray::const_iterator& vi) const { + return m_vi != vi.m_vi; +} +inline bool +CJson_ConstArray::const_iterator::operator==( + const CJson_ConstArray::const_iterator& vi) const { + return m_vi == vi.m_vi; +} +inline bool +CJson_ConstArray::const_iterator::operator<( + const CJson_ConstArray::const_iterator& vi) const { + return m_vi < vi.m_vi; +} +inline bool +CJson_ConstArray::const_iterator::operator<=( + const CJson_ConstArray::const_iterator& vi) const { + return m_vi <= vi.m_vi; +} +inline bool +CJson_ConstArray::const_iterator::operator>( + const CJson_ConstArray::const_iterator& vi) const { + return m_vi > vi.m_vi; +} +inline bool +CJson_ConstArray::const_iterator::operator>=( + const CJson_ConstArray::const_iterator& vi) const { + return m_vi >= 
vi.m_vi; +} +inline CJson_ConstArray::const_iterator& +CJson_ConstArray::const_iterator::operator++(void) { + ++m_vi; return *this; +} +inline CJson_ConstArray::const_iterator +CJson_ConstArray::const_iterator::operator++(int) { + const_iterator tmp(*this); ++m_vi; return tmp; +} +inline CJson_ConstArray::const_iterator& +CJson_ConstArray::const_iterator::operator+=(int i) { + m_vi += i; return *this; +} +inline CJson_ConstArray::const_iterator +CJson_ConstArray::const_iterator::operator+(int i) const { + return const_iterator(m_vi + i); +} +inline CJson_ConstArray::const_iterator& +CJson_ConstArray::const_iterator::operator--(void) { + --m_vi; return *this; +} +inline CJson_ConstArray::const_iterator +CJson_ConstArray::const_iterator::operator--(int) { + const_iterator tmp(*this); --m_vi; return tmp; +} +inline CJson_ConstArray::const_iterator& +CJson_ConstArray::const_iterator::operator-=(int i) { + m_vi -= i; return *this; +} +inline CJson_ConstArray::const_iterator +CJson_ConstArray::const_iterator::operator-(int i) const { + return const_iterator(m_vi - i); +} +inline const CJson_ConstNode& +CJson_ConstArray::const_iterator::operator*(void) const { + x_Impl(m_v) = m_vi; return m_v; +} +inline const CJson_ConstNode* +CJson_Array::const_iterator::operator->(void) const { + x_Impl(m_v) = m_vi; return &m_v; +} +inline CJson_ConstArray::const_iterator::distance_type +CJson_ConstArray::const_iterator::operator-(CJson_ConstArray::const_iterator vi) const { + return m_vi - vi.m_vi; +} + +inline CJson_ConstArray::const_iterator::const_iterator( + const CJson_ConstArray::_ImplCIterator vi) + : m_vi(const_cast(vi)), + m_v(JSONWRAPP_MAKENODE(0)) { +} +inline CJson_ConstArray::const_iterator::const_iterator( + const CJson_ConstArray::_ImplIterator vi) + : m_vi(vi), m_v(JSONWRAPP_MAKENODE(0)) { +} +// -------------------------------------------------------------------------- +// CJson_Array::iterator + +inline CJson_ConstArray::iterator::iterator(void) { +} +inline 
CJson_ConstArray::iterator::iterator(const CJson_Array::iterator& i) + : const_iterator(i) { +} +inline CJson_ConstArray::iterator& +CJson_ConstArray::iterator::operator=(const CJson_Array::iterator& vi) { + const_iterator::operator=(vi); return *this; +} +inline CJson_ConstArray::iterator +CJson_Array::iterator::operator+(int i) const { + return iterator(m_vi + i); +} +inline CJson_ConstArray::iterator +CJson_Array::iterator::operator-(int i) const { + return iterator(m_vi - i); +} +inline CJson_ConstArray::iterator& +CJson_ConstArray::iterator::operator++(void) { + ++m_vi; return *this; +} +inline CJson_ConstArray::iterator +CJson_ConstArray::iterator::operator++(int) { + iterator tmp(*this); ++m_vi; return tmp; +} +inline CJson_ConstArray::iterator& +CJson_ConstArray::iterator::operator--(void) { + --m_vi; return *this; +} +inline CJson_ConstArray::iterator +CJson_ConstArray::iterator::operator--(int) { + iterator tmp(*this); --m_vi; return tmp; +} +inline CJson_Node& +CJson_ConstArray::iterator::operator*(void) const { + x_Impl(m_v) = m_vi; return m_v; +} +inline CJson_Node* +CJson_ConstArray::iterator::operator->(void) const { + x_Impl(m_v) = m_vi; return &m_v; +} +inline CJson_ConstArray::iterator::distance_type +CJson_ConstArray::iterator::operator-(CJson_ConstArray::iterator vi) const { + return m_vi - vi.m_vi; +} +inline CJson_ConstArray::iterator::iterator( + const CJson_ConstArray::_ImplIterator vi) : const_iterator(vi) { +} + +// -------------------------------------------------------------------------- +// CJson_Object methods + +inline CJson_ConstObject::CJson_ConstObject( const CJson_ConstObject& n) + : CJson_ConstNode(n) { +} +inline CJson_ConstObject& +CJson_ConstObject::operator=(const CJson_ConstObject& n) { + CJson_ConstNode::operator=(n); return *this; +} +inline CJson_Object::CJson_Object( const CJson_Object& n) + : CJson_ConstNode(), CJson_ConstObject(n), CJson_Node(n) { +} +inline CJson_Object& +CJson_Object::operator=(const CJson_Object& n) 
{ + CJson_Node::operator=(n); return *this; +} +inline void CJson_Object::clear(void) { + m_Impl->RemoveAllMembers(); +} +inline size_t CJson_Object::erase(const CJson_Node::TKeyType& name) { + return m_Impl->RemoveMember(name.c_str()) ? 1 : 0; +} +inline CJson_Object::iterator CJson_Object::erase(CJson_ConstObject::const_iterator _where) { + return CJson_Object::iterator( m_Impl->EraseMember( _where.m_vi)); +} +inline CJson_Object::iterator +CJson_Object::erase(CJson_ConstObject::const_iterator _first, CJson_ConstObject::const_iterator _last) { + return CJson_Object::iterator( m_Impl->EraseMember( _first.m_vi, _last.m_vi)); +} +inline size_t CJson_ConstObject::size(void) const { + return m_Impl->MemberCount(); +} +inline bool CJson_ConstObject::empty(void) const { + return m_Impl->ObjectEmpty(); +} +inline CJson_ConstNode +CJson_ConstObject::at(const CJson_Node::TKeyType& name) const { + if (!has(name)) { + throw std::out_of_range(name + " object member not found"); + } + return CJson_ConstNode(&(m_Impl->operator[](name.c_str()))); +} +inline CJson_Node +CJson_Object::at(const CJson_Node::TKeyType& name) { + if (!has(name)) { + throw std::out_of_range(name + " object member not found"); + } + return CJson_Object(&(m_Impl->operator[](name.c_str()))); +} +inline CJson_ConstNode +CJson_ConstObject::operator[](const CJson_Node::TKeyType& name) const { + return CJson_ConstNode(&(m_Impl->operator[](name.c_str()))); +} +inline CJson_Node +CJson_Object::operator[](const CJson_Node::TKeyType& name) { + if (!has(name)) { + insert(name); + } + return CJson_Node(&(m_Impl->operator[](name.c_str()))); +} + +inline void CJson_Object::insert(const CJson_Node::TKeyType& name) { + rapidjson::Value::AllocatorType& a = *(m_Impl->GetValueAllocator()); + rapidjson::Value sv_name(name.c_str(), a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetValueAllocator(&a), a); +} +// Implicit conversions are prohibited +// this may fail to compile +//template void CJson_Object::insert(const 
std::string& , T) =delete; +// this will compile: +template inline void CJson_Object::insert( + const CJson_Node::TKeyType& , const T&) { + CProhibited::Implicit_conversions_are_prohibited(); +} +template inline void CJson_Object::insert( + const CJson_Node::TKeyType& , const T*) { + CProhibited::Implicit_conversions_are_prohibited(); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const bool& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetBool(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const Int4& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetInt(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const Uint4& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetUint(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const Int8& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetInt64(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const Uint8& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetUint64(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const float& v) { + rapidjson::Value::AllocatorType* a = 
m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetDouble(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const double& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetDouble(v).SetValueAllocator(a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, + const CJson_Node::TCharType* value) { + rapidjson::Value::AllocatorType& a = *(m_Impl->GetValueAllocator()); + rapidjson::Value sv_name(name.c_str(), a); + rapidjson::Value sv_value(value, a); + m_Impl->AddMember( sv_name, sv_value, a); +} +#ifndef NCBI_COMPILER_WORKSHOP +template<> +#endif +inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, + const CJson_Node::TStringType& value) { + insert(name, value.c_str()); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const CJson_ConstNode& v) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetValueAllocator(a).CopyFrom( *v.m_Impl, *a), *a); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const CJson_Node& v) { + insert(name, v); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const CJson_ConstArray& v) { + insert(name, v); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const CJson_Array& v) { + insert(name, v); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const CJson_ConstObject& v) { + insert(name, v); +} +template<> inline void +CJson_Object::insert(const CJson_Node::TKeyType& name, const CJson_Object& v) { + insert(name, v); +} +inline CJson_Array 
+CJson_Object::insert_array(const CJson_Node::TKeyType& name) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetArray().SetValueAllocator(a), *a); + return operator[](name).SetArray(); +} +inline CJson_Object +CJson_Object::insert_object(const CJson_Node::TKeyType& name) { + rapidjson::Value::AllocatorType* a = m_Impl->GetValueAllocator(); + rapidjson::Value sv_name(name.c_str(), *a); + m_Impl->AddMember( sv_name, rapidjson::Value().SetObject().SetValueAllocator(a), *a); + return operator[](name).SetObject(); +} +inline bool +CJson_ConstObject::has(const CJson_Node::TKeyType& name) const { + return m_Impl->HasMember(name.c_str()); +} + + +inline CJson_ConstObject::const_iterator +CJson_ConstObject::begin(void) const { + return const_iterator(m_Impl->MemberBegin()); +} +inline CJson_ConstObject::const_iterator +CJson_ConstObject::end(void) const { + return const_iterator(m_Impl->MemberEnd()); +} +inline CJson_ConstObject::const_iterator +CJson_ConstObject::find(const CJson_Node::TKeyType& name) const { + _ImplCIterator m = m_Impl->FindMember(name.c_str()); + return m ? const_iterator(m) : end(); +} + +inline CJson_Object::iterator +CJson_Object::begin(void) const { + return iterator(m_Impl->MemberBegin()); +} +inline CJson_Object::iterator +CJson_Object::end(void) const { + return iterator(m_Impl->MemberEnd()); +} +inline CJson_Object::iterator +CJson_Object::find(const CJson_Node::TKeyType& name) const { + _ImplIterator m = m_Impl->FindMember(name.c_str()); + return m ? 
iterator(m) : end(); +} + + +// -------------------------------------------------------------------------- +// CJson_Object::const_iterator + +inline CJson_ConstObject::const_iterator::const_iterator(void) + : m_vi(0) { +} +inline CJson_ConstObject::const_iterator::const_iterator( + const CJson_ConstObject::const_iterator& i) + : m_vi(i.m_vi) { +} +inline CJson_ConstObject::const_iterator::const_iterator( + const CJson_ConstObject::iterator& i) + : m_vi(i.m_vi) { +} +inline CJson_ConstObject::const_iterator& +CJson_ConstObject::const_iterator::operator++(void) { + ++m_vi; return *this; +} +inline CJson_ConstObject::const_iterator +CJson_ConstObject::const_iterator::operator++(int) { + const_iterator tmp(*this); ++m_vi; return tmp; +} +inline CJson_ConstObject::const_iterator& +CJson_ConstObject::const_iterator::operator--(void) { + --m_vi; return *this; +} +inline CJson_ConstObject::const_iterator +CJson_ConstObject::const_iterator::operator--(int) { + const_iterator tmp(*this); --m_vi; return tmp; +} +inline CJson_ConstObject::const_iterator& +CJson_ConstObject::const_iterator::operator=( + const CJson_ConstObject::const_iterator& vi) { + m_vi = vi.m_vi; return *this; +} +inline CJson_ConstObject::const_iterator& +CJson_ConstObject::const_iterator::operator=( + const CJson_ConstObject::iterator& vi) { + m_vi = vi.m_vi; return *this; +} +inline bool +CJson_ConstObject::const_iterator::operator!=( + const CJson_ConstObject::const_iterator& vi) const { + return m_vi != vi.m_vi; +} +inline bool +CJson_ConstObject::const_iterator::operator==( + const CJson_ConstObject::const_iterator& vi) const { + return m_vi == vi.m_vi; +} +inline bool +CJson_ConstObject::const_iterator::operator!=( + const CJson_ConstObject::iterator& vi) const { + return m_vi != vi.m_vi; +} +inline bool +CJson_ConstObject::const_iterator::operator==( + const CJson_ConstObject::iterator& vi) const { + return m_vi == vi.m_vi; +} + +inline CJson_ConstObject_pair::CJson_ConstObject_pair(void) + : 
name(0), value(JSONWRAPP_MAKENODE(0)) { +} +inline CJson_ConstObject_pair::CJson_ConstObject_pair( + const CJson_Node::TCharType* _name, const _Impl& _value) + : name(_name), value(JSONWRAPP_MAKENODE(const_cast<_Impl*>(&_value))) { +} +inline CJson_ConstObject_pair::CJson_ConstObject_pair(const CJson_Object_pair& p) + : name(p.name), value(JSONWRAPP_MAKENODE(const_cast<_Impl*>(p.value.m_Impl))) { +} +inline CJson_ConstObject_pair& +CJson_ConstObject_pair::assign( + const CJson_Node::TCharType* _name, const _Impl& _value) { + this->~CJson_ConstObject_pair(); + new (this) CJson_ConstObject_pair(_name, _value); + return *this; +} + +inline const CJson_ConstObject::const_iterator::pair& +CJson_ConstObject::const_iterator::operator*(void) const { + return m_pvi.assign(m_vi->name.GetString(), m_vi->value); +} +inline const CJson_ConstObject::const_iterator::pair* +CJson_ConstObject::const_iterator::operator->(void) const { + return &(m_pvi.assign(m_vi->name.GetString(), m_vi->value)); +} + +inline CJson_ConstObject::const_iterator::const_iterator( + const CJson_ConstObject::_ImplCIterator vi) + : m_vi(const_cast(vi)) { +} +inline CJson_ConstObject::const_iterator::const_iterator( + const CJson_ConstObject::_ImplIterator vi) + : m_vi(vi) { +} + +// -------------------------------------------------------------------------- +// CJson_Object::iterator + +inline CJson_ConstObject::iterator::iterator(void) { +} +inline CJson_ConstObject::iterator::iterator( + const CJson_ConstObject::iterator& i) : m_vi(i.m_vi) { +} +inline CJson_ConstObject::iterator& +CJson_ConstObject::iterator::operator=(const CJson_ConstObject::iterator& vi) { + m_vi = vi.m_vi; return *this; +} +inline bool +CJson_ConstObject::iterator::operator!=( + const CJson_ConstObject::iterator& vi) const { + return m_vi != vi.m_vi; +} +inline bool +CJson_ConstObject::iterator::operator==( + const CJson_ConstObject::iterator& vi) const { + return m_vi == vi.m_vi; +} +inline bool 
+CJson_ConstObject::iterator::operator!=( + const CJson_ConstObject::const_iterator& vi) const { + return m_vi != vi.m_vi; +} +inline bool +CJson_ConstObject::iterator::operator==( + const CJson_ConstObject::const_iterator& vi) const { + return m_vi == vi.m_vi; +} +inline CJson_ConstObject::iterator& +CJson_ConstObject::iterator::operator++(void) { + ++m_vi; return *this; +} +inline CJson_ConstObject::iterator +CJson_ConstObject::iterator::operator++(int) { + iterator tmp(*this); ++m_vi; return tmp; +} +inline CJson_ConstObject::iterator& +CJson_ConstObject::iterator::operator--(void) { + --m_vi; return *this; +} +inline CJson_ConstObject::iterator +CJson_ConstObject::iterator::operator--(int) { + iterator tmp(*this); --m_vi; return tmp; +} +inline CJson_ConstObject::iterator::pair& +CJson_ConstObject::iterator::operator*(void) const { + return m_pvi.assign(m_vi->name.GetString(), m_vi->value); +} +inline CJson_ConstObject::iterator::pair* +CJson_ConstObject::iterator::operator->(void) const { + return &(m_pvi.assign(m_vi->name.GetString(), m_vi->value)); +} + +inline CJson_Object_pair::CJson_Object_pair(void) + : name(0), value(JSONWRAPP_MAKENODE(0)) { +} +inline CJson_Object_pair::CJson_Object_pair( + const CJson_Node::TCharType* _name, _Impl& _value) + : name(_name), value(JSONWRAPP_MAKENODE(&_value)) { +} +inline CJson_Object_pair& +CJson_Object_pair::assign( + const CJson_Node::TCharType* _name, _Impl& _value) { + this->~CJson_Object_pair(); + new (this) CJson_Object_pair(_name, _value); + return *this; +} + +inline CJson_ConstObject::iterator::iterator( + const CJson_Object::_ImplIterator vi) + : m_vi(vi) { +} + +// -------------------------------------------------------------------------- +// CJson_WalkHandler methods + +inline CJson_WalkHandler::CJson_WalkHandler(void) + : m_in(0) { + m_object_type.push_back(true); m_index.push_back(size_t(-1)); + m_name.push_back(kEmptyStr); +} + +inline bool CJson_WalkHandler::x_Notify(const rapidjson::Value& v) { + bool 
ret = true; + if (m_object_type.back()) { + ret = BeginObjectMember(m_name[m_name.size()-2], m_name.back()); + if (ret) { + ret = PlainMemberValue( m_name[m_name.size()-2], m_name.back(), + const_cast(&v)); + } + return ret; + } + ret = BeginArrayElement(m_name[m_name.size()-2], m_index.back()); + if (ret) { + PlainElementValue(m_name[m_name.size()-2], m_index.back(), + const_cast(&v)); + } + ++(m_index.back()); + return ret; +} +inline bool CJson_WalkHandler::x_BeginObjectOrArray(bool object_type) { + bool ret = true; + if (m_object_type.size() > 1) { + if (m_object_type.back()) { + ret = BeginObjectMember(m_name[m_name.size()-2], m_name.back()); + } else { + ret = BeginArrayElement(m_name[m_name.size()-2], m_index.back()); + } + } + if (ret) { + m_object_type.push_back(object_type); m_index.push_back(size_t(-1)); + m_name.push_back(kEmptyStr); + } + return ret; +} +inline void CJson_WalkHandler::x_EndObjectOrArray(void) { + m_object_type.pop_back(); m_index.pop_back(); m_name.pop_back(); + if (!m_object_type.back()) { + ++(m_index.back()); + } +} +inline bool CJson_WalkHandler::Null() { + return x_Notify( rapidjson::Value().SetNull()); +} +inline bool CJson_WalkHandler::Bool(bool v) { + rapidjson::Value jv(v); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::Int(int v) { + rapidjson::Value jv(v); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::Uint(unsigned v) { + rapidjson::Value jv(v); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::Int64(int64_t v) { + rapidjson::Value jv(v); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::Uint64(uint64_t v) { + rapidjson::Value jv(v); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::Double(double v) { + rapidjson::Value jv(v); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::String(const Ch* buf, + rapidjson::SizeType sz, bool) { + rapidjson::Value jv(buf,sz); + return x_Notify(jv); +} +inline bool CJson_WalkHandler::Key(const Ch* buf, + rapidjson::SizeType sz, bool 
/*c*/) { + m_name.back().assign(buf, sz); + return true; +} + +inline bool CJson_WalkHandler::StartObject() { + if (!x_BeginObjectOrArray(true)) { + return false; + } + return BeginObject(m_name[m_name.size()-2]); +} +inline bool CJson_WalkHandler::EndObject(rapidjson::SizeType) { + m_name.back().clear(); + bool ret = EndObject(m_name[m_name.size()-2]); + x_EndObjectOrArray(); + return ret; +} +inline bool CJson_WalkHandler::StartArray() { + if (!x_BeginObjectOrArray(false)) { + return false; + } + bool ret = BeginArray(m_name[m_name.size()-2]); + m_index.back() = 0; + return ret; +} +inline bool CJson_WalkHandler::EndArray(rapidjson::SizeType) { + m_index.back() = size_t(-1); + bool ret = EndArray(m_name[m_name.size()-2]); + x_EndObjectOrArray(); + return ret; +} + +inline CJson_Node::TKeyType +CJson_WalkHandler::GetCurrentJPath(void) const { + std::vector::const_iterator t = m_object_type.begin(); + std::vector::const_iterator te = m_object_type.end(); + std::vector::const_iterator i = m_index.begin(); + std::vector::const_iterator n = m_name.begin(); + CJson_Node::TKeyType path; + for ( ++t, ++i, ++n; t != te; ++t, ++i, ++n) { + if (*t) { + path += JSONWRAPP_TO_NCBIUTF8("/"); + path += JSONWRAPP_TO_NCBIUTF8(*n); + } else if (*i != size_t(-1)) { + path += JSONWRAPP_TO_NCBIUTF8("["); + path += JSONWRAPP_TO_NCBIUTF8(ncbi::NStr::NumericToString(*i)); + path += JSONWRAPP_TO_NCBIUTF8("]"); + } + } + return path; +} + +inline CJson_Node::TKeyType CJson_WalkHandler::GetCurrentJPointer(void) const { + std::vector::const_iterator t = m_object_type.begin(); + std::vector::const_iterator te = m_object_type.end(); + std::vector::const_iterator i = m_index.begin(); + std::vector::const_iterator n = m_name.begin(); + CJson_Node::TKeyType path; + for ( ++t, ++i, ++n; t != te; ++t, ++i, ++n) { + path += JSONWRAPP_TO_NCBIUTF8("/"); + if (*t) { + path += JSONWRAPP_TO_NCBIUTF8(*n); + } else if (*i != size_t(-1)) { + path += JSONWRAPP_TO_NCBIUTF8(ncbi::NStr::NumericToString(*i)); + 
} + } + return path; +} + +inline bool CJson_WalkHandler::Read(CJson_Document& doc) { + bool b = false; + if (m_in) { + m_in->unget(); + b = doc.Read(*m_in); + m_in->unget(); + } + return b; +} + +// -------------------------------------------------------------------------- +// CJson_Document methods + +inline CJson_Document::CJson_Document( CJson_Value::EJsonType type) { + switch (type) { + default: + case CJson_Node::eObject: m_DocImpl.SetObject(); break; + case CJson_Node::eArray: m_DocImpl.SetArray(); break; + case CJson_Node::eNull: m_DocImpl.SetNull(); break; + case CJson_Node::eBool: m_DocImpl.SetBool(false); break; + case CJson_Node::eNumber: m_DocImpl.SetInt(0); break; + case CJson_Node::eString: m_DocImpl.SetString(kEmptyCStr,0); break; + } + m_Impl = &m_DocImpl; +} +inline CJson_Document::CJson_Document(const CJson_ConstNode::TStringType& v) { + m_DocImpl.Parse >(v.c_str()); + m_Impl = &m_DocImpl; +} +inline CJson_Document::CJson_Document(const CJson_Document& v) + : CJson_ConstNode(), CJson_Node() +{ + m_DocImpl.CopyFrom(*v.m_Impl, m_DocImpl.GetAllocator()); + m_Impl = &m_DocImpl; +} +inline CJson_Document& CJson_Document::operator=(const CJson_Document& v) { + m_DocImpl.CopyFrom(*v.m_Impl, m_DocImpl.GetAllocator()); + return *this; +} +inline CJson_Document::CJson_Document(const CJson_ConstNode& v) { + m_DocImpl.CopyFrom(*v.m_Impl, m_DocImpl.GetAllocator()); + m_Impl = &m_DocImpl; +} +inline CJson_Document& CJson_Document::operator=(const CJson_ConstNode& v) { + m_DocImpl.CopyFrom(*v.m_Impl, m_DocImpl.GetAllocator()); + return *this; +} + +inline bool CJson_Document::ParseString(const CJson_ConstNode::TStringType& v) { + m_DocImpl.Parse >(v.c_str()); + return !m_DocImpl.HasParseError(); +} + +inline bool CJson_Document::Read(std::istream& in) { + rapidjson::IStreamWrapper ifs(in); + m_DocImpl.ParseStream(ifs); + return !m_DocImpl.HasParseError(); +} + +inline bool CJson_Document::Read(std::istream& in, CJson_Schema& schema) { + 
rapidjson::IStreamWrapper ifs(in); + rapidjson::SchemaValidatingReader< + rapidjson::kParseStopWhenDoneFlag, + rapidjson::IStreamWrapper, + rapidjson::UTF8<> > rdr(ifs, schema.m_SchemaDocument); + m_DocImpl.Populate(rdr); + m_DocImpl.SetParseResult(rdr.GetParseResult()); + schema.m_SchemaValidator.SetValidationError(rdr); + if (m_DocImpl.HasParseError()) { + m_DocImpl.SetNull(); + } + return !m_DocImpl.HasParseError(); +} + +inline bool CJson_Document::ReadSucceeded(void) { + return !m_DocImpl.HasParseError(); +} +inline std::string CJson_Document::GetReadError() const { + return rapidjson::GetParseError_En(m_DocImpl.GetParseError()); +} + +inline bool CJson_Document::Write(std::ostream& out, + TJson_Write_Flags flags, unsigned int indent_char_count) const { + rapidjson::OStreamWrapper ofs(out); + rapidjson::PrettyWriter writer(ofs); + if (flags & fJson_Write_NoIndentation) { + writer.SetIndent(' ', 0); + } else { + writer.SetIndent( (flags & fJson_Write_IndentWithTab) ? '\t' : ' ', indent_char_count); + } + if (flags & fJson_Write_NoEol) { + writer.SetWriteEol(false); + } + return m_DocImpl.Accept(writer); +} + +inline bool CJson_Document::Write(std::ostream& out, CJson_Schema& schema, + TJson_Write_Flags flags, unsigned int indent_char_count) const +{ + rapidjson::OStreamWrapper ofs(out); + rapidjson::PrettyWriter writer(ofs); + if (flags & fJson_Write_NoIndentation) { + writer.SetIndent(' ', 0); + } else { + writer.SetIndent( (flags & fJson_Write_IndentWithTab) ? 
'\t' : ' ', indent_char_count); + } + if (flags & fJson_Write_NoEol) { + writer.SetWriteEol(false); + } + rapidjson::GenericSchemaValidator< rapidjson::SchemaDocument, + rapidjson::PrettyWriter > validator(schema.m_SchemaDocument, writer); + bool res = m_DocImpl.Accept(validator); + schema.m_SchemaValidator.SetValidationError(validator); + return res; +} + +inline void CJson_Document::Walk(CJson_WalkHandler& walk) const { + walk.x_SetSource(0); + m_DocImpl.Accept(walk); +} + +inline void CJson_Document::Walk(std::istream& in, + CJson_WalkHandler& walk) { + walk.x_SetSource(&in); + rapidjson::IStreamWrapper ifs(in); + rapidjson::Reader rdr; + rdr.Parse(ifs,walk); +} + +// -------------------------------------------------------------------------- +// CJson_Schema methods + + +inline CJson_Schema::CJson_Schema(const CJson_Document& schema) + : m_SchemaDocument(schema.m_DocImpl), + m_SchemaValidator( m_SchemaDocument) +{ +} + +inline bool CJson_Schema::Validate(const CJson_Document& v) { + m_SchemaValidator.Reset(); + return v.m_DocImpl.Accept(m_SchemaValidator); +} +inline bool CJson_Schema::Validate(std::istream& in) { + m_SchemaValidator.Reset(); + rapidjson::IStreamWrapper ifs(in); + rapidjson::Reader rdr; + return rdr.Parse(ifs, m_SchemaValidator); +} + +inline bool CJson_Schema::IsValid(void) const { + return m_SchemaValidator.IsValid(); +} +inline std::string CJson_Schema::GetInvalidValueProperty(void) const { + return m_SchemaValidator.GetInvalidSchemaKeyword(); +} +inline std::string CJson_Schema::GetInvalidValueDocumentUri(void) const { + rapidjson::StringBuffer sb; + m_SchemaValidator.GetInvalidDocumentPointer().StringifyUriFragment(sb); + return sb.GetString(); +} +inline std::string CJson_Schema::GetInvalidValueSchemaUri(void) const { + rapidjson::StringBuffer sb; + m_SchemaValidator.GetInvalidSchemaPointer().StringifyUriFragment(sb); + return sb.GetString(); +} +inline std::string CJson_Schema::GetValidationError() const { + std::string res; + if 
(!IsValid()) { + res = "Invalid property \'" + GetInvalidValueProperty() + + "\' of value \'" + GetInvalidValueDocumentUri() + + "\', see schema at \'" + GetInvalidValueSchemaUri() + + "\'"; + } + return res; +} + +END_NCBI_SCOPE + +#endif /* MISC_JSONWRAPP___JSONWRAPP11__HPP */ + + diff --git a/c++/include/misc/jsonwrapp/rapidjson10/__NCBI_read_me.txt b/c++/include/misc/jsonwrapp/rapidjson10/__NCBI_read_me.txt new file mode 100644 index 00000000..95323025 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/__NCBI_read_me.txt @@ -0,0 +1,20 @@ + +Please, DO NOT USE rapidjson directly. +Use jsonwrapp instead. + + + +Source: +https://github.com/miloyip/rapidjson +release v1.0.2 + + +NCBI modifications: + +Search for 'NCBI' in the following files: +rapidjson.h +document.h +filereadstream.h +filewritestream.h +prettywriter.h + diff --git a/c++/include/misc/jsonwrapp/rapidjson10/allocators.h b/c++/include/misc/jsonwrapp/rapidjson10/allocators.h new file mode 100644 index 00000000..d74a6715 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/allocators.h @@ -0,0 +1,261 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_ALLOCATORS_H_ +#define RAPIDJSON_ALLOCATORS_H_ + +#include "rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// Allocator + +/*! \class rapidjson::Allocator + \brief Concept for allocating, resizing and freeing memory block. + + Note that Malloc() and Realloc() are non-static but Free() is static. + + So if an allocator need to support Free(), it needs to put its pointer in + the header of memory block. + +\code +concept Allocator { + static const bool kNeedFree; //!< Whether this allocator needs to call Free(). + + // Allocate a memory block. + // \param size of the memory block in bytes. + // \returns pointer to the memory block. + void* Malloc(size_t size); + + // Resize a memory block. + // \param originalPtr The pointer to current memory block. Null pointer is permitted. + // \param originalSize The current size in bytes. (Design issue: since some allocator may not book-keep this, explicitly pass to it can save memory.) + // \param newSize the new size in bytes. + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize); + + // Free a memory block. + // \param pointer to the memory block. Null pointer is permitted. + static void Free(void *ptr); +}; +\endcode +*/ + +/////////////////////////////////////////////////////////////////////////////// +// CrtAllocator + +//! C-runtime library allocator. +/*! This class is just wrapper for standard C library memory routines. + \note implements Allocator concept +*/ +class CrtAllocator { +public: + static const bool kNeedFree = true; + void* Malloc(size_t size) { + if (size) // behavior of malloc(0) is implementation defined. + return std::malloc(size); + else + return NULL; // standardize to returning NULL. 
+ } + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { + (void)originalSize; + if (newSize == 0) { + std::free(originalPtr); + return NULL; + } + return std::realloc(originalPtr, newSize); + } + static void Free(void *ptr) { std::free(ptr); } +}; + +/////////////////////////////////////////////////////////////////////////////// +// MemoryPoolAllocator + +//! Default memory allocator used by the parser and DOM. +/*! This allocator allocate memory blocks from pre-allocated memory chunks. + + It does not free memory blocks. And Realloc() only allocate new memory. + + The memory chunks are allocated by BaseAllocator, which is CrtAllocator by default. + + User may also supply a buffer as the first chunk. + + If the user-buffer is full then additional chunks are allocated by BaseAllocator. + + The user-buffer is not deallocated by this allocator. + + \tparam BaseAllocator the allocator type for allocating memory chunks. Default is CrtAllocator. + \note implements Allocator concept +*/ +template +class MemoryPoolAllocator { +public: + static const bool kNeedFree = false; //!< Tell users that no need to call Free() with this allocator. (concept Allocator) + + //! Constructor with chunkSize. + /*! \param chunkSize The size of memory chunk. The default is kDefaultChunkSize. + \param baseAllocator The allocator for allocating memory chunks. + */ + MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : + chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0) + { + } + + //! Constructor with user-supplied buffer. + /*! The user buffer will be used firstly. When it is full, memory pool allocates new chunk with chunk size. + + The user buffer will not be deallocated when this allocator is destructed. + + \param buffer User supplied buffer. + \param size Size of the buffer in bytes. It must at least larger than sizeof(ChunkHeader). 
+ \param chunkSize The size of memory chunk. The default is kDefaultChunkSize. + \param baseAllocator The allocator for allocating memory chunks. + */ + MemoryPoolAllocator(void *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : + chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0) + { + RAPIDJSON_ASSERT(buffer != 0); + RAPIDJSON_ASSERT(size > sizeof(ChunkHeader)); + chunkHead_ = reinterpret_cast(buffer); + chunkHead_->capacity = size - sizeof(ChunkHeader); + chunkHead_->size = 0; + chunkHead_->next = 0; + } + + //! Destructor. + /*! This deallocates all memory chunks, excluding the user-supplied buffer. + */ + ~MemoryPoolAllocator() { + Clear(); + RAPIDJSON_DELETE(ownBaseAllocator_); + } + + //! Deallocates all memory chunks, excluding the user-supplied buffer. + void Clear() { + while (chunkHead_ && chunkHead_ != userBuffer_) { + ChunkHeader* next = chunkHead_->next; + baseAllocator_->Free(chunkHead_); + chunkHead_ = next; + } + if (chunkHead_ && chunkHead_ == userBuffer_) + chunkHead_->size = 0; // Clear user buffer + } + + //! Computes the total capacity of allocated memory chunks. + /*! \return total capacity in bytes. + */ + size_t Capacity() const { + size_t capacity = 0; + for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) + capacity += c->capacity; + return capacity; + } + + //! Computes the memory blocks allocated. + /*! \return total used bytes. + */ + size_t Size() const { + size_t size = 0; + for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) + size += c->size; + return size; + } + + //! Allocates a memory block. (concept Allocator) + void* Malloc(size_t size) { + if (!size) + return NULL; + + size = RAPIDJSON_ALIGN(size); + if (chunkHead_ == 0 || chunkHead_->size + size > chunkHead_->capacity) + AddChunk(chunk_capacity_ > size ? 
chunk_capacity_ : size); + + void *buffer = reinterpret_cast(chunkHead_ + 1) + chunkHead_->size; + chunkHead_->size += size; + return buffer; + } + + //! Resizes a memory block (concept Allocator) + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { + if (originalPtr == 0) + return Malloc(newSize); + + if (newSize == 0) + return NULL; + + // Do not shrink if new size is smaller than original + if (originalSize >= newSize) + return originalPtr; + + // Simply expand it if it is the last allocation and there is sufficient space + if (originalPtr == (char *)(chunkHead_ + 1) + chunkHead_->size - originalSize) { + size_t increment = static_cast(newSize - originalSize); + increment = RAPIDJSON_ALIGN(increment); + if (chunkHead_->size + increment <= chunkHead_->capacity) { + chunkHead_->size += increment; + return originalPtr; + } + } + + // Realloc process: allocate and copy memory, do not free original buffer. + void* newBuffer = Malloc(newSize); + RAPIDJSON_ASSERT(newBuffer != 0); // Do not handle out-of-memory explicitly. + if (originalSize) + std::memcpy(newBuffer, originalPtr, originalSize); + return newBuffer; + } + + //! Frees a memory block (concept Allocator) + static void Free(void *ptr) { (void)ptr; } // Do nothing + +private: + //! Copy constructor is not permitted. + MemoryPoolAllocator(const MemoryPoolAllocator& rhs) /* = delete */; + //! Copy assignment operator is not permitted. + MemoryPoolAllocator& operator=(const MemoryPoolAllocator& rhs) /* = delete */; + + //! Creates a new chunk. + /*! \param capacity Capacity of the chunk in bytes. 
+ */ + void AddChunk(size_t capacity) { + if (!baseAllocator_) + ownBaseAllocator_ = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator()); + ChunkHeader* chunk = reinterpret_cast(baseAllocator_->Malloc(sizeof(ChunkHeader) + capacity)); + chunk->capacity = capacity; + chunk->size = 0; + chunk->next = chunkHead_; + chunkHead_ = chunk; + } + + static const int kDefaultChunkCapacity = 64 * 1024; //!< Default chunk capacity. + + //! Chunk header for perpending to each chunk. + /*! Chunks are stored as a singly linked list. + */ + struct ChunkHeader { + size_t capacity; //!< Capacity of the chunk in bytes (excluding the header itself). + size_t size; //!< Current size of allocated memory in bytes. + ChunkHeader *next; //!< Next chunk in the linked list. + }; + + ChunkHeader *chunkHead_; //!< Head of the chunk linked-list. Only the head chunk serves allocation. + size_t chunk_capacity_; //!< The minimum capacity of chunk when they are allocated. + void *userBuffer_; //!< User supplied buffer. + BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks. + BaseAllocator* ownBaseAllocator_; //!< base allocator created by this object. +}; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_ENCODINGS_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/document.h b/c++/include/misc/jsonwrapp/rapidjson10/document.h new file mode 100644 index 00000000..bbce30e7 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/document.h @@ -0,0 +1,2112 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_DOCUMENT_H_ +#define RAPIDJSON_DOCUMENT_H_ + +/*! \file document.h */ + +#include "reader.h" +#include "internal/meta.h" +#include "internal/strfunc.h" +#include // placement new + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant +#elif defined(__GNUC__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +// NCBI +#define RAPIDJSON_NCBI_NOOPTIMIZE __attribute__((optimize("O0"))) +#endif + +#ifndef RAPIDJSON_NCBI_NOOPTIMIZE +#define RAPIDJSON_NCBI_NOOPTIMIZE +#endif +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_HAS_STDSTRING + +#ifndef RAPIDJSON_HAS_STDSTRING +#ifdef RAPIDJSON_DOXYGEN_RUNNING +#define RAPIDJSON_HAS_STDSTRING 1 // force generation of documentation +#else +#define RAPIDJSON_HAS_STDSTRING 0 // no std::string support by default +#endif +/*! \def RAPIDJSON_HAS_STDSTRING + \ingroup RAPIDJSON_CONFIG + \brief Enable RapidJSON support for \c std::string + + By defining this preprocessor symbol to \c 1, several convenience functions for using + \ref rapidjson::GenericValue with \c std::string are enabled, especially + for construction and comparison. + + \hideinitializer +*/ +#endif // !defined(RAPIDJSON_HAS_STDSTRING) + +#if RAPIDJSON_HAS_STDSTRING +#include +#endif // RAPIDJSON_HAS_STDSTRING + +#ifndef RAPIDJSON_NOMEMBERITERATORCLASS +#include // std::iterator, std::random_access_iterator_tag +#endif + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS +#include // std::move +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +// Forward declaration. 
+template +class GenericValue; + +//! Name-value pair in a JSON object value. +/*! + This class was internal to GenericValue. It used to be a inner struct. + But a compiler (IBM XL C/C++ for AIX) have reported to have problem with that so it moved as a namespace scope struct. + https://code.google.com/p/rapidjson/issues/detail?id=64 +*/ +template +struct GenericMember { +//NCBI: added assignment operator; Clang 3.2 wanted it + GenericMember& operator= (GenericMember& rhs) { + name = rhs.name; + value = rhs.value; + return *this; + } + + GenericValue name; //!< name of member (must be a string) + GenericValue value; //!< value of member. +}; + +/////////////////////////////////////////////////////////////////////////////// +// GenericMemberIterator + +#ifndef RAPIDJSON_NOMEMBERITERATORCLASS + +//! (Constant) member iterator for a JSON object value +/*! + \tparam Const Is this a constant iterator? + \tparam Encoding Encoding of the value. (Even non-string values need to have the same encoding in a document) + \tparam Allocator Allocator type for allocating memory of object, array and string. + + This class implements a Random Access Iterator for GenericMember elements + of a GenericValue, see ISO/IEC 14882:2003(E) C++ standard, 24.1 [lib.iterator.requirements]. + + \note This iterator implementation is mainly intended to avoid implicit + conversions from iterator values to \c NULL, + e.g. from GenericValue::FindMember. + + \note Define \c RAPIDJSON_NOMEMBERITERATORCLASS to fall back to a + pointer-based implementation, if your platform doesn't provide + the C++ header. + + \see GenericMember, GenericValue::MemberIterator, GenericValue::ConstMemberIterator + */ +template +class GenericMemberIterator + : public std::iterator >::Type> { + + friend class GenericValue; + template friend class GenericMemberIterator; + + typedef GenericMember PlainType; + typedef typename internal::MaybeAddConst::Type ValueType; + typedef std::iterator BaseType; + +public: + //! 
Iterator type itself + typedef GenericMemberIterator Iterator; + //! Constant iterator type + typedef GenericMemberIterator ConstIterator; + //! Non-constant iterator type + typedef GenericMemberIterator NonConstIterator; + + //! Pointer to (const) GenericMember + typedef typename BaseType::pointer Pointer; + //! Reference to (const) GenericMember + typedef typename BaseType::reference Reference; + //! Signed integer type (e.g. \c ptrdiff_t) + typedef typename BaseType::difference_type DifferenceType; + + //! Default constructor (singular value) + /*! Creates an iterator pointing to no element. + \note All operations, except for comparisons, are undefined on such values. + */ + GenericMemberIterator() : ptr_() {} + + //! Iterator conversions to more const + /*! + \param it (Non-const) iterator to copy from + + Allows the creation of an iterator from another GenericMemberIterator + that is "less const". Especially, creating a non-constant iterator + from a constant iterator are disabled: + \li const -> non-const (not ok) + \li const -> const (ok) + \li non-const -> const (ok) + \li non-const -> non-const (ok) + + \note If the \c Const template parameter is already \c false, this + constructor effectively defines a regular copy-constructor. + Otherwise, the copy constructor is implicitly defined. + */ + GenericMemberIterator(const NonConstIterator & it) : ptr_(it.ptr_) {} + + //! @name stepping + //@{ + Iterator& operator++(){ ++ptr_; return *this; } + Iterator& operator--(){ --ptr_; return *this; } + Iterator operator++(int){ Iterator old(*this); ++ptr_; return old; } + Iterator operator--(int){ Iterator old(*this); --ptr_; return old; } + //@} + + //! 
@name increment/decrement + //@{ + Iterator operator+(DifferenceType n) const { return Iterator(ptr_+n); } + Iterator operator-(DifferenceType n) const { return Iterator(ptr_-n); } + + Iterator& operator+=(DifferenceType n) { ptr_+=n; return *this; } + Iterator& operator-=(DifferenceType n) { ptr_-=n; return *this; } + //@} + + //! @name relations + //@{ + bool operator==(ConstIterator that) const { return ptr_ == that.ptr_; } + bool operator!=(ConstIterator that) const { return ptr_ != that.ptr_; } + bool operator<=(ConstIterator that) const { return ptr_ <= that.ptr_; } + bool operator>=(ConstIterator that) const { return ptr_ >= that.ptr_; } + bool operator< (ConstIterator that) const { return ptr_ < that.ptr_; } + bool operator> (ConstIterator that) const { return ptr_ > that.ptr_; } + //@} + + //! @name dereference + //@{ + Reference operator*() const { return *ptr_; } + Pointer operator->() const { return ptr_; } + Reference operator[](DifferenceType n) const { return ptr_[n]; } + //@} + + //! Distance + DifferenceType operator-(ConstIterator that) const { return ptr_-that.ptr_; } + +private: + //! Internal constructor from plain pointer + explicit GenericMemberIterator(Pointer p) : ptr_(p) {} + + Pointer ptr_; //!< raw pointer +}; + +#else // RAPIDJSON_NOMEMBERITERATORCLASS + +// class-based member iterator implementation disabled, use plain pointers + +template +struct GenericMemberIterator; + +//! non-const GenericMemberIterator +template +struct GenericMemberIterator { + //! use plain pointer as iterator type + typedef GenericMember* Iterator; +}; +//! const GenericMemberIterator +template +struct GenericMemberIterator { + //! use plain const pointer as iterator type + typedef const GenericMember* Iterator; +}; + +#endif // RAPIDJSON_NOMEMBERITERATORCLASS + +/////////////////////////////////////////////////////////////////////////////// +// GenericStringRef + +//! Reference to a constant string (not taking a copy) +/*! 
+ \tparam CharType character type of the string + + This helper class is used to automatically infer constant string + references for string literals, especially from \c const \b (!) + character arrays. + + The main use is for creating JSON string values without copying the + source string via an \ref Allocator. This requires that the referenced + string pointers have a sufficient lifetime, which exceeds the lifetime + of the associated GenericValue. + + \b Example + \code + Value v("foo"); // ok, no need to copy & calculate length + const char foo[] = "foo"; + v.SetString(foo); // ok + + const char* bar = foo; + // Value x(bar); // not ok, can't rely on bar's lifetime + Value x(StringRef(bar)); // lifetime explicitly guaranteed by user + Value y(StringRef(bar, 3)); // ok, explicitly pass length + \endcode + + \see StringRef, GenericValue::SetString +*/ +template +struct GenericStringRef { + typedef CharType Ch; //!< character type of the string + + //! Create string reference from \c const character array + /*! + This constructor implicitly creates a constant string reference from + a \c const character array. It has better performance than + \ref StringRef(const CharType*) by inferring the string \ref length + from the array length, and also supports strings containing null + characters. + + \tparam N length of the string, automatically inferred + + \param str Constant character array, lifetime assumed to be longer + than the use of the string in e.g. a GenericValue + + \post \ref s == str + + \note Constant complexity. + \note There is a hidden, private overload to disallow references to + non-const character arrays to be created via this constructor. + By this, e.g. function-scope arrays used to be filled via + \c snprintf are excluded from consideration. + In such cases, the referenced string should be \b copied to the + GenericValue instead. + */ + template + GenericStringRef(const CharType (&str)[N]) RAPIDJSON_NOEXCEPT + : s(str), length(N-1) {} + + //! 
Explicitly create string reference from \c const character pointer + /*! + This constructor can be used to \b explicitly create a reference to + a constant string pointer. + + \see StringRef(const CharType*) + + \param str Constant character pointer, lifetime assumed to be longer + than the use of the string in e.g. a GenericValue + + \post \ref s == str + + \note There is a hidden, private overload to disallow references to + non-const character arrays to be created via this constructor. + By this, e.g. function-scope arrays used to be filled via + \c snprintf are excluded from consideration. + In such cases, the referenced string should be \b copied to the + GenericValue instead. + */ + explicit GenericStringRef(const CharType* str) + : s(str), length(internal::StrLen(str)){ RAPIDJSON_ASSERT(s != NULL); } + + //! Create constant string reference from pointer and length + /*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \param len length of the string, excluding the trailing NULL terminator + + \post \ref s == str && \ref length == len + \note Constant complexity. + */ + GenericStringRef(const CharType* str, SizeType len) + : s(str), length(len) { RAPIDJSON_ASSERT(s != NULL); } + + //! implicit conversion to plain CharType pointer + operator const Ch *() const { return s; } + + const Ch* const s; //!< plain CharType pointer + const SizeType length; //!< length of the string (excluding the trailing NULL terminator) + +private: + //! Disallow copy-assignment + GenericStringRef operator=(const GenericStringRef&); + //! Disallow construction from non-const array + template + GenericStringRef(CharType (&str)[N]) /* = delete */; +}; + +//! Mark a character pointer as constant string +/*! Mark a plain character pointer as a "string literal". 
This function + can be used to avoid copying a character string to be referenced as a + value in a JSON GenericValue object, if the string's lifetime is known + to be valid long enough. + \tparam CharType Character type of the string + \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \return GenericStringRef string reference object + \relatesalso GenericStringRef + + \see GenericValue::GenericValue(StringRefType), GenericValue::operator=(StringRefType), GenericValue::SetString(StringRefType), GenericValue::PushBack(StringRefType, Allocator&), GenericValue::AddMember +*/ +template +inline GenericStringRef StringRef(const CharType* str) { + return GenericStringRef(str, internal::StrLen(str)); +} + +//! Mark a character pointer as constant string +/*! Mark a plain character pointer as a "string literal". This function + can be used to avoid copying a character string to be referenced as a + value in a JSON GenericValue object, if the string's lifetime is known + to be valid long enough. + + This version has better performance with supplied length, and also + supports string containing null characters. + + \tparam CharType character type of the string + \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \param length The length of source string. + \return GenericStringRef string reference object + \relatesalso GenericStringRef +*/ +template +inline GenericStringRef StringRef(const CharType* str, size_t length) { + return GenericStringRef(str, SizeType(length)); +} + +#if RAPIDJSON_HAS_STDSTRING +//! Mark a string object as constant string +/*! Mark a string object (e.g. \c std::string) as a "string literal". + This function can be used to avoid copying a string to be referenced as a + value in a JSON GenericValue object, if the string's lifetime is known + to be valid long enough. 
+ + \tparam CharType character type of the string + \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \return GenericStringRef string reference object + \relatesalso GenericStringRef + \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. +*/ +template +inline GenericStringRef StringRef(const std::basic_string& str) { + return GenericStringRef(str.data(), SizeType(str.size())); +} +#endif + +/////////////////////////////////////////////////////////////////////////////// +// GenericValue type traits +namespace internal { + +template +struct IsGenericValueImpl : FalseType {}; + +// select candidates according to nested encoding and allocator types +template struct IsGenericValueImpl::Type, typename Void::Type> + : IsBaseOf, T>::Type {}; + +// helper to match arbitrary GenericValue instantiations, including derived classes +template struct IsGenericValue : IsGenericValueImpl::Type {}; + +} // namespace internal + +/////////////////////////////////////////////////////////////////////////////// +// GenericValue + +//! Represents a JSON value. Use Value for UTF8 encoding and default allocator. +/*! + A JSON value can be one of 7 types. This class is a variant type supporting + these types. + + Use the Value if UTF8 and default allocator + + \tparam Encoding Encoding of the value. (Even non-string values need to have the same encoding in a document) + \tparam Allocator Allocator type for allocating memory of object, array and string. +*/ +template > +class GenericValue { +public: + //! Name-value pair in an object. + typedef GenericMember Member; + typedef Encoding EncodingType; //!< Encoding type from template parameter. + typedef Allocator AllocatorType; //!< Allocator type from template parameter. + typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding. 
+ typedef GenericStringRef StringRefType; //!< Reference to a constant string + typedef typename GenericMemberIterator::Iterator MemberIterator; //!< Member iterator for iterating in object. + typedef typename GenericMemberIterator::Iterator ConstMemberIterator; //!< Constant member iterator for iterating in object. + typedef GenericValue* ValueIterator; //!< Value iterator for iterating in array. + typedef const GenericValue* ConstValueIterator; //!< Constant value iterator for iterating in array. + typedef GenericValue ValueType; //!< Value type of itself. + + //!@name Constructors and destructor. + //@{ + +//NCBI: allocator_ is not initialized intentionally +// Rapidjson uses in-place constructors in many places (eg, SetNull() method) +// We want to preserve allocator_ value in such cases + + //! Default constructor creates a null value. + GenericValue() RAPIDJSON_NOEXCEPT : data_(), flags_(kNullFlag) {} + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move constructor in C++11 + GenericValue(GenericValue&& rhs) RAPIDJSON_NOEXCEPT : data_(rhs.data_), flags_(rhs.flags_) { + rhs.flags_ = kNullFlag; // give up contents + } +#endif + +private: + //! Copy constructor is not permitted. + GenericValue(const GenericValue& rhs); + +public: + + //! Constructor with JSON value type. + /*! This creates a Value of specified type with default content. + \param type Type of the value. + \note Default content for number is zero. + */ + explicit GenericValue(Type type) RAPIDJSON_NOEXCEPT : data_(), flags_() { + static const unsigned defaultFlags[7] = { + kNullFlag, kFalseFlag, kTrueFlag, kObjectFlag, kArrayFlag, kShortStringFlag, + kNumberAnyFlag + }; + RAPIDJSON_ASSERT(type <= kNumberType); + flags_ = defaultFlags[type]; + + // Use ShortString to store empty string. + if (type == kStringType) + data_.ss.SetLength(0); + } + + //! Explicit copy constructor (with allocator) + /*! 
Creates a copy of a Value by using the given Allocator + \tparam SourceAllocator allocator of \c rhs + \param rhs Value to copy from (read-only) + \param allocator Allocator for allocating copied elements and buffers. Commonly use GenericDocument::GetAllocator(). + \see CopyFrom() + */ + template< typename SourceAllocator > + GenericValue(const GenericValue& rhs, Allocator & allocator); + + //! Constructor for boolean value. + /*! \param b Boolean value + \note This constructor is limited to \em real boolean values and rejects + implicitly converted types like arbitrary pointers. Use an explicit cast + to \c bool, if you want to construct a boolean JSON value in such cases. + */ +#ifndef RAPIDJSON_DOXYGEN_RUNNING // hide SFINAE from Doxygen + template + explicit GenericValue(T b, RAPIDJSON_ENABLEIF((internal::IsSame))) RAPIDJSON_NOEXCEPT +#else + explicit GenericValue(bool b) RAPIDJSON_NOEXCEPT +#endif + : data_(), flags_(b ? kTrueFlag : kFalseFlag) { + // safe-guard against failing SFINAE + RAPIDJSON_STATIC_ASSERT((internal::IsSame::Value)); + } + + //! Constructor for int value. + explicit GenericValue(int i) RAPIDJSON_NOEXCEPT : data_(), flags_(kNumberIntFlag) { + data_.n.i64 = i; + if (i >= 0) + flags_ |= kUintFlag | kUint64Flag; + } + + //! Constructor for unsigned value. + explicit GenericValue(unsigned u) RAPIDJSON_NOEXCEPT : data_(), flags_(kNumberUintFlag) { + data_.n.u64 = u; + if (!(u & 0x80000000)) + flags_ |= kIntFlag | kInt64Flag; + } + + //! Constructor for int64_t value. + explicit GenericValue(int64_t i64) RAPIDJSON_NOEXCEPT : data_(), flags_(kNumberInt64Flag) { + data_.n.i64 = i64; + if (i64 >= 0) { + flags_ |= kNumberUint64Flag; + if (!(static_cast(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000))) + flags_ |= kUintFlag; + if (!(static_cast(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000))) + flags_ |= kIntFlag; + } + else if (i64 >= static_cast(RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000))) + flags_ |= kIntFlag; + } + + //! 
Constructor for uint64_t value. + explicit GenericValue(uint64_t u64) RAPIDJSON_NOEXCEPT : data_(), flags_(kNumberUint64Flag) { + data_.n.u64 = u64; + if (!(u64 & RAPIDJSON_UINT64_C2(0x80000000, 0x00000000))) + flags_ |= kInt64Flag; + if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000))) + flags_ |= kUintFlag; + if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000))) + flags_ |= kIntFlag; + } + + //! Constructor for double value. + explicit GenericValue(double d) RAPIDJSON_NOEXCEPT : data_(), flags_(kNumberDoubleFlag) { data_.n.d = d; } + + //! Constructor for constant string (i.e. do not make a copy of string) + GenericValue(const Ch* s, SizeType length) RAPIDJSON_NOEXCEPT : data_(), flags_() { SetStringRaw(StringRef(s, length)); } + + //! Constructor for constant string (i.e. do not make a copy of string) + explicit GenericValue(StringRefType s) RAPIDJSON_NOEXCEPT : data_(), flags_() { SetStringRaw(s); } + + //! Constructor for copy-string (i.e. do make a copy of string) + GenericValue(const Ch* s, SizeType length, Allocator& allocator) : data_(), flags_() { SetStringRaw(StringRef(s, length), allocator); } + + //! Constructor for copy-string (i.e. do make a copy of string) + GenericValue(const Ch*s, Allocator& allocator) : data_(), flags_() { SetStringRaw(StringRef(s), allocator); } + +#if RAPIDJSON_HAS_STDSTRING + //! Constructor for copy-string from a string object (i.e. do make a copy of string) + /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. + */ + GenericValue(const std::basic_string& s, Allocator& allocator) : data_(), flags_() { SetStringRaw(StringRef(s), allocator); } +#endif + + //! Destructor. + /*! Need to destruct elements of array, members of object, or copy-string. 
+ */ + ~GenericValue() { + if (Allocator::kNeedFree) { // Shortcut by Allocator's trait + switch(flags_) { + case kArrayFlag: + for (GenericValue* v = data_.a.elements; v != data_.a.elements + data_.a.size; ++v) + v->~GenericValue(); + Allocator::Free(data_.a.elements); + break; + + case kObjectFlag: + for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m) + m->~Member(); + Allocator::Free(data_.o.members); + break; + + case kCopyStringFlag: + Allocator::Free(const_cast(data_.s.str)); + break; + + default: + break; // Do nothing for other types. + } + } + } + + //@} + + //!@name Assignment operators + //@{ + + //! Assignment with move semantics. + /*! \param rhs Source of the assignment. It will become a null value after assignment. + */ + GenericValue& operator=(GenericValue& rhs) RAPIDJSON_NOEXCEPT { + RAPIDJSON_ASSERT(this != &rhs); + this->~GenericValue(); + RawAssign(rhs); + return *this; + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move assignment in C++11 + GenericValue& operator=(GenericValue&& rhs) RAPIDJSON_NOEXCEPT { + return *this = rhs.Move(); + } +#endif + + //! Assignment of constant string reference (no copy) + /*! \param str Constant string reference to be assigned + \note This overload is needed to avoid clashes with the generic primitive type assignment overload below. + \see GenericStringRef, operator=(T) + */ + GenericValue& operator=(StringRefType str) RAPIDJSON_NOEXCEPT { + GenericValue s(str); + return *this = s; + } + + //! Assignment with primitive types. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param value The value to be assigned. + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref SetString(const Ch*, Allocator&) (for copying) or + \ref StringRef() (to explicitly mark the pointer as constant) instead. 
+ All other pointer types would implicitly convert to \c bool, + use \ref SetBool() instead. + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::IsPointer), (GenericValue&)) + operator=(T value) { + GenericValue v(value); + return *this = v; + } + + //! Deep-copy assignment from Value + /*! Assigns a \b copy of the Value to the current Value object + \tparam SourceAllocator Allocator type of \c rhs + \param rhs Value to copy from (read-only) + \param allocator Allocator to use for copying + */ + template + GenericValue& CopyFrom(const GenericValue& rhs, Allocator& allocator) { + RAPIDJSON_ASSERT((void*)this != (void const*)&rhs); + this->~GenericValue(); + new (this) GenericValue(rhs, allocator); + return *this; + } + + //! Exchange the contents of this value with those of other. + /*! + \param other Another value. + \note Constant complexity. + */ + GenericValue& Swap(GenericValue& other) RAPIDJSON_NOEXCEPT { + GenericValue temp; + temp.RawAssign(*this); + RawAssign(other); + other.RawAssign(temp); + return *this; + } + + //! Prepare Value for move semantics + /*! \return *this */ + GenericValue& Move() RAPIDJSON_NOEXCEPT { return *this; } + //@} + + //!@name Equal-to and not-equal-to operators + //@{ + //! Equal-to operator + /*! + \note If an object contains duplicated named member, comparing equality with any object is always \c false. + \note Linear time complexity (number of all values in the subtree and total lengths of all strings). 
+ */ + template + bool operator==(const GenericValue& rhs) const { + typedef GenericValue RhsType; + if (GetType() != rhs.GetType()) + return false; + + switch (GetType()) { + case kObjectType: // Warning: O(n^2) inner-loop + if (data_.o.size != rhs.data_.o.size) + return false; + for (ConstMemberIterator lhsMemberItr = MemberBegin(); lhsMemberItr != MemberEnd(); ++lhsMemberItr) { + typename RhsType::ConstMemberIterator rhsMemberItr = rhs.FindMember(lhsMemberItr->name); + if (rhsMemberItr == rhs.MemberEnd() || lhsMemberItr->value != rhsMemberItr->value) + return false; + } + return true; + + case kArrayType: + if (data_.a.size != rhs.data_.a.size) + return false; + for (SizeType i = 0; i < data_.a.size; i++) + if ((*this)[i] != rhs[i]) + return false; + return true; + + case kStringType: + return StringEqual(rhs); + + case kNumberType: + if (IsDouble() || rhs.IsDouble()) { + double a = GetDouble(); // May convert from integer to double. + double b = rhs.GetDouble(); // Ditto + return a >= b && a <= b; // Prevent -Wfloat-equal + } + else + return data_.n.u64 == rhs.data_.n.u64; + + default: // kTrueType, kFalseType, kNullType + return true; + } + } + + //! Equal-to operator with const C-string pointer + bool operator==(const Ch* rhs) const { return *this == GenericValue(StringRef(rhs)); } + +#if RAPIDJSON_HAS_STDSTRING + //! Equal-to operator with string object + /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. + */ + bool operator==(const std::basic_string& rhs) const { return *this == GenericValue(StringRef(rhs)); } +#endif + + //! Equal-to operator with primitive types + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c true, \c false + */ + template RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr,internal::IsGenericValue >), (bool)) operator==(const T& rhs) const { return *this == GenericValue(rhs); } + + //! Not-equal-to operator + /*! 
\return !(*this == rhs) + */ + template + bool operator!=(const GenericValue& rhs) const { return !(*this == rhs); } + + //! Not-equal-to operator with const C-string pointer + bool operator!=(const Ch* rhs) const { return !(*this == rhs); } + + //! Not-equal-to operator with arbitrary types + /*! \return !(*this == rhs) + */ + template RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue), (bool)) operator!=(const T& rhs) const { return !(*this == rhs); } + + //! Equal-to operator with arbitrary types (symmetric version) + /*! \return (rhs == lhs) + */ + template friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue), (bool)) operator==(const T& lhs, const GenericValue& rhs) { return rhs == lhs; } + + //! Not-Equal-to operator with arbitrary types (symmetric version) + /*! \return !(rhs == lhs) + */ + template friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue), (bool)) operator!=(const T& lhs, const GenericValue& rhs) { return !(rhs == lhs); } + //@} + + //!@name Type + //@{ + + Type GetType() const { return static_cast(flags_ & kTypeMask); } + bool IsNull() const { return flags_ == kNullFlag; } + bool IsFalse() const { return flags_ == kFalseFlag; } + bool IsTrue() const { return flags_ == kTrueFlag; } + bool IsBool() const { return (flags_ & kBoolFlag) != 0; } + bool IsObject() const { return flags_ == kObjectFlag; } + bool IsArray() const { return flags_ == kArrayFlag; } + bool IsNumber() const { return (flags_ & kNumberFlag) != 0; } + bool IsInt() const { return (flags_ & kIntFlag) != 0; } + bool IsUint() const { return (flags_ & kUintFlag) != 0; } + bool IsInt64() const { return (flags_ & kInt64Flag) != 0; } + bool IsUint64() const { return (flags_ & kUint64Flag) != 0; } + bool IsDouble() const { return (flags_ & kDoubleFlag) != 0; } + bool IsString() const { return (flags_ & kStringFlag) != 0; } + + //@} + + //!@name Null + //@{ + + GenericValue& SetNull() { this->~GenericValue(); new (this) GenericValue(); return *this; } + + //@} + + 
//!@name Bool + //@{ + + bool GetBool() const { RAPIDJSON_ASSERT(IsBool()); return flags_ == kTrueFlag; } + //!< Set boolean value + /*! \post IsBool() == true */ + GenericValue& SetBool(bool b) { this->~GenericValue(); new (this) GenericValue(b); return *this; } + + //@} + + //!@name Object + //@{ + + //! Set this value as an empty object. + /*! \post IsObject() == true */ + GenericValue& SetObject() { this->~GenericValue(); new (this) GenericValue(kObjectType); return *this; } + + //! Get the number of members in the object. + SizeType MemberCount() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size; } + + //! Check whether the object is empty. + bool ObjectEmpty() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size == 0; } + + //! Get a value from an object associated with the name. + /*! \pre IsObject() == true + \tparam T Either \c Ch or \c const \c Ch (template used for disambiguation with \ref operator[](SizeType)) + \note In version 0.1x, if the member is not found, this function returns a null value. This makes issue 7. + Since 0.2, if the name is not correct, it will assert. + If user is unsure whether a member exists, user should use HasMember() first. + A better approach is to use FindMember(). + \note Linear time complexity. + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr::Type, Ch> >),(GenericValue&)) operator[](T* name) { + GenericValue n(StringRef(name)); + return (*this)[n]; + } + template + RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr::Type, Ch> >),(const GenericValue&)) operator[](T* name) const { return const_cast(*this)[name]; } + + //! Get a value from an object associated with the name. + /*! \pre IsObject() == true + \tparam SourceAllocator Allocator of the \c name value + + \note Compared to \ref operator[](T*), this version is faster because it does not need a StrLen(). + And it can also handle strings with embedded null characters. + + \note Linear time complexity. 
+ */ + template + GenericValue& operator[](const GenericValue& name) { + MemberIterator member = FindMember(name); + if (member != MemberEnd()) + return member->value; + else { + RAPIDJSON_ASSERT(false); // see above note + static GenericValue NullValue; + return NullValue; + } + } + template + const GenericValue& operator[](const GenericValue& name) const { return const_cast(*this)[name]; } + +#if RAPIDJSON_HAS_STDSTRING + //! Get a value from an object associated with name (string object). + GenericValue& operator[](const std::basic_string& name) { return (*this)[GenericValue(StringRef(name))]; } + const GenericValue& operator[](const std::basic_string& name) const { return (*this)[GenericValue(StringRef(name))]; } +#endif + + //! Const member iterator + /*! \pre IsObject() == true */ + ConstMemberIterator MemberBegin() const { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(data_.o.members); } + //! Const \em past-the-end member iterator + /*! \pre IsObject() == true */ + ConstMemberIterator MemberEnd() const { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(data_.o.members + data_.o.size); } + //! Member iterator + /*! \pre IsObject() == true */ + MemberIterator MemberBegin() { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(data_.o.members); } + //! \em Past-the-end member iterator + /*! \pre IsObject() == true */ + MemberIterator MemberEnd() { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(data_.o.members + data_.o.size); } + + //! Check whether a member exists in the object. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Whether a member with that name exists. + \note It is better to use FindMember() directly if you need the obtain the value as well. + \note Linear time complexity. + */ + bool HasMember(const Ch* name) const { return FindMember(name) != MemberEnd(); } + +#if RAPIDJSON_HAS_STDSTRING + //! Check whether a member exists in the object with string object. + /*! 
+ \param name Member name to be searched. + \pre IsObject() == true + \return Whether a member with that name exists. + \note It is better to use FindMember() directly if you need the obtain the value as well. + \note Linear time complexity. + */ + bool HasMember(const std::basic_string& name) const { return FindMember(name) != MemberEnd(); } +#endif + + //! Check whether a member exists in the object with GenericValue name. + /*! + This version is faster because it does not need a StrLen(). It can also handle string with null character. + \param name Member name to be searched. + \pre IsObject() == true + \return Whether a member with that name exists. + \note It is better to use FindMember() directly if you need the obtain the value as well. + \note Linear time complexity. + */ + template + bool HasMember(const GenericValue& name) const { return FindMember(name) != MemberEnd(); } + + //! Find member by name. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Iterator to member, if it exists. + Otherwise returns \ref MemberEnd(). + + \note Earlier versions of Rapidjson returned a \c NULL pointer, in case + the requested member doesn't exist. For consistency with e.g. + \c std::map, this has been changed to MemberEnd() now. + \note Linear time complexity. + */ + MemberIterator FindMember(const Ch* name) { + GenericValue n(StringRef(name)); + return FindMember(n); + } + + ConstMemberIterator FindMember(const Ch* name) const { return const_cast(*this).FindMember(name); } + + //! Find member by name. + /*! + This version is faster because it does not need a StrLen(). It can also handle string with null character. + \param name Member name to be searched. + \pre IsObject() == true + \return Iterator to member, if it exists. + Otherwise returns \ref MemberEnd(). + + \note Earlier versions of Rapidjson returned a \c NULL pointer, in case + the requested member doesn't exist. For consistency with e.g. 
+ \c std::map, this has been changed to MemberEnd() now. + \note Linear time complexity. + */ + template + MemberIterator FindMember(const GenericValue& name) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(name.IsString()); + MemberIterator member = MemberBegin(); + for ( ; member != MemberEnd(); ++member) + if (name.StringEqual(member->name)) + break; + return member; + } + template ConstMemberIterator FindMember(const GenericValue& name) const { return const_cast(*this).FindMember(name); } + +#if RAPIDJSON_HAS_STDSTRING + //! Find member by string object name. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Iterator to member, if it exists. + Otherwise returns \ref MemberEnd(). + */ + MemberIterator FindMember(const std::basic_string& name) { return FindMember(StringRef(name)); } + ConstMemberIterator FindMember(const std::basic_string& name) const { return FindMember(StringRef(name)); } +#endif + + //! Add a member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value Value of any type. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \note The ownership of \c name and \c value will be transferred to this object on success. + \pre IsObject() && name.IsString() + \post name.IsNull() && value.IsNull() + \note Amortized Constant time complexity. 
+ */ + GenericValue& AddMember(GenericValue& name, GenericValue& value, Allocator& allocator) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(name.IsString()); + + Object& o = data_.o; + if (o.size >= o.capacity) { + if (o.capacity == 0) { + o.capacity = kDefaultObjectCapacity; + o.members = reinterpret_cast(allocator.Malloc(o.capacity * sizeof(Member))); + } + else { + SizeType oldCapacity = o.capacity; + o.capacity += (oldCapacity + 1) / 2; // grow by factor 1.5 + o.members = reinterpret_cast(allocator.Realloc(o.members, oldCapacity * sizeof(Member), o.capacity * sizeof(Member))); + } + } + o.members[o.size].name.RawAssign(name); + o.members[o.size].value.RawAssign(value); + o.size++; + return *this; + } + + //! Add a constant string value as member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value constant string reference as value of member. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below. + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(GenericValue& name, StringRefType value, Allocator& allocator) { + GenericValue v(value); + return AddMember(name, v, allocator); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Add a string object as member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value constant string reference as value of member. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. 
+ \pre IsObject() + \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below. + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(GenericValue& name, std::basic_string& value, Allocator& allocator) { + GenericValue v(value, allocator); + return AddMember(name, v, allocator); + } +#endif + + //! Add any primitive value as member (name-value pair) to the object. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param name A string value as name of member. + \param value Value of primitive type \c T as value of member + \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref AddMember(StringRefType, GenericValue&, Allocator&) or \ref + AddMember(StringRefType, StringRefType, Allocator&). + All other pointer types would implicitly convert to \c bool, + use an explicit cast instead, if needed. + \note Amortized Constant time complexity. 
+ */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (GenericValue&)) + AddMember(GenericValue& name, T value, Allocator& allocator) { + GenericValue v(value); + return AddMember(name, v, allocator); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericValue& AddMember(GenericValue&& name, GenericValue&& value, Allocator& allocator) { + return AddMember(name, value, allocator); + } + GenericValue& AddMember(GenericValue&& name, GenericValue& value, Allocator& allocator) { + return AddMember(name, value, allocator); + } + GenericValue& AddMember(GenericValue& name, GenericValue&& value, Allocator& allocator) { + return AddMember(name, value, allocator); + } + GenericValue& AddMember(StringRefType name, GenericValue&& value, Allocator& allocator) { + GenericValue n(name); + return AddMember(n, value, allocator); + } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + + + //! Add a member (name-value pair) to the object. + /*! \param name A constant string reference as name of member. + \param value Value of any type. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \note The ownership of \c value will be transferred to this object on success. + \pre IsObject() + \post value.IsNull() + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(StringRefType name, GenericValue& value, Allocator& allocator) { + GenericValue n(name); + return AddMember(n, value, allocator); + } + + //! Add a constant string value as member (name-value pair) to the object. + /*! \param name A constant string reference as name of member. + \param value constant string reference as value of member. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. 
+ \pre IsObject() + \note This overload is needed to avoid clashes with the generic primitive type AddMember(StringRefType,T,Allocator&) overload below. + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(StringRefType name, StringRefType value, Allocator& allocator) { + GenericValue v(value); + return AddMember(name, v, allocator); + } + + //! Add any primitive value as member (name-value pair) to the object. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param name A constant string reference as name of member. + \param value Value of primitive type \c T as value of member + \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref AddMember(StringRefType, GenericValue&, Allocator&) or \ref + AddMember(StringRefType, StringRefType, Allocator&). + All other pointer types would implicitly convert to \c bool, + use an explicit cast instead, if needed. + \note Amortized Constant time complexity. + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (GenericValue&)) + AddMember(StringRefType name, T value, Allocator& allocator) { + GenericValue n(name); + return AddMember(n, value, allocator); + } + + //! Remove all members in the object. + /*! This function do not deallocate memory in the object, i.e. the capacity is unchanged. + \note Linear time complexity. + */ + void RemoveAllMembers() { + RAPIDJSON_ASSERT(IsObject()); + for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m) + m->~Member(); + data_.o.size = 0; + } + + //! Remove a member in object by its name. + /*! \param name Name of member to be removed. + \return Whether the member existed. 
+ \note This function may reorder the object members. Use \ref + EraseMember(ConstMemberIterator) if you need to preserve the + relative order of the remaining members. + \note Linear time complexity. + */ + bool RemoveMember(const Ch* name) { + GenericValue n(StringRef(name)); + return RemoveMember(n); + } + +#if RAPIDJSON_HAS_STDSTRING + bool RemoveMember(const std::basic_string& name) { return RemoveMember(GenericValue(StringRef(name))); } +#endif + + template + bool RemoveMember(const GenericValue& name) { + MemberIterator m = FindMember(name); + if (m != MemberEnd()) { + RemoveMember(m); + return true; + } + else + return false; + } + + //! Remove a member in object by iterator. + /*! \param m member iterator (obtained by FindMember() or MemberBegin()). + \return the new iterator after removal. + \note This function may reorder the object members. Use \ref + EraseMember(ConstMemberIterator) if you need to preserve the + relative order of the remaining members. + \note Constant time complexity. + */ + MemberIterator RemoveMember(MemberIterator m) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(data_.o.size > 0); + RAPIDJSON_ASSERT(data_.o.members != 0); + RAPIDJSON_ASSERT(m >= MemberBegin() && m < MemberEnd()); + + MemberIterator last(data_.o.members + (data_.o.size - 1)); + if (data_.o.size > 1 && m != last) { + // Move the last one to this place + *m = *last; + } + else { + // Only one left, just destroy + m->~Member(); + } + --data_.o.size; + return m; + } + + //! Remove a member from an object by iterator. + /*! \param pos iterator to the member to remove + \pre IsObject() == true && \ref MemberBegin() <= \c pos < \ref MemberEnd() + \return Iterator following the removed element. + If the iterator \c pos refers to the last element, the \ref MemberEnd() iterator is returned. + \note This function preserves the relative order of the remaining object + members. If you do not need this, use the more efficient \ref RemoveMember(MemberIterator). 
+ \note Linear time complexity. + */ + MemberIterator EraseMember(ConstMemberIterator pos) { + return EraseMember(pos, pos +1); + } + + //! Remove members in the range [first, last) from an object. + /*! \param first iterator to the first member to remove + \param last iterator following the last member to remove + \pre IsObject() == true && \ref MemberBegin() <= \c first <= \c last <= \ref MemberEnd() + \return Iterator following the last removed element. + \note This function preserves the relative order of the remaining object + members. + \note Linear time complexity. + */ + MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(data_.o.size > 0); + RAPIDJSON_ASSERT(data_.o.members != 0); + RAPIDJSON_ASSERT(first >= MemberBegin()); + RAPIDJSON_ASSERT(first <= last); + RAPIDJSON_ASSERT(last <= MemberEnd()); + + MemberIterator pos = MemberBegin() + (first - MemberBegin()); + for (MemberIterator itr = pos; itr != last; ++itr) + itr->~Member(); + std::memmove(&*pos, &*last, (MemberEnd() - last) * sizeof(Member)); + data_.o.size -= (last - first); + return pos; + } + + //! Erase a member in object by its name. + /*! \param name Name of member to be removed. + \return Whether the member existed. + \note Linear time complexity. + */ + bool EraseMember(const Ch* name) { + GenericValue n(StringRef(name)); + return EraseMember(n); + } + +#if RAPIDJSON_HAS_STDSTRING + bool EraseMember(const std::basic_string& name) { return EraseMember(GenericValue(StringRef(name))); } +#endif + + template + bool EraseMember(const GenericValue& name) { + MemberIterator m = FindMember(name); + if (m != MemberEnd()) { + EraseMember(m); + return true; + } + else + return false; + } + + //@} + + //!@name Array + //@{ + + //! Set this value as an empty array. + /*! \post IsArray == true */ + GenericValue& SetArray() { this->~GenericValue(); new (this) GenericValue(kArrayType); return *this; } + + //! 
Get the number of elements in array. + SizeType Size() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size; } + + //! Get the capacity of array. + SizeType Capacity() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.capacity; } + + //! Check whether the array is empty. + bool Empty() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size == 0; } + + //! Remove all elements in the array. + /*! This function do not deallocate memory in the array, i.e. the capacity is unchanged. + \note Linear time complexity. + */ + void Clear() { + RAPIDJSON_ASSERT(IsArray()); + for (SizeType i = 0; i < data_.a.size; ++i) + data_.a.elements[i].~GenericValue(); + data_.a.size = 0; + } + + //! Get an element from array by index. + /*! \pre IsArray() == true + \param index Zero-based index of element. + \see operator[](T*) + */ + GenericValue& operator[](SizeType index) { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(index < data_.a.size); + return data_.a.elements[index]; + } + const GenericValue& operator[](SizeType index) const { return const_cast(*this)[index]; } + + //! Element iterator + /*! \pre IsArray() == true */ + ValueIterator Begin() { RAPIDJSON_ASSERT(IsArray()); return data_.a.elements; } + //! \em Past-the-end element iterator + /*! \pre IsArray() == true */ + ValueIterator End() { RAPIDJSON_ASSERT(IsArray()); return data_.a.elements + data_.a.size; } + //! Constant element iterator + /*! \pre IsArray() == true */ + ConstValueIterator Begin() const { return const_cast(*this).Begin(); } + //! Constant \em past-the-end element iterator + /*! \pre IsArray() == true */ + ConstValueIterator End() const { return const_cast(*this).End(); } + + //! Request the array to have enough capacity to store elements. + /*! \param newCapacity The capacity that the array at least need to have. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. 
+ \note Linear time complexity. + */ + GenericValue& Reserve(SizeType newCapacity, Allocator &allocator) { + RAPIDJSON_ASSERT(IsArray()); + if (newCapacity > data_.a.capacity) { + data_.a.elements = (GenericValue*)allocator.Realloc(data_.a.elements, data_.a.capacity * sizeof(GenericValue), newCapacity * sizeof(GenericValue)); + data_.a.capacity = newCapacity; + } + return *this; + } + + //! Append a GenericValue at the end of the array. + /*! \param value Value to be appended. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \pre IsArray() == true + \post value.IsNull() == true + \return The value itself for fluent API. + \note The ownership of \c value will be transferred to this array on success. + \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient. + \note Amortized constant time complexity. + */ + GenericValue& PushBack(GenericValue& value, Allocator& allocator) { + RAPIDJSON_ASSERT(IsArray()); + if (data_.a.size >= data_.a.capacity) + Reserve(data_.a.capacity == 0 ? kDefaultArrayCapacity : (data_.a.capacity + (data_.a.capacity + 1) / 2), allocator); + data_.a.elements[data_.a.size++].RawAssign(value); + return *this; + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericValue& PushBack(GenericValue&& value, Allocator& allocator) { + return PushBack(value, allocator); + } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + + //! Append a constant string reference at the end of the array. + /*! \param value Constant string reference to be appended. + \param allocator Allocator for reallocating memory. It must be the same one used previously. Commonly use GenericDocument::GetAllocator(). + \pre IsArray() == true + \return The value itself for fluent API. + \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient. + \note Amortized constant time complexity. 
+ \see GenericStringRef + */ + GenericValue& PushBack(StringRefType value, Allocator& allocator) { + return (*this).template PushBack(value, allocator); + } + + //! Append a primitive value at the end of the array. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param value Value of primitive type T to be appended. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \pre IsArray() == true + \return The value itself for fluent API. + \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient. + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref PushBack(GenericValue&, Allocator&) or \ref + PushBack(StringRefType, Allocator&). + All other pointer types would implicitly convert to \c bool, + use an explicit cast instead, if needed. + \note Amortized constant time complexity. + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (GenericValue&)) + PushBack(T value, Allocator& allocator) { + GenericValue v(value); + return PushBack(v, allocator); + } + + //! Remove the last element in the array. + /*! + \note Constant time complexity. + */ + GenericValue& PopBack() { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(!Empty()); + data_.a.elements[--data_.a.size].~GenericValue(); + return *this; + } + + //! Remove an element of array by iterator. + /*! + \param pos iterator to the element to remove + \pre IsArray() == true && \ref Begin() <= \c pos < \ref End() + \return Iterator following the removed element. If the iterator pos refers to the last element, the End() iterator is returned. + \note Linear time complexity. + */ + ValueIterator Erase(ConstValueIterator pos) { + return Erase(pos, pos + 1); + } + + //! 
Remove elements in the range [first, last) of the array. + /*! + \param first iterator to the first element to remove + \param last iterator following the last element to remove + \pre IsArray() == true && \ref Begin() <= \c first <= \c last <= \ref End() + \return Iterator following the last removed element. + \note Linear time complexity. + */ + ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(data_.a.size > 0); + RAPIDJSON_ASSERT(data_.a.elements != 0); + RAPIDJSON_ASSERT(first >= Begin()); + RAPIDJSON_ASSERT(first <= last); + RAPIDJSON_ASSERT(last <= End()); + ValueIterator pos = Begin() + (first - Begin()); + for (ValueIterator itr = pos; itr != last; ++itr) + itr->~GenericValue(); + std::memmove(pos, last, (End() - last) * sizeof(GenericValue)); + data_.a.size -= (last - first); + return pos; + } + + //@} + + //!@name Number + //@{ + + int GetInt() const { RAPIDJSON_ASSERT(flags_ & kIntFlag); return data_.n.i.i; } + unsigned GetUint() const { RAPIDJSON_ASSERT(flags_ & kUintFlag); return data_.n.u.u; } + int64_t GetInt64() const { RAPIDJSON_ASSERT(flags_ & kInt64Flag); return data_.n.i64; } + uint64_t GetUint64() const { RAPIDJSON_ASSERT(flags_ & kUint64Flag); return data_.n.u64; } + + double GetDouble() const { + RAPIDJSON_ASSERT(IsNumber()); + if ((flags_ & kDoubleFlag) != 0) return data_.n.d; // exact type, no conversion. 
+ if ((flags_ & kIntFlag) != 0) return data_.n.i.i; // int -> double + if ((flags_ & kUintFlag) != 0) return data_.n.u.u; // unsigned -> double + if ((flags_ & kInt64Flag) != 0) return (double)data_.n.i64; // int64_t -> double (may lose precision) + RAPIDJSON_ASSERT((flags_ & kUint64Flag) != 0); return (double)data_.n.u64; // uint64_t -> double (may lose precision) + } + + GenericValue& SetInt(int i) { this->~GenericValue(); new (this) GenericValue(i); return *this; } + GenericValue& SetUint(unsigned u) { this->~GenericValue(); new (this) GenericValue(u); return *this; } + GenericValue& SetInt64(int64_t i64) { this->~GenericValue(); new (this) GenericValue(i64); return *this; } + GenericValue& SetUint64(uint64_t u64) { this->~GenericValue(); new (this) GenericValue(u64); return *this; } + GenericValue& SetDouble(double d) { this->~GenericValue(); new (this) GenericValue(d); return *this; } + + //@} + + //!@name String + //@{ + + const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return ((flags_ & kInlineStrFlag) ? data_.ss.str : data_.s.str); } + + //! Get the length of string. + /*! Since rapidjson permits "\\u0000" in the json string, strlen(v.GetString()) may not equal to v.GetStringLength(). + */ + SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return ((flags_ & kInlineStrFlag) ? (data_.ss.GetLength()) : data_.s.length); } + + //! Set this value as a string without copying source string. + /*! This version has better performance with supplied length, and also support string containing null character. + \param s source string pointer. + \param length The length of source string, excluding the trailing null terminator. + \return The value itself for fluent API. + \post IsString() == true && GetString() == s && GetStringLength() == length + \see SetString(StringRefType) + */ + GenericValue& SetString(const Ch* s, SizeType length) { return SetString(StringRef(s, length)); } + + //! Set this value as a string without copying source string. 
+ /*! \param s source string reference + \return The value itself for fluent API. + \post IsString() == true && GetString() == s && GetStringLength() == s.length + */ + GenericValue& SetString(StringRefType s) { this->~GenericValue(); SetStringRaw(s); return *this; } + + //! Set this value as a string by copying from source string. + /*! This version has better performance with supplied length, and also support string containing null character. + \param s source string. + \param length The length of source string, excluding the trailing null terminator. + \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length + */ + GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) { this->~GenericValue(); SetStringRaw(StringRef(s, length), allocator); return *this; } + + //! Set this value as a string by copying from source string. + /*! \param s source string. + \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length + */ + GenericValue& SetString(const Ch* s, Allocator& allocator) { return SetString(s, internal::StrLen(s), allocator); } + +#if RAPIDJSON_HAS_STDSTRING + //! Set this value as a string by copying from source string. + /*! \param s source string. + \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \post IsString() == true && GetString() != s.data() && strcmp(GetString(),s.data() == 0 && GetStringLength() == s.size() + \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. 
+ */ + GenericValue& SetString(const std::basic_string& s, Allocator& allocator) { return SetString(s.data(), SizeType(s.size()), allocator); } +#endif + + //@} + + //! Generate events of this value to a Handler. + /*! This function adopts the GoF visitor pattern. + Typical usage is to output this JSON value as JSON text via Writer, which is a Handler. + It can also be used to deep clone this value via GenericDocument, which is also a Handler. + \tparam Handler type of handler. + \param handler An object implementing concept Handler. + */ + template + bool Accept(Handler& handler) const { + switch(GetType()) { + case kNullType: return handler.Null(); + case kFalseType: return handler.Bool(false); + case kTrueType: return handler.Bool(true); + + case kObjectType: + if (!handler.StartObject()) + return false; + for (ConstMemberIterator m = MemberBegin(); m != MemberEnd(); ++m) { + RAPIDJSON_ASSERT(m->name.IsString()); // User may change the type of name by MemberIterator. + if (!handler.Key(m->name.GetString(), m->name.GetStringLength(), (m->name.flags_ & kCopyFlag) != 0)) + return false; + if (!m->value.Accept(handler)) + return false; + } + return handler.EndObject(data_.o.size); + + case kArrayType: + if (!handler.StartArray()) + return false; + for (GenericValue* v = data_.a.elements; v != data_.a.elements + data_.a.size; ++v) + if (!v->Accept(handler)) + return false; + return handler.EndArray(data_.a.size); + + case kStringType: + return handler.String(GetString(), GetStringLength(), (flags_ & kCopyFlag) != 0); + + default: + RAPIDJSON_ASSERT(GetType() == kNumberType); + if (IsInt()) return handler.Int(data_.n.i.i); + else if (IsUint()) return handler.Uint(data_.n.u.u); + else if (IsInt64()) return handler.Int64(data_.n.i64); + else if (IsUint64()) return handler.Uint64(data_.n.u64); + else return handler.Double(data_.n.d); + } + } + +private: + template friend class GenericValue; + template friend class GenericDocument; + + enum { + kBoolFlag = 0x100, + 
kNumberFlag = 0x200, + kIntFlag = 0x400, + kUintFlag = 0x800, + kInt64Flag = 0x1000, + kUint64Flag = 0x2000, + kDoubleFlag = 0x4000, + kStringFlag = 0x100000, + kCopyFlag = 0x200000, + kInlineStrFlag = 0x400000, + + // Initial flags of different types. + kNullFlag = kNullType, + kTrueFlag = kTrueType | kBoolFlag, + kFalseFlag = kFalseType | kBoolFlag, + kNumberIntFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag, + kNumberUintFlag = kNumberType | kNumberFlag | kUintFlag | kUint64Flag | kInt64Flag, + kNumberInt64Flag = kNumberType | kNumberFlag | kInt64Flag, + kNumberUint64Flag = kNumberType | kNumberFlag | kUint64Flag, + kNumberDoubleFlag = kNumberType | kNumberFlag | kDoubleFlag, + kNumberAnyFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag | kUintFlag | kUint64Flag | kDoubleFlag, + kConstStringFlag = kStringType | kStringFlag, + kCopyStringFlag = kStringType | kStringFlag | kCopyFlag, + kShortStringFlag = kStringType | kStringFlag | kCopyFlag | kInlineStrFlag, + kObjectFlag = kObjectType, + kArrayFlag = kArrayType, + + kTypeMask = 0xFF // bitwise-and with mask of 0xFF can be optimized by compiler + }; + + static const SizeType kDefaultArrayCapacity = 16; + static const SizeType kDefaultObjectCapacity = 16; + + struct String { + const Ch* str; + SizeType length; + unsigned hashcode; //!< reserved + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + // implementation detail: ShortString can represent zero-terminated strings up to MaxSize chars + // (excluding the terminating zero) and store a value to determine the length of the contained + // string in the last character str[LenPos] by storing "MaxSize - length" there. If the string + // to store has the maximal length of MaxSize then str[LenPos] will be 0 and therefore act as + // the string terminator as well. For getting the string length back from that value just use + // "MaxSize - str[LenPos]". 
+ // This allows to store 11-chars strings in 32-bit mode and 15-chars strings in 64-bit mode + // inline (for `UTF8`-encoded strings). + struct ShortString { + enum { MaxChars = sizeof(String) / sizeof(Ch), MaxSize = MaxChars - 1, LenPos = MaxSize }; + Ch str[MaxChars]; + + inline static bool Usable(SizeType len) { return (MaxSize >= len); } + inline void SetLength(SizeType len) { str[LenPos] = (Ch)(MaxSize - len); } + inline SizeType GetLength() const { return (SizeType)(MaxSize - str[LenPos]); } + }; // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + // By using proper binary layout, retrieval of different integer types do not need conversions. + union Number { +#if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN + struct I { + int i; + char padding[4]; + }i; + struct U { + unsigned u; + char padding2[4]; + }u; +#else + struct I { + char padding[4]; + int i; + }i; + struct U { + char padding2[4]; + unsigned u; + }u; +#endif + int64_t i64; + uint64_t u64; + double d; + }; // 8 bytes + + struct Object { + Member* members; + SizeType size; + SizeType capacity; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + struct Array { + GenericValue* elements; + SizeType size; + SizeType capacity; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + union Data { + String s; + ShortString ss; + Number n; + Object o; + Array a; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + // Initialize this value as array with initial data, without calling destructor. + void SetArrayRaw(GenericValue* values, SizeType count, Allocator& allocator) { + flags_ = kArrayFlag; + if (count) { + data_.a.elements = (GenericValue*)allocator.Malloc(count * sizeof(GenericValue)); + std::memcpy(data_.a.elements, values, count * sizeof(GenericValue)); + } + else + data_.a.elements = NULL; + data_.a.size = data_.a.capacity = count; +//NCBI: assign allocator +SetValueAllocator(&allocator); + } + + //! 
Initialize this value as object with initial data, without calling destructor. + void SetObjectRaw(Member* members, SizeType count, Allocator& allocator) { + flags_ = kObjectFlag; + if (count) { + data_.o.members = (Member*)allocator.Malloc(count * sizeof(Member)); + std::memcpy(data_.o.members, members, count * sizeof(Member)); + } + else + data_.o.members = NULL; + data_.o.size = data_.o.capacity = count; +//NCBI: assign allocator +SetValueAllocator(&allocator); + } + + //! Initialize this value as constant string, without calling destructor. + void SetStringRaw(StringRefType s) RAPIDJSON_NOEXCEPT { + flags_ = kConstStringFlag; + data_.s.str = s; + data_.s.length = s.length; + } + + //! Initialize this value as copy string with initial data, without calling destructor. + void SetStringRaw(StringRefType s, Allocator& allocator) { + Ch* str = NULL; + if(ShortString::Usable(s.length)) { + flags_ = kShortStringFlag; + data_.ss.SetLength(s.length); + str = data_.ss.str; + } else { + flags_ = kCopyStringFlag; + data_.s.length = s.length; + str = (Ch *)allocator.Malloc((s.length + 1) * sizeof(Ch)); + data_.s.str = str; + } + std::memcpy(str, s, s.length * sizeof(Ch)); + str[s.length] = '\0'; +//NCBI: assign allocator +SetValueAllocator(&allocator); + } + + //! 
Assignment without calling destructor + void RawAssign(GenericValue& rhs) RAPIDJSON_NOEXCEPT { + data_ = rhs.data_; + flags_ = rhs.flags_; + rhs.flags_ = kNullFlag; +//NCBI: assign allocator +SetValueAllocator(rhs.GetValueAllocator()); + } + + template + bool StringEqual(const GenericValue& rhs) const { + RAPIDJSON_ASSERT(IsString()); + RAPIDJSON_ASSERT(rhs.IsString()); + + const SizeType len1 = GetStringLength(); + const SizeType len2 = rhs.GetStringLength(); + if(len1 != len2) { return false; } + + const Ch* const str1 = GetString(); + const Ch* const str2 = rhs.GetString(); + if(str1 == str2) { return true; } // fast path for constant string + + return (std::memcmp(str1, str2, sizeof(Ch) * len1) == 0); + } + +protected: + Data data_; + unsigned flags_; +//NCBI: added allocator +#if 1 +protected: + Allocator* allocator_; +public: + ValueType& RAPIDJSON_NCBI_NOOPTIMIZE SetValueAllocator(Allocator* allocator) { + allocator_ = allocator; + return *this; + } + Allocator* GetValueAllocator(void) const { + return allocator_; + } +#endif +}; + +//! GenericValue with UTF8 encoding +typedef GenericValue > Value; + +/////////////////////////////////////////////////////////////////////////////// +// GenericDocument + +//! A document for parsing JSON text as DOM. +/*! + \note implements Handler concept + \tparam Encoding Encoding for both parsing and string storage. + \tparam Allocator Allocator for allocating memory for the DOM + \tparam StackAllocator Allocator for allocating memory for stack during parsing. + \warning Although GenericDocument inherits from GenericValue, the API does \b not provide any virtual functions, especially no virtual destructor. To avoid memory leaks, do not \c delete a GenericDocument object via a pointer to a GenericValue. +*/ +template , typename StackAllocator = CrtAllocator> +class GenericDocument : public GenericValue { +public: + typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding. 
+ typedef GenericValue ValueType; //!< Value type of the document. + typedef Allocator AllocatorType; //!< Allocator type from template parameter. + + //! Constructor + /*! \param allocator Optional allocator for allocating memory. + \param stackCapacity Optional initial capacity of stack in bytes. + \param stackAllocator Optional allocator for allocating memory for stack. + */ +// NCBI: moved allocator_into GenericValue +// Allocator* allocator_; +#if 0 + GenericDocument(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) : + allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_() + { + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); + } +#else + GenericDocument(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) : + ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_() + { + if (!allocator) { + ownAllocator_ = allocator = RAPIDJSON_NEW(Allocator()); + } + ValueType::SetValueAllocator(allocator); + } +#endif + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move constructor in C++11 + GenericDocument(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT + : ValueType(std::move(rhs)), +// NCBI: moved allocator_into GenericValue +// allocator_(rhs.allocator_), + ownAllocator_(rhs.ownAllocator_), + stack_(std::move(rhs.stack_)), + parseResult_(rhs.parseResult_) + { +// NCBI: moved allocator_into GenericValue +ValueType::SetValueAllocator(rhs.GetValueAllocator()); +// rhs.allocator_ = 0; +rhs.ValueType::SetValueAllocator(0); + + rhs.ownAllocator_ = 0; + rhs.parseResult_ = ParseResult(); + } +#endif + + ~GenericDocument() { + Destroy(); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! 
Move assignment in C++11 + GenericDocument& operator=(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT + { + // The cast to ValueType is necessary here, because otherwise it would + // attempt to call GenericValue's templated assignment operator. + ValueType::operator=(std::forward(rhs)); + + // Calling the destructor here would prematurely call stack_'s destructor + Destroy(); + +// NCBI: moved allocator_into GenericValue +// allocator_ = rhs.allocator_; +ValueType::SetValueAllocator(rhs.ValueType::GetValueAllocator()); + + ownAllocator_ = rhs.ownAllocator_; + stack_ = std::move(rhs.stack_); + parseResult_ = rhs.parseResult_; + +// NCBI: moved allocator_into GenericValue +rhs.ValueType::SetValueAllocator(0); + + rhs.ownAllocator_ = 0; + rhs.parseResult_ = ParseResult(); + + return *this; + } +#endif + + //!@name Parse from stream + //!@{ + + //! Parse JSON text from an input stream (with Encoding conversion) + /*! \tparam parseFlags Combination of \ref ParseFlag. + \tparam SourceEncoding Encoding of input stream + \tparam InputStream Type of input stream, implementing Stream concept + \param is Input stream to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseStream(InputStream& is) { + ValueType::SetNull(); // Remove existing root if exist + GenericReader reader(&stack_.GetAllocator()); + ClearStackOnExit scope(*this); + parseResult_ = reader.template Parse(is, *this); + if (parseResult_) { + RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object + this->RawAssign(*stack_.template Pop(1)); // Add this-> to prevent issue 13. +//NCBI added allocator + ValueType::SetValueAllocator(ownAllocator_); + } + return *this; + } + + //! Parse JSON text from an input stream + /*! \tparam parseFlags Combination of \ref ParseFlag. + \tparam InputStream Type of input stream, implementing Stream concept + \param is Input stream to be parsed. + \return The document itself for fluent API. 
+ */ + template + GenericDocument& ParseStream(InputStream& is) { + return ParseStream(is); + } + + //! Parse JSON text from an input stream (with \ref kParseDefaultFlags) + /*! \tparam InputStream Type of input stream, implementing Stream concept + \param is Input stream to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseStream(InputStream& is) { + return ParseStream(is); + } + //!@} + + //!@name Parse in-place from mutable string + //!@{ + + //! Parse JSON text from a mutable string + /*! \tparam parseFlags Combination of \ref ParseFlag. + \param str Mutable zero-terminated string to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseInsitu(Ch* str) { + GenericInsituStringStream s(str); + return ParseStream(s); + } + + //! Parse JSON text from a mutable string (with \ref kParseDefaultFlags) + /*! \param str Mutable zero-terminated string to be parsed. + \return The document itself for fluent API. + */ + GenericDocument& ParseInsitu(Ch* str) { + return ParseInsitu(str); + } + //!@} + + //!@name Parse from read-only string + //!@{ + + //! Parse JSON text from a read-only string (with Encoding conversion) + /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). + \tparam SourceEncoding Transcoding from input Encoding + \param str Read-only zero-terminated string to be parsed. + */ + template + GenericDocument& Parse(const Ch* str) { + RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); + GenericStringStream s(str); + return ParseStream(s); + } + + //! Parse JSON text from a read-only string + /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). + \param str Read-only zero-terminated string to be parsed. + */ + template + GenericDocument& Parse(const Ch* str) { + return Parse(str); + } + + //! Parse JSON text from a read-only string (with \ref kParseDefaultFlags) + /*! 
\param str Read-only zero-terminated string to be parsed. + */ + GenericDocument& Parse(const Ch* str) { + return Parse(str); + } + //!@} + + //!@name Handling parse errors + //!@{ + + //! Whether a parse error has occurred in the last parsing. + bool HasParseError() const { return parseResult_.IsError(); } + + //! Get the \ref ParseErrorCode of last parsing. + ParseErrorCode GetParseError() const { return parseResult_.Code(); } + + //! Get the position of last parsing error in input, 0 otherwise. + size_t GetErrorOffset() const { return parseResult_.Offset(); } + + //!@} + + //! Get the allocator of this document. +// NCBI +#if 0 + Allocator& GetAllocator() { return *allocator_; } +#else + Allocator& GetAllocator() const { return *ValueType::GetValueAllocator(); } + Allocator* GetOwnAllocator() const { return ownAllocator_; } +#endif + + //! Get the capacity of stack in bytes. + size_t GetStackCapacity() const { return stack_.GetCapacity(); } + +private: + // clear stack on any exit from ParseStream, e.g.
due to exception + struct ClearStackOnExit { + explicit ClearStackOnExit(GenericDocument& d) : d_(d) {} + ~ClearStackOnExit() { d_.ClearStack(); } + private: + ClearStackOnExit(const ClearStackOnExit&); + ClearStackOnExit& operator=(const ClearStackOnExit&); + GenericDocument& d_; + }; + + // callers of the following private Handler functions + template friend class GenericReader; // for parsing + template friend class GenericValue; // for deep copying + + // Implementation of Handler +//NCBI: added allocator + bool Null() { (new (stack_.template Push()) ValueType( ))->SetValueAllocator(&GetAllocator()); return true; } + bool Bool(bool b) { (new (stack_.template Push()) ValueType(b))->SetValueAllocator(&GetAllocator()); return true; } + bool Int(int i) { (new (stack_.template Push()) ValueType(i))->SetValueAllocator(&GetAllocator()); return true; } + bool Uint(unsigned i) { (new (stack_.template Push()) ValueType(i))->SetValueAllocator(&GetAllocator()); return true; } + bool Int64(int64_t i) { (new (stack_.template Push()) ValueType(i))->SetValueAllocator(&GetAllocator()); return true; } + bool Uint64(uint64_t i) { (new (stack_.template Push()) ValueType(i))->SetValueAllocator(&GetAllocator()); return true; } + bool Double(double d) { (new (stack_.template Push()) ValueType(d))->SetValueAllocator(&GetAllocator()); return true; } + + bool String(const Ch* str, SizeType length, bool copy) { + if (copy) + new (stack_.template Push()) ValueType(str, length, GetAllocator()); + else + new (stack_.template Push()) ValueType(str, length); + return true; + } + + bool StartObject() { (new (stack_.template Push()) ValueType(kObjectType))->SetValueAllocator(&GetAllocator()); return true; } + + bool Key(const Ch* str, SizeType length, bool copy) { return String(str, length, copy); } + + bool EndObject(SizeType memberCount) { + typename ValueType::Member* members = stack_.template Pop(memberCount); + stack_.template Top()->SetObjectRaw(members, (SizeType)memberCount, 
GetAllocator()); + return true; + } + + bool StartArray() { (new (stack_.template Push()) ValueType(kArrayType))->SetValueAllocator(&GetAllocator()); return true; } + + bool EndArray(SizeType elementCount) { + ValueType* elements = stack_.template Pop(elementCount); + stack_.template Top()->SetArrayRaw(elements, elementCount, GetAllocator()); + return true; + } + +private: + //! Prohibit copying + GenericDocument(const GenericDocument&); + //! Prohibit assignment + GenericDocument& operator=(const GenericDocument&); + + void ClearStack() { + if (Allocator::kNeedFree) + while (stack_.GetSize() > 0) // Here assumes all elements in stack array are GenericValue (Member is actually 2 GenericValue objects) + (stack_.template Pop(1))->~ValueType(); + else + stack_.Clear(); + stack_.ShrinkToFit(); + } + + void Destroy() { + if (ownAllocator_) { + RAPIDJSON_DELETE(ownAllocator_); + } + } + + static const size_t kDefaultStackCapacity = 1024; +// NCBI: moved allocator_into GenericValue +// Allocator* allocator_; + Allocator* ownAllocator_; + internal::Stack stack_; + ParseResult parseResult_; +}; + +//! 
GenericDocument with UTF8 encoding +typedef GenericDocument > Document; + +// defined here due to the dependency on GenericDocument +template +template +inline +GenericValue::GenericValue(const GenericValue& rhs, Allocator& allocator) +{ + switch (rhs.GetType()) { + case kObjectType: + case kArrayType: { // perform deep copy via SAX Handler + GenericDocument d(&allocator); + rhs.Accept(d); + RawAssign(*d.stack_.template Pop(1)); + } + break; + case kStringType: + if (rhs.flags_ == kConstStringFlag) { + flags_ = rhs.flags_; + data_ = *reinterpret_cast(&rhs.data_); + } else { + SetStringRaw(StringRef(rhs.GetString(), rhs.GetStringLength()), allocator); + } + break; + default: // kNumberType, kTrueType, kFalseType, kNullType + flags_ = rhs.flags_; + data_ = *reinterpret_cast(&rhs.data_); + } +} + +RAPIDJSON_NAMESPACE_END + +#if defined(_MSC_VER) || defined(__GNUC__) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_DOCUMENT_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/encodedstream.h b/c++/include/misc/jsonwrapp/rapidjson10/encodedstream.h new file mode 100644 index 00000000..7c8863fe --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/encodedstream.h @@ -0,0 +1,261 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_ENCODEDSTREAM_H_ +#define RAPIDJSON_ENCODEDSTREAM_H_ + +#include "rapidjson.h" + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Input byte stream wrapper with a statically bound encoding. +/*! + \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. + \tparam InputByteStream Type of input byte stream. For example, FileReadStream. +*/ +template +class EncodedInputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); +public: + typedef typename Encoding::Ch Ch; + + EncodedInputStream(InputByteStream& is) : is_(is) { + current_ = Encoding::TakeBOM(is_); + } + + Ch Peek() const { return current_; } + Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; } + size_t Tell() const { return is_.Tell(); } + + // Not implemented + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + EncodedInputStream(const EncodedInputStream&); + EncodedInputStream& operator=(const EncodedInputStream&); + + InputByteStream& is_; + Ch current_; +}; + +//! Output byte stream wrapper with statically bound encoding. +/*! + \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. + \tparam OutputByteStream Type of output byte stream. For example, FileWriteStream.
+*/ +template +class EncodedOutputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); +public: + typedef typename Encoding::Ch Ch; + + EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { + if (putBOM) + Encoding::PutBOM(os_); + } + + void Put(Ch c) { Encoding::Put(os_, c); } + void Flush() { os_.Flush(); } + + // Not implemented (each asserts, then returns a dummy value so no non-void function falls off its end) + Ch Peek() const { RAPIDJSON_ASSERT(false); return 0; } + Ch Take() { RAPIDJSON_ASSERT(false); return 0; } + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + EncodedOutputStream(const EncodedOutputStream&); + EncodedOutputStream& operator=(const EncodedOutputStream&); + + OutputByteStream& os_; +}; + +#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8::x, UTF16LE::x, UTF16BE::x, UTF32LE::x, UTF32BE::x + +//! Input stream wrapper with dynamically bound encoding and automatic encoding detection. +/*! + \tparam CharType Type of character for reading. + \tparam InputByteStream type of input byte stream to be wrapped. +*/ +template +class AutoUTFInputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); +public: + typedef CharType Ch; + + //! Constructor. + /*! + \param is input stream to be wrapped. + \param type UTF encoding type if it is not detected from the stream.
+ */ + AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) { + RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); + DetectType(); + static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) }; + takeFunc_ = f[type_]; + current_ = takeFunc_(*is_); + } + + UTFType GetType() const { return type_; } + bool HasBOM() const { return hasBOM_; } + + Ch Peek() const { return current_; } + Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; } + size_t Tell() const { return is_->Tell(); } + + // Not implemented + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + AutoUTFInputStream(const AutoUTFInputStream&); + AutoUTFInputStream& operator=(const AutoUTFInputStream&); + + // Detect encoding type with BOM or RFC 4627 + void DetectType() { + // BOM (Byte Order Mark): + // 00 00 FE FF UTF-32BE + // FF FE 00 00 UTF-32LE + // FE FF UTF-16BE + // FF FE UTF-16LE + // EF BB BF UTF-8 + + const unsigned char* c = (const unsigned char *)is_->Peek4(); + if (!c) + return; + + unsigned bom = c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24); + hasBOM_ = false; + if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } + else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } + else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); } + else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); } + else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); } + + // RFC 4627: Section 3 + // "Since the first two characters of a JSON text will always be ASCII + // characters [RFC0020], it is possible to determine whether 
an octet + // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking + // at the pattern of nulls in the first four octets." + // 00 00 00 xx UTF-32BE + // 00 xx 00 xx UTF-16BE + // xx 00 00 00 UTF-32LE + // xx 00 xx 00 UTF-16LE + // xx xx xx xx UTF-8 + + if (!hasBOM_) { + unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0); + switch (pattern) { + case 0x08: type_ = kUTF32BE; break; + case 0x0A: type_ = kUTF16BE; break; + case 0x01: type_ = kUTF32LE; break; + case 0x05: type_ = kUTF16LE; break; + case 0x0F: type_ = kUTF8; break; + default: break; // Use type defined by user. + } + } + + // Runtime check whether the size of character type is sufficient. It only performs checks with assertion. + if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); + if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); + } + + typedef Ch (*TakeFunc)(InputByteStream& is); + InputByteStream* is_; + UTFType type_; + Ch current_; + TakeFunc takeFunc_; + bool hasBOM_; +}; + +//! Output stream wrapper with dynamically bound encoding and automatic encoding detection. +/*! + \tparam CharType Type of character for writing. + \tparam OutputByteStream type of output byte stream to be wrapped. +*/ +template +class AutoUTFOutputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); +public: + typedef CharType Ch; + + //! Constructor. + /*! + \param os output stream to be wrapped. + \param type UTF encoding type. + \param putBOM Whether to write BOM at the beginning of the stream. + */ + AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) { + RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); + + // Runtime check whether the size of character type is sufficient. It only performs checks with assertion.
+ if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); + if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); + + static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) }; + putFunc_ = f[type_]; + + if (putBOM) + PutBOM(); + } + + UTFType GetType() const { return type_; } + + void Put(Ch c) { putFunc_(*os_, c); } + void Flush() { os_->Flush(); } + + // Not implemented (each asserts, then returns a dummy value so no non-void function falls off its end) + Ch Peek() const { RAPIDJSON_ASSERT(false); return 0; } + Ch Take() { RAPIDJSON_ASSERT(false); return 0; } + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + AutoUTFOutputStream(const AutoUTFOutputStream&); + AutoUTFOutputStream& operator=(const AutoUTFOutputStream&); + + void PutBOM() { + typedef void (*PutBOMFunc)(OutputByteStream&); + static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) }; + f[type_](*os_); + } + + typedef void (*PutFunc)(OutputByteStream&, Ch); + + OutputByteStream* os_; + UTFType type_; + PutFunc putFunc_; +}; + +#undef RAPIDJSON_ENCODINGS_FUNC + +RAPIDJSON_NAMESPACE_END + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_ENCODEDSTREAM_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/encodings.h b/c++/include/misc/jsonwrapp/rapidjson10/encodings.h new file mode 100644 index 00000000..90b46ed3 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/encodings.h @@ -0,0 +1,625 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License.
You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ENCODINGS_H_ +#define RAPIDJSON_ENCODINGS_H_ + +#include "rapidjson.h" + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data +RAPIDJSON_DIAG_OFF(4702) // unreachable code +#elif defined(__GNUC__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +RAPIDJSON_DIAG_OFF(overflow) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// Encoding + +/*! \class rapidjson::Encoding + \brief Concept for encoding of Unicode characters. + +\code +concept Encoding { + typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition. + + enum { supportUnicode = 1 }; // or 0 if not supporting unicode + + //! \brief Encode a Unicode codepoint to an output stream. + //! \param os Output stream. + //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. + template + static void Encode(OutputStream& os, unsigned codepoint); + + //! \brief Decode a Unicode codepoint from an input stream. + //! \param is Input stream. + //! \param codepoint Output of the unicode codepoint. + //! \return true if a valid codepoint can be decoded from the stream. + template + static bool Decode(InputStream& is, unsigned* codepoint); + + //! \brief Validate one Unicode codepoint from an encoded stream. + //! \param is Input stream to obtain codepoint. + //! \param os Output for copying one codepoint. + //! \return true if it is valid. + //! 
\note This function just validating and copying the codepoint without actually decode it. + template + static bool Validate(InputStream& is, OutputStream& os); + + // The following functions are deal with byte streams. + + //! Take a character from input byte stream, skip BOM if exist. + template + static CharType TakeBOM(InputByteStream& is); + + //! Take a character from input byte stream. + template + static Ch Take(InputByteStream& is); + + //! Put BOM to output byte stream. + template + static void PutBOM(OutputByteStream& os); + + //! Put a character to output byte stream. + template + static void Put(OutputByteStream& os, Ch c); +}; +\endcode +*/ + +/////////////////////////////////////////////////////////////////////////////// +// UTF8 + +//! UTF-8 encoding. +/*! http://en.wikipedia.org/wiki/UTF-8 + http://tools.ietf.org/html/rfc3629 + \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char. + \note implements Encoding concept +*/ +template +struct UTF8 { + typedef CharType Ch; + + enum { supportUnicode = 1 }; + + template + static void Encode(OutputStream& os, unsigned codepoint) { + if (codepoint <= 0x7F) + os.Put(static_cast(codepoint & 0xFF)); + else if (codepoint <= 0x7FF) { + os.Put(static_cast(0xC0 | ((codepoint >> 6) & 0xFF))); + os.Put(static_cast(0x80 | ((codepoint & 0x3F)))); + } + else if (codepoint <= 0xFFFF) { + os.Put(static_cast(0xE0 | ((codepoint >> 12) & 0xFF))); + os.Put(static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + os.Put(static_cast(0x80 | (codepoint & 0x3F))); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + os.Put(static_cast(0xF0 | ((codepoint >> 18) & 0xFF))); + os.Put(static_cast(0x80 | ((codepoint >> 12) & 0x3F))); + os.Put(static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + os.Put(static_cast(0x80 | (codepoint & 0x3F))); + } + } + + template + static bool Decode(InputStream& is, unsigned* codepoint) { +#define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu) +#define 
TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0) +#define TAIL() COPY(); TRANS(0x70) + Ch c = is.Take(); + if (!(c & 0x80)) { + *codepoint = (unsigned char)c; + return true; + } + + unsigned char type = GetRange((unsigned char)c); + *codepoint = (0xFF >> type) & (unsigned char)c; + bool result = true; + switch (type) { + case 2: TAIL(); return result; + case 3: TAIL(); TAIL(); return result; + case 4: COPY(); TRANS(0x50); TAIL(); return result; + case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; + case 6: TAIL(); TAIL(); TAIL(); return result; + case 10: COPY(); TRANS(0x20); TAIL(); return result; + case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; + default: return false; + } +#undef COPY +#undef TRANS +#undef TAIL + } + + template + static bool Validate(InputStream& is, OutputStream& os) { +#define COPY() os.Put(c = is.Take()) +#define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0) +#define TAIL() COPY(); TRANS(0x70) + Ch c; + COPY(); + if (!(c & 0x80)) + return true; + + bool result = true; + switch (GetRange((unsigned char)c)) { + case 2: TAIL(); return result; + case 3: TAIL(); TAIL(); return result; + case 4: COPY(); TRANS(0x50); TAIL(); return result; + case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; + case 6: TAIL(); TAIL(); TAIL(); return result; + case 10: COPY(); TRANS(0x20); TAIL(); return result; + case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; + default: return false; + } +#undef COPY +#undef TRANS +#undef TAIL + } + + static unsigned char GetRange(unsigned char c) { + // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types. 
+ static const unsigned char type[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, + 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, + 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, + 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + }; + return type[c]; + } + + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + Ch c = Take(is); + if ((unsigned char)c != 0xEFu) return c; + c = is.Take(); + if ((unsigned char)c != 0xBBu) return c; + c = is.Take(); + if ((unsigned char)c != 0xBFu) return c; + c = is.Take(); + return c; + } + + template + static Ch Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + return is.Take(); + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(0xEFu); os.Put(0xBBu); os.Put(0xBFu); + } + + template + static void Put(OutputByteStream& os, Ch c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast(c)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF16 + +//! UTF-16 encoding. +/*! http://en.wikipedia.org/wiki/UTF-16 + http://tools.ietf.org/html/rfc2781 + \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. 
+ \note implements Encoding concept + + \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. + For streaming, use UTF16LE and UTF16BE, which handle endianness. +*/ +template +struct UTF16 { + typedef CharType Ch; + RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2); + + enum { supportUnicode = 1 }; + + template + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); + if (codepoint <= 0xFFFF) { + RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair + os.Put(static_cast(codepoint)); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + unsigned v = codepoint - 0x10000; + os.Put(static_cast((v >> 10) | 0xD800)); + os.Put((v & 0x3FF) | 0xDC00); + } + } + + template + static bool Decode(InputStream& is, unsigned* codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); + Ch c = is.Take(); + if (c < 0xD800 || c > 0xDFFF) { + *codepoint = c; + return true; + } + else if (c <= 0xDBFF) { + *codepoint = (c & 0x3FF) << 10; + c = is.Take(); + *codepoint |= (c & 0x3FF); + *codepoint += 0x10000; + return c >= 0xDC00 && c <= 0xDFFF; + } + return false; + } + + template + static bool Validate(InputStream& is, OutputStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); + Ch c; + os.Put(c = is.Take()); + if (c < 0xD800 || c > 0xDFFF) + return true; + else if (c <= 0xDBFF) { + os.Put(c = is.Take()); + return c >= 0xDC00 && c <= 0xDFFF; + } + return false; + } +}; + +//! UTF-16 little endian encoding. +template +struct UTF16LE : UTF16 { + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return (unsigned short)c == 0xFEFFu ? 
Take(is) : c; + } + + template + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = (unsigned char)is.Take(); + c |= (unsigned char)is.Take() << 8; + return c; + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(0xFFu); os.Put(0xFEu); + } + + template + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(c & 0xFFu); + os.Put((c >> 8) & 0xFFu); + } +}; + +//! UTF-16 big endian encoding. +template +struct UTF16BE : UTF16 { + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return (unsigned short)c == 0xFEFFu ? Take(is) : c; + } + + template + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = (unsigned char)is.Take() << 8; + c |= (unsigned char)is.Take(); + return c; + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(0xFEu); os.Put(0xFFu); + } + + template + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put((c >> 8) & 0xFFu); + os.Put(c & 0xFFu); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF32 + +//! UTF-32 encoding. +/*! http://en.wikipedia.org/wiki/UTF-32 + \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. + \note implements Encoding concept + + \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. + For streaming, use UTF32LE and UTF32BE, which handle endianness. 
+*/ +template +struct UTF32 { + typedef CharType Ch; + RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4); + + enum { supportUnicode = 1 }; + + template + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + os.Put(codepoint); + } + + template + static bool Decode(InputStream& is, unsigned* codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); + Ch c = is.Take(); + *codepoint = c; + return c <= 0x10FFFF; + } + + template + static bool Validate(InputStream& is, OutputStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); + Ch c; + os.Put(c = is.Take()); + return c <= 0x10FFFF; + } +}; + +//! UTF-32 little endian encoding. +template +struct UTF32LE : UTF32 { + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return (unsigned)c == 0x0000FEFFu ? Take(is) : c; + } + + template + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = (unsigned char)is.Take(); + c |= (unsigned char)is.Take() << 8; + c |= (unsigned char)is.Take() << 16; + c |= (unsigned char)is.Take() << 24; + return c; + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(0xFFu); os.Put(0xFEu); os.Put(0x00u); os.Put(0x00u); + } + + template + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(c & 0xFFu); + os.Put((c >> 8) & 0xFFu); + os.Put((c >> 16) & 0xFFu); + os.Put((c >> 24) & 0xFFu); + } +}; + +//! UTF-32 big endian encoding.
+template +struct UTF32BE : UTF32 { + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return (unsigned)c == 0x0000FEFFu ? Take(is) : c; + } + + template + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = (unsigned char)is.Take() << 24; + c |= (unsigned char)is.Take() << 16; + c |= (unsigned char)is.Take() << 8; + c |= (unsigned char)is.Take(); + return c; + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(0x00u); os.Put(0x00u); os.Put(0xFEu); os.Put(0xFFu); + } + + template + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put((c >> 24) & 0xFFu); + os.Put((c >> 16) & 0xFFu); + os.Put((c >> 8) & 0xFFu); + os.Put(c & 0xFFu); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// ASCII + +//! ASCII encoding. +/*! http://en.wikipedia.org/wiki/ASCII + \tparam CharType Code unit for storing 7-bit ASCII data. Default is char. 
+ \note implements Encoding concept +*/ +template +struct ASCII { + typedef CharType Ch; + + enum { supportUnicode = 0 }; + + template + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_ASSERT(codepoint <= 0x7F); + os.Put(static_cast(codepoint & 0xFF)); + } + + template + static bool Decode(InputStream& is, unsigned* codepoint) { + unsigned char c = static_cast(is.Take()); + *codepoint = c; + return c <= 0X7F; + } + + template + static bool Validate(InputStream& is, OutputStream& os) { + unsigned char c = is.Take(); + os.Put(c); + return c <= 0x7F; + } + + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + Ch c = Take(is); + return c; + } + + template + static Ch Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + return is.Take(); + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + (void)os; + } + + template + static void Put(OutputByteStream& os, Ch c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast(c)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// AutoUTF + +//! Runtime-specified UTF encoding type of a stream. +enum UTFType { + kUTF8 = 0, //!< UTF-8. + kUTF16LE = 1, //!< UTF-16 little endian. + kUTF16BE = 2, //!< UTF-16 big endian. + kUTF32LE = 3, //!< UTF-32 little endian. + kUTF32BE = 4 //!< UTF-32 big endian. +}; + +//! Dynamically select encoding according to stream's runtime-specified UTF encoding type. +/*! \note This class can be used with AutoUTFInputStream and AutoUTFOutputStream, which provide GetType().
+*/ +template +struct AutoUTF { + typedef CharType Ch; + + enum { supportUnicode = 1 }; + +#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8::x, UTF16LE::x, UTF16BE::x, UTF32LE::x, UTF32BE::x + + template + RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) { + typedef void (*EncodeFunc)(OutputStream&, unsigned); + static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) }; + (*f[os.GetType()])(os, codepoint); + } + + template + RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { + typedef bool (*DecodeFunc)(InputStream&, unsigned*); + static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) }; + return (*f[is.GetType()])(is, codepoint); + } + + template + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { + typedef bool (*ValidateFunc)(InputStream&, OutputStream&); + static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) }; + return (*f[is.GetType()])(is, os); + } + +#undef RAPIDJSON_ENCODINGS_FUNC +}; + +/////////////////////////////////////////////////////////////////////////////// +// Transcoder + +//! Encoding conversion. +template +struct Transcoder { + //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream. + template + RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) { + unsigned codepoint; + if (!SourceEncoding::Decode(is, &codepoint)) + return false; + TargetEncoding::Encode(os, codepoint); + return true; + } + + //! Validate one Unicode codepoint from an encoded stream. + template + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { + return Transcode(is, os); // Since source/target encoding is different, must transcode. + } +}; + +//! Specialization of Transcoder with same source and target encoding. 
+template +struct Transcoder { + template + RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) { + os.Put(is.Take()); // Just copy one code unit. This semantic is different from primary template class. + return true; + } + + template + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { + return Encoding::Validate(is, os); // source/target encoding are the same + } +}; + +RAPIDJSON_NAMESPACE_END + +#if defined(__GNUC__) || defined(_MSV_VER) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_ENCODINGS_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/error/en.h b/c++/include/misc/jsonwrapp/rapidjson10/error/en.h new file mode 100644 index 00000000..d5f9caab --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/error/en.h @@ -0,0 +1,65 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ERROR_EN_H__ +#define RAPIDJSON_ERROR_EN_H__ + +#include "error.h" + +RAPIDJSON_NAMESPACE_BEGIN + +//! Maps error code of parsing into error message. +/*! + \ingroup RAPIDJSON_ERRORS + \param parseErrorCode Error code obtained in parsing. + \return the error message. + \note User can make a copy of this function for localization. + Using switch-case is safer for future modification of error codes. 
+*/ +inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErrorCode) { + switch (parseErrorCode) { + case kParseErrorNone: return RAPIDJSON_ERROR_STRING("No error."); + + case kParseErrorDocumentEmpty: return RAPIDJSON_ERROR_STRING("The document is empty."); + case kParseErrorDocumentRootNotSingular: return RAPIDJSON_ERROR_STRING("The document root must not follow by other values."); + + case kParseErrorValueInvalid: return RAPIDJSON_ERROR_STRING("Invalid value."); + + case kParseErrorObjectMissName: return RAPIDJSON_ERROR_STRING("Missing a name for object member."); + case kParseErrorObjectMissColon: return RAPIDJSON_ERROR_STRING("Missing a colon after a name of object member."); + case kParseErrorObjectMissCommaOrCurlyBracket: return RAPIDJSON_ERROR_STRING("Missing a comma or '}' after an object member."); + + case kParseErrorArrayMissCommaOrSquareBracket: return RAPIDJSON_ERROR_STRING("Missing a comma or ']' after an array element."); + + case kParseErrorStringUnicodeEscapeInvalidHex: return RAPIDJSON_ERROR_STRING("Incorrect hex digit after \\u escape in string."); + case kParseErrorStringUnicodeSurrogateInvalid: return RAPIDJSON_ERROR_STRING("The surrogate pair in string is invalid."); + case kParseErrorStringEscapeInvalid: return RAPIDJSON_ERROR_STRING("Invalid escape character in string."); + case kParseErrorStringMissQuotationMark: return RAPIDJSON_ERROR_STRING("Missing a closing quotation mark in string."); + case kParseErrorStringInvalidEncoding: return RAPIDJSON_ERROR_STRING("Invalid encoding in string."); + + case kParseErrorNumberTooBig: return RAPIDJSON_ERROR_STRING("Number too big to be stored in double."); + case kParseErrorNumberMissFraction: return RAPIDJSON_ERROR_STRING("Miss fraction part in number."); + case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number."); + + case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error."); + case 
kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error."); + + default: + return RAPIDJSON_ERROR_STRING("Unknown error."); + } +} + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_ERROR_EN_H__ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/error/error.h b/c++/include/misc/jsonwrapp/rapidjson10/error/error.h new file mode 100644 index 00000000..f9094fb9 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/error/error.h @@ -0,0 +1,146 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ERROR_ERROR_H__ +#define RAPIDJSON_ERROR_ERROR_H__ + +#include "../rapidjson.h" + +/*! \file error.h */ + +/*! \defgroup RAPIDJSON_ERRORS RapidJSON error handling */ + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ERROR_CHARTYPE + +//! Character type of error messages. +/*! \ingroup RAPIDJSON_ERRORS + The default character type is \c char. + On Windows, user can define this macro as \c TCHAR for supporting both + unicode/non-unicode settings. +*/ +#ifndef RAPIDJSON_ERROR_CHARTYPE +#define RAPIDJSON_ERROR_CHARTYPE char +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ERROR_STRING + +//! Macro for converting string literial to \ref RAPIDJSON_ERROR_CHARTYPE[]. 
+/*! \ingroup RAPIDJSON_ERRORS + By default this conversion macro does nothing. + On Windows, user can define this macro as \c _T(x) for supporting both + unicode/non-unicode settings. +*/ +#ifndef RAPIDJSON_ERROR_STRING +#define RAPIDJSON_ERROR_STRING(x) x +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// ParseErrorCode + +//! Error code of parsing. +/*! \ingroup RAPIDJSON_ERRORS + \see GenericReader::Parse, GenericReader::GetParseErrorCode +*/ +enum ParseErrorCode { + kParseErrorNone = 0, //!< No error. + + kParseErrorDocumentEmpty, //!< The document is empty. + kParseErrorDocumentRootNotSingular, //!< The document root must not follow by other values. + + kParseErrorValueInvalid, //!< Invalid value. + + kParseErrorObjectMissName, //!< Missing a name for object member. + kParseErrorObjectMissColon, //!< Missing a colon after a name of object member. + kParseErrorObjectMissCommaOrCurlyBracket, //!< Missing a comma or '}' after an object member. + + kParseErrorArrayMissCommaOrSquareBracket, //!< Missing a comma or ']' after an array element. + + kParseErrorStringUnicodeEscapeInvalidHex, //!< Incorrect hex digit after \\u escape in string. + kParseErrorStringUnicodeSurrogateInvalid, //!< The surrogate pair in string is invalid. + kParseErrorStringEscapeInvalid, //!< Invalid escape character in string. + kParseErrorStringMissQuotationMark, //!< Missing a closing quotation mark in string. + kParseErrorStringInvalidEncoding, //!< Invalid encoding in string. + + kParseErrorNumberTooBig, //!< Number too big to be stored in double. + kParseErrorNumberMissFraction, //!< Miss fraction part in number. + kParseErrorNumberMissExponent, //!< Miss exponent in number. + + kParseErrorTermination, //!< Parsing was terminated. + kParseErrorUnspecificSyntaxError //!< Unspecific syntax error. +}; + +//! Result of parsing (wraps ParseErrorCode) +/*! 
+ \ingroup RAPIDJSON_ERRORS + \code + Document doc; + ParseResult ok = doc.Parse("[42]"); + if (!ok) { + fprintf(stderr, "JSON parse error: %s (%u)", + GetParseError_En(ok.Code()), ok.Offset()); + exit(EXIT_FAILURE); + } + \endcode + \see GenericReader::Parse, GenericDocument::Parse +*/ +struct ParseResult { + + //! Default constructor, no error. + ParseResult() : code_(kParseErrorNone), offset_(0) {} + //! Constructor to set an error. + ParseResult(ParseErrorCode code, size_t offset) : code_(code), offset_(offset) {} + + //! Get the error code. + ParseErrorCode Code() const { return code_; } + //! Get the error offset, if \ref IsError(), 0 otherwise. + size_t Offset() const { return offset_; } + + //! Conversion to \c bool, returns \c true, iff !\ref IsError(). + operator bool() const { return !IsError(); } + //! Whether the result is an error. + bool IsError() const { return code_ != kParseErrorNone; } + + bool operator==(const ParseResult& that) const { return code_ == that.code_; } + bool operator==(ParseErrorCode code) const { return code_ == code; } + friend bool operator==(ParseErrorCode code, const ParseResult & err) { return code == err.code_; } + + //! Reset error code. + void Clear() { Set(kParseErrorNone); } + //! Update error code and offset. + void Set(ParseErrorCode code, size_t offset = 0) { code_ = code; offset_ = offset; } + +private: + ParseErrorCode code_; + size_t offset_; +}; + +//! Function pointer type of GetParseError(). +/*! \ingroup RAPIDJSON_ERRORS + + This is the prototype for \c GetParseError_X(), where \c X is a locale. 
+ User can dynamically change locale in runtime, e.g.: +\code + GetParseErrorFunc GetParseError = GetParseError_En; // or whatever + const RAPIDJSON_ERROR_CHARTYPE* s = GetParseError(document.GetParseErrorCode()); +\endcode +*/ +typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetParseErrorFunc)(ParseErrorCode); + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_ERROR_ERROR_H__ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/filereadstream.h b/c++/include/misc/jsonwrapp/rapidjson10/filereadstream.h new file mode 100644 index 00000000..ecc8055b --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/filereadstream.h @@ -0,0 +1,136 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_FILEREADSTREAM_H_ +#define RAPIDJSON_FILEREADSTREAM_H_ + +#include "rapidjson.h" +#include + +RAPIDJSON_NAMESPACE_BEGIN + +//! File byte stream for input using fread(). +/*! + \note implements Stream concept +*/ +class FileReadStream { +public: + typedef char Ch; //!< Character type (byte). + + //! Constructor. + /*! + \param fp File pointer opened for read. + \param buffer user-supplied buffer. + \param bufferSize size of buffer in bytes. Must >=4 bytes. 
+ */ + FileReadStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferSize_(bufferSize), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) { + RAPIDJSON_ASSERT(fp_ != 0); + RAPIDJSON_ASSERT(bufferSize >= 4); + Read(); + } + + Ch Peek() const { return *current_; } + Ch Take() { Ch c = *current_; Read(); return c; } + size_t Tell() const { return count_ + static_cast(current_ - buffer_); } + + // Not implemented + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + + // For encoding detection only. + const Ch* Peek4() const { + return (current_ + 4 <= bufferLast_) ? current_ : 0; + } + +private: + void Read() { + if (current_ < bufferLast_) + ++current_; + else if (!eof_) { + count_ += readCount_; + readCount_ = fread(buffer_, 1, bufferSize_, fp_); + bufferLast_ = buffer_ + readCount_ - 1; + current_ = buffer_; + + if (readCount_ < bufferSize_) { + buffer_[readCount_] = '\0'; + ++bufferLast_; + eof_ = true; + } + } + } + + std::FILE* fp_; + Ch *buffer_; + size_t bufferSize_; + Ch *bufferLast_; + Ch *current_; + size_t readCount_; + size_t count_; //!< Number of characters read + bool eof_; +}; + +//NCBI: added CppIStream +class CppIStream { +public: + typedef char Ch; //!< Character type (byte). 
+ + CppIStream(std::istream& in) : in_(&in), count_(0) { + } + CppIStream& operator=(CppIStream& i) { + in_ = i.in_; + current_ = i.current_; + count_ = i.count_; + return *this; + } + + Ch Peek() const { + char c = in_->peek(); + if (c == std::istream::traits_type::eof()) { c='\0';} + return c; + } + Ch Take() { + char c = in_->get(); + return c; + } + size_t Tell() const { + return (size_t)(in_->gcount()); + } + + // Not implemented + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + void Read() { + current_ = (Ch)in_->get(); + if (!in_->fail()) { + count_++; + } + else { + current_ = '\0'; + } + } + std::istream* in_; + Ch current_; + size_t count_; +}; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_FILESTREAM_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/filewritestream.h b/c++/include/misc/jsonwrapp/rapidjson10/filewritestream.h new file mode 100644 index 00000000..48fc9037 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/filewritestream.h @@ -0,0 +1,128 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_FILEWRITESTREAM_H_ +#define RAPIDJSON_FILEWRITESTREAM_H_ + +#include "rapidjson.h" +#include + +RAPIDJSON_NAMESPACE_BEGIN + +//! Wrapper of C file stream for input using fread(). +/*! + \note implements Stream concept +*/ +class FileWriteStream { +public: + typedef char Ch; //!< Character type. Only support char. + + FileWriteStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferEnd_(buffer + bufferSize), current_(buffer_) { + RAPIDJSON_ASSERT(fp_ != 0); + } + + void Put(char c) { + if (current_ >= bufferEnd_) + Flush(); + + *current_++ = c; + } + + void PutN(char c, size_t n) { + size_t avail = static_cast(bufferEnd_ - current_); + while (n > avail) { + std::memset(current_, c, avail); + current_ += avail; + Flush(); + n -= avail; + avail = static_cast(bufferEnd_ - current_); + } + + if (n > 0) { + std::memset(current_, c, n); + current_ += n; + } + } + + void Flush() { + if (current_ != buffer_) { + fwrite(buffer_, 1, static_cast(current_ - buffer_), fp_); + current_ = buffer_; + } + } + + // Not implemented + char Peek() const { RAPIDJSON_ASSERT(false); return 0; } + char Take() { RAPIDJSON_ASSERT(false); return 0; } + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + // Prohibit copy constructor & assignment operator. + FileWriteStream(const FileWriteStream&); + FileWriteStream& operator=(const FileWriteStream&); + + std::FILE* fp_; + char *buffer_; + char *bufferEnd_; + char *current_; +}; + +//! Implement specialized version of PutN() with memset() for better performance. +template<> +inline void PutN(FileWriteStream& stream, char c, size_t n) { + stream.PutN(c, n); +} + +//NCBI: added CppOStream + +class CppOStream { +public: + typedef char Ch; //!< Character type. Only support char. 
+ + CppOStream(std::ostream& out) : out_(&out), count_(0) { + Read(); + } + + void Put(Ch c) { + out_->put(c); + count_++; + } + void PutN(Ch c, size_t n) { + while(n--) { + Put(c); + } + } + void Flush() { + out_->flush(); + } + + // Not implemented + Ch Peek() const { RAPIDJSON_ASSERT(false); return 0; } + Ch Take() { RAPIDJSON_ASSERT(false); return 0; } + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + void Read() { + } + std::ostream* out_; + size_t count_; +}; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_FILESTREAM_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/internal/biginteger.h b/c++/include/misc/jsonwrapp/rapidjson10/internal/biginteger.h new file mode 100644 index 00000000..99a30acf --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/internal/biginteger.h @@ -0,0 +1,280 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_BIGINTEGER_H_ +#define RAPIDJSON_BIGINTEGER_H_ + +#include "../rapidjson.h" + +#if defined(_MSC_VER) && defined(_M_AMD64) +#include // for _umul128 +#endif + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +class BigInteger { +public: + typedef uint64_t Type; + + BigInteger(const BigInteger& rhs) : count_(rhs.count_) { + std::memcpy(digits_, rhs.digits_, count_ * sizeof(Type)); + } + + explicit BigInteger(uint64_t u) : count_(1) { + digits_[0] = u; + } + + BigInteger(const char* decimals, size_t length) : count_(1) { + RAPIDJSON_ASSERT(length > 0); + digits_[0] = 0; + size_t i = 0; + const size_t kMaxDigitPerIteration = 19; // 2^64 = 18446744073709551616 > 10^19 + while (length >= kMaxDigitPerIteration) { + AppendDecimal64(decimals + i, decimals + i + kMaxDigitPerIteration); + length -= kMaxDigitPerIteration; + i += kMaxDigitPerIteration; + } + + if (length > 0) + AppendDecimal64(decimals + i, decimals + i + length); + } + + BigInteger& operator=(uint64_t u) { + digits_[0] = u; + count_ = 1; + return *this; + } + + BigInteger& operator+=(uint64_t u) { + Type backup = digits_[0]; + digits_[0] += u; + for (size_t i = 0; i < count_ - 1; i++) { + if (digits_[i] >= backup) + return *this; // no carry + backup = digits_[i + 1]; + digits_[i + 1] += 1; + } + + // Last carry + if (digits_[count_ - 1] < backup) + PushBack(1); + + return *this; + } + + BigInteger& operator*=(uint64_t u) { + if (u == 0) return *this = 0; + if (u == 1) return *this; + if (*this == 1) return *this = u; + + uint64_t k = 0; + for (size_t i = 0; i < count_; i++) { + uint64_t hi; + digits_[i] = MulAdd64(digits_[i], u, k, &hi); + k = hi; + } + + if (k > 0) + PushBack(k); + + return *this; + } + + BigInteger& operator*=(uint32_t u) { + if (u == 0) return *this = 0; + if (u == 1) return *this; + if (*this == 1) return *this = u; + + uint64_t k = 0; + for (size_t i = 0; i < count_; i++) { + const uint64_t c = digits_[i] >> 32; + const uint64_t d = digits_[i] & 0xFFFFFFFF; + const 
uint64_t uc = u * c; + const uint64_t ud = u * d; + const uint64_t p0 = ud + k; + const uint64_t p1 = uc + (p0 >> 32); + digits_[i] = (p0 & 0xFFFFFFFF) | (p1 << 32); + k = p1 >> 32; + } + + if (k > 0) + PushBack(k); + + return *this; + } + + BigInteger& operator<<=(size_t shift) { + if (IsZero() || shift == 0) return *this; + + size_t offset = shift / kTypeBit; + size_t interShift = shift % kTypeBit; + RAPIDJSON_ASSERT(count_ + offset <= kCapacity); + + if (interShift == 0) { + std::memmove(&digits_[count_ - 1 + offset], &digits_[count_ - 1], count_ * sizeof(Type)); + count_ += offset; + } + else { + digits_[count_] = 0; + for (size_t i = count_; i > 0; i--) + digits_[i + offset] = (digits_[i] << interShift) | (digits_[i - 1] >> (kTypeBit - interShift)); + digits_[offset] = digits_[0] << interShift; + count_ += offset; + if (digits_[count_]) + count_++; + } + + std::memset(digits_, 0, offset * sizeof(Type)); + + return *this; + } + + bool operator==(const BigInteger& rhs) const { + return count_ == rhs.count_ && std::memcmp(digits_, rhs.digits_, count_ * sizeof(Type)) == 0; + } + + bool operator==(const Type rhs) const { + return count_ == 1 && digits_[0] == rhs; + } + + BigInteger& MultiplyPow5(unsigned exp) { + static const uint32_t kPow5[12] = { + 5, + 5 * 5, + 5 * 5 * 5, + 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 + }; + if (exp == 0) return *this; + for (; exp >= 27; exp -= 27) *this *= RAPIDJSON_UINT64_C2(0X6765C793, 0XFA10079D); // 5^27 + for (; exp >= 13; exp -= 13) *this *= static_cast(1220703125u); // 5^13 + if (exp > 0) *this *= kPow5[exp - 1]; + return *this; + } + + // Compute absolute difference of this and rhs. 
+ // Assume this != rhs + bool Difference(const BigInteger& rhs, BigInteger* out) const { + int cmp = Compare(rhs); + RAPIDJSON_ASSERT(cmp != 0); + const BigInteger *a, *b; // Makes a > b + bool ret; + if (cmp < 0) { a = &rhs; b = this; ret = true; } + else { a = this; b = &rhs; ret = false; } + + Type borrow = 0; + for (size_t i = 0; i < a->count_; i++) { + Type d = a->digits_[i] - borrow; + if (i < b->count_) + d -= b->digits_[i]; + borrow = (d > a->digits_[i]) ? 1 : 0; + out->digits_[i] = d; + if (d != 0) + out->count_ = i + 1; + } + + return ret; + } + + int Compare(const BigInteger& rhs) const { + if (count_ != rhs.count_) + return count_ < rhs.count_ ? -1 : 1; + + for (size_t i = count_; i-- > 0;) + if (digits_[i] != rhs.digits_[i]) + return digits_[i] < rhs.digits_[i] ? -1 : 1; + + return 0; + } + + size_t GetCount() const { return count_; } + Type GetDigit(size_t index) const { RAPIDJSON_ASSERT(index < count_); return digits_[index]; } + bool IsZero() const { return count_ == 1 && digits_[0] == 0; } + +private: + void AppendDecimal64(const char* begin, const char* end) { + uint64_t u = ParseUint64(begin, end); + if (IsZero()) + *this = u; + else { + unsigned exp = static_cast(end - begin); + (MultiplyPow5(exp) <<= exp) += u; // *this = *this * 10^exp + u + } + } + + void PushBack(Type digit) { + RAPIDJSON_ASSERT(count_ < kCapacity); + digits_[count_++] = digit; + } + + static uint64_t ParseUint64(const char* begin, const char* end) { + uint64_t r = 0; + for (const char* p = begin; p != end; ++p) { + RAPIDJSON_ASSERT(*p >= '0' && *p <= '9'); + r = r * 10 + (*p - '0'); + } + return r; + } + + // Assume a * b + k < 2^128 + static uint64_t MulAdd64(uint64_t a, uint64_t b, uint64_t k, uint64_t* outHigh) { +#if defined(_MSC_VER) && defined(_M_AMD64) + uint64_t low = _umul128(a, b, outHigh) + k; + if (low < k) + (*outHigh)++; + return low; +#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__) + __extension__ typedef unsigned 
__int128 uint128; + uint128 p = static_cast(a) * static_cast(b); + p += k; + *outHigh = static_cast(p >> 64); + return static_cast(p); +#else + const uint64_t a0 = a & 0xFFFFFFFF, a1 = a >> 32, b0 = b & 0xFFFFFFFF, b1 = b >> 32; + uint64_t x0 = a0 * b0, x1 = a0 * b1, x2 = a1 * b0, x3 = a1 * b1; + x1 += (x0 >> 32); // can't give carry + x1 += x2; + if (x1 < x2) + x3 += (static_cast(1) << 32); + uint64_t lo = (x1 << 32) + (x0 & 0xFFFFFFFF); + uint64_t hi = x3 + (x1 >> 32); + + lo += k; + if (lo < k) + hi++; + *outHigh = hi; + return lo; +#endif + } + + static const size_t kBitCount = 3328; // 64bit * 54 > 10^1000 + static const size_t kCapacity = kBitCount / sizeof(Type); + static const size_t kTypeBit = sizeof(Type) * 8; + + Type digits_[kCapacity]; + size_t count_; +}; + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_BIGINTEGER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/internal/diyfp.h b/c++/include/misc/jsonwrapp/rapidjson10/internal/diyfp.h new file mode 100644 index 00000000..3b6c4238 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/internal/diyfp.h @@ -0,0 +1,247 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// This is a C++ header-only implementation of Grisu2 algorithm from the publication: +// Loitsch, Florian. 
"Printing floating-point numbers quickly and accurately with +// integers." ACM Sigplan Notices 45.6 (2010): 233-243. + +#ifndef RAPIDJSON_DIYFP_H_ +#define RAPIDJSON_DIYFP_H_ + +#include "../rapidjson.h" + +#if defined(_MSC_VER) && defined(_M_AMD64) +#include +#pragma intrinsic(_BitScanReverse64) +#endif + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +struct DiyFp { + DiyFp() {} + + DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {} + + explicit DiyFp(double d) { + union { + double d; + uint64_t u64; + } u = { d }; + + int biased_e = static_cast((u.u64 & kDpExponentMask) >> kDpSignificandSize); + uint64_t significand = (u.u64 & kDpSignificandMask); + if (biased_e != 0) { + f = significand + kDpHiddenBit; + e = biased_e - kDpExponentBias; + } + else { + f = significand; + e = kDpMinExponent + 1; + } + } + + DiyFp operator-(const DiyFp& rhs) const { + return DiyFp(f - rhs.f, e); + } + + DiyFp operator*(const DiyFp& rhs) const { +#if defined(_MSC_VER) && defined(_M_AMD64) + uint64_t h; + uint64_t l = _umul128(f, rhs.f, &h); + if (l & (uint64_t(1) << 63)) // rounding + h++; + return DiyFp(h, e + rhs.e + 64); +#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__) + __extension__ typedef unsigned __int128 uint128; + uint128 p = static_cast(f) * static_cast(rhs.f); + uint64_t h = static_cast(p >> 64); + uint64_t l = static_cast(p); + if (l & (uint64_t(1) << 63)) // rounding + h++; + return DiyFp(h, e + rhs.e + 64); +#else + const uint64_t M32 = 0xFFFFFFFF; + const uint64_t a = f >> 32; + const uint64_t b = f & M32; + const uint64_t c = rhs.f >> 32; + const uint64_t d = rhs.f & M32; + const uint64_t ac = a * c; + const uint64_t bc = b * c; + const uint64_t ad = a * d; + const uint64_t bd = b * d; + uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32); + tmp += 1U << 31; /// mult_round + return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64); 
+#endif + } + + DiyFp Normalize() const { +#if defined(_MSC_VER) && defined(_M_AMD64) + unsigned long index; + _BitScanReverse64(&index, f); + return DiyFp(f << (63 - index), e - (63 - index)); +#elif defined(__GNUC__) && __GNUC__ >= 4 + int s = __builtin_clzll(f); + return DiyFp(f << s, e - s); +#else + DiyFp res = *this; + while (!(res.f & (static_cast(1) << 63))) { + res.f <<= 1; + res.e--; + } + return res; +#endif + } + + DiyFp NormalizeBoundary() const { + DiyFp res = *this; + while (!(res.f & (kDpHiddenBit << 1))) { + res.f <<= 1; + res.e--; + } + res.f <<= (kDiySignificandSize - kDpSignificandSize - 2); + res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2); + return res; + } + + void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const { + DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary(); + DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1); + mi.f <<= mi.e - pl.e; + mi.e = pl.e; + *plus = pl; + *minus = mi; + } + + double ToDouble() const { + union { + double d; + uint64_t u64; + }u; + const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 
0 : + static_cast(e + kDpExponentBias); + u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize); + return u.d; + } + + static const int kDiySignificandSize = 64; + static const int kDpSignificandSize = 52; + static const int kDpExponentBias = 0x3FF + kDpSignificandSize; + static const int kDpMaxExponent = 0x7FF - kDpExponentBias; + static const int kDpMinExponent = -kDpExponentBias; + static const int kDpDenormalExponent = -kDpExponentBias + 1; + static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000); + static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF); + static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000); + + uint64_t f; + int e; +}; + +inline DiyFp GetCachedPowerByIndex(size_t index) { + // 10^-348, 10^-340, ..., 10^340 + static const uint64_t kCachedPowers_F[] = { + RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76), + RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea), + RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df), + RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f), + RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c), + RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5), + RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d), + RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637), + RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7), + RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5), + RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b), + RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996), + 
RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6), + RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8), + RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053), + RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd), + RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94), + RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b), + RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac), + RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3), + RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb), + RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c), + RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000), + RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984), + RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70), + RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245), + RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8), + RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a), + RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea), + RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85), + RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2), + RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3), + RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25), + RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), 
RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece), + RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5), + RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a), + RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c), + RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a), + RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129), + RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429), + RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d), + RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841), + RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9), + RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b) + }; + static const int16_t kCachedPowers_E[] = { + -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, + -954, -927, -901, -874, -847, -821, -794, -768, -741, -715, + -688, -661, -635, -608, -582, -555, -529, -502, -475, -449, + -422, -396, -369, -343, -316, -289, -263, -236, -210, -183, + -157, -130, -103, -77, -50, -24, 3, 30, 56, 83, + 109, 136, 162, 189, 216, 242, 269, 295, 322, 348, + 375, 402, 428, 455, 481, 508, 534, 561, 588, 614, + 641, 667, 694, 720, 747, 774, 800, 827, 853, 880, + 907, 933, 960, 986, 1013, 1039, 1066 + }; + return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]); +} + +inline DiyFp GetCachedPower(int e, int* K) { + + //int k = static_cast(ceil((-61 - e) * 0.30102999566398114)) + 374; + double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive + int k = static_cast(dk); + if (dk - k > 0.0) + k++; + + unsigned index = static_cast((k >> 3) + 1); + *K = -(-348 + static_cast(index << 3)); // decimal exponent no need lookup table + + return GetCachedPowerByIndex(index); +} 
+ +inline DiyFp GetCachedPower10(int exp, int *outExp) { + unsigned index = (exp + 348) / 8; + *outExp = -348 + index * 8; + return GetCachedPowerByIndex(index); + } + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_DIYFP_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/internal/dtoa.h b/c++/include/misc/jsonwrapp/rapidjson10/internal/dtoa.h new file mode 100644 index 00000000..2d8d2e46 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/internal/dtoa.h @@ -0,0 +1,217 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// This is a C++ header-only implementation of Grisu2 algorithm from the publication: +// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with +// integers." ACM Sigplan Notices 45.6 (2010): 233-243. 
+ +#ifndef RAPIDJSON_DTOA_ +#define RAPIDJSON_DTOA_ + +#include "itoa.h" // GetDigitsLut() +#include "diyfp.h" +#include "ieee754.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) { + while (rest < wp_w && delta - rest >= ten_kappa && + (rest + ten_kappa < wp_w || /// closer + wp_w - rest > rest + ten_kappa - wp_w)) { + buffer[len - 1]--; + rest += ten_kappa; + } +} + +inline unsigned CountDecimalDigit32(uint32_t n) { + // Simple pure C++ implementation was faster than __builtin_clz version in this situation. + if (n < 10) return 1; + if (n < 100) return 2; + if (n < 1000) return 3; + if (n < 10000) return 4; + if (n < 100000) return 5; + if (n < 1000000) return 6; + if (n < 10000000) return 7; + if (n < 100000000) return 8; + // Will not reach 10 digits in DigitGen() + //if (n < 1000000000) return 9; + //return 10; + return 9; +} + +inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) { + static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 }; + const DiyFp one(uint64_t(1) << -Mp.e, Mp.e); + const DiyFp wp_w = Mp - W; + uint32_t p1 = static_cast(Mp.f >> -one.e); + uint64_t p2 = Mp.f & (one.f - 1); + int kappa = CountDecimalDigit32(p1); // kappa in [0, 9] + *len = 0; + + while (kappa > 0) { + uint32_t d = 0; + switch (kappa) { + case 9: d = p1 / 100000000; p1 %= 100000000; break; + case 8: d = p1 / 10000000; p1 %= 10000000; break; + case 7: d = p1 / 1000000; p1 %= 1000000; break; + case 6: d = p1 / 100000; p1 %= 100000; break; + case 5: d = p1 / 10000; p1 %= 10000; break; + case 4: d = p1 / 1000; p1 %= 1000; break; + case 3: d = p1 / 100; p1 %= 100; break; + case 2: d = p1 / 10; p1 %= 10; break; + case 1: d = p1; p1 = 0; break; + default:; + } + if (d || *len) + 
buffer[(*len)++] = static_cast('0' + static_cast(d)); + kappa--; + uint64_t tmp = (static_cast(p1) << -one.e) + p2; + if (tmp <= delta) { + *K += kappa; + GrisuRound(buffer, *len, delta, tmp, static_cast(kPow10[kappa]) << -one.e, wp_w.f); + return; + } + } + + // kappa = 0 + for (;;) { + p2 *= 10; + delta *= 10; + char d = static_cast(p2 >> -one.e); + if (d || *len) + buffer[(*len)++] = static_cast('0' + d); + p2 &= one.f - 1; + kappa--; + if (p2 < delta) { + *K += kappa; + GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * kPow10[-kappa]); + return; + } + } +} + +inline void Grisu2(double value, char* buffer, int* length, int* K) { + const DiyFp v(value); + DiyFp w_m, w_p; + v.NormalizedBoundaries(&w_m, &w_p); + + const DiyFp c_mk = GetCachedPower(w_p.e, K); + const DiyFp W = v.Normalize() * c_mk; + DiyFp Wp = w_p * c_mk; + DiyFp Wm = w_m * c_mk; + Wm.f++; + Wp.f--; + DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K); +} + +inline char* WriteExponent(int K, char* buffer) { + if (K < 0) { + *buffer++ = '-'; + K = -K; + } + + if (K >= 100) { + *buffer++ = static_cast('0' + static_cast(K / 100)); + K %= 100; + const char* d = GetDigitsLut() + K * 2; + *buffer++ = d[0]; + *buffer++ = d[1]; + } + else if (K >= 10) { + const char* d = GetDigitsLut() + K * 2; + *buffer++ = d[0]; + *buffer++ = d[1]; + } + else + *buffer++ = static_cast('0' + static_cast(K)); + + return buffer; +} + +inline char* Prettify(char* buffer, int length, int k) { + const int kk = length + k; // 10^(kk-1) <= v < 10^kk + + if (length <= kk && kk <= 21) { + // 1234e7 -> 12340000000 + for (int i = length; i < kk; i++) + buffer[i] = '0'; + buffer[kk] = '.'; + buffer[kk + 1] = '0'; + return &buffer[kk + 2]; + } + else if (0 < kk && kk <= 21) { + // 1234e-2 -> 12.34 + std::memmove(&buffer[kk + 1], &buffer[kk], length - kk); + buffer[kk] = '.'; + return &buffer[length + 1]; + } + else if (-6 < kk && kk <= 0) { + // 1234e-6 -> 0.001234 + const int offset = 2 - kk; + std::memmove(&buffer[offset], 
&buffer[0], length); + buffer[0] = '0'; + buffer[1] = '.'; + for (int i = 2; i < offset; i++) + buffer[i] = '0'; + return &buffer[length + offset]; + } + else if (length == 1) { + // 1e30 + buffer[1] = 'e'; + return WriteExponent(kk - 1, &buffer[2]); + } + else { + // 1234e30 -> 1.234e33 + std::memmove(&buffer[2], &buffer[1], length - 1); + buffer[1] = '.'; + buffer[length + 1] = 'e'; + return WriteExponent(kk - 1, &buffer[0 + length + 2]); + } +} + +inline char* dtoa(double value, char* buffer) { + Double d(value); + if (d.IsZero()) { + if (d.Sign()) + *buffer++ = '-'; // -0.0, Issue #289 + buffer[0] = '0'; + buffer[1] = '.'; + buffer[2] = '0'; + return &buffer[3]; + } + else { + if (value < 0) { + *buffer++ = '-'; + value = -value; + } + int length, K; + Grisu2(value, buffer, &length, &K); + return Prettify(buffer, length, K); + } +} + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_DTOA_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/internal/ieee754.h b/c++/include/misc/jsonwrapp/rapidjson10/internal/ieee754.h new file mode 100644 index 00000000..e3f03364 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/internal/ieee754.h @@ -0,0 +1,77 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_IEEE754_ +#define RAPIDJSON_IEEE754_ + +#include "../rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +class Double { +public: + Double() {} + Double(double d) : d_(d) {} + Double(uint64_t u) : u_(u) {} + + double Value() const { return d_; } + uint64_t Uint64Value() const { return u_; } + + double NextPositiveDouble() const { + RAPIDJSON_ASSERT(!Sign()); + return Double(u_ + 1).Value(); + } + + bool Sign() const { return (u_ & kSignMask) != 0; } + uint64_t Significand() const { return u_ & kSignificandMask; } + int Exponent() const { return static_cast(((u_ & kExponentMask) >> kSignificandSize) - kExponentBias); } + + bool IsNan() const { return (u_ & kExponentMask) == kExponentMask && Significand() != 0; } + bool IsInf() const { return (u_ & kExponentMask) == kExponentMask && Significand() == 0; } + bool IsNormal() const { return (u_ & kExponentMask) != 0 || Significand() == 0; } + bool IsZero() const { return (u_ & (kExponentMask | kSignificandMask)) == 0; } + + uint64_t IntegerSignificand() const { return IsNormal() ? Significand() | kHiddenBit : Significand(); } + int IntegerExponent() const { return (IsNormal() ? Exponent() : kDenormalExponent) - kSignificandSize; } + uint64_t ToBias() const { return (u_ & kSignMask) ? 
~u_ + 1 : u_ | kSignMask; } + + static unsigned EffectiveSignificandSize(int order) { + if (order >= -1021) + return 53; + else if (order <= -1074) + return 0; + else + return order + 1074; + } + +private: + static const int kSignificandSize = 52; + static const int kExponentBias = 0x3FF; + static const int kDenormalExponent = 1 - kExponentBias; + static const uint64_t kSignMask = RAPIDJSON_UINT64_C2(0x80000000, 0x00000000); + static const uint64_t kExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000); + static const uint64_t kSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF); + static const uint64_t kHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000); + + union { + double d_; + uint64_t u_; + }; +}; + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_IEEE754_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/internal/itoa.h b/c++/include/misc/jsonwrapp/rapidjson10/internal/itoa.h new file mode 100644 index 00000000..01a4e7e7 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/internal/itoa.h @@ -0,0 +1,304 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_ITOA_ +#define RAPIDJSON_ITOA_ + +#include "../rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +inline const char* GetDigitsLut() { + static const char cDigitsLut[200] = { + '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9', + '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9', + '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9', + '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9', + '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9', + '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9', + '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9', + '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9', + '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9', + '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9' + }; + return cDigitsLut; +} + +inline char* u32toa(uint32_t value, char* buffer) { + const char* cDigitsLut = GetDigitsLut(); + + if (value < 10000) { + const uint32_t d1 = (value / 100) << 1; + const uint32_t d2 = (value % 100) << 1; + + if (value >= 1000) + *buffer++ = cDigitsLut[d1]; + if (value >= 100) + *buffer++ = cDigitsLut[d1 + 1]; + if (value >= 10) + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + } + else if (value < 100000000) { + // value = bbbbcccc + const uint32_t b = value / 10000; + const uint32_t c = value % 10000; + + const uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + if (value >= 10000000) + *buffer++ = cDigitsLut[d1]; + if (value >= 1000000) + *buffer++ = cDigitsLut[d1 + 1]; + if (value >= 100000) + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + + *buffer++ = cDigitsLut[d3]; + *buffer++ = 
cDigitsLut[d3 + 1]; + *buffer++ = cDigitsLut[d4]; + *buffer++ = cDigitsLut[d4 + 1]; + } + else { + // value = aabbbbcccc in decimal + + const uint32_t a = value / 100000000; // 1 to 42 + value %= 100000000; + + if (a >= 10) { + const unsigned i = a << 1; + *buffer++ = cDigitsLut[i]; + *buffer++ = cDigitsLut[i + 1]; + } + else + *buffer++ = static_cast('0' + static_cast(a)); + + const uint32_t b = value / 10000; // 0 to 9999 + const uint32_t c = value % 10000; // 0 to 9999 + + const uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + *buffer++ = cDigitsLut[d1]; + *buffer++ = cDigitsLut[d1 + 1]; + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + *buffer++ = cDigitsLut[d3]; + *buffer++ = cDigitsLut[d3 + 1]; + *buffer++ = cDigitsLut[d4]; + *buffer++ = cDigitsLut[d4 + 1]; + } + return buffer; +} + +inline char* i32toa(int32_t value, char* buffer) { + uint32_t u = static_cast(value); + if (value < 0) { + *buffer++ = '-'; + u = ~u + 1; + } + + return u32toa(u, buffer); +} + +inline char* u64toa(uint64_t value, char* buffer) { + const char* cDigitsLut = GetDigitsLut(); + const uint64_t kTen8 = 100000000; + const uint64_t kTen9 = kTen8 * 10; + const uint64_t kTen10 = kTen8 * 100; + const uint64_t kTen11 = kTen8 * 1000; + const uint64_t kTen12 = kTen8 * 10000; + const uint64_t kTen13 = kTen8 * 100000; + const uint64_t kTen14 = kTen8 * 1000000; + const uint64_t kTen15 = kTen8 * 10000000; + const uint64_t kTen16 = kTen8 * kTen8; + + if (value < kTen8) { + uint32_t v = static_cast(value); + if (v < 10000) { + const uint32_t d1 = (v / 100) << 1; + const uint32_t d2 = (v % 100) << 1; + + if (v >= 1000) + *buffer++ = cDigitsLut[d1]; + if (v >= 100) + *buffer++ = cDigitsLut[d1 + 1]; + if (v >= 10) + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + } + else { + // value = bbbbcccc + const uint32_t b = v / 10000; + const uint32_t c = v % 10000; + + const 
uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + if (value >= 10000000) + *buffer++ = cDigitsLut[d1]; + if (value >= 1000000) + *buffer++ = cDigitsLut[d1 + 1]; + if (value >= 100000) + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + + *buffer++ = cDigitsLut[d3]; + *buffer++ = cDigitsLut[d3 + 1]; + *buffer++ = cDigitsLut[d4]; + *buffer++ = cDigitsLut[d4 + 1]; + } + } + else if (value < kTen16) { + const uint32_t v0 = static_cast(value / kTen8); + const uint32_t v1 = static_cast(value % kTen8); + + const uint32_t b0 = v0 / 10000; + const uint32_t c0 = v0 % 10000; + + const uint32_t d1 = (b0 / 100) << 1; + const uint32_t d2 = (b0 % 100) << 1; + + const uint32_t d3 = (c0 / 100) << 1; + const uint32_t d4 = (c0 % 100) << 1; + + const uint32_t b1 = v1 / 10000; + const uint32_t c1 = v1 % 10000; + + const uint32_t d5 = (b1 / 100) << 1; + const uint32_t d6 = (b1 % 100) << 1; + + const uint32_t d7 = (c1 / 100) << 1; + const uint32_t d8 = (c1 % 100) << 1; + + if (value >= kTen15) + *buffer++ = cDigitsLut[d1]; + if (value >= kTen14) + *buffer++ = cDigitsLut[d1 + 1]; + if (value >= kTen13) + *buffer++ = cDigitsLut[d2]; + if (value >= kTen12) + *buffer++ = cDigitsLut[d2 + 1]; + if (value >= kTen11) + *buffer++ = cDigitsLut[d3]; + if (value >= kTen10) + *buffer++ = cDigitsLut[d3 + 1]; + if (value >= kTen9) + *buffer++ = cDigitsLut[d4]; + if (value >= kTen8) + *buffer++ = cDigitsLut[d4 + 1]; + + *buffer++ = cDigitsLut[d5]; + *buffer++ = cDigitsLut[d5 + 1]; + *buffer++ = cDigitsLut[d6]; + *buffer++ = cDigitsLut[d6 + 1]; + *buffer++ = cDigitsLut[d7]; + *buffer++ = cDigitsLut[d7 + 1]; + *buffer++ = cDigitsLut[d8]; + *buffer++ = cDigitsLut[d8 + 1]; + } + else { + const uint32_t a = static_cast(value / kTen16); // 1 to 1844 + value %= kTen16; + + if (a < 10) + *buffer++ = static_cast('0' + static_cast(a)); + else if (a < 100) { + const uint32_t i = a << 1; + *buffer++ = 
cDigitsLut[i]; + *buffer++ = cDigitsLut[i + 1]; + } + else if (a < 1000) { + *buffer++ = static_cast('0' + static_cast(a / 100)); + + const uint32_t i = (a % 100) << 1; + *buffer++ = cDigitsLut[i]; + *buffer++ = cDigitsLut[i + 1]; + } + else { + const uint32_t i = (a / 100) << 1; + const uint32_t j = (a % 100) << 1; + *buffer++ = cDigitsLut[i]; + *buffer++ = cDigitsLut[i + 1]; + *buffer++ = cDigitsLut[j]; + *buffer++ = cDigitsLut[j + 1]; + } + + const uint32_t v0 = static_cast(value / kTen8); + const uint32_t v1 = static_cast(value % kTen8); + + const uint32_t b0 = v0 / 10000; + const uint32_t c0 = v0 % 10000; + + const uint32_t d1 = (b0 / 100) << 1; + const uint32_t d2 = (b0 % 100) << 1; + + const uint32_t d3 = (c0 / 100) << 1; + const uint32_t d4 = (c0 % 100) << 1; + + const uint32_t b1 = v1 / 10000; + const uint32_t c1 = v1 % 10000; + + const uint32_t d5 = (b1 / 100) << 1; + const uint32_t d6 = (b1 % 100) << 1; + + const uint32_t d7 = (c1 / 100) << 1; + const uint32_t d8 = (c1 % 100) << 1; + + *buffer++ = cDigitsLut[d1]; + *buffer++ = cDigitsLut[d1 + 1]; + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + *buffer++ = cDigitsLut[d3]; + *buffer++ = cDigitsLut[d3 + 1]; + *buffer++ = cDigitsLut[d4]; + *buffer++ = cDigitsLut[d4 + 1]; + *buffer++ = cDigitsLut[d5]; + *buffer++ = cDigitsLut[d5 + 1]; + *buffer++ = cDigitsLut[d6]; + *buffer++ = cDigitsLut[d6 + 1]; + *buffer++ = cDigitsLut[d7]; + *buffer++ = cDigitsLut[d7 + 1]; + *buffer++ = cDigitsLut[d8]; + *buffer++ = cDigitsLut[d8 + 1]; + } + + return buffer; +} + +inline char* i64toa(int64_t value, char* buffer) { + uint64_t u = static_cast(value); + if (value < 0) { + *buffer++ = '-'; + u = ~u + 1; + } + + return u64toa(u, buffer); +} + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_ITOA_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/internal/meta.h b/c++/include/misc/jsonwrapp/rapidjson10/internal/meta.h new file mode 100644 index 00000000..5a9aaa42 --- /dev/null +++ 
b/c++/include/misc/jsonwrapp/rapidjson10/internal/meta.h @@ -0,0 +1,181 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_INTERNAL_META_H_ +#define RAPIDJSON_INTERNAL_META_H_ + +#include "../rapidjson.h" + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif +#if defined(_MSC_VER) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(6334) +#endif + +#if RAPIDJSON_HAS_CXX11_TYPETRAITS +#include +#endif + +//@cond RAPIDJSON_INTERNAL +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +// Helper to wrap/convert arbitrary types to void, useful for arbitrary type matching +template struct Void { typedef void Type; }; + +/////////////////////////////////////////////////////////////////////////////// +// BoolType, TrueType, FalseType +// +template struct BoolType { + static const bool Value = Cond; + typedef BoolType Type; +}; +typedef BoolType TrueType; +typedef BoolType FalseType; + + +/////////////////////////////////////////////////////////////////////////////// +// SelectIf, BoolExpr, NotExpr, AndExpr, OrExpr +// + +template struct SelectIfImpl { template struct Apply { typedef T1 Type; }; }; +template <> struct SelectIfImpl { template struct Apply { typedef T2 Type; }; }; +template struct SelectIfCond : SelectIfImpl::template Apply {}; +template struct SelectIf : 
SelectIfCond {}; + +template struct AndExprCond : FalseType {}; +template <> struct AndExprCond : TrueType {}; +template struct OrExprCond : TrueType {}; +template <> struct OrExprCond : FalseType {}; + +template struct BoolExpr : SelectIf::Type {}; +template struct NotExpr : SelectIf::Type {}; +template struct AndExpr : AndExprCond::Type {}; +template struct OrExpr : OrExprCond::Type {}; + + +/////////////////////////////////////////////////////////////////////////////// +// AddConst, MaybeAddConst, RemoveConst +template struct AddConst { typedef const T Type; }; +template struct MaybeAddConst : SelectIfCond {}; +template struct RemoveConst { typedef T Type; }; +template struct RemoveConst { typedef T Type; }; + + +/////////////////////////////////////////////////////////////////////////////// +// IsSame, IsConst, IsMoreConst, IsPointer +// +template struct IsSame : FalseType {}; +template struct IsSame : TrueType {}; + +template struct IsConst : FalseType {}; +template struct IsConst : TrueType {}; + +template +struct IsMoreConst + : AndExpr::Type, typename RemoveConst::Type>, + BoolType::Value >= IsConst::Value> >::Type {}; + +template struct IsPointer : FalseType {}; +template struct IsPointer : TrueType {}; + +/////////////////////////////////////////////////////////////////////////////// +// IsBaseOf +// +#if RAPIDJSON_HAS_CXX11_TYPETRAITS + +template struct IsBaseOf + : BoolType< ::std::is_base_of::value> {}; + +#else // simplified version adopted from Boost + +template struct IsBaseOfImpl { + RAPIDJSON_STATIC_ASSERT(sizeof(B) != 0); + RAPIDJSON_STATIC_ASSERT(sizeof(D) != 0); + + typedef char (&Yes)[1]; + typedef char (&No) [2]; + + template + static Yes Check(const D*, T); + static No Check(const B*, int); + + struct Host { + operator const B*() const; + operator const D*(); + }; + + enum { Value = (sizeof(Check(Host(), 0)) == sizeof(Yes)) }; +}; + +template struct IsBaseOf + : OrExpr, BoolExpr > >::Type {}; + +#endif // RAPIDJSON_HAS_CXX11_TYPETRAITS + + 
+////////////////////////////////////////////////////////////////////////// +// EnableIf / DisableIf +// +template struct EnableIfCond { typedef T Type; }; +template struct EnableIfCond { /* empty */ }; + +template struct DisableIfCond { typedef T Type; }; +template struct DisableIfCond { /* empty */ }; + +template +struct EnableIf : EnableIfCond {}; + +template +struct DisableIf : DisableIfCond {}; + +// SFINAE helpers +struct SfinaeTag {}; +template struct RemoveSfinaeTag; +template struct RemoveSfinaeTag { typedef T Type; }; + +#define RAPIDJSON_REMOVEFPTR_(type) \ + typename ::RAPIDJSON_NAMESPACE::internal::RemoveSfinaeTag \ + < ::RAPIDJSON_NAMESPACE::internal::SfinaeTag&(*) type>::Type + +#define RAPIDJSON_ENABLEIF(cond) \ + typename ::RAPIDJSON_NAMESPACE::internal::EnableIf \ + ::Type * = NULL + +#define RAPIDJSON_DISABLEIF(cond) \ + typename ::RAPIDJSON_NAMESPACE::internal::DisableIf \ + ::Type * = NULL + +#define RAPIDJSON_ENABLEIF_RETURN(cond,returntype) \ + typename ::RAPIDJSON_NAMESPACE::internal::EnableIf \ + ::Type + +#define RAPIDJSON_DISABLEIF_RETURN(cond,returntype) \ + typename ::RAPIDJSON_NAMESPACE::internal::DisableIf \ + ::Type + +} // namespace internal +RAPIDJSON_NAMESPACE_END +//@endcond + +#if defined(__GNUC__) || defined(_MSC_VER) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_INTERNAL_META_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/internal/pow10.h b/c++/include/misc/jsonwrapp/rapidjson10/internal/pow10.h new file mode 100644 index 00000000..02f475d7 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/internal/pow10.h @@ -0,0 +1,55 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_POW10_ +#define RAPIDJSON_POW10_ + +#include "../rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +//! Computes integer powers of 10 in double (10.0^n). +/*! This function uses lookup table for fast and accurate results. + \param n non-negative exponent. Must <= 308. + \return 10.0^n +*/ +inline double Pow10(int n) { + static const double e[] = { // 1e-0...1e308: 309 * 8 bytes = 2472 bytes + 1e+0, + 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, + 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, + 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, + 1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, + 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100, + 1e+101,1e+102,1e+103,1e+104,1e+105,1e+106,1e+107,1e+108,1e+109,1e+110,1e+111,1e+112,1e+113,1e+114,1e+115,1e+116,1e+117,1e+118,1e+119,1e+120, + 1e+121,1e+122,1e+123,1e+124,1e+125,1e+126,1e+127,1e+128,1e+129,1e+130,1e+131,1e+132,1e+133,1e+134,1e+135,1e+136,1e+137,1e+138,1e+139,1e+140, + 1e+141,1e+142,1e+143,1e+144,1e+145,1e+146,1e+147,1e+148,1e+149,1e+150,1e+151,1e+152,1e+153,1e+154,1e+155,1e+156,1e+157,1e+158,1e+159,1e+160, + 
1e+161,1e+162,1e+163,1e+164,1e+165,1e+166,1e+167,1e+168,1e+169,1e+170,1e+171,1e+172,1e+173,1e+174,1e+175,1e+176,1e+177,1e+178,1e+179,1e+180, + 1e+181,1e+182,1e+183,1e+184,1e+185,1e+186,1e+187,1e+188,1e+189,1e+190,1e+191,1e+192,1e+193,1e+194,1e+195,1e+196,1e+197,1e+198,1e+199,1e+200, + 1e+201,1e+202,1e+203,1e+204,1e+205,1e+206,1e+207,1e+208,1e+209,1e+210,1e+211,1e+212,1e+213,1e+214,1e+215,1e+216,1e+217,1e+218,1e+219,1e+220, + 1e+221,1e+222,1e+223,1e+224,1e+225,1e+226,1e+227,1e+228,1e+229,1e+230,1e+231,1e+232,1e+233,1e+234,1e+235,1e+236,1e+237,1e+238,1e+239,1e+240, + 1e+241,1e+242,1e+243,1e+244,1e+245,1e+246,1e+247,1e+248,1e+249,1e+250,1e+251,1e+252,1e+253,1e+254,1e+255,1e+256,1e+257,1e+258,1e+259,1e+260, + 1e+261,1e+262,1e+263,1e+264,1e+265,1e+266,1e+267,1e+268,1e+269,1e+270,1e+271,1e+272,1e+273,1e+274,1e+275,1e+276,1e+277,1e+278,1e+279,1e+280, + 1e+281,1e+282,1e+283,1e+284,1e+285,1e+286,1e+287,1e+288,1e+289,1e+290,1e+291,1e+292,1e+293,1e+294,1e+295,1e+296,1e+297,1e+298,1e+299,1e+300, + 1e+301,1e+302,1e+303,1e+304,1e+305,1e+306,1e+307,1e+308 + }; + RAPIDJSON_ASSERT(n >= 0 && n <= 308); + return e[n]; +} + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_POW10_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/internal/stack.h b/c++/include/misc/jsonwrapp/rapidjson10/internal/stack.h new file mode 100644 index 00000000..722d5692 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/internal/stack.h @@ -0,0 +1,179 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_INTERNAL_STACK_H_ +#define RAPIDJSON_INTERNAL_STACK_H_ + +#include "../rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +/////////////////////////////////////////////////////////////////////////////// +// Stack + +//! A type-unsafe stack for storing different types of data. +/*! \tparam Allocator Allocator for allocating stack memory. +*/ +template +class Stack { +public: + // Optimization note: Do not allocate memory for stack_ in constructor. + // Do it lazily when first Push() -> Expand() -> Resize(). + Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) { + RAPIDJSON_ASSERT(stackCapacity > 0); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + Stack(Stack&& rhs) + : allocator_(rhs.allocator_), + ownAllocator_(rhs.ownAllocator_), + stack_(rhs.stack_), + stackTop_(rhs.stackTop_), + stackEnd_(rhs.stackEnd_), + initialCapacity_(rhs.initialCapacity_) + { + rhs.allocator_ = 0; + rhs.ownAllocator_ = 0; + rhs.stack_ = 0; + rhs.stackTop_ = 0; + rhs.stackEnd_ = 0; + rhs.initialCapacity_ = 0; + } +#endif + + ~Stack() { + Destroy(); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + Stack& operator=(Stack&& rhs) { + if (&rhs != this) + { + Destroy(); + + allocator_ = rhs.allocator_; + ownAllocator_ = rhs.ownAllocator_; + stack_ = rhs.stack_; + stackTop_ = rhs.stackTop_; + stackEnd_ = rhs.stackEnd_; + initialCapacity_ = rhs.initialCapacity_; + + rhs.allocator_ = 0; + rhs.ownAllocator_ = 0; + rhs.stack_ = 0; + rhs.stackTop_ = 0; + 
rhs.stackEnd_ = 0; + rhs.initialCapacity_ = 0; + } + return *this; + } +#endif + + void Clear() { stackTop_ = stack_; } + + void ShrinkToFit() { + if (Empty()) { + // If the stack is empty, completely deallocate the memory. + Allocator::Free(stack_); + stack_ = 0; + stackTop_ = 0; + stackEnd_ = 0; + } + else + Resize(GetSize()); + } + + // Optimization note: try to minimize the size of this function for force inline. + // Expansion is run very infrequently, so it is moved to another (probably non-inline) function. + template + RAPIDJSON_FORCEINLINE T* Push(size_t count = 1) { + // Expand the stack if needed + if (stackTop_ + sizeof(T) * count >= stackEnd_) + Expand(count); + + T* ret = reinterpret_cast(stackTop_); + stackTop_ += sizeof(T) * count; + return ret; + } + + template + T* Pop(size_t count) { + RAPIDJSON_ASSERT(GetSize() >= count * sizeof(T)); + stackTop_ -= count * sizeof(T); + return reinterpret_cast(stackTop_); + } + + template + T* Top() { + RAPIDJSON_ASSERT(GetSize() >= sizeof(T)); + return reinterpret_cast(stackTop_ - sizeof(T)); + } + + template + T* Bottom() { return (T*)stack_; } + + Allocator& GetAllocator() { return *allocator_; } + bool Empty() const { return stackTop_ == stack_; } + size_t GetSize() const { return static_cast(stackTop_ - stack_); } + size_t GetCapacity() const { return static_cast(stackEnd_ - stack_); } + +private: + template + void Expand(size_t count) { + // Only expand the capacity if the current stack exists. Otherwise just create a stack with initial capacity. 
+ size_t newCapacity; + if (stack_ == 0) { + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); + newCapacity = initialCapacity_; + } else { + newCapacity = GetCapacity(); + newCapacity += (newCapacity + 1) / 2; + } + size_t newSize = GetSize() + sizeof(T) * count; + if (newCapacity < newSize) + newCapacity = newSize; + + Resize(newCapacity); + } + + void Resize(size_t newCapacity) { + const size_t size = GetSize(); // Backup the current size + stack_ = (char*)allocator_->Realloc(stack_, GetCapacity(), newCapacity); + stackTop_ = stack_ + size; + stackEnd_ = stack_ + newCapacity; + } + + void Destroy() { + Allocator::Free(stack_); + RAPIDJSON_DELETE(ownAllocator_); // Only delete if it is owned by the stack + } + + // Prohibit copy constructor & assignment operator. + Stack(const Stack&); + Stack& operator=(const Stack&); + + Allocator* allocator_; + Allocator* ownAllocator_; + char *stack_; + char *stackTop_; + char *stackEnd_; + size_t initialCapacity_; +}; + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_STACK_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/internal/strfunc.h b/c++/include/misc/jsonwrapp/rapidjson10/internal/strfunc.h new file mode 100644 index 00000000..84405065 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/internal/strfunc.h @@ -0,0 +1,39 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_INTERNAL_STRFUNC_H_ +#define RAPIDJSON_INTERNAL_STRFUNC_H_ + +#include "../rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +//! Custom strlen() which works on different character types. +/*! \tparam Ch Character type (e.g. char, wchar_t, short) + \param s Null-terminated input string. + \return Number of characters in the string. + \note This has the same semantics as strlen(), the return value is not number of Unicode codepoints. +*/ +template +inline SizeType StrLen(const Ch* s) { + const Ch* p = s; + while (*p) ++p; + return SizeType(p - s); +} + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_INTERNAL_STRFUNC_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/internal/strtod.h b/c++/include/misc/jsonwrapp/rapidjson10/internal/strtod.h new file mode 100644 index 00000000..ace65f67 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/internal/strtod.h @@ -0,0 +1,270 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_STRTOD_ +#define RAPIDJSON_STRTOD_ + +#include "../rapidjson.h" +#include "ieee754.h" +#include "biginteger.h" +#include "diyfp.h" +#include "pow10.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +inline double FastPath(double significand, int exp) { + if (exp < -308) + return 0.0; + else if (exp >= 0) + return significand * internal::Pow10(exp); + else + return significand / internal::Pow10(-exp); +} + +inline double StrtodNormalPrecision(double d, int p) { + if (p < -308) { + // Prevent expSum < -308, making Pow10(p) = 0 + d = FastPath(d, -308); + d = FastPath(d, p + 308); + } + else + d = FastPath(d, p); + return d; +} + +template +inline T Min3(T a, T b, T c) { + T m = a; + if (m > b) m = b; + if (m > c) m = c; + return m; +} + +inline int CheckWithinHalfULP(double b, const BigInteger& d, int dExp) { + const Double db(b); + const uint64_t bInt = db.IntegerSignificand(); + const int bExp = db.IntegerExponent(); + const int hExp = bExp - 1; + + int dS_Exp2 = 0, dS_Exp5 = 0, bS_Exp2 = 0, bS_Exp5 = 0, hS_Exp2 = 0, hS_Exp5 = 0; + + // Adjust for decimal exponent + if (dExp >= 0) { + dS_Exp2 += dExp; + dS_Exp5 += dExp; + } + else { + bS_Exp2 -= dExp; + bS_Exp5 -= dExp; + hS_Exp2 -= dExp; + hS_Exp5 -= dExp; + } + + // Adjust for binary exponent + if (bExp >= 0) + bS_Exp2 += bExp; + else { + dS_Exp2 -= bExp; + hS_Exp2 -= bExp; + } + + // Adjust for half ulp exponent + if (hExp >= 0) + hS_Exp2 += hExp; + else { + dS_Exp2 -= hExp; + bS_Exp2 -= hExp; + } + + // Remove common power of two factor from all three scaled values + int common_Exp2 = Min3(dS_Exp2, bS_Exp2, hS_Exp2); + dS_Exp2 -= common_Exp2; + bS_Exp2 -= common_Exp2; + hS_Exp2 -= common_Exp2; + + BigInteger dS = d; + dS.MultiplyPow5(dS_Exp5) <<= dS_Exp2; + + BigInteger bS(bInt); + bS.MultiplyPow5(bS_Exp5) <<= bS_Exp2; + + BigInteger hS(1); + hS.MultiplyPow5(hS_Exp5) <<= hS_Exp2; + + BigInteger delta(0); + dS.Difference(bS, &delta); + + return delta.Compare(hS); +} + +inline bool 
StrtodFast(double d, int p, double* result) { + // Use fast path for string-to-double conversion if possible + // see http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + if (p > 22 && p < 22 + 16) { + // Fast Path Cases In Disguise + d *= internal::Pow10(p - 22); + p = 22; + } + + if (p >= -22 && p <= 22 && d <= 9007199254740991.0) { // 2^53 - 1 + *result = FastPath(d, p); + return true; + } + else + return false; +} + +// Compute an approximation and see if it is within 1/2 ULP +inline bool StrtodDiyFp(const char* decimals, size_t length, size_t decimalPosition, int exp, double* result) { + uint64_t significand = 0; + size_t i = 0; // 2^64 - 1 = 18446744073709551615, 1844674407370955161 = 0x1999999999999999 + for (; i < length; i++) { + if (significand > RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || + (significand == RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) && decimals[i] > '5')) + break; + significand = significand * 10 + (decimals[i] - '0'); + } + + if (i < length && decimals[i] >= '5') // Rounding + significand++; + + size_t remaining = length - i; + const unsigned kUlpShift = 3; + const unsigned kUlp = 1 << kUlpShift; + int error = (remaining == 0) ? 
0 : kUlp / 2; + + DiyFp v(significand, 0); + v = v.Normalize(); + error <<= -v.e; + + const int dExp = (int)decimalPosition - (int)i + exp; + + int actualExp; + DiyFp cachedPower = GetCachedPower10(dExp, &actualExp); + if (actualExp != dExp) { + static const DiyFp kPow10[] = { + DiyFp(RAPIDJSON_UINT64_C2(0xa0000000, 00000000), -60), // 10^1 + DiyFp(RAPIDJSON_UINT64_C2(0xc8000000, 00000000), -57), // 10^2 + DiyFp(RAPIDJSON_UINT64_C2(0xfa000000, 00000000), -54), // 10^3 + DiyFp(RAPIDJSON_UINT64_C2(0x9c400000, 00000000), -50), // 10^4 + DiyFp(RAPIDJSON_UINT64_C2(0xc3500000, 00000000), -47), // 10^5 + DiyFp(RAPIDJSON_UINT64_C2(0xf4240000, 00000000), -44), // 10^6 + DiyFp(RAPIDJSON_UINT64_C2(0x98968000, 00000000), -40) // 10^7 + }; + int adjustment = dExp - actualExp - 1; + RAPIDJSON_ASSERT(adjustment >= 0 && adjustment < 7); + v = v * kPow10[adjustment]; + if (length + adjustment > 19) // has more digits than decimal digits in 64-bit + error += kUlp / 2; + } + + v = v * cachedPower; + + error += kUlp + (error == 0 ? 
0 : 1); + + const int oldExp = v.e; + v = v.Normalize(); + error <<= oldExp - v.e; + + const unsigned effectiveSignificandSize = Double::EffectiveSignificandSize(64 + v.e); + unsigned precisionSize = 64 - effectiveSignificandSize; + if (precisionSize + kUlpShift >= 64) { + unsigned scaleExp = (precisionSize + kUlpShift) - 63; + v.f >>= scaleExp; + v.e += scaleExp; + error = (error >> scaleExp) + 1 + kUlp; + precisionSize -= scaleExp; + } + + DiyFp rounded(v.f >> precisionSize, v.e + precisionSize); + const uint64_t precisionBits = (v.f & ((uint64_t(1) << precisionSize) - 1)) * kUlp; + const uint64_t halfWay = (uint64_t(1) << (precisionSize - 1)) * kUlp; + if (precisionBits >= halfWay + error) { + rounded.f++; + if (rounded.f & (DiyFp::kDpHiddenBit << 1)) { // rounding overflows mantissa (issue #340) + rounded.f >>= 1; + rounded.e++; + } + } + + *result = rounded.ToDouble(); + + return halfWay - error >= precisionBits || precisionBits >= halfWay + error; +} + +inline double StrtodBigInteger(double approx, const char* decimals, size_t length, size_t decimalPosition, int exp) { + const BigInteger dInt(decimals, length); + const int dExp = (int)decimalPosition - (int)length + exp; + Double a(approx); + int cmp = CheckWithinHalfULP(a.Value(), dInt, dExp); + if (cmp < 0) + return a.Value(); // within half ULP + else if (cmp == 0) { + // Round towards even + if (a.Significand() & 1) + return a.NextPositiveDouble(); + else + return a.Value(); + } + else // adjustment + return a.NextPositiveDouble(); +} + +inline double StrtodFullPrecision(double d, int p, const char* decimals, size_t length, size_t decimalPosition, int exp) { + RAPIDJSON_ASSERT(d >= 0.0); + RAPIDJSON_ASSERT(length >= 1); + + double result; + if (StrtodFast(d, p, &result)) + return result; + + // Trim leading zeros + while (*decimals == '0' && length > 1) { + length--; + decimals++; + decimalPosition--; + } + + // Trim trailing zeros + while (decimals[length - 1] == '0' && length > 1) { + length--; + 
decimalPosition--; + exp++; + } + + // Trim right-most digits + const int kMaxDecimalDigit = 780; + if ((int)length > kMaxDecimalDigit) { + int delta = (int(length) - kMaxDecimalDigit); + exp += delta; + decimalPosition -= delta; + length = kMaxDecimalDigit; + } + + // If too small, underflow to zero + if (int(length) + exp < -324) + return 0.0; + + if (StrtodDiyFp(decimals, length, decimalPosition, exp, &result)) + return result; + + // Use approximation from StrtodDiyFp and make adjustment with BigInteger comparison + return StrtodBigInteger(result, decimals, length, decimalPosition, exp); +} + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_STRTOD_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/license.txt b/c++/include/misc/jsonwrapp/rapidjson10/license.txt new file mode 100644 index 00000000..879293af --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/license.txt @@ -0,0 +1,57 @@ +Tencent is pleased to support the open source community by making RapidJSON available. + +Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. + +If you have downloaded a copy of the RapidJSON binary from Tencent, please note that the RapidJSON binary is licensed under the MIT License. +If you have downloaded a copy of the RapidJSON source code from Tencent, please note that RapidJSON source code is licensed under the MIT License, except for the third-party components listed below which are subject to different license terms. Your integration of RapidJSON into your own projects may require compliance with the MIT License, as well as the other licenses applicable to the third-party components included within RapidJSON. +A copy of the MIT License is included in this file. 
+ +Other dependencies and licenses: + +Open Source Software Licensed Under the BSD License: +-------------------------------------------------------------------- + +The msinttypes r29 +Copyright (c) 2006-2013 Alexander Chemeris +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Open Source Software Licensed Under the JSON License: +-------------------------------------------------------------------- + +json.org +Copyright (c) 2002 JSON.org +All Rights Reserved. + +JSON_checker +Copyright (c) 2002 JSON.org +All Rights Reserved. 
+ + +Terms of the JSON License: +--------------------------------------------------- + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +The Software shall be used for Good, not Evil. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +Terms of the MIT License: +-------------------------------------------------------------------- + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/c++/include/misc/jsonwrapp/rapidjson10/memorybuffer.h b/c++/include/misc/jsonwrapp/rapidjson10/memorybuffer.h new file mode 100644 index 00000000..2484b218 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/memorybuffer.h @@ -0,0 +1,70 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_MEMORYBUFFER_H_ +#define RAPIDJSON_MEMORYBUFFER_H_ + +#include "rapidjson.h" +#include "internal/stack.h" + +RAPIDJSON_NAMESPACE_BEGIN + +//! Represents an in-memory output byte stream. +/*! + This class is mainly for being wrapped by EncodedOutputStream or AutoUTFOutputStream. + + It is similar to FileWriteBuffer but the destination is an in-memory buffer instead of a file. + + Differences between MemoryBuffer and StringBuffer: + 1. StringBuffer has Encoding but MemoryBuffer is only a byte buffer. + 2. 
StringBuffer::GetString() returns a null-terminated string. MemoryBuffer::GetBuffer() returns a buffer without terminator. + + \tparam Allocator type for allocating memory buffer. + \note implements Stream concept +*/ +template +struct GenericMemoryBuffer { + typedef char Ch; // byte + + GenericMemoryBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {} + + void Put(Ch c) { *stack_.template Push() = c; } + void Flush() {} + + void Clear() { stack_.Clear(); } + void ShrinkToFit() { stack_.ShrinkToFit(); } + Ch* Push(size_t count) { return stack_.template Push(count); } + void Pop(size_t count) { stack_.template Pop(count); } + + const Ch* GetBuffer() const { + return stack_.template Bottom(); + } + + size_t GetSize() const { return stack_.GetSize(); } + + static const size_t kDefaultCapacity = 256; + mutable internal::Stack stack_; +}; + +typedef GenericMemoryBuffer<> MemoryBuffer; + +//! Implement specialized version of PutN() with memset() for better performance. +template<> +inline void PutN(MemoryBuffer& memoryBuffer, char c, size_t n) { + std::memset(memoryBuffer.stack_.Push(n), c, n * sizeof(c)); +} + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_MEMORYBUFFER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/memorystream.h b/c++/include/misc/jsonwrapp/rapidjson10/memorystream.h new file mode 100644 index 00000000..99feae5d --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/memorystream.h @@ -0,0 +1,61 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_MEMORYSTREAM_H_ +#define RAPIDJSON_MEMORYSTREAM_H_ + +#include "rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN + +//! Represents an in-memory input byte stream. +/*! + This class is mainly for being wrapped by EncodedInputStream or AutoUTFInputStream. + + It is similar to FileReadBuffer but the source is an in-memory buffer instead of a file. + + Differences between MemoryStream and StringStream: + 1. StringStream has encoding but MemoryStream is a byte stream. + 2. MemoryStream needs size of the source buffer and the buffer don't need to be null terminated. StringStream assume null-terminated string as source. + 3. MemoryStream supports Peek4() for encoding detection. StringStream is specified with an encoding so it should not have Peek4(). + \note implements Stream concept +*/ +struct MemoryStream { + typedef char Ch; // byte + + MemoryStream(const Ch *src, size_t size) : src_(src), begin_(src), end_(src + size), size_(size) {} + + Ch Peek() const { return (src_ == end_) ? '\0' : *src_; } + Ch Take() { return (src_ == end_) ? '\0' : *src_++; } + size_t Tell() const { return static_cast(src_ - begin_); } + + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + + // For encoding detection only. + const Ch* Peek4() const { + return Tell() + 4 <= size_ ? src_ : 0; + } + + const Ch* src_; //!< Current read position. + const Ch* begin_; //!< Original head of the string. 
+ const Ch* end_; //!< End of stream. + size_t size_; //!< Size of the stream. +}; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_MEMORYBUFFER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/msinttypes/inttypes.h b/c++/include/misc/jsonwrapp/rapidjson10/msinttypes/inttypes.h new file mode 100644 index 00000000..18111286 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/msinttypes/inttypes.h @@ -0,0 +1,316 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2013 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the product nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +// The above software in this distribution may have been modified by +// THL A29 Limited ("Tencent Modifications"). +// All Tencent Modifications are Copyright (C) 2015 THL A29 Limited. + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "stdint.h" + +// miloyip: VC supports inttypes.h since VC2013 +#if _MSC_VER >= 1800 +#include +#else + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define 
PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 "I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define 
SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +#endif // __STDC_FORMAT_MACROS ] + +// 7.8.2 
Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + +#endif // _MSC_VER >= 1800 + +#endif // _MSC_INTTYPES_H_ ] diff --git a/c++/include/misc/jsonwrapp/rapidjson10/msinttypes/stdint.h b/c++/include/misc/jsonwrapp/rapidjson10/msinttypes/stdint.h new file mode 100644 index 00000000..a26fff4b --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/msinttypes/stdint.h @@ -0,0 +1,300 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2013 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the product nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +// The above software in this distribution may have been modified by +// THL A29 Limited ("Tencent Modifications"). +// All Tencent Modifications are Copyright (C) 2015 THL A29 Limited. + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +// miloyip: Originally Visual Studio 2010 uses its own stdint.h. However it generates warning with INT64_C(), so change to use this file for vs2010. 
+#if _MSC_VER >= 1600 // [ +#include + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +#undef INT8_C +#undef INT16_C +#undef INT32_C +#undef INT64_C +#undef UINT8_C +#undef UINT16_C +#undef UINT32_C +#undef UINT64_C + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +// These #ifndef's are needed to prevent collisions with . +// Check out Issue 9 for the details. +#ifndef INTMAX_C // [ +# define INTMAX_C INT64_C +#endif // INTMAX_C ] +#ifndef UINTMAX_C // [ +# define UINTMAX_C UINT64_C +#endif // UINTMAX_C ] + +#endif // __STDC_CONSTANT_MACROS ] + +#else // ] _MSC_VER >= 1700 [ + +#include + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. 
+#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define 
UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// 
WCHAR_MIN and WCHAR_MAX are also defined in +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +// These #ifndef's are needed to prevent collisions with . +// Check out Issue 9 for the details. +#ifndef INTMAX_C // [ +# define INTMAX_C INT64_C +#endif // INTMAX_C ] +#ifndef UINTMAX_C // [ +# define UINTMAX_C UINT64_C +#endif // UINTMAX_C ] + +#endif // __STDC_CONSTANT_MACROS ] + +#endif // _MSC_VER >= 1600 ] + +#endif // _MSC_STDINT_H_ ] diff --git a/c++/include/misc/jsonwrapp/rapidjson10/pointer.h b/c++/include/misc/jsonwrapp/rapidjson10/pointer.h new file mode 100644 index 00000000..5d2aa8d6 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/pointer.h @@ -0,0 +1,1313 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_POINTER_H_ +#define RAPIDJSON_POINTER_H_ + +#include "document.h" +#include "internal/itoa.h" + +RAPIDJSON_NAMESPACE_BEGIN + +static const SizeType kPointerInvalidIndex = ~SizeType(0); //!< Represents an invalid index in GenericPointer::Token + +//! Error code of parsing. +/*! \ingroup RAPIDJSON_ERRORS + \see GenericPointer::GenericPointer, GenericPointer::GetParseErrorCode +*/ +enum PointerParseErrorCode { + kPointerParseErrorNone = 0, //!< The parse is successful + + kPointerParseErrorTokenMustBeginWithSolidus, //!< A token must begin with a '/' + kPointerParseErrorInvalidEscape, //!< Invalid escape + kPointerParseErrorInvalidPercentEncoding, //!< Invalid percent encoding in URI fragment + kPointerParseErrorCharacterMustPercentEncode //!< A character must percent encoded in URI fragment +}; + +/////////////////////////////////////////////////////////////////////////////// +// GenericPointer + +//! Represents a JSON Pointer. Use Pointer for UTF8 encoding and default allocator. +/*! + This class implements RFC 6901 "JavaScript Object Notation (JSON) Pointer" + (https://tools.ietf.org/html/rfc6901). + + A JSON pointer is for identifying a specific value in a JSON document + (GenericDocument). It can simplify coding of DOM tree manipulation, because it + can access multiple-level depth of DOM tree with single API call. + + After it parses a string representation (e.g. "/foo/0" or URI fragment + representation (e.g. 
"#/foo/0") into its internal representation (tokens), + it can be used to resolve a specific value in multiple documents, or sub-tree + of documents. + + Contrary to GenericValue, Pointer can be copy constructed and copy assigned. + Apart from assignment, a Pointer cannot be modified after construction. + + Although Pointer is very convenient, please aware that constructing Pointer + involves parsing and dynamic memory allocation. A special constructor with user- + supplied tokens eliminates these. + + GenericPointer depends on GenericDocument and GenericValue. + + \tparam ValueType The value type of the DOM tree. E.g. GenericValue > + \tparam Allocator The allocator type for allocating memory for internal representation. + + \note GenericPointer uses same encoding of ValueType. + However, Allocator of GenericPointer is independent of Allocator of Value. +*/ +template +class GenericPointer { +public: + typedef typename ValueType::EncodingType EncodingType; //!< Encoding type from Value + typedef typename EncodingType::Ch Ch; //!< Character type from Value + + //! A token is the basic units of internal representation. + /*! + A JSON pointer string representation "/foo/123" is parsed to two tokens: + "foo" and 123. 123 will be represented in both numeric form and string form. + They are resolved according to the actual value type (object or array). + + For token that are not numbers, or the numeric value is out of bound + (greater than limits of SizeType), they are only treated as string form + (i.e. the token's index will be equal to kPointerInvalidIndex). + + This struct is public so that user can create a Pointer without parsing and + allocation, using a special constructor. + */ + struct Token { + const Ch* name; //!< Name of the token. It has null character at the end but it can contain null character. + SizeType length; //!< Length of the name. + SizeType index; //!< A valid array index, if it is not equal to kPointerInvalidIndex. 
+ }; + + //!@name Constructors and destructor. + //@{ + + //! Default constructor. + GenericPointer() : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {} + + //! Constructor that parses a string or URI fragment representation. + /*! + \param source A null-terminated, string or URI fragment representation of JSON pointer. + \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one. + */ + explicit GenericPointer(const Ch* source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) { + Parse(source, internal::StrLen(source)); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Constructor that parses a string or URI fragment representation. + /*! + \param source A string or URI fragment representation of JSON pointer. + \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one. + \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. + */ + explicit GenericPointer(const std::basic_string& source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) { + Parse(source.c_str(), source.size()); + } +#endif + + //! Constructor that parses a string or URI fragment representation, with length of the source string. + /*! + \param source A string or URI fragment representation of JSON pointer. + \param length Length of source. + \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one. + \note Slightly faster than the overload without length. 
+ */ + GenericPointer(const Ch* source, size_t length, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) { + Parse(source, length); + } + + //! Constructor with user-supplied tokens. + /*! + This constructor let user supplies const array of tokens. + This prevents the parsing process and eliminates allocation. + This is preferred for memory constrained environments. + + \param tokens An constant array of tokens representing the JSON pointer. + \param tokenCount Number of tokens. + + \b Example + \code + #define NAME(s) { s, sizeof(s) / sizeof(s[0]) - 1, kPointerInvalidIndex } + #define INDEX(i) { #i, sizeof(#i) - 1, i } + + static const Pointer::Token kTokens[] = { NAME("foo"), INDEX(123) }; + static const Pointer p(kTokens, sizeof(kTokens) / sizeof(kTokens[0])); + // Equivalent to static const Pointer p("/foo/123"); + + #undef NAME + #undef INDEX + \endcode + */ + GenericPointer(const Token* tokens, size_t tokenCount) : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(const_cast(tokens)), tokenCount_(tokenCount), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {} + + //! Copy constructor. + GenericPointer(const GenericPointer& rhs) : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) { + *this = rhs; + } + + //! Destructor. + ~GenericPointer() { + if (nameBuffer_) // If user-supplied tokens constructor is used, nameBuffer_ is nullptr and tokens_ are not deallocated. + Allocator::Free(tokens_); + RAPIDJSON_DELETE(ownAllocator_); + } + + //! Assignment operator. 
+ GenericPointer& operator=(const GenericPointer& rhs) { + if (this != &rhs) { + // Do not delete ownAllcator + if (nameBuffer_) + Allocator::Free(tokens_); + + tokenCount_ = rhs.tokenCount_; + parseErrorOffset_ = rhs.parseErrorOffset_; + parseErrorCode_ = rhs.parseErrorCode_; + + if (rhs.nameBuffer_) + CopyFromRaw(rhs); // Normally parsed tokens. + else { + tokens_ = rhs.tokens_; // User supplied const tokens. + nameBuffer_ = 0; + } + } + return *this; + } + + //@} + + //!@name Append token + //@{ + + //! Append a token and return a new Pointer + /*! + \param token Token to be appended. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(const Token& token, Allocator* allocator = 0) const { + GenericPointer r; + r.allocator_ = allocator; + Ch *p = r.CopyFromRaw(*this, 1, token.length + 1); + std::memcpy(p, token.name, (token.length + 1) * sizeof(Ch)); + r.tokens_[tokenCount_].name = p; + r.tokens_[tokenCount_].length = token.length; + r.tokens_[tokenCount_].index = token.index; + return r; + } + + //! Append a name token with length, and return a new Pointer + /*! + \param name Name to be appended. + \param length Length of name. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(const Ch* name, SizeType length, Allocator* allocator = 0) const { + Token token = { name, length, kPointerInvalidIndex }; + return Append(token, allocator); + } + + //! Append a name token without length, and return a new Pointer + /*! + \param name Name (const Ch*) to be appended. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr::Type, Ch> >), (GenericPointer)) + Append(T* name, Allocator* allocator = 0) const { + return Append(name, StrLen(name), allocator); + } + +#if RAPIDJSON_HAS_STDSTRING + //! 
Append a name token, and return a new Pointer + /*! + \param name Name to be appended. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(const std::basic_string& name, Allocator* allocator = 0) const { + return Append(name.c_str(), static_cast(name.size()), allocator); + } +#endif + + //! Append a index token, and return a new Pointer + /*! + \param index Index to be appended. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(SizeType index, Allocator* allocator = 0) const { + char buffer[21]; + SizeType length = (sizeof(SizeType) == 4 ? internal::u32toa(index, buffer): internal::u64toa(index, buffer)) - buffer; + buffer[length] = '\0'; + + if (sizeof(Ch) == 1) { + Token token = { (Ch*)buffer, length, index }; + return Append(token, allocator); + } + else { + Ch name[21]; + for (size_t i = 0; i <= length; i++) + name[i] = buffer[i]; + Token token = { name, length, index }; + return Append(token, allocator); + } + } + + //! Append a token by value, and return a new Pointer + /*! + \param value Value (either Uint or String) to be appended. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(const ValueType& token, Allocator* allocator = 0) const { + if (token.IsString()) + return Append(token.GetString(), token.GetStringLength(), allocator); + else { + RAPIDJSON_ASSERT(token.IsUint64()); + RAPIDJSON_ASSERT(token.GetUint64() <= SizeType(~0)); + return Append(static_cast(token.GetUint64()), allocator); + } + } + + //!@name Handling Parse Error + //@{ + + //! Check whether this is a valid pointer. + bool IsValid() const { return parseErrorCode_ == kPointerParseErrorNone; } + + //! Get the parsing error offset in code unit. + size_t GetParseErrorOffset() const { return parseErrorOffset_; } + + //! Get the parsing error code. 
+ PointerParseErrorCode GetParseErrorCode() const { return parseErrorCode_; } + + //@} + + //!@name Tokens + //@{ + + //! Get the token array (const version only). + const Token* GetTokens() const { return tokens_; } + + //! Get the number of tokens. + size_t GetTokenCount() const { return tokenCount_; } + + //@} + + //!@name Equality/inequality operators + //@{ + + //! Equality operator. + /*! + \note When any pointers are invalid, always returns false. + */ + bool operator==(const GenericPointer& rhs) const { + if (!IsValid() || !rhs.IsValid() || tokenCount_ != rhs.tokenCount_) + return false; + + for (size_t i = 0; i < tokenCount_; i++) { + if (tokens_[i].index != rhs.tokens_[i].index || + tokens_[i].length != rhs.tokens_[i].length || + (tokens_[i].length != 0 && std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch)* tokens_[i].length) != 0)) + { + return false; + } + } + + return true; + } + + //! Inequality operator. + /*! + \note When any pointers are invalid, always returns true. + */ + bool operator!=(const GenericPointer& rhs) const { return !(*this == rhs); } + + //@} + + //!@name Stringify + //@{ + + //! Stringify the pointer into string representation. + /*! + \tparam OutputStream Type of output stream. + \param os The output stream. + */ + template + bool Stringify(OutputStream& os) const { + return Stringify(os); + } + + //! Stringify the pointer into URI fragment representation. + /*! + \tparam OutputStream Type of output stream. + \param os The output stream. + */ + template + bool StringifyUriFragment(OutputStream& os) const { + return Stringify(os); + } + + //@} + + //!@name Create value + //@{ + + //! Create a value in a subtree. + /*! + If the value is not exist, it creates all parent values and a JSON Null value. + So it always succeed and return the newly created or existing value. + + Remind that it may change types of parents according to tokens, so it + potentially removes previously stored values. 
For example, if a document + was an array, and "/foo" is used to create a value, then the document + will be changed to an object, and all existing array elements are lost. + + \param root Root value of a DOM subtree to be resolved. It can be any value other than document root. + \param allocator Allocator for creating the values if the specified value or its parents are not exist. + \param alreadyExist If non-null, it stores whether the resolved value is already exist. + \return The resolved newly created (a JSON Null value), or already exists value. + */ + ValueType& Create(ValueType& root, typename ValueType::AllocatorType& allocator, bool* alreadyExist = 0) const { + RAPIDJSON_ASSERT(IsValid()); + ValueType* v = &root; + bool exist = true; + for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { + if (v->IsArray() && t->name[0] == '-' && t->length == 1) { + v->PushBack(Value().Move(), allocator); + v = &((*v)[v->Size() - 1]); + exist = false; + } + else { + if (t->index == kPointerInvalidIndex) { // must be object name + if (!v->IsObject()) + v->SetObject(); // Change to Object + } + else { // object name or array index + if (!v->IsArray() && !v->IsObject()) + v->SetArray(); // Change to Array + } + + if (v->IsArray()) { + if (t->index >= v->Size()) { + v->Reserve(t->index + 1, allocator); + while (t->index >= v->Size()) + v->PushBack(Value().Move(), allocator); + exist = false; + } + v = &((*v)[t->index]); + } + else { + typename ValueType::MemberIterator m = v->FindMember(GenericStringRef(t->name, t->length)); + if (m == v->MemberEnd()) { + v->AddMember(Value(t->name, t->length, allocator).Move(), Value().Move(), allocator); + v = &(--v->MemberEnd())->value; // Assumes AddMember() appends at the end + exist = false; + } + else + v = &m->value; + } + } + } + + if (alreadyExist) + *alreadyExist = exist; + + return *v; + } + + //! Creates a value in a document. + /*! + \param document A document to be resolved. 
+ \param allocator Allocator for creating the values if the specified value or its parents are not exist. + \param alreadyExist If non-null, it stores whether the resolved value is already exist. + \return The resolved newly created, or already exists value. + */ + template + ValueType& Create(GenericDocument& document, bool* alreadyExist = 0) const { + return Create(document, document.GetAllocator(), alreadyExist); + } + + //@} + + //!@name Query value + //@{ + + //! Query a value in a subtree. + /*! + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \return Pointer to the value if it can be resolved. Otherwise null. + */ + ValueType* Get(ValueType& root) const { + RAPIDJSON_ASSERT(IsValid()); + ValueType* v = &root; + for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { + switch (v->GetType()) { + case kObjectType: + { + typename ValueType::MemberIterator m = v->FindMember(GenericStringRef(t->name, t->length)); + if (m == v->MemberEnd()) + return 0; + v = &m->value; + } + break; + case kArrayType: + if (t->index == kPointerInvalidIndex || t->index >= v->Size()) + return 0; + v = &((*v)[t->index]); + break; + default: + return 0; + } + } + return v; + } + + //! Query a const value in a const subtree. + /*! + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \return Pointer to the value if it can be resolved. Otherwise null. + */ + const ValueType* Get(const ValueType& root) const { return Get(const_cast(root)); } + + //@} + + //!@name Query a value with default + //@{ + + //! Query a value in a subtree with default value. + /*! + Similar to Get(), but if the specified value do not exists, it creates all parents and clone the default value. + So that this function always succeed. + + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. 
+ \param defaultValue Default value to be cloned if the value was not exists. + \param allocator Allocator for creating the values if the specified value or its parents are not exist. + \see Create() + */ + ValueType& GetWithDefault(ValueType& root, const ValueType& defaultValue, typename ValueType::AllocatorType& allocator) const { + bool alreadyExist; + Value& v = Create(root, allocator, &alreadyExist); + return alreadyExist ? v : v.CopyFrom(defaultValue, allocator); + } + + //! Query a value in a subtree with default null-terminated string. + ValueType& GetWithDefault(ValueType& root, const Ch* defaultValue, typename ValueType::AllocatorType& allocator) const { + bool alreadyExist; + Value& v = Create(root, allocator, &alreadyExist); + return alreadyExist ? v : v.SetString(defaultValue, allocator); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Query a value in a subtree with default std::basic_string. + ValueType& GetWithDefault(ValueType& root, const std::basic_string& defaultValue, typename ValueType::AllocatorType& allocator) const { + bool alreadyExist; + Value& v = Create(root, allocator, &alreadyExist); + return alreadyExist ? v : v.SetString(defaultValue, allocator); + } +#endif + + //! Query a value in a subtree with default primitive value. + /*! + \tparam T \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (ValueType&)) + GetWithDefault(ValueType& root, T defaultValue, typename ValueType::AllocatorType& allocator) const { + return GetWithDefault(root, ValueType(defaultValue).Move(), allocator); + } + + //! Query a value in a document with default value. + template + ValueType& GetWithDefault(GenericDocument& document, const ValueType& defaultValue) const { + return GetWithDefault(document, defaultValue, document.GetAllocator()); + } + + //! Query a value in a document with default null-terminated string. 
+ template + ValueType& GetWithDefault(GenericDocument& document, const Ch* defaultValue) const { + return GetWithDefault(document, defaultValue, document.GetAllocator()); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Query a value in a document with default std::basic_string. + template + ValueType& GetWithDefault(GenericDocument& document, const std::basic_string& defaultValue) const { + return GetWithDefault(document, defaultValue, document.GetAllocator()); + } +#endif + + //! Query a value in a document with default primitive value. + /*! + \tparam T \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (ValueType&)) + GetWithDefault(GenericDocument& document, T defaultValue) const { + return GetWithDefault(document, defaultValue, document.GetAllocator()); + } + + //@} + + //!@name Set a value + //@{ + + //! Set a value in a subtree, with move semantics. + /*! + It creates all parents if they are not exist or types are different to the tokens. + So this function always succeeds but potentially remove existing values. + + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \param value Value to be set. + \param allocator Allocator for creating the values if the specified value or its parents are not exist. + \see Create() + */ + ValueType& Set(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator) = value; + } + + //! Set a value in a subtree, with copy semantics. + ValueType& Set(ValueType& root, const ValueType& value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator).CopyFrom(value, allocator); + } + + //! Set a null-terminated string in a subtree. 
+ ValueType& Set(ValueType& root, const Ch* value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator) = ValueType(value, allocator).Move(); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Set a std::basic_string in a subtree. + ValueType& Set(ValueType& root, const std::basic_string& value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator) = ValueType(value, allocator).Move(); + } +#endif + + //! Set a primitive value in a subtree. + /*! + \tparam T \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (ValueType&)) + Set(ValueType& root, T value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator) = ValueType(value).Move(); + } + + //! Set a value in a document, with move semantics. + template + ValueType& Set(GenericDocument& document, ValueType& value) const { + return Create(document) = value; + } + + //! Set a value in a document, with copy semantics. + template + ValueType& Set(GenericDocument& document, const ValueType& value) const { + return Create(document).CopyFrom(value, document.GetAllocator()); + } + + //! Set a null-terminated string in a document. + template + ValueType& Set(GenericDocument& document, const Ch* value) const { + return Create(document) = ValueType(value, document.GetAllocator()).Move(); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Sets a std::basic_string in a document. + template + ValueType& Set(GenericDocument& document, const std::basic_string& value) const { + return Create(document) = ValueType(value, document.GetAllocator()).Move(); + } +#endif + + //! Set a primitive value in a document. + /*! 
+ \tparam T \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (ValueType&)) + Set(GenericDocument& document, T value) const { + return Create(document) = value; + } + + //@} + + //!@name Swap a value + //@{ + + //! Swap a value with a value in a subtree. + /*! + It creates all parents if they are not exist or types are different to the tokens. + So this function always succeeds but potentially remove existing values. + + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \param value Value to be swapped. + \param allocator Allocator for creating the values if the specified value or its parents are not exist. + \see Create() + */ + ValueType& Swap(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator).Swap(value); + } + + //! Swap a value with a value in a document. + template + ValueType& Swap(GenericDocument& document, ValueType& value) const { + return Create(document).Swap(value); + } + + //@} + + //! Erase a value in a subtree. + /*! + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \return Whether the resolved value is found and erased. + + \note Erasing with an empty pointer \c Pointer(""), i.e. the root, always fail and return false. 
+ */ + bool Erase(ValueType& root) const { + RAPIDJSON_ASSERT(IsValid()); + if (tokenCount_ == 0) // Cannot erase the root + return false; + + ValueType* v = &root; + const Token* last = tokens_ + (tokenCount_ - 1); + for (const Token *t = tokens_; t != last; ++t) { + switch (v->GetType()) { + case kObjectType: + { + typename ValueType::MemberIterator m = v->FindMember(GenericStringRef(t->name, t->length)); + if (m == v->MemberEnd()) + return false; + v = &m->value; + } + break; + case kArrayType: + if (t->index == kPointerInvalidIndex || t->index >= v->Size()) + return false; + v = &((*v)[t->index]); + break; + default: + return false; + } + } + + switch (v->GetType()) { + case kObjectType: + return v->EraseMember(GenericStringRef(last->name, last->length)); + case kArrayType: + if (last->index == kPointerInvalidIndex || last->index >= v->Size()) + return false; + v->Erase(v->Begin() + last->index); + return true; + default: + return false; + } + } + +private: + //! Clone the content from rhs to this. + /*! + \param rhs Source pointer. + \param extraToken Extra tokens to be allocated. + \param extraNameBufferSize Extra name buffer size (in number of Ch) to be allocated. + \return Start of non-occupied name buffer, for storing extra names. + */ + Ch* CopyFromRaw(const GenericPointer& rhs, size_t extraToken = 0, size_t extraNameBufferSize = 0) { + if (!allocator_) // allocator is independently owned. 
+ ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); + + size_t nameBufferSize = rhs.tokenCount_; // null terminators for tokens + for (Token *t = rhs.tokens_; t != rhs.tokens_ + rhs.tokenCount_; ++t) + nameBufferSize += t->length; + + tokenCount_ = rhs.tokenCount_ + extraToken; + tokens_ = static_cast(allocator_->Malloc(tokenCount_ * sizeof(Token) + (nameBufferSize + extraNameBufferSize) * sizeof(Ch))); + nameBuffer_ = reinterpret_cast(tokens_ + tokenCount_); + std::memcpy(tokens_, rhs.tokens_, rhs.tokenCount_ * sizeof(Token)); + std::memcpy(nameBuffer_, rhs.nameBuffer_, nameBufferSize * sizeof(Ch)); + + // Adjust pointers to name buffer + std::ptrdiff_t diff = nameBuffer_ - rhs.nameBuffer_; + for (Token *t = tokens_; t != tokens_ + rhs.tokenCount_; ++t) + t->name += diff; + + return nameBuffer_ + nameBufferSize; + } + + //! Check whether a character should be percent-encoded. + /*! + According to RFC 3986 2.3 Unreserved Characters. + \param c The character (code unit) to be tested. + */ + bool NeedPercentEncode(Ch c) const { + return !((c >= '0' && c <= '9') || (c >= 'A' && c <='Z') || (c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '_' || c =='~'); + } + + //! Parse a JSON String or its URI fragment representation into tokens. + /*! + \param source Either a JSON Pointer string, or its URI fragment representation. Not need to be null terminated. + \param length Length of the source string. + \note Source cannot be JSON String Representation of JSON Pointer, e.g. In "/\u0000", \u0000 will not be unescaped. + */ + void Parse(const Ch* source, size_t length) { + RAPIDJSON_ASSERT(source != NULL); + RAPIDJSON_ASSERT(nameBuffer_ == 0); + RAPIDJSON_ASSERT(tokens_ == 0); + + // Create own allocator if user did not supply. 
+ if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); + + // Count number of '/' as tokenCount + tokenCount_ = 0; + for (const Ch* s = source; s != source + length; s++) + if (*s == '/') + tokenCount_++; + + Token* token = tokens_ = static_cast(allocator_->Malloc(tokenCount_ * sizeof(Token) + length * sizeof(Ch))); + Ch* name = nameBuffer_ = reinterpret_cast(tokens_ + tokenCount_); + size_t i = 0; + + // Detect if it is a URI fragment + bool uriFragment = false; + if (source[i] == '#') { + uriFragment = true; + i++; + } + + if (i != length && source[i] != '/') { + parseErrorCode_ = kPointerParseErrorTokenMustBeginWithSolidus; + goto error; + } + + while (i < length) { + RAPIDJSON_ASSERT(source[i] == '/'); + i++; // consumes '/' + + token->name = name; + bool isNumber = true; + + while (i < length && source[i] != '/') { + Ch c = source[i]; + if (uriFragment) { + // Decoding percent-encoding for URI fragment + if (c == '%') { + PercentDecodeStream is(&source[i], source + length); + GenericInsituStringStream os(name); + Ch* begin = os.PutBegin(); + if (!Transcoder, EncodingType>().Validate(is, os) || !is.IsValid()) { + parseErrorCode_ = kPointerParseErrorInvalidPercentEncoding; + goto error; + } + size_t len = os.PutEnd(begin); + i += is.Tell() - 1; + if (len == 1) + c = *name; + else { + name += len; + isNumber = false; + i++; + continue; + } + } + else if (NeedPercentEncode(c)) { + parseErrorCode_ = kPointerParseErrorCharacterMustPercentEncode; + goto error; + } + } + + i++; + + // Escaping "~0" -> '~', "~1" -> '/' + if (c == '~') { + if (i < length) { + c = source[i]; + if (c == '0') c = '~'; + else if (c == '1') c = '/'; + else { + parseErrorCode_ = kPointerParseErrorInvalidEscape; + goto error; + } + i++; + } + else { + parseErrorCode_ = kPointerParseErrorInvalidEscape; + goto error; + } + } + + // First check for index: all of characters are digit + if (c < '0' || c > '9') + isNumber = false; + + *name++ = c; + } + token->length = name - 
token->name; + if (token->length == 0) + isNumber = false; + *name++ = '\0'; // Null terminator + + // Second check for index: more than one digit cannot have leading zero + if (isNumber && token->length > 1 && token->name[0] == '0') + isNumber = false; + + // String to SizeType conversion + SizeType n = 0; + if (isNumber) { + for (size_t j = 0; j < token->length; j++) { + SizeType m = n * 10 + static_cast(token->name[j] - '0'); + if (m < n) { // overflow detection + isNumber = false; + break; + } + n = m; + } + } + + token->index = isNumber ? n : kPointerInvalidIndex; + token++; + } + + RAPIDJSON_ASSERT(name <= nameBuffer_ + length); // Should not overflow buffer + parseErrorCode_ = kPointerParseErrorNone; + return; + + error: + Allocator::Free(tokens_); + nameBuffer_ = 0; + tokens_ = 0; + tokenCount_ = 0; + parseErrorOffset_ = i; + return; + } + + //! Stringify to string or URI fragment representation. + /*! + \tparam uriFragment True for stringifying to URI fragment representation. False for string representation. + \tparam OutputStream type of output stream. + \param os The output stream. + */ + template + bool Stringify(OutputStream& os) const { + RAPIDJSON_ASSERT(IsValid()); + + if (uriFragment) + os.Put('#'); + + for (Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { + os.Put('/'); + for (size_t j = 0; j < t->length; j++) { + Ch c = t->name[j]; + if (c == '~') { + os.Put('~'); + os.Put('0'); + } + else if (c == '/') { + os.Put('~'); + os.Put('1'); + } + else if (uriFragment && NeedPercentEncode(c)) { + // Transcode to UTF8 sequence + GenericStringStream source(&t->name[j]); + PercentEncodeStream target(os); + if (!Transcoder >().Validate(source, target)) + return false; + j += source.Tell() - 1; + } + else + os.Put(c); + } + } + return true; + } + + //! A helper stream for decoding a percent-encoded sequence into code unit. + /*! + This stream decodes %XY triplet into code unit (0-255). 
+ If it encounters invalid characters, it sets output code unit as 0 and + mark invalid, and to be checked by IsValid(). + */ + class PercentDecodeStream { + public: + //! Constructor + /*! + \param source Start of the stream + \param end Past-the-end of the stream. + */ + PercentDecodeStream(const Ch* source, const Ch* end) : src_(source), head_(source), end_(end), valid_(true) {} + + Ch Take() { + if (*src_ != '%' || src_ + 3 > end_) { // %XY triplet + valid_ = false; + return 0; + } + src_++; + Ch c = 0; + for (int j = 0; j < 2; j++) { + c <<= 4; + Ch h = *src_; + if (h >= '0' && h <= '9') c += h - '0'; + else if (h >= 'A' && h <= 'F') c += h - 'A' + 10; + else if (h >= 'a' && h <= 'f') c += h - 'a' + 10; + else { + valid_ = false; + return 0; + } + src_++; + } + return c; + } + + size_t Tell() const { return src_ - head_; } + bool IsValid() const { return valid_; } + + private: + const Ch* src_; //!< Current read position. + const Ch* head_; //!< Original head of the string. + const Ch* end_; //!< Past-the-end position. + bool valid_; //!< Whether the parsing is valid. + }; + + //! A helper stream to encode character (UTF-8 code unit) into percent-encoded sequence. + template + class PercentEncodeStream { + public: + PercentEncodeStream(OutputStream& os) : os_(os) {} + void Put(char c) { // UTF-8 must be byte + unsigned char u = static_cast(c); + static const char hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + os_.Put('%'); + os_.Put(hexDigits[u >> 4]); + os_.Put(hexDigits[u & 15]); + } + private: + OutputStream& os_; + }; + + Allocator* allocator_; //!< The current allocator. It is either user-supplied or equal to ownAllocator_. + Allocator* ownAllocator_; //!< Allocator owned by this Pointer. + Ch* nameBuffer_; //!< A buffer containing all names in tokens. + Token* tokens_; //!< A list of tokens. + size_t tokenCount_; //!< Number of tokens in tokens_. 
+ size_t parseErrorOffset_; //!< Offset in code unit when parsing fail. + PointerParseErrorCode parseErrorCode_; //!< Parsing error code. +}; + +//! GenericPointer for Value (UTF-8, default allocator). +typedef GenericPointer Pointer; + +//!@name Helper functions for GenericPointer +//@{ + +////////////////////////////////////////////////////////////////////////////// + +template +typename T::ValueType& CreateValueByPointer(T& root, const GenericPointer& pointer, typename T::AllocatorType& a) { + return pointer.Create(root, a); +} + +template +typename T::ValueType& CreateValueByPointer(T& root, const CharType(&source)[N], typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Create(root, a); +} + +// No allocator parameter + +template +typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const GenericPointer& pointer) { + return pointer.Create(document); +} + +template +typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const CharType(&source)[N]) { + return GenericPointer(source, N - 1).Create(document); +} + +////////////////////////////////////////////////////////////////////////////// + +template +typename T::ValueType* GetValueByPointer(T& root, const GenericPointer& pointer) { + return pointer.Get(root); +} + +template +const typename T::ValueType* GetValueByPointer(const T& root, const GenericPointer& pointer) { + return pointer.Get(root); +} + +template +typename T::ValueType* GetValueByPointer(T& root, const CharType (&source)[N]) { + return GenericPointer(source, N - 1).Get(root); +} + +template +const typename T::ValueType* GetValueByPointer(const T& root, const CharType(&source)[N]) { + return GenericPointer(source, N - 1).Get(root); +} + +////////////////////////////////////////////////////////////////////////////// + +template +typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer& pointer, const typename T::ValueType& defaultValue, typename 
T::AllocatorType& a) { + return pointer.GetWithDefault(root, defaultValue, a); +} + +template +typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer& pointer, const typename T::Ch* defaultValue, typename T::AllocatorType& a) { + return pointer.GetWithDefault(root, defaultValue, a); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer& pointer, const std::basic_string& defaultValue, typename T::AllocatorType& a) { + return pointer.GetWithDefault(root, defaultValue, a); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename T::ValueType&)) +GetValueByPointerWithDefault(T& root, const GenericPointer& pointer, T2 defaultValue, typename T::AllocatorType& a) { + return pointer.GetWithDefault(root, defaultValue, a); +} + +template +typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::ValueType& defaultValue, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).GetWithDefault(root, defaultValue, a); +} + +template +typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::Ch* defaultValue, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).GetWithDefault(root, defaultValue, a); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const std::basic_string& defaultValue, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).GetWithDefault(root, defaultValue, a); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename T::ValueType&)) +GetValueByPointerWithDefault(T& root, const CharType(&source)[N], T2 defaultValue, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).GetWithDefault(root, defaultValue, a); +} + +// No 
allocator parameter + +template +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer& pointer, const typename DocumentType::ValueType& defaultValue) { + return pointer.GetWithDefault(document, defaultValue); +} + +template +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer& pointer, const typename DocumentType::Ch* defaultValue) { + return pointer.GetWithDefault(document, defaultValue); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer& pointer, const std::basic_string& defaultValue) { + return pointer.GetWithDefault(document, defaultValue); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename DocumentType::ValueType&)) +GetValueByPointerWithDefault(DocumentType& document, const GenericPointer& pointer, T2 defaultValue) { + return pointer.GetWithDefault(document, defaultValue); +} + +template +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& defaultValue) { + return GenericPointer(source, N - 1).GetWithDefault(document, defaultValue); +} + +template +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* defaultValue) { + return GenericPointer(source, N - 1).GetWithDefault(document, defaultValue); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const std::basic_string& defaultValue) { + return GenericPointer(source, N - 1).GetWithDefault(document, defaultValue); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename 
DocumentType::ValueType&)) +GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], T2 defaultValue) { + return GenericPointer(source, N - 1).GetWithDefault(document, defaultValue); +} + +////////////////////////////////////////////////////////////////////////////// + +template +typename T::ValueType& SetValueByPointer(T& root, const GenericPointer& pointer, typename T::ValueType& value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} + +template +typename T::ValueType& SetValueByPointer(T& root, const GenericPointer& pointer, const typename T::ValueType& value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} + +template +typename T::ValueType& SetValueByPointer(T& root, const GenericPointer& pointer, const typename T::Ch* value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename T::ValueType& SetValueByPointer(T& root, const GenericPointer& pointer, const std::basic_string& value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename T::ValueType&)) +SetValueByPointer(T& root, const GenericPointer& pointer, T2 value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} + +template +typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Set(root, value, a); +} + +template +typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::ValueType& value, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Set(root, value, a); +} + +template +typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::Ch* value, typename T::AllocatorType& a) { + return 
GenericPointer(source, N - 1).Set(root, value, a); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const std::basic_string& value, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Set(root, value, a); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename T::ValueType&)) +SetValueByPointer(T& root, const CharType(&source)[N], T2 value, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Set(root, value, a); +} + +// No allocator parameter + +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer& pointer, typename DocumentType::ValueType& value) { + return pointer.Set(document, value); +} + +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer& pointer, const typename DocumentType::ValueType& value) { + return pointer.Set(document, value); +} + +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer& pointer, const typename DocumentType::Ch* value) { + return pointer.Set(document, value); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer& pointer, const std::basic_string& value) { + return pointer.Set(document, value); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename DocumentType::ValueType&)) +SetValueByPointer(DocumentType& document, const GenericPointer& pointer, T2 value) { + return pointer.Set(document, value); +} + +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) { + return GenericPointer(source, N - 1).Set(document, value); +} + +template +typename DocumentType::ValueType& 
SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& value) { + return GenericPointer(source, N - 1).Set(document, value); +} + +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* value) { + return GenericPointer(source, N - 1).Set(document, value); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const std::basic_string& value) { + return GenericPointer(source, N - 1).Set(document, value); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename DocumentType::ValueType&)) +SetValueByPointer(DocumentType& document, const CharType(&source)[N], T2 value) { + return GenericPointer(source, N - 1).Set(document, value); +} + +////////////////////////////////////////////////////////////////////////////// + +template +typename T::ValueType& SwapValueByPointer(T& root, const GenericPointer& pointer, typename T::ValueType& value, typename T::AllocatorType& a) { + return pointer.Swap(root, value, a); +} + +template +typename T::ValueType& SwapValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Swap(root, value, a); +} + +template +typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const GenericPointer& pointer, typename DocumentType::ValueType& value) { + return pointer.Swap(document, value); +} + +template +typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) { + return GenericPointer(source, N - 1).Swap(document, value); +} + +////////////////////////////////////////////////////////////////////////////// + +template +bool EraseValueByPointer(T& root, const 
GenericPointer& pointer) { + return pointer.Erase(root); +} + +template +bool EraseValueByPointer(T& root, const CharType(&source)[N]) { + return GenericPointer(source, N - 1).Erase(root); +} + +//@} + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_POINTER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/prettywriter.h b/c++/include/misc/jsonwrapp/rapidjson10/prettywriter.h new file mode 100644 index 00000000..af614121 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/prettywriter.h @@ -0,0 +1,222 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_PRETTYWRITER_H_ +#define RAPIDJSON_PRETTYWRITER_H_ + +#include "writer.h" + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Writer with indentation and spacing. +/*! + \tparam OutputStream Type of ouptut os. + \tparam SourceEncoding Encoding of source string. + \tparam TargetEncoding Encoding of output stream. + \tparam StackAllocator Type of allocator for allocating memory of stack. +*/ +template, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator> +class PrettyWriter : public Writer { +public: + typedef Writer Base; + typedef typename Base::Ch Ch; + + //! Constructor + /*! \param os Output stream. 
+ \param allocator User supplied allocator. If it is null, it will create a private one. + \param levelDepth Initial capacity of stack. + */ + PrettyWriter(OutputStream& os, StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : + Base(os, allocator, levelDepth), indentChar_(' '), indentCharCount_(4), eol_(true) {} + + //! Set custom indentation. + /*! \param indentChar Character for indentation. Must be whitespace character (' ', '\\t', '\\n', '\\r'). + \param indentCharCount Number of indent characters for each indentation level. + \note The default indentation is 4 spaces. + */ + PrettyWriter& SetIndent(Ch indentChar, unsigned indentCharCount) { + RAPIDJSON_ASSERT(indentChar == ' ' || indentChar == '\t' || indentChar == '\n' || indentChar == '\r'); + indentChar_ = indentChar; + indentCharCount_ = indentCharCount; + return *this; + } + +// NCBI - added + PrettyWriter& SetWriteEol(bool eol) { + eol_ = eol; + return *this; + } + + /*! @name Implementation of Handler + \see Handler + */ + //@{ + + bool Null() { PrettyPrefix(kNullType); return Base::WriteNull(); } + bool Bool(bool b) { PrettyPrefix(b ? 
kTrueType : kFalseType); return Base::WriteBool(b); } + bool Int(int i) { PrettyPrefix(kNumberType); return Base::WriteInt(i); } + bool Uint(unsigned u) { PrettyPrefix(kNumberType); return Base::WriteUint(u); } + bool Int64(int64_t i64) { PrettyPrefix(kNumberType); return Base::WriteInt64(i64); } + bool Uint64(uint64_t u64) { PrettyPrefix(kNumberType); return Base::WriteUint64(u64); } + bool Double(double d) { PrettyPrefix(kNumberType); return Base::WriteDouble(d); } + + bool String(const Ch* str, SizeType length, bool copy = false) { + (void)copy; + PrettyPrefix(kStringType); + return Base::WriteString(str, length); + } + +#if RAPIDJSON_HAS_STDSTRING + bool String(const std::basic_string& str) { + return String(str.data(), SizeType(str.size())); + } +#endif + + bool StartObject() { + PrettyPrefix(kObjectType); + new (Base::level_stack_.template Push()) typename Base::Level(false); + return Base::WriteStartObject(); + } + + bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); } + + bool EndObject(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); + RAPIDJSON_ASSERT(!Base::level_stack_.template Top()->inArray); + bool empty = Base::level_stack_.template Pop(1)->valueCount == 0; + + if (!empty) { + WriteEol(); + WriteIndent(); + } + bool ret = Base::WriteEndObject(); + (void)ret; + RAPIDJSON_ASSERT(ret == true); + if (Base::level_stack_.Empty()) // end of json text + Base::os_->Flush(); + return true; + } + + bool StartArray() { + PrettyPrefix(kArrayType); + new (Base::level_stack_.template Push()) typename Base::Level(true); + return Base::WriteStartArray(); + } + + bool EndArray(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); + RAPIDJSON_ASSERT(Base::level_stack_.template Top()->inArray); + bool empty = Base::level_stack_.template Pop(1)->valueCount == 0; + + if 
(!empty) { + WriteEol(); + WriteIndent(); + } + bool ret = Base::WriteEndArray(); + (void)ret; + RAPIDJSON_ASSERT(ret == true); + if (Base::level_stack_.Empty()) // end of json text + Base::os_->Flush(); + return true; + } + + //@} + + /*! @name Convenience extensions */ + //@{ + + //! Simpler but slower overload. + bool String(const Ch* str) { return String(str, internal::StrLen(str)); } + bool Key(const Ch* str) { return Key(str, internal::StrLen(str)); } + + //@} +protected: + void PrettyPrefix(Type type) { + (void)type; + if (Base::level_stack_.GetSize() != 0) { // this value is not at root + typename Base::Level* level = Base::level_stack_.template Top(); + + if (level->inArray) { + if (level->valueCount > 0) { + Base::os_->Put(','); // add comma if it is not the first element in array + WriteEol(); + } + else + WriteEol(); + WriteIndent(); + } + else { // in object + if (level->valueCount > 0) { + if (level->valueCount % 2 == 0) { + Base::os_->Put(','); + WriteEol(); + } + else { + Base::os_->Put(':'); + Base::os_->Put(' '); + } + } + else + WriteEol(); + + if (level->valueCount % 2 == 0) + WriteIndent(); + } + if (!level->inArray && level->valueCount % 2 == 0) + RAPIDJSON_ASSERT(type == kStringType); // if it's in object, then even number should be a name + level->valueCount++; + } + else { + RAPIDJSON_ASSERT(!Base::hasRoot_); // Should only has one and only one root. + Base::hasRoot_ = true; + } + } + + void WriteIndent() { + size_t count = (Base::level_stack_.GetSize() / sizeof(typename Base::Level)) * indentCharCount_; + PutN(*Base::os_, indentChar_, count); + } + +// NCBI +// added WriteEol + void WriteEol() { + if (eol_) { + Base::os_->Put('\n'); + } + } + + Ch indentChar_; + unsigned indentCharCount_; + bool eol_; + +private: + // Prohibit copy constructor & assignment operator. 
+ PrettyWriter(const PrettyWriter&); + PrettyWriter& operator=(const PrettyWriter&); +}; + +RAPIDJSON_NAMESPACE_END + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/rapidjson.h b/c++/include/misc/jsonwrapp/rapidjson10/rapidjson.h new file mode 100644 index 00000000..84901023 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/rapidjson.h @@ -0,0 +1,659 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_RAPIDJSON_H_ +#define RAPIDJSON_RAPIDJSON_H_ + +/*!\file rapidjson.h + \brief common definitions and configuration + + \see RAPIDJSON_CONFIG + */ + +/*! \defgroup RAPIDJSON_CONFIG RapidJSON configuration + \brief Configuration macros for library features + + Some RapidJSON features are configurable to adapt the library to a wide + variety of platforms, environments and usage scenarios. Most of the + features can be configured in terms of overriden or predefined + preprocessor macros at compile-time. + + Some additional customization is available in the \ref RAPIDJSON_ERRORS APIs. + + \note These macros should be given on the compiler command-line + (where applicable) to avoid inconsistent values when compiling + different translation units of a single application. 
+ */ + +#include // malloc(), realloc(), free(), size_t +#include // memset(), memcpy(), memmove(), memcmp() + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_VERSION_STRING +// +// ALWAYS synchronize the following 3 macros with corresponding variables in /CMakeLists.txt. +// + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +// token stringification +#define RAPIDJSON_STRINGIFY(x) RAPIDJSON_DO_STRINGIFY(x) +#define RAPIDJSON_DO_STRINGIFY(x) #x +//!@endcond + +/*! \def RAPIDJSON_MAJOR_VERSION + \ingroup RAPIDJSON_CONFIG + \brief Major version of RapidJSON in integer. +*/ +/*! \def RAPIDJSON_MINOR_VERSION + \ingroup RAPIDJSON_CONFIG + \brief Minor version of RapidJSON in integer. +*/ +/*! \def RAPIDJSON_PATCH_VERSION + \ingroup RAPIDJSON_CONFIG + \brief Patch version of RapidJSON in integer. +*/ +/*! \def RAPIDJSON_VERSION_STRING + \ingroup RAPIDJSON_CONFIG + \brief Version of RapidJSON in ".." string format. +*/ +#define RAPIDJSON_MAJOR_VERSION 1 +#define RAPIDJSON_MINOR_VERSION 0 +#define RAPIDJSON_PATCH_VERSION 2 +#define RAPIDJSON_VERSION_STRING \ + RAPIDJSON_STRINGIFY(RAPIDJSON_MAJOR_VERSION.RAPIDJSON_MINOR_VERSION.RAPIDJSON_PATCH_VERSION) + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NAMESPACE_(BEGIN|END) +/*! \def RAPIDJSON_NAMESPACE + \ingroup RAPIDJSON_CONFIG + \brief provide custom rapidjson namespace + + In order to avoid symbol clashes and/or "One Definition Rule" errors + between multiple inclusions of (different versions of) RapidJSON in + a single binary, users can customize the name of the main RapidJSON + namespace. + + In case of a single nesting level, defining \c RAPIDJSON_NAMESPACE + to a custom name (e.g. \c MyRapidJSON) is sufficient. 
If multiple + levels are needed, both \ref RAPIDJSON_NAMESPACE_BEGIN and \ref + RAPIDJSON_NAMESPACE_END need to be defined as well: + + \code + // in some .cpp file + #define RAPIDJSON_NAMESPACE my::rapidjson + #define RAPIDJSON_NAMESPACE_BEGIN namespace my { namespace rapidjson { + #define RAPIDJSON_NAMESPACE_END } } + #include "rapidjson/..." + \endcode + + \see rapidjson + */ +/*! \def RAPIDJSON_NAMESPACE_BEGIN + \ingroup RAPIDJSON_CONFIG + \brief provide custom rapidjson namespace (opening expression) + \see RAPIDJSON_NAMESPACE +*/ +/*! \def RAPIDJSON_NAMESPACE_END + \ingroup RAPIDJSON_CONFIG + \brief provide custom rapidjson namespace (closing expression) + \see RAPIDJSON_NAMESPACE +*/ +#ifndef RAPIDJSON_NAMESPACE +#define RAPIDJSON_NAMESPACE rapidjson +#endif +#ifndef RAPIDJSON_NAMESPACE_BEGIN +#define RAPIDJSON_NAMESPACE_BEGIN namespace RAPIDJSON_NAMESPACE { +#endif +#ifndef RAPIDJSON_NAMESPACE_END +#define RAPIDJSON_NAMESPACE_END } +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NO_INT64DEFINE + +/*! \def RAPIDJSON_NO_INT64DEFINE + \ingroup RAPIDJSON_CONFIG + \brief Use external 64-bit integer types. + + RapidJSON requires the 64-bit integer types \c int64_t and \c uint64_t types + to be available at global scope. + + If users have their own definition, define RAPIDJSON_NO_INT64DEFINE to + prevent RapidJSON from defining its own types. +*/ +#ifndef RAPIDJSON_NO_INT64DEFINE +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#ifdef _MSC_VER +#include "msinttypes/stdint.h" +#include "msinttypes/inttypes.h" +#else +// Other compilers should have this. 
+#include +#include +#endif +//!@endcond +#ifdef RAPIDJSON_DOXYGEN_RUNNING +#define RAPIDJSON_NO_INT64DEFINE +#endif +#endif // RAPIDJSON_NO_INT64DEFINE + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_FORCEINLINE + +#ifndef RAPIDJSON_FORCEINLINE +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#if defined(_MSC_VER) && !defined(NDEBUG) +#define RAPIDJSON_FORCEINLINE __forceinline +#elif defined(__GNUC__) && __GNUC__ >= 4 && !defined(NDEBUG) +#define RAPIDJSON_FORCEINLINE __attribute__((always_inline)) +#else +#define RAPIDJSON_FORCEINLINE +#endif +//!@endcond +#endif // RAPIDJSON_FORCEINLINE + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ENDIAN +#define RAPIDJSON_LITTLEENDIAN 0 //!< Little endian machine +#define RAPIDJSON_BIGENDIAN 1 //!< Big endian machine + +//! Endianness of the machine. +/*! + \def RAPIDJSON_ENDIAN + \ingroup RAPIDJSON_CONFIG + + GCC 4.6 provides a macro for detecting the endianness of the target machine. But other + compilers may not have this. Users can define RAPIDJSON_ENDIAN to either + \ref RAPIDJSON_LITTLEENDIAN or \ref RAPIDJSON_BIGENDIAN. + + Default detection is implemented with reference to + \li https://gcc.gnu.org/onlinedocs/gcc-4.6.0/cpp/Common-Predefined-Macros.html + \li http://www.boost.org/doc/libs/1_42_0/boost/detail/endian.hpp +*/ +#ifndef RAPIDJSON_ENDIAN +// Detect with GCC 4.6's macro +# ifdef __BYTE_ORDER__ +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# else +# error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN.
+# endif // __BYTE_ORDER__ +// Detect with GLIBC's endian.h +# elif defined(__GLIBC__) +# include +# if (__BYTE_ORDER == __LITTLE_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif (__BYTE_ORDER == __BIG_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# else +# error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN. +# endif // __GLIBC__ +// Detect with _LITTLE_ENDIAN and _BIG_ENDIAN macro +# elif defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +// Detect with architecture macros +# elif defined(__sparc) || defined(__sparc__) || defined(_POWER) || defined(__powerpc__) || defined(__ppc__) || defined(__hpux) || defined(__hppa) || defined(_MIPSEB) || defined(_POWER) || defined(__s390__) +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# elif defined(__i386__) || defined(__alpha__) || defined(__ia64) || defined(__ia64__) || defined(_M_IX86) || defined(_M_IA64) || defined(_M_ALPHA) || defined(__amd64) || defined(__amd64__) || defined(_M_AMD64) || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(__bfin__) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif defined(RAPIDJSON_DOXYGEN_RUNNING) +# define RAPIDJSON_ENDIAN +# else +# error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN. +# endif +#endif // RAPIDJSON_ENDIAN + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_64BIT + +//! Whether using 64-bit architecture +#ifndef RAPIDJSON_64BIT +#if defined(__LP64__) || defined(_WIN64) +#define RAPIDJSON_64BIT 1 +#else +#define RAPIDJSON_64BIT 0 +#endif +#endif // RAPIDJSON_64BIT + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ALIGN + +//! Data alignment of the machine. +/*! 
\ingroup RAPIDJSON_CONFIG + \param x pointer to align + + Some machines require strict data alignment. The default aligns to 8 bytes when RAPIDJSON_64BIT is 1 and to 4 bytes + otherwise. Users can customize this by defining the RAPIDJSON_ALIGN function macro. +*/ +#ifndef RAPIDJSON_ALIGN +#if RAPIDJSON_64BIT == 1 +#define RAPIDJSON_ALIGN(x) ((x + 7u) & ~7u) +#else +#define RAPIDJSON_ALIGN(x) ((x + 3u) & ~3u) +#endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_UINT64_C2 + +//! Construct a 64-bit literal from a pair of 32-bit integers. +/*! + A 64-bit literal with or without ULL suffix is prone to compiler warnings. + UINT64_C() is a C macro which causes compilation problems. + Use this macro to define 64-bit constants from a pair of 32-bit integers. +*/ +#ifndef RAPIDJSON_UINT64_C2 +#define RAPIDJSON_UINT64_C2(high32, low32) ((static_cast(high32) << 32) | static_cast(low32)) +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_SSE2/RAPIDJSON_SSE42/RAPIDJSON_SIMD + +/*! \def RAPIDJSON_SIMD + \ingroup RAPIDJSON_CONFIG + \brief Enable SSE2/SSE4.2 optimization. + + RapidJSON supports optimized implementations for some parsing operations + based on the SSE2 or SSE4.2 SIMD extensions on modern Intel-compatible + processors. + + To enable these optimizations, two different symbols can be defined; + \code + // Enable SSE2 optimization. + #define RAPIDJSON_SSE2 + + // Enable SSE4.2 optimization. + #define RAPIDJSON_SSE42 + \endcode + + \c RAPIDJSON_SSE42 takes precedence, if both are defined. + + If any of these symbols is defined, RapidJSON defines the macro + \c RAPIDJSON_SIMD to indicate the availability of the optimized code.
+*/ +#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) \ + || defined(RAPIDJSON_DOXYGEN_RUNNING) +#define RAPIDJSON_SIMD +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NO_SIZETYPEDEFINE + +#ifndef RAPIDJSON_NO_SIZETYPEDEFINE +/*! \def RAPIDJSON_NO_SIZETYPEDEFINE + \ingroup RAPIDJSON_CONFIG + \brief User-provided \c SizeType definition. + + In order to avoid using 32-bit size types for indexing strings and arrays, + define this preprocessor symbol and provide the type rapidjson::SizeType + before including RapidJSON: + \code + #define RAPIDJSON_NO_SIZETYPEDEFINE + namespace rapidjson { typedef ::std::size_t SizeType; } + #include "rapidjson/..." + \endcode + + \see rapidjson::SizeType +*/ +#ifdef RAPIDJSON_DOXYGEN_RUNNING +#define RAPIDJSON_NO_SIZETYPEDEFINE +#endif +RAPIDJSON_NAMESPACE_BEGIN +//! Size type (for string lengths, array sizes, etc.) +/*! RapidJSON uses 32-bit array/string indices even on 64-bit platforms, + instead of using \c size_t. Users may override the SizeType by defining + \ref RAPIDJSON_NO_SIZETYPEDEFINE. +*/ +typedef unsigned SizeType; +RAPIDJSON_NAMESPACE_END +#endif + +// always import std::size_t to rapidjson namespace +RAPIDJSON_NAMESPACE_BEGIN +using std::size_t; +RAPIDJSON_NAMESPACE_END + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ASSERT + +//! Assertion. +/*! \ingroup RAPIDJSON_CONFIG + By default, rapidjson uses C \c assert() for internal assertions. + User can override it by defining RAPIDJSON_ASSERT(x) macro. + + \note Parsing errors are handled and can be customized by the + \ref RAPIDJSON_ERRORS APIs. 
+*/ +#ifndef RAPIDJSON_ASSERT +#include +#define RAPIDJSON_ASSERT(x) assert(x) +#endif // RAPIDJSON_ASSERT + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_STATIC_ASSERT + +// Adopt from boost +#ifndef RAPIDJSON_STATIC_ASSERT +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +RAPIDJSON_NAMESPACE_BEGIN +template struct STATIC_ASSERTION_FAILURE; +template <> struct STATIC_ASSERTION_FAILURE { enum { value = 1 }; }; +template struct StaticAssertTest {}; +RAPIDJSON_NAMESPACE_END + +#define RAPIDJSON_JOIN(X, Y) RAPIDJSON_DO_JOIN(X, Y) +#define RAPIDJSON_DO_JOIN(X, Y) RAPIDJSON_DO_JOIN2(X, Y) +#define RAPIDJSON_DO_JOIN2(X, Y) X##Y + +#if defined(__GNUC__) +#define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE __attribute__((unused)) +#else +#define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE +#endif +//!@endcond + +/*! \def RAPIDJSON_STATIC_ASSERT + \brief (Internal) macro to check for conditions at compile-time + \param x compile-time condition + \hideinitializer + */ +#define RAPIDJSON_STATIC_ASSERT(x) \ + typedef ::RAPIDJSON_NAMESPACE::StaticAssertTest< \ + sizeof(::RAPIDJSON_NAMESPACE::STATIC_ASSERTION_FAILURE)> \ + RAPIDJSON_JOIN(StaticAssertTypedef, __LINE__) RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE +#endif + +/////////////////////////////////////////////////////////////////////////////// +// Helpers + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN + +#define RAPIDJSON_MULTILINEMACRO_BEGIN do { +#define RAPIDJSON_MULTILINEMACRO_END \ +} while((void)0, 0) + +// adopted from Boost +#define RAPIDJSON_VERSION_CODE(x,y,z) \ + (((x)*100000) + ((y)*100) + (z)) + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_DIAG_PUSH/POP, RAPIDJSON_DIAG_OFF + +#if defined(__GNUC__) +#define RAPIDJSON_GNUC \ + RAPIDJSON_VERSION_CODE(__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__) +#endif + +#if defined(__clang__) || (defined(RAPIDJSON_GNUC) && RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,2,0)) + +#define RAPIDJSON_PRAGMA(x) 
_Pragma(RAPIDJSON_STRINGIFY(x)) +#define RAPIDJSON_DIAG_PRAGMA(x) RAPIDJSON_PRAGMA(GCC diagnostic x) +#define RAPIDJSON_DIAG_OFF(x) \ + RAPIDJSON_DIAG_PRAGMA(ignored RAPIDJSON_STRINGIFY(RAPIDJSON_JOIN(-W,x))) + +// push/pop support in Clang and GCC>=4.6 +#if defined(__clang__) || (defined(RAPIDJSON_GNUC) && RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) +#define RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PRAGMA(push) +#define RAPIDJSON_DIAG_POP RAPIDJSON_DIAG_PRAGMA(pop) +#else // GCC >= 4.2, < 4.6 +#define RAPIDJSON_DIAG_PUSH /* ignored */ +#define RAPIDJSON_DIAG_POP /* ignored */ +#endif + +#elif defined(_MSC_VER) + +// pragma (MSVC specific) +#define RAPIDJSON_PRAGMA(x) __pragma(x) +#define RAPIDJSON_DIAG_PRAGMA(x) RAPIDJSON_PRAGMA(warning(x)) + +#define RAPIDJSON_DIAG_OFF(x) RAPIDJSON_DIAG_PRAGMA(disable: x) +#define RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PRAGMA(push) +#define RAPIDJSON_DIAG_POP RAPIDJSON_DIAG_PRAGMA(pop) + +#else + +#define RAPIDJSON_DIAG_OFF(x) /* ignored */ +#define RAPIDJSON_DIAG_PUSH /* ignored */ +#define RAPIDJSON_DIAG_POP /* ignored */ + +#endif // RAPIDJSON_DIAG_* + +// NCBI +#if NCBI_COMPILER_ICC +#undef RAPIDJSON_DIAG_OFF +#define RAPIDJSON_DIAG_OFF(x) +#endif +/////////////////////////////////////////////////////////////////////////////// +// C++11 features + +#ifndef RAPIDJSON_HAS_CXX11_RVALUE_REFS +#if defined(__clang__) +#define RAPIDJSON_HAS_CXX11_RVALUE_REFS __has_feature(cxx_rvalue_references) && \ + (defined(_LIBCPP_VERSION) || defined(__GLIBCXX__) && __GLIBCXX__ >= 20080306) +#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,3,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \ + (defined(_MSC_VER) && _MSC_VER >= 1600) + +#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1 +#else +#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 0 +#endif +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + +#ifndef RAPIDJSON_HAS_CXX11_NOEXCEPT +#if defined(__clang__) +#define RAPIDJSON_HAS_CXX11_NOEXCEPT __has_feature(cxx_noexcept) +#elif 
(defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) +// (defined(_MSC_VER) && _MSC_VER >= ????) // not yet supported +#define RAPIDJSON_HAS_CXX11_NOEXCEPT 1 +#else +#define RAPIDJSON_HAS_CXX11_NOEXCEPT 0 +#endif +#endif +#if RAPIDJSON_HAS_CXX11_NOEXCEPT +#define RAPIDJSON_NOEXCEPT noexcept +#else +#define RAPIDJSON_NOEXCEPT /* noexcept */ +#endif // RAPIDJSON_HAS_CXX11_NOEXCEPT + +// no automatic detection, yet +#ifndef RAPIDJSON_HAS_CXX11_TYPETRAITS +#define RAPIDJSON_HAS_CXX11_TYPETRAITS 0 +#endif + +//!@endcond + +/////////////////////////////////////////////////////////////////////////////// +// new/delete + +#ifndef RAPIDJSON_NEW +///! customization point for global \c new +#define RAPIDJSON_NEW(x) new x +#endif +#ifndef RAPIDJSON_DELETE +///! customization point for global \c delete +#define RAPIDJSON_DELETE(x) delete x +#endif + +/////////////////////////////////////////////////////////////////////////////// +// Allocators and Encodings + +#include "allocators.h" +#include "encodings.h" + +/*! \namespace rapidjson + \brief main RapidJSON namespace + \see RAPIDJSON_NAMESPACE +*/ +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// Stream + +/*! \class rapidjson::Stream + \brief Concept for reading and writing characters. + + For read-only stream, no need to implement PutBegin(), Put(), Flush() and PutEnd(). + + For write-only stream, only need to implement Put() and Flush(). + +\code +concept Stream { + typename Ch; //!< Character type of the stream. + + //! Read the current character from stream without moving the read cursor. + Ch Peek() const; + + //! Read the current character from stream and moving the read cursor to next character. + Ch Take(); + + //! Get the current read cursor. + //! \return Number of characters read from start. + size_t Tell(); + + //! Begin writing operation at the current read pointer. + //! 
\return The begin writer pointer. + Ch* PutBegin(); + + //! Write a character. + void Put(Ch c); + + //! Flush the buffer. + void Flush(); + + //! End the writing operation. + //! \param begin The begin write pointer returned by PutBegin(). + //! \return Number of characters written. + size_t PutEnd(Ch* begin); +} +\endcode +*/ + +//! Provides additional information for stream. +/*! + By using traits pattern, this type provides a default configuration for stream. + For custom stream, this type can be specialized for other configuration. + See TEST(Reader, CustomStringStream) in readertest.cpp for example. +*/ +template +struct StreamTraits { + //! Whether to make local copy of stream for optimization during parsing. + /*! + By default, for safety, streams do not use local copy optimization. + Stream that can be copied fast should specialize this, like StreamTraits. + */ + enum { copyOptimization = 0 }; +}; + +//! Put N copies of a character to a stream. +template +inline void PutN(Stream& stream, Ch c, size_t n) { + for (size_t i = 0; i < n; i++) + stream.Put(c); +} + +/////////////////////////////////////////////////////////////////////////////// +// StringStream + +//! Read-only string stream. +/*! \note implements Stream concept +*/ +template +struct GenericStringStream { + typedef typename Encoding::Ch Ch; + + GenericStringStream(const Ch *src) : src_(src), head_(src) {} + + Ch Peek() const { return *src_; } + Ch Take() { return *src_++; } + size_t Tell() const { return static_cast(src_ - head_); } + + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + + const Ch* src_; //!< Current read position. + const Ch* head_; //!< Original head of the string. +}; + +template +struct StreamTraits > { + enum { copyOptimization = 1 }; +}; + +//! String stream with UTF8 encoding. 
+typedef GenericStringStream > StringStream; + +/////////////////////////////////////////////////////////////////////////////// +// InsituStringStream + +//! A read-write string stream. +/*! This string stream is particularly designed for in-situ parsing. + \note implements Stream concept +*/ +template +struct GenericInsituStringStream { + typedef typename Encoding::Ch Ch; + + GenericInsituStringStream(Ch *src) : src_(src), dst_(0), head_(src) {} + + // Read + Ch Peek() { return *src_; } + Ch Take() { return *src_++; } + size_t Tell() { return static_cast(src_ - head_); } + + // Write + void Put(Ch c) { RAPIDJSON_ASSERT(dst_ != 0); *dst_++ = c; } + + Ch* PutBegin() { return dst_ = src_; } + size_t PutEnd(Ch* begin) { return static_cast(dst_ - begin); } + void Flush() {} + + Ch* Push(size_t count) { Ch* begin = dst_; dst_ += count; return begin; } + void Pop(size_t count) { dst_ -= count; } + + Ch* src_; + Ch* dst_; + Ch* head_; +}; + +template +struct StreamTraits > { + enum { copyOptimization = 1 }; +}; + +//! Insitu string stream with UTF8 encoding. +typedef GenericInsituStringStream > InsituStringStream; + +/////////////////////////////////////////////////////////////////////////////// +// Type + +//! Type of JSON value +enum Type { + kNullType = 0, //!< null + kFalseType = 1, //!< false + kTrueType = 2, //!< true + kObjectType = 3, //!< object + kArrayType = 4, //!< array + kStringType = 5, //!< string + kNumberType = 6 //!< number +}; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/reader.h b/c++/include/misc/jsonwrapp/rapidjson10/reader.h new file mode 100644 index 00000000..b214b6bf --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/reader.h @@ -0,0 +1,1452 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 
+// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_READER_H_ +#define RAPIDJSON_READER_H_ + +/*! \file reader.h */ + +#include "rapidjson.h" +#include "encodings.h" +#include "internal/meta.h" +#include "internal/stack.h" +#include "internal/strtod.h" + +#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER) +#include +#pragma intrinsic(_BitScanForward) +#endif +#ifdef RAPIDJSON_SSE42 +#include +#elif defined(RAPIDJSON_SSE2) +#include +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant +RAPIDJSON_DIAG_OFF(4702) // unreachable code +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#define RAPIDJSON_NOTHING /* deliberately empty */ +#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN +#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + if (HasParseError()) { return value; } \ + RAPIDJSON_MULTILINEMACRO_END +#endif +#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \ + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING) +//!@endcond + +/*! \def RAPIDJSON_PARSE_ERROR_NORETURN + \ingroup RAPIDJSON_ERRORS + \brief Macro to indicate a parse error. + \param parseErrorCode \ref rapidjson::ParseErrorCode of the error + \param offset position of the error in JSON input (\c size_t) + + This macro can be used as a customization point for the internal + error handling mechanism of RapidJSON.
+ + A common usage model is to throw an exception instead of requiring the + caller to explicitly check the \ref rapidjson::GenericReader::Parse's + return value: + + \code + #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \ + throw ParseException(parseErrorCode, #parseErrorCode, offset) + + #include <stdexcept> // std::runtime_error + #include "rapidjson/error/error.h" // rapidjson::ParseResult + + struct ParseException : std::runtime_error, rapidjson::ParseResult { + ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset) + : std::runtime_error(msg), ParseResult(code, offset) {} + }; + + #include "rapidjson/reader.h" + \endcode + + \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse + */ +#ifndef RAPIDJSON_PARSE_ERROR_NORETURN +#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \ + SetParseError(parseErrorCode, offset); \ + RAPIDJSON_MULTILINEMACRO_END +#endif + +/*! \def RAPIDJSON_PARSE_ERROR + \ingroup RAPIDJSON_ERRORS + \brief (Internal) macro to indicate and handle a parse error. + \param parseErrorCode \ref rapidjson::ParseErrorCode of the error + \param offset position of the error in JSON input (\c size_t) + + Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing. + + \see RAPIDJSON_PARSE_ERROR_NORETURN + \hideinitializer + */ +#ifndef RAPIDJSON_PARSE_ERROR +#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \ + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \ + RAPIDJSON_MULTILINEMACRO_END +#endif + +#include "error/error.h" // ParseErrorCode, ParseResult + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// ParseFlag + +/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS + \ingroup RAPIDJSON_CONFIG + \brief User-defined kParseDefaultFlags definition.
+ + User can define this as any \c ParseFlag combinations. +*/ +#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS +#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags +#endif + +//! Combination of parseFlags +/*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream + */ +enum ParseFlag { + kParseNoFlags = 0, //!< No flags are set. + kParseInsituFlag = 1, //!< In-situ(destructive) parsing. + kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings. + kParseIterativeFlag = 4, //!< Iterative(constant complexity in terms of function call stack size) parsing. + kParseStopWhenDoneFlag = 8, //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error. + kParseFullPrecisionFlag = 16, //!< Parse number in full precision (but slower). + kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS +}; + +/////////////////////////////////////////////////////////////////////////////// +// Handler + +/*! \class rapidjson::Handler + \brief Concept for receiving events from GenericReader upon parsing. + The functions return true if no error occurs. If they return false, + the event publisher should terminate the process. +\code +concept Handler { + typename Ch; + + bool Null(); + bool Bool(bool b); + bool Int(int i); + bool Uint(unsigned i); + bool Int64(int64_t i); + bool Uint64(uint64_t i); + bool Double(double d); + bool String(const Ch* str, SizeType length, bool copy); + bool StartObject(); + bool Key(const Ch* str, SizeType length, bool copy); + bool EndObject(SizeType memberCount); + bool StartArray(); + bool EndArray(SizeType elementCount); +}; +\endcode +*/ +/////////////////////////////////////////////////////////////////////////////// +// BaseReaderHandler + +//! Default implementation of Handler. +/*! 
This can be used as base class of any reader handler. + \note implements Handler concept +*/ +template, typename Derived = void> +struct BaseReaderHandler { + typedef typename Encoding::Ch Ch; + + typedef typename internal::SelectIf, BaseReaderHandler, Derived>::Type Override; + + bool Default() { return true; } + bool Null() { return static_cast(*this).Default(); } + bool Bool(bool) { return static_cast(*this).Default(); } + bool Int(int) { return static_cast(*this).Default(); } + bool Uint(unsigned) { return static_cast(*this).Default(); } + bool Int64(int64_t) { return static_cast(*this).Default(); } + bool Uint64(uint64_t) { return static_cast(*this).Default(); } + bool Double(double) { return static_cast(*this).Default(); } + bool String(const Ch*, SizeType, bool) { return static_cast(*this).Default(); } + bool StartObject() { return static_cast(*this).Default(); } + bool Key(const Ch* str, SizeType len, bool copy) { return static_cast(*this).String(str, len, copy); } + bool EndObject(SizeType) { return static_cast(*this).Default(); } + bool StartArray() { return static_cast(*this).Default(); } + bool EndArray(SizeType) { return static_cast(*this).Default(); } +}; + +/////////////////////////////////////////////////////////////////////////////// +// StreamLocalCopy + +namespace internal { + +template::copyOptimization> +class StreamLocalCopy; + +//! Do copy optimization. +template +class StreamLocalCopy { +public: + StreamLocalCopy(Stream& original) : s(original), original_(original) {} + ~StreamLocalCopy() { original_ = s; } + + Stream s; + +private: + StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; + + Stream& original_; +}; + +//! Keep reference. 
+template +class StreamLocalCopy { +public: + StreamLocalCopy(Stream& original) : s(original) {} + + Stream& s; + +private: + StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; +}; + +} // namespace internal + +/////////////////////////////////////////////////////////////////////////////// +// SkipWhitespace + +//! Skip the JSON white spaces in a stream. +/*! \param is An input stream for skipping white spaces. + \note This function has SSE2/SSE4.2 specialization. +*/ +template +void SkipWhitespace(InputStream& is) { + internal::StreamLocalCopy copy(is); + InputStream& s(copy.s); + + while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t') + s.Take(); +} + +#ifdef RAPIDJSON_SSE42 +//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-bit characters at once. +inline const char *SkipWhitespace_SIMD(const char* p) { + // Fast return for single non-whitespace + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // 16-byte align to the next boundary + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & ~15); + while (p != nextAligned) + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // The rest of string using SIMD + static const char whitespace[16] = " \n\r\t"; + const __m128i w = _mm_load_si128((const __m128i *)&whitespace[0]); + + for (;; p += 16) { + const __m128i s = _mm_load_si128((const __m128i *)p); + const unsigned r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY)); + if (r != 0) { // some of the characters are non-whitespace +#ifdef _MSC_VER // Find the index of first non-whitespace + unsigned long offset; + _BitScanForward(&offset, r); + return p + offset; +#else + return p + __builtin_ffs(r) - 1; +#endif + } + } +} + +#elif defined(RAPIDJSON_SSE2) + +//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
+inline const char *SkipWhitespace_SIMD(const char* p) { + // Fast return for single non-whitespace + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // 16-byte align to the next boundary + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & ~15); + while (p != nextAligned) + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // The rest of string + static const char whitespaces[4][17] = { + " ", + "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r", + "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"}; + + const __m128i w0 = _mm_loadu_si128((const __m128i *)&whitespaces[0][0]); + const __m128i w1 = _mm_loadu_si128((const __m128i *)&whitespaces[1][0]); + const __m128i w2 = _mm_loadu_si128((const __m128i *)&whitespaces[2][0]); + const __m128i w3 = _mm_loadu_si128((const __m128i *)&whitespaces[3][0]); + + for (;; p += 16) { + const __m128i s = _mm_load_si128((const __m128i *)p); + __m128i x = _mm_cmpeq_epi8(s, w0); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); + unsigned short r = (unsigned short)~_mm_movemask_epi8(x); + if (r != 0) { // some of characters may be non-whitespace +#ifdef _MSC_VER // Find the index of first non-whitespace + unsigned long offset; + _BitScanForward(&offset, r); + return p + offset; +#else + return p + __builtin_ffs(r) - 1; +#endif + } + } +} + +#endif // RAPIDJSON_SSE2 + +#ifdef RAPIDJSON_SIMD +//! Template function specialization for InsituStringStream +template<> inline void SkipWhitespace(InsituStringStream& is) { + is.src_ = const_cast(SkipWhitespace_SIMD(is.src_)); +} + +//! 
Template function specialization for StringStream +template<> inline void SkipWhitespace(StringStream& is) { + is.src_ = SkipWhitespace_SIMD(is.src_); +} +#endif // RAPIDJSON_SIMD + +/////////////////////////////////////////////////////////////////////////////// +// GenericReader + +//! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator. +/*! GenericReader parses JSON text from a stream, and send events synchronously to an + object implementing Handler concept. + + It needs to allocate a stack for storing a single decoded string during + non-destructive parsing. + + For in-situ parsing, the decoded string is directly written to the source + text string, no temporary buffer is required. + + A GenericReader object can be reused for parsing multiple JSON text. + + \tparam SourceEncoding Encoding of the input stream. + \tparam TargetEncoding Encoding of the parse output. + \tparam StackAllocator Allocator type for stack. +*/ +template +class GenericReader { +public: + typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type + + //! Constructor. + /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) + \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing) + */ + GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(stackAllocator, stackCapacity), parseResult_() {} + + //! Parse JSON text. + /*! \tparam parseFlags Combination of \ref ParseFlag. + \tparam InputStream Type of input stream, implementing Stream concept. + \tparam Handler Type of handler, implementing Handler concept. + \param is Input stream to be parsed. + \param handler The handler to receive events. + \return Whether the parsing is successful. 
+ */ + template + ParseResult Parse(InputStream& is, Handler& handler) { + if (parseFlags & kParseIterativeFlag) + return IterativeParse(is, handler); + + parseResult_.Clear(); + + ClearStackOnExit scope(*this); + + SkipWhitespace(is); + + if (is.Peek() == '\0') { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell()); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + } + else { + ParseValue(is, handler); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + + if (!(parseFlags & kParseStopWhenDoneFlag)) { + SkipWhitespace(is); + + if (is.Peek() != '\0') { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell()); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + } + } + } + + return parseResult_; + } + + //! Parse JSON text (with \ref kParseDefaultFlags) + /*! \tparam InputStream Type of input stream, implementing Stream concept + \tparam Handler Type of handler, implementing Handler concept. + \param is Input stream to be parsed. + \param handler The handler to receive events. + \return Whether the parsing is successful. + */ + template + ParseResult Parse(InputStream& is, Handler& handler) { + return Parse(is, handler); + } + + //! Whether a parse error has occured in the last parsing. + bool HasParseError() const { return parseResult_.IsError(); } + + //! Get the \ref ParseErrorCode of last parsing. + ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); } + + //! Get the position of last parsing error in input, 0 otherwise. + size_t GetErrorOffset() const { return parseResult_.Offset(); } + +protected: + void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); } + +private: + // Prohibit copy constructor & assignment operator. + GenericReader(const GenericReader&); + GenericReader& operator=(const GenericReader&); + + void ClearStack() { stack_.Clear(); } + + // clear stack on any exit from ParseStream, e.g. 
due to exception + struct ClearStackOnExit { + explicit ClearStackOnExit(GenericReader& r) : r_(r) {} + ~ClearStackOnExit() { r_.ClearStack(); } + private: + GenericReader& r_; + ClearStackOnExit(const ClearStackOnExit&); + ClearStackOnExit& operator=(const ClearStackOnExit&); + }; + + // Parse object: { string : value, ... } + template + void ParseObject(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == '{'); + is.Take(); // Skip '{' + + if (!handler.StartObject()) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + + SkipWhitespace(is); + + if (is.Peek() == '}') { + is.Take(); + if (!handler.EndObject(0)) // empty object + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + + for (SizeType memberCount = 0;;) { + if (is.Peek() != '"') + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); + + ParseString(is, handler, true); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + SkipWhitespace(is); + + if (is.Take() != ':') + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); + + SkipWhitespace(is); + + ParseValue(is, handler); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + SkipWhitespace(is); + + ++memberCount; + + switch (is.Take()) { + case ',': SkipWhitespace(is); break; + case '}': + if (!handler.EndObject(memberCount)) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + default: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); + } + } + } + + // Parse array: [ value, ... 
] + template + void ParseArray(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == '['); + is.Take(); // Skip '[' + + if (!handler.StartArray()) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + + SkipWhitespace(is); + + if (is.Peek() == ']') { + is.Take(); + if (!handler.EndArray(0)) // empty array + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + + for (SizeType elementCount = 0;;) { + ParseValue(is, handler); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + ++elementCount; + SkipWhitespace(is); + + switch (is.Take()) { + case ',': SkipWhitespace(is); break; + case ']': + if (!handler.EndArray(elementCount)) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + default: RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); + } + } + } + + template + void ParseNull(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == 'n'); + is.Take(); + + if (is.Take() == 'u' && is.Take() == 'l' && is.Take() == 'l') { + if (!handler.Null()) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1); + } + + template + void ParseTrue(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == 't'); + is.Take(); + + if (is.Take() == 'r' && is.Take() == 'u' && is.Take() == 'e') { + if (!handler.Bool(true)) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1); + } + + template + void ParseFalse(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == 'f'); + is.Take(); + + if (is.Take() == 'a' && is.Take() == 'l' && is.Take() == 's' && is.Take() == 'e') { + if (!handler.Bool(false)) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1); + } + + // Helper function to parse four hexidecimal digits in \uXXXX in 
ParseString(). + template + unsigned ParseHex4(InputStream& is) { + unsigned codepoint = 0; + for (int i = 0; i < 4; i++) { + Ch c = is.Take(); + codepoint <<= 4; + codepoint += static_cast(c); + if (c >= '0' && c <= '9') + codepoint -= '0'; + else if (c >= 'A' && c <= 'F') + codepoint -= 'A' - 10; + else if (c >= 'a' && c <= 'f') + codepoint -= 'a' - 10; + else { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, is.Tell() - 1); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0); + } + } + return codepoint; + } + + template + class StackStream { + public: + typedef CharType Ch; + + StackStream(internal::Stack& stack) : stack_(stack), length_(0) {} + RAPIDJSON_FORCEINLINE void Put(Ch c) { + *stack_.template Push() = c; + ++length_; + } + size_t Length() const { return length_; } + Ch* Pop() { + return stack_.template Pop(length_); + } + + private: + StackStream(const StackStream&); + StackStream& operator=(const StackStream&); + + internal::Stack& stack_; + SizeType length_; + }; + + // Parse string and generate String event. Different code paths for kParseInsituFlag. + template + void ParseString(InputStream& is, Handler& handler, bool isKey = false) { + internal::StreamLocalCopy copy(is); + InputStream& s(copy.s); + + bool success = false; + if (parseFlags & kParseInsituFlag) { + typename InputStream::Ch *head = s.PutBegin(); + ParseStringToStream(s, s); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + size_t length = s.PutEnd(head) - 1; + RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); + const typename TargetEncoding::Ch* const str = (typename TargetEncoding::Ch*)head; + success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false)); + } + else { + StackStream stackStream(stack_); + ParseStringToStream(s, stackStream); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + SizeType length = static_cast(stackStream.Length()) - 1; + const typename TargetEncoding::Ch* const str = stackStream.Pop(); + success = (isKey ? 
handler.Key(str, length, true) : handler.String(str, length, true)); + } + if (!success) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell()); + } + + // Parse string to an output is + // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation. + template + RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) { +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + static const char escape[256] = { + Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/', + Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, + 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0, + 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 + }; +#undef Z16 +//!@endcond + + RAPIDJSON_ASSERT(is.Peek() == '\"'); + is.Take(); // Skip '\"' + + for (;;) { + Ch c = is.Peek(); + if (c == '\\') { // Escape + is.Take(); + Ch e = is.Take(); + if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) { + os.Put(escape[(unsigned char)e]); + } + else if (e == 'u') { // Unicode + unsigned codepoint = ParseHex4(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + if (codepoint >= 0xD800 && codepoint <= 0xDBFF) { + // Handle UTF-16 surrogate pair + if (is.Take() != '\\' || is.Take() != 'u') + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2); + unsigned codepoint2 = ParseHex4(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + if (codepoint2 < 0xDC00 || codepoint2 > 0xDFFF) + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2); + codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; + } + TEncoding::Encode(os, codepoint); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1); + } + else if (c == '"') { // Closing double quote + is.Take(); + os.Put('\0'); // null-terminate the string + return; + } + else if (c == '\0') + 
RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell() - 1); + else if ((unsigned)c < 0x20) // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1); + else { + if (parseFlags & kParseValidateEncodingFlag ? + !Transcoder::Validate(is, os) : + !Transcoder::Transcode(is, os)) + RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell()); + } + } + } + + template + class NumberStream; + + template + class NumberStream { + public: + NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; } + ~NumberStream() {} + + RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); } + RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); } + RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); } + size_t Tell() { return is.Tell(); } + size_t Length() { return 0; } + const char* Pop() { return 0; } + + protected: + NumberStream& operator=(const NumberStream&); + + InputStream& is; + }; + + template + class NumberStream : public NumberStream { + typedef NumberStream Base; + public: + NumberStream(GenericReader& reader, InputStream& is) : NumberStream(reader, is), stackStream(reader.stack_) {} + ~NumberStream() {} + + RAPIDJSON_FORCEINLINE Ch TakePush() { + stackStream.Put((char)Base::is.Peek()); + return Base::is.Take(); + } + + size_t Length() { return stackStream.Length(); } + + const char* Pop() { + stackStream.Put('\0'); + return stackStream.Pop(); + } + + private: + StackStream stackStream; + }; + + template + void ParseNumber(InputStream& is, Handler& handler) { + internal::StreamLocalCopy copy(is); + NumberStream s(*this, copy.s); + + // Parse minus + bool minus = false; + if (s.Peek() == '-') { + minus = true; + s.Take(); + } + + // Parse int: zero / ( digit1-9 *DIGIT ) + unsigned i = 0; + uint64_t i64 = 0; + bool use64bit = false; + int significandDigit = 0; + if (s.Peek() == '0') { + i = 0; + s.TakePush(); + } + else if (s.Peek() >= '1' && s.Peek() <= '9') { + 
i = static_cast(s.TakePush() - '0'); + + if (minus) + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (i >= 214748364) { // 2^31 = 2147483648 + if (i != 214748364 || s.Peek() > '8') { + i64 = i; + use64bit = true; + break; + } + } + i = i * 10 + static_cast(s.TakePush() - '0'); + significandDigit++; + } + else + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (i >= 429496729) { // 2^32 - 1 = 4294967295 + if (i != 429496729 || s.Peek() > '5') { + i64 = i; + use64bit = true; + break; + } + } + i = i * 10 + static_cast(s.TakePush() - '0'); + significandDigit++; + } + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); + + // Parse 64bit int + bool useDouble = false; + double d = 0.0; + if (use64bit) { + if (minus) + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC)) // 2^63 = 9223372036854775808 + if (i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8') { + d = i64; + useDouble = true; + break; + } + i64 = i64 * 10 + static_cast(s.TakePush() - '0'); + significandDigit++; + } + else + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999)) // 2^64 - 1 = 18446744073709551615 + if (i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5') { + d = i64; + useDouble = true; + break; + } + i64 = i64 * 10 + static_cast(s.TakePush() - '0'); + significandDigit++; + } + } + + // Force double for big integer + if (useDouble) { + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (d >= 1.7976931348623157e307) // DBL_MAX / 10.0 + RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell()); + d = d * 10 + (s.TakePush() - '0'); + } + } + + // Parse frac = decimal-point 1*DIGIT + int expFrac = 0; + size_t decimalPosition; + if (s.Peek() == '.') { + s.Take(); + decimalPosition = s.Length(); + + if (!(s.Peek() >= '0' && s.Peek() <= '9')) + RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell()); + + if (!useDouble) { +#if RAPIDJSON_64BIT 
+ // Use i64 to store significand in 64-bit architecture + if (!use64bit) + i64 = i; + + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path + break; + else { + i64 = i64 * 10 + static_cast(s.TakePush() - '0'); + --expFrac; + if (i64 != 0) + significandDigit++; + } + } + + d = (double)i64; +#else + // Use double to store significand in 32-bit architecture + d = use64bit ? (double)i64 : (double)i; +#endif + useDouble = true; + } + + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (significandDigit < 17) { + d = d * 10.0 + (s.TakePush() - '0'); + --expFrac; + if (d > 0.0) + significandDigit++; + } + else + s.TakePush(); + } + } + else + decimalPosition = s.Length(); // decimal position at the end of integer. + + // Parse exp = e [ minus / plus ] 1*DIGIT + int exp = 0; + if (s.Peek() == 'e' || s.Peek() == 'E') { + if (!useDouble) { + d = use64bit ? i64 : i; + useDouble = true; + } + s.Take(); + + bool expMinus = false; + if (s.Peek() == '+') + s.Take(); + else if (s.Peek() == '-') { + s.Take(); + expMinus = true; + } + + if (s.Peek() >= '0' && s.Peek() <= '9') { + exp = s.Take() - '0'; + if (expMinus) { + while (s.Peek() >= '0' && s.Peek() <= '9') { + exp = exp * 10 + (s.Take() - '0'); + if (exp >= 214748364) { // Issue #313: prevent overflow exponent + while (s.Peek() >= '0' && s.Peek() <= '9') // Consume the rest of exponent + s.Take(); + } + } + } + else { // positive exp + int maxExp = 308 - expFrac; + while (s.Peek() >= '0' && s.Peek() <= '9') { + exp = exp * 10 + (s.Take() - '0'); + if (exp > maxExp) + RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell()); + } + } + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell()); + + if (expMinus) + exp = -exp; + } + + // Finish parsing, call event according to the type of number. + bool cont = true; + size_t length = s.Length(); + const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not. 
+ + if (useDouble) { + int p = exp + expFrac; + if (parseFlags & kParseFullPrecisionFlag) + d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp); + else + d = internal::StrtodNormalPrecision(d, p); + + cont = handler.Double(minus ? -d : d); + } + else { + if (use64bit) { + if (minus) + cont = handler.Int64(static_cast(~i64 + 1)); + else + cont = handler.Uint64(i64); + } + else { + if (minus) + cont = handler.Int(static_cast(~i + 1)); + else + cont = handler.Uint(i); + } + } + if (!cont) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell()); + } + + // Parse any JSON value + template + void ParseValue(InputStream& is, Handler& handler) { + switch (is.Peek()) { + case 'n': ParseNull (is, handler); break; + case 't': ParseTrue (is, handler); break; + case 'f': ParseFalse (is, handler); break; + case '"': ParseString(is, handler); break; + case '{': ParseObject(is, handler); break; + case '[': ParseArray (is, handler); break; + default : ParseNumber(is, handler); + } + } + + // Iterative Parsing + + // States + enum IterativeParsingState { + IterativeParsingStartState = 0, + IterativeParsingFinishState, + IterativeParsingErrorState, + + // Object states + IterativeParsingObjectInitialState, + IterativeParsingMemberKeyState, + IterativeParsingKeyValueDelimiterState, + IterativeParsingMemberValueState, + IterativeParsingMemberDelimiterState, + IterativeParsingObjectFinishState, + + // Array states + IterativeParsingArrayInitialState, + IterativeParsingElementState, + IterativeParsingElementDelimiterState, + IterativeParsingArrayFinishState, + + // Single value state + IterativeParsingValueState, + + cIterativeParsingStateCount + }; + + // Tokens + enum Token { + LeftBracketToken = 0, + RightBracketToken, + + LeftCurlyBracketToken, + RightCurlyBracketToken, + + CommaToken, + ColonToken, + + StringToken, + FalseToken, + TrueToken, + NullToken, + NumberToken, + + kTokenCount + }; + + RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) { + +//!@cond 
RAPIDJSON_HIDDEN_FROM_DOXYGEN +#define N NumberToken +#define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N + // Maps from ASCII to Token + static const unsigned char tokenMap[256] = { + N16, // 00~0F + N16, // 10~1F + N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F + N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F + N16, // 40~4F + N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F + N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F + N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F + N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF + }; +#undef N +#undef N16 +//!@endcond + + if (sizeof(Ch) == 1 || unsigned(c) < 256) + return (Token)tokenMap[(unsigned char)c]; + else + return NumberToken; + } + + RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) { + // current state x one lookahead token -> new state + static const char G[cIterativeParsingStateCount][kTokenCount] = { + // Start + { + IterativeParsingArrayInitialState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingValueState, // String + IterativeParsingValueState, // False + IterativeParsingValueState, // True + IterativeParsingValueState, // Null + IterativeParsingValueState // Number + }, + // Finish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // Error(sink state) + { + IterativeParsingErrorState, 
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // ObjectInitial + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberKeyState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // MemberKey + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingKeyValueDelimiterState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // KeyValueDelimiter + { + IterativeParsingArrayInitialState, // Left bracket(push MemberValue state) + IterativeParsingErrorState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberValueState, // String + IterativeParsingMemberValueState, // False + IterativeParsingMemberValueState, // True + IterativeParsingMemberValueState, // Null + IterativeParsingMemberValueState // Number + }, + // MemberValue + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // 
Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingMemberDelimiterState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // MemberDelimiter + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberKeyState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // ObjectFinish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // ArrayInitial + { + IterativeParsingArrayInitialState, // Left bracket(push Element state) + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push Element state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingElementState, // String + IterativeParsingElementState, // False + IterativeParsingElementState, // True + IterativeParsingElementState, // Null + IterativeParsingElementState // Number + }, + // Element + { + IterativeParsingErrorState, // Left bracket + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingErrorState, // Left curly 
bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingElementDelimiterState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // ElementDelimiter + { + IterativeParsingArrayInitialState, // Left bracket(push Element state) + IterativeParsingErrorState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push Element state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingElementState, // String + IterativeParsingElementState, // False + IterativeParsingElementState, // True + IterativeParsingElementState, // Null + IterativeParsingElementState // Number + }, + // ArrayFinish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // Single Value (sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + } + }; // End of G + + return (IterativeParsingState)G[state][token]; + } + + // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit(). + // May return a new state on state pop. 
+ template + RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) { + (void)token; + + switch (dst) { + case IterativeParsingErrorState: + return dst; + + case IterativeParsingObjectInitialState: + case IterativeParsingArrayInitialState: + { + // Push the state(Element or MemeberValue) if we are nested in another array or value of member. + // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop. + IterativeParsingState n = src; + if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState) + n = IterativeParsingElementState; + else if (src == IterativeParsingKeyValueDelimiterState) + n = IterativeParsingMemberValueState; + // Push current state. + *stack_.template Push(1) = n; + // Initialize and push the member/element count. + *stack_.template Push(1) = 0; + // Call handler + bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray(); + // On handler short circuits the parsing. + if (!hr) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); + return IterativeParsingErrorState; + } + else { + is.Take(); + return dst; + } + } + + case IterativeParsingMemberKeyState: + ParseString(is, handler, true); + if (HasParseError()) + return IterativeParsingErrorState; + else + return dst; + + case IterativeParsingKeyValueDelimiterState: + RAPIDJSON_ASSERT(token == ColonToken); + is.Take(); + return dst; + + case IterativeParsingMemberValueState: + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. + ParseValue(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } + return dst; + + case IterativeParsingElementState: + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. 
+ ParseValue(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } + return dst; + + case IterativeParsingMemberDelimiterState: + case IterativeParsingElementDelimiterState: + is.Take(); + // Update member/element count. + *stack_.template Top() = *stack_.template Top() + 1; + return dst; + + case IterativeParsingObjectFinishState: + { + // Get member count. + SizeType c = *stack_.template Pop(1); + // If the object is not empty, count the last member. + if (src == IterativeParsingMemberValueState) + ++c; + // Restore the state. + IterativeParsingState n = static_cast(*stack_.template Pop(1)); + // Transit to Finish state if this is the topmost scope. + if (n == IterativeParsingStartState) + n = IterativeParsingFinishState; + // Call handler + bool hr = handler.EndObject(c); + // On handler short circuits the parsing. + if (!hr) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); + return IterativeParsingErrorState; + } + else { + is.Take(); + return n; + } + } + + case IterativeParsingArrayFinishState: + { + // Get element count. + SizeType c = *stack_.template Pop(1); + // If the array is not empty, count the last element. + if (src == IterativeParsingElementState) + ++c; + // Restore the state. + IterativeParsingState n = static_cast(*stack_.template Pop(1)); + // Transit to Finish state if this is the topmost scope. + if (n == IterativeParsingStartState) + n = IterativeParsingFinishState; + // Call handler + bool hr = handler.EndArray(c); + // On handler short circuits the parsing. + if (!hr) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); + return IterativeParsingErrorState; + } + else { + is.Take(); + return n; + } + } + + default: + // This branch is for IterativeParsingValueState actually. + // Use `default:` rather than + // `case IterativeParsingValueState:` is for code coverage. + + // The IterativeParsingStartState is not enumerated in this switch-case. 
+ // It is impossible for that case. And it can be caught by following assertion. + + // The IterativeParsingFinishState is not enumerated in this switch-case either. + // It is a "derivative" state which cannot triggered from Predict() directly. + // Therefore it cannot happen here. And it can be caught by following assertion. + RAPIDJSON_ASSERT(dst == IterativeParsingValueState); + + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. + ParseValue(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } + return IterativeParsingFinishState; + } + } + + template + void HandleError(IterativeParsingState src, InputStream& is) { + if (HasParseError()) { + // Error flag has been set. + return; + } + + switch (src) { + case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); + case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); + case IterativeParsingObjectInitialState: + case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); + case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); + case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); + case IterativeParsingElementState: RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); + default: RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); + } + } + + template + ParseResult IterativeParse(InputStream& is, Handler& handler) { + parseResult_.Clear(); + ClearStackOnExit scope(*this); + IterativeParsingState state = IterativeParsingStartState; + + SkipWhitespace(is); + while (is.Peek() != '\0') { + Token t = Tokenize(is.Peek()); + IterativeParsingState n = Predict(state, t); + IterativeParsingState d = Transit(state, t, n, is, handler); + + if (d == IterativeParsingErrorState) { + 
HandleError(state, is); + break; + } + + state = d; + + // Do not further consume streams if a root JSON has been parsed. + if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState) + break; + + SkipWhitespace(is); + } + + // Handle the end of file. + if (state != IterativeParsingFinishState) + HandleError(state, is); + + return parseResult_; + } + + static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. + internal::Stack stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. + ParseResult parseResult_; +}; // class GenericReader + +//! Reader with UTF8 encoding and default allocator. +typedef GenericReader, UTF8<> > Reader; + +RAPIDJSON_NAMESPACE_END + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_READER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/stringbuffer.h b/c++/include/misc/jsonwrapp/rapidjson10/stringbuffer.h new file mode 100644 index 00000000..1c9c80b7 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/stringbuffer.h @@ -0,0 +1,93 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_STRINGBUFFER_H_ +#define RAPIDJSON_STRINGBUFFER_H_ + +#include "rapidjson.h" + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS +#include // std::move +#endif + +#include "internal/stack.h" + +RAPIDJSON_NAMESPACE_BEGIN + +//! Represents an in-memory output stream. +/*! + \tparam Encoding Encoding of the stream. + \tparam Allocator type for allocating memory buffer. + \note implements Stream concept +*/ +template +class GenericStringBuffer { +public: + typedef typename Encoding::Ch Ch; + + GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {} + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericStringBuffer(GenericStringBuffer&& rhs) : stack_(std::move(rhs.stack_)) {} + GenericStringBuffer& operator=(GenericStringBuffer&& rhs) { + if (&rhs != this) + stack_ = std::move(rhs.stack_); + return *this; + } +#endif + + void Put(Ch c) { *stack_.template Push() = c; } + void Flush() {} + + void Clear() { stack_.Clear(); } + void ShrinkToFit() { + // Push and pop a null terminator. This is safe. + *stack_.template Push() = '\0'; + stack_.ShrinkToFit(); + stack_.template Pop(1); + } + Ch* Push(size_t count) { return stack_.template Push(count); } + void Pop(size_t count) { stack_.template Pop(count); } + + const Ch* GetString() const { + // Push and pop a null terminator. This is safe. + *stack_.template Push() = '\0'; + stack_.template Pop(1); + + return stack_.template Bottom(); + } + + size_t GetSize() const { return stack_.GetSize(); } + + static const size_t kDefaultCapacity = 256; + mutable internal::Stack stack_; + +private: + // Prohibit copy constructor & assignment operator. + GenericStringBuffer(const GenericStringBuffer&); + GenericStringBuffer& operator=(const GenericStringBuffer&); +}; + +//! String buffer with UTF8 encoding +typedef GenericStringBuffer > StringBuffer; + +//! Implement specialized version of PutN() with memset() for better performance. 
+template<> +inline void PutN(GenericStringBuffer >& stream, char c, size_t n) { + std::memset(stream.stack_.Push(n), c, n * sizeof(c)); +} + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_STRINGBUFFER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson10/writer.h b/c++/include/misc/jsonwrapp/rapidjson10/writer.h new file mode 100644 index 00000000..e1eea38b --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson10/writer.h @@ -0,0 +1,395 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_WRITER_H_ +#define RAPIDJSON_WRITER_H_ + +#include "rapidjson.h" +#include "internal/stack.h" +#include "internal/strfunc.h" +#include "internal/dtoa.h" +#include "internal/itoa.h" +#include "stringbuffer.h" +#include // placement new + +#if RAPIDJSON_HAS_STDSTRING +#include +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! JSON writer +/*! Writer implements the concept Handler. + It generates JSON text by events to an output os. + + User may programmatically call the functions of a writer to generate JSON text. + + On the other side, a writer can also be passed to objects that generate events, + + for example Reader::Parse() and Document::Accept(). 
+ + \tparam OutputStream Type of output stream. + \tparam SourceEncoding Encoding of source string. + \tparam TargetEncoding Encoding of output stream. + \tparam StackAllocator Type of allocator for allocating memory of stack. + \note implements Handler concept +*/ +template, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator> +class Writer { +public: + typedef typename SourceEncoding::Ch Ch; + + //! Constructor + /*! \param os Output stream. + \param stackAllocator User supplied allocator. If it is null, it will create a private one. + \param levelDepth Initial capacity of stack. + */ + explicit + Writer(OutputStream& os, StackAllocator* stackAllocator = 0, size_t levelDepth = kDefaultLevelDepth) : + os_(&os), level_stack_(stackAllocator, levelDepth * sizeof(Level)), hasRoot_(false) {} + + explicit + Writer(StackAllocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) : + os_(0), level_stack_(allocator, levelDepth * sizeof(Level)), hasRoot_(false) {} + + //! Reset the writer with a new stream. + /*! + This function resets the writer with a new stream and default settings, + in order to make a Writer object reusable for outputting multiple JSONs. + + \param os New output stream. + \code + Writer writer(os1); + writer.StartObject(); + // ... + writer.EndObject(); + + writer.Reset(os2); + writer.StartObject(); + // ... + writer.EndObject(); + \endcode + */ + void Reset(OutputStream& os) { + os_ = &os; + hasRoot_ = false; + level_stack_.Clear(); + } + + //! Checks whether the output is a complete JSON. + /*! + A complete JSON has a complete root object or array. + */ + bool IsComplete() const { + return hasRoot_ && level_stack_.Empty(); + } + + /*!@name Implementation of Handler + \see Handler + */ + //@{ + + bool Null() { Prefix(kNullType); return WriteNull(); } + bool Bool(bool b) { Prefix(b ? 
kTrueType : kFalseType); return WriteBool(b); } + bool Int(int i) { Prefix(kNumberType); return WriteInt(i); } + bool Uint(unsigned u) { Prefix(kNumberType); return WriteUint(u); } + bool Int64(int64_t i64) { Prefix(kNumberType); return WriteInt64(i64); } + bool Uint64(uint64_t u64) { Prefix(kNumberType); return WriteUint64(u64); } + + //! Writes the given \c double value to the stream + /*! + \param d The value to be written. + \return Whether it succeeded. + */ + bool Double(double d) { Prefix(kNumberType); return WriteDouble(d); } + + bool String(const Ch* str, SizeType length, bool copy = false) { + (void)copy; + Prefix(kStringType); + return WriteString(str, length); + } + +#if RAPIDJSON_HAS_STDSTRING + bool String(const std::basic_string& str) { + return String(str.data(), SizeType(str.size())); + } +#endif + + bool StartObject() { + Prefix(kObjectType); + new (level_stack_.template Push()) Level(false); + return WriteStartObject(); + } + + bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); } + + bool EndObject(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); + RAPIDJSON_ASSERT(!level_stack_.template Top()->inArray); + level_stack_.template Pop(1); + bool ret = WriteEndObject(); + if (level_stack_.Empty()) // end of json text + os_->Flush(); + return ret; + } + + bool StartArray() { + Prefix(kArrayType); + new (level_stack_.template Push()) Level(true); + return WriteStartArray(); + } + + bool EndArray(SizeType elementCount = 0) { + (void)elementCount; + RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); + RAPIDJSON_ASSERT(level_stack_.template Top()->inArray); + level_stack_.template Pop(1); + bool ret = WriteEndArray(); + if (level_stack_.Empty()) // end of json text + os_->Flush(); + return ret; + } + //@} + + /*! @name Convenience extensions */ + //@{ + + //! Simpler but slower overload. 
+ bool String(const Ch* str) { return String(str, internal::StrLen(str)); } + bool Key(const Ch* str) { return Key(str, internal::StrLen(str)); } + + //@} + +protected: + //! Information for each nested level + struct Level { + Level(bool inArray_) : valueCount(0), inArray(inArray_) {} + size_t valueCount; //!< number of values in this level + bool inArray; //!< true if in array, otherwise in object + }; + + static const size_t kDefaultLevelDepth = 32; + + bool WriteNull() { + os_->Put('n'); os_->Put('u'); os_->Put('l'); os_->Put('l'); return true; + } + + bool WriteBool(bool b) { + if (b) { + os_->Put('t'); os_->Put('r'); os_->Put('u'); os_->Put('e'); + } + else { + os_->Put('f'); os_->Put('a'); os_->Put('l'); os_->Put('s'); os_->Put('e'); + } + return true; + } + + bool WriteInt(int i) { + char buffer[11]; + const char* end = internal::i32toa(i, buffer); + for (const char* p = buffer; p != end; ++p) + os_->Put(*p); + return true; + } + + bool WriteUint(unsigned u) { + char buffer[10]; + const char* end = internal::u32toa(u, buffer); + for (const char* p = buffer; p != end; ++p) + os_->Put(*p); + return true; + } + + bool WriteInt64(int64_t i64) { + char buffer[21]; + const char* end = internal::i64toa(i64, buffer); + for (const char* p = buffer; p != end; ++p) + os_->Put(*p); + return true; + } + + bool WriteUint64(uint64_t u64) { + char buffer[20]; + char* end = internal::u64toa(u64, buffer); + for (char* p = buffer; p != end; ++p) + os_->Put(*p); + return true; + } + + bool WriteDouble(double d) { + char buffer[25]; + char* end = internal::dtoa(d, buffer); + for (char* p = buffer; p != end; ++p) + os_->Put(*p); + return true; + } + + bool WriteString(const Ch* str, SizeType length) { + static const char hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + static const char escape[256] = { +#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + //0 1 2 3 4 5 6 7 8 9 A B C D E F + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 
'b', 't', 'n', 'u', 'f', 'r', 'u', 'u', // 00 + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', // 10 + 0, 0, '"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20 + Z16, Z16, // 30~4F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, // 50 + Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 // 60~FF +#undef Z16 + }; + + os_->Put('\"'); + GenericStringStream is(str); + while (is.Tell() < length) { + const Ch c = is.Peek(); + if (!TargetEncoding::supportUnicode && (unsigned)c >= 0x80) { + // Unicode escaping + unsigned codepoint; + if (!SourceEncoding::Decode(is, &codepoint)) + return false; + os_->Put('\\'); + os_->Put('u'); + if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) { + os_->Put(hexDigits[(codepoint >> 12) & 15]); + os_->Put(hexDigits[(codepoint >> 8) & 15]); + os_->Put(hexDigits[(codepoint >> 4) & 15]); + os_->Put(hexDigits[(codepoint ) & 15]); + } + else { + RAPIDJSON_ASSERT(codepoint >= 0x010000 && codepoint <= 0x10FFFF); + // Surrogate pair + unsigned s = codepoint - 0x010000; + unsigned lead = (s >> 10) + 0xD800; + unsigned trail = (s & 0x3FF) + 0xDC00; + os_->Put(hexDigits[(lead >> 12) & 15]); + os_->Put(hexDigits[(lead >> 8) & 15]); + os_->Put(hexDigits[(lead >> 4) & 15]); + os_->Put(hexDigits[(lead ) & 15]); + os_->Put('\\'); + os_->Put('u'); + os_->Put(hexDigits[(trail >> 12) & 15]); + os_->Put(hexDigits[(trail >> 8) & 15]); + os_->Put(hexDigits[(trail >> 4) & 15]); + os_->Put(hexDigits[(trail ) & 15]); + } + } + else if ((sizeof(Ch) == 1 || (unsigned)c < 256) && escape[(unsigned char)c]) { + is.Take(); + os_->Put('\\'); + os_->Put(escape[(unsigned char)c]); + if (escape[(unsigned char)c] == 'u') { + os_->Put('0'); + os_->Put('0'); + os_->Put(hexDigits[(unsigned char)c >> 4]); + os_->Put(hexDigits[(unsigned char)c & 0xF]); + } + } + else + if (!Transcoder::Transcode(is, *os_)) + return false; + } + os_->Put('\"'); + return true; + } + + bool WriteStartObject() { os_->Put('{'); return true; } + 
bool WriteEndObject() { os_->Put('}'); return true; } + bool WriteStartArray() { os_->Put('['); return true; } + bool WriteEndArray() { os_->Put(']'); return true; } + + void Prefix(Type type) { + (void)type; + if (level_stack_.GetSize() != 0) { // this value is not at root + Level* level = level_stack_.template Top(); + if (level->valueCount > 0) { + if (level->inArray) + os_->Put(','); // add comma if it is not the first element in array + else // in object + os_->Put((level->valueCount % 2 == 0) ? ',' : ':'); + } + if (!level->inArray && level->valueCount % 2 == 0) + RAPIDJSON_ASSERT(type == kStringType); // if it's in object, then even number should be a name + level->valueCount++; + } + else { + RAPIDJSON_ASSERT(!hasRoot_); // Should have one and only one root. + hasRoot_ = true; + } + } + + OutputStream* os_; + internal::Stack level_stack_; + bool hasRoot_; + +private: + // Prohibit copy constructor & assignment operator. + Writer(const Writer&); + Writer& operator=(const Writer&); +}; + +// Full specialization for StringBuffer (writes digits directly into the buffer via Push()/Pop()) to prevent memory copying + +template<> +inline bool Writer::WriteInt(int i) { + char *buffer = os_->Push(11); + const char* end = internal::i32toa(i, buffer); + os_->Pop(11 - (end - buffer)); + return true; +} + +template<> +inline bool Writer::WriteUint(unsigned u) { + char *buffer = os_->Push(10); + const char* end = internal::u32toa(u, buffer); + os_->Pop(10 - (end - buffer)); + return true; +} + +template<> +inline bool Writer::WriteInt64(int64_t i64) { + char *buffer = os_->Push(21); + const char* end = internal::i64toa(i64, buffer); + os_->Pop(21 - (end - buffer)); + return true; +} + +template<> +inline bool Writer::WriteUint64(uint64_t u) { + char *buffer = os_->Push(20); + const char* end = internal::u64toa(u, buffer); + os_->Pop(20 - (end - buffer)); + return true; +} + +template<> +inline bool Writer::WriteDouble(double d) { + char *buffer = os_->Push(25); + char* end = internal::dtoa(d, buffer); + os_->Pop(25 - (end - 
buffer)); + return true; +} + +RAPIDJSON_NAMESPACE_END + +#ifdef _MSC_VER +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_WRITER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/__NCBI_read_me.txt b/c++/include/misc/jsonwrapp/rapidjson11/__NCBI_read_me.txt new file mode 100644 index 00000000..5de26491 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/__NCBI_read_me.txt @@ -0,0 +1,20 @@ + +Please, DO NOT USE rapidjson directly. +Use jsonwrapp instead. + + + +Source: +https://github.com/miloyip/rapidjson +release v1.1.0 + + +NCBI modifications: + +Search for 'NCBI' in the following files: +rapidjson.h +document.h +prettywriter.h +schema.h +pointer.h + diff --git a/c++/include/misc/jsonwrapp/rapidjson11/allocators.h b/c++/include/misc/jsonwrapp/rapidjson11/allocators.h new file mode 100644 index 00000000..655f4a38 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/allocators.h @@ -0,0 +1,271 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ALLOCATORS_H_ +#define RAPIDJSON_ALLOCATORS_H_ + +#include "rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// Allocator + +/*! \class rapidjson::Allocator + \brief Concept for allocating, resizing and freeing memory block. 
+ + Note that Malloc() and Realloc() are non-static but Free() is static. + + So if an allocator needs to support Free(), it needs to put its pointer in + the header of memory block. + +\code +concept Allocator { + static const bool kNeedFree; //!< Whether this allocator needs to call Free(). + + // Allocate a memory block. + // \param size of the memory block in bytes. + // \returns pointer to the memory block. + void* Malloc(size_t size); + + // Resize a memory block. + // \param originalPtr The pointer to current memory block. Null pointer is permitted. + // \param originalSize The current size in bytes. (Design issue: since some allocators may not book-keep this, explicitly passing it can save memory.) + // \param newSize the new size in bytes. + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize); + + // Free a memory block. + // \param pointer to the memory block. Null pointer is permitted. + static void Free(void *ptr); +}; +\endcode +*/ + +/////////////////////////////////////////////////////////////////////////////// +// CrtAllocator + +//! C-runtime library allocator. +/*! This class is just a wrapper for standard C library memory routines. + \note implements Allocator concept +*/ +class CrtAllocator { +public: + static const bool kNeedFree = true; + void* Malloc(size_t size) { + if (size) // behavior of malloc(0) is implementation defined. + return std::malloc(size); + else + return NULL; // standardize to returning NULL. + } + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { + (void)originalSize; + if (newSize == 0) { + std::free(originalPtr); + return NULL; + } + return std::realloc(originalPtr, newSize); + } + static void Free(void *ptr) { std::free(ptr); } +}; + +/////////////////////////////////////////////////////////////////////////////// +// MemoryPoolAllocator + +//! Default memory allocator used by the parser and DOM. +/*! This allocator allocates memory blocks from pre-allocated memory chunks. 
+ + It does not free memory blocks. And Realloc() only allocates new memory. + + The memory chunks are allocated by BaseAllocator, which is CrtAllocator by default. + + User may also supply a buffer as the first chunk. + + If the user-buffer is full then additional chunks are allocated by BaseAllocator. + + The user-buffer is not deallocated by this allocator. + + \tparam BaseAllocator the allocator type for allocating memory chunks. Default is CrtAllocator. + \note implements Allocator concept +*/ +template +class MemoryPoolAllocator { +public: + static const bool kNeedFree = false; //!< Tells users that there is no need to call Free() with this allocator. (concept Allocator) + + //! Constructor with chunkSize. + /*! \param chunkSize The size of memory chunk. The default is kDefaultChunkCapacity. + \param baseAllocator The allocator for allocating memory chunks. + */ + MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : + chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0) + { + } + + //! Constructor with user-supplied buffer. + /*! The user buffer will be used first. When it is full, memory pool allocates new chunk with chunk size. + + The user buffer will not be deallocated when this allocator is destructed. + + \param buffer User supplied buffer. + \param size Size of the buffer in bytes. It must be larger than sizeof(ChunkHeader). + \param chunkSize The size of memory chunk. The default is kDefaultChunkCapacity. + \param baseAllocator The allocator for allocating memory chunks. 
+ */ + MemoryPoolAllocator(void *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : + chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0) + { + RAPIDJSON_ASSERT(buffer != 0); + RAPIDJSON_ASSERT(size > sizeof(ChunkHeader)); + chunkHead_ = reinterpret_cast(buffer); + chunkHead_->capacity = size - sizeof(ChunkHeader); + chunkHead_->size = 0; + chunkHead_->next = 0; + } + + //! Destructor. + /*! This deallocates all memory chunks, excluding the user-supplied buffer. + */ + ~MemoryPoolAllocator() { + Clear(); + RAPIDJSON_DELETE(ownBaseAllocator_); + } + + //! Deallocates all memory chunks, excluding the user-supplied buffer. + void Clear() { + while (chunkHead_ && chunkHead_ != userBuffer_) { + ChunkHeader* next = chunkHead_->next; + baseAllocator_->Free(chunkHead_); + chunkHead_ = next; + } + if (chunkHead_ && chunkHead_ == userBuffer_) + chunkHead_->size = 0; // Clear user buffer + } + + //! Computes the total capacity of allocated memory chunks. + /*! \return total capacity in bytes. + */ + size_t Capacity() const { + size_t capacity = 0; + for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) + capacity += c->capacity; + return capacity; + } + + //! Computes the memory blocks allocated. + /*! \return total used bytes. + */ + size_t Size() const { + size_t size = 0; + for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) + size += c->size; + return size; + } + + //! Allocates a memory block. (concept Allocator) + void* Malloc(size_t size) { + if (!size) + return NULL; + + size = RAPIDJSON_ALIGN(size); + if (chunkHead_ == 0 || chunkHead_->size + size > chunkHead_->capacity) + if (!AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size)) + return NULL; + + void *buffer = reinterpret_cast(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size; + chunkHead_->size += size; + return buffer; + } + + //! 
Resizes a memory block (concept Allocator) + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { + if (originalPtr == 0) + return Malloc(newSize); + + if (newSize == 0) + return NULL; + + originalSize = RAPIDJSON_ALIGN(originalSize); + newSize = RAPIDJSON_ALIGN(newSize); + + // Do not shrink if new size is smaller than original + if (originalSize >= newSize) + return originalPtr; + + // Simply expand it if it is the last allocation and there is sufficient space + if (originalPtr == reinterpret_cast(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size - originalSize) { + size_t increment = static_cast(newSize - originalSize); + if (chunkHead_->size + increment <= chunkHead_->capacity) { + chunkHead_->size += increment; + return originalPtr; + } + } + + // Realloc process: allocate and copy memory, do not free original buffer. + if (void* newBuffer = Malloc(newSize)) { + if (originalSize) + std::memcpy(newBuffer, originalPtr, originalSize); + return newBuffer; + } + else + return NULL; + } + + //! Frees a memory block (concept Allocator) + static void Free(void *ptr) { (void)ptr; } // Do nothing + +private: + //! Copy constructor is not permitted. + MemoryPoolAllocator(const MemoryPoolAllocator& rhs) /* = delete */; + //! Copy assignment operator is not permitted. + MemoryPoolAllocator& operator=(const MemoryPoolAllocator& rhs) /* = delete */; + + //! Creates a new chunk. + /*! \param capacity Capacity of the chunk in bytes. + \return true if success. + */ + bool AddChunk(size_t capacity) { + if (!baseAllocator_) + ownBaseAllocator_ = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator)(); + if (ChunkHeader* chunk = reinterpret_cast(baseAllocator_->Malloc(RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + capacity))) { + chunk->capacity = capacity; + chunk->size = 0; + chunk->next = chunkHead_; + chunkHead_ = chunk; + return true; + } + else + return false; + } + + static const int kDefaultChunkCapacity = 64 * 1024; //!< Default chunk capacity. 
+ + //! Chunk header for prepending to each chunk. + /*! Chunks are stored as a singly linked list. + */ + struct ChunkHeader { + size_t capacity; //!< Capacity of the chunk in bytes (excluding the header itself). + size_t size; //!< Current size of allocated memory in bytes. + ChunkHeader *next; //!< Next chunk in the linked list. + }; + + ChunkHeader *chunkHead_; //!< Head of the chunk linked-list. Only the head chunk serves allocation. + size_t chunk_capacity_; //!< The minimum capacity of chunk when they are allocated. + void *userBuffer_; //!< User supplied buffer. + BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks. + BaseAllocator* ownBaseAllocator_; //!< base allocator created by this object. +}; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_ALLOCATORS_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/document.h b/c++/include/misc/jsonwrapp/rapidjson11/document.h new file mode 100644 index 00000000..df382473 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/document.h @@ -0,0 +1,2691 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_DOCUMENT_H_ +#define RAPIDJSON_DOCUMENT_H_ + +/*! 
\file document.h */ + +#include "reader.h" +#include "internal/meta.h" +#include "internal/strfunc.h" +#include "memorystream.h" +#include "encodedstream.h" +#include // placement new +#include + +RAPIDJSON_DIAG_PUSH +#ifdef _MSC_VER +RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant +RAPIDJSON_DIAG_OFF(4244) // conversion from kXxxFlags to 'uint16_t', possible loss of data +#ifdef _MINWINDEF_ // see: http://stackoverflow.com/questions/22744262/cant-call-stdmax-because-minwindef-h-defines-max +#ifndef NOMINMAX +#pragma push_macro("min") +#pragma push_macro("max") +#undef min +#undef max +#endif +#endif +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_OFF(padded) +RAPIDJSON_DIAG_OFF(switch-enum) +RAPIDJSON_DIAG_OFF(c++98-compat) +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_OFF(effc++) +#if __GNUC__ >= 6 +RAPIDJSON_DIAG_OFF(terminate) // ignore throwing RAPIDJSON_ASSERT in RAPIDJSON_NOEXCEPT functions +#endif +// NCBI +#ifndef __clang__ +#define RAPIDJSON_NCBI_NOOPTIMIZE __attribute__((optimize("O0"))) +#endif +#endif // __GNUC__ + +// NCBI +#ifndef RAPIDJSON_NCBI_NOOPTIMIZE +#define RAPIDJSON_NCBI_NOOPTIMIZE +#endif + +#ifndef RAPIDJSON_NOMEMBERITERATORCLASS +#include // std::iterator, std::random_access_iterator_tag +#endif + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS +#include // std::move +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +// Forward declaration. +template +class GenericValue; + +template +class GenericDocument; + +//! Name-value pair in a JSON object value. +/*! + This class was internal to GenericValue. It used to be a inner struct. + But a compiler (IBM XL C/C++ for AIX) have reported to have problem with that so it moved as a namespace scope struct. 
+ https://code.google.com/p/rapidjson/issues/detail?id=64 +*/ +template +struct GenericMember { +//NCBI: added assignment operator (takes rhs by non-const reference); Clang 3.2 wanted it + GenericMember& operator= (GenericMember& rhs) { + name = rhs.name; + value = rhs.value; + return *this; + } + + GenericValue name; //!< name of member (must be a string) + GenericValue value; //!< value of member. +}; + +/////////////////////////////////////////////////////////////////////////////// +// GenericMemberIterator + +#ifndef RAPIDJSON_NOMEMBERITERATORCLASS + +//! (Constant) member iterator for a JSON object value +/*! + \tparam Const Is this a constant iterator? + \tparam Encoding Encoding of the value. (Even non-string values need to have the same encoding in a document) + \tparam Allocator Allocator type for allocating memory of object, array and string. + + This class implements a Random Access Iterator for GenericMember elements + of a GenericValue, see ISO/IEC 14882:2003(E) C++ standard, 24.1 [lib.iterator.requirements]. + + \note This iterator implementation is mainly intended to avoid implicit + conversions from iterator values to \c NULL, + e.g. from GenericValue::FindMember. + + \note Define \c RAPIDJSON_NOMEMBERITERATORCLASS to fall back to a + pointer-based implementation, if your platform doesn't provide + the C++ \<iterator\> header. + + \see GenericMember, GenericValue::MemberIterator, GenericValue::ConstMemberIterator + */ +template +class GenericMemberIterator + : public std::iterator >::Type> { + + friend class GenericValue; + template friend class GenericMemberIterator; + + typedef GenericMember PlainType; + typedef typename internal::MaybeAddConst::Type ValueType; + typedef std::iterator BaseType; + +public: + //! Iterator type itself + typedef GenericMemberIterator Iterator; + //! Constant iterator type + typedef GenericMemberIterator ConstIterator; + //! Non-constant iterator type + typedef GenericMemberIterator NonConstIterator; + + //! 
Pointer to (const) GenericMember + typedef typename BaseType::pointer Pointer; + //! Reference to (const) GenericMember + typedef typename BaseType::reference Reference; + //! Signed integer type (e.g. \c ptrdiff_t) + typedef typename BaseType::difference_type DifferenceType; + + //! Default constructor (singular value) + /*! Creates an iterator pointing to no element. + \note All operations, except for comparisons, are undefined on such values. + */ + GenericMemberIterator() : ptr_() {} + + //! Iterator conversions to more const + /*! + \param it (Non-const) iterator to copy from + + Allows the creation of an iterator from another GenericMemberIterator + that is "less const". In particular, creating a non-constant iterator + from a constant iterator is disabled: + \li const -> non-const (not ok) + \li const -> const (ok) + \li non-const -> const (ok) + \li non-const -> non-const (ok) + + \note If the \c Const template parameter is already \c false, this + constructor effectively defines a regular copy-constructor. + Otherwise, the copy constructor is implicitly defined. + */ + GenericMemberIterator(const NonConstIterator & it) : ptr_(it.ptr_) {} + Iterator& operator=(const NonConstIterator & it) { ptr_ = it.ptr_; return *this; } + + //! @name stepping + //@{ + Iterator& operator++(){ ++ptr_; return *this; } + Iterator& operator--(){ --ptr_; return *this; } + Iterator operator++(int){ Iterator old(*this); ++ptr_; return old; } + Iterator operator--(int){ Iterator old(*this); --ptr_; return old; } + //@} + + //! @name increment/decrement + //@{ + Iterator operator+(DifferenceType n) const { return Iterator(ptr_+n); } + Iterator operator-(DifferenceType n) const { return Iterator(ptr_-n); } + + Iterator& operator+=(DifferenceType n) { ptr_+=n; return *this; } + Iterator& operator-=(DifferenceType n) { ptr_-=n; return *this; } + //@} + + //! 
@name relations + //@{ + bool operator==(ConstIterator that) const { return ptr_ == that.ptr_; } + bool operator!=(ConstIterator that) const { return ptr_ != that.ptr_; } + bool operator<=(ConstIterator that) const { return ptr_ <= that.ptr_; } + bool operator>=(ConstIterator that) const { return ptr_ >= that.ptr_; } + bool operator< (ConstIterator that) const { return ptr_ < that.ptr_; } + bool operator> (ConstIterator that) const { return ptr_ > that.ptr_; } + //@} + + //! @name dereference + //@{ + Reference operator*() const { return *ptr_; } + Pointer operator->() const { return ptr_; } + Reference operator[](DifferenceType n) const { return ptr_[n]; } + //@} + + //! Distance + DifferenceType operator-(ConstIterator that) const { return ptr_-that.ptr_; } + +private: + //! Internal constructor from plain pointer + explicit GenericMemberIterator(Pointer p) : ptr_(p) {} + + Pointer ptr_; //!< raw pointer +}; + +#else // RAPIDJSON_NOMEMBERITERATORCLASS + +// class-based member iterator implementation disabled, use plain pointers + +template +struct GenericMemberIterator; + +//! non-const GenericMemberIterator +template +struct GenericMemberIterator { + //! use plain pointer as iterator type + typedef GenericMember* Iterator; +}; +//! const GenericMemberIterator +template +struct GenericMemberIterator { + //! use plain const pointer as iterator type + typedef const GenericMember* Iterator; +}; + +#endif // RAPIDJSON_NOMEMBERITERATORCLASS + +/////////////////////////////////////////////////////////////////////////////// +// GenericStringRef + +//! Reference to a constant string (not taking a copy) +/*! + \tparam CharType character type of the string + + This helper class is used to automatically infer constant string + references for string literals, especially from \c const \b (!) + character arrays. + + The main use is for creating JSON string values without copying the + source string via an \ref Allocator. 
This requires that the referenced + string pointers have a sufficient lifetime, which exceeds the lifetime + of the associated GenericValue. + + \b Example + \code + Value v("foo"); // ok, no need to copy & calculate length + const char foo[] = "foo"; + v.SetString(foo); // ok + + const char* bar = foo; + // Value x(bar); // not ok, can't rely on bar's lifetime + Value x(StringRef(bar)); // lifetime explicitly guaranteed by user + Value y(StringRef(bar, 3)); // ok, explicitly pass length + \endcode + + \see StringRef, GenericValue::SetString +*/ +template +struct GenericStringRef { + typedef CharType Ch; //!< character type of the string + + //! Create string reference from \c const character array +#ifndef __clang__ // -Wdocumentation + /*! + This constructor implicitly creates a constant string reference from + a \c const character array. It has better performance than + \ref StringRef(const CharType*) by inferring the string \ref length + from the array length, and also supports strings containing null + characters. + + \tparam N length of the string, automatically inferred + + \param str Constant character array, lifetime assumed to be longer + than the use of the string in e.g. a GenericValue + + \post \ref s == str + + \note Constant complexity. + \note There is a hidden, private overload to disallow references to + non-const character arrays to be created via this constructor. + By this, e.g. function-scope arrays used to be filled via + \c snprintf are excluded from consideration. + In such cases, the referenced string should be \b copied to the + GenericValue instead. + */ +#endif + template + GenericStringRef(const CharType (&str)[N]) RAPIDJSON_NOEXCEPT + : s(str), length(N-1) {} + + //! Explicitly create string reference from \c const character pointer +#ifndef __clang__ // -Wdocumentation + /*! + This constructor can be used to \b explicitly create a reference to + a constant string pointer. 
+ + \see StringRef(const CharType*) + + \param str Constant character pointer, lifetime assumed to be longer + than the use of the string in e.g. a GenericValue + + \post \ref s == str + + \note There is a hidden, private overload to disallow references to + non-const character arrays to be created via this constructor. + By this, e.g. function-scope arrays used to be filled via + \c snprintf are excluded from consideration. + In such cases, the referenced string should be \b copied to the + GenericValue instead. + */ +#endif + explicit GenericStringRef(const CharType* str) + : s(str), length(internal::StrLen(str)){ RAPIDJSON_ASSERT(s != 0); } + + //! Create constant string reference from pointer and length +#ifndef __clang__ // -Wdocumentation + /*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \param len length of the string, excluding the trailing NULL terminator + + \post \ref s == str && \ref length == len + \note Constant complexity. + */ +#endif + GenericStringRef(const CharType* str, SizeType len) + : s(str), length(len) { RAPIDJSON_ASSERT(s != 0); } + + GenericStringRef(const GenericStringRef& rhs) : s(rhs.s), length(rhs.length) {} + + //! implicit conversion to plain CharType pointer + operator const Ch *() const { return s; } + + const Ch* const s; //!< plain CharType pointer + const SizeType length; //!< length of the string (excluding the trailing NULL terminator) + +private: + //! Disallow construction from non-const array + template + GenericStringRef(CharType (&str)[N]) /* = delete */; + //! Copy assignment operator not permitted - immutable type + GenericStringRef& operator=(const GenericStringRef& rhs) /* = delete */; +}; + +//! Mark a character pointer as constant string +/*! Mark a plain character pointer as a "string literal". 
This function + can be used to avoid copying a character string to be referenced as a + value in a JSON GenericValue object, if the string's lifetime is known + to be valid long enough. + \tparam CharType Character type of the string + \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \return GenericStringRef string reference object + \relatesalso GenericStringRef + + \see GenericValue::GenericValue(StringRefType), GenericValue::operator=(StringRefType), GenericValue::SetString(StringRefType), GenericValue::PushBack(StringRefType, Allocator&), GenericValue::AddMember +*/ +template +inline GenericStringRef StringRef(const CharType* str) { + return GenericStringRef(str, internal::StrLen(str)); +} + +//! Mark a character pointer as constant string +/*! Mark a plain character pointer as a "string literal". This function + can be used to avoid copying a character string to be referenced as a + value in a JSON GenericValue object, if the string's lifetime is known + to be valid long enough. + + This version has better performance with supplied length, and also + supports string containing null characters. + + \tparam CharType character type of the string + \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \param length The length of source string. + \return GenericStringRef string reference object + \relatesalso GenericStringRef +*/ +template +inline GenericStringRef StringRef(const CharType* str, size_t length) { + return GenericStringRef(str, SizeType(length)); +} + +#if RAPIDJSON_HAS_STDSTRING +//! Mark a string object as constant string +/*! Mark a string object (e.g. \c std::string) as a "string literal". + This function can be used to avoid copying a string to be referenced as a + value in a JSON GenericValue object, if the string's lifetime is known + to be valid long enough. 
+ + \tparam CharType character type of the string + \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \return GenericStringRef string reference object + \relatesalso GenericStringRef + \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. +*/ +template +inline GenericStringRef StringRef(const std::basic_string& str) { + return GenericStringRef(str.data(), SizeType(str.size())); +} +#endif + +/////////////////////////////////////////////////////////////////////////////// +// GenericValue type traits +namespace internal { + +template +struct IsGenericValueImpl : FalseType {}; + +// select candidates according to nested encoding and allocator types +template struct IsGenericValueImpl::Type, typename Void::Type> + : IsBaseOf, T>::Type {}; + +// helper to match arbitrary GenericValue instantiations, including derived classes +template struct IsGenericValue : IsGenericValueImpl::Type {}; + +} // namespace internal + +/////////////////////////////////////////////////////////////////////////////// +// TypeHelper + +namespace internal { + +template +struct TypeHelper {}; + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsBool(); } + static bool Get(const ValueType& v) { return v.GetBool(); } + static ValueType& Set(ValueType& v, bool data) { return v.SetBool(data); } + static ValueType& Set(ValueType& v, bool data, typename ValueType::AllocatorType&) { return v.SetBool(data); } +}; + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsInt(); } + static int Get(const ValueType& v) { return v.GetInt(); } + static ValueType& Set(ValueType& v, int data) { return v.SetInt(data); } + static ValueType& Set(ValueType& v, int data, typename ValueType::AllocatorType&) { return v.SetInt(data); } +}; + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsUint(); } + static unsigned Get(const ValueType& v) { 
return v.GetUint(); } + static ValueType& Set(ValueType& v, unsigned data) { return v.SetUint(data); } + static ValueType& Set(ValueType& v, unsigned data, typename ValueType::AllocatorType&) { return v.SetUint(data); } +}; + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsInt64(); } + static int64_t Get(const ValueType& v) { return v.GetInt64(); } + static ValueType& Set(ValueType& v, int64_t data) { return v.SetInt64(data); } + static ValueType& Set(ValueType& v, int64_t data, typename ValueType::AllocatorType&) { return v.SetInt64(data); } +}; + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsUint64(); } + static uint64_t Get(const ValueType& v) { return v.GetUint64(); } + static ValueType& Set(ValueType& v, uint64_t data) { return v.SetUint64(data); } + static ValueType& Set(ValueType& v, uint64_t data, typename ValueType::AllocatorType&) { return v.SetUint64(data); } +}; + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsDouble(); } + static double Get(const ValueType& v) { return v.GetDouble(); } + static ValueType& Set(ValueType& v, double data) { return v.SetDouble(data); } + static ValueType& Set(ValueType& v, double data, typename ValueType::AllocatorType&) { return v.SetDouble(data); } +}; + +template +struct TypeHelper { + static bool Is(const ValueType& v) { return v.IsFloat(); } + static float Get(const ValueType& v) { return v.GetFloat(); } + static ValueType& Set(ValueType& v, float data) { return v.SetFloat(data); } + static ValueType& Set(ValueType& v, float data, typename ValueType::AllocatorType&) { return v.SetFloat(data); } +}; + +template +struct TypeHelper { + typedef const typename ValueType::Ch* StringType; + static bool Is(const ValueType& v) { return v.IsString(); } + static StringType Get(const ValueType& v) { return v.GetString(); } + static ValueType& Set(ValueType& v, const StringType data) { return v.SetString(typename 
ValueType::StringRefType(data)); } + static ValueType& Set(ValueType& v, const StringType data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); } +}; + +#if RAPIDJSON_HAS_STDSTRING +template +struct TypeHelper > { + typedef std::basic_string StringType; + static bool Is(const ValueType& v) { return v.IsString(); } + static StringType Get(const ValueType& v) { return StringType(v.GetString(), v.GetStringLength()); } + static ValueType& Set(ValueType& v, const StringType& data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); } +}; +#endif + +template +struct TypeHelper { + typedef typename ValueType::Array ArrayType; + static bool Is(const ValueType& v) { return v.IsArray(); } + static ArrayType Get(ValueType& v) { return v.GetArray(); } + static ValueType& Set(ValueType& v, ArrayType data) { return v = data; } + static ValueType& Set(ValueType& v, ArrayType data, typename ValueType::AllocatorType&) { return v = data; } +}; + +template +struct TypeHelper { + typedef typename ValueType::ConstArray ArrayType; + static bool Is(const ValueType& v) { return v.IsArray(); } + static ArrayType Get(const ValueType& v) { return v.GetArray(); } +}; + +template +struct TypeHelper { + typedef typename ValueType::Object ObjectType; + static bool Is(const ValueType& v) { return v.IsObject(); } + static ObjectType Get(ValueType& v) { return v.GetObject(); } + static ValueType& Set(ValueType& v, ObjectType data) { return v = data; } + static ValueType& Set(ValueType& v, ObjectType data, typename ValueType::AllocatorType&) { return v = data; } /* allocator is unused; yields v like every other Set overload */ +}; + +template +struct TypeHelper { + typedef typename ValueType::ConstObject ObjectType; + static bool Is(const ValueType& v) { return v.IsObject(); } + static ObjectType Get(const ValueType& v) { return v.GetObject(); } +}; + +} // namespace internal + +// Forward declarations +template class GenericArray; +template class GenericObject; 
+/////////////////////////////////////////////////////////////////////////////// +// GenericValue + +//! Represents a JSON value. Use Value for UTF8 encoding and default allocator. +/*! + A JSON value can be one of 7 types. This class is a variant type supporting + these types. + + Use the Value if UTF8 and default allocator + + \tparam Encoding Encoding of the value. (Even non-string values need to have the same encoding in a document) + \tparam Allocator Allocator type for allocating memory of object, array and string. +*/ +template > +class GenericValue { +public: + //! Name-value pair in an object. + typedef GenericMember Member; + typedef Encoding EncodingType; //!< Encoding type from template parameter. + typedef Allocator AllocatorType; //!< Allocator type from template parameter. + typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding. + typedef GenericStringRef StringRefType; //!< Reference to a constant string + typedef typename GenericMemberIterator::Iterator MemberIterator; //!< Member iterator for iterating in object. + typedef typename GenericMemberIterator::Iterator ConstMemberIterator; //!< Constant member iterator for iterating in object. + typedef GenericValue* ValueIterator; //!< Value iterator for iterating in array. + typedef const GenericValue* ConstValueIterator; //!< Constant value iterator for iterating in array. + typedef GenericValue ValueType; //!< Value type of itself. + typedef GenericArray Array; + typedef GenericArray ConstArray; + typedef GenericObject Object; + typedef GenericObject ConstObject; + + //!@name Constructors and destructor. + //@{ + +//NCBI: allocator_ is not initialized intentionally +// Rapidjson uses in-place constructors in many places (eg, SetNull() method) +// We want to preserve allocator_ value in such cases + + //! Default constructor creates a null value. + GenericValue() RAPIDJSON_NOEXCEPT : data_() { data_.f.flags = kNullFlag; } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! 
Move constructor in C++11 + GenericValue(GenericValue&& rhs) RAPIDJSON_NOEXCEPT : data_(rhs.data_) { + rhs.data_.f.flags = kNullFlag; // give up contents + } +#endif + +private: + //! Copy constructor is not permitted. + GenericValue(const GenericValue& rhs); + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Moving from a GenericDocument is not permitted. + template + GenericValue(GenericDocument&& rhs); + + //! Move assignment from a GenericDocument is not permitted. + template + GenericValue& operator=(GenericDocument&& rhs); +#endif + +public: + + //! Constructor with JSON value type. + /*! This creates a Value of specified type with default content. + \param type Type of the value. + \note Default content for number is zero. + */ + explicit GenericValue(Type type) RAPIDJSON_NOEXCEPT : data_() { + static const uint16_t defaultFlags[7] = { + kNullFlag, kFalseFlag, kTrueFlag, kObjectFlag, kArrayFlag, kShortStringFlag, + kNumberAnyFlag + }; + RAPIDJSON_ASSERT(type <= kNumberType); + data_.f.flags = defaultFlags[type]; + + // Use ShortString to store empty string. + if (type == kStringType) + data_.ss.SetLength(0); + } + + //! Explicit copy constructor (with allocator) + /*! Creates a copy of a Value by using the given Allocator + \tparam SourceAllocator allocator of \c rhs + \param rhs Value to copy from (read-only) + \param allocator Allocator for allocating copied elements and buffers. Commonly use GenericDocument::GetAllocator(). 
+ \see CopyFrom() + */ + template + GenericValue(const GenericValue& rhs, Allocator& allocator) { + switch (rhs.GetType()) { + case kObjectType: { + SizeType count = rhs.data_.o.size; + Member* lm = reinterpret_cast(allocator.Malloc(count * sizeof(Member))); + const typename GenericValue::Member* rm = rhs.GetMembersPointer(); + for (SizeType i = 0; i < count; i++) { + new (&lm[i].name) GenericValue(rm[i].name, allocator); + new (&lm[i].value) GenericValue(rm[i].value, allocator); + } + data_.f.flags = kObjectFlag; + data_.o.size = data_.o.capacity = count; + SetMembersPointer(lm); + } + break; + case kArrayType: { + SizeType count = rhs.data_.a.size; + GenericValue* le = reinterpret_cast(allocator.Malloc(count * sizeof(GenericValue))); + const GenericValue* re = rhs.GetElementsPointer(); + for (SizeType i = 0; i < count; i++) + new (&le[i]) GenericValue(re[i], allocator); + data_.f.flags = kArrayFlag; + data_.a.size = data_.a.capacity = count; + SetElementsPointer(le); + } + break; + case kStringType: + if (rhs.data_.f.flags == kConstStringFlag) { + data_.f.flags = rhs.data_.f.flags; + data_ = *reinterpret_cast(&rhs.data_); + } + else + SetStringRaw(StringRef(rhs.GetString(), rhs.GetStringLength()), allocator); + break; + default: + data_.f.flags = rhs.data_.f.flags; + data_ = *reinterpret_cast(&rhs.data_); + break; + } + } + + //! Constructor for boolean value. + /*! \param b Boolean value + \note This constructor is limited to \em real boolean values and rejects + implicitly converted types like arbitrary pointers. Use an explicit cast + to \c bool, if you want to construct a boolean JSON value in such cases. 
+ */ +#ifndef RAPIDJSON_DOXYGEN_RUNNING // hide SFINAE from Doxygen + template + explicit GenericValue(T b, RAPIDJSON_ENABLEIF((internal::IsSame))) RAPIDJSON_NOEXCEPT // See #472 +#else + explicit GenericValue(bool b) RAPIDJSON_NOEXCEPT +#endif + : data_() { + // safe-guard against failing SFINAE + RAPIDJSON_STATIC_ASSERT((internal::IsSame::Value)); + data_.f.flags = b ? kTrueFlag : kFalseFlag; + } + + //! Constructor for int value. + explicit GenericValue(int i) RAPIDJSON_NOEXCEPT : data_() { + data_.n.i64 = i; + data_.f.flags = (i >= 0) ? (kNumberIntFlag | kUintFlag | kUint64Flag) : kNumberIntFlag; + } + + //! Constructor for unsigned value. + explicit GenericValue(unsigned u) RAPIDJSON_NOEXCEPT : data_() { + data_.n.u64 = u; + data_.f.flags = (u & 0x80000000) ? kNumberUintFlag : (kNumberUintFlag | kIntFlag | kInt64Flag); + } + + //! Constructor for int64_t value. + explicit GenericValue(int64_t i64) RAPIDJSON_NOEXCEPT : data_() { + data_.n.i64 = i64; + data_.f.flags = kNumberInt64Flag; + if (i64 >= 0) { + data_.f.flags |= kNumberUint64Flag; + if (!(static_cast(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000))) + data_.f.flags |= kUintFlag; + if (!(static_cast(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000))) + data_.f.flags |= kIntFlag; + } + else if (i64 >= static_cast(RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000))) + data_.f.flags |= kIntFlag; + } + + //! Constructor for uint64_t value. + explicit GenericValue(uint64_t u64) RAPIDJSON_NOEXCEPT : data_() { + data_.n.u64 = u64; + data_.f.flags = kNumberUint64Flag; + if (!(u64 & RAPIDJSON_UINT64_C2(0x80000000, 0x00000000))) + data_.f.flags |= kInt64Flag; + if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000))) + data_.f.flags |= kUintFlag; + if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000))) + data_.f.flags |= kIntFlag; + } + + //! Constructor for double value. + explicit GenericValue(double d) RAPIDJSON_NOEXCEPT : data_() { data_.n.d = d; data_.f.flags = kNumberDoubleFlag; } + + //! 
Constructor for float value. + explicit GenericValue(float f) RAPIDJSON_NOEXCEPT : data_() { data_.n.d = static_cast(f); data_.f.flags = kNumberDoubleFlag; } + + //! Constructor for constant string (i.e. do not make a copy of string) + GenericValue(const Ch* s, SizeType length) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(StringRef(s, length)); } + + //! Constructor for constant string (i.e. do not make a copy of string) + explicit GenericValue(StringRefType s) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(s); } + + //! Constructor for copy-string (i.e. do make a copy of string) + GenericValue(const Ch* s, SizeType length, Allocator& allocator) : data_() { SetStringRaw(StringRef(s, length), allocator); } + + //! Constructor for copy-string (i.e. do make a copy of string) + GenericValue(const Ch*s, Allocator& allocator) : data_() { SetStringRaw(StringRef(s), allocator); } + +#if RAPIDJSON_HAS_STDSTRING + //! Constructor for copy-string from a string object (i.e. do make a copy of string) + /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. + */ + GenericValue(const std::basic_string& s, Allocator& allocator) : data_() { SetStringRaw(StringRef(s), allocator); } +#endif + + //! Constructor for Array. + /*! + \param a An array obtained by \c GetArray(). + \note \c Array is always pass-by-value. + \note the source array is moved into this value and the source array becomes empty. + */ + GenericValue(Array a) RAPIDJSON_NOEXCEPT : data_(a.value_.data_) { + a.value_.data_ = Data(); + a.value_.data_.f.flags = kArrayFlag; + } + + //! Constructor for Object. + /*! + \param o An object obtained by \c GetObject(). + \note \c Object is always pass-by-value. + \note the source object is moved into this value and the source object becomes empty. + */ + GenericValue(Object o) RAPIDJSON_NOEXCEPT : data_(o.value_.data_) { + o.value_.data_ = Data(); + o.value_.data_.f.flags = kObjectFlag; + } + + //! Destructor. + /*! 
Need to destruct elements of array, members of object, or copy-string. + */ + ~GenericValue() { + if (Allocator::kNeedFree) { // Shortcut by Allocator's trait + switch(data_.f.flags) { + case kArrayFlag: + { + GenericValue* e = GetElementsPointer(); + for (GenericValue* v = e; v != e + data_.a.size; ++v) + v->~GenericValue(); + Allocator::Free(e); + } + break; + + case kObjectFlag: + for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m) + m->~Member(); + Allocator::Free(GetMembersPointer()); + break; + + case kCopyStringFlag: + Allocator::Free(const_cast(GetStringPointer())); + break; + + default: + break; // Do nothing for other types. + } + } + } + + //@} + + //!@name Assignment operators + //@{ + + //! Assignment with move semantics. + /*! \param rhs Source of the assignment. It will become a null value after assignment. + */ + GenericValue& operator=(GenericValue& rhs) RAPIDJSON_NOEXCEPT { + RAPIDJSON_ASSERT(this != &rhs); + this->~GenericValue(); + RawAssign(rhs); + return *this; + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move assignment in C++11 + GenericValue& operator=(GenericValue&& rhs) RAPIDJSON_NOEXCEPT { + return *this = rhs.Move(); + } +#endif + + //! Assignment of constant string reference (no copy) + /*! \param str Constant string reference to be assigned + \note This overload is needed to avoid clashes with the generic primitive type assignment overload below. + \see GenericStringRef, operator=(T) + */ + GenericValue& operator=(StringRefType str) RAPIDJSON_NOEXCEPT { + GenericValue s(str); + return *this = s; + } + + //! Assignment with primitive types. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param value The value to be assigned. + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. 
This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref SetString(const Ch*, Allocator&) (for copying) or + \ref StringRef() (to explicitly mark the pointer as constant) instead. + All other pointer types would implicitly convert to \c bool, + use \ref SetBool() instead. + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::IsPointer), (GenericValue&)) + operator=(T value) { + GenericValue v(value); + return *this = v; + } + + //! Deep-copy assignment from Value + /*! Assigns a \b copy of the Value to the current Value object + \tparam SourceAllocator Allocator type of \c rhs + \param rhs Value to copy from (read-only) + \param allocator Allocator to use for copying + */ + template + GenericValue& CopyFrom(const GenericValue& rhs, Allocator& allocator) { + RAPIDJSON_ASSERT(static_cast(this) != static_cast(&rhs)); + this->~GenericValue(); + new (this) GenericValue(rhs, allocator); + return *this; + } + + //! Exchange the contents of this value with those of other. + /*! + \param other Another value. + \note Constant complexity. + */ + GenericValue& Swap(GenericValue& other) RAPIDJSON_NOEXCEPT { + GenericValue temp; + temp.RawAssign(*this); + RawAssign(other); + other.RawAssign(temp); + return *this; + } + + //! free-standing swap function helper + /*! + Helper function to enable support for common swap implementation pattern based on \c std::swap: + \code + void swap(MyClass& a, MyClass& b) { + using std::swap; + swap(a.value, b.value); + // ... + } + \endcode + \see Swap() + */ + friend inline void swap(GenericValue& a, GenericValue& b) RAPIDJSON_NOEXCEPT { a.Swap(b); } + + //! Prepare Value for move semantics + /*! \return *this */ + GenericValue& Move() RAPIDJSON_NOEXCEPT { return *this; } + //@} + + //!@name Equal-to and not-equal-to operators + //@{ + //! Equal-to operator + /*! + \note If an object contains duplicated named member, comparing equality with any object is always \c false. 
+ \note Linear time complexity (number of all values in the subtree and total lengths of all strings). + */ + template + bool operator==(const GenericValue& rhs) const { + typedef GenericValue RhsType; + if (GetType() != rhs.GetType()) + return false; + + switch (GetType()) { + case kObjectType: // Warning: O(n^2) inner-loop + if (data_.o.size != rhs.data_.o.size) + return false; + for (ConstMemberIterator lhsMemberItr = MemberBegin(); lhsMemberItr != MemberEnd(); ++lhsMemberItr) { + typename RhsType::ConstMemberIterator rhsMemberItr = rhs.FindMember(lhsMemberItr->name); + if (rhsMemberItr == rhs.MemberEnd() || lhsMemberItr->value != rhsMemberItr->value) + return false; + } + return true; + + case kArrayType: + if (data_.a.size != rhs.data_.a.size) + return false; + for (SizeType i = 0; i < data_.a.size; i++) + if ((*this)[i] != rhs[i]) + return false; + return true; + + case kStringType: + return StringEqual(rhs); + + case kNumberType: + if (IsDouble() || rhs.IsDouble()) { + double a = GetDouble(); // May convert from integer to double. + double b = rhs.GetDouble(); // Ditto + return a >= b && a <= b; // Prevent -Wfloat-equal + } + else + return data_.n.u64 == rhs.data_.n.u64; + + default: + return true; + } + } + + //! Equal-to operator with const C-string pointer + bool operator==(const Ch* rhs) const { return *this == GenericValue(StringRef(rhs)); } + +#if RAPIDJSON_HAS_STDSTRING + //! Equal-to operator with string object + /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. + */ + bool operator==(const std::basic_string& rhs) const { return *this == GenericValue(StringRef(rhs)); } +#endif + + //! Equal-to operator with primitive types + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c true, \c false + */ + template RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr,internal::IsGenericValue >), (bool)) operator==(const T& rhs) const { return *this == GenericValue(rhs); } + + //! 
Not-equal-to operator + /*! \return !(*this == rhs) + */ + template + bool operator!=(const GenericValue& rhs) const { return !(*this == rhs); } + + //! Not-equal-to operator with const C-string pointer + bool operator!=(const Ch* rhs) const { return !(*this == rhs); } + + //! Not-equal-to operator with arbitrary types + /*! \return !(*this == rhs) + */ + template RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue), (bool)) operator!=(const T& rhs) const { return !(*this == rhs); } + + //! Equal-to operator with arbitrary types (symmetric version) + /*! \return (rhs == lhs) + */ + template friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue), (bool)) operator==(const T& lhs, const GenericValue& rhs) { return rhs == lhs; } + + //! Not-Equal-to operator with arbitrary types (symmetric version) + /*! \return !(rhs == lhs) + */ + template friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue), (bool)) operator!=(const T& lhs, const GenericValue& rhs) { return !(rhs == lhs); } + //@} + + //!@name Type + //@{ + + Type GetType() const { return static_cast(data_.f.flags & kTypeMask); } + bool IsNull() const { return data_.f.flags == kNullFlag; } + bool IsFalse() const { return data_.f.flags == kFalseFlag; } + bool IsTrue() const { return data_.f.flags == kTrueFlag; } + bool IsBool() const { return (data_.f.flags & kBoolFlag) != 0; } + bool IsObject() const { return data_.f.flags == kObjectFlag; } + bool IsArray() const { return data_.f.flags == kArrayFlag; } + bool IsNumber() const { return (data_.f.flags & kNumberFlag) != 0; } + bool IsInt() const { return (data_.f.flags & kIntFlag) != 0; } + bool IsUint() const { return (data_.f.flags & kUintFlag) != 0; } + bool IsInt64() const { return (data_.f.flags & kInt64Flag) != 0; } + bool IsUint64() const { return (data_.f.flags & kUint64Flag) != 0; } + bool IsDouble() const { return (data_.f.flags & kDoubleFlag) != 0; } + bool IsString() const { return (data_.f.flags & kStringFlag) != 0; } + + // Checks whether a 
number can be losslessly converted to a double. + bool IsLosslessDouble() const { + if (!IsNumber()) return false; + if (IsUint64()) { + uint64_t u = GetUint64(); + volatile double d = static_cast(u); + return (d >= 0.0) + && (d < static_cast(std::numeric_limits::max())) + && (u == static_cast(d)); + } + if (IsInt64()) { + int64_t i = GetInt64(); + volatile double d = static_cast(i); + return (d >= static_cast(std::numeric_limits::min())) + && (d < static_cast(std::numeric_limits::max())) + && (i == static_cast(d)); + } + return true; // double, int, uint are always lossless + } + + // Checks whether a number is a float (possible lossy). + bool IsFloat() const { + if ((data_.f.flags & kDoubleFlag) == 0) + return false; + double d = GetDouble(); + return d >= -3.4028234e38 && d <= 3.4028234e38; + } + // Checks whether a number can be losslessly converted to a float. + bool IsLosslessFloat() const { + if (!IsNumber()) return false; + double a = GetDouble(); + if (a < static_cast(-std::numeric_limits::max()) + || a > static_cast(std::numeric_limits::max())) + return false; + double b = static_cast(static_cast(a)); + return a >= b && a <= b; // Prevent -Wfloat-equal + } + + //@} + + //!@name Null + //@{ + + GenericValue& SetNull() { this->~GenericValue(); new (this) GenericValue(); return *this; } + + //@} + + //!@name Bool + //@{ + + bool GetBool() const { RAPIDJSON_ASSERT(IsBool()); return data_.f.flags == kTrueFlag; } + //!< Set boolean value + /*! \post IsBool() == true */ + GenericValue& SetBool(bool b) { this->~GenericValue(); new (this) GenericValue(b); return *this; } + + //@} + + //!@name Object + //@{ + + //! Set this value as an empty object. + /*! \post IsObject() == true */ + GenericValue& SetObject() { this->~GenericValue(); new (this) GenericValue(kObjectType); return *this; } + + //! Get the number of members in the object. + SizeType MemberCount() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size; } + + //! Check whether the object is empty. 
+ bool ObjectEmpty() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size == 0; } + + //! Get a value from an object associated with the name. + /*! \pre IsObject() == true + \tparam T Either \c Ch or \c const \c Ch (template used for disambiguation with \ref operator[](SizeType)) + \note In version 0.1x, if the member is not found, this function returns a null value. This makes issue 7. + Since 0.2, if the name is not correct, it will assert. + If user is unsure whether a member exists, user should use HasMember() first. + A better approach is to use FindMember(). + \note Linear time complexity. + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr::Type, Ch> >),(GenericValue&)) operator[](T* name) { + GenericValue n(StringRef(name)); + return (*this)[n]; + } + template + RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr::Type, Ch> >),(const GenericValue&)) operator[](T* name) const { return const_cast(*this)[name]; } + + //! Get a value from an object associated with the name. + /*! \pre IsObject() == true + \tparam SourceAllocator Allocator of the \c name value + + \note Compared to \ref operator[](T*), this version is faster because it does not need a StrLen(). + And it can also handle strings with embedded null characters. + + \note Linear time complexity. + */ + template + GenericValue& operator[](const GenericValue& name) { + MemberIterator member = FindMember(name); + if (member != MemberEnd()) + return member->value; + else { + RAPIDJSON_ASSERT(false); // see above note + + // This will generate -Wexit-time-destructors in clang + // static GenericValue NullValue; + // return NullValue; + + // Use static buffer and placement-new to prevent destruction + static char buffer[sizeof(GenericValue)]; + return *new (buffer) GenericValue(); + } + } + template + const GenericValue& operator[](const GenericValue& name) const { return const_cast(*this)[name]; } + +#if RAPIDJSON_HAS_STDSTRING + //! 
Get a value from an object associated with name (string object). + GenericValue& operator[](const std::basic_string& name) { return (*this)[GenericValue(StringRef(name))]; } + const GenericValue& operator[](const std::basic_string& name) const { return (*this)[GenericValue(StringRef(name))]; } +#endif + + //! Const member iterator + /*! \pre IsObject() == true */ + ConstMemberIterator MemberBegin() const { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(GetMembersPointer()); } + //! Const \em past-the-end member iterator + /*! \pre IsObject() == true */ + ConstMemberIterator MemberEnd() const { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(GetMembersPointer() + data_.o.size); } + //! Member iterator + /*! \pre IsObject() == true */ + MemberIterator MemberBegin() { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(GetMembersPointer()); } + //! \em Past-the-end member iterator + /*! \pre IsObject() == true */ + MemberIterator MemberEnd() { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(GetMembersPointer() + data_.o.size); } + + //! Check whether a member exists in the object. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Whether a member with that name exists. + \note It is better to use FindMember() directly if you need the obtain the value as well. + \note Linear time complexity. + */ + bool HasMember(const Ch* name) const { return FindMember(name) != MemberEnd(); } + +#if RAPIDJSON_HAS_STDSTRING + //! Check whether a member exists in the object with string object. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Whether a member with that name exists. + \note It is better to use FindMember() directly if you need the obtain the value as well. + \note Linear time complexity. + */ + bool HasMember(const std::basic_string& name) const { return FindMember(name) != MemberEnd(); } +#endif + + //! Check whether a member exists in the object with GenericValue name. + /*! 
+ This version is faster because it does not need a StrLen(). It can also handle string with null character. + \param name Member name to be searched. + \pre IsObject() == true + \return Whether a member with that name exists. + \note It is better to use FindMember() directly if you need the obtain the value as well. + \note Linear time complexity. + */ + template + bool HasMember(const GenericValue& name) const { return FindMember(name) != MemberEnd(); } + + //! Find member by name. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Iterator to member, if it exists. + Otherwise returns \ref MemberEnd(). + + \note Earlier versions of Rapidjson returned a \c NULL pointer, in case + the requested member doesn't exist. For consistency with e.g. + \c std::map, this has been changed to MemberEnd() now. + \note Linear time complexity. + */ + MemberIterator FindMember(const Ch* name) { + GenericValue n(StringRef(name)); + return FindMember(n); + } + + ConstMemberIterator FindMember(const Ch* name) const { return const_cast(*this).FindMember(name); } + + //! Find member by name. + /*! + This version is faster because it does not need a StrLen(). It can also handle string with null character. + \param name Member name to be searched. + \pre IsObject() == true + \return Iterator to member, if it exists. + Otherwise returns \ref MemberEnd(). + + \note Earlier versions of Rapidjson returned a \c NULL pointer, in case + the requested member doesn't exist. For consistency with e.g. + \c std::map, this has been changed to MemberEnd() now. + \note Linear time complexity. 
+ */ + template + MemberIterator FindMember(const GenericValue& name) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(name.IsString()); + MemberIterator member = MemberBegin(); + for ( ; member != MemberEnd(); ++member) + if (name.StringEqual(member->name)) + break; + return member; + } + template ConstMemberIterator FindMember(const GenericValue& name) const { return const_cast(*this).FindMember(name); } + +#if RAPIDJSON_HAS_STDSTRING + //! Find member by string object name. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Iterator to member, if it exists. + Otherwise returns \ref MemberEnd(). + */ + MemberIterator FindMember(const std::basic_string& name) { return FindMember(GenericValue(StringRef(name))); } + ConstMemberIterator FindMember(const std::basic_string& name) const { return FindMember(GenericValue(StringRef(name))); } +#endif + + //! Add a member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value Value of any type. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \note The ownership of \c name and \c value will be transferred to this object on success. + \pre IsObject() && name.IsString() + \post name.IsNull() && value.IsNull() + \note Amortized Constant time complexity. 
+ */ + GenericValue& AddMember(GenericValue& name, GenericValue& value, Allocator& allocator) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(name.IsString()); + + ObjectData& o = data_.o; + if (o.size >= o.capacity) { + if (o.capacity == 0) { + o.capacity = kDefaultObjectCapacity; + SetMembersPointer(reinterpret_cast(allocator.Malloc(o.capacity * sizeof(Member)))); + } + else { + SizeType oldCapacity = o.capacity; + o.capacity += (oldCapacity + 1) / 2; // grow by factor 1.5 + SetMembersPointer(reinterpret_cast(allocator.Realloc(GetMembersPointer(), oldCapacity * sizeof(Member), o.capacity * sizeof(Member)))); + } + } + Member* members = GetMembersPointer(); + members[o.size].name.RawAssign(name); + members[o.size].value.RawAssign(value); + o.size++; + return *this; + } + + //! Add a constant string value as member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value constant string reference as value of member. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below. + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(GenericValue& name, StringRefType value, Allocator& allocator) { + GenericValue v(value); + return AddMember(name, v, allocator); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Add a string object as member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value constant string reference as value of member. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. 
+ \pre IsObject() + \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below. + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(GenericValue& name, std::basic_string& value, Allocator& allocator) { + GenericValue v(value, allocator); + return AddMember(name, v, allocator); + } +#endif + + //! Add any primitive value as member (name-value pair) to the object. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param name A string value as name of member. + \param value Value of primitive type \c T as value of member + \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref AddMember(StringRefType, GenericValue&, Allocator&) or \ref + AddMember(StringRefType, StringRefType, Allocator&). + All other pointer types would implicitly convert to \c bool, + use an explicit cast instead, if needed. + \note Amortized Constant time complexity. 
+ */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (GenericValue&)) + AddMember(GenericValue& name, T value, Allocator& allocator) { + GenericValue v(value); + return AddMember(name, v, allocator); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericValue& AddMember(GenericValue&& name, GenericValue&& value, Allocator& allocator) { + return AddMember(name, value, allocator); + } + GenericValue& AddMember(GenericValue&& name, GenericValue& value, Allocator& allocator) { + return AddMember(name, value, allocator); + } + GenericValue& AddMember(GenericValue& name, GenericValue&& value, Allocator& allocator) { + return AddMember(name, value, allocator); + } + GenericValue& AddMember(StringRefType name, GenericValue&& value, Allocator& allocator) { + GenericValue n(name); + return AddMember(n, value, allocator); + } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + + + //! Add a member (name-value pair) to the object. + /*! \param name A constant string reference as name of member. + \param value Value of any type. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \note The ownership of \c value will be transferred to this object on success. + \pre IsObject() + \post value.IsNull() + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(StringRefType name, GenericValue& value, Allocator& allocator) { + GenericValue n(name); + return AddMember(n, value, allocator); + } + + //! Add a constant string value as member (name-value pair) to the object. + /*! \param name A constant string reference as name of member. + \param value constant string reference as value of member. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. 
+ \pre IsObject() + \note This overload is needed to avoid clashes with the generic primitive type AddMember(StringRefType,T,Allocator&) overload below. + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(StringRefType name, StringRefType value, Allocator& allocator) { + GenericValue v(value); + return AddMember(name, v, allocator); + } + + //! Add any primitive value as member (name-value pair) to the object. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param name A constant string reference as name of member. + \param value Value of primitive type \c T as value of member + \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref AddMember(StringRefType, GenericValue&, Allocator&) or \ref + AddMember(StringRefType, StringRefType, Allocator&). + All other pointer types would implicitly convert to \c bool, + use an explicit cast instead, if needed. + \note Amortized Constant time complexity. + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (GenericValue&)) + AddMember(StringRefType name, T value, Allocator& allocator) { + GenericValue n(name); + return AddMember(n, value, allocator); + } + + //! Remove all members in the object. + /*! This function do not deallocate memory in the object, i.e. the capacity is unchanged. + \note Linear time complexity. + */ + void RemoveAllMembers() { + RAPIDJSON_ASSERT(IsObject()); + for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m) + m->~Member(); + data_.o.size = 0; + } + + //! Remove a member in object by its name. + /*! \param name Name of member to be removed. + \return Whether the member existed. 
+ \note This function may reorder the object members. Use \ref + EraseMember(ConstMemberIterator) if you need to preserve the + relative order of the remaining members. + \note Linear time complexity. + */ + bool RemoveMember(const Ch* name) { + GenericValue n(StringRef(name)); + return RemoveMember(n); + } + +#if RAPIDJSON_HAS_STDSTRING + bool RemoveMember(const std::basic_string& name) { return RemoveMember(GenericValue(StringRef(name))); } +#endif + + template + bool RemoveMember(const GenericValue& name) { + MemberIterator m = FindMember(name); + if (m != MemberEnd()) { + RemoveMember(m); + return true; + } + else + return false; + } + + //! Remove a member in object by iterator. + /*! \param m member iterator (obtained by FindMember() or MemberBegin()). + \return the new iterator after removal. + \note This function may reorder the object members. Use \ref + EraseMember(ConstMemberIterator) if you need to preserve the + relative order of the remaining members. + \note Constant time complexity. + */ + MemberIterator RemoveMember(MemberIterator m) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(data_.o.size > 0); + RAPIDJSON_ASSERT(GetMembersPointer() != 0); + RAPIDJSON_ASSERT(m >= MemberBegin() && m < MemberEnd()); + + MemberIterator last(GetMembersPointer() + (data_.o.size - 1)); + if (data_.o.size > 1 && m != last) + *m = *last; // Move the last one to this place + else + m->~Member(); // Only one left, just destroy + --data_.o.size; + return m; + } + + //! Remove a member from an object by iterator. + /*! \param pos iterator to the member to remove + \pre IsObject() == true && \ref MemberBegin() <= \c pos < \ref MemberEnd() + \return Iterator following the removed element. + If the iterator \c pos refers to the last element, the \ref MemberEnd() iterator is returned. + \note This function preserves the relative order of the remaining object + members. If you do not need this, use the more efficient \ref RemoveMember(MemberIterator). 
+ \note Linear time complexity. + */ + MemberIterator EraseMember(ConstMemberIterator pos) { + return EraseMember(pos, pos +1); + } + + //! Remove members in the range [first, last) from an object. + /*! \param first iterator to the first member to remove + \param last iterator following the last member to remove + \pre IsObject() == true && \ref MemberBegin() <= \c first <= \c last <= \ref MemberEnd() + \return Iterator following the last removed element. + \note This function preserves the relative order of the remaining object + members. + \note Linear time complexity. + */ + MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(data_.o.size > 0); + RAPIDJSON_ASSERT(GetMembersPointer() != 0); + RAPIDJSON_ASSERT(first >= MemberBegin()); + RAPIDJSON_ASSERT(first <= last); + RAPIDJSON_ASSERT(last <= MemberEnd()); + + MemberIterator pos = MemberBegin() + (first - MemberBegin()); + for (MemberIterator itr = pos; itr != last; ++itr) + itr->~Member(); + std::memmove(&*pos, &*last, static_cast(MemberEnd() - last) * sizeof(Member)); + data_.o.size -= static_cast(last - first); + return pos; + } + + //! Erase a member in object by its name. + /*! \param name Name of member to be removed. + \return Whether the member existed. + \note Linear time complexity. 
+ */ + bool EraseMember(const Ch* name) { + GenericValue n(StringRef(name)); + return EraseMember(n); + } + +#if RAPIDJSON_HAS_STDSTRING + bool EraseMember(const std::basic_string& name) { return EraseMember(GenericValue(StringRef(name))); } +#endif + + template + bool EraseMember(const GenericValue& name) { + MemberIterator m = FindMember(name); + if (m != MemberEnd()) { + EraseMember(m); + return true; + } + else + return false; + } + + Object GetObject() { RAPIDJSON_ASSERT(IsObject()); return Object(*this); } + ConstObject GetObject() const { RAPIDJSON_ASSERT(IsObject()); return ConstObject(*this); } + + //@} + + //!@name Array + //@{ + + //! Set this value as an empty array. + /*! \post IsArray == true */ + GenericValue& SetArray() { this->~GenericValue(); new (this) GenericValue(kArrayType); return *this; } + + //! Get the number of elements in array. + SizeType Size() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size; } + + //! Get the capacity of array. + SizeType Capacity() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.capacity; } + + //! Check whether the array is empty. + bool Empty() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size == 0; } + + //! Remove all elements in the array. + /*! This function do not deallocate memory in the array, i.e. the capacity is unchanged. + \note Linear time complexity. + */ + void Clear() { + RAPIDJSON_ASSERT(IsArray()); + GenericValue* e = GetElementsPointer(); + for (GenericValue* v = e; v != e + data_.a.size; ++v) + v->~GenericValue(); + data_.a.size = 0; + } + + //! Get an element from array by index. + /*! \pre IsArray() == true + \param index Zero-based index of element. + \see operator[](T*) + */ + GenericValue& operator[](SizeType index) { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(index < data_.a.size); + return GetElementsPointer()[index]; + } + const GenericValue& operator[](SizeType index) const { return const_cast(*this)[index]; } + + //! Element iterator + /*! 
\pre IsArray() == true */ + ValueIterator Begin() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer(); } + //! \em Past-the-end element iterator + /*! \pre IsArray() == true */ + ValueIterator End() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer() + data_.a.size; } + //! Constant element iterator + /*! \pre IsArray() == true */ + ConstValueIterator Begin() const { return const_cast(*this).Begin(); } + //! Constant \em past-the-end element iterator + /*! \pre IsArray() == true */ + ConstValueIterator End() const { return const_cast(*this).End(); } + + //! Request the array to have enough capacity to store elements. + /*! \param newCapacity The capacity that the array at least need to have. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \note Linear time complexity. + */ + GenericValue& Reserve(SizeType newCapacity, Allocator &allocator) { + RAPIDJSON_ASSERT(IsArray()); + if (newCapacity > data_.a.capacity) { + SetElementsPointer(reinterpret_cast(allocator.Realloc(GetElementsPointer(), data_.a.capacity * sizeof(GenericValue), newCapacity * sizeof(GenericValue)))); + data_.a.capacity = newCapacity; + } + return *this; + } + + //! Append a GenericValue at the end of the array. + /*! \param value Value to be appended. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \pre IsArray() == true + \post value.IsNull() == true + \return The value itself for fluent API. + \note The ownership of \c value will be transferred to this array on success. + \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient. + \note Amortized constant time complexity. 
+ */ + GenericValue& PushBack(GenericValue& value, Allocator& allocator) { + RAPIDJSON_ASSERT(IsArray()); + if (data_.a.size >= data_.a.capacity) + Reserve(data_.a.capacity == 0 ? kDefaultArrayCapacity : (data_.a.capacity + (data_.a.capacity + 1) / 2), allocator); + GetElementsPointer()[data_.a.size++].RawAssign(value); + return *this; + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericValue& PushBack(GenericValue&& value, Allocator& allocator) { + return PushBack(value, allocator); + } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + + //! Append a constant string reference at the end of the array. + /*! \param value Constant string reference to be appended. + \param allocator Allocator for reallocating memory. It must be the same one used previously. Commonly use GenericDocument::GetAllocator(). + \pre IsArray() == true + \return The value itself for fluent API. + \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient. + \note Amortized constant time complexity. + \see GenericStringRef + */ + GenericValue& PushBack(StringRefType value, Allocator& allocator) { + return (*this).template PushBack(value, allocator); + } + + //! Append a primitive value at the end of the array. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param value Value of primitive type T to be appended. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \pre IsArray() == true + \return The value itself for fluent API. + \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient. + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref PushBack(GenericValue&, Allocator&) or \ref + PushBack(StringRefType, Allocator&). 
+ All other pointer types would implicitly convert to \c bool, + use an explicit cast instead, if needed. + \note Amortized constant time complexity. + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (GenericValue&)) + PushBack(T value, Allocator& allocator) { + GenericValue v(value); + return PushBack(v, allocator); + } + + //! Remove the last element in the array. + /*! + \note Constant time complexity. + */ + GenericValue& PopBack() { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(!Empty()); + GetElementsPointer()[--data_.a.size].~GenericValue(); + return *this; + } + + //! Remove an element of array by iterator. + /*! + \param pos iterator to the element to remove + \pre IsArray() == true && \ref Begin() <= \c pos < \ref End() + \return Iterator following the removed element. If the iterator pos refers to the last element, the End() iterator is returned. + \note Linear time complexity. + */ + ValueIterator Erase(ConstValueIterator pos) { + return Erase(pos, pos + 1); + } + + //! Remove elements in the range [first, last) of the array. + /*! + \param first iterator to the first element to remove + \param last iterator following the last element to remove + \pre IsArray() == true && \ref Begin() <= \c first <= \c last <= \ref End() + \return Iterator following the last removed element. + \note Linear time complexity. 
+ */ + ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(data_.a.size > 0); + RAPIDJSON_ASSERT(GetElementsPointer() != 0); + RAPIDJSON_ASSERT(first >= Begin()); + RAPIDJSON_ASSERT(first <= last); + RAPIDJSON_ASSERT(last <= End()); + ValueIterator pos = Begin() + (first - Begin()); + for (ValueIterator itr = pos; itr != last; ++itr) + itr->~GenericValue(); + std::memmove(pos, last, static_cast(End() - last) * sizeof(GenericValue)); + data_.a.size -= static_cast(last - first); + return pos; + } + + Array GetArray() { RAPIDJSON_ASSERT(IsArray()); return Array(*this); } + ConstArray GetArray() const { RAPIDJSON_ASSERT(IsArray()); return ConstArray(*this); } + + //@} + + //!@name Number + //@{ + + int GetInt() const { RAPIDJSON_ASSERT(data_.f.flags & kIntFlag); return data_.n.i.i; } + unsigned GetUint() const { RAPIDJSON_ASSERT(data_.f.flags & kUintFlag); return data_.n.u.u; } + int64_t GetInt64() const { RAPIDJSON_ASSERT(data_.f.flags & kInt64Flag); return data_.n.i64; } + uint64_t GetUint64() const { RAPIDJSON_ASSERT(data_.f.flags & kUint64Flag); return data_.n.u64; } + + //! Get the value as double type. + /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessDouble() to check whether the converison is lossless. + */ + double GetDouble() const { + RAPIDJSON_ASSERT(IsNumber()); + if ((data_.f.flags & kDoubleFlag) != 0) return data_.n.d; // exact type, no conversion. + if ((data_.f.flags & kIntFlag) != 0) return data_.n.i.i; // int -> double + if ((data_.f.flags & kUintFlag) != 0) return data_.n.u.u; // unsigned -> double + if ((data_.f.flags & kInt64Flag) != 0) return static_cast(data_.n.i64); // int64_t -> double (may lose precision) + RAPIDJSON_ASSERT((data_.f.flags & kUint64Flag) != 0); return static_cast(data_.n.u64); // uint64_t -> double (may lose precision) + } + + //! Get the value as float type. + /*! 
\note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessFloat() to check whether the converison is lossless. + */ + float GetFloat() const { + return static_cast(GetDouble()); + } + + GenericValue& SetInt(int i) { this->~GenericValue(); new (this) GenericValue(i); return *this; } + GenericValue& SetUint(unsigned u) { this->~GenericValue(); new (this) GenericValue(u); return *this; } + GenericValue& SetInt64(int64_t i64) { this->~GenericValue(); new (this) GenericValue(i64); return *this; } + GenericValue& SetUint64(uint64_t u64) { this->~GenericValue(); new (this) GenericValue(u64); return *this; } + GenericValue& SetDouble(double d) { this->~GenericValue(); new (this) GenericValue(d); return *this; } + GenericValue& SetFloat(float f) { this->~GenericValue(); new (this) GenericValue(static_cast(f)); return *this; } + + //@} + + //!@name String + //@{ + + const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return (data_.f.flags & kInlineStrFlag) ? data_.ss.str : GetStringPointer(); } + + //! Get the length of string. + /*! Since rapidjson permits "\\u0000" in the json string, strlen(v.GetString()) may not equal to v.GetStringLength(). + */ + SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return ((data_.f.flags & kInlineStrFlag) ? (data_.ss.GetLength()) : data_.s.length); } + + //! Set this value as a string without copying source string. + /*! This version has better performance with supplied length, and also support string containing null character. + \param s source string pointer. + \param length The length of source string, excluding the trailing null terminator. + \return The value itself for fluent API. + \post IsString() == true && GetString() == s && GetStringLength() == length + \see SetString(StringRefType) + */ + GenericValue& SetString(const Ch* s, SizeType length) { return SetString(StringRef(s, length)); } + + //! Set this value as a string without copying source string. + /*! 
\param s source string reference + \return The value itself for fluent API. + \post IsString() == true && GetString() == s && GetStringLength() == s.length + */ + GenericValue& SetString(StringRefType s) { this->~GenericValue(); SetStringRaw(s); return *this; } + + //! Set this value as a string by copying from source string. + /*! This version has better performance with supplied length, and also support string containing null character. + \param s source string. + \param length The length of source string, excluding the trailing null terminator. + \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length + */ + GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) { this->~GenericValue(); SetStringRaw(StringRef(s, length), allocator); return *this; } + + //! Set this value as a string by copying from source string. + /*! \param s source string. + \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length + */ + GenericValue& SetString(const Ch* s, Allocator& allocator) { return SetString(s, internal::StrLen(s), allocator); } + +#if RAPIDJSON_HAS_STDSTRING + //! Set this value as a string by copying from source string. + /*! \param s source string. + \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \post IsString() == true && GetString() != s.data() && strcmp(GetString(),s.data() == 0 && GetStringLength() == s.size() + \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. 
+ */ + GenericValue& SetString(const std::basic_string& s, Allocator& allocator) { return SetString(s.data(), SizeType(s.size()), allocator); } +#endif + + //@} + + //!@name Array + //@{ + + //! Templated version for checking whether this value is type T. + /*! + \tparam T Either \c bool, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c float, \c const \c char*, \c std::basic_string + */ + template + bool Is() const { return internal::TypeHelper::Is(*this); } + + template + T Get() const { return internal::TypeHelper::Get(*this); } + + template + T Get() { return internal::TypeHelper::Get(*this); } + + template + ValueType& Set(const T& data) { return internal::TypeHelper::Set(*this, data); } + + template + ValueType& Set(const T& data, AllocatorType& allocator) { return internal::TypeHelper::Set(*this, data, allocator); } + + //@} + + //! Generate events of this value to a Handler. + /*! This function adopts the GoF visitor pattern. + Typical usage is to output this JSON value as JSON text via Writer, which is a Handler. + It can also be used to deep clone this value via GenericDocument, which is also a Handler. + \tparam Handler type of handler. + \param handler An object implementing concept Handler. + */ + template + bool Accept(Handler& handler) const { + switch(GetType()) { + case kNullType: return handler.Null(); + case kFalseType: return handler.Bool(false); + case kTrueType: return handler.Bool(true); + + case kObjectType: + if (RAPIDJSON_UNLIKELY(!handler.StartObject())) + return false; + for (ConstMemberIterator m = MemberBegin(); m != MemberEnd(); ++m) { + RAPIDJSON_ASSERT(m->name.IsString()); // User may change the type of name by MemberIterator. 
+ if (RAPIDJSON_UNLIKELY(!handler.Key(m->name.GetString(), m->name.GetStringLength(), (m->name.data_.f.flags & kCopyFlag) != 0))) + return false; + if (RAPIDJSON_UNLIKELY(!m->value.Accept(handler))) + return false; + } + return handler.EndObject(data_.o.size); + + case kArrayType: + if (RAPIDJSON_UNLIKELY(!handler.StartArray())) + return false; + for (const GenericValue* v = Begin(); v != End(); ++v) + if (RAPIDJSON_UNLIKELY(!v->Accept(handler))) + return false; + return handler.EndArray(data_.a.size); + + case kStringType: + return handler.String(GetString(), GetStringLength(), (data_.f.flags & kCopyFlag) != 0); + + default: + RAPIDJSON_ASSERT(GetType() == kNumberType); + if (IsDouble()) return handler.Double(data_.n.d); + else if (IsInt()) return handler.Int(data_.n.i.i); + else if (IsUint()) return handler.Uint(data_.n.u.u); + else if (IsInt64()) return handler.Int64(data_.n.i64); + else return handler.Uint64(data_.n.u64); + } + } + +private: + template friend class GenericValue; + template friend class GenericDocument; + + enum { + kBoolFlag = 0x0008, + kNumberFlag = 0x0010, + kIntFlag = 0x0020, + kUintFlag = 0x0040, + kInt64Flag = 0x0080, + kUint64Flag = 0x0100, + kDoubleFlag = 0x0200, + kStringFlag = 0x0400, + kCopyFlag = 0x0800, + kInlineStrFlag = 0x1000, + + // Initial flags of different types. 
+ kNullFlag = kNullType, + kTrueFlag = kTrueType | kBoolFlag, + kFalseFlag = kFalseType | kBoolFlag, + kNumberIntFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag, + kNumberUintFlag = kNumberType | kNumberFlag | kUintFlag | kUint64Flag | kInt64Flag, + kNumberInt64Flag = kNumberType | kNumberFlag | kInt64Flag, + kNumberUint64Flag = kNumberType | kNumberFlag | kUint64Flag, + kNumberDoubleFlag = kNumberType | kNumberFlag | kDoubleFlag, + kNumberAnyFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag | kUintFlag | kUint64Flag | kDoubleFlag, + kConstStringFlag = kStringType | kStringFlag, + kCopyStringFlag = kStringType | kStringFlag | kCopyFlag, + kShortStringFlag = kStringType | kStringFlag | kCopyFlag | kInlineStrFlag, + kObjectFlag = kObjectType, + kArrayFlag = kArrayType, + + kTypeMask = 0x07 + }; + + static const SizeType kDefaultArrayCapacity = 16; + static const SizeType kDefaultObjectCapacity = 16; + + struct Flag { +#if RAPIDJSON_48BITPOINTER_OPTIMIZATION + char payload[sizeof(SizeType) * 2 + 6]; // 2 x SizeType + lower 48-bit pointer +#elif RAPIDJSON_64BIT + char payload[sizeof(SizeType) * 2 + sizeof(void*) + 6]; // 6 padding bytes +#else + char payload[sizeof(SizeType) * 2 + sizeof(void*) + 2]; // 2 padding bytes +#endif + uint16_t flags; + }; + + struct String { + SizeType length; + SizeType hashcode; //!< reserved + const Ch* str; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + // implementation detail: ShortString can represent zero-terminated strings up to MaxSize chars + // (excluding the terminating zero) and store a value to determine the length of the contained + // string in the last character str[LenPos] by storing "MaxSize - length" there. If the string + // to store has the maximal length of MaxSize then str[LenPos] will be 0 and therefore act as + // the string terminator as well. For getting the string length back from that value just use + // "MaxSize - str[LenPos]". 
+ // This allows to store 13-chars strings in 32-bit mode, 21-chars strings in 64-bit mode, + // 13-chars strings for RAPIDJSON_48BITPOINTER_OPTIMIZATION=1 inline (for `UTF8`-encoded strings). + struct ShortString { + enum { MaxChars = sizeof(static_cast(0)->payload) / sizeof(Ch), MaxSize = MaxChars - 1, LenPos = MaxSize }; + Ch str[MaxChars]; + + inline static bool Usable(SizeType len) { return (MaxSize >= len); } + inline void SetLength(SizeType len) { str[LenPos] = static_cast(MaxSize - len); } + inline SizeType GetLength() const { return static_cast(MaxSize - str[LenPos]); } + }; // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + // By using proper binary layout, retrieval of different integer types do not need conversions. + union Number { +#if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN + struct I { + int i; + char padding[4]; + }i; + struct U { + unsigned u; + char padding2[4]; + }u; +#else + struct I { + char padding[4]; + int i; + }i; + struct U { + char padding2[4]; + unsigned u; + }u; +#endif + int64_t i64; + uint64_t u64; + double d; + }; // 8 bytes + + struct ObjectData { + SizeType size; + SizeType capacity; + Member* members; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + struct ArrayData { + SizeType size; + SizeType capacity; + GenericValue* elements; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + union Data { + String s; + ShortString ss; + Number n; + ObjectData o; + ArrayData a; + Flag f; + }; // 16 bytes in 32-bit mode, 24 bytes in 64-bit mode, 16 bytes in 64-bit with RAPIDJSON_48BITPOINTER_OPTIMIZATION + + RAPIDJSON_FORCEINLINE const Ch* GetStringPointer() const { return RAPIDJSON_GETPOINTER(Ch, data_.s.str); } + RAPIDJSON_FORCEINLINE const Ch* SetStringPointer(const Ch* str) { return RAPIDJSON_SETPOINTER(Ch, data_.s.str, str); } + RAPIDJSON_FORCEINLINE GenericValue* GetElementsPointer() const { return RAPIDJSON_GETPOINTER(GenericValue, data_.a.elements); } + 
RAPIDJSON_FORCEINLINE GenericValue* SetElementsPointer(GenericValue* elements) { return RAPIDJSON_SETPOINTER(GenericValue, data_.a.elements, elements); } + RAPIDJSON_FORCEINLINE Member* GetMembersPointer() const { return RAPIDJSON_GETPOINTER(Member, data_.o.members); } + RAPIDJSON_FORCEINLINE Member* SetMembersPointer(Member* members) { return RAPIDJSON_SETPOINTER(Member, data_.o.members, members); } + + // Initialize this value as array with initial data, without calling destructor. + void SetArrayRaw(GenericValue* values, SizeType count, Allocator& allocator) { + data_.f.flags = kArrayFlag; + if (count) { + GenericValue* e = static_cast(allocator.Malloc(count * sizeof(GenericValue))); + SetElementsPointer(e); + std::memcpy(e, values, count * sizeof(GenericValue)); + } + else + SetElementsPointer(0); + data_.a.size = data_.a.capacity = count; +//NCBI: assign allocator +SetValueAllocator(&allocator); + } + + //! Initialize this value as object with initial data, without calling destructor. + void SetObjectRaw(Member* members, SizeType count, Allocator& allocator) { + data_.f.flags = kObjectFlag; + if (count) { + Member* m = static_cast(allocator.Malloc(count * sizeof(Member))); + SetMembersPointer(m); + std::memcpy(m, members, count * sizeof(Member)); + } + else + SetMembersPointer(0); + data_.o.size = data_.o.capacity = count; +//NCBI: assign allocator +SetValueAllocator(&allocator); + } + + //! Initialize this value as constant string, without calling destructor. + void SetStringRaw(StringRefType s) RAPIDJSON_NOEXCEPT { + data_.f.flags = kConstStringFlag; + SetStringPointer(s); + data_.s.length = s.length; + } + + //! Initialize this value as copy string with initial data, without calling destructor. 
+ void SetStringRaw(StringRefType s, Allocator& allocator) { + Ch* str = 0; + if (ShortString::Usable(s.length)) { + data_.f.flags = kShortStringFlag; + data_.ss.SetLength(s.length); + str = data_.ss.str; + } else { + data_.f.flags = kCopyStringFlag; + data_.s.length = s.length; + str = static_cast(allocator.Malloc((s.length + 1) * sizeof(Ch))); + SetStringPointer(str); + } + std::memcpy(str, s, s.length * sizeof(Ch)); + str[s.length] = '\0'; +//NCBI: assign allocator +SetValueAllocator(&allocator); + } + + //! Assignment without calling destructor + void RawAssign(GenericValue& rhs) RAPIDJSON_NOEXCEPT { + data_ = rhs.data_; + // data_.f.flags = rhs.data_.f.flags; + rhs.data_.f.flags = kNullFlag; +//NCBI: assign allocator +SetValueAllocator(rhs.GetValueAllocator()); + } + + template + bool StringEqual(const GenericValue& rhs) const { + RAPIDJSON_ASSERT(IsString()); + RAPIDJSON_ASSERT(rhs.IsString()); + + const SizeType len1 = GetStringLength(); + const SizeType len2 = rhs.GetStringLength(); + if(len1 != len2) { return false; } + + const Ch* const str1 = GetString(); + const Ch* const str2 = rhs.GetString(); + if(str1 == str2) { return true; } // fast path for constant string + + return (std::memcmp(str1, str2, sizeof(Ch) * len1) == 0); + } + + Data data_; +//NCBI: added allocator +#if 1 +protected: + Allocator* allocator_; +public: + ValueType& RAPIDJSON_NCBI_NOOPTIMIZE SetValueAllocator(Allocator* allocator) { + allocator_ = allocator; + return *this; + } + Allocator* GetValueAllocator(void) const { + return allocator_; + } +#endif +}; + +//! GenericValue with UTF8 encoding +typedef GenericValue > Value; + +/////////////////////////////////////////////////////////////////////////////// +// GenericDocument + +//! A document for parsing JSON text as DOM. +/*! + \note implements Handler concept + \tparam Encoding Encoding for both parsing and string storage. 
+ \tparam Allocator Allocator for allocating memory for the DOM + \tparam StackAllocator Allocator for allocating memory for stack during parsing. + \warning Although GenericDocument inherits from GenericValue, the API does \b not provide any virtual functions, especially no virtual destructor. To avoid memory leaks, do not \c delete a GenericDocument object via a pointer to a GenericValue. +*/ +template , typename StackAllocator = CrtAllocator> +class GenericDocument : public GenericValue { +public: + typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding. + typedef GenericValue ValueType; //!< Value type of the document. + typedef Allocator AllocatorType; //!< Allocator type from template parameter. + + //! Constructor + /*! Creates an empty document of specified type. + \param type Mandatory type of object to create. + \param allocator Optional allocator for allocating memory. + \param stackCapacity Optional initial capacity of stack in bytes. + \param stackAllocator Optional allocator for allocating memory for stack. + */ +// NCBI: moved allocator_into GenericValue +#if 0 + explicit GenericDocument(Type type, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) : + GenericValue(type), allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_() + { + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)(); + } +#else + explicit GenericDocument(Type type, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) : + GenericValue(type), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_() + { + if (!allocator) { + ownAllocator_ = allocator = RAPIDJSON_NEW(Allocator()); + } + ValueType::SetValueAllocator(allocator); + } +#endif + + //! Constructor + /*! Creates an empty document which type is Null. + \param allocator Optional allocator for allocating memory. 
+ \param stackCapacity Optional initial capacity of stack in bytes. + \param stackAllocator Optional allocator for allocating memory for stack. + */ +// NCBI: moved allocator_into GenericValue +#if 0 + GenericDocument(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) : + allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_() + { + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)(); + } +#else + GenericDocument(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) : + ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_() + { + if (!allocator) { + ownAllocator_ = allocator = RAPIDJSON_NEW(Allocator()); + } + ValueType::SetValueAllocator(allocator); + } +#endif + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move constructor in C++11 + GenericDocument(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT + : ValueType(std::forward(rhs)), // explicit cast to avoid prohibited move from Document +// NCBI: moved allocator_into GenericValue +// allocator_(rhs.allocator_), + ownAllocator_(rhs.ownAllocator_), + stack_(std::move(rhs.stack_)), + parseResult_(rhs.parseResult_) + { +// NCBI: moved allocator_into GenericValue +ValueType::SetValueAllocator(rhs.GetValueAllocator()); +// rhs.allocator_ = 0; +rhs.ValueType::SetValueAllocator(0); + rhs.ownAllocator_ = 0; + rhs.parseResult_ = ParseResult(); + } +#endif + + ~GenericDocument() { + Destroy(); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move assignment in C++11 + GenericDocument& operator=(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT + { + // The cast to ValueType is necessary here, because otherwise it would + // attempt to call GenericValue's templated assignment operator. 
+ ValueType::operator=(std::forward(rhs)); + + // Calling the destructor here would prematurely call stack_'s destructor + Destroy(); + +// NCBI: moved allocator_into GenericValue +// allocator_ = rhs.allocator_; +ValueType::SetValueAllocator(rhs.ValueType::GetValueAllocator()); + + ownAllocator_ = rhs.ownAllocator_; + stack_ = std::move(rhs.stack_); + parseResult_ = rhs.parseResult_; + +// NCBI: moved allocator_into GenericValue +rhs.ValueType::SetValueAllocator(0); +// rhs.allocator_ = 0; + rhs.ownAllocator_ = 0; + rhs.parseResult_ = ParseResult(); + + return *this; + } +#endif + + //! Exchange the contents of this document with those of another. + /*! + \param rhs Another document. + \note Constant complexity. + \see GenericValue::Swap + */ + GenericDocument& Swap(GenericDocument& rhs) RAPIDJSON_NOEXCEPT { + ValueType::Swap(rhs); + stack_.Swap(rhs.stack_); +// NCBI: moved allocator_into GenericValue + internal::Swap(ValueType::allocator_, rhs.ValueType::allocator_); + internal::Swap(ownAllocator_, rhs.ownAllocator_); + internal::Swap(parseResult_, rhs.parseResult_); + return *this; + } + + //! free-standing swap function helper + /*! + Helper function to enable support for common swap implementation pattern based on \c std::swap: + \code + void swap(MyClass& a, MyClass& b) { + using std::swap; + swap(a.doc, b.doc); + // ... + } + \endcode + \see Swap() + */ + friend inline void swap(GenericDocument& a, GenericDocument& b) RAPIDJSON_NOEXCEPT { a.Swap(b); } + + //! Populate this document by a generator which produces SAX events. + /*! \tparam Generator A functor with bool f(Handler) prototype. + \param g Generator functor which sends SAX events to the parameter. + \return The document itself for fluent API. 
+ */ + template + GenericDocument& Populate(Generator& g) { + ClearStackOnExit scope(*this); + if (g(*this)) { + RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object + ValueType::operator=(*stack_.template Pop(1));// Move value from stack to document + } + return *this; + } + + //!@name Parse from stream + //!@{ + + //! Parse JSON text from an input stream (with Encoding conversion) + /*! \tparam parseFlags Combination of \ref ParseFlag. + \tparam SourceEncoding Encoding of input stream + \tparam InputStream Type of input stream, implementing Stream concept + \param is Input stream to be parsed. + \return The document itself for fluent API. + \note NCBI: after a successful parse the document's value allocator is re-attached: the owned allocator if there is one, otherwise the allocator that was attached before parsing (i.e. the one supplied by the caller). + */ + template + GenericDocument& ParseStream(InputStream& is) { + GenericReader reader( + stack_.HasAllocator() ? &stack_.GetAllocator() : 0); + ClearStackOnExit scope(*this); + parseResult_ = reader.template Parse(is, *this); + if (parseResult_) { + RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object +//NCBI: remember the allocator attached to the document before the root value is moved in + Allocator* const docAllocator = ValueType::GetValueAllocator(); + ValueType::operator=(*stack_.template Pop(1));// Move value from stack to document +//NCBI added allocator: ownAllocator_ is null when the caller supplied the allocator, so fall back to the saved pointer rather than leaving GetAllocator() to dereference null + ValueType::SetValueAllocator(ownAllocator_ ? ownAllocator_ : docAllocator); + } + return *this; + } + + //! Parse JSON text from an input stream + /*! \tparam parseFlags Combination of \ref ParseFlag. + \tparam InputStream Type of input stream, implementing Stream concept + \param is Input stream to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseStream(InputStream& is) { + return ParseStream(is); + } + + //! Parse JSON text from an input stream (with \ref kParseDefaultFlags) + /*! \tparam InputStream Type of input stream, implementing Stream concept + \param is Input stream to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseStream(InputStream& is) { + return ParseStream(is); + } + //!@} + + //!@name Parse in-place from mutable string + //!@{ + + //! 
Parse JSON text from a mutable string + /*! \tparam parseFlags Combination of \ref ParseFlag. + \param str Mutable zero-terminated string to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseInsitu(Ch* str) { + GenericInsituStringStream s(str); + return ParseStream(s); + } + + //! Parse JSON text from a mutable string (with \ref kParseDefaultFlags) + /*! \param str Mutable zero-terminated string to be parsed. + \return The document itself for fluent API. + */ + GenericDocument& ParseInsitu(Ch* str) { + return ParseInsitu(str); + } + //!@} + + //!@name Parse from read-only string + //!@{ + + //! Parse JSON text from a read-only string (with Encoding conversion) + /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). + \tparam SourceEncoding Transcoding from input Encoding + \param str Read-only zero-terminated string to be parsed. + */ + template + GenericDocument& Parse(const typename SourceEncoding::Ch* str) { + RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); + GenericStringStream s(str); + return ParseStream(s); + } + + //! Parse JSON text from a read-only string + /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). + \param str Read-only zero-terminated string to be parsed. + */ + template + GenericDocument& Parse(const Ch* str) { + return Parse(str); + } + + //! Parse JSON text from a read-only string (with \ref kParseDefaultFlags) + /*! \param str Read-only zero-terminated string to be parsed. 
+ */ + GenericDocument& Parse(const Ch* str) { + return Parse(str); + } + + template + GenericDocument& Parse(const typename SourceEncoding::Ch* str, size_t length) { + RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); + MemoryStream ms(static_cast(str), length * sizeof(typename SourceEncoding::Ch)); + EncodedInputStream is(ms); + ParseStream(is); + return *this; + } + + template + GenericDocument& Parse(const Ch* str, size_t length) { + return Parse(str, length); + } + + GenericDocument& Parse(const Ch* str, size_t length) { + return Parse(str, length); + } + +#if RAPIDJSON_HAS_STDSTRING + template + GenericDocument& Parse(const std::basic_string& str) { + // c_str() is constant complexity according to standard. Should be faster than Parse(const char*, size_t) + return Parse(str.c_str()); + } + + template + GenericDocument& Parse(const std::basic_string& str) { + return Parse(str.c_str()); + } + + GenericDocument& Parse(const std::basic_string& str) { + return Parse(str); + } +#endif // RAPIDJSON_HAS_STDSTRING + + //!@} + + //!@name Handling parse errors + //!@{ + + //! Whether a parse error has occurred in the last parsing. + bool HasParseError() const { return parseResult_.IsError(); } + + //! Get the \ref ParseErrorCode of last parsing. + ParseErrorCode GetParseError() const { return parseResult_.Code(); } + + //! Get the position of last parsing error in input, 0 otherwise. + size_t GetErrorOffset() const { return parseResult_.Offset(); } + +//NCBI: added; replaces the stored parse result with the code and offset taken from \a result + void SetParseResult(const ParseResult& result) { + parseResult_.Set(result.Code(), result.Offset()); + } + + //! Implicit conversion to get the last parse result +#ifndef __clang // -Wdocumentation + /*! \return \ref ParseResult of the last parse operation + + \code + Document doc; + ParseResult ok = doc.Parse(json); + if (!ok) + printf( "JSON parse error: %s (%u)\n", GetParseError_En(ok.Code()), ok.Offset()); + \endcode + */ +#endif + operator ParseResult() const { return parseResult_; } + //!@} + + //! 
Get the allocator of this document. +// NCBI +#if 0 + Allocator& GetAllocator() { + RAPIDJSON_ASSERT(allocator_); + return *allocator_; + } +#else + Allocator& GetAllocator() const { return *ValueType::GetValueAllocator(); } + Allocator* GetOwnAllocator() const { return ownAllocator_; } +#endif + + //! Get the capacity of stack in bytes. + size_t GetStackCapacity() const { return stack_.GetCapacity(); } + +private: + // clear stack on any exit from ParseStream, e.g. due to exception + struct ClearStackOnExit { + explicit ClearStackOnExit(GenericDocument& d) : d_(d) {} + ~ClearStackOnExit() { d_.ClearStack(); } + private: + ClearStackOnExit(const ClearStackOnExit&); + ClearStackOnExit& operator=(const ClearStackOnExit&); + GenericDocument& d_; + }; + + // callers of the following private Handler functions + // template friend class GenericReader; // for parsing + template friend class GenericValue; // for deep copying + +public: + // Implementation of Handler + bool Null() { (new (stack_.template Push()) ValueType( ))->SetValueAllocator(&GetAllocator()); return true; } + bool Bool(bool b) { (new (stack_.template Push()) ValueType(b))->SetValueAllocator(&GetAllocator()); return true; } + bool Int(int i) { (new (stack_.template Push()) ValueType(i))->SetValueAllocator(&GetAllocator()); return true; } + bool Uint(unsigned i) { (new (stack_.template Push()) ValueType(i))->SetValueAllocator(&GetAllocator()); return true; } + bool Int64(int64_t i) { (new (stack_.template Push()) ValueType(i))->SetValueAllocator(&GetAllocator()); return true; } + bool Uint64(uint64_t i) { (new (stack_.template Push()) ValueType(i))->SetValueAllocator(&GetAllocator()); return true; } + bool Double(double d) { (new (stack_.template Push()) ValueType(d))->SetValueAllocator(&GetAllocator()); return true; } + + bool RawNumber(const Ch* str, SizeType length, bool copy) { + if (copy) + new (stack_.template Push()) ValueType(str, length, GetAllocator()); + else + new (stack_.template Push()) 
ValueType(str, length); + return true; + } + + bool String(const Ch* str, SizeType length, bool copy) { + if (copy) + new (stack_.template Push()) ValueType(str, length, GetAllocator()); + else + new (stack_.template Push()) ValueType(str, length); + return true; + } + + bool StartObject() { (new (stack_.template Push()) ValueType(kObjectType))->SetValueAllocator(&GetAllocator()); return true; } + + bool Key(const Ch* str, SizeType length, bool copy) { return String(str, length, copy); } + + bool EndObject(SizeType memberCount) { + typename ValueType::Member* members = stack_.template Pop(memberCount); + stack_.template Top()->SetObjectRaw(members, memberCount, GetAllocator()); + return true; + } + + bool StartArray() { (new (stack_.template Push()) ValueType(kArrayType))->SetValueAllocator(&GetAllocator()); return true; } + + bool EndArray(SizeType elementCount) { + ValueType* elements = stack_.template Pop(elementCount); + stack_.template Top()->SetArrayRaw(elements, elementCount, GetAllocator()); + return true; + } + +private: + //! Prohibit copying + GenericDocument(const GenericDocument&); + //! Prohibit assignment + GenericDocument& operator=(const GenericDocument&); + + void ClearStack() { + if (Allocator::kNeedFree) + while (stack_.GetSize() > 0) // Here assumes all elements in stack array are GenericValue (Member is actually 2 GenericValue objects) + (stack_.template Pop(1))->~ValueType(); + else + stack_.Clear(); + stack_.ShrinkToFit(); + } + + void Destroy() { + RAPIDJSON_DELETE(ownAllocator_); + } + + static const size_t kDefaultStackCapacity = 1024; +// NCBI: moved allocator_into GenericValue +// Allocator* allocator_; + Allocator* ownAllocator_; + internal::Stack stack_; + ParseResult parseResult_; +}; + +//! GenericDocument with UTF8 encoding +typedef GenericDocument > Document; + +//! Helper class for accessing Value of array type. +/*! + Instance of this helper class is obtained by \c GenericValue::GetArray(). 
+ In addition to all APIs for array type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1. +*/ +template +class GenericArray { +public: + typedef GenericArray ConstArray; + typedef GenericArray Array; + typedef ValueT PlainType; + typedef typename internal::MaybeAddConst::Type ValueType; + typedef ValueType* ValueIterator; // This may be const or non-const iterator + typedef const ValueT* ConstValueIterator; + typedef typename ValueType::AllocatorType AllocatorType; + typedef typename ValueType::StringRefType StringRefType; + + template + friend class GenericValue; + + GenericArray(const GenericArray& rhs) : value_(rhs.value_) {} + GenericArray& operator=(const GenericArray& rhs) { value_ = rhs.value_; return *this; } + ~GenericArray() {} + + SizeType Size() const { return value_.Size(); } + SizeType Capacity() const { return value_.Capacity(); } + bool Empty() const { return value_.Empty(); } + void Clear() const { value_.Clear(); } + ValueType& operator[](SizeType index) const { return value_[index]; } + ValueIterator Begin() const { return value_.Begin(); } + ValueIterator End() const { return value_.End(); } + GenericArray Reserve(SizeType newCapacity, AllocatorType &allocator) const { value_.Reserve(newCapacity, allocator); return *this; } + GenericArray PushBack(ValueType& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericArray PushBack(ValueType&& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericArray PushBack(StringRefType value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } + template RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (const GenericArray&)) PushBack(T value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } + GenericArray PopBack() const { 
value_.PopBack(); return *this; } + ValueIterator Erase(ConstValueIterator pos) const { return value_.Erase(pos); } + ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) const { return value_.Erase(first, last); } + +#if RAPIDJSON_HAS_CXX11_RANGE_FOR + ValueIterator begin() const { return value_.Begin(); } + ValueIterator end() const { return value_.End(); } +#endif + +private: + GenericArray(); + GenericArray(ValueType& value) : value_(value) {} + ValueType& value_; +}; + +//! Helper class for accessing Value of object type. +/*! + Instance of this helper class is obtained by \c GenericValue::GetObject(). + In addition to all APIs for array type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1. +*/ +template +class GenericObject { +public: + typedef GenericObject ConstObject; + typedef GenericObject Object; + typedef ValueT PlainType; + typedef typename internal::MaybeAddConst::Type ValueType; + typedef GenericMemberIterator MemberIterator; // This may be const or non-const iterator + typedef GenericMemberIterator ConstMemberIterator; + typedef typename ValueType::AllocatorType AllocatorType; + typedef typename ValueType::StringRefType StringRefType; + typedef typename ValueType::EncodingType EncodingType; + typedef typename ValueType::Ch Ch; + + template + friend class GenericValue; + + GenericObject(const GenericObject& rhs) : value_(rhs.value_) {} + GenericObject& operator=(const GenericObject& rhs) { value_ = rhs.value_; return *this; } + ~GenericObject() {} + + SizeType MemberCount() const { return value_.MemberCount(); } + bool ObjectEmpty() const { return value_.ObjectEmpty(); } + template ValueType& operator[](T* name) const { return value_[name]; } + template ValueType& operator[](const GenericValue& name) const { return value_[name]; } +#if RAPIDJSON_HAS_STDSTRING + ValueType& operator[](const std::basic_string& name) const { return value_[name]; } +#endif + MemberIterator MemberBegin() const { return 
value_.MemberBegin(); } + MemberIterator MemberEnd() const { return value_.MemberEnd(); } + bool HasMember(const Ch* name) const { return value_.HasMember(name); } +#if RAPIDJSON_HAS_STDSTRING + bool HasMember(const std::basic_string& name) const { return value_.HasMember(name); } +#endif + template bool HasMember(const GenericValue& name) const { return value_.HasMember(name); } + MemberIterator FindMember(const Ch* name) const { return value_.FindMember(name); } + template MemberIterator FindMember(const GenericValue& name) const { return value_.FindMember(name); } +#if RAPIDJSON_HAS_STDSTRING + MemberIterator FindMember(const std::basic_string& name) const { return value_.FindMember(name); } +#endif + GenericObject AddMember(ValueType& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(ValueType& name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } +#if RAPIDJSON_HAS_STDSTRING + GenericObject AddMember(ValueType& name, std::basic_string& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } +#endif + template RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (ValueType&)) AddMember(ValueType& name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericObject AddMember(ValueType&& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(ValueType&& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(ValueType& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(StringRefType name, 
ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericObject AddMember(StringRefType name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(StringRefType name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + template RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (GenericObject)) AddMember(StringRefType name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + void RemoveAllMembers() { return value_.RemoveAllMembers(); } + bool RemoveMember(const Ch* name) const { return value_.RemoveMember(name); } +#if RAPIDJSON_HAS_STDSTRING + bool RemoveMember(const std::basic_string& name) const { return value_.RemoveMember(name); } +#endif + template bool RemoveMember(const GenericValue& name) const { return value_.RemoveMember(name); } + MemberIterator RemoveMember(MemberIterator m) const { return value_.RemoveMember(m); } + MemberIterator EraseMember(ConstMemberIterator pos) const { return value_.EraseMember(pos); } + MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) const { return value_.EraseMember(first, last); } + bool EraseMember(const Ch* name) const { return value_.EraseMember(name); } +#if RAPIDJSON_HAS_STDSTRING + bool EraseMember(const std::basic_string& name) const { return EraseMember(ValueType(StringRef(name))); } +#endif + template bool EraseMember(const GenericValue& name) const { return value_.EraseMember(name); } + +#if RAPIDJSON_HAS_CXX11_RANGE_FOR + MemberIterator begin() const { return value_.MemberBegin(); } + MemberIterator end() const { return value_.MemberEnd(); } +#endif + +private: + GenericObject(); + GenericObject(ValueType& value) : value_(value) {} + ValueType& value_; +}; + 
+RAPIDJSON_NAMESPACE_END +#ifdef _MINWINDEF_ // see: http://stackoverflow.com/questions/22744262/cant-call-stdmax-because-minwindef-h-defines-max +#ifndef NOMINMAX +#pragma pop_macro("min") +#pragma pop_macro("max") +#endif +#endif +RAPIDJSON_DIAG_POP + +#endif // RAPIDJSON_DOCUMENT_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/encodedstream.h b/c++/include/misc/jsonwrapp/rapidjson11/encodedstream.h new file mode 100644 index 00000000..223601c0 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/encodedstream.h @@ -0,0 +1,299 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ENCODEDSTREAM_H_ +#define RAPIDJSON_ENCODEDSTREAM_H_ + +#include "stream.h" +#include "memorystream.h" + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Input byte stream wrapper with a statically bound encoding. +/*! + \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. + \tparam InputByteStream Type of input byte stream. For example, FileReadStream. 
+*/ +template +class EncodedInputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); +public: + typedef typename Encoding::Ch Ch; + + EncodedInputStream(InputByteStream& is) : is_(is) { + current_ = Encoding::TakeBOM(is_); + } + + Ch Peek() const { return current_; } + Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; } + size_t Tell() const { return is_.Tell(); } + + // Not implemented + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + EncodedInputStream(const EncodedInputStream&); + EncodedInputStream& operator=(const EncodedInputStream&); + + InputByteStream& is_; + Ch current_; +}; + +//! Specialized for UTF8 MemoryStream. +template <> +class EncodedInputStream, MemoryStream> { +public: + typedef UTF8<>::Ch Ch; + + EncodedInputStream(MemoryStream& is) : is_(is) { + if (static_cast(is_.Peek()) == 0xEFu) is_.Take(); + if (static_cast(is_.Peek()) == 0xBBu) is_.Take(); + if (static_cast(is_.Peek()) == 0xBFu) is_.Take(); + } + Ch Peek() const { return is_.Peek(); } + Ch Take() { return is_.Take(); } + size_t Tell() const { return is_.Tell(); } + + // Not implemented + void Put(Ch) {} + void Flush() {} + Ch* PutBegin() { return 0; } + size_t PutEnd(Ch*) { return 0; } + + MemoryStream& is_; + +private: + EncodedInputStream(const EncodedInputStream&); + EncodedInputStream& operator=(const EncodedInputStream&); +}; + +//! Output byte stream wrapper with statically bound encoding. +/*! + \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. + \tparam OutputByteStream Type of input byte stream. For example, FileWriteStream. 
+*/ +template +class EncodedOutputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); +public: + typedef typename Encoding::Ch Ch; + + EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { + if (putBOM) + Encoding::PutBOM(os_); + } + + void Put(Ch c) { Encoding::Put(os_, c); } + void Flush() { os_.Flush(); } + + // Not implemented + Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} + Ch Take() { RAPIDJSON_ASSERT(false); return 0;} + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + EncodedOutputStream(const EncodedOutputStream&); + EncodedOutputStream& operator=(const EncodedOutputStream&); + + OutputByteStream& os_; +}; + +#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8::x, UTF16LE::x, UTF16BE::x, UTF32LE::x, UTF32BE::x + +//! Input stream wrapper with dynamically bound encoding and automatic encoding detection. +/*! + \tparam CharType Type of character for reading. + \tparam InputByteStream type of input byte stream to be wrapped. +*/ +template +class AutoUTFInputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); +public: + typedef CharType Ch; + + //! Constructor. + /*! + \param is input stream to be wrapped. + \param type UTF encoding type if it is not detected from the stream. 
+ */ + AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) { + RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); + DetectType(); + static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) }; + takeFunc_ = f[type_]; + current_ = takeFunc_(*is_); + } + + UTFType GetType() const { return type_; } + bool HasBOM() const { return hasBOM_; } + + Ch Peek() const { return current_; } + Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; } + size_t Tell() const { return is_->Tell(); } + + // Not implemented + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + AutoUTFInputStream(const AutoUTFInputStream&); + AutoUTFInputStream& operator=(const AutoUTFInputStream&); + + // Detect encoding type with BOM or RFC 4627 + void DetectType() { + // BOM (Byte Order Mark): + // 00 00 FE FF UTF-32BE + // FF FE 00 00 UTF-32LE + // FE FF UTF-16BE + // FF FE UTF-16LE + // EF BB BF UTF-8 + + const unsigned char* c = reinterpret_cast(is_->Peek4()); + if (!c) + return; + + unsigned bom = static_cast(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24)); + hasBOM_ = false; + if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } + else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } + else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); } + else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); } + else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); } + + // RFC 4627: Section 3 + // "Since the first two characters of a JSON text will always be ASCII + // characters [RFC0020], it is possible to determine 
whether an octet + // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking + // at the pattern of nulls in the first four octets." + // 00 00 00 xx UTF-32BE + // 00 xx 00 xx UTF-16BE + // xx 00 00 00 UTF-32LE + // xx 00 xx 00 UTF-16LE + // xx xx xx xx UTF-8 + + if (!hasBOM_) { + int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0); + switch (pattern) { + case 0x08: type_ = kUTF32BE; break; + case 0x0A: type_ = kUTF16BE; break; + case 0x01: type_ = kUTF32LE; break; + case 0x05: type_ = kUTF16LE; break; + case 0x0F: type_ = kUTF8; break; + default: break; // Use type defined by user. + } + } + + // Runtime check whether the size of character type is sufficient. It only perform checks with assertion. + if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); + if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); + } + + typedef Ch (*TakeFunc)(InputByteStream& is); + InputByteStream* is_; + UTFType type_; + Ch current_; + TakeFunc takeFunc_; + bool hasBOM_; +}; + +//! Output stream wrapper with dynamically bound encoding and automatic encoding detection. +/*! + \tparam CharType Type of character for writing. + \tparam OutputByteStream type of output byte stream to be wrapped. +*/ +template +class AutoUTFOutputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); +public: + typedef CharType Ch; + + //! Constructor. + /*! + \param os output stream to be wrapped. + \param type UTF encoding type. + \param putBOM Whether to write BOM at the beginning of the stream. + */ + AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) { + RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); + + // Runtime check whether the size of character type is sufficient. It only perform checks with assertion. 
+ if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); + if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); + + static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) }; + putFunc_ = f[type_]; + + if (putBOM) + PutBOM(); + } + + UTFType GetType() const { return type_; } + + void Put(Ch c) { putFunc_(*os_, c); } + void Flush() { os_->Flush(); } + + // Not implemented + Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} + Ch Take() { RAPIDJSON_ASSERT(false); return 0;} + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + AutoUTFOutputStream(const AutoUTFOutputStream&); + AutoUTFOutputStream& operator=(const AutoUTFOutputStream&); + + void PutBOM() { + typedef void (*PutBOMFunc)(OutputByteStream&); + static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) }; + f[type_](*os_); + } + + typedef void (*PutFunc)(OutputByteStream&, Ch); + + OutputByteStream* os_; + UTFType type_; + PutFunc putFunc_; +}; + +#undef RAPIDJSON_ENCODINGS_FUNC + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_FILESTREAM_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/encodings.h b/c++/include/misc/jsonwrapp/rapidjson11/encodings.h new file mode 100644 index 00000000..ed7d44d3 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/encodings.h @@ -0,0 +1,716 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ENCODINGS_H_ +#define RAPIDJSON_ENCODINGS_H_ + +#include "rapidjson.h" + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data +RAPIDJSON_DIAG_OFF(4702) // unreachable code +#elif defined(__GNUC__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +RAPIDJSON_DIAG_OFF(overflow) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// Encoding + +/*! \class rapidjson::Encoding + \brief Concept for encoding of Unicode characters. + +\code +concept Encoding { + typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition. + + enum { supportUnicode = 1 }; // or 0 if not supporting unicode + + //! \brief Encode a Unicode codepoint to an output stream. + //! \param os Output stream. + //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. + template + static void Encode(OutputStream& os, unsigned codepoint); + + //! \brief Decode a Unicode codepoint from an input stream. + //! \param is Input stream. + //! \param codepoint Output of the unicode codepoint. + //! \return true if a valid codepoint can be decoded from the stream. + template + static bool Decode(InputStream& is, unsigned* codepoint); + + //! \brief Validate one Unicode codepoint from an encoded stream. + //! \param is Input stream to obtain codepoint. + //! \param os Output for copying one codepoint. + //! \return true if it is valid. + //! 
\note This function just validating and copying the codepoint without actually decode it. + template + static bool Validate(InputStream& is, OutputStream& os); + + // The following functions are deal with byte streams. + + //! Take a character from input byte stream, skip BOM if exist. + template + static CharType TakeBOM(InputByteStream& is); + + //! Take a character from input byte stream. + template + static Ch Take(InputByteStream& is); + + //! Put BOM to output byte stream. + template + static void PutBOM(OutputByteStream& os); + + //! Put a character to output byte stream. + template + static void Put(OutputByteStream& os, Ch c); +}; +\endcode +*/ + +/////////////////////////////////////////////////////////////////////////////// +// UTF8 + +//! UTF-8 encoding. +/*! http://en.wikipedia.org/wiki/UTF-8 + http://tools.ietf.org/html/rfc3629 + \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char. + \note implements Encoding concept +*/ +template +struct UTF8 { + typedef CharType Ch; + + enum { supportUnicode = 1 }; + + template + static void Encode(OutputStream& os, unsigned codepoint) { + if (codepoint <= 0x7F) + os.Put(static_cast(codepoint & 0xFF)); + else if (codepoint <= 0x7FF) { + os.Put(static_cast(0xC0 | ((codepoint >> 6) & 0xFF))); + os.Put(static_cast(0x80 | ((codepoint & 0x3F)))); + } + else if (codepoint <= 0xFFFF) { + os.Put(static_cast(0xE0 | ((codepoint >> 12) & 0xFF))); + os.Put(static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + os.Put(static_cast(0x80 | (codepoint & 0x3F))); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + os.Put(static_cast(0xF0 | ((codepoint >> 18) & 0xFF))); + os.Put(static_cast(0x80 | ((codepoint >> 12) & 0x3F))); + os.Put(static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + os.Put(static_cast(0x80 | (codepoint & 0x3F))); + } + } + + template + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + if (codepoint <= 0x7F) + PutUnsafe(os, static_cast(codepoint & 0xFF)); + else if (codepoint <= 
0x7FF) { + PutUnsafe(os, static_cast(0xC0 | ((codepoint >> 6) & 0xFF))); + PutUnsafe(os, static_cast(0x80 | ((codepoint & 0x3F)))); + } + else if (codepoint <= 0xFFFF) { + PutUnsafe(os, static_cast(0xE0 | ((codepoint >> 12) & 0xFF))); + PutUnsafe(os, static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + PutUnsafe(os, static_cast(0x80 | (codepoint & 0x3F))); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + PutUnsafe(os, static_cast(0xF0 | ((codepoint >> 18) & 0xFF))); + PutUnsafe(os, static_cast(0x80 | ((codepoint >> 12) & 0x3F))); + PutUnsafe(os, static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + PutUnsafe(os, static_cast(0x80 | (codepoint & 0x3F))); + } + } + + template + static bool Decode(InputStream& is, unsigned* codepoint) { +#define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast(c) & 0x3Fu) +#define TRANS(mask) result &= ((GetRange(static_cast(c)) & mask) != 0) +#define TAIL() COPY(); TRANS(0x70) + typename InputStream::Ch c = is.Take(); + if (!(c & 0x80)) { + *codepoint = static_cast(c); + return true; + } + + unsigned char type = GetRange(static_cast(c)); + if (type >= 32) { + *codepoint = 0; + } else { + *codepoint = (0xFFu >> type) & static_cast(c); + } + bool result = true; + switch (type) { + case 2: TAIL(); return result; + case 3: TAIL(); TAIL(); return result; + case 4: COPY(); TRANS(0x50); TAIL(); return result; + case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; + case 6: TAIL(); TAIL(); TAIL(); return result; + case 10: COPY(); TRANS(0x20); TAIL(); return result; + case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; + default: return false; + } +#undef COPY +#undef TRANS +#undef TAIL + } + + template + static bool Validate(InputStream& is, OutputStream& os) { +#define COPY() os.Put(c = is.Take()) +#define TRANS(mask) result &= ((GetRange(static_cast(c)) & mask) != 0) +#define TAIL() COPY(); TRANS(0x70) + Ch c; + COPY(); + if (!(c & 0x80)) + return true; + + bool result = true; + switch 
(GetRange(static_cast(c))) { + case 2: TAIL(); return result; + case 3: TAIL(); TAIL(); return result; + case 4: COPY(); TRANS(0x50); TAIL(); return result; + case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; + case 6: TAIL(); TAIL(); TAIL(); return result; + case 10: COPY(); TRANS(0x20); TAIL(); return result; + case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; + default: return false; + } +#undef COPY +#undef TRANS +#undef TAIL + } + + static unsigned char GetRange(unsigned char c) { + // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types. + static const unsigned char type[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, + 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, + 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, + 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + }; + return type[c]; + } + + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + typename InputByteStream::Ch c = Take(is); + if (static_cast(c) != 0xEFu) return c; + c = is.Take(); + if (static_cast(c) != 0xBBu) return c; + c = is.Take(); + if (static_cast(c) != 0xBFu) return c; + c = is.Take(); + return c; + } + + template + static Ch Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + return 
static_cast(is.Take()); + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast(0xEFu)); + os.Put(static_cast(0xBBu)); + os.Put(static_cast(0xBFu)); + } + + template + static void Put(OutputByteStream& os, Ch c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast(c)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF16 + +//! UTF-16 encoding. +/*! http://en.wikipedia.org/wiki/UTF-16 + http://tools.ietf.org/html/rfc2781 + \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. + \note implements Encoding concept + + \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. + For streaming, use UTF16LE and UTF16BE, which handle endianness. +*/ +template +struct UTF16 { + typedef CharType Ch; + RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2); + + enum { supportUnicode = 1 }; + + template + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); + if (codepoint <= 0xFFFF) { + RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair + os.Put(static_cast(codepoint)); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + unsigned v = codepoint - 0x10000; + os.Put(static_cast((v >> 10) | 0xD800)); + os.Put(static_cast((v & 0x3FF) | 0xDC00)); + } + } + + + template + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); + if (codepoint <= 0xFFFF) { + RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair + PutUnsafe(os, static_cast(codepoint)); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + unsigned v = codepoint - 
0x10000; + PutUnsafe(os, static_cast((v >> 10) | 0xD800)); + PutUnsafe(os, static_cast((v & 0x3FF) | 0xDC00)); + } + } + + template + static bool Decode(InputStream& is, unsigned* codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); + typename InputStream::Ch c = is.Take(); + if (c < 0xD800 || c > 0xDFFF) { + *codepoint = static_cast(c); + return true; + } + else if (c <= 0xDBFF) { + *codepoint = (static_cast(c) & 0x3FF) << 10; + c = is.Take(); + *codepoint |= (static_cast(c) & 0x3FF); + *codepoint += 0x10000; + return c >= 0xDC00 && c <= 0xDFFF; + } + return false; + } + + template + static bool Validate(InputStream& is, OutputStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); + typename InputStream::Ch c; + os.Put(static_cast(c = is.Take())); + if (c < 0xD800 || c > 0xDFFF) + return true; + else if (c <= 0xDBFF) { + os.Put(c = is.Take()); + return c >= 0xDC00 && c <= 0xDFFF; + } + return false; + } +}; + +//! UTF-16 little endian encoding. +template +struct UTF16LE : UTF16 { + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return static_cast(c) == 0xFEFFu ? 
Take(is) : c; + } + + template + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + unsigned c = static_cast(is.Take()); + c |= static_cast(static_cast(is.Take())) << 8; + return static_cast(c); + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast(0xFFu)); + os.Put(static_cast(0xFEu)); + } + + template + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast(static_cast(c) & 0xFFu)); + os.Put(static_cast((static_cast(c) >> 8) & 0xFFu)); + } +}; + +//! UTF-16 big endian encoding. +template +struct UTF16BE : UTF16 { + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return static_cast(c) == 0xFEFFu ? Take(is) : c; + } + + template + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + unsigned c = static_cast(static_cast(is.Take())) << 8; + c |= static_cast(is.Take()); + return static_cast(c); + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast(0xFEu)); + os.Put(static_cast(0xFFu)); + } + + template + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast((static_cast(c) >> 8) & 0xFFu)); + os.Put(static_cast(static_cast(c) & 0xFFu)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF32 + +//! UTF-32 encoding. +/*! http://en.wikipedia.org/wiki/UTF-32 + \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. 
+ \note implements Encoding concept + + \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. + For streaming, use UTF32LE and UTF32BE, which handle endianness. +*/ +template +struct UTF32 { + typedef CharType Ch; + RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4); + + enum { supportUnicode = 1 }; + + template + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + os.Put(codepoint); + } + + template + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + PutUnsafe(os, codepoint); + } + + template + static bool Decode(InputStream& is, unsigned* codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); + Ch c = is.Take(); + *codepoint = c; + return c <= 0x10FFFF; + } + + template + static bool Validate(InputStream& is, OutputStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); + Ch c; + os.Put(c = is.Take()); + return c <= 0x10FFFF; + } +}; + +//! UTF-32 little endian enocoding. +template +struct UTF32LE : UTF32 { + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return static_cast(c) == 0x0000FEFFu ? 
Take(is) : c; + } + + template + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + unsigned c = static_cast(is.Take()); + c |= static_cast(static_cast(is.Take())) << 8; + c |= static_cast(static_cast(is.Take())) << 16; + c |= static_cast(static_cast(is.Take())) << 24; + return static_cast(c); + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast(0xFFu)); + os.Put(static_cast(0xFEu)); + os.Put(static_cast(0x00u)); + os.Put(static_cast(0x00u)); + } + + template + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast(c & 0xFFu)); + os.Put(static_cast((c >> 8) & 0xFFu)); + os.Put(static_cast((c >> 16) & 0xFFu)); + os.Put(static_cast((c >> 24) & 0xFFu)); + } +}; + +//! UTF-32 big endian encoding. +template +struct UTF32BE : UTF32 { + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return static_cast(c) == 0x0000FEFFu ? 
Take(is) : c; + } + + template + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + unsigned c = static_cast(static_cast(is.Take())) << 24; + c |= static_cast(static_cast(is.Take())) << 16; + c |= static_cast(static_cast(is.Take())) << 8; + c |= static_cast(static_cast(is.Take())); + return static_cast(c); + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast(0x00u)); + os.Put(static_cast(0x00u)); + os.Put(static_cast(0xFEu)); + os.Put(static_cast(0xFFu)); + } + + template + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast((c >> 24) & 0xFFu)); + os.Put(static_cast((c >> 16) & 0xFFu)); + os.Put(static_cast((c >> 8) & 0xFFu)); + os.Put(static_cast(c & 0xFFu)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// ASCII + +//! ASCII encoding. +/*! http://en.wikipedia.org/wiki/ASCII + \tparam CharType Code unit for storing 7-bit ASCII data. Default is char. 
+ \note implements Encoding concept +*/ +template +struct ASCII { + typedef CharType Ch; + + enum { supportUnicode = 0 }; + + template + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_ASSERT(codepoint <= 0x7F); + os.Put(static_cast(codepoint & 0xFF)); + } + + template + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + RAPIDJSON_ASSERT(codepoint <= 0x7F); + PutUnsafe(os, static_cast(codepoint & 0xFF)); + } + + template + static bool Decode(InputStream& is, unsigned* codepoint) { + uint8_t c = static_cast(is.Take()); + *codepoint = c; + return c <= 0X7F; + } + + template + static bool Validate(InputStream& is, OutputStream& os) { + uint8_t c = static_cast(is.Take()); + os.Put(static_cast(c)); + return c <= 0x7F; + } + + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + uint8_t c = static_cast(Take(is)); + return static_cast(c); + } + + template + static Ch Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + return static_cast(is.Take()); + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + (void)os; + } + + template + static void Put(OutputByteStream& os, Ch c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast(c)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// AutoUTF + +//! Runtime-specified UTF encoding type of a stream. +enum UTFType { + kUTF8 = 0, //!< UTF-8. + kUTF16LE = 1, //!< UTF-16 little endian. + kUTF16BE = 2, //!< UTF-16 big endian. + kUTF32LE = 3, //!< UTF-32 little endian. + kUTF32BE = 4 //!< UTF-32 big endian. +}; + +//! Dynamically select encoding according to stream's runtime-specified UTF encoding type. +/*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides GetType(). 
+*/ +template +struct AutoUTF { + typedef CharType Ch; + + enum { supportUnicode = 1 }; + +#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8::x, UTF16LE::x, UTF16BE::x, UTF32LE::x, UTF32BE::x + + template + RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) { + typedef void (*EncodeFunc)(OutputStream&, unsigned); + static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) }; + (*f[os.GetType()])(os, codepoint); + } + + template + RAPIDJSON_FORCEINLINE static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + typedef void (*EncodeFunc)(OutputStream&, unsigned); + static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) }; + (*f[os.GetType()])(os, codepoint); + } + + template + RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { + typedef bool (*DecodeFunc)(InputStream&, unsigned*); + static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) }; + return (*f[is.GetType()])(is, codepoint); + } + + template + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { + typedef bool (*ValidateFunc)(InputStream&, OutputStream&); + static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) }; + return (*f[is.GetType()])(is, os); + } + +#undef RAPIDJSON_ENCODINGS_FUNC +}; + +/////////////////////////////////////////////////////////////////////////////// +// Transcoder + +//! Encoding conversion. +template +struct Transcoder { + //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream. 
+ template + RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) { + unsigned codepoint; + if (!SourceEncoding::Decode(is, &codepoint)) + return false; + TargetEncoding::Encode(os, codepoint); + return true; + } + + template + RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) { + unsigned codepoint; + if (!SourceEncoding::Decode(is, &codepoint)) + return false; + TargetEncoding::EncodeUnsafe(os, codepoint); + return true; + } + + //! Validate one Unicode codepoint from an encoded stream. + template + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { + return Transcode(is, os); // Since source/target encoding is different, must transcode. + } +}; + +// Forward declaration. +template +inline void PutUnsafe(Stream& stream, typename Stream::Ch c); + +//! Specialization of Transcoder with same source and target encoding. +template +struct Transcoder { + template + RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) { + os.Put(is.Take()); // Just copy one code unit. This semantic is different from primary template class. + return true; + } + + template + RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) { + PutUnsafe(os, is.Take()); // Just copy one code unit. This semantic is different from primary template class. 
+ return true; + } + + template + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { + return Encoding::Validate(is, os); // source/target encoding are the same + } +}; + +RAPIDJSON_NAMESPACE_END + +#if defined(__GNUC__) || defined(_MSC_VER) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_ENCODINGS_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/error/en.h b/c++/include/misc/jsonwrapp/rapidjson11/error/en.h new file mode 100644 index 00000000..2db838bf --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/error/en.h @@ -0,0 +1,74 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ERROR_EN_H_ +#define RAPIDJSON_ERROR_EN_H_ + +#include "error.h" + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(switch-enum) +RAPIDJSON_DIAG_OFF(covered-switch-default) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Maps error code of parsing into error message. +/*! + \ingroup RAPIDJSON_ERRORS + \param parseErrorCode Error code obtained in parsing. + \return the error message. + \note User can make a copy of this function for localization. + Using switch-case is safer for future modification of error codes. 
+*/ +inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErrorCode) { + switch (parseErrorCode) { + case kParseErrorNone: return RAPIDJSON_ERROR_STRING("No error."); + + case kParseErrorDocumentEmpty: return RAPIDJSON_ERROR_STRING("The document is empty."); + case kParseErrorDocumentRootNotSingular: return RAPIDJSON_ERROR_STRING("The document root must not be followed by other values."); + + case kParseErrorValueInvalid: return RAPIDJSON_ERROR_STRING("Invalid value."); + + case kParseErrorObjectMissName: return RAPIDJSON_ERROR_STRING("Missing a name for object member."); + case kParseErrorObjectMissColon: return RAPIDJSON_ERROR_STRING("Missing a colon after a name of object member."); + case kParseErrorObjectMissCommaOrCurlyBracket: return RAPIDJSON_ERROR_STRING("Missing a comma or '}' after an object member."); + + case kParseErrorArrayMissCommaOrSquareBracket: return RAPIDJSON_ERROR_STRING("Missing a comma or ']' after an array element."); + + case kParseErrorStringUnicodeEscapeInvalidHex: return RAPIDJSON_ERROR_STRING("Incorrect hex digit after \\u escape in string."); + case kParseErrorStringUnicodeSurrogateInvalid: return RAPIDJSON_ERROR_STRING("The surrogate pair in string is invalid."); + case kParseErrorStringEscapeInvalid: return RAPIDJSON_ERROR_STRING("Invalid escape character in string."); + case kParseErrorStringMissQuotationMark: return RAPIDJSON_ERROR_STRING("Missing a closing quotation mark in string."); + case kParseErrorStringInvalidEncoding: return RAPIDJSON_ERROR_STRING("Invalid encoding in string."); + + case kParseErrorNumberTooBig: return RAPIDJSON_ERROR_STRING("Number too big to be stored in double."); + case kParseErrorNumberMissFraction: return RAPIDJSON_ERROR_STRING("Miss fraction part in number."); + case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number."); + + case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error."); + case 
kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error."); + + default: return RAPIDJSON_ERROR_STRING("Unknown error."); + } +} + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_ERROR_EN_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/error/error.h b/c++/include/misc/jsonwrapp/rapidjson11/error/error.h new file mode 100644 index 00000000..95cb31a7 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/error/error.h @@ -0,0 +1,155 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ERROR_ERROR_H_ +#define RAPIDJSON_ERROR_ERROR_H_ + +#include "../rapidjson.h" + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +#endif + +/*! \file error.h */ + +/*! \defgroup RAPIDJSON_ERRORS RapidJSON error handling */ + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ERROR_CHARTYPE + +//! Character type of error messages. +/*! \ingroup RAPIDJSON_ERRORS + The default character type is \c char. + On Windows, user can define this macro as \c TCHAR for supporting both + unicode/non-unicode settings. 
+*/ +#ifndef RAPIDJSON_ERROR_CHARTYPE +#define RAPIDJSON_ERROR_CHARTYPE char +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ERROR_STRING + +//! Macro for converting string literial to \ref RAPIDJSON_ERROR_CHARTYPE[]. +/*! \ingroup RAPIDJSON_ERRORS + By default this conversion macro does nothing. + On Windows, user can define this macro as \c _T(x) for supporting both + unicode/non-unicode settings. +*/ +#ifndef RAPIDJSON_ERROR_STRING +#define RAPIDJSON_ERROR_STRING(x) x +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// ParseErrorCode + +//! Error code of parsing. +/*! \ingroup RAPIDJSON_ERRORS + \see GenericReader::Parse, GenericReader::GetParseErrorCode +*/ +enum ParseErrorCode { + kParseErrorNone = 0, //!< No error. + + kParseErrorDocumentEmpty, //!< The document is empty. + kParseErrorDocumentRootNotSingular, //!< The document root must not follow by other values. + + kParseErrorValueInvalid, //!< Invalid value. + + kParseErrorObjectMissName, //!< Missing a name for object member. + kParseErrorObjectMissColon, //!< Missing a colon after a name of object member. + kParseErrorObjectMissCommaOrCurlyBracket, //!< Missing a comma or '}' after an object member. + + kParseErrorArrayMissCommaOrSquareBracket, //!< Missing a comma or ']' after an array element. + + kParseErrorStringUnicodeEscapeInvalidHex, //!< Incorrect hex digit after \\u escape in string. + kParseErrorStringUnicodeSurrogateInvalid, //!< The surrogate pair in string is invalid. + kParseErrorStringEscapeInvalid, //!< Invalid escape character in string. + kParseErrorStringMissQuotationMark, //!< Missing a closing quotation mark in string. + kParseErrorStringInvalidEncoding, //!< Invalid encoding in string. + + kParseErrorNumberTooBig, //!< Number too big to be stored in double. + kParseErrorNumberMissFraction, //!< Miss fraction part in number. 
+ kParseErrorNumberMissExponent, //!< Miss exponent in number. + + kParseErrorTermination, //!< Parsing was terminated. + kParseErrorUnspecificSyntaxError //!< Unspecific syntax error. +}; + +//! Result of parsing (wraps ParseErrorCode) +/*! + \ingroup RAPIDJSON_ERRORS + \code + Document doc; + ParseResult ok = doc.Parse("[42]"); + if (!ok) { + fprintf(stderr, "JSON parse error: %s (%u)", + GetParseError_En(ok.Code()), ok.Offset()); + exit(EXIT_FAILURE); + } + \endcode + \see GenericReader::Parse, GenericDocument::Parse +*/ +struct ParseResult { +public: + //! Default constructor, no error. + ParseResult() : code_(kParseErrorNone), offset_(0) {} + //! Constructor to set an error. + ParseResult(ParseErrorCode code, size_t offset) : code_(code), offset_(offset) {} + + //! Get the error code. + ParseErrorCode Code() const { return code_; } + //! Get the error offset, if \ref IsError(), 0 otherwise. + size_t Offset() const { return offset_; } + + //! Conversion to \c bool, returns \c true, iff !\ref IsError(). + operator bool() const { return !IsError(); } + //! Whether the result is an error. + bool IsError() const { return code_ != kParseErrorNone; } + + bool operator==(const ParseResult& that) const { return code_ == that.code_; } + bool operator==(ParseErrorCode code) const { return code_ == code; } + friend bool operator==(ParseErrorCode code, const ParseResult & err) { return code == err.code_; } + + //! Reset error code. + void Clear() { Set(kParseErrorNone); } + //! Update error code and offset. + void Set(ParseErrorCode code, size_t offset = 0) { code_ = code; offset_ = offset; } + +private: + ParseErrorCode code_; + size_t offset_; +}; + +//! Function pointer type of GetParseError(). +/*! \ingroup RAPIDJSON_ERRORS + + This is the prototype for \c GetParseError_X(), where \c X is a locale. 
+ User can dynamically change locale in runtime, e.g.: +\code + GetParseErrorFunc GetParseError = GetParseError_En; // or whatever + const RAPIDJSON_ERROR_CHARTYPE* s = GetParseError(document.GetParseErrorCode()); +\endcode +*/ +typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetParseErrorFunc)(ParseErrorCode); + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_ERROR_ERROR_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/filereadstream.h b/c++/include/misc/jsonwrapp/rapidjson11/filereadstream.h new file mode 100644 index 00000000..b56ea13b --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/filereadstream.h @@ -0,0 +1,99 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_FILEREADSTREAM_H_ +#define RAPIDJSON_FILEREADSTREAM_H_ + +#include "stream.h" +#include + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +RAPIDJSON_DIAG_OFF(unreachable-code) +RAPIDJSON_DIAG_OFF(missing-noreturn) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! File byte stream for input using fread(). +/*! + \note implements Stream concept +*/ +class FileReadStream { +public: + typedef char Ch; //!< Character type (byte). + + //! Constructor. + /*! + \param fp File pointer opened for read. + \param buffer user-supplied buffer. 
+ \param bufferSize size of buffer in bytes. Must >=4 bytes. + */ + FileReadStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferSize_(bufferSize), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) { + RAPIDJSON_ASSERT(fp_ != 0); + RAPIDJSON_ASSERT(bufferSize >= 4); + Read(); + } + + Ch Peek() const { return *current_; } + Ch Take() { Ch c = *current_; Read(); return c; } + size_t Tell() const { return count_ + static_cast(current_ - buffer_); } + + // Not implemented + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + + // For encoding detection only. + const Ch* Peek4() const { + return (current_ + 4 <= bufferLast_) ? current_ : 0; + } + +private: + void Read() { + if (current_ < bufferLast_) + ++current_; + else if (!eof_) { + count_ += readCount_; + readCount_ = fread(buffer_, 1, bufferSize_, fp_); + bufferLast_ = buffer_ + readCount_ - 1; + current_ = buffer_; + + if (readCount_ < bufferSize_) { + buffer_[readCount_] = '\0'; + ++bufferLast_; + eof_ = true; + } + } + } + + std::FILE* fp_; + Ch *buffer_; + size_t bufferSize_; + Ch *bufferLast_; + Ch *current_; + size_t readCount_; + size_t count_; //!< Number of characters read + bool eof_; +}; + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_FILESTREAM_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/filewritestream.h b/c++/include/misc/jsonwrapp/rapidjson11/filewritestream.h new file mode 100644 index 00000000..6378dd60 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/filewritestream.h @@ -0,0 +1,104 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 
+// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_FILEWRITESTREAM_H_ +#define RAPIDJSON_FILEWRITESTREAM_H_ + +#include "stream.h" +#include + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(unreachable-code) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Wrapper of C file stream for input using fread(). +/*! + \note implements Stream concept +*/ +class FileWriteStream { +public: + typedef char Ch; //!< Character type. Only support char. + + FileWriteStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferEnd_(buffer + bufferSize), current_(buffer_) { + RAPIDJSON_ASSERT(fp_ != 0); + } + + void Put(char c) { + if (current_ >= bufferEnd_) + Flush(); + + *current_++ = c; + } + + void PutN(char c, size_t n) { + size_t avail = static_cast(bufferEnd_ - current_); + while (n > avail) { + std::memset(current_, c, avail); + current_ += avail; + Flush(); + n -= avail; + avail = static_cast(bufferEnd_ - current_); + } + + if (n > 0) { + std::memset(current_, c, n); + current_ += n; + } + } + + void Flush() { + if (current_ != buffer_) { + size_t result = fwrite(buffer_, 1, static_cast(current_ - buffer_), fp_); + if (result < static_cast(current_ - buffer_)) { + // failure deliberately ignored at this time + // added to avoid warn_unused_result build errors + } + current_ = buffer_; + } + } + + // Not implemented + char Peek() const { RAPIDJSON_ASSERT(false); return 0; } + char Take() { RAPIDJSON_ASSERT(false); return 0; } + size_t Tell() 
const { RAPIDJSON_ASSERT(false); return 0; } + char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + // Prohibit copy constructor & assignment operator. + FileWriteStream(const FileWriteStream&); + FileWriteStream& operator=(const FileWriteStream&); + + std::FILE* fp_; + char *buffer_; + char *bufferEnd_; + char *current_; +}; + +//! Implement specialized version of PutN() with memset() for better performance. +template<> +inline void PutN(FileWriteStream& stream, char c, size_t n) { + stream.PutN(c, n); +} + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_FILESTREAM_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/fwd.h b/c++/include/misc/jsonwrapp/rapidjson11/fwd.h new file mode 100644 index 00000000..e8104e84 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/fwd.h @@ -0,0 +1,151 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_FWD_H_ +#define RAPIDJSON_FWD_H_ + +#include "rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN + +// encodings.h + +template struct UTF8; +template struct UTF16; +template struct UTF16BE; +template struct UTF16LE; +template struct UTF32; +template struct UTF32BE; +template struct UTF32LE; +template struct ASCII; +template struct AutoUTF; + +template +struct Transcoder; + +// allocators.h + +class CrtAllocator; + +template +class MemoryPoolAllocator; + +// stream.h + +template +struct GenericStringStream; + +typedef GenericStringStream > StringStream; + +template +struct GenericInsituStringStream; + +typedef GenericInsituStringStream > InsituStringStream; + +// stringbuffer.h + +template +class GenericStringBuffer; + +typedef GenericStringBuffer, CrtAllocator> StringBuffer; + +// filereadstream.h + +class FileReadStream; + +// filewritestream.h + +class FileWriteStream; + +// memorybuffer.h + +template +struct GenericMemoryBuffer; + +typedef GenericMemoryBuffer MemoryBuffer; + +// memorystream.h + +struct MemoryStream; + +// reader.h + +template +struct BaseReaderHandler; + +template +class GenericReader; + +typedef GenericReader, UTF8, CrtAllocator> Reader; + +// writer.h + +template +class Writer; + +// prettywriter.h + +template +class PrettyWriter; + +// document.h + +template +struct GenericMember; + +template +class GenericMemberIterator; + +template +struct GenericStringRef; + +template +class GenericValue; + +typedef GenericValue, MemoryPoolAllocator > Value; + +template +class GenericDocument; + +typedef GenericDocument, MemoryPoolAllocator, CrtAllocator> Document; + +// pointer.h + +template +class GenericPointer; + +typedef GenericPointer Pointer; + +// schema.h + +template +class IGenericRemoteSchemaDocumentProvider; + +template +class GenericSchemaDocument; + +typedef GenericSchemaDocument SchemaDocument; +typedef IGenericRemoteSchemaDocumentProvider IRemoteSchemaDocumentProvider; + +template < + typename SchemaDocumentType, + typename 
OutputHandler, + typename StateAllocator> +class GenericSchemaValidator; + +typedef GenericSchemaValidator, void>, CrtAllocator> SchemaValidator; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_RAPIDJSONFWD_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/internal/biginteger.h b/c++/include/misc/jsonwrapp/rapidjson11/internal/biginteger.h new file mode 100644 index 00000000..9d3e88c9 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/internal/biginteger.h @@ -0,0 +1,290 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_BIGINTEGER_H_ +#define RAPIDJSON_BIGINTEGER_H_ + +#include "../rapidjson.h" + +#if defined(_MSC_VER) && defined(_M_AMD64) +#include // for _umul128 +#pragma intrinsic(_umul128) +#endif + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +class BigInteger { +public: + typedef uint64_t Type; + + BigInteger(const BigInteger& rhs) : count_(rhs.count_) { + std::memcpy(digits_, rhs.digits_, count_ * sizeof(Type)); + } + + explicit BigInteger(uint64_t u) : count_(1) { + digits_[0] = u; + } + + BigInteger(const char* decimals, size_t length) : count_(1) { + RAPIDJSON_ASSERT(length > 0); + digits_[0] = 0; + size_t i = 0; + const size_t kMaxDigitPerIteration = 19; // 2^64 = 18446744073709551616 > 10^19 + while (length >= kMaxDigitPerIteration) { + AppendDecimal64(decimals + i, decimals + i + kMaxDigitPerIteration); + length -= kMaxDigitPerIteration; + i += kMaxDigitPerIteration; + } + + if (length > 0) + AppendDecimal64(decimals + i, decimals + i + length); + } + + BigInteger& operator=(const BigInteger &rhs) + { + if (this != &rhs) { + count_ = rhs.count_; + std::memcpy(digits_, rhs.digits_, count_ * sizeof(Type)); + } + return *this; + } + + BigInteger& operator=(uint64_t u) { + digits_[0] = u; + count_ = 1; + return *this; + } + + BigInteger& operator+=(uint64_t u) { + Type backup = digits_[0]; + digits_[0] += u; + for (size_t i = 0; i < count_ - 1; i++) { + if (digits_[i] >= backup) + return *this; // no carry + backup = digits_[i + 1]; + digits_[i + 1] += 1; + } + + // Last carry + if (digits_[count_ - 1] < backup) + PushBack(1); + + return *this; + } + + BigInteger& operator*=(uint64_t u) { + if (u == 0) return *this = 0; + if (u == 1) return *this; + if (*this == 1) return *this = u; + + uint64_t k = 0; + for (size_t i = 0; i < count_; i++) { + uint64_t hi; + digits_[i] = MulAdd64(digits_[i], u, k, &hi); + k = hi; + } + + if (k > 0) + PushBack(k); + + return *this; + } + + BigInteger& operator*=(uint32_t u) { + if (u == 0) return *this = 0; + if 
(u == 1) return *this; + if (*this == 1) return *this = u; + + uint64_t k = 0; + for (size_t i = 0; i < count_; i++) { + const uint64_t c = digits_[i] >> 32; + const uint64_t d = digits_[i] & 0xFFFFFFFF; + const uint64_t uc = u * c; + const uint64_t ud = u * d; + const uint64_t p0 = ud + k; + const uint64_t p1 = uc + (p0 >> 32); + digits_[i] = (p0 & 0xFFFFFFFF) | (p1 << 32); + k = p1 >> 32; + } + + if (k > 0) + PushBack(k); + + return *this; + } + + BigInteger& operator<<=(size_t shift) { + if (IsZero() || shift == 0) return *this; + + size_t offset = shift / kTypeBit; + size_t interShift = shift % kTypeBit; + RAPIDJSON_ASSERT(count_ + offset <= kCapacity); + + if (interShift == 0) { + std::memmove(&digits_[count_ - 1 + offset], &digits_[count_ - 1], count_ * sizeof(Type)); + count_ += offset; + } + else { + digits_[count_] = 0; + for (size_t i = count_; i > 0; i--) + digits_[i + offset] = (digits_[i] << interShift) | (digits_[i - 1] >> (kTypeBit - interShift)); + digits_[offset] = digits_[0] << interShift; + count_ += offset; + if (digits_[count_]) + count_++; + } + + std::memset(digits_, 0, offset * sizeof(Type)); + + return *this; + } + + bool operator==(const BigInteger& rhs) const { + return count_ == rhs.count_ && std::memcmp(digits_, rhs.digits_, count_ * sizeof(Type)) == 0; + } + + bool operator==(const Type rhs) const { + return count_ == 1 && digits_[0] == rhs; + } + + BigInteger& MultiplyPow5(unsigned exp) { + static const uint32_t kPow5[12] = { + 5, + 5 * 5, + 5 * 5 * 5, + 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 + }; + if (exp == 0) return *this; + for (; exp >= 27; exp -= 27) *this *= RAPIDJSON_UINT64_C2(0X6765C793, 0XFA10079D); // 5^27 + for (; exp >= 13; exp -= 13) *this *= static_cast(1220703125u); 
// 5^13 + if (exp > 0) *this *= kPow5[exp - 1]; + return *this; + } + + // Compute absolute difference of this and rhs. + // Assume this != rhs + bool Difference(const BigInteger& rhs, BigInteger* out) const { + int cmp = Compare(rhs); + RAPIDJSON_ASSERT(cmp != 0); + const BigInteger *a, *b; // Makes a > b + bool ret; + if (cmp < 0) { a = &rhs; b = this; ret = true; } + else { a = this; b = &rhs; ret = false; } + + Type borrow = 0; + for (size_t i = 0; i < a->count_; i++) { + Type d = a->digits_[i] - borrow; + if (i < b->count_) + d -= b->digits_[i]; + borrow = (d > a->digits_[i]) ? 1 : 0; + out->digits_[i] = d; + if (d != 0) + out->count_ = i + 1; + } + + return ret; + } + + int Compare(const BigInteger& rhs) const { + if (count_ != rhs.count_) + return count_ < rhs.count_ ? -1 : 1; + + for (size_t i = count_; i-- > 0;) + if (digits_[i] != rhs.digits_[i]) + return digits_[i] < rhs.digits_[i] ? -1 : 1; + + return 0; + } + + size_t GetCount() const { return count_; } + Type GetDigit(size_t index) const { RAPIDJSON_ASSERT(index < count_); return digits_[index]; } + bool IsZero() const { return count_ == 1 && digits_[0] == 0; } + +private: + void AppendDecimal64(const char* begin, const char* end) { + uint64_t u = ParseUint64(begin, end); + if (IsZero()) + *this = u; + else { + unsigned exp = static_cast(end - begin); + (MultiplyPow5(exp) <<= exp) += u; // *this = *this * 10^exp + u + } + } + + void PushBack(Type digit) { + RAPIDJSON_ASSERT(count_ < kCapacity); + digits_[count_++] = digit; + } + + static uint64_t ParseUint64(const char* begin, const char* end) { + uint64_t r = 0; + for (const char* p = begin; p != end; ++p) { + RAPIDJSON_ASSERT(*p >= '0' && *p <= '9'); + r = r * 10u + static_cast(*p - '0'); + } + return r; + } + + // Assume a * b + k < 2^128 + static uint64_t MulAdd64(uint64_t a, uint64_t b, uint64_t k, uint64_t* outHigh) { +#if defined(_MSC_VER) && defined(_M_AMD64) + uint64_t low = _umul128(a, b, outHigh) + k; + if (low < k) + (*outHigh)++; + return 
low; +#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__) + __extension__ typedef unsigned __int128 uint128; + uint128 p = static_cast(a) * static_cast(b); + p += k; + *outHigh = static_cast(p >> 64); + return static_cast(p); +#else + const uint64_t a0 = a & 0xFFFFFFFF, a1 = a >> 32, b0 = b & 0xFFFFFFFF, b1 = b >> 32; + uint64_t x0 = a0 * b0, x1 = a0 * b1, x2 = a1 * b0, x3 = a1 * b1; + x1 += (x0 >> 32); // can't give carry + x1 += x2; + if (x1 < x2) + x3 += (static_cast(1) << 32); + uint64_t lo = (x1 << 32) + (x0 & 0xFFFFFFFF); + uint64_t hi = x3 + (x1 >> 32); + + lo += k; + if (lo < k) + hi++; + *outHigh = hi; + return lo; +#endif + } + + static const size_t kBitCount = 3328; // 64bit * 54 > 10^1000 + static const size_t kCapacity = kBitCount / sizeof(Type); + static const size_t kTypeBit = sizeof(Type) * 8; + + Type digits_[kCapacity]; + size_t count_; +}; + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_BIGINTEGER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/internal/diyfp.h b/c++/include/misc/jsonwrapp/rapidjson11/internal/diyfp.h new file mode 100644 index 00000000..c9fefdc6 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/internal/diyfp.h @@ -0,0 +1,258 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// This is a C++ header-only implementation of Grisu2 algorithm from the publication: +// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with +// integers." ACM Sigplan Notices 45.6 (2010): 233-243. + +#ifndef RAPIDJSON_DIYFP_H_ +#define RAPIDJSON_DIYFP_H_ + +#include "../rapidjson.h" + +#if defined(_MSC_VER) && defined(_M_AMD64) +#include +#pragma intrinsic(_BitScanReverse64) +#pragma intrinsic(_umul128) +#endif + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +#endif + +struct DiyFp { + DiyFp() : f(), e() {} + + DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {} + + explicit DiyFp(double d) { + union { + double d; + uint64_t u64; + } u = { d }; + + int biased_e = static_cast((u.u64 & kDpExponentMask) >> kDpSignificandSize); + uint64_t significand = (u.u64 & kDpSignificandMask); + if (biased_e != 0) { + f = significand + kDpHiddenBit; + e = biased_e - kDpExponentBias; + } + else { + f = significand; + e = kDpMinExponent + 1; + } + } + + DiyFp operator-(const DiyFp& rhs) const { + return DiyFp(f - rhs.f, e); + } + + DiyFp operator*(const DiyFp& rhs) const { +#if defined(_MSC_VER) && defined(_M_AMD64) + uint64_t h; + uint64_t l = _umul128(f, rhs.f, &h); + if (l & (uint64_t(1) << 63)) // rounding + h++; + return DiyFp(h, e + rhs.e + 64); +#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__) + __extension__ typedef unsigned __int128 uint128; + uint128 p = static_cast(f) * static_cast(rhs.f); + uint64_t h = static_cast(p >> 64); + uint64_t l = static_cast(p); + if (l & (uint64_t(1) << 63)) // rounding + h++; + return DiyFp(h, e + rhs.e + 64); +#else + const uint64_t M32 = 0xFFFFFFFF; + const uint64_t a = f >> 32; + const uint64_t b = f & M32; + const uint64_t c = rhs.f >> 32; + const uint64_t d = rhs.f & M32; + const uint64_t ac = a * c; + const uint64_t bc = b * c; 
+ const uint64_t ad = a * d; + const uint64_t bd = b * d; + uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32); + tmp += 1U << 31; /// mult_round + return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64); +#endif + } + + DiyFp Normalize() const { +#if defined(_MSC_VER) && defined(_M_AMD64) + unsigned long index; + _BitScanReverse64(&index, f); + return DiyFp(f << (63 - index), e - (63 - index)); +#elif defined(__GNUC__) && __GNUC__ >= 4 + int s = __builtin_clzll(f); + return DiyFp(f << s, e - s); +#else + DiyFp res = *this; + while (!(res.f & (static_cast(1) << 63))) { + res.f <<= 1; + res.e--; + } + return res; +#endif + } + + DiyFp NormalizeBoundary() const { + DiyFp res = *this; + while (!(res.f & (kDpHiddenBit << 1))) { + res.f <<= 1; + res.e--; + } + res.f <<= (kDiySignificandSize - kDpSignificandSize - 2); + res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2); + return res; + } + + void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const { + DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary(); + DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1); + mi.f <<= mi.e - pl.e; + mi.e = pl.e; + *plus = pl; + *minus = mi; + } + + double ToDouble() const { + union { + double d; + uint64_t u64; + }u; + const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 
0 : + static_cast(e + kDpExponentBias); + u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize); + return u.d; + } + + static const int kDiySignificandSize = 64; + static const int kDpSignificandSize = 52; + static const int kDpExponentBias = 0x3FF + kDpSignificandSize; + static const int kDpMaxExponent = 0x7FF - kDpExponentBias; + static const int kDpMinExponent = -kDpExponentBias; + static const int kDpDenormalExponent = -kDpExponentBias + 1; + static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000); + static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF); + static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000); + + uint64_t f; + int e; +}; + +inline DiyFp GetCachedPowerByIndex(size_t index) { + // 10^-348, 10^-340, ..., 10^340 + static const uint64_t kCachedPowers_F[] = { + RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76), + RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea), + RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df), + RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f), + RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c), + RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5), + RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d), + RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637), + RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7), + RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5), + RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b), + RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996), + 
RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6), + RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8), + RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053), + RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd), + RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94), + RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b), + RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac), + RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3), + RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb), + RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c), + RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000), + RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984), + RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70), + RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245), + RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8), + RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a), + RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea), + RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85), + RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2), + RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3), + RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25), + RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), 
RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece), + RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5), + RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a), + RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c), + RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a), + RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129), + RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429), + RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d), + RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841), + RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9), + RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b) + }; + static const int16_t kCachedPowers_E[] = { + -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, + -954, -927, -901, -874, -847, -821, -794, -768, -741, -715, + -688, -661, -635, -608, -582, -555, -529, -502, -475, -449, + -422, -396, -369, -343, -316, -289, -263, -236, -210, -183, + -157, -130, -103, -77, -50, -24, 3, 30, 56, 83, + 109, 136, 162, 189, 216, 242, 269, 295, 322, 348, + 375, 402, 428, 455, 481, 508, 534, 561, 588, 614, + 641, 667, 694, 720, 747, 774, 800, 827, 853, 880, + 907, 933, 960, 986, 1013, 1039, 1066 + }; + return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]); +} + +inline DiyFp GetCachedPower(int e, int* K) { + + //int k = static_cast(ceil((-61 - e) * 0.30102999566398114)) + 374; + double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive + int k = static_cast(dk); + if (dk - k > 0.0) + k++; + + unsigned index = static_cast((k >> 3) + 1); + *K = -(-348 + static_cast(index << 3)); // decimal exponent no need lookup table + + return GetCachedPowerByIndex(index); +} 
+ +inline DiyFp GetCachedPower10(int exp, int *outExp) { + unsigned index = (static_cast<unsigned>(exp) + 348u) / 8u; + *outExp = -348 + static_cast<int>(index) * 8; + return GetCachedPowerByIndex(index); + } + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +RAPIDJSON_DIAG_OFF(padded) +#endif + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_DIYFP_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/internal/dtoa.h b/c++/include/misc/jsonwrapp/rapidjson11/internal/dtoa.h new file mode 100644 index 00000000..bf2e9b2e --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/internal/dtoa.h @@ -0,0 +1,245 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// This is a C++ header-only implementation of Grisu2 algorithm from the publication: +// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with +// integers." ACM Sigplan Notices 45.6 (2010): 233-243. 
+ +#ifndef RAPIDJSON_DTOA_ +#define RAPIDJSON_DTOA_ + +#include "itoa.h" // GetDigitsLut() +#include "diyfp.h" +#include "ieee754.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +RAPIDJSON_DIAG_OFF(array-bounds) // some gcc versions generate wrong warnings https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124 +#endif + +inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) { + while (rest < wp_w && delta - rest >= ten_kappa && + (rest + ten_kappa < wp_w || /// closer + wp_w - rest > rest + ten_kappa - wp_w)) { + buffer[len - 1]--; + rest += ten_kappa; + } +} + +inline int CountDecimalDigit32(uint32_t n) { + // Simple pure C++ implementation was faster than __builtin_clz version in this situation. + if (n < 10) return 1; + if (n < 100) return 2; + if (n < 1000) return 3; + if (n < 10000) return 4; + if (n < 100000) return 5; + if (n < 1000000) return 6; + if (n < 10000000) return 7; + if (n < 100000000) return 8; + // Will not reach 10 digits in DigitGen() + //if (n < 1000000000) return 9; + //return 10; + return 9; +} + +inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) { + static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 }; + const DiyFp one(uint64_t(1) << -Mp.e, Mp.e); + const DiyFp wp_w = Mp - W; + uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e); + uint64_t p2 = Mp.f & (one.f - 1); + int kappa = CountDecimalDigit32(p1); // kappa in [0, 9] + *len = 0; + + while (kappa > 0) { + uint32_t d = 0; + switch (kappa) { + case 9: d = p1 / 100000000; p1 %= 100000000; break; + case 8: d = p1 / 10000000; p1 %= 10000000; break; + case 7: d = p1 / 1000000; p1 %= 1000000; break; + case 6: d = p1 / 100000; p1 %= 100000; break; + case 5: d = p1 / 10000; p1 %= 10000; break; + case 4: d = p1 / 1000; p1 %= 1000; break; + case 3: d = p1 / 100; p1 %= 
100; break; + case 2: d = p1 / 10; p1 %= 10; break; + case 1: d = p1; p1 = 0; break; + default:; + } + if (d || *len) + buffer[(*len)++] = static_cast('0' + static_cast(d)); + kappa--; + uint64_t tmp = (static_cast(p1) << -one.e) + p2; + if (tmp <= delta) { + *K += kappa; + GrisuRound(buffer, *len, delta, tmp, static_cast(kPow10[kappa]) << -one.e, wp_w.f); + return; + } + } + + // kappa = 0 + for (;;) { + p2 *= 10; + delta *= 10; + char d = static_cast(p2 >> -one.e); + if (d || *len) + buffer[(*len)++] = static_cast('0' + d); + p2 &= one.f - 1; + kappa--; + if (p2 < delta) { + *K += kappa; + int index = -kappa; + GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 9 ? kPow10[index] : 0)); + return; + } + } +} + +inline void Grisu2(double value, char* buffer, int* length, int* K) { + const DiyFp v(value); + DiyFp w_m, w_p; + v.NormalizedBoundaries(&w_m, &w_p); + + const DiyFp c_mk = GetCachedPower(w_p.e, K); + const DiyFp W = v.Normalize() * c_mk; + DiyFp Wp = w_p * c_mk; + DiyFp Wm = w_m * c_mk; + Wm.f++; + Wp.f--; + DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K); +} + +inline char* WriteExponent(int K, char* buffer) { + if (K < 0) { + *buffer++ = '-'; + K = -K; + } + + if (K >= 100) { + *buffer++ = static_cast('0' + static_cast(K / 100)); + K %= 100; + const char* d = GetDigitsLut() + K * 2; + *buffer++ = d[0]; + *buffer++ = d[1]; + } + else if (K >= 10) { + const char* d = GetDigitsLut() + K * 2; + *buffer++ = d[0]; + *buffer++ = d[1]; + } + else + *buffer++ = static_cast('0' + static_cast(K)); + + return buffer; +} + +inline char* Prettify(char* buffer, int length, int k, int maxDecimalPlaces) { + const int kk = length + k; // 10^(kk-1) <= v < 10^kk + + if (0 <= k && kk <= 21) { + // 1234e7 -> 12340000000 + for (int i = length; i < kk; i++) + buffer[i] = '0'; + buffer[kk] = '.'; + buffer[kk + 1] = '0'; + return &buffer[kk + 2]; + } + else if (0 < kk && kk <= 21) { + // 1234e-2 -> 12.34 + std::memmove(&buffer[kk + 1], &buffer[kk], static_cast(length - 
kk)); + buffer[kk] = '.'; + if (0 > k + maxDecimalPlaces) { + // When maxDecimalPlaces = 2, 1.2345 -> 1.23, 1.102 -> 1.1 + // Remove extra trailing zeros (at least one) after truncation. + for (int i = kk + maxDecimalPlaces; i > kk + 1; i--) + if (buffer[i] != '0') + return &buffer[i + 1]; + return &buffer[kk + 2]; // Reserve one zero + } + else + return &buffer[length + 1]; + } + else if (-6 < kk && kk <= 0) { + // 1234e-6 -> 0.001234 + const int offset = 2 - kk; + std::memmove(&buffer[offset], &buffer[0], static_cast(length)); + buffer[0] = '0'; + buffer[1] = '.'; + for (int i = 2; i < offset; i++) + buffer[i] = '0'; + if (length - kk > maxDecimalPlaces) { + // When maxDecimalPlaces = 2, 0.123 -> 0.12, 0.102 -> 0.1 + // Remove extra trailing zeros (at least one) after truncation. + for (int i = maxDecimalPlaces + 1; i > 2; i--) + if (buffer[i] != '0') + return &buffer[i + 1]; + return &buffer[3]; // Reserve one zero + } + else + return &buffer[length + offset]; + } + else if (kk < -maxDecimalPlaces) { + // Truncate to zero + buffer[0] = '0'; + buffer[1] = '.'; + buffer[2] = '0'; + return &buffer[3]; + } + else if (length == 1) { + // 1e30 + buffer[1] = 'e'; + return WriteExponent(kk - 1, &buffer[2]); + } + else { + // 1234e30 -> 1.234e33 + std::memmove(&buffer[2], &buffer[1], static_cast(length - 1)); + buffer[1] = '.'; + buffer[length + 1] = 'e'; + return WriteExponent(kk - 1, &buffer[0 + length + 2]); + } +} + +inline char* dtoa(double value, char* buffer, int maxDecimalPlaces = 324) { + RAPIDJSON_ASSERT(maxDecimalPlaces >= 1); + Double d(value); + if (d.IsZero()) { + if (d.Sign()) + *buffer++ = '-'; // -0.0, Issue #289 + buffer[0] = '0'; + buffer[1] = '.'; + buffer[2] = '0'; + return &buffer[3]; + } + else { + if (value < 0) { + *buffer++ = '-'; + value = -value; + } + int length, K; + Grisu2(value, buffer, &length, &K); + return Prettify(buffer, length, K, maxDecimalPlaces); + } +} + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +} // namespace internal 
+RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_DTOA_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/internal/ieee754.h b/c++/include/misc/jsonwrapp/rapidjson11/internal/ieee754.h new file mode 100644 index 00000000..c2684ba2 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/internal/ieee754.h @@ -0,0 +1,78 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_IEEE754_ +#define RAPIDJSON_IEEE754_ + +#include "../rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +class Double { +public: + Double() {} + Double(double d) : d_(d) {} + Double(uint64_t u) : u_(u) {} + + double Value() const { return d_; } + uint64_t Uint64Value() const { return u_; } + + double NextPositiveDouble() const { + RAPIDJSON_ASSERT(!Sign()); + return Double(u_ + 1).Value(); + } + + bool Sign() const { return (u_ & kSignMask) != 0; } + uint64_t Significand() const { return u_ & kSignificandMask; } + int Exponent() const { return static_cast<int>(((u_ & kExponentMask) >> kSignificandSize) - kExponentBias); } + + bool IsNan() const { return (u_ & kExponentMask) == kExponentMask && Significand() != 0; } + bool IsInf() const { return (u_ & kExponentMask) == kExponentMask && Significand() == 0; } + bool IsNanOrInf() const { return (u_ & kExponentMask) == kExponentMask; } + bool IsNormal() const { return (u_ & kExponentMask) != 0 || Significand() == 0; } + bool IsZero() const { return (u_ & (kExponentMask | kSignificandMask)) == 0; } + + uint64_t IntegerSignificand() const { return IsNormal() ? Significand() | kHiddenBit : Significand(); } + int IntegerExponent() const { return (IsNormal() ? Exponent() : kDenormalExponent) - kSignificandSize; } + uint64_t ToBias() const { return (u_ & kSignMask) ? 
~u_ + 1 : u_ | kSignMask; } + + static int EffectiveSignificandSize(int order) { + if (order >= -1021) + return 53; + else if (order <= -1074) + return 0; + else + return order + 1074; + } + +private: + static const int kSignificandSize = 52; + static const int kExponentBias = 0x3FF; + static const int kDenormalExponent = 1 - kExponentBias; + static const uint64_t kSignMask = RAPIDJSON_UINT64_C2(0x80000000, 0x00000000); + static const uint64_t kExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000); + static const uint64_t kSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF); + static const uint64_t kHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000); + + union { + double d_; + uint64_t u_; + }; +}; + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_IEEE754_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/internal/itoa.h b/c++/include/misc/jsonwrapp/rapidjson11/internal/itoa.h new file mode 100644 index 00000000..01a4e7e7 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/internal/itoa.h @@ -0,0 +1,304 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_ITOA_ +#define RAPIDJSON_ITOA_ + +#include "../rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +inline const char* GetDigitsLut() { + static const char cDigitsLut[200] = { + '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9', + '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9', + '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9', + '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9', + '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9', + '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9', + '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9', + '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9', + '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9', + '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9' + }; + return cDigitsLut; +} + +inline char* u32toa(uint32_t value, char* buffer) { + const char* cDigitsLut = GetDigitsLut(); + + if (value < 10000) { + const uint32_t d1 = (value / 100) << 1; + const uint32_t d2 = (value % 100) << 1; + + if (value >= 1000) + *buffer++ = cDigitsLut[d1]; + if (value >= 100) + *buffer++ = cDigitsLut[d1 + 1]; + if (value >= 10) + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + } + else if (value < 100000000) { + // value = bbbbcccc + const uint32_t b = value / 10000; + const uint32_t c = value % 10000; + + const uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + if (value >= 10000000) + *buffer++ = cDigitsLut[d1]; + if (value >= 1000000) + *buffer++ = cDigitsLut[d1 + 1]; + if (value >= 100000) + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + + *buffer++ = cDigitsLut[d3]; + *buffer++ = 
cDigitsLut[d3 + 1]; + *buffer++ = cDigitsLut[d4]; + *buffer++ = cDigitsLut[d4 + 1]; + } + else { + // value = aabbbbcccc in decimal + + const uint32_t a = value / 100000000; // 1 to 42 + value %= 100000000; + + if (a >= 10) { + const unsigned i = a << 1; + *buffer++ = cDigitsLut[i]; + *buffer++ = cDigitsLut[i + 1]; + } + else + *buffer++ = static_cast('0' + static_cast(a)); + + const uint32_t b = value / 10000; // 0 to 9999 + const uint32_t c = value % 10000; // 0 to 9999 + + const uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + *buffer++ = cDigitsLut[d1]; + *buffer++ = cDigitsLut[d1 + 1]; + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + *buffer++ = cDigitsLut[d3]; + *buffer++ = cDigitsLut[d3 + 1]; + *buffer++ = cDigitsLut[d4]; + *buffer++ = cDigitsLut[d4 + 1]; + } + return buffer; +} + +inline char* i32toa(int32_t value, char* buffer) { + uint32_t u = static_cast(value); + if (value < 0) { + *buffer++ = '-'; + u = ~u + 1; + } + + return u32toa(u, buffer); +} + +inline char* u64toa(uint64_t value, char* buffer) { + const char* cDigitsLut = GetDigitsLut(); + const uint64_t kTen8 = 100000000; + const uint64_t kTen9 = kTen8 * 10; + const uint64_t kTen10 = kTen8 * 100; + const uint64_t kTen11 = kTen8 * 1000; + const uint64_t kTen12 = kTen8 * 10000; + const uint64_t kTen13 = kTen8 * 100000; + const uint64_t kTen14 = kTen8 * 1000000; + const uint64_t kTen15 = kTen8 * 10000000; + const uint64_t kTen16 = kTen8 * kTen8; + + if (value < kTen8) { + uint32_t v = static_cast(value); + if (v < 10000) { + const uint32_t d1 = (v / 100) << 1; + const uint32_t d2 = (v % 100) << 1; + + if (v >= 1000) + *buffer++ = cDigitsLut[d1]; + if (v >= 100) + *buffer++ = cDigitsLut[d1 + 1]; + if (v >= 10) + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + } + else { + // value = bbbbcccc + const uint32_t b = v / 10000; + const uint32_t c = v % 10000; + + const 
uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + if (value >= 10000000) + *buffer++ = cDigitsLut[d1]; + if (value >= 1000000) + *buffer++ = cDigitsLut[d1 + 1]; + if (value >= 100000) + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + + *buffer++ = cDigitsLut[d3]; + *buffer++ = cDigitsLut[d3 + 1]; + *buffer++ = cDigitsLut[d4]; + *buffer++ = cDigitsLut[d4 + 1]; + } + } + else if (value < kTen16) { + const uint32_t v0 = static_cast(value / kTen8); + const uint32_t v1 = static_cast(value % kTen8); + + const uint32_t b0 = v0 / 10000; + const uint32_t c0 = v0 % 10000; + + const uint32_t d1 = (b0 / 100) << 1; + const uint32_t d2 = (b0 % 100) << 1; + + const uint32_t d3 = (c0 / 100) << 1; + const uint32_t d4 = (c0 % 100) << 1; + + const uint32_t b1 = v1 / 10000; + const uint32_t c1 = v1 % 10000; + + const uint32_t d5 = (b1 / 100) << 1; + const uint32_t d6 = (b1 % 100) << 1; + + const uint32_t d7 = (c1 / 100) << 1; + const uint32_t d8 = (c1 % 100) << 1; + + if (value >= kTen15) + *buffer++ = cDigitsLut[d1]; + if (value >= kTen14) + *buffer++ = cDigitsLut[d1 + 1]; + if (value >= kTen13) + *buffer++ = cDigitsLut[d2]; + if (value >= kTen12) + *buffer++ = cDigitsLut[d2 + 1]; + if (value >= kTen11) + *buffer++ = cDigitsLut[d3]; + if (value >= kTen10) + *buffer++ = cDigitsLut[d3 + 1]; + if (value >= kTen9) + *buffer++ = cDigitsLut[d4]; + if (value >= kTen8) + *buffer++ = cDigitsLut[d4 + 1]; + + *buffer++ = cDigitsLut[d5]; + *buffer++ = cDigitsLut[d5 + 1]; + *buffer++ = cDigitsLut[d6]; + *buffer++ = cDigitsLut[d6 + 1]; + *buffer++ = cDigitsLut[d7]; + *buffer++ = cDigitsLut[d7 + 1]; + *buffer++ = cDigitsLut[d8]; + *buffer++ = cDigitsLut[d8 + 1]; + } + else { + const uint32_t a = static_cast(value / kTen16); // 1 to 1844 + value %= kTen16; + + if (a < 10) + *buffer++ = static_cast('0' + static_cast(a)); + else if (a < 100) { + const uint32_t i = a << 1; + *buffer++ = 
cDigitsLut[i]; + *buffer++ = cDigitsLut[i + 1]; + } + else if (a < 1000) { + *buffer++ = static_cast('0' + static_cast(a / 100)); + + const uint32_t i = (a % 100) << 1; + *buffer++ = cDigitsLut[i]; + *buffer++ = cDigitsLut[i + 1]; + } + else { + const uint32_t i = (a / 100) << 1; + const uint32_t j = (a % 100) << 1; + *buffer++ = cDigitsLut[i]; + *buffer++ = cDigitsLut[i + 1]; + *buffer++ = cDigitsLut[j]; + *buffer++ = cDigitsLut[j + 1]; + } + + const uint32_t v0 = static_cast(value / kTen8); + const uint32_t v1 = static_cast(value % kTen8); + + const uint32_t b0 = v0 / 10000; + const uint32_t c0 = v0 % 10000; + + const uint32_t d1 = (b0 / 100) << 1; + const uint32_t d2 = (b0 % 100) << 1; + + const uint32_t d3 = (c0 / 100) << 1; + const uint32_t d4 = (c0 % 100) << 1; + + const uint32_t b1 = v1 / 10000; + const uint32_t c1 = v1 % 10000; + + const uint32_t d5 = (b1 / 100) << 1; + const uint32_t d6 = (b1 % 100) << 1; + + const uint32_t d7 = (c1 / 100) << 1; + const uint32_t d8 = (c1 % 100) << 1; + + *buffer++ = cDigitsLut[d1]; + *buffer++ = cDigitsLut[d1 + 1]; + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + *buffer++ = cDigitsLut[d3]; + *buffer++ = cDigitsLut[d3 + 1]; + *buffer++ = cDigitsLut[d4]; + *buffer++ = cDigitsLut[d4 + 1]; + *buffer++ = cDigitsLut[d5]; + *buffer++ = cDigitsLut[d5 + 1]; + *buffer++ = cDigitsLut[d6]; + *buffer++ = cDigitsLut[d6 + 1]; + *buffer++ = cDigitsLut[d7]; + *buffer++ = cDigitsLut[d7 + 1]; + *buffer++ = cDigitsLut[d8]; + *buffer++ = cDigitsLut[d8 + 1]; + } + + return buffer; +} + +inline char* i64toa(int64_t value, char* buffer) { + uint64_t u = static_cast(value); + if (value < 0) { + *buffer++ = '-'; + u = ~u + 1; + } + + return u64toa(u, buffer); +} + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_ITOA_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/internal/meta.h b/c++/include/misc/jsonwrapp/rapidjson11/internal/meta.h new file mode 100644 index 00000000..5a9aaa42 --- /dev/null +++ 
b/c++/include/misc/jsonwrapp/rapidjson11/internal/meta.h @@ -0,0 +1,181 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_INTERNAL_META_H_ +#define RAPIDJSON_INTERNAL_META_H_ + +#include "../rapidjson.h" + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif +#if defined(_MSC_VER) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(6334) +#endif + +#if RAPIDJSON_HAS_CXX11_TYPETRAITS +#include +#endif + +//@cond RAPIDJSON_INTERNAL +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +// Helper to wrap/convert arbitrary types to void, useful for arbitrary type matching +template struct Void { typedef void Type; }; + +/////////////////////////////////////////////////////////////////////////////// +// BoolType, TrueType, FalseType +// +template struct BoolType { + static const bool Value = Cond; + typedef BoolType Type; +}; +typedef BoolType TrueType; +typedef BoolType FalseType; + + +/////////////////////////////////////////////////////////////////////////////// +// SelectIf, BoolExpr, NotExpr, AndExpr, OrExpr +// + +template struct SelectIfImpl { template struct Apply { typedef T1 Type; }; }; +template <> struct SelectIfImpl { template struct Apply { typedef T2 Type; }; }; +template struct SelectIfCond : SelectIfImpl::template Apply {}; +template struct SelectIf : 
SelectIfCond {}; + +template struct AndExprCond : FalseType {}; +template <> struct AndExprCond : TrueType {}; +template struct OrExprCond : TrueType {}; +template <> struct OrExprCond : FalseType {}; + +template struct BoolExpr : SelectIf::Type {}; +template struct NotExpr : SelectIf::Type {}; +template struct AndExpr : AndExprCond::Type {}; +template struct OrExpr : OrExprCond::Type {}; + + +/////////////////////////////////////////////////////////////////////////////// +// AddConst, MaybeAddConst, RemoveConst +template struct AddConst { typedef const T Type; }; +template struct MaybeAddConst : SelectIfCond {}; +template struct RemoveConst { typedef T Type; }; +template struct RemoveConst { typedef T Type; }; + + +/////////////////////////////////////////////////////////////////////////////// +// IsSame, IsConst, IsMoreConst, IsPointer +// +template struct IsSame : FalseType {}; +template struct IsSame : TrueType {}; + +template struct IsConst : FalseType {}; +template struct IsConst : TrueType {}; + +template +struct IsMoreConst + : AndExpr::Type, typename RemoveConst::Type>, + BoolType::Value >= IsConst::Value> >::Type {}; + +template struct IsPointer : FalseType {}; +template struct IsPointer : TrueType {}; + +/////////////////////////////////////////////////////////////////////////////// +// IsBaseOf +// +#if RAPIDJSON_HAS_CXX11_TYPETRAITS + +template struct IsBaseOf + : BoolType< ::std::is_base_of::value> {}; + +#else // simplified version adopted from Boost + +template struct IsBaseOfImpl { + RAPIDJSON_STATIC_ASSERT(sizeof(B) != 0); + RAPIDJSON_STATIC_ASSERT(sizeof(D) != 0); + + typedef char (&Yes)[1]; + typedef char (&No) [2]; + + template + static Yes Check(const D*, T); + static No Check(const B*, int); + + struct Host { + operator const B*() const; + operator const D*(); + }; + + enum { Value = (sizeof(Check(Host(), 0)) == sizeof(Yes)) }; +}; + +template struct IsBaseOf + : OrExpr, BoolExpr > >::Type {}; + +#endif // RAPIDJSON_HAS_CXX11_TYPETRAITS + + 
+////////////////////////////////////////////////////////////////////////// +// EnableIf / DisableIf +// +template struct EnableIfCond { typedef T Type; }; +template struct EnableIfCond { /* empty */ }; + +template struct DisableIfCond { typedef T Type; }; +template struct DisableIfCond { /* empty */ }; + +template +struct EnableIf : EnableIfCond {}; + +template +struct DisableIf : DisableIfCond {}; + +// SFINAE helpers +struct SfinaeTag {}; +template struct RemoveSfinaeTag; +template struct RemoveSfinaeTag { typedef T Type; }; + +#define RAPIDJSON_REMOVEFPTR_(type) \ + typename ::RAPIDJSON_NAMESPACE::internal::RemoveSfinaeTag \ + < ::RAPIDJSON_NAMESPACE::internal::SfinaeTag&(*) type>::Type + +#define RAPIDJSON_ENABLEIF(cond) \ + typename ::RAPIDJSON_NAMESPACE::internal::EnableIf \ + ::Type * = NULL + +#define RAPIDJSON_DISABLEIF(cond) \ + typename ::RAPIDJSON_NAMESPACE::internal::DisableIf \ + ::Type * = NULL + +#define RAPIDJSON_ENABLEIF_RETURN(cond,returntype) \ + typename ::RAPIDJSON_NAMESPACE::internal::EnableIf \ + ::Type + +#define RAPIDJSON_DISABLEIF_RETURN(cond,returntype) \ + typename ::RAPIDJSON_NAMESPACE::internal::DisableIf \ + ::Type + +} // namespace internal +RAPIDJSON_NAMESPACE_END +//@endcond + +#if defined(__GNUC__) || defined(_MSC_VER) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_INTERNAL_META_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/internal/pow10.h b/c++/include/misc/jsonwrapp/rapidjson11/internal/pow10.h new file mode 100644 index 00000000..02f475d7 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/internal/pow10.h @@ -0,0 +1,55 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_POW10_ +#define RAPIDJSON_POW10_ + +#include "../rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +//! Computes integer powers of 10 in double (10.0^n). +/*! This function uses lookup table for fast and accurate results. + \param n non-negative exponent. Must <= 308. + \return 10.0^n +*/ +inline double Pow10(int n) { + static const double e[] = { // 1e-0...1e308: 309 * 8 bytes = 2472 bytes + 1e+0, + 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, + 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, + 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, + 1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, + 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100, + 1e+101,1e+102,1e+103,1e+104,1e+105,1e+106,1e+107,1e+108,1e+109,1e+110,1e+111,1e+112,1e+113,1e+114,1e+115,1e+116,1e+117,1e+118,1e+119,1e+120, + 1e+121,1e+122,1e+123,1e+124,1e+125,1e+126,1e+127,1e+128,1e+129,1e+130,1e+131,1e+132,1e+133,1e+134,1e+135,1e+136,1e+137,1e+138,1e+139,1e+140, + 1e+141,1e+142,1e+143,1e+144,1e+145,1e+146,1e+147,1e+148,1e+149,1e+150,1e+151,1e+152,1e+153,1e+154,1e+155,1e+156,1e+157,1e+158,1e+159,1e+160, + 
1e+161,1e+162,1e+163,1e+164,1e+165,1e+166,1e+167,1e+168,1e+169,1e+170,1e+171,1e+172,1e+173,1e+174,1e+175,1e+176,1e+177,1e+178,1e+179,1e+180, + 1e+181,1e+182,1e+183,1e+184,1e+185,1e+186,1e+187,1e+188,1e+189,1e+190,1e+191,1e+192,1e+193,1e+194,1e+195,1e+196,1e+197,1e+198,1e+199,1e+200, + 1e+201,1e+202,1e+203,1e+204,1e+205,1e+206,1e+207,1e+208,1e+209,1e+210,1e+211,1e+212,1e+213,1e+214,1e+215,1e+216,1e+217,1e+218,1e+219,1e+220, + 1e+221,1e+222,1e+223,1e+224,1e+225,1e+226,1e+227,1e+228,1e+229,1e+230,1e+231,1e+232,1e+233,1e+234,1e+235,1e+236,1e+237,1e+238,1e+239,1e+240, + 1e+241,1e+242,1e+243,1e+244,1e+245,1e+246,1e+247,1e+248,1e+249,1e+250,1e+251,1e+252,1e+253,1e+254,1e+255,1e+256,1e+257,1e+258,1e+259,1e+260, + 1e+261,1e+262,1e+263,1e+264,1e+265,1e+266,1e+267,1e+268,1e+269,1e+270,1e+271,1e+272,1e+273,1e+274,1e+275,1e+276,1e+277,1e+278,1e+279,1e+280, + 1e+281,1e+282,1e+283,1e+284,1e+285,1e+286,1e+287,1e+288,1e+289,1e+290,1e+291,1e+292,1e+293,1e+294,1e+295,1e+296,1e+297,1e+298,1e+299,1e+300, + 1e+301,1e+302,1e+303,1e+304,1e+305,1e+306,1e+307,1e+308 + }; + RAPIDJSON_ASSERT(n >= 0 && n <= 308); + return e[n]; +} + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_POW10_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/internal/regex.h b/c++/include/misc/jsonwrapp/rapidjson11/internal/regex.h new file mode 100644 index 00000000..1369ea26 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/internal/regex.h @@ -0,0 +1,731 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_INTERNAL_REGEX_H_
+#define RAPIDJSON_INTERNAL_REGEX_H_
+
+#include "../allocators.h"
+#include "../stream.h"
+#include "stack.h"
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(switch-enum)
+RAPIDJSON_DIAG_OFF(implicit-fallthrough)
+#endif
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+#endif
+
+#ifdef _MSC_VER
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
+#endif
+
+#ifndef RAPIDJSON_REGEX_VERBOSE
+#define RAPIDJSON_REGEX_VERBOSE 0
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+///////////////////////////////////////////////////////////////////////////////
+// DecodedStream
+
+//! Adapter that exposes a source stream as a sequence of decoded codepoints.
+/*!
+    One codepoint is decoded ahead of the reader. A decoding failure is
+    reported as codepoint 0, which also marks the end of the stream.
+*/
+template <typename SourceStream, typename Encoding>
+class DecodedStream {
+public:
+    DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }
+
+    //! Returns the current codepoint without consuming it.
+    unsigned Peek() { return codepoint_; }
+
+    //! Returns the current codepoint and advances to the next one.
+    unsigned Take() {
+        unsigned c = codepoint_;
+        if (c) // No further decoding when '\0'
+            Decode();
+        return c;
+    }
+
+private:
+    void Decode() {
+        if (!Encoding::Decode(ss_, &codepoint_))
+            codepoint_ = 0;
+    }
+
+    SourceStream& ss_;
+    unsigned codepoint_;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericRegex
+
+static const SizeType kRegexInvalidState = ~SizeType(0);  //!< Represents an invalid index in GenericRegex::State::out, out1
+static const SizeType kRegexInvalidRange = ~SizeType(0);
+
+template <typename Encoding, typename Allocator>
+class GenericRegexSearch;
+
+//! Regular expression engine with subset of ECMAscript grammar.
+/*!
+    Supported regular expression syntax:
+    - \c ab     Concatenation
+    - \c a|b    Alternation
+    - \c a?     Zero or one
+    - \c a*     Zero or more
+    - \c a+     One or more
+    - \c a{3}   Exactly 3 times
+    - \c a{3,}  At least 3 times
+    - \c a{3,5} 3 to 5 times
+    - \c (ab)   Grouping
+    - \c ^a     At the beginning
+    - \c a$     At the end
+    - \c .      Any character
+    - \c [abc]  Character classes
+    - \c [a-c]  Character class range
+    - \c [a-z0-9_] Character class combination
+    - \c [^abc] Negated character classes
+    - \c [^a-c] Negated character class range
+    - \c [\b]   Backspace (U+0008)
+    - \c \\| \\\\ ...  Escape characters
+    - \c \\f Form feed (U+000C)
+    - \c \\n Line feed (U+000A)
+    - \c \\r Carriage return (U+000D)
+    - \c \\t Tab (U+0009)
+    - \c \\v Vertical tab (U+000B)
+
+    \note This is a Thompson NFA engine, implemented with reference to
+        Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).",
+        https://swtch.com/~rsc/regexp/regexp1.html
+*/
+template <typename Encoding, typename Allocator = CrtAllocator>
+class GenericRegex {
+public:
+    typedef Encoding EncodingType;
+    typedef typename Encoding::Ch Ch;
+    template <typename, typename> friend class GenericRegexSearch;
+
+    //! Compiles \c source; check IsValid() afterwards to see whether parsing succeeded.
+    GenericRegex(const Ch* source, Allocator* allocator = 0) :
+        states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),
+        anchorBegin_(), anchorEnd_()
+    {
+        GenericStringStream<Encoding> ss(source);
+        DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss);
+        Parse(ds);
+    }
+
+    ~GenericRegex() {}
+
+    //! True when the pattern was parsed completely (a root state exists).
+    bool IsValid() const {
+        return root_ != kRegexInvalidState;
+    }
+
+private:
+    // Declaration order matters: Parse() compares operators with '<'.
+    enum Operator {
+        kZeroOrOne,
+        kZeroOrMore,
+        kOneOrMore,
+        kConcatenation,
+        kAlternation,
+        kLeftParenthesis
+    };
+
+    static const unsigned kAnyCharacterClass = 0xFFFFFFFF;   //!< For '.'
+    static const unsigned kRangeCharacterClass = 0xFFFFFFFE;
+    static const unsigned kRangeNegationFlag = 0x80000000;
+
+    struct Range {
+        unsigned start; //!< First codepoint; the head range of a class may also carry kRangeNegationFlag
+        unsigned end;   //!< Last codepoint (inclusive)
+        SizeType next;  //!< Next range of the same class, or kRegexInvalidRange
+    };
+
+    struct State {
+        SizeType out;     //!< Equals to kInvalid for matching state
+        SizeType out1;    //!< Equals to non-kInvalid for split
+        SizeType rangeStart;
+        unsigned codepoint;
+    };
+
+    struct Frag {
+        Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
+        SizeType start;
+        SizeType out; //!< link-list of all output states
+        SizeType minIndex;
+    };
+
+    State& GetState(SizeType index) {
+        RAPIDJSON_ASSERT(index < stateCount_);
+        return states_.template Bottom<State>()[index];
+    }
+
+    const State& GetState(SizeType index) const {
+        RAPIDJSON_ASSERT(index < stateCount_);
+        return states_.template Bottom<State>()[index];
+    }
+
+    Range& GetRange(SizeType index) {
+        RAPIDJSON_ASSERT(index < rangeCount_);
+        return ranges_.template Bottom<Range>()[index];
+    }
+
+    const Range& GetRange(SizeType index) const {
+        RAPIDJSON_ASSERT(index < rangeCount_);
+        return ranges_.template Bottom<Range>()[index];
+    }
+
+    //! Parses the pattern and builds the NFA.
+    /*! On any syntax error the function returns early and root_ keeps
+        kRegexInvalidState, so IsValid() reports false. */
+    template <typename InputStream>
+    void Parse(DecodedStream<InputStream, Encoding>& ds) {
+        Allocator allocator;
+        Stack<Allocator> operandStack(&allocator, 256);     // Frag
+        Stack<Allocator> operatorStack(&allocator, 256);    // Operator
+        Stack<Allocator> atomCountStack(&allocator, 256);   // unsigned (Atom per parenthesis)
+
+        *atomCountStack.template Push<unsigned>() = 0;
+
+        unsigned codepoint;
+        while (ds.Peek() != 0) {
+            switch (codepoint = ds.Take()) {
+                case '^':
+                    anchorBegin_ = true;
+                    break;
+
+                case '$':
+                    anchorEnd_ = true;
+                    break;
+
+                case '|':
+                    while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
+                        if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
+                            return;
+                    *operatorStack.template Push<Operator>() = kAlternation;
+                    *atomCountStack.template Top<unsigned>() = 0;
+                    break;
+
+                case '(':
+                    *operatorStack.template Push<Operator>() = kLeftParenthesis;
+                    *atomCountStack.template Push<unsigned>() = 0;
+                    break;
+
+                case ')':
+                    while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
+                        if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
+                            return;
+                    if (operatorStack.Empty())
+                        return;
+                    operatorStack.template Pop<Operator>(1);
+                    atomCountStack.template Pop<unsigned>(1);
+                    ImplicitConcatenation(atomCountStack, operatorStack);
+                    break;
+
+                case '?':
+                    if (!Eval(operandStack, kZeroOrOne))
+                        return;
+                    break;
+
+                case '*':
+                    if (!Eval(operandStack, kZeroOrMore))
+                        return;
+                    break;
+
+                case '+':
+                    if (!Eval(operandStack, kOneOrMore))
+                        return;
+                    break;
+
+                case '{':
+                    {
+                        unsigned n, m;
+                        if (!ParseUnsigned(ds, &n))
+                            return;
+
+                        if (ds.Peek() == ',') {
+                            ds.Take();
+                            if (ds.Peek() == '}')
+                                m = kInfinityQuantifier;
+                            else if (!ParseUnsigned(ds, &m) || m < n)
+                                return;
+                        }
+                        else
+                            m = n;
+
+                        if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
+                            return;
+                        ds.Take();
+                    }
+                    break;
+
+                case '.':
+                    PushOperand(operandStack, kAnyCharacterClass);
+                    ImplicitConcatenation(atomCountStack, operatorStack);
+                    break;
+
+                case '[':
+                    {
+                        SizeType range;
+                        if (!ParseRange(ds, &range))
+                            return;
+                        SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
+                        GetState(s).rangeStart = range;
+                        *operandStack.template Push<Frag>() = Frag(s, s, s);
+                    }
+                    ImplicitConcatenation(atomCountStack, operatorStack);
+                    break;
+
+                case '\\': // Escape character
+                    if (!CharacterEscape(ds, &codepoint))
+                        return; // Unsupported escape character
+                    // fall through to default
+
+                default: // Pattern character
+                    PushOperand(operandStack, codepoint);
+                    ImplicitConcatenation(atomCountStack, operatorStack);
+            }
+        }
+
+        while (!operatorStack.Empty())
+            if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
+                return;
+
+        // Link the operand to matching state.
+        if (operandStack.GetSize() == sizeof(Frag)) {
+            Frag* e = operandStack.template Pop<Frag>(1);
+            Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
+            root_ = e->start;
+
+#if RAPIDJSON_REGEX_VERBOSE
+            printf("root: %d\n", root_);
+            for (SizeType i = 0; i < stateCount_ ; i++) {
+                State& s = GetState(i);
+                printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
+            }
+            printf("\n");
+#endif
+        }
+    }
+
+    //! Appends a new state and returns its index.
+    SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
+        State* s = states_.template Push<State>();
+        s->out = out;
+        s->out1 = out1;
+        s->codepoint = codepoint;
+        s->rangeStart = kRegexInvalidRange;
+        return stateCount_++;
+    }
+
+    //! Pushes a single-state fragment matching \c codepoint.
+    void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {
+        SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
+        *operandStack.template Push<Frag>() = Frag(s, s, s);
+    }
+
+    //! Pushes a concatenation operator when the current group already holds an atom, then counts the new atom.
+    void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
+        if (*atomCountStack.template Top<unsigned>())
+            *operatorStack.template Push<Operator>() = kConcatenation;
+        (*atomCountStack.template Top<unsigned>())++;
+    }
+
+    //! Links list \c l2 to the end of out-list \c l1 and returns the head of \c l1.
+    SizeType Append(SizeType l1, SizeType l2) {
+        SizeType old = l1;
+        while (GetState(l1).out != kRegexInvalidState)
+            l1 = GetState(l1).out;
+        GetState(l1).out = l2;
+        return old;
+    }
+
+    //! Points every state on out-list \c l at state \c s.
+    void Patch(SizeType l, SizeType s) {
+        for (SizeType next; l != kRegexInvalidState; l = next) {
+            next = GetState(l).out;
+            GetState(l).out = s;
+        }
+    }
+
+    //! Applies operator \c op to the fragments on top of \c operandStack.
+    /*! \return false when there are not enough operands (concatenation asserts instead). */
+    bool Eval(Stack<Allocator>& operandStack, Operator op) {
+        switch (op) {
+            case kConcatenation:
+                RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2);
+                {
+                    Frag e2 = *operandStack.template Pop<Frag>(1);
+                    Frag e1 = *operandStack.template Pop<Frag>(1);
+                    Patch(e1.out, e2.start);
+                    *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
+                }
+                return true;
+
+            case kAlternation:
+                if (operandStack.GetSize() >= sizeof(Frag) * 2) {
+                    Frag e2 = *operandStack.template Pop<Frag>(1);
+                    Frag e1 = *operandStack.template Pop<Frag>(1);
+                    SizeType s = NewState(e1.start, e2.start, 0);
+                    *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
+                    return true;
+                }
+                return false;
+
+            case kZeroOrOne:
+                if (operandStack.GetSize() >= sizeof(Frag)) {
+                    Frag e = *operandStack.template Pop<Frag>(1);
+                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
+                    *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
+                    return true;
+                }
+                return false;
+
+            case kZeroOrMore:
+                if (operandStack.GetSize() >= sizeof(Frag)) {
+                    Frag e = *operandStack.template Pop<Frag>(1);
+                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
+                    Patch(e.out, s);
+                    *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
+                    return true;
+                }
+                return false;
+
+            default:
+                RAPIDJSON_ASSERT(op == kOneOrMore);
+                if (operandStack.GetSize() >= sizeof(Frag)) {
+                    Frag e = *operandStack.template Pop<Frag>(1);
+                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
+                    Patch(e.out, s);
+                    *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);
+                    return true;
+                }
+                return false;
+        }
+    }
+
+    //! Expands the counted quantifier {n,m} by cloning the top operand.
+    /*! \return false for the unsupported form a{0}. */
+    bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
+        RAPIDJSON_ASSERT(n <= m);
+        RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag));
+
+        if (n == 0) {
+            if (m == 0)                             // a{0} not support
+                return false;
+            else if (m == kInfinityQuantifier)
+                Eval(operandStack, kZeroOrMore);    // a{0,} -> a*
+            else {
+                Eval(operandStack, kZeroOrOne);         // a{0,5} -> a?
+                for (unsigned i = 0; i < m - 1; i++)
+                    CloneTopOperand(operandStack);      // a{0,5} -> a? a? a? a? a?
+                for (unsigned i = 0; i < m - 1; i++)
+                    Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
+            }
+            return true;
+        }
+
+        for (unsigned i = 0; i < n - 1; i++)        // a{3} -> a a a
+            CloneTopOperand(operandStack);
+
+        if (m == kInfinityQuantifier)
+            Eval(operandStack, kOneOrMore);         // a{3,} -> a a a+
+        else if (m > n) {
+            CloneTopOperand(operandStack);          // a{3,5} -> a a a a
+            Eval(operandStack, kZeroOrOne);         // a{3,5} -> a a a a?
+            for (unsigned i = n; i < m - 1; i++)
+                CloneTopOperand(operandStack);      // a{3,5} -> a a a a? a?
+            for (unsigned i = n; i < m; i++)
+                Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
+        }
+
+        for (unsigned i = 0; i < n - 1; i++)
+            Eval(operandStack, kConcatenation);     // a{3} -> aaa, a{3,} -> aaa+, a{3,5} -> aaaa?a?
+
+        return true;
+    }
+
+    static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; }
+
+    //! Duplicates the states of the top operand and pushes the copy as a new fragment.
+    void CloneTopOperand(Stack<Allocator>& operandStack) {
+        const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation
+        SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
+        State* s = states_.template Push<State>(count);
+        memcpy(s, &GetState(src.minIndex), count * sizeof(State));
+        // The copied states sit 'count' slots later, so shift their links by the same amount.
+        for (SizeType j = 0; j < count; j++) {
+            if (s[j].out != kRegexInvalidState)
+                s[j].out += count;
+            if (s[j].out1 != kRegexInvalidState)
+                s[j].out1 += count;
+        }
+        *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
+        stateCount_ += count;
+    }
+
+    //! Parses a decimal number that must fit in 32 bits.
+    /*! \return false when no digit is present or the value would exceed 2^32 - 1. */
+    template <typename InputStream>
+    bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) {
+        unsigned r = 0;
+        if (ds.Peek() < '0' || ds.Peek() > '9')
+            return false;
+        while (ds.Peek() >= '0' && ds.Peek() <= '9') {
+            // 2^32 - 1 = 4294967295: reject before r * 10 + digit can wrap.
+            // Any r above 429496729 overflows whatever the next digit is.
+            if (r > 429496729 || (r == 429496729 && ds.Peek() > '5'))
+                return false; // overflow
+            r = r * 10 + (ds.Take() - '0');
+        }
+        *u = r;
+        return true;
+    }
+
+    //! Parses the body of a character class; the opening '[' is already consumed.
+    /*! \param range receives the index of the first Range of the class.
+        \return false on an empty class, a bad escape, or a missing ']'. */
+    template <typename InputStream>
+    bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) {
+        bool isBegin = true;
+        bool negate = false;
+        int step = 0; // 0: expect a start codepoint, 1: saw a start, 2: saw "start-"
+        SizeType start = kRegexInvalidRange;
+        SizeType current = kRegexInvalidRange;
+        unsigned codepoint;
+        while ((codepoint = ds.Take()) != 0) {
+            if (isBegin) {
+                isBegin = false;
+                if (codepoint == '^') {
+                    negate = true;
+                    continue;
+                }
+            }
+
+            switch (codepoint) {
+            case ']':
+                if (start == kRegexInvalidRange)
+                    return false;   // Error: nothing inside []
+                if (step == 2) { // Add trailing '-'
+                    SizeType r = NewRange('-');
+                    RAPIDJSON_ASSERT(current != kRegexInvalidRange);
+                    GetRange(current).next = r;
+                }
+                if (negate)
+                    GetRange(start).start |= kRangeNegationFlag;
+                *range = start;
+                return true;
+
+            case '\\':
+                if (ds.Peek() == 'b') {
+                    ds.Take();
+                    codepoint = 0x0008; // Escape backspace character
+                }
+                else if (!CharacterEscape(ds, &codepoint))
+                    return false;
+                // fall through to default
+
+            default:
+                switch (step) {
+                case 1:
+                    if (codepoint == '-') {
+                        step++;
+                        break;
+                    }
+                    // fall through to step 0 for other characters
+
+                case 0:
+                    {
+                        SizeType r = NewRange(codepoint);
+                        if (current != kRegexInvalidRange)
+                            GetRange(current).next = r;
+                        if (start == kRegexInvalidRange)
+                            start = r;
+                        current = r;
+                    }
+                    step = 1;
+                    break;
+
+                default:
+                    RAPIDJSON_ASSERT(step == 2);
+                    GetRange(current).end = codepoint;
+                    step = 0;
+                }
+            }
+        }
+        return false;
+    }
+
+    //! Appends a single-codepoint range and returns its index.
+    SizeType NewRange(unsigned codepoint) {
+        Range* r = ranges_.template Push<Range>();
+        r->start = r->end = codepoint;
+        r->next = kRegexInvalidRange;
+        return rangeCount_++;
+    }
+
+    //! Translates the character following a backslash.
+    /*! \return false for an unsupported escape character. */
+    template <typename InputStream>
+    bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) {
+        unsigned codepoint;
+        switch (codepoint = ds.Take()) {
+            case '^':
+            case '$':
+            case '|':
+            case '(':
+            case ')':
+            case '?':
+            case '*':
+            case '+':
+            case '.':
+            case '[':
+            case ']':
+            case '{':
+            case '}':
+            case '\\':
+                *escapedCodepoint = codepoint; return true;
+            case 'f': *escapedCodepoint = 0x000C; return true;
+            case 'n': *escapedCodepoint = 0x000A; return true;
+            case 'r': *escapedCodepoint = 0x000D; return true;
+            case 't': *escapedCodepoint = 0x0009; return true;
+            case 'v': *escapedCodepoint = 0x000B; return true;
+            default:
+                return false; // Unsupported escape character
+        }
+    }
+
+    Stack<Allocator> states_;
+    Stack<Allocator> ranges_;
+    SizeType root_;
+    SizeType stateCount_;
+    SizeType rangeCount_;
+
+    static const unsigned kInfinityQuantifier = ~0u;
+
+    // For SearchWithAnchoring()
+    bool anchorBegin_;
+    bool anchorEnd_;
+};
+
+//! Runs a compiled GenericRegex against input.
+/*! Keeps a reference to the regex, which must be valid (asserted in the constructor). */
+template <typename RegexType, typename Allocator = CrtAllocator>
+class GenericRegexSearch {
+public:
+    typedef typename RegexType::EncodingType Encoding;
+    typedef typename Encoding::Ch Ch;
+
+    GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) :
+        regex_(regex), allocator_(allocator), ownAllocator_(0),
+        state0_(allocator, 0), state1_(allocator, 0), stateSet_()
+    {
+        RAPIDJSON_ASSERT(regex_.IsValid());
+        if (!allocator_)
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+        stateSet_ = static_cast<uint32_t*>(allocator_->Malloc(GetStateSetSize()));
+        // Reserve room for every state up front so AddState() can use PushUnsafe().
+        state0_.template Reserve<SizeType>(regex_.stateCount_);
+        state1_.template Reserve<SizeType>(regex_.stateCount_);
+    }
+
+    ~GenericRegexSearch() {
+        Allocator::Free(stateSet_);
+        RAPIDJSON_DELETE(ownAllocator_);
+    }
+
+    //! Matches with both ends anchored, regardless of '^' / '$' in the pattern.
+    template <typename InputStream>
+    bool Match(InputStream& is) {
+        return SearchWithAnchoring(is, true, true);
+    }
+
+    bool Match(const Ch* s) {
+        GenericStringStream<Encoding> is(s);
+        return Match(is);
+    }
+
+    //! Searches using the anchors recorded while parsing the pattern.
+    template <typename InputStream>
+    bool Search(InputStream& is) {
+        return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_);
+    }
+
+    bool Search(const Ch* s) {
+        GenericStringStream<Encoding> is(s);
+        return Search(is);
+    }
+
+private:
+    typedef typename RegexType::State State;
+    typedef typename RegexType::Range Range;
+
+    //! Simulates the NFA over \c is, one decoded codepoint at a time.
+    template <typename InputStream>
+    bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) {
+        DecodedStream<InputStream, Encoding> ds(is);
+
+        state0_.Clear();
+        Stack<Allocator> *current = &state0_, *next = &state1_;
+        const size_t stateSetSize = GetStateSetSize();
+        std::memset(stateSet_, 0, stateSetSize);
+
+        bool matched = AddState(*current, regex_.root_);
+        unsigned codepoint;
+        while (!current->Empty() && (codepoint = ds.Take()) != 0) {
+            std::memset(stateSet_, 0, stateSetSize);
+            next->Clear();
+            matched = false;
+            for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
+                const State& sr = regex_.GetState(*s);
+                if (sr.codepoint == codepoint ||
+                    sr.codepoint == RegexType::kAnyCharacterClass ||
+                    (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
+                {
+                    matched = AddState(*next, sr.out) || matched;
+                    if (!anchorEnd && matched)
+                        return true;
+                }
+                if (!anchorBegin)
+                    AddState(*next, regex_.root_);
+            }
+            internal::Swap(current, next);
+        }
+
+        return matched;
+    }
+
+    //! Size in bytes of the visited-state bit set: one bit per state, rounded up to 32-bit words.
+    size_t GetStateSetSize() const {
+        return (regex_.stateCount_ + 31) / 32 * 4;
+    }
+
+    // Return whether the added states is a match state
+    bool AddState(Stack<Allocator>& l, SizeType index) {
+        RAPIDJSON_ASSERT(index != kRegexInvalidState);
+
+        const State& s = regex_.GetState(index);
+        if (s.out1 != kRegexInvalidState) { // Split
+            bool matched = AddState(l, s.out);
+            return AddState(l, s.out1) || matched;
+        }
+        else if (!(stateSet_[index >> 5] & (1u << (index & 31)))) {
+            stateSet_[index >> 5] |= (1u << (index & 31));
+            *l.template PushUnsafe<SizeType>() = index;
+        }
+        return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not invalidated due to reallocation.
+    }
+
+    //! Tests \c codepoint against the range list starting at \c rangeIndex, honouring the negation flag.
+    bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
+        bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0;
+        while (rangeIndex != kRegexInvalidRange) {
+            const Range& r = regex_.GetRange(rangeIndex);
+            if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
+                return yes;
+            rangeIndex = r.next;
+        }
+        return !yes;
+    }
+
+    const RegexType& regex_;
+    Allocator* allocator_;
+    Allocator* ownAllocator_;
+    Stack<Allocator> state0_;
+    Stack<Allocator> state1_;
+    uint32_t* stateSet_;
+};
+
+typedef GenericRegex<UTF8<> > Regex;
+typedef GenericRegexSearch<Regex> RegexSearch;
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+// Balances the RAPIDJSON_DIAG_PUSH issued under __GNUC__ at the top of this file.
+#ifdef __GNUC__
+RAPIDJSON_DIAG_POP
+#endif
+
+#ifdef _MSC_VER
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_INTERNAL_REGEX_H_
diff --git a/c++/include/misc/jsonwrapp/rapidjson11/internal/stack.h b/c++/include/misc/jsonwrapp/rapidjson11/internal/stack.h
new file mode 100644
index 00000000..5c5398c3
--- /dev/null
+++ b/c++/include/misc/jsonwrapp/rapidjson11/internal/stack.h
@@ -0,0 +1,231 @@
+// Tencent is pleased to support the open source community by making
+// RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_INTERNAL_STACK_H_
+#define RAPIDJSON_INTERNAL_STACK_H_
+
+#include "../allocators.h"
+#include "swap.h"
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+///////////////////////////////////////////////////////////////////////////////
+// Stack
+
+//! A type-unsafe stack for storing different types of data.
+/*! \tparam Allocator Allocator for allocating stack memory.
+*/
+template <typename Allocator>
+class Stack {
+public:
+    // Optimization note: Do not allocate memory for stack_ in constructor.
+    // Do it lazily when first Push() -> Expand() -> Resize().
+    Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) {
+    }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    //! Move constructor: takes over the buffer and allocator of \c rhs and leaves it empty.
+    Stack(Stack&& rhs)
+        : allocator_(rhs.allocator_),
+          ownAllocator_(rhs.ownAllocator_),
+          stack_(rhs.stack_),
+          stackTop_(rhs.stackTop_),
+          stackEnd_(rhs.stackEnd_),
+          initialCapacity_(rhs.initialCapacity_)
+    {
+        rhs.allocator_ = 0;
+        rhs.ownAllocator_ = 0;
+        rhs.stack_ = 0;
+        rhs.stackTop_ = 0;
+        rhs.stackEnd_ = 0;
+        rhs.initialCapacity_ = 0;
+    }
+#endif
+
+    ~Stack() {
+        Destroy();
+    }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    //! Move assignment: releases the current buffer, then takes over the one of \c rhs.
+    Stack& operator=(Stack&& rhs) {
+        if (&rhs != this)
+        {
+            Destroy();
+
+            allocator_ = rhs.allocator_;
+            ownAllocator_ = rhs.ownAllocator_;
+            stack_ = rhs.stack_;
+            stackTop_ = rhs.stackTop_;
+            stackEnd_ = rhs.stackEnd_;
+            initialCapacity_ = rhs.initialCapacity_;
+
+            rhs.allocator_ = 0;
+            rhs.ownAllocator_ = 0;
+            rhs.stack_ = 0;
+            rhs.stackTop_ = 0;
+            rhs.stackEnd_ = 0;
+            rhs.initialCapacity_ = 0;
+        }
+        return *this;
+    }
+#endif
+
+    void Swap(Stack& rhs) RAPIDJSON_NOEXCEPT {
+        internal::Swap(allocator_, rhs.allocator_);
+        internal::Swap(ownAllocator_, rhs.ownAllocator_);
+        internal::Swap(stack_, rhs.stack_);
+        internal::Swap(stackTop_, rhs.stackTop_);
+        internal::Swap(stackEnd_, rhs.stackEnd_);
+        internal::Swap(initialCapacity_, rhs.initialCapacity_);
+    }
+
+    //! Drops all content; the allocated capacity is kept.
+    void Clear() { stackTop_ = stack_; }
+
+    void ShrinkToFit() {
+        if (Empty()) {
+            // If the stack is empty, completely deallocate the memory.
+            Allocator::Free(stack_);
+            stack_ = 0;
+            stackTop_ = 0;
+            stackEnd_ = 0;
+        }
+        else
+            Resize(GetSize());
+    }
+
+    // Optimization note: try to minimize the size of this function for force inline.
+    // Expansion is run very infrequently, so it is moved to another (probably non-inline) function.
+    template<typename T>
+    RAPIDJSON_FORCEINLINE void Reserve(size_t count = 1) {
+         // Expand the stack if needed.
+         // Compare sizes rather than forming stackTop_ + n, which would be
+         // pointer arithmetic past the buffer (or on a null pointer before
+         // the first allocation).
+        if (RAPIDJSON_UNLIKELY(sizeof(T) * count > static_cast<size_t>(stackEnd_ - stackTop_)))
+            Expand<T>(count);
+    }
+
+    //! Makes room for \c count objects of type T and returns a pointer to the first one.
+    template<typename T>
+    RAPIDJSON_FORCEINLINE T* Push(size_t count = 1) {
+        Reserve<T>(count);
+        return PushUnsafe<T>(count);
+    }
+
+    //! Like Push() but never reallocates; the caller must have reserved the space.
+    template<typename T>
+    RAPIDJSON_FORCEINLINE T* PushUnsafe(size_t count = 1) {
+        RAPIDJSON_ASSERT(stackTop_);
+        RAPIDJSON_ASSERT(sizeof(T) * count <= static_cast<size_t>(stackEnd_ - stackTop_));
+        T* ret = reinterpret_cast<T*>(stackTop_);
+        stackTop_ += sizeof(T) * count;
+        return ret;
+    }
+
+    //! Removes \c count objects of type T and returns a pointer to the first removed one.
+    template<typename T>
+    T* Pop(size_t count) {
+        RAPIDJSON_ASSERT(GetSize() >= count * sizeof(T));
+        stackTop_ -= count * sizeof(T);
+        return reinterpret_cast<T*>(stackTop_);
+    }
+
+    template<typename T>
+    T* Top() {
+        RAPIDJSON_ASSERT(GetSize() >= sizeof(T));
+        return reinterpret_cast<T*>(stackTop_ - sizeof(T));
+    }
+
+    template<typename T>
+    const T* Top() const {
+        RAPIDJSON_ASSERT(GetSize() >= sizeof(T));
+        return reinterpret_cast<T*>(stackTop_ - sizeof(T));
+    }
+
+    template<typename T>
+    T* End() { return reinterpret_cast<T*>(stackTop_); }
+
+    template<typename T>
+    const T* End() const { return reinterpret_cast<T*>(stackTop_); }
+
+    template<typename T>
+    T* Bottom() { return reinterpret_cast<T*>(stack_); }
+
+    template<typename T>
+    const T* Bottom() const { return reinterpret_cast<T*>(stack_); }
+
+    bool HasAllocator() const {
+        return allocator_ != 0;
+    }
+
+    Allocator& GetAllocator() {
+        RAPIDJSON_ASSERT(allocator_);
+        return *allocator_;
+    }
+
+    bool Empty() const { return stackTop_ == stack_; }
+    size_t GetSize() const { return static_cast<size_t>(stackTop_ - stack_); }      // bytes in use
+    size_t GetCapacity() const { return static_cast<size_t>(stackEnd_ - stack_); }  // bytes allocated
+
+private:
+    template<typename T>
+    void Expand(size_t count) {
+        // Only expand the capacity if the current stack exists. Otherwise just create a stack with initial capacity.
+        size_t newCapacity;
+        if (stack_ == 0) {
+            if (!allocator_)
+                ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+            newCapacity = initialCapacity_;
+        } else {
+            // Grow by half of the current capacity (rounded up).
+            newCapacity = GetCapacity();
+            newCapacity += (newCapacity + 1) / 2;
+        }
+        size_t newSize = GetSize() + sizeof(T) * count;
+        if (newCapacity < newSize)
+            newCapacity = newSize;
+
+        Resize(newCapacity);
+    }
+
+    void Resize(size_t newCapacity) {
+        const size_t size = GetSize();  // Backup the current size
+        stack_ = static_cast<char*>(allocator_->Realloc(stack_, GetCapacity(), newCapacity));
+        stackTop_ = stack_ + size;
+        stackEnd_ = stack_ + newCapacity;
+    }
+
+    void Destroy() {
+        Allocator::Free(stack_);
+        RAPIDJSON_DELETE(ownAllocator_); // Only delete if it is owned by the stack
+    }
+
+    // Prohibit copy constructor & assignment operator.
+    Stack(const Stack&);
+    Stack& operator=(const Stack&);
+
+    Allocator* allocator_;
+    Allocator* ownAllocator_;
+    char *stack_;
+    char *stackTop_;
+    char *stackEnd_;
+    size_t initialCapacity_;
+};
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_INTERNAL_STACK_H_
diff --git a/c++/include/misc/jsonwrapp/rapidjson11/internal/strfunc.h b/c++/include/misc/jsonwrapp/rapidjson11/internal/strfunc.h
new file mode 100644
index 00000000..de41d8f9
--- /dev/null
+++ b/c++/include/misc/jsonwrapp/rapidjson11/internal/strfunc.h
@@ -0,0 +1,58 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_INTERNAL_STRFUNC_H_
+#define RAPIDJSON_INTERNAL_STRFUNC_H_
+
+#include "../stream.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+//! Custom strlen() which works on different character types.
+/*! \tparam Ch Character type (e.g. char, wchar_t, short)
+    \param s Null-terminated input string.
+    \return Number of characters in the string.
+    \note This has the same semantics as strlen(), the return value is not number of Unicode codepoints.
+*/
+template <typename Ch>
+inline SizeType StrLen(const Ch* s) {
+    RAPIDJSON_ASSERT(s != 0);
+    const Ch* p = s;
+    while (*p) ++p;
+    return SizeType(p - s);
+}
+
+//! Returns number of code points in a encoded string.
+/*! \tparam Encoding Encoding whose Decode() is used to step through the string.
+    \param s Start of the encoded string (must not be null).
+    \param length Number of code units to examine, starting at \c s.
+    \param outCount Receives the code point count on success (must not be null).
+    \return false as soon as a sequence fails to decode; \c outCount is then left untouched.
+*/
+template<typename Encoding>
+bool CountStringCodePoint(const typename Encoding::Ch* s, SizeType length, SizeType* outCount) {
+    RAPIDJSON_ASSERT(s != 0);
+    RAPIDJSON_ASSERT(outCount != 0);
+    GenericStringStream<Encoding> is(s);
+    const typename Encoding::Ch* end = s + length;
+    SizeType count = 0;
+    while (is.src_ < end) {
+        unsigned codepoint;
+        if (!Encoding::Decode(is, &codepoint))
+            return false;
+        count++;
+    }
+    *outCount = count;
+    return true;
+}
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_INTERNAL_STRFUNC_H_
diff --git a/c++/include/misc/jsonwrapp/rapidjson11/internal/strtod.h b/c++/include/misc/jsonwrapp/rapidjson11/internal/strtod.h
new file mode 100644
index 00000000..adf49e34
--- /dev/null
+++ b/c++/include/misc/jsonwrapp/rapidjson11/internal/strtod.h
@@ -0,0 +1,269 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_STRTOD_
+#define RAPIDJSON_STRTOD_
+
+#include "ieee754.h"
+#include "biginteger.h"
+#include "diyfp.h"
+#include "pow10.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+//! Scales \c significand by 10^exp with one multiplication or division.
+/*! Exponents below -308 yield 0.0. */
+inline double FastPath(double significand, int exp) {
+    if (exp < -308)
+        return 0.0;
+    else if (exp >= 0)
+        return significand * internal::Pow10(exp);
+    else
+        return significand / internal::Pow10(-exp);
+}
+
+//! Scales \c d by 10^p, splitting the scaling in two when p is below -308.
+inline double StrtodNormalPrecision(double d, int p) {
+    if (p < -308) {
+        // Prevent expSum < -308, making Pow10(p) = 0
+        d = FastPath(d, -308);
+        d = FastPath(d, p + 308);
+    }
+    else
+        d = FastPath(d, p);
+    return d;
+}
+
+//! Smallest of three values.
+template <typename T>
+inline T Min3(T a, T b, T c) {
+    T m = a;
+    if (m > b) m = b;
+    if (m > c) m = c;
+    return m;
+}
+
+//! Compares |d * 10^dExp - b| with half a unit in the last place of \c b.
+/*! All three quantities are scaled to integers by common powers of 2 and 5.
+    \return the result of delta.Compare(hS): negative when the difference is
+            smaller than half an ULP, zero when equal, positive when larger.
+*/
+inline int CheckWithinHalfULP(double b, const BigInteger& d, int dExp) {
+    const Double db(b);
+    const uint64_t bInt = db.IntegerSignificand();
+    const int bExp = db.IntegerExponent();
+    const int hExp = bExp - 1;
+
+    int dS_Exp2 = 0, dS_Exp5 = 0, bS_Exp2 = 0, bS_Exp5 = 0, hS_Exp2 = 0, hS_Exp5 = 0;
+
+    // Adjust for decimal exponent
+    if (dExp >= 0) {
+        dS_Exp2 += dExp;
+        dS_Exp5 += dExp;
+    }
+    else {
+        bS_Exp2 -= dExp;
+        bS_Exp5 -= dExp;
+        hS_Exp2 -= dExp;
+        hS_Exp5 -= dExp;
+    }
+
+    // Adjust for binary exponent
+    if (bExp >= 0)
+        bS_Exp2 += bExp;
+    else {
+        dS_Exp2 -= bExp;
+        hS_Exp2 -= bExp;
+    }
+
+    // Adjust for half ulp exponent
+    if (hExp >= 0)
+        hS_Exp2 += hExp;
+    else {
+        dS_Exp2 -= hExp;
+        bS_Exp2 -= hExp;
+    }
+
+    // Remove common power of two factor from all three scaled values
+    int common_Exp2 = Min3(dS_Exp2, bS_Exp2, hS_Exp2);
+    dS_Exp2 -= common_Exp2;
+    bS_Exp2 -= common_Exp2;
+    hS_Exp2 -= common_Exp2;
+
+    BigInteger dS = d;
+    dS.MultiplyPow5(static_cast<unsigned>(dS_Exp5)) <<= static_cast<unsigned>(dS_Exp2);
+
+    BigInteger bS(bInt);
+    bS.MultiplyPow5(static_cast<unsigned>(bS_Exp5)) <<= static_cast<unsigned>(bS_Exp2);
+
+    BigInteger hS(1);
+    hS.MultiplyPow5(static_cast<unsigned>(hS_Exp5)) <<= static_cast<unsigned>(hS_Exp2);
+
+    BigInteger delta(0);
+    dS.Difference(bS, &delta);
+
+    return delta.Compare(hS);
+}
+
+//! Tries the exact fast path.
+/*! \return true and stores the value in \c result when |p| <= 22 (after the
+            "in disguise" adjustment) and d <= 2^53 - 1; false otherwise. */
+inline bool StrtodFast(double d, int p, double* result) {
+    // Use fast path for string-to-double conversion if possible
+    // see http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
+    if (p > 22  && p < 22 + 16) {
+        // Fast Path Cases In Disguise
+        d *= internal::Pow10(p - 22);
+        p = 22;
+    }
+
+    if (p >= -22 && p <= 22 && d <= 9007199254740991.0) { // 2^53 - 1
+        *result = FastPath(d, p);
+        return true;
+    }
+    else
+        return false;
+}
+
+// Compute an approximation and see if it is within 1/2 ULP
+// Always stores the approximation in *result; returns true when the tracked
+// error shows that the rounding decision cannot be affected by it.
+inline bool StrtodDiyFp(const char* decimals, size_t length, size_t decimalPosition, int exp, double* result) {
+    uint64_t significand = 0;
+    size_t i = 0;   // 2^64 - 1 = 18446744073709551615, 1844674407370955161 = 0x1999999999999999
+    for (; i < length; i++) {
+        if (significand  >  RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) ||
+            (significand == RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) && decimals[i] > '5'))
+            break;
+        significand = significand * 10u + static_cast<unsigned>(decimals[i] - '0');
+    }
+
+    if (i < length && decimals[i] >= '5') // Rounding
+        significand++;
+
+    size_t remaining = length - i;
+    const int kUlpShift = 3;
+    const int kUlp = 1 << kUlpShift;
+    int64_t error = (remaining == 0) ? 0 : kUlp / 2;
+
+    DiyFp v(significand, 0);
+    v = v.Normalize();
+    error <<= -v.e;
+
+    const int dExp = static_cast<int>(decimalPosition) - static_cast<int>(i) + exp;
+
+    int actualExp;
+    DiyFp cachedPower = GetCachedPower10(dExp, &actualExp);
+    if (actualExp != dExp) {
+        static const DiyFp kPow10[] = {
+            DiyFp(RAPIDJSON_UINT64_C2(0xa0000000, 00000000), -60),  // 10^1
+            DiyFp(RAPIDJSON_UINT64_C2(0xc8000000, 00000000), -57),  // 10^2
+            DiyFp(RAPIDJSON_UINT64_C2(0xfa000000, 00000000), -54),  // 10^3
+            DiyFp(RAPIDJSON_UINT64_C2(0x9c400000, 00000000), -50),  // 10^4
+            DiyFp(RAPIDJSON_UINT64_C2(0xc3500000, 00000000), -47),  // 10^5
+            DiyFp(RAPIDJSON_UINT64_C2(0xf4240000, 00000000), -44),  // 10^6
+            DiyFp(RAPIDJSON_UINT64_C2(0x98968000, 00000000), -40)   // 10^7
+        };
+        int  adjustment = dExp - actualExp - 1;
+        RAPIDJSON_ASSERT(adjustment >= 0 && adjustment < 7);
+        v = v * kPow10[adjustment];
+        if (length + static_cast<unsigned>(adjustment)> 19u) // has more digits than decimal digits in 64-bit
+            error += kUlp / 2;
+    }
+
+    v = v * cachedPower;
+
+    error += kUlp + (error == 0 ? 0 : 1);
+
+    const int oldExp = v.e;
+    v = v.Normalize();
+    error <<= oldExp - v.e;
+
+    const int effectiveSignificandSize = Double::EffectiveSignificandSize(64 + v.e);
+    int precisionSize = 64 - effectiveSignificandSize;
+    if (precisionSize + kUlpShift >= 64) {
+        int scaleExp = (precisionSize + kUlpShift) - 63;
+        v.f >>= scaleExp;
+        v.e += scaleExp;
+        error = (error >> scaleExp) + 1 + kUlp;
+        precisionSize -= scaleExp;
+    }
+
+    DiyFp rounded(v.f >> precisionSize, v.e + precisionSize);
+    const uint64_t precisionBits = (v.f & ((uint64_t(1) << precisionSize) - 1)) * kUlp;
+    const uint64_t halfWay = (uint64_t(1) << (precisionSize - 1)) * kUlp;
+    if (precisionBits >= halfWay + static_cast<unsigned>(error)) {
+        rounded.f++;
+        if (rounded.f & (DiyFp::kDpHiddenBit << 1)) { // rounding overflows mantissa (issue #340)
+            rounded.f >>= 1;
+            rounded.e++;
+        }
+    }
+
+    *result = rounded.ToDouble();
+
+    return halfWay - static_cast<unsigned>(error) >= precisionBits || precisionBits >= halfWay + static_cast<unsigned>(error);
+}
+
+//! Corrects \c approx by an exact big-integer comparison against the decimal digits.
+inline double StrtodBigInteger(double approx, const char* decimals, size_t length, size_t decimalPosition, int exp) {
+    const BigInteger dInt(decimals, length);
+    const int dExp = static_cast<int>(decimalPosition) - static_cast<int>(length) + exp;
+    Double a(approx);
+    int cmp = CheckWithinHalfULP(a.Value(), dInt, dExp);
+    if (cmp < 0)
+        return a.Value();  // within half ULP
+    else if (cmp == 0) {
+        // Round towards even
+        if (a.Significand() & 1)
+            return a.NextPositiveDouble();
+        else
+            return a.Value();
+    }
+    else // adjustment
+        return a.NextPositiveDouble();
+}
+
+//! Full-precision conversion entry point.
+/*! Tries StrtodFast(), then StrtodDiyFp(), and finally StrtodBigInteger().
+    \param d Non-negative value passed to the fast path (asserted).
+    \param p Decimal exponent passed to the fast path together with \c d.
+    \param decimals Digit characters, \c length of them (at least one, asserted).
+    \param decimalPosition Passed on to the slow paths; adjusted here as digits are trimmed.
+    \param exp Decimal exponent passed on to the slow paths; adjusted here as digits are trimmed.
+*/
+inline double StrtodFullPrecision(double d, int p, const char* decimals, size_t length, size_t decimalPosition, int exp) {
+    RAPIDJSON_ASSERT(d >= 0.0);
+    RAPIDJSON_ASSERT(length >= 1);
+
+    double result;
+    if (StrtodFast(d, p, &result))
+        return result;
+
+    // Trim leading zeros
+    while (*decimals == '0' && length > 1) {
+        length--;
+        decimals++;
+        decimalPosition--;
+    }
+
+    // Trim trailing zeros
+    while (decimals[length - 1] == '0' && length > 1) {
+        length--;
+        decimalPosition--;
+        exp++;
+    }
+
+    // Trim right-most digits
+    const int kMaxDecimalDigit = 780;
+    if (static_cast<int>(length) > kMaxDecimalDigit) {
+        int delta = (static_cast<int>(length) - kMaxDecimalDigit);
+        exp += delta;
+        decimalPosition -= static_cast<unsigned>(delta);
+        length = kMaxDecimalDigit;
+    }
+
+    // If too small, underflow to zero
+    if (int(length) + exp < -324)
+        return 0.0;
+
+    if (StrtodDiyFp(decimals, length, decimalPosition, exp, &result))
+        return result;
+
+    // Use approximation from StrtodDiyFp and make adjustment with BigInteger comparison
+    return StrtodBigInteger(result, decimals, length, decimalPosition, exp);
+}
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_STRTOD_
diff --git a/c++/include/misc/jsonwrapp/rapidjson11/internal/swap.h b/c++/include/misc/jsonwrapp/rapidjson11/internal/swap.h
new file mode 100644
index 00000000..666e49f9
--- /dev/null
+++ b/c++/include/misc/jsonwrapp/rapidjson11/internal/swap.h
@@ -0,0 +1,46 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_INTERNAL_SWAP_H_
+#define RAPIDJSON_INTERNAL_SWAP_H_
+
+#include "../rapidjson.h"
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+//!
Custom swap() to avoid dependency on C++ header +/*! \tparam T Type of the arguments to swap, should be instantiated with primitive C++ types only. + \note This has the same semantics as std::swap(). +*/ +template +inline void Swap(T& a, T& b) RAPIDJSON_NOEXCEPT { + T tmp = a; + a = b; + b = tmp; +} + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#if defined(__clang__) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_INTERNAL_SWAP_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/istreamwrapper.h b/c++/include/misc/jsonwrapp/rapidjson11/istreamwrapper.h new file mode 100644 index 00000000..8639c8c3 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/istreamwrapper.h @@ -0,0 +1,115 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ISTREAMWRAPPER_H_ +#define RAPIDJSON_ISTREAMWRAPPER_H_ + +#include "stream.h" +#include + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4351) // new behavior: elements of array 'array' will be default initialized +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Wrapper of \c std::basic_istream into RapidJSON's Stream concept. +/*! 
+ The classes can be wrapped including but not limited to: + + - \c std::istringstream + - \c std::stringstream + - \c std::wistringstream + - \c std::wstringstream + - \c std::ifstream + - \c std::fstream + - \c std::wifstream + - \c std::wfstream + + \tparam StreamType Class derived from \c std::basic_istream. +*/ + +template +class BasicIStreamWrapper { +public: + typedef typename StreamType::char_type Ch; + BasicIStreamWrapper(StreamType& stream) : stream_(stream), count_(), peekBuffer_() {} + + Ch Peek() const { + typename StreamType::int_type c = stream_.peek(); + return RAPIDJSON_LIKELY(c != StreamType::traits_type::eof()) ? static_cast(c) : static_cast('\0'); + } + + Ch Take() { + typename StreamType::int_type c = stream_.get(); + if (RAPIDJSON_LIKELY(c != StreamType::traits_type::eof())) { + count_++; + return static_cast(c); + } + else + return '\0'; + } + + // tellg() may return -1 when failed. So we count by ourself. + size_t Tell() const { return count_; } + + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + + // For encoding detection only. + const Ch* Peek4() const { + RAPIDJSON_ASSERT(sizeof(Ch) == 1); // Only usable for byte stream. + int i; + bool hasError = false; + for (i = 0; i < 4; ++i) { + typename StreamType::int_type c = stream_.get(); + if (c == StreamType::traits_type::eof()) { + hasError = true; + stream_.clear(); + break; + } + peekBuffer_[i] = static_cast(c); + } + for (--i; i >= 0; --i) + stream_.putback(peekBuffer_[i]); + return !hasError ? peekBuffer_ : 0; + } + +private: + BasicIStreamWrapper(const BasicIStreamWrapper&); + BasicIStreamWrapper& operator=(const BasicIStreamWrapper&); + + StreamType& stream_; + size_t count_; //!< Number of characters read. 
Note: + mutable Ch peekBuffer_[4]; +}; + +typedef BasicIStreamWrapper IStreamWrapper; +typedef BasicIStreamWrapper WIStreamWrapper; + +#if defined(__clang__) || defined(_MSC_VER) +RAPIDJSON_DIAG_POP +#endif + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_ISTREAMWRAPPER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/license.txt b/c++/include/misc/jsonwrapp/rapidjson11/license.txt new file mode 100644 index 00000000..7ccc161c --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/license.txt @@ -0,0 +1,57 @@ +Tencent is pleased to support the open source community by making RapidJSON available. + +Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. + +If you have downloaded a copy of the RapidJSON binary from Tencent, please note that the RapidJSON binary is licensed under the MIT License. +If you have downloaded a copy of the RapidJSON source code from Tencent, please note that RapidJSON source code is licensed under the MIT License, except for the third-party components listed below which are subject to different license terms. Your integration of RapidJSON into your own projects may require compliance with the MIT License, as well as the other licenses applicable to the third-party components included within RapidJSON. To avoid the problematic JSON license in your own projects, it's sufficient to exclude the bin/jsonchecker/ directory, as it's the only code under the JSON license. +A copy of the MIT License is included in this file. + +Other dependencies and licenses: + +Open Source Software Licensed Under the BSD License: +-------------------------------------------------------------------- + +The msinttypes r29 +Copyright (c) 2006-2013 Alexander Chemeris +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +* Neither the name of copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Open Source Software Licensed Under the JSON License: +-------------------------------------------------------------------- + +json.org +Copyright (c) 2002 JSON.org +All Rights Reserved. + +JSON_checker +Copyright (c) 2002 JSON.org +All Rights Reserved. 
+ + +Terms of the JSON License: +--------------------------------------------------- + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +The Software shall be used for Good, not Evil. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +Terms of the MIT License: +-------------------------------------------------------------------- + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/c++/include/misc/jsonwrapp/rapidjson11/memorybuffer.h b/c++/include/misc/jsonwrapp/rapidjson11/memorybuffer.h new file mode 100644 index 00000000..39bee1de --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/memorybuffer.h @@ -0,0 +1,70 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_MEMORYBUFFER_H_ +#define RAPIDJSON_MEMORYBUFFER_H_ + +#include "stream.h" +#include "internal/stack.h" + +RAPIDJSON_NAMESPACE_BEGIN + +//! Represents an in-memory output byte stream. +/*! + This class is mainly for being wrapped by EncodedOutputStream or AutoUTFOutputStream. + + It is similar to FileWriteBuffer but the destination is an in-memory buffer instead of a file. + + Differences between MemoryBuffer and StringBuffer: + 1. StringBuffer has Encoding but MemoryBuffer is only a byte buffer. + 2. 
StringBuffer::GetString() returns a null-terminated string. MemoryBuffer::GetBuffer() returns a buffer without terminator. + + \tparam Allocator type for allocating memory buffer. + \note implements Stream concept +*/ +template +struct GenericMemoryBuffer { + typedef char Ch; // byte + + GenericMemoryBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {} + + void Put(Ch c) { *stack_.template Push() = c; } + void Flush() {} + + void Clear() { stack_.Clear(); } + void ShrinkToFit() { stack_.ShrinkToFit(); } + Ch* Push(size_t count) { return stack_.template Push(count); } + void Pop(size_t count) { stack_.template Pop(count); } + + const Ch* GetBuffer() const { + return stack_.template Bottom(); + } + + size_t GetSize() const { return stack_.GetSize(); } + + static const size_t kDefaultCapacity = 256; + mutable internal::Stack stack_; +}; + +typedef GenericMemoryBuffer<> MemoryBuffer; + +//! Implement specialized version of PutN() with memset() for better performance. +template<> +inline void PutN(MemoryBuffer& memoryBuffer, char c, size_t n) { + std::memset(memoryBuffer.stack_.Push(n), c, n * sizeof(c)); +} + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_MEMORYBUFFER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/memorystream.h b/c++/include/misc/jsonwrapp/rapidjson11/memorystream.h new file mode 100644 index 00000000..1d71d8a4 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/memorystream.h @@ -0,0 +1,71 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_MEMORYSTREAM_H_ +#define RAPIDJSON_MEMORYSTREAM_H_ + +#include "stream.h" + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(unreachable-code) +RAPIDJSON_DIAG_OFF(missing-noreturn) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Represents an in-memory input byte stream. +/*! + This class is mainly for being wrapped by EncodedInputStream or AutoUTFInputStream. + + It is similar to FileReadBuffer but the source is an in-memory buffer instead of a file. + + Differences between MemoryStream and StringStream: + 1. StringStream has encoding but MemoryStream is a byte stream. + 2. MemoryStream needs the size of the source buffer, and the buffer does not need to be null-terminated. StringStream assumes a null-terminated string as source. + 3. MemoryStream supports Peek4() for encoding detection. StringStream is specified with an encoding so it should not have Peek4(). + \note implements Stream concept +*/ +struct MemoryStream { + typedef char Ch; // byte + + MemoryStream(const Ch *src, size_t size) : src_(src), begin_(src), end_(src + size), size_(size) {} + + Ch Peek() const { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_; } + Ch Take() { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_++; } + size_t Tell() const { return static_cast(src_ - begin_); } + + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + + // For encoding detection only. 
+ const Ch* Peek4() const { + return Tell() + 4 <= size_ ? src_ : 0; + } + + const Ch* src_; //!< Current read position. + const Ch* begin_; //!< Original head of the string. + const Ch* end_; //!< End of stream. + size_t size_; //!< Size of the stream. +}; + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_MEMORYSTREAM_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/msinttypes/inttypes.h b/c++/include/misc/jsonwrapp/rapidjson11/msinttypes/inttypes.h new file mode 100644 index 00000000..18111286 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/msinttypes/inttypes.h @@ -0,0 +1,316 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2013 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the product nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +// The above software in this distribution may have been modified by +// THL A29 Limited ("Tencent Modifications"). +// All Tencent Modifications are Copyright (C) 2015 THL A29 Limited. + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "stdint.h" + +// miloyip: VC supports inttypes.h since VC2013 +#if _MSC_VER >= 1800 +#include +#else + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define 
PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 "I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define 
SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +#endif // __STDC_FORMAT_MACROS ] + +// 7.8.2 
Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + +#endif // _MSC_VER >= 1800 + +#endif // _MSC_INTTYPES_H_ ] diff --git a/c++/include/misc/jsonwrapp/rapidjson11/msinttypes/stdint.h b/c++/include/misc/jsonwrapp/rapidjson11/msinttypes/stdint.h new file mode 100644 index 00000000..3d4477b9 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/msinttypes/stdint.h @@ -0,0 +1,300 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2013 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the product nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +// The above software in this distribution may have been modified by +// THL A29 Limited ("Tencent Modifications"). +// All Tencent Modifications are Copyright (C) 2015 THL A29 Limited. + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +// miloyip: Originally Visual Studio 2010 uses its own stdint.h. However it generates warning with INT64_C(), so change to use this file for vs2010. 
+#if _MSC_VER >= 1600 // [ +#include + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +#undef INT8_C +#undef INT16_C +#undef INT32_C +#undef INT64_C +#undef UINT8_C +#undef UINT16_C +#undef UINT32_C +#undef UINT64_C + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +// These #ifndef's are needed to prevent collisions with . +// Check out Issue 9 for the details. +#ifndef INTMAX_C // [ +# define INTMAX_C INT64_C +#endif // INTMAX_C ] +#ifndef UINTMAX_C // [ +# define UINTMAX_C UINT64_C +#endif // UINTMAX_C ] + +#endif // __STDC_CONSTANT_MACROS ] + +#else // ] _MSC_VER >= 1700 [ + +#include + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we have to wrap include with 'extern "C++" {}' +// or compiler would give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#if defined(__cplusplus) && !defined(_M_ARM) +extern "C" { +#endif +# include +#if defined(__cplusplus) && !defined(_M_ARM) +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. 
+#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define 
UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// 
WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h> +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Macros for integer constants + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>. +// Check out Issue 9 for the details. +#ifndef INTMAX_C // [ +# define INTMAX_C INT64_C +#endif // INTMAX_C ] +#ifndef UINTMAX_C // [ +# define UINTMAX_C UINT64_C +#endif // UINTMAX_C ] + +#endif // __STDC_CONSTANT_MACROS ] + +#endif // _MSC_VER >= 1600 ] + +#endif // _MSC_STDINT_H_ ] diff --git a/c++/include/misc/jsonwrapp/rapidjson11/ostreamwrapper.h b/c++/include/misc/jsonwrapp/rapidjson11/ostreamwrapper.h new file mode 100644 index 00000000..6f4667c0 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/ostreamwrapper.h @@ -0,0 +1,81 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_OSTREAMWRAPPER_H_ +#define RAPIDJSON_OSTREAMWRAPPER_H_ + +#include "stream.h" +#include + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Wrapper of \c std::basic_ostream into RapidJSON's Stream concept. +/*! + The classes that can be wrapped include, but are not limited to: + + - \c std::ostringstream + - \c std::stringstream + - \c std::wostringstream + - \c std::wstringstream + - \c std::ofstream + - \c std::fstream + - \c std::wofstream + - \c std::wfstream + + \tparam StreamType Class derived from \c std::basic_ostream. 
+*/ + +template +class BasicOStreamWrapper { +public: + typedef typename StreamType::char_type Ch; + BasicOStreamWrapper(StreamType& stream) : stream_(stream) {} + + void Put(Ch c) { + stream_.put(c); + } + + void Flush() { + stream_.flush(); + } + + // Not implemented + char Peek() const { RAPIDJSON_ASSERT(false); return 0; } + char Take() { RAPIDJSON_ASSERT(false); return 0; } + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + BasicOStreamWrapper(const BasicOStreamWrapper&); + BasicOStreamWrapper& operator=(const BasicOStreamWrapper&); + + StreamType& stream_; +}; + +typedef BasicOStreamWrapper OStreamWrapper; +typedef BasicOStreamWrapper WOStreamWrapper; + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_OSTREAMWRAPPER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/pointer.h b/c++/include/misc/jsonwrapp/rapidjson11/pointer.h new file mode 100644 index 00000000..97ebaf48 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/pointer.h @@ -0,0 +1,1363 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_POINTER_H_ +#define RAPIDJSON_POINTER_H_ + +#include "document.h" +#include "internal/itoa.h" + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(switch-enum) +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +static const SizeType kPointerInvalidIndex = ~SizeType(0); //!< Represents an invalid index in GenericPointer::Token + +//! Error code of parsing. +/*! \ingroup RAPIDJSON_ERRORS + \see GenericPointer::GenericPointer, GenericPointer::GetParseErrorCode +*/ +enum PointerParseErrorCode { + kPointerParseErrorNone = 0, //!< The parse is successful + + kPointerParseErrorTokenMustBeginWithSolidus, //!< A token must begin with a '/' + kPointerParseErrorInvalidEscape, //!< Invalid escape + kPointerParseErrorInvalidPercentEncoding, //!< Invalid percent encoding in URI fragment + kPointerParseErrorCharacterMustPercentEncode //!< A character must percent encoded in URI fragment +}; + +/////////////////////////////////////////////////////////////////////////////// +// GenericPointer + +//! Represents a JSON Pointer. Use Pointer for UTF8 encoding and default allocator. +/*! + This class implements RFC 6901 "JavaScript Object Notation (JSON) Pointer" + (https://tools.ietf.org/html/rfc6901). + + A JSON pointer is for identifying a specific value in a JSON document + (GenericDocument). It can simplify coding of DOM tree manipulation, because it + can access multiple-level depth of DOM tree with single API call. + + After it parses a string representation (e.g. "/foo/0" or URI fragment + representation (e.g. "#/foo/0") into its internal representation (tokens), + it can be used to resolve a specific value in multiple documents, or sub-tree + of documents. + + Contrary to GenericValue, Pointer can be copy constructed and copy assigned. + Apart from assignment, a Pointer cannot be modified after construction. 
+ + Although Pointer is very convenient, please aware that constructing Pointer + involves parsing and dynamic memory allocation. A special constructor with user- + supplied tokens eliminates these. + + GenericPointer depends on GenericDocument and GenericValue. + + \tparam ValueType The value type of the DOM tree. E.g. GenericValue > + \tparam Allocator The allocator type for allocating memory for internal representation. + + \note GenericPointer uses same encoding of ValueType. + However, Allocator of GenericPointer is independent of Allocator of Value. +*/ +template +class GenericPointer { +public: + typedef typename ValueType::EncodingType EncodingType; //!< Encoding type from Value + typedef typename ValueType::Ch Ch; //!< Character type from Value + + //! A token is the basic units of internal representation. + /*! + A JSON pointer string representation "/foo/123" is parsed to two tokens: + "foo" and 123. 123 will be represented in both numeric form and string form. + They are resolved according to the actual value type (object or array). + + For token that are not numbers, or the numeric value is out of bound + (greater than limits of SizeType), they are only treated as string form + (i.e. the token's index will be equal to kPointerInvalidIndex). + + This struct is public so that user can create a Pointer without parsing and + allocation, using a special constructor. + */ + struct Token { + const Ch* name; //!< Name of the token. It has null character at the end but it can contain null character. + SizeType length; //!< Length of the name. + SizeType index; //!< A valid array index, if it is not equal to kPointerInvalidIndex. + }; + + //!@name Constructors and destructor. + //@{ + + //! Default constructor. + GenericPointer(Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {} + + //! 
Constructor that parses a string or URI fragment representation. + /*! + \param source A null-terminated, string or URI fragment representation of JSON pointer. + \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one. + */ + explicit GenericPointer(const Ch* source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) { + Parse(source, internal::StrLen(source)); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Constructor that parses a string or URI fragment representation. + /*! + \param source A string or URI fragment representation of JSON pointer. + \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one. + \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. + */ + explicit GenericPointer(const std::basic_string& source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) { + Parse(source.c_str(), source.size()); + } +#endif + + //! Constructor that parses a string or URI fragment representation, with length of the source string. + /*! + \param source A string or URI fragment representation of JSON pointer. + \param length Length of source. + \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one. + \note Slightly faster than the overload without length. + */ + GenericPointer(const Ch* source, size_t length, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) { + Parse(source, length); + } + + //! Constructor with user-supplied tokens. + /*! + This constructor let user supplies const array of tokens. 
+ This prevents the parsing process and eliminates allocation. + This is preferred for memory constrained environments. + + \param tokens An constant array of tokens representing the JSON pointer. + \param tokenCount Number of tokens. + + \b Example + \code + #define NAME(s) { s, sizeof(s) / sizeof(s[0]) - 1, kPointerInvalidIndex } + #define INDEX(i) { #i, sizeof(#i) - 1, i } + + static const Pointer::Token kTokens[] = { NAME("foo"), INDEX(123) }; + static const Pointer p(kTokens, sizeof(kTokens) / sizeof(kTokens[0])); + // Equivalent to static const Pointer p("/foo/123"); + + #undef NAME + #undef INDEX + \endcode + */ + GenericPointer(const Token* tokens, size_t tokenCount) : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(const_cast(tokens)), tokenCount_(tokenCount), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {} + + //! Copy constructor. + GenericPointer(const GenericPointer& rhs, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) { + *this = rhs; + } + + //! Destructor. + ~GenericPointer() { + if (nameBuffer_) // If user-supplied tokens constructor is used, nameBuffer_ is nullptr and tokens_ are not deallocated. + Allocator::Free(tokens_); + RAPIDJSON_DELETE(ownAllocator_); + } + + //! Assignment operator. + GenericPointer& operator=(const GenericPointer& rhs) { + if (this != &rhs) { + // Do not delete ownAllcator + if (nameBuffer_) + Allocator::Free(tokens_); + + tokenCount_ = rhs.tokenCount_; + parseErrorOffset_ = rhs.parseErrorOffset_; + parseErrorCode_ = rhs.parseErrorCode_; + + if (rhs.nameBuffer_) + CopyFromRaw(rhs); // Normally parsed tokens. + else { + tokens_ = rhs.tokens_; // User supplied const tokens. + nameBuffer_ = 0; + } + } + return *this; + } + + //@} + + //!@name Append token + //@{ + + //! Append a token and return a new Pointer + /*! + \param token Token to be appended. 
+ \param allocator Allocator for the newly returned Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(const Token& token, Allocator* allocator = 0) const { + GenericPointer r; + r.allocator_ = allocator; + Ch *p = r.CopyFromRaw(*this, 1, token.length + 1); + std::memcpy(p, token.name, (token.length + 1) * sizeof(Ch)); + r.tokens_[tokenCount_].name = p; + r.tokens_[tokenCount_].length = token.length; + r.tokens_[tokenCount_].index = token.index; + return r; + } + + //! Append a name token with length, and return a new Pointer + /*! + \param name Name to be appended. + \param length Length of name. + \param allocator Allocator for the newly returned Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(const Ch* name, SizeType length, Allocator* allocator = 0) const { + Token token = { name, length, kPointerInvalidIndex }; + return Append(token, allocator); + } + + //! Append a name token without length, and return a new Pointer + /*! + \param name Name (const Ch*) to be appended. + \param allocator Allocator for the newly returned Pointer. + \return A new Pointer with appended token. + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr::Type, Ch> >), (GenericPointer)) + Append(T* name, Allocator* allocator = 0) const { + return Append(name, internal::StrLen(name), allocator); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Append a name token, and return a new Pointer + /*! + \param name Name to be appended. + \param allocator Allocator for the newly returned Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(const std::basic_string& name, Allocator* allocator = 0) const { + return Append(name.c_str(), static_cast(name.size()), allocator); + } +#endif + + //! Append an index token, and return a new Pointer + /*! + \param index Index to be appended. + \param allocator Allocator for the newly returned Pointer. + \return A new Pointer with appended token. 
+ */ + GenericPointer Append(SizeType index, Allocator* allocator = 0) const { + char buffer[21]; + char* end = sizeof(SizeType) == 4 ? internal::u32toa(index, buffer) : internal::u64toa(index, buffer); + SizeType length = static_cast(end - buffer); + buffer[length] = '\0'; + + if (sizeof(Ch) == 1) { + Token token = { reinterpret_cast(buffer), length, index }; + return Append(token, allocator); + } + else { + Ch name[21]; + for (size_t i = 0; i <= length; i++) + name[i] = static_cast(buffer[i]); + Token token = { name, length, index }; + return Append(token, allocator); + } + } + + //! Append a token by value, and return a new Pointer + /*! + \param token token to be appended. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(const ValueType& token, Allocator* allocator = 0) const { + if (token.IsString()) + return Append(token.GetString(), token.GetStringLength(), allocator); + else { + RAPIDJSON_ASSERT(token.IsUint64()); + RAPIDJSON_ASSERT(token.GetUint64() <= SizeType(~0)); + return Append(static_cast(token.GetUint64()), allocator); + } + } + + //!@name Handling Parse Error + //@{ + + //! Check whether this is a valid pointer. + bool IsValid() const { return parseErrorCode_ == kPointerParseErrorNone; } + + //! Get the parsing error offset in code unit. + size_t GetParseErrorOffset() const { return parseErrorOffset_; } + + //! Get the parsing error code. + PointerParseErrorCode GetParseErrorCode() const { return parseErrorCode_; } + + //@} + + //! Get the allocator of this pointer. + Allocator& GetAllocator() { return *allocator_; } + + //!@name Tokens + //@{ + + //! Get the token array (const version only). + const Token* GetTokens() const { return tokens_; } + + //! Get the number of tokens. + size_t GetTokenCount() const { return tokenCount_; } + + //@} + + //!@name Equality/inequality operators + //@{ + + //! Equality operator. + /*! 
+ \note When any pointers are invalid, always returns false. + */ + bool operator==(const GenericPointer& rhs) const { + if (!IsValid() || !rhs.IsValid() || tokenCount_ != rhs.tokenCount_) + return false; + + for (size_t i = 0; i < tokenCount_; i++) { + if (tokens_[i].index != rhs.tokens_[i].index || + tokens_[i].length != rhs.tokens_[i].length || + (tokens_[i].length != 0 && std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch)* tokens_[i].length) != 0)) + { + return false; + } + } + + return true; + } + + //! Inequality operator. + /*! + \note When any pointers are invalid, always returns true. + */ + bool operator!=(const GenericPointer& rhs) const { return !(*this == rhs); } + + //@} + + //!@name Stringify + //@{ + + //! Stringify the pointer into string representation. + /*! + \tparam OutputStream Type of output stream. + \param os The output stream. + */ + template + bool Stringify(OutputStream& os) const { + return Stringify(os); + } + + //! Stringify the pointer into URI fragment representation. + /*! + \tparam OutputStream Type of output stream. + \param os The output stream. + */ + template + bool StringifyUriFragment(OutputStream& os) const { + return Stringify(os); + } + + //@} + + //!@name Create value + //@{ + + //! Create a value in a subtree. + /*! + If the value is not exist, it creates all parent values and a JSON Null value. + So it always succeed and return the newly created or existing value. + + Remind that it may change types of parents according to tokens, so it + potentially removes previously stored values. For example, if a document + was an array, and "/foo" is used to create a value, then the document + will be changed to an object, and all existing array elements are lost. + + \param root Root value of a DOM subtree to be resolved. It can be any value other than document root. + \param allocator Allocator for creating the values if the specified value or its parents are not exist. 
+ \param alreadyExist If non-null, it stores whether the resolved value is already exist. + \return The resolved newly created (a JSON Null value), or already exists value. + */ + ValueType& Create(ValueType& root, typename ValueType::AllocatorType& allocator, bool* alreadyExist = 0) const { + RAPIDJSON_ASSERT(IsValid()); + ValueType* v = &root; + bool exist = true; + for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { + if (v->IsArray() && t->name[0] == '-' && t->length == 1) { +//NCBI: added SetValueAllocator + v->PushBack(ValueType().SetValueAllocator(&allocator).Move(), allocator); + v = &((*v)[v->Size() - 1]); + exist = false; + } + else { + if (t->index == kPointerInvalidIndex) { // must be object name + if (!v->IsObject()) + v->SetObject(); // Change to Object + } + else { // object name or array index + if (!v->IsArray() && !v->IsObject()) + v->SetArray(); // Change to Array + } + + if (v->IsArray()) { + if (t->index >= v->Size()) { + v->Reserve(t->index + 1, allocator); +//NCBI: added SetValueAllocator + while (t->index >= v->Size()) + v->PushBack(ValueType().SetValueAllocator(&allocator).Move(), allocator); + exist = false; + } + v = &((*v)[t->index]); + } + else { + typename ValueType::MemberIterator m = v->FindMember(GenericStringRef(t->name, t->length)); + if (m == v->MemberEnd()) { +//NCBI: added SetValueAllocator + v->AddMember(ValueType(t->name, t->length, allocator).SetValueAllocator(&allocator).Move(), ValueType().SetValueAllocator(&allocator).Move(), allocator); + m = v->MemberEnd(); + v = &(--m)->value; // Assumes AddMember() appends at the end +// v = &(--v->MemberEnd())->value; // Assumes AddMember() appends at the end + exist = false; + } + else + v = &m->value; + } + } + } + + if (alreadyExist) + *alreadyExist = exist; + + return *v; + } + + //! Creates a value in a document. + /*! + \param document A document to be resolved. + \param alreadyExist If non-null, it stores whether the resolved value is already exist. 
+ \return The resolved newly created, or already exists value. + */ + template + ValueType& Create(GenericDocument& document, bool* alreadyExist = 0) const { + return Create(document, document.GetAllocator(), alreadyExist); + } + + //@} + + //!@name Query value + //@{ + + //! Query a value in a subtree. + /*! + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \param unresolvedTokenIndex If the pointer cannot resolve a token in the pointer, this parameter can obtain the index of unresolved token. + \return Pointer to the value if it can be resolved. Otherwise null. + + \note + There are only 3 situations when a value cannot be resolved: + 1. A value in the path is not an array nor object. + 2. An object value does not contain the token. + 3. A token is out of range of an array value. + + Use unresolvedTokenIndex to retrieve the token index. + */ + ValueType* Get(ValueType& root, size_t* unresolvedTokenIndex = 0) const { + RAPIDJSON_ASSERT(IsValid()); + ValueType* v = &root; + for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { + switch (v->GetType()) { + case kObjectType: + { + typename ValueType::MemberIterator m = v->FindMember(GenericStringRef(t->name, t->length)); + if (m == v->MemberEnd()) + break; + v = &m->value; + } + continue; + case kArrayType: + if (t->index == kPointerInvalidIndex || t->index >= v->Size()) + break; + v = &((*v)[t->index]); + continue; + default: + break; + } + + // Error: unresolved token + if (unresolvedTokenIndex) + *unresolvedTokenIndex = static_cast(t - tokens_); + return 0; + } + return v; + } + + //! Query a const value in a const subtree. + /*! + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \return Pointer to the value if it can be resolved. Otherwise null. 
+ */ + const ValueType* Get(const ValueType& root, size_t* unresolvedTokenIndex = 0) const { + return Get(const_cast(root), unresolvedTokenIndex); + } + + //@} + + //!@name Query a value with default + //@{ + + //! Query a value in a subtree with default value. + /*! + Similar to Get(), but if the specified value do not exists, it creates all parents and clone the default value. + So that this function always succeed. + + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \param defaultValue Default value to be cloned if the value was not exists. + \param allocator Allocator for creating the values if the specified value or its parents are not exist. + \see Create() + */ + ValueType& GetWithDefault(ValueType& root, const ValueType& defaultValue, typename ValueType::AllocatorType& allocator) const { + bool alreadyExist; + Value& v = Create(root, allocator, &alreadyExist); + return alreadyExist ? v : v.CopyFrom(defaultValue, allocator); + } + + //! Query a value in a subtree with default null-terminated string. + ValueType& GetWithDefault(ValueType& root, const Ch* defaultValue, typename ValueType::AllocatorType& allocator) const { + bool alreadyExist; + Value& v = Create(root, allocator, &alreadyExist); + return alreadyExist ? v : v.SetString(defaultValue, allocator); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Query a value in a subtree with default std::basic_string. + ValueType& GetWithDefault(ValueType& root, const std::basic_string& defaultValue, typename ValueType::AllocatorType& allocator) const { + bool alreadyExist; + Value& v = Create(root, allocator, &alreadyExist); + return alreadyExist ? v : v.SetString(defaultValue, allocator); + } +#endif + + //! Query a value in a subtree with default primitive value. + /*! 
+ \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (ValueType&)) + GetWithDefault(ValueType& root, T defaultValue, typename ValueType::AllocatorType& allocator) const { + return GetWithDefault(root, ValueType(defaultValue).Move(), allocator); + } + + //! Query a value in a document with default value. + template + ValueType& GetWithDefault(GenericDocument& document, const ValueType& defaultValue) const { + return GetWithDefault(document, defaultValue, document.GetAllocator()); + } + + //! Query a value in a document with default null-terminated string. + template + ValueType& GetWithDefault(GenericDocument& document, const Ch* defaultValue) const { + return GetWithDefault(document, defaultValue, document.GetAllocator()); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Query a value in a document with default std::basic_string. + template + ValueType& GetWithDefault(GenericDocument& document, const std::basic_string& defaultValue) const { + return GetWithDefault(document, defaultValue, document.GetAllocator()); + } +#endif + + //! Query a value in a document with default primitive value. + /*! + \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (ValueType&)) + GetWithDefault(GenericDocument& document, T defaultValue) const { + return GetWithDefault(document, defaultValue, document.GetAllocator()); + } + + //@} + + //!@name Set a value + //@{ + + //! Set a value in a subtree, with move semantics. + /*! + It creates all parents if they are not exist or types are different to the tokens. + So this function always succeeds but potentially remove existing values. + + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \param value Value to be set. 
+ \param allocator Allocator for creating the values if the specified value or its parents are not exist. + \see Create() + */ + ValueType& Set(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator) = value; + } + + //! Set a value in a subtree, with copy semantics. + ValueType& Set(ValueType& root, const ValueType& value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator).CopyFrom(value, allocator); + } + + //! Set a null-terminated string in a subtree. + ValueType& Set(ValueType& root, const Ch* value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator) = ValueType(value, allocator).Move(); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Set a std::basic_string in a subtree. + ValueType& Set(ValueType& root, const std::basic_string& value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator) = ValueType(value, allocator).Move(); + } +#endif + + //! Set a primitive value in a subtree. + /*! + \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (ValueType&)) + Set(ValueType& root, T value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator) = ValueType(value).Move(); + } + + //! Set a value in a document, with move semantics. + template + ValueType& Set(GenericDocument& document, ValueType& value) const { + return Create(document) = value; + } + + //! Set a value in a document, with copy semantics. + template + ValueType& Set(GenericDocument& document, const ValueType& value) const { + return Create(document).CopyFrom(value, document.GetAllocator()); + } + + //! Set a null-terminated string in a document. 
+ template + ValueType& Set(GenericDocument& document, const Ch* value) const { + return Create(document) = ValueType(value, document.GetAllocator()).Move(); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Sets a std::basic_string in a document. + template + ValueType& Set(GenericDocument& document, const std::basic_string& value) const { + return Create(document) = ValueType(value, document.GetAllocator()).Move(); + } +#endif + + //! Set a primitive value in a document. + /*! + \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool + */ + template + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (ValueType&)) + Set(GenericDocument& document, T value) const { + return Create(document) = value; + } + + //@} + + //!@name Swap a value + //@{ + + //! Swap a value with a value in a subtree. + /*! + It creates all parents if they are not exist or types are different to the tokens. + So this function always succeeds but potentially remove existing values. + + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \param value Value to be swapped. + \param allocator Allocator for creating the values if the specified value or its parents are not exist. + \see Create() + */ + ValueType& Swap(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator).Swap(value); + } + + //! Swap a value with a value in a document. + template + ValueType& Swap(GenericDocument& document, ValueType& value) const { + return Create(document).Swap(value); + } + + //@} + + //! Erase a value in a subtree. + /*! + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \return Whether the resolved value is found and erased. + + \note Erasing with an empty pointer \c Pointer(""), i.e. the root, always fail and return false. 
+ */ + bool Erase(ValueType& root) const { + RAPIDJSON_ASSERT(IsValid()); + if (tokenCount_ == 0) // Cannot erase the root + return false; + + ValueType* v = &root; + const Token* last = tokens_ + (tokenCount_ - 1); + for (const Token *t = tokens_; t != last; ++t) { + switch (v->GetType()) { + case kObjectType: + { + typename ValueType::MemberIterator m = v->FindMember(GenericStringRef(t->name, t->length)); + if (m == v->MemberEnd()) + return false; + v = &m->value; + } + break; + case kArrayType: + if (t->index == kPointerInvalidIndex || t->index >= v->Size()) + return false; + v = &((*v)[t->index]); + break; + default: + return false; + } + } + + switch (v->GetType()) { + case kObjectType: + return v->EraseMember(GenericStringRef(last->name, last->length)); + case kArrayType: + if (last->index == kPointerInvalidIndex || last->index >= v->Size()) + return false; + v->Erase(v->Begin() + last->index); + return true; + default: + return false; + } + } + +private: + //! Clone the content from rhs to this. + /*! + \param rhs Source pointer. + \param extraToken Extra tokens to be allocated. + \param extraNameBufferSize Extra name buffer size (in number of Ch) to be allocated. + \return Start of non-occupied name buffer, for storing extra names. + */ + Ch* CopyFromRaw(const GenericPointer& rhs, size_t extraToken = 0, size_t extraNameBufferSize = 0) { + if (!allocator_) // allocator is independently owned. 
+ ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)(); + + size_t nameBufferSize = rhs.tokenCount_; // null terminators for tokens + for (Token *t = rhs.tokens_; t != rhs.tokens_ + rhs.tokenCount_; ++t) + nameBufferSize += t->length; + + tokenCount_ = rhs.tokenCount_ + extraToken; + tokens_ = static_cast(allocator_->Malloc(tokenCount_ * sizeof(Token) + (nameBufferSize + extraNameBufferSize) * sizeof(Ch))); + nameBuffer_ = reinterpret_cast(tokens_ + tokenCount_); + if (rhs.tokenCount_ > 0) { + std::memcpy(tokens_, rhs.tokens_, rhs.tokenCount_ * sizeof(Token)); + } + if (nameBufferSize > 0) { + std::memcpy(nameBuffer_, rhs.nameBuffer_, nameBufferSize * sizeof(Ch)); + } + + // Adjust pointers to name buffer + std::ptrdiff_t diff = nameBuffer_ - rhs.nameBuffer_; + for (Token *t = tokens_; t != tokens_ + rhs.tokenCount_; ++t) + t->name += diff; + + return nameBuffer_ + nameBufferSize; + } + + //! Check whether a character should be percent-encoded. + /*! + According to RFC 3986 2.3 Unreserved Characters. + \param c The character (code unit) to be tested. + */ + bool NeedPercentEncode(Ch c) const { + return !((c >= '0' && c <= '9') || (c >= 'A' && c <='Z') || (c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '_' || c =='~'); + } + + //! Parse a JSON String or its URI fragment representation into tokens. +#ifndef __clang__ // -Wdocumentation + /*! + \param source Either a JSON Pointer string, or its URI fragment representation. Not need to be null terminated. + \param length Length of the source string. + \note Source cannot be JSON String Representation of JSON Pointer, e.g. In "/\u0000", \u0000 will not be unescaped. + */ +#endif + void Parse(const Ch* source, size_t length) { + RAPIDJSON_ASSERT(source != NULL); + RAPIDJSON_ASSERT(nameBuffer_ == 0); + RAPIDJSON_ASSERT(tokens_ == 0); + + // Create own allocator if user did not supply. 
+ if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)(); + + // Count number of '/' as tokenCount + tokenCount_ = 0; + for (const Ch* s = source; s != source + length; s++) + if (*s == '/') + tokenCount_++; + + Token* token = tokens_ = static_cast(allocator_->Malloc(tokenCount_ * sizeof(Token) + length * sizeof(Ch))); + Ch* name = nameBuffer_ = reinterpret_cast(tokens_ + tokenCount_); + size_t i = 0; + + // Detect if it is a URI fragment + bool uriFragment = false; + if (source[i] == '#') { + uriFragment = true; + i++; + } + + if (i != length && source[i] != '/') { + parseErrorCode_ = kPointerParseErrorTokenMustBeginWithSolidus; + goto error; + } + + while (i < length) { + RAPIDJSON_ASSERT(source[i] == '/'); + i++; // consumes '/' + + token->name = name; + bool isNumber = true; + + while (i < length && source[i] != '/') { + Ch c = source[i]; + if (uriFragment) { + // Decoding percent-encoding for URI fragment + if (c == '%') { + PercentDecodeStream is(&source[i], source + length); + GenericInsituStringStream os(name); + Ch* begin = os.PutBegin(); + if (!Transcoder, EncodingType>().Validate(is, os) || !is.IsValid()) { + parseErrorCode_ = kPointerParseErrorInvalidPercentEncoding; + goto error; + } + size_t len = os.PutEnd(begin); + i += is.Tell() - 1; + if (len == 1) + c = *name; + else { + name += len; + isNumber = false; + i++; + continue; + } + } + else if (NeedPercentEncode(c)) { + parseErrorCode_ = kPointerParseErrorCharacterMustPercentEncode; + goto error; + } + } + + i++; + + // Escaping "~0" -> '~', "~1" -> '/' + if (c == '~') { + if (i < length) { + c = source[i]; + if (c == '0') c = '~'; + else if (c == '1') c = '/'; + else { + parseErrorCode_ = kPointerParseErrorInvalidEscape; + goto error; + } + i++; + } + else { + parseErrorCode_ = kPointerParseErrorInvalidEscape; + goto error; + } + } + + // First check for index: all of characters are digit + if (c < '0' || c > '9') + isNumber = false; + + *name++ = c; + } + token->length = 
static_cast(name - token->name); + if (token->length == 0) + isNumber = false; + *name++ = '\0'; // Null terminator + + // Second check for index: more than one digit cannot have leading zero + if (isNumber && token->length > 1 && token->name[0] == '0') + isNumber = false; + + // String to SizeType conversion + SizeType n = 0; + if (isNumber) { + for (size_t j = 0; j < token->length; j++) { + SizeType m = n * 10 + static_cast(token->name[j] - '0'); + if (m < n) { // overflow detection + isNumber = false; + break; + } + n = m; + } + } + + token->index = isNumber ? n : kPointerInvalidIndex; + token++; + } + + RAPIDJSON_ASSERT(name <= nameBuffer_ + length); // Should not overflow buffer + parseErrorCode_ = kPointerParseErrorNone; + return; + + error: + Allocator::Free(tokens_); + nameBuffer_ = 0; + tokens_ = 0; + tokenCount_ = 0; + parseErrorOffset_ = i; + return; + } + + //! Stringify to string or URI fragment representation. + /*! + \tparam uriFragment True for stringifying to URI fragment representation. False for string representation. + \tparam OutputStream type of output stream. + \param os The output stream. + */ + template + bool Stringify(OutputStream& os) const { + RAPIDJSON_ASSERT(IsValid()); + + if (uriFragment) + os.Put('#'); + + for (Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { + os.Put('/'); + for (size_t j = 0; j < t->length; j++) { + Ch c = t->name[j]; + if (c == '~') { + os.Put('~'); + os.Put('0'); + } + else if (c == '/') { + os.Put('~'); + os.Put('1'); + } + else if (uriFragment && NeedPercentEncode(c)) { + // Transcode to UTF8 sequence + GenericStringStream source(&t->name[j]); + PercentEncodeStream target(os); + if (!Transcoder >().Validate(source, target)) + return false; + j += source.Tell() - 1; + } + else + os.Put(c); + } + } + return true; + } + + //! A helper stream for decoding a percent-encoded sequence into code unit. + /*! + This stream decodes %XY triplet into code unit (0-255). 
+ If it encounters invalid characters, it sets output code unit as 0 and + mark invalid, and to be checked by IsValid(). + */ + class PercentDecodeStream { + public: + typedef typename ValueType::Ch Ch; + + //! Constructor + /*! + \param source Start of the stream + \param end Past-the-end of the stream. + */ + PercentDecodeStream(const Ch* source, const Ch* end) : src_(source), head_(source), end_(end), valid_(true) {} + + Ch Take() { + if (*src_ != '%' || src_ + 3 > end_) { // %XY triplet + valid_ = false; + return 0; + } + src_++; + Ch c = 0; + for (int j = 0; j < 2; j++) { + c = static_cast(c << 4); + Ch h = *src_; + if (h >= '0' && h <= '9') c = static_cast(c + h - '0'); + else if (h >= 'A' && h <= 'F') c = static_cast(c + h - 'A' + 10); + else if (h >= 'a' && h <= 'f') c = static_cast(c + h - 'a' + 10); + else { + valid_ = false; + return 0; + } + src_++; + } + return c; + } + + size_t Tell() const { return static_cast(src_ - head_); } + bool IsValid() const { return valid_; } + + private: + const Ch* src_; //!< Current read position. + const Ch* head_; //!< Original head of the string. + const Ch* end_; //!< Past-the-end position. + bool valid_; //!< Whether the parsing is valid. + }; + + //! A helper stream to encode character (UTF-8 code unit) into percent-encoded sequence. + template + class PercentEncodeStream { + public: + PercentEncodeStream(OutputStream& os) : os_(os) {} + void Put(char c) { // UTF-8 must be byte + unsigned char u = static_cast(c); + static const char hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + os_.Put('%'); + os_.Put(static_cast(hexDigits[u >> 4])); + os_.Put(static_cast(hexDigits[u & 15])); + } + private: + OutputStream& os_; + }; + + Allocator* allocator_; //!< The current allocator. It is either user-supplied or equal to ownAllocator_. + Allocator* ownAllocator_; //!< Allocator owned by this Pointer. + Ch* nameBuffer_; //!< A buffer containing all names in tokens. 
+ Token* tokens_; //!< A list of tokens. + size_t tokenCount_; //!< Number of tokens in tokens_. + size_t parseErrorOffset_; //!< Offset in code unit when parsing fail. + PointerParseErrorCode parseErrorCode_; //!< Parsing error code. +}; + +//! GenericPointer for Value (UTF-8, default allocator). +typedef GenericPointer Pointer; + +//!@name Helper functions for GenericPointer +//@{ + +////////////////////////////////////////////////////////////////////////////// + +template +typename T::ValueType& CreateValueByPointer(T& root, const GenericPointer& pointer, typename T::AllocatorType& a) { + return pointer.Create(root, a); +} + +template +typename T::ValueType& CreateValueByPointer(T& root, const CharType(&source)[N], typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Create(root, a); +} + +// No allocator parameter + +template +typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const GenericPointer& pointer) { + return pointer.Create(document); +} + +template +typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const CharType(&source)[N]) { + return GenericPointer(source, N - 1).Create(document); +} + +////////////////////////////////////////////////////////////////////////////// + +template +typename T::ValueType* GetValueByPointer(T& root, const GenericPointer& pointer, size_t* unresolvedTokenIndex = 0) { + return pointer.Get(root, unresolvedTokenIndex); +} + +template +const typename T::ValueType* GetValueByPointer(const T& root, const GenericPointer& pointer, size_t* unresolvedTokenIndex = 0) { + return pointer.Get(root, unresolvedTokenIndex); +} + +template +typename T::ValueType* GetValueByPointer(T& root, const CharType (&source)[N], size_t* unresolvedTokenIndex = 0) { + return GenericPointer(source, N - 1).Get(root, unresolvedTokenIndex); +} + +template +const typename T::ValueType* GetValueByPointer(const T& root, const CharType(&source)[N], size_t* unresolvedTokenIndex = 0) { + 
return GenericPointer(source, N - 1).Get(root, unresolvedTokenIndex); +} + +////////////////////////////////////////////////////////////////////////////// + +template +typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer& pointer, const typename T::ValueType& defaultValue, typename T::AllocatorType& a) { + return pointer.GetWithDefault(root, defaultValue, a); +} + +template +typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer& pointer, const typename T::Ch* defaultValue, typename T::AllocatorType& a) { + return pointer.GetWithDefault(root, defaultValue, a); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer& pointer, const std::basic_string& defaultValue, typename T::AllocatorType& a) { + return pointer.GetWithDefault(root, defaultValue, a); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename T::ValueType&)) +GetValueByPointerWithDefault(T& root, const GenericPointer& pointer, T2 defaultValue, typename T::AllocatorType& a) { + return pointer.GetWithDefault(root, defaultValue, a); +} + +template +typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::ValueType& defaultValue, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).GetWithDefault(root, defaultValue, a); +} + +template +typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::Ch* defaultValue, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).GetWithDefault(root, defaultValue, a); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const std::basic_string& defaultValue, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).GetWithDefault(root, defaultValue, a); +} +#endif + 
+template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename T::ValueType&)) +GetValueByPointerWithDefault(T& root, const CharType(&source)[N], T2 defaultValue, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).GetWithDefault(root, defaultValue, a); +} + +// No allocator parameter + +template +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer& pointer, const typename DocumentType::ValueType& defaultValue) { + return pointer.GetWithDefault(document, defaultValue); +} + +template +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer& pointer, const typename DocumentType::Ch* defaultValue) { + return pointer.GetWithDefault(document, defaultValue); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer& pointer, const std::basic_string& defaultValue) { + return pointer.GetWithDefault(document, defaultValue); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename DocumentType::ValueType&)) +GetValueByPointerWithDefault(DocumentType& document, const GenericPointer& pointer, T2 defaultValue) { + return pointer.GetWithDefault(document, defaultValue); +} + +template +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& defaultValue) { + return GenericPointer(source, N - 1).GetWithDefault(document, defaultValue); +} + +template +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* defaultValue) { + return GenericPointer(source, N - 1).GetWithDefault(document, defaultValue); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename DocumentType::ValueType& 
GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const std::basic_string& defaultValue) { + return GenericPointer(source, N - 1).GetWithDefault(document, defaultValue); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename DocumentType::ValueType&)) +GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], T2 defaultValue) { + return GenericPointer(source, N - 1).GetWithDefault(document, defaultValue); +} + +////////////////////////////////////////////////////////////////////////////// + +template +typename T::ValueType& SetValueByPointer(T& root, const GenericPointer& pointer, typename T::ValueType& value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} + +template +typename T::ValueType& SetValueByPointer(T& root, const GenericPointer& pointer, const typename T::ValueType& value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} + +template +typename T::ValueType& SetValueByPointer(T& root, const GenericPointer& pointer, const typename T::Ch* value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename T::ValueType& SetValueByPointer(T& root, const GenericPointer& pointer, const std::basic_string& value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename T::ValueType&)) +SetValueByPointer(T& root, const GenericPointer& pointer, T2 value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} + +template +typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Set(root, value, a); +} + +template +typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], 
const typename T::ValueType& value, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Set(root, value, a); +} + +template +typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::Ch* value, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Set(root, value, a); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const std::basic_string& value, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Set(root, value, a); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename T::ValueType&)) +SetValueByPointer(T& root, const CharType(&source)[N], T2 value, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Set(root, value, a); +} + +// No allocator parameter + +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer& pointer, typename DocumentType::ValueType& value) { + return pointer.Set(document, value); +} + +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer& pointer, const typename DocumentType::ValueType& value) { + return pointer.Set(document, value); +} + +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer& pointer, const typename DocumentType::Ch* value) { + return pointer.Set(document, value); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer& pointer, const std::basic_string& value) { + return pointer.Set(document, value); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename DocumentType::ValueType&)) +SetValueByPointer(DocumentType& document, const GenericPointer& pointer, T2 value) { + return pointer.Set(document, 
value); +} + +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) { + return GenericPointer(source, N - 1).Set(document, value); +} + +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& value) { + return GenericPointer(source, N - 1).Set(document, value); +} + +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* value) { + return GenericPointer(source, N - 1).Set(document, value); +} + +#if RAPIDJSON_HAS_STDSTRING +template +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const std::basic_string& value) { + return GenericPointer(source, N - 1).Set(document, value); +} +#endif + +template +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr, internal::IsGenericValue >), (typename DocumentType::ValueType&)) +SetValueByPointer(DocumentType& document, const CharType(&source)[N], T2 value) { + return GenericPointer(source, N - 1).Set(document, value); +} + +////////////////////////////////////////////////////////////////////////////// + +template +typename T::ValueType& SwapValueByPointer(T& root, const GenericPointer& pointer, typename T::ValueType& value, typename T::AllocatorType& a) { + return pointer.Swap(root, value, a); +} + +template +typename T::ValueType& SwapValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) { + return GenericPointer(source, N - 1).Swap(root, value, a); +} + +template +typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const GenericPointer& pointer, typename DocumentType::ValueType& value) { + return pointer.Swap(document, value); +} + +template +typename DocumentType::ValueType& SwapValueByPointer(DocumentType& 
document, const CharType(&source)[N], typename DocumentType::ValueType& value) { + return GenericPointer(source, N - 1).Swap(document, value); +} + +////////////////////////////////////////////////////////////////////////////// + +template +bool EraseValueByPointer(T& root, const GenericPointer& pointer) { + return pointer.Erase(root); +} + +template +bool EraseValueByPointer(T& root, const CharType(&source)[N]) { + return GenericPointer(source, N - 1).Erase(root); +} + +//@} + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_POINTER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/prettywriter.h b/c++/include/misc/jsonwrapp/rapidjson11/prettywriter.h new file mode 100644 index 00000000..8b1f6a83 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/prettywriter.h @@ -0,0 +1,290 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_PRETTYWRITER_H_ +#define RAPIDJSON_PRETTYWRITER_H_ + +#include "writer.h" + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +#if defined(__clang__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(c++98-compat) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Combination of PrettyWriter format flags. +/*! 
\see PrettyWriter::SetFormatOptions + */ +enum PrettyFormatOptions { + kFormatDefault = 0, //!< Default pretty formatting. + kFormatSingleLineArray = 1 //!< Format arrays on a single line. +}; + +//! Writer with indentation and spacing. +/*! + \tparam OutputStream Type of ouptut os. + \tparam SourceEncoding Encoding of source string. + \tparam TargetEncoding Encoding of output stream. + \tparam StackAllocator Type of allocator for allocating memory of stack. +*/ +template, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags> +class PrettyWriter : public Writer { +public: + typedef Writer Base; + typedef typename Base::Ch Ch; + + //! Constructor + /*! \param os Output stream. + \param allocator User supplied allocator. If it is null, it will create a private one. + \param levelDepth Initial capacity of stack. + */ + explicit PrettyWriter(OutputStream& os, StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : + Base(os, allocator, levelDepth), indentChar_(' '), indentCharCount_(4), formatOptions_(kFormatDefault), eol_(true) {} + + + explicit PrettyWriter(StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : + Base(allocator, levelDepth), indentChar_(' '), indentCharCount_(4), eol_(true) {} + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + PrettyWriter(PrettyWriter&& rhs) : + Base(std::forward(rhs)), indentChar_(rhs.indentChar_), indentCharCount_(rhs.indentCharCount_), formatOptions_(rhs.formatOptions_), eol_(true) {} +#endif + + //! Set custom indentation. + /*! \param indentChar Character for indentation. Must be whitespace character (' ', '\\t', '\\n', '\\r'). + \param indentCharCount Number of indent characters for each indentation level. + \note The default indentation is 4 spaces. 
+ */ + PrettyWriter& SetIndent(Ch indentChar, unsigned indentCharCount) { + RAPIDJSON_ASSERT(indentChar == ' ' || indentChar == '\t' || indentChar == '\n' || indentChar == '\r'); + indentChar_ = indentChar; + indentCharCount_ = indentCharCount; + return *this; + } + +// NCBI - added + PrettyWriter& SetWriteEol(bool eol) { + eol_ = eol; + return *this; + } + + //! Set pretty writer formatting options. + /*! \param options Formatting options. + */ + PrettyWriter& SetFormatOptions(PrettyFormatOptions options) { + formatOptions_ = options; + return *this; + } + + /*! @name Implementation of Handler + \see Handler + */ + //@{ + + bool Null() { PrettyPrefix(kNullType); return Base::WriteNull(); } + bool Bool(bool b) { PrettyPrefix(b ? kTrueType : kFalseType); return Base::WriteBool(b); } + bool Int(int i) { PrettyPrefix(kNumberType); return Base::WriteInt(i); } + bool Uint(unsigned u) { PrettyPrefix(kNumberType); return Base::WriteUint(u); } + bool Int64(int64_t i64) { PrettyPrefix(kNumberType); return Base::WriteInt64(i64); } + bool Uint64(uint64_t u64) { PrettyPrefix(kNumberType); return Base::WriteUint64(u64); } + bool Double(double d) { PrettyPrefix(kNumberType); return Base::WriteDouble(d); } + + bool RawNumber(const Ch* str, SizeType length, bool copy = false) { + RAPIDJSON_ASSERT(str != 0); + (void)copy; + PrettyPrefix(kNumberType); + return Base::WriteString(str, length); + } + + bool String(const Ch* str, SizeType length, bool copy = false) { + RAPIDJSON_ASSERT(str != 0); + (void)copy; + PrettyPrefix(kStringType); + return Base::WriteString(str, length); + } + +#if RAPIDJSON_HAS_STDSTRING + bool String(const std::basic_string& str) { + return String(str.data(), SizeType(str.size())); + } +#endif + + bool StartObject() { + PrettyPrefix(kObjectType); + new (Base::level_stack_.template Push()) typename Base::Level(false); + return Base::WriteStartObject(); + } + + bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); } + 
+#if RAPIDJSON_HAS_STDSTRING + bool Key(const std::basic_string& str) { + return Key(str.data(), SizeType(str.size())); + } +#endif + + bool EndObject(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); + RAPIDJSON_ASSERT(!Base::level_stack_.template Top()->inArray); + bool empty = Base::level_stack_.template Pop(1)->valueCount == 0; + + if (!empty) { + WriteEol(); + WriteIndent(); + } + bool ret = Base::WriteEndObject(); + (void)ret; + RAPIDJSON_ASSERT(ret == true); + if (Base::level_stack_.Empty()) // end of json text + Base::os_->Flush(); + return true; + } + + bool StartArray() { + PrettyPrefix(kArrayType); + new (Base::level_stack_.template Push()) typename Base::Level(true); + return Base::WriteStartArray(); + } + + bool EndArray(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); + RAPIDJSON_ASSERT(Base::level_stack_.template Top()->inArray); + bool empty = Base::level_stack_.template Pop(1)->valueCount == 0; + + if (!empty && !(formatOptions_ & kFormatSingleLineArray)) { + WriteEol(); + WriteIndent(); + } + bool ret = Base::WriteEndArray(); + (void)ret; + RAPIDJSON_ASSERT(ret == true); + if (Base::level_stack_.Empty()) // end of json text + Base::os_->Flush(); + return true; + } + + //@} + + /*! @name Convenience extensions */ + //@{ + + //! Simpler but slower overload. + bool String(const Ch* str) { return String(str, internal::StrLen(str)); } + bool Key(const Ch* str) { return Key(str, internal::StrLen(str)); } + + //@} + + //! Write a raw JSON value. + /*! + For user to write a stringified JSON as a value. + + \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range. + \param length Length of the json. + \param type Type of the root of json. + \note When using PrettyWriter::RawValue(), the result json may not be indented correctly. 
+ */ + bool RawValue(const Ch* json, size_t length, Type type) { + RAPIDJSON_ASSERT(json != 0); + PrettyPrefix(type); + return Base::WriteRawValue(json, length); + } + +protected: + void PrettyPrefix(Type type) { + (void)type; + if (Base::level_stack_.GetSize() != 0) { // this value is not at root + typename Base::Level* level = Base::level_stack_.template Top(); + + if (level->inArray) { + if (level->valueCount > 0) { + Base::os_->Put(','); // add comma if it is not the first element in array + if (formatOptions_ & kFormatSingleLineArray) + Base::os_->Put(' '); + } + + if (!(formatOptions_ & kFormatSingleLineArray)) { + WriteEol(); + WriteIndent(); + } + } + else { // in object + if (level->valueCount > 0) { + if (level->valueCount % 2 == 0) { + Base::os_->Put(','); + WriteEol(); + } + else { + Base::os_->Put(':'); + Base::os_->Put(' '); + } + } + else + WriteEol(); + + if (level->valueCount % 2 == 0) + WriteIndent(); + } + if (!level->inArray && level->valueCount % 2 == 0) + RAPIDJSON_ASSERT(type == kStringType); // if it's in object, then even number should be a name + level->valueCount++; + } + else { + RAPIDJSON_ASSERT(!Base::hasRoot_); // Should only has one and only one root. + Base::hasRoot_ = true; + } + } + + void WriteIndent() { + size_t count = (Base::level_stack_.GetSize() / sizeof(typename Base::Level)) * indentCharCount_; + PutN(*Base::os_, static_cast(indentChar_), count); + } + +// NCBI +// added WriteEol + void WriteEol() { + if (eol_) { + Base::os_->Put('\n'); + } + } + + Ch indentChar_; + unsigned indentCharCount_; + PrettyFormatOptions formatOptions_; + bool eol_; + +private: + // Prohibit copy constructor & assignment operator. 
+ PrettyWriter(const PrettyWriter&); + PrettyWriter& operator=(const PrettyWriter&); +}; + +RAPIDJSON_NAMESPACE_END + +#if defined(__clang__) +RAPIDJSON_DIAG_POP +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/rapidjson.h b/c++/include/misc/jsonwrapp/rapidjson11/rapidjson.h new file mode 100644 index 00000000..ab5d2dae --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/rapidjson.h @@ -0,0 +1,621 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_RAPIDJSON_H_ +#define RAPIDJSON_RAPIDJSON_H_ + +/*!\file rapidjson.h + \brief common definitions and configuration + + \see RAPIDJSON_CONFIG + */ + +/*! \defgroup RAPIDJSON_CONFIG RapidJSON configuration + \brief Configuration macros for library features + + Some RapidJSON features are configurable to adapt the library to a wide + variety of platforms, environments and usage scenarios. Most of the + features can be configured in terms of overriden or predefined + preprocessor macros at compile-time. + + Some additional customization is available in the \ref RAPIDJSON_ERRORS APIs. 
+ + \note These macros should be given on the compiler command-line + (where applicable) to avoid inconsistent values when compiling + different translation units of a single application. + */ + +#include // malloc(), realloc(), free(), size_t +#include // memset(), memcpy(), memmove(), memcmp() + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_VERSION_STRING +// +// ALWAYS synchronize the following 3 macros with corresponding variables in /CMakeLists.txt. +// + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +// token stringification +#define RAPIDJSON_STRINGIFY(x) RAPIDJSON_DO_STRINGIFY(x) +#define RAPIDJSON_DO_STRINGIFY(x) #x +//!@endcond + +/*! \def RAPIDJSON_MAJOR_VERSION + \ingroup RAPIDJSON_CONFIG + \brief Major version of RapidJSON in integer. +*/ +/*! \def RAPIDJSON_MINOR_VERSION + \ingroup RAPIDJSON_CONFIG + \brief Minor version of RapidJSON in integer. +*/ +/*! \def RAPIDJSON_PATCH_VERSION + \ingroup RAPIDJSON_CONFIG + \brief Patch version of RapidJSON in integer. +*/ +/*! \def RAPIDJSON_VERSION_STRING + \ingroup RAPIDJSON_CONFIG + \brief Version of RapidJSON in ".." string format. +*/ +#define RAPIDJSON_MAJOR_VERSION 1 +#define RAPIDJSON_MINOR_VERSION 1 +#define RAPIDJSON_PATCH_VERSION 0 +#define RAPIDJSON_VERSION_STRING \ + RAPIDJSON_STRINGIFY(RAPIDJSON_MAJOR_VERSION.RAPIDJSON_MINOR_VERSION.RAPIDJSON_PATCH_VERSION) + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NAMESPACE_(BEGIN|END) +/*! \def RAPIDJSON_NAMESPACE + \ingroup RAPIDJSON_CONFIG + \brief provide custom rapidjson namespace + + In order to avoid symbol clashes and/or "One Definition Rule" errors + between multiple inclusions of (different versions of) RapidJSON in + a single binary, users can customize the name of the main RapidJSON + namespace. + + In case of a single nesting level, defining \c RAPIDJSON_NAMESPACE + to a custom name (e.g. \c MyRapidJSON) is sufficient. 
If multiple + levels are needed, both \ref RAPIDJSON_NAMESPACE_BEGIN and \ref + RAPIDJSON_NAMESPACE_END need to be defined as well: + + \code + // in some .cpp file + #define RAPIDJSON_NAMESPACE my::rapidjson + #define RAPIDJSON_NAMESPACE_BEGIN namespace my { namespace rapidjson { + #define RAPIDJSON_NAMESPACE_END } } + #include "rapidjson/..." + \endcode + + \see rapidjson + */ +/*! \def RAPIDJSON_NAMESPACE_BEGIN + \ingroup RAPIDJSON_CONFIG + \brief provide custom rapidjson namespace (opening expression) + \see RAPIDJSON_NAMESPACE +*/ +/*! \def RAPIDJSON_NAMESPACE_END + \ingroup RAPIDJSON_CONFIG + \brief provide custom rapidjson namespace (closing expression) + \see RAPIDJSON_NAMESPACE +*/ +#ifndef RAPIDJSON_NAMESPACE +#define RAPIDJSON_NAMESPACE rapidjson +#endif +#ifndef RAPIDJSON_NAMESPACE_BEGIN +#define RAPIDJSON_NAMESPACE_BEGIN namespace RAPIDJSON_NAMESPACE { +#endif +#ifndef RAPIDJSON_NAMESPACE_END +#define RAPIDJSON_NAMESPACE_END } +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_HAS_STDSTRING + +#ifndef RAPIDJSON_HAS_STDSTRING +#ifdef RAPIDJSON_DOXYGEN_RUNNING +#define RAPIDJSON_HAS_STDSTRING 1 // force generation of documentation +#else +#define RAPIDJSON_HAS_STDSTRING 0 // no std::string support by default +#endif +/*! \def RAPIDJSON_HAS_STDSTRING + \ingroup RAPIDJSON_CONFIG + \brief Enable RapidJSON support for \c std::string + + By defining this preprocessor symbol to \c 1, several convenience functions for using + \ref rapidjson::GenericValue with \c std::string are enabled, especially + for construction and comparison. + + \hideinitializer +*/ +#endif // !defined(RAPIDJSON_HAS_STDSTRING) + +#if RAPIDJSON_HAS_STDSTRING +#include +#endif // RAPIDJSON_HAS_STDSTRING + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NO_INT64DEFINE + +/*! \def RAPIDJSON_NO_INT64DEFINE + \ingroup RAPIDJSON_CONFIG + \brief Use external 64-bit integer types. 
+ + RapidJSON requires the 64-bit integer types \c int64_t and \c uint64_t types + to be available at global scope. + + If users have their own definition, define RAPIDJSON_NO_INT64DEFINE to + prevent RapidJSON from defining its own types. +*/ +#ifndef RAPIDJSON_NO_INT64DEFINE +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#if defined(_MSC_VER) && (_MSC_VER < 1800) // Visual Studio 2013 +#include "msinttypes/stdint.h" +#include "msinttypes/inttypes.h" +#else +// Other compilers should have this. +#include +#include +#endif +//!@endcond +#ifdef RAPIDJSON_DOXYGEN_RUNNING +#define RAPIDJSON_NO_INT64DEFINE +#endif +#endif // RAPIDJSON_NO_INT64TYPEDEF + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_FORCEINLINE + +#ifndef RAPIDJSON_FORCEINLINE +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#if defined(_MSC_VER) && defined(NDEBUG) +#define RAPIDJSON_FORCEINLINE __forceinline +#elif defined(__GNUC__) && __GNUC__ >= 4 && defined(NDEBUG) +#define RAPIDJSON_FORCEINLINE __attribute__((always_inline)) +#else +#define RAPIDJSON_FORCEINLINE +#endif +//!@endcond +#endif // RAPIDJSON_FORCEINLINE + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ENDIAN +#define RAPIDJSON_LITTLEENDIAN 0 //!< Little endian machine +#define RAPIDJSON_BIGENDIAN 1 //!< Big endian machine + +//! Endianness of the machine. +/*! + \def RAPIDJSON_ENDIAN + \ingroup RAPIDJSON_CONFIG + + GCC 4.6 provided macro for detecting endianness of the target machine. But other + compilers may not have this. User can define RAPIDJSON_ENDIAN to either + \ref RAPIDJSON_LITTLEENDIAN or \ref RAPIDJSON_BIGENDIAN. 
+ + Default detection implemented with reference to + \li https://gcc.gnu.org/onlinedocs/gcc-4.6.0/cpp/Common-Predefined-Macros.html + \li http://www.boost.org/doc/libs/1_42_0/boost/detail/endian.hpp +*/ +#ifndef RAPIDJSON_ENDIAN +// Detect with GCC 4.6's macro +# ifdef __BYTE_ORDER__ +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# else +# error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN. +# endif // __BYTE_ORDER__ +// Detect with GLIBC's endian.h +# elif defined(__GLIBC__) +# include +# if (__BYTE_ORDER == __LITTLE_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif (__BYTE_ORDER == __BIG_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# else +# error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN. +# endif // __GLIBC__ +// Detect with _LITTLE_ENDIAN and _BIG_ENDIAN macro +# elif defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +// Detect with architecture macros +# elif defined(__sparc) || defined(__sparc__) || defined(_POWER) || defined(__powerpc__) || defined(__ppc__) || defined(__hpux) || defined(__hppa) || defined(_MIPSEB) || defined(_POWER) || defined(__s390__) +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# elif defined(__i386__) || defined(__alpha__) || defined(__ia64) || defined(__ia64__) || defined(_M_IX86) || defined(_M_IA64) || defined(_M_ALPHA) || defined(__amd64) || defined(__amd64__) || defined(_M_AMD64) || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(__bfin__) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif defined(_MSC_VER) && defined(_M_ARM) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif defined(RAPIDJSON_DOXYGEN_RUNNING) +# 
define RAPIDJSON_ENDIAN +# else +# error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN. +# endif +#endif // RAPIDJSON_ENDIAN + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_64BIT + +//! Whether using 64-bit architecture +#ifndef RAPIDJSON_64BIT +#if defined(__LP64__) || (defined(__x86_64__) && defined(__ILP32__)) || defined(_WIN64) || defined(__EMSCRIPTEN__) +#define RAPIDJSON_64BIT 1 +#else +#define RAPIDJSON_64BIT 0 +#endif +#endif // RAPIDJSON_64BIT + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ALIGN + +//! Data alignment of the machine. +/*! \ingroup RAPIDJSON_CONFIG + \param x pointer to align + + Some machines require strict data alignment. Currently the default uses 4 bytes + alignment on 32-bit platforms and 8 bytes alignment for 64-bit platforms. + User can customize by defining the RAPIDJSON_ALIGN function macro. +*/ +#ifndef RAPIDJSON_ALIGN +#if RAPIDJSON_64BIT == 1 +#define RAPIDJSON_ALIGN(x) (((x) + static_cast(7u)) & ~static_cast(7u)) +#else +#define RAPIDJSON_ALIGN(x) (((x) + 3u) & ~3u) +#endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_UINT64_C2 + +//! Construct a 64-bit literal by a pair of 32-bit integer. +/*! + 64-bit literal with or without ULL suffix is prone to compiler warnings. + UINT64_C() is C macro which cause compilation problems. + Use this macro to define 64-bit constants by a pair of 32-bit integer. +*/ +#ifndef RAPIDJSON_UINT64_C2 +#define RAPIDJSON_UINT64_C2(high32, low32) ((static_cast(high32) << 32) | static_cast(low32)) +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_48BITPOINTER_OPTIMIZATION + +//! Use only lower 48-bit address for some pointers. +/*! 
+ \ingroup RAPIDJSON_CONFIG + + This optimization uses the fact that current X86-64 architecture only implement lower 48-bit virtual address. + The higher 16-bit can be used for storing other data. + \c GenericValue uses this optimization to reduce its size form 24 bytes to 16 bytes in 64-bit architecture. +*/ +#ifndef RAPIDJSON_48BITPOINTER_OPTIMIZATION +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +#define RAPIDJSON_48BITPOINTER_OPTIMIZATION 1 +#else +#define RAPIDJSON_48BITPOINTER_OPTIMIZATION 0 +#endif +#endif // RAPIDJSON_48BITPOINTER_OPTIMIZATION + +#if RAPIDJSON_48BITPOINTER_OPTIMIZATION == 1 +#if RAPIDJSON_64BIT != 1 +#error RAPIDJSON_48BITPOINTER_OPTIMIZATION can only be set to 1 when RAPIDJSON_64BIT=1 +#endif +#define RAPIDJSON_SETPOINTER(type, p, x) (p = reinterpret_cast((reinterpret_cast(p) & static_cast(RAPIDJSON_UINT64_C2(0xFFFF0000, 0x00000000))) | reinterpret_cast(reinterpret_cast(x)))) +#define RAPIDJSON_GETPOINTER(type, p) (reinterpret_cast(reinterpret_cast(p) & static_cast(RAPIDJSON_UINT64_C2(0x0000FFFF, 0xFFFFFFFF)))) +#else +#define RAPIDJSON_SETPOINTER(type, p, x) (p = (x)) +#define RAPIDJSON_GETPOINTER(type, p) (p) +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_SSE2/RAPIDJSON_SSE42/RAPIDJSON_SIMD + +/*! \def RAPIDJSON_SIMD + \ingroup RAPIDJSON_CONFIG + \brief Enable SSE2/SSE4.2 optimization. + + RapidJSON supports optimized implementations for some parsing operations + based on the SSE2 or SSE4.2 SIMD extensions on modern Intel-compatible + processors. + + To enable these optimizations, two different symbols can be defined; + \code + // Enable SSE2 optimization. + #define RAPIDJSON_SSE2 + + // Enable SSE4.2 optimization. + #define RAPIDJSON_SSE42 + \endcode + + \c RAPIDJSON_SSE42 takes precedence, if both are defined. 
+ + If any of these symbols is defined, RapidJSON defines the macro + \c RAPIDJSON_SIMD to indicate the availability of the optimized code. +*/ +#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) \ + || defined(RAPIDJSON_DOXYGEN_RUNNING) +#define RAPIDJSON_SIMD +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NO_SIZETYPEDEFINE + +#ifndef RAPIDJSON_NO_SIZETYPEDEFINE +/*! \def RAPIDJSON_NO_SIZETYPEDEFINE + \ingroup RAPIDJSON_CONFIG + \brief User-provided \c SizeType definition. + + In order to avoid using 32-bit size types for indexing strings and arrays, + define this preprocessor symbol and provide the type rapidjson::SizeType + before including RapidJSON: + \code + #define RAPIDJSON_NO_SIZETYPEDEFINE + namespace rapidjson { typedef ::std::size_t SizeType; } + #include "rapidjson/..." + \endcode + + \see rapidjson::SizeType +*/ +#ifdef RAPIDJSON_DOXYGEN_RUNNING +#define RAPIDJSON_NO_SIZETYPEDEFINE +#endif +RAPIDJSON_NAMESPACE_BEGIN +//! Size type (for string lengths, array sizes, etc.) +/*! RapidJSON uses 32-bit array/string indices even on 64-bit platforms, + instead of using \c size_t. Users may override the SizeType by defining + \ref RAPIDJSON_NO_SIZETYPEDEFINE. +*/ +typedef unsigned SizeType; +RAPIDJSON_NAMESPACE_END +#endif + +// always import std::size_t to rapidjson namespace +RAPIDJSON_NAMESPACE_BEGIN +using std::size_t; +RAPIDJSON_NAMESPACE_END + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ASSERT + +//! Assertion. +/*! \ingroup RAPIDJSON_CONFIG + By default, rapidjson uses C \c assert() for internal assertions. + User can override it by defining RAPIDJSON_ASSERT(x) macro. + + \note Parsing errors are handled and can be customized by the + \ref RAPIDJSON_ERRORS APIs. 
+*/ +#ifndef RAPIDJSON_ASSERT +#include +#define RAPIDJSON_ASSERT(x) assert(x) +#endif // RAPIDJSON_ASSERT + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_STATIC_ASSERT + +// Adopt from boost +#ifndef RAPIDJSON_STATIC_ASSERT +#ifndef __clang__ +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#endif +RAPIDJSON_NAMESPACE_BEGIN +template struct STATIC_ASSERTION_FAILURE; +template <> struct STATIC_ASSERTION_FAILURE { enum { value = 1 }; }; +template struct StaticAssertTest {}; +RAPIDJSON_NAMESPACE_END + +#define RAPIDJSON_JOIN(X, Y) RAPIDJSON_DO_JOIN(X, Y) +#define RAPIDJSON_DO_JOIN(X, Y) RAPIDJSON_DO_JOIN2(X, Y) +#define RAPIDJSON_DO_JOIN2(X, Y) X##Y + +#if defined(__GNUC__) +#define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE __attribute__((unused)) +#else +#define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE +#endif +#ifndef __clang__ +//!@endcond +#endif + +/*! \def RAPIDJSON_STATIC_ASSERT + \brief (Internal) macro to check for conditions at compile-time + \param x compile-time condition + \hideinitializer + */ +#define RAPIDJSON_STATIC_ASSERT(x) \ + typedef ::RAPIDJSON_NAMESPACE::StaticAssertTest< \ + sizeof(::RAPIDJSON_NAMESPACE::STATIC_ASSERTION_FAILURE)> \ + RAPIDJSON_JOIN(StaticAssertTypedef, __LINE__) RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_LIKELY, RAPIDJSON_UNLIKELY + +//! Compiler branching hint for expression with high probability to be true. +/*! + \ingroup RAPIDJSON_CONFIG + \param x Boolean expression likely to be true. +*/ +#ifndef RAPIDJSON_LIKELY +#if defined(__GNUC__) || defined(__clang__) +#define RAPIDJSON_LIKELY(x) __builtin_expect(!!(x), 1) +#else +#define RAPIDJSON_LIKELY(x) (x) +#endif +#endif + +//! Compiler branching hint for expression with low probability to be true. +/*! + \ingroup RAPIDJSON_CONFIG + \param x Boolean expression unlikely to be true. 
+*/ +#ifndef RAPIDJSON_UNLIKELY +#if defined(__GNUC__) || defined(__clang__) +#define RAPIDJSON_UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +#define RAPIDJSON_UNLIKELY(x) (x) +#endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// Helpers + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN + +#define RAPIDJSON_MULTILINEMACRO_BEGIN do { +#define RAPIDJSON_MULTILINEMACRO_END \ +} while((void)0, 0) + +// adopted from Boost +#define RAPIDJSON_VERSION_CODE(x,y,z) \ + (((x)*100000) + ((y)*100) + (z)) + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_DIAG_PUSH/POP, RAPIDJSON_DIAG_OFF + +#if defined(__GNUC__) +#define RAPIDJSON_GNUC \ + RAPIDJSON_VERSION_CODE(__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__) +#endif + +#if defined(__clang__) || (defined(RAPIDJSON_GNUC) && RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,2,0)) + +#define RAPIDJSON_PRAGMA(x) _Pragma(RAPIDJSON_STRINGIFY(x)) +#define RAPIDJSON_DIAG_PRAGMA(x) RAPIDJSON_PRAGMA(GCC diagnostic x) +#define RAPIDJSON_DIAG_OFF(x) \ + RAPIDJSON_DIAG_PRAGMA(ignored RAPIDJSON_STRINGIFY(RAPIDJSON_JOIN(-W,x))) + +// push/pop support in Clang and GCC>=4.6 +#if defined(__clang__) || (defined(RAPIDJSON_GNUC) && RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) +#define RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PRAGMA(push) +#define RAPIDJSON_DIAG_POP RAPIDJSON_DIAG_PRAGMA(pop) +#else // GCC >= 4.2, < 4.6 +#define RAPIDJSON_DIAG_PUSH /* ignored */ +#define RAPIDJSON_DIAG_POP /* ignored */ +#endif + +#elif defined(_MSC_VER) + +// pragma (MSVC specific) +#define RAPIDJSON_PRAGMA(x) __pragma(x) +#define RAPIDJSON_DIAG_PRAGMA(x) RAPIDJSON_PRAGMA(warning(x)) + +#define RAPIDJSON_DIAG_OFF(x) RAPIDJSON_DIAG_PRAGMA(disable: x) +#define RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PRAGMA(push) +#define RAPIDJSON_DIAG_POP RAPIDJSON_DIAG_PRAGMA(pop) + +#else + +#define RAPIDJSON_DIAG_OFF(x) /* ignored */ +#define RAPIDJSON_DIAG_PUSH /* ignored */ +#define RAPIDJSON_DIAG_POP /* 
ignored */ + +#endif // RAPIDJSON_DIAG_* + +// NCBI +#if NCBI_COMPILER_ICC +#undef RAPIDJSON_DIAG_OFF +#define RAPIDJSON_DIAG_OFF(x) +#endif + +/////////////////////////////////////////////////////////////////////////////// +// C++11 features + +#ifndef RAPIDJSON_HAS_CXX11_RVALUE_REFS +#if defined(__clang__) +#if __has_feature(cxx_rvalue_references) && \ + (defined(_LIBCPP_VERSION) || defined(__GLIBCXX__) && __GLIBCXX__ >= 20080306) +#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1 +#else +#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 0 +#endif +#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,3,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \ + (defined(_MSC_VER) && _MSC_VER >= 1600) + +#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1 +#else +#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 0 +#endif +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + +#ifndef RAPIDJSON_HAS_CXX11_NOEXCEPT +#if defined(__clang__) +#define RAPIDJSON_HAS_CXX11_NOEXCEPT __has_feature(cxx_noexcept) +#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) +// (defined(_MSC_VER) && _MSC_VER >= ????) 
// not yet supported +#define RAPIDJSON_HAS_CXX11_NOEXCEPT 1 +#else +#define RAPIDJSON_HAS_CXX11_NOEXCEPT 0 +#endif +#endif +#if RAPIDJSON_HAS_CXX11_NOEXCEPT +#define RAPIDJSON_NOEXCEPT noexcept +#else +#define RAPIDJSON_NOEXCEPT /* noexcept */ +#endif // RAPIDJSON_HAS_CXX11_NOEXCEPT + +// no automatic detection, yet +#ifndef RAPIDJSON_HAS_CXX11_TYPETRAITS +#define RAPIDJSON_HAS_CXX11_TYPETRAITS 0 +#endif + +#ifndef RAPIDJSON_HAS_CXX11_RANGE_FOR +#if defined(__clang__) +#define RAPIDJSON_HAS_CXX11_RANGE_FOR __has_feature(cxx_range_for) +#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \ + (defined(_MSC_VER) && _MSC_VER >= 1700) +#define RAPIDJSON_HAS_CXX11_RANGE_FOR 1 +#else +#define RAPIDJSON_HAS_CXX11_RANGE_FOR 0 +#endif +#endif // RAPIDJSON_HAS_CXX11_RANGE_FOR + +//!@endcond + +/////////////////////////////////////////////////////////////////////////////// +// new/delete + +#ifndef RAPIDJSON_NEW +///! customization point for global \c new +#define RAPIDJSON_NEW(TypeName) new TypeName +#endif +#ifndef RAPIDJSON_DELETE +///! customization point for global \c delete +#define RAPIDJSON_DELETE(x) delete x +#endif + +/////////////////////////////////////////////////////////////////////////////// +// Type + +/*! \namespace rapidjson + \brief main RapidJSON namespace + \see RAPIDJSON_NAMESPACE +*/ +RAPIDJSON_NAMESPACE_BEGIN + +//! 
Type of JSON value +enum Type { + kNullType = 0, //!< null + kFalseType = 1, //!< false + kTrueType = 2, //!< true + kObjectType = 3, //!< object + kArrayType = 4, //!< array + kStringType = 5, //!< string + kNumberType = 6 //!< number +}; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/reader.h b/c++/include/misc/jsonwrapp/rapidjson11/reader.h new file mode 100644 index 00000000..dbb5e16f --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/reader.h @@ -0,0 +1,1864 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_READER_H_ +#define RAPIDJSON_READER_H_ + +/*! 
\file reader.h */ + +#include "allocators.h" +#include "stream.h" +#include "encodedstream.h" +#include "internal/meta.h" +#include "internal/stack.h" +#include "internal/strtod.h" +#include + +#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER) +#include +#pragma intrinsic(_BitScanForward) +#endif +#ifdef RAPIDJSON_SSE42 +#include +#elif defined(RAPIDJSON_SSE2) +#include +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant +RAPIDJSON_DIAG_OFF(4702) // unreachable code +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(old-style-cast) +RAPIDJSON_DIAG_OFF(padded) +RAPIDJSON_DIAG_OFF(switch-enum) +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#define RAPIDJSON_NOTHING /* deliberately empty */ +#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN +#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \ + RAPIDJSON_MULTILINEMACRO_END +#endif +#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \ + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING) +//!@endcond + +/*! \def RAPIDJSON_PARSE_ERROR_NORETURN + \ingroup RAPIDJSON_ERRORS + \brief Macro to indicate a parse error. + \param parseErrorCode \ref rapidjson::ParseErrorCode of the error + \param offset position of the error in JSON input (\c size_t) + + This macros can be used as a customization point for the internal + error handling mechanism of RapidJSON. 
+ + A common usage model is to throw an exception instead of requiring the + caller to explicitly check the \ref rapidjson::GenericReader::Parse's + return value: + + \code + #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \ + throw ParseException(parseErrorCode, #parseErrorCode, offset) + + #include // std::runtime_error + #include "rapidjson/error/error.h" // rapidjson::ParseResult + + struct ParseException : std::runtime_error, rapidjson::ParseResult { + ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset) + : std::runtime_error(msg), ParseResult(code, offset) {} + }; + + #include "rapidjson/reader.h" + \endcode + + \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse + */ +#ifndef RAPIDJSON_PARSE_ERROR_NORETURN +#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \ + SetParseError(parseErrorCode, offset); \ + RAPIDJSON_MULTILINEMACRO_END +#endif + +/*! \def RAPIDJSON_PARSE_ERROR + \ingroup RAPIDJSON_ERRORS + \brief (Internal) macro to indicate and handle a parse error. + \param parseErrorCode \ref rapidjson::ParseErrorCode of the error + \param offset position of the error in JSON input (\c size_t) + + Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing. + + \see RAPIDJSON_PARSE_ERROR_NORETURN + \hideinitializer + */ +#ifndef RAPIDJSON_PARSE_ERROR +#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \ + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \ + RAPIDJSON_MULTILINEMACRO_END +#endif + +#include "error/error.h" // ParseErrorCode, ParseResult + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// ParseFlag + +/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS + \ingroup RAPIDJSON_CONFIG + \brief User-defined kParseDefaultFlags definition. 
+ + User can define this as any \c ParseFlag combinations. +*/ +#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS +#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags +#endif + +//! Combination of parseFlags +/*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream + */ +enum ParseFlag { + kParseNoFlags = 0, //!< No flags are set. + kParseInsituFlag = 1, //!< In-situ(destructive) parsing. + kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings. + kParseIterativeFlag = 4, //!< Iterative(constant complexity in terms of function call stack size) parsing. + kParseStopWhenDoneFlag = 8, //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error. + kParseFullPrecisionFlag = 16, //!< Parse number in full precision (but slower). + kParseCommentsFlag = 32, //!< Allow one-line (//) and multi-line (/**/) comments. + kParseNumbersAsStringsFlag = 64, //!< Parse all numbers (ints/doubles) as strings. + kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays. + kParseNanAndInfFlag = 256, //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles. + kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS +}; + +/////////////////////////////////////////////////////////////////////////////// +// Handler + +/*! \class rapidjson::Handler + \brief Concept for receiving events from GenericReader upon parsing. + The functions return true if no error occurs. If they return false, + the event publisher should terminate the process. 
+\code +concept Handler { + typename Ch; + + bool Null(); + bool Bool(bool b); + bool Int(int i); + bool Uint(unsigned i); + bool Int64(int64_t i); + bool Uint64(uint64_t i); + bool Double(double d); + /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) + bool RawNumber(const Ch* str, SizeType length, bool copy); + bool String(const Ch* str, SizeType length, bool copy); + bool StartObject(); + bool Key(const Ch* str, SizeType length, bool copy); + bool EndObject(SizeType memberCount); + bool StartArray(); + bool EndArray(SizeType elementCount); +}; +\endcode +*/ +/////////////////////////////////////////////////////////////////////////////// +// BaseReaderHandler + +//! Default implementation of Handler. +/*! This can be used as base class of any reader handler. + \note implements Handler concept +*/ +template, typename Derived = void> +struct BaseReaderHandler { + typedef typename Encoding::Ch Ch; + + typedef typename internal::SelectIf, BaseReaderHandler, Derived>::Type Override; + + bool Default() { return true; } + bool Null() { return static_cast(*this).Default(); } + bool Bool(bool) { return static_cast(*this).Default(); } + bool Int(int) { return static_cast(*this).Default(); } + bool Uint(unsigned) { return static_cast(*this).Default(); } + bool Int64(int64_t) { return static_cast(*this).Default(); } + bool Uint64(uint64_t) { return static_cast(*this).Default(); } + bool Double(double) { return static_cast(*this).Default(); } + /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) + bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast(*this).String(str, len, copy); } + bool String(const Ch*, SizeType, bool) { return static_cast(*this).Default(); } + bool StartObject() { return static_cast(*this).Default(); } + bool Key(const Ch* str, SizeType len, bool copy) { return static_cast(*this).String(str, len, copy); } + bool EndObject(SizeType) { return 
static_cast(*this).Default(); } + bool StartArray() { return static_cast(*this).Default(); } + bool EndArray(SizeType) { return static_cast(*this).Default(); } +}; + +/////////////////////////////////////////////////////////////////////////////// +// StreamLocalCopy + +namespace internal { + +template::copyOptimization> +class StreamLocalCopy; + +//! Do copy optimization. +template +class StreamLocalCopy { +public: + StreamLocalCopy(Stream& original) : s(original), original_(original) {} + ~StreamLocalCopy() { original_ = s; } + + Stream s; + +private: + StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; + + Stream& original_; +}; + +//! Keep reference. +template +class StreamLocalCopy { +public: + StreamLocalCopy(Stream& original) : s(original) {} + + Stream& s; + +private: + StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; +}; + +} // namespace internal + +/////////////////////////////////////////////////////////////////////////////// +// SkipWhitespace + +//! Skip the JSON white spaces in a stream. +/*! \param is A input stream for skipping white spaces. + \note This function has SSE2/SSE4.2 specialization. +*/ +template +void SkipWhitespace(InputStream& is) { + internal::StreamLocalCopy copy(is); + InputStream& s(copy.s); + + typename InputStream::Ch c; + while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t') + s.Take(); +} + +inline const char* SkipWhitespace(const char* p, const char* end) { + while (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + return p; +} + +#ifdef RAPIDJSON_SSE42 +//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once. 
+inline const char *SkipWhitespace_SIMD(const char* p) { + // Fast return for single non-whitespace + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // 16-byte align to the next boundary + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + while (p != nextAligned) + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // The rest of string using SIMD + static const char whitespace[16] = " \n\r\t"; + const __m128i w = _mm_loadu_si128(reinterpret_cast(&whitespace[0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast(p)); + const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY); + if (r != 16) // some of characters is non-whitespace + return p + r; + } +} + +inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { + // Fast return for single non-whitespace + if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + else + return p; + + // The middle of string using SIMD + static const char whitespace[16] = " \n\r\t"; + const __m128i w = _mm_loadu_si128(reinterpret_cast(&whitespace[0])); + + for (; p <= end - 16; p += 16) { + const __m128i s = _mm_loadu_si128(reinterpret_cast(p)); + const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY); + if (r != 16) // some of characters is non-whitespace + return p + r; + } + + return SkipWhitespace(p, end); +} + +#elif defined(RAPIDJSON_SSE2) + +//! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once. 
+inline const char *SkipWhitespace_SIMD(const char* p) { + // Fast return for single non-whitespace + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // 16-byte align to the next boundary + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + while (p != nextAligned) + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // The rest of string + #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c } + static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') }; + #undef C16 + + const __m128i w0 = _mm_loadu_si128(reinterpret_cast(&whitespaces[0][0])); + const __m128i w1 = _mm_loadu_si128(reinterpret_cast(&whitespaces[1][0])); + const __m128i w2 = _mm_loadu_si128(reinterpret_cast(&whitespaces[2][0])); + const __m128i w3 = _mm_loadu_si128(reinterpret_cast(&whitespaces[3][0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast(p)); + __m128i x = _mm_cmpeq_epi8(s, w0); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); + unsigned short r = static_cast(~_mm_movemask_epi8(x)); + if (r != 0) { // some of characters may be non-whitespace +#ifdef _MSC_VER // Find the index of first non-whitespace + unsigned long offset; + _BitScanForward(&offset, r); + return p + offset; +#else + return p + __builtin_ffs(r) - 1; +#endif + } + } +} + +inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { + // Fast return for single non-whitespace + if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + else + return p; + + // The rest of string + #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c } + static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') }; + #undef C16 + + const __m128i w0 = _mm_loadu_si128(reinterpret_cast(&whitespaces[0][0])); + const 
__m128i w1 = _mm_loadu_si128(reinterpret_cast(&whitespaces[1][0])); + const __m128i w2 = _mm_loadu_si128(reinterpret_cast(&whitespaces[2][0])); + const __m128i w3 = _mm_loadu_si128(reinterpret_cast(&whitespaces[3][0])); + + for (; p <= end - 16; p += 16) { + const __m128i s = _mm_loadu_si128(reinterpret_cast(p)); + __m128i x = _mm_cmpeq_epi8(s, w0); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); + unsigned short r = static_cast(~_mm_movemask_epi8(x)); + if (r != 0) { // some of characters may be non-whitespace +#ifdef _MSC_VER // Find the index of first non-whitespace + unsigned long offset; + _BitScanForward(&offset, r); + return p + offset; +#else + return p + __builtin_ffs(r) - 1; +#endif + } + } + + return SkipWhitespace(p, end); +} + +#endif // RAPIDJSON_SSE2 + +#ifdef RAPIDJSON_SIMD +//! Template function specialization for InsituStringStream +template<> inline void SkipWhitespace(InsituStringStream& is) { + is.src_ = const_cast(SkipWhitespace_SIMD(is.src_)); +} + +//! Template function specialization for StringStream +template<> inline void SkipWhitespace(StringStream& is) { + is.src_ = SkipWhitespace_SIMD(is.src_); +} + +template<> inline void SkipWhitespace(EncodedInputStream, MemoryStream>& is) { + is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_); +} +#endif // RAPIDJSON_SIMD + +/////////////////////////////////////////////////////////////////////////////// +// GenericReader + +//! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator. +/*! GenericReader parses JSON text from a stream, and send events synchronously to an + object implementing Handler concept. + + It needs to allocate a stack for storing a single decoded string during + non-destructive parsing. + + For in-situ parsing, the decoded string is directly written to the source + text string, no temporary buffer is required. 
+ + A GenericReader object can be reused for parsing multiple JSON text. + + \tparam SourceEncoding Encoding of the input stream. + \tparam TargetEncoding Encoding of the parse output. + \tparam StackAllocator Allocator type for stack. +*/ +template +class GenericReader { +public: + typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type + + //! Constructor. + /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) + \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing) + */ + GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(stackAllocator, stackCapacity), parseResult_() {} + + //! Parse JSON text. + /*! \tparam parseFlags Combination of \ref ParseFlag. + \tparam InputStream Type of input stream, implementing Stream concept. + \tparam Handler Type of handler, implementing Handler concept. + \param is Input stream to be parsed. + \param handler The handler to receive events. + \return Whether the parsing is successful. 
+ */ + template + ParseResult Parse(InputStream& is, Handler& handler) { + if (parseFlags & kParseIterativeFlag) + return IterativeParse(is, handler); + + parseResult_.Clear(); + + ClearStackOnExit scope(*this); + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + + if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell()); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + } + else { + ParseValue(is, handler); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + + if (!(parseFlags & kParseStopWhenDoneFlag)) { + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + + if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell()); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + } + } + } + + return parseResult_; + } + + //! Parse JSON text (with \ref kParseDefaultFlags) + /*! \tparam InputStream Type of input stream, implementing Stream concept + \tparam Handler Type of handler, implementing Handler concept. + \param is Input stream to be parsed. + \param handler The handler to receive events. + \return Whether the parsing is successful. + */ + template + ParseResult Parse(InputStream& is, Handler& handler) { + return Parse(is, handler); + } + + //! Whether a parse error has occured in the last parsing. + bool HasParseError() const { return parseResult_.IsError(); } + + //! Get the \ref ParseErrorCode of last parsing. + ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); } + + //! Get the position of last parsing error in input, 0 otherwise. + size_t GetErrorOffset() const { return parseResult_.Offset(); } + +protected: + void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); } + +private: + // Prohibit copy constructor & assignment operator. 
+ GenericReader(const GenericReader&); + GenericReader& operator=(const GenericReader&); + + void ClearStack() { stack_.Clear(); } + + // clear stack on any exit from ParseStream, e.g. due to exception + struct ClearStackOnExit { + explicit ClearStackOnExit(GenericReader& r) : r_(r) {} + ~ClearStackOnExit() { r_.ClearStack(); } + private: + GenericReader& r_; + ClearStackOnExit(const ClearStackOnExit&); + ClearStackOnExit& operator=(const ClearStackOnExit&); + }; + + template + void SkipWhitespaceAndComments(InputStream& is) { + SkipWhitespace(is); + + if (parseFlags & kParseCommentsFlag) { + while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) { + if (Consume(is, '*')) { + while (true) { + if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) + RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); + else if (Consume(is, '*')) { + if (Consume(is, '/')) + break; + } + else + is.Take(); + } + } + else if (RAPIDJSON_LIKELY(Consume(is, '/'))) + while (is.Peek() != '\0' && is.Take() != '\n') {} + else + RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); + + SkipWhitespace(is); + } + } + } + + // Parse object: { string : value, ... 
} + template + void ParseObject(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == '{'); + is.Take(); // Skip '{' + + if (RAPIDJSON_UNLIKELY(!handler.StartObject())) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + if (Consume(is, '}')) { + if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + + for (SizeType memberCount = 0;;) { + if (RAPIDJSON_UNLIKELY(is.Peek() != '"')) + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); + + ParseString(is, handler, true); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + if (RAPIDJSON_UNLIKELY(!Consume(is, ':'))) + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + ParseValue(is, handler); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + ++memberCount; + + switch (is.Peek()) { + case ',': + is.Take(); + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + break; + case '}': + is.Take(); + if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + default: + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy + } + + if (parseFlags & kParseTrailingCommasFlag) { + if (is.Peek() == '}') { + if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + is.Take(); + return; + } + } + } + } + + // Parse array: [ value, ... 
] + template + void ParseArray(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == '['); + is.Take(); // Skip '[' + + if (RAPIDJSON_UNLIKELY(!handler.StartArray())) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + if (Consume(is, ']')) { + if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + + for (SizeType elementCount = 0;;) { + ParseValue(is, handler); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + ++elementCount; + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + if (Consume(is, ',')) { + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + } + else if (Consume(is, ']')) { + if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); + + if (parseFlags & kParseTrailingCommasFlag) { + if (is.Peek() == ']') { + if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + is.Take(); + return; + } + } + } + } + + template + void ParseNull(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == 'n'); + is.Take(); + + if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) { + if (RAPIDJSON_UNLIKELY(!handler.Null())) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); + } + + template + void ParseTrue(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == 't'); + is.Take(); + + if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) { + if (RAPIDJSON_UNLIKELY(!handler.Bool(true))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + } + else + 
RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); + } + + template + void ParseFalse(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == 'f'); + is.Take(); + + if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) { + if (RAPIDJSON_UNLIKELY(!handler.Bool(false))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); + } + + template + RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) { + if (RAPIDJSON_LIKELY(is.Peek() == expect)) { + is.Take(); + return true; + } + else + return false; + } + + // Helper function to parse four hexidecimal digits in \uXXXX in ParseString(). + template + unsigned ParseHex4(InputStream& is, size_t escapeOffset) { + unsigned codepoint = 0; + for (int i = 0; i < 4; i++) { + Ch c = is.Peek(); + codepoint <<= 4; + codepoint += static_cast(c); + if (c >= '0' && c <= '9') + codepoint -= '0'; + else if (c >= 'A' && c <= 'F') + codepoint -= 'A' - 10; + else if (c >= 'a' && c <= 'f') + codepoint -= 'a' - 10; + else { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0); + } + is.Take(); + } + return codepoint; + } + + template + class StackStream { + public: + typedef CharType Ch; + + StackStream(internal::Stack& stack) : stack_(stack), length_(0) {} + RAPIDJSON_FORCEINLINE void Put(Ch c) { + *stack_.template Push() = c; + ++length_; + } + + RAPIDJSON_FORCEINLINE void* Push(SizeType count) { + length_ += count; + return stack_.template Push(count); + } + + size_t Length() const { return length_; } + + Ch* Pop() { + return stack_.template Pop(length_); + } + + private: + StackStream(const StackStream&); + StackStream& operator=(const StackStream&); + + internal::Stack& stack_; + SizeType length_; + }; + + // Parse string and generate String event. 
Different code paths for kParseInsituFlag. + template + void ParseString(InputStream& is, Handler& handler, bool isKey = false) { + internal::StreamLocalCopy copy(is); + InputStream& s(copy.s); + + RAPIDJSON_ASSERT(s.Peek() == '\"'); + s.Take(); // Skip '\"' + + bool success = false; + if (parseFlags & kParseInsituFlag) { + typename InputStream::Ch *head = s.PutBegin(); + ParseStringToStream(s, s); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + size_t length = s.PutEnd(head) - 1; + RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); + const typename TargetEncoding::Ch* const str = reinterpret_cast(head); + success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false)); + } + else { + StackStream stackStream(stack_); + ParseStringToStream(s, stackStream); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + SizeType length = static_cast(stackStream.Length()) - 1; + const typename TargetEncoding::Ch* const str = stackStream.Pop(); + success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true)); + } + if (RAPIDJSON_UNLIKELY(!success)) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell()); + } + + // Parse string to an output is + // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation. + template + RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) { +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + static const char escape[256] = { + Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/', + Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, + 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0, + 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 + }; +#undef Z16 +//!@endcond + + for (;;) { + // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation. 
+ if (!(parseFlags & kParseValidateEncodingFlag)) + ScanCopyUnescapedString(is, os); + + Ch c = is.Peek(); + if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape + size_t escapeOffset = is.Tell(); // For invalid escaping, report the inital '\\' as error offset + is.Take(); + Ch e = is.Peek(); + if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast(e)])) { + is.Take(); + os.Put(static_cast(escape[static_cast(e)])); + } + else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode + is.Take(); + unsigned codepoint = ParseHex4(is, escapeOffset); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) { + // Handle UTF-16 surrogate pair + if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u'))) + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); + unsigned codepoint2 = ParseHex4(is, escapeOffset); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)) + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); + codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; + } + TEncoding::Encode(os, codepoint); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset); + } + else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote + is.Take(); + os.Put('\0'); // null-terminate the string + return; + } + else if (RAPIDJSON_UNLIKELY(static_cast(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + if (c == '\0') + RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell()); + else + RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell()); + } + else { + size_t offset = is.Tell(); + if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ? 
+ !Transcoder::Validate(is, os) : + !Transcoder::Transcode(is, os)))) + RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset); + } + } + } + + template + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) { + // Do nothing for generic version + } + +#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) + // StringStream -> StackStream + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream& os) { + const char* p = is.src_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + while (p != nextAligned) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast(*p) < 0x20)) { + is.src_ = p; + return; + } + else + os.Put(*p++); + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast(&space[0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19 + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + SizeType 
length; + #ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + length = offset; + #else + length = static_cast(__builtin_ffs(r) - 1); + #endif + if (length != 0) { + char* q = reinterpret_cast(os.Push(length)); + for (size_t i = 0; i < length; i++) + q[i] = p[i]; + + p += length; + } + break; + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s); + } + + is.src_ = p; + } + + // InsituStringStream -> InsituStringStream + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) { + RAPIDJSON_ASSERT(&is == &os); + (void)os; + + if (is.src_ == is.dst_) { + SkipUnescapedString(is); + return; + } + + char* p = is.src_; + char *q = is.dst_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + while (p != nextAligned) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast(*p) < 0x20)) { + is.src_ = p; + is.dst_ = q; + return; + } + else + *q++ = *p++; + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast(&space[0])); + + for (;; p += 16, q += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = 
_mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19 + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + size_t length; +#ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + length = offset; +#else + length = static_cast(__builtin_ffs(r) - 1); +#endif + for (const char* pend = p + length; p != pend; ) + *q++ = *p++; + break; + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s); + } + + is.src_ = p; + is.dst_ = q; + } + + // When read/write pointers are the same for insitu stream, just skip unescaped characters + static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) { + RAPIDJSON_ASSERT(is.src_ == is.dst_); + char* p = is.src_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + for (; p != nextAligned; p++) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast(*p) < 0x20)) { + is.src_ = is.dst_ = p; + return; + } + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast(&space[0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast(p)); + const __m128i t1 = 
_mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19 + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + size_t length; +#ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + length = offset; +#else + length = static_cast(__builtin_ffs(r) - 1); +#endif + p += length; + break; + } + } + + is.src_ = is.dst_ = p; + } +#endif + + template + class NumberStream; + + template + class NumberStream { + public: + typedef typename InputStream::Ch Ch; + + NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; } + + RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); } + RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); } + RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); } + RAPIDJSON_FORCEINLINE void Push(char) {} + + size_t Tell() { return is.Tell(); } + size_t Length() { return 0; } + const char* Pop() { return 0; } + + protected: + NumberStream& operator=(const NumberStream&); + + InputStream& is; + }; + + template + class NumberStream : public NumberStream { + typedef NumberStream Base; + public: + NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {} + + RAPIDJSON_FORCEINLINE Ch TakePush() { + stackStream.Put(static_cast(Base::is.Peek())); + return Base::is.Take(); + } + + RAPIDJSON_FORCEINLINE void Push(char c) { + stackStream.Put(c); + } + + size_t Length() { return stackStream.Length(); } + + const char* Pop() { + stackStream.Put('\0'); + return stackStream.Pop(); + } + + private: + StackStream stackStream; + }; + + template + class NumberStream : public NumberStream { + typedef NumberStream Base; + public: + NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {} + + 
RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); } + }; + + template + void ParseNumber(InputStream& is, Handler& handler) { + internal::StreamLocalCopy copy(is); + NumberStream s(*this, copy.s); + + size_t startOffset = s.Tell(); + double d = 0.0; + bool useNanOrInf = false; + + // Parse minus + bool minus = Consume(s, '-'); + + // Parse int: zero / ( digit1-9 *DIGIT ) + unsigned i = 0; + uint64_t i64 = 0; + bool use64bit = false; + int significandDigit = 0; + if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) { + i = 0; + s.TakePush(); + } + else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) { + i = static_cast(s.TakePush() - '0'); + + if (minus) + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648 + if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) { + i64 = i; + use64bit = true; + break; + } + } + i = i * 10 + static_cast(s.TakePush() - '0'); + significandDigit++; + } + else + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295 + if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) { + i64 = i; + use64bit = true; + break; + } + } + i = i * 10 + static_cast(s.TakePush() - '0'); + significandDigit++; + } + } + // Parse NaN or Infinity here + else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) { + useNanOrInf = true; + if (RAPIDJSON_LIKELY(Consume(s, 'N') && Consume(s, 'a') && Consume(s, 'N'))) { + d = std::numeric_limits::quiet_NaN(); + } + else if (RAPIDJSON_LIKELY(Consume(s, 'I') && Consume(s, 'n') && Consume(s, 'f'))) { + d = (minus ? 
-std::numeric_limits::infinity() : std::numeric_limits::infinity()); + if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n') + && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); + + // Parse 64bit int + bool useDouble = false; + if (use64bit) { + if (minus) + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808 + if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) { + d = static_cast(i64); + useDouble = true; + break; + } + i64 = i64 * 10 + static_cast(s.TakePush() - '0'); + significandDigit++; + } + else + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615 + if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) { + d = static_cast(i64); + useDouble = true; + break; + } + i64 = i64 * 10 + static_cast(s.TakePush() - '0'); + significandDigit++; + } + } + + // Force double for big integer + if (useDouble) { + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(d >= 1.7976931348623157e307)) // DBL_MAX / 10.0 + RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset); + d = d * 10 + (s.TakePush() - '0'); + } + } + + // Parse frac = decimal-point 1*DIGIT + int expFrac = 0; + size_t decimalPosition; + if (Consume(s, '.')) { + decimalPosition = s.Length(); + + if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9'))) + RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell()); + + if (!useDouble) { +#if RAPIDJSON_64BIT + // Use i64 to store significand in 64-bit architecture + 
if (!use64bit) + i64 = i; + + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path + break; + else { + i64 = i64 * 10 + static_cast(s.TakePush() - '0'); + --expFrac; + if (i64 != 0) + significandDigit++; + } + } + + d = static_cast(i64); +#else + // Use double to store significand in 32-bit architecture + d = static_cast(use64bit ? i64 : i); +#endif + useDouble = true; + } + + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (significandDigit < 17) { + d = d * 10.0 + (s.TakePush() - '0'); + --expFrac; + if (RAPIDJSON_LIKELY(d > 0.0)) + significandDigit++; + } + else + s.TakePush(); + } + } + else + decimalPosition = s.Length(); // decimal position at the end of integer. + + // Parse exp = e [ minus / plus ] 1*DIGIT + int exp = 0; + if (Consume(s, 'e') || Consume(s, 'E')) { + if (!useDouble) { + d = static_cast(use64bit ? i64 : i); + useDouble = true; + } + + bool expMinus = false; + if (Consume(s, '+')) + ; + else if (Consume(s, '-')) + expMinus = true; + + if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + exp = static_cast(s.Take() - '0'); + if (expMinus) { + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + exp = exp * 10 + static_cast(s.Take() - '0'); + if (exp >= 214748364) { // Issue #313: prevent overflow exponent + while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent + s.Take(); + } + } + } + else { // positive exp + int maxExp = 308 - expFrac; + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + exp = exp * 10 + static_cast(s.Take() - '0'); + if (RAPIDJSON_UNLIKELY(exp > maxExp)) + RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset); + } + } + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell()); + + if (expMinus) + exp = -exp; + } + + // Finish parsing, call event according to the type of number. 
+ bool cont = true; + + if (parseFlags & kParseNumbersAsStringsFlag) { + if (parseFlags & kParseInsituFlag) { + s.Pop(); // Pop stack no matter if it will be used or not. + typename InputStream::Ch* head = is.PutBegin(); + const size_t length = s.Tell() - startOffset; + RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); + // unable to insert the \0 character here, it will erase the comma after this number + const typename TargetEncoding::Ch* const str = reinterpret_cast(head); + cont = handler.RawNumber(str, SizeType(length), false); + } + else { + SizeType numCharsToCopy = static_cast(s.Length()); + StringStream srcStream(s.Pop()); + StackStream dstStream(stack_); + while (numCharsToCopy--) { + Transcoder, TargetEncoding>::Transcode(srcStream, dstStream); + } + dstStream.Put('\0'); + const typename TargetEncoding::Ch* str = dstStream.Pop(); + const SizeType length = static_cast(dstStream.Length()) - 1; + cont = handler.RawNumber(str, SizeType(length), true); + } + } + else { + size_t length = s.Length(); + const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not. + + if (useDouble) { + int p = exp + expFrac; + if (parseFlags & kParseFullPrecisionFlag) + d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp); + else + d = internal::StrtodNormalPrecision(d, p); + + cont = handler.Double(minus ? 
-d : d); + } + else if (useNanOrInf) { + cont = handler.Double(d); + } + else { + if (use64bit) { + if (minus) + cont = handler.Int64(static_cast(~i64 + 1)); + else + cont = handler.Uint64(i64); + } + else { + if (minus) + cont = handler.Int(static_cast(~i + 1)); + else + cont = handler.Uint(i); + } + } + } + if (RAPIDJSON_UNLIKELY(!cont)) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset); + } + + // Parse any JSON value + template + void ParseValue(InputStream& is, Handler& handler) { + switch (is.Peek()) { + case 'n': ParseNull (is, handler); break; + case 't': ParseTrue (is, handler); break; + case 'f': ParseFalse (is, handler); break; + case '"': ParseString(is, handler); break; + case '{': ParseObject(is, handler); break; + case '[': ParseArray (is, handler); break; + default : + ParseNumber(is, handler); + break; + + } + } + + // Iterative Parsing + + // States + enum IterativeParsingState { + IterativeParsingStartState = 0, + IterativeParsingFinishState, + IterativeParsingErrorState, + + // Object states + IterativeParsingObjectInitialState, + IterativeParsingMemberKeyState, + IterativeParsingKeyValueDelimiterState, + IterativeParsingMemberValueState, + IterativeParsingMemberDelimiterState, + IterativeParsingObjectFinishState, + + // Array states + IterativeParsingArrayInitialState, + IterativeParsingElementState, + IterativeParsingElementDelimiterState, + IterativeParsingArrayFinishState, + + // Single value state + IterativeParsingValueState + }; + + enum { cIterativeParsingStateCount = IterativeParsingValueState + 1 }; + + // Tokens + enum Token { + LeftBracketToken = 0, + RightBracketToken, + + LeftCurlyBracketToken, + RightCurlyBracketToken, + + CommaToken, + ColonToken, + + StringToken, + FalseToken, + TrueToken, + NullToken, + NumberToken, + + kTokenCount + }; + + RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) { + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#define N NumberToken +#define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N + // Maps from ASCII to 
Token + static const unsigned char tokenMap[256] = { + N16, // 00~0F + N16, // 10~1F + N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F + N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F + N16, // 40~4F + N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F + N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F + N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F + N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF + }; +#undef N +#undef N16 +//!@endcond + + if (sizeof(Ch) == 1 || static_cast(c) < 256) + return static_cast(tokenMap[static_cast(c)]); + else + return NumberToken; + } + + RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) { + // current state x one lookahead token -> new state + static const char G[cIterativeParsingStateCount][kTokenCount] = { + // Start + { + IterativeParsingArrayInitialState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingValueState, // String + IterativeParsingValueState, // False + IterativeParsingValueState, // True + IterativeParsingValueState, // Null + IterativeParsingValueState // Number + }, + // Finish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // Error(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + 
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // ObjectInitial + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberKeyState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // MemberKey + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingKeyValueDelimiterState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // KeyValueDelimiter + { + IterativeParsingArrayInitialState, // Left bracket(push MemberValue state) + IterativeParsingErrorState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberValueState, // String + IterativeParsingMemberValueState, // False + IterativeParsingMemberValueState, // True + IterativeParsingMemberValueState, // Null + IterativeParsingMemberValueState // Number + }, + // MemberValue + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right 
curly bracket + IterativeParsingMemberDelimiterState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // MemberDelimiter + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberKeyState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // ObjectFinish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // ArrayInitial + { + IterativeParsingArrayInitialState, // Left bracket(push Element state) + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push Element state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingElementState, // String + IterativeParsingElementState, // False + IterativeParsingElementState, // True + IterativeParsingElementState, // Null + IterativeParsingElementState // Number + }, + // Element + { + IterativeParsingErrorState, // Left bracket + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingElementDelimiterState, // 
Comma + IterativeParsingErrorState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // ElementDelimiter + { + IterativeParsingArrayInitialState, // Left bracket(push Element state) + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push Element state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingElementState, // String + IterativeParsingElementState, // False + IterativeParsingElementState, // True + IterativeParsingElementState, // Null + IterativeParsingElementState // Number + }, + // ArrayFinish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // Single Value (sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + } + }; // End of G + + return static_cast(G[state][token]); + } + + // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit(). + // May return a new state on state pop. 
+ template + RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) { + (void)token; + + switch (dst) { + case IterativeParsingErrorState: + return dst; + + case IterativeParsingObjectInitialState: + case IterativeParsingArrayInitialState: + { + // Push the state(Element or MemeberValue) if we are nested in another array or value of member. + // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop. + IterativeParsingState n = src; + if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState) + n = IterativeParsingElementState; + else if (src == IterativeParsingKeyValueDelimiterState) + n = IterativeParsingMemberValueState; + // Push current state. + *stack_.template Push(1) = n; + // Initialize and push the member/element count. + *stack_.template Push(1) = 0; + // Call handler + bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray(); + // On handler short circuits the parsing. + if (!hr) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); + return IterativeParsingErrorState; + } + else { + is.Take(); + return dst; + } + } + + case IterativeParsingMemberKeyState: + ParseString(is, handler, true); + if (HasParseError()) + return IterativeParsingErrorState; + else + return dst; + + case IterativeParsingKeyValueDelimiterState: + RAPIDJSON_ASSERT(token == ColonToken); + is.Take(); + return dst; + + case IterativeParsingMemberValueState: + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. + ParseValue(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } + return dst; + + case IterativeParsingElementState: + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. 
+ ParseValue(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } + return dst; + + case IterativeParsingMemberDelimiterState: + case IterativeParsingElementDelimiterState: + is.Take(); + // Update member/element count. + *stack_.template Top() = *stack_.template Top() + 1; + return dst; + + case IterativeParsingObjectFinishState: + { + // Transit from delimiter is only allowed when trailing commas are enabled + if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell()); + return IterativeParsingErrorState; + } + // Get member count. + SizeType c = *stack_.template Pop(1); + // If the object is not empty, count the last member. + if (src == IterativeParsingMemberValueState) + ++c; + // Restore the state. + IterativeParsingState n = static_cast(*stack_.template Pop(1)); + // Transit to Finish state if this is the topmost scope. + if (n == IterativeParsingStartState) + n = IterativeParsingFinishState; + // Call handler + bool hr = handler.EndObject(c); + // On handler short circuits the parsing. + if (!hr) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); + return IterativeParsingErrorState; + } + else { + is.Take(); + return n; + } + } + + case IterativeParsingArrayFinishState: + { + // Transit from delimiter is only allowed when trailing commas are enabled + if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell()); + return IterativeParsingErrorState; + } + // Get element count. + SizeType c = *stack_.template Pop(1); + // If the array is not empty, count the last element. + if (src == IterativeParsingElementState) + ++c; + // Restore the state. + IterativeParsingState n = static_cast(*stack_.template Pop(1)); + // Transit to Finish state if this is the topmost scope. 
+ if (n == IterativeParsingStartState) + n = IterativeParsingFinishState; + // Call handler + bool hr = handler.EndArray(c); + // On handler short circuits the parsing. + if (!hr) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); + return IterativeParsingErrorState; + } + else { + is.Take(); + return n; + } + } + + default: + // This branch is for IterativeParsingValueState actually. + // Use `default:` rather than + // `case IterativeParsingValueState:` is for code coverage. + + // The IterativeParsingStartState is not enumerated in this switch-case. + // It is impossible for that case. And it can be caught by following assertion. + + // The IterativeParsingFinishState is not enumerated in this switch-case either. + // It is a "derivative" state which cannot triggered from Predict() directly. + // Therefore it cannot happen here. And it can be caught by following assertion. + RAPIDJSON_ASSERT(dst == IterativeParsingValueState); + + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. + ParseValue(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } + return IterativeParsingFinishState; + } + } + + template + void HandleError(IterativeParsingState src, InputStream& is) { + if (HasParseError()) { + // Error flag has been set. 
+ return; + } + + switch (src) { + case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return; + case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return; + case IterativeParsingObjectInitialState: + case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return; + case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return; + case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return; + case IterativeParsingKeyValueDelimiterState: + case IterativeParsingArrayInitialState: + case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return; + default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return; + } + } + + template + ParseResult IterativeParse(InputStream& is, Handler& handler) { + parseResult_.Clear(); + ClearStackOnExit scope(*this); + IterativeParsingState state = IterativeParsingStartState; + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + while (is.Peek() != '\0') { + Token t = Tokenize(is.Peek()); + IterativeParsingState n = Predict(state, t); + IterativeParsingState d = Transit(state, t, n, is, handler); + + if (d == IterativeParsingErrorState) { + HandleError(state, is); + break; + } + + state = d; + + // Do not further consume streams if a root JSON has been parsed. + if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState) + break; + + SkipWhitespaceAndComments(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + } + + // Handle the end of file. 
+ if (state != IterativeParsingFinishState) + HandleError(state, is); + + return parseResult_; + } + + static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. + internal::Stack stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. + ParseResult parseResult_; +}; // class GenericReader + +//! Reader with UTF8 encoding and default allocator. +typedef GenericReader, UTF8<> > Reader; + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_READER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/schema.h b/c++/include/misc/jsonwrapp/rapidjson11/schema.h new file mode 100644 index 00000000..20a9287e --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/schema.h @@ -0,0 +1,2053 @@ +// Tencent is pleased to support the open source community by making RapidJSON available-> +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip-> All rights reserved-> +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License-> You may obtain a copy of the License at +// +// http://opensource->org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied-> See the License for the +// specific language governing permissions and limitations under the License-> + +#ifndef RAPIDJSON_SCHEMA_H_ +#define RAPIDJSON_SCHEMA_H_ + +#include "document.h" +#include "pointer.h" +#include // abs, floor + +#if !defined(RAPIDJSON_SCHEMA_USE_INTERNALREGEX) +#define RAPIDJSON_SCHEMA_USE_INTERNALREGEX 1 +#else +#define RAPIDJSON_SCHEMA_USE_INTERNALREGEX 0 +#endif + +#if !RAPIDJSON_SCHEMA_USE_INTERNALREGEX 
&& !defined(RAPIDJSON_SCHEMA_USE_STDREGEX) && (__cplusplus >=201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)) +#define RAPIDJSON_SCHEMA_USE_STDREGEX 1 +#else +#define RAPIDJSON_SCHEMA_USE_STDREGEX 0 +#endif + +#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX +#include "internal/regex.h" +#elif RAPIDJSON_SCHEMA_USE_STDREGEX +#include +#endif + +#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX || RAPIDJSON_SCHEMA_USE_STDREGEX +#define RAPIDJSON_SCHEMA_HAS_REGEX 1 +#else +#define RAPIDJSON_SCHEMA_HAS_REGEX 0 +#endif + +#ifndef RAPIDJSON_SCHEMA_VERBOSE +#define RAPIDJSON_SCHEMA_VERBOSE 0 +#endif + +#if RAPIDJSON_SCHEMA_VERBOSE +#include "stringbuffer.h" +#endif + +RAPIDJSON_DIAG_PUSH + +#if defined(__GNUC__) +RAPIDJSON_DIAG_OFF(effc++) +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_OFF(weak-vtables) +RAPIDJSON_DIAG_OFF(exit-time-destructors) +RAPIDJSON_DIAG_OFF(c++98-compat-pedantic) +RAPIDJSON_DIAG_OFF(variadic-macros) +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// Verbose Utilities + +#if RAPIDJSON_SCHEMA_VERBOSE + +namespace internal { + +inline void PrintInvalidKeyword(const char* keyword) { + printf("Fail keyword: %s\n", keyword); +} + +inline void PrintInvalidKeyword(const wchar_t* keyword) { + wprintf(L"Fail keyword: %ls\n", keyword); +} + +inline void PrintInvalidDocument(const char* document) { + printf("Fail document: %s\n\n", document); +} + +inline void PrintInvalidDocument(const wchar_t* document) { + wprintf(L"Fail document: %ls\n\n", document); +} + +inline void PrintValidatorPointers(unsigned depth, const char* s, const char* d) { + printf("S: %*s%s\nD: %*s%s\n\n", depth * 4, " ", s, depth * 4, " ", d); +} + +inline void PrintValidatorPointers(unsigned depth, const wchar_t* s, const wchar_t* d) { + wprintf(L"S: %*ls%ls\nD: %*ls%ls\n\n", depth * 4, L" ", s, depth * 4, L" ", d); +} + +} // namespace 
internal + +#endif // RAPIDJSON_SCHEMA_VERBOSE + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_INVALID_KEYWORD_RETURN + +#if RAPIDJSON_SCHEMA_VERBOSE +#define RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword) internal::PrintInvalidKeyword(keyword) +#else +#define RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword) +#endif + +#define RAPIDJSON_INVALID_KEYWORD_RETURN(keyword)\ +RAPIDJSON_MULTILINEMACRO_BEGIN\ + context.invalidKeyword = keyword.GetString();\ + RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword.GetString());\ + return false;\ +RAPIDJSON_MULTILINEMACRO_END + +/////////////////////////////////////////////////////////////////////////////// +// Forward declarations + +template +class GenericSchemaDocument; + +namespace internal { + +template +class Schema; + +/////////////////////////////////////////////////////////////////////////////// +// ISchemaValidator + +class ISchemaValidator { +public: + virtual ~ISchemaValidator() {} + virtual bool IsValid() const = 0; +}; + +/////////////////////////////////////////////////////////////////////////////// +// ISchemaStateFactory + +template +class ISchemaStateFactory { +public: + virtual ~ISchemaStateFactory() {} + virtual ISchemaValidator* CreateSchemaValidator(const SchemaType&) = 0; + virtual void DestroySchemaValidator(ISchemaValidator* validator) = 0; + virtual void* CreateHasher() = 0; + virtual uint64_t GetHashCode(void* hasher) = 0; + virtual void DestroryHasher(void* hasher) = 0; + virtual void* MallocState(size_t size) = 0; + virtual void FreeState(void* p) = 0; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Hasher + +// For comparison of compound value +template +class Hasher { +public: + typedef typename Encoding::Ch Ch; + + Hasher(Allocator* allocator = 0, size_t stackCapacity = kDefaultSize) : stack_(allocator, stackCapacity) {} + + bool Null() { return WriteType(kNullType); } + bool Bool(bool b) { return WriteType(b ? 
kTrueType : kFalseType); } + bool Int(int i) { Number n; n.u.i = i; n.d = static_cast(i); return WriteNumber(n); } + bool Uint(unsigned u) { Number n; n.u.u = u; n.d = static_cast(u); return WriteNumber(n); } + bool Int64(int64_t i) { Number n; n.u.i = i; n.d = static_cast(i); return WriteNumber(n); } + bool Uint64(uint64_t u) { Number n; n.u.u = u; n.d = static_cast(u); return WriteNumber(n); } + bool Double(double d) { + Number n; + if (d < 0) n.u.i = static_cast(d); + else n.u.u = static_cast(d); + n.d = d; + return WriteNumber(n); + } + + bool RawNumber(const Ch* str, SizeType len, bool) { + WriteBuffer(kNumberType, str, len * sizeof(Ch)); + return true; + } + + bool String(const Ch* str, SizeType len, bool) { + WriteBuffer(kStringType, str, len * sizeof(Ch)); + return true; + } + + bool StartObject() { return true; } + bool Key(const Ch* str, SizeType len, bool copy) { return String(str, len, copy); } + bool EndObject(SizeType memberCount) { + uint64_t h = Hash(0, kObjectType); + uint64_t* kv = stack_.template Pop(memberCount * 2); + for (SizeType i = 0; i < memberCount; i++) + h ^= Hash(kv[i * 2], kv[i * 2 + 1]); // Use xor to achieve member order insensitive + *stack_.template Push() = h; + return true; + } + + bool StartArray() { return true; } + bool EndArray(SizeType elementCount) { + uint64_t h = Hash(0, kArrayType); + uint64_t* e = stack_.template Pop(elementCount); + for (SizeType i = 0; i < elementCount; i++) + h = Hash(h, e[i]); // Use hash to achieve element order sensitive + *stack_.template Push() = h; + return true; + } + + bool IsValid() const { return stack_.GetSize() == sizeof(uint64_t); } + + uint64_t GetHashCode() const { + RAPIDJSON_ASSERT(IsValid()); + return *stack_.template Top(); + } + +private: + static const size_t kDefaultSize = 256; + struct Number { + union U { + uint64_t u; + int64_t i; + }u; + double d; + }; + + bool WriteType(Type type) { return WriteBuffer(type, 0, 0); } + + bool WriteNumber(const Number& n) { return 
WriteBuffer(kNumberType, &n, sizeof(n)); } + + bool WriteBuffer(Type type, const void* data, size_t len) { + // FNV-1a from http://isthe.com/chongo/tech/comp/fnv/ + uint64_t h = Hash(RAPIDJSON_UINT64_C2(0x84222325, 0xcbf29ce4), type); + const unsigned char* d = static_cast(data); + for (size_t i = 0; i < len; i++) + h = Hash(h, d[i]); + *stack_.template Push() = h; + return true; + } + + static uint64_t Hash(uint64_t h, uint64_t d) { + static const uint64_t kPrime = RAPIDJSON_UINT64_C2(0x00000100, 0x000001b3); + h ^= d; + h *= kPrime; + return h; + } + + Stack stack_; +}; + +/////////////////////////////////////////////////////////////////////////////// +// SchemaValidationContext + +template +struct SchemaValidationContext { + typedef Schema SchemaType; + typedef ISchemaStateFactory SchemaValidatorFactoryType; + typedef typename SchemaType::ValueType ValueType; + typedef typename ValueType::Ch Ch; + + enum PatternValidatorType { + kPatternValidatorOnly, + kPatternValidatorWithProperty, + kPatternValidatorWithAdditionalProperty + }; + + SchemaValidationContext(SchemaValidatorFactoryType& f, const SchemaType* s) : + factory(f), + schema(s), + valueSchema(), + invalidKeyword(), + hasher(), + arrayElementHashCodes(), + validators(), + validatorCount(), + patternPropertiesValidators(), + patternPropertiesValidatorCount(), + patternPropertiesSchemas(), + patternPropertiesSchemaCount(), + valuePatternValidatorType(kPatternValidatorOnly), + propertyExist(), + inArray(false), + valueUniqueness(false), + arrayUniqueness(false) + { + } + + ~SchemaValidationContext() { + if (hasher) + factory.DestroryHasher(hasher); + if (validators) { + for (SizeType i = 0; i < validatorCount; i++) + factory.DestroySchemaValidator(validators[i]); + factory.FreeState(validators); + } + if (patternPropertiesValidators) { + for (SizeType i = 0; i < patternPropertiesValidatorCount; i++) + factory.DestroySchemaValidator(patternPropertiesValidators[i]); + 
factory.FreeState(patternPropertiesValidators); + } + if (patternPropertiesSchemas) + factory.FreeState(patternPropertiesSchemas); + if (propertyExist) + factory.FreeState(propertyExist); + } + + SchemaValidatorFactoryType& factory; + const SchemaType* schema; + const SchemaType* valueSchema; + const Ch* invalidKeyword; + void* hasher; // Only validator access + void* arrayElementHashCodes; // Only validator access this + ISchemaValidator** validators; + SizeType validatorCount; + ISchemaValidator** patternPropertiesValidators; + SizeType patternPropertiesValidatorCount; + const SchemaType** patternPropertiesSchemas; + SizeType patternPropertiesSchemaCount; + PatternValidatorType valuePatternValidatorType; + PatternValidatorType objectPatternValidatorType; + SizeType arrayElementIndex; + bool* propertyExist; + bool inArray; + bool valueUniqueness; + bool arrayUniqueness; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Schema + +template +class Schema { +public: + typedef typename SchemaDocumentType::ValueType ValueType; + typedef typename SchemaDocumentType::AllocatorType AllocatorType; + typedef typename SchemaDocumentType::PointerType PointerType; + typedef typename ValueType::EncodingType EncodingType; + typedef typename EncodingType::Ch Ch; + typedef SchemaValidationContext Context; + typedef Schema SchemaType; + typedef GenericValue SValue; + friend class GenericSchemaDocument; + + Schema(SchemaDocumentType* schemaDocument, const PointerType& p, const ValueType& value, const ValueType& document, AllocatorType* allocator) : + allocator_(allocator), + typeless_(schemaDocument->GetTypeless()), + enum_(), + enumCount_(), + not_(), + type_((1 << kTotalSchemaType) - 1), // typeless + validatorCount_(), + properties_(), + additionalPropertiesSchema_(), + patternProperties_(), + patternPropertyCount_(), + propertyCount_(), + minProperties_(), + maxProperties_(SizeType(~0)), + additionalProperties_(true), + hasDependencies_(), 
+ hasRequired_(), + hasSchemaDependencies_(), + additionalItemsSchema_(), + itemsList_(), + itemsTuple_(), + itemsTupleCount_(), + minItems_(), + maxItems_(SizeType(~0)), + additionalItems_(true), + uniqueItems_(false), + pattern_(), + minLength_(0), + maxLength_(~SizeType(0)), + exclusiveMinimum_(false), + exclusiveMaximum_(false) + { + typedef typename SchemaDocumentType::ValueType ValueType; + typedef typename ValueType::ConstValueIterator ConstValueIterator; + typedef typename ValueType::ConstMemberIterator ConstMemberIterator; + + if (!value.IsObject()) + return; + + if (const ValueType* v = GetMember(value, GetTypeString())) { + type_ = 0; + if (v->IsString()) + AddType(*v); + else if (v->IsArray()) + for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr) + AddType(*itr); + } + + if (const ValueType* v = GetMember(value, GetEnumString())) + if (v->IsArray() && v->Size() > 0) { + enum_ = static_cast(allocator_->Malloc(sizeof(uint64_t) * v->Size())); + for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr) { + typedef Hasher > EnumHasherType; + char buffer[256 + 24]; + MemoryPoolAllocator<> hasherAllocator(buffer, sizeof(buffer)); + EnumHasherType h(&hasherAllocator, 256); + itr->Accept(h); + enum_[enumCount_++] = h.GetHashCode(); + } + } + + if (schemaDocument) { + AssignIfExist(allOf_, *schemaDocument, p, value, GetAllOfString(), document); + AssignIfExist(anyOf_, *schemaDocument, p, value, GetAnyOfString(), document); + AssignIfExist(oneOf_, *schemaDocument, p, value, GetOneOfString(), document); + } + + if (const ValueType* v = GetMember(value, GetNotString())) { + schemaDocument->CreateSchema(¬_, p.Append(GetNotString(), allocator_), *v, document); + notValidatorIndex_ = validatorCount_; + validatorCount_++; + } + + // Object + + const ValueType* properties = GetMember(value, GetPropertiesString()); + const ValueType* required = GetMember(value, GetRequiredString()); + const ValueType* dependencies = GetMember(value, 
GetDependenciesString()); + { + // Gather properties from properties/required/dependencies + SValue allProperties(kArrayType); + + if (properties && properties->IsObject()) + for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr) + AddUniqueElement(allProperties, itr->name); + + if (required && required->IsArray()) + for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr) + if (itr->IsString()) + AddUniqueElement(allProperties, *itr); + + if (dependencies && dependencies->IsObject()) + for (ConstMemberIterator itr = dependencies->MemberBegin(); itr != dependencies->MemberEnd(); ++itr) { + AddUniqueElement(allProperties, itr->name); + if (itr->value.IsArray()) + for (ConstValueIterator i = itr->value.Begin(); i != itr->value.End(); ++i) + if (i->IsString()) + AddUniqueElement(allProperties, *i); + } + + if (allProperties.Size() > 0) { + propertyCount_ = allProperties.Size(); + properties_ = static_cast(allocator_->Malloc(sizeof(Property) * propertyCount_)); + for (SizeType i = 0; i < propertyCount_; i++) { + new (&properties_[i]) Property(); + properties_[i].name = allProperties[i]; + properties_[i].schema = typeless_; + } + } + } + + if (properties && properties->IsObject()) { + PointerType q = p.Append(GetPropertiesString(), allocator_); + for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr) { + SizeType index; + if (FindPropertyIndex(itr->name, &index)) + schemaDocument->CreateSchema(&properties_[index].schema, q.Append(itr->name, allocator_), itr->value, document); + } + } + + if (const ValueType* v = GetMember(value, GetPatternPropertiesString())) { + PointerType q = p.Append(GetPatternPropertiesString(), allocator_); + patternProperties_ = static_cast(allocator_->Malloc(sizeof(PatternProperty) * v->MemberCount())); + patternPropertyCount_ = 0; + + for (ConstMemberIterator itr = v->MemberBegin(); itr != v->MemberEnd(); ++itr) { + new 
(&patternProperties_[patternPropertyCount_]) PatternProperty(); + patternProperties_[patternPropertyCount_].pattern = CreatePattern(itr->name); + schemaDocument->CreateSchema(&patternProperties_[patternPropertyCount_].schema, q.Append(itr->name, allocator_), itr->value, document); + patternPropertyCount_++; + } + } + + if (required && required->IsArray()) + for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr) + if (itr->IsString()) { + SizeType index; + if (FindPropertyIndex(*itr, &index)) { + properties_[index].required = true; + hasRequired_ = true; + } + } + + if (dependencies && dependencies->IsObject()) { + PointerType q = p.Append(GetDependenciesString(), allocator_); + hasDependencies_ = true; + for (ConstMemberIterator itr = dependencies->MemberBegin(); itr != dependencies->MemberEnd(); ++itr) { + SizeType sourceIndex; + if (FindPropertyIndex(itr->name, &sourceIndex)) { + if (itr->value.IsArray()) { + properties_[sourceIndex].dependencies = static_cast(allocator_->Malloc(sizeof(bool) * propertyCount_)); + std::memset(properties_[sourceIndex].dependencies, 0, sizeof(bool)* propertyCount_); + for (ConstValueIterator targetItr = itr->value.Begin(); targetItr != itr->value.End(); ++targetItr) { + SizeType targetIndex; + if (FindPropertyIndex(*targetItr, &targetIndex)) + properties_[sourceIndex].dependencies[targetIndex] = true; + } + } + else if (itr->value.IsObject()) { + hasSchemaDependencies_ = true; + schemaDocument->CreateSchema(&properties_[sourceIndex].dependenciesSchema, q.Append(itr->name, allocator_), itr->value, document); + properties_[sourceIndex].dependenciesValidatorIndex = validatorCount_; + validatorCount_++; + } + } + } + } + + if (const ValueType* v = GetMember(value, GetAdditionalPropertiesString())) { + if (v->IsBool()) + additionalProperties_ = v->GetBool(); + else if (v->IsObject()) + schemaDocument->CreateSchema(&additionalPropertiesSchema_, p.Append(GetAdditionalPropertiesString(), allocator_), *v, document); 
+ } + + AssignIfExist(minProperties_, value, GetMinPropertiesString()); + AssignIfExist(maxProperties_, value, GetMaxPropertiesString()); + + // Array + if (const ValueType* v = GetMember(value, GetItemsString())) { + PointerType q = p.Append(GetItemsString(), allocator_); + if (v->IsObject()) // List validation + schemaDocument->CreateSchema(&itemsList_, q, *v, document); + else if (v->IsArray()) { // Tuple validation + itemsTuple_ = static_cast(allocator_->Malloc(sizeof(const Schema*) * v->Size())); + SizeType index = 0; + for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr, index++) + schemaDocument->CreateSchema(&itemsTuple_[itemsTupleCount_++], q.Append(index, allocator_), *itr, document); + } + } + + AssignIfExist(minItems_, value, GetMinItemsString()); + AssignIfExist(maxItems_, value, GetMaxItemsString()); + + if (const ValueType* v = GetMember(value, GetAdditionalItemsString())) { + if (v->IsBool()) + additionalItems_ = v->GetBool(); + else if (v->IsObject()) + schemaDocument->CreateSchema(&additionalItemsSchema_, p.Append(GetAdditionalItemsString(), allocator_), *v, document); + } + + AssignIfExist(uniqueItems_, value, GetUniqueItemsString()); + + // String + AssignIfExist(minLength_, value, GetMinLengthString()); + AssignIfExist(maxLength_, value, GetMaxLengthString()); + + if (const ValueType* v = GetMember(value, GetPatternString())) + pattern_ = CreatePattern(*v); + + // Number + if (const ValueType* v = GetMember(value, GetMinimumString())) + if (v->IsNumber()) + minimum_.CopyFrom(*v, *allocator_); + + if (const ValueType* v = GetMember(value, GetMaximumString())) + if (v->IsNumber()) + maximum_.CopyFrom(*v, *allocator_); + + AssignIfExist(exclusiveMinimum_, value, GetExclusiveMinimumString()); + AssignIfExist(exclusiveMaximum_, value, GetExclusiveMaximumString()); + + if (const ValueType* v = GetMember(value, GetMultipleOfString())) + if (v->IsNumber() && v->GetDouble() > 0.0) + multipleOf_.CopyFrom(*v, *allocator_); + } + + ~Schema() { 
+ AllocatorType::Free(enum_); + if (properties_) { + for (SizeType i = 0; i < propertyCount_; i++) + properties_[i].~Property(); + AllocatorType::Free(properties_); + } + if (patternProperties_) { + for (SizeType i = 0; i < patternPropertyCount_; i++) + patternProperties_[i].~PatternProperty(); + AllocatorType::Free(patternProperties_); + } + AllocatorType::Free(itemsTuple_); +#if RAPIDJSON_SCHEMA_HAS_REGEX + if (pattern_) { + pattern_->~RegexType(); + AllocatorType::Free(pattern_); + } +#endif + } + + bool BeginValue(Context& context) const { + if (context.inArray) { + if (uniqueItems_) + context.valueUniqueness = true; + + if (itemsList_) + context.valueSchema = itemsList_; + else if (itemsTuple_) { + if (context.arrayElementIndex < itemsTupleCount_) + context.valueSchema = itemsTuple_[context.arrayElementIndex]; + else if (additionalItemsSchema_) + context.valueSchema = additionalItemsSchema_; + else if (additionalItems_) + context.valueSchema = typeless_; + else + RAPIDJSON_INVALID_KEYWORD_RETURN(GetItemsString()); + } + else + context.valueSchema = typeless_; + + context.arrayElementIndex++; + } + return true; + } + + RAPIDJSON_FORCEINLINE bool EndValue(Context& context) const { + if (context.patternPropertiesValidatorCount > 0) { + bool otherValid = false; + SizeType count = context.patternPropertiesValidatorCount; + if (context.objectPatternValidatorType != Context::kPatternValidatorOnly) + otherValid = context.patternPropertiesValidators[--count]->IsValid(); + + bool patternValid = true; + for (SizeType i = 0; i < count; i++) + if (!context.patternPropertiesValidators[i]->IsValid()) { + patternValid = false; + break; + } + + if (context.objectPatternValidatorType == Context::kPatternValidatorOnly) { + if (!patternValid) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString()); + } + else if (context.objectPatternValidatorType == Context::kPatternValidatorWithProperty) { + if (!patternValid || !otherValid) + 
RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString()); + } + else if (!patternValid && !otherValid) // kPatternValidatorWithAdditionalProperty) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString()); + } + + if (enum_) { + const uint64_t h = context.factory.GetHashCode(context.hasher); + for (SizeType i = 0; i < enumCount_; i++) + if (enum_[i] == h) + goto foundEnum; + RAPIDJSON_INVALID_KEYWORD_RETURN(GetEnumString()); + foundEnum:; + } + + if (allOf_.schemas) + for (SizeType i = allOf_.begin; i < allOf_.begin + allOf_.count; i++) + if (!context.validators[i]->IsValid()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetAllOfString()); + + if (anyOf_.schemas) { + for (SizeType i = anyOf_.begin; i < anyOf_.begin + anyOf_.count; i++) + if (context.validators[i]->IsValid()) + goto foundAny; + RAPIDJSON_INVALID_KEYWORD_RETURN(GetAnyOfString()); + foundAny:; + } + + if (oneOf_.schemas) { + bool oneValid = false; + for (SizeType i = oneOf_.begin; i < oneOf_.begin + oneOf_.count; i++) + if (context.validators[i]->IsValid()) { + if (oneValid) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetOneOfString()); + else + oneValid = true; + } + if (!oneValid) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetOneOfString()); + } + + if (not_ && context.validators[notValidatorIndex_]->IsValid()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetNotString()); + + return true; + } + + bool Null(Context& context) const { + if (!(type_ & (1 << kNullSchemaType))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + return CreateParallelValidator(context); + } + + bool Bool(Context& context, bool) const { + if (!(type_ & (1 << kBooleanSchemaType))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + return CreateParallelValidator(context); + } + + bool Int(Context& context, int i) const { + if (!CheckInt(context, i)) + return false; + return CreateParallelValidator(context); + } + + bool Uint(Context& context, unsigned u) const { + if (!CheckUint(context, u)) + return false; + return 
CreateParallelValidator(context); + } + + bool Int64(Context& context, int64_t i) const { + if (!CheckInt(context, i)) + return false; + return CreateParallelValidator(context); + } + + bool Uint64(Context& context, uint64_t u) const { + if (!CheckUint(context, u)) + return false; + return CreateParallelValidator(context); + } + + bool Double(Context& context, double d) const { + if (!(type_ & (1 << kNumberSchemaType))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + + if (!minimum_.IsNull() && !CheckDoubleMinimum(context, d)) + return false; + + if (!maximum_.IsNull() && !CheckDoubleMaximum(context, d)) + return false; + + if (!multipleOf_.IsNull() && !CheckDoubleMultipleOf(context, d)) + return false; + + return CreateParallelValidator(context); + } + + bool String(Context& context, const Ch* str, SizeType length, bool) const { + if (!(type_ & (1 << kStringSchemaType))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + + if (minLength_ != 0 || maxLength_ != SizeType(~0)) { + SizeType count; + if (internal::CountStringCodePoint(str, length, &count)) { + if (count < minLength_) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinLengthString()); + if (count > maxLength_) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxLengthString()); + } + } + + if (pattern_ && !IsPatternMatch(pattern_, str, length)) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternString()); + + return CreateParallelValidator(context); + } + + bool StartObject(Context& context) const { + if (!(type_ & (1 << kObjectSchemaType))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + + if (hasDependencies_ || hasRequired_) { + context.propertyExist = static_cast(context.factory.MallocState(sizeof(bool) * propertyCount_)); + std::memset(context.propertyExist, 0, sizeof(bool) * propertyCount_); + } + + if (patternProperties_) { // pre-allocate schema array + SizeType count = patternPropertyCount_ + 1; // extra for valuePatternValidatorType + context.patternPropertiesSchemas = 
static_cast(context.factory.MallocState(sizeof(const SchemaType*) * count)); + context.patternPropertiesSchemaCount = 0; + std::memset(context.patternPropertiesSchemas, 0, sizeof(SchemaType*) * count); + } + + return CreateParallelValidator(context); + } + + bool Key(Context& context, const Ch* str, SizeType len, bool) const { + if (patternProperties_) { + context.patternPropertiesSchemaCount = 0; + for (SizeType i = 0; i < patternPropertyCount_; i++) + if (patternProperties_[i].pattern && IsPatternMatch(patternProperties_[i].pattern, str, len)) + context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = patternProperties_[i].schema; + } + + SizeType index; + if (FindPropertyIndex(ValueType(str, len).Move(), &index)) { + if (context.patternPropertiesSchemaCount > 0) { + context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = properties_[index].schema; + context.valueSchema = typeless_; + context.valuePatternValidatorType = Context::kPatternValidatorWithProperty; + } + else + context.valueSchema = properties_[index].schema; + + if (context.propertyExist) + context.propertyExist[index] = true; + + return true; + } + + if (additionalPropertiesSchema_) { + if (additionalPropertiesSchema_ && context.patternPropertiesSchemaCount > 0) { + context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = additionalPropertiesSchema_; + context.valueSchema = typeless_; + context.valuePatternValidatorType = Context::kPatternValidatorWithAdditionalProperty; + } + else + context.valueSchema = additionalPropertiesSchema_; + return true; + } + else if (additionalProperties_) { + context.valueSchema = typeless_; + return true; + } + + if (context.patternPropertiesSchemaCount == 0) // patternProperties are not additional properties + RAPIDJSON_INVALID_KEYWORD_RETURN(GetAdditionalPropertiesString()); + + return true; + } + + bool EndObject(Context& context, SizeType memberCount) const { + if (hasRequired_) + for (SizeType index = 0; index 
< propertyCount_; index++) + if (properties_[index].required) + if (!context.propertyExist[index]) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetRequiredString()); + + if (memberCount < minProperties_) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinPropertiesString()); + + if (memberCount > maxProperties_) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxPropertiesString()); + + if (hasDependencies_) { + for (SizeType sourceIndex = 0; sourceIndex < propertyCount_; sourceIndex++) + if (context.propertyExist[sourceIndex]) { + if (properties_[sourceIndex].dependencies) { + for (SizeType targetIndex = 0; targetIndex < propertyCount_; targetIndex++) + if (properties_[sourceIndex].dependencies[targetIndex] && !context.propertyExist[targetIndex]) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetDependenciesString()); + } + else if (properties_[sourceIndex].dependenciesSchema) + if (!context.validators[properties_[sourceIndex].dependenciesValidatorIndex]->IsValid()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetDependenciesString()); + } + } + + return true; + } + + bool StartArray(Context& context) const { + if (!(type_ & (1 << kArraySchemaType))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + + context.arrayElementIndex = 0; + context.inArray = true; + + return CreateParallelValidator(context); + } + + bool EndArray(Context& context, SizeType elementCount) const { + context.inArray = false; + + if (elementCount < minItems_) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinItemsString()); + + if (elementCount > maxItems_) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxItemsString()); + + return true; + } + + // Generate functions for string literal according to Ch +#define RAPIDJSON_STRING_(name, ...) 
\ + static const ValueType& Get##name##String() {\ + static const Ch s[] = { __VA_ARGS__, '\0' };\ + static const ValueType v(s, sizeof(s) / sizeof(Ch) - 1);\ + return v;\ + } + + RAPIDJSON_STRING_(Null, 'n', 'u', 'l', 'l') + RAPIDJSON_STRING_(Boolean, 'b', 'o', 'o', 'l', 'e', 'a', 'n') + RAPIDJSON_STRING_(Object, 'o', 'b', 'j', 'e', 'c', 't') + RAPIDJSON_STRING_(Array, 'a', 'r', 'r', 'a', 'y') + RAPIDJSON_STRING_(String, 's', 't', 'r', 'i', 'n', 'g') + RAPIDJSON_STRING_(Number, 'n', 'u', 'm', 'b', 'e', 'r') + RAPIDJSON_STRING_(Integer, 'i', 'n', 't', 'e', 'g', 'e', 'r') + RAPIDJSON_STRING_(Type, 't', 'y', 'p', 'e') + RAPIDJSON_STRING_(Enum, 'e', 'n', 'u', 'm') + RAPIDJSON_STRING_(AllOf, 'a', 'l', 'l', 'O', 'f') + RAPIDJSON_STRING_(AnyOf, 'a', 'n', 'y', 'O', 'f') + RAPIDJSON_STRING_(OneOf, 'o', 'n', 'e', 'O', 'f') + RAPIDJSON_STRING_(Not, 'n', 'o', 't') + RAPIDJSON_STRING_(Properties, 'p', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's') + RAPIDJSON_STRING_(Required, 'r', 'e', 'q', 'u', 'i', 'r', 'e', 'd') + RAPIDJSON_STRING_(Dependencies, 'd', 'e', 'p', 'e', 'n', 'd', 'e', 'n', 'c', 'i', 'e', 's') + RAPIDJSON_STRING_(PatternProperties, 'p', 'a', 't', 't', 'e', 'r', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's') + RAPIDJSON_STRING_(AdditionalProperties, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's') + RAPIDJSON_STRING_(MinProperties, 'm', 'i', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's') + RAPIDJSON_STRING_(MaxProperties, 'm', 'a', 'x', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's') + RAPIDJSON_STRING_(Items, 'i', 't', 'e', 'm', 's') + RAPIDJSON_STRING_(MinItems, 'm', 'i', 'n', 'I', 't', 'e', 'm', 's') + RAPIDJSON_STRING_(MaxItems, 'm', 'a', 'x', 'I', 't', 'e', 'm', 's') + RAPIDJSON_STRING_(AdditionalItems, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'I', 't', 'e', 'm', 's') + RAPIDJSON_STRING_(UniqueItems, 'u', 'n', 'i', 'q', 'u', 'e', 'I', 't', 'e', 'm', 's') + 
RAPIDJSON_STRING_(MinLength, 'm', 'i', 'n', 'L', 'e', 'n', 'g', 't', 'h') + RAPIDJSON_STRING_(MaxLength, 'm', 'a', 'x', 'L', 'e', 'n', 'g', 't', 'h') + RAPIDJSON_STRING_(Pattern, 'p', 'a', 't', 't', 'e', 'r', 'n') + RAPIDJSON_STRING_(Minimum, 'm', 'i', 'n', 'i', 'm', 'u', 'm') + RAPIDJSON_STRING_(Maximum, 'm', 'a', 'x', 'i', 'm', 'u', 'm') + RAPIDJSON_STRING_(ExclusiveMinimum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'i', 'n', 'i', 'm', 'u', 'm') + RAPIDJSON_STRING_(ExclusiveMaximum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'a', 'x', 'i', 'm', 'u', 'm') + RAPIDJSON_STRING_(MultipleOf, 'm', 'u', 'l', 't', 'i', 'p', 'l', 'e', 'O', 'f') + +#undef RAPIDJSON_STRING_ + +private: + enum SchemaValueType { + kNullSchemaType, + kBooleanSchemaType, + kObjectSchemaType, + kArraySchemaType, + kStringSchemaType, + kNumberSchemaType, + kIntegerSchemaType, + kTotalSchemaType + }; + +#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX + typedef internal::GenericRegex RegexType; +#elif RAPIDJSON_SCHEMA_USE_STDREGEX + typedef std::basic_regex RegexType; +#else + typedef char RegexType; +#endif + + struct SchemaArray { + SchemaArray() : schemas(), count() {} + ~SchemaArray() { AllocatorType::Free(schemas); } + const SchemaType** schemas; + SizeType begin; // begin index of context.validators + SizeType count; + }; + + template + void AddUniqueElement(V1& a, const V2& v) { + for (typename V1::ConstValueIterator itr = a.Begin(); itr != a.End(); ++itr) + if (*itr == v) + return; + V1 c(v, *allocator_); + a.PushBack(c, *allocator_); + } + + static const ValueType* GetMember(const ValueType& value, const ValueType& name) { + typename ValueType::ConstMemberIterator itr = value.FindMember(name); + return itr != value.MemberEnd() ? 
&(itr->value) : 0; + } + + static void AssignIfExist(bool& out, const ValueType& value, const ValueType& name) { + if (const ValueType* v = GetMember(value, name)) + if (v->IsBool()) + out = v->GetBool(); + } + + static void AssignIfExist(SizeType& out, const ValueType& value, const ValueType& name) { + if (const ValueType* v = GetMember(value, name)) + if (v->IsUint64() && v->GetUint64() <= SizeType(~0)) + out = static_cast(v->GetUint64()); + } + + void AssignIfExist(SchemaArray& out, SchemaDocumentType& schemaDocument, const PointerType& p, const ValueType& value, const ValueType& name, const ValueType& document) { + if (const ValueType* v = GetMember(value, name)) { + if (v->IsArray() && v->Size() > 0) { + PointerType q = p.Append(name, allocator_); + out.count = v->Size(); + out.schemas = static_cast(allocator_->Malloc(out.count * sizeof(const Schema*))); + memset(out.schemas, 0, sizeof(Schema*)* out.count); + for (SizeType i = 0; i < out.count; i++) + schemaDocument.CreateSchema(&out.schemas[i], q.Append(i, allocator_), (*v)[i], document); + out.begin = validatorCount_; + validatorCount_ += out.count; + } + } + } + +#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX + template + RegexType* CreatePattern(const ValueType& value) { + if (value.IsString()) { + RegexType* r = new (allocator_->Malloc(sizeof(RegexType))) RegexType(value.GetString()); + if (!r->IsValid()) { + r->~RegexType(); + AllocatorType::Free(r); + r = 0; + } + return r; + } + return 0; + } + + static bool IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType) { + GenericRegexSearch rs(*pattern); + return rs.Search(str); + } +#elif RAPIDJSON_SCHEMA_USE_STDREGEX + template + RegexType* CreatePattern(const ValueType& value) { + if (value.IsString()) + try { + return new (allocator_->Malloc(sizeof(RegexType))) RegexType(value.GetString(), std::size_t(value.GetStringLength()), std::regex_constants::ECMAScript); + } + catch (const std::regex_error&) { + } + return 0; + } + + static bool 
IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType length) { + std::match_results r; + return std::regex_search(str, str + length, r, *pattern); + } +#else + template + RegexType* CreatePattern(const ValueType&) { return 0; } + + static bool IsPatternMatch(const RegexType*, const Ch *, SizeType) { return true; } +#endif // RAPIDJSON_SCHEMA_USE_STDREGEX + + void AddType(const ValueType& type) { + if (type == GetNullString() ) type_ |= 1 << kNullSchemaType; + else if (type == GetBooleanString()) type_ |= 1 << kBooleanSchemaType; + else if (type == GetObjectString() ) type_ |= 1 << kObjectSchemaType; + else if (type == GetArrayString() ) type_ |= 1 << kArraySchemaType; + else if (type == GetStringString() ) type_ |= 1 << kStringSchemaType; + else if (type == GetIntegerString()) type_ |= 1 << kIntegerSchemaType; + else if (type == GetNumberString() ) type_ |= (1 << kNumberSchemaType) | (1 << kIntegerSchemaType); + } + + bool CreateParallelValidator(Context& context) const { + if (enum_ || context.arrayUniqueness) + context.hasher = context.factory.CreateHasher(); + + if (validatorCount_) { + RAPIDJSON_ASSERT(context.validators == 0); + context.validators = static_cast(context.factory.MallocState(sizeof(ISchemaValidator*) * validatorCount_)); + context.validatorCount = validatorCount_; + + if (allOf_.schemas) + CreateSchemaValidators(context, allOf_); + + if (anyOf_.schemas) + CreateSchemaValidators(context, anyOf_); + + if (oneOf_.schemas) + CreateSchemaValidators(context, oneOf_); + + if (not_) + context.validators[notValidatorIndex_] = context.factory.CreateSchemaValidator(*not_); + + if (hasSchemaDependencies_) { + for (SizeType i = 0; i < propertyCount_; i++) + if (properties_[i].dependenciesSchema) + context.validators[properties_[i].dependenciesValidatorIndex] = context.factory.CreateSchemaValidator(*properties_[i].dependenciesSchema); + } + } + + return true; + } + + void CreateSchemaValidators(Context& context, const SchemaArray& schemas) const { 
+ for (SizeType i = 0; i < schemas.count; i++) + context.validators[schemas.begin + i] = context.factory.CreateSchemaValidator(*schemas.schemas[i]); + } + + // O(n) + bool FindPropertyIndex(const ValueType& name, SizeType* outIndex) const { + SizeType len = name.GetStringLength(); + const Ch* str = name.GetString(); + for (SizeType index = 0; index < propertyCount_; index++) + if (properties_[index].name.GetStringLength() == len && + (std::memcmp(properties_[index].name.GetString(), str, sizeof(Ch) * len) == 0)) + { + *outIndex = index; + return true; + } + return false; + } + + bool CheckInt(Context& context, int64_t i) const { + if (!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType)))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + + if (!minimum_.IsNull()) { + if (minimum_.IsInt64()) { + if (exclusiveMinimum_ ? i <= minimum_.GetInt64() : i < minimum_.GetInt64()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString()); + } + else if (minimum_.IsUint64()) { + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString()); // i <= max(int64_t) < minimum.GetUint64() + } + else if (!CheckDoubleMinimum(context, static_cast(i))) + return false; + } + + if (!maximum_.IsNull()) { + if (maximum_.IsInt64()) { + if (exclusiveMaximum_ ? i >= maximum_.GetInt64() : i > maximum_.GetInt64()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString()); + } + else if (maximum_.IsUint64()) + /* do nothing */; // i <= max(int64_t) < maximum_.GetUint64() + else if (!CheckDoubleMaximum(context, static_cast(i))) + return false; + } + + if (!multipleOf_.IsNull()) { + if (multipleOf_.IsUint64()) { + if (static_cast(i >= 0 ? 
i : -i) % multipleOf_.GetUint64() != 0) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString()); + } + else if (!CheckDoubleMultipleOf(context, static_cast(i))) + return false; + } + + return true; + } + + bool CheckUint(Context& context, uint64_t i) const { + if (!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType)))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + + if (!minimum_.IsNull()) { + if (minimum_.IsUint64()) { + if (exclusiveMinimum_ ? i <= minimum_.GetUint64() : i < minimum_.GetUint64()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString()); + } + else if (minimum_.IsInt64()) + /* do nothing */; // i >= 0 > minimum.Getint64() + else if (!CheckDoubleMinimum(context, static_cast(i))) + return false; + } + + if (!maximum_.IsNull()) { + if (maximum_.IsUint64()) { + if (exclusiveMaximum_ ? i >= maximum_.GetUint64() : i > maximum_.GetUint64()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString()); + } + else if (maximum_.IsInt64()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString()); // i >= 0 > maximum_ + else if (!CheckDoubleMaximum(context, static_cast(i))) + return false; + } + + if (!multipleOf_.IsNull()) { + if (multipleOf_.IsUint64()) { + if (i % multipleOf_.GetUint64() != 0) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString()); + } + else if (!CheckDoubleMultipleOf(context, static_cast(i))) + return false; + } + + return true; + } + + bool CheckDoubleMinimum(Context& context, double d) const { + if (exclusiveMinimum_ ? d <= minimum_.GetDouble() : d < minimum_.GetDouble()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString()); + return true; + } + + bool CheckDoubleMaximum(Context& context, double d) const { + if (exclusiveMaximum_ ? 
d >= maximum_.GetDouble() : d > maximum_.GetDouble()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString()); + return true; + } + + bool CheckDoubleMultipleOf(Context& context, double d) const { + double a = std::abs(d), b = std::abs(multipleOf_.GetDouble()); + double q = std::floor(a / b); + double r = a - q * b; + if (r > 0.0) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString()); + return true; + } + + struct Property { + Property() : schema(), dependenciesSchema(), dependenciesValidatorIndex(), dependencies(), required(false) {} + ~Property() { AllocatorType::Free(dependencies); } + SValue name; + const SchemaType* schema; + const SchemaType* dependenciesSchema; + SizeType dependenciesValidatorIndex; + bool* dependencies; + bool required; + }; + + struct PatternProperty { + PatternProperty() : schema(), pattern() {} + ~PatternProperty() { + if (pattern) { + pattern->~RegexType(); + AllocatorType::Free(pattern); + } + } + const SchemaType* schema; + RegexType* pattern; + }; + + AllocatorType* allocator_; + const SchemaType* typeless_; + uint64_t* enum_; + SizeType enumCount_; + SchemaArray allOf_; + SchemaArray anyOf_; + SchemaArray oneOf_; + const SchemaType* not_; + unsigned type_; // bitmask of kSchemaType + SizeType validatorCount_; + SizeType notValidatorIndex_; + + Property* properties_; + const SchemaType* additionalPropertiesSchema_; + PatternProperty* patternProperties_; + SizeType patternPropertyCount_; + SizeType propertyCount_; + SizeType minProperties_; + SizeType maxProperties_; + bool additionalProperties_; + bool hasDependencies_; + bool hasRequired_; + bool hasSchemaDependencies_; + + const SchemaType* additionalItemsSchema_; + const SchemaType* itemsList_; + const SchemaType** itemsTuple_; + SizeType itemsTupleCount_; + SizeType minItems_; + SizeType maxItems_; + bool additionalItems_; + bool uniqueItems_; + + RegexType* pattern_; + SizeType minLength_; + SizeType maxLength_; + + SValue minimum_; + SValue maximum_; + SValue multipleOf_; + 
bool exclusiveMinimum_; + bool exclusiveMaximum_; +}; + +template +struct TokenHelper { + RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index) { + *documentStack.template Push() = '/'; + char buffer[21]; + size_t length = static_cast((sizeof(SizeType) == 4 ? u32toa(index, buffer) : u64toa(index, buffer)) - buffer); + for (size_t i = 0; i < length; i++) + *documentStack.template Push() = static_cast(buffer[i]); + } +}; + +// Partial specialized version for char to prevent buffer copying. +template +struct TokenHelper { + RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index) { + if (sizeof(SizeType) == 4) { + char *buffer = documentStack.template Push(1 + 10); // '/' + uint + *buffer++ = '/'; + const char* end = internal::u32toa(index, buffer); + documentStack.template Pop(static_cast(10 - (end - buffer))); + } + else { + char *buffer = documentStack.template Push(1 + 20); // '/' + uint64 + *buffer++ = '/'; + const char* end = internal::u64toa(index, buffer); + documentStack.template Pop(static_cast(20 - (end - buffer))); + } + } +}; + +} // namespace internal + +/////////////////////////////////////////////////////////////////////////////// +// IGenericRemoteSchemaDocumentProvider + +template +class IGenericRemoteSchemaDocumentProvider { +public: + typedef typename SchemaDocumentType::Ch Ch; + + virtual ~IGenericRemoteSchemaDocumentProvider() {} + virtual const SchemaDocumentType* GetRemoteDocument(const Ch* uri, SizeType length) = 0; +}; + +/////////////////////////////////////////////////////////////////////////////// +// GenericSchemaDocument + +//! JSON schema document. +/*! + A JSON schema document is a compiled version of a JSON schema. + It is basically a tree of internal::Schema. + + \note This is an immutable class (i.e. its instance cannot be modified after construction). + \tparam ValueT Type of JSON value (e.g. \c Value ), which also determine the encoding. 
+ \tparam Allocator Allocator type for allocating memory of this document. +*/ +template +class GenericSchemaDocument { +public: + typedef ValueT ValueType; + typedef IGenericRemoteSchemaDocumentProvider IRemoteSchemaDocumentProviderType; + typedef Allocator AllocatorType; + typedef typename ValueType::EncodingType EncodingType; + typedef typename EncodingType::Ch Ch; + typedef internal::Schema SchemaType; + typedef GenericPointer PointerType; + friend class internal::Schema; + template + friend class GenericSchemaValidator; + + //! Constructor. + /*! + Compile a JSON document into schema document. + + \param document A JSON document as source. + \param remoteProvider An optional remote schema document provider for resolving remote reference. Can be null. + \param allocator An optional allocator instance for allocating memory. Can be null. + */ + explicit GenericSchemaDocument(const ValueType& document, IRemoteSchemaDocumentProviderType* remoteProvider = 0, Allocator* allocator = 0) : + remoteProvider_(remoteProvider), + allocator_(allocator), + ownAllocator_(), + root_(), + typeless_(), + schemaMap_(allocator, kInitialSchemaMapSize), + schemaRef_(allocator, kInitialSchemaRefSize) + { + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)(); + + typeless_ = static_cast(allocator_->Malloc(sizeof(SchemaType))); + new (typeless_) SchemaType(this, PointerType(), ValueType(kObjectType).Move(), ValueType(kObjectType).Move(), 0); + + // Generate root schema, it will call CreateSchema() to create sub-schemas, + // And call AddRefSchema() if there are $ref. 
+ CreateSchemaRecursive(&root_, PointerType(), document, document); + + // Resolve $ref + while (!schemaRef_.Empty()) { + SchemaRefEntry* refEntry = schemaRef_.template Pop(1); + if (const SchemaType* s = GetSchema(refEntry->target)) { + if (refEntry->schema) + *refEntry->schema = s; + + // Create entry in map if not exist + if (!GetSchema(refEntry->source)) { + new (schemaMap_.template Push()) SchemaEntry(refEntry->source, const_cast(s), false, allocator_); + } + } + else if (refEntry->schema) + *refEntry->schema = typeless_; + + refEntry->~SchemaRefEntry(); + } + + RAPIDJSON_ASSERT(root_ != 0); + + schemaRef_.ShrinkToFit(); // Deallocate all memory for ref + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move constructor in C++11 + GenericSchemaDocument(GenericSchemaDocument&& rhs) RAPIDJSON_NOEXCEPT : + remoteProvider_(rhs.remoteProvider_), + allocator_(rhs.allocator_), + ownAllocator_(rhs.ownAllocator_), + root_(rhs.root_), + typeless_(rhs.typeless_), + schemaMap_(std::move(rhs.schemaMap_)), + schemaRef_(std::move(rhs.schemaRef_)) + { + rhs.remoteProvider_ = 0; + rhs.allocator_ = 0; + rhs.ownAllocator_ = 0; + rhs.typeless_ = 0; + } +#endif + + //! Destructor + ~GenericSchemaDocument() { + while (!schemaMap_.Empty()) + schemaMap_.template Pop(1)->~SchemaEntry(); + + if (typeless_) { + typeless_->~SchemaType(); + Allocator::Free(typeless_); + } + + RAPIDJSON_DELETE(ownAllocator_); + } + + //! Get the root schema. + const SchemaType& GetRoot() const { return *root_; } + +private: + //! Prohibit copying + GenericSchemaDocument(const GenericSchemaDocument&); + //! 
Prohibit assignment + GenericSchemaDocument& operator=(const GenericSchemaDocument&); + + struct SchemaRefEntry { + SchemaRefEntry(const PointerType& s, const PointerType& t, const SchemaType** outSchema, Allocator *allocator) : source(s, allocator), target(t, allocator), schema(outSchema) {} + PointerType source; + PointerType target; + const SchemaType** schema; + }; + + struct SchemaEntry { + SchemaEntry(const PointerType& p, SchemaType* s, bool o, Allocator* allocator) : pointer(p, allocator), schema(s), owned(o) {} + ~SchemaEntry() { + if (owned) { + schema->~SchemaType(); + Allocator::Free(schema); + } + } + PointerType pointer; + SchemaType* schema; + bool owned; + }; + + void CreateSchemaRecursive(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) { + if (schema) + *schema = typeless_; + + if (v.GetType() == kObjectType) { + const SchemaType* s = GetSchema(pointer); + if (!s) + CreateSchema(schema, pointer, v, document); + + for (typename ValueType::ConstMemberIterator itr = v.MemberBegin(); itr != v.MemberEnd(); ++itr) + CreateSchemaRecursive(0, pointer.Append(itr->name, allocator_), itr->value, document); + } + else if (v.GetType() == kArrayType) + for (SizeType i = 0; i < v.Size(); i++) + CreateSchemaRecursive(0, pointer.Append(i, allocator_), v[i], document); + } + + void CreateSchema(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) { + RAPIDJSON_ASSERT(pointer.IsValid()); + if (v.IsObject()) { + if (!HandleRefSchema(pointer, schema, v, document)) { + SchemaType* s = new (allocator_->Malloc(sizeof(SchemaType))) SchemaType(this, pointer, v, document, allocator_); + new (schemaMap_.template Push()) SchemaEntry(pointer, s, true, allocator_); + if (schema) + *schema = s; + } + } + } + + bool HandleRefSchema(const PointerType& source, const SchemaType** schema, const ValueType& v, const ValueType& document) { + static const Ch kRefString[] = { '$', 'r', 
'e', 'f', '\0' }; + static const ValueType kRefValue(kRefString, 4); + + typename ValueType::ConstMemberIterator itr = v.FindMember(kRefValue); + if (itr == v.MemberEnd()) + return false; + + if (itr->value.IsString()) { + SizeType len = itr->value.GetStringLength(); + if (len > 0) { + const Ch* s = itr->value.GetString(); + SizeType i = 0; + while (i < len && s[i] != '#') // Find the first # + i++; + + if (i > 0) { // Remote reference, resolve immediately + if (remoteProvider_) { + if (const GenericSchemaDocument* remoteDocument = remoteProvider_->GetRemoteDocument(s, i)) { + PointerType pointer(&s[i], len - i, allocator_); + if (pointer.IsValid()) { + if (const SchemaType* sc = remoteDocument->GetSchema(pointer)) { + if (schema) + *schema = sc; + return true; + } + } + } + } + } + else if (s[i] == '#') { // Local reference, defer resolution + PointerType pointer(&s[i], len - i, allocator_); + if (pointer.IsValid()) { + if (const ValueType* nv = pointer.Get(document)) + if (HandleRefSchema(source, schema, *nv, document)) + return true; + + new (schemaRef_.template Push()) SchemaRefEntry(source, pointer, schema, allocator_); + return true; + } + } + } + } + return false; + } + + const SchemaType* GetSchema(const PointerType& pointer) const { + for (const SchemaEntry* target = schemaMap_.template Bottom(); target != schemaMap_.template End(); ++target) + if (pointer == target->pointer) + return target->schema; + return 0; + } + + PointerType GetPointer(const SchemaType* schema) const { + for (const SchemaEntry* target = schemaMap_.template Bottom(); target != schemaMap_.template End(); ++target) + if (schema == target->schema) + return target->pointer; + return PointerType(); + } + + const SchemaType* GetTypeless() const { return typeless_; } + + static const size_t kInitialSchemaMapSize = 64; + static const size_t kInitialSchemaRefSize = 64; + + IRemoteSchemaDocumentProviderType* remoteProvider_; + Allocator *allocator_; + Allocator *ownAllocator_; + const 
SchemaType* root_; //!< Root schema. + SchemaType* typeless_; + internal::Stack schemaMap_; // Stores created Pointer -> Schemas + internal::Stack schemaRef_; // Stores Pointer from $ref and schema which holds the $ref +}; + +//! GenericSchemaDocument using Value type. +typedef GenericSchemaDocument SchemaDocument; +//! IGenericRemoteSchemaDocumentProvider using SchemaDocument. +typedef IGenericRemoteSchemaDocumentProvider IRemoteSchemaDocumentProvider; + +/////////////////////////////////////////////////////////////////////////////// +// GenericSchemaValidator + +//! JSON Schema Validator. +/*! + A SAX style JSON schema validator. + It uses a \c GenericSchemaDocument to validate SAX events. + It delegates the incoming SAX events to an output handler. + The default output handler does nothing. + It can be reused multiple times by calling \c Reset(). + + \tparam SchemaDocumentType Type of schema document. + \tparam OutputHandler Type of output handler. Default handler does nothing. + \tparam StateAllocator Allocator for storing the internal validation states. +*/ +template < + typename SchemaDocumentType, + typename OutputHandler = BaseReaderHandler, + typename StateAllocator = CrtAllocator> +class GenericSchemaValidator : + public internal::ISchemaStateFactory, + public internal::ISchemaValidator +{ +public: + typedef typename SchemaDocumentType::SchemaType SchemaType; + typedef typename SchemaDocumentType::PointerType PointerType; + typedef typename SchemaType::EncodingType EncodingType; + typedef typename EncodingType::Ch Ch; + + //! Constructor without output handler. + /*! + \param schemaDocument The schema document to conform to. + \param allocator Optional allocator for storing internal validation states. + \param schemaStackCapacity Optional initial capacity of schema path stack. + \param documentStackCapacity Optional initial capacity of document path stack. 
+ */ + GenericSchemaValidator( + const SchemaDocumentType& schemaDocument, + StateAllocator* allocator = 0, + size_t schemaStackCapacity = kDefaultSchemaStackCapacity, + size_t documentStackCapacity = kDefaultDocumentStackCapacity) + : + schemaDocument_(&schemaDocument), + root_(schemaDocument.GetRoot()), + stateAllocator_(allocator), + ownStateAllocator_(0), + schemaStack_(allocator, schemaStackCapacity), + documentStack_(allocator, documentStackCapacity), + outputHandler_(CreateNullHandler()), + valid_(true) +#if RAPIDJSON_SCHEMA_VERBOSE + , depth_(0) +#endif +// NCBI: added + , invalidSchemaPointer_(PointerType()) + , invalidSchemaKeyword_(0) + , invalidDocumentPointer_(PointerType()) + { + } + + //! Constructor with output handler. + /*! + \param schemaDocument The schema document to conform to. + \param allocator Optional allocator for storing internal validation states. + \param schemaStackCapacity Optional initial capacity of schema path stack. + \param documentStackCapacity Optional initial capacity of document path stack. + */ + GenericSchemaValidator( + const SchemaDocumentType& schemaDocument, + OutputHandler& outputHandler, + StateAllocator* allocator = 0, + size_t schemaStackCapacity = kDefaultSchemaStackCapacity, + size_t documentStackCapacity = kDefaultDocumentStackCapacity) + : + schemaDocument_(&schemaDocument), + root_(schemaDocument.GetRoot()), + stateAllocator_(allocator), + ownStateAllocator_(0), + schemaStack_(allocator, schemaStackCapacity), + documentStack_(allocator, documentStackCapacity), + outputHandler_(outputHandler), + nullHandler_(0), + valid_(true) +#if RAPIDJSON_SCHEMA_VERBOSE + , depth_(0) +#endif +// NCBI: added + , invalidSchemaPointer_(PointerType()) + , invalidSchemaKeyword_(0) + , invalidDocumentPointer_(PointerType()) + { + } + + //! Destructor. 
+ ~GenericSchemaValidator() { + Reset(); + if (nullHandler_) { + nullHandler_->~OutputHandler(); + StateAllocator::Free(nullHandler_); + } + RAPIDJSON_DELETE(ownStateAllocator_); + } + + //! Reset the internal states. + void Reset() { + while (!schemaStack_.Empty()) + PopSchema(); + documentStack_.Clear(); + valid_ = true; +// NCBI: added + invalidSchemaPointer_ = PointerType(); + invalidSchemaKeyword_ = 0; + invalidDocumentPointer_ = PointerType(); + } + + //! Checks whether the current state is valid. + // Implementation of ISchemaValidator + virtual bool IsValid() const { return valid_; } + + //! Gets the JSON pointer pointed to the invalid schema. +// NCBI: changed + PointerType GetInvalidSchemaPointer() const { + return invalidSchemaKeyword_ ? invalidSchemaPointer_ : (schemaStack_.Empty() ? PointerType() : schemaDocument_->GetPointer(&CurrentSchema())); + } + + //! Gets the keyword of invalid schema. + const Ch* GetInvalidSchemaKeyword() const { + return invalidSchemaKeyword_ ? invalidSchemaKeyword_ : (schemaStack_.Empty() ? 0 : CurrentContext().invalidKeyword); + } + + //! Gets the JSON pointer pointed to the invalid value. + PointerType GetInvalidDocumentPointer() const { + return invalidSchemaKeyword_ ? invalidDocumentPointer_ : (documentStack_.Empty() ? 
PointerType() : PointerType(documentStack_.template Bottom(), documentStack_.GetSize() / sizeof(Ch))); + } + +#if RAPIDJSON_SCHEMA_VERBOSE +#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_() \ +RAPIDJSON_MULTILINEMACRO_BEGIN\ + *documentStack_.template Push() = '\0';\ + documentStack_.template Pop(1);\ + internal::PrintInvalidDocument(documentStack_.template Bottom());\ +RAPIDJSON_MULTILINEMACRO_END +#else +#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_() +#endif + +#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_(method, arg1)\ + if (!valid_) return false; \ + if (!BeginValue() || !CurrentSchema().method arg1) {\ + RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_();\ + return valid_ = false;\ + } + +#define RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2)\ + for (Context* context = schemaStack_.template Bottom(); context != schemaStack_.template End(); context++) {\ + if (context->hasher)\ + static_cast(context->hasher)->method arg2;\ + if (context->validators)\ + for (SizeType i_ = 0; i_ < context->validatorCount; i_++)\ + static_cast(context->validators[i_])->method arg2;\ + if (context->patternPropertiesValidators)\ + for (SizeType i_ = 0; i_ < context->patternPropertiesValidatorCount; i_++)\ + static_cast(context->patternPropertiesValidators[i_])->method arg2;\ + } + +#define RAPIDJSON_SCHEMA_HANDLE_END_(method, arg2)\ + return valid_ = EndValue() && outputHandler_.method arg2 + +#define RAPIDJSON_SCHEMA_HANDLE_VALUE_(method, arg1, arg2) \ + RAPIDJSON_SCHEMA_HANDLE_BEGIN_ (method, arg1);\ + RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2);\ + RAPIDJSON_SCHEMA_HANDLE_END_ (method, arg2) + + bool Null() { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Null, (CurrentContext() ), ( )); } + bool Bool(bool b) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Bool, (CurrentContext(), b), (b)); } + bool Int(int i) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int, (CurrentContext(), i), (i)); } + bool Uint(unsigned u) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint, (CurrentContext(), u), (u)); } + bool Int64(int64_t i) { 
RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int64, (CurrentContext(), i), (i)); } + bool Uint64(uint64_t u) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint64, (CurrentContext(), u), (u)); } + bool Double(double d) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Double, (CurrentContext(), d), (d)); } + bool RawNumber(const Ch* str, SizeType length, bool copy) + { RAPIDJSON_SCHEMA_HANDLE_VALUE_(String, (CurrentContext(), str, length, copy), (str, length, copy)); } + bool String(const Ch* str, SizeType length, bool copy) + { RAPIDJSON_SCHEMA_HANDLE_VALUE_(String, (CurrentContext(), str, length, copy), (str, length, copy)); } + + bool StartObject() { + RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartObject, (CurrentContext())); + RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartObject, ()); + return valid_ = outputHandler_.StartObject(); + } + + bool Key(const Ch* str, SizeType len, bool copy) { + if (!valid_) return false; + AppendToken(str, len); + if (!CurrentSchema().Key(CurrentContext(), str, len, copy)) return valid_ = false; + RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(Key, (str, len, copy)); + return valid_ = outputHandler_.Key(str, len, copy); + } + + bool EndObject(SizeType memberCount) { + if (!valid_) return false; + RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(EndObject, (memberCount)); + if (!CurrentSchema().EndObject(CurrentContext(), memberCount)) return valid_ = false; + RAPIDJSON_SCHEMA_HANDLE_END_(EndObject, (memberCount)); + } + + bool StartArray() { + RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartArray, (CurrentContext())); + RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartArray, ()); + return valid_ = outputHandler_.StartArray(); + } + + bool EndArray(SizeType elementCount) { + if (!valid_) return false; + RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(EndArray, (elementCount)); + if (!CurrentSchema().EndArray(CurrentContext(), elementCount)) return valid_ = false; + RAPIDJSON_SCHEMA_HANDLE_END_(EndArray, (elementCount)); + } + +#undef RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_ +#undef RAPIDJSON_SCHEMA_HANDLE_BEGIN_ +#undef RAPIDJSON_SCHEMA_HANDLE_PARALLEL_ 
+#undef RAPIDJSON_SCHEMA_HANDLE_VALUE_ + + // Implementation of ISchemaStateFactory + virtual ISchemaValidator* CreateSchemaValidator(const SchemaType& root) { + return new (GetStateAllocator().Malloc(sizeof(GenericSchemaValidator))) GenericSchemaValidator(*schemaDocument_, root, +#if RAPIDJSON_SCHEMA_VERBOSE + depth_ + 1, +#endif + &GetStateAllocator()); + } + + virtual void DestroySchemaValidator(ISchemaValidator* validator) { + GenericSchemaValidator* v = static_cast(validator); + v->~GenericSchemaValidator(); + StateAllocator::Free(v); + } + + virtual void* CreateHasher() { + return new (GetStateAllocator().Malloc(sizeof(HasherType))) HasherType(&GetStateAllocator()); + } + + virtual uint64_t GetHashCode(void* hasher) { + return static_cast(hasher)->GetHashCode(); + } + + virtual void DestroryHasher(void* hasher) { + HasherType* h = static_cast(hasher); + h->~HasherType(); + StateAllocator::Free(h); + } + + virtual void* MallocState(size_t size) { + return GetStateAllocator().Malloc(size); + } + + virtual void FreeState(void* p) { + return StateAllocator::Free(p); + } + +private: + typedef typename SchemaType::Context Context; + typedef GenericValue, StateAllocator> HashCodeArray; + typedef internal::Hasher HasherType; + + GenericSchemaValidator( + const SchemaDocumentType& schemaDocument, + const SchemaType& root, +#if RAPIDJSON_SCHEMA_VERBOSE + unsigned depth, +#endif + StateAllocator* allocator = 0, + size_t schemaStackCapacity = kDefaultSchemaStackCapacity, + size_t documentStackCapacity = kDefaultDocumentStackCapacity) + : + schemaDocument_(&schemaDocument), + root_(root), + stateAllocator_(allocator), + ownStateAllocator_(0), + schemaStack_(allocator, schemaStackCapacity), + documentStack_(allocator, documentStackCapacity), + outputHandler_(CreateNullHandler()), + valid_(true) +#if RAPIDJSON_SCHEMA_VERBOSE + , depth_(depth) +#endif +// NCBI: added + , invalidSchemaPointer_(PointerType()) + , invalidSchemaKeyword_(0) + , 
invalidDocumentPointer_(PointerType()) + { + } + + StateAllocator& GetStateAllocator() { + if (!stateAllocator_) + stateAllocator_ = ownStateAllocator_ = RAPIDJSON_NEW(StateAllocator)(); + return *stateAllocator_; + } + + bool BeginValue() { + if (schemaStack_.Empty()) + PushSchema(root_); + else { + if (CurrentContext().inArray) + internal::TokenHelper, Ch>::AppendIndexToken(documentStack_, CurrentContext().arrayElementIndex); + + if (!CurrentSchema().BeginValue(CurrentContext())) + return false; + + SizeType count = CurrentContext().patternPropertiesSchemaCount; + const SchemaType** sa = CurrentContext().patternPropertiesSchemas; + typename Context::PatternValidatorType patternValidatorType = CurrentContext().valuePatternValidatorType; + bool valueUniqueness = CurrentContext().valueUniqueness; + RAPIDJSON_ASSERT(CurrentContext().valueSchema); + PushSchema(*CurrentContext().valueSchema); + + if (count > 0) { + CurrentContext().objectPatternValidatorType = patternValidatorType; + ISchemaValidator**& va = CurrentContext().patternPropertiesValidators; + SizeType& validatorCount = CurrentContext().patternPropertiesValidatorCount; + va = static_cast(MallocState(sizeof(ISchemaValidator*) * count)); + for (SizeType i = 0; i < count; i++) + va[validatorCount++] = CreateSchemaValidator(*sa[i]); + } + + CurrentContext().arrayUniqueness = valueUniqueness; + } + return true; + } + + bool EndValue() { + if (!CurrentSchema().EndValue(CurrentContext())) + return false; + +#if RAPIDJSON_SCHEMA_VERBOSE + GenericStringBuffer sb; + schemaDocument_->GetPointer(&CurrentSchema()).Stringify(sb); + + *documentStack_.template Push() = '\0'; + documentStack_.template Pop(1); + internal::PrintValidatorPointers(depth_, sb.GetString(), documentStack_.template Bottom()); +#endif + + uint64_t h = CurrentContext().arrayUniqueness ? 
static_cast(CurrentContext().hasher)->GetHashCode() : 0; + + PopSchema(); + + if (!schemaStack_.Empty()) { + Context& context = CurrentContext(); + if (context.valueUniqueness) { + HashCodeArray* a = static_cast(context.arrayElementHashCodes); + if (!a) + CurrentContext().arrayElementHashCodes = a = new (GetStateAllocator().Malloc(sizeof(HashCodeArray))) HashCodeArray(kArrayType); + for (typename HashCodeArray::ConstValueIterator itr = a->Begin(); itr != a->End(); ++itr) + if (itr->GetUint64() == h) + RAPIDJSON_INVALID_KEYWORD_RETURN(SchemaType::GetUniqueItemsString()); + a->PushBack(h, GetStateAllocator()); + } + } + + // Remove the last token of document pointer + while (!documentStack_.Empty() && *documentStack_.template Pop(1) != '/') + ; + + return true; + } + + void AppendToken(const Ch* str, SizeType len) { + documentStack_.template Reserve(1 + len * 2); // worst case all characters are escaped as two characters + *documentStack_.template PushUnsafe() = '/'; + for (SizeType i = 0; i < len; i++) { + if (str[i] == '~') { + *documentStack_.template PushUnsafe() = '~'; + *documentStack_.template PushUnsafe() = '0'; + } + else if (str[i] == '/') { + *documentStack_.template PushUnsafe() = '~'; + *documentStack_.template PushUnsafe() = '1'; + } + else + *documentStack_.template PushUnsafe() = str[i]; + } + } + + RAPIDJSON_FORCEINLINE void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push()) Context(*this, &schema); } + + RAPIDJSON_FORCEINLINE void PopSchema() { + Context* c = schemaStack_.template Pop(1); + if (HashCodeArray* a = static_cast(c->arrayElementHashCodes)) { + a->~HashCodeArray(); + StateAllocator::Free(a); + } + c->~Context(); + } + + const SchemaType& CurrentSchema() const { return *schemaStack_.template Top()->schema; } + Context& CurrentContext() { return *schemaStack_.template Top(); } + const Context& CurrentContext() const { return *schemaStack_.template Top(); } + + OutputHandler& CreateNullHandler() { + return 
*(nullHandler_ = static_cast(GetStateAllocator().Malloc(sizeof(OutputHandler)))); + } + + static const size_t kDefaultSchemaStackCapacity = 1024; + static const size_t kDefaultDocumentStackCapacity = 256; + const SchemaDocumentType* schemaDocument_; + const SchemaType& root_; + StateAllocator* stateAllocator_; + StateAllocator* ownStateAllocator_; + internal::Stack schemaStack_; //!< stack to store the current path of schema (BaseSchemaType *) + internal::Stack documentStack_; //!< stack to store the current path of validating document (Ch) + OutputHandler& outputHandler_; + OutputHandler* nullHandler_; + bool valid_; +#if RAPIDJSON_SCHEMA_VERBOSE + unsigned depth_; +#endif +//NCBI: added + PointerType invalidSchemaPointer_; + const Ch* invalidSchemaKeyword_; + PointerType invalidDocumentPointer_; +public: + template + void SetValidationError(const ExtHandler& validator) { + invalidSchemaPointer_ = validator.GetInvalidSchemaPointer(); + invalidSchemaKeyword_ = validator.GetInvalidSchemaKeyword(); + invalidDocumentPointer_ = validator.GetInvalidDocumentPointer(); + valid_ = invalidSchemaKeyword_ == nullptr; + } +}; + +typedef GenericSchemaValidator SchemaValidator; + +/////////////////////////////////////////////////////////////////////////////// +// SchemaValidatingReader + +//! A helper class for parsing with validation. +/*! + This helper class is a functor, designed as a parameter of \ref GenericDocument::Populate(). + + \tparam parseFlags Combination of \ref ParseFlag. + \tparam InputStream Type of input stream, implementing Stream concept. + \tparam SourceEncoding Encoding of the input stream. + \tparam SchemaDocumentType Type of schema document. + \tparam StackAllocator Allocator type for stack. 
+*/ +template < + unsigned parseFlags, + typename InputStream, + typename SourceEncoding, + typename SchemaDocumentType = SchemaDocument, + typename StackAllocator = CrtAllocator> +class SchemaValidatingReader { +public: + typedef typename SchemaDocumentType::PointerType PointerType; + typedef typename InputStream::Ch Ch; + + //! Constructor + /*! + \param is Input stream. + \param sd Schema document. + */ + SchemaValidatingReader(InputStream& is, const SchemaDocumentType& sd) : is_(is), sd_(sd), invalidSchemaKeyword_(), isValid_(true) {} + + template + bool operator()(Handler& handler) { + GenericReader reader; + GenericSchemaValidator validator(sd_, handler); + parseResult_ = reader.template Parse(is_, validator); + + isValid_ = validator.IsValid(); + if (isValid_) { + invalidSchemaPointer_ = PointerType(); + invalidSchemaKeyword_ = 0; + invalidDocumentPointer_ = PointerType(); + } + else { + invalidSchemaPointer_ = validator.GetInvalidSchemaPointer(); + invalidSchemaKeyword_ = validator.GetInvalidSchemaKeyword(); + invalidDocumentPointer_ = validator.GetInvalidDocumentPointer(); + } + + return parseResult_; + } + + const ParseResult& GetParseResult() const { return parseResult_; } + bool IsValid() const { return isValid_; } + const PointerType& GetInvalidSchemaPointer() const { return invalidSchemaPointer_; } + const Ch* GetInvalidSchemaKeyword() const { return invalidSchemaKeyword_; } + const PointerType& GetInvalidDocumentPointer() const { return invalidDocumentPointer_; } + +private: + InputStream& is_; + const SchemaDocumentType& sd_; + + ParseResult parseResult_; + PointerType invalidSchemaPointer_; + const Ch* invalidSchemaKeyword_; + PointerType invalidDocumentPointer_; + bool isValid_; +}; + +RAPIDJSON_NAMESPACE_END +RAPIDJSON_DIAG_POP + +#endif // RAPIDJSON_SCHEMA_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/stream.h b/c++/include/misc/jsonwrapp/rapidjson11/stream.h new file mode 100644 index 00000000..fef82c25 --- /dev/null +++ 
b/c++/include/misc/jsonwrapp/rapidjson11/stream.h @@ -0,0 +1,179 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "rapidjson.h" + +#ifndef RAPIDJSON_STREAM_H_ +#define RAPIDJSON_STREAM_H_ + +#include "encodings.h" + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// Stream + +/*! \class rapidjson::Stream + \brief Concept for reading and writing characters. + + For read-only stream, no need to implement PutBegin(), Put(), Flush() and PutEnd(). + + For write-only stream, only need to implement Put() and Flush(). + +\code +concept Stream { + typename Ch; //!< Character type of the stream. + + //! Read the current character from stream without moving the read cursor. + Ch Peek() const; + + //! Read the current character from stream and moving the read cursor to next character. + Ch Take(); + + //! Get the current read cursor. + //! \return Number of characters read from start. + size_t Tell(); + + //! Begin writing operation at the current read pointer. + //! \return The begin writer pointer. + Ch* PutBegin(); + + //! Write a character. + void Put(Ch c); + + //! Flush the buffer. + void Flush(); + + //! End the writing operation. + //! \param begin The begin write pointer returned by PutBegin(). + //! 
\return Number of characters written. + size_t PutEnd(Ch* begin); +} +\endcode +*/ + +//! Provides additional information for stream. +/*! + By using traits pattern, this type provides a default configuration for stream. + For custom stream, this type can be specialized for other configuration. + See TEST(Reader, CustomStringStream) in readertest.cpp for example. +*/ +template +struct StreamTraits { + //! Whether to make local copy of stream for optimization during parsing. + /*! + By default, for safety, streams do not use local copy optimization. + Stream that can be copied fast should specialize this, like StreamTraits. + */ + enum { copyOptimization = 0 }; +}; + +//! Reserve n characters for writing to a stream. +template +inline void PutReserve(Stream& stream, size_t count) { + (void)stream; + (void)count; +} + +//! Write character to a stream, presuming buffer is reserved. +template +inline void PutUnsafe(Stream& stream, typename Stream::Ch c) { + stream.Put(c); +} + +//! Put N copies of a character to a stream. +template +inline void PutN(Stream& stream, Ch c, size_t n) { + PutReserve(stream, n); + for (size_t i = 0; i < n; i++) + PutUnsafe(stream, c); +} + +/////////////////////////////////////////////////////////////////////////////// +// StringStream + +//! Read-only string stream. +/*! \note implements Stream concept +*/ +template +struct GenericStringStream { + typedef typename Encoding::Ch Ch; + + GenericStringStream(const Ch *src) : src_(src), head_(src) {} + + Ch Peek() const { return *src_; } + Ch Take() { return *src_++; } + size_t Tell() const { return static_cast(src_ - head_); } + + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + + const Ch* src_; //!< Current read position. + const Ch* head_; //!< Original head of the string. 
+}; + +template +struct StreamTraits > { + enum { copyOptimization = 1 }; +}; + +//! String stream with UTF8 encoding. +typedef GenericStringStream > StringStream; + +/////////////////////////////////////////////////////////////////////////////// +// InsituStringStream + +//! A read-write string stream. +/*! This string stream is particularly designed for in-situ parsing. + \note implements Stream concept +*/ +template +struct GenericInsituStringStream { + typedef typename Encoding::Ch Ch; + + GenericInsituStringStream(Ch *src) : src_(src), dst_(0), head_(src) {} + + // Read + Ch Peek() { return *src_; } + Ch Take() { return *src_++; } + size_t Tell() { return static_cast(src_ - head_); } + + // Write + void Put(Ch c) { RAPIDJSON_ASSERT(dst_ != 0); *dst_++ = c; } + + Ch* PutBegin() { return dst_ = src_; } + size_t PutEnd(Ch* begin) { return static_cast(dst_ - begin); } + void Flush() {} + + Ch* Push(size_t count) { Ch* begin = dst_; dst_ += count; return begin; } + void Pop(size_t count) { dst_ -= count; } + + Ch* src_; + Ch* dst_; + Ch* head_; +}; + +template +struct StreamTraits > { + enum { copyOptimization = 1 }; +}; + +//! Insitu string stream with UTF8 encoding. +typedef GenericInsituStringStream > InsituStringStream; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_STREAM_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/stringbuffer.h b/c++/include/misc/jsonwrapp/rapidjson11/stringbuffer.h new file mode 100644 index 00000000..4e38b82c --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/stringbuffer.h @@ -0,0 +1,121 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_STRINGBUFFER_H_ +#define RAPIDJSON_STRINGBUFFER_H_ + +#include "stream.h" +#include "internal/stack.h" + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS +#include // std::move +#endif + +#include "internal/stack.h" + +#if defined(__clang__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(c++98-compat) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Represents an in-memory output stream. +/*! + \tparam Encoding Encoding of the stream. + \tparam Allocator type for allocating memory buffer. + \note implements Stream concept +*/ +template +class GenericStringBuffer { +public: + typedef typename Encoding::Ch Ch; + + GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {} + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericStringBuffer(GenericStringBuffer&& rhs) : stack_(std::move(rhs.stack_)) {} + GenericStringBuffer& operator=(GenericStringBuffer&& rhs) { + if (&rhs != this) + stack_ = std::move(rhs.stack_); + return *this; + } +#endif + + void Put(Ch c) { *stack_.template Push() = c; } + void PutUnsafe(Ch c) { *stack_.template PushUnsafe() = c; } + void Flush() {} + + void Clear() { stack_.Clear(); } + void ShrinkToFit() { + // Push and pop a null terminator. This is safe. 
+ *stack_.template Push() = '\0'; + stack_.ShrinkToFit(); + stack_.template Pop(1); + } + + void Reserve(size_t count) { stack_.template Reserve(count); } + Ch* Push(size_t count) { return stack_.template Push(count); } + Ch* PushUnsafe(size_t count) { return stack_.template PushUnsafe(count); } + void Pop(size_t count) { stack_.template Pop(count); } + + const Ch* GetString() const { + // Push and pop a null terminator. This is safe. + *stack_.template Push() = '\0'; + stack_.template Pop(1); + + return stack_.template Bottom(); + } + + //! Get the size of string in bytes in the string buffer. + size_t GetSize() const { return stack_.GetSize(); } + + //! Get the length of string in Ch in the string buffer. + size_t GetLength() const { return stack_.GetSize() / sizeof(Ch); } + + static const size_t kDefaultCapacity = 256; + mutable internal::Stack stack_; + +private: + // Prohibit copy constructor & assignment operator. + GenericStringBuffer(const GenericStringBuffer&); + GenericStringBuffer& operator=(const GenericStringBuffer&); +}; + +//! String buffer with UTF8 encoding +typedef GenericStringBuffer > StringBuffer; + +template +inline void PutReserve(GenericStringBuffer& stream, size_t count) { + stream.Reserve(count); +} + +template +inline void PutUnsafe(GenericStringBuffer& stream, typename Encoding::Ch c) { + stream.PutUnsafe(c); +} + +//! Implement specialized version of PutN() with memset() for better performance. 
+template<> +inline void PutN(GenericStringBuffer >& stream, char c, size_t n) { + std::memset(stream.stack_.Push(n), c, n * sizeof(c)); +} + +RAPIDJSON_NAMESPACE_END + +#if defined(__clang__) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_STRINGBUFFER_H_ diff --git a/c++/include/misc/jsonwrapp/rapidjson11/writer.h b/c++/include/misc/jsonwrapp/rapidjson11/writer.h new file mode 100644 index 00000000..5b3004b0 --- /dev/null +++ b/c++/include/misc/jsonwrapp/rapidjson11/writer.h @@ -0,0 +1,624 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef RAPIDJSON_WRITER_H_ +#define RAPIDJSON_WRITER_H_ + +#include "stream.h" +#include "internal/stack.h" +#include "internal/strfunc.h" +#include "internal/dtoa.h" +#include "internal/itoa.h" +#include "stringbuffer.h" +#include // placement new + +#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER) +#include +#pragma intrinsic(_BitScanForward) +#endif +#ifdef RAPIDJSON_SSE42 +#include +#elif defined(RAPIDJSON_SSE2) +#include +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +RAPIDJSON_DIAG_OFF(unreachable-code) +RAPIDJSON_DIAG_OFF(c++98-compat) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// WriteFlag + +/*! \def RAPIDJSON_WRITE_DEFAULT_FLAGS + \ingroup RAPIDJSON_CONFIG + \brief User-defined kWriteDefaultFlags definition. + + User can define this as any \c WriteFlag combinations. +*/ +#ifndef RAPIDJSON_WRITE_DEFAULT_FLAGS +#define RAPIDJSON_WRITE_DEFAULT_FLAGS kWriteNoFlags +#endif + +//! Combination of writeFlags +enum WriteFlag { + kWriteNoFlags = 0, //!< No flags are set. + kWriteValidateEncodingFlag = 1, //!< Validate encoding of JSON strings. + kWriteNanAndInfFlag = 2, //!< Allow writing of Infinity, -Infinity and NaN. + kWriteDefaultFlags = RAPIDJSON_WRITE_DEFAULT_FLAGS //!< Default write flags. Can be customized by defining RAPIDJSON_WRITE_DEFAULT_FLAGS +}; + +//! JSON writer +/*! Writer implements the concept Handler. + It generates JSON text by events to an output os. + + User may programmatically calls the functions of a writer to generate JSON text. + + On the other side, a writer can also be passed to objects that generates events, + + for example Reader::Parse() and Document::Accept(). + + \tparam OutputStream Type of output stream. + \tparam SourceEncoding Encoding of source string. + \tparam TargetEncoding Encoding of output stream. 
+ \tparam StackAllocator Type of allocator for allocating memory of stack. + \note implements Handler concept +*/ +template, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags> +class Writer { +public: + typedef typename SourceEncoding::Ch Ch; + + static const int kDefaultMaxDecimalPlaces = 324; + + //! Constructor + /*! \param os Output stream. + \param stackAllocator User supplied allocator. If it is null, it will create a private one. + \param levelDepth Initial capacity of stack. + */ + explicit + Writer(OutputStream& os, StackAllocator* stackAllocator = 0, size_t levelDepth = kDefaultLevelDepth) : + os_(&os), level_stack_(stackAllocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {} + + explicit + Writer(StackAllocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) : + os_(0), level_stack_(allocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {} + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + Writer(Writer&& rhs) : + os_(rhs.os_), level_stack_(std::move(rhs.level_stack_)), maxDecimalPlaces_(rhs.maxDecimalPlaces_), hasRoot_(rhs.hasRoot_) { + rhs.os_ = 0; + } +#endif + + //! Reset the writer with a new stream. + /*! + This function reset the writer with a new stream and default settings, + in order to make a Writer object reusable for output multiple JSONs. + + \param os New output stream. + \code + Writer writer(os1); + writer.StartObject(); + // ... + writer.EndObject(); + + writer.Reset(os2); + writer.StartObject(); + // ... + writer.EndObject(); + \endcode + */ + void Reset(OutputStream& os) { + os_ = &os; + hasRoot_ = false; + level_stack_.Clear(); + } + + //! Checks whether the output is a complete JSON. + /*! + A complete JSON has a complete root object or array. 
+ */ + bool IsComplete() const { + return hasRoot_ && level_stack_.Empty(); + } + + int GetMaxDecimalPlaces() const { + return maxDecimalPlaces_; + } + + //! Sets the maximum number of decimal places for double output. + /*! + This setting truncates the output with specified number of decimal places. + + For example, + + \code + writer.SetMaxDecimalPlaces(3); + writer.StartArray(); + writer.Double(0.12345); // "0.123" + writer.Double(0.0001); // "0.0" + writer.Double(1.234567890123456e30); // "1.234567890123456e30" (do not truncate significand for positive exponent) + writer.Double(1.23e-4); // "0.0" (do truncate significand for negative exponent) + writer.EndArray(); + \endcode + + The default setting does not truncate any decimal places. You can restore to this setting by calling + \code + writer.SetMaxDecimalPlaces(Writer::kDefaultMaxDecimalPlaces); + \endcode + */ + void SetMaxDecimalPlaces(int maxDecimalPlaces) { + maxDecimalPlaces_ = maxDecimalPlaces; + } + + /*!@name Implementation of Handler + \see Handler + */ + //@{ + + bool Null() { Prefix(kNullType); return EndValue(WriteNull()); } + bool Bool(bool b) { Prefix(b ? kTrueType : kFalseType); return EndValue(WriteBool(b)); } + bool Int(int i) { Prefix(kNumberType); return EndValue(WriteInt(i)); } + bool Uint(unsigned u) { Prefix(kNumberType); return EndValue(WriteUint(u)); } + bool Int64(int64_t i64) { Prefix(kNumberType); return EndValue(WriteInt64(i64)); } + bool Uint64(uint64_t u64) { Prefix(kNumberType); return EndValue(WriteUint64(u64)); } + + //! Writes the given \c double value to the stream + /*! + \param d The value to be written. + \return Whether it is succeed. 
+ */ + bool Double(double d) { Prefix(kNumberType); return EndValue(WriteDouble(d)); } + + bool RawNumber(const Ch* str, SizeType length, bool copy = false) { + RAPIDJSON_ASSERT(str != 0); + (void)copy; + Prefix(kNumberType); + return EndValue(WriteString(str, length)); + } + + bool String(const Ch* str, SizeType length, bool copy = false) { + RAPIDJSON_ASSERT(str != 0); + (void)copy; + Prefix(kStringType); + return EndValue(WriteString(str, length)); + } + +#if RAPIDJSON_HAS_STDSTRING + bool String(const std::basic_string& str) { + return String(str.data(), SizeType(str.size())); + } +#endif + + bool StartObject() { + Prefix(kObjectType); + new (level_stack_.template Push()) Level(false); + return WriteStartObject(); + } + + bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); } + + bool EndObject(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); + RAPIDJSON_ASSERT(!level_stack_.template Top()->inArray); + level_stack_.template Pop(1); + return EndValue(WriteEndObject()); + } + + bool StartArray() { + Prefix(kArrayType); + new (level_stack_.template Push()) Level(true); + return WriteStartArray(); + } + + bool EndArray(SizeType elementCount = 0) { + (void)elementCount; + RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); + RAPIDJSON_ASSERT(level_stack_.template Top()->inArray); + level_stack_.template Pop(1); + return EndValue(WriteEndArray()); + } + //@} + + /*! @name Convenience extensions */ + //@{ + + //! Simpler but slower overload. + bool String(const Ch* str) { return String(str, internal::StrLen(str)); } + bool Key(const Ch* str) { return Key(str, internal::StrLen(str)); } + + //@} + + //! Write a raw JSON value. + /*! + For user to write a stringified JSON as a value. + + \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range. + \param length Length of the json. 
+ \param type Type of the root of json. + */ + bool RawValue(const Ch* json, size_t length, Type type) { + RAPIDJSON_ASSERT(json != 0); + Prefix(type); + return EndValue(WriteRawValue(json, length)); + } + +protected: + //! Information for each nested level + struct Level { + Level(bool inArray_) : valueCount(0), inArray(inArray_) {} + size_t valueCount; //!< number of values in this level + bool inArray; //!< true if in array, otherwise in object + }; + + static const size_t kDefaultLevelDepth = 32; + + bool WriteNull() { + PutReserve(*os_, 4); + PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 'l'); return true; + } + + bool WriteBool(bool b) { + if (b) { + PutReserve(*os_, 4); + PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'r'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'e'); + } + else { + PutReserve(*os_, 5); + PutUnsafe(*os_, 'f'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 's'); PutUnsafe(*os_, 'e'); + } + return true; + } + + bool WriteInt(int i) { + char buffer[11]; + const char* end = internal::i32toa(i, buffer); + PutReserve(*os_, static_cast(end - buffer)); + for (const char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast(*p)); + return true; + } + + bool WriteUint(unsigned u) { + char buffer[10]; + const char* end = internal::u32toa(u, buffer); + PutReserve(*os_, static_cast(end - buffer)); + for (const char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast(*p)); + return true; + } + + bool WriteInt64(int64_t i64) { + char buffer[21]; + const char* end = internal::i64toa(i64, buffer); + PutReserve(*os_, static_cast(end - buffer)); + for (const char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast(*p)); + return true; + } + + bool WriteUint64(uint64_t u64) { + char buffer[20]; + char* end = internal::u64toa(u64, buffer); + PutReserve(*os_, static_cast(end - buffer)); + for (char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast(*p)); + return true; + } + + bool WriteDouble(double 
d) { + if (internal::Double(d).IsNanOrInf()) { + if (!(writeFlags & kWriteNanAndInfFlag)) + return false; + if (internal::Double(d).IsNan()) { + PutReserve(*os_, 3); + PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N'); + return true; + } + if (internal::Double(d).Sign()) { + PutReserve(*os_, 9); + PutUnsafe(*os_, '-'); + } + else + PutReserve(*os_, 8); + PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f'); + PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y'); + return true; + } + + char buffer[25]; + char* end = internal::dtoa(d, buffer, maxDecimalPlaces_); + PutReserve(*os_, static_cast(end - buffer)); + for (char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast(*p)); + return true; + } + + bool WriteString(const Ch* str, SizeType length) { + static const typename OutputStream::Ch hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + static const char escape[256] = { +#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + //0 1 2 3 4 5 6 7 8 9 A B C D E F + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't', 'n', 'u', 'f', 'r', 'u', 'u', // 00 + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', // 10 + 0, 0, '"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20 + Z16, Z16, // 30~4F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, // 50 + Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 // 60~FF +#undef Z16 + }; + + if (TargetEncoding::supportUnicode) + PutReserve(*os_, 2 + length * 6); // "\uxxxx..." + else + PutReserve(*os_, 2 + length * 12); // "\uxxxx\uyyyy..." 
+ + PutUnsafe(*os_, '\"'); + GenericStringStream is(str); + while (ScanWriteUnescapedString(is, length)) { + const Ch c = is.Peek(); + if (!TargetEncoding::supportUnicode && static_cast(c) >= 0x80) { + // Unicode escaping + unsigned codepoint; + if (RAPIDJSON_UNLIKELY(!SourceEncoding::Decode(is, &codepoint))) + return false; + PutUnsafe(*os_, '\\'); + PutUnsafe(*os_, 'u'); + if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) { + PutUnsafe(*os_, hexDigits[(codepoint >> 12) & 15]); + PutUnsafe(*os_, hexDigits[(codepoint >> 8) & 15]); + PutUnsafe(*os_, hexDigits[(codepoint >> 4) & 15]); + PutUnsafe(*os_, hexDigits[(codepoint ) & 15]); + } + else { + RAPIDJSON_ASSERT(codepoint >= 0x010000 && codepoint <= 0x10FFFF); + // Surrogate pair + unsigned s = codepoint - 0x010000; + unsigned lead = (s >> 10) + 0xD800; + unsigned trail = (s & 0x3FF) + 0xDC00; + PutUnsafe(*os_, hexDigits[(lead >> 12) & 15]); + PutUnsafe(*os_, hexDigits[(lead >> 8) & 15]); + PutUnsafe(*os_, hexDigits[(lead >> 4) & 15]); + PutUnsafe(*os_, hexDigits[(lead ) & 15]); + PutUnsafe(*os_, '\\'); + PutUnsafe(*os_, 'u'); + PutUnsafe(*os_, hexDigits[(trail >> 12) & 15]); + PutUnsafe(*os_, hexDigits[(trail >> 8) & 15]); + PutUnsafe(*os_, hexDigits[(trail >> 4) & 15]); + PutUnsafe(*os_, hexDigits[(trail ) & 15]); + } + } + else if ((sizeof(Ch) == 1 || static_cast(c) < 256) && RAPIDJSON_UNLIKELY(escape[static_cast(c)])) { + is.Take(); + PutUnsafe(*os_, '\\'); + PutUnsafe(*os_, static_cast(escape[static_cast(c)])); + if (escape[static_cast(c)] == 'u') { + PutUnsafe(*os_, '0'); + PutUnsafe(*os_, '0'); + PutUnsafe(*os_, hexDigits[static_cast(c) >> 4]); + PutUnsafe(*os_, hexDigits[static_cast(c) & 0xF]); + } + } + else if (RAPIDJSON_UNLIKELY(!(writeFlags & kWriteValidateEncodingFlag ? 
+ Transcoder::Validate(is, *os_) : + Transcoder::TranscodeUnsafe(is, *os_)))) + return false; + } + PutUnsafe(*os_, '\"'); + return true; + } + + bool ScanWriteUnescapedString(GenericStringStream& is, size_t length) { + return RAPIDJSON_LIKELY(is.Tell() < length); + } + + bool WriteStartObject() { os_->Put('{'); return true; } + bool WriteEndObject() { os_->Put('}'); return true; } + bool WriteStartArray() { os_->Put('['); return true; } + bool WriteEndArray() { os_->Put(']'); return true; } + + bool WriteRawValue(const Ch* json, size_t length) { + PutReserve(*os_, length); + for (size_t i = 0; i < length; i++) { + RAPIDJSON_ASSERT(json[i] != '\0'); + PutUnsafe(*os_, json[i]); + } + return true; + } + + void Prefix(Type type) { + (void)type; + if (RAPIDJSON_LIKELY(level_stack_.GetSize() != 0)) { // this value is not at root + Level* level = level_stack_.template Top(); + if (level->valueCount > 0) { + if (level->inArray) + os_->Put(','); // add comma if it is not the first element in array + else // in object + os_->Put((level->valueCount % 2 == 0) ? ',' : ':'); + } + if (!level->inArray && level->valueCount % 2 == 0) + RAPIDJSON_ASSERT(type == kStringType); // if it's in object, then even number should be a name + level->valueCount++; + } + else { + RAPIDJSON_ASSERT(!hasRoot_); // Should only has one and only one root. + hasRoot_ = true; + } + } + + // Flush the value if it is the top level one. + bool EndValue(bool ret) { + if (RAPIDJSON_UNLIKELY(level_stack_.Empty())) // end of json text + os_->Flush(); + return ret; + } + + OutputStream* os_; + internal::Stack level_stack_; + int maxDecimalPlaces_; + bool hasRoot_; + +private: + // Prohibit copy constructor & assignment operator. 
+ Writer(const Writer&); + Writer& operator=(const Writer&); +}; + +// Full specialization for StringStream to prevent memory copying + +template<> +inline bool Writer::WriteInt(int i) { + char *buffer = os_->Push(11); + const char* end = internal::i32toa(i, buffer); + os_->Pop(static_cast(11 - (end - buffer))); + return true; +} + +template<> +inline bool Writer::WriteUint(unsigned u) { + char *buffer = os_->Push(10); + const char* end = internal::u32toa(u, buffer); + os_->Pop(static_cast(10 - (end - buffer))); + return true; +} + +template<> +inline bool Writer::WriteInt64(int64_t i64) { + char *buffer = os_->Push(21); + const char* end = internal::i64toa(i64, buffer); + os_->Pop(static_cast(21 - (end - buffer))); + return true; +} + +template<> +inline bool Writer::WriteUint64(uint64_t u) { + char *buffer = os_->Push(20); + const char* end = internal::u64toa(u, buffer); + os_->Pop(static_cast(20 - (end - buffer))); + return true; +} + +template<> +inline bool Writer::WriteDouble(double d) { + if (internal::Double(d).IsNanOrInf()) { + // Note: This code path can only be reached if (RAPIDJSON_WRITE_DEFAULT_FLAGS & kWriteNanAndInfFlag). 
+ if (!(kWriteDefaultFlags & kWriteNanAndInfFlag)) + return false; + if (internal::Double(d).IsNan()) { + PutReserve(*os_, 3); + PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N'); + return true; + } + if (internal::Double(d).Sign()) { + PutReserve(*os_, 9); + PutUnsafe(*os_, '-'); + } + else + PutReserve(*os_, 8); + PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f'); + PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y'); + return true; + } + + char *buffer = os_->Push(25); + char* end = internal::dtoa(d, buffer, maxDecimalPlaces_); + os_->Pop(static_cast(25 - (end - buffer))); + return true; +} + +#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) +template<> +inline bool Writer::ScanWriteUnescapedString(StringStream& is, size_t length) { + if (length < 16) + return RAPIDJSON_LIKELY(is.Tell() < length); + + if (!RAPIDJSON_LIKELY(is.Tell() < length)) + return false; + + const char* p = is.src_; + const char* end = is.head_ + length; + const char* nextAligned = reinterpret_cast((reinterpret_cast(p) + 15) & static_cast(~15)); + const char* endAligned = reinterpret_cast(reinterpret_cast(end) & static_cast(~15)); + if (nextAligned > end) + return true; + + while (p != nextAligned) + if (*p < 0x20 || *p == '\"' || *p == '\\') { + is.src_ = p; + return RAPIDJSON_LIKELY(is.Tell() < length); + } + else + os_->PutUnsafe(*p++); + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast(&dquote[0])); + const __m128i bs = 
_mm_loadu_si128(reinterpret_cast(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast(&space[0])); + + for (; p != endAligned; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19 + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + SizeType len; +#ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + len = offset; +#else + len = static_cast(__builtin_ffs(r) - 1); +#endif + char* q = reinterpret_cast(os_->PushUnsafe(len)); + for (size_t i = 0; i < len; i++) + q[i] = p[i]; + + p += len; + break; + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(os_->PushUnsafe(16)), s); + } + + is.src_ = p; + return RAPIDJSON_LIKELY(is.Tell() < length); +} +#endif // defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) + +RAPIDJSON_NAMESPACE_END + +#ifdef _MSC_VER +RAPIDJSON_DIAG_POP +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/c++/include/ncbi_pch.hpp b/c++/include/ncbi_pch.hpp index f16e1bb8..43095d06 100644 --- a/c++/include/ncbi_pch.hpp +++ b/c++/include/ncbi_pch.hpp @@ -1,5 +1,5 @@ #if defined(NCBI_USE_PCH) && !defined(NCBI_PCH__HPP) -/* $Id: ncbi_pch.hpp 575211 2018-11-26 16:41:56Z blastadm $ +/* $Id: ncbi_pch.hpp 582157 2019-03-11 19:07:18Z blastadm $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/include/ncbi_source_ver.h b/c++/include/ncbi_source_ver.h index 5bc6ae1b..160e5dfb 100644 --- a/c++/include/ncbi_source_ver.h +++ b/c++/include/ncbi_source_ver.h @@ -1,4 +1,4 @@ -/* $Id: ncbi_source_ver.h 575211 2018-11-26 16:41:56Z blastadm $ +/* $Id: 
ncbi_source_ver.h 582157 2019-03-11 19:07:18Z blastadm $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/include/ncbiconf.h b/c++/include/ncbiconf.h index 1a782061..940963e4 100644 --- a/c++/include/ncbiconf.h +++ b/c++/include/ncbiconf.h @@ -1,7 +1,7 @@ #ifndef FORWARDING_NCBICONF_H #define FORWARDING_NCBICONF_H -/* $Id: ncbiconf.h 575211 2018-11-26 16:41:56Z blastadm $ +/* $Id: ncbiconf.h 582157 2019-03-11 19:07:18Z blastadm $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/include/objects/general/Dbtag.hpp b/c++/include/objects/general/Dbtag.hpp index 1fc8d10e..e4ce54fc 100644 --- a/c++/include/objects/general/Dbtag.hpp +++ b/c++/include/objects/general/Dbtag.hpp @@ -1,4 +1,4 @@ -/* $Id: Dbtag.hpp 560226 2018-03-20 16:42:25Z vasilche $ +/* $Id: Dbtag.hpp 576574 2018-12-18 14:11:23Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -218,7 +218,8 @@ public: eDbtagType_RNAcentral, eDbtagType_PeptideAtlas, eDbtagType_EPDnew, - eDbtagType_Ensembl + eDbtagType_Ensembl, + eDbtagType_PseudoCAP }; enum EDbtagGroup { diff --git a/c++/include/objects/general/User_object.hpp b/c++/include/objects/general/User_object.hpp index 1e50678f..4456acfb 100644 --- a/c++/include/objects/general/User_object.hpp +++ b/c++/include/objects/general/User_object.hpp @@ -1,4 +1,4 @@ -/* $Id: User_object.hpp 554760 2018-01-08 18:32:18Z bollin $ +/* $Id: User_object.hpp 578928 2019-01-28 18:17:57Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -261,7 +261,7 @@ public: eBadStatus }; - virtual const char* GetErrCodeString() const + virtual const char* GetErrCodeString() const override { switch (GetErrCode()) { case eUserFieldWithoutLabel: return "User field without label"; diff --git 
a/c++/include/objects/seq/annot_mapper_exception.hpp b/c++/include/objects/seq/annot_mapper_exception.hpp index a860bf28..1c755103 100644 --- a/c++/include/objects/seq/annot_mapper_exception.hpp +++ b/c++/include/objects/seq/annot_mapper_exception.hpp @@ -1,7 +1,7 @@ #ifndef ANNOT_MAPPER_EXCEPTION__HPP #define ANNOT_MAPPER_EXCEPTION__HPP -/* $Id: annot_mapper_exception.hpp 444487 2014-08-25 16:48:54Z grichenk $ +/* $Id: annot_mapper_exception.hpp 578928 2019-01-28 18:17:57Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -68,7 +68,7 @@ public: eOtherError }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CAnnotMapperException, CException); }; diff --git a/c++/include/objects/seqalign/seqalign_exception.hpp b/c++/include/objects/seqalign/seqalign_exception.hpp index e700db41..e21526fe 100644 --- a/c++/include/objects/seqalign/seqalign_exception.hpp +++ b/c++/include/objects/seqalign/seqalign_exception.hpp @@ -1,4 +1,4 @@ -/* $Id: seqalign_exception.hpp 363131 2012-05-14 15:34:29Z whlavina $ +/* $Id: seqalign_exception.hpp 578928 2019-01-28 18:17:57Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -78,7 +78,7 @@ public: eNotImplemented }; - virtual const char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { switch (GetErrCode()) { case eUnsupported: return "eUnsupported"; diff --git a/c++/include/objects/seqloc/Seq_id.hpp b/c++/include/objects/seqloc/Seq_id.hpp index a0d28c09..8ceec1b4 100644 --- a/c++/include/objects/seqloc/Seq_id.hpp +++ b/c++/include/objects/seqloc/Seq_id.hpp @@ -1,4 +1,4 @@ -/* $Id: Seq_id.hpp 572664 2018-10-17 17:03:35Z ivanov $ +/* $Id: Seq_id.hpp 578990 2019-01-29 13:02:24Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN 
NOTICE @@ -536,27 +536,51 @@ public: /// return the label for a given string enum ELabelType { - eType, - eContent, - eBoth, - eFasta, - eFastaContent, + eType, ///< FASTA-style type, or database in GeneralDbIsContent mode. + eContent, ///< Untagged human-readable accession or the like. + eBoth, ///< Type and content, delimited by a vertical bar. + eFasta, ///< Tagged ID in NCBI's traditional FASTA style. + eFastaContent, ///< Like eFasta, but without any tag. /// default is to show type + content eDefault = eBoth }; enum ELabelFlags { - fLabel_Version = 0x10, + fLabel_Version = 0x10, ///< Show the version + /// For type general, use the database name as the tag + /// and the (text or numeric) key as the content. fLabel_GeneralDbIsContent = 0x20, + fLabel_Trimmed = 0x40, ///< Trim trailing FASTA delimeters. + fLabel_UpperCase = 0x80, ///< Upper case label, with special encoding for PDB chain-ids /// default options - always show the version fLabel_Default = fLabel_Version }; typedef int TLabelFlags; + /// Append a label for this Seq-id to the supplied string. + /// @param label + /// String to append to. + /// @param type + /// Type of label (human-readable type-tagged content, by default). + /// @param flags + /// Flags fine-tuning behavior for human-readable output (ignored + /// in eFasta and eFastaContent mode). + /// @sa ELabelType, ELabelFlags void GetLabel(string* label, ELabelType type = eDefault, TLabelFlags flags = fLabel_Default) const; + /// Append a label for this Seq-id to the supplied string, splitting + /// out the version to a separate output parameter. + /// @note In eFasta and eFastaContent mode, this method includes the + /// version (if any) in the label and does not touch *version. + /// @param label + /// String to append to. + /// @param version + /// Pointer to hold the returned version. + /// @param type + /// Type of label (human-readable type-tagged content, by default). 
+ /// @sa ELabelType void GetLabel(string* label, int* version, ELabelType type = eDefault) const; @@ -689,6 +713,28 @@ public: /// (SeqId/AvoidGi or SEQ_ID_AVOID_GI). static bool AvoidGi(void); + /// Flags specifying special treatment for certain types of Seq-ids in + /// ComposeOSLT(). + /// @sa ComposeOSLT + enum EComposeOSLTFlags { + fAllowLocalId = 0x1 + }; + typedef int TComposeOSLTFlags; + + /// JIRA ID-5188 : Compose OSLT string for the primary id, as well as OSLT + /// strings for the secondary ids, if any. + /// NB: given a single Seq-id, it is not always possible to determine + /// whether it should be treated as primary or secondary if it were part of + /// a list of Seq-ids in a Bioseq. In that case, this function returns it as + /// primary, and the final judgement needs to be made by the caller. + /// @param secondary_ids + /// OSLT strings for the secondary ids + /// @param parse_flags + /// Flags specifying special treatment for certain types of Seq-ids. + /// @return + /// OSLT string for the primary id + string ComposeOSLT(list* secondary_ids = nullptr, + TComposeOSLTFlags parse_flags = 0) const; /// ID length restrictions const static size_t kMaxLocalIDLength = 50; @@ -830,7 +876,7 @@ public: }; /// Translate from the error code value to its string representation. - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; // Standard exception boilerplate code. 
NCBI_EXCEPTION_DEFAULT(CSeqIdException, CException); diff --git a/c++/include/objects/seqloc/Seq_loc.hpp b/c++/include/objects/seqloc/Seq_loc.hpp index 84ef69b2..d575fc91 100644 --- a/c++/include/objects/seqloc/Seq_loc.hpp +++ b/c++/include/objects/seqloc/Seq_loc.hpp @@ -1,4 +1,4 @@ -/* $Id: Seq_loc.hpp 556251 2018-01-29 14:20:19Z grichenk $ +/* $Id: Seq_loc.hpp 578928 2019-01-28 18:17:57Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -85,7 +85,7 @@ public: eOtherError }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CSeqLocException, CException); }; diff --git a/c++/include/objects/seqtable/seq_table_exception.hpp b/c++/include/objects/seqtable/seq_table_exception.hpp index 004aa76e..eaf3c79d 100644 --- a/c++/include/objects/seqtable/seq_table_exception.hpp +++ b/c++/include/objects/seqtable/seq_table_exception.hpp @@ -1,7 +1,7 @@ #ifndef SEQ_TABLE_EXCEPTION__HPP #define SEQ_TABLE_EXCEPTION__HPP -/* $Id: seq_table_exception.hpp 457991 2015-01-29 19:26:03Z vasilche $ +/* $Id: seq_table_exception.hpp 578928 2019-01-28 18:17:57Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -56,7 +56,7 @@ public: eIncompatibleValueType, ///< Data cannot be converted to asked type eOtherError }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CSeqTableException, CException); }; diff --git a/c++/include/objects/valerr/ValidErrItem.hpp b/c++/include/objects/valerr/ValidErrItem.hpp index f0991d2f..42bac803 100644 --- a/c++/include/objects/valerr/ValidErrItem.hpp +++ b/c++/include/objects/valerr/ValidErrItem.hpp @@ -1,4 +1,4 @@ -/* $Id: ValidErrItem.hpp 572668 2018-10-17 17:13:44Z ivanov $ +/* $Id: ValidErrItem.hpp 581296 2019-02-27 16:27:53Z ivanov $ * 
=========================================================================== * * PUBLIC DOMAIN NOTICE @@ -346,6 +346,7 @@ enum EErrType { eErr_SEQ_DESCR_TaxonomyNoCommonAncestor, eErr_SEQ_DESCR_TaxonomyNoValidTaxids, eErr_SEQ_DESCR_TaxonomyEmptyInput, + eErr_SEQ_DESCR_ChromosomeWithoutLocation, ERR_CODE_END(SEQ_DESCR), diff --git a/c++/include/objmgr/bioseq_handle.hpp b/c++/include/objmgr/bioseq_handle.hpp index 612eb79d..0fd37ff2 100644 --- a/c++/include/objmgr/bioseq_handle.hpp +++ b/c++/include/objmgr/bioseq_handle.hpp @@ -1,7 +1,7 @@ #ifndef BIOSEQ_HANDLE__HPP #define BIOSEQ_HANDLE__HPP -/* $Id: bioseq_handle.hpp 496211 2016-03-24 15:33:11Z vasilche $ +/* $Id: bioseq_handle.hpp 576406 2018-12-14 15:28:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -518,6 +518,7 @@ public: TDescr& SetDescr(void) const; bool AddSeqdesc(CSeqdesc& d) const; CRef RemoveSeqdesc(const CSeqdesc& d) const; + CRef ReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) const; void AddSeq_descr(TDescr& v) const; // inst void SetInst(TInst& v) const; @@ -648,6 +649,7 @@ public: void x_RealSetDescr(TDescr& v) const; bool x_RealAddSeqdesc(CSeqdesc& d) const; CRef x_RealRemoveSeqdesc(const CSeqdesc& d) const; + CRef x_RealReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) const; void x_RealAddSeq_descr(TDescr& v) const; void x_RealResetId(void) const; diff --git a/c++/include/objmgr/bioseq_set_handle.hpp b/c++/include/objmgr/bioseq_set_handle.hpp index 3276a7c4..86d5da2c 100644 --- a/c++/include/objmgr/bioseq_set_handle.hpp +++ b/c++/include/objmgr/bioseq_set_handle.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR__BIOSEQ_SET_HANDLE__HPP #define OBJMGR__BIOSEQ_SET_HANDLE__HPP -/* $Id: bioseq_set_handle.hpp 571924 2018-10-04 17:41:25Z ivanov $ +/* $Id: bioseq_set_handle.hpp 576406 2018-12-14 15:28:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -298,6 
+298,7 @@ public: TDescr& SetDescr(void) const; bool AddSeqdesc(CSeqdesc& d) const; CRef RemoveSeqdesc(const CSeqdesc& d) const; + CRef ReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) const; void AddSeq_descr(TDescr& v) const; /// Create new empty seq-entry @@ -499,6 +500,7 @@ public: void x_RealSetDescr(TDescr& v) const; bool x_RealAddSeqdesc(CSeqdesc& d) const; CRef x_RealRemoveSeqdesc(const CSeqdesc& d) const; + CRef x_RealReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) const; void x_RealAddSeq_descr(TDescr& v) const; void x_RealResetId(void) const; diff --git a/c++/include/objmgr/gc_assembly_parser.hpp b/c++/include/objmgr/gc_assembly_parser.hpp index b2e8150a..783181b3 100644 --- a/c++/include/objmgr/gc_assembly_parser.hpp +++ b/c++/include/objmgr/gc_assembly_parser.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_GC_ASSEMBLY_PARSER__HPP #define OBJMGR_GC_ASSEMBLY_PARSER__HPP -/* $Id: gc_assembly_parser.hpp 479439 2015-09-21 13:04:11Z grichenk $ +/* $Id: gc_assembly_parser.hpp 578929 2019-01-28 18:18:01Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -127,7 +127,7 @@ public: eUnsupported, ///< Unsupported type/flag. 
eOtherError }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CAssemblyParserException, CException); }; diff --git a/c++/include/objmgr/impl/bioseq_base_info.hpp b/c++/include/objmgr/impl/bioseq_base_info.hpp index eb52c969..58eb72b1 100644 --- a/c++/include/objmgr/impl/bioseq_base_info.hpp +++ b/c++/include/objmgr/impl/bioseq_base_info.hpp @@ -1,7 +1,7 @@ #ifndef OBJECTS_OBJMGR_IMPL___BIOSEQ_BASE_INFO__HPP #define OBJECTS_OBJMGR_IMPL___BIOSEQ_BASE_INFO__HPP -/* $Id: bioseq_base_info.hpp 507368 2016-07-18 21:33:41Z vasilche $ +/* $Id: bioseq_base_info.hpp 576406 2018-12-14 15:28:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -87,6 +87,7 @@ public: void ResetDescr(void); bool AddSeqdesc(CSeqdesc& d); CRef RemoveSeqdesc(const CSeqdesc& d); + CRef ReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc); void AddSeq_descr(const TDescr& v); virtual bool x_IsSetDescr(void) const = 0; diff --git a/c++/include/objmgr/impl/scope_impl.hpp b/c++/include/objmgr/impl/scope_impl.hpp index a78254f3..395c7b6f 100644 --- a/c++/include/objmgr/impl/scope_impl.hpp +++ b/c++/include/objmgr/impl/scope_impl.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_IMPL_SCOPE_IMPL__HPP #define OBJMGR_IMPL_SCOPE_IMPL__HPP -/* $Id: scope_impl.hpp 554030 2017-12-27 16:11:11Z gouriano $ +/* $Id: scope_impl.hpp 576250 2018-12-12 13:23:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -70,6 +70,7 @@ class CSeq_data; class CSeq_id; class CSeq_loc; class CBioseq; +class CSeq_submit; // objmgr class CScope; @@ -183,6 +184,10 @@ public: TPriority pri, TExist action); + // Add Seq-annot. 
+ CSeq_entry_Handle AddSeq_submit(CSeq_submit& submit, + TPriority pri); + ////////////////////////////////////////////////////////////////// // Modification of existing object tree CTSE_Handle GetEditHandle(const CTSE_Handle& src_tse); @@ -493,6 +498,7 @@ private: CRef x_MakeDummyTSE(CBioseq& seq) const; CRef x_MakeDummyTSE(CBioseq_set& seqset) const; CRef x_MakeDummyTSE(CSeq_annot& annot) const; + CRef x_MakeDummyTSE(CSeq_submit& submit) const; bool x_IsDummyTSE(const CTSE_Info& tse, const CBioseq_Info& seq) const; bool x_IsDummyTSE(const CTSE_Info& tse, diff --git a/c++/include/objmgr/impl/seq_entry_info.hpp b/c++/include/objmgr/impl/seq_entry_info.hpp index 641b5132..6b9f1759 100644 --- a/c++/include/objmgr/impl/seq_entry_info.hpp +++ b/c++/include/objmgr/impl/seq_entry_info.hpp @@ -1,7 +1,7 @@ #ifndef OBJECTS_OBJMGR_IMPL___SEQ_ENTRY_INFO__HPP #define OBJECTS_OBJMGR_IMPL___SEQ_ENTRY_INFO__HPP -/* $Id: seq_entry_info.hpp 517902 2016-10-28 16:56:25Z vasilche $ +/* $Id: seq_entry_info.hpp 576406 2018-12-14 15:28:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -130,6 +130,7 @@ public: TDescr& SetDescr(void); bool AddSeqdesc(CSeqdesc& d); CRef RemoveSeqdesc(const CSeqdesc& d); + CRef ReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc); // void AddDescr(CSeq_entry_Info& src); void AddSeq_descr(const TDescr& v); diff --git a/c++/include/objmgr/impl/tse_info.hpp b/c++/include/objmgr/impl/tse_info.hpp index 8c8b4b7e..a37b4adc 100644 --- a/c++/include/objmgr/impl/tse_info.hpp +++ b/c++/include/objmgr/impl/tse_info.hpp @@ -1,7 +1,7 @@ #ifndef OBJECTS_OBJMGR_IMPL___TSE_INFO__HPP #define OBJECTS_OBJMGR_IMPL___TSE_INFO__HPP -/* $Id: tse_info.hpp 544387 2017-08-22 19:28:06Z vasilche $ +/* $Id: tse_info.hpp 576250 2018-12-12 13:23:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -61,6 +61,8 @@ class CTSE_ScopeInfo; class 
CBioseq_Info; class CSeq_entry_Info; class CSeq_annot_Info; +class CSeq_submit; +class CSubmit_block; class CSeq_annot_SNP_Info; class CTSE_Chunk_Info; class CTSE_Split_Info; @@ -181,6 +183,7 @@ class NCBI_XOBJMGR_EXPORT CTSE_Info : public CSeq_entry_Info typedef CSeq_entry_Info TParent; public: /* + /// State of bioseq handle (defined in CBioseq_Handle) enum EBlobState { fState_none = 0, fState_suppress_temp = 1 << 0, @@ -194,11 +197,20 @@ public: fState_conn_failed = 1 << 7, fState_other_error = 1 << 8 }; + /// Type of top level object added to scope (defined in CTSE_Handle) + enum ETopLevelObjectType { + eTopLevelSeq_entry, + eTopLevelBioseq_set, + eTopLevelBioseq, + eTopLevelSeq_annot, + eTopLevelSeq_submit + }; */ typedef CBlobIdKey TBlobId; typedef CBioseq_Handle::TBioseqStateFlags TBlobState; typedef int TBlobVersion; typedef pair TBlobOrder; + typedef CTSE_Handle::ETopLevelObjectType ETopLevelObjectType; // 'ctors // Argument tse will be parentized. @@ -246,6 +258,21 @@ public: // return full blob order object, less is better TBlobOrder GetBlobOrder(void) const; + // return initial top-level object type + ETopLevelObjectType GetTopLevelObjectType() const; + const CSerialObject* GetTopLevelObjectPtr() const; + void SetTopLevelObject(ETopLevelObjectType type, CSerialObject* ptr); + void SetTopLevelObjectType(ETopLevelObjectType type); + + // Seq-submit support: + // return full Seq-submit object, may require to update entry/annot list + bool IsTopLevelSeq_submit() const; + const CSeq_submit& GetTopLevelSeq_submit() const; + // More efficient Seq-submit.sub access + const CSubmit_block& GetTopLevelSubmit_block() const; + CSubmit_block& SetTopLevelSubmit_block() const; + void SetTopLevelSubmit_block(CSubmit_block& sub) const; + const CAnnotName& GetName(void) const; void SetName(const CAnnotName& name); @@ -446,6 +473,8 @@ public: virtual string GetDescription(void) const; + CSeq_submit& x_GetTopLevelSeq_submit() const; + private: friend class CTSE_Guard; 
friend class CDataSource; @@ -637,6 +666,10 @@ private: // Suppression level TBlobState m_BlobState; + // Initial top-level objects type + ETopLevelObjectType m_TopLevelObjectType; + CRef m_TopLevelObjectPtr; + // TSE has name CAnnotName m_Name; diff --git a/c++/include/objmgr/objmgr_exception.hpp b/c++/include/objmgr/objmgr_exception.hpp index 140aa3f6..096c8727 100644 --- a/c++/include/objmgr/objmgr_exception.hpp +++ b/c++/include/objmgr/objmgr_exception.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_EXCEPTION__HPP #define OBJMGR_EXCEPTION__HPP -/* $Id: objmgr_exception.hpp 541101 2017-07-13 16:39:15Z grichenk $ +/* $Id: objmgr_exception.hpp 578929 2019-01-28 18:18:01Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -63,7 +63,7 @@ public: eMissingData, ///< The sequence doesn't have the data requested eOtherError }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CObjMgrException,CException); }; @@ -83,7 +83,7 @@ public: eSelfReference, ///< Self-reference in seq map is detected eFail ///< Operation failed }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CSeqMapException, CObjMgrException); }; @@ -97,7 +97,7 @@ public: eDataError, ///< Sequence data error eOutOfRange ///< Attempt to access out-of-range iterator }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CSeqVectorException, CObjMgrException); }; @@ -113,7 +113,7 @@ public: eIncomatibleType, ///< Incompatible annotation type (feat/graph/align) eOtherError }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CAnnotException, CObjMgrException); }; @@ -135,7 +135,7 @@ public: eBadConfig, eNotFound }; - virtual 
const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CLoaderException, CObjMgrException); }; @@ -151,7 +151,7 @@ public: }; typedef int TBlobState; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; CBlobStateException(const CDiagCompileInfo& info, const CException* prev_exception, EErrCode err_code, @@ -172,7 +172,7 @@ public: x_Assign(other); } virtual ~CBlobStateException(void) throw() {} - virtual const char* GetType(void) const { return "CBlobStateException"; } + const char* GetType(void) const override { return "CBlobStateException"; } typedef int TErrCode; TErrCode GetErrCode(void) const { @@ -186,7 +186,7 @@ public: protected: CBlobStateException(void) {} - virtual const CException* x_Clone(void) const + virtual const CException* x_Clone(void) const override { return new CBlobStateException(*this); } @@ -209,7 +209,7 @@ public: eBadResidue, eBadAlignment }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CObjmgrUtilException, CObjMgrException); }; @@ -229,7 +229,7 @@ public: eSegmentsLimitExceded }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CAnnotSearchLimitException, CObjMgrException); }; diff --git a/c++/include/objmgr/prefetch_manager.hpp b/c++/include/objmgr/prefetch_manager.hpp index cabe9374..4c6375db 100644 --- a/c++/include/objmgr/prefetch_manager.hpp +++ b/c++/include/objmgr/prefetch_manager.hpp @@ -1,7 +1,7 @@ #ifndef PREFETCH_MANAGER__HPP #define PREFETCH_MANAGER__HPP -/* $Id: prefetch_manager.hpp 347369 2011-12-16 14:16:32Z vasilche $ +/* $Id: prefetch_manager.hpp 578929 2019-01-28 18:18:01Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -140,7 +140,7 @@ public: enum 
EErrCode { eFailed }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CPrefetchFailed,CException); }; @@ -152,7 +152,7 @@ public: enum EErrCode { eCanceled }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CPrefetchCanceled,CException); }; diff --git a/c++/include/objmgr/scope.hpp b/c++/include/objmgr/scope.hpp index 3bf4eba7..7505fc26 100644 --- a/c++/include/objmgr/scope.hpp +++ b/c++/include/objmgr/scope.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_SCOPE__HPP #define OBJMGR_SCOPE__HPP -/* $Id: scope.hpp 554030 2017-12-27 16:11:11Z gouriano $ +/* $Id: scope.hpp 576250 2018-12-12 13:23:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -68,6 +68,7 @@ class CSeq_entry; class CBioseq_set; class CBioseq; class CSeq_annot; +class CSeq_submit; class CSeq_id; class CSeq_loc; @@ -316,6 +317,11 @@ public: TPriority pri = kPriority_Default, EExist action = eExist_Throw); + /// Add Seq-submit, return its CSeq_entry_Handle. + /// Add object to the score with possibility to edit it directly. 
+ CSeq_entry_Handle AddSeq_submit(CSeq_submit& submit, + TPriority pri = kPriority_Default); + /// Get editable Biosec handle by regular one CBioseq_EditHandle GetEditHandle(const CBioseq_Handle& seq); diff --git a/c++/include/objmgr/seq_entry_handle.hpp b/c++/include/objmgr/seq_entry_handle.hpp index a63e7158..b694803c 100644 --- a/c++/include/objmgr/seq_entry_handle.hpp +++ b/c++/include/objmgr/seq_entry_handle.hpp @@ -1,7 +1,7 @@ #ifndef SEQ_ENTRY_HANDLE__HPP #define SEQ_ENTRY_HANDLE__HPP -/* $Id: seq_entry_handle.hpp 194592 2010-06-15 18:54:05Z vasilche $ +/* $Id: seq_entry_handle.hpp 576406 2018-12-14 15:28:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -70,6 +70,8 @@ class CTSE_Info; class CSeqdesc; class CBioObjectId; +class CSeq_submit; +class CSubmit_block; class CSeq_entry_ScopeInfo : public CScopeInfo_Base @@ -129,9 +131,17 @@ public: /// has more than one subentry exception is thrown CSeq_entry_Handle GetSingleSubEntry(void) const; + /// Check if this handle is top-level entry + bool IsTopLevelEntry(void) const; + /// Get top level Seq-entry handle CSeq_entry_Handle GetTopLevelEntry(void) const; + /// Seq-submit access functions + bool IsTopLevelSeq_submit(void) const; + const CSeq_submit& GetTopLevelSeq_submit(void) const; + const CSubmit_block& GetTopLevelSubmit_block(void) const; + /// Get Bioseq handle from the TSE of this Seq-entry CBioseq_Handle GetBioseqHandle(const CSeq_id& id) const; CBioseq_Handle GetBioseqHandle(const CSeq_id_Handle& id) const; @@ -255,6 +265,10 @@ public: /// Get parent seq-entry edit handle CSeq_entry_EditHandle GetParentEntry(void) const; + /// Seq-submit support + CSubmit_block& SetTopLevelSubmit_block(void) const; + void SetTopLevelSubmit_block(CSubmit_block& sub) const; + /// Get edit handle of the sub seq-entry /// If current seq-entry is not seq-set or /// has more than one subentry exception is thrown @@ -266,6 +280,7 @@ public: void 
ResetDescr(void) const; bool AddSeqdesc(CSeqdesc& v) const; CRef RemoveSeqdesc(const CSeqdesc& v) const; + CRef ReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) const; void AddDescr(TDescr& v) const; @@ -566,6 +581,7 @@ public: void x_RealResetDescr(void) const; bool x_RealAddSeqdesc(CSeqdesc& v) const; CRef x_RealRemoveSeqdesc(const CSeqdesc& v) const; + CRef x_RealReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) const; void x_RealAddSeq_descr(TDescr& v) const; }; diff --git a/c++/include/objmgr/tse_handle.hpp b/c++/include/objmgr/tse_handle.hpp index e8ef7733..f9d96dc4 100644 --- a/c++/include/objmgr/tse_handle.hpp +++ b/c++/include/objmgr/tse_handle.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_TSE_HANDLE__HPP #define OBJMGR_TSE_HANDLE__HPP -/* $Id: tse_handle.hpp 544387 2017-08-22 19:28:06Z vasilche $ +/* $Id: tse_handle.hpp 576250 2018-12-12 13:23:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -60,6 +60,8 @@ class CDataLoader; class CSeq_feat_Handle; class CAnnotObject_Info; class CObject_id; +class CSeq_submit; +class CSubmit_block; class CScopeInfo_Base; class CScopeInfoLocker; @@ -120,6 +122,26 @@ public: /// Get top level Seq-entry handle CSeq_entry_Handle GetTopLevelEntry(void) const; + /// Type of top level object added to scope + enum ETopLevelObjectType { + eTopLevel_Seq_entry, + eTopLevel_Bioseq_set, + eTopLevel_Bioseq, + eTopLevel_Seq_annot, + eTopLevel_Seq_submit + }; + + /// Get type of top level object added to scope + ETopLevelObjectType GetTopLevelObjectType() const; + bool IsTopLevelSeq_submit() const; + /// Return reference to top-level Seq-submit object + /// Throw an exception if it's not available + const CSeq_submit& GetTopLevelSeq_submit() const; + /// More efficient direct access to Seq-submit.sub field + const CSubmit_block& GetTopLevelSubmit_block() const; + CSubmit_block& SetTopLevelSubmit_block() const; + void SetTopLevelSubmit_block(CSubmit_block& 
sub) const; + /// Get Bioseq handle from this TSE CBioseq_Handle GetBioseqHandle(const CSeq_id& id) const; CBioseq_Handle GetBioseqHandle(const CSeq_id_Handle& id) const; diff --git a/c++/include/objmgr/unsupp_editsaver.hpp b/c++/include/objmgr/unsupp_editsaver.hpp index 7b4ebb33..4d123206 100644 --- a/c++/include/objmgr/unsupp_editsaver.hpp +++ b/c++/include/objmgr/unsupp_editsaver.hpp @@ -1,7 +1,7 @@ #ifndef __UNSUPPORTED_EDIT_SAVER__HPP #define __UNSUPPORTED_EDIT_SAVER__HPP -/* $Id: unsupp_editsaver.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: unsupp_editsaver.hpp 578929 2019-01-28 18:18:01Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -47,7 +47,7 @@ public: enum EErrCode { eUnsupported }; - virtual const char* GetErrCodeString(void) const { + virtual const char* GetErrCodeString(void) const override { switch ( GetErrCode() ) { case eUnsupported: return "Unsupported operation"; diff --git a/c++/include/objmgr/util/autodef.hpp b/c++/include/objmgr/util/autodef.hpp index 20148f89..6813bee9 100644 --- a/c++/include/objmgr/util/autodef.hpp +++ b/c++/include/objmgr/util/autodef.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_UTIL___AUTODEF__HPP #define OBJMGR_UTIL___AUTODEF__HPP -/* $Id: autodef.hpp 572646 2018-10-17 16:58:07Z ivanov $ +/* $Id: autodef.hpp 578991 2019-01-29 13:03:21Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -67,14 +67,14 @@ public: void AddSources(CSeq_entry_Handle se); void AddSources(CBioseq_Handle bh); - CAutoDefModifierCombo* FindBestModifierCombo(); + CRef FindBestModifierCombo(); CAutoDefModifierCombo* GetAllModifierCombo(); CAutoDefModifierCombo* GetEmptyCombo(); unsigned int GetNumAvailableModifiers(); - string GetOneSourceDescription(CBioseq_Handle bh); + string GetOneSourceDescription(const CBioseq_Handle& bh); string GetOneFeatureClauseList(CBioseq_Handle bh, unsigned int genome_val); - string 
GetOneDefLine(CAutoDefModifierCombo* mod_combo, CBioseq_Handle bh); - string GetOneDefLine(CBioseq_Handle bh); + string GetOneDefLine(CAutoDefModifierCombo* mod_combo, const CBioseq_Handle& bh); + string GetOneDefLine(const CBioseq_Handle& bh); void SetOptionsObject(const CUser_object& user); CRef GetOptionsObject() const { return m_Options.MakeUserObject(); } @@ -106,7 +106,7 @@ public: void SetUseFakePromoters (bool use_fake); void SetCustomFeatureClause(const string& custom_feature_clause); - void SuppressFeature(objects::CFeatListItem feat); + void SuppressFeature(const objects::CFeatListItem& feat); void SuppressFeature(objects::CSeqFeatData::ESubtype subtype); typedef vector> TModifierComboVector; @@ -143,19 +143,11 @@ private: string x_GetNonFeatureListEnding(); - string x_GetFeatureClauses(CBioseq_Handle bh); + string x_GetFeatureClauses(const CBioseq_Handle& bh); string x_GetFeatureClauseProductEnding(const string& feature_clauses, CBioseq_Handle bh); - bool x_AddMiscRNAFeatures(CBioseq_Handle bh, - const CSeq_feat& cf, - const CSeq_loc& mapped_loc, - CAutoDefFeatureClause_Base& main_clause); - bool x_AddtRNAAndOther(CBioseq_Handle bh, - const CSeq_feat& cf, - const CSeq_loc& mapped_loc, - CAutoDefFeatureClause_Base& main_clause); - void x_RemoveOptionalFeatures(CAutoDefFeatureClause_Base *main_clause, CBioseq_Handle bh); + void x_RemoveOptionalFeatures(CAutoDefFeatureClause_Base *main_clause, const CBioseq_Handle& bh); bool x_IsOrgModRequired(unsigned int mod_type); @@ -164,7 +156,7 @@ private: bool x_IsFeatureSuppressed(CSeqFeatData::ESubtype subtype); void GetMasterLocation(CBioseq_Handle &bh, CRange& range); - bool IsSegment(CBioseq_Handle bh); + bool IsSegment(const CBioseq_Handle& bh); bool x_Is5SList(CFeat_CI feat_ci); bool x_IsSingleMiscFeat(CFeat_CI feat_ci); string x_GetHumanSTRFeatureClauses(CBioseq_Handle bh, const CUser_object& comment); diff --git a/c++/include/objmgr/util/autodef_feature_clause.hpp 
b/c++/include/objmgr/util/autodef_feature_clause.hpp index 638812e1..6fe8c9f8 100644 --- a/c++/include/objmgr/util/autodef_feature_clause.hpp +++ b/c++/include/objmgr/util/autodef_feature_clause.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_UTIL___AUTODEF_FEATURE_CLAUSE__HPP #define OBJMGR_UTIL___AUTODEF_FEATURE_CLAUSE__HPP -/* $Id: autodef_feature_clause.hpp 553103 2017-12-13 12:25:38Z bollin $ +/* $Id: autodef_feature_clause.hpp 578991 2019-01-29 13:03:21Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -294,6 +294,9 @@ public: CAutoDefParsedtRNAClause *s_tRNAClauseFromNote(CBioseq_Handle bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, string comment, bool is_first, bool is_last); +vector > FeatureClauseFactory(CBioseq_Handle bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, const CAutoDefOptions& opts, bool is_single_misc_feat); + + END_SCOPE(objects) END_NCBI_SCOPE diff --git a/c++/include/objmgr/util/autodef_feature_clause_base.hpp b/c++/include/objmgr/util/autodef_feature_clause_base.hpp index 83efe2b3..281cbab7 100644 --- a/c++/include/objmgr/util/autodef_feature_clause_base.hpp +++ b/c++/include/objmgr/util/autodef_feature_clause_base.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_UTIL___AUTODEF_FEATURE_CLAUSE_BASE__HPP #define OBJMGR_UTIL___AUTODEF_FEATURE_CLAUSE_BASE__HPP -/* $Id: autodef_feature_clause_base.hpp 564303 2018-05-24 16:50:22Z bollin $ +/* $Id: autodef_feature_clause_base.hpp 578992 2019-01-29 13:03:37Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -40,17 +40,19 @@ BEGIN_NCBI_SCOPE BEGIN_SCOPE(objects) + +class CAutoDefFeatureClause; -class NCBI_XOBJEDIT_EXPORT CAutoDefFeatureClause_Base +class NCBI_XOBJEDIT_EXPORT CAutoDefFeatureClause_Base : public CObject { public: - typedef vector TClauseList; + typedef vector > TClauseList; CAutoDefFeatureClause_Base(); virtual ~CAutoDefFeatureClause_Base(); - virtual void AddSubclause 
(CAutoDefFeatureClause_Base *subclause); + virtual void AddSubclause (CRef subclause); string PrintClause(bool print_typeword, bool typeword_is_plural, bool suppress_allele); @@ -88,9 +90,9 @@ public: bool IsGeneMentioned(CAutoDefFeatureClause_Base *gene_clause); bool IsUnattachedGene() const; bool IsTypewordFirst() const { return m_ShowTypewordFirst; } - bool DisplayAlleleName (); + bool DisplayAlleleName () const; - const string& GetInterval() { return m_Interval; } + const string& GetInterval() const { return m_Interval; } const string& GetTypeword() const { return m_Typeword; } const string& GetDescription() const { return m_Description; } const string& GetProductName() { return m_ProductName; } @@ -161,7 +163,7 @@ public: static vector GetTrnaIntergenicSpacerClausePhrases(const string& comment); static bool IsValidFeatureClausePhrase(const string& phrase); static vector GetFeatureClausePhrases(string comment); - static CAutoDefFeatureClause_Base *ClauseFromPhrase(const string& phrase, CBioseq_Handle bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, bool first, bool last); + static CRef ClauseFromPhrase(const string& phrase, CBioseq_Handle bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, bool first, bool last); protected: @@ -239,7 +241,7 @@ class NCBI_XOBJEDIT_EXPORT CAutoDefExonListClause : public CAutoDefFeatureClause public: CAutoDefExonListClause(CBioseq_Handle bh); - virtual void AddSubclause (CAutoDefFeatureClause_Base *subclause); + virtual void AddSubclause (CRef subclause); virtual void Label(bool suppress_allele); virtual bool IsRecognizedFeature() { return true; } virtual bool IsExonList() { return true; } diff --git a/c++/include/objmgr/util/autodef_mod_combo.hpp b/c++/include/objmgr/util/autodef_mod_combo.hpp index 9db4ba34..b5983671 100644 --- a/c++/include/objmgr/util/autodef_mod_combo.hpp +++ b/c++/include/objmgr/util/autodef_mod_combo.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_UTIL___AUTODEF_MOD_COMBO__HPP #define 
OBJMGR_UTIL___AUTODEF_MOD_COMBO__HPP -/* $Id: autodef_mod_combo.hpp 572646 2018-10-17 16:58:07Z ivanov $ +/* $Id: autodef_mod_combo.hpp 575407 2018-11-28 17:52:52Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -77,7 +77,7 @@ public: bool HasSubSource(CSubSource::ESubtype st); bool HasOrgMod(COrgMod::ESubtype st); - void AddSource(const CBioSource& bs, string feature_clauses = ""); + void AddSource(const CBioSource& bs, const string& feature_clauses = ""); void AddSubsource(CSubSource::ESubtype st, bool even_if_not_uniquifying = false); void AddOrgMod(COrgMod::ESubtype st, bool even_if_not_uniquifying = false); @@ -114,7 +114,7 @@ public: typedef vector TSubSourceTypeVector; typedef vector TOrgModTypeVector; - typedef vector TGroupListVector; + typedef vector > TGroupListVector; const TGroupListVector& GetGroupList() const { return m_GroupList; } const CAutoDefSourceDescription::TModifierVector& GetModifiers() const { return m_Modifiers; } diff --git a/c++/include/objmgr/util/autodef_source_desc.hpp b/c++/include/objmgr/util/autodef_source_desc.hpp index 8695e760..650a5c55 100644 --- a/c++/include/objmgr/util/autodef_source_desc.hpp +++ b/c++/include/objmgr/util/autodef_source_desc.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_UTIL___AUTODEF_SOURCE_DESC__HPP #define OBJMGR_UTIL___AUTODEF_SOURCE_DESC__HPP -/* $Id: autodef_source_desc.hpp 530276 2017-03-13 18:20:08Z bollin $ +/* $Id: autodef_source_desc.hpp 575401 2018-11-28 17:49:27Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -108,7 +108,7 @@ public: }; -class NCBI_XOBJEDIT_EXPORT CAutoDefSourceDescription +class NCBI_XOBJEDIT_EXPORT CAutoDefSourceDescription : public CObject { public: CAutoDefSourceDescription(const CBioSource& bs, string feature_clauses = ""); diff --git a/c++/include/objmgr/util/autodef_source_group.hpp b/c++/include/objmgr/util/autodef_source_group.hpp index 
9baa4a5b..6d1e3bb0 100644 --- a/c++/include/objmgr/util/autodef_source_group.hpp +++ b/c++/include/objmgr/util/autodef_source_group.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_UTIL___AUTODEF_SOURCE_GROUP__HPP #define OBJMGR_UTIL___AUTODEF_SOURCE_GROUP__HPP -/* $Id: autodef_source_group.hpp 530276 2017-03-13 18:20:08Z bollin $ +/* $Id: autodef_source_group.hpp 575406 2018-11-28 17:52:26Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -51,17 +51,17 @@ BEGIN_NCBI_SCOPE BEGIN_SCOPE(objects) -class NCBI_XOBJEDIT_EXPORT CAutoDefSourceGroup +class NCBI_XOBJEDIT_EXPORT CAutoDefSourceGroup : public CObject { public: CAutoDefSourceGroup(); CAutoDefSourceGroup(CAutoDefSourceGroup *value); ~CAutoDefSourceGroup(); - typedef vector TSourceDescriptionVector; + typedef vector > TSourceDescriptionVector; unsigned int GetNumDescriptions(); - void AddSourceDescription(CAutoDefSourceDescription *tmp); + void AddSourceDescription(CRef tmp); CAutoDefSourceDescription *GetSourceDescription(unsigned int index); void GetAvailableModifiers @@ -71,11 +71,14 @@ public: bool GetDefaultExcludeSp(); - void AddSource (CAutoDefSourceDescription *src); + void AddSource (CRef src); bool AddQual (bool IsOrgMod, int subtype, bool keepAfterSemicolon); bool RemoveQual (bool IsOrgMod, int subtype); TSourceDescriptionVector GetSrcList() const { return m_SourceList; } - vector RemoveNonMatchingDescriptions (); + + void SortDescriptions(); + CRef SplitGroup(); + vector > RemoveNonMatchingDescriptions (); CAutoDefSourceDescription::TModifierVector GetModifiersPresentForAll(); CAutoDefSourceDescription::TModifierVector GetModifiersPresentForAny(); diff --git a/c++/include/objmgr/util/indexer.hpp b/c++/include/objmgr/util/indexer.hpp index 1ec74c1f..d16e0bbf 100644 --- a/c++/include/objmgr/util/indexer.hpp +++ b/c++/include/objmgr/util/indexer.hpp @@ -105,7 +105,8 @@ public: fHideIntronFeats = 32, fHideMiscFeats = 64, fShowSNPFeats = 128, - 
fShowCDDFeats = 256 + fShowCDDFeats = 256, + fGeneRNACDSOnly = 512 }; typedef int TFlags; // Binary "OR" of EFlags diff --git a/c++/include/objmgr/util/objutil.hpp b/c++/include/objmgr/util/objutil.hpp index 09b1e7c2..d47a3c65 100644 --- a/c++/include/objmgr/util/objutil.hpp +++ b/c++/include/objmgr/util/objutil.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_UTIL___OBJUTILS_HPP #define OBJMGR_UTIL___OBJUTILS_HPP -/* $Id: objutil.hpp 558959 2018-03-05 16:53:18Z dondosha $ +/* $Id: objutil.hpp 580124 2019-02-11 16:09:06Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -75,7 +75,7 @@ string JoinString(const list& l, // meet several spaces (spaces and tabs) in succession it replaces them // with one space. Strips all spaces after '(' and before ')' NCBI_XOBJEDIT_EXPORT -void StripSpaces(string& str); +bool StripSpaces(string& str); NCBI_XOBJEDIT_EXPORT bool TrimSpacesAndJunkFromEnds(string& str, bool allow_ellipsis = false); NCBI_XOBJEDIT_EXPORT diff --git a/c++/include/objmgr/util/sequence.hpp b/c++/include/objmgr/util/sequence.hpp index d54d4cda..5a5d1a03 100644 --- a/c++/include/objmgr/util/sequence.hpp +++ b/c++/include/objmgr/util/sequence.hpp @@ -1,7 +1,7 @@ #ifndef SEQUENCE__HPP #define SEQUENCE__HPP -/* $Id: sequence.hpp 567127 2018-07-13 17:55:39Z kans $ +/* $Id: sequence.hpp 578929 2019-01-28 18:18:01Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -729,7 +729,7 @@ public: // Translate the specific error code into a string representations of // that error code. 
- virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CSeqIdFromHandleException, CException); }; diff --git a/c++/include/objtools/align_format/align_format_util.hpp b/c++/include/objtools/align_format/align_format_util.hpp index 6dd034dc..8048e768 100644 --- a/c++/include/objtools/align_format/align_format_util.hpp +++ b/c++/include/objtools/align_format/align_format_util.hpp @@ -1,4 +1,4 @@ -/* $Id: align_format_util.hpp 568473 2018-08-06 16:28:49Z zaretska $ +/* $Id: align_format_util.hpp 577747 2019-01-08 18:06:34Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1070,6 +1070,18 @@ public: ILinkoutDB* linkoutdb, const string& mv_build_name); + static CRef + SortSeqalignForSortableFormat(objects::CSeq_align_set& aln_set, + bool nuc_to_nuc_translation, + int hit_order, + int hsp_order); + + static list< CRef > + SortOneSeqalignForSortableFormat(const objects::CSeq_align_set& source, + bool nuc_to_nuc_translation, + int hit_sort, + int hsp_sort); + /// function for calculating percent match for an alignment. ///@param numerator /// int numerator in percent identity calculation. 
diff --git a/c++/include/objtools/align_format/format_flags.hpp b/c++/include/objtools/align_format/format_flags.hpp index cd614164..726261d5 100644 --- a/c++/include/objtools/align_format/format_flags.hpp +++ b/c++/include/objtools/align_format/format_flags.hpp @@ -1,4 +1,4 @@ -/* $Id: format_flags.hpp 516923 2016-10-19 14:23:23Z fongah2 $ +/* $Id: format_flags.hpp 577747 2019-01-08 18:06:34Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -76,6 +76,14 @@ NCBI_ALIGN_FORMAT_EXPORT extern string kDfltArgTabularOutputFmt; NCBI_ALIGN_FORMAT_EXPORT extern const string kDfltArgTabularOutputFmtTag; NCBI_ALIGN_FORMAT_EXPORT extern const size_t kDfltLineLength; +NCBI_ALIGN_FORMAT_EXPORT extern const string kArgAlignSeqList; +NCBI_ALIGN_FORMAT_EXPORT extern const string kArgMetadata; +NCBI_ALIGN_FORMAT_EXPORT extern const string kArgQueryIndex; +NCBI_ALIGN_FORMAT_EXPORT extern const string kArgSortHits; +NCBI_ALIGN_FORMAT_EXPORT extern const string kArgSortHSPs; + + + /// Enumeration for all fields that are supported in the tabular output enum ETabularField { diff --git a/c++/include/objtools/align_format/showalign.hpp b/c++/include/objtools/align_format/showalign.hpp index ea48ecf4..faad57dd 100644 --- a/c++/include/objtools/align_format/showalign.hpp +++ b/c++/include/objtools/align_format/showalign.hpp @@ -1,4 +1,4 @@ -/* $Id: showalign.hpp 553793 2017-12-21 16:51:53Z zaretska $ +/* $Id: showalign.hpp 581160 2019-02-26 13:09:53Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -452,8 +452,9 @@ class NCBI_ALIGN_FORMAT_EXPORT CDisplaySeqalign { m_DomainInfo = domain; } - void SetSequencePropertyLabel(const vector* SequencePropertyLabel) { - m_SeqPropertyLabel = SequencePropertyLabel; + void SetSequencePropertyLabel(const vector* SequencePropertyLabel, + EOwnership ownership = eNoOwnership) { + m_SeqPropertyLabel.reset(SequencePropertyLabel, 
ownership); } //set and add result index in front of seqid in for quick link (for multiple result case) @@ -602,7 +603,7 @@ protected: /// external feature such as phiblast list < FeatureInfo * >* m_QueryFeature; list >* m_DomainInfo; - const vector* m_SeqPropertyLabel; + AutoPtr > m_SeqPropertyLabel; objects::CScope & m_Scope; objects::CAlnVec *m_AV; // current aln vector diff --git a/c++/include/objtools/align_format/showdefline.hpp b/c++/include/objtools/align_format/showdefline.hpp index 32fba3d1..4ed30367 100644 --- a/c++/include/objtools/align_format/showdefline.hpp +++ b/c++/include/objtools/align_format/showdefline.hpp @@ -1,4 +1,4 @@ -/* $Id: showdefline.hpp 553793 2017-12-21 16:51:53Z zaretska $ +/* $Id: showdefline.hpp 577751 2019-01-08 18:07:37Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -95,7 +95,9 @@ public: eShowNewSeqGif = (1 << 8), //show new sequence gif image eShowPercentIdent = (1 << 9), //show percent identity column eLongSeqId = (1 << 10), //print long sequences id (with bars) - eRealtedInfoLinks = (1 << 11) //Related information links - Linkout links with descriptions + others + eRealtedInfoLinks = (1 << 11), //Related information links - Linkout links with descriptions + others + eShowTotalScore = (1 << 12), //show total score column + eShowQueryCoverage = (1 << 13) //show query coverage column }; ///Data Representing each defline @@ -127,6 +129,30 @@ public: bool advancedView; }; + //Defline info for formatting + struct SDeflineFormattingInfo { + string dfln_url; + string dfln_rid; + string dfln_gi; + string dfln_seqid; + string full_dfln_defline; + string dfln_defline; + string dfln_id; + string dflnFrm_id; + string dflnFASTA_id; + string dflnAccs; + + string score_info; + string dfln_hspnum; + string dfln_alnLen; + string dfln_blast_rank; + string total_bit_string; + string percent_coverage; + string evalue_string; + string percent_identity; + }; + + //data 
represnting info to record in applog amd metadata struct SAppLogInfo { int topMatchesNum; @@ -346,6 +372,10 @@ public: ///Initialize defline params void Init(void); + ///Get deflines formatting info + ///@return vector of SDeflineFormattingInfo structs for defilens + vector GetFormattingInfo(void); + ///Display defline ///@param out: stream to output /// @@ -487,6 +517,7 @@ protected: //info to record in applog amd metadata SAppLogInfo *m_AppLogInfo; + vector m_SdlFormatInfoVec; ///Internal function to return score info ///@param aln seq-align we are working with [in] @@ -564,6 +595,7 @@ protected: /// void x_DisplayDeflineTableTemplate(CNcbiOstream & out); + void x_InitFormattingInfo(SScoreInfo* sci); //For internal test friend struct ::CShowBlastDeflineTest; }; diff --git a/c++/include/objtools/align_format/tabular.hpp b/c++/include/objtools/align_format/tabular.hpp index 1b3d3448..7b0e0649 100644 --- a/c++/include/objtools/align_format/tabular.hpp +++ b/c++/include/objtools/align_format/tabular.hpp @@ -1,4 +1,4 @@ -/* $Id: tabular.hpp 562063 2018-04-17 14:09:14Z jianye $ +/* $Id: tabular.hpp 577757 2019-01-08 18:09:26Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -66,7 +66,8 @@ public: enum EFieldDelimiter { eTab = 0, ///< Tab eSpace, ///< Space - eComma ///< Comma + eComma, ///< Comma + eCustom ///* matrix=0); - + void SetCustomDelim(string customDelim) { + x_SetFieldDelimiter(eCustom,customDelim); + } /// Print one line of tabular output virtual void Print(void); /// Print the tabular output header @@ -203,7 +206,7 @@ protected: void x_ResetFields(void); /// Set the tabular fields delimiter. 
/// @param delim Which delimiter to use - void x_SetFieldDelimiter(EFieldDelimiter delim); + void x_SetFieldDelimiter(EFieldDelimiter delim, string customDelim = ""); /// Print the names of all supported fields void x_PrintFieldNames(void); /// Print the value of a given field @@ -307,7 +310,7 @@ protected: void x_CheckTaxDB(); CNcbiOstream& m_Ostream; ///< Stream to write output to - char m_FieldDelimiter; ///< Delimiter character for fields to print. + string m_FieldDelimiter; ///< Delimiter character for fields to print. string m_QuerySeq; ///< Aligned part of the query sequence string m_SubjectSeq; ///< Aligned part of the subject sequence int m_QueryStart; ///< Starting offset in query @@ -365,6 +368,7 @@ private: int m_DbGeneticCode; TSeqRange m_QueryRange; + string m_CustomDelim; }; diff --git a/c++/include/objtools/alnmgr/score_builder_base.hpp b/c++/include/objtools/alnmgr/score_builder_base.hpp index f86580ed..ac9e4f55 100644 --- a/c++/include/objtools/alnmgr/score_builder_base.hpp +++ b/c++/include/objtools/alnmgr/score_builder_base.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_UTIL___SCORE_BUILDER_BASE__HPP #define OBJMGR_UTIL___SCORE_BUILDER_BASE__HPP -/* $Id: score_builder_base.hpp 433851 2014-04-30 14:36:29Z mozese2 $ +/* $Id: score_builder_base.hpp 576144 2018-12-11 15:05:52Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -133,14 +133,14 @@ public: EPercentIdentityType type = eGapped); /// Compute percent coverage of the query (sequence 0) (range 0-100) - double GetPercentCoverage(CScope& scope, const CSeq_align& align); + double GetPercentCoverage(CScope& scope, const CSeq_align& align, unsigned query = 0); /// Compute percent identity or coverage of the query within specified range double GetPercentIdentity(CScope& scope, const CSeq_align& align, const TSeqRange &range, EPercentIdentityType type = eGapped); double GetPercentCoverage(CScope& scope, const CSeq_align& align, - const TSeqRange 
&range); + const TSeqRange &range, unsigned query = 0); /// Compute percent identity or coverage of the query within specified /// collection of ranges @@ -148,7 +148,8 @@ public: const CRangeCollection &ranges, EPercentIdentityType type = eGapped); double GetPercentCoverage(CScope& scope, const CSeq_align& align, - const CRangeCollection &ranges); + const CRangeCollection &ranges, + unsigned query = 0); /// Compute the number of identities in the alignment int GetIdentityCount (CScope& scope, const CSeq_align& align); diff --git a/c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp b/c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp index 8a557f39..c60b9bb9 100644 --- a/c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp +++ b/c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS_BLAST__SEQDB__SEQDB_LMDB_HPP #define OBJTOOLS_READERS_BLAST__SEQDB__SEQDB_LMDB_HPP -/* $Id: seqdb_lmdb.hpp 559974 2018-03-16 15:23:03Z fongah2 $ +/* $Id: seqdb_lmdb.hpp 579233 2019-01-31 16:34:53Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -92,6 +92,12 @@ public: /// Get All Unique Tax Ids for db /// @parma tax_ids Return all unique tax ids found in db void GetDBTaxIds(vector & tax_ids) const; + + /// Get Tax Ids for oid list + /// @param oids Input oid list + /// @param tax_ids Output tax id list + void GetTaxIdsForOids(const vector & oids, set & tax_ids) const; + private: string m_LMDBFile; string m_Oid2SeqIdsFile; diff --git a/c++/include/objtools/blast/seqdb_reader/impl/seqdbatlas.hpp b/c++/include/objtools/blast/seqdb_reader/impl/seqdbatlas.hpp index 63323067..47fe7783 100644 --- a/c++/include/objtools/blast/seqdb_reader/impl/seqdbatlas.hpp +++ b/c++/include/objtools/blast/seqdb_reader/impl/seqdbatlas.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS_SEQDB__SEQDBATLAS_HPP #define OBJTOOLS_READERS_SEQDB__SEQDBATLAS_HPP -/* $Id: seqdbatlas.hpp 
540171 2017-07-03 15:08:48Z zaretska $ +/* $Id: seqdbatlas.hpp 579717 2019-02-05 16:53:45Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -53,6 +53,7 @@ #include #include #include +#include BEGIN_NCBI_SCOPE @@ -302,10 +303,12 @@ public: /// Constructor /// /// Initializes the atlas object. - /// - /// @param use_mmap - /// If false, use read(); if true, use mmap() or similar. - CSeqDBAtlas(bool use_mmap); + /// @param use_atlas_lock If true, the atlas lock will be used to protect + /// critical regions, otherwise the Lock() and Unlock() functions will be + /// noops. Setting the parameter to false improves CPU utilization when + /// each thread access a different database volume. It should be set to + /// true in other cases. + CSeqDBAtlas(bool use_atlas_lock); /// The destructor unmaps and frees all associated memory. ~CSeqDBAtlas(); @@ -488,7 +491,7 @@ public: /// This object tracks whether this thread owns the mutex. void Lock(CSeqDBLockHold & locked) { - if (! locked.m_Locked) { + if (m_UseLock && !locked.m_Locked) { m_Lock.Lock(); locked.m_Locked = true; } @@ -505,7 +508,7 @@ public: /// This object tracks whether this thread owns the mutex. void Unlock(CSeqDBLockHold & locked) { - if (locked.m_Locked) { + if (m_UseLock && locked.m_Locked) { locked.m_Locked = false; m_Lock.Unlock(); } @@ -574,7 +577,7 @@ public: return path; } - map< string, CMemoryFile* > &GetFilesMemMap(void){return m_FileMemMap;} + CMemoryFile* GetMemoryFile(const string& fileName); enum EFilesCount{ eFileCounterNoChange, @@ -606,9 +609,6 @@ private: /// Private method to prevent copy construction. CSeqDBAtlas(const CSeqDBAtlas &); - /// Iterator type for m_Pool member. - typedef map::iterator TPoolIter; - /// Try to find the region and free it. /// /// This method looks for the region in the memory pool (m_Pool), @@ -632,36 +632,35 @@ private: /// Protects most of the critical regions of the SeqDB library. 
CMutex m_Lock; - - /// Bytes of "data" currently known to SeqDBAtlas. This does not - /// include metadata - TIndx m_CurAlloc; + /// Use single atlas lock to protect critical regions. The single lock is + /// not needed if each thread access different database volume. + bool m_UseLock; + /// Maps from pointers to dynamically allocated blocks to the byte /// size of the allocation. map m_Pool; - - /// The most recently assigned FID. - int m_LastFID; - - /// Lookup table of fids by filename. - map m_FileIDs; + /// Bytes of "data" currently known to SeqDBAtlas. This does not + /// include metadata + TIndx m_CurAlloc; + //m_pool was used for mrmory allocation + bool m_Alloc; enum {e_MaxSlice64 = 1 << 30}; /// Cache of file existence and length. + std::mutex m_FileSizeMutex; map< string, pair > m_FileSize; - /// Maxium file size. Uint8 m_MaxFileSize; - /// BlastDB search path. - const string m_SearchPath; - - bool m_Alloc;//m_pool was used for mrmory allocation - map< string, CMemoryFile* > m_FileMemMap; + std::mutex m_FileMemMapMutex; + map > m_FileMemMap; int m_OpenedFilesCount; int m_MaxOpenedFilesCount; + + /// BlastDB search path. + const string m_SearchPath; }; @@ -684,12 +683,16 @@ inline CSeqDBMemReg::~CSeqDBMemReg() class CSeqDBAtlasHolder { public: /// Constructor. - /// @param use_mmap If true, memory mapping will be used. - /// @param flusher The garbage collection callback. /// @param locked The lock hold object for this thread (or NULL). - CSeqDBAtlasHolder(bool use_mmap, - //CSeqDBFlushCB * flusher, - CSeqDBLockHold * lockedp); + /// @param use_atlas_lock If true, the atlas lock will be used to protect + /// critical regions, otherwise the Lock() and Unlock() functions will be + /// noops. Setting the parameter to false improves CPU utilization when + /// each thread access a different database volume. It should be set to + /// true in other cases. 
+ CSeqDBAtlasHolder(CSeqDBLockHold * lockedp, bool use_atlas_lock); + + NCBI_DEPRECATED + CSeqDBAtlasHolder(bool user_atlas_lock, CSeqDBLockHold* lockdep); /// Destructor. ~CSeqDBAtlasHolder(); @@ -757,46 +760,27 @@ public: } //m_Filename is set - void Init(void) { - - - map &fileMemMap = m_Atlas.GetFilesMemMap(); - if(IsIndexFile() && fileMemMap.count(m_Filename) > 0) { - m_MappedFile = fileMemMap[m_Filename]; - x_LogMessage(eMapExists); + void Init(void) + { + try { + if (IsIndexFile()) { + m_MappedFile = m_Atlas.GetMemoryFile(m_Filename); } else { - try { - if(IsIndexFile()) { - CSeqDBLockHold locked(m_Atlas); - m_Atlas.Lock(locked); - if(fileMemMap.count(m_Filename) == 0) { - m_MappedFile = new CMemoryFile(m_Filename); - fileMemMap.insert(map::value_type(m_Filename,m_MappedFile)); - m_Atlas.ChangeOpenedFilseCount(CSeqDBAtlas::eFileCounterIncrement); - x_LogMessage(eMapNewLocked); - } - else { - m_MappedFile = fileMemMap[m_Filename]; - x_LogMessage(eMapExistsLocked); - } - } - else { - m_MappedFile = new CMemoryFile(m_Filename); - m_Atlas.ChangeOpenedFilseCount(CSeqDBAtlas::eFileCounterIncrement); - x_LogMessage(eMapNew); - } - m_Mapped = true; - } - catch(...) { - x_LogMessage(eMapError); - NCBI_THROW(CSeqDBException, - eFileErr, - "Cannot memory map " + m_Filename + ". Number of files opened: " + NStr::IntToString(m_Atlas.GetOpenedFilseCount())); - } - } - - m_DataPtr = (char *) (m_MappedFile->GetPtr()); + m_MappedFile = new CMemoryFile(m_Filename); + m_Atlas.ChangeOpenedFilseCount(CSeqDBAtlas::eFileCounterIncrement); + x_LogMessage(eMapNew); + } + m_Mapped = true; + } + catch (const std::exception&) { + x_LogMessage(eMapError); + NCBI_THROW(CSeqDBException, + eFileErr, + "Cannot memory map " + m_Filename + ". 
Number of files opened: " + NStr::IntToString(m_Atlas.GetOpenedFilseCount())); + } + + m_DataPtr = (char *)(m_MappedFile->GetPtr()); } diff --git a/c++/include/objtools/blast/seqdb_reader/impl/seqdbisam.hpp b/c++/include/objtools/blast/seqdb_reader/impl/seqdbisam.hpp index c988b68a..b9385320 100644 --- a/c++/include/objtools/blast/seqdb_reader/impl/seqdbisam.hpp +++ b/c++/include/objtools/blast/seqdb_reader/impl/seqdbisam.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS_SEQDB__SEQDBISAM_HPP #define OBJTOOLS_READERS_SEQDB__SEQDBISAM_HPP -/* $Id: seqdbisam.hpp 536658 2017-05-22 15:48:20Z zaretska $ +/* $Id: seqdbisam.hpp 579226 2019-01-31 16:32:08Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -146,7 +146,7 @@ public: typedef int TOid; /// PIG identifiers for numeric indices over protein volumes. - typedef int TPig; + //typedef int TPig; /// Genomic IDs, the most common numerical identifier. // typedef int TGi; diff --git a/c++/include/objtools/blast/seqdb_reader/impl/seqdbvol.hpp b/c++/include/objtools/blast/seqdb_reader/impl/seqdbvol.hpp index 7b76c805..23c883f9 100644 --- a/c++/include/objtools/blast/seqdb_reader/impl/seqdbvol.hpp +++ b/c++/include/objtools/blast/seqdb_reader/impl/seqdbvol.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS_SEQDB__SEQDBVOL_HPP #define OBJTOOLS_READERS_SEQDB__SEQDBVOL_HPP -/* $Id: seqdbvol.hpp 553487 2017-12-18 14:23:38Z fongah2 $ +/* $Id: seqdbvol.hpp 579218 2019-01-31 16:18:47Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -934,9 +934,9 @@ private: bool & have_vol) const { if (! 
have_user) { - if (m_UserGiList.NotEmpty() && m_UserGiList->GetNumTaxIds() == 0) { + if (m_UserGiList.NotEmpty() && m_UserGiList->GetNumTaxIds() == 0 && m_UserGiList->GetNumPigs() == 0 ) { have_user |= x_ListIncludesId(*m_UserGiList, id); - } else if (m_NegativeList.NotEmpty() && m_NegativeList->GetNumTaxIds() == 0) { + } else if (m_NegativeList.NotEmpty() && m_NegativeList->GetNumTaxIds() == 0 && m_NegativeList->GetNumPigs() == 0 ) { have_user |= x_ListIncludesId(*m_NegativeList, id); } else { have_user = true; diff --git a/c++/include/objtools/blast/seqdb_reader/seqdb.hpp b/c++/include/objtools/blast/seqdb_reader/seqdb.hpp index b8c2152a..0df0c243 100644 --- a/c++/include/objtools/blast/seqdb_reader/seqdb.hpp +++ b/c++/include/objtools/blast/seqdb_reader/seqdb.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_BLAST_SEQDB_READER___SEQDB__HPP #define OBJTOOLS_BLAST_SEQDB_READER___SEQDB__HPP -/* $Id: seqdb.hpp 553487 2017-12-18 14:23:38Z fongah2 $ +/* $Id: seqdb.hpp 579716 2019-02-05 16:53:21Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -326,7 +326,15 @@ public: /// Specify eProtein, eNucleotide, or eUnknown. /// @param gilist /// The database will be filtered by this GI list if non-null. - CSeqDB(const string & dbname, ESeqType seqtype, CSeqDBGiList * gilist = 0); + /// @param use_atlas_lock + /// Enable/diable thread synchronization. If true single Atlas mutex + /// will be used to protect most of critical parts of the code. + /// If false, CSeqdDBAtlas::Lock and CSeqDBAtlas::Unlock functions + /// will be noops. If each thread accesses + /// a different database vloume, then setting this parameter to false + /// will reduce contention. Otherwise it should be set to true. + CSeqDB(const string & dbname, ESeqType seqtype, CSeqDBGiList * gilist = 0, + bool use_atlas_lock = true); /// Short Constructor with Negative ID list. 
/// @@ -345,6 +353,44 @@ public: ESeqType seqtype, CSeqDBNegativeList * nlist); + /// Short Constructor with Positive and Negative ID list. + /// + /// This version of the constructor assumes the entire OID range + /// will be included, and applies filtering by a negative ID list. + /// Please use quotes ("") around database names that contains + /// space characters. + /// + /// @param dbname + /// A list of database or alias names, seperated by spaces + /// @param seqtype + /// Specify eProtein, eNucleotide, or eUnknown. + /// @param nlist + /// The database will be filtered to not include these GIs or TIs. + CSeqDB(const string & dbname, + ESeqType seqtype, + CSeqDBGiList * gilist, + CSeqDBNegativeList * nlist); + + /// Short Constructor with Positive and Negative ID list with oid range. + /// + /// This version of the constructor assumes the entire OID range + /// will be included, and applies filtering by a negative ID list. + /// Please use quotes ("") around database names that contains + /// space characters. + /// + /// @param dbname + /// A list of database or alias names, seperated by spaces + /// @param seqtype + /// Specify eProtein, eNucleotide, or eUnknown. + /// @param nlist + /// The database will be filtered to not include these GIs or TIs. + CSeqDB(const string & dbname, + ESeqType seqtype, + int oid_begin, + int oid_end, + CSeqDBGiList * gilist, + CSeqDBNegativeList * nlist); + /// Short Constructor with Computed ID list. 
/// /// This version of the constructor takes a computed CSeqDBIdSet diff --git a/c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp b/c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp index 1fe6a5a0..88ab063b 100644 --- a/c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp +++ b/c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_BLAST_SEQDB_READER___SEQDBCOMMON__HPP #define OBJTOOLS_BLAST_SEQDB_READER___SEQDBCOMMON__HPP -/* $Id: seqdbcommon.hpp 559849 2018-03-15 13:57:41Z fongah2 $ +/* $Id: seqdbcommon.hpp 581731 2019-03-05 16:42:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -131,6 +131,8 @@ enum ESeqDBAllocType { typedef Uint8 TTi; +typedef Uint4 TPig; + /// Blast DB v5 seqid list info struct NCBI_XOBJREAD_EXPORT SBlastSeqIdListInfo { @@ -214,6 +216,23 @@ public: vector oids; }; + struct SPigOid { + /// Constuct an SPigOid element from the given pig and oid. + /// @param pig_in A PIG, or 0 if none is available. + /// @param oid_in An OID, or -1 if none is available. + SPigOid(TPig pig_in = 0, int oid_in = -1) + : pig(pig_in), oid(oid_in) + { + } + + /// The PIG or 0 if unknown. + TPig pig; + + /// The OID or -1 if unknown. + int oid; + }; + + /// Possible sorting states enum ESortOrder { /// The array is unsorted or the sortedness is unknown. @@ -273,6 +292,11 @@ public: bool SiToOid(const string &si, int & oid); bool SiToOid(const string &si, int & oid, int & index); + + bool FindPig(TPig pig) const; + bool PigToOid(TPig pig, int & oid); + bool PigToOid(TPig pig, int & oid, int & index); + /// Test for existence of a Seq-id by type. /// /// This method uses FindGi or FindTi if the input ID is a GI or @@ -309,6 +333,11 @@ public: return m_SisOids[index]; } + const SPigOid & GetPigOid(int index) const + { + return m_PigsOids[index]; + } + /// Get the number of GIs in the array. 
int GetNumGis() const { @@ -337,10 +366,15 @@ public: return (int) m_TaxIdsOids.oids.size(); } + int GetNumPigs() const + { + return (int) m_PigsOids.size(); + } + /// Return false if there are elements present. bool Empty() const { - return ! (GetNumGis() || GetNumSis() || GetNumTis() || GetNumTaxIds()); + return ! (GetNumGis() || GetNumSis() || GetNumTis() || GetNumTaxIds() || GetNumPigs()); } /// Return true if there are elements present. @@ -391,6 +425,11 @@ public: m_SisOids[index].oid = oid; } + void SetPigTranslation(int index, int oid) + { + m_PigsOids[index].oid = oid; + } + int Size() const { return (int) m_GisOids.size(); @@ -429,6 +468,8 @@ public: /// TODO Get the seqid list? void GetSiList(vector& sis) const; + void GetPigList(vector& pigs) const; + set & GetTaxIdsList() { @@ -470,6 +511,19 @@ public: tids.insert(tax_ids.begin(), tax_ids.end()); } + void SetPigList(const vector & list) + { + ITERATE(vector, itr, list) { + m_PigsOids.push_back(*itr); + } + + } + + void AddPig(TPig pig) + { + m_PigsOids.push_back(pig); + } + /// Reserve space for GIs. void ReserveGis(size_t n) { @@ -487,6 +541,11 @@ public: m_SisOids.reserve(n); } + void ReservePigs(size_t n) + { + m_PigsOids.reserve(n); + } + /// Preprocess ids for ISAM string id lookup void PreprocessIdsForISAMSiLookup(); @@ -513,6 +572,8 @@ protected: /// Pairs of Seq-ids and OIDs. 
vector m_SisOids; + vector m_PigsOids; + STaxIdsOids m_TaxIdsOids; SBlastSeqIdListInfo m_ListInfo; @@ -581,6 +642,31 @@ inline void CSeqDBGiList::SetValue(int index, int oid) m_SisOids[index].oid = oid; } +template < > +inline int CSeqDBGiList::GetSize() const +{ + return (int) m_PigsOids.size(); +} + +template < > +inline TPig CSeqDBGiList::GetKey(int index) const +{ + return m_PigsOids[index].pig; +} + +template < > +inline bool CSeqDBGiList::IsValueSet(int index) const +{ + return (m_PigsOids[index].oid != -1); +} + +template < > +inline void CSeqDBGiList::SetValue(int index, int oid) +{ + m_PigsOids[index].oid = oid; +} + + /// CSeqDBBitVector /// /// This class defines a bit vector that is similar to vector, @@ -810,6 +896,11 @@ public: return (int) m_Sis.size(); } + int GetNumPigs() const + { + return (int) m_Pigs.size(); + } + bool IsGiList() const { return(GetNumGis() > 0); @@ -834,13 +925,18 @@ public: if(size == 0) { size = GetNumTis(); } + + if(size == 0) { + size = GetNumPigs(); + } + return size; } /// Return false if there are elements present. bool Empty() const { - return ! (GetNumGis() || GetNumTis() || GetNumSis()|| GetNumTaxIds()); + return ! (GetNumGis() || GetNumTis() || GetNumSis()|| GetNumTaxIds() || GetNumPigs()); } /// Return true if there are elements present. @@ -919,12 +1015,25 @@ public: m_Gis.reserve( new_list.size() ); m_Gis = new_list; } + + void SetPigList( const vector & new_list ) + { + m_Pigs.clear(); + m_Pigs.reserve( new_list.size() ); + m_Pigs = new_list; + } + /// Build ID set for this negative list. const vector & GetTiList() { return m_Tis; } + const vector & GetPigList() + { + return m_Pigs; + } + const vector & GetSiList() { return m_Sis; @@ -975,6 +1084,8 @@ protected: /// TIs to exclude from the SeqDB instance. vector m_Tis; + vector m_Pigs; + /// SeqIds to exclude from the SeqDB instance. 
vector m_Sis; set m_TaxIds; @@ -1088,6 +1199,12 @@ void SeqDB_ReadMemoryMixList(const char * fbeginp, vector & sis, bool * in_order); +NCBI_XOBJREAD_EXPORT +void SeqDB_ReadMemoryPigList(const char * fbeginp, + const char * fendp, + vector & pigs, + bool * in_order = 0); + /// Combine and quote a list of database names. /// /// SeqDB permits multiple databases to be opened by a single CSeqDB @@ -1204,6 +1321,12 @@ void SeqDB_ReadMixList(const string & fname, vector & sis, bool * in_order); +NCBI_XOBJREAD_EXPORT +void SeqDB_ReadPigList(const string & fname, + vector & pigs, + bool * in_order = 0); + + /// Read a text or binary GI list from a file. /// /// The GIs in a file are read into the provided vector. If the @@ -1262,7 +1385,9 @@ public: eGiList, eTiList, eSiList, - eMixList + eMixList, + ePigList, + eTaxIdList }; /// Build a GI list from a file. diff --git a/c++/include/objtools/cleanup/autogenerated_cleanup.hpp b/c++/include/objtools/cleanup/autogenerated_cleanup.hpp index 3620d584..3fdecf96 100755 --- a/c++/include/objtools/cleanup/autogenerated_cleanup.hpp +++ b/c++/include/objtools/cleanup/autogenerated_cleanup.hpp @@ -1,7 +1,7 @@ #ifndef AUTOGENERATEDCLEANUP__HPP #define AUTOGENERATEDCLEANUP__HPP -/* $Id: autogenerated_cleanup.hpp 256622 2011-03-07 17:46:10Z kornbluh $ +/* $Id: autogenerated_cleanup.hpp 580135 2019-02-11 16:13:34Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -150,7 +150,7 @@ #include #include -#include +#include "newcleanup.hpp" BEGIN_NCBI_SCOPE diff --git a/c++/include/objtools/cleanup/cleanup.hpp b/c++/include/objtools/cleanup/cleanup.hpp index e4196802..6e596f53 100644 --- a/c++/include/objtools/cleanup/cleanup.hpp +++ b/c++/include/objtools/cleanup/cleanup.hpp @@ -1,7 +1,7 @@ #ifndef CLEANUP___CLEANUP__HPP #define CLEANUP___CLEANUP__HPP -/* $Id: cleanup.hpp 567012 2018-07-11 17:07:11Z bollin $ +/* $Id: cleanup.hpp 580121 2019-02-11 16:07:50Z ivanov $ * 
=========================================================================== * * PUBLIC DOMAIN NOTICE @@ -37,6 +37,7 @@ #include #include #include +#include #include @@ -52,6 +53,7 @@ class CSeq_submit; class COrgName; class CSubmit_block; class CAuthor; +class CAuth_list; class CName_std; class CSeq_entry_Handle; @@ -573,7 +575,12 @@ public: // and apply specific cleanups to known types of User-object static bool CleanupUserObject(CUser_object& object); + // for cleaning up authors, lists of authors, and affiliation static bool CleanupAuthor(CAuthor& author, bool fix_initials = true); + static bool CleanupAuthList(CAuth_list& al, bool fix_initials = true); + static void ResetAuthorNames(CAuth_list::TNames& names); + static bool CleanupAffil(CAffil& af); + static bool IsEmpty(const CAuth_list::TAffil& affil); // for cleaning up collection-date subsource qualifiers static bool CleanupCollectionDates(CSeq_entry_Handle seh, bool month_first); @@ -610,6 +617,8 @@ private: static void s_ExtractSuffixFromInitials(CName_std& name); static void s_FixEtAl(CName_std& name); + // for cleaning pubdesc + static bool s_Flatten(CPub_equiv& pub_equiv); }; diff --git a/c++/include/objtools/cleanup/cleanup_pub.hpp b/c++/include/objtools/cleanup/cleanup_pub.hpp new file mode 100644 index 00000000..b9420a7f --- /dev/null +++ b/c++/include/objtools/cleanup/cleanup_pub.hpp @@ -0,0 +1,238 @@ +#ifndef CLEANUP___CLEANUP_PUB__HPP +#define CLEANUP___CLEANUP_PUB__HPP + +/* $Id: cleanup_pub.hpp 580138 2019-02-11 16:15:05Z ivanov $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. 
This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Author: Colleen Bollin + * + * File Description: + * Basic Cleanup for publications. + * ....... + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +BEGIN_NCBI_SCOPE +BEGIN_SCOPE(objects) + +class CSeq_submit; +class CSubmit_block; +class CAuthor; +class CName_std; + + + +class CPubCleaner : public CObject +{ +public: + + virtual ~CPubCleaner() {}; + virtual bool Clean(bool fix_initials, bool strip_serial) = 0; + virtual bool IsEmpty() = 0; + +protected: + enum EImprintBC { + eImprintBC_AllowStatusChange = 2, + eImprintBC_ForbidStatusChange + }; + + static bool CleanImprint(CImprint& imprint, EImprintBC is_status_change_allowed); +}; + + +class CPubEquivCleaner : public CPubCleaner +{ +public: + CPubEquivCleaner(CPub_equiv& equiv) : m_Equiv(equiv) {}; + virtual ~CPubEquivCleaner() {}; + + virtual bool Clean(bool fix_initials, bool strip_serial); + virtual bool IsEmpty(); + + static bool ShouldWeFixInitials(const CPub_equiv& equiv); + +protected: + CPub_equiv& m_Equiv; + + static bool s_Flatten(CPub_equiv& equiv); +}; + + +class CCitGenCleaner : public CPubCleaner +{ +public: + 
CCitGenCleaner(CCit_gen& gen) : m_Gen(gen) { } + virtual ~CCitGenCleaner() {}; + + virtual bool Clean(bool fix_initials, bool strip_serial); + virtual bool IsEmpty(); + +protected: + CCit_gen& m_Gen; +}; + + +class CCitSubCleaner : public CPubCleaner +{ +public: + CCitSubCleaner(CCit_sub& sub) : m_Sub(sub) { } + virtual ~CCitSubCleaner() {}; + + virtual bool Clean(bool fix_initials, bool strip_serial); + virtual bool IsEmpty(); + +protected: + CCit_sub& m_Sub; +}; + + +class CCitArtCleaner : public CPubCleaner +{ +public: + CCitArtCleaner(CCit_art& art) : m_Art(art) { } + virtual ~CCitArtCleaner() {}; + + virtual bool Clean(bool fix_initials, bool strip_serial); + virtual bool IsEmpty() { return false; } + +protected: + CCit_art& m_Art; +}; + + +class CCitBookCleaner : public CPubCleaner +{ +public: + CCitBookCleaner(CCit_book& book) : m_Book(book) { } + virtual ~CCitBookCleaner() {}; + + virtual bool Clean(bool fix_initials, bool strip_serial); + virtual bool IsEmpty() { return false; } + +protected: + CCit_book& m_Book; +}; + + +class CCitJourCleaner : public CPubCleaner +{ +public: + CCitJourCleaner(CCit_jour& jour) : m_Jour(jour) { } + virtual ~CCitJourCleaner() {}; + + virtual bool Clean(bool fix_initials, bool strip_serial); + virtual bool IsEmpty() { return false; } + +protected: + CCit_jour& m_Jour; +}; + + +class CCitProcCleaner : public CPubCleaner +{ +public: + CCitProcCleaner(CCit_proc& proc) : m_Proc(proc) { } + virtual ~CCitProcCleaner() {}; + + virtual bool Clean(bool fix_initials, bool strip_serial); + virtual bool IsEmpty() { return false; } + +protected: + CCit_proc& m_Proc; +}; + + +class CCitPatCleaner : public CPubCleaner +{ +public: + CCitPatCleaner(CCit_pat& pat) : m_Pat(pat) { } + virtual ~CCitPatCleaner() {}; + + virtual bool Clean(bool fix_initials, bool strip_serial); + virtual bool IsEmpty() { return false; } + +protected: + CCit_pat& m_Pat; +}; + + +class CCitLetCleaner : public CPubCleaner +{ +public: + CCitLetCleaner(CCit_let& let) : 
m_Let(let) { } + virtual ~CCitLetCleaner() {}; + + virtual bool Clean(bool fix_initials, bool strip_serial); + virtual bool IsEmpty() { return false; } + +protected: + CCit_let& m_Let; +}; + + +class CMedlineEntryCleaner : public CPubCleaner +{ +public: + CMedlineEntryCleaner(CMedline_entry& men) : m_Men(men) { } + virtual ~CMedlineEntryCleaner() {}; + + virtual bool Clean(bool fix_initials, bool strip_serial); + virtual bool IsEmpty() { return false; } + +protected: + CMedline_entry& m_Men; +}; + + +CRef PubCleanerFactory(CPub& pub); + + +class NCBI_CLEANUP_EXPORT CCleanupPub : public CObject +{ +public: + static bool CleanPubdesc(CPubdesc& pubdesc, bool strip_serial); + + +private: + static bool x_CleanPubdescComment(string& str); +}; + + + +END_SCOPE(objects) +END_NCBI_SCOPE + +#endif /* CLEANUP___CLEANUP__HPP */ diff --git a/c++/include/objtools/cleanup/fix_feature_id.hpp b/c++/include/objtools/cleanup/fix_feature_id.hpp new file mode 100644 index 00000000..dcfbec3d --- /dev/null +++ b/c++/include/objtools/cleanup/fix_feature_id.hpp @@ -0,0 +1,57 @@ +/* $Id: fix_feature_id.hpp 578197 2019-01-15 16:38:39Z ivanov $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. 
+ * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * + * Authors: Igor Filippov + */ +#ifndef _FIX_FEATURE_ID_H_ +#define _FIX_FEATURE_ID_H_ + +#include +#include +#include +#include +#include +#include +#include + +BEGIN_NCBI_SCOPE + +class NCBI_CLEANUP_EXPORT CFixFeatureId +{ +public: + static objects::CObject_id::TId s_FindHighestFeatureId(const objects::CSeq_entry_Handle& entry); + static void s_ApplyToSeqInSet(objects::CSeq_entry_Handle tse, map > &changed_feats); + static void s_UpdateFeatureIds(const objects::CSeq_entry_Handle& entry, map > &changed_feats, int offset); + static void s_ReassignFeatureIds(const objects::CSeq_entry_Handle& entry, map > &changed_feats); +private: + static void s_MakeIDPairs(const objects::CSeq_entry_Handle& entry, map &id_pairs); +}; + + +END_NCBI_SCOPE + +#endif + // _FIX_FEATURE_ID_H_ diff --git a/c++/include/objtools/cleanup/newcleanup.hpp b/c++/include/objtools/cleanup/newcleanup.hpp deleted file mode 100755 index 857d739f..00000000 --- a/c++/include/objtools/cleanup/newcleanup.hpp +++ /dev/null @@ -1,325 +0,0 @@ -#ifndef NEWCLEANUP__HPP -#define NEWCLEANUP__HPP - -/* -* =========================================================================== -* -* PUBLIC DOMAIN NOTICE -* National Center for Biotechnology Information -* -* This software/database is a "United States Government Work" under the -* terms of the United States Copyright Act. It was written as part of -* the author's official duties as a United States Government employee and -* thus cannot be copyrighted. 
This software/database is freely available -* to the public for use. The National Library of Medicine and the U.S. -* Government have not placed any restriction on its use or reproduction. -* -* Although all reasonable efforts have been taken to ensure the accuracy -* and reliability of the software and data, the NLM and the U.S. -* Government do not and cannot warrant the performance or results that -* may be obtained by using this software or data. The NLM and the U.S. -* Government disclaim all warranties, express or implied, including -* warranties of performance, merchantability or fitness for any particular -* purpose. -* -* Please cite the author in any work or product based on this material. -* -* =========================================================================== -* -* Author: Robert Smith, Jonathan Kans -* -* File Description: -* Basic and Extended Cleanup of CSeq_entries. -* -* =========================================================================== -*/ - -#include -#include -#include - -#include - - - -BEGIN_NCBI_SCOPE -BEGIN_SCOPE(objects) - -class CSeq_entry; -class CSeq_submit; -class CBioseq; -class CBioseq_set; -class CSeq_annot; -class CSeq_feat; -class CSeqFeatData; -class CSeq_descr; -class CSeqdesc; -class CSeq_loc; -class CGene_ref; -class CProt_ref; -class CRNA_ref; -class CImp_feat; -class CGb_qual; -class CDbtag; -class CUser_field; -class CUser_object; -class CObject_id; -class CGB_block; -class CEMBL_block; -class CPubdesc; -class CPub_equiv; -class CPub; -class CCit_gen; -class CCit_sub; -class CCit_art; -class CCit_book; -class CCit_pat; -class CCit_let; -class CCit_proc; -class CCit_jour; -class CPubMedId; -class CAuth_list; -class CAuthor; -class CAffil; -class CPerson_id; -class CName_std; -class CBioSource; -class COrg_ref; -class COrgName; -class COrgMod; -class CSubSource; -class CMolInfo; -class CCdregion; -class CDate; -class CDate_std; -class CImprint; -class CSubmit_block; -class CSeq_align; -class CDense_diag; 
-class CDense_seg; -class CStd_seg; -class CMedline_entry; -class CPub_set; -class CTrna_ext; - -class CSeq_entry_Handle; -class CBioseq_Handle; -class CBioseq_set_Handle; -class CSeq_annot_Handle; -class CSeq_feat_Handle; - -class CObjectManager; -class CScope; - - -class CNewCleanup_imp -{ -public: - - static const int NCBI_CLEANUP_VERSION; - - // some cleanup functions will return a value telling you what to do - enum EAction { - eAction_Nothing = 1, - eAction_Erase - }; - - // Constructor - CNewCleanup_imp (CRef changes, Uint4 options = 0); - - // Destructor - virtual ~CNewCleanup_imp (); - - /// Main methods - - void BasicCleanupSeqEntry ( - CSeq_entry& se - ); - - void BasicCleanupSeqSubmit ( - CSeq_submit& ss - ); - - void BasicCleanupSeqAnnot ( - CSeq_annot& sa - ); - - void ExtendedCleanupSeqEntry ( - CSeq_entry& se - ); - - void ExtendedCleanupSeqSubmit ( - CSeq_submit& ss - ); - - void ExtendedCleanupSeqAnnot ( - CSeq_annot& sa - ); - - void SubmitblockBC(CSubmit_block& sb); - -private: - - // many more methods and variables ... - - // We do not include the usual "x_" prefix for private functions - // because we want to be able to distinguish between higher-level - // functions like those just below, and the lower-level - // functions like those farther below. 
- - void ChangeMade (CCleanupChange::EChanges e); - - void SetupBC (CSeq_entry& se); - - void SeqsetBC (CBioseq_set& bss); - - void SeqIdBC( CSeq_id &seq_id ); - - void GBblockBC (CGB_block& gbk); - void EMBLblockBC (CEMBL_block& emb); - - void BiosourceBC (CBioSource& bsc); - void OrgrefBC (COrg_ref& org); - void OrgnameBC (COrgName& onm); - void OrgmodBC (COrgMod& omd); - void SubsourceBC (CSubSource& sbs); - - void DbtagBC (CDbtag& dbt); - - void PubdescBC (CPubdesc& pub); - void PubEquivBC (CPub_equiv& pub_equiv); - void PubBC(CPub& pub, bool fix_initials); - void CitGenBC(CCit_gen& cg, bool fix_initials); - void CitSubBC(CCit_sub& cs, bool fix_initials); - void CitArtBC(CCit_art& ca, bool fix_initials); - void CitBookBC(CCit_book& cb, bool fix_initials); - void CitPatBC(CCit_pat& cp, bool fix_initials); - void CitLetBC(CCit_let& cl, bool fix_initials); - void CitProcBC(CCit_proc& cb, bool fix_initials); - void CitJourBC(CCit_jour &j, bool fix_initials); - void MedlineEntryBC(CMedline_entry& ml, bool fix_initials); - void AuthListBC( CAuth_list& al, bool fix_initials ); - void AffilBC( CAffil& af ); - void ImprintBC( CImprint& imp ); - void PubSetBC( CPub_set &pub_set ); - - void ImpFeatBC( CImp_feat& imf, CSeq_feat& sf ); - - void SiteFeatBC( CSeqFeatData::ESite &site, CSeq_feat& sf ); - - void SeqLocBC( CSeq_loc &loc ); - void ConvertSeqLocWholeToInt( CSeq_loc &loc ); - - void SeqfeatBC (CSeq_feat& sf); - - void GBQualBC (CGb_qual& gbq); - void Except_textBC (string& except_text); - - void GenerefBC (CGene_ref& gr); - void ProtrefBC (CProt_ref& pr); - void RnarefBC (CRNA_ref& rr); - - void GeneFeatBC (CGene_ref& gr, CSeq_feat& sf); - void ProtFeatfBC (CProt_ref& pr, CSeq_feat& sf); - void RnaFeatBC (CRNA_ref& rr, CSeq_feat& sf); - void CdregionFeatBC (CCdregion& cds, CSeq_feat& seqfeat); - - void DeltaExtBC( CDelta_ext & delta_ext, CSeq_inst &seq_inst ); - - void UserObjectBC( CUser_object &user_object ); - - // void XxxxxxBC (Cxxxxx& xxx); - - // Prohibit 
copy constructor & assignment operator - CNewCleanup_imp (const CNewCleanup_imp&); - CNewCleanup_imp& operator= (const CNewCleanup_imp&); - -private: - - enum EGBQualOpt { - eGBQualOpt_normal, - eGBQualOpt_CDSMode - }; - - // Gb_qual cleanup. - EAction GBQualSeqFeatBC(CGb_qual& gbq, CSeq_feat& seqfeat); - - void x_AddNcbiCleanupObject( CSeq_entry &seq_entry ); - - bool x_CleanupRptUnit(CGb_qual& gbq); - void x_ChangeTransposonToMobileElement(CGb_qual& gbq); - void x_ChangeInsertionSeqToMobileElement(CGb_qual& gbq); - void x_ExpandCombinedQuals(CSeq_feat::TQual& quals); - EAction x_GeneGBQualBC( CGene_ref& gene, const CGb_qual& gb_qual ); - EAction x_SeqFeatCDSGBQualBC(CSeq_feat& feat, CCdregion& cds, const CGb_qual& gb_qual); - EAction x_SeqFeatRnaGBQualBC(CSeq_feat& feat, CRNA_ref& rna, CGb_qual& gb_qual); - EAction x_ProtGBQualBC(CProt_ref& prot, const CGb_qual& gb_qual, EGBQualOpt opt ); - - // publication-related cleanup - void x_FlattenPubEquiv(CPub_equiv& pe); - - // Date-related - void x_DateStdBC( CDate_std& date ); - - void x_AddReplaceQual(CSeq_feat& feat, const string& str); - - void x_SeqIntervalBC( CSeq_interval & seq_interval ); - - void x_SplitDbtag( CDbtag &dbt, vector< CRef< CDbtag > > & out_new_dbtags ); - - void x_SeqFeatTRNABC( CSeq_feat& feat, CTrna_ext & tRNA ); - - // modernize PCR Primer - void x_ModernizePCRPrimers( CBioSource &biosrc ); - - void x_FixUnsetMolFromBiomol( CMolInfo& molinfo, CBioseq &bioseq ); - - string x_ExtractSatelliteFromComment( string &comment ); - - void x_CleanupECNumber( string &ec_num ); - - void x_CleanupAndRepairInference( string &inference ); - - void x_MendSatelliteQualifier( string &val ); - - // e.g. if ends with ",..", turn into "..." 
- void x_FixUpEllipsis( string &str ); - - void x_MoveCdregionXrefsToProt (CCdregion& cds, CSeq_feat& seqfeat); - bool x_InGpsGenomic( const CSeq_feat& seqfeat ); - - void x_AddNonCopiedQual( - vector< CRef< CGb_qual > > &out_quals, - const char *qual, - const char *val ); - - void x_GBQualToOrgRef( COrg_ref &org, CSeq_feat &seqfeat ); - void x_MoveSeqdescOrgToSourceOrg( COrg_ref &org, CSeqdesc &seqdesc ); - void x_MoveSeqfeatOrgToSourceOrg( COrg_ref &org, CSeq_feat &seqfeat ); - - void x_CleanupStringMarkChanged( std::string &str ); - void x_ConvertDoubleQuotesMarkChanged( std::string &str ); - - void x_SortUniqSeqFeat( CSeq_feat& seq_feat ); - void x_SortUniqOrgRef( COrg_ref& org ); - void x_SortUniqBiosource( CBioSource& biosrc ); - - -protected: - - CRef m_Changes; - Uint4 m_Options; - CRef m_Objmgr; - CRef m_Scope; - bool m_IsEmblOrDdbj; - bool m_StripSerial; - bool m_IsGpipe; - - friend class CAutogeneratedCleanup; -}; - - -END_SCOPE(objects) -END_NCBI_SCOPE - -#endif /* NEWCLEANUP__HPP */ diff --git a/c++/include/objtools/data_loaders/genbank/blob_id.hpp b/c++/include/objtools/data_loaders/genbank/blob_id.hpp index b73e7ab6..36f9d9e6 100644 --- a/c++/include/objtools/data_loaders/genbank/blob_id.hpp +++ b/c++/include/objtools/data_loaders/genbank/blob_id.hpp @@ -2,7 +2,7 @@ #define BLOB_ID__HPP_INCLUDED /* */ -/* $Id: blob_id.hpp 440703 2014-07-16 15:38:41Z vasilche $ +/* $Id: blob_id.hpp 578552 2019-01-22 15:38:07Z ivanov $ * =========================================================================== * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information @@ -181,6 +181,13 @@ enum EBlobContentsMask typedef int TBlobContentsMask; +enum EGBErrorAction { + eGBErrorAction_ignore, + eGBErrorAction_report, + eGBErrorAction_throw +}; + + END_SCOPE(objects) END_NCBI_SCOPE diff --git a/c++/include/objtools/data_loaders/genbank/gbloader.hpp b/c++/include/objtools/data_loaders/genbank/gbloader.hpp index c9618993..9ba89d63 100644 --- 
a/c++/include/objtools/data_loaders/genbank/gbloader.hpp +++ b/c++/include/objtools/data_loaders/genbank/gbloader.hpp @@ -1,7 +1,7 @@ #ifndef GBLOADER__HPP_INCLUDED #define GBLOADER__HPP_INCLUDED -/* $Id: gbloader.hpp 572532 2018-10-16 12:00:58Z ivanov $ +/* $Id: gbloader.hpp 578552 2019-01-22 15:38:07Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -454,6 +454,17 @@ public: return m_HasHUPIncluded; } + + EGBErrorAction GetPTISErrorAction(void) const + { + return m_PTISErrorAction; + } + + void SetPTISErrorAction(EGBErrorAction action) + { + m_PTISErrorAction = action; + } + virtual CObjectManager::TPriority GetDefaultPriority(void) const override; protected: @@ -493,6 +504,7 @@ private: bool m_AlwaysLoadNamedAcc; bool m_AddWGSMasterDescr; bool m_HasHUPIncluded; + EGBErrorAction m_PTISErrorAction; string m_WebCookie; // diff --git a/c++/include/objtools/data_loaders/genbank/gbloader_params.h b/c++/include/objtools/data_loaders/genbank/gbloader_params.h index a605bbeb..aa3ee96b 100644 --- a/c++/include/objtools/data_loaders/genbank/gbloader_params.h +++ b/c++/include/objtools/data_loaders/genbank/gbloader_params.h @@ -1,7 +1,7 @@ #ifndef GBLOADER_PARAMS__HPP_INCLUDED #define GBLOADER_PARAMS__HPP_INCLUDED -/* $Id: gbloader_params.h 475861 2015-08-12 15:33:37Z vasilche $ +/* $Id: gbloader_params.h 578552 2019-01-22 15:38:07Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -67,5 +67,10 @@ #define NCBI_GBLOADER_PARAM_ALWAYS_LOAD_NAMED_ACC "ALWAYS_LOAD_NAMED_ACC" /* Add WGS master descriptors to all WGS sequences */ #define NCBI_GBLOADER_PARAM_ADD_WGS_MASTER "ADD_WGS_MASTER" +/* How GBLoader should react on PTIS failure */ +#define NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION "PTIS_ERROR_ACTION" +#define NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION_IGNORE "ignore" +#define NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION_REPORT "report" +#define 
NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION_THROW "throw" #endif diff --git a/c++/include/objtools/data_loaders/genbank/impl/processors.hpp b/c++/include/objtools/data_loaders/genbank/impl/processors.hpp index 88783bdc..89806bbe 100644 --- a/c++/include/objtools/data_loaders/genbank/impl/processors.hpp +++ b/c++/include/objtools/data_loaders/genbank/impl/processors.hpp @@ -1,6 +1,6 @@ #ifndef PROCESSORS__HPP_INCLUDED #define PROCESSORS__HPP_INCLUDED -/* $Id: processors.hpp 487082 2015-12-15 00:25:55Z vasilche $ +/* $Id: processors.hpp 576534 2018-12-17 19:38:30Z ivanov $ * =========================================================================== * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information @@ -337,7 +337,8 @@ public: enum { eSat_ANNOT_CDD = 10, eSat_ANNOT = 26, - eSat_VDB_WGS = 1000 + eSat_VDB_WGS_MIN = 1000, + eSat_VDB_WGS_MAX = 1999 }; enum { eSubSat_SNP = 1<<0, diff --git a/c++/include/objtools/data_loaders/genbank/impl/request_result.hpp b/c++/include/objtools/data_loaders/genbank/impl/request_result.hpp index b37def60..3f551203 100644 --- a/c++/include/objtools/data_loaders/genbank/impl/request_result.hpp +++ b/c++/include/objtools/data_loaders/genbank/impl/request_result.hpp @@ -1,7 +1,7 @@ #ifndef GBLOADER_REQUEST_RESULT__HPP_INCLUDED #define GBLOADER_REQUEST_RESULT__HPP_INCLUDED -/* $Id: request_result.hpp 493171 2016-02-24 19:31:13Z vasilche $ +/* $Id: request_result.hpp 578552 2019-01-22 15:38:07Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1214,6 +1214,8 @@ public: virtual bool GetAddWGSMasterDescr(void) const; + virtual EGBErrorAction GetPTISErrorAction(void) const; + CWriter* GetIdWriter(void) const; CWriter* GetBlobWriter(void) const; diff --git a/c++/include/objtools/data_loaders/genbank/reader.hpp b/c++/include/objtools/data_loaders/genbank/reader.hpp index e67f001c..ef6c86f8 100644 --- a/c++/include/objtools/data_loaders/genbank/reader.hpp +++ 
b/c++/include/objtools/data_loaders/genbank/reader.hpp @@ -1,6 +1,6 @@ #ifndef READER__HPP_INCLUDED #define READER__HPP_INCLUDED -/* $Id: reader.hpp 568378 2018-08-03 14:54:41Z dondosha $ +/* $Id: reader.hpp 579016 2019-01-29 16:46:05Z ivanov $ * =========================================================================== * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information @@ -296,6 +296,9 @@ protected: virtual void x_DisconnectAtSlot(TConn conn, bool failed); // force connection at connection slot with key 'conn' virtual void x_ConnectAtSlot(TConn conn) = 0; + // report failed or stale connection + void x_ReportDisconnect(const char* reader, const char* server, + TConn conn, bool failed) const; private: friend class CReaderAllocatedConnection; diff --git a/c++/include/objtools/data_loaders/genbank/reader_interface.hpp b/c++/include/objtools/data_loaders/genbank/reader_interface.hpp index 270968cc..34ded228 100644 --- a/c++/include/objtools/data_loaders/genbank/reader_interface.hpp +++ b/c++/include/objtools/data_loaders/genbank/reader_interface.hpp @@ -1,6 +1,6 @@ #ifndef READER_INTERFACE__HPP_INCLUDED #define READER_INTERFACE__HPP_INCLUDED -/* $Id: reader_interface.hpp 534193 2017-04-25 14:00:21Z vasilche $ +/* $Id: reader_interface.hpp 579016 2019-01-29 16:46:05Z ivanov $ * =========================================================================== * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information @@ -38,7 +38,7 @@ class CReader; END_SCOPE(objects) -NCBI_DECLARE_INTERFACE_VERSION(objects::CReader, "xreader", 7, 0, 0); +NCBI_DECLARE_INTERFACE_VERSION(objects::CReader, "xreader", 8, 0, 0); template<> class CDllResolver_Getter diff --git a/c++/include/objtools/edit/cds_fix.hpp b/c++/include/objtools/edit/cds_fix.hpp index 13d4bc5d..62caabcc 100644 --- a/c++/include/objtools/edit/cds_fix.hpp +++ b/c++/include/objtools/edit/cds_fix.hpp @@ -1,4 +1,4 @@ -/* $Id: cds_fix.hpp 574198 2018-11-07 18:41:16Z ivanov $ +/* $Id: cds_fix.hpp 
577635 2019-01-07 19:27:21Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -60,6 +60,7 @@ NCBI_XOBJEDIT_EXPORT CRef GetNewLocalProtId(const string &id_b NCBI_XOBJEDIT_EXPORT CRef GetNewProtId(objects::CBioseq_Handle bsh, int &offset, string& id_label, bool general_only); NCBI_XOBJEDIT_EXPORT vector > GetNewProtIdFromExistingProt(objects::CBioseq_Handle bsh, int &offset, string& id_label); NCBI_XOBJEDIT_EXPORT string GetIdHashOrValue(const string &base, int offset); +NCBI_XOBJEDIT_EXPORT bool IsGeneralIdProtPresent(objects::CSeq_entry_Handle tse); // these methods are being moved to ncbi::feature namespace // please modify your usage of them diff --git a/c++/include/objtools/edit/text_object_description.hpp b/c++/include/objtools/edit/text_object_description.hpp index 737c18fc..a9ce3285 100644 --- a/c++/include/objtools/edit/text_object_description.hpp +++ b/c++/include/objtools/edit/text_object_description.hpp @@ -1,4 +1,4 @@ -/* $Id: text_object_description.hpp 575170 2018-11-26 13:17:25Z ivanov $ +/* $Id: text_object_description.hpp 575302 2018-11-27 15:25:04Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -38,10 +38,12 @@ BEGIN_NCBI_SCOPE BEGIN_SCOPE(objects) BEGIN_SCOPE(edit) -NCBI_DLL_IMPORT string GetTextObjectDescription(const CSeq_feat& sf, CScope& scope); -NCBI_DLL_IMPORT string GetTextObjectDescription(const CSeqdesc& sd, CScope& scope); -NCBI_DLL_IMPORT string GetTextObjectDescription(const CBioseq& bs, CScope& scope); -NCBI_DLL_IMPORT string GetTextObjectDescription(const CBioseq_set& bs, CScope& scope); +NCBI_XOBJEDIT_EXPORT string GetTextObjectDescription(const CSeq_feat& sf, CScope& scope); +NCBI_XOBJEDIT_EXPORT string GetTextObjectDescription(const CSeqdesc& sd, CScope& scope); +NCBI_XOBJEDIT_EXPORT string GetTextObjectDescription(const CBioseq& bs, CScope& scope); +NCBI_XOBJEDIT_EXPORT string 
GetTextObjectDescription(const CBioseq_set& bs, CScope& scope); +// Giving the same name to all functions was a creative idea! +NCBI_XOBJEDIT_EXPORT void GetTextObjectDescription(const CSeq_feat& seq_feat, CScope& scope, string &type, string &context, string &location, string &locus_tag); END_SCOPE(edit) END_SCOPE(objects) diff --git a/c++/include/objtools/format/flat_file_config.hpp b/c++/include/objtools/format/flat_file_config.hpp index dcc317ee..45d36b57 100644 --- a/c++/include/objtools/format/flat_file_config.hpp +++ b/c++/include/objtools/format/flat_file_config.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT___FLAT_FILE_CONFIG__HPP #define OBJTOOLS_FORMAT___FLAT_FILE_CONFIG__HPP -/* $Id: flat_file_config.hpp 573607 2018-10-30 11:57:34Z ivanov $ +/* $Id: flat_file_config.hpp 578289 2019-01-16 16:33:46Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -196,7 +196,8 @@ public: fDisableAnnotRefs = 1 << 10, fUseSeqEntryIndexer = 1 << 11, fUseAutoDef = 1 << 12, - fIgnoreExistingTitle = 1 << 13 + fIgnoreExistingTitle = 1 << 13, + fGeneRNACDSFeatures = 1 << 14 }; enum EView { @@ -582,6 +583,7 @@ public: bool UseSeqEntryIndexer (void) const; bool UseAutoDef (void) const; bool IgnoreExistingTitle (void) const; + bool GeneRNACDSFeatures (void) const; // setters void SetCustom(const TCustom& custom) { m_Custom = custom; } @@ -595,6 +597,7 @@ public: CFlatFileConfig& SetUseSeqEntryIndexer (bool val = true); CFlatFileConfig& SetUseAutoDef (bool val = true); CFlatFileConfig& SetIgnoreExistingTitle (bool val = true); + CFlatFileConfig& SetGeneRNACDSFeatures (bool val = true); // adjust mode dependant flags for RefSeq void SetRefSeqConventions(void); @@ -781,6 +784,7 @@ CUSTOM_ARG_IMP(DisableAnnotRefs) CUSTOM_ARG_IMP(UseSeqEntryIndexer) CUSTOM_ARG_IMP(UseAutoDef) CUSTOM_ARG_IMP(IgnoreExistingTitle) +CUSTOM_ARG_IMP(GeneRNACDSFeatures) #undef FLAG_ARG_IMP #undef FLAG_ARG_GET diff --git 
a/c++/include/objtools/format/items/accession_item.hpp b/c++/include/objtools/format/items/accession_item.hpp index fbf856a9..c42d592e 100644 --- a/c++/include/objtools/format/items/accession_item.hpp +++ b/c++/include/objtools/format/items/accession_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___ACCESSION_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___ACCESSION_ITEM__HPP -/* $Id: accession_item.hpp 554019 2017-12-27 15:18:41Z dondosha $ +/* $Id: accession_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -71,6 +71,8 @@ public: const CSeq_loc& GetRegion(void) const; bool IsNuc(void) const; bool IsProt(void) const; + + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/alignment_item.hpp b/c++/include/objtools/format/items/alignment_item.hpp index 9904ec77..103fec6e 100644 --- a/c++/include/objtools/format/items/alignment_item.hpp +++ b/c++/include/objtools/format/items/alignment_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___ALIGNMENT_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___ALIGNMENT_ITEM__HPP -/* $Id: alignment_item.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: alignment_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -57,6 +57,7 @@ public: const CSeq_align& GetAlign(void) const { return *m_Align; } + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); // data diff --git a/c++/include/objtools/format/items/basecount_item.hpp b/c++/include/objtools/format/items/basecount_item.hpp index 56cc9cc6..d5fc2f41 100644 --- a/c++/include/objtools/format/items/basecount_item.hpp +++ b/c++/include/objtools/format/items/basecount_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___BASECOUNT_ITEM__HPP #define 
OBJTOOLS_FORMAT_ITEMS___BASECOUNT_ITEM__HPP -/* $Id: basecount_item.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: basecount_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -67,6 +67,7 @@ public: SIZE_TYPE GetT(void) const { return m_T; } SIZE_TYPE GetOther(void) const { return m_Other; } + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/comment_item.hpp b/c++/include/objtools/format/items/comment_item.hpp index 1775c1bd..9f44b469 100644 --- a/c++/include/objtools/format/items/comment_item.hpp +++ b/c++/include/objtools/format/items/comment_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___COMMENT_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___COMMENT_ITEM__HPP -/* $Id: comment_item.hpp 557325 2018-02-14 00:47:52Z kans $ +/* $Id: comment_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -149,6 +149,7 @@ public: static void ResetFirst(void) { sm_FirstComment = true; } + virtual EItem GetItemType(void) const; protected: enum EPeriod { diff --git a/c++/include/objtools/format/items/contig_item.hpp b/c++/include/objtools/format/items/contig_item.hpp index 6c5038c6..23e4ea79 100644 --- a/c++/include/objtools/format/items/contig_item.hpp +++ b/c++/include/objtools/format/items/contig_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___CONTIG_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___CONTIG_ITEM__HPP -/* $Id: contig_item.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: contig_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -60,6 +60,7 @@ public: const CSeq_loc& GetLoc(void) const { return *m_Loc; } + virtual EItem GetItemType(void) const; private: void 
x_GatherInfo(CBioseqContext& ctx); // data diff --git a/c++/include/objtools/format/items/ctrl_items.hpp b/c++/include/objtools/format/items/ctrl_items.hpp index 63ae1194..d9d266a6 100644 --- a/c++/include/objtools/format/items/ctrl_items.hpp +++ b/c++/include/objtools/format/items/ctrl_items.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___CTRL_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___CTRL_ITEM__HPP -/* $Id: ctrl_items.hpp 162859 2009-06-10 16:36:10Z ludwigf $ +/* $Id: ctrl_items.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -69,6 +69,8 @@ public: void Format(IFormatter& f, IFlatTextOStream& text_os) const { f.Start(text_os); } + + virtual EItem GetItemType(void) const; private: void x_SetDate(CSeq_entry_Handle); string m_Date; @@ -91,6 +93,8 @@ public: void Format(IFormatter& f, IFlatTextOStream& text_os) const { f.StartSection(*this, text_os); } + + virtual EItem GetItemType(void) const; }; @@ -107,6 +111,8 @@ public: void Format(IFormatter& f, IFlatTextOStream& text_os) const { f.EndSection(*this, text_os); } + + virtual EItem GetItemType(void) const; }; @@ -123,6 +129,8 @@ public: void Format(IFormatter& f, IFlatTextOStream& text_os) const { f.End(text_os); } + + virtual EItem GetItemType(void) const; }; diff --git a/c++/include/objtools/format/items/date_item.hpp b/c++/include/objtools/format/items/date_item.hpp index 8dca5b97..e56fd87a 100644 --- a/c++/include/objtools/format/items/date_item.hpp +++ b/c++/include/objtools/format/items/date_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___DATE_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___DATE_ITEM__HPP -/* $Id: date_item.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: date_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -61,6 +61,7 @@ public: const CDate* GetCreateDate(void) const { return 
m_CreateDate; } const CDate* GetUpdateDate(void) const { return m_UpdateDate; } + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/dbsource_item.hpp b/c++/include/objtools/format/items/dbsource_item.hpp index e9b6bd05..f6277833 100644 --- a/c++/include/objtools/format/items/dbsource_item.hpp +++ b/c++/include/objtools/format/items/dbsource_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___DBSOURCE_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___DBSOURCE_ITEM__HPP -/* $Id: dbsource_item.hpp 374454 2012-09-11 13:22:19Z kornbluh $ +/* $Id: dbsource_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -58,6 +58,7 @@ public: const TDBSource& GetDBSource(void) const { return m_DBSource; } + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); void x_AddPIRBlock(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/defline_item.hpp b/c++/include/objtools/format/items/defline_item.hpp index 5bc51da4..81fcf9f9 100644 --- a/c++/include/objtools/format/items/defline_item.hpp +++ b/c++/include/objtools/format/items/defline_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___DEFLINE_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___DEFLINE_ITEM__HPP -/* $Id: defline_item.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: defline_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -58,6 +58,7 @@ public: const string& GetDefline(void) const; + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/feature_item.hpp b/c++/include/objtools/format/items/feature_item.hpp index 4e4907d8..9c751b38 100644 --- a/c++/include/objtools/format/items/feature_item.hpp +++ 
b/c++/include/objtools/format/items/feature_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___FLAT_FEATURE__HPP #define OBJTOOLS_FORMAT_ITEMS___FLAT_FEATURE__HPP -/* $Id: feature_item.hpp 561799 2018-04-11 22:46:12Z kans $ +/* $Id: feature_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -66,6 +66,7 @@ public: const CSeq_id& GetId(void) const { return *m_Id; } // for FTable format + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); @@ -166,6 +167,8 @@ public: // fetaure key (name) string GetKey(void) const; + virtual EItem GetItemType(void) const; + // mapping bool IsMapped (void) const { return m_Mapped != eMapped_not_mapped; } bool IsMappedFromGenomic(void) const { return m_Mapped == eMapped_from_genomic; } @@ -403,6 +406,8 @@ public: void SetLoc(const CSeq_loc& loc); + virtual EItem GetItemType(void) const; + private: typedef CQualContainer TQuals; typedef TQuals::const_iterator TQCI; diff --git a/c++/include/objtools/format/items/gap_item.hpp b/c++/include/objtools/format/items/gap_item.hpp index e80e67a3..d1e20084 100644 --- a/c++/include/objtools/format/items/gap_item.hpp +++ b/c++/include/objtools/format/items/gap_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___GAP_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___GAP_ITEM__HPP -/* $Id: gap_item.hpp 340218 2011-10-06 13:00:57Z kornbluh $ +/* $Id: gap_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -79,6 +79,7 @@ public: bool HasEstimatedLength(void) const; TSeqPos GetEstimatedLength(void) const; + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx) {} diff --git a/c++/include/objtools/format/items/genome_project_item.hpp b/c++/include/objtools/format/items/genome_project_item.hpp index 167c5a07..ce49f011 100644 --- 
a/c++/include/objtools/format/items/genome_project_item.hpp +++ b/c++/include/objtools/format/items/genome_project_item.hpp @@ -64,6 +64,7 @@ public: const vector & GetProjectNumbers(void) const; const TDBLinkLineVec & GetDBLinkLines(void) const; + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/html_anchor_item.hpp b/c++/include/objtools/format/items/html_anchor_item.hpp index 9005f91e..58e64177 100644 --- a/c++/include/objtools/format/items/html_anchor_item.hpp +++ b/c++/include/objtools/format/items/html_anchor_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___HTML_ANCHOR_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___HTML_ANCHOR_ITEM__HPP -/* $Id: html_anchor_item.hpp 399305 2013-05-13 19:13:43Z grichenk $ +/* $Id: html_anchor_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -47,6 +47,7 @@ public: const string &GetLabelCore(void) const { return m_LabelCore; } TGi GetGI(void) const { return m_GI; } + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/item.hpp b/c++/include/objtools/format/items/item.hpp index a9908ad7..7ba7e629 100644 --- a/c++/include/objtools/format/items/item.hpp +++ b/c++/include/objtools/format/items/item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___ITEM__HPP -/* $Id: item.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -50,6 +50,43 @@ class IFormatter; class NCBI_FORMAT_EXPORT IFlatItem : public CObject { public: + + enum EItem { + eItem_None = 0, + eItem_StartSection, + eItem_StartItem, + eItem_HtmlAnchor, + eItem_Head, + eItem_Locus, + eItem_Date, + eItem_Defline, + 
eItem_Accession, + eItem_Version, + eItem_Project, + eItem_GenomeProject, + eItem_DbSource, + eItem_Keywords, + eItem_Segment, + eItem_Source, + eItem_Reference, + eItem_Comment, + eItem_Primary, + eItem_FeatHeader, + eItem_SourceFeat, + eItem_Feature, + eItem_Gap, + eItem_BaseCount, + eItem_Origin, + eItem_Sequence, + eItem_Contig, + eItem_Wgs, + eItem_Tsa, + eItem_Alignment, + eItem_EndItem, + eItem_EndSection, + eItem_Slash + }; + virtual void Format(IFormatter& formatter, IFlatTextOStream& text_os) const = 0; @@ -59,6 +96,9 @@ public: // returns true, if the formatter should skip this object virtual bool Skip(void) const = 0; + // returns paragraph type + virtual EItem GetItemType(void) const = 0; + virtual ~IFlatItem(void) {} }; diff --git a/c++/include/objtools/format/items/item_base.hpp b/c++/include/objtools/format/items/item_base.hpp index f816bafc..6b27429b 100644 --- a/c++/include/objtools/format/items/item_base.hpp +++ b/c++/include/objtools/format/items/item_base.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___ITEM_BASE_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___ITEM_BASE_ITEM__HPP -/* $Id: item_base.hpp 532040 2017-03-30 22:02:42Z kans $ +/* $Id: item_base.hpp 578292 2019-01-16 16:35:00Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/include/objtools/format/items/keywords_item.hpp b/c++/include/objtools/format/items/keywords_item.hpp index 86fee121..c0eca474 100644 --- a/c++/include/objtools/format/items/keywords_item.hpp +++ b/c++/include/objtools/format/items/keywords_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___KEYWORDS_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___KEYWORDS_ITEM__HPP -/* $Id: keywords_item.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: keywords_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -64,6 +64,7 @@ public: const TKeywords& 
GetKeywords(void) const; + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/locus_item.hpp b/c++/include/objtools/format/items/locus_item.hpp index 591b4663..0b1c963e 100644 --- a/c++/include/objtools/format/items/locus_item.hpp +++ b/c++/include/objtools/format/items/locus_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___LOCUS_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___LOCUS_ITEM__HPP -/* $Id: locus_item.hpp 554830 2018-01-09 18:47:38Z kans $ +/* $Id: locus_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -73,6 +73,8 @@ public: const string& GetDivision (void) const; const string& GetDate (void) const; + virtual EItem GetItemType(void) const; + static string GetDivision(const CBioseq_Handle& bsh, const CBioseqContext* ctx = 0); private: diff --git a/c++/include/objtools/format/items/origin_item.hpp b/c++/include/objtools/format/items/origin_item.hpp index 2662b18a..c575abc9 100644 --- a/c++/include/objtools/format/items/origin_item.hpp +++ b/c++/include/objtools/format/items/origin_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___ORIGIN_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___ORIGIN_ITEM__HPP -/* $Id: origin_item.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: origin_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -57,6 +57,7 @@ public: const string& GetOrigin(void) const { return m_Origin; } + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/primary_item.hpp b/c++/include/objtools/format/items/primary_item.hpp index 380b0a2e..4bf01352 100644 --- a/c++/include/objtools/format/items/primary_item.hpp +++ b/c++/include/objtools/format/items/primary_item.hpp @@ -1,7 +1,7 @@ 
#ifndef OBJTOOLS_FORMAT_ITEMS___PRIMARY_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___PRIMARY_ITEM__HPP -/* $Id: primary_item.hpp 328426 2011-08-03 15:11:31Z kornbluh $ +/* $Id: primary_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -64,6 +64,7 @@ public: const string& GetString(void) const { return m_Str; } + virtual EItem GetItemType(void) const; private: // types typedef CConstRef TAln; diff --git a/c++/include/objtools/format/items/reference_item.hpp b/c++/include/objtools/format/items/reference_item.hpp index 59ff32b2..cbfe47a4 100644 --- a/c++/include/objtools/format/items/reference_item.hpp +++ b/c++/include/objtools/format/items/reference_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___REFERENCE_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___REFERENCE_ITEM__HPP -/* $Id: reference_item.hpp 557236 2018-02-12 22:36:27Z kans $ +/* $Id: reference_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -169,7 +169,7 @@ public: static void ChangeMedlineAuthorsToISO( CRef pub ); - + virtual EItem GetItemType(void) const; private: CReferenceItem(const CReferenceItem& value); diff --git a/c++/include/objtools/format/items/segment_item.hpp b/c++/include/objtools/format/items/segment_item.hpp index a0ff3b60..25cf1d65 100644 --- a/c++/include/objtools/format/items/segment_item.hpp +++ b/c++/include/objtools/format/items/segment_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___SEGMENT_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___SEGMENT_ITEM__HPP -/* $Id: segment_item.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: segment_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -58,6 +58,7 @@ public: size_t GetNum (void) const { return m_Num; } size_t GetCount(void) 
const { return m_Count; } + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/sequence_item.hpp b/c++/include/objtools/format/items/sequence_item.hpp index 2e038c97..6fff249d 100644 --- a/c++/include/objtools/format/items/sequence_item.hpp +++ b/c++/include/objtools/format/items/sequence_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___SEQUENCE_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___SEQUENCE_ITEM__HPP -/* $Id: sequence_item.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: sequence_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -64,6 +64,7 @@ public: TSeqPos GetTo(void) const; bool IsFirst(void) const; + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/source_item.hpp b/c++/include/objtools/format/items/source_item.hpp index 7dca4ced..3b0ae5fd 100644 --- a/c++/include/objtools/format/items/source_item.hpp +++ b/c++/include/objtools/format/items/source_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___SOURCE_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___SOURCE_ITEM__HPP -/* $Id: source_item.hpp 561033 2018-03-30 19:36:02Z kans $ +/* $Id: source_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -75,6 +75,7 @@ public: TTaxid GetTaxid (void) const; bool IsUsingAnamorph (void) const; + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); void x_GatherInfoIdx(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/tsa_item.hpp b/c++/include/objtools/format/items/tsa_item.hpp index cbd7be51..80937165 100644 --- a/c++/include/objtools/format/items/tsa_item.hpp +++ b/c++/include/objtools/format/items/tsa_item.hpp @@ -1,7 +1,7 @@ #ifndef 
OBJTOOLS_FORMAT_ITEMS___TSA_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___TSA_ITEM__HPP -/* $Id: tsa_item.hpp 495539 2016-03-17 23:42:34Z kans $ +/* $Id: tsa_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -70,6 +70,7 @@ public: const string& GetFirstID(void) const { return m_First; } const string& GetLastID (void) const { return m_Last; } + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/version_item.hpp b/c++/include/objtools/format/items/version_item.hpp index 3947ec62..73373d1e 100644 --- a/c++/include/objtools/format/items/version_item.hpp +++ b/c++/include/objtools/format/items/version_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___VERSION_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___VERSION_ITEM__HPP -/* $Id: version_item.hpp 399305 2013-05-13 19:13:43Z grichenk $ +/* $Id: version_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -61,6 +61,7 @@ public: TGi GetGi(void) const; const string& GetAccession(void) const; + virtual EItem GetItemType(void) const; private: void x_GatherInfo(CBioseqContext& ctx); diff --git a/c++/include/objtools/format/items/wgs_item.hpp b/c++/include/objtools/format/items/wgs_item.hpp index 844b4b2e..7d329d03 100644 --- a/c++/include/objtools/format/items/wgs_item.hpp +++ b/c++/include/objtools/format/items/wgs_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___WGS_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___WGS_ITEM__HPP -/* $Id: wgs_item.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: wgs_item.hpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -69,6 +69,7 @@ public: const string& GetFirstID(void) const { return m_First; } const string& 
GetLastID (void) const { return m_Last; } + virtual EItem GetItemType(void) const; private: diff --git a/c++/include/objtools/readers/fasta_exception.hpp b/c++/include/objtools/readers/fasta_exception.hpp index 61018221..6a42652d 100644 --- a/c++/include/objtools/readers/fasta_exception.hpp +++ b/c++/include/objtools/readers/fasta_exception.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS___FASTA_EXCEPTION__HPP #define OBJTOOLS_READERS___FASTA_EXCEPTION__HPP -/* $Id: fasta_exception.hpp 437343 2014-06-05 13:58:46Z ucko $ +/* $Id: fasta_exception.hpp 578929 2019-01-28 18:18:01Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -45,7 +45,7 @@ public: eBadResidues }; - virtual const char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { switch (GetErrCode()) { case eBadResidues: return "eBadResidues"; @@ -89,7 +89,7 @@ public: TBadIndexMap m_BadIndexMap; }; - virtual void ReportExtra(ostream& out) const; + virtual void ReportExtra(ostream& out) const override; CBadResiduesException(const CDiagCompileInfo& info, const CException* prev_exception, diff --git a/c++/include/objtools/readers/gff2_data.hpp b/c++/include/objtools/readers/gff2_data.hpp index 703abc69..424f29b8 100644 --- a/c++/include/objtools/readers/gff2_data.hpp +++ b/c++/include/objtools/readers/gff2_data.hpp @@ -1,4 +1,4 @@ -/* $Id: gff2_data.hpp 539953 2017-06-29 13:16:45Z ludwigf $ +/* $Id: gff2_data.hpp 575510 2018-11-29 19:38:03Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -50,6 +50,29 @@ public: public: CGff2Record(); + CGff2Record( + const CGff2Record& rhs): + m_strId(rhs.m_strId), + m_uSeqStart(rhs.m_uSeqStart), + m_uSeqStop(rhs.m_uSeqStop), + m_strSource(rhs.m_strSource), + m_strType(rhs.m_strType), + m_pdScore(nullptr), + m_peStrand(nullptr), + m_pePhase(nullptr) + { + if (rhs.m_pdScore) { + m_pdScore = new 
double(rhs.Score()); + } + if (rhs.m_peStrand) { + m_peStrand = new ENa_strand(rhs.Strand()); + } + if (rhs.m_pePhase) { + m_pePhase = new TFrame(rhs.Phase()); + } + m_Attributes.insert(rhs.m_Attributes.begin(), rhs.m_Attributes.end()); + }; + virtual ~CGff2Record(); static unsigned int NextId(); @@ -88,9 +111,6 @@ public: TFrame Phase() const { return IsSetPhase() ? *m_pePhase : CCdregion::eFrame_not_set; }; - const string& AttributesLiteral() const { - return m_strAttributes; - }; bool IsSetScore() const { return m_pdScore != 0; @@ -137,36 +157,39 @@ public: CRef, SeqIdResolver = nullptr ) const; - static void TokenizeGFF(vector& columns, const CTempStringEx& line); + static void TokenizeGFF( + vector& columns, + const CTempStringEx& line); + protected: - virtual bool x_AssignAttributesFromGff( + virtual bool xAssignAttributesFromGff( const string&, const string& ); - bool x_SplitGffAttributes( + bool xSplitGffAttributes( const string&, vector< string >& ) const; - virtual bool x_MigrateId( + virtual bool xMigrateId( CRef ) const; - virtual bool x_MigrateStartStopStrand( + virtual bool xMigrateStartStopStrand( CRef ) const; - virtual bool x_MigrateType( + virtual bool xMigrateType( CRef ) const; - virtual bool x_MigrateScore( + virtual bool xMigrateScore( CRef ) const; - virtual bool x_MigratePhase( + virtual bool xMigratePhase( CRef ) const; - virtual bool x_MigrateAttributes( + virtual bool xMigrateAttributes( int, CRef ) const; - virtual bool x_InitFeatureLocation( + virtual bool xInitFeatureLocation( int, CRef, SeqIdResolver = nullptr ) const; @@ -180,12 +203,12 @@ protected: CRef, SeqIdResolver = nullptr ) const; - virtual bool x_MigrateAttributesSubSource( + virtual bool xMigrateAttributesSubSource( int, CRef, TAttributes& ) const; - virtual bool x_MigrateAttributesOrgName( + virtual bool xMigrateAttributesOrgName( int, CRef, TAttributes& ) const; diff --git a/c++/include/objtools/readers/gff2_reader.hpp b/c++/include/objtools/readers/gff2_reader.hpp index 
123a18f8..f44d9ed0 100644 --- a/c++/include/objtools/readers/gff2_reader.hpp +++ b/c++/include/objtools/readers/gff2_reader.hpp @@ -1,4 +1,4 @@ - /* $Id: gff2_reader.hpp 560230 2018-03-20 17:29:23Z foleyjp $ + /* $Id: gff2_reader.hpp 575511 2018-11-29 19:38:28Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -127,14 +127,6 @@ public: // // new stuff: // - bool x_ParseBrowserLineGff( - const string&, - CRef< CAnnotdesc >& ); - - virtual bool x_ParseTrackLineGff( - const string&, - CRef< CAnnotdesc >& ); - virtual bool xParseStructuredComment( const string&); @@ -159,11 +151,6 @@ public: CRef&, const string& = ""); - virtual bool x_ParseFeatureGff( - const string&, - TAnnots&, - ILineErrorListener*); - virtual bool x_ParseAlignmentGff( const string& strLine, list& id_list, @@ -208,7 +195,7 @@ public: CRef< CSeq_annot >, ILineErrorListener* =0); - virtual bool x_UpdateAnnotFeature( + virtual bool xUpdateAnnotFeature( const CGff2Record&, CRef< CSeq_annot >, ILineErrorListener* =0); @@ -226,10 +213,6 @@ public: const CGff2Record&, CRef< CSeq_feat > ); - bool x_FeatureTrimQualifiers( - const CGff2Record&, - CRef< CSeq_feat > ); - bool xFeatureSetQualifier( const string&, const string&, diff --git a/c++/include/objtools/readers/gff3_reader.hpp b/c++/include/objtools/readers/gff3_reader.hpp index 1a2d5f56..cb73eeee 100644 --- a/c++/include/objtools/readers/gff3_reader.hpp +++ b/c++/include/objtools/readers/gff3_reader.hpp @@ -1,4 +1,4 @@ - /* $Id: gff3_reader.hpp 547755 2017-10-03 18:03:15Z ludwigf $ + /* $Id: gff3_reader.hpp 575510 2018-11-29 19:38:03Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -40,7 +40,7 @@ #include #include -//#include +#include BEGIN_NCBI_SCOPE @@ -96,14 +96,14 @@ public: protected: virtual CGff2Record* x_CreateRecord() { return new CGff3ReadRecord(); }; - virtual bool x_UpdateAnnotFeature( + virtual bool 
xInitializeFeature( const CGff2Record&, - CRef< CSeq_annot >, - ILineErrorListener*); + CRef ); - virtual bool x_UpdateFeatureCds( + virtual bool xUpdateAnnotFeature( const CGff2Record&, - CRef); + CRef< CSeq_annot >, + ILineErrorListener*); virtual bool xAddFeatureToAnnot( CRef< CSeq_feat >, @@ -139,6 +139,10 @@ protected: CRef, ILineErrorListener*); + virtual bool xFindFeatureUnderConstruction( + const CGff2Record&, + CRef&); + virtual bool xVerifyCdsParents( const CGff2Record&); @@ -162,9 +166,25 @@ protected: virtual bool xIsIgnoredFeatureType( const string&); + virtual void xAddPendingExon( + const string&, + const CGff2Record&); + virtual void xGetPendingExons( + const string&, + list&); + virtual void xPostProcessAnnot( + CRef&, + ILineErrorListener*); + + // Data: map mCdsParentMap; map > mMrnaLocs; + map mIdToSeqIdMap; + + using PENDING_EXONS = map >; + PENDING_EXONS mPendingExons; + static unsigned int msGenericIdCounter; }; diff --git a/c++/include/objtools/readers/gtf_reader.hpp b/c++/include/objtools/readers/gtf_reader.hpp index 4a6230bb..7cf07a3b 100644 --- a/c++/include/objtools/readers/gtf_reader.hpp +++ b/c++/include/objtools/readers/gtf_reader.hpp @@ -1,4 +1,4 @@ - /* $Id: gtf_reader.hpp 547755 2017-10-03 18:03:15Z ludwigf $ + /* $Id: gtf_reader.hpp 575512 2018-11-29 19:38:56Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -40,6 +40,83 @@ BEGIN_NCBI_SCOPE BEGIN_SCOPE(objects) // namespace ncbi::objects:: +// ============================================================================ +class CGtfAttributes +// ============================================================================ +{ +public: + using MultiValue = vector; + using MultiAttributes = map; + + const MultiAttributes& + Get() const + { + return mAttributes; + }; + + string + ValueOf( + const string& key) const + { + MultiValue values; + GetValues(key, values); + if (values.size() == 1) { + return values.front(); + } 
+ return ""; + } + + bool + HasValue( + const string& key, + const string& value = "") const + { + auto it = mAttributes.find(key); + if (it == mAttributes.end()) { + return false; + } + const auto& values = it->second; + if (values.empty()) { + return false; + } + if (value.empty()) { + return true; + } + return (find(values.begin(), values.end(), value) != values.end()); + }; + + void + GetValues( + const string& key, + MultiValue& values) const + { + const MultiValue empty; + values = empty; + auto it = mAttributes.find(key); + if (it != mAttributes.end()) { + values = it->second; + } + }; + + void + AddValue( + const string& key, + const string& value) + { + auto kit = mAttributes.find(key); + if (kit == mAttributes.end()) { + kit = mAttributes.insert(make_pair(key, MultiValue())).first; + } + auto vit = find(kit->second.begin(), kit->second.end(), value); + if (vit == kit->second.end()) { + kit->second.push_back(value); + } + }; + +protected: + MultiAttributes mAttributes; +}; + // ============================================================================ class CGtfReadRecord // ============================================================================ @@ -49,10 +126,43 @@ public: CGtfReadRecord(): CGff2Record() {}; ~CGtfReadRecord() {}; + const CGtfAttributes& + GtfAttributes() const + { + return mAttributes; + }; + + string + GeneKey() const + { + string geneId = mAttributes.ValueOf("gene_id"); + if (geneId.empty()) { + cerr << "Unexpected: GTF feature without a gene_id." 
<< endl; + } + return geneId; + }; + + string + FeatureKey() const + { + static unsigned int tidCounter(1); + if (Type() == "gene") { + return GeneKey(); + } + + string transcriptId = mAttributes.ValueOf("transcript_id"); + if (transcriptId.empty()) { + transcriptId = "t" + NStr::IntToString(tidCounter++); + } + return GeneKey() + "_" + transcriptId; + } + protected: - bool x_AssignAttributesFromGff( + bool xAssignAttributesFromGff( const string&, const string& ); + + CGtfAttributes mAttributes; }; // ---------------------------------------------------------------------------- @@ -76,116 +186,132 @@ public: protected: virtual CGff2Record* x_CreateRecord() { return new CGtfReadRecord(); }; - virtual bool x_UpdateAnnotFeature( + virtual bool xUpdateAnnotFeature( const CGff2Record&, CRef< CSeq_annot >, ILineErrorListener* =0); virtual bool x_UpdateAnnotCds( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_annot > ); virtual bool x_UpdateAnnotTranscript( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_annot > ); bool x_CreateFeatureId( - const CGff2Record&, + const CGtfReadRecord&, const string&, CRef< CSeq_feat > ); bool x_CreateFeatureLocation( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat > ); bool x_CreateGeneXrefs( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat > ); bool x_CreateMrnaXrefs( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat > ); bool x_CreateCdsXrefs( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat > ); bool x_MergeFeatureLocationSingleInterval( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat > ); bool x_MergeFeatureLocationMultiInterval( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat > ); bool x_CreateParentGene( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_annot > ); bool x_MergeParentGene( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat > ); bool xFeatureSetQualifiersGene( - const CGff2Record& record, + const 
CGtfReadRecord& record, CRef); bool xFeatureSetQualifiersRna( - const CGff2Record& record, + const CGtfReadRecord& record, CRef); bool xFeatureSetQualifiersCds( - const CGff2Record& record, + const CGtfReadRecord& record, CRef); bool x_CreateParentCds( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_annot > ); bool x_CreateParentMrna( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_annot > ); bool x_MergeParentCds( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat > ); bool x_FeatureSetDataGene( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat > ); virtual bool x_FeatureSetDataRna( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat >, CSeqFeatData::ESubtype ); bool x_FeatureSetDataMRNA( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat > ); bool x_FeatureSetDataCDS( - const CGff2Record&, + const CGtfReadRecord&, + CRef< CSeq_feat > ); + + bool x_FeatureTrimQualifiers( + const CGtfReadRecord&, CRef< CSeq_feat > ); protected: bool x_FindParentGene( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat >& ); bool x_FindParentCds( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat >& ); bool x_FindParentMrna( - const CGff2Record&, + const CGtfReadRecord&, CRef< CSeq_feat >& ); - virtual bool x_ProcessQualifierSpecialCase( - CGff2Record::TAttrCit, + virtual bool xProcessQualifierSpecialCase( + const string&, + const CGtfAttributes::MultiValue&, CRef< CSeq_feat > ); - + + virtual void + xFeatureAddQualifiers( + const string& key, + const CGtfAttributes::MultiValue& values, + CRef pFeature) + { + for (auto value: values) { + pFeature->AddQualifier(key, value); + } + }; + bool x_CdsIsPartial( - const CGff2Record& ); + const CGtfReadRecord& ); typedef map< string, CRef< CSeq_feat > > TIdToFeature; TIdToFeature m_GeneMap; diff --git a/c++/include/objtools/readers/gvf_reader.hpp b/c++/include/objtools/readers/gvf_reader.hpp index 271e9a3c..f6467b54 100644 --- 
a/c++/include/objtools/readers/gvf_reader.hpp +++ b/c++/include/objtools/readers/gvf_reader.hpp @@ -1,4 +1,4 @@ - /* $Id: gvf_reader.hpp 513672 2016-09-14 14:10:57Z ludwigf $ + /* $Id: gvf_reader.hpp 575504 2018-11-29 19:35:15Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -70,7 +70,7 @@ public: bool SanityCheck() const; protected: - virtual bool x_AssignAttributesFromGff( + virtual bool xAssignAttributesFromGff( const string&, const string& ); void xTraceError( diff --git a/c++/include/objtools/readers/idmapper.hpp b/c++/include/objtools/readers/idmapper.hpp index 43990fd6..b7ee8346 100644 --- a/c++/include/objtools/readers/idmapper.hpp +++ b/c++/include/objtools/readers/idmapper.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_IDMAPPER___IDMAPPER_IMPL__HPP #define OBJTOOLS_IDMAPPER___IDMAPPER_IMPL__HPP -/* $Id: idmapper.hpp 472138 2015-07-07 16:07:55Z grichenk $ +/* $Id: idmapper.hpp 578929 2019-01-28 18:18:01Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -430,7 +430,7 @@ public: eBadSeqId, eOther }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CIdMapperException, CException); }; diff --git a/c++/include/objtools/readers/reader_exception.hpp b/c++/include/objtools/readers/reader_exception.hpp index 83f310b8..c91b15a7 100644 --- a/c++/include/objtools/readers/reader_exception.hpp +++ b/c++/include/objtools/readers/reader_exception.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS___READER_EXCEPTION__HPP #define OBJTOOLS_READERS___READER_EXCEPTION__HPP -/* $Id: reader_exception.hpp 543406 2017-08-09 18:06:39Z foleyjp $ +/* $Id: reader_exception.hpp 578929 2019-01-28 18:18:01Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -61,7 +61,7 @@ public: eWrongGap, eInvalidID }; - virtual const 
char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { switch (GetErrCode()) { case eFormat: return "eFormat"; diff --git a/c++/include/serial/exception.hpp b/c++/include/serial/exception.hpp index 53f98d7d..56e4bb18 100644 --- a/c++/include/serial/exception.hpp +++ b/c++/include/serial/exception.hpp @@ -1,6 +1,6 @@ #ifndef EXCEPTION__HPP #define EXCEPTION__HPP -/* $Id: exception.hpp 547688 2017-10-03 13:21:50Z gouriano $ +/* $Id: exception.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -63,16 +63,16 @@ public: eMissingValue, ///< Mandatory value was missing in the input eNullValue ///< Data value is null }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CSerialException,CException); public: // Combine stream frames info into single message void AddFrameInfo(string frame_info); - virtual void ReportExtra(ostream& out) const; + virtual void ReportExtra(ostream& out) const override; protected: - virtual void x_Assign(const CException& src); + virtual void x_Assign(const CException& src) override; private: string m_FrameStack; @@ -88,7 +88,7 @@ public: eWrite, eUnknownMember }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CUnassignedMember,CSerialException); }; @@ -104,7 +104,7 @@ public: enum EErrCode { eFail }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; static const char* GetName(size_t index, const char* const names[], size_t namesCount); @@ -132,13 +132,13 @@ public: CInvalidChoiceSelection(const CInvalidChoiceSelection& other); virtual ~CInvalidChoiceSelection(void) throw(); - virtual const char* GetType(void) const; + virtual const char* GetType(void) const override; typedef 
int TErrCode; TErrCode GetErrCode(void) const; protected: CInvalidChoiceSelection(void); - virtual const CException* x_Clone(void) const; + virtual const CException* x_Clone(void) const override; }; class NCBI_XSERIAL_EXPORT CSerialFacetException : public CSerialException @@ -159,7 +159,7 @@ public: eMaxItems, eUniqueItems }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CSerialFacetException,CSerialException); }; diff --git a/c++/include/serial/rpcbase_impl.hpp b/c++/include/serial/rpcbase_impl.hpp index b7b93900..20ab84c2 100644 --- a/c++/include/serial/rpcbase_impl.hpp +++ b/c++/include/serial/rpcbase_impl.hpp @@ -1,7 +1,7 @@ #ifndef SERIAL___RPCBASE_IMPL__HPP #define SERIAL___RPCBASE_IMPL__HPP -/* $Id: rpcbase_impl.hpp 554977 2018-01-11 14:18:53Z gouriano $ +/* $Id: rpcbase_impl.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -147,7 +147,7 @@ public: eOther }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; bool IsSetRetryContext(void) const { return m_RetryCtx; } /// Read retry related data. 
diff --git a/c++/include/util/align_range_coll.hpp b/c++/include/util/align_range_coll.hpp index 09101362..0ecc7ed2 100644 --- a/c++/include/util/align_range_coll.hpp +++ b/c++/include/util/align_range_coll.hpp @@ -1,7 +1,7 @@ #ifndef UTIL___ALIGN_RANGE_COLL__HPP #define UTIL___ALIGN_RANGE_COLL__HPP -/* $Id: align_range_coll.hpp 500279 2016-05-03 17:12:04Z ivanov $ +/* $Id: align_range_coll.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -51,7 +51,7 @@ public: { } - virtual const char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { return "CAlignRangeCollection - operation resulted in invalid state."; } diff --git a/c++/include/util/checksum.hpp b/c++/include/util/checksum.hpp index cfc901f6..9a9a7574 100644 --- a/c++/include/util/checksum.hpp +++ b/c++/include/util/checksum.hpp @@ -1,7 +1,7 @@ #ifndef CHECKSUM__HPP #define CHECKSUM__HPP -/* $Id: checksum.hpp 573617 2018-10-30 14:36:09Z ivanov $ +/* $Id: checksum.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -436,7 +436,7 @@ public: }; /// Translate from an error code value to its string representation. - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; // Standard exception boilerplate code. 
NCBI_EXCEPTION_DEFAULT(CChecksumException, CCoreException); diff --git a/c++/include/util/file_manifest.hpp b/c++/include/util/file_manifest.hpp index cb8a85a3..48bf42c0 100644 --- a/c++/include/util/file_manifest.hpp +++ b/c++/include/util/file_manifest.hpp @@ -1,7 +1,7 @@ #ifndef UTIL___FILE_MANIFEST_HPP__ #define UTIL___FILE_MANIFEST_HPP__ -/* $Id: file_manifest.hpp 492854 2016-02-22 16:33:44Z elisovdn $ +/* $Id: file_manifest.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -99,7 +99,7 @@ public: , eInvalidFilePath }; - virtual const char* GetErrCodeString() const + virtual const char* GetErrCodeString() const override { switch (GetErrCode()) { case eEmptyManifestName: diff --git a/c++/include/util/ncbi_cache.hpp b/c++/include/util/ncbi_cache.hpp index 3e420de2..63422de7 100644 --- a/c++/include/util/ncbi_cache.hpp +++ b/c++/include/util/ncbi_cache.hpp @@ -1,6 +1,6 @@ #ifndef CORELIB___NCBI_CACHE__HPP #define CORELIB___NCBI_CACHE__HPP -/* $Id: ncbi_cache.hpp 560849 2018-03-28 12:46:32Z ivanov $ +/* $Id: ncbi_cache.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -311,7 +311,7 @@ public: eOtherError }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CCacheException, CException); }; diff --git a/c++/include/util/ncbi_table.hpp b/c++/include/util/ncbi_table.hpp index 733227a9..756836a2 100644 --- a/c++/include/util/ncbi_table.hpp +++ b/c++/include/util/ncbi_table.hpp @@ -1,7 +1,7 @@ #ifndef UTIL_NCBITABLE__HPP #define UTIL_NCBITABLE__HPP -/* $Id: ncbi_table.hpp 437336 2014-06-05 13:49:06Z ucko $ +/* $Id: ncbi_table.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * public DOMAIN NOTICE @@ -48,7 +48,7 @@ public: 
eColumnAlreadyExists ///< Column id has been assigned before }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CNcbiTable_Exception, CException); }; diff --git a/c++/include/util/random_gen.hpp b/c++/include/util/random_gen.hpp index 221f35e9..354dd548 100644 --- a/c++/include/util/random_gen.hpp +++ b/c++/include/util/random_gen.hpp @@ -1,7 +1,7 @@ #ifndef RANDOM_GEN__HPP #define RANDOM_GEN__HPP -/* $Id: random_gen.hpp 459009 2015-02-12 15:18:36Z sadyrovr $ +/* $Id: random_gen.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -187,7 +187,7 @@ public: ///< system-dependent generator. }; - virtual const char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { switch (GetErrCode()) { case eUnavailable : return "eUnavailable"; diff --git a/c++/include/util/row_reader.inl b/c++/include/util/row_reader.inl index 29fcd40c..4b284b78 100644 --- a/c++/include/util/row_reader.inl +++ b/c++/include/util/row_reader.inl @@ -1,7 +1,7 @@ #ifndef UTIL___ROW_READER__INL #define UTIL___ROW_READER__INL -/* $Id: row_reader.inl 564203 2018-05-23 12:13:23Z ivanov $ +/* $Id: row_reader.inl 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -241,7 +241,7 @@ public: m_Context(ctxt) NCBI_EXCEPTION_DEFAULT_IMPLEMENTATION(CRowReaderException, CException); - virtual const char * GetErrCodeString(void) const + virtual const char * GetErrCodeString(void) const override { switch (GetErrCode()) { case eUnexpectedRowType: diff --git a/c++/include/util/row_reader_iana_csv.hpp b/c++/include/util/row_reader_iana_csv.hpp index b61e4b91..fef646bc 100644 --- a/c++/include/util/row_reader_iana_csv.hpp +++ b/c++/include/util/row_reader_iana_csv.hpp @@ -1,7 +1,7 @@ #ifndef 
UTIL___ROW_READER_IANA_CSV__HPP #define UTIL___ROW_READER_IANA_CSV__HPP -/* $Id: row_reader_iana_csv.hpp 564203 2018-05-23 12:13:23Z ivanov $ +/* $Id: row_reader_iana_csv.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -56,7 +56,7 @@ public: eUnexpectedDoubleQuote }; - virtual const char * GetErrCodeString(void) const + virtual const char * GetErrCodeString(void) const override { switch (GetErrCode()) { case eUnbalancedDoubleQuote: diff --git a/c++/include/util/row_reader_iana_tsv.hpp b/c++/include/util/row_reader_iana_tsv.hpp index 40530e9d..2ba46a05 100644 --- a/c++/include/util/row_reader_iana_tsv.hpp +++ b/c++/include/util/row_reader_iana_tsv.hpp @@ -1,7 +1,7 @@ #ifndef UTIL___ROW_READER_IANA_TSV__HPP #define UTIL___ROW_READER_IANA_TSV__HPP -/* $Id: row_reader_iana_tsv.hpp 538310 2017-06-09 13:27:14Z satskyse $ +/* $Id: row_reader_iana_tsv.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -82,7 +82,7 @@ public: eValidateConversion = 7 }; - virtual const char * GetErrCodeString(void) const + virtual const char * GetErrCodeString(void) const override { switch (GetErrCode()) { case eNumberOfFieldsMismatch: diff --git a/c++/include/util/sequtil/sequtil_expt.hpp b/c++/include/util/sequtil/sequtil_expt.hpp index 759d7daf..29550bfe 100644 --- a/c++/include/util/sequtil/sequtil_expt.hpp +++ b/c++/include/util/sequtil/sequtil_expt.hpp @@ -1,7 +1,7 @@ #ifndef UTIL_SEQUTIL___SEQUTIL_EXPT__HPP #define UTIL_SEQUTIL___SEQUTIL_EXPT__HPP -/* $Id: sequtil_expt.hpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: sequtil_expt.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -47,7 +47,7 @@ public: eBadConversion, eBadParameter }; - virtual const char* GetErrCodeString(void) const { + virtual 
const char* GetErrCodeString(void) const override { switch ( GetErrCode() ) { case eNotSupported: return "Operation not supported"; diff --git a/c++/include/util/sync_queue.hpp b/c++/include/util/sync_queue.hpp index f67b4e08..f6bdff83 100644 --- a/c++/include/util/sync_queue.hpp +++ b/c++/include/util/sync_queue.hpp @@ -1,7 +1,7 @@ #ifndef UTIL___SYNC_QUEUE__HPP #define UTIL___SYNC_QUEUE__HPP -/* $Id: sync_queue.hpp 534859 2017-05-03 12:47:35Z ivanov $ +/* $Id: sync_queue.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -998,7 +998,7 @@ public: eGuardedCopy }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CSyncQueueException, CException); }; diff --git a/c++/include/util/thread_pool.hpp b/c++/include/util/thread_pool.hpp index 9a240145..f1e57f3b 100644 --- a/c++/include/util/thread_pool.hpp +++ b/c++/include/util/thread_pool.hpp @@ -1,7 +1,7 @@ #ifndef UTIL__THREAD_POOL__HPP #define UTIL__THREAD_POOL__HPP -/* $Id: thread_pool.hpp 400966 2013-05-24 17:50:38Z vakatov $ +/* $Id: thread_pool.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -619,7 +619,7 @@ public: ///< by means of methods of another ThreadPool or ///< invalid parameters in the constructor }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CThreadPoolException, CException); }; diff --git a/c++/include/util/util_exception.hpp b/c++/include/util/util_exception.hpp index e3b3deca..4415975c 100644 --- a/c++/include/util/util_exception.hpp +++ b/c++/include/util/util_exception.hpp @@ -1,7 +1,7 @@ #ifndef UTIL_EXCEPTION__HPP #define UTIL_EXCEPTION__HPP -/* $Id: util_exception.hpp 348644 2012-01-03 15:53:45Z vasilche $ +/* $Id: 
util_exception.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -53,7 +53,7 @@ public: eWrongCommand, eWrongData }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CUtilException,CException); }; @@ -69,7 +69,7 @@ public: eCanceled, eOverflow }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CIOException, CUtilException); }; @@ -79,7 +79,7 @@ public: enum EErrCode { eEof }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CEofException, CIOException); }; @@ -91,7 +91,7 @@ public: eFull, // attempt to insert into a full queue eTimedOut // Put or WaitForRoom timed out }; - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CBlockingQueueException,CUtilException); }; diff --git a/c++/include/util/util_misc.hpp b/c++/include/util/util_misc.hpp index 20a1643b..4f74d63f 100644 --- a/c++/include/util/util_misc.hpp +++ b/c++/include/util/util_misc.hpp @@ -1,7 +1,7 @@ #ifndef UTIL___UTIL_MISC__HPP #define UTIL___UTIL_MISC__HPP -/* $Id: util_misc.hpp 530021 2017-03-09 19:06:46Z ucko $ +/* $Id: util_misc.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -66,7 +66,7 @@ public: }; /// Translate from the error code value to its string representation - virtual const char* GetErrCodeString(void) const; + virtual const char* GetErrCodeString(void) const override; NCBI_EXCEPTION_DEFAULT(CGetPasswordFromConsoleException, CException); }; diff --git a/c++/include/util/value_convert_policy.hpp b/c++/include/util/value_convert_policy.hpp index 
e34d59aa..e11083d6 100644 --- a/c++/include/util/value_convert_policy.hpp +++ b/c++/include/util/value_convert_policy.hpp @@ -1,7 +1,7 @@ #ifndef UTIL___VALUE_CONV_POLICY__HPP #define UTIL___VALUE_CONV_POLICY__HPP -/* $Id: value_convert_policy.hpp 499250 2016-04-25 12:27:42Z gouriano $ +/* $Id: value_convert_policy.hpp 578926 2019-01-28 18:17:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -47,7 +47,7 @@ public: enum EErrCode { eInvalidConversion }; - virtual const char* GetErrCodeString(void) const + virtual const char* GetErrCodeString(void) const override { return "Invalid run-time type conversion."; } diff --git a/c++/scripts/common/check/lsan.supp b/c++/scripts/common/check/lsan.supp index aa95bfa6..a4e52737 100644 --- a/c++/scripts/common/check/lsan.supp +++ b/c++/scripts/common/check/lsan.supp @@ -11,4 +11,5 @@ leak:^ncbi::CPluginManager_DllResolver::ResolveFile( leak:^ncbi::InitDiagHandler()$ leak:^ncbi::objects::CSeq_id_Mapper::GetInstance()$ leak:^NcbiError_GetOrCreate$ +leak:^NodeTest06c::test_method()$ leak:^void test_static_tls< diff --git a/c++/scripts/common/impl/install.sh b/c++/scripts/common/impl/install.sh index 5a174b16..649cb8ef 100755 --- a/c++/scripts/common/impl/install.sh +++ b/c++/scripts/common/impl/install.sh @@ -16,7 +16,7 @@ echo "[`date`]" -svn_location=`echo '$URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.8.1/c++/scripts/common/impl/install.sh $' | sed "s%\\$[U]RL: *\\([^$][^$]*\\) \\$.*%\\1%"` +svn_location=`echo '$URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.9.0/c++/scripts/common/impl/install.sh $' | sed "s%\\$[U]RL: *\\([^$][^$]*\\) \\$.*%\\1%"` svn_revision=`echo '$Revision: 429376 $' | sed "s%\\$[R]evision: *\\([^$][^$]*\\) \\$.*%\\1%"` script_name=`basename $0` diff --git a/c++/scripts/projects/blast/post_build/macosx/ncbi-blast.sh b/c++/scripts/projects/blast/post_build/macosx/ncbi-blast.sh index 0e3a8322..00a804a7 
100755 --- a/c++/scripts/projects/blast/post_build/macosx/ncbi-blast.sh +++ b/c++/scripts/projects/blast/post_build/macosx/ncbi-blast.sh @@ -25,7 +25,7 @@ setup() prep_binary_component_package() { - BLAST_BINS="blastn blastp blastx tblastn tblastx psiblast rpsblast rpstblastn blast_formatter deltablast legacy_blast.pl update_blastdb.pl " + BLAST_BINS="blastn blastp blastx tblastn tblastx psiblast rpsblast rpstblastn blast_formatter deltablast legacy_blast.pl update_blastdb.pl get_species_taxids.sh" MASKING_BINS="windowmasker dustmasker segmasker" DB_BINS="blastdbcmd makeblastdb makeprofiledb makembindex blastdb_aliastool convert2blastmask blastdbcheck" ALL_BINS="$BLAST_BINS $MASKING_BINS $DB_BINS" diff --git a/c++/scripts/projects/blast/post_build/rpm/ncbi-blast.spec b/c++/scripts/projects/blast/post_build/rpm/ncbi-blast.spec index d95bc701..fdf095f4 100644 --- a/c++/scripts/projects/blast/post_build/rpm/ncbi-blast.spec +++ b/c++/scripts/projects/blast/post_build/rpm/ncbi-blast.spec @@ -1,6 +1,6 @@ Name: ncbi-blast Version: BLAST_VERSION+ -Release: 2 +Release: 1 Source0: %{name}-%{version}.tgz Summary: NCBI BLAST finds regions of similarity between biological sequences. 
Exclusiveos: linux @@ -73,7 +73,7 @@ cd c++/*/build %install %__mkdir_p $RPM_BUILD_ROOT/%_bindir cd c++/*/bin -%__install -m755 blastp blastn blastx tblastn tblastx psiblast rpsblast rpstblastn blast_formatter deltablast makembindex segmasker dustmasker windowmasker makeblastdb makeprofiledb blastdbcmd blastdb_aliastool convert2blastmask blastdbcheck legacy_blast.pl update_blastdb.pl $RPM_BUILD_ROOT/%_bindir +%__install -m755 blastp blastn blastx tblastn tblastx psiblast rpsblast rpstblastn blast_formatter deltablast makembindex segmasker dustmasker windowmasker makeblastdb makeprofiledb blastdbcmd blastdb_aliastool convert2blastmask blastdbcheck legacy_blast.pl update_blastdb.pl get_species_taxids.sh $RPM_BUILD_ROOT/%_bindir %clean rm -rf $RPM_BUILD_ROOT diff --git a/c++/scripts/projects/blast/post_build/win/make_win.py b/c++/scripts/projects/blast/post_build/win/make_win.py index 9b56a376..44da319f 100644 --- a/c++/scripts/projects/blast/post_build/win/make_win.py +++ b/c++/scripts/projects/blast/post_build/win/make_win.py @@ -1,6 +1,6 @@ #! 
/usr/bin/env python3 """Script to create the Windows installer for BLAST command line applications""" -# $Id: make_win.py 555910 2018-01-24 16:33:11Z camacho $ +# $Id: make_win.py 578261 2019-01-16 12:43:00Z camacho $ # # Author: Christiam camacho @@ -62,7 +62,8 @@ def main(): "blast_formatter.exe", "deltablast.exe", "legacy_blast.pl", - "update_blastdb.pl" + "update_blastdb.pl", + "get_species_taxids.sh" ] cwd = os.getcwd() diff --git a/c++/scripts/projects/blast/post_build/win/ncbi-blast.nsi b/c++/scripts/projects/blast/post_build/win/ncbi-blast.nsi index 71590ba6..282a4eb4 100755 --- a/c++/scripts/projects/blast/post_build/win/ncbi-blast.nsi +++ b/c++/scripts/projects/blast/post_build/win/ncbi-blast.nsi @@ -69,6 +69,7 @@ Section "DefaultSection" SecDflt File "rpstblastn.exe" File "legacy_blast.pl" File "update_blastdb.pl" + File "get_species_taxids.sh" File "makeblastdb.exe" File "makembindex.exe" File "makeprofiledb.exe" @@ -112,6 +113,7 @@ Section "Uninstall" Delete "$INSTDIR\bin\rpstblastn.exe" Delete "$INSTDIR\bin\legacy_blast.pl" Delete "$INSTDIR\bin\update_blastdb.pl" + Delete "$INSTDIR\bin\get_species_taxids.sh" Delete "$INSTDIR\bin\makeblastdb.exe" Delete "$INSTDIR\bin\makembindex.exe" Delete "$INSTDIR\bin\makeprofiledb.exe" diff --git a/c++/scripts/projects/blast/project.lst b/c++/scripts/projects/blast/project.lst index 427ccfc1..543ef1b2 100644 --- a/c++/scripts/projects/blast/project.lst +++ b/c++/scripts/projects/blast/project.lst @@ -90,5 +90,6 @@ objtools/simple$ -objects/.*/demo -objects/.*/unit_test misc$ +misc/jsonwrapp misc/third_party misc/third_party_static diff --git a/c++/scripts/projects/dispatcher/Manifest b/c++/scripts/projects/dispatcher/Manifest index dfc91f71..d80899d1 100644 --- a/c++/scripts/projects/dispatcher/Manifest +++ b/c++/scripts/projects/dispatcher/Manifest @@ -1,4 +1,4 @@ -# $Id: Manifest 562490 2018-04-23 15:55:02Z lavr $ +# $Id: Manifest 581233 2019-02-26 20:58:29Z fukanchi $ # # Author: Sergey Satskiy # @@ -17,10 +17,6 
@@ DEFAULT_CONFIGURATIONS: Linux64-Centos:GCC COPY: $bindir/test_mghbn $installdir/bin/ncbi_mghbn COPY: $bindir/test_ncbi_dblb $installdir/bin/ncbi_dblb COPY: $srcdir/src/connect/daemons/mod/*mod* $installdir/src/ -COPY: $srcdir/src/connect/daemons/fwdaemon_conn_stats.py $installdir/ -COPY: $srcdir/src/connect/daemons/fwdaemon_fallback_cmp.py $installdir/ -COPY: $srcdir/src/connect/daemons/fwdaemon_high_load_analysis.py $installdir/ -COPY: $srcdir/src/connect/daemons/fwdaemon_purge_abusers.py $installdir/ COPY: $srcdir/include/connect/*.h $installdir/include/connect/ COPY: $srcdir/include/connect/ext/*.h $installdir/include/connect/ext/ COPY: $srcdir/include/connect/daemons/*.h $installdir/include/connect/daemons/ @@ -38,7 +34,7 @@ POSTBUILD: [ "$platform" != "Cygwin64" ] || { cp -vp /usr/{bin/cygcheck.exe,bin/ # --with-distcc, --with-action etc. Linux64-Centos : ICC : ICC.sh --without-debug --without-mt --with-static --without-runpath --with-local-lbsm --without-gnutls --without-vdb --without-serial --with-flat-makefile Linux64-Centos : GCC : GCC.sh --without-debug --without-mt --with-static --without-runpath --with-local-lbsm --without-gnutls --without-vdb --without-serial --with-flat-makefile -Linux64-Centos7 : GCC : GCC.sh --without-debug --without-mt --with-static --without-runpath --with-local-lbsm --without-gnutls --without-vdb --without-serial --with-flat-makefile FreeBSD64 : Clang : Clang.sh --without-debug --without-mt --with-static --without-runpath --with-local-lbsm --without-gnutls --without-vdb --without-serial --with-flat-makefile IntelMAC : GCC : GCC.sh --without-debug --without-mt --with-static --without-runpath --with-local-lbsm --without-gnutls --without-vdb --without-serial --with-flat-makefile Cygwin64 : GCC : GCC.sh --without-debug --without-mt --with-static --without-runpath --with-local-lbsm --without-gnutls --without-vdb --without-serial --without-flat-makefile + diff --git a/c++/scripts/projects/igblast/ChangeLog 
b/c++/scripts/projects/igblast/ChangeLog index fe4c2e42..a98e5365 100644 --- a/c++/scripts/projects/igblast/ChangeLog +++ b/c++/scripts/projects/igblast/ChangeLog @@ -1,3 +1,15 @@ +Mar 7, 2019 +Release 1.13.0 +*Determine the V gene reading frame from the end of FWR3 region instead of end of V gene. This is to allow proper determination of the frames for rearrangements that have insertions or deletions near the V gene end. + +*Increase allowed distance between V gene end and J gene start to 225 bp to allow detection of ultra long D/N region. + +*Package the edit_imgt_file.pl script, the internal_data and optional_files folders into the IgBlast release such that it is easy for user to install. + +Nov 26, 2018 +*Release 1.12.0 +*Increase allowed distance between V gene end and J gene start positions (from 90 bp to 150 bp) as well as between V gene end and D gene start positions (from 55 to 120 bp) to accommodate extremely long VDJ junctions found in some antibodies. + Oct 25, 2018 *Release 1.11.0 *Change the 0-based coordinate system to 1-based coordinate system in AIRR format (with -outfmt 19 parameter) for sequence start positions per the new AIRR Rearrangement Schema ( http://docs.airr-community.org/en/latest/datarep/rearrangements.html ). 
diff --git a/c++/scripts/projects/igblast/Manifest b/c++/scripts/projects/igblast/Manifest index c254dd00..689dcc2b 100644 --- a/c++/scripts/projects/igblast/Manifest +++ b/c++/scripts/projects/igblast/Manifest @@ -1,7 +1,7 @@ # # Filename: Manifest # -# $Id: Manifest 570088 2018-09-04 15:07:58Z camacho $ +# $Id: Manifest 581684 2019-03-04 21:29:54Z jianye $ # # Author: Christiam Camacho # @@ -16,6 +16,10 @@ # It is allowed to have more than one of each statements APP: igblastp igblastn makeblastdb +COPY: $srcdir/src/app/igblast/edit_imgt_file.pl $installdir/bin +COPY: $srcdir/src/app/igblast/internal_data $installdir +COPY: $srcdir/src/app/igblast/optional_file $installdir + POSTBUILD: $srcdir/scripts/projects/igblast/post_build/make_installers.py -v $version $platform $installdir "$tarball" $bindir DEFAULT_CONFIGURE_FLAGS: --without-debug --with-strip --with-openmp --with-mt --with-downloaded-vdb --with-static-vdb --with-build-root=$srcdir/ReleaseMT diff --git a/c++/scripts/projects/igblast/edit_imgt_file.pl b/c++/scripts/projects/igblast/edit_imgt_file.pl index 5b36f26d..bdfe4636 100755 --- a/c++/scripts/projects/igblast/edit_imgt_file.pl +++ b/c++/scripts/projects/igblast/edit_imgt_file.pl @@ -10,8 +10,14 @@ while(my $line=){ #print ("line = $line\n"); if ($line =~ /^>.*\|(TR.+)\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|/){ print(">$1\n"); - } elsif ($line =~ /^>.*\|(IG.+)\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|/){ - print(">$1\n"); + } elsif ($line =~ /^>.*\|(IG.+)\|(.*)\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|.*\|/){ + my $species = $2; + my $id = $1; + if ($species =~ /Mus\s+spretus/i) { #deal with mixed mouse species + $id = $id."_Mus_spretus"; + } + + print(">$id\n"); } else { $line =~ s/\.+//g; #get rid of dot diff --git a/c++/scripts/projects/igblast/post_build/macosx/ncbi-igblast.sh b/c++/scripts/projects/igblast/post_build/macosx/ncbi-igblast.sh index 928ba85d..902a0a5c 100755 --- 
a/c++/scripts/projects/igblast/post_build/macosx/ncbi-igblast.sh +++ b/c++/scripts/projects/igblast/post_build/macosx/ncbi-igblast.sh @@ -25,7 +25,7 @@ setup() prep_binary_component_package() { - BLAST_BINS="igblastn igblastp" + BLAST_BINS="igblastn igblastp edit_imgt_file.pl" DATA_DIRS="optional_file internal_data" ALL_BINS="$BLAST_BINS" diff --git a/c++/scripts/projects/igblast/post_build/rpm/ncbi-igblast.spec b/c++/scripts/projects/igblast/post_build/rpm/ncbi-igblast.spec index 6306cf7c..799014c7 100644 --- a/c++/scripts/projects/igblast/post_build/rpm/ncbi-igblast.spec +++ b/c++/scripts/projects/igblast/post_build/rpm/ncbi-igblast.spec @@ -24,7 +24,7 @@ cd c++/*/build %install %__mkdir_p $RPM_BUILD_ROOT/%_bindir -%__install -m755 c++/*/bin/igblast[pn] $RPM_BUILD_ROOT/%_bindir +%__install -m755 c++/*/bin/igblast[pn] c++/*/bin/edit_imgt_file.pl $RPM_BUILD_ROOT/%_bindir %__mkdir_p $RPM_BUILD_ROOT/%{_prefix}/share/igblast cp -R c++/src/app/igblast/internal_data $RPM_BUILD_ROOT/%{_prefix}/share/igblast cp -R c++/src/app/igblast/optional_file $RPM_BUILD_ROOT/%{_prefix}/share/igblast diff --git a/c++/scripts/projects/igblast/post_build/win/make_win.py b/c++/scripts/projects/igblast/post_build/win/make_win.py index 83340c52..c1a4936e 100644 --- a/c++/scripts/projects/igblast/post_build/win/make_win.py +++ b/c++/scripts/projects/igblast/post_build/win/make_win.py @@ -1,6 +1,6 @@ #! 
/usr/bin/env python3 """Script to create the Windows installer for BLAST command line applications""" -# $Id: make_win.py 563079 2018-05-01 21:34:28Z camacho $ +# $Id: make_win.py 580968 2019-02-22 21:24:42Z camacho $ # # Author: Christiam camacho @@ -42,6 +42,7 @@ def main(): VERBOSE = options.VERBOSE apps = [ "igblastn.exe", "igblastp.exe", + "edit_imgt_file.pl", "ncbi-vdb-md.dll" ] diff --git a/c++/scripts/projects/igblast/post_build/win/ncbi-blast.nsi b/c++/scripts/projects/igblast/post_build/win/ncbi-blast.nsi index 427f9e27..e357bf72 100755 --- a/c++/scripts/projects/igblast/post_build/win/ncbi-blast.nsi +++ b/c++/scripts/projects/igblast/post_build/win/ncbi-blast.nsi @@ -62,6 +62,7 @@ Section "DefaultSection" SecDflt File "igblastn.exe" File "igblastp.exe" + File "edit_imgt_file.pl" File "ncbi-vdb-md.dll" SetOutPath "$INSTDIR\doc" diff --git a/c++/scripts/projects/igblast/project.lst b/c++/scripts/projects/igblast/project.lst index 70ff860f..33188b03 100644 --- a/c++/scripts/projects/igblast/project.lst +++ b/c++/scripts/projects/igblast/project.lst @@ -99,5 +99,6 @@ sra/readers/sra sra/readers$ sra$ misc$ +misc/jsonwrapp misc/third_party misc/third_party_static diff --git a/c++/scripts/projects/magicblast/project.lst b/c++/scripts/projects/magicblast/project.lst index effb3f7b..abfed3c2 100644 --- a/c++/scripts/projects/magicblast/project.lst +++ b/c++/scripts/projects/magicblast/project.lst @@ -95,5 +95,7 @@ sra/readers/sra sra/readers$ sra$ misc$ +misc/jsonwrapp misc/third_party misc/third_party_static +misc/jsonwrapp diff --git a/c++/scripts/projects/mod_caf/Manifest b/c++/scripts/projects/mod_caf/Manifest index a9911809..832f6d34 100644 --- a/c++/scripts/projects/mod_caf/Manifest +++ b/c++/scripts/projects/mod_caf/Manifest @@ -1,7 +1,7 @@ # # Filename: Manifest # -# $Id: Manifest 562490 2018-04-23 15:55:02Z lavr $ +# $Id: Manifest 581233 2019-02-26 20:58:29Z fukanchi $ # # Author: Sergey Fukanchik, NCBI # @@ -16,5 +16,3 @@ SRC: 
src/connect/daemons/mod/mod_caf.c src/connect/daemons/mod/README.mod_caf Linux64-Centos : gcc : GCC.sh --without-debug --with-mt --with-64 --with-static --without-runpath --with-local-lbsm --without-gnutls --without-vdb --with-flat-makefile Linux64-Centos : gcc-dbg : GCC.sh --with-debug --with-mt --with-64 --with-static --without-runpath --with-local-lbsm --without-gnutls --without-vdb --with-flat-makefile -Linux64-Centos7 : gcc : GCC.sh --without-debug --with-mt --with-64 --with-static --without-runpath --with-local-lbsm --without-gnutls --without-vdb --with-flat-makefile -Linux64-Centos7 : gcc-dbg : GCC.sh --with-debug --with-mt --with-64 --with-static --without-runpath --with-local-lbsm --without-gnutls --without-vdb --with-flat-makefile diff --git a/c++/scripts/projects/netschedule/Manifest b/c++/scripts/projects/netschedule/Manifest index 3ee59db9..6502b544 100644 --- a/c++/scripts/projects/netschedule/Manifest +++ b/c++/scripts/projects/netschedule/Manifest @@ -1,7 +1,7 @@ # # Filename: Manifest # -# $Id: Manifest 573342 2018-10-25 17:54:05Z satskyse $ +# $Id: Manifest 581233 2019-02-26 20:58:29Z fukanchi $ # # Author: Sergey Satskiy # @@ -21,7 +21,7 @@ APP: $srcdir/src/app/netschedule/test/ns.py+ $srcdir/src/app/netschedule/test/te APP: $srcdir/src/app/netschedule/utils/ns_health_check.py ETC: src/app/netschedule/netscheduled.ini -DEFAULT_CONFIGURATIONS: Linux64-Centos7:O2g +DEFAULT_CONFIGURATIONS: Linux64-Centos:O2g # Each line describes a single configuration # The format is as follows: @@ -30,12 +30,12 @@ DEFAULT_CONFIGURATIONS: Linux64-Centos7:O2g # Release configurator assumes that this script will eventually call standard configure script and pass all options # to it. So some standard options may be added by release configurator, such as --build-root-sfx, --with-projects, # --with-distcc, --with-action etc. 
-Linux64-Centos7 : Release : GCC.sh 7.3.0 --without-debug --with-mt --with-flat-makefile -Linux64-Centos7 : Debug : GCC.sh 7.3.0 --with-debug --with-mt --with-flat-makefile -Linux64-Centos7 : O2g : GCC.sh 7.3.0 --without-debug --with-symbols --with-mt --with-flat-makefile +Linux64-Centos : Release : GCC.sh 7.3.0 --without-debug --with-mt --with-flat-makefile +Linux64-Centos : Debug : GCC.sh 7.3.0 --with-debug --with-mt --with-flat-makefile +Linux64-Centos : O2g : GCC.sh 7.3.0 --without-debug --with-symbols --with-mt --with-flat-makefile # --with-max-debug builds with a GCC sanitizer -Linux64-Centos7 : MaxDebug : GCC.sh 7.3.0 --with-max-debug --with-mt --with-flat-makefile +Linux64-Centos : MaxDebug : GCC.sh 7.3.0 --with-max-debug --with-mt --with-flat-makefile USE_COMPONENTS diff --git a/c++/scripts/projects/project_tree_builder/ChangeLog b/c++/scripts/projects/project_tree_builder/ChangeLog index 41538f39..40d8d7fc 100644 --- a/c++/scripts/projects/project_tree_builder/ChangeLog +++ b/c++/scripts/projects/project_tree_builder/ChangeLog @@ -344,3 +344,16 @@ Fixed bugs in PROTOBUF code generation support October 10, 2018 version 4.6.2, CXX-10275 Corrected list of generated files. + +December 11, 2018 +version 4.7.0, CXX-10351 +Fixed to work correctly on XCode 10.1. +Added support of Metal libraries on XCode. +Fixed to allow blanks in third party library include directories. + +December 18, 2018 +version 4.7.1, CXX-10397 +Added PreBuild script on XCode. 
+ + + diff --git a/c++/scripts/projects/project_tree_builder/Manifest b/c++/scripts/projects/project_tree_builder/Manifest index f9f6a9c2..573d0e79 100644 --- a/c++/scripts/projects/project_tree_builder/Manifest +++ b/c++/scripts/projects/project_tree_builder/Manifest @@ -1,7 +1,7 @@ # # Filename: Manifest # -# $Id: Manifest 557433 2018-02-15 14:49:29Z fukanchi $ +# $Id: Manifest 576197 2018-12-11 18:39:31Z gouriano $ # # Author: Sergey Satskiy # @@ -28,11 +28,11 @@ APP: project_tree_builder #Linux32-Centos : debug : GCC.sh --with-debug --without-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c #Linux64-Centos : plain : ICC.sh --without-debug --without-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c -Linux64-Centos : plain : GCC.sh --without-debug --without-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c --without-downloaded-vdb +Linux64-Centos : plain : GCC.sh --without-pcre --without-libdw --without-debug --without-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c --without-downloaded-vdb -Linux64-Ubuntu : plain : GCC.sh --without-debug --without-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c --without-downloaded-vdb +Linux64-Ubuntu : plain : GCC.sh --without-pcre --without-libdw --without-debug --without-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c --without-downloaded-vdb -FreeBSD64 : plain : Clang.sh --without-debug --without-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c --without-pcre --without-downloaded-vdb +FreeBSD64 : plain : Clang.sh --without-pcre --without-libdw --without-debug --without-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c --without-pcre --without-downloaded-vdb IntelMAC : plain : GCC.sh --without-debug --without-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c --without-downloaded-vdb diff --git 
a/c++/scripts/projects/project_tree_builder/components.link b/c++/scripts/projects/project_tree_builder/components.link index 6854b18f..b7e9c0c5 100644 --- a/c++/scripts/projects/project_tree_builder/components.link +++ b/c++/scripts/projects/project_tree_builder/components.link @@ -1,3 +1,3 @@ [components] -infrastructure 21.0 -core 21.0 +infrastructure 22.0 +core 22.0 diff --git a/c++/scripts/projects/project_tree_builder/project.lst b/c++/scripts/projects/project_tree_builder/project.lst index 779b0a05..829dd341 100644 --- a/c++/scripts/projects/project_tree_builder/project.lst +++ b/c++/scripts/projects/project_tree_builder/project.lst @@ -6,6 +6,7 @@ util/bitset$ util/regexp$ util/xregexp$ util/cache$ +util/checksum serial$ serial/impl$ serial/datatool$ diff --git a/c++/scripts/projects/public/Manifest b/c++/scripts/projects/public/Manifest index 23fac173..71670375 100644 --- a/c++/scripts/projects/public/Manifest +++ b/c++/scripts/projects/public/Manifest @@ -1,7 +1,7 @@ # # Filename: Manifest # -# $Id: Manifest 559684 2018-03-13 19:54:02Z fukanchi $ +# $Id: Manifest 581233 2019-02-26 20:58:29Z fukanchi $ # # Author: # @@ -24,6 +24,9 @@ TAG_PRE_COMMIT: tagprecommit/use_embedded_ptb.py Win64_15 : MSVC15-64-dll : dll 64 ReleaseDLL Win64_15 : MSVC15-64 : static 64 ReleaseMT +Win64_17 : MSVC17-64-dll : dll 64 ReleaseDLL +Win64_17 : MSVC17-64 : static 64 ReleaseMT + Cygwin64 : GCC : GCC.sh --without-debug --with-mt --with-flat-makefile --without-ncbi-c Linux64-Centos : GCC481 : GCC.sh 4.8.1 --without-debug --without-flat-makefile --without-ncbi-c @@ -32,7 +35,7 @@ Linux64-Centos : GCC510 : GCC.sh 5.1.0 --without-debug --with-mt --with-flat Linux64-Centos : GCC530 : GCC.sh 5.3.0 --without-debug --with-mt --with-flat-makefile --without-ncbi-c Linux64-Centos : ICC170 : ICC.sh 17.0 --without-debug --with-mt --with-dll --with-runpath --without-ncbi-c Linux64-Centos : ICC170d : ICC.sh 17.0 --with-debug --with-mt --with-3psw=system --with-runpath --without-ncbi-c 
-Linux64-Centos7 : Clang : Clang.sh 3.8.0 --with-debug --with-mt --with-flat-makefile --without-ncbi-c +Linux64-Centos : Clang : Clang.sh 3.8.0 --with-debug --with-mt --with-flat-makefile --without-ncbi-c Linux64-Ubuntu : plain-GCC : GCC.sh --without-debug --with-mt --with-flat-makefile --without-ncbi-c Linux32-Ubuntu : plain-GCC : GCC.sh --without-debug --with-mt --with-flat-makefile --without-ncbi-c diff --git a/c++/scripts/projects/public/components.link b/c++/scripts/projects/public/components.link index 5bcaf836..8fdf4977 100644 --- a/c++/scripts/projects/public/components.link +++ b/c++/scripts/projects/public/components.link @@ -1,13 +1,13 @@ [components] -algo 21.0 -app 21.0 -core 21.0 -dbase 21.0 -infrastructure 21.0 -misc 21.0 -objects 21.0 -objtools 21.0 -sample 21.0 -sra 21.0 -web 21.0 +algo 22.0 +app 22.0 +core 22.0 +dbase 22.0 +infrastructure 22.0 +misc 22.0 +objects 22.0 +objtools 22.0 +sample 22.0 +sra 22.0 +web 22.0 diff --git a/c++/scripts/projects/pubseq_gateway/ChangeLog b/c++/scripts/projects/pubseq_gateway/ChangeLog index 6ed27d7a..e1c9ddf2 100644 --- a/c++/scripts/projects/pubseq_gateway/ChangeLog +++ b/c++/scripts/projects/pubseq_gateway/ChangeLog @@ -1,3 +1,33 @@ +Release 1.1.10 + + * UV-1.25.0, DATASTAX-2.11.0, H2O-2.2.5 + * PSG -- also send the data blob's SAT with the NA metainfo (JIRA: + CXX-10491) + * PSG: incorrect request counter in get_na requests (JIRA: CXX-10494) + * PSG core dump in LMDB cache lookup (JIRA: CXX-10510) + * PSG -- make sure to catch all exceptions (JIRA: CXX-10517) + * PSG - always provide "Content-Type" HTTP header (JIRA: CXX-10518) + +Release 1.1.2 (2019-02-12) + + * PSG -- to accumulate smaller responses (RESOLVE, GET_NA) data, send it in + one op (JIRA: CXX-10474) + * PSG -- improve NamedAnnot metainfo retrieval latency (now around 45ms) + (JIRA: CXX-10467) + * Add annot_info to the get_na request + +Release 1.1.1 (2019-02-05) + + * Unified JSON dictionary keys style + * Async NA retrieval improvement: proper 
handling of non-first page + timeouts + +Release 1.1.0 (2019-02-04) + + * Implementation of ID/get_na + * Adjustments to the changed DB scheme + * Various bug fixes + Release 1.0.0 (2018-10-18) * Initial release based on TRUNK diff --git a/c++/scripts/projects/pubseq_gateway/Manifest b/c++/scripts/projects/pubseq_gateway/Manifest index 2e91ba49..e642d788 100644 --- a/c++/scripts/projects/pubseq_gateway/Manifest +++ b/c++/scripts/projects/pubseq_gateway/Manifest @@ -1,7 +1,7 @@ # # Filename: Manifest # -# $Id: Manifest 572745 2018-10-18 20:08:20Z satskyse $ +# $Id: Manifest 581593 2019-03-04 14:48:08Z satskyse $ # # Author: Sergey Satskiy # @@ -17,7 +17,7 @@ APP: pubseq_gateway ETC: src/app/pubseq_gateway/pubseq_gateway.ini -DEFAULT_CONFIGURATIONS: Linux64-Centos7:O2g +DEFAULT_CONFIGURATIONS: Linux64-Centos:O2g # Each line describes a single configuration # The format is as follows: @@ -26,11 +26,11 @@ DEFAULT_CONFIGURATIONS: Linux64-Centos7:O2g # Release configurator assumes that this script will eventually call standard configure script and pass all options # to it. So some standard options may be added by release configurator, such as --build-root-sfx, --with-projects, # --with-distcc, --with-action etc. 
-Linux64-Centos7 : Release : GCC.sh 7.3.0 --without-debug --with-mt --with-flat-makefile -Linux64-Centos7 : Debug : GCC.sh 7.3.0 --with-debug --with-mt --with-flat-makefile -Linux64-Centos7 : O2g : GCC.sh 7.3.0 --without-debug --with-symbols --with-mt --with-flat-makefile +Linux64-Centos : Release : GCC.sh 7.3.0 --without-debug --with-mt --with-flat-makefile --with-libuv=$NCBI/libuv-1.25.0 --with-cassandra=$NCBI/datastax-cpp-driver-2.11.0-ncbi1 --with-h2o=$NCBI/h2o-2.2.5 +Linux64-Centos : Debug : GCC.sh 7.3.0 --with-debug --with-mt --with-flat-makefile --with-libuv=$NCBI/libuv-1.25.0 --with-cassandra=$NCBI/datastax-cpp-driver-2.11.0-ncbi1 --with-h2o=$NCBI/h2o-2.2.5 +Linux64-Centos : O2g : GCC.sh 7.3.0 --without-debug --with-symbols --with-mt --with-flat-makefile --with-libuv=$NCBI/libuv-1.25.0 --with-cassandra=$NCBI/datastax-cpp-driver-2.11.0-ncbi1 --with-h2o=$NCBI/h2o-2.2.5 # --with-max-debug builds with a GCC sanitizer -Linux64-Centos7 : MaxDebug : GCC.sh 7.3.0 --with-max-debug --with-mt --with-flat-makefile +Linux64-Centos : MaxDebug : GCC.sh 7.3.0 --with-max-debug --with-mt --with-flat-makefile --with-libuv=$NCBI/libuv-1.25.0 --with-cassandra=$NCBI/datastax-cpp-driver-2.11.0-ncbi1 --with-h2o=$NCBI/h2o-2.2.5 # USE_COMPONENTS diff --git a/c++/scripts/projects/testres-kernel/Manifest b/c++/scripts/projects/testres-kernel/Manifest index c45674c1..76ecb23f 100644 --- a/c++/scripts/projects/testres-kernel/Manifest +++ b/c++/scripts/projects/testres-kernel/Manifest @@ -1,7 +1,7 @@ # # Filename: Manifest # -# $Id: Manifest 551343 2017-11-16 08:53:36Z zakharov $ +# $Id: Manifest 581233 2019-02-26 20:58:29Z fukanchi $ # # Author: Mikhail Zakharov (template by Sergey Satskiy) # @@ -21,7 +21,7 @@ POSTBUILD: $packagedir/copy_modules.py $srcdir $bindir $installdir #ini file COPY: $srcdir/src/internal/cppcore/testres/kernel/testres_kernel.ini $installdir/bin -Linux64-Centos7 : O2g : GCC.sh 4.9.3 --without-debug --with-symbols --with-mt --with-static --without-runpath 
--with-flat-makefile --without-ncbi-c +Linux64-Centos : O2g : GCC.sh 4.9.3 --without-debug --with-symbols --with-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c Linux64-Centos : dbg : GCC.sh 4.9.3 --with-debug --with-symbols --with-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c Linux64-Centos : O2 : GCC.sh 4.9.3 --without-debug --with-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c diff --git a/c++/scripts/projects/testres-scheduler/Manifest b/c++/scripts/projects/testres-scheduler/Manifest index 8c34c7f9..da9d8ae3 100644 --- a/c++/scripts/projects/testres-scheduler/Manifest +++ b/c++/scripts/projects/testres-scheduler/Manifest @@ -1,7 +1,7 @@ # # Filename: Manifest # -# $Id: Manifest 551343 2017-11-16 08:53:36Z zakharov $ +# $Id: Manifest 581233 2019-02-26 20:58:29Z fukanchi $ # # Author: Mikhail Zakharov (template by Sergey Satskiy) # @@ -22,7 +22,7 @@ COPY: $srcdir/src/internal/cppcore/testres/web/presenters/ $installdir #ini file COPY: $srcdir/src/internal/cppcore/testres/scheduler/testres_scheduler.ini $installdir/bin -Linux64-Centos7 : O2g : GCC.sh 4.9.3 --without-debug --with-symbols --with-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c +Linux64-Centos : O2g : GCC.sh 4.9.3 --without-debug --with-symbols --with-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c Linux64-Centos : dbg : GCC.sh 4.9.3 --with-debug --with-symbols --with-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c Linux64-Centos : O2 : GCC.sh 4.9.3 --without-debug --with-mt --with-static --without-runpath --with-flat-makefile --without-ncbi-c diff --git a/c++/scripts/projects/xmlwrapp/Manifest b/c++/scripts/projects/xmlwrapp/Manifest index 5a4a05ef..709e62df 100644 --- a/c++/scripts/projects/xmlwrapp/Manifest +++ b/c++/scripts/projects/xmlwrapp/Manifest @@ -1,7 +1,7 @@ # # Filename: Manifest # -# $Id: Manifest 565088 2018-06-06 16:30:05Z fukanchi $ 
+# $Id: Manifest 581228 2019-02-26 20:44:40Z fukanchi $ # # Author: Sergey Satskiy # @@ -35,6 +35,8 @@ Linux64-Centos : ICC-17 : ICC.sh 17.0 --without-debug --with-mt Linux64-Centos : ICC-15 : ICC.sh 15.0 --without-debug --with-mt --without-runpath --with-flat-makefile Linux64-Centos : plain-ICC : ICC.sh --without-debug --with-mt --without-runpath --with-flat-makefile +Linux64-Centos : Release : GCC.sh --without-debug --with-mt --without-runpath --with-flat-makefile + Linux64-Ubuntu : plain-GCC : GCC.sh --without-debug --with-mt --without-runpath --with-flat-makefile Linux32-Ubuntu : plain-GCC : GCC.sh --without-debug --with-mt --without-runpath --with-flat-makefile @@ -48,5 +50,4 @@ IntelMAC : plain : GCC.sh --without-debug --with-mt IntelMAC : GCC : GCC.sh --without-debug --with-mt --without-runpath --with-flat-makefile XCode : plain : Xcode.sh 30 -Linux64-Centos7 : Release : GCC.sh --without-debug --with-mt --without-runpath --with-flat-makefile diff --git a/c++/scripts/projects/xmlwrapp/project.lst b/c++/scripts/projects/xmlwrapp/project.lst index 3451eaa1..f3e49852 100644 --- a/c++/scripts/projects/xmlwrapp/project.lst +++ b/c++/scripts/projects/xmlwrapp/project.lst @@ -26,6 +26,7 @@ util/regexp$ util/regexp/testdata$ util/xregexp$ util/bitset$ +util/checksum serial$ serial/datatool$ connect$ diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt index c74db0b5..3573d3d1 100644 --- a/c++/src/CMakeLists.txt +++ b/c++/src/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeLists.txt 575211 2018-11-26 16:41:56Z blastadm $ +# $Id: CMakeLists.txt 582157 2019-03-11 19:07:18Z blastadm $ ############################################################################# ############################################################################## diff --git a/c++/src/Makefile.in b/c++/src/Makefile.in index 595945d1..e460a48c 100644 --- a/c++/src/Makefile.in +++ b/c++/src/Makefile.in @@ -1,4 +1,4 @@ 
-# $Id: Makefile.in 575211 2018-11-26 16:41:56Z blastadm $ +# $Id: Makefile.in 582157 2019-03-11 19:07:18Z blastadm $ # Master (top-level) makefile for all NCBI C++ projects ################################################################## diff --git a/c++/src/algo/blast/api/blast_options_builder.cpp b/c++/src/algo/blast/api/blast_options_builder.cpp index a8831b4d..7640b835 100644 --- a/c++/src/algo/blast/api/blast_options_builder.cpp +++ b/c++/src/algo/blast/api/blast_options_builder.cpp @@ -1,4 +1,4 @@ -/* $Id: blast_options_builder.cpp 512725 2016-09-02 14:13:18Z fongah2 $ +/* $Id: blast_options_builder.cpp 577721 2019-01-08 17:58:10Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -89,6 +89,9 @@ CBlastOptionsBuilder::ComputeProgram(const string & program, p = "deltablast"; found = true; } + else if (s == "indexed") { + found = true; + } } else if (p == "blastn") { if (s == "megablast") { p = "megablast"; @@ -98,7 +101,7 @@ CBlastOptionsBuilder::ComputeProgram(const string & program, p = "vecscreen"; found = true; } - if ((s == "sra") || (s == "wgs")) { + if ((s == "sra") || (s == "wgs") || (s=="indexed")) { found = true; } } else if (p == "tblastn") { diff --git a/c++/src/algo/blast/api/blast_seqalign.cpp b/c++/src/algo/blast/api/blast_seqalign.cpp index 508a22e6..829d7a71 100644 --- a/c++/src/algo/blast/api/blast_seqalign.cpp +++ b/c++/src/algo/blast/api/blast_seqalign.cpp @@ -1,4 +1,4 @@ -/* $Id: blast_seqalign.cpp 532442 2017-04-05 13:47:44Z boratyng $ +/* $Id: blast_seqalign.cpp 580452 2019-02-14 18:01:37Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -395,6 +395,60 @@ s_CorrectUASequence(BlastHSP* hsp) return; } +#if _DEBUG +static void +s_ValidateExon(const CSpliced_exon& exon, const CSeq_id& product_id, + const CSeq_id& genomic_id) +{ + int product_start = exon.GetProduct_start().GetNucpos(); + int product_end = 
exon.GetProduct_end().GetNucpos(); + int genomic_start = exon.GetGenomic_start(); + int genomic_end = exon.GetGenomic_end(); + + int product_length = product_end - product_start + 1; + int genomic_length = genomic_end - genomic_start + 1; + + int p = 0, g = 0; + for (const auto& it : exon.GetParts()) { + switch (it->Which()) { + case CSpliced_exon_chunk::e_Match: + p += it->GetMatch(); + g += it->GetMatch(); + break; + + case CSpliced_exon_chunk::e_Mismatch: + p += it->GetMismatch(); + g += it->GetMismatch(); + break; + + case CSpliced_exon_chunk::e_Product_ins: + p += it->GetProduct_ins(); + break; + + case CSpliced_exon_chunk::e_Genomic_ins: + g += it->GetGenomic_ins(); + break; + + default: + cerr << "Urecognized exon part\t" << product_id.AsFastaString() + << "\t" << genomic_id.AsFastaString() << endl; + } + } + + if (p != product_length) { + cerr << "Product\t" << product_id.AsFastaString() << "\t" + << exon.GetProduct_start().GetNucpos() << "\t" + << product_length << "\t" << p << endl; + } + + if (g != genomic_length) { + cerr << "Genomic\t" << genomic_id.AsFastaString() << "\t" + << exon.GetGenomic_start() << "\t" + << genomic_length << "\t" << g << endl; + } + +} +#endif void MakeSplicedSeg(CSpliced_seg& spliced_seg, CRef product_id, @@ -422,7 +476,17 @@ void MakeSplicedSeg(CSpliced_seg& spliced_seg, for (HSPContainer* h = chain->hsps; h; h = h->next) { BlastHSP* hsp = h->hsp; - _ASSERT(hsp); + HSPContainer* last_h = h; + _ASSERT(hsp && last_h); + + + while (last_h->next && + (last_h->hsp->map_info->right_edge & MAPPER_SPLICE_SIGNAL) == 0) { + + last_h = last_h->next; + } + + BlastHSP* last_hsp = last_h->hsp; _ASSERT(hsp->gap_info->size > 1 || hsp->query.end - hsp->query.offset == @@ -430,17 +494,13 @@ void MakeSplicedSeg(CSpliced_seg& spliced_seg, CRef exon(new CSpliced_exon); exon->SetProduct_start().SetNucpos(hsp->query.offset); - exon->SetProduct_end().SetNucpos(hsp->query.end - 1); + exon->SetProduct_end().SetNucpos(last_hsp->query.end - 1); 
exon->SetGenomic_start(hsp->subject.offset); - exon->SetGenomic_end(hsp->subject.end - 1); + exon->SetGenomic_end(last_hsp->subject.end - 1); exon->SetProduct_strand(product_strand); exon->SetGenomic_strand(genomic_strand); - const JumperEditsBlock* hsp_edits = hsp->map_info->edits; - int query_pos = hsp->query.offset; - int subject_pos = hsp->subject.offset; - int num_matches = 0; // save splice signal before next exon if (hsp->map_info->left_edge & MAPPER_SPLICE_SIGNAL) { @@ -453,61 +513,99 @@ void MakeSplicedSeg(CSpliced_seg& spliced_seg, } // save splice signal after exon - if (hsp->map_info->right_edge & MAPPER_SPLICE_SIGNAL) { + if (last_hsp->map_info->right_edge & MAPPER_SPLICE_SIGNAL) { CSpliced_exon::TDonor_after_exon::TBases r_bases(2u, ' '); r_bases[0] = BLASTNA_TO_IUPACNA[ - (int)((hsp->map_info->right_edge >> 2) & 3)]; + (int)((last_hsp->map_info->right_edge >> 2) & 3)]; r_bases[1] = BLASTNA_TO_IUPACNA[ - (int)(hsp->map_info->right_edge & 3)]; + (int)(last_hsp->map_info->right_edge & 3)]; exon->SetDonor_after_exon().SetBases(r_bases); } - for (int i=0;i < hsp_edits->num_edits;i++) { - num_matches = hsp_edits->edits[i].query_pos - query_pos; - query_pos += num_matches; - subject_pos += num_matches; + + for (HSPContainer* hh=h,*prev=NULL;hh != last_h->next; + prev = hh, hh = hh->next) { + + int query_pos = hh->hsp->query.offset; + int subject_pos = hh->hsp->subject.offset; + int num_matches = 0; + + // record gaps between HSPs + if (prev) { + + _ASSERT(hh->hsp->query.offset >= prev->hsp->query.end); + _ASSERT(hh->hsp->subject.offset >= prev->hsp->subject.end); + if (hh->hsp->query.offset > prev->hsp->query.end) { + CRef chunk(new CSpliced_exon_chunk); + chunk->SetProduct_ins(hh->hsp->query.offset - + prev->hsp->query.end); + + exon->SetParts().push_back(chunk); + } + + if (hh->hsp->subject.offset > prev->hsp->subject.end) { + CRef chunk(new CSpliced_exon_chunk); + chunk->SetGenomic_ins(hh->hsp->subject.offset - + prev->hsp->subject.end); + + 
exon->SetParts().push_back(chunk); + } + } + + const JumperEditsBlock* hsp_edits = hh->hsp->map_info->edits; + for (int i=0;i < hsp_edits->num_edits;i++) { + num_matches = hsp_edits->edits[i].query_pos - query_pos; + query_pos += num_matches; + subject_pos += num_matches; + _ASSERT(num_matches >= 0); + if (num_matches > 0) { + // record number of matches + CRef chunk(new CSpliced_exon_chunk); + chunk->SetMatch(num_matches); + exon->SetParts().push_back(chunk); + } + + // record mismatch or gap + CRef chunk(new CSpliced_exon_chunk); + _ASSERT(hsp_edits->edits[i].query_base != kGap || + hsp_edits->edits[i].subject_base != kGap); + + if (hsp_edits->edits[i].query_base == kGap) { + chunk->SetGenomic_ins(1); + subject_pos++; + } + else if (hsp_edits->edits[i].subject_base == kGap) { + chunk->SetProduct_ins(1); + query_pos++; + } + else { + chunk->SetMismatch(1); + query_pos++; + subject_pos++; + } + + exon->SetParts().push_back(chunk); + } + + num_matches = MAX(hh->hsp->query.end - query_pos, 0); + _ASSERT(hh->hsp->query.end - query_pos >= -1); + // an HSP may end with a mismatch or a gap, if a splice signal was + // found and HSP extent was updated (mapping reads to a genome) _ASSERT(num_matches >= 0); if (num_matches > 0) { - // record number of matches CRef chunk(new CSpliced_exon_chunk); chunk->SetMatch(num_matches); exon->SetParts().push_back(chunk); } - - // record mismatch or gap - CRef chunk(new CSpliced_exon_chunk); - _ASSERT(hsp_edits->edits[i].query_base != kGap || - hsp_edits->edits[i].subject_base != kGap); - - if (hsp_edits->edits[i].query_base == kGap) { - chunk->SetGenomic_ins(1); - subject_pos++; - } - else if (hsp_edits->edits[i].subject_base == kGap) { - chunk->SetProduct_ins(1); - query_pos++; - } - else { - chunk->SetMismatch(1); - query_pos++; - subject_pos++; - } - - exon->SetParts().push_back(chunk); } - num_matches = MAX(hsp->query.end - query_pos, 0); - _ASSERT(hsp->query.end - query_pos >= -1); - // an HSP may end with a mismatch or a gap, if 
a splice signal was - // found and HSP extent was updated (mapping reads to a genome) - _ASSERT(num_matches >= 0); - if (num_matches > 0) { - CRef chunk(new CSpliced_exon_chunk); - chunk->SetMatch(num_matches); - exon->SetParts().push_back(chunk); - } +#if _DEBUG + s_ValidateExon(*exon, *product_id, *genomic_id); +#endif + exons.push_back(exon); + h = last_h; } #if _DEBUG @@ -1087,7 +1185,7 @@ s_BuildScoreList(const BlastHSP * hsp, // Set the E-Value double evalue = (hsp->evalue < SMALLEST_EVALUE) ? 0.0 : hsp->evalue; if (evalue >= 0.0) { - static string score_type = (hsp->num <= 1) ? "e_value" : "sum_e"; + string score_type = (hsp->num <= 1) ? "e_value" : "sum_e"; scores.push_back(s_MakeScore(score_type, evalue, 0, false)); } @@ -1477,6 +1575,10 @@ BlastHitList2SeqAlign_OMF(const BlastHitList * hit_list, TSeqPos subj_length = 0; CRef subject_id; GetSequenceLengthAndId(seqinfo_src, kOid, subject_id, &subj_length); + if(subject_id.Empty()){ + LOG_POST(Info << "No unfiltered subject id for oid " + NStr::UIntToString(kOid)); + continue; + } // Union subject sequence ranges vector ranges; @@ -1497,7 +1599,6 @@ BlastHitList2SeqAlign_OMF(const BlastHitList * hit_list, // Get SeqIds for entrez query restriction. 
vector seqid_list; GetFilteredRedundantSeqids(*seqinfo_src, hsp_list->oid, seqid_list, subject_id->IsGi()); - // stores a CSeq_align for each matching sequence vector > hit_align; if (is_gapped) { diff --git a/c++/src/algo/blast/api/blast_seqinfosrc_aux.cpp b/c++/src/algo/blast/api/blast_seqinfosrc_aux.cpp index f6733f20..c7c4d7c1 100644 --- a/c++/src/algo/blast/api/blast_seqinfosrc_aux.cpp +++ b/c++/src/algo/blast/api/blast_seqinfosrc_aux.cpp @@ -1,4 +1,4 @@ -/* $Id: blast_seqinfosrc_aux.cpp 520163 2016-11-23 13:38:30Z madden $ +/* $Id: blast_seqinfosrc_aux.cpp 578572 2019-01-22 18:23:26Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -59,7 +59,10 @@ void GetSequenceLengthAndId(const blast::IBlastSeqInfoSrc * seqinfo_src, CRef id = FindBestChoice(seqid_list, CSeq_id::BestRank); if (id.NotEmpty()) { seqid.Reset(new CSeq_id); - SerialAssign(*seqid, *id); + if (id->IsGi()) + SerialAssign(*seqid, *id); + else + SerialAssign(*seqid, *(seqid_list.front())); } *length = seqinfo_src->GetLength(oid); diff --git a/c++/src/algo/blast/api/blast_setup_cxx.cpp b/c++/src/algo/blast/api/blast_setup_cxx.cpp index 5aac4bbb..f3069a6d 100644 --- a/c++/src/algo/blast/api/blast_setup_cxx.cpp +++ b/c++/src/algo/blast/api/blast_setup_cxx.cpp @@ -792,6 +792,9 @@ SetupSubjects_OMF(IBlastQuerySource& subjects, subj = BlastSequenceBlkFree(subj); NCBI_RETHROW_SAME(e, e.GetMsg()); } + } catch (CException & e) { + subj = BlastSequenceBlkFree(subj); + NCBI_RETHROW_SAME(e, e.GetMsg()); } if (Blast_SubjectIsTranslated(prog)) { diff --git a/c++/src/algo/blast/api/magicblast.cpp b/c++/src/algo/blast/api/magicblast.cpp index 2927dca8..2b92f125 100644 --- a/c++/src/algo/blast/api/magicblast.cpp +++ b/c++/src/algo/blast/api/magicblast.cpp @@ -1,4 +1,4 @@ -/* $Id: magicblast.cpp 574908 2018-11-20 18:35:55Z ivanov $ +/* $Id: magicblast.cpp 580452 2019-02-14 18:01:37Z ivanov $ * 
=========================================================================== * * PUBLIC DOMAIN NOTICE @@ -227,19 +227,11 @@ static void s_ComputeBtopAndIdentity(const HSPChain* chain, int query_gap = hsp->query.offset - prev->query.end; if (query_gap > 0) { btop += (string)"_" + NStr::IntToString(query_gap) + "_"; + len += hsp->query.offset - prev->query.end; } else if (query_gap < 0) { btop += (string)"(" + NStr::IntToString(-query_gap) + ")"; } - - // gap in query on exon edge - if (hsp->query.offset > prev->query.end) { - btop += (string)"_" + - NStr::IntToString(hsp->query.offset - prev->query.end) + - "_"; - - len += hsp->query.offset - prev->query.end; - } } int query_pos = hsp->query.offset; diff --git a/c++/src/algo/blast/api/magicblast_options.cpp b/c++/src/algo/blast/api/magicblast_options.cpp index a16cd998..6cd366b8 100644 --- a/c++/src/algo/blast/api/magicblast_options.cpp +++ b/c++/src/algo/blast/api/magicblast_options.cpp @@ -1,4 +1,4 @@ -/* $Id: magicblast_options.cpp 561251 2018-04-04 18:25:26Z boratyng $ +/* $Id: magicblast_options.cpp 577781 2019-01-08 18:27:42Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -101,7 +101,6 @@ CMagicBlastOptionsHandle::SetRNAToRNADefaults() SetLookupDbFilter(false); SetSpliceAlignments(false); SetWordSize(30); - SetCutoffScore(70); m_Opts->SetDefaultsMode(false); } @@ -126,7 +125,6 @@ CMagicBlastOptionsHandle::SetGenomeToGenomeDefaults() SetLookupDbFilter(true); SetSpliceAlignments(false); SetWordSize(28); - SetCutoffScore(70); m_Opts->SetDefaultsMode(false); } @@ -204,7 +202,9 @@ CMagicBlastOptionsHandle::SetHitSavingOptionsDefaults() // set some default here, allow INT4MAX to mean infinity m_Opts->SetMaxNumHspPerSequence(0); m_Opts->SetMaxHspsPerSubject(0); - SetCutoffScore(20); + // cutoff zero means use adaptive score threshold that depends on query + // length + SetCutoffScore(0); vector coeffs = {0.0, 0.0}; SetCutoffScoreCoeffs(coeffs); 
SetMaxEditDistance(INT4_MAX); diff --git a/c++/src/algo/blast/api/objmgr_query_data.cpp b/c++/src/algo/blast/api/objmgr_query_data.cpp index 3f7e0f50..1b7cb1d8 100644 --- a/c++/src/algo/blast/api/objmgr_query_data.cpp +++ b/c++/src/algo/blast/api/objmgr_query_data.cpp @@ -185,21 +185,21 @@ private: const TSeqLocVector* m_Queries; ///< Adaptee in adapter design pattern CRef m_QueryVector; const CBlastOptions* m_Options; - AutoPtr m_QuerySource; + CRef m_QuerySource; }; CObjMgr_LocalQueryData::CObjMgr_LocalQueryData(TSeqLocVector * queries, const CBlastOptions * opts) : m_Queries(queries), m_Options(opts) { - m_QuerySource.reset(new CBlastQuerySourceOM(*queries, opts)); + m_QuerySource.Reset(new CBlastQuerySourceOM(*queries, opts)); } CObjMgr_LocalQueryData::CObjMgr_LocalQueryData(CBlastQueryVector & qv, const CBlastOptions * opts) : m_Queries(NULL), m_QueryVector(& qv), m_Options(opts) { - m_QuerySource.reset(new CBlastQuerySourceOM(qv, opts)); + m_QuerySource.Reset(new CBlastQuerySourceOM(qv, opts)); } BLAST_SequenceBlk* diff --git a/c++/src/algo/blast/api/prelim_stage.cpp b/c++/src/algo/blast/api/prelim_stage.cpp index d0e1a303..7e1144fd 100644 --- a/c++/src/algo/blast/api/prelim_stage.cpp +++ b/c++/src/algo/blast/api/prelim_stage.cpp @@ -66,9 +66,10 @@ CBlastPrelimSearch::CBlastPrelimSearch(CRef query_factory, m_Options(options), m_DbAdapter(NULL), m_DbInfo(&dbinfo) { BlastSeqSrc* seqsrc = CSetupFactory::CreateBlastSeqSrc(dbinfo); + CRef wrapped_src(new TBlastSeqSrc(seqsrc, BlastSeqSrcFree)); x_Init(query_factory, options, CRef(), seqsrc); - m_InternalData->m_SeqSrc.Reset(new TBlastSeqSrc(seqsrc, BlastSeqSrcFree)); + m_InternalData->m_SeqSrc = wrapped_src; } CBlastPrelimSearch::CBlastPrelimSearch(CRef query_factory, @@ -149,7 +150,7 @@ CBlastPrelimSearch::x_LaunchMultiThreadedSearch(SInternalData& internal_data) // -RMH- This appears to be a problem right now. When used...this // can cause all the work to go to a single thread! 
(-MN- This is fixed in SB-768) - BlastSeqSrcSetNumberOfThreads(m_InternalData->m_SeqSrc->GetPointer(), + BlastSeqSrcSetNumberOfThreads(m_InternalData->m_SeqSrc->GetPointer(), GetNumberOfThreads()); // Create the threads ... @@ -199,12 +200,12 @@ CBlastPrelimSearch::Run() if (! BlastSeqSrcGetNumSeqs(m_InternalData->m_SeqSrc->GetPointer())) { string msg = "GI or TI list filtering resulted in an empty database."; - + m_Messages.AddMessageAllQueries(eBlastSevWarning, - kBlastMessageNoContext, + kBlastMessageNoContext, msg); } - + BlastSeqSrcResetChunkIterator(m_InternalData->m_SeqSrc->GetPointer()); CEffectiveSearchSpacesMemento eff_memento(m_Options); @@ -227,9 +228,9 @@ CBlastPrelimSearch::Run() for (Uint4 i = 0; i < query_splitter->GetNumberOfChunks(); i++) { try { - CRef chunk_qf = + CRef chunk_qf = query_splitter->GetQueryFactoryForChunk(i); - _TRACE("Query chunk " << i << "/" << + _TRACE("Query chunk " << i << "/" << query_splitter->GetNumberOfChunks()); CRef chunk_data = SplitQuery_CreateChunkData(chunk_qf, m_Options, @@ -238,16 +239,16 @@ CBlastPrelimSearch::Run() CRef query_data( chunk_qf->MakeLocalQueryData( &*m_Options ) ); - BLAST_SequenceBlk * chunk_queries = + BLAST_SequenceBlk * chunk_queries = query_data->GetSequenceBlk(); GetDbIndexSetUsingThreadsFn()( IsMultiThreaded() ); - GetDbIndexRunSearchFn()( + GetDbIndexRunSearchFn()( chunk_queries, lut_options, word_options ); if (IsMultiThreaded()) { x_LaunchMultiThreadedSearch(*chunk_data); } else { - retval = + retval = CPrelimSearchRunner(*chunk_data, opts_memento.get())(); if (retval) { NCBI_THROW(CBlastException, eCoreBlastError, @@ -264,7 +265,7 @@ CBlastPrelimSearch::Run() // free this as the query_splitter keeps a reference to the // chunk factories, which in turn keep a reference to the local // query data. 
- query_data->FlushSequenceData(); + query_data->FlushSequenceData(); } catch (const CBlastException& e) { // This error message is safe to ignore for a given chunk, // because the chunks might end up producing a region of @@ -313,7 +314,7 @@ int CBlastPrelimSearch::CheckInternalData() { int retval = 0; - retval = BlastScoreBlkCheck(m_InternalData->m_ScoreBlk->GetPointer()); + retval = BlastScoreBlkCheck(m_InternalData->m_ScoreBlk->GetPointer()); return retval; } @@ -341,7 +342,7 @@ CBlastPrelimSearch::ComputeBlastHSPResults(BlastHSPStream* stream, hit_param, max_num_hsps, removed_hsps); - if( rm_hsps_info){ + if( rm_hsps_info){ rm_hsps_info->reserve(m_InternalData->m_QueryInfo->num_queries ); for( int query_index = 0 ; query_index < m_InternalData->m_QueryInfo->num_queries ; query_index ++ ){ (*rm_hsps_info)[ query_index ] = removed_hsps[query_index] == FALSE ? false : true; @@ -419,9 +420,10 @@ bool CBlastPrelimSearch::x_BuildStdSegList( vector > > & l IBlastSeqInfoSrc * s_seqInfoSrc = m_DbAdapter->MakeSeqInfoSrc(); EBlastProgramType program = hsp_stream->program; - BlastHSPResults * results = ComputeBlastHSPResults(hsp_stream ); + CStructWrapper results + (ComputeBlastHSPResults(hsp_stream), Blast_HSPResultsFree); - if(NULL == results) + if(NULL == results.GetPointer()) return false; int num_queries = results->num_queries; diff --git a/c++/src/algo/blast/api/rps_aux.cpp b/c++/src/algo/blast/api/rps_aux.cpp index cea4c256..ce0395b0 100644 --- a/c++/src/algo/blast/api/rps_aux.cpp +++ b/c++/src/algo/blast/api/rps_aux.cpp @@ -517,8 +517,6 @@ CBlastRPSInfo::CBlastRPSInfo(const string& rps_dbname, int flags) void CBlastRPSInfo::x_Init(const string& rps_dbname, int flags) { - m_RpsInfo = NULL; - // Obtain the full path to the database string path; try { @@ -534,15 +532,13 @@ void CBlastRPSInfo::x_Init(const string& rps_dbname, int flags) auto_ptr rps_info; // Allocate the core data structure - try { rps_info.reset(new BlastRPSInfo); } + try { m_RpsInfo.reset(new 
BlastRPSInfo); } catch (const bad_alloc&) { NCBI_THROW(CBlastSystemException, eOutOfMemory, "RPSInfo allocation failed"); } // Assign the pointers to the core data structure - m_RpsInfo = rps_info.release(); - m_RpsInfo->lookup_header = NULL; m_RpsInfo->profile_header = NULL; m_RpsInfo->freq_header = NULL; @@ -609,18 +605,16 @@ void CBlastRPSInfo::x_Init(const string& rps_dbname, int flags) } } +// Trivial at this point, but left out-of-line so that the header doesn't +// need to pull in full declarations of the classes to which it takes CRefs. CBlastRPSInfo::~CBlastRPSInfo() { - if ( !m_RpsInfo ) { - return; - } - delete m_RpsInfo; } const BlastRPSInfo* CBlastRPSInfo::operator()() const { - return m_RpsInfo; + return m_RpsInfo.get(); } double diff --git a/c++/src/algo/blast/api/uniform_search.cpp b/c++/src/algo/blast/api/uniform_search.cpp index 0387ddcb..7bf73a6c 100644 --- a/c++/src/algo/blast/api/uniform_search.cpp +++ b/c++/src/algo/blast/api/uniform_search.cpp @@ -274,6 +274,12 @@ CSearchDatabase::x_InitializeDb() const CRef taxIds(new CSeqDBNegativeList); taxIds->AddTaxIds(m_NegativeGiList->GetTaxIdsList()); m_SeqDb.Reset(new CSeqDB(m_DbName, seq_type, taxIds.GetPointer())); + } else if(m_NegativeGiList->GetNumPigs() > 0) { + CRef pigs(new CSeqDBNegativeList); + vector pig_list; + m_NegativeGiList->GetPigList(pig_list); + pigs->SetPigList(pig_list); + m_SeqDb.Reset(new CSeqDB(m_DbName, seq_type, pigs.GetPointer())); } } diff --git a/c++/src/algo/blast/blastinput/blast_args.cpp b/c++/src/algo/blast/blastinput/blast_args.cpp index 28061d82..618cb991 100644 --- a/c++/src/algo/blast/blastinput/blast_args.cpp +++ b/c++/src/algo/blast/blastinput/blast_args.cpp @@ -1,4 +1,4 @@ -/* $Id: blast_args.cpp 574427 2018-11-13 13:20:04Z ivanov $ +/* $Id: blast_args.cpp 581735 2019-03-05 16:42:54Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -51,6 +51,7 @@ Author: Jason Papadopoulos #include // for 
CInputException #include // for CSeqMaskerIstatFactory::DiscoverStatType #include +#include #include // for CPsiBlastInputClustalW #include // for CPssmEngine @@ -1432,8 +1433,13 @@ CMappingArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc) arg_desc.AddDefaultKey(kArgScore, "num", "Cutoff score for accepting " "alignments. Can be expressed as a number or a " "function of read length: " - "L,b,a for a * length + b.", - CArgDescriptions::eString, "20"); + "L,b,a for a * length + b.\n" + "Zero means that the cutoff score will be equal to:\n" + "read length, if read length <= 20,\n" + "20, if read length <= 30,\n" + "read length - 10, if read length <= 50,\n" + "40, otherwise.", + CArgDescriptions::eString, "0"); arg_desc.AddOptionalKey(kArgMaxEditDist, "num", "Cutoff edit distance for " "accepting an alignment\nDefault = unlimited", CArgDescriptions::eInteger); @@ -2053,7 +2059,8 @@ CBlastDatabaseArgs::CBlastDatabaseArgs(bool request_mol_type /* = false */, m_IsProtein(true), m_IsMapper(is_mapper), m_IsKBlast(is_kblast), - m_SupportsDatabaseMasking(false) + m_SupportsDatabaseMasking(false), + m_SupportIPGFiltering(false) {} bool @@ -2093,6 +2100,10 @@ CBlastDatabaseArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc) database_args.push_back(kArgTaxIdListFile); database_args.push_back(kArgNegativeTaxIdList); database_args.push_back(kArgNegativeTaxIdListFile); + if (m_SupportIPGFiltering) { + database_args.push_back(kArgIpgList); + database_args.push_back(kArgNegativeIpgList); + } if (m_SupportsDatabaseMasking) { database_args.push_back(kArgDbSoftMask); database_args.push_back(kArgDbHardMask); @@ -2149,7 +2160,17 @@ CBlastDatabaseArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc) "except the specified taxonomy IDs", CArgDescriptions::eString); + if (m_SupportIPGFiltering) { + arg_desc.AddOptionalKey(kArgIpgList, "filename", + "Restrict search of database to list of IPGs", + CArgDescriptions::eString); + // Negative IPG list + 
arg_desc.AddOptionalKey(kArgNegativeIpgList, "filename", + "Restrict search of database to everything" + " except the specified IPGs", + CArgDescriptions::eString); + } // N.B.: all restricting options are mutually exclusive const vector kBlastDBFilteringOptions = { kArgGiList, @@ -2160,7 +2181,8 @@ CBlastDatabaseArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc) kArgNegativeGiList, kArgNegativeSeqidList, kArgNegativeTaxIdList, - kArgNegativeTaxIdListFile + kArgNegativeTaxIdListFile, + }; for (size_t i = 0; i < kBlastDBFilteringOptions.size(); i++) { for (size_t j = i+1; j < kBlastDBFilteringOptions.size(); j++) { @@ -2264,7 +2286,12 @@ static void s_GetTaxIDList(const string & in, bool isFile, bool isNegativeList, set tax_ids; for(unsigned int i=0; i < ids.size(); i++) { - tax_ids.insert(NStr::StringToInt(ids[i], NStr::fAllowLeadingSpaces | NStr::fAllowTrailingSpaces)); + try { + tax_ids.insert(NStr::StringToInt(ids[i], NStr::fAllowLeadingSpaces | NStr::fAllowTrailingSpaces)); + } + catch(CException & e){ + NCBI_THROW(CInputException, eInvalidInput, "Invalid taxidlist file "); + } } CRef taxid_list(new CSeqDBGiList()); @@ -2320,6 +2347,13 @@ CBlastDatabaseArgs::ExtractAlgorithmOptions(const CArgs& args, } else if (args.Exist(kArgNegativeTaxIdListFile) && args[kArgNegativeTaxIdListFile]) { s_GetTaxIDList(args[kArgNegativeTaxIdListFile].AsString(), true, true, m_SearchDb); + } else if (args.Exist(kArgIpgList) && args[kArgIpgList]) { + string fn(SeqDB_ResolveDbPath(args[kArgIpgList].AsString())); + m_SearchDb->SetGiList(CRef (new CSeqDBFileGiList(fn, CSeqDBFileGiList::ePigList))); + } else if (args.Exist(kArgNegativeIpgList) && args[kArgNegativeIpgList]) { + string fn(SeqDB_ResolveDbPath(args[kArgNegativeIpgList].AsString())); + m_SearchDb->SetNegativeGiList(CRef (new CSeqDBFileGiList(fn, CSeqDBFileGiList::ePigList))); + } if (args.Exist(kArgEntrezQuery) && args[kArgEntrezQuery]) @@ -2417,17 +2451,25 @@ 
CFormattingArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc) kOutputFormatDescription += "Options 6, 7, 10 and 17 " "can be additionally configured to produce\n" - "a custom format specified by space delimited format specifiers.\n" + "a custom format specified by space delimited format specifiers,\n" + "or in the case of options 6, 7, and 10, by a token specified\n" + "by the delim keyword. E.g.: \"17 delim=@ qacc sacc score\".\n" + "The delim keyword must appear after the numeric output format\n" + "specification.\n" "The supported format specifiers for options 6, 7 and 10 are:\n"; } else { kOutputFormatDescription += "Options 6, 7 and 10 " "can be additionally configured to produce\n" - "a custom format specified by space delimited format specifiers.\n" + "a custom format specified by space delimited format specifiers,\n" + "or by a token specified by the delim keyword.\n" + " E.g.: \"17 delim=@ qacc sacc score\".\n" + "The delim keyword must appear after the numeric output format\n" + "specification.\n" "The supported format specifiers are:\n"; } - + kOutputFormatDescription += DescribeTabularOutputFormatSpecifiers() + string("\n"); if(m_FormatFlags & eIsSAM) { @@ -2494,6 +2536,35 @@ CFormattingArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc) if(!m_IsIgBlast){ arg_desc.AddFlag(kArgProduceHtml, "Produce HTML output?", true); } + + arg_desc.AddOptionalKey(kArgSortHits, "sort_hits", + "Sorting option for hits:\n" + "alignment view options:\n" + " 0 = Sort by evalue,\n" + " 1 = Sort by bit score,\n" + " 2 = Sort by total score,\n" + " 3 = Sort by percent identity,\n" + " 4 = Sort by query coverage\n" + "Not applicable for outfmt > 4\n", + CArgDescriptions::eInteger); + arg_desc.SetConstraint(kArgSortHits, + new CArgAllowValuesBetween(CAlignFormatUtil::eEvalue, + CAlignFormatUtil::eQueryCoverage, + true)); + + arg_desc.AddOptionalKey(kArgSortHSPs, "sort_hsps", + "Sorting option for hps:\n" + " 0 = Sort by hsp evalue,\n" + " 1 = Sort by hsp score,\n" 
+ " 2 = Sort by hsp query start,\n" + " 3 = Sort by hsp percent identity,\n" + " 4 = Sort by hsp subject start\n" + "Not applicable for outfmt != 0\n", + CArgDescriptions::eInteger); + arg_desc.SetConstraint(kArgSortHSPs, + new CArgAllowValuesBetween(CAlignFormatUtil::eHspEvalue, + CAlignFormatUtil::eSubjectStart, + true)); /// Hit list size, listed here for convenience only arg_desc.SetCurrentGroup("Restrict search or results"); @@ -2518,26 +2589,87 @@ bool CFormattingArgs::ArchiveFormatRequested(const CArgs& args) const { EOutputFormat output_fmt; - string ignore; - ParseFormattingString(args, output_fmt, ignore); + string ignore1, ignore2; + ParseFormattingString(args, output_fmt, ignore1, ignore2); return (output_fmt == eArchiveFormat ? true : false); } + +static void s_ValidateCustomDelim(string custom_fmt_spec,string customDelim) +{ + bool error = false; + string checkfield; + custom_fmt_spec = NStr::TruncateSpaces(custom_fmt_spec); + if(custom_fmt_spec.empty()) return; + + //Check if delim is already used + const string kFieldsWithSemicolSeparator = "sallseqid staxids sscinames scomnames sblastnames sskingdoms";//sep = ";" + const string kFramesField = "frames"; //sep = "/" + const string kAllTitlesField ="salltitles"; //sep = "<>"" + + if(customDelim == ";") { + vector tokens; + NStr::Split(kFieldsWithSemicolSeparator," ", tokens); + for(size_t i = 0; i < tokens.size(); i++) { + if(NStr::Find(custom_fmt_spec,tokens[i]) != NPOS) { + checkfield = tokens[i]; + error = true; + break; + } + } + } + else { + if(customDelim == "/") { + checkfield = kFramesField; + } + else if(customDelim == "<>") { + checkfield = kAllTitlesField; + } + if(!checkfield.empty() && NStr::Find(custom_fmt_spec,checkfield) != NPOS) { + error = true; + } + } + + if(error) { + string msg("Your custom record separator (" + customDelim + ") is also used by the format specifier (" + checkfield + + ") to separate multiple entries. 
Please use a different record separator (delim keyword)."); + NCBI_THROW(CInputException, eInvalidInput, msg); + } +} + void CFormattingArgs::ParseFormattingString(const CArgs& args, EOutputFormat& fmt_type, - string& custom_fmt_spec) const + string& custom_fmt_spec, + string& custom_delim) const { custom_fmt_spec.clear(); if (args[kArgOutputFormat]) { string fmt_choice = NStr::TruncateSpaces(args[kArgOutputFormat].AsString()); - string::size_type pos; + string::size_type pos; if ( (pos = fmt_choice.find_first_of(' ')) != string::npos) { custom_fmt_spec.assign(fmt_choice, pos+1, fmt_choice.size()-(pos+1)); fmt_choice.erase(pos); - } + } + if(!custom_fmt_spec.empty()) { + if(NStr::StartsWith(custom_fmt_spec, "delim")) { + vector tokens; + NStr::Split(custom_fmt_spec," ",tokens); + if(tokens.size() > 0) { + string tag; + bool isValid = NStr::SplitInTwo(tokens[0],"=",tag,custom_delim); + if(!isValid) { + string msg("Delimiter format is invalid. Valid format is delim="); + NCBI_THROW(CInputException, eInvalidInput, msg); + } + else { + custom_fmt_spec = NStr::Replace(custom_fmt_spec,tokens[0],""); + } + } + } + } int val = 0; try { val = NStr::StringToInt(fmt_choice); } catch (const CStringException&) { // probably a conversion error @@ -2561,7 +2693,7 @@ CFormattingArgs::ParseFormattingString(const CArgs& args, fmt_type == eSAM) ) { custom_fmt_spec.clear(); } - } + } } @@ -2569,7 +2701,7 @@ void CFormattingArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt) { - ParseFormattingString(args, m_OutputFormat, m_CustomOutputFormatSpec); + ParseFormattingString(args, m_OutputFormat, m_CustomOutputFormatSpec,m_CustomDelim); if((m_OutputFormat == eSAM) && !(m_FormatFlags & eIsSAM) ){ NCBI_THROW(CInputException, eInvalidInput, "SAM format is only applicable to blastn" ); @@ -2578,7 +2710,7 @@ CFormattingArgs::ExtractAlgorithmOptions(const CArgs& args, NCBI_THROW(CInputException, eInvalidInput, "AIRR rearrangement format is only applicable to igblastn" ); } - + 
s_ValidateCustomDelim(m_CustomOutputFormatSpec,m_CustomDelim); m_ShowGis = static_cast(args[kArgShowGIs]); if(m_IsIgBlast){ m_Html = false; @@ -2633,6 +2765,10 @@ CFormattingArgs::ExtractAlgorithmOptions(const CArgs& args, if (args[kArgLineLength]) { m_LineLength = args[kArgLineLength].AsInteger(); } + if(args[kArgSortHits]) + { + m_HitsSortOption = args[kArgSortHits].AsInteger(); + } } else { @@ -2656,13 +2792,27 @@ CFormattingArgs::ExtractAlgorithmOptions(const CArgs& args, m_NumDescriptions = hitlist_size; m_NumAlignments = hitlist_size; - } + if(args[kArgSortHits]) { + ERR_POST(Warning << "The parameter -sorthits is ignored for output formats > 4."); + } + } + if(hitlist_size < 5){ ERR_POST(Warning << "Examining 5 or more matches is recommended"); } opt.SetHitlistSize(hitlist_size); + if(args[kArgSortHSPs]) + { + int hspsSortOption = args[kArgSortHSPs].AsInteger(); + if(m_OutputFormat == ePairwise) { + m_HspsSortOption = hspsSortOption; + } + else { + ERR_POST(Warning << "The parameter -sorthsps is ignored for output formats != 0."); + } + } return; } diff --git a/c++/src/algo/blast/blastinput/blast_fasta_input.cpp b/c++/src/algo/blast/blastinput/blast_fasta_input.cpp index 26ea43d1..5c38d62e 100644 --- a/c++/src/algo/blast/blastinput/blast_fasta_input.cpp +++ b/c++/src/algo/blast/blastinput/blast_fasta_input.cpp @@ -1,4 +1,4 @@ -/* $Id: blast_fasta_input.cpp 573648 2018-10-30 18:13:44Z ivanov $ +/* $Id: blast_fasta_input.cpp 581158 2019-02-26 13:07:37Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -864,6 +864,7 @@ CShortReadFastaInputSource::x_ReadFastqOneSeq(CRef line_reader) CTempString line; string defline_id; CRef retval; + bool empty_sequence = false; // first read defline ++(*line_reader); @@ -911,20 +912,31 @@ CShortReadFastaInputSource::x_ReadFastqOneSeq(CRef line_reader) } bioseq.SetInst().SetMol(CSeq_inst::eMol_na); bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw); - 
bioseq.SetInst().SetLength(line.length()); - bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna(line.data())); + // + read instead of a sequence means that the sequence is empty and + // we reached the second defline + if (line[0] == '+') { + bioseq.SetInst().SetLength(0); + bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna("")); + empty_sequence = true; + } + else { + bioseq.SetInst().SetLength(line.length()); + bioseq.SetInst().SetSeq_data().SetIupacna(CIUPACna(line.data())); + m_BasesAdded += line.length(); + } - m_BasesAdded += line.length(); retval = seq_entry; } - // read and skip second defline - ++(*line_reader); - line = **line_reader; - // skip empty lines - while (!line_reader->AtEOF() && line.empty()) { + if (!empty_sequence) { + // read and skip second defline ++(*line_reader); line = **line_reader; + // skip empty lines + while (!line_reader->AtEOF() && line.empty()) { + ++(*line_reader); + line = **line_reader; + } } if (line[0] != '+') { @@ -933,13 +945,15 @@ CShortReadFastaInputSource::x_ReadFastqOneSeq(CRef line_reader) NStr::IntToString(line_reader->GetLineNumber())); } - // read and skip quality scores - ++(*line_reader); - line = **line_reader; - // skip empty lines - while (!line_reader->AtEOF() && line.empty()) { + if (!empty_sequence) { + // read and skip quality scores ++(*line_reader); line = **line_reader; + // skip empty lines + while (!line_reader->AtEOF() && line.empty()) { + ++(*line_reader); + line = **line_reader; + } } return retval; diff --git a/c++/src/algo/blast/blastinput/blastp_args.cpp b/c++/src/algo/blast/blastinput/blastp_args.cpp index 4087cfec..0aaa6a69 100644 --- a/c++/src/algo/blast/blastinput/blastp_args.cpp +++ b/c++/src/algo/blast/blastinput/blastp_args.cpp @@ -1,4 +1,4 @@ -/* $Id: blastp_args.cpp 565102 2018-06-06 17:25:02Z rackerst $ +/* $Id: blastp_args.cpp 579216 2019-01-31 16:18:17Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -60,6 +60,7 @@ 
CBlastpAppArgs::CBlastpAppArgs() m_BlastDbArgs.Reset(new CBlastDatabaseArgs); m_BlastDbArgs->SetDatabaseMaskingSupport(true); + m_BlastDbArgs->SetIPGFilteringSupport(true); arg.Reset(m_BlastDbArgs); m_Args.push_back(arg); diff --git a/c++/src/algo/blast/blastinput/blastx_args.cpp b/c++/src/algo/blast/blastinput/blastx_args.cpp index 9f67704f..c32cf636 100644 --- a/c++/src/algo/blast/blastinput/blastx_args.cpp +++ b/c++/src/algo/blast/blastinput/blastx_args.cpp @@ -1,4 +1,4 @@ -/* $Id: blastx_args.cpp 500404 2016-05-04 14:59:01Z camacho $ +/* $Id: blastx_args.cpp 579216 2019-01-31 16:18:17Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -61,6 +61,7 @@ CBlastxAppArgs::CBlastxAppArgs() m_BlastDbArgs.Reset(new CBlastDatabaseArgs); m_BlastDbArgs->SetDatabaseMaskingSupport(true); + m_BlastDbArgs->SetIPGFilteringSupport(true); arg.Reset(m_BlastDbArgs); m_Args.push_back(arg); diff --git a/c++/src/algo/blast/blastinput/cmdline_flags.cpp b/c++/src/algo/blast/blastinput/cmdline_flags.cpp index 4fbb1349..a29318f8 100644 --- a/c++/src/algo/blast/blastinput/cmdline_flags.cpp +++ b/c++/src/algo/blast/blastinput/cmdline_flags.cpp @@ -1,4 +1,4 @@ -/* $Id: cmdline_flags.cpp 562577 2018-04-24 15:51:23Z fongah2 $ +/* $Id: cmdline_flags.cpp 579216 2019-01-31 16:18:17Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -62,6 +62,10 @@ const string kArgNegativeSeqidList("negative_seqidlist"); const string kArgDbSoftMask("db_soft_mask"); const string kArgDbHardMask("db_hard_mask"); +const string kArgIpgList("ipglist"); +const string kArgNegativeIpgList("negative_ipglist"); + + const string kTask("task"); const string kArgQueryGeneticCode("query_gencode"); diff --git a/c++/src/algo/blast/blastinput/magicblast_args.cpp b/c++/src/algo/blast/blastinput/magicblast_args.cpp index 3c1facdf..d331952e 100644 --- a/c++/src/algo/blast/blastinput/magicblast_args.cpp +++ 
b/c++/src/algo/blast/blastinput/magicblast_args.cpp @@ -1,4 +1,4 @@ -/* $Id: magicblast_args.cpp 552531 2017-12-04 20:45:56Z boratyng $ +/* $Id: magicblast_args.cpp 579714 2019-02-05 16:52:42Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -172,6 +172,34 @@ public: } }; + +/// MT args that allow multiple threads with a FASTA subject +class CMapperMTArgs : public CMTArgs +{ +public: + virtual void ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& /* opts */) { + const int kMaxValue = static_cast(GetCpuCount()); + + if (args.Exist(kArgNumThreads) && + args[kArgNumThreads].HasValue()) { + + // use the minimum of the two: user requested number of threads and + // number of available CPUs for number of threads + int num_threads = args[kArgNumThreads].AsInteger(); + if (num_threads > kMaxValue) { + m_NumThreads = kMaxValue; + + ERR_POST(Warning << (string)"Number of threads was reduced to " + + NStr::IntToString((unsigned int)m_NumThreads) + + " to match the number of available CPUs"); + } + else { + m_NumThreads = num_threads; + } + } + } +}; + CMagicBlastAppArgs::CMagicBlastAppArgs() { // remove search strategy args added in parent class constructor @@ -208,7 +236,7 @@ CMagicBlastAppArgs::CMagicBlastAppArgs() ); m_Args.push_back(arg); - m_MTArgs.Reset(new CMTArgs); + m_MTArgs.Reset(new CMapperMTArgs); arg.Reset(m_MTArgs); m_Args.push_back(arg); diff --git a/c++/src/algo/blast/blastinput/psiblast_args.cpp b/c++/src/algo/blast/blastinput/psiblast_args.cpp index df34d0fb..6d9bf81d 100644 --- a/c++/src/algo/blast/blastinput/psiblast_args.cpp +++ b/c++/src/algo/blast/blastinput/psiblast_args.cpp @@ -1,4 +1,4 @@ -/* $Id: psiblast_args.cpp 516751 2016-10-17 19:04:46Z camacho $ +/* $Id: psiblast_args.cpp 579216 2019-01-31 16:18:17Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -56,6 +56,7 @@ CPsiBlastAppArgs::CPsiBlastAppArgs() 
SetTask(kDefaultTask); m_BlastDbArgs.Reset(new CBlastDatabaseArgs); + m_BlastDbArgs->SetIPGFilteringSupport(true); arg.Reset(m_BlastDbArgs); m_Args.push_back(arg); diff --git a/c++/src/algo/blast/blastinput/unit_test/blast_scope_src_unit_test.cpp b/c++/src/algo/blast/blastinput/unit_test/blast_scope_src_unit_test.cpp index d22f1e9a..5e0c995d 100644 --- a/c++/src/algo/blast/blastinput/unit_test/blast_scope_src_unit_test.cpp +++ b/c++/src/algo/blast/blastinput/unit_test/blast_scope_src_unit_test.cpp @@ -1,4 +1,4 @@ -/* $Id: blast_scope_src_unit_test.cpp 534442 2017-04-27 12:27:34Z ivanov $ +/* $Id: blast_scope_src_unit_test.cpp 581749 2019-03-05 16:48:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -414,5 +414,27 @@ BOOST_AUTO_TEST_CASE(ForceRemoteBlastDbLoader) { scope_source.GetBlastDbLoaderName()); } +BOOST_AUTO_TEST_CASE(RetrieveSeqUsingPDBIds) +{ + const string dbname="data/pdb_test"; + CAutoNcbiConfigFile afc; + afc.SetProteinBlastDbDataLoader(dbname); + SDataLoaderConfig dlconfig(dbname, true); + dlconfig.m_UseGenbank = false; + BOOST_CHECK_EQUAL(dlconfig.m_BlastDbName, dbname); + CBlastScopeSourceWrapper scope_source(dlconfig); + CRef scope = scope_source.NewScope(); + + CNcbiIfstream ids_file("data/test.pdb_ids"); + string line; + while (getline(ids_file, line)) { + vector d; + NStr::Split(line, " ", d); + const CSeq_id seqid(d[0]); + TSeqPos length = sequence::GetLength(seqid, scope); + BOOST_CHECK_EQUAL(NStr::StringToInt(d[1]), length); + } +} + BOOST_AUTO_TEST_SUITE_END() #endif /* SKIP_DOXYGEN_PROCESSING */ diff --git a/c++/src/algo/blast/blastinput/unit_test/blastinput_unit_test.cpp b/c++/src/algo/blast/blastinput/unit_test/blastinput_unit_test.cpp index 2efa4491..8888c355 100644 --- a/c++/src/algo/blast/blastinput/unit_test/blastinput_unit_test.cpp +++ b/c++/src/algo/blast/blastinput/unit_test/blastinput_unit_test.cpp @@ -1,4 +1,4 @@ -/* $Id: blastinput_unit_test.cpp 548810 
2017-10-18 13:38:41Z ivanov $ +/* $Id: blastinput_unit_test.cpp 582148 2019-03-11 18:02:00Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2518,7 +2518,7 @@ BOOST_AUTO_TEST_CASE(ReadSinglePdb) { string pdb_mol("1QCF"); string pdb_chain("A"); - string pdb(pdb_mol + pdb_chain); + string pdb(pdb_mol + '_' + pdb_chain); istringstream instream(pdb); const bool is_protein(true); @@ -2655,7 +2655,7 @@ BOOST_AUTO_TEST_CASE(ReadSinglePdb_InDifferentFormats) string pdb; if (i == 0) { - pdb.assign(pdb_mol + pdb_chain); + pdb.assign(pdb_mol + '|' + pdb_chain); } else { pdb.assign(pdb_mol + "_" + pdb_chain); } diff --git a/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pdb b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pdb new file mode 100644 index 00000000..92d533e8 Binary files /dev/null and b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pdb differ diff --git a/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.phr b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.phr new file mode 100644 index 00000000..36519a80 Binary files /dev/null and b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.phr differ diff --git a/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pin b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pin new file mode 100644 index 00000000..10dc5e12 Binary files /dev/null and b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pin differ diff --git a/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pog b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pog new file mode 100644 index 00000000..b4de78fe Binary files /dev/null and b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pog differ diff --git a/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pos b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pos new file mode 100644 index 00000000..2ec41e58 Binary files /dev/null and 
b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pos differ diff --git a/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pot b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pot new file mode 100644 index 00000000..7e6b4599 Binary files /dev/null and b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pot differ diff --git a/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.psq b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.psq new file mode 100644 index 00000000..558437e1 Binary files /dev/null and b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.psq differ diff --git a/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.ptf b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.ptf new file mode 100644 index 00000000..5d87ceb9 Binary files /dev/null and b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.ptf differ diff --git a/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pto b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pto new file mode 100644 index 00000000..1e95ccc1 Binary files /dev/null and b/c++/src/algo/blast/blastinput/unit_test/data/pdb_test.pto differ diff --git a/c++/src/algo/blast/blastinput/unit_test/data/test.pdb_ids b/c++/src/algo/blast/blastinput/unit_test/data/test.pdb_ids new file mode 100644 index 00000000..15dcf69c --- /dev/null +++ b/c++/src/algo/blast/blastinput/unit_test/data/test.pdb_ids @@ -0,0 +1,6 @@ +4WZJ_AA 125 +4WZJ_EEE 92 +4WZJ_JJJJ 118 +5AJ4_AB 220 +5AJ4_Ab 190 +5AJ4_B8 188 diff --git a/c++/src/algo/blast/core/blast_engine.c b/c++/src/algo/blast/core/blast_engine.c index cf6811eb..2e2476f5 100644 --- a/c++/src/algo/blast/core/blast_engine.c +++ b/c++/src/algo/blast/core/blast_engine.c @@ -1,4 +1,4 @@ -/* $Id: blast_engine.c 574995 2018-11-21 15:39:18Z ivanov $ +/* $Id: blast_engine.c 581728 2019-03-05 16:37:30Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -79,9 +79,9 @@ #define 
CONV_NUCL2PROT_COORDINATES(length) (length) / CODON_LENGTH NCBI_XBLAST_EXPORT const int kBlastMajorVersion = 2; -NCBI_XBLAST_EXPORT const int kBlastMinorVersion = 8; -NCBI_XBLAST_EXPORT const int kBlastPatchVersion = 1; -NCBI_XBLAST_EXPORT const char* kBlastReleaseDate = "December-17-2018"; +NCBI_XBLAST_EXPORT const int kBlastMinorVersion = 9; +NCBI_XBLAST_EXPORT const int kBlastPatchVersion = 0; +NCBI_XBLAST_EXPORT const char* kBlastReleaseDate = "April-02-2019"; /** Structure to be passed to s_BlastSearchEngineCore, containing pointers to various preallocated structures and arrays. */ @@ -1800,8 +1800,11 @@ Blast_RunPreliminarySearchWithInterrupt(EBlastProgramType program, BLAST_GapAlignSetUp(program, seq_src, score_options, eff_len_options, ext_options, hit_options, query_info, sbp, &score_params, &ext_params, - &hit_params, &eff_len_params, &gap_align)) != 0) + &hit_params, &eff_len_params, &gap_align)) != 0) { + /* Blast_DiagnosticsUpdate(diagnostics, local_diagnostics); */ + Blast_DiagnosticsFree(local_diagnostics); return status; + } if ((status= BLAST_PreliminarySearchEngine(program, query, query_info, @@ -1810,8 +1813,16 @@ Blast_RunPreliminarySearchWithInterrupt(EBlastProgramType program, ext_params, hit_params, eff_len_params, psi_options, db_options, hsp_stream, local_diagnostics, interrupt_search, - progress_info)) != 0) + progress_info)) != 0) { + gap_align = BLAST_GapAlignStructFree(gap_align); + score_params = BlastScoringParametersFree(score_params); + hit_params = BlastHitSavingParametersFree(hit_params); + ext_params = BlastExtensionParametersFree(ext_params); + eff_len_params = BlastEffectiveLengthsParametersFree(eff_len_params); + /* Blast_DiagnosticsUpdate(diagnostics, local_diagnostics); */ + Blast_DiagnosticsFree(local_diagnostics); return status; + } /* Do not destruct score block here */ gap_align->sbp = NULL; diff --git a/c++/src/algo/blast/core/blast_gapalign.c b/c++/src/algo/blast/core/blast_gapalign.c index 37689274..c1338c21 100644 
--- a/c++/src/algo/blast/core/blast_gapalign.c +++ b/c++/src/algo/blast/core/blast_gapalign.c @@ -1,4 +1,4 @@ -/* $Id: blast_gapalign.c 573136 2018-10-23 19:27:19Z fukanchi $ +/* $Id: blast_gapalign.c 581785 2019-03-05 19:21:30Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2795,6 +2795,11 @@ BLAST_GreedyGappedAlignment(const Uint1* query, const Uint1* subject, score_params->gap_open, score_params->gap_extend, &q_ext_r, &s_ext_r, gap_align->greedy_align_mem, fwd_prelim_tback, rem, fence_hit, &fwd_start_point); + + if(fence_hit && *fence_hit) { + return -1; + } + if (score >=0) break; /* double the max distance */ diff --git a/c++/src/algo/blast/core/blast_kappa.c b/c++/src/algo/blast/core/blast_kappa.c index 19378334..7abbe247 100644 --- a/c++/src/algo/blast/core/blast_kappa.c +++ b/c++/src/algo/blast/core/blast_kappa.c @@ -1,4 +1,4 @@ -/* $Id: blast_kappa.c 573792 2018-11-01 15:47:14Z ivanov $ +/* $Id: blast_kappa.c 579190 2019-01-31 13:23:44Z ivanov $ * ========================================================================== * * PUBLIC DOMAIN NOTICE @@ -3831,8 +3831,6 @@ function_cleanup: sfree(thread_data->tld[i]->results->hitlist_array); sfree(thread_data->tld[i]->results); thread_data->tld[i] = SThreadLocalDataFree(thread_data->tld[i]); - results_tld[i] = Blast_HSPResultsFree(results_tld[i]); - s_FreeBlastCompo_QueryInfoArray(&query_info_tld[i], numContexts); } sfree(thread_data->tld); sfree(thread_data); @@ -3866,6 +3864,8 @@ function_cleanup: Blast_CompositionWorkspaceFree(&NRrecord_tld[i]); s_SavedParametersFree(&savedParams_tld[i]); BlastSeqSrcFree(seqsrc_tld[i]); + results_tld[i] = Blast_HSPResultsFree(results_tld[i]); + s_FreeBlastCompo_QueryInfoArray(&query_info_tld[i], numContexts); } sfree(alignments_tld); sfree(compositionTestIndex_tld); diff --git a/c++/src/algo/blast/core/blast_psi.c b/c++/src/algo/blast/core/blast_psi.c index 58dd59a8..bb69d35f 100644 --- 
a/c++/src/algo/blast/core/blast_psi.c +++ b/c++/src/algo/blast/core/blast_psi.c @@ -742,6 +742,18 @@ PSIDiagnosticsResponseFree(PSIDiagnosticsResponse* diags) sfree(diags->gapless_column_weights); } + if (diags->sigma) { + sfree(diags->sigma); + } + + if (diags->interval_sizes) { + sfree(diags->interval_sizes); + } + + if (diags->num_matching_seqs) { + sfree(diags->num_matching_seqs); + } + if (diags->independent_observations) { sfree(diags->independent_observations); } diff --git a/c++/src/algo/blast/core/greedy_align.c b/c++/src/algo/blast/core/greedy_align.c index f137c8c6..076c8977 100644 --- a/c++/src/algo/blast/core/greedy_align.c +++ b/c++/src/algo/blast/core/greedy_align.c @@ -1,4 +1,4 @@ -/* $Id: greedy_align.c 567051 2018-07-12 13:09:32Z fongah2 $ +/* $Id: greedy_align.c 581785 2019-03-05 19:21:30Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -877,6 +877,11 @@ Int4 BLAST_AffineGreedyAlign (const Uint1* seq1, Int4 len1, index = s_FindFirstMismatch(seq1, seq2, len1, len2, 0, 0, fence_hit, reverse, rem); + if (fence_hit && *fence_hit) { + return -1; + } + + /* update the extents of the alignment, and bail out early if no further work is needed */ @@ -1067,6 +1072,11 @@ Int4 BLAST_AffineGreedyAlign (const Uint1* seq1, Int4 len1, seq1_index, seq2_index, fence_hit, reverse, rem); + if (fence_hit && *fence_hit) { + return -1; + } + + if (index > longest_match_run) { seed->start_q = seq1_index; seed->start_s = seq2_index; diff --git a/c++/src/algo/blast/core/hspfilter_mapper.c b/c++/src/algo/blast/core/hspfilter_mapper.c index 8077ba19..9ae4b180 100644 --- a/c++/src/algo/blast/core/hspfilter_mapper.c +++ b/c++/src/algo/blast/core/hspfilter_mapper.c @@ -1,4 +1,4 @@ -/* $Id: hspfilter_mapper.c 561265 2018-04-04 19:38:40Z boratyng $ +/* $Id: hspfilter_mapper.c 580452 2019-02-14 18:01:37Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE 
@@ -358,6 +358,16 @@ static Int4 s_FindFragmentEnd(HSPChain* chain) } */ +static Int4 s_ComputeGapScore(Int4 length, Int4 open_score, Int4 extend_score, + Int4 seq_error) +{ + if (length < 4) { + return length * seq_error; + } + + return open_score + MIN(length, 4) * extend_score; +} + /* Compute HSP alignment score from Jumper edit script */ static Int4 s_ComputeAlignmentScore(BlastHSP* hsp, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score) @@ -367,6 +377,8 @@ static Int4 s_ComputeAlignmentScore(BlastHSP* hsp, Int4 mismatch_score, Int4 score = 0; const Int4 kGap = 15; Int4 num_identical = 0; + Int4 query_gap = 0; + Int4 subject_gap = 0; for (i = 0;i < hsp->map_info->edits->num_edits;i++) { JumperEdit* e = &(hsp->map_info->edits->edits[i]); @@ -378,30 +390,46 @@ static Int4 s_ComputeAlignmentScore(BlastHSP* hsp, Int4 mismatch_score, if (e->query_base == kGap) { ASSERT(e->subject_base != kGap); - if (num_matches > 0 || - (i > 0 && hsp->map_info->edits->edits[i - 1].query_base != - kGap)) { + query_gap++; - score += gap_open_score; + if (subject_gap > 0) { + score += s_ComputeGapScore(subject_gap, -12, -1, -4); + subject_gap = 0; } - score += gap_extend_score; } else if (e->subject_base == kGap) { - if (num_matches > 0 || - (i > 0 && hsp->map_info->edits->edits[i - 1].subject_base != - kGap)) { + subject_gap++; + last_pos++; - score += gap_open_score; + if (query_gap > 0) { + score += s_ComputeGapScore(query_gap, -12, -1, -4); + query_gap = 0; } - score += gap_extend_score; - last_pos++; } else { score += mismatch_score; last_pos++; + + if (subject_gap > 0) { + score += s_ComputeGapScore(subject_gap, -12, -1, -4); + subject_gap = 0; + } + if (query_gap > 0) { + score += s_ComputeGapScore(query_gap, -12, -1, -4); + query_gap = 0; + } } } + if (subject_gap > 0) { + score += s_ComputeGapScore(subject_gap, -12, -1, -4); + subject_gap = 0; + } + if (query_gap > 0) { + score += s_ComputeGapScore(query_gap, -12, -1, -4); + query_gap = 0; + } + score += 
hsp->query.end - last_pos; num_identical += hsp->query.end - last_pos; hsp->num_ident = num_identical; @@ -487,16 +515,6 @@ static Int4 s_ComputeChainScore(HSPChain* chain, score_options->gap_extend); } retval = h->hsp->score; - if (h->hsp->query.offset > 0 && - (h->hsp->map_info->left_edge & MAPPER_SPLICE_SIGNAL) == 0) { - - retval += score_options->no_splice_signal; - } - if (h->hsp->query.end < query_len && - (h->hsp->map_info->right_edge & MAPPER_SPLICE_SIGNAL) == 0) { - - retval += score_options->no_splice_signal; - } prev = h; h = h->next; @@ -514,16 +532,19 @@ static Int4 s_ComputeChainScore(HSPChain* chain, } retval += h->hsp->score; - if (h->hsp->query.offset > 0 && - (h->hsp->map_info->left_edge & MAPPER_SPLICE_SIGNAL) == 0) { + if ((h->hsp->map_info->left_edge & MAPPER_SPLICE_SIGNAL) == 0 || + (prev->hsp->map_info->right_edge & MAPPER_SPLICE_SIGNAL) == 0) { - retval += score_options->no_splice_signal; - } - if (h->hsp->query.end < query_len && - (h->hsp->map_info->right_edge & MAPPER_SPLICE_SIGNAL) == 0) { + Int4 query_gap = + MAX(h->hsp->query.offset - prev->hsp->query.end, 0); - retval += score_options->no_splice_signal; + Int4 subject_gap = + MAX(h->hsp->subject.offset - prev->hsp->subject.end, 0); + + retval += s_ComputeGapScore(query_gap, -12, -1, -4); + retval += s_ComputeGapScore(subject_gap, -12, -1, -4); } + } return retval; @@ -534,16 +555,26 @@ static Boolean s_TestHSPRanges(const BlastHSP* hsp) { Int4 i; Int4 d = 0; + Int4 q = 0, s = 0; + Int4 num_matches; + Int4 last_pos; + const Int4 kGap = 15; for (i=0;i < hsp->gap_info->size;i++) { switch (hsp->gap_info->op_type[i]) { case eGapAlignIns: d -= hsp->gap_info->num[i]; + q += hsp->gap_info->num[i]; break; case eGapAlignDel: d += hsp->gap_info->num[i]; + s += hsp->gap_info->num[i]; break; + case eGapAlignSub: + q += hsp->gap_info->num[i]; + s += hsp->gap_info->num[i]; + default: break; } @@ -554,6 +585,57 @@ static Boolean s_TestHSPRanges(const BlastHSP* hsp) return FALSE; } + 
ASSERT(hsp->query.end - hsp->query.offset == q); + ASSERT(hsp->subject.end - hsp->subject.offset == s); + + d = 0; + q = 0; + s = 0; + last_pos = hsp->query.offset; + for (i=0;i < hsp->map_info->edits->num_edits;i++) { + + num_matches = hsp->map_info->edits->edits[i].query_pos - last_pos - 1; + if (i == 0 || + (i > 0 && hsp->map_info->edits->edits[i - 1].query_base == kGap)) { + num_matches++; + } + q += num_matches; + s += num_matches; + + ASSERT(hsp->query.offset + q == + hsp->map_info->edits->edits[i].query_pos); + + if (hsp->map_info->edits->edits[i].query_base == kGap) { + d++; + s++; + } + else if (hsp->map_info->edits->edits[i].subject_base == kGap) { + d--; + q++; + } + else { + q++; + s++; + } + + last_pos = hsp->map_info->edits->edits[i].query_pos; + } + num_matches = hsp->query.end - last_pos - 1; + if (hsp->map_info->edits->num_edits == 0 || + (hsp->map_info->edits->num_edits > 0 && + hsp->map_info->edits->edits[hsp->map_info->edits->num_edits - 1].query_base == kGap)) { + + num_matches++; + } + q += num_matches; + s += num_matches; + + ASSERT(hsp->query.end - hsp->query.offset + d == + hsp->subject.end - hsp->subject.offset); + + ASSERT(hsp->query.end - hsp->query.offset == q); + ASSERT(hsp->subject.end - hsp->subject.offset == s); + return TRUE; } #endif @@ -1314,6 +1396,7 @@ static BlastHSP* s_MergeHSPs(const BlastHSP* first, const BlastHSP* second, Int4 gap_info_size; Int4 edits_size; Int4 k; + const Uint1 kGap = 15; if (!first || !second || !query || !score_opts) { return NULL; @@ -1324,20 +1407,25 @@ static BlastHSP* s_MergeHSPs(const BlastHSP* first, const BlastHSP* second, return NULL; } - query_gap = second->query.offset - first->query.end; - subject_gap = second->subject.offset - first->subject.end; + query_gap = second->subject.offset - first->subject.end; + subject_gap = second->query.offset - first->query.end; - if (query_gap < 0 || subject_gap < 0 || query_gap > 1 || - query_gap != subject_gap) { + if (query_gap < 0 || subject_gap < 0) { 
return NULL; } - gap_info_size = first->gap_info->size + second->gap_info->size + - MAX(query_gap, subject_gap); + if (MAX(query_gap, subject_gap) < 4) { + mismatches = MIN(query_gap, subject_gap); + query_gap -= mismatches; + subject_gap -= mismatches; + } + + gap_info_size = first->gap_info->size + second->gap_info->size + 3; edits_size = first->map_info->edits->num_edits + - second->map_info->edits->num_edits + MAX(query_gap, subject_gap); + second->map_info->edits->num_edits + + mismatches + query_gap + subject_gap; /* FIXME: should be done through an API */ /* reallocate memory for edit scripts */ @@ -1361,10 +1449,6 @@ static BlastHSP* s_MergeHSPs(const BlastHSP* first, const BlastHSP* second, return NULL; } - if (query_gap == subject_gap) { - mismatches = query_gap; - } - /* add mismatches to gap_info */ if (mismatches > 0) { if (merged_hsp->gap_info->op_type[merged_hsp->gap_info->size - 1] @@ -1382,6 +1466,43 @@ static BlastHSP* s_MergeHSPs(const BlastHSP* first, const BlastHSP* second, ASSERT(merged_hsp->gap_info->size <= gap_info_size); } + + /* add query gap to gap info */ + if (query_gap > 0) { + if (merged_hsp->gap_info->op_type[merged_hsp->gap_info->size - 1] + == eGapAlignDel) { + + merged_hsp->gap_info->num[merged_hsp->gap_info->size - 1] += + query_gap; + } + else { + merged_hsp->gap_info->op_type[merged_hsp->gap_info->size] = + eGapAlignDel; + merged_hsp->gap_info->num[merged_hsp->gap_info->size] = + query_gap; + merged_hsp->gap_info->size++; + } + ASSERT(merged_hsp->gap_info->size <= gap_info_size); + } + + /* add subject gap to gap info */ + if (subject_gap > 0) { + if (merged_hsp->gap_info->op_type[merged_hsp->gap_info->size - 1] + == eGapAlignIns) { + + merged_hsp->gap_info->num[merged_hsp->gap_info->size - 1] += + subject_gap; + } + else { + merged_hsp->gap_info->op_type[merged_hsp->gap_info->size] = + eGapAlignIns; + merged_hsp->gap_info->num[merged_hsp->gap_info->size] = + subject_gap; + merged_hsp->gap_info->size++; + } + 
ASSERT(merged_hsp->gap_info->size <= gap_info_size); + } + /* merge gap_info */ for (k = 0;k < hsp->gap_info->size;k++) { @@ -1403,15 +1524,50 @@ static BlastHSP* s_MergeHSPs(const BlastHSP* first, const BlastHSP* second, /* add mismatches to jumper edits */ if (mismatches > 0) { - JumperEdit* edit = merged_hsp->map_info->edits->edits + - merged_hsp->map_info->edits->num_edits++; - edit->query_pos = merged_hsp->query.end; - /* FIXME: Mismatch bases cannot be currently set because there is - no access to query or subject sequence in this function. */ - edit->query_base = query[edit->query_pos]; - edit->subject_base = edit->query_base; + for (k = 0;k < mismatches;k++) { + JumperEdit* edit = merged_hsp->map_info->edits->edits + + merged_hsp->map_info->edits->num_edits++; + + edit->query_pos = merged_hsp->query.end + k; + /* FIXME: Mismatch bases cannot be currently set because there is + no access to query or subject sequence in this function. */ + edit->query_base = query[edit->query_pos]; + edit->subject_base = edit->query_base; + + ASSERT(merged_hsp->map_info->edits->num_edits <= edits_size); + } + } - ASSERT(merged_hsp->map_info->edits->num_edits <= edits_size); + /* add query gap to jumper edits */ + if (query_gap > 0) { + for (k = 0;k < query_gap;k++) { + JumperEdit* edit = merged_hsp->map_info->edits->edits + + merged_hsp->map_info->edits->num_edits++; + + edit->query_pos = merged_hsp->query.end + mismatches; + /* FIXME: Mismatch bases cannot be currently set because there is + no access to query or subject sequence in this function. 
*/ + edit->query_base = kGap; + edit->subject_base = 0; + + ASSERT(merged_hsp->map_info->edits->num_edits <= edits_size); + } + } + + /* add subject gap to jumper edits */ + if (subject_gap > 0) { + for (k = 0;k < subject_gap;k++) { + JumperEdit* edit = merged_hsp->map_info->edits->edits + + merged_hsp->map_info->edits->num_edits++; + + edit->query_pos = merged_hsp->query.end + mismatches + k; + /* FIXME: Mismatch bases cannot be currently set because there is + no access to query or subject sequence in this function. */ + edit->query_base = query[edit->query_pos]; + edit->subject_base = kGap; + + ASSERT(merged_hsp->map_info->edits->num_edits <= edits_size); + } } /* merge jumper edits */ @@ -1984,6 +2140,18 @@ static int s_Finalize(HSPChain** saved, BlastMappingResults* results, } } +#if _DEBUG + for (query_idx = 0; query_idx < query_info->num_queries; query_idx++) { + HSPChain* chain = saved[query_idx]; + for (; chain; chain = chain->next) { + HSPContainer* h = chain->hsps; + for (; h; h = h->next) { + s_TestHSPRanges(h->hsp); + } + } + + } +#endif results->chain_array = saved; results->num_queries = query_info->num_queries; @@ -2640,9 +2808,10 @@ s_FindSpliceJunctionsForGap(BlastHSP* first, BlastHSP* second, /* number of query bases that fall between the HSPs */ query_gap = second->query.offset - first->query.end; - /* we do not have enough subject sequence saved */ - if (query_gap > first->map_info->subject_overhangs->right_len || - query_gap > second->map_info->subject_overhangs->left_len) { + /* we do not have enough subject sequence saved (-1 because we allow up to + one indel) */ + if (query_gap > first->map_info->subject_overhangs->right_len - 2 || + query_gap > second->map_info->subject_overhangs->left_len - 2) { return 0; } @@ -2690,7 +2859,7 @@ s_FindSpliceJunctionsForGap(BlastHSP* first, BlastHSP* second, /* search for the splice signal at the end of intron; allow for up to 1 indel */ - for (i = MAX(start - 1, 0);i <= MIN(start + 1, second_len);i++) { 
+ for (i = MAX(start - 1, 0);i <= MIN(start + 1, second_len - 2);i++) { seq &= 0xf0; seq |= (second->map_info->subject_overhangs->left[i] << 2) | second->map_info->subject_overhangs->left[i + 1]; @@ -2778,7 +2947,7 @@ s_FindSpliceJunctionsForGap(BlastHSP* first, BlastHSP* second, end = query_gap + q; /* allow for up to 1 indel */ - for (i = MAX(end - 1, 0);i <= MIN(end + 1, first_len);i++) { + for (i = MAX(end - 1, 0);i <= MIN(end + 1, first_len - 2);i++) { seq &= 0xf; seq |= (first->map_info->subject_overhangs->right[i] << 6) | (first->map_info->subject_overhangs->right[i + 1] << 4); @@ -2997,6 +3166,54 @@ static Int4 s_FindSpliceSignals(BlastHSP* hsp, Uint1* query, Int4 query_len) */ +static Int4 +s_TrimOverlap(BlastHSP* first, BlastHSP* second) +{ + if (second->query.offset - first->query.end < 0) { + Int4 overlap = first->query.end - second->query.offset; + ASSERT(overlap >= 0); + + if (second->query.end - second->query.offset > overlap) { + + s_TrimHSP(second, overlap, TRUE, TRUE, -4, -4, -4); + } + else { + s_TrimHSP(first, overlap, TRUE, FALSE, -4, -4, -4); + } + + + ASSERT(first->query.end == second->query.offset); + +#if _DEBUG + s_TestHSPRanges(second); +#endif + } + + if (second->subject.offset - first->subject.end < 0) { + Int4 overlap = first->subject.end - second->subject.offset; + ASSERT(overlap >= 0); + + if (second->subject.end - second->subject.offset > overlap) { + + s_TrimHSP(second, overlap, FALSE, TRUE, -4, -4, -4); + } + else { + s_TrimHSP(first, overlap, FALSE, FALSE, -4, -4, -4); + } + + ASSERT(first->subject.end == second->subject.offset); +#if _DEBUG + s_TestHSPRanges(second); +#endif + } + + ASSERT(first->query.end <= second->query.offset); + ASSERT(first->subject.end <= second->subject.offset); + + return 0; +} + + /* Search for splice signals between two HSPs in a chain. The HSPs in the chain must be sorted by query position in asceding order. 
*/ @@ -3015,7 +3232,6 @@ s_FindSpliceJunctions(HSPChain* chains, /* iterate over HSPs in the chain */ for (ch = chains; ch; ch = ch->next) { HSPContainer* h = ch->hsps; - Boolean searched = FALSE; Uint1* query = NULL; Int4 context; Int4 query_len; @@ -3030,28 +3246,15 @@ s_FindSpliceJunctions(HSPChain* chains, HSPContainer* next = h->next; ASSERT(next); - /* process overlap if found */ - if (next->hsp->query.offset <= h->hsp->query.end && - next->hsp->query.offset > h->hsp->query.offset) { - - Boolean consensus_only = TRUE; - if (h->hsp->score > 50 && next->hsp->score > 50) { - consensus_only = FALSE; - } + /* if not a spliced alignment, try merging HSPs into one */ + /* Introns are typically at least 30 bases long, and there can be + a few unalined query bases. */ + if ((next->hsp->subject.offset - h->hsp->subject.end - + (next->hsp->query.offset - h->hsp->query.end) < 30) && - s_FindSpliceJunctionsForOverlaps(h->hsp, next->hsp, query, - query_len, consensus_only); - searched = TRUE; - h = h->next; - } - /* if not a spliced alignment */ - else if (next->hsp->query.offset - h->hsp->query.end < 10 && - /* This condition is needed only because - s_ExtendAlignment funcition is constranined to allow - up to one gap. 
It can be lifted once the function is - updated */ - abs((next->hsp->query.offset - h->hsp->query.end) - - (next->hsp->subject.offset - h->hsp->subject.end)) < 2) { + /* this condition is needed to align unaligned query bases */ + next->hsp->subject.offset - h->hsp->subject.end < + h->hsp->map_info->subject_overhangs->right_len) { /* save pointer to hsps after next */ HSPContainer* following = h->next->next; @@ -3060,6 +3263,8 @@ s_FindSpliceJunctions(HSPChain* chains, /* duplicate HSPContainer with the two HSPs */ h->next->next = NULL; + s_TrimOverlap(h->hsp, next->hsp); + /* extend the first HSP to cover the gap between HSPs */ if (next->hsp->query.offset - h->hsp->query.end > 1) { BlastHSP* first = h->hsp; @@ -3069,12 +3274,23 @@ s_FindSpliceJunctions(HSPChain* chains, second->query.offset - 1, 0, second->subject.offset - 1 - first->subject.end, scoring_opts, FALSE); + +#if _DEBUG + s_TestHSPRanges(first); +#endif } + /* merge HSPs */ new_hsp = s_MergeHSPs(h->hsp, h->next->hsp, query, scoring_opts); + +#if _DEBUG + s_TestHSPRanges(new_hsp); +#endif + + if (new_hsp) { /* replace the two processed HSPs with the combined one */ @@ -3082,72 +3298,76 @@ s_FindSpliceJunctions(HSPChain* chains, HSPContainerFree(h->next); h->hsp = new_hsp; h->next = following; - searched = TRUE; } else { /* something went wrong with merging, use the initial HSPs */ h->next->next = following; h = h->next; + + ASSERT(!h->next || + (h->hsp->query.end <= h->next->hsp->query.offset && + h->hsp->subject.end <= h->next->hsp->subject.offset)); } } - else if (next->hsp->query.offset - h->hsp->query.end > 0) { - s_FindSpliceJunctionsForGap(h->hsp, next->hsp, query, - query_len, scoring_opts); - searched = TRUE; - h = h->next; - } - else { + /* process overlap if found */ + else if (next->hsp->query.offset <= h->hsp->query.end && + next->hsp->query.offset > h->hsp->query.offset) { + + Boolean consensus_only = TRUE; + if (h->hsp->score > 50 && next->hsp->score > 50) { + consensus_only = FALSE; + } 
+ + s_FindSpliceJunctionsForOverlaps(h->hsp, next->hsp, query, + query_len, consensus_only); + + if ((h->hsp->map_info->right_edge & MAPPER_SPLICE_SIGNAL) == 0) { + + s_TrimOverlap(h->hsp, next->hsp); + } + + +#if _DEBUG + s_TestHSPRanges(h->hsp); +#endif + + h = h->next; } + else if (next->hsp->query.offset - h->hsp->query.end > 0 && + next->hsp->query.offset - h->hsp->query.end < + h->hsp->map_info->subject_overhangs->right_len) { - /* FIXME: if a splice junction cannot be found, we can try looking - for the split between perfect matches */ - } + s_FindSpliceJunctionsForGap(h->hsp, next->hsp, query, + query_len, scoring_opts); - /* Remove HSPs that have the same start position on the query or - subject within a chain. They may arise from modified HSP extents. */ - h = ch->hsps; - while (h->next) { - if (h->hsp->query.offset >= h->next->hsp->query.offset || - h->hsp->subject.offset >= h->next->hsp->subject.offset) { - - if (h->hsp->score >= h->next->hsp->score) { - HSPContainer* remove = h->next; - h->next = h->next->next; - remove->next = NULL; - HSPContainerFree(remove); - } - else { - HSPContainer* remove = h; - HSPContainer* prev = ch->hsps; - if (remove == ch->hsps) { - ch->hsps = remove->next; - h = ch->hsps; - } - else { - while (prev->next && prev->next != h) { - prev = prev->next; - } - ASSERT(prev->next && prev->next == remove); + if ((h->hsp->map_info->right_edge & MAPPER_SPLICE_SIGNAL) == 0) { - prev->next = remove->next; - h = prev; - } - remove->next = NULL; - HSPContainerFree(remove); + s_TrimOverlap(h->hsp, next->hsp); } - continue; +#if _DEBUG + s_TestHSPRanges(h->hsp); +#endif + + h = h->next; } - h = h->next; - } + else { - /* recalculated chain score if splice sites were searched */ - if (searched) { - ch->score = s_ComputeChainScore(ch, scoring_opts, query_len, FALSE); + s_TrimOverlap(h->hsp, next->hsp); + +#if _DEBUG + s_TestHSPRanges(h->hsp); +#endif + + h = h->next; + } } + + /* recalculated chain score */ + ch->score = 
s_ComputeChainScore(ch, scoring_opts, query_len, TRUE); } s_TestChains(chains); @@ -3186,8 +3406,8 @@ static HSPChain* s_FindBestPath(HSPNode* nodes, Int4 num, HSPPath* path, for (k = i + 1;k < num && is_spliced;k++) { BlastHSP* newhsp = *(nodes[k].hsp); - Int4 new_score = nodes[k].best_score + self_score - - s_GetOverlapCost(newhsp, *(nodes[i].hsp), 4); + Int4 overlap_cost = s_GetOverlapCost(newhsp, *(nodes[i].hsp), 4); + Int4 new_score = nodes[k].best_score + self_score - overlap_cost; /* FIXME: some of the conditions double others */ const Int4 hsp_len = hsp->query.end - hsp->query.offset; @@ -3195,29 +3415,27 @@ static HSPChain* s_FindBestPath(HSPNode* nodes, Int4 num, HSPPath* path, const Int4 overlap_len = MAX(MIN(hsp->query.end, newhsp->query.end) - MAX(hsp->query.offset, newhsp->query.offset), 0); - /* add next HSP to the path only if there is fewer than 10 query - bases unaligned between HSPs, newhsp is not contained within - hsp, newhsp aligns to the subject behind hsp, and score improves */ - /* FIXME: there should be a penalty if new hsp->query.offset > - hsp->query.end */ - if (newhsp->query.offset - hsp->query.end < 10 && - newhsp->query.offset > hsp->query.offset && + const Int4 subj_overlap_len = + MAX(MIN(hsp->subject.end, newhsp->subject.end) - + MAX(hsp->subject.offset, newhsp->subject.offset), 0); + + + /* add next HSP to the chain only if hsp, newhsp aligns to the + subject behind hsp, and score improves */ + if (newhsp->query.offset > hsp->query.offset && newhsp->query.end > hsp->query.end && - newhsp->subject.offset - hsp->subject.end >= - /* the difference on query may be smaller than zero, - this will let as combine HSPs of which extension stopped - too soon (not real introns) */ - newhsp->query.offset - hsp->query.end && + + newhsp->subject.offset > hsp->subject.offset && + newhsp->subject.end > hsp->subject.end && newhsp->subject.offset - hsp->subject.end < kMaxIntronLength && (double)overlap_len / hsp_len < 0.75 && (double)overlap_len / 
newhsp_len < 0.75 && - new_score > nodes[i].best_score) { + (double)subj_overlap_len / hsp_len < 0.75 && + (double)subj_overlap_len / newhsp_len < 0.75) { - /* prefer paths with identified splice sites to those without - ones */ - /* FIXME: add min intron length to the condition below */ + /* FIXME: this condition may not be necessary */ if (newhsp->subject.offset - hsp->subject.end > 1) { - /* FIXME: The function that finds splice signals modifies + /* The function that finds splice signals modifies HSPs, so we need to clone HSPs here. */ BlastHSP* hsp_copy = Blast_HSPClone(hsp); BlastHSP* newhsp_copy = Blast_HSPClone(newhsp); @@ -3228,16 +3446,12 @@ static HSPChain* s_FindBestPath(HSPNode* nodes, Int4 num, HSPPath* path, s_FindSpliceJunctions(chain, query_blk, query_info, scoring_opts); - - if (chain->hsps->next && - ((chain->hsps->hsp->map_info->right_edge & - MAPPER_SPLICE_SIGNAL) == 0)) { - -/* new_score += scoring_opts->no_splice_signal; */ - /* FIXME: temporarely, do not create chains if splice - signals are not found */ - new_score = 0; - } + + /* update score: add the difference between sum of + two HSP scores minus overalp and the new score + for the merged HSP */ + new_score += chain->score + overlap_cost - + (newhsp->score + self_score); chain = HSPChainFree(chain); } @@ -3677,6 +3891,7 @@ static Boolean s_FindBestPairs(HSPChain** first_list, Int4 min_score, Pairinfo** pair_info_ptr, Int4* max_num_pairs, + Boolean is_spliced, const ScoringOptions* scoring_options) { HSPChain* first; @@ -3685,6 +3900,9 @@ static Boolean s_FindBestPairs(HSPChain** first_list, Int4 conv_bonus = 0; Int4 num_pairs = 0; Boolean found = FALSE; + const Int4 kMaxInsertSize = is_spliced ? 
+ MAGICBLAST_MAX_INSERT_SIZE_SPLICED : + MAGICBLAST_MAX_INSERT_SIZE_NONSPLICED; /* iterate over all pairs of HSP chains for the first and second read of the pair and collect pair information */ @@ -3755,7 +3973,7 @@ static Boolean s_FindBestPairs(HSPChain** first_list, pair_info[num_pairs].distance = distance; /* distance > 0 indicates a convergent pair (typical) */ - if (distance > 0) { + if (distance > 0 && distance < kMaxInsertSize) { Int4 plus_end, minus_end; hsp = plus->hsps; while (hsp->next) { @@ -4232,7 +4450,7 @@ s_BlastHSPMapperSplicedPairedRun(void* data, BlastHSPList* hsp_list) if (first && second) { s_FindBestPairs(&first, &second, 0, &pair_workspace, - &workspace_size, scoring_opts); + &workspace_size, is_spliced, scoring_opts); ASSERT(s_TestChains(first)); ASSERT(s_TestChains(second)); @@ -4245,6 +4463,10 @@ s_BlastHSPMapperSplicedPairedRun(void* data, BlastHSPList* hsp_list) cutoff_score = (cutoff_score_fun[0] + cutoff_score_fun[1] * query_len) / 100; } + else if (params->cutoff_score == 0) { + cutoff_score = GetCutoffScore( + query_info->contexts[query_idx * NUM_STRANDS].query_length); + } /* save all chains and remove ones with scores lower than best score - kPairBonus */ diff --git a/c++/src/algo/blast/core/index_ungapped.c b/c++/src/algo/blast/core/index_ungapped.c index 1ab6a04f..685054e2 100644 --- a/c++/src/algo/blast/core/index_ungapped.c +++ b/c++/src/algo/blast/core/index_ungapped.c @@ -1,4 +1,4 @@ -/* $Id: index_ungapped.c 172185 2009-10-01 17:52:28Z camacho $ +/* $Id: index_ungapped.c 577772 2019-01-08 18:13:04Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -59,10 +59,10 @@ static ir_fp_entry * ir_fp_entry_destroy( ir_fp_entry * e ) */ static ir_fp_entry * ir_fp_entry_create( void ) { - ir_fp_entry * result = (ir_fp_entry *)malloc( sizeof( ir_fp_entry ) ); - + ir_fp_entry * result = (ir_fp_entry *) calloc(1, sizeof( ir_fp_entry ) ); + if( result != 0 ) { - ir_hash_entry 
* entries = (ir_hash_entry *)calloc( + ir_hash_entry * entries = (ir_hash_entry *) calloc( FP_ENTRY_SIZE, sizeof( ir_hash_entry ) ); if( entries == 0 ) return ir_fp_entry_destroy( result ); result->next = 0; @@ -82,10 +82,10 @@ static ir_fp_entry * ir_fp_entry_create( void ) ir_diag_hash * ir_hash_create( void ) { ir_diag_hash * result = 0; - result = (ir_diag_hash *)malloc( sizeof( ir_diag_hash ) ); + result = (ir_diag_hash *) calloc(1, sizeof( ir_diag_hash ) ); if( result != 0 ) { - ir_hash_entry * entries = (ir_hash_entry *)calloc( + ir_hash_entry * entries = (ir_hash_entry *) calloc( IR_HASH_SIZE, sizeof( ir_hash_entry ) ); if( entries == 0 ) return ir_hash_destroy( result ); result->entries = entries; @@ -99,7 +99,8 @@ ir_diag_hash * ir_hash_create( void ) ir_diag_hash * ir_hash_destroy( ir_diag_hash * hash ) { if( hash != 0 ) { - ir_fp_entry * fpe = hash->free_pool, * fpn; + ir_fp_entry * fpe = hash->free_pool; + ir_fp_entry * fpn; while( fpe != 0 ) { fpn = fpe->next; @@ -134,6 +135,9 @@ ir_hash_entry * ir_locate( if( hash->free == 0 ) { ir_fp_entry * fp = ir_fp_entry_create(); + if (fp == 0) { + return (ir_hash_entry *) 0; + } fp->next = hash->free_pool; hash->free_pool = fp; hash->free = fp->entries; diff --git a/c++/src/algo/blast/core/jumper.c b/c++/src/algo/blast/core/jumper.c index 536c7ca0..567c4f60 100644 --- a/c++/src/algo/blast/core/jumper.c +++ b/c++/src/algo/blast/core/jumper.c @@ -1,4 +1,4 @@ -/* $Id: jumper.c 574907 2018-11-20 18:35:10Z ivanov $ +/* $Id: jumper.c 580452 2019-02-14 18:01:37Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2679,6 +2679,9 @@ Boolean JumperGoodAlign(const BlastGapAlignStruct* gap_align, context_info->query_length * hit_params->options->cutoff_score_fun[1]) / 100; } + else if (hit_params->options->cutoff_score == 0) { + cutoff_score = GetCutoffScore(context_info->query_length); + } else { cutoff_score = hit_params->options->cutoff_score; } @@ 
-2944,16 +2947,6 @@ GapEditScript* GapEditScriptCombine(GapEditScript** edit_script_ptr, int JumperFindSpliceSignals(BlastHSP* hsp, Int4 query_len, const Uint1* subject, Int4 subject_len) { - Uint1 signals[NUM_SIGNALS] = {1, /* AC */ - 2, /* AG */ - 4, /* CA */ - 7, /* CT */ - 8, /* GA */ - 11, /* GT */ - 13, /* TC */ - 14 /* TG */ }; - - if (!hsp || !subject) { return -1; } @@ -2962,42 +2955,24 @@ int JumperFindSpliceSignals(BlastHSP* hsp, Int4 query_len, hsp->map_info->left_edge = MAPPER_EXON; } else { - int k; hsp->map_info->left_edge = (UNPACK_BASE(subject, hsp->subject.offset - 2) << 2) | UNPACK_BASE(subject, hsp->subject.offset - 1); - - for (k = 0;k < NUM_SIGNALS;k++) { - if (hsp->map_info->left_edge == signals[k]) { - hsp->map_info->left_edge |= MAPPER_SPLICE_SIGNAL; - break; - } - } } if (hsp->query.end == query_len || hsp->subject.end == subject_len) { hsp->map_info->right_edge = MAPPER_EXON; } else { - int k; hsp->map_info->right_edge = (UNPACK_BASE(subject, hsp->subject.end) << 2) | UNPACK_BASE(subject, hsp->subject.end + 1); - - for (k = 0;k < NUM_SIGNALS;k++) { - if (hsp->map_info->right_edge == signals[k]) { - hsp->map_info->right_edge |= MAPPER_SPLICE_SIGNAL; - break; - } - } } return 0; } -#define MAX_SUBJECT_OVERHANG 30 - SequenceOverhangs* SequenceOverhangsFree(SequenceOverhangs* overhangs) { if (!overhangs) { @@ -3022,6 +2997,7 @@ static Int4 s_SaveSubjectOverhangs(BlastHSP* hsp, Uint1* subject, { SequenceOverhangs* overhangs = NULL; const Int4 kMinOverhangLength = 0; + const Int4 kMaxSubjectOverhang = query_len < 400 ? 
30 : 60; if (hsp->query.offset < kMinOverhangLength && query_len - hsp->query.end < kMinOverhangLength) { @@ -3038,7 +3014,7 @@ static Int4 s_SaveSubjectOverhangs(BlastHSP* hsp, Uint1* subject, Int4 i; /* at least two subject bases are needed for the search for splice signals */ - Int4 len = MIN(MAX(hsp->query.offset, 2), MAX_SUBJECT_OVERHANG); + Int4 len = MIN(MAX(hsp->query.offset, 2), kMaxSubjectOverhang); Uint1* overhang = calloc(len, sizeof(Uint1)); if (!overhang) { SequenceOverhangsFree(overhangs); @@ -3057,7 +3033,7 @@ static Int4 s_SaveSubjectOverhangs(BlastHSP* hsp, Uint1* subject, if (hsp->query.end <= query_len - kMinOverhangLength) { Int4 i; Int4 len = - MIN(MAX(query_len - hsp->query.end + 1, 2), MAX_SUBJECT_OVERHANG); + MIN(MAX(query_len - hsp->query.end + 1, 2), kMaxSubjectOverhang); Uint1* overhang = calloc(len, sizeof(Uint1)); if (!overhang) { SequenceOverhangsFree(overhangs); @@ -4574,3 +4550,22 @@ Int2 FilterQueriesForMapping(Uint1* sequence, Int4 length, Int4 offset, } +/* Get alignment cutoff score for a given query length. 
Note that the function + assumes that score for match is 1 */ +Int4 GetCutoffScore(Int4 query_length) +{ + if (query_length <= 20) { + return query_length; + } + else if (query_length <= 30) { + return 20; + } + else if (query_length <= 50) { + return query_length - 10; + } + else if (query_length < 200) { + return (Int4)(0.6 * query_length); + } + + return 120; +} diff --git a/c++/src/algo/blast/core/jumper.h b/c++/src/algo/blast/core/jumper.h index af8bf513..76fe3556 100644 --- a/c++/src/algo/blast/core/jumper.h +++ b/c++/src/algo/blast/core/jumper.h @@ -1,4 +1,4 @@ -/* $Id: jumper.h 533522 2017-04-17 15:50:34Z boratyng $ +/* $Id: jumper.h 577781 2019-01-08 18:27:42Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -305,6 +305,10 @@ Int2 FilterQueriesForMapping(Uint1* sequence, Int4 length, Int4 offset, BlastSeqLoc** seq_loc); +/** Get alignment cutoff score for a given query length. The function + assumes that score for match is 1 */ +Int4 GetCutoffScore(Int4 query_length); + #ifdef __cplusplus } #endif diff --git a/c++/src/algo/blast/format/blast_format.cpp b/c++/src/algo/blast/format/blast_format.cpp index 4d7845c8..0f0b4bfa 100644 --- a/c++/src/algo/blast/format/blast_format.cpp +++ b/c++/src/algo/blast/format/blast_format.cpp @@ -50,6 +50,8 @@ Author: Jason Papadopoulos #include #include /* NCBI_FAKE_WARNING */ #include +#include +#include // for CSeqDB #include #include @@ -187,6 +189,8 @@ CBlastFormat::CBlastFormat(const blast::CBlastOptions& options, const CNcbiRegistry& registry = app->GetConfig(); m_LongSeqId = (registry.Get("BLAST", "LONG_SEQID") == "1"); } + m_HitsSortOption = -1; + m_HspsSortOption = -1; } CBlastFormat::CBlastFormat(const blast::CBlastOptions& opts, @@ -287,13 +291,14 @@ CBlastFormat::CBlastFormat(const blast::CBlastOptions& opts, m_IsIterative = opts.IsIterativeSearch(); if (m_FormatType == CFormattingArgs::eSAM) { x_InitSAMFormatter(); - } - + } CNcbiApplication* app 
= CNcbiApplication::Instance(); if (app) { const CNcbiRegistry& registry = app->GetConfig(); m_LongSeqId = (registry.Get("BLAST", "LONG_SEQID") == "1"); } + m_HitsSortOption = -1; + m_HspsSortOption = -1; } CBlastFormat::~CBlastFormat() @@ -486,7 +491,11 @@ CBlastFormat::x_ConfigCShowBlastDefline(CShowBlastDefline& showdef, if (m_LongSeqId) { flags |= CShowBlastDefline::eLongSeqId; } - + if(m_HitsSortOption >= 0) { + flags |= CShowBlastDefline::eShowPercentIdent; + flags |= CShowBlastDefline::eShowTotalScore; + flags |= CShowBlastDefline::eShowQueryCoverage; + } showdef.SetOption(flags); showdef.SetDbName(m_DbName); showdef.SetDbType(!m_DbIsAA); @@ -517,7 +526,7 @@ CBlastFormat::x_SplitSeqAlign(CConstRef full_alignment, new_seqs.Set().push_back(*alignment); } count++; - if(count >= m_NumSummary) + if(count >= (unsigned int)m_NumSummary) break; } } @@ -739,11 +748,14 @@ CBlastFormat::x_PrintTabularReport(const blast::CSearchResults& results, if (m_FormatType == CFormattingArgs::eTabular || m_FormatType == CFormattingArgs::eTabularWithComments || m_FormatType == CFormattingArgs::eCommaSeparatedValues) { - const CBlastTabularInfo::EFieldDelimiter kDelim = + const CBlastTabularInfo::EFieldDelimiter kDelim = (m_FormatType == CFormattingArgs::eCommaSeparatedValues ? 
CBlastTabularInfo::eComma : CBlastTabularInfo::eTab); - + CBlastTabularInfo tabinfo(m_Outfile, m_CustomOutputFormatSpec, kDelim); + if(!m_CustomDelim.empty()) { + tabinfo.SetCustomDelim(m_CustomDelim); + } tabinfo.SetParseLocalIds(m_BelieveQuery); if((m_IsBl2Seq && (!m_BelieveQuery))|| m_IsRemoteSearch) { tabinfo.SetParseSubjectDefline(true); @@ -1080,6 +1092,280 @@ CBlastFormat::WriteArchive(objects::CPssmWithParameters & pssm, } +void CBlastFormat::x_CreateDeflinesJson(CConstRef aln_set) +{ + + int delineFormatOption = 0; + CShowBlastDefline deflines(*aln_set, *m_Scope,kFormatLineLength,m_NumSummary); + + deflines.SetQueryNumber(1);//m_Query_number + deflines.SetDbType (!m_DbIsAA); + deflines.SetDbName(m_DbName); + delineFormatOption |= CShowBlastDefline::eHtml; + delineFormatOption |= CShowBlastDefline::eShowPercentIdent; + deflines.SetOption(delineFormatOption); //m_defline_option + + //Next three lines are for proper initialization in formatting of defline + CShowBlastDefline::SDeflineTemplates *deflineTemplates = new CShowBlastDefline::SDeflineTemplates; + deflineTemplates->advancedView = true; + deflines.SetDeflineTemplates (deflineTemplates); + + + vector sdlFortInfoVec = deflines.GetFormattingInfo(); + CJson_Document doc; + CJson_Object top_obj = doc.SetObject(); + CJson_Array defline_array = top_obj.insert_array("deflines"); + + for(size_t i = 0; i < sdlFortInfoVec.size(); i++) { + CJson_Object obj = defline_array.push_back_object(); + + obj.insert("dfln_url",sdlFortInfoVec[i]->dfln_url); + obj.insert("dfln_rid",sdlFortInfoVec[i]->dfln_rid); + obj.insert("dfln_gi",sdlFortInfoVec[i]->dfln_gi); + obj.insert("dfln_seqid",sdlFortInfoVec[i]->dfln_seqid); + obj.insert("full_dfln_defline",sdlFortInfoVec[i]->full_dfln_defline); + obj.insert("dfln_defline",sdlFortInfoVec[i]->dfln_defline); + obj.insert("dfln_id",sdlFortInfoVec[i]->dfln_id); + obj.insert("dflnFrm_id",sdlFortInfoVec[i]->dflnFrm_id); + obj.insert("dflnFASTA_id",sdlFortInfoVec[i]->dflnFASTA_id); + 
obj.insert("dflnAccs",sdlFortInfoVec[i]->dflnAccs); + + obj.insert("score_info",sdlFortInfoVec[i]->score_info); + obj.insert("dfln_hspnum",sdlFortInfoVec[i]->dfln_hspnum); + obj.insert("dfln_alnLen",sdlFortInfoVec[i]->dfln_alnLen); + obj.insert("dfln_blast_rank",sdlFortInfoVec[i]->dfln_blast_rank); + obj.insert("total_bit_string",sdlFortInfoVec[i]->total_bit_string); + obj.insert("percent_coverage",sdlFortInfoVec[i]->percent_coverage); + obj.insert("evalue_string",sdlFortInfoVec[i]->evalue_string); + obj.insert("percent_identity",sdlFortInfoVec[i]->percent_identity); + } + doc.Write(m_Outfile); +} + + +void CBlastFormat::x_DisplayDeflinesWithTemplates(CConstRef aln_set) +{ + x_InitDeflineTemplates(); + _ASSERT(m_DeflineTemplates); + + int delineFormatOption = 0; + CShowBlastDefline deflines(*aln_set, *m_Scope,kFormatLineLength,m_NumSummary); + + deflines.SetQueryNumber(1);//m_Query_number + deflines.SetDbType (!m_DbIsAA); + deflines.SetDbName(m_DbName); + delineFormatOption |= CShowBlastDefline::eHtml; + delineFormatOption |= CShowBlastDefline::eShowPercentIdent; + deflines.SetOption(delineFormatOption); //m_defline_option + deflines.SetDeflineTemplates (m_DeflineTemplates); + + deflines.Init(); + deflines.Display(m_Outfile); +} + + +void CBlastFormat::x_DisplayAlignsWithTemplates(CConstRef aln_set,const blast::CSearchResults& results) +{ + x_InitAlignTemplates(); + _ASSERT(m_AlignTemplates); + + TMaskedQueryRegions masklocs; + results.GetMaskedQueryRegions(masklocs); + + CSeq_align_set copy_aln_set; + CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set, m_NumAlignments); + + CRef seqAlnSet(const_cast(©_aln_set)); + if(!m_AlignSeqList.empty()) { + CAlignFormatUtil::ExtractSeqAlignForSeqList(seqAlnSet, m_AlignSeqList); + } + + CDisplaySeqalign display(*seqAlnSet, *m_Scope, &masklocs, NULL, m_MatrixName); + x_SetAlignParameters(display); + display.SetAlignTemplates(m_AlignTemplates); + + display.DisplaySeqalign(m_Outfile); +} + +void 
CBlastFormat::x_InitDeflineTemplates(void) +{ + CNcbiApplication* app = CNcbiApplication::Instance(); + if(!app) return; + const CNcbiRegistry& reg = app->GetConfig(); + + + m_DeflineTemplates = new CShowBlastDefline::SDeflineTemplates; + string defLineTmpl; + + m_DeflineTemplates->defLineTmpl = reg.Get("Templates", "DFL_TABLE_ROW"); + m_DeflineTemplates->scoreInfoTmpl = reg.Get("Templates", "DFL_TABLE_SCORE_INFO"); + m_DeflineTemplates->seqInfoTmpl = reg.Get("Templates", "DFL_TABLE_SEQ_INFO"); + m_DeflineTemplates->advancedView = true; +} + +void CBlastFormat::x_InitAlignTemplates(void) +{ + CNcbiApplication* app = CNcbiApplication::Instance(); + if(!app) return; + const CNcbiRegistry& reg = app->GetConfig(); + + m_AlignTemplates = new CDisplaySeqalign::SAlignTemplates; + + m_AlignTemplates->alignHeaderTmpl = reg.Get("Templates", "BLAST_ALIGN_HEADER"); + string blastAlignParamsTemplData = reg.Get("Templates", "BLAST_ALIGN_PARAMS"); + string blastAlignParamsTag = (m_Program == "blastn") ? 
"ALIGN_PARAMS_NUC" : "ALIGN_PARAMS_PROT"; + string blastAlignProtParamsTable = reg.Get("Templates", blastAlignParamsTag); + m_AlignTemplates->alignInfoTmpl = CAlignFormatUtil::MapTemplate(blastAlignParamsTemplData,"align_params",blastAlignProtParamsTable); + m_AlignTemplates->sortInfoTmpl = reg.Get("Templates", "SORT_ALIGNS_SEQ"); + m_AlignTemplates->alignFeatureTmpl = reg.Get("Templates", "ALN_FEATURES"); + m_AlignTemplates->alignFeatureLinkTmpl = reg.Get("Templates", "ALN_FEATURES_LINK"); + + m_AlignTemplates->alnDefLineTmpl = reg.Get("Templates", "ALN_DEFLINE_ROW"); + m_AlignTemplates->alnTitlesLinkTmpl = reg.Get("Templates", "ALN_DEFLINE_TITLES_LNK"); + m_AlignTemplates->alnTitlesTmpl = reg.Get("Templates", "ALN_DEFLINE_TITLES"); + m_AlignTemplates->alnSeqInfoTmpl = reg.Get("Templates", "ALN_DEFLINE_SEQ_INFO"); + m_AlignTemplates->alignRowTmpl = reg.Get("Templates", "BLAST_ALIGN_ROWS"); + m_AlignTemplates->alignRowTmplLast = reg.Get("Templates", "BLAST_ALIGN_ROWS_LST"); +} + + + +void CBlastFormat::x_SetAlignParameters(CDisplaySeqalign& cds) +{ + + int AlignOption = 0; + + AlignOption += CDisplaySeqalign::eShowMiddleLine; + + if (m_Program == "tblastx") { + AlignOption += CDisplaySeqalign::eTranslateNucToNucAlignment; + } + AlignOption += CDisplaySeqalign::eShowBlastInfo; + AlignOption += CDisplaySeqalign::eShowBlastStyleId; + AlignOption += CDisplaySeqalign::eHtml; + AlignOption += CDisplaySeqalign::eShowSortControls;//*******???? 
+ AlignOption += CDisplaySeqalign::eDynamicFeature; + cds.SetAlignOption(AlignOption); + + cds.SetDbName(m_DbName); + cds.SetDbType(!m_DbIsAA); + cds.SetLineLen(m_LineLength); + + if (m_Program == "blastn" || m_Program == "megablast") { + cds.SetMiddleLineStyle (CDisplaySeqalign::eBar); + cds.SetAlignType(CDisplaySeqalign::eNuc); + } else { + cds.SetMiddleLineStyle (CDisplaySeqalign::eChar); + cds.SetAlignType(CDisplaySeqalign::eProt); + } + cds.SetQueryNumber(1); //m_Query_number + cds.SetSeqLocChar (CDisplaySeqalign::eLowerCase); + cds.SetSeqLocColor ( CDisplaySeqalign::eGrey); + cds.SetMasterGeneticCode(m_QueryGenCode); + cds.SetSlaveGeneticCode(m_DbGenCode); +} + + + +static string s_GetMolType(const CBioseq_Handle& bioseqHandle) +{ + int molType = bioseqHandle.GetBioseqMolType(); + string molTypeString; + + switch(molType) { + case CSeq_inst::eMol_not_set: + molTypeString = "cdna"; + break; + case CSeq_inst::eMol_dna: + molTypeString = "dna"; + break; + case CSeq_inst::eMol_rna: + molTypeString = "rna"; + break; + case CSeq_inst::eMol_aa: + molTypeString = "amino acid"; + break; + case CSeq_inst::eMol_na: + molTypeString = "nucleic acid"; + break; + default: + molTypeString = "Unknown"; + } + return molTypeString; +} + +void +CBlastFormat::PrintReport(const blast::CSearchResults& results, + CBlastFormat::DisplayOption displayOption) +{ + if (displayOption == eMetadata) {//Metadata in json format + CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(), CScope::eGetBioseq_All); + CConstRef bioseq = bhandle.GetBioseqCore(); + + //string seqID = CAlignFormatUtil::GetSeqIdString(*bioseq, m_BelieveQuery); + string seqID; + CConstRef queryID = sequence::GetId(bhandle).GetSeqId(); + CSeq_id::ELabelType labelType = (queryID->IsLocal()) ? CSeq_id::eDefault : CSeq_id::eContent; + queryID->GetLabel(&seqID,labelType); + + + string seqDescr = CBlastFormatUtil::GetSeqDescrString(*bioseq); + seqDescr = seqDescr.empty() ? 
"None" : seqDescr; + + string molType = s_GetMolType(bhandle); + + int length = 0; + if(bioseq->IsSetInst() && bioseq->GetInst().CanGetLength()){ + length = bioseq->GetInst().GetLength(); + } + + CJson_Document doc; + CJson_Object obj = doc.SetObject(); + obj.insert("Query",seqID); + obj.insert("Query_descr",seqDescr); + obj.insert("IsQueryLocal",queryID->IsLocal()); + obj.insert("Length",NStr::IntToString(length)); + obj.insert("Moltype",molType); + obj.insert("Database",m_DbName); + string dbTitle; + try { + CRef seqdb; + seqdb = new CSeqDB(m_DbName, m_DbIsAA ? CSeqDB::eProtein : CSeqDB::eNucleotide); + dbTitle = seqdb->GetTitle(); + } + catch (...) {/*ignore exceptions for now*/} + obj.insert("Database_descr",dbTitle); + obj.insert("IsDBProtein",m_DbIsAA); + obj.insert("Program",m_Program); + + + if (results.HasErrors()) { + obj.insert("Error",results.GetErrorStrings()); + } + if (results.HasWarnings()) { + obj.insert("Warning",results.GetWarningStrings()); + } + doc.Write(m_Outfile); + } + else { + CConstRef aln_set = results.GetSeqAlign(); + _ASSERT(results.HasAlignments()); + if (m_IsUngappedSearch) { + aln_set.Reset(CDisplaySeqalign::PrepareBlastUngappedSeqalign(*aln_set)); + } + + if (displayOption == eDescriptionsWithTemplates) {//Descriptions with html templates + x_DisplayDeflinesWithTemplates(aln_set); + } + if (displayOption == eDescriptions) {//Descriptions with html templates + x_CreateDeflinesJson(aln_set); + } + else if (displayOption == eAlignments) {// print the alignments with html templates + x_DisplayAlignsWithTemplates(aln_set,results); + } + } +} + void CBlastFormat::PrintOneResultSet(const blast::CSearchResults& results, CConstRef queries, @@ -1191,6 +1477,15 @@ CBlastFormat::PrintOneResultSet(const blast::CSearchResults& results, if (m_IsUngappedSearch) { aln_set.Reset(CDisplaySeqalign::PrepareBlastUngappedSeqalign(*aln_set)); } + + //invoke sorting only for m_HitsSortOption > CAlignFormatUtil::eEvalue or m_HspsSortOption > 
CAlignFormatUtil::eHspEvalue + if(m_HitsSortOption > 0 || m_HspsSortOption > 0) { + aln_set = CBlastFormatUtil::SortSeqalignForSortableFormat( + *(const_cast(aln_set.GetPointer())), + (m_Program == "tblastx") ? true : false, + m_HitsSortOption, + m_HspsSortOption); + } const bool kIsGlobal = s_IsGlobalSeqAlign(aln_set); diff --git a/c++/src/algo/blast/igblast/igblast.cpp b/c++/src/algo/blast/igblast/igblast.cpp index 2e1e7d43..6c1ed35f 100644 --- a/c++/src/algo/blast/igblast/igblast.cpp +++ b/c++/src/algo/blast/igblast/igblast.cpp @@ -51,11 +51,12 @@ BEGIN_NCBI_SCOPE USING_SCOPE(objects); BEGIN_SCOPE(blast) -static int max_allowed_VJ_distance_with_D = 90; -static int max_allowed_VJ_distance_without_D = 40; -static int max_allowed_VD_distance = 55; +static int max_allowed_VJ_distance_with_D = 225; +static int max_allowed_VJ_distance_without_D = 50; +static int max_allowed_VD_distance = 120; static int extend_length = 30; -static int max_allowed_V_end_to_J_end =150; +static int max_J_length = 70; +static int max_allowed_V_end_to_J_end = max_allowed_VJ_distance_with_D + max_J_length; static int max_v_j_overlap = 7; static int j_wordsize = 7; @@ -1473,6 +1474,7 @@ void CIgBlast::x_AnnotateDomain(CRef &gl_results, // annotate the query frame offset int frame_offset = m_AnnotationInfo.GetFrameOffset(sid); + if (frame_offset >= 0) { int q_start = (*it)->GetSeqStart(0); int q_stop = (*it)->GetSeqStop(0); @@ -1480,7 +1482,28 @@ void CIgBlast::x_AnnotateDomain(CRef &gl_results, int q_dif = q_stop - q_start; int frame_adj = (3 - ((*it)->GetSeqStart(1) + 3 - frame_offset) % 3) %3; annot->m_FrameInfo[0] = (q_mid - q_dir *q_dif)/2 + q_dir * frame_adj; - frame_adj = ((*it)->GetSeqStop(1) + 3 - frame_offset) % 3; + + //counting frame from fwr3 end, not the V end since we need to ignore a few bases + //in the CDR3 to allow any insertion or deletion at V gene end + if (annot->m_DomainInfo[9] > 0) { + int fwr3_stop = annot->m_DomainInfo[9]; + + if (annot->m_MinusStrand) { + + q_start 
= max(q_start, fwr3_stop); + q_mid = q_start + q_stop; + q_dif = q_stop - q_start; + frame_adj = (s_map.GetSeqPosFromSeqPos(1, 0, q_start, IAlnExplorer::eBackwards) + 3 - frame_offset) % 3; + } else { + q_stop = min(q_stop, fwr3_stop); + q_mid = q_start + q_stop; + q_dif = q_stop - q_start; + frame_adj = (s_map.GetSeqPosFromSeqPos(1, 0, q_stop, IAlnExplorer::eBackwards) + 3 - frame_offset) % 3; + } + } else { + frame_adj = ((*it)->GetSeqStop(1) + 3 - frame_offset) % 3; + } + annot->m_FrameInfo[1] = (q_mid + q_dir *q_dif)/2 - q_dir * frame_adj; } break; diff --git a/c++/src/algo/blast/unit_tests/api/magicblast_unit_test.cpp b/c++/src/algo/blast/unit_tests/api/magicblast_unit_test.cpp index b3f76ec2..7e8932f3 100644 --- a/c++/src/algo/blast/unit_tests/api/magicblast_unit_test.cpp +++ b/c++/src/algo/blast/unit_tests/api/magicblast_unit_test.cpp @@ -1,4 +1,4 @@ -/* $Id: magicblast_unit_test.cpp 551735 2017-11-21 19:36:12Z rackerst $ +/* $Id: magicblast_unit_test.cpp 580452 2019-02-14 18:01:37Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -191,7 +191,7 @@ BOOST_AUTO_TEST_CASE(MappingNoPairs) CMagicBlast magicblast(query_factory, db_adapter, m_OptHandle); CRef results = magicblast.Run(); - const size_t kExpectedNumResults = 5; + const size_t kExpectedNumResults = 4; BOOST_REQUIRE_EQUAL(results->Get().size(), kExpectedNumResults); SExon exon; @@ -276,21 +276,6 @@ BOOST_AUTO_TEST_CASE(MappingNoPairs) // HSP #4 results_idx++; - expected_hits[results_idx].score = 33; - expected_hits[results_idx].prod_length = 49; - - exon.prod_start = 0; - exon.prod_end = 32; - exon.gen_start = 1033340; - exon.gen_end = 1033372; - exon.prod_strand = eNa_strand_minus; - exon.gen_strand = eNa_strand_plus; - exon.acceptor = ""; - exon.donor = "CT"; - expected_hits[results_idx].exons.push_back(exon); - - // HSP #5 - results_idx++; expected_hits[results_idx].score = 49; expected_hits[results_idx].prod_length = 49; @@ -425,7 
+410,7 @@ BOOST_AUTO_TEST_CASE(MappingPaired) exon.prod_strand = eNa_strand_minus; exon.gen_strand = eNa_strand_plus; exon.acceptor = ""; - exon.donor = "AG"; + exon.donor = ""; expected_hits[results_idx].exons.push_back(exon); // HSP #2 @@ -455,7 +440,7 @@ BOOST_AUTO_TEST_CASE(MappingPaired) exon.prod_strand = eNa_strand_minus; exon.gen_strand = eNa_strand_plus; exon.acceptor = ""; - exon.donor = "AG"; + exon.donor = ""; expected_hits[results_idx].exons.push_back(exon); // HSP #4 @@ -484,7 +469,7 @@ BOOST_AUTO_TEST_CASE(MappingPaired) exon.gen_end = 2443327; exon.prod_strand = eNa_strand_plus; exon.gen_strand = eNa_strand_plus; - exon.acceptor = "CT"; + exon.acceptor = ""; exon.donor = ""; expected_hits[results_idx].exons.push_back(exon); diff --git a/c++/src/algo/blast/unit_tests/api/magicblast_unit_test.ini b/c++/src/algo/blast/unit_tests/api/magicblast_unit_test.ini index cf7fad4c..ad102ecf 100644 --- a/c++/src/algo/blast/unit_tests/api/magicblast_unit_test.ini +++ b/c++/src/algo/blast/unit_tests/api/magicblast_unit_test.ini @@ -1,3 +1,3 @@ -; $Id: magicblast_unit_test.ini 516750 2016-10-17 19:01:43Z boratyng $ +; $Id: magicblast_unit_test.ini 577787 2019-01-08 18:29:04Z ivanov $ [UNITTESTS_DISABLE] GLOBAL = OS_Solaris | PLATFORM_Bits32 diff --git a/c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp b/c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp index a812f6a4..2ad7af0e 100644 --- a/c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp +++ b/c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp @@ -1,4 +1,4 @@ -/* $Id: version_reference_unit_test.cpp 574993 2018-11-21 15:38:26Z ivanov $ +/* $Id: version_reference_unit_test.cpp 581728 2019-03-05 16:37:30Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -43,8 +43,8 @@ BOOST_AUTO_TEST_SUITE(version_reference) BOOST_AUTO_TEST_CASE(testVersion) { const int kMajor = 2; - const int kMinor = 8; 
- const int kPatch = 1; + const int kMinor = 9; + const int kPatch = 0; blast::CBlastVersion v; BOOST_REQUIRE_EQUAL(kMajor, v.GetMajor()); BOOST_REQUIRE_EQUAL(kMinor, v.GetMinor()); diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/10_seqs_alias.nal b/c++/src/algo/blast/unit_tests/seqdb_reader/data/10_seqs_alias.nal new file mode 100644 index 00000000..019d8239 --- /dev/null +++ b/c++/src/algo/blast/unit_tests/seqdb_reader/data/10_seqs_alias.nal @@ -0,0 +1,2 @@ +DBLIST 15_seqs_v5 +GILIST 15_seqs.gil diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs.gil b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs.gil new file mode 100644 index 00000000..549f66ee Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs.gil differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.ndb b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.ndb new file mode 100644 index 00000000..19deaf8d Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.ndb differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nhr b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nhr new file mode 100644 index 00000000..ff7cb66b Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nhr differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nin b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nin new file mode 100644 index 00000000..089361e6 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nin differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nnd b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nnd new file mode 100644 index 00000000..5ffbec4a Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nnd differ diff --git 
a/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nni b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nni new file mode 100644 index 00000000..3e572f70 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nni differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nog b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nog new file mode 100644 index 00000000..c304979d Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nog differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nos b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nos new file mode 100644 index 00000000..b31b1bb6 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nos differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.not b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.not new file mode 100644 index 00000000..4b76efd2 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.not differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nsq b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nsq new file mode 100644 index 00000000..ed62779d Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nsq differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.ntf b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.ntf new file mode 100644 index 00000000..f8fa0bb1 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.ntf differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nto b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nto new file mode 100644 index 00000000..6b13a3fc Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/15_seqs_v5.nto differ diff 
--git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pdb b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pdb new file mode 100644 index 00000000..dd7893ab Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pdb differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.phr b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.phr new file mode 100644 index 00000000..1a79ca1c Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.phr differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pin b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pin new file mode 100644 index 00000000..8efbb13e Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pin differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pnd b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pnd new file mode 100644 index 00000000..60d8507b Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pnd differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pni b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pni new file mode 100644 index 00000000..f3107c5f Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pni differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pog b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pog new file mode 100644 index 00000000..226cd013 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pog differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pos b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pos new file mode 100644 index 00000000..2a50c1fe Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pos differ diff --git 
a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pot b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pot new file mode 100644 index 00000000..ada9e75b Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pot differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.ppd b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.ppd new file mode 100644 index 00000000..9c464fd1 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.ppd differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.ppi b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.ppi new file mode 100644 index 00000000..b2385d58 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.ppi differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.psq b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.psq new file mode 100644 index 00000000..21017758 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.psq differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.ptf b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.ptf new file mode 100644 index 00000000..0b76dd1c Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.ptf differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pto b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pto new file mode 100644 index 00000000..4a6c3a8f Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/ipg_test.pto differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/skip_vols_mix.nal b/c++/src/algo/blast/unit_tests/seqdb_reader/data/skip_vols_mix.nal new file mode 100644 index 00000000..518d0758 --- /dev/null +++ b/c++/src/algo/blast/unit_tests/seqdb_reader/data/skip_vols_mix.nal @@ -0,0 +1 @@ +DBLIST 10_seqs_alias vols_v5.01 diff 
--git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/taxid_alias.nal b/c++/src/algo/blast/unit_tests/seqdb_reader/data/taxid_alias.nal new file mode 100644 index 00000000..c288969e --- /dev/null +++ b/c++/src/algo/blast/unit_tests/seqdb_reader/data/taxid_alias.nal @@ -0,0 +1,2 @@ +DBLIST 15_seqs_v5 +TAXIDLIST taxids.list diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/taxids.list b/c++/src/algo/blast/unit_tests/seqdb_reader/data/taxids.list new file mode 100644 index 00000000..1ee53a04 --- /dev/null +++ b/c++/src/algo/blast/unit_tests/seqdb_reader/data/taxids.list @@ -0,0 +1,2 @@ +10090 +10116 diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nhr b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nhr new file mode 100644 index 00000000..d078b7ef Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nhr differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nin b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nin new file mode 100644 index 00000000..3e3565e0 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nin differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nnd b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nnd new file mode 100644 index 00000000..c8ac4361 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nnd differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nni b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nni new file mode 100644 index 00000000..8cceb052 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nni differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nog b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nog new file mode 100644 index 00000000..29c64b07 Binary files /dev/null and 
b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nog differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nsq b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nsq new file mode 100644 index 00000000..d9f6c34c Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.00.nsq differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nhr b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nhr new file mode 100644 index 00000000..f5559f3f Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nhr differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nin b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nin new file mode 100644 index 00000000..5e5ddc91 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nin differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nnd b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nnd new file mode 100644 index 00000000..26158d24 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nnd differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nni b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nni new file mode 100644 index 00000000..cce51345 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nni differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nog b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nog new file mode 100644 index 00000000..e10c531a Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nog differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nsq b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nsq new file mode 100644 index 00000000..03cb7c13 Binary files /dev/null 
and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.01.nsq differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.nal b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.nal new file mode 100644 index 00000000..a1cf0309 --- /dev/null +++ b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.nal @@ -0,0 +1 @@ +DBLIST vols_v5.00 vols_v5.01 diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.ndb b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.ndb new file mode 100644 index 00000000..91a17194 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.ndb differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.nos b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.nos new file mode 100644 index 00000000..2103ff6d Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.nos differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.not b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.not new file mode 100644 index 00000000..8b20b21a Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.not differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.ntf b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.ntf new file mode 100644 index 00000000..f4af1553 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.ntf differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.nto b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.nto new file mode 100644 index 00000000..77ee2510 Binary files /dev/null and b/c++/src/algo/blast/unit_tests/seqdb_reader/data/vols_v5.nto differ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/seqdb_lmdb_unit_test.cpp b/c++/src/algo/blast/unit_tests/seqdb_reader/seqdb_lmdb_unit_test.cpp index 701263f4..2cb6cd4f 100644 --- 
a/c++/src/algo/blast/unit_tests/seqdb_reader/seqdb_lmdb_unit_test.cpp +++ b/c++/src/algo/blast/unit_tests/seqdb_reader/seqdb_lmdb_unit_test.cpp @@ -189,7 +189,7 @@ BOOST_AUTO_TEST_CASE(Test_Mix_User_SeqIdList_AliasFile) for(blastdb::TOid i=0; db.CheckOrFindOID(i); i++) { found++; } - BOOST_REQUIRE_EQUAL(2, found); + BOOST_REQUIRE_EQUAL(1, found); } BOOST_AUTO_TEST_CASE(Test_Mix_Negative_User_SeqIdList) @@ -238,22 +238,43 @@ BOOST_AUTO_TEST_CASE(Test_Negative_UserSeqIdList_MultiDB) BOOST_AUTO_TEST_CASE(Test_Negative_SeqIdList_With_AliasFile) { - CRef list_file( new CSeqDBFileGiList( "data/alias.seqidlist.bsl", CSeqDBFileGiList::eSiList)); - CRef n_list(new CSeqDBNegativeList()); - n_list->SetListInfo(list_file->GetListInfo()); - vector sis; - list_file->GetSiList(sis); - n_list->ReserveSis(sis.size()); - ITERATE(vector, iter, sis) { - n_list->AddSi(*iter); + { + CRef list_file( new CSeqDBFileGiList( "data/alias.seqidlist.bsl", CSeqDBFileGiList::eSiList)); + CRef n_list(new CSeqDBNegativeList()); + n_list->SetListInfo(list_file->GetListInfo()); + vector sis; + list_file->GetSiList(sis); + n_list->ReserveSis(sis.size()); + ITERATE(vector, iter, sis) { + n_list->AddSi(*iter); + } + CSeqDB db("data/prot_alias_v5", CSeqDB::eProtein, n_list); + + int found = 0; + for(blastdb::TOid oid = 0; db.CheckOrFindOID(oid); oid++) { + found++; + } + BOOST_REQUIRE_EQUAL(0, found); } - CSeqDB db("data/prot_alias_v5", CSeqDB::eProtein, n_list); + { + CRef n_list(new CSeqDBNegativeList()); + const int num_of_sis = 3; + static const string sis[num_of_sis] = {"EAI92731.1", "ZP_00197753", "EAA62830.1"}; + struct SBlastSeqIdListInfo list_info; + list_info.is_v4 = false; + n_list->SetListInfo(list_info); + n_list->ReserveSis(num_of_sis); + for (unsigned int i=0; i < num_of_sis; i++) { + n_list->AddSi(sis[i]); + } + CSeqDB db("data/prot_alias_v5", CSeqDB::eProtein, n_list); - int found = 0; - for(blastdb::TOid oid = 0; db.CheckOrFindOID(oid); oid++) { - found++; - } - BOOST_REQUIRE_EQUAL(19, 
found); + int found = 0; + for(blastdb::TOid oid = 0; db.CheckOrFindOID(oid); oid++) { + found++; + } + BOOST_REQUIRE_EQUAL(52, found); + } } BOOST_AUTO_TEST_CASE(Test_Negative_Duplicate_SeqIdList_MultiDB) @@ -373,5 +394,94 @@ BOOST_AUTO_TEST_CASE(Test_TaxIdZero) } } +BOOST_AUTO_TEST_CASE(Test_GetTaxIdsForOids) +{ + { + set tax_ids; + CSeqDB db("data/15_seqs_v5", CSeqDB::eNucleotide); + db.GetDBTaxIds(tax_ids); + BOOST_REQUIRE_EQUAL(tax_ids.size(), 4); + } + { + set tax_ids; + CSeqDB db("data/10_seqs_alias", CSeqDB::eNucleotide); + db.GetDBTaxIds(tax_ids); + BOOST_REQUIRE_EQUAL(tax_ids.size(), 2); + } + { + set tax_ids; + CSeqDB db("data/skip_vols_mix", CSeqDB::eNucleotide); + db.GetDBTaxIds(tax_ids); + BOOST_REQUIRE_EQUAL(tax_ids.size(), 3); + } +} + +BOOST_AUTO_TEST_CASE(Test_AliasFileTaxIdsList) +{ + { + CSeqDB db("data/taxid_alias", CSeqDB::eNucleotide); + int found = 0; + for(blastdb::TOid oid = 0; db.CheckOrFindOID(oid); oid++) { + found++; + } + BOOST_REQUIRE_EQUAL(10, found); + } + + { + CSeqDB db("data/taxid_alias data/vols_v5", CSeqDB::eNucleotide); + int found = 0; + for(blastdb::TOid oid = 0; db.CheckOrFindOID(oid); oid++) { + found++; + } + BOOST_REQUIRE_EQUAL(19, found); + } + { + set tax_ids; + tax_ids.insert(10116); + CRef taxid_list(new CSeqDBGiList()); + taxid_list->AddTaxIds(tax_ids); + CSeqDB db("data/taxid_alias", CSeqDB::eNucleotide, taxid_list.GetPointer()); + int found = 0; + for(blastdb::TOid oid = 0; db.CheckOrFindOID(oid); oid++) { + found++; + } + BOOST_REQUIRE_EQUAL(4, found); + } + { + set tax_ids; + tax_ids.insert(10116); + CRef taxid_list(new CSeqDBNegativeList()); + taxid_list->AddTaxIds(tax_ids); + CSeqDB db("data/taxid_alias", CSeqDB::eNucleotide, taxid_list.GetPointer()); + int found = 0; + for(blastdb::TOid oid = 0; db.CheckOrFindOID(oid); oid++) { + found++; + } + BOOST_REQUIRE_EQUAL(6, found); + } +} + +BOOST_AUTO_TEST_CASE(Test_SeqIdList_With_AliasFile) +{ + { + CRef n_list(new CSeqDBGiList()); + const int num_of_sis = 3; + 
static const string sis[num_of_sis] = {"EAI92731", "ZP_00197753", "EAA62830"}; + struct SBlastSeqIdListInfo list_info; + list_info.is_v4 = false; + n_list->SetListInfo(list_info); + n_list->ReserveSis(num_of_sis); + for (unsigned int i=0; i < num_of_sis; i++) { + n_list->AddSi(sis[i]); + } + CSeqDB db("data/prot_alias_v5", CSeqDB::eProtein, n_list); + + int found = 0; + for(blastdb::TOid oid = 0; db.CheckOrFindOID(oid); oid++) { + found++; + } + BOOST_REQUIRE_EQUAL(3, found); + } +} BOOST_AUTO_TEST_SUITE_END() #endif /* SKIP_DOXYGEN_PROCESSING */ diff --git a/c++/src/algo/blast/unit_tests/seqdb_reader/seqdb_unit_test.cpp b/c++/src/algo/blast/unit_tests/seqdb_reader/seqdb_unit_test.cpp index abeae652..cdbfc435 100644 --- a/c++/src/algo/blast/unit_tests/seqdb_reader/seqdb_unit_test.cpp +++ b/c++/src/algo/blast/unit_tests/seqdb_reader/seqdb_unit_test.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdb_unit_test.cpp 553487 2017-12-18 14:23:38Z fongah2 $ +/* $Id: seqdb_unit_test.cpp 582148 2019-03-11 18:02:00Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -956,7 +956,7 @@ BOOST_AUTO_TEST_CASE(StringIdentSearch) const char * s1[] = { "gi|129295", "sp|P01013|OVAX_CHICK", 0 }; const char * s2[] = - { "gi|433552084", "pdb|1NPQ|A", "1NPQ", "1npqA", + { "gi|433552084", "pdb|1NPQ|A", "1NPQ", "1npq_A", "gi|433552085", "pdb|1NPQ|B", "1npq", 0 }; const char * s3[] = { "1NPQ", 0 }; @@ -4440,6 +4440,154 @@ BOOST_AUTO_TEST_CASE(Test_Mix_User_SeqIdList_AliasFile) BOOST_REQUIRE_EQUAL(2, found); } +BOOST_AUTO_TEST_CASE(PigListSwissprot) +{ + // 2 is not founc in swissprot + const unsigned int num_pigs = 5; + const int pigs[num_pigs] = {4377482, 1287445, 2, 6066974, 5303747}; + const unsigned int num_valid_pig = 4; + + CRef pig_list(new CSeqDBGiList()); + CRef neg_pig_list(new CSeqDBNegativeList()); + + for (unsigned int i =0; i < num_pigs; i++) { + pig_list->AddPig(pigs[i]); + } + + vector p; + pig_list->GetPigList(p); + 
neg_pig_list->SetPigList(p); + + string db_name = "swissprot"; + + CSeqDB db(db_name, CSeqDB::eProtein); + CSeqDB pig_db(db_name, CSeqDB::eProtein, &* pig_list); + CSeqDB negative_pig_db(db_name, CSeqDB::eProtein, &* neg_pig_list); + + int total_num_seqs = db.GetNumSeqs(); + BOOST_REQUIRE_EQUAL(pig_db.GetNumSeqs(), 4); + BOOST_REQUIRE_EQUAL(negative_pig_db.GetNumSeqs(), (int) (total_num_seqs - num_valid_pig)); + + vector seq_ids; + for(int oid=0; pig_db.CheckOrFindOID(oid); oid++) { + int oid_found = -1; + list< CRef > ids = pig_db.GetSeqIDs(oid); + db.SeqidToOid(*(ids.front()), oid_found); + seq_ids.push_back(ids.front()->GetSeqIdString()); + BOOST_REQUIRE_EQUAL(oid_found, oid); + } + BOOST_REQUIRE_EQUAL(seq_ids.size(), num_valid_pig); + + for(unsigned int i=0; i < seq_ids.size(); i ++){ + vector not_found; + negative_pig_db.AccessionToOids(seq_ids[i], not_found); + BOOST_REQUIRE_EQUAL(not_found.size(), (unsigned int) 0); + } + +} + +BOOST_AUTO_TEST_CASE(CombinedFilters) +{ + // 2 is not founc in swissprot + const unsigned int num_pigs = 5; + const int pigs[num_pigs] = {2, 355704, 863725, 1727116, 24036443}; + string db_name = "data/ipg_test"; + + { + CRef pos_list(new CSeqDBGiList()); + CRef neg_list(new CSeqDBNegativeList()); + + for (unsigned int i =0; i < num_pigs; i++) { + pos_list->AddPig(pigs[i]); + } + + set t; + t.insert(9606); + t.insert(83333); + neg_list->AddTaxIds(t); + + CSeqDB db(db_name, CSeqDB::eProtein, &*pos_list, &* neg_list); + + int total_num_seqs = db.GetNumSeqs(); + BOOST_REQUIRE_EQUAL(total_num_seqs, 1); + + const int check_oids[1] = {12}; + for(int oid=0, c=0; db.CheckOrFindOID(oid); oid++, c++) + BOOST_REQUIRE_EQUAL(oid, check_oids[c]); + } + + { + CRef pos_list(new CSeqDBGiList()); + for (unsigned int i =0; i < num_pigs; i++) { + pos_list->AddPig(pigs[i]); + } + + set t; + t.insert(9606); + t.insert(83333); + pos_list->AddTaxIds(t); + + CSeqDB db(db_name, CSeqDB::eProtein, &*pos_list); + + int total_num_seqs = db.GetNumSeqs(); + 
BOOST_REQUIRE_EQUAL(total_num_seqs, 3); + + const int check_oids[3] = {2, 6, 8}; + for(int oid=0, c=0; db.CheckOrFindOID(oid); oid++, c++) { + BOOST_REQUIRE_EQUAL(oid, check_oids[c]); + } + } + { + CRef pos_list(new CSeqDBGiList()); + CRef neg_list(new CSeqDBNegativeList()); + + vector p; + for (unsigned int i =0; i < num_pigs; i++) { + p.push_back(pigs[i]); + } + neg_list->SetPigList(p); + + set t; + t.insert(9606); + t.insert(83333); + pos_list->AddTaxIds(t); + + CSeqDB db(db_name, CSeqDB::eProtein, &*pos_list, &* neg_list); + + int total_num_seqs = db.GetNumSeqs(); + BOOST_REQUIRE_EQUAL(total_num_seqs, 5); + + const int check_oids[5] = {0, 1, 3, 5, 7}; + for(int oid=0, c=0; db.CheckOrFindOID(oid); oid++, c++) { + BOOST_REQUIRE_EQUAL(oid, check_oids[c]); + } + } + + { + CRef pos_list(new CSeqDBGiList()); + CRef neg_list(new CSeqDBNegativeList()); + + vector p; + for (unsigned int i =0; i < num_pigs; i++) { + p.push_back(pigs[i]); + } + neg_list->SetPigList(p); + + set t; + t.insert(9606); + t.insert(83333); + pos_list->AddTaxIds(t); + + CSeqDB db(db_name, CSeqDB::eProtein, 1, 4, &*pos_list, &* neg_list); + + const int check_oids[2] = {1, 3 }; + for(int oid=0, c=0; db.CheckOrFindOID(oid); oid++, c++) { + BOOST_REQUIRE_EQUAL(oid, check_oids[c]); + } + } + + +} BOOST_AUTO_TEST_SUITE_END() #endif /* SKIP_DOXYGEN_PROCESSING */ diff --git a/c++/src/app/blast/Makefile.blast_report.app b/c++/src/app/blast/Makefile.blast_report.app new file mode 100755 index 00000000..f4d14583 --- /dev/null +++ b/c++/src/app/blast/Makefile.blast_report.app @@ -0,0 +1,16 @@ +WATCHERS = camacho madden fongah2 + +APP = blast_report +SRC = blast_report +LIB_ = $(BLAST_INPUT_LIBS) $(BLAST_LIBS) xregexp $(PCRE_LIB) $(OBJMGR_LIBS) +LIB = blast_app_util $(LIB_:%=%$(STATIC)) + +# De-universalize Mac builds to work around a PPC toolchain limitation +CFLAGS = $(FAST_CFLAGS:ppc=i386) +CXXFLAGS = $(FAST_CXXFLAGS:ppc=i386) +LDFLAGS = $(FAST_LDFLAGS:ppc=i386) + +CPPFLAGS = -DNCBI_MODULE=BLASTFORMAT 
$(ORIG_CPPFLAGS) $(BLAST_THIRD_PARTY_INCLUDE) +LIBS = $(BLAST_THIRD_PARTY_LIBS) $(CMPRS_LIBS) $(DL_LIBS) $(NETWORK_LIBS) $(ORIG_LIBS) + +REQUIRES = objects -Cygwin diff --git a/c++/src/app/blast/Makefile.in b/c++/src/app/blast/Makefile.in index f44e5df6..3cbaf5c1 100644 --- a/c++/src/app/blast/Makefile.in +++ b/c++/src/app/blast/Makefile.in @@ -1,4 +1,4 @@ -# $Id: Makefile.in 556928 2018-02-06 19:36:31Z camacho $ +# $Id: Makefile.in 577730 2019-01-08 18:01:26Z ivanov $ # Meta-makefile("APP" project) ################################# @@ -17,6 +17,7 @@ psiblast \ rpsblast \ rpstblastn \ blast_formatter \ +blast_report \ deltablast \ seedtop @@ -51,6 +52,8 @@ igblastp: lib ${MAKE} ${MFLAGS} -f Makefile.igblastp_app blast_formatter: lib ${MAKE} ${MFLAGS} -f Makefile.blast_formatter_app +blast_report: lib + ${MAKE} ${MFLAGS} -f Makefile.blast_report_app seedtop: lib ${MAKE} ${MFLAGS} -f Makefile.seedtop_app deltablast: lib diff --git a/c++/src/app/blast/blast_formatter.cpp b/c++/src/app/blast/blast_formatter.cpp index 4a279887..3a6af497 100644 --- a/c++/src/app/blast/blast_formatter.cpp +++ b/c++/src/app/blast/blast_formatter.cpp @@ -1,4 +1,4 @@ -/* $Id: blast_formatter.cpp 509107 2016-08-03 14:09:45Z fongah2 $ +/* $Id: blast_formatter.cpp 577764 2019-01-08 18:10:59Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -318,7 +318,10 @@ int CBlastFormatterApp::PrintFormattedOutput(void) opts.GetMBIndexLoaded(), NULL, NULL, GetCmdlineArgs(GetArguments())); - formatter.SetLineLength(fmt_args.GetLineLength()); + formatter.SetLineLength(fmt_args.GetLineLength()); + formatter.SetHitsSortOption(fmt_args.GetHitsSortOption()); + formatter.SetHspsSortOption(fmt_args.GetHspsSortOption()); + formatter.SetCustomDelimiter(fmt_args.GetCustomDelimiter()); if(UseXInclude(fmt_args, args[kArgOutput].AsString())) { formatter.SetBaseFile(args[kArgOutput].AsString()); } diff --git a/c++/src/app/blast/blast_report.cpp 
b/c++/src/app/blast/blast_report.cpp new file mode 100755 index 00000000..d8896282 --- /dev/null +++ b/c++/src/app/blast/blast_report.cpp @@ -0,0 +1,401 @@ +/* $Id: blast_report.cpp 577733 2019-01-08 18:02:13Z ivanov $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Author: Irena Zaretskaya + * + */ + +/** @file blast_report.cpp + * Stand-alone command line HTML report for BLAST. Uses tempalates for descriptions and alignments. 
Outputs metadata as json
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "blast_app_util.hpp"
+
+
+#ifndef SKIP_DOXYGEN_PROCESSING
+USING_NCBI_SCOPE;
+USING_SCOPE(blast);
+#endif
+
+
+/// Default process exit codes. Each value matches the numeric prefix of the
+/// corresponding entry in the [Errors] section of blast_report.ini, so the
+/// exit status is the same whether or not the registry entry is available.
+#define EXIT_CODE__FORMAT_SUCCESS 0
+#define EXIT_CODE__NO_RESULTS_FOUND 1
+#define EXIT_CODE__INVALID_INPUT_FORMAT 2
+#define EXIT_CODE__BLAST_ARCHIVE_ERROR 3
+#define EXIT_CODE__CANNOT_ACCESS_FILE 4
+#define EXIT_CODE__QUERY_INDEX_INVALID 5
+#define EXIT_CODE__NETWORK_CONNECTION_ERROR 6
+
+/// Application that formats one query's results from a BLAST archive file.
+class CBlastReportApp : public CNcbiApplication
+{
+public:
+    /** @inheritDoc */
+    CBlastReportApp() {
+        CRef version(new CVersion());
+        version->SetVersionInfo(new CBlastVersion());
+        SetFullVersion(version);
+        m_LoadFromArchive = false;
+    }
+private:
+    /** @inheritDoc */
+    virtual void Init();
+    /** @inheritDoc */
+    virtual int Run();
+
+    /// Prints the BLAST formatted output
+    /// @return one of the EXIT_CODE__* values
+    int PrintFormattedOutput(void);
+
+    /// Extracts the queries to be formatted
+    /// @param query_is_protein Are the queries protein sequences? [in]
+    CRef x_ExtractQueries(bool query_is_protein);
+
+    /// Build the query from a PSSM
+    /// @param pssm PSSM to inspect [in]
+    CRef
+    x_BuildQueryFromPssm(const CPssmWithParameters& pssm);
+
+    /// Package a scope and Seq-loc into a SSeqLoc from a Bioseq
+    /// @param bioseq Bioseq to inspect [in]
+    /// @param scope Scope object to add the sequence data to [in|out]
+    SSeqLoc x_QueryBioseqToSSeqLoc(const CBioseq& bioseq, CRef scope);
+
+    /// Our link to the NCBI BLAST service
+    CRef m_RmtBlast;
+
+    /// The source of CScope objects for queries
+    CRef m_QueryScopeSource;
+
+    /// Tracks whether results come from an archive file.
+    bool m_LoadFromArchive;
+};
+
+/// Declares the command line arguments accepted by the application.
+void CBlastReportApp::Init()
+{
+    HideStdArgs(fHideLogfile | fHideConffile | fHideFullVersion | fHideXmlHelp | fHideDryRun);
+
+    auto_ptr arg_desc(new CArgDescriptions);
+
+    arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
+                  "Stand-alone BLAST formatter client, version "
+                  + CBlastVersion().Print());
+
+    arg_desc->SetCurrentGroup("Input options");
+
+
+    // add input file for seq-align here?
+    arg_desc->AddKey(kArgArchive, "ArchiveFile", "File containing BLAST Archive format in ASN.1 (i.e.: output format 11)",
+                     CArgDescriptions::eInputFile);
+
+    arg_desc->AddDefaultKey(kArgAlignSeqList, "alignseqlist", "List of comma separated seqids to display", CArgDescriptions::eString, "");
+    arg_desc->AddDefaultKey(kArgMetadata, "searchmetadata", "Search Metadata indicator", CArgDescriptions::eBoolean,"f");
+    arg_desc->AddDefaultKey(kArgQueryIndex, "queryindex", "Query Index", CArgDescriptions::eInteger, "0");
+
+
+    CFormattingArgs fmt_args;
+    fmt_args.SetArgumentDescriptions(*arg_desc);
+
+    arg_desc->SetCurrentGroup("Output configuration options");
+    arg_desc->AddDefaultKey(kArgOutput, "output_file", "Output file name",
+                            CArgDescriptions::eOutputFile, "-");
+
+    arg_desc->SetCurrentGroup("Miscellaneous options");
+    arg_desc->AddFlag(kArgParseDeflines,
+                 "Should the query and subject defline(s) be parsed?", true);
+    arg_desc->SetCurrentGroup("");
+
+    CDebugArgs debug_args;
+    debug_args.SetArgumentDescriptions(*arg_desc);
+
+    // Ownership of the descriptions is handed over to the application.
+    SetupArgDescriptions(arg_desc.release());
+}
+
+SSeqLoc
+CBlastReportApp::x_QueryBioseqToSSeqLoc(const CBioseq& bioseq,
+                                        CRef scope)
+{
+    // Function-local static: data loaders are added at most once per process.
+    static bool first_time = true;
+    _ASSERT(scope);
+
+    if ( !HasRawSequenceData(bioseq) && first_time ) {
+        _ASSERT(m_QueryScopeSource);
+        m_QueryScopeSource->AddDataLoaders(scope);
+        first_time = false;
+    }
+    else {
+        scope->AddBioseq(bioseq);
+    }
+    CRef seqloc(new CSeq_loc);
+    seqloc->SetWhole().Assign(*bioseq.GetFirstId());
+    return SSeqLoc(seqloc, scope);
+}
+
+CRef
+CBlastReportApp::x_BuildQueryFromPssm(const CPssmWithParameters& pssm)
+{
+    if ( !pssm.HasQuery() ) {
+        throw runtime_error("PSSM has no query");
+    }
+    CRef scope(new CScope(*CObjectManager::GetInstance()));
+    const CSeq_entry& seq_entry = pssm.GetQuery();
+    if ( !seq_entry.IsSeq() ) {
+        throw runtime_error("Cannot have multiple queries in a PSSM");
+    }
+    SSeqLoc ssl = x_QueryBioseqToSSeqLoc(seq_entry.GetSeq(), scope);
+    CRef retval;
+    retval.Reset(new CBlastSearchQuery(*ssl.seqloc, *ssl.scope));
+    _ASSERT(ssl.scope.GetPointer() == scope.GetPointer());
+    return retval;
+}
+
+CRef
+CBlastReportApp::x_ExtractQueries(bool query_is_protein)
+{
+    CRef b4_queries = m_RmtBlast->GetQueries();
+    _ASSERT(b4_queries);
+    const size_t kNumQueries = b4_queries->GetNumQueries();
+
+    CRef retval(new CBlastQueryVector);
+
+    SDataLoaderConfig dlconfig(query_is_protein, SDataLoaderConfig::eUseNoDataLoaders);
+    dlconfig.OptimizeForWholeLargeSequenceRetrieval(false);
+    m_QueryScopeSource.Reset(new CBlastScopeSource(dlconfig));
+
+    // The archive stores its queries in one of three forms.
+    if (b4_queries->IsPssm()) {
+        retval->AddQuery(x_BuildQueryFromPssm(b4_queries->GetPssm()));
+    } else if (b4_queries->IsSeq_loc_list()) {
+        CRef scope = m_QueryScopeSource->NewScope();
+        ITERATE(CBlast4_queries::TSeq_loc_list, seqloc,
+                b4_queries->GetSeq_loc_list()) {
+            _ASSERT( !(*seqloc)->GetId()->IsLocal() );
+            CRef query(new CBlastSearchQuery(**seqloc,
+                                             *scope));
+            retval->AddQuery(query);
+        }
+    } else if (b4_queries->IsBioseq_set()) {
+        CTypeConstIterator itr(ConstBegin(b4_queries->GetBioseq_set(),
+                                          eDetectLoops));
+        CRef scope(new CScope(*CObjectManager::GetInstance()));
+        for (; itr; ++itr) {
+            SSeqLoc ssl = x_QueryBioseqToSSeqLoc(*itr, scope);
+            CRef query(new CBlastSearchQuery(*ssl.seqloc,
+                                             *ssl.scope));
+            retval->AddQuery(query);
+        }
+    }
+
+    (void)kNumQueries; // eliminate compiler warning;
+    _ASSERT(kNumQueries == retval->size());
+    return retval;
+}
+
+/// Resolves an error name to an exit code and a message.
+/// The "Errors" registry section is consulted first; its entries have the
+/// form "code:message", and any "#filename" in the message is replaced by
+/// blastArchName. The defaults are used when there is no application
+/// instance, no entry, a zero/unparsable code or an empty message.
+/// @param errorName Key to look up in the "Errors" section [in]
+/// @param defaultMessage Message to use when the lookup fails [in]
+/// @param defaultErrCode Exit code to use when the lookup fails [in]
+/// @param errorMsg Receives the resulting message [out]
+/// @param blastArchName Replacement text for "#filename" [in]
+/// @return the resulting exit code
+static int s_GetError(string errorName, string defaultMessage, int defaultErrCode, string &errorMsg,string blastArchName = "")
+{
+    CNcbiApplication* app = CNcbiApplication::Instance();
+    string message = defaultMessage;
+    int status = 0;
+    if (app) {
+        const CNcbiRegistry& registry = app->GetConfig();
+        string errorCode;
+        string errorInfo = registry.Get("Errors", errorName);
+        if(!errorInfo.empty()) {
+            NStr::SplitInTwo(errorInfo, ":", errorCode, message);
+            status = NStr::StringToInt(errorCode,NStr::fConvErr_NoThrow);
+            message = NStr::Replace(message,"#filename",blastArchName);
+        }
+    }
+    if(!status || message.empty()) {
+        errorMsg = defaultMessage;
+        status = defaultErrCode;
+    }
+    else {
+        errorMsg = message;
+    }
+    return status;
+}
+
+
+
+int CBlastReportApp::PrintFormattedOutput(void)
+{
+    int retval = EXIT_CODE__FORMAT_SUCCESS;
+    const CArgs& args = GetArgs();
+
+
+    CNcbiOstream& out = args[kArgOutput].AsOutputFile();
+    CFormattingArgs fmt_args;
+
+    string alignSeqList = args[kArgAlignSeqList].HasValue() ? args[kArgAlignSeqList].AsString() : kEmptyStr;
+    bool searchMetadata = args[kArgMetadata].HasValue() ? args[kArgMetadata].AsBoolean() : false;
+    // A negative command line value becomes a very large unsigned index and
+    // is rejected by the range check below.
+    unsigned int queryIndex = args[kArgQueryIndex].HasValue() ? args[kArgQueryIndex].AsInteger() : 0;
+
+
+
+    CRef opts_handle = m_RmtBlast->GetSearchOptions();
+    CBlastOptions& opts = opts_handle->SetOptions();
+    fmt_args.ExtractAlgorithmOptions(args, opts);
+    {{
+        CDebugArgs debug_args;
+        debug_args.ExtractAlgorithmOptions(args, opts);
+        if (debug_args.ProduceDebugOutput()) {
+            opts.DebugDumpText(NcbiCerr, "BLAST options", 1);
+        }
+    }}
+
+
+    const EBlastProgramType p = opts.GetProgramType();
+
+    CRef queries = x_ExtractQueries(Blast_QueryIsProtein(p)?true:false);
+    // Check the reference before it is dereferenced, not after.
+    _ASSERT(queries);
+    CRef scope = queries->GetScope(0);
+
+    CRef db_args(new CBlastDatabaseArgs());  // FIXME, what about rpsblast?
+    int filtering_algorithm = -1;
+    if (m_RmtBlast->IsDbSearch())
+    {
+        CRef db = m_RmtBlast->GetDatabases();
+        _ASSERT(db);
+        _TRACE("Fetching results for " + Blast_ProgramNameFromType(p) + " on "
+               + db->GetName());
+        filtering_algorithm = m_RmtBlast->GetDbFilteringAlgorithmId();
+        CRef search_db(new CSearchDatabase(db->GetName(), db->IsProtein()
+                              ? CSearchDatabase::eBlastDbIsProtein
+                              : CSearchDatabase::eBlastDbIsNucleotide));
+        db_args->SetSearchDatabase(search_db);
+    }
+
+    CRef db_adapter;
+    InitializeSubject(db_args, opts_handle, true, db_adapter, scope);
+
+
+    CBlastFormat formatter(opts, *db_adapter,
+                           fmt_args.GetFormattedOutputChoice(),
+                           static_cast(args[kArgParseDeflines]),
+                           out,
+                           fmt_args.GetNumDescriptions(),
+                           fmt_args.GetNumAlignments(),
+                           *scope,
+                           opts.GetMatrixName(),
+                           fmt_args.ShowGis(),
+                           fmt_args.DisplayHtmlOutput(),
+                           opts.GetQueryGeneticCode(),
+                           opts.GetDbGeneticCode(),
+                           opts.GetSumStatisticsMode(),
+                           false,
+                           filtering_algorithm);
+
+    formatter.SetLineLength(fmt_args.GetLineLength());
+    formatter.SetAlignSeqList(alignSeqList);
+
+    CRef results = m_RmtBlast->GetResultSet();
+
+
+    try {
+        // Use ">=" instead of "> count - 1": with an empty result set the
+        // subtraction wraps around in the unsigned comparison and every
+        // index would be accepted.
+        if(queryIndex >= results->GetNumQueries()) {
+            string msg;
+            retval = s_GetError("InvalidQueryIndex", "Invalid query index.", EXIT_CODE__QUERY_INDEX_INVALID, msg);
+            NCBI_THROW(CInputException, eInvalidInput,msg);
+        }
+
+        bool hasAlignments = (*results)[queryIndex].HasAlignments();
+        //BlastFormatter_PreFetchSequenceData(*results, scope, fmt_args.GetFormattedOutputChoice());//*****Do we need to do this here???
+        CBlastFormat::DisplayOption displayOption;
+        if(searchMetadata) {
+            displayOption = CBlastFormat::eMetadata;
+        }
+        else if(!alignSeqList.empty()){
+            displayOption = CBlastFormat::eAlignments;
+        }
+        else {
+            displayOption = CBlastFormat::eDescriptions;
+        }
+        // Metadata is printed even when the query produced no alignments.
+        if(hasAlignments || displayOption == CBlastFormat::eMetadata) {
+            formatter.PrintReport((*results)[queryIndex], displayOption);
+        }
+        if(!hasAlignments) {
+            retval = EXIT_CODE__NO_RESULTS_FOUND;
+        }
+    }catch (const CException & e) {
+        cerr << e.GetMsg() << endl;
+        // An exception that did not set its own code must not be reported
+        // as a successful run.
+        if (retval == EXIT_CODE__FORMAT_SUCCESS) {
+            retval = EXIT_CODE__BLAST_ARCHIVE_ERROR;
+        }
+    }
+
+    return retval;
+}
+
+
+
+/// Loads the BLAST archive named on the command line and formats it.
+/// @return one of the EXIT_CODE__* values (or the code configured in the
+/// "Errors" registry section); the error message, if any, goes to stderr.
+int CBlastReportApp::Run(void)
+{
+    int status = EXIT_CODE__FORMAT_SUCCESS;
+    const CArgs& args = GetArgs();
+    string msg;
+    try {
+        SetDiagPostLevel(eDiag_Warning);
+        if (args[kArgArchive].HasValue()) {
+            CNcbiIstream& istr = args[kArgArchive].AsInputFile();
+            m_RmtBlast.Reset(new CRemoteBlast(istr));
+            if (m_RmtBlast->LoadFromArchive()) {
+                if(!m_RmtBlast->IsErrMsgArchive()) {
+                    status = PrintFormattedOutput();
+                    return status;
+                }
+                else {
+                    status = s_GetError("NetConError", "Network connection error", EXIT_CODE__NETWORK_CONNECTION_ERROR, msg);
+                }
+            }
+            else {
+                // Nothing could be loaded: report it instead of exiting
+                // with success and an empty message.
+                status = s_GetError("ErrorBlastArchive", "Error processing BLAST Archive.", EXIT_CODE__BLAST_ARCHIVE_ERROR, msg);
+            }
+        }
+    }
+    catch (const CSerialException&) {
+        status = s_GetError("InvailInputFormat", "Invalid input format for BLAST Archive.", EXIT_CODE__INVALID_INPUT_FORMAT, msg);
+    }
+    catch (const CException& e) {
+        if (e.GetErrCode() == CBlastException::eInvalidArgument) {
+            status = s_GetError("ErrorBlastArchive", "Error processing BLAST Archive.", EXIT_CODE__BLAST_ARCHIVE_ERROR, msg);
+        }
+        else {
+            status = s_GetError("ErrorAccessingFile", e.GetMsg(), EXIT_CODE__CANNOT_ACCESS_FILE, msg,args[kArgArchive].AsString());
+        }
+    }
+
+    //cerr << "****retval:" << status << endl;
+    cerr << msg << endl;
+    return status;
+}
+
+
+#ifndef SKIP_DOXYGEN_PROCESSING
+int main(int argc, const char* argv[] /*, const char* envp[]*/)
+{
+    return CBlastReportApp().AppMain(argc, argv);
+}
+#endif /* SKIP_DOXYGEN_PROCESSING */
diff --git a/c++/src/app/blast/blast_report.ini
b/c++/src/app/blast/blast_report.ini new file mode 100755 index 00000000..5d436dd1 --- /dev/null +++ b/c++/src/app/blast/blast_report.ini @@ -0,0 +1,140 @@ +[Templates] +DFL_TABLE_ROW = \" ind=\"<@dfln_blast_rank@>\" class=\"<@trtp@> dflLnk\" > \ +<@dfln_blast_rank@>\" class=\"cb\" name=\"getSeqGi\" value=\"<@dfln_id@>\" onclick=\"configDescrLinks(event,this)\" /> \ + \ +<@score_info@><@total_bit_string@><@percent_coverage@>%<@evalue_string@><@percent_identity@>% \ +<@seq_info@> +DFL_TABLE_SCORE_INFO = <@bit_string@> +DFL_TABLE_SEQ_INFO = \" class=\"dflSeq\" target=\"lnk<@dfln_rid@>\" title=\"Show report for <@dfln_seqid@>\"><@dfln_gi@><@dfln_seqid@> + +BLAST_ALIGN_HEADER = \ +
\" seqID=\"<@alnSeqGi@>\" accs=\"<@firstSeqID@>\"> \ +\"> \ +\" data-jig=\"ncbipopper\" class=\"toolsCtr dnld dialog\" data-jigconfig=\"hasArrow:'true',arrowDirection:'top',closeEvent: 'click', openEvent: 'click', destPosition: 'top center', triggerPosition: 'bottom right'\" id=\"alnDwnld_<@alnSeqGi@>\" title=\"Download information for <@firstSeqID@>\">Download \ +
\" class=\"popDl\"> \ +
\" checked=\"checked\" id=\"dwFST_<@alnSeqGi@>\" fstaDWType=\"complete\" sbFunc=\"initDownLoadFSTA\" />
\ +
\" id=\"dwFSTAl_<@alnSeqGi@>\" fstaDWType=\"range\" sbFunc=\"initDownLoadFSTA\" />
\ +
\">\" id=\"dwGenBank_<@alnSeqGi@>\" getGi=\"<@alnSeqGi@>\" sbName=\"id\" url=\"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?rettype=gb&retmode=text\" />
\ +
\ + \ + \ +
\ +
\ +\"><@alnCustomLinks@> \ +<@sortInfo@> \ + \ +&currseq=<@alnSeqGi@>\" id=\"alnNxt_<@alnSeqGi@>\" onclick=\"goToNextAlign('dtr_<@alnSeqGi@>',true);\">Next \ +&currseq=<@alnSeqGi@>\" id=\"alnPrv_<@alnSeqGi@>\" onclick=\"goToNextAlign('dtr_<@alnSeqGi@>',false);\">Previous \ +\" id=\"alnDsc_<@alnSeqGi@>\" onclick=\"goToDefLine('dtr_<@alnSeqGi@>');\" href=\"#dtr_<@alnSeqGi@>\">Descriptions \ + \ +
\ +
\"> \ +
\"> \ +<@aln_deflines@> \ +
\ +
\"><@alnLinkOutLinks@>
\ +
\"> + + +BLAST_ALIGN_PARAMS = \ + + +ALIGN_PARAMS_NUC = \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ +
Alignment statistics for match #<@aln_curr_num@>
ScoreExpectIdentitiesGapsStrand\">Frame
<@aln_score@> bits(<@aln_score_bits@>)<@aln_eval@>\">(<@aln_sumN@>)<@aln_match@>(<@aln_ident@>%)<@aln_gaps@>(<@aln_gaps_prc@>%)<@aln_strand@>\"><@aln_frame@>
+ + +ALIGN_PARAMS_PROT = \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ + \ +
Alignment statistics for match #<@aln_curr_num@>
ScoreExpect\">MethodIdentitiesPositivesGaps\">Frame
<@aln_score@> bits(<@aln_score_bits@>)<@aln_eval@>\">(<@aln_sumN@>)\"><@aln_meth@><@aln_match@>(<@aln_ident@>%)<@aln_pos@>(<@aln_pos_prc@>%)<@aln_gaps@>(<@aln_gaps_prc@>%)\"><@aln_frame@>
+ + +SORT_ALIGNS_SEQ = \ + + +ALN_FEATURES = <@aln_feat_info@> + +ALN_FEATURES_LINK = \"><@aln_feat@> + +ALN_DEFLINE_ROW = \ +
\ +<@alnTitle@> \ +
<@seq_info@> r\"><@alnSeqLength@><@alnHspNum@>
\ +
+ +ALN_DEFLINE_TITLES_LNK =

\" >See <@titleNum@> more title(s)

+ +ALN_DEFLINE_TITLES = \ +
\ +
\ +<@seqTitles@> \ +
\ +
+ +ALN_DEFLINE_SEQ_INFO = \" target=\"lnk<@aln_rid@>\" title=\"Show report for <@aln_seqid@>\"><@aln_gi@><@aln_seqid@> + +BLAST_ALIGN_ROWS = \ +
_<@aln_curr_num@>\"> \ +
 \
+<@align_rows@> \
+
\ +
+ +BLAST_ALIGN_ROWS_LST = \ +
_<@aln_curr_num@>\"> \ +
 \
+<@align_rows@> \
+
\ +
\ +
\ +
+ +[Errors] +NoResultsFound=1:No results found. +InvailInputFormat=2:Invalid input format for BLAST Archive. +ErrorBlastArchive=3:Error processing BLAST Archive. +ErrorAccessingFile=4:Error accesing blast archive #filename. +InvalidQueryIndex=5:Invalid query index. +NetConError=6:Network connection error. diff --git a/c++/src/app/blast/blastn_app.cpp b/c++/src/app/blast/blastn_app.cpp index d093b037..003855db 100644 --- a/c++/src/app/blast/blastn_app.cpp +++ b/c++/src/app/blast/blastn_app.cpp @@ -1,4 +1,4 @@ -/* $Id: blastn_app.cpp 571362 2018-09-25 16:52:58Z ivanov $ +/* $Id: blastn_app.cpp 577771 2019-01-08 18:12:51Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -161,6 +161,9 @@ int CBlastnApp::Run(void) formatter.SetQueryRange(query_opts->GetRange()); formatter.SetLineLength(fmt_args->GetLineLength()); + formatter.SetHitsSortOption(fmt_args->GetHitsSortOption()); + formatter.SetHspsSortOption(fmt_args->GetHspsSortOption()); + formatter.SetCustomDelimiter(fmt_args->GetCustomDelimiter()); if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) { formatter.SetBaseFile(args[kArgOutput].AsString()); } diff --git a/c++/src/app/blast/blastp_app.cpp b/c++/src/app/blast/blastp_app.cpp index 50c535a2..fe0e5359 100644 --- a/c++/src/app/blast/blastp_app.cpp +++ b/c++/src/app/blast/blastp_app.cpp @@ -1,4 +1,4 @@ -/* $Id: blastp_app.cpp 571365 2018-09-25 16:53:46Z ivanov $ +/* $Id: blastp_app.cpp 577771 2019-01-08 18:12:51Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -154,7 +154,10 @@ int CBlastpApp::Run(void) formatter.SetQueryRange(query_opts->GetRange()); formatter.SetLineLength(fmt_args->GetLineLength()); - if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) { + formatter.SetHitsSortOption(fmt_args->GetHitsSortOption()); + formatter.SetHspsSortOption(fmt_args->GetHspsSortOption()); + 
formatter.SetCustomDelimiter(fmt_args->GetCustomDelimiter()); + if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) { formatter.SetBaseFile(args[kArgOutput].AsString()); } formatter.PrintProlog(); diff --git a/c++/src/app/blast/blastx_app.cpp b/c++/src/app/blast/blastx_app.cpp index ea007b2d..b2f30c8b 100644 --- a/c++/src/app/blast/blastx_app.cpp +++ b/c++/src/app/blast/blastx_app.cpp @@ -1,4 +1,4 @@ -/* $Id: blastx_app.cpp 571365 2018-09-25 16:53:46Z ivanov $ +/* $Id: blastx_app.cpp 577771 2019-01-08 18:12:51Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -154,7 +154,10 @@ int CBlastxApp::Run(void) formatter.SetQueryRange(query_opts->GetRange()); formatter.SetLineLength(fmt_args->GetLineLength()); - if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) { + formatter.SetHitsSortOption(fmt_args->GetHitsSortOption()); + formatter.SetHspsSortOption(fmt_args->GetHspsSortOption()); + formatter.SetCustomDelimiter(fmt_args->GetCustomDelimiter()); + if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) { formatter.SetBaseFile(args[kArgOutput].AsString()); } formatter.PrintProlog(); diff --git a/c++/src/app/blast/get_species_taxids.sh b/c++/src/app/blast/get_species_taxids.sh index f7b938b3..d24a0799 100755 --- a/c++/src/app/blast/get_species_taxids.sh +++ b/c++/src/app/blast/get_species_taxids.sh @@ -1,5 +1,5 @@ #!/bin/bash -# $Id: get_species_taxids.sh 559986 2018-03-16 15:55:09Z fongah2 $ +# $Id: get_species_taxids.sh 581766 2019-03-05 17:46:45Z ivanov $ # =========================================================================== # # PUBLIC DOMAIN NOTICE @@ -56,6 +56,14 @@ error_exit() { exit $exit_code; } +check_deps() { + for app in esearch efetch esummary; do + command -v $app >/dev/null 2>&1 || error_exit "Cannot find Entrez EDirect $app tool, please see installation in https://www.ncbi.nlm.nih.gov/books/NBK179288/" + done +} + +check_deps + TAXID="" NAME="" while getopts "ht::n::o::" OPT; 
do diff --git a/c++/src/app/blast/tblastn_app.cpp b/c++/src/app/blast/tblastn_app.cpp index 6c4306aa..883cbf99 100644 --- a/c++/src/app/blast/tblastn_app.cpp +++ b/c++/src/app/blast/tblastn_app.cpp @@ -1,4 +1,4 @@ -/* $Id: tblastn_app.cpp 571365 2018-09-25 16:53:46Z ivanov $ +/* $Id: tblastn_app.cpp 577771 2019-01-08 18:12:51Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -175,7 +175,10 @@ int CTblastnApp::Run(void) formatter.SetQueryRange(query_opts->GetRange()); formatter.SetLineLength(fmt_args->GetLineLength()); - if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) { + formatter.SetHitsSortOption(fmt_args->GetHitsSortOption()); + formatter.SetHspsSortOption(fmt_args->GetHspsSortOption()); + formatter.SetCustomDelimiter(fmt_args->GetCustomDelimiter()); + if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) { formatter.SetBaseFile(args[kArgOutput].AsString()); } formatter.PrintProlog(); diff --git a/c++/src/app/blast/tblastx_app.cpp b/c++/src/app/blast/tblastx_app.cpp index 8f4359bd..4653625d 100644 --- a/c++/src/app/blast/tblastx_app.cpp +++ b/c++/src/app/blast/tblastx_app.cpp @@ -1,4 +1,4 @@ -/* $Id: tblastx_app.cpp 571365 2018-09-25 16:53:46Z ivanov $ +/* $Id: tblastx_app.cpp 577771 2019-01-08 18:12:51Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -154,7 +154,10 @@ int CTblastxApp::Run(void) formatter.SetQueryRange(query_opts->GetRange()); formatter.SetLineLength(fmt_args->GetLineLength()); - if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) { + formatter.SetHitsSortOption(fmt_args->GetHitsSortOption()); + formatter.SetHspsSortOption(fmt_args->GetHspsSortOption()); + formatter.SetCustomDelimiter(fmt_args->GetCustomDelimiter()); + if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) { formatter.SetBaseFile(args[kArgOutput].AsString()); } formatter.PrintProlog(); diff --git a/c++/src/app/blast/update_blastdb.pl 
b/c++/src/app/blast/update_blastdb.pl index ef54f1da..99ac14ef 100755 --- a/c++/src/app/blast/update_blastdb.pl +++ b/c++/src/app/blast/update_blastdb.pl @@ -1,5 +1,5 @@ #!/usr/bin/perl -# $Id: update_blastdb.pl 574684 2018-11-16 14:53:37Z ivanov $ +# $Id: update_blastdb.pl 581818 2019-03-06 12:53:38Z ivanov $ # =========================================================================== # # PUBLIC DOMAIN NOTICE @@ -27,8 +27,7 @@ # Author: Christiam Camacho # # File Description: -# Script to download the pre-formatted BLAST databases from the NCBI ftp -# server. +# Script to download the pre-formatted BLAST databases. # # =========================================================================== @@ -40,6 +39,8 @@ use Pod::Usage; use File::stat; use Digest::MD5; use Archive::Tar; +use File::Temp; +use JSON; use constant NCBI_FTP => "ftp.ncbi.nlm.nih.gov"; use constant BLAST_DB_DIR => "/blast/db"; @@ -49,6 +50,12 @@ use constant DEBUG => 0; use constant MAX_DOWNLOAD_ATTEMPTS => 3; use constant EXIT_FAILURE => 2; +use constant GCS_URL => "https://storage.googleapis.com"; +use constant GCP_URL => "http://metadata.google.internal/computeMetadata/v1/instance/id"; +use constant GCP_BUCKET => "blast-db"; +use constant GCP_MANIFEST => "blastdb-manifest.json"; +use constant GCP_MANIFEST_VERSION => "1.0"; + # Process command line options my $opt_verbose = 1; my $opt_quiet = 0; @@ -57,27 +64,37 @@ my $opt_help = 0; my $opt_passive = 1; my $opt_blastdb_ver = 4; my $opt_timeout = 120; -my $opt_showall = 0; +my $opt_showall = undef; my $opt_show_version = 0; my $opt_decompress = 0; +my $opt_source; +my $opt_nt = &get_num_cores(); my $result = GetOptions("verbose+" => \$opt_verbose, "quiet" => \$opt_quiet, "force" => \$opt_force_download, "passive:s" => \$opt_passive, "timeout=i" => \$opt_timeout, - "showall" => \$opt_showall, + "showall:s" => \$opt_showall, "version" => \$opt_show_version, "blastdb_version:i"=> \$opt_blastdb_ver, "decompress" => \$opt_decompress, + "source=s" => 
\$opt_source, + "num_threads=i" => \$opt_nt, "help" => \$opt_help); $opt_verbose = 0 if $opt_quiet; die "Failed to parse command line options\n" unless $result; pod2usage({-exitval => 0, -verbose => 2}) if $opt_help; +if (length($opt_passive) and ($opt_passive !~ /1|no/i)) { + pod2usage({-exitval => 1, -verbose => 0, + -msg => "Invalid value for passive option: '$opt_passive'"}); +} pod2usage({-exitval => 0, -verbose => 2}) unless (scalar @ARGV or - $opt_showall or + defined($opt_showall) or $opt_show_version); pod2usage({-exitval => 1, -verbose => 0, -msg => "Invalid BLAST database version"}) unless ($opt_blastdb_ver == 4 or $opt_blastdb_ver == 5); +pod2usage({-exitval => 1, -verbose => 0, -msg => "Invalid number of threads"}) + if ($opt_nt <= 0); if (length($opt_passive) and $opt_passive =~ /n|no/i) { $opt_passive = 0; } else { @@ -86,26 +103,113 @@ if (length($opt_passive) and $opt_passive =~ /n|no/i) { my $exit_code = 0; $|++; -# Connect and download files -my $ftp = &connect_to_ftp() unless ($opt_show_version); +my $location = "NCBI"; +unless ($^O =~ /mswin/i) { + $location = system("/usr/bin/curl -sfo /dev/null -H 'Metadata-Flavor: Google' " . GCP_URL) == 0 ? 
"GCP" : "NCBI"; +} +# Override data source, only for testing +if (defined($opt_source)) { + if ($opt_source =~ /^ncbi/i) { + $location = "NCBI"; + } elsif ($opt_source =~ /^gc/i and $^O !~ /mswin/i) { + $location = "GCP"; + } +} + if ($opt_show_version) { - my $revision = '$Revision: 574684 $'; + my $revision = '$Revision: 581818 $'; $revision =~ s/\$Revision: | \$//g; print "$0 version $revision\n"; -} elsif ($opt_showall) { - print "$_\n" foreach (sort(&get_available_databases())); + exit($exit_code); +} + +my $ftp; + +if ($location eq "GCP") { + #die "Only BLASTDB vesion 5 is supported at GCP\n" if ($opt_blastdb_ver == 4); + my $latest_dir = &get_gcs_latest_dir(); + my ($json, $url) = &get_gcs_blastdb_metadata($latest_dir); + unless (length($json)) { + print STDERR "ERROR: Missing manifest file $url, please report to blast-help\@ncbi.nlm.nih.gov\n"; + exit(2); + } + print "Connected to $location\n" if $opt_verbose; + my $metadata = from_json($json); + unless (exists($$metadata{version}) and ($$metadata{version} eq GCP_MANIFEST_VERSION)) { + print STDERR "ERROR: Invalid version in manifest file $url, please report to blast-help\@ncbi.nlm.nih.gov\n"; + exit(2); + } + if (defined($opt_showall)) { + my $print_header = 1; + foreach my $db (sort keys %$metadata) { + next if ($db =~ /^version$/); + if ($opt_showall =~ /tsv/i) { + printf("%s\t%s\t%9.4f\t%s\n", $db, $$metadata{$db}{description}, + $$metadata{$db}{size}, $$metadata{$db}{last_updated}); + } elsif ($opt_showall =~ /pretty/i) { + if ($print_header) { + printf("%-60s %-120s %-11s %15s\n", "BLASTDB", + "DESCRIPTION", "SIZE (GB)", "LAST_UPDATED"); + $print_header = 0; + } + printf("%-60s %-120s %9.4f %15s\n", $db, $$metadata{$db}{description}, + $$metadata{$db}{size}, $$metadata{$db}{last_updated}); + } else { + print "$db\n"; + } + } + } else { + my @files2download; + for my $requested_db (@ARGV) { + if (exists $$metadata{$requested_db}) { + push @files2download, @{$$metadata{$requested_db}{files}}; + } else { 
+ print STDERR "Warning: $requested_db does not exist in $location ($latest_dir)\n"; + } + } + if (@files2download) { + my $gsutil = &get_gsutil_path(); + my $cmd; + my $fh = File::Temp->new(); + if (defined($gsutil)) { + $cmd = "$gsutil " . ($opt_nt > 1 ? "-m" : "" ) . " -q cp "; + $cmd .= join(" ", @files2download) . " ."; + } else { # fall back to curl + my $url = GCS_URL; + s,gs://,$url/, foreach (@files2download); + if ($opt_nt > 1 and -f "/usr/bin/xargs") { + print $fh join("\n", @files2download); + $cmd = "/usr/bin/xargs -P $opt_nt -a $fh -n 1"; + $cmd .= " -t" if $opt_verbose > 3; + $cmd .= " /usr/bin/curl -sOR"; + } else { + $cmd = "/usr/bin/curl -sR"; + $cmd .= " -O $_" foreach (@files2download); + } + } + print "$cmd\n" if $opt_verbose > 3; + system($cmd); + } + } + } else { - my @files = sort(&get_files_to_download()); - my @files2decompress; - $exit_code = &download(\@files, \@files2decompress); - if ($exit_code == 1) { - foreach (@files2decompress) { - $exit_code = &decompress($_); - last if ($exit_code != 1); + # Connect and download files + $ftp = &connect_to_ftp(); + if (defined $opt_showall) { + print "$_\n" foreach (sort(&get_available_databases($ftp->ls()))); + } else { + my @files = sort(&get_files_to_download()); + my @files2decompress; + $exit_code = &download(\@files, \@files2decompress); + if ($exit_code == 1) { + foreach (@files2decompress) { + $exit_code = &decompress($_); + last if ($exit_code != 1); + } } } + $ftp->quit(); } -$ftp->quit() unless ($opt_show_version); exit($exit_code); @@ -124,7 +228,7 @@ sub connect_to_ftp $ftp_path .= "/v5" if ($opt_blastdb_ver == 5); $ftp->cwd($ftp_path); $ftp->binary(); - print "Connected to NCBI\n" if $opt_verbose; + print "Connected to $location\n" if $opt_verbose; return $ftp; } @@ -338,11 +442,52 @@ sub get_num_volumes return $retval + 1; } +# Retrieves the name of the 'subdirectory' where the latest BLASTDBs residue in GCP +sub get_gcs_latest_dir +{ + my $cmd = "/usr/bin/curl -s " . GCS_URL . 
"/" . GCP_BUCKET . "/latest-dir"; + chomp(my $retval = `$cmd`); + return $retval; +} + +# Fetches the JSON text containing the BLASTDB metadata in GCS +sub get_gcs_blastdb_metadata +{ + my $latest_dir = shift; + my $url = GCS_URL . "/" . GCP_BUCKET . "/$latest_dir/" . GCP_MANIFEST; + chomp(my $retval = `/usr/bin/curl -sf $url`); + return ($retval, $url); +} + +# Returns the path to the gsutil utility or undef if it is not found +sub get_gsutil_path +{ + foreach (qw(/google/google-cloud-sdk/bin /usr/local/bin /usr/bin /snap/bin)) { + my $path = "$_/gsutil"; + return $path if (-f $path); + } + return undef; +} + +# Returns the number of cores, or 1 if unknown +sub get_num_cores +{ + my $retval = 1; + if ($^O =~ /linux/i) { + open my $fh, "/proc/cpuinfo" or return $retval; + $retval = scalar(map /^processor/, <$fh>); + close($fh); + } elsif ($^O =~ /darwin/i) { + chomp($retval = `/usr/sbin/sysctl -n hw.ncpu`); + } + return $retval; +} + __END__ =head1 NAME -B - Download pre-formatted BLAST databases from NCBI +B - Download pre-formatted BLAST databases =head1 SYNOPSIS @@ -364,6 +509,13 @@ Show all available pre-formatted BLAST databases (default: false). The output of this option lists the database names which should be used when requesting downloads or updates using this script. +It accepts the optional arguments: 'tsv' and 'pretty' to produce tab-separated values +and a human-readable format respectively. These parameters elicit the display of +additional metadata if this is available to the program. +This metadata is displayed in columnar format; the columns represent: + +name, description, size in gigabytes, date of last update (YYYY-MM-DD format). + =item B<--blastdb_version> Specify which BLAST database version to download (default: 4). @@ -396,6 +548,11 @@ Produce no output (default: false). Overrides the B<--verbose> option. Prints this script's version. Overrides all other options. 
+=item B<--num_threads> + +Sets the number of threads used to perform downloads in parallel when data comes from GCS. +Defaults to the number of cores on the machine (Linux and macOS only; 1 elsewhere). + =back =head1 DESCRIPTION diff --git a/c++/src/app/blastdb/Makefile.blastdb_path.app b/c++/src/app/blastdb/Makefile.blastdb_path.app new file mode 100755 index 00000000..76dbaa9b --- /dev/null +++ b/c++/src/app/blastdb/Makefile.blastdb_path.app @@ -0,0 +1,15 @@ +WATCHERS = camacho fongah2 + +APP = blastdb_path +SRC = blastdb_path +LIB_ = $(BLAST_FORMATTER_LIBS) $(BLAST_LIBS) $(OBJMGR_LIBS) +LIB = $(LIB_:%=%$(STATIC)) + +CFLAGS = $(FAST_CFLAGS) +CXXFLAGS = $(FAST_CXXFLAGS) +LDFLAGS = $(FAST_LDFLAGS) + +CPPFLAGS = -DNCBI_MODULE=BLASTDB $(ORIG_CPPFLAGS) $(BLAST_THIRD_PARTY_INCLUDE) +LIBS = $(BLAST_THIRD_PARTY_LIBS) $(CMPRS_LIBS) $(DL_LIBS) $(NETWORK_LIBS) $(ORIG_LIBS) + +REQUIRES = objects -Cygwin diff --git a/c++/src/app/blastdb/Makefile.in b/c++/src/app/blastdb/Makefile.in index fd1c5636..8582a9bf 100644 --- a/c++/src/app/blastdb/Makefile.in +++ b/c++/src/app/blastdb/Makefile.in @@ -1,11 +1,11 @@ -# $Id: Makefile.in 553487 2017-12-18 14:23:38Z fongah2 $ +# $Id: Makefile.in 577773 2019-01-08 18:13:31Z ivanov $ # Meta-makefile("APP" project) ################################# REQUIRES = objects algo -APP_PROJ = blastdbcmd makeblastdb blastdb_aliastool blastdbcheck convert2blastmask blastdbcp makeprofiledb blastdb_convert +APP_PROJ = blastdbcmd makeblastdb blastdb_aliastool blastdbcheck convert2blastmask blastdbcp makeprofiledb blastdb_convert blastdb_path srcdir = @srcdir@ include @builddir@/Makefile.meta @@ -36,4 +36,5 @@ makeprofiledb: blastdb_convert: ${MAKE} ${MFLAGS} -f Makefile.blastdb_convert_app - +blastdb_path: + ${MAKE} ${MFLAGS} -f Makefile.blastdb_path_app diff --git a/c++/src/app/blastdb/blastdb_aliastool.cpp b/c++/src/app/blastdb/blastdb_aliastool.cpp index 85e6877d..1691ba93 100644 --- a/c++/src/app/blastdb/blastdb_aliastool.cpp +++ b/c++/src/app/blastdb/blastdb_aliastool.cpp @@ -1,4 +1,4
@@ -/* $Id: blastdb_aliastool.cpp 553796 2017-12-21 17:07:04Z fongah2 $ +/* $Id: blastdb_aliastool.cpp 577746 2019-01-08 18:06:08Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -118,11 +118,11 @@ const char * const CBlastDBAliasApp::DOCUMENTATION = "\n\n" " of the BLAST search command line binaries or to the -gilist option of\n" " this program to create an alias file for a BLAST database (see below).\n\n" "2) Alias file creation (restricting with GI List or Sequence ID List):\n" -" Creates an alias for a BLAST database and a GI or ID list which restricts\n" -" this database. This is useful if one often searches a subset of a database\n" -" (e.g., based on organism or a curated list). The alias file makes the\n" -" search appear as if one were searching a regular BLAST database rather\n" -" than the subset of one.\n\n" +" Creates an alias for a BLAST database and a GI or ID list which\n" +" restricts this database. This is useful if one often searches a subset\n" +" of a database (e.g., based on organism or a curated list). The alias\n" +" file makes the search appear as if one were searching a regular BLAST\n" +" database rather than the subset of one.\n\n" "3) Alias file creation (aggregating BLAST databases):\n" " Creates an alias for multiple BLAST databases. All databases must be of\n" " the same molecule type (no validation is done). 
The relevant options are\n" @@ -137,36 +137,39 @@ void CBlastDBAliasApp::Init() auto_ptr arg_desc(new CArgDescriptions); arg_desc->SetUsageContext(GetArguments().GetProgramBasename(), - "Application to create BLAST database aliases, version " - + CBlastVersion().Print() + DOCUMENTATION); + "Application to create BLAST database aliases, version " + + CBlastVersion().Print() + DOCUMENTATION); string dflt("Default = input file name provided to -gi_file_in argument"); dflt += " with the .bgl extension"; - const char* exclusions[] = { kArgDb.c_str(), kArgDbType.c_str(), kArgDbTitle.c_str(), - kArgGiList.c_str(), kArgSeqIdList.c_str(), kArgOutput.c_str(), - "dblist", "num_volumes", "vdblist", "seqid_file_in", "seqid_file_out", - "seqid_file_in", "seqid_file_out", "seqid_db", "seqid_dbtype", "seqid_file_info"}; + set exclusions = { + kArgDb, kArgDbType, kArgDbTitle, kArgGiList, kArgSeqIdList, kArgOutput, + "dblist", "num_volumes", "vdblist", "seqid_file_in", "seqid_file_out", + "seqid_db", "seqid_dbtype", "seqid_file_info" + }; + arg_desc->SetCurrentGroup("GI file conversion options"); + arg_desc->AddOptionalKey("gi_file_in", "input_file", - "Text file to convert, should contain one GI per line", - CArgDescriptions::eInputFile); - for (size_t i = 0; i < sizeof(exclusions)/sizeof(*exclusions); i++) { - arg_desc->SetDependency("gi_file_in", CArgDescriptions::eExcludes, - string(exclusions[i])); + "Text file to convert, should contain one GI per line", + CArgDescriptions::eInputFile); + for (string exclusion : exclusions) { + arg_desc->SetDependency("gi_file_in", CArgDescriptions::eExcludes, exclusion); } + arg_desc->AddOptionalKey("gi_file_out", "output_file", - "File name of converted GI file\n" + dflt, - CArgDescriptions::eOutputFile, - CArgDescriptions::fPreOpen | CArgDescriptions::fBinary); + "File name of converted GI file\n" + dflt, + CArgDescriptions::eOutputFile, + CArgDescriptions::fPreOpen | CArgDescriptions::fBinary); arg_desc->SetDependency("gi_file_out", 
CArgDescriptions::eRequires, "gi_file_in"); - for (size_t i = 0; i < sizeof(exclusions)/sizeof(*exclusions); i++) { - arg_desc->SetDependency("gi_file_out", CArgDescriptions::eExcludes, - string(exclusions[i])); + for (string exclusion : exclusions) { + arg_desc->SetDependency("gi_file_out", CArgDescriptions::eExcludes, exclusion); } arg_desc->SetCurrentGroup("Alias file creation options"); + arg_desc->AddOptionalKey(kArgDb, "dbname", "BLAST database name", CArgDescriptions::eString); arg_desc->SetDependency(kArgDb, CArgDescriptions::eRequires, kOutput); @@ -175,13 +178,13 @@ void CBlastDBAliasApp::Init() "Molecule type stored in BLAST database", CArgDescriptions::eString, "prot"); arg_desc->SetConstraint(kArgDbType, &(*new CArgAllow_Strings, - "nucl", "prot")); + "nucl", "prot")); arg_desc->AddOptionalKey(kArgDbTitle, "database_title", - "Title for BLAST database\n" - "Default = name of BLAST database provided to -db" - " argument with the -gifile argument appended to it", - CArgDescriptions::eString); + "Title for BLAST database\n" + "Default = name of BLAST database provided to -db" + " argument with the -gifile argument appended to it", + CArgDescriptions::eString); arg_desc->SetDependency(kArgDbTitle, CArgDescriptions::eRequires, kOutput); arg_desc->AddOptionalKey(kArgGiList, "input_file", @@ -214,14 +217,17 @@ void CBlastDBAliasApp::Init() "A space separated list of BLAST database names to" " aggregate", CArgDescriptions::eString); + arg_desc->AddOptionalKey("dblist_file", "file_name", "A file containing a list of BLAST database names" " to aggregate, one per line", CArgDescriptions::eInputFile); + /* For VDBLIST */ arg_desc->AddOptionalKey("vdblist", "vdb_names", "A space separated list of VDB names to aggregate", CArgDescriptions::eString); + arg_desc->AddOptionalKey("vdblist_file", "file_name", "A file containing a list of vdb names" " to aggregate, one per line", @@ -253,43 +259,54 @@ void CBlastDBAliasApp::Init() arg_desc->SetConstraint("num_volumes", 
new CArgAllowValuesGreaterThanOrEqual(1)); string dflt_seqid("Default = input file name provided to -seqid_file_in argument"); - const char* seqid_exclusions[] = { kArgDb.c_str(), kArgDbType.c_str(), kArgDbTitle.c_str(), - kArgGiList.c_str(), kArgSeqIdList.c_str(), kArgOutput.c_str(), - "dblist", "num_volumes", "vdblist", "gi_file_out", "gi_file_out"}; - arg_desc->SetCurrentGroup("Seqd ID file conversion options"); - arg_desc->AddOptionalKey("seqid_file_in", "input_file", - "Text file to convert, should contain one seq id per line", - CArgDescriptions::eInputFile); - for (size_t i = 0; i < sizeof(seqid_exclusions)/sizeof(*seqid_exclusions); i++) { - arg_desc->SetDependency("seqid_file_in", CArgDescriptions::eExcludes, - string(seqid_exclusions[i])); - } - arg_desc->AddOptionalKey("seqid_title", "seqid_title", "Title for seqid list.\n " + - dflt_seqid, CArgDescriptions::eString); - arg_desc->SetDependency("seqid_title", CArgDescriptions::eRequires, "seqid_file_in"); - arg_desc->AddOptionalKey("seqid_file_out", "output_file", - "File name of converted seq id file\n" + dflt_seqid + " with the .bsl extension", - CArgDescriptions::eString); - arg_desc->AddOptionalKey("seqid_db", "dbname", "BLAST database for seqidlist", - CArgDescriptions::eString); - arg_desc->SetDependency("seqid_db", CArgDescriptions::eRequires, "seqid_file_in"); - - arg_desc->AddOptionalKey("seqid_dbtype", "molecule_type", "Molecule type BLAST database", - CArgDescriptions::eString); - arg_desc->SetDependency("seqid_dbtype", CArgDescriptions::eRequires, "seqid_file_in"); - arg_desc->SetDependency("seqid_dbtype", CArgDescriptions::eRequires, "seqid_db"); - arg_desc->SetConstraint("seqid_dbtype", &(*new CArgAllow_Strings, "nucl", "prot")); - for (size_t i = 0; i < sizeof(seqid_exclusions)/sizeof(*seqid_exclusions); i++) { - arg_desc->SetDependency("seqid_file_out", CArgDescriptions::eExcludes, string(seqid_exclusions[i])); - } + set seqid_exclusions = { + kArgDb, kArgDbType, kArgDbTitle, kArgGiList, 
kArgSeqIdList, kArgOutput, + "dblist", "num_volumes", "vdblist" + }; + // "gi_file_in" and "gi_file_out" already exclude "seqid_file_in" and + // "seqid_file_out". - const char* seqid_info_exclusions[] = { kArgDb.c_str(), kArgDbType.c_str(), kArgDbTitle.c_str(), - kArgGiList.c_str(), kArgSeqIdList.c_str(), kArgOutput.c_str(), - "dblist", "num_volumes", "vdblist", "gi_file_out", "gi_file_out", "seqid_file_in", "seqid_file_out"}; - arg_desc->AddOptionalKey("seqid_file_info", "seqid_file_info", "Display seqidlist file info", CArgDescriptions::eString); - for (size_t i = 0; i < sizeof(seqid_info_exclusions)/sizeof(*seqid_info_exclusions); i++) { - arg_desc->SetDependency("seqid_info", CArgDescriptions::eExcludes, string(seqid_info_exclusions[i])); - } + arg_desc->SetCurrentGroup("Seqd ID file conversion options"); + + arg_desc->AddOptionalKey("seqid_file_in", "input_file", + "Text file to convert, should contain one seq id per line", + CArgDescriptions::eInputFile); + for (string exclusion : seqid_exclusions) { + arg_desc->SetDependency("seqid_file_in", CArgDescriptions::eExcludes, exclusion); + } + + arg_desc->AddOptionalKey("seqid_title", "seqid_title", "Title for seqid list.\n " + + dflt_seqid, CArgDescriptions::eString); + arg_desc->SetDependency("seqid_title", CArgDescriptions::eRequires, "seqid_file_in"); + + arg_desc->AddOptionalKey("seqid_file_out", "output_file", + "File name of converted seq id file\n" + dflt_seqid + " with the .bsl extension", + CArgDescriptions::eString); + + arg_desc->AddOptionalKey("seqid_db", "dbname", "BLAST database for seqidlist", + CArgDescriptions::eString); + arg_desc->SetDependency("seqid_db", CArgDescriptions::eRequires, "seqid_file_in"); + + arg_desc->AddOptionalKey("seqid_dbtype", "molecule_type", "Molecule type BLAST database", + CArgDescriptions::eString); + arg_desc->SetDependency("seqid_dbtype", CArgDescriptions::eRequires, "seqid_file_in"); + arg_desc->SetDependency("seqid_dbtype", CArgDescriptions::eRequires, "seqid_db"); 
+ arg_desc->SetConstraint("seqid_dbtype", &(*new CArgAllow_Strings, "nucl", "prot")); + + for (string exclusion : seqid_exclusions) { + arg_desc->SetDependency("seqid_file_out", CArgDescriptions::eExcludes, exclusion); + } + + set seqid_info_exclusions = { + kArgDb, kArgDbType, kArgDbTitle, kArgGiList, kArgSeqIdList, kArgOutput, + "dblist", "num_volumes", "vdblist", "seqid_file_in", "seqid_file_out" + }; + // "gi_file_in" and "gi_file_out" already exclude "seqid_file_info". + + arg_desc->AddOptionalKey("seqid_file_info", "seqid_file_info", "Display seqidlist file info", CArgDescriptions::eString); + for (string exclusion : seqid_info_exclusions) { + arg_desc->SetDependency("seqid_file_info", CArgDescriptions::eExcludes, exclusion); + } SetupArgDescriptions(arg_desc.release()); @@ -555,8 +572,21 @@ int CBlastDBAliasApp::Run(void) if (x_GetOperationMode() == eConvertGiFile) { CNcbiIstream& input = args["gi_file_in"].AsInputFile(); - CNcbiOstream& output = args["gi_file_out"].AsOutputFile(); - status = ConvertGiFile(input, output); + string gi_file_out; + if (args["gi_file_out"].HasValue()) { + gi_file_out = args["gi_file_out"].AsString(); + } else { + gi_file_out = args["gi_file_in"].AsString(); + gi_file_out += ".bgl"; + } + { + // output will close at end of scope. 
+ CNcbiOfstream output(gi_file_out.c_str()); + status = ConvertGiFile(input, output); + } + if (!CFile(gi_file_out).Exists()) { + NCBI_THROW(CSeqDBException, eFileErr, gi_file_out + " not written"); + } } else if(x_GetOperationMode() == eConvertSeqIDFile) { status = x_ConvertSeqIDFile(); } else if(x_GetOperationMode() == eSeqIDFileInfo) { diff --git a/c++/src/app/blastdb/blastdb_convert.cpp b/c++/src/app/blastdb/blastdb_convert.cpp index c496418c..648467a1 100644 --- a/c++/src/app/blastdb/blastdb_convert.cpp +++ b/c++/src/app/blastdb/blastdb_convert.cpp @@ -1,4 +1,4 @@ -/* $Id: blastdb_convert.cpp 571367 2018-09-25 16:54:18Z ivanov $ +/* $Id: blastdb_convert.cpp 579231 2019-01-31 16:33:40Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -126,6 +126,9 @@ void CBlastdbConvertApp::Init() arg_desc->AddFlag("update_timestamp", "Update the date of last update in the output database", true); + arg_desc->AddFlag("new_index", + "Generate vol index for filename", + true); arg_desc->AddDefaultKey(kMapSize, "memory_map_size_limit", "Max mempry map size of output file", CArgDescriptions::eInt8, "1000000000000"); @@ -290,6 +293,7 @@ int CBlastdbConvertApp::Run(void) const CSeqDB::ESeqType seqtype = ParseMoleculeTypeString(kMol); const bool kIsProt = (seqtype == CSeqDB::eProtein); string kOutputAbsPath = CDirEntry::CreateAbsolutePath(kOutput); + const bool kNewIndex = args["new_index"].HasValue() ? true: false; m_LogFile = & (args["logfile"].HasValue()? args["logfile"].AsOutputFile() : cout); SetDiagPostLevel(eDiag_Warning); @@ -363,7 +367,7 @@ int CBlastdbConvertApp::Run(void) _TRACE("Processing " << vol_path); string vol_num = NStr::UIntToString(p);; string kOutputVol = output_dir.GetName(); - if(use_index_in_filename) { + if(kNewIndex && use_index_in_filename) { string zero_padding = kEmptyStr; const string path_size_str = NStr::IntToString((int) paths.size()); unsigned int l = (path_size_str.size() < 2) ? 
2 : path_size_str.size(); @@ -372,6 +376,20 @@ int CBlastdbConvertApp::Run(void) } kOutputVol += "." + zero_padding + vol_num; } + else if(use_index_in_filename){ + vector parts; + NStr::Split(vol_path, ".", parts); + if(parts.size() < 2) { + NCBI_THROW(CInputException, eInvalidInput, "v4 db has no vol index in filename"); + } + try { + NStr::StringToInt(parts.back()); + } catch (CStringException &){ + NCBI_THROW(CInputException, eInvalidInput, "v4 db has no vol index in filename"); + } + + kOutputVol += "." + parts.back(); + } CRef vol(new CSeqDB(vol_path, seqtype)); _ASSERT(vol); diff --git a/c++/src/app/blastdb/blastdb_path.cpp b/c++/src/app/blastdb/blastdb_path.cpp new file mode 100755 index 00000000..b7e7135a --- /dev/null +++ b/c++/src/app/blastdb/blastdb_path.cpp @@ -0,0 +1,140 @@ +/* $Id: blastdb_path.cpp 577773 2019-01-08 18:13:31Z ivanov $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. 
+ * + * =========================================================================== + * + * Author: Irena Zaretskaya + * + */ + +/** @file blastdb_path.cpp + * Command line tool to determine the path to BLAST databases. + */ + +#include +#include +#include +#include +#include +#include "../blast/blast_app_util.hpp" +#include + + +#ifndef SKIP_DOXYGEN_PROCESSING +USING_NCBI_SCOPE; +USING_SCOPE(blast); +#endif + +/// The application class +class CBlastDBCmdApp : public CNcbiApplication +{ +public: + /** @inheritDoc */ + CBlastDBCmdApp() { + CRef version(new CVersion()); + version->SetVersionInfo(new CBlastVersion()); + SetFullVersion(version); + } +private: + /** @inheritDoc */ + virtual void Init(); + /** @inheritDoc */ + virtual int Run(); +}; + + + + + +void CBlastDBCmdApp::Init() +{ + HideStdArgs(fHideConffile | fHideFullVersion | fHideXmlHelp | fHideDryRun); + + auto_ptr arg_desc(new CArgDescriptions); + + // Specify USAGE context + arg_desc->SetUsageContext(GetArguments().GetProgramBasename(), + "BLAST database client, version " + CBlastVersion().Print()); + + arg_desc->SetCurrentGroup("BLAST database options"); + arg_desc->AddDefaultKey(kArgDb, "dbname", "BLAST database name", + CArgDescriptions::eString, "nr"); + + arg_desc->AddDefaultKey(kArgDbType, "molecule_type", + "Molecule type stored in BLAST database", + CArgDescriptions::eString, "nucl"); + arg_desc->SetConstraint(kArgDbType, &(*new CArgAllow_Strings, + "nucl", "prot")); + arg_desc->AddFlag("getvolumespath", "Get .[np]in adn .[np]sq volumes paths", true); + + arg_desc->SetCurrentGroup("Output configuration options"); + arg_desc->AddDefaultKey(kArgOutput, "output_file", "Output file name", + CArgDescriptions::eOutputFile, "-"); + + SetupArgDescriptions(arg_desc.release()); +} + +int CBlastDBCmdApp::Run(void) +{ + int status = 0; + const CArgs& args = GetArgs(); + + try { + CNcbiOstream& out = args["out"].AsOutputFile(); + string dbtype = args[kArgDbType].AsString(); + + if (args["getvolumespath"]) { + 
CSeqDB::ESeqType seqType = (dbtype == "nucl" ) ? CSeqDB::eNucleotide : CSeqDB::eProtein ; + vector paths; + //CSeqDB::FindVolumePaths(args[kArgDb].AsString(),seqType,paths,&alias_paths,true); + CSeqDB::FindVolumePaths(args[kArgDb].AsString(),seqType,paths); + for( size_t i = 0; i < paths.size();i++) { + out << paths[i] << "." << dbtype.at(0) << "in " << paths[i] << "." << dbtype.at(0) << "sq"; + if(i < paths.size() - 1) out << " "; + } + } + else { + string dbLocation = SeqDB_ResolveDbPathNoExtension(args[kArgDb].AsString(),dbtype.at(0)); + if(dbLocation.empty()) { + status = 1; + } + out << dbLocation << NcbiEndl; + } + } + catch (const CException& e) { + ERR_POST(Error << e.GetMsg()); + status = 1; + } catch (...) { + ERR_POST(Error << "Failed to retrieve requested item"); + status = 1; + } + return status; +} + + +#ifndef SKIP_DOXYGEN_PROCESSING +int main(int argc, const char* argv[] /*, const char* envp[]*/) +{ + return CBlastDBCmdApp().AppMain(argc, argv); +} +#endif /* SKIP_DOXYGEN_PROCESSING */ diff --git a/c++/src/app/blastdb/blastdbcmd.cpp b/c++/src/app/blastdb/blastdbcmd.cpp index bc23290a..43b6c342 100644 --- a/c++/src/app/blastdb/blastdbcmd.cpp +++ b/c++/src/app/blastdb/blastdbcmd.cpp @@ -1,4 +1,4 @@ -/* $Id: blastdbcmd.cpp 571655 2018-10-01 12:46:33Z ivanov $ +/* $Id: blastdbcmd.cpp 581744 2019-03-05 16:46:35Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -126,6 +126,7 @@ private: void x_PrintBlastDatabaseTaxInformation(); + int x_ProcessBatchPig(CBlastDB_Formatter & fmt); }; string s_PreProcessAccessionsForDBv5(const string & id) @@ -144,7 +145,14 @@ string s_PreProcessAccessionsForDBv5(const string & id) if(seqid->IsPir() || seqid->IsPrf()) { return seqid->AsFastaString(); } - rv = seqid->GetSeqIdString(true); + else if (seqid->IsPdb()) { + string tmp = seqid->GetSeqIdString(); + rv = tmp.substr(0,4); + NStr::ToUpper(rv); + rv += tmp.substr(4); + return (rv); + } + return 
seqid->GetSeqIdString(true); } } @@ -188,9 +196,9 @@ CBlastDBCmdApp::x_ProcessEntry(CBlastDB_Formatter & fmt) const CArgs& args = GetArgs(); _ASSERT(m_BlastDb.NotEmpty()); - if (args["pig"].HasValue()) { + if (args["ipg"].HasValue()) { CSeqDB::TOID oid; - m_BlastDb->PigToOid(args["pig"].AsInteger(),oid); + m_BlastDb->PigToOid(args["ipg"].AsInteger(),oid); fmt.Write(oid, m_Config); } else if (args["entry"].HasValue()) { static const string kDelim(","); @@ -431,6 +439,42 @@ CBlastDBCmdApp::x_ProcessBatchEntry(CBlastDB_Formatter & fmt) } +int +CBlastDBCmdApp::x_ProcessBatchPig(CBlastDB_Formatter & fmt) +{ + int err_found = 0; + const CArgs& args = GetArgs(); + CNcbiIstream& input = args["ipg_batch"].AsInputFile(); + + while (input) { + string line; + NcbiGetlineEOL(input, line); + if ( !line.empty() ) { + string id, format; + NStr::SplitInTwo(line, " \t", id, format, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate); + if(id.empty()) { + continue; + } + if(x_ModifyConfigForBatchEntry(format)) { + err_found ++; + ERR_POST (Error << "Skipped IPG : " << id); + continue; + } + int oid; + int pig = NStr::StringToInt(id, NStr::fConvErr_NoThrow ); + m_BlastDb->PigToOid(pig,oid); + if (oid == -1) { + err_found ++; + ERR_POST (Error << "Skipped IPG: " << id); + continue; + } + + fmt.Write(oid, m_Config); + } + } + return (err_found) ? 
1 : 0; +} + void CBlastDBCmdApp::x_InitBlastDB() { @@ -707,9 +751,12 @@ CBlastDBCmdApp::x_ProcessSearchType(CBlastDB_Formatter & fmt) return x_ProcessBatchEntry_NoDup(fmt); } } - else if (args["entry"].HasValue() || args["pig"].HasValue()) { + else if (args["entry"].HasValue() || args["ipg"].HasValue()) { return x_ProcessEntry(fmt); } + else if (args["ipg_batch"].HasValue()) { + return x_ProcessBatchPig(fmt); + } else if(args[kArgTaxIdList].HasValue()|| args[kArgTaxIdListFile].HasValue()) { return x_ProcessTaxIdList(fmt); @@ -806,12 +853,23 @@ void CBlastDBCmdApp::Init() arg_desc->SetDependency("entry_batch", CArgDescriptions::eExcludes, "strand"); arg_desc->SetDependency("entry_batch", CArgDescriptions::eExcludes, "mask_sequence_with"); - arg_desc->AddOptionalKey("pig", "PIG", "PIG to retrieve", + arg_desc->AddOptionalKey("ipg", "IPG", "IPG to retrieve", CArgDescriptions::eInteger); - arg_desc->SetConstraint("pig", new CArgAllowValuesGreaterThanOrEqual(0)); - arg_desc->SetDependency("pig", CArgDescriptions::eExcludes, "entry"); - arg_desc->SetDependency("pig", CArgDescriptions::eExcludes, "entry_batch"); - arg_desc->SetDependency("pig", CArgDescriptions::eExcludes, "target_only"); + arg_desc->SetConstraint("ipg", new CArgAllowValuesGreaterThanOrEqual(0)); + arg_desc->SetDependency("ipg", CArgDescriptions::eExcludes, "entry"); + arg_desc->SetDependency("ipg", CArgDescriptions::eExcludes, "entry_batch"); + arg_desc->SetDependency("ipg", CArgDescriptions::eExcludes, "target_only"); + arg_desc->SetDependency("ipg", CArgDescriptions::eExcludes, "ipg_batch"); + + arg_desc->AddOptionalKey("ipg_batch", "input_file", + "Input file for batch processing (Format: one entry per line, IPG \n" + "followed by optional space-delimited specifier(s) [range|strand|mask_algo_id]", + CArgDescriptions::eInputFile); + arg_desc->SetDependency("ipg_batch", CArgDescriptions::eExcludes, "entry"); + arg_desc->SetDependency("ipg_batch", CArgDescriptions::eExcludes, "entry_batch"); + 
arg_desc->SetDependency("ipg_batch", CArgDescriptions::eExcludes, "range"); + arg_desc->SetDependency("ipg_batch", CArgDescriptions::eExcludes, "strand"); + arg_desc->SetDependency("ipg_batch", CArgDescriptions::eExcludes, "mask_sequence_with"); arg_desc->AddOptionalKey(kArgTaxIdList, "taxonomy_ids", "Comma-delimited taxonomy identifiers", CArgDescriptions::eString); diff --git a/c++/src/app/blastdb/makeblastdb.cpp b/c++/src/app/blastdb/makeblastdb.cpp index a9b16b1f..78ec6e66 100644 --- a/c++/src/app/blastdb/makeblastdb.cpp +++ b/c++/src/app/blastdb/makeblastdb.cpp @@ -1,4 +1,4 @@ -/* $Id: makeblastdb.cpp 560817 2018-03-27 17:24:45Z rackerst $ +/* $Id: makeblastdb.cpp 577767 2019-01-08 18:11:46Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -526,11 +526,12 @@ private: void CMakeBlastDBApp::x_AddSeqEntries(CNcbiIstream & data, TFormat fmt) { + bool found = false; try { while(!data.eof()) { CSeqEntrySource src(data, fmt, m_SkipUnver); - m_DB->AddSequences(src); + found = found || m_DB->AddSequences(src); } } catch (const CEofException& e) { if (e.GetErrCode() == CEofException::eEof) { @@ -539,6 +540,9 @@ void CMakeBlastDBApp::x_AddSeqEntries(CNcbiIstream & data, TFormat fmt) throw e; } } + if (!found) { + ERR_POST(Warning << "No sequences written"); + } } class CRawSeqDBSource : public IRawSequenceSource { diff --git a/c++/src/build-system/Makefile.meta_l b/c++/src/build-system/Makefile.meta_l index 705f87c8..086111b7 100644 --- a/c++/src/build-system/Makefile.meta_l +++ b/c++/src/build-system/Makefile.meta_l @@ -1,5 +1,5 @@ # -*- makefile-gmake -*- -# $Id: Makefile.meta_l 568728 2018-08-09 18:37:22Z ucko $ +# $Id: Makefile.meta_l 575300 2018-11-27 15:24:32Z ivanov $ ### Rules for building within a single directory @@ -21,6 +21,7 @@ ifneq "$(filter command, $(origin APP_PROJ) $(origin LIB_PROJ))" "" WSDL_PROJ= JSD_PROJ= PROTOBUF_PROJ= + METAL_PROJ= endif endif @@ -221,6 +222,18 @@ $(foreach 
p,$(APP_PROJ),$(ign)$(call make_one_app_proj,$(p),$(1),)) $(foreach p,$(EXPENDABLE_APP_PROJ),-$(call make_one_app_proj,$(p),$(1),1)) endef +MAKE_METAL = $(MAKE) -f "$(builddir)/Makefile.metal.tmpl" srcdir="$(srcdir)" \ + TMPL="$$i" $(APP_NOCOPY) $(MFLAGS_NR) + +# 1: proj_name, 2: target, 3: is_expendable +define make_one_metal_proj +$(call make_one_cxx_proj,$(1),$(MAKE_METAL),$(2),.metal,unlink,2,$(3)) +endef +# 1: target +define make_each_metal_proj +$(foreach p,$(METAL_PROJ),$(ign)$(call make_one_metal_proj,$(p),$(1),)) +endef + ifneq "$(wildcard $(check_run))" "" # 1: target define make_check @@ -304,12 +317,18 @@ all.nonusr: $(sources:%=%.nonusr) # $(wildcard Makefile.*.libdep) @-echo "`$(DATE)`: starting to build in $(CURDIR)" > .building $(call make_each_lib_proj,all) $(call make_each_app_proj,all) +ifeq "$(OSTYPE)" "darwin" + $(call make_each_metal_proj,all) +endif @-$(RM) -f .building @-echo "`$(DATE)`: local build succeeded in $(CURDIR)" > .built clean.nonusr purge.nonusr: %.nonusr: # %_sources $(call make_each_lib_proj,$*) $(call make_each_app_proj,$*) +ifeq "$(OSTYPE)" "darwin" + $(call make_each_metal_proj,$*) +endif clean_sources purge_sources: $(call make_each_spec,$@) diff --git a/c++/src/build-system/Makefile.metal.in b/c++/src/build-system/Makefile.metal.in new file mode 100644 index 00000000..ecb39145 --- /dev/null +++ b/c++/src/build-system/Makefile.metal.in @@ -0,0 +1,48 @@ +################################# +# $Id: Makefile.metal.in 575300 2018-11-27 15:24:32Z ivanov $ +# Author: Aaron Ucko (ucko@ncbi.nlm.nih.gov) +################################# + +# Tools, hardcoded for now +METAL = xcrun -sdk macosx metal +METALLIB = xcrun -sdk macosx metallib + +OBJ = $(SRC:%=%.air) +XLIB = $(LIB).metallib + +all: $(XLIB) + +clean: + $(RM) $(OBJ) $(XLIB) + +purge: clean deactivate + +deactivate unlink: +ifneq "$(BINCOPY)" "@:" + ifneq "$(srcdir)" "." 
+ ifneq "$(filter-out /.,$(wildcard $(libdir)/.))" "" + -$(RM) $(libdir)/$(XLIB) + endif + endif +endif + +%.air: $(srcdir)/%.metal + $(METAL) -c $< -o $@ + +$(XLIB): $(OBJ) + $(METALLIB) $(OBJ) -o $@ +ifneq "$(BINCOPY)" "@:" + ifneq "$(srcdir)" "." + ifneq "$(filter-out /.,$(wildcard $(libdir)/.))" "" + @if [ -w $(libdir) ]; then \ + echo $(BINCOPY) $(XLIB) $(libdir) && \ + $(BINCOPY) $(XLIB) $(libdir); \ + fi + endif + endif +endif + +# Requirements checking (cribbed from Makefile.rules.in) +XREQLIB=$(patsubst %$(FORCE_STATIC),%,$(patsubst %$(DLL),%,$(XAPP_LIB) $(XDLL_LIBS))) +include $(wildcard $(XREQLIB:%=$(status_dir)/.%.disabled)) +requirements: $(REQUIRES:%=$(status_dir)/%.enabled) diff --git a/c++/src/build-system/Makefile.metal.tmpl.in b/c++/src/build-system/Makefile.metal.tmpl.in new file mode 100644 index 00000000..7d8f589b --- /dev/null +++ b/c++/src/build-system/Makefile.metal.tmpl.in @@ -0,0 +1,24 @@ +################################# +# $Id: Makefile.metal.tmpl.in 575300 2018-11-27 15:24:32Z ivanov $ +# Author: Denis Vakatov (vakatov@ncbi.nlm.nih.gov) +################################# + +# Standard template to build a shader library +############################################# + +builddir = @builddir@ + + +# Configured makefile variables: +# tools, flags, paths +# +include $(builddir)/Makefile.mk + + +# Project definition +include $(srcdir)/Makefile.$(TMPL).metal + + +# Standard build rules +# +include $(builddir)/Makefile.metal diff --git a/c++/src/build-system/Makefile.mk.in b/c++/src/build-system/Makefile.mk.in index 17ca5b58..a502dbb0 100644 --- a/c++/src/build-system/Makefile.mk.in +++ b/c++/src/build-system/Makefile.mk.in @@ -1,5 +1,5 @@ ################################# -# $Id: Makefile.mk.in 574561 2018-11-15 12:30:48Z ivanov $ +# $Id: Makefile.mk.in 577264 2019-01-03 17:38:37Z ivanov $ # Author: Denis Vakatov (vakatov@ncbi.nlm.nih.gov) ################################# # @@ -531,6 +531,8 @@ GLUT_LIBS = @GLUT_LIBS@ GLEW_INCLUDE = 
@GLEW_INCLUDE@ GLEW_LIBS = @GLEW_LIBS@ GLEW_STATIC_LIBS = @GLEW_STATIC_LIBS@ +GL2PS_INCLUDE = @GL2PS_INCLUDE@ +GL2PS_LIBS = @GL2PS_LIBS@ # wxWidgets (2.6 or newer): headers and libs WXWIDGETS_INCLUDE = @WXWIDGETS_INCLUDE@ diff --git a/c++/src/build-system/aclocal.m4 b/c++/src/build-system/aclocal.m4 index ddf1fced..a7c4c642 100644 --- a/c++/src/build-system/aclocal.m4 +++ b/c++/src/build-system/aclocal.m4 @@ -212,9 +212,9 @@ AC_DEFUN(NCBI_CHECK_THIRD_PARTY_LIB, AC_DEFUN(NCBI_CHECK_THIRD_PARTY_LIB_EX, [if test "$with_$1" != "no"; then - case "$with_$1" in - yes | "" ) ;; - * ) $2_PATH=$with_$1 ;; + case "[$]$2_PATH:$with_$1" in + *:yes | *: | $with_$1* ) ;; + * ) $2_PATH=$with_$1 ;; esac if test "[$]$2_PATH" != /usr -a -d "[$]$2_PATH"; then in_path=" in [$]$2_PATH" diff --git a/c++/src/build-system/cmake/CMake.NCBIComponents.cmake b/c++/src/build-system/cmake/CMake.NCBIComponents.cmake index c761e03b..1580f706 100644 --- a/c++/src/build-system/cmake/CMake.NCBIComponents.cmake +++ b/c++/src/build-system/cmake/CMake.NCBIComponents.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBIComponents.cmake 567687 2018-07-23 15:16:32Z gouriano $ +# $Id: CMake.NCBIComponents.cmake 576718 2018-12-19 20:56:03Z dicuccio $ ############################################################################# ## @@ -27,6 +27,7 @@ else() # message("local_lbsm found at ${NCBI_SRC_ROOT}/connect") set(NCBI_COMPONENT_local_lbsm_FOUND YES) set(HAVE_LOCAL_LBSM 1) + set(LOCAL_LBSM ncbi_lbsm ncbi_lbsm_ipc ncbi_lbsmd) else() # message("Component local_lbsm ERROR: not found") set(NCBI_COMPONENT_local_lbsm_FOUND NO) @@ -38,7 +39,7 @@ endif() if (EXISTS ${includedir}/util/regexp) set(NCBI_COMPONENT_LocalPCRE_FOUND YES) set(NCBI_COMPONENT_LocalPCRE_INCLUDE ${includedir}/util/regexp) - set(NCBI_COMPONENT_LocalPCRE_LIBS regexp) + set(NCBI_COMPONENT_LocalPCRE_NCBILIB regexp) else() set(NCBI_COMPONENT_LocalPCRE_FOUND NO) endif() @@ -48,7 +49,7 
@@ endif() if (EXISTS ${includedir}/util/compress/zlib) set(NCBI_COMPONENT_LocalZ_FOUND YES) set(NCBI_COMPONENT_LocalZ_INCLUDE ${includedir}/util/compress/zlib) - set(NCBI_COMPONENT_LocalZ_LIBS z) + set(NCBI_COMPONENT_LocalZ_NCBILIB z) else() set(NCBI_COMPONENT_LocalZ_FOUND NO) endif() @@ -58,7 +59,7 @@ endif() if (EXISTS ${includedir}/util/compress/bzip2) set(NCBI_COMPONENT_LocalBZ2_FOUND YES) set(NCBI_COMPONENT_LocalBZ2_INCLUDE ${includedir}/util/compress/bzip2) - set(NCBI_COMPONENT_LocalBZ2_LIBS bz2) + set(NCBI_COMPONENT_LocalBZ2_NCBILIB bz2) else() set(NCBI_COMPONENT_LocalBZ2_FOUND NO) endif() @@ -67,8 +68,8 @@ endif() #LocalLMDB if (EXISTS ${includedir}/util/lmdb) set(NCBI_COMPONENT_LocalLMDB_FOUND YES) - set(NCBI_COMPONENT_LocalLMDB_INCLUDE ${includedir}//util/lmdb) - set(NCBI_COMPONENT_LocalLMDB_LIBS lmdb) + set(NCBI_COMPONENT_LocalLMDB_INCLUDE ${includedir}/util/lmdb) + set(NCBI_COMPONENT_LocalLMDB_NCBILIB lmdb) else() set(NCBI_COMPONENT_LocalLMDB_FOUND NO) endif() @@ -81,24 +82,24 @@ set(FTDS100_INCLUDE ${includedir}/dbapi/driver/ftds100 ${includedir}/dbapi/drive set(NCBI_COMPONENT_FreeTDS_FOUND YES) set(NCBI_COMPONENT_FreeTDS_INCLUDE ${FTDS95_INCLUDE}) -set(NCBI_COMPONENT_FreeTDS_LIBS ct_ftds95) +#set(NCBI_COMPONENT_FreeTDS_LIBS ct_ftds95) ############################################################################# set(NCBI_COMPONENT_Boost.Test.Included_NCBILIB test_boost) set(NCBI_COMPONENT_SQLITE3_NCBILIB sqlitewrapp) set(NCBI_COMPONENT_Sybase_NCBILIB ncbi_xdbapi_ctlib) set(NCBI_COMPONENT_ODBC_NCBILIB ncbi_xdbapi_odbc) -set(NCBI_COMPONENT_FreeTDS_NCBILIB ncbi_xdbapi_ftds) +set(NCBI_COMPONENT_FreeTDS_NCBILIB ct_ftds95 ncbi_xdbapi_ftds) ############################################################################# if (NCBI_EXPERIMENTAL_DISABLE_HUNTER) if (MSVC) - include(${top_src_dir}/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake) + include(${NCBI_TREE_CMAKECFG}//CMake.NCBIComponentsMSVC.cmake) elseif (XCODE) - 
include(${top_src_dir}/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake) + include(${NCBI_TREE_CMAKECFG}//CMake.NCBIComponentsXCODE.cmake) else() - include(${top_src_dir}/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake) + include(${NCBI_TREE_CMAKECFG}//CMake.NCBIComponentsUNIX.cmake) endif() else() diff --git a/c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake b/c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake index 871128da..27db577e 100644 --- a/c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake +++ b/c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBIComponentsMSVC.cmake 568452 2018-08-06 14:16:30Z ucko $ +# $Id: CMake.NCBIComponentsMSVC.cmake 580833 2019-02-21 15:38:14Z gouriano $ ############################################################################# ## @@ -23,12 +23,12 @@ set(NCBI_ThirdPartyAppsPath //snowman/win-coremake/App/ThirdParty) set(NCBI_PlatformBits 64) if("${CMAKE_GENERATOR}" STREQUAL "Visual Studio 15 2017 Win64") - set(NCBI_ThirdPartyCompiler vs2015.64) + set(NCBI_ThirdPartyCompiler vs2017.64) elseif("${CMAKE_GENERATOR}" STREQUAL "Visual Studio 15 2017") if("${CMAKE_GENERATOR_PLATFORM}" STREQUAL "Win64") - set(NCBI_ThirdPartyCompiler vs2015.64) + set(NCBI_ThirdPartyCompiler vs2017.64) else() - set(NCBI_ThirdPartyCompiler vs2015) + set(NCBI_ThirdPartyCompiler vs2017) set(NCBI_PlatformBits 32) endif() elseif("${CMAKE_GENERATOR}" STREQUAL "Visual Studio 14 2015 Win64") @@ -47,35 +47,73 @@ endif() set(NCBI_ThirdParty_NCBI_C //snowman/win-coremake/Lib/Ncbi/C/${NCBI_ThirdPartyCompiler}/c.current) -set(NCBI_ThirdParty_TLS ${NCBI_ThirdPartyBasePath}/gnutls/${NCBI_ThirdPartyCompiler}/3.4.9) -set(NCBI_ThirdParty_FASTCGI ${NCBI_ThirdPartyBasePath}/fastcgi/${NCBI_ThirdPartyCompiler}/2.4.0) -set(NCBI_ThirdParty_Boost ${NCBI_ThirdPartyBasePath}/boost/${NCBI_ThirdPartyCompiler}/1.61.0) 
-set(NCBI_ThirdParty_PCRE ${NCBI_ThirdPartyBasePath}/pcre/${NCBI_ThirdPartyCompiler}/7.9) -set(NCBI_ThirdParty_Z ${NCBI_ThirdPartyBasePath}/z/${NCBI_ThirdPartyCompiler}/1.2.8) -set(NCBI_ThirdParty_BZ2 ${NCBI_ThirdPartyBasePath}/bzip2/${NCBI_ThirdPartyCompiler}/1.0.6) -set(NCBI_ThirdParty_LZO ${NCBI_ThirdPartyBasePath}/lzo/${NCBI_ThirdPartyCompiler}/2.05) -set(NCBI_ThirdParty_BerkeleyDB ${NCBI_ThirdPartyBasePath}/berkeleydb/${NCBI_ThirdPartyCompiler}/4.6.21.NC) -set(NCBI_ThirdParty_LMDB ${NCBI_ThirdPartyBasePath}/lmdb/${NCBI_ThirdPartyCompiler}/0.9.21) -set(NCBI_ThirdParty_JPEG ${NCBI_ThirdPartyBasePath}/jpeg/${NCBI_ThirdPartyCompiler}/6b) -set(NCBI_ThirdParty_PNG ${NCBI_ThirdPartyBasePath}/png/${NCBI_ThirdPartyCompiler}/1.2.7) -set(NCBI_ThirdParty_GIF ${NCBI_ThirdPartyBasePath}/gif/${NCBI_ThirdPartyCompiler}/4.1.3) -set(NCBI_ThirdParty_TIFF ${NCBI_ThirdPartyBasePath}/tiff/${NCBI_ThirdPartyCompiler}/3.6.1) -set(NCBI_ThirdParty_XML ${NCBI_ThirdPartyBasePath}/xml/${NCBI_ThirdPartyCompiler}/2.7.8) -set(NCBI_ThirdParty_XSLT ${NCBI_ThirdPartyBasePath}/xslt/${NCBI_ThirdPartyCompiler}/1.1.26) -set(NCBI_ThirdParty_EXSLT ${NCBI_ThirdParty_XSLT}) -set(NCBI_ThirdParty_SQLITE3 ${NCBI_ThirdPartyBasePath}/sqlite/${NCBI_ThirdPartyCompiler}/3.8.10.1) -set(NCBI_ThirdParty_Sybase ${NCBI_ThirdPartyBasePath}/sybase/${NCBI_ThirdPartyCompiler}/15.5) -set(NCBI_ThirdParty_VDB //snowman/trace_software/vdb/vdb-versions/2.9.2-1) -if ("${NCBI_PlatformBits}" EQUAL "64") - set(NCBI_ThirdParty_VDB_ARCH_INC x86_64) - set(NCBI_ThirdParty_VDB_ARCH x86_64/vs2013.64) +if (${NCBI_ThirdPartyCompiler} STREQUAL "vs2017.64" OR ${NCBI_ThirdPartyCompiler} STREQUAL "vs2017") + set(NCBI_ThirdParty_TLS ${NCBI_ThirdPartyBasePath}/gnutls/${NCBI_ThirdPartyCompiler}/3.4.9) + set(NCBI_ThirdParty_FASTCGI ${NCBI_ThirdPartyBasePath}/fastcgi/${NCBI_ThirdPartyCompiler}/2.4.1) + set(NCBI_ThirdParty_Boost ${NCBI_ThirdPartyBasePath}/boost/${NCBI_ThirdPartyCompiler}/1.66.0) + set(NCBI_ThirdParty_PCRE 
${NCBI_ThirdPartyBasePath}/pcre/${NCBI_ThirdPartyCompiler}/7.9) + set(NCBI_ThirdParty_Z ${NCBI_ThirdPartyBasePath}/z/${NCBI_ThirdPartyCompiler}/1.2.8) + set(NCBI_ThirdParty_BZ2 ${NCBI_ThirdPartyBasePath}/bzip2/${NCBI_ThirdPartyCompiler}/1.0.6) + set(NCBI_ThirdParty_LZO ${NCBI_ThirdPartyBasePath}/lzo/${NCBI_ThirdPartyCompiler}/2.10) + set(NCBI_ThirdParty_BerkeleyDB ${NCBI_ThirdPartyBasePath}/berkeleydb/${NCBI_ThirdPartyCompiler}/4.6.21.NC) + set(NCBI_ThirdParty_LMDB ${NCBI_ThirdPartyBasePath}/lmdb/${NCBI_ThirdPartyCompiler}/0.9.21) + set(NCBI_ThirdParty_JPEG ${NCBI_ThirdPartyBasePath}/jpeg/${NCBI_ThirdPartyCompiler}/9c) + set(NCBI_ThirdParty_PNG ${NCBI_ThirdPartyBasePath}/png/${NCBI_ThirdPartyCompiler}/1.6.34) + set(NCBI_ThirdParty_GIF ${NCBI_ThirdPartyBasePath}/gif/${NCBI_ThirdPartyCompiler}/4.1.3) + set(NCBI_ThirdParty_TIFF ${NCBI_ThirdPartyBasePath}/tiff/${NCBI_ThirdPartyCompiler}/3.6.1) + set(NCBI_ThirdParty_XML ${NCBI_ThirdPartyBasePath}/xml/${NCBI_ThirdPartyCompiler}/2.7.8) + set(NCBI_ThirdParty_XSLT ${NCBI_ThirdPartyBasePath}/xslt/${NCBI_ThirdPartyCompiler}/1.1.26) + set(NCBI_ThirdParty_EXSLT ${NCBI_ThirdParty_XSLT}) + set(NCBI_ThirdParty_SQLITE3 ${NCBI_ThirdPartyBasePath}/sqlite/${NCBI_ThirdPartyCompiler}/3.22.0) + set(NCBI_ThirdParty_Sybase ${NCBI_ThirdPartyBasePath}/sybase/${NCBI_ThirdPartyCompiler}/15.5) + set(NCBI_ThirdParty_VDB //snowman/trace_software/vdb/vdb-versions/cxx_toolkit/2) + if ("${NCBI_PlatformBits}" EQUAL "64") + set(NCBI_ThirdParty_VDB_ARCH_INC x86_64) + set(NCBI_ThirdParty_VDB_ARCH x86_64/vs2013.64) + else() + set(NCBI_ThirdParty_VDB_ARCH_INC i386) + set(NCBI_ThirdParty_VDB_ARCH i386/vs2013.32) + endif() + + set(NCBI_ThirdParty_PYTHON ${NCBI_ThirdPartyAppsPath}/Python252) + set(NCBI_ThirdParty_GRPC ${NCBI_ThirdPartyBasePath}/grpc/${NCBI_ThirdPartyCompiler}/1.14.1) else() - set(NCBI_ThirdParty_VDB_ARCH_INC i386) - set(NCBI_ThirdParty_VDB_ARCH i386/vs2013.32) -endif() + set(NCBI_ThirdParty_TLS 
${NCBI_ThirdPartyBasePath}/gnutls/${NCBI_ThirdPartyCompiler}/3.4.9) + set(NCBI_ThirdParty_FASTCGI ${NCBI_ThirdPartyBasePath}/fastcgi/${NCBI_ThirdPartyCompiler}/2.4.0) + set(NCBI_ThirdParty_Boost ${NCBI_ThirdPartyBasePath}/boost/${NCBI_ThirdPartyCompiler}/1.61.0) + set(NCBI_ThirdParty_PCRE ${NCBI_ThirdPartyBasePath}/pcre/${NCBI_ThirdPartyCompiler}/7.9) + set(NCBI_ThirdParty_Z ${NCBI_ThirdPartyBasePath}/z/${NCBI_ThirdPartyCompiler}/1.2.8) + set(NCBI_ThirdParty_BZ2 ${NCBI_ThirdPartyBasePath}/bzip2/${NCBI_ThirdPartyCompiler}/1.0.6) + set(NCBI_ThirdParty_LZO ${NCBI_ThirdPartyBasePath}/lzo/${NCBI_ThirdPartyCompiler}/2.05) + set(NCBI_ThirdParty_BerkeleyDB ${NCBI_ThirdPartyBasePath}/berkeleydb/${NCBI_ThirdPartyCompiler}/4.6.21.NC) + set(NCBI_ThirdParty_LMDB ${NCBI_ThirdPartyBasePath}/lmdb/${NCBI_ThirdPartyCompiler}/0.9.21) + set(NCBI_ThirdParty_JPEG ${NCBI_ThirdPartyBasePath}/jpeg/${NCBI_ThirdPartyCompiler}/6b) + set(NCBI_ThirdParty_PNG ${NCBI_ThirdPartyBasePath}/png/${NCBI_ThirdPartyCompiler}/1.2.7) + set(NCBI_ThirdParty_GIF ${NCBI_ThirdPartyBasePath}/gif/${NCBI_ThirdPartyCompiler}/4.1.3) + set(NCBI_ThirdParty_TIFF ${NCBI_ThirdPartyBasePath}/tiff/${NCBI_ThirdPartyCompiler}/3.6.1) + set(NCBI_ThirdParty_XML ${NCBI_ThirdPartyBasePath}/xml/${NCBI_ThirdPartyCompiler}/2.7.8) + set(NCBI_ThirdParty_XSLT ${NCBI_ThirdPartyBasePath}/xslt/${NCBI_ThirdPartyCompiler}/1.1.26) + set(NCBI_ThirdParty_EXSLT ${NCBI_ThirdParty_XSLT}) + set(NCBI_ThirdParty_SQLITE3 ${NCBI_ThirdPartyBasePath}/sqlite/${NCBI_ThirdPartyCompiler}/3.8.10.1) + set(NCBI_ThirdParty_Sybase ${NCBI_ThirdPartyBasePath}/sybase/${NCBI_ThirdPartyCompiler}/15.5) + set(NCBI_ThirdParty_VDB //snowman/trace_software/vdb/vdb-versions/cxx_toolkit/2) + if ("${NCBI_PlatformBits}" EQUAL "64") + set(NCBI_ThirdParty_VDB_ARCH_INC x86_64) + set(NCBI_ThirdParty_VDB_ARCH x86_64/vs2013.64) + else() + set(NCBI_ThirdParty_VDB_ARCH_INC i386) + set(NCBI_ThirdParty_VDB_ARCH i386/vs2013.32) + endif() -set(NCBI_ThirdParty_PYTHON 
${NCBI_ThirdPartyAppsPath}/Python252) + set(NCBI_ThirdParty_PYTHON ${NCBI_ThirdPartyAppsPath}/Python252) + set(NCBI_ThirdParty_GRPC ${NCBI_ThirdPartyBasePath}/grpc/${NCBI_ThirdPartyCompiler}/1.14.1) +endif() +set(NCBI_ThirdParty_XALAN ${NCBI_ThirdPartyBasePath}/xalan/${NCBI_ThirdPartyCompiler}/1.10.0-20080814) +set(NCBI_ThirdParty_XERCES ${NCBI_ThirdPartyBasePath}/xerces/${NCBI_ThirdPartyCompiler}/2.8.0) +set(NCBI_ThirdParty_FTGL ${NCBI_ThirdPartyBasePath}/ftgl/${NCBI_ThirdPartyCompiler}/2.1.3-rc5) +set(NCBI_ThirdParty_GLEW ${NCBI_ThirdPartyBasePath}/glew/${NCBI_ThirdPartyCompiler}/1.5.8) +set(NCBI_ThirdParty_FreeType ${NCBI_ThirdPartyBasePath}/freetype/${NCBI_ThirdPartyCompiler}/2.4.10) ############################################################################# macro(NCBI_define_component _name) @@ -124,6 +162,9 @@ macro(NCBI_define_component _name) foreach(_lib IN LISTS _args) set(NCBI_COMPONENT_${_name}_LIBS ${NCBI_COMPONENT_${_name}_LIBS} ${_root}/${_libtype}/\$\(Configuration\)/${_lib}) endforeach() + if (EXISTS ${_root}/bin) + set(NCBI_COMPONENT_${_name}_BINPATH ${_root}/bin) + endif() #message("NCBI_COMPONENT_${_name}_INCLUDE ${NCBI_COMPONENT_${_name}_INCLUDE}") #message("NCBI_COMPONENT_${_name}_LIBS ${NCBI_COMPONENT_${_name}_LIBS}") @@ -166,6 +207,11 @@ endif() set(NCBI_COMPONENT_STACKTRACE_FOUND YES) set(NCBI_COMPONENT_STACKTRACE_LIBS dbghelp.lib) +############################################################################## +# UUID +set(NCBI_COMPONENT_UUID_FOUND YES) +set(NCBI_COMPONENT_UUID_LIBS uuid.lib rpcrt4.lib) + ############################################################################# # TLS if (EXISTS ${NCBI_ThirdParty_TLS}/include) @@ -208,13 +254,17 @@ if(NCBI_COMPONENT_Boost.Spirit_FOUND) set(NCBI_COMPONENT_Boost.Spirit_DEFINES BOOST_AUTO_LINK_NOMANGLE) endif() +############################################################################# +# Boost +NCBI_define_component(Boost boost_filesystem.lib boost_iostreams.lib boost_date_time.lib 
boost_regex.lib boost_system.lib) + ############################################################################# # PCRE NCBI_define_component(PCRE libpcre.lib) if(NOT NCBI_COMPONENT_PCRE_FOUND) set(NCBI_COMPONENT_PCRE_FOUND ${NCBI_COMPONENT_LocalPCRE_FOUND}) set(NCBI_COMPONENT_PCRE_INCLUDE ${NCBI_COMPONENT_LocalPCRE_INCLUDE}) - set(NCBI_COMPONENT_PCRE_LIBS ${NCBI_COMPONENT_LocalPCRE_LIBS}) + set(NCBI_COMPONENT_PCRE_NCBILIB ${NCBI_COMPONENT_LocalPCRE_NCBILIB}) endif() ############################################################################# @@ -223,7 +273,7 @@ NCBI_define_component(Z libz.lib) if(NOT NCBI_COMPONENT_Z_FOUND) set(NCBI_COMPONENT_Z_FOUND ${NCBI_COMPONENT_LocalZ_FOUND}) set(NCBI_COMPONENT_Z_INCLUDE ${NCBI_COMPONENT_LocalZ_INCLUDE}) - set(NCBI_COMPONENT_Z_LIBS ${NCBI_COMPONENT_LocalZ_LIBS}) + set(NCBI_COMPONENT_Z_NCBILIB ${NCBI_COMPONENT_LocalZ_NCBILIB}) endif() ############################################################################# @@ -232,7 +282,7 @@ NCBI_define_component(BZ2 libbzip2.lib) if(NOT NCBI_COMPONENT_BZ2_FOUND) set(NCBI_COMPONENT_BZ2_FOUND ${NCBI_COMPONENT_LocalBZ2_FOUND}) set(NCBI_COMPONENT_BZ2_INCLUDE ${NCBI_COMPONENT_LocalBZ2_INCLUDE}) - set(NCBI_COMPONENT_BZ2_LIBS ${NCBI_COMPONENT_LocalBZ2_LIBS}) + set(NCBI_COMPONENT_BZ2_NCBILIB ${NCBI_COMPONENT_LocalBZ2_NCBILIB}) endif() ############################################################################# @@ -254,7 +304,7 @@ NCBI_define_component(LMDB liblmdb.lib) if(NOT NCBI_COMPONENT_LMDB_FOUND) set(NCBI_COMPONENT_LMDB_FOUND ${NCBI_COMPONENT_LocalLMDB_FOUND}) set(NCBI_COMPONENT_LMDB_INCLUDE ${NCBI_COMPONENT_LocalLMDB_INCLUDE}) - set(NCBI_COMPONENT_LMDB_LIBS ${NCBI_COMPONENT_LocalLMDB_LIBS}) + set(NCBI_COMPONENT_LMDB_NCBILIB ${NCBI_COMPONENT_LocalLMDB_NCBILIB}) endif() ############################################################################# @@ -324,6 +374,8 @@ set(NCBI_COMPONENT_VDB_INCLUDE ${NCBI_ThirdParty_VDB}/interfaces/os/win) set(NCBI_COMPONENT_VDB_LIBS 
${NCBI_ThirdParty_VDB}/win/release/${NCBI_ThirdParty_VDB_ARCH}/bin/ncbi-vdb-md.lib) +set(NCBI_COMPONENT_VDB_BINPATH + ${NCBI_ThirdParty_VDB}/win/release/${NCBI_ThirdParty_VDB_ARCH}/bin) set(_found YES) foreach(_inc IN LISTS NCBI_COMPONENT_VDB_INCLUDE NCBI_COMPONENT_VDB_LIBS) @@ -346,3 +398,61 @@ endif() ############################################################################# # PYTHON NCBI_define_component(PYTHON) + +############################################################################## +# GRPC/PROTOBUF + +set(NCBI_PROTOC_APP "${NCBI_ThirdParty_GRPC}/bin/protoc.exe") +set(NCBI_GRPC_PLUGIN "${NCBI_ThirdParty_GRPC}/bin/grpc_cpp_plugin.exe") + +if (EXISTS "${NCBI_ThirdParty_GRPC}/include" AND EXISTS "${NCBI_ThirdParty_GRPC}/lib/libprotobuf.lib") + set(NCBI_COMPONENT_PROTOBUF_FOUND YES) + set(NCBI_COMPONENT_PROTOBUF_INCLUDE ${NCBI_ThirdParty_GRPC}/include) + set(NCBI_COMPONENT_PROTOBUF_LIBS ${NCBI_ThirdParty_GRPC}/lib/libprotobuf.lib) + message("PROTOBUF found at ${NCBI_ThirdParty_GRPC}") +else() + set(NCBI_COMPONENT_PROTOBUF_FOUND NO) +endif() + +############################################################################## +# XALAN +NCBI_define_component(XALAN xalan-c.lib XalanMessages.lib) + +############################################################################## +# XERCES +NCBI_define_component(XERCES xerces-c.lib) +if(NCBI_COMPONENT_XERCES_FOUND) + if(BUILD_SHARED_LIBS) + set(NCBI_COMPONENT_XERCES_DEFINES XERCES_DLL) + else() + set(NCBI_COMPONENT_XERCES_DEFINES XML_LIBRARY) + endif() +endif() + +############################################################################## +# FTGL +NCBI_define_component(FTGL ftgl.lib) +if(NCBI_COMPONENT_FTGL_FOUND) + set(NCBI_COMPONENT_FTGL_DEFINES FTGL_LIBRARY_STATIC) +endif() + +############################################################################## +# FreeType +NCBI_define_component(FreeType freetype.lib) + +############################################################################## +# GLEW 
+NCBI_define_component(GLEW glew32mx.lib) +if(NCBI_COMPONENT_GLEW_FOUND) + if(BUILD_SHARED_LIBS) + set(NCBI_COMPONENT_GLEW_DEFINES GLEW_MX) + else() + set(NCBI_COMPONENT_GLEW_DEFINES GLEW_MX GLEW_STATIC) + endif() +endif() + +############################################################################## +# OpenGL +set(NCBI_COMPONENT_OpenGL_FOUND YES) +set(NCBI_COMPONENT_OpenGL_LIBS opengl32.lib glu32.lib) + diff --git a/c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake b/c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake index cb678d3b..d3bd65ec 100644 --- a/c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake +++ b/c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBIComponentsUNIX.cmake 574828 2018-11-19 19:24:29Z dicuccio $ +# $Id: CMake.NCBIComponentsUNIX.cmake 580833 2019-02-21 15:38:14Z gouriano $ ############################################################################# ## @@ -52,7 +52,7 @@ option(USE_LOCAL_PCRE "Use a local copy of libpcre") # set(NCBI_TOOLS_ROOT $ENV{NCBI}) if (EXISTS ${NCBI_TOOLS_ROOT}) - set(_NCBI_DEFAULT_PACKAGE_SEARCH_PATH "${CMAKE_CURRENT_SOURCE_DIR}/build-system/cmake/ncbi-defaults") + set(_NCBI_DEFAULT_PACKAGE_SEARCH_PATH "${NCBI_TREE_CMAKECFG}/ncbi-defaults") set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${_NCBI_DEFAULT_PACKAGE_SEARCH_PATH} @@ -105,18 +105,18 @@ endif() # # Framework for dealing with external libraries # -include(${top_src_dir}/src/build-system/cmake/FindExternalLibrary.cmake) +include(${NCBI_TREE_CMAKECFG}/FindExternalLibrary.cmake) ############################################################################ # # PCRE additions # -include(${top_src_dir}/src/build-system/cmake/CMakeChecks.pcre.cmake) +include(${NCBI_TREE_CMAKECFG}/CMakeChecks.pcre.cmake) ############################################################################ # # Compression libraries 
-include(${top_src_dir}/src/build-system/cmake/CMakeChecks.compress.cmake) +include(${NCBI_TREE_CMAKECFG}/CMakeChecks.compress.cmake) ################################# # Some platform-specific system libs that can be linked eventually @@ -215,9 +215,10 @@ if (WIN32) LIBS_HINTS "${WIN32_PACKAGE_ROOT}\\sybase-15.5\\lib") else (WIN32) find_external_library(Sybase - DYNAMIC_ONLY +# DYNAMIC_ONLY INCLUDES sybdb.h - LIBS sybblk_r64 sybdb64 sybct_r64 sybcs_r64 sybtcl_r64 sybcomn_r64 sybintl_r64 sybunic64 +# LIBS sybblk_r64 sybdb64 sybct_r64 sybcs_r64 sybtcl_r64 sybcomn_r64 sybintl_r64 sybunic64 + LIBS sybblk_r64 sybct_r64 sybcs_r64 sybtcl_r64 sybcomn_r64 sybintl_r64 sybunic64 HINTS "/opt/sybase/clients/15.7-64bit/OCS-15_0/") endif (WIN32) @@ -262,6 +263,7 @@ find_package(OpenSSL) if (OpenSSL_FOUND) set(OpenSSL_LIBRARIES ${OPENSSL_LIBRARIES} ${Z_LIBS} ${DL_LIBS}) set(OPENSSL_LIBS ${OPENSSL_LIBRARIES}) + set(HAVE_LIBOPENSSL 1) message(STATUS "OpenSSL_LIBRARIES = ${OpenSSL_LIBRARIES}") endif() @@ -271,7 +273,7 @@ find_external_library(Mysql INCLUDES mysql/mysql.h LIBS mysqlclient EXTRALIBS ${ ############################################################################ # # BerkeleyDB -include(${top_src_dir}/src/build-system/cmake/CMakeChecks.BerkeleyDB.cmake) +include(${NCBI_TREE_CMAKECFG}/CMakeChecks.BerkeleyDB.cmake) # ODBC # FIXME: replace with native CMake check @@ -290,13 +292,13 @@ find_external_library(Python ############################################################################ # # Boost settings -include(${top_src_dir}/src/build-system/cmake/CMakeChecks.boost.cmake) +include(${NCBI_TREE_CMAKECFG}/CMakeChecks.boost.cmake) ############################################################################ # # NCBI C Toolkit: headers and libs # Path overridden in stable components to avoid version skew. 
-set(NCBI_C_ROOT "${NCBI_TOOLS_ROOT}/c.by-date/20181031a") +set(NCBI_C_ROOT "${NCBI_TOOLS_ROOT}/c++.by-date/production/20190103/C_TOOLKIT") string(REGEX MATCH "DNCBI_INT8_GI|NCBI_STRICT_GI" INT8GI_FOUND "${CMAKE_CXX_FLAGS}") if (NOT "${INT8GI_FOUND}" STREQUAL "") if (EXISTS "${NCBI_C_ROOT}/ncbi.gi64/") @@ -312,7 +314,11 @@ endif () if (EXISTS "${NCBI_CTOOLKIT_PATH}/include64" AND EXISTS "${NCBI_CTOOLKIT_PATH}/lib64") set(NCBI_C_INCLUDE "${NCBI_CTOOLKIT_PATH}/include64") - set(NCBI_C_LIBPATH "${NCBI_CTOOLKIT_PATH}/lib64") + if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + set(NCBI_C_LIBPATH "${NCBI_CTOOLKIT_PATH}/altlib64") + else() + set(NCBI_C_LIBPATH "${NCBI_CTOOLKIT_PATH}/lib64") + endif() set(NCBI_C_ncbi "ncbi") if (APPLE) set(NCBI_C_ncbi ${NCBI_C_ncbi} -Wl,-framework,ApplicationServices) @@ -333,8 +339,6 @@ else () endif () message(STATUS "HAVE_NCBI_C = ${HAVE_NCBI_C}") -message(STATUS "NCBI_C_ROOT = ${NCBI_C_ROOT}") -message(STATUS "NCBI_CTOOLKIT_PATH = ${NCBI_CTOOLKIT_PATH}") message(STATUS "NCBI_C_INCLUDE = ${NCBI_C_INCLUDE}") message(STATUS "NCBI_C_LIBPATH = ${NCBI_C_LIBPATH}") @@ -368,7 +372,7 @@ endif() ############################################################################ # # wxWidgets -include(${top_src_dir}/src/build-system/cmake/CMakeChecks.wxwidgets.cmake) +include(${NCBI_TREE_CMAKECFG}/CMakeChecks.wxwidgets.cmake) # Fast-CGI set(_fcgi_version "fcgi-2.4.0") @@ -513,12 +517,12 @@ find_external_library(hdf5 ############################################################################ # # SQLite3 -include(${top_src_dir}/src/build-system/cmake/CMakeChecks.sqlite3.cmake) +include(${NCBI_TREE_CMAKECFG}/CMakeChecks.sqlite3.cmake) ############################################################################ # # Various image-format libraries -include(${top_src_dir}/src/build-system/cmake/CMakeChecks.image.cmake) +include(${NCBI_TREE_CMAKECFG}/CMakeChecks.image.cmake) ############################################################################# ## MongoDB 
@@ -615,17 +619,17 @@ if (WIN32) find_external_library(VDB INCLUDES sra/sradb.h LIBS ncbi-vdb - INCLUDE_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.9.3\\interfaces" - LIBS_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.9.3\\win\\release\\x86_64\\lib") + INCLUDE_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.9.4\\interfaces" + LIBS_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.9.4\\win\\release\\x86_64\\lib") else (WIN32) find_external_library(VDB INCLUDES sra/sradb.h LIBS ncbi-vdb - INCLUDE_HINTS "/opt/ncbi/64/trace_software/vdb/vdb-versions/2.9.2-1/interfaces" - LIBS_HINTS "/opt/ncbi/64/trace_software/vdb/vdb-versions/2.9.2-1/linux/release/x86_64/lib") + INCLUDE_HINTS "/opt/ncbi/64/trace_software/vdb/vdb-versions/cxx_toolkit/2/interfaces" + LIBS_HINTS "/opt/ncbi/64/trace_software/vdb/vdb-versions/cxx_toolkit/2/linux/release/x86_64/lib/") endif (WIN32) -if (${VDB_FOUND}) +if (VDB_FOUND) if (WIN32) set(VDB_INCLUDE "${VDB_INCLUDE}" "${VDB_INCLUDE}\\os\\win" "${VDB_INCLUDE}\\cc\\vc++\\x86_64" "${VDB_INCLUDE}\\cc\\vc++") else (WIN32) @@ -817,8 +821,12 @@ if (PYTHONINTERP_FOUND) set(PYTHON3 ${PYTHON_EXECUTABLE}) set(CD_REPORTER "/am/ncbiapdata/bin/cd_reporter") - set(abs_top_srcdir ${abs_top_src_dir}) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/build-system/run_with_cd_reporter.py.in ${build_root}/build-system/run_with_cd_reporter.py) + if (DEFINED NCBI_EXTERNAL_TREE_ROOT) + set(abs_top_srcdir ${NCBI_EXTERNAL_TREE_ROOT}) + else() + set(abs_top_srcdir ${abs_top_src_dir}) + endif() + configure_file(${NCBI_TREE_BUILDCFG}/run_with_cd_reporter.py.in ${build_root}/build-system/run_with_cd_reporter.py) # copy to build_root and set executable permissions (workaround because configure_file doesn't set permissions) file(COPY ${build_root}/build-system/run_with_cd_reporter.py DESTINATION ${build_root} @@ -881,6 +889,20 @@ set(NCBI_COMPONENT_STACKTRACE_FOUND YES) set(NCBI_COMPONENT_STACKTRACE_INCLUDE ${LIBBACKWARD_INCLUDE_DIR} 
${LIBUNWIND_INCLUDE}) set(NCBI_COMPONENT_STACKTRACE_LIBS ${LIBUNWIND_LIBS} ${LIBDW_LIBRARIES}) +############################################################################## +# UUID +if (NOT UUID_LIBS-NOTFOUND) +set(NCBI_COMPONENT_UUID_FOUND YES) +set(NCBI_COMPONENT_UUID_LIBS ${UUID_LIBS}) +endif() + +############################################################################## +# CURL +if (NOT CURL_LIBS-NOTFOUND) +set(NCBI_COMPONENT_CURL_FOUND YES) +set(NCBI_COMPONENT_CURL_LIBS ${CURL_LIBS}) +endif() + ############################################################################# # TLS if(GnuTLS_FOUND) @@ -935,9 +957,19 @@ else() set(NCBI_COMPONENT_Boost.Spirit_FOUND NO) endif() +############################################################################# +# Boost +if(Boost_FOUND) + set(NCBI_COMPONENT_Boost_FOUND YES) + set(NCBI_COMPONENT_Boost_INCLUDE ${Boost_INCLUDE_DIRS}) + set(NCBI_COMPONENT_Boost_LIBS ${Boost_LIBRARIES}) +else() + set(NCBI_COMPONENT_Boost_FOUND NO) +endif() + ############################################################################# # PCRE -if(PCRE_FOUND) +if(PCRE_FOUND AND NOT USE_LOCAL_PCRE) set(NCBI_COMPONENT_PCRE_FOUND YES) set(NCBI_COMPONENT_PCRE_INCLUDE ${PCRE_INCLUDE_DIR}) set(NCBI_COMPONENT_PCRE_LIBS ${PCRE_LIBRARIES}) @@ -945,7 +977,7 @@ if(PCRE_FOUND) else() set(NCBI_COMPONENT_PCRE_FOUND ${NCBI_COMPONENT_LocalPCRE_FOUND}) set(NCBI_COMPONENT_PCRE_INCLUDE ${NCBI_COMPONENT_LocalPCRE_INCLUDE}) - set(NCBI_COMPONENT_PCRE_LIBS ${NCBI_COMPONENT_LocalPCRE_LIBS}) + set(NCBI_COMPONENT_PCRE_NCBILIB ${NCBI_COMPONENT_LocalPCRE_NCBILIB}) endif() ############################################################################# @@ -958,12 +990,12 @@ if(ZLIB_FOUND) else() set(NCBI_COMPONENT_Z_FOUND ${NCBI_COMPONENT_LocalZ_FOUND}) set(NCBI_COMPONENT_Z_INCLUDE ${NCBI_COMPONENT_LocalZ_INCLUDE}) - set(NCBI_COMPONENT_Z_LIBS ${NCBI_COMPONENT_LocalZ_LIBS}) + set(NCBI_COMPONENT_Z_NCBILIB ${NCBI_COMPONENT_LocalZ_NCBILIB}) endif() 
############################################################################# # BZ2 -if(BZIP2_FOUND) +if(BZIP2_FOUND AND NOT USE_LOCAL_BZLIB) set(NCBI_COMPONENT_BZ2_FOUND YES) set(NCBI_COMPONENT_BZ2_INCLUDE ${BZIP2_INCLUDE_DIR}) set(NCBI_COMPONENT_BZ2_LIBS ${BZIP2_LIBRARIES}) @@ -971,7 +1003,7 @@ if(BZIP2_FOUND) else() set(NCBI_COMPONENT_BZ2_FOUND ${NCBI_COMPONENT_LocalBZ2_FOUND}) set(NCBI_COMPONENT_BZ2_INCLUDE ${NCBI_COMPONENT_LocalBZ2_INCLUDE}) - set(NCBI_COMPONENT_BZ2_LIBS ${NCBI_COMPONENT_LocalBZ2_LIBS}) + set(NCBI_COMPONENT_BZ2_NCBILIB ${NCBI_COMPONENT_LocalBZ2_NCBILIB}) endif() ############################################################################# @@ -1006,7 +1038,7 @@ if(LMDB_FOUND) else() set(NCBI_COMPONENT_LMDB_FOUND ${NCBI_COMPONENT_LocalLMDB_FOUND}) set(NCBI_COMPONENT_LMDB_INCLUDE ${NCBI_COMPONENT_LocalLMDB_INCLUDE}) - set(NCBI_COMPONENT_LMDB_LIBS ${NCBI_COMPONENT_LocalLMDB_LIBS}) + set(NCBI_COMPONENT_LMDB_NCBILIB ${NCBI_COMPONENT_LocalLMDB_NCBILIB}) endif() ############################################################################# @@ -1074,6 +1106,15 @@ else() set(NCBI_COMPONENT_EXSLT_FOUND NO) endif() +############################################################################# +# XLSXWRITER +if (LIBXLSXWRITER_FOUND) + set(NCBI_COMPONENT_XLSXWRITER_FOUND YES) + set(NCBI_COMPONENT_XLSXWRITER_INCLUDE ${LIBXLSXWRITER_INCLUDE}) + set(NCBI_COMPONENT_XLSXWRITER_LIBS ${LIBXLSXWRITER_LIBS}) + set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} XLSXWRITER") +endif() + ############################################################################# #SQLITE3 if(SQLITE3_FOUND) @@ -1102,6 +1143,7 @@ if(SYBASE_FOUND) set(NCBI_COMPONENT_Sybase_FOUND YES) set(NCBI_COMPONENT_Sybase_INCLUDE ${SYBASE_INCLUDE}) set(NCBI_COMPONENT_Sybase_LIBS ${SYBASE_LIBS}) + set(NCBI_COMPONENT_Sybase_DEFINES SYB_LP64) set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} Sybase") else() set(NCBI_COMPONENT_Sybase_FOUND NO) @@ -1139,6 +1181,132 @@ else() set(NCBI_COMPONENT_VDB_FOUND NO) 
endif() +############################################################################# +# SAMTOOLS +if(SAMTOOLS_FOUND) + set(NCBI_COMPONENT_SAMTOOLS_FOUND YES) + set(NCBI_COMPONENT_SAMTOOLS_INCLUDE ${SAMTOOLS_INCLUDE}) + set(NCBI_COMPONENT_SAMTOOLS_LIBS ${SAMTOOLS_LIBS}) +# set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} SAMTOOLS") +else() + set(NCBI_COMPONENT_SAMTOOLS_FOUND NO) +endif() + ############################################################################# # PYTHON set(NCBI_COMPONENT_PYTHON_FOUND NO) + +############################################################################# +# OpenSSL +if (OpenSSL_FOUND) + set(NCBI_COMPONENT_OpenSSL_FOUND YES) + set(NCBI_COMPONENT_OpenSSL_INCLUDE ${OpenSSL_INCLUDE}) + set(NCBI_COMPONENT_OpenSSL_LIBS ${OPENSSL_LIBS}) + set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} OpenSSL") +else() + set(NCBI_COMPONENT_OpenSSL_FOUND NO) +endif() + +############################################################################# +# MSGSL +if(HAVE_MSGSL) + set(NCBI_COMPONENT_MSGSL_FOUND YES) + set(NCBI_COMPONENT_MSGSL_INCLUDE ${MSGSL_INCLUDE}) +endif() + +############################################################################# +# SGE +if (SGE_FOUND) + set(NCBI_COMPONENT_SGE_FOUND YES) + set(NCBI_COMPONENT_SGE_INCLUDE ${SGE_INCLUDE}) + set(NCBI_COMPONENT_SGE_LIBS ${SGE_LIBS}) + set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} SGE") +endif() + +############################################################################# +# MONGOCXX +if (MONGOCXX_FOUND) + set(NCBI_COMPONENT_MONGOCXX_FOUND YES) + set(NCBI_COMPONENT_MONGOCXX_INCLUDE ${MONGOCXX_INCLUDE}) + set(NCBI_COMPONENT_MONGOCXX_LIBS ${MONGOCXX_LIB}) + set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} MONGOCXX") +endif() + +############################################################################# +# LEVELDB +if (LEVELDB_FOUND) + set(NCBI_COMPONENT_LEVELDB_FOUND YES) +# set(NCBI_COMPONENT_LEVELDB_INCLUDE ${LEVELDB_INCLUDE}) + set(NCBI_COMPONENT_LEVELDB_LIBS ${LEVELDB_LIBRARIES}) + 
set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} LEVELDB") +endif() + +############################################################################# +# WGMLST +if (WGMLST_FOUND) + set(NCBI_COMPONENT_WGMLST_FOUND YES) + set(NCBI_COMPONENT_WGMLST_INCLUDE ${WGMLST_INCLUDE}) + set(NCBI_COMPONENT_WGMLST_LIBS ${WGMLST_LIB}) + set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} WGMLST") +endif() + +############################################################################# +# GLPK +if(GLPK_FOUND) + set(NCBI_COMPONENT_GLPK_FOUND YES) + set(NCBI_COMPONENT_GLPK_INCLUDE ${GLPK_INCLUDE}) + set(NCBI_COMPONENT_GLPK_LIBS ${GLPK_LIBS}) + set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} GLPK") +endif() + +############################################################################# +# XALAN +if (XALAN_FOUND) + set(NCBI_COMPONENT_XALAN_FOUND YES) + set(NCBI_COMPONENT_XALAN_INCLUDE ${XALAN_INCLUDE}) + set(NCBI_COMPONENT_XALAN_LIBS ${XALAN_LIBS}) + set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} XALAN") +endif() + +############################################################################# +# XERCES +if (XERCES_FOUND) + set(NCBI_COMPONENT_XERCES_FOUND YES) + set(NCBI_COMPONENT_XERCES_INCLUDE ${XERCES_INCLUDE}) + set(NCBI_COMPONENT_XERCES_LIBS ${XERCES_LIBS}) + set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} XERCES") +endif() + +############################################################################## +# FTGL +if (FTGL_FOUND) + set(NCBI_COMPONENT_FTGL_FOUND YES) + set(NCBI_COMPONENT_FTGL_INCLUDE ${FTGL_INCLUDE_DIR}) + set(NCBI_COMPONENT_FTGL_LIBS ${FTGL_LIBRARIES}) +endif() + +############################################################################## +# FreeType +if (FREETYPE_FOUND) + set(NCBI_COMPONENT_FreeType_FOUND YES) + set(NCBI_COMPONENT_FreeType_INCLUDE ${FREETYPE_INCLUDE_DIRS}) + set(NCBI_COMPONENT_FreeType_LIBS ${FREETYPE_LIBRARIES}) +endif() + +############################################################################# +# GLEW +set(NCBI_COMPONENT_GLEW_FOUND YES) 
+set(NCBI_COMPONENT_GLEW_INCLUDE ${GLEW_INCLUDE_DIRS}) +set(NCBI_COMPONENT_GLEW_LIBS ${GLEW_LIBRARIES}) + +############################################################################## +# OpenGL +set(NCBI_COMPONENT_OpenGL_FOUND YES) +set(NCBI_COMPONENT_OpenGL_INCLUDE ${OPENGL_INCLUDE_DIRS}) +set(NCBI_COMPONENT_OpenGL_LIBS ${OPENGL_LIBRARIES}) + +############################################################################## +# OSMesa +set(NCBI_COMPONENT_OSMesa_FOUND YES) +set(NCBI_COMPONENT_OSMesa_INCLUDE ${OSMesa_INCLUDE_DIRS}) +set(NCBI_COMPONENT_OSMesa_LIBS ${OSMesa_LIBRARIES}) diff --git a/c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake b/c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake index 887628cb..f0610c36 100644 --- a/c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake +++ b/c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBIComponentsXCODE.cmake 567976 2018-07-27 14:18:31Z gouriano $ +# $Id: CMake.NCBIComponentsXCODE.cmake 576718 2018-12-19 20:56:03Z dicuccio $ ############################################################################# ## @@ -16,11 +16,13 @@ set(NCBI_COMPONENT_XCODE_FOUND YES) +set(NCBI_COMPONENT_unix_FOUND YES) ############################################################################# # common settings set(NCBI_ThirdPartyBasePath /netopt/ncbi_tools) +set(NCBI_PlatformBits 64) -set(NCBI_ThirdParty_TLS ${NCBI_ThirdPartyBasePath}/gnutls-3.4.0 +set(NCBI_ThirdParty_TLS ${NCBI_ThirdPartyBasePath}/gnutls-3.4.0) #set(NCBI_ThirdParty_FASTCGI set(NCBI_ThirdParty_Boost ${NCBI_ThirdPartyBasePath}/boost-1.62.0-ncbi1) #set(NCBI_ThirdParty_PCRE @@ -33,11 +35,13 @@ set(NCBI_ThirdParty_JPEG ${NCBI_ThirdPartyBasePath}/safe-sw) set(NCBI_ThirdParty_PNG /opt/X11) #set(NCBI_ThirdParty_GIF set(NCBI_ThirdParty_TIFF ${NCBI_ThirdPartyBasePath}/safe-sw) -set(NCBI_ThirdParty_XML 
${NCBI_ThirdPartyBasePath}/libxml-2.7.8 -set(NCBI_ThirdParty_XSLT ${NCBI_ThirdPartyBasePath}/libxml-2.7.8 +set(NCBI_ThirdParty_XML ${NCBI_ThirdPartyBasePath}/libxml-2.7.8) +set(NCBI_ThirdParty_XSLT ${NCBI_ThirdPartyBasePath}/libxml-2.7.8) set(NCBI_ThirdParty_EXSLT ${NCBI_ThirdParty_XSLT}) set(NCBI_ThirdParty_SQLITE3 ${NCBI_ThirdPartyBasePath}/sqlite-3.8.10.1-ncbi1) #set(NCBI_ThirdParty_Sybase +set(NCBI_ThirdParty_VDB /net/snowman/vol/projects/trace_software/vdb/vdb-versions/2.9.2-1) +set(NCBI_ThirdParty_VDB_ARCH x86_64) ############################################################################# macro(NCBI_define_component _name) @@ -60,7 +64,7 @@ macro(NCBI_define_component _name) set(_found NO) endif() if (_found) - set(_libtype lib) + set(_libtype lib${NCBI_PlatformBits}) foreach(_lib IN LISTS _args) if(NOT EXISTS ${_root}/${_libtype}/${_lib}) message("Component ${_name} ERROR: ${_root}/${_libtype}/${_lib} not found") @@ -134,7 +138,7 @@ NCBI_define_component(Boost.Spirit libboost_thread-mt.a) if(NOT NCBI_COMPONENT_PCRE_FOUND) set(NCBI_COMPONENT_PCRE_FOUND ${NCBI_COMPONENT_LocalPCRE_FOUND}) set(NCBI_COMPONENT_PCRE_INCLUDE ${NCBI_COMPONENT_LocalPCRE_INCLUDE}) - set(NCBI_COMPONENT_PCRE_LIBS ${NCBI_COMPONENT_LocalPCRE_LIBS}) + set(NCBI_COMPONENT_PCRE_NCBILIB ${NCBI_COMPONENT_LocalPCRE_NCBILIB}) endif() ############################################################################# @@ -168,7 +172,7 @@ NCBI_define_component(LMDB liblmdb.a) if(NOT NCBI_COMPONENT_LMDB_FOUND) set(NCBI_COMPONENT_LMDB_FOUND ${NCBI_COMPONENT_LocalLMDB_FOUND}) set(NCBI_COMPONENT_LMDB_INCLUDE ${NCBI_COMPONENT_LocalLMDB_INCLUDE}) - set(NCBI_COMPONENT_LMDB_LIBS ${NCBI_COMPONENT_LocalLMDB_LIBS}) + set(NCBI_COMPONENT_LMDB_NCBILIB ${NCBI_COMPONENT_LocalLMDB_NCBILIB}) endif() ############################################################################# @@ -193,6 +197,7 @@ NCBI_define_component(TIFF libtiff.a) NCBI_define_component(XML libxml2.a) if(NCBI_COMPONENT_XML_FOUND) 
set(NCBI_COMPONENT_XML_INCLUDE ${NCBI_ThirdParty_XML}/include/libxml2) + set(NCBI_COMPONENT_XML_LIBS ${NCBI_COMPONENT_XML_LIBS} -liconv) endif() ############################################################################# @@ -213,6 +218,10 @@ set(NCBI_COMPONENT_LAPACK_FOUND YES) set(NCBI_COMPONENT_LAPACK_LIBS -llapack) set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} LAPACK") +############################################################################# +# KRB5 +set(KRB5_LIBS "-framework Kerberos" -liconv) + ############################################################################# # Sybase set(NCBI_COMPONENT_Sybase_FOUND NO) @@ -223,9 +232,41 @@ set(NCBI_COMPONENT_MySQL_FOUND NO) ############################################################################# # ODBC -set(NCBI_COMPONENT_ODBC_FOUND YES) +set(NCBI_COMPONENT_ODBC_FOUND NO) +set(ODBC_INCLUDE ${NCBI_INC_ROOT}/dbapi/driver/odbc/unix_odbc + ${NCBI_INC_ROOT}/dbapi/driver/odbc/unix_odbc) +set(NCBI_COMPONENT_ODBC_INCLUDE ${ODBC_INCLUDE}) set(HAVE_ODBC 1) -set(HAVE_ODBCSS_H 1) +set(HAVE_ODBCSS_H 0) + +############################################################################# +# VDB +set(NCBI_COMPONENT_VDB_INCLUDE + ${NCBI_ThirdParty_VDB}/interfaces + ${NCBI_ThirdParty_VDB}/interfaces/cc/gcc/${NCBI_ThirdParty_VDB_ARCH} + ${NCBI_ThirdParty_VDB}/interfaces/cc/gcc + ${NCBI_ThirdParty_VDB}/interfaces/os/mac + ${NCBI_ThirdParty_VDB}/interfaces/os/unix) +set(NCBI_COMPONENT_VDB_LIBS + ${NCBI_ThirdParty_VDB}/mac/release/${NCBI_ThirdParty_VDB_ARCH}/lib/libncbi-vdb.a) + +set(_found YES) +foreach(_inc IN LISTS NCBI_COMPONENT_VDB_INCLUDE NCBI_COMPONENT_VDB_LIBS) + if(NOT EXISTS ${_inc}) + message("Component VDB ERROR: ${_inc} not found") + set(_found NO) + endif() +endforeach() +if(_found) + message("VDB found at ${NCBI_ThirdParty_VDB}") + set(NCBI_COMPONENT_VDB_FOUND YES) + set(HAVE_NCBI_VDB 1) + set(NCBI_ALL_COMPONENTS "${NCBI_ALL_COMPONENTS} VDB") +else() + set(NCBI_COMPONENT_VDB_FOUND NO) + 
unset(NCBI_COMPONENT_VDB_INCLUDE) + unset(NCBI_COMPONENT_VDB_LIBS) +endif() ############################################################################# # PYTHON diff --git a/c++/src/build-system/cmake/CMake.NCBIptb.cmake b/c++/src/build-system/cmake/CMake.NCBIptb.cmake index a2a574ba..4b8c9e5c 100644 --- a/c++/src/build-system/cmake/CMake.NCBIptb.cmake +++ b/c++/src/build-system/cmake/CMake.NCBIptb.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBIptb.cmake 568803 2018-08-10 16:42:57Z gouriano $ +# $Id: CMake.NCBIptb.cmake 581270 2019-02-27 14:52:17Z gouriano $ ############################################################################# ############################################################################# ## @@ -28,6 +28,7 @@ ## NCBI_begin_lib(name) or NCBI_begin_app(name) ## ## NCBI_sources( list of source files) +## NCBI_generated_sources( list of source files) - file extension is mandatory ## NCBI_headers( list of header files) - only relative paths and masks are allowed ## NCBI_resources( list of resource files) - file extension is mandatory ## NCBI_dataspecs( list of data specs - ASN, DTD, XSD etc) - file extension is mandatory @@ -64,10 +65,33 @@ ## ## NCBI_end_lib(result) or NCBI_end_app(result) - argument 'result' is optional ## +##--------------------------------------------------------------------------- +## custom targets +## in CMakeLists.txt: +## NCBI_add_target( list of targets) +## +## in CMakeLists.xxx.txt - add function which defines target, then add target: +## function(xxx_definition) - the function name must be unique +## ... +## add_custom_target(name ...) 
- the same name as in NCBI_begin_custom_target +## endfunction() +## +## NCBI_begin_custom_target(name) +## NCBI_requires( list of components) +## NCBI_custom_target_dependencies(list of toolkit libraries or apps) +## NCBI_custom_target_definition( xxx_definition) - function name defined above +## NCBI_end_custom_target(result) - argument 'result' is optional +## ############################################################################# function(NCBI_add_root_subdirectory) set(NCBI_CURRENT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + if (NCBI_PTB_HAS_ROOT) + message(FATAL_ERROR "NCBI_add_root_subdirectory() should be called once only (${CMAKE_CURRENT_SOURCE_DIR})") + return() + endif() + set(NCBI_PTB_HAS_ROOT TRUE PARENT_SCOPE) + set(NCBI_PTB_HAS_ROOT TRUE) set(NCBI_PTBMODE_COLLECT_DEPS OFF) if (NCBI_PTBCFG_ENABLE_COLLECTOR) message("Analyzing source tree...") @@ -75,7 +99,7 @@ function(NCBI_add_root_subdirectory) set_property(GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS "") set_property(GLOBAL PROPERTY NCBI_PTBPROP_ALLOWED_PROJECTS "") - NCBI_add_subdirectory(${ARGV}) + NCBI_add_subdirectory(${NCBI_PTBCFG_COMPOSITE_DLL} ${ARGV}) set(NCBI_PTBMODE_COLLECT_DEPS OFF) get_property(_allprojects GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS) @@ -92,6 +116,10 @@ function(NCBI_add_root_subdirectory) if(NOT "${_allowedprojects}" STREQUAL "") message("Collecting projects...") + if (NOT IS_ABSOLUTE ${NCBI_DATATOOL}) + list(APPEND _allowedprojects ${NCBI_DATATOOL}) + endif() + list(REMOVE_DUPLICATES _allowedprojects) foreach(_prj IN LISTS _allowedprojects) NCBI_internal_collect_dependencies(${_prj}) get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj}) @@ -99,19 +127,52 @@ function(NCBI_add_root_subdirectory) endforeach() list(SORT NCBI_PTB_ALLOWED_PROJECTS) list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_PROJECTS) + foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS) + NCBI_internal_collect_requires(${_prj}) + endforeach() #message("NCBI_PTB_ALLOWED_PROJECTS: 
${NCBI_PTB_ALLOWED_PROJECTS}") - elseif("${_allprojects}" STREQUAL "") + foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS) + if (NCBI_VERBOSE_PROJECT_${_prj}) + NCBI_internal_print_project_info(${_prj}) + endif() + endforeach() +# elseif("${_allprojects}" STREQUAL "") + else() message(FATAL_ERROR "List of projects is empty") return() endif() message("Configuring projects...") endif() - NCBI_add_subdirectory(${ARGV}) + set_property(GLOBAL PROPERTY NCBI_PTBPROP_COUNT_STATIC 0) + set_property(GLOBAL PROPERTY NCBI_PTBPROP_COUNT_SHARED 0) + set_property(GLOBAL PROPERTY NCBI_PTBPROP_COUNT_CONSOLEAPP 0) + set_property(GLOBAL PROPERTY NCBI_PTBPROP_COUNT_CUSTOM 0) + + NCBI_add_subdirectory(${NCBI_PTBCFG_COMPOSITE_DLL} ${ARGV}) + if (NCBI_PTBCFG_DOINSTALL) + NCBI_internal_install_root(${NCBI_PTBCFG_COMPOSITE_DLL} ${ARGV}) + endif() + + get_property(_app GLOBAL PROPERTY NCBI_PTBPROP_COUNT_CONSOLEAPP) + get_property(_lib GLOBAL PROPERTY NCBI_PTBPROP_COUNT_STATIC) + get_property(_dll GLOBAL PROPERTY NCBI_PTBPROP_COUNT_SHARED) + get_property(_cust GLOBAL PROPERTY NCBI_PTBPROP_COUNT_CUSTOM) + if(BUILD_SHARED_LIBS) + message("Added successfully: ${_app} console apps, ${_dll} shared libs, ${_lib} static libs, ${_cust} custom targets") + else() + message("Added successfully: ${_app} console apps, ${_lib} static libs, ${_cust} custom targets") + endif() endfunction() ############################################################################# function(NCBI_add_subdirectory) + if(NCBI_EXPERIMENTAL_CFG) + if (NOT NCBI_PTB_HAS_ROOT) + message(FATAL_ERROR "In source tree root, NCBI_add_root_subdirectory() must be used instead of NCBI_add_subdirectory() (${CMAKE_CURRENT_SOURCE_DIR})") + return() + endif() + endif() if(NCBI_PTBMODE_PARTS) return() endif() @@ -127,7 +188,7 @@ if(OFF) add_subdirectory(${_sub}) endif() else() - message(WARNING "ERROR: directory not found: ${NCBI_CURRENT_SOURCE_DIR}") + message("WARNING: directory not found: ${NCBI_CURRENT_SOURCE_DIR}") endif() endforeach() 
else() @@ -138,7 +199,7 @@ else() set(NCBI_CURRENT_SOURCE_DIR ${_curdir}/${_sub}) NCBI_internal_include("${_curdir}/${_sub}/CMakeLists.txt") else() - message(WARNING "ERROR: directory not found: ${_curdir}/${_sub}") + message("WARNING: directory not found: ${_curdir}/${_sub}") endif() endforeach() else() @@ -147,7 +208,7 @@ else() if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_sub}") add_subdirectory(${_sub}) else() - message(WARNING "ERROR: directory not found: ${CMAKE_CURRENT_SOURCE_DIR}/${_sub}") + message("WARNING: directory not found: ${CMAKE_CURRENT_SOURCE_DIR}/${_sub}") endif() endforeach() endif() @@ -166,8 +227,10 @@ if(OFF) NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_lib}.lib.txt) elseif (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_lib}.asn.txt) NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_lib}.asn.txt) + elseif (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/${_lib} AND NOT IS_DIRECTORY ${NCBI_CURRENT_SOURCE_DIR}/${_lib}) + NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/${_lib}) else() - message(WARNING "ERROR: file not found: ${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_lib}.lib.txt") + message("WARNING: Library project not found: ${_lib} (${NCBI_CURRENT_SOURCE_DIR})") endif() endforeach() else() @@ -177,8 +240,10 @@ else() NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_lib}.lib.txt) elseif (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_lib}.asn.txt) NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_lib}.asn.txt) + elseif (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/${_lib} AND NOT IS_DIRECTORY ${NCBI_CURRENT_SOURCE_DIR}/${_lib}) + NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/${_lib}) else() - message(WARNING "ERROR: file not found: ${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_lib}.lib.txt") + message("WARNING: Library project not found: ${_lib} (${NCBI_CURRENT_SOURCE_DIR})") endif() endforeach() else() @@ -188,8 +253,10 @@ else() NCBI_internal_include(CMakeLists.${_lib}.lib.txt) elseif (EXISTS 
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.${_lib}.asn.txt) NCBI_internal_include(CMakeLists.${_lib}.asn.txt) + elseif (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/${_lib} AND NOT IS_DIRECTORY ${NCBI_CURRENT_SOURCE_DIR}/${_lib}) + NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/${_lib}) else() - message(WARNING "ERROR: file not found: ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.${_lib}.lib.txt") + message("WARNING: Library project not found: ${_lib} (${NCBI_CURRENT_SOURCE_DIR})") endif() endforeach() endif() @@ -206,8 +273,10 @@ if(OFF) foreach(_app IN LISTS ARGV) if (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_app}.app.txt) NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_app}.app.txt) + elseif (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/${_app} AND NOT IS_DIRECTORY ${NCBI_CURRENT_SOURCE_DIR}/${_app}) + NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/${_app}) else() - message(WARNING "ERROR: file not found: ${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_app}.app.txt") + message("WARNING: App project not found: ${_app} (${NCBI_CURRENT_SOURCE_DIR})") endif() endforeach() else() @@ -215,8 +284,10 @@ else() foreach(_app IN LISTS ARGV) if (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_app}.app.txt) NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_app}.app.txt) + elseif (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/${_app} AND NOT IS_DIRECTORY ${NCBI_CURRENT_SOURCE_DIR}/${_app}) + NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/${_app}) else() - message(WARNING "ERROR: file not found: ${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_app}.app.txt") + message("WARNING: App project not found: ${_app} (${NCBI_CURRENT_SOURCE_DIR})") endif() endforeach() else() @@ -224,21 +295,52 @@ else() foreach(_app IN LISTS ARGV) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.${_app}.app.txt) NCBI_internal_include(CMakeLists.${_app}.app.txt) + elseif (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/${_app} AND NOT IS_DIRECTORY ${NCBI_CURRENT_SOURCE_DIR}/${_app}) + 
NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/${_app}) else() - message(WARNING "ERROR: file not found: ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.${_app}.app.txt") + message("WARNING: App project not found: ${_app} (${NCBI_CURRENT_SOURCE_DIR})") endif() endforeach() endif() endif() endfunction() +############################################################################# +function(NCBI_add_target) + if(NCBI_PTBMODE_PARTS) + return() + endif() + if(NOT NCBI_EXPERIMENTAL_CFG) + set(NCBI_CURRENT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + endif() + foreach(_prj IN LISTS ARGV) + if (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_prj}.txt) + NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/CMakeLists.${_prj}.txt) + elseif (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/${_prj} AND NOT IS_DIRECTORY ${NCBI_CURRENT_SOURCE_DIR}/${_prj}) + NCBI_internal_include(${NCBI_CURRENT_SOURCE_DIR}/${_prj}) + else() + message("WARNING: Target not found: ${_prj} (${NCBI_CURRENT_SOURCE_DIR})") + endif() + endforeach() +endfunction() + ############################################################################# macro(NCBI_begin_lib _name) + if(NOT NCBI_EXPERIMENTAL_CFG) + set(NCBI_CURRENT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + endif() if(NCBI_PTBMODE_PARTS) set(_libname ${_name}.part) else() set(_libname ${_name}) endif() + if (DEFINED NCBI_EXTERNAL_TREE_ROOT AND TARGET ${_name}) + set(_libname ${_libname}.local) + if (NCBI_PTBMODE_COLLECT_DEPS) + set_property(GLOBAL PROPERTY NCBI_PTBPROP_LOCAL_${_name} "${_libname}") + message(STATUS "Imported target ${_name} will be replaced with local ${_libname} (${NCBI_CURRENT_SOURCE_DIR})") + endif() + endif() set(NCBI_PROJECT_lib ${_libname}) if(NCBI_EXPERIMENTAL_CFG) set(NCBI_PROJECT ${_libname}) @@ -279,19 +381,35 @@ macro(NCBI_end_lib) if(NOT DEFINED NCBI_PROJECT_lib) message(SEND_ERROR "${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT}: Unexpected NCBI_end_lib call") endif() - NCBI_internal_add_project() + NCBI_internal_add_project(${ARGV}) 
unset(NCBI_PROJECT) endmacro() ############################################################################# macro(NCBI_begin_app _name) + if(NOT NCBI_EXPERIMENTAL_CFG) + set(NCBI_CURRENT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + endif() if(NCBI_PTBMODE_PARTS) set(_appname ${_name}.part) else() set(_appname ${_name}) endif() + if (DEFINED NCBI_EXTERNAL_TREE_ROOT AND TARGET ${_name}) + set(_appname ${_appname}.local) + if (NCBI_PTBMODE_COLLECT_DEPS) + set_property(GLOBAL PROPERTY NCBI_PTBPROP_LOCAL_${_name} "${_appname}") + message(STATUS "Imported target ${_name} will be replaced with local ${_appname} (${NCBI_CURRENT_SOURCE_DIR})") + endif() + endif() set(NCBI_PROJECT_app ${_appname}) if(NCBI_EXPERIMENTAL_CFG) + if (NOT DEFINED NCBI_EXTERNAL_TREE_ROOT) + get_property(_dir GLOBAL PROPERTY NCBI_PTBPROP_DIR_${_name}) + if (NOT ${_dir} STREQUAL "" AND NOT ${_dir} STREQUAL ${NCBI_CURRENT_SOURCE_DIR}) + set(_appname ${_appname}-app) + endif() + endif() set(NCBI_PROJECT ${_appname}) set(NCBI_${NCBI_PROJECT}_OUTPUT ${_name}) else() @@ -308,14 +426,51 @@ macro(NCBI_end_app) if(NOT DEFINED NCBI_PROJECT_app) message(SEND_ERROR "${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT}: Unexpected NCBI_end_app call") endif() - NCBI_internal_add_project() + NCBI_internal_add_project(${ARGV}) unset(NCBI_PROJECT) endmacro() +############################################################################# +macro(NCBI_begin_custom_target _name) + if(NOT NCBI_EXPERIMENTAL_CFG) + set(NCBI_CURRENT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + endif() + set(NCBI_PROJECT_custom ${_name}) + set(NCBI_PROJECT ${_name}) + set(NCBI_${NCBI_PROJECT}_TYPE CUSTOM) + set(NCBI_PROJECT_ID ${_name}.${NCBI_${NCBI_PROJECT}_TYPE}) +endmacro() + +############################################################################# +macro(NCBI_end_custom_target) + if(NOT DEFINED NCBI_PROJECT_custom) + message(SEND_ERROR "${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT}: Unexpected NCBI_end_custom_target call") + endif() + if(NOT DEFINED 
NCBI_${NCBI_PROJECT}_DEFINITION) + message(FATAL_ERROR "${NCBI_PROJECT} (${NCBI_CURRENT_SOURCE_DIR}): Custom project definition not provided") + endif() + NCBI_internal_add_project(${ARGV}) + unset(NCBI_PROJECT) +endmacro() + +############################################################################# +macro(NCBI_custom_target_definition _def) + set(NCBI_${NCBI_PROJECT}_DEFINITION ${_def}) + set(NCBI_${NCBI_PROJECT}_CALLBACK 0) +endmacro() + +############################################################################# +macro(NCBI_custom_target_dependencies) + set(NCBI_${NCBI_PROJECT}_NCBILIB ${NCBI_${NCBI_PROJECT}_NCBILIB} "${ARGV}") +endmacro() + ############################################################################# macro(NCBI_sources) set(NCBI_${NCBI_PROJECT}_SOURCES ${NCBI_${NCBI_PROJECT}_SOURCES} "${ARGV}") endmacro() +macro(NCBI_generated_sources) + set(NCBI_${NCBI_PROJECT}_GENERATED_SOURCES ${NCBI_${NCBI_PROJECT}_GENERATED_SOURCES} "${ARGV}") +endmacro() ############################################################################# macro(NCBI_headers) @@ -353,7 +508,7 @@ endmacro() ############################################################################# macro(NCBI_enable_pch) - set(NCBI_${NCBI_PROJECT}_USEPCH ON) + set(NCBI_${NCBI_PROJECT}_USEPCH ${NCBI_DEFAULT_USEPCH}) endmacro() macro(NCBI_disable_pch) set(NCBI_${NCBI_PROJECT}_USEPCH OFF) @@ -435,6 +590,9 @@ macro(NCBI_set_test_command) list(REMOVE_AT _args 0) if ( "${_cmd}" STREQUAL "${NCBI_PROJECT}") set(_cmd "${NCBI_${NCBI_PROJECT}_OUTPUT}") + if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "CONSOLEAPP") + set(_cmd "${_cmd}${CMAKE_EXECUTABLE_SUFFIX}") + endif() endif() if (NOT NCBI_EXPERIMENTAL_CFG) set(_cmd "${EXECUTABLE_OUTPUT_PATH}/${_cmd}") @@ -490,12 +648,18 @@ endmacro() ############################################################################## function(NCBI_internal_collect_dependencies _project) - get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_project} SET) + 
get_property(_collected GLOBAL PROPERTY NCBI_PTBPROP_COLLECTED_DEPS) + if( ${_project} IN_LIST _collected) + return() + endif() + set_property(GLOBAL PROPERTY NCBI_PTBPROP_COLLECTED_DEPS ${_collected} ${_project}) + + get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DIRECT_DEPS_${_project} SET) if (NOT _prjdeps) - message(WARNING "ERROR: project ${_project} not found") + message("ERROR: project ${_project} not found") return() endif() - get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_project}) + get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DIRECT_DEPS_${_project}) foreach( _value IN LISTS _prjdeps) NCBI_internal_recur_collect_dependencies( ${_project} ${_value}) endforeach() @@ -509,6 +673,7 @@ function(NCBI_internal_collect_dependencies _project) endif() endforeach() if (NOT "${_deps}" STREQUAL "") + list(SORT _deps) list(REMOVE_DUPLICATES _deps) endif() set_property(GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_project} "${_deps}") @@ -517,18 +682,42 @@ endfunction() ############################################################################## function(NCBI_internal_recur_collect_dependencies _project _dep) get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_project}) - get_property(_depdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_dep} SET) + get_property(_depdeps GLOBAL PROPERTY NCBI_PTBPROP_DIRECT_DEPS_${_dep} SET) if (NOT _depdeps) - message(WARNING "ERROR: project ${_dep} not found") - endif() - get_property(_depdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_dep}) - set_property(GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_project} ${_prjdeps} ${_depdeps}) - foreach( _value IN LISTS _depdeps) - list(FIND _prjdeps ${_value} _found) - if (${_found} LESS "0") - NCBI_internal_recur_collect_dependencies( ${_project} ${_value}) + if(NOT TARGET ${_dep}) + message("ERROR: project ${_dep} not found (${_project} requires it)") endif() + return() + endif() + get_property(_depdeps GLOBAL PROPERTY NCBI_PTBPROP_DIRECT_DEPS_${_dep}) + foreach( _value IN LISTS _dep 
_depdeps) + NCBI_internal_collect_dependencies(${_value}) + get_property(_valuedeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_value}) + set(_prjdeps ${_prjdeps} ${_value} ${_valuedeps}) endforeach() + if (NOT "${_prjdeps}" STREQUAL "") + list(SORT _prjdeps) + list(REMOVE_DUPLICATES _prjdeps) + endif() + set_property(GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_project} ${_prjdeps}) +endfunction() + +############################################################################## +function(NCBI_internal_collect_requires _project) + get_property(_deps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_project}) + if (NOT "${_deps}" STREQUAL "") + list(REMOVE_DUPLICATES _deps) + set(_implreq "") + foreach(_dep IN LISTS _deps) + get_property(_y GLOBAL PROPERTY NCBI_PTBPROP_REQUIRES_${_dep}) + list(APPEND _implreq "${_y}") + endforeach() + if (NOT "${_implreq}" STREQUAL "") + list(SORT _implreq) + list(REMOVE_DUPLICATES _implreq) + set_property(GLOBAL PROPERTY NCBI_PTBPROP_IMPLREQ_${_project} ${_implreq}) + endif() + endif() endfunction() ############################################################################## @@ -579,6 +768,18 @@ function(NCBI_internal_collect_sources) endif() endforeach() + foreach(_file IN LISTS NCBI_${NCBI_PROJECT}_GENERATED_SOURCES) + if(NOT IS_ABSOLUTE ${_file}) + set(_file "${_dir}/${_file}") + endif() + list(APPEND _sources "${_file}") + set_source_files_properties(${_file} PROPERTIES GENERATED TRUE) + get_filename_component(_ext ${_file} EXT) + if("${_ext}" STREQUAL ".c") + list(APPEND _nopch "${_file}") + endif() + endforeach() + if (NOT NCBI_PTBMODE_PARTS AND DEFINED NCBI_DEFAULT_DLLENTRY) if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "SHARED") list(APPEND _sources ${NCBI_DEFAULT_DLLENTRY}) @@ -647,7 +848,7 @@ endfunction() ############################################################################## function(NCBI_internal_add_resources) - if (MSVC) + if (WIN32) if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "CONSOLEAPP") if (DEFINED NCBI_${NCBI_PROJECT}_RESOURCES) 
set(_res ${NCBI_${NCBI_PROJECT}_RESOURCES}) @@ -691,10 +892,22 @@ function(NCBI_internal_collect_dataspec) foreach(_dataspec IN LISTS NCBI_${NCBI_PROJECT}_DATASPEC) if (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/${_dataspec}) get_filename_component(_basename ${_dataspec} NAME_WE) + get_filename_component(_ext ${_dataspec} EXT) get_filename_component(_path ${NCBI_CURRENT_SOURCE_DIR}/${_dataspec} DIRECTORY) set(_specfiles ${_specfiles} ${NCBI_CURRENT_SOURCE_DIR}/${_dataspec}) - set_source_files_properties(${_path}/${_basename}__.cpp ${_path}/${_basename}___.cpp PROPERTIES GENERATED TRUE) - set(_srcfiles ${_srcfiles} ${_path}/${_basename}__.cpp ${_path}/${_basename}___.cpp) + if ("${_ext}" STREQUAL ".proto") + if ("PROTOBUF" IN_LIST NCBITMP_REQUIRE) + set_source_files_properties(${_path}/${_basename}.pb.cc PROPERTIES GENERATED TRUE) + set(_srcfiles ${_srcfiles} ${_path}/${_basename}.pb.cc) + endif() + if ("GRPC" IN_LIST NCBITMP_REQUIRE) + set_source_files_properties(${_path}/${_basename}.grpc.pb.cc PROPERTIES GENERATED TRUE) + set(_srcfiles ${_srcfiles} ${_path}/${_basename}.grpc.pb.cc) + endif() + else() + set_source_files_properties(${_path}/${_basename}__.cpp ${_path}/${_basename}___.cpp PROPERTIES GENERATED TRUE) + set(_srcfiles ${_srcfiles} ${_path}/${_basename}__.cpp ${_path}/${_basename}___.cpp) + endif() else() message(WARNING "ERROR: file not found: ${NCBI_CURRENT_SOURCE_DIR}/${_dataspec}") endif() @@ -718,39 +931,73 @@ function(NCBI_internal_add_dataspec) set(_filepath ${_dataspec}) get_filename_component(_path ${_filepath} DIRECTORY) file(RELATIVE_PATH _relpath ${NCBI_SRC_ROOT} ${_path}) - set(_module_imports "") - set(_imports "") - - if(EXISTS "${_path}/${_basename}.module") - FILE(READ "${_path}/${_basename}.module" _module_contents) - STRING(REGEX MATCH "MODULE_IMPORT *=[^\n]*[^ \n]" _tmp "${_module_contents}") - STRING(REGEX REPLACE "MODULE_IMPORT *= *" "" _tmp "${_tmp}") - STRING(REGEX REPLACE " *$" "" _imp_list "${_tmp}") - STRING(REGEX REPLACE " " ";" 
_imp_list "${_imp_list}") - - foreach(_module IN LISTS _imp_list) - set(_module_imports "${_module_imports} ${_module}${_ext}") - endforeach() - if (NOT "${_module_imports}" STREQUAL "") - set(_imports -M ${_module_imports}) + + if ("${_ext}" STREQUAL ".proto") + if ("PROTOBUF" IN_LIST NCBITMP_REQUIRE) + set(_cmd ${NCBI_PROTOC_APP} --cpp_out=${_path} --proto_path=${_path} ${_filepath}) + add_custom_command( + OUTPUT ${_path}/${_basename}.pb.cc + COMMAND ${_cmd} VERBATIM + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_path}/${_basename}.pb.h ${NCBI_INC_ROOT}/${_relpath} VERBATIM + COMMAND ${CMAKE_COMMAND} -E remove -f ${_path}/${_basename}.pb.h VERBATIM + WORKING_DIRECTORY ${top_src_dir} + COMMENT "Generate PROTOC C++ classes from ${_filepath}" + DEPENDS ${_filepath} + VERBATIM + ) + endif() + if ("GRPC" IN_LIST NCBITMP_REQUIRE) + set(_cmd ${NCBI_PROTOC_APP} --grpc_out=${_path} --proto_path=${_path} --plugin=protoc-gen-grpc=${NCBI_GRPC_PLUGIN} ${_filepath}) + add_custom_command( + OUTPUT ${_path}/${_basename}.grpc.pb.cc + COMMAND ${_cmd} VERBATIM + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_path}/${_basename}.grpc.pb.h ${NCBI_INC_ROOT}/${_relpath} VERBATIM + COMMAND ${CMAKE_COMMAND} -E remove -f ${_path}/${_basename}.grpc.pb.h VERBATIM + WORKING_DIRECTORY ${top_src_dir} + COMMENT "Generate GRPC C++ classes from ${_filepath}" + DEPENDS ${_filepath} + VERBATIM + ) + endif() + set(NCBITMP_INCLUDES ${NCBITMP_INCLUDES} ${NCBI_INC_ROOT}/${_relpath} PARENT_SCOPE) + else() + set(_module_imports "") + set(_imports "") + if(EXISTS "${_path}/${_basename}.module") + FILE(READ "${_path}/${_basename}.module" _module_contents) + STRING(REGEX MATCH "MODULE_IMPORT *=[^\n]*[^ \n]" _tmp "${_module_contents}") + STRING(REGEX REPLACE "MODULE_IMPORT *= *" "" _tmp "${_tmp}") + STRING(REGEX REPLACE " *$" "" _imp_list "${_tmp}") + STRING(REGEX REPLACE " " ";" _imp_list "${_imp_list}") + + foreach(_module IN LISTS _imp_list) + set(_module_imports "${_module_imports} 
${_module}${_ext}") + endforeach() + if (NOT "${_module_imports}" STREQUAL "") + set(_imports -M ${_module_imports}) + endif() endif() - endif() - set(_oc ${_basename}) - if (NOT "${NCBI_DEFAULT_PCH}" STREQUAL "") - set(_pch -pch ${NCBI_DEFAULT_PCH}) - endif() - set(_od ${_path}/${_basename}.def) - set(_oex -oex " ") - set(_cmd ${NCBI_DATATOOL} ${_oex} ${_pch} -m ${_filepath} -oA -oc ${_oc} -od ${_od} -odi -ocvs -or ${_relpath} -oR ${top_src_dir} ${_imports}) - add_custom_command( - OUTPUT ${_path}/${_basename}__.cpp ${_path}/${_basename}___.cpp - COMMAND ${_cmd} VERBATIM - WORKING_DIRECTORY ${top_src_dir} - COMMENT "Generate C++ classes from ${_filepath}" - DEPENDS ${NCBI_DATATOOL} - VERBATIM - ) + set(_oc ${_basename}) + if (NOT "${NCBI_DEFAULT_PCH}" STREQUAL "") + set(_pch -pch ${NCBI_DEFAULT_PCH}) + endif() + set(_od ${_path}/${_basename}.def) + set(_oex -oex " ") + set(_cmd ${NCBI_DATATOOL} ${_oex} ${_pch} -m ${_filepath} -oA -oc ${_oc} -od ${_od} -odi -ocvs -or ${_relpath} -oR ${top_src_dir} ${_imports}) + set(_depends ${NCBI_DATATOOL} ${_filepath}) + if(EXISTS ${_od}) + set(_depends ${_depends} ${_od}) + endif() + add_custom_command( + OUTPUT ${_path}/${_basename}__.cpp ${_path}/${_basename}___.cpp + COMMAND ${_cmd} VERBATIM + WORKING_DIRECTORY ${top_src_dir} + COMMENT "Generate C++ classes from ${_filepath}" + DEPENDS ${_depends} + VERBATIM + ) + endif() endforeach() endfunction() @@ -791,8 +1038,8 @@ endif() set(_usepch OFF) endif() - if (MSVC) - if (_usepch) + if (_usepch) + if (MSVC) set_target_properties(${NCBI_PROJECT} PROPERTIES COMPILE_FLAGS "/Yu${_pch}") set(_files ${NCBITMP_PROJECT_SOURCES}) if (DEFINED NCBITMP_PROJECT_NOPCH) @@ -826,10 +1073,70 @@ endif() set_source_files_properties(${_file} PROPERTIES COMPILE_DEFINITIONS ${_pchdef}) endforeach() set_source_files_properties(${_pchfile} PROPERTIES COMPILE_FLAGS "/Yc${_pch}") - endif(_usepch) - endif (MSVC) + + elseif (XCODE) + + if (EXISTS ${NCBI_CURRENT_SOURCE_DIR}/${_pch}) + set(_pch 
${NCBI_CURRENT_SOURCE_DIR}/${_pch}) + elseif (EXISTS ${NCBI_INC_ROOT}/${_pch}) + set(_pch ${NCBI_INC_ROOT}/${_pch}) + else() + file(RELATIVE_PATH _rel "${NCBI_SRC_ROOT}" "${NCBI_CURRENT_SOURCE_DIR}") + if (EXISTS "${NCBI_INC_ROOT}/${_rel}/${_pch}") + set(_pch "${NCBI_INC_ROOT}/${_rel}/${_pch}") + else() +# message(WARNING "ERROR: in project ${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT}, precompiled header ${_pch} was not found.") + set(_usepch NO) + endif() + endif() + if (_usepch) + set_target_properties(${NCBI_PROJECT} PROPERTIES XCODE_ATTRIBUTE_GCC_PRECOMPILE_PREFIX_HEADER "YES") + set_target_properties(${NCBI_PROJECT} PROPERTIES XCODE_ATTRIBUTE_GCC_PREFIX_HEADER "${_pch}") + endif() + endif() + endif() +endfunction() + +############################################################################## +function(NCBI_internal_install_component_files _comp) + if (NOT DEFINED NCBI_COMPONENT_${_comp}_BINPATH) + return() + endif() + + foreach(_cfg ${CMAKE_CONFIGURATION_TYPES}) + if (DEFINED NCBI_COMPONENT_${_comp}_BINPATH_${_cfg}) + set(_src ${NCBI_COMPONENT_${_comp}_BINPATH_${_cfg}}) + elseif (DEFINED NCBI_COMPONENT_${_comp}_BINPATH) + if (IS_DIRECTORY ${NCBI_COMPONENT_${_comp}_BINPATH}/${_cfg}) + set(_src ${NCBI_COMPONENT_${_comp}_BINPATH}/${_cfg}) + else() + set(_src ${NCBI_COMPONENT_${_comp}_BINPATH}) + endif() + else() + continue() + endif() + if (IS_DIRECTORY ${_src}) + file(GLOB _files LIST_DIRECTORIES false "${_src}/*${CMAKE_SHARED_LIBRARY_SUFFIX}") + foreach(_file IN LISTS _files) +#message("============ COPY ${_file} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${_cfg}") + file(COPY ${_file} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${_cfg}) + endforeach() + endif() + endforeach() endfunction() +############################################################################## +macro(NCBI_internal_parse_sign _input _value _negative) + string(SUBSTRING ${_input} 0 1 _sign) + if ("${_sign}" STREQUAL "-") + string(SUBSTRING ${_input} 1 -1 ${_value}) + 
set(${_negative} ON) + else() + set(${_value} ${_input}) + set(${_negative} OFF) + endif() +endmacro() + ############################################################################## macro(NCBI_internal_process_project_requires) set(NCBITMP_REQUIRE_NOTFOUND "") @@ -842,15 +1149,13 @@ macro(NCBI_internal_process_project_requires) list(REMOVE_DUPLICATES _all) endif() + set(NCBITMP_REQUIRE ${_all}) + if(NCBI_PTBMODE_COLLECT_DEPS) + set_property(GLOBAL PROPERTY NCBI_PTBPROP_REQUIRES_${NCBI_PROJECT} ${_all}) + endif() + foreach(_req IN LISTS _all) - string(SUBSTRING ${_req} 0 1 _sign) - if ("${_sign}" STREQUAL "-") - string(SUBSTRING ${_req} 1 -1 _value) - set(_negate ON) - else() - set(_value ${_req}) - set(_negate OFF) - endif() + NCBI_internal_parse_sign(${_req} _value _negate) if (NCBI_REQUIRE_${_value}_FOUND OR NCBI_COMPONENT_${_value}_FOUND) if (_negate) set(NCBITMP_REQUIRE_NOTFOUND ${NCBITMP_REQUIRE_NOTFOUND} ${_req}) @@ -863,6 +1168,9 @@ macro(NCBI_internal_process_project_requires) set(NCBITMP_NCBILIB ${NCBITMP_NCBILIB} ${_lib}) endif() endforeach() + if (WIN32 AND NOT NCBI_PTBMODE_COLLECT_DEPS) + NCBI_internal_install_component_files(${_value}) + endif() endif() else() if (_negate) @@ -875,6 +1183,26 @@ macro(NCBI_internal_process_project_requires) endif() endif() endforeach() + + if (NOT NCBI_PTBMODE_COLLECT_DEPS AND NOT ${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "STATIC") + get_property(_all GLOBAL PROPERTY NCBI_PTBPROP_IMPLREQ_${NCBI_PROJECT}) + foreach(_req IN LISTS _all) + NCBI_internal_parse_sign(${_req} _value _negate) + if (NCBI_REQUIRE_${_value}_FOUND OR NCBI_COMPONENT_${_value}_FOUND) + if (_negate) + set(NCBITMP_REQUIRE_NOTFOUND ${NCBITMP_REQUIRE_NOTFOUND} ${_req}) + endif() + else() + if (NOT _negate) + set(NCBITMP_REQUIRE_NOTFOUND ${NCBITMP_REQUIRE_NOTFOUND} ${_req}) + endif() + endif() + endforeach() + endif() + + if (NOT "${NCBITMP_REQUIRE_NOTFOUND}" STREQUAL "") + list(REMOVE_DUPLICATES NCBITMP_REQUIRE_NOTFOUND) + endif() endmacro() 
############################################################################## @@ -889,6 +1217,10 @@ macro(NCBI_internal_process_project_components) list(REMOVE_DUPLICATES _all) endif() + if(NCBI_PTBMODE_COLLECT_DEPS) + set_property(GLOBAL PROPERTY NCBI_PTBPROP_COMPONENTS_${NCBI_PROJECT} ${_all}) + endif() + foreach(_value IN LISTS _all) if (NCBI_REQUIRE_${_value}_FOUND OR NCBI_COMPONENT_${_value}_FOUND) set(NCBITMP_INCLUDES ${NCBITMP_INCLUDES} ${NCBI_COMPONENT_${_value}_INCLUDE}) @@ -899,6 +1231,9 @@ macro(NCBI_internal_process_project_components) set(NCBITMP_NCBILIB ${NCBITMP_NCBILIB} ${_lib}) endif() endforeach() + if (WIN32 AND NOT NCBI_PTBMODE_COLLECT_DEPS) + NCBI_internal_install_component_files(${_value}) + endif() else() set(NCBITMP_COMPONENT_NOTFOUND ${NCBITMP_COMPONENT_NOTFOUND} ${_value}) endif() @@ -941,7 +1276,7 @@ function(NCBI_internal_collect_parts) elseif (EXISTS ${_path}/CMakeLists.${_lib}.asn.txt) include(${_path}/CMakeLists.${_lib}.asn.txt) else() - message(WARNING "ERROR: project part not found: ${NCBI_SRC_ROOT}/${_part}") + message(WARNING "ERROR: part of ${_hostproject} project not found: ${NCBI_SRC_ROOT}/${_part}") endif() endforeach() @@ -979,9 +1314,60 @@ macro(NCBI_internal_process_parts) endif() endmacro() +############################################################################## +function(NCBI_internal_process_interface_libraries _lib) + if (NOT TARGET ${_lib}) + return() + endif() + get_target_property(_deps ${_lib} INTERFACE_LINK_LIBRARIES) + if ( NOT "${_deps}" STREQUAL "") + set(_value "") + foreach(_prj IN LISTS _deps) + get_property(_local GLOBAL PROPERTY NCBI_PTBPROP_LOCAL_${_prj}) + if ("${_local}" STREQUAL "") + set(_value ${_value} ${_prj}) + NCBI_internal_process_interface_libraries(${_prj}) + else() + set(_value ${_value} ${_local}) + endif() + endforeach() + set_target_properties(${_lib} PROPERTIES INTERFACE_LINK_LIBRARIES "${_value}") + endif() +endfunction() + 
+############################################################################## +function(NCBI_internal_verify_ncbilibs) + set(_res "") + set(_exclude "") + foreach(_prj IN LISTS NCBITMP_NCBILIB) + NCBI_internal_parse_sign(${_prj} _value _negate) + list(APPEND _res ${_value}) + if (_negate) + list(APPEND _exclude ${_value}) + endif() + endforeach() + foreach(_prj IN LISTS _exclude) + list(REMOVE_ITEM _res ${_prj}) + endforeach() + set(NCBITMP_NCBILIB ${_res} PARENT_SCOPE) +endfunction() + ############################################################################## function(NCBI_internal_verify_libs) - set(_ncbilib ${NCBITMP_NCBILIB}) + set(_optimize NO) + if (WIN32 AND NCBI_PTBCFG_ENABLE_COLLECTOR AND NOT DEFINED NCBI_EXTERNAL_TREE_ROOT AND NOT DEFINED NCBI_PTBCFG_DOINSTALL) + if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "STATIC") + set(_ncbilib ${NCBITMP_NCBILIB}) + set(_optimize YES) + else() + get_property(_ncbilib GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${NCBI_PROJECT}) + if ("${_ncbilib}" STREQUAL "") + set(_ncbilib ${NCBITMP_NCBILIB}) + endif() + endif() + else() + set(_ncbilib ${NCBITMP_NCBILIB}) + endif() if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT}) message("NCBI_internal_verify_libs ${NCBI_PROJECT_ID}: on enter = ${_ncbilib}") endif() @@ -991,7 +1377,14 @@ function(NCBI_internal_verify_libs) foreach(_prj IN LISTS _ncbilib) get_property(_host GLOBAL PROPERTY NCBI_PTBPROP_HOST_${_prj}) if ("${_host}" STREQUAL "") - set(_value ${_value} ${_prj}) + if (_optimize) + get_property(_hasspec GLOBAL PROPERTY NCBI_PTBPROP_DATASPEC_${_prj}) + if (_hasspec) + set(_value ${_value} ${_prj}) + endif() + else() + set(_value ${_value} ${_prj}) + endif() else() set(_value ${_value} ${_host}) endif() @@ -1000,6 +1393,20 @@ function(NCBI_internal_verify_libs) if ( NOT "${_value}" STREQUAL "") list(REMOVE_DUPLICATES _value) list(REMOVE_ITEM _value ${NCBI_PROJECT}) + set(_ncbilib ${_value}) + endif() + + if ( NOT "${_ncbilib}" STREQUAL "" AND DEFINED 
NCBI_EXTERNAL_TREE_ROOT) + set(_value "") + foreach(_prj IN LISTS _ncbilib) + get_property(_local GLOBAL PROPERTY NCBI_PTBPROP_LOCAL_${_prj}) + if ("${_local}" STREQUAL "") + set(_value ${_value} ${_prj}) + NCBI_internal_process_interface_libraries(${_prj}) + else() + set(_value ${_value} ${_local}) + endif() + endforeach() endif() set(_tk_libs "${_value}") @@ -1024,15 +1431,55 @@ endfunction() ############################################################################## function(NCBI_internal_process_project_filters _result) - if(DEFINED NCBI_PTBCFG_PROJECT_LIST) + if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "") + set(_is_good FALSE) + foreach(_prj IN LISTS NCBI_PTBCFG_PROJECT_TARGETS) + if (${NCBI_PROJECT} STREQUAL ${_prj}) + set(${_result} TRUE PARENT_SCOPE) + return() + endif() + if (${NCBI_PROJECT} MATCHES ${_prj}) + set(_is_good TRUE) + endif() + endforeach() + if (NOT _is_good) + set(${_result} FALSE PARENT_SCOPE) + return() + endif() + endif() + if(NOT "${NCBI_PTBCFG_PROJECT_LIST}" STREQUAL "") + set(_is_good FALSE) foreach(_dir IN LISTS NCBI_PTBCFG_PROJECT_LIST) if(${NCBI_CURRENT_SOURCE_DIR} MATCHES ${NCBI_SRC_ROOT}/${_dir}) - set(${_result} TRUE PARENT_SCOPE) + set(_is_good TRUE) + endif() + endforeach() + if (NOT _is_good) + set(${_result} FALSE PARENT_SCOPE) + return() + endif() + endif() + if(NOT "${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "") + set(_all ${NCBI__PROJTAG} ${NCBI_${NCBI_PROJECT}_PROJTAG}) + + foreach(_tag IN LISTS _all) + list(FIND NCBI_PTBCFG_PROJECT_TAGS "-${_tag}" _found) + if (${_found} GREATER_EQUAL "0") + set(${_result} FALSE PARENT_SCOPE) return() endif() + + list(FIND NCBI_PTBCFG_PROJECT_TAGS "${_tag}" _found) + if (${_found} LESS "0") + list(FIND NCBI_PTBCFG_PROJECT_TAGS "*" _found) + if (${_found} LESS "0") + set(${_result} FALSE PARENT_SCOPE) + return() + endif() + endif() endforeach() endif() - set(${_result} FALSE PARENT_SCOPE) + set(${_result} TRUE PARENT_SCOPE) endfunction() 
############################################################################## @@ -1044,14 +1491,7 @@ macro(NCBI_internal_process_test_requires _test) endif() foreach(_req IN LISTS _all) - string(SUBSTRING ${_req} 0 1 _sign) - if ("${_sign}" STREQUAL "-") - string(SUBSTRING ${_req} 1 -1 _value) - set(_negate ON) - else() - set(_value ${_req}) - set(_negate OFF) - endif() + NCBI_internal_parse_sign( ${_req} _value _negate) if (NCBI_REQUIRE_${_value}_FOUND OR NCBI_COMPONENT_${_value}_FOUND) if (_negate) set(NCBITEST_REQUIRE_NOTFOUND ${NCBITEST_REQUIRE_NOTFOUND} ${_req}) @@ -1077,7 +1517,9 @@ function(NCBI_internal_add_test _test) NCBI_internal_process_test_requires(${_test}) if ( NOT "${NCBITEST_REQUIRE_NOTFOUND}" STREQUAL "") - message("${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT}: Test ${_test} is excluded because of unmet requirements: ${NCBITEST_REQUIRE_NOTFOUND}") + if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT}) + message("${NCBI_PROJECT} (${NCBI_CURRENT_SOURCE_DIR}): Test ${_test} of project ${NCBI_PROJECT} is excluded because of unmet requirements: ${NCBITEST_REQUIRE_NOTFOUND}") + endif() return() endif() @@ -1096,6 +1538,17 @@ function(NCBI_internal_add_test _test) string(REPLACE ";" " " _args "${NCBITEST_${_test}_ARG}") string(REPLACE ";" " " _assets "${_assets}") + if (XCODE) + set(_extra -DXCODE=TRUE) + endif() + + file(RELATIVE_PATH _outdir "${NCBI_SRC_ROOT}" "${NCBI_CURRENT_SOURCE_DIR}") + if (WIN32 OR XCODE) + set(_outdir ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_TESTING}/$/${_outdir}) + else() + set(_outdir ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_TESTING}/${_outdir}) + endif() + add_test(NAME ${_test} COMMAND ${CMAKE_COMMAND} -DNCBITEST_NAME=${_test} -DNCBITEST_CONFIG=$ @@ -1104,10 +1557,273 @@ function(NCBI_internal_add_test _test) -DNCBITEST_TIMEOUT=${_timeout} -DNCBITEST_BINDIR=${CMAKE_RUNTIME_OUTPUT_DIRECTORY} -DNCBITEST_SOURCEDIR=${NCBI_CURRENT_SOURCE_DIR} + -DNCBITEST_OUTDIR=${_outdir} -DNCBITEST_ASSETS=${_assets} + ${_extra} -P 
"${NCBITEST_DRIVER}") endfunction() +############################################################################## +function(NCBI_internal_export_hostinfo _file) + if(EXISTS ${_file}) + file(REMOVE ${_file}) + endif() + get_property(_allprojects GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS) + if (NOT "${_allprojects}" STREQUAL "") + set(_hostinfo) + foreach(_prj IN LISTS _allprojects) + get_property(_prjhost GLOBAL PROPERTY NCBI_PTBPROP_HOST_${_prj}) + if (NOT "${_prjhost}" STREQUAL "") + list(APPEND _hostinfo "${_prj} ${_prjhost}\n") + + endif() + endforeach() + if (NOT "${_hostinfo}" STREQUAL "") + file(WRITE ${_file} ${_hostinfo}) + endif() + endif() +endfunction() + +############################################################################## +function(NCBI_internal_import_hostinfo _file) + if(NOT EXISTS ${_file}) + return() + endif() + file(STRINGS ${_file} _hostinfo) + if (NOT "${_hostinfo}" STREQUAL "") + foreach( _item IN LISTS _hostinfo) + string(REPLACE " " ";" _item ${_item}) + if (NOT "${_item}" STREQUAL "") + list(GET _item 0 _prj) + list(GET _item 1 _host) + set_property(GLOBAL PROPERTY NCBI_PTBPROP_HOST_${_prj} ${_host}) + endif() + endforeach() + endif() +endfunction() + +############################################################################## +function(NCBI_internal_install_root) + + file(RELATIVE_PATH _dest "${NCBI_TREE_ROOT}" "${NCBI_BUILD_ROOT}") + + set(_hostinfo ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/${CMAKE_PROJECT_NAME}.hostinfo) + NCBI_internal_export_hostinfo(${_hostinfo}) + if (EXISTS ${_hostinfo}) + install( FILES ${_hostinfo} DESTINATION ${_dest}/${NCBI_DIRNAME_EXPORT} RENAME ${NCBI_PTBCFG_INSTALL_EXPORT}.hostinfo) + endif() + + if (WIN32 OR XCODE) + foreach(_cfg ${CMAKE_CONFIGURATION_TYPES}) + install(EXPORT ${NCBI_PTBCFG_INSTALL_EXPORT}${_cfg} + CONFIGURATIONS ${_cfg} + DESTINATION ${_dest}/${NCBI_DIRNAME_EXPORT} + FILE ${NCBI_PTBCFG_INSTALL_EXPORT}.cmake + ) + endforeach() + else() + install(EXPORT 
${NCBI_PTBCFG_INSTALL_EXPORT} + DESTINATION ${_dest}/${NCBI_DIRNAME_EXPORT} + FILE ${NCBI_PTBCFG_INSTALL_EXPORT}.cmake + ) + endif() + +# install headers + get_property(_all_subdirs GLOBAL PROPERTY NCBI_PTBPROP_ROOT_SUBDIR) + list(APPEND _all_subdirs ${NCBI_DIRNAME_COMMON_INCLUDE}) + foreach(_dir IN LISTS _all_subdirs) + if (EXISTS ${NCBI_INC_ROOT}/${_dir}) + install( DIRECTORY ${NCBI_INC_ROOT}/${_dir} DESTINATION ${NCBI_DIRNAME_INCLUDE} + REGEX "/[.].*$" EXCLUDE) + endif() + endforeach() + file(GLOB _files LIST_DIRECTORIES false "${NCBI_INC_ROOT}/*") + install( FILES ${_files} DESTINATION ${NCBI_DIRNAME_INCLUDE}) + +# install sources? + # TODO + + file(GLOB _files LIST_DIRECTORIES false "${NCBI_TREE_BUILDCFG}/*") + install( FILES ${_files} DESTINATION ${NCBI_DIRNAME_BUILDCFG}) + install( DIRECTORY ${NCBI_TREE_CMAKECFG} DESTINATION ${NCBI_DIRNAME_BUILDCFG} + USE_SOURCE_PERMISSIONS REGEX "/[.].*$" EXCLUDE) + + install( DIRECTORY ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_COMMON_SCRIPTS} DESTINATION ${NCBI_DIRNAME_SCRIPTS} + USE_SOURCE_PERMISSIONS REGEX "/[.].*$" EXCLUDE) + + file(RELATIVE_PATH _dest "${NCBI_TREE_ROOT}" "${NCBI_BUILD_ROOT}") + install( DIRECTORY ${NCBI_CFGINC_ROOT} DESTINATION "${_dest}" + REGEX "/[.].*$" EXCLUDE) +endfunction() + +############################################################################## +function(NCBI_internal_install_target) + + if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "STATIC") + set(_haspdb NO) + file(RELATIVE_PATH _dest "${NCBI_TREE_ROOT}" "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}") + elseif (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "SHARED") + set(_haspdb YES) + if (WIN32) + file(RELATIVE_PATH _dest "${NCBI_TREE_ROOT}" "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") + file(RELATIVE_PATH _dest_ar "${NCBI_TREE_ROOT}" "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}") + else() + file(RELATIVE_PATH _dest "${NCBI_TREE_ROOT}" "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}") + endif() + elseif (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "CONSOLEAPP") + set(_haspdb YES) + file(RELATIVE_PATH 
_dest "${NCBI_TREE_ROOT}" "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") + if (DEFINED NCBI_PTBCFG_INSTALL_TAGS) + set(_alltags ${NCBI__PROJTAG} ${NCBI_${NCBI_PROJECT}_PROJTAG}) + foreach(_tag IN LISTS _alltags) + list(FIND NCBI_PTBCFG_INSTALL_TAGS "-${_tag}" _found) + if (${_found} GREATER_EQUAL "0") + if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT}) + message("${NCBI_PROJECT} will not be installed because of tag ${_tag}") + endif() + return() + endif() + list(FIND NCBI_PTBCFG_INSTALL_TAGS "${_tag}" _found) + if (${_found} LESS "0") + list(FIND NCBI_PTBCFG_INSTALL_TAGS "*" _found) + if (${_found} LESS "0") + if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT}) + message("${NCBI_PROJECT} will not be installed because of tag ${_tag}") + endif() + return() + endif() + endif() + endforeach() + endif() + else() + return() + endif() + if ("${_dest}" STREQUAL "") + return() + endif() + +# not sure about this part + file(RELATIVE_PATH _rel "${NCBI_SRC_ROOT}" "${NCBI_CURRENT_SOURCE_DIR}") + string(REPLACE "/" ";" _rel ${_rel}) + list(GET _rel 0 _dir) + get_property(_all_subdirs GLOBAL PROPERTY NCBI_PTBPROP_ROOT_SUBDIR) + list(APPEND _all_subdirs ${_dir}) + if (DEFINED NCBI_${NCBI_PROJECT}_PARTS) + foreach(_rel IN LISTS NCBI_${NCBI_PROJECT}_PARTS) + string(REPLACE "/" ";" _rel ${_rel}) + list(GET _rel 0 _dir) + list(APPEND _all_subdirs ${_dir}) + endforeach() + endif() + list(REMOVE_DUPLICATES _all_subdirs) + set_property(GLOBAL PROPERTY NCBI_PTBPROP_ROOT_SUBDIR ${_all_subdirs}) + + if (WIN32 OR XCODE) + foreach(_cfg ${CMAKE_CONFIGURATION_TYPES}) + if (DEFINED _dest_ar) + install( + TARGETS ${NCBI_PROJECT} + EXPORT ${NCBI_PTBCFG_INSTALL_EXPORT}${_cfg} + RUNTIME DESTINATION ${_dest}/${_cfg} + CONFIGURATIONS ${_cfg} + ARCHIVE DESTINATION ${_dest_ar}/${_cfg} + CONFIGURATIONS ${_cfg} + ) + else() + install( + TARGETS ${NCBI_PROJECT} + EXPORT ${NCBI_PTBCFG_INSTALL_EXPORT}${_cfg} + DESTINATION ${_dest}/${_cfg} + CONFIGURATIONS ${_cfg} + ) + endif() + if 
(WIN32 AND _haspdb) + install(FILES $ + DESTINATION ${_dest}/${_cfg} OPTIONAL + CONFIGURATIONS ${_cfg}) + endif() + endforeach() + else() + install( + TARGETS ${NCBI_PROJECT} + EXPORT ${NCBI_PTBCFG_INSTALL_EXPORT} + DESTINATION ${_dest} + ) + endif() +endfunction() + +############################################################################## +function(NCBI_internal_print_project_info _prj) + message("=============================================================================") + get_property(_dir GLOBAL PROPERTY NCBI_PTBPROP_DIR_${_prj}) + message("PROJECT: ${_prj} (${_dir})") + get_property(_x GLOBAL PROPERTY NCBI_PTBPROP_REQUIRES_${_prj}) + if(NOT "${_x}" STREQUAL "") + list(SORT _x) + message(" REQUIRES: ${_x}") + endif() + get_property(_x GLOBAL PROPERTY NCBI_PTBPROP_COMPONENTS_${_prj}) + if(NOT "${_x}" STREQUAL "") + list(SORT _x) + message(" OPTIONAL COMPONENTS: ${_x}") + endif() + get_property(_x GLOBAL PROPERTY NCBI_PTBPROP_DIRECT_DEPS_${_prj}) + if(NOT "${_x}" STREQUAL "") + message(" DEPENDENCIES: ${_x}") + endif() + get_property(_x GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj}) + if(NOT "${_x}" STREQUAL "") + list(SORT _x) + message("-----------------------------------------------------------------------------") + message(" ALL DEPENDENCIES: ${_x}") + foreach(_dep IN LISTS _x) + get_property(_dir GLOBAL PROPERTY NCBI_PTBPROP_DIR_${_dep}) + message(" ${_dep} (${_dir})") + get_property(_y GLOBAL PROPERTY NCBI_PTBPROP_REQUIRES_${_dep}) + if(NOT "${_y}" STREQUAL "") + list(SORT _y) + message(" REQUIRES: ${_y}") + endif() + get_property(_y GLOBAL PROPERTY NCBI_PTBPROP_COMPONENTS_${_dep}) + if(NOT "${_y}" STREQUAL "") + list(SORT _y) + message(" OPTIONAL COMPONENTS: ${_y}") + endif() + get_property(_y GLOBAL PROPERTY NCBI_PTBPROP_DIRECT_DEPS_${_dep}) + if(NOT "${_y}" STREQUAL "") + list(SORT _y) + message(" DEPENDENCIES: ${_y}") + endif() + endforeach() + endif() + get_property(_x GLOBAL PROPERTY NCBI_PTBPROP_IMPLREQ_${_prj}) + if(NOT "${_x}" STREQUAL "") + 
message("-----------------------------------------------------------------------------") + message(" ALL REQUIRED COMPONENTS: ${_x}") + foreach(_req IN LISTS _x) + if (NCBI_REQUIRE_${_req}_FOUND) + elseif (NCBI_COMPONENT_${_req}_FOUND) + message(" ${_req}") + if (DEFINED NCBI_COMPONENT_${_req}_INCLUDE) + message(" INCLUDE: ${NCBI_COMPONENT_${_req}_INCLUDE}") + endif() + if (DEFINED NCBI_COMPONENT_${_req}_DEFINES) + message(" DEFINES: ${NCBI_COMPONENT_${_req}_DEFINES}") + endif() + if (DEFINED NCBI_COMPONENT_${_req}_LIBS) + message(" LIBS: ${NCBI_COMPONENT_${_req}_LIBS}") + endif() + if (DEFINED NCBI_COMPONENT_${_req}_BINPATH) + message(" BINPATH: ${NCBI_COMPONENT_${_req}_BINPATH}") + endif() + else() + message(" ${_req} not found") + endif() + endforeach() + endif() + message("=============================================================================") +endfunction() + ############################################################################## function(NCBI_internal_add_project) @@ -1116,7 +1832,7 @@ function(NCBI_internal_add_project) list(FIND NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PROJECT} _found) if (${_found} LESS "0") if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT}) - message("${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT} is excluded by user's request") + message("${NCBI_PROJECT} (${NCBI_CURRENT_SOURCE_DIR}) is excluded by user's request") endif() if ("${ARGC}" GREATER "0") set(${ARGV0} FALSE PARENT_SCOPE) @@ -1131,7 +1847,7 @@ function(NCBI_internal_add_project) if (NOT "${_hosted}" STREQUAL "") if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT}) - message("${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT_ID} is excluded because it is part of ${_hosted}") + message("${NCBI_PROJECT_ID} (${NCBI_CURRENT_SOURCE_DIR}) is excluded because it is part of ${_hosted}") endif() if ("${ARGC}" GREATER "0") set(${ARGV0} FALSE PARENT_SCOPE) @@ -1144,7 +1860,7 @@ if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT}) 
message("-----------------------------------") message("NCBI_PROJECT = ${NCBI_PROJECT}") message(" TYPE = ${NCBI_${NCBI_PROJECT}_TYPE}") -message(" SOURCES = ${NCBI_${NCBI_PROJECT}_SOURCES}") +message(" SOURCES = ${NCBI_${NCBI_PROJECT}_SOURCES} ${NCBI_${NCBI_PROJECT}_GENERATED_SOURCES}") message(" RESOURCES = ${NCBI_${NCBI_PROJECT}_RESOURCES}") message(" HEADERS = ${NCBI_${NCBI_PROJECT}_HEADERS}") message(" REQUIRES = ${NCBI_${NCBI_PROJECT}_REQUIRES}") @@ -1167,7 +1883,7 @@ endif() NCBI_internal_process_project_requires() if ( NOT "${NCBITMP_REQUIRE_NOTFOUND}" STREQUAL "" AND NOT NCBI_PTBMODE_COLLECT_DEPS) - message("${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT} is excluded because of unmet requirements: ${NCBITMP_REQUIRE_NOTFOUND}") + message("${NCBI_PROJECT} (${NCBI_CURRENT_SOURCE_DIR}) is excluded because of unmet requirements: ${NCBITMP_REQUIRE_NOTFOUND}") if (NOT NCBI_PTBMODE_PARTS) if ("${ARGC}" GREATER "0") set(${ARGV0} FALSE PARENT_SCOPE) @@ -1178,14 +1894,15 @@ endif() NCBI_internal_process_project_components() if ( NOT "${NCBITMP_COMPONENT_NOTFOUND}" STREQUAL "" AND NOT NCBI_PTBMODE_COLLECT_DEPS) - message("${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT}: cannot find optional component: ${NCBITMP_COMPONENT_NOTFOUND}") + message("${NCBI_PROJECT} (${NCBI_CURRENT_SOURCE_DIR}): cannot find optional component: ${NCBITMP_COMPONENT_NOTFOUND}") endif() if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT}) - message("NCBITMP_INCLUDES = ${NCBITMP_INCLUDES}") - message("NCBITMP_DEFINES = ${NCBITMP_DEFINES}") - message("NCBITMP_NCBILIB = ${NCBITMP_NCBILIB}") - message("NCBITMP_EXTLIB = ${NCBITMP_EXTLIB}") + message(" NCBITMP_INCLUDES = ${NCBITMP_INCLUDES}") + message(" NCBITMP_DEFINES = ${NCBITMP_DEFINES}") + message(" NCBITMP_NCBILIB = ${NCBITMP_NCBILIB}") + message(" NCBITMP_EXTLIB = ${NCBITMP_EXTLIB}") endif() + NCBI_internal_verify_ncbilibs() if (NCBI_PTBMODE_COLLECT_DEPS) if (NCBI_PTBMODE_PARTS) @@ -1193,6 +1910,12 @@ endif() set(NCBITMP_PROJECT_PARTS 
${NCBITMP_PROJECT_PARTS} ${NCBI_PROJECT_PARTNAME} PARENT_SCOPE ) endif() set_property(GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${NCBI_PROJECT} "${NCBITMP_NCBILIB}") + set_property(GLOBAL PROPERTY NCBI_PTBPROP_DIRECT_DEPS_${NCBI_PROJECT} "${NCBITMP_NCBILIB}") + set_property(GLOBAL PROPERTY NCBI_PTBPROP_DIR_${NCBI_PROJECT} "${NCBI_CURRENT_SOURCE_DIR}") + if (DEFINED NCBI_${NCBI_PROJECT}_DATASPEC) + set_property(GLOBAL PROPERTY NCBI_PTBPROP_DATASPEC_${NCBI_PROJECT} YES) + endif() + if(NOT NCBI_PTBMODE_PARTS) get_property(_allprojects GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS) set(_allprojects ${_allprojects} ${NCBI_PROJECT}) @@ -1209,6 +1932,14 @@ endif() set(${ARGV0} FALSE PARENT_SCOPE) endif() return() + elseif (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "CONSOLEAPP") + NCBI_internal_process_project_filters(_allowed) + if (NOT _allowed) + if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT}) + message("${NCBI_PROJECT} (${NCBI_CURRENT_SOURCE_DIR}) is excluded by user's request") + endif() + return() + endif() endif() #message("processing ${NCBI_PROJECT_ID}") @@ -1235,26 +1966,38 @@ endif() NCBI_internal_add_dataspec() NCBI_internal_verify_libs() +#---------------------------------------------------------------------------- if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "STATIC") - set(NCBITMP_DEFINES ${NCBITMP_DEFINES} "_LIB") + if (WIN32) + set(NCBITMP_DEFINES ${NCBITMP_DEFINES} "_LIB") + endif() #message("add static library(${NCBI_PROJECT} STATIC ${NCBITMP_PROJECT_SOURCES} ${NCBITMP_PROJECT_HEADERS} ${NCBITMP_PROJECT_RESOURCES} ${NCBITMP_PROJECT_DATASPEC})") #message("add static library ${NCBI_PROJECT}") add_library(${NCBI_PROJECT} STATIC ${NCBITMP_PROJECT_SOURCES} ${NCBITMP_PROJECT_HEADERS} ${NCBITMP_PROJECT_RESOURCES} ${NCBITMP_PROJECT_DATASPEC}) set(_suffix ${CMAKE_STATIC_LIBRARY_SUFFIX}) + if (NOT WIN32 AND BUILD_SHARED_LIBS ) + target_compile_options(${NCBI_PROJECT} PRIVATE -fPIC) + endif() 
+#---------------------------------------------------------------------------- elseif (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "SHARED") - set(NCBITMP_DEFINES ${NCBITMP_DEFINES} "_USRDLL") + if (WIN32) + set(NCBITMP_DEFINES ${NCBITMP_DEFINES} "_USRDLL") + endif() #message("add shared library(${NCBI_PROJECT} SHARED ${NCBITMP_PROJECT_SOURCES} ${NCBITMP_PROJECT_HEADERS} ${NCBITMP_PROJECT_RESOURCES} ${NCBITMP_PROJECT_DATASPEC})") #message("add shared library ${NCBI_PROJECT}") add_library(${NCBI_PROJECT} SHARED ${NCBITMP_PROJECT_SOURCES} ${NCBITMP_PROJECT_HEADERS} ${NCBITMP_PROJECT_RESOURCES} ${NCBITMP_PROJECT_DATASPEC}) set(_suffix ${CMAKE_SHARED_LIBRARY_SUFFIX}) +#---------------------------------------------------------------------------- elseif (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "CONSOLEAPP") if(NCBI_EXPERIMENTAL_CFG) - set(NCBITMP_DEFINES ${NCBITMP_DEFINES} "_CONSOLE") + if (WIN32) + set(NCBITMP_DEFINES ${NCBITMP_DEFINES} "_CONSOLE") + endif() #message("add_executable(${NCBI_PROJECT} ${NCBITMP_PROJECT_SOURCES} ${NCBITMP_PROJECT_HEADERS} ${NCBITMP_PROJECT_RESOURCES} ${NCBITMP_PROJECT_DATASPEC})") #message("add executable ${NCBI_PROJECT}") add_executable(${NCBI_PROJECT} ${NCBITMP_PROJECT_SOURCES} ${NCBITMP_PROJECT_HEADERS} ${NCBITMP_PROJECT_RESOURCES} ${NCBITMP_PROJECT_DATASPEC}) @@ -1264,9 +2007,16 @@ endif() add_executable(${NCBI_PROJECT} ${NCBITMP_PROJECT_SOURCES}) endif() +#---------------------------------------------------------------------------- + elseif (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "CUSTOM") + + variable_watch(NCBI_${NCBI_PROJECT}_CALLBACK ${NCBI_${NCBI_PROJECT}_DEFINITION}) + set(NCBI_${NCBI_PROJECT}_CALLBACK 1) + +#---------------------------------------------------------------------------- else() - message("${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT} unsupported project type ${NCBI_${NCBI_PROJECT}_TYPE}") + message("${NCBI_PROJECT} (${NCBI_CURRENT_SOURCE_DIR}) unsupported project type ${NCBI_${NCBI_PROJECT}_TYPE}") if ("${ARGC}" GREATER "0") 
set(${ARGV0} FALSE PARENT_SCOPE) endif() @@ -1274,30 +2024,65 @@ endif() endif() if (DEFINED NCBI_${NCBI_PROJECT}_OUTPUT) + if(NCBI_EXPERIMENTAL_CFG AND NOT DEFINED NCBI_EXTERNAL_TREE_ROOT AND NOT ${NCBI_PROJECT} STREQUAL ${NCBI_${NCBI_PROJECT}_OUTPUT}) + get_property(_dir GLOBAL PROPERTY NCBI_PTBPROP_DIR_${NCBI_${NCBI_PROJECT}_OUTPUT}) + if (NOT ${_dir} STREQUAL "" AND NOT ${_dir} STREQUAL ${NCBI_CURRENT_SOURCE_DIR}) + message("WARNING: App target ${NCBI_${NCBI_PROJECT}_OUTPUT} (${NCBI_CURRENT_SOURCE_DIR}) cannot be created") + message(" because there is already a target with the same name in ${_dir}") + message(" App target ${NCBI_${NCBI_PROJECT}_OUTPUT} will be renamed into ${NCBI_PROJECT}") + endif() + endif() set_target_properties(${NCBI_PROJECT} PROPERTIES OUTPUT_NAME ${NCBI_${NCBI_PROJECT}_OUTPUT}) endif() + get_property(_count GLOBAL PROPERTY NCBI_PTBPROP_COUNT_${NCBI_${NCBI_PROJECT}_TYPE}) + math(EXPR _count "${_count} + 1") + set_property(GLOBAL PROPERTY NCBI_PTBPROP_COUNT_${NCBI_${NCBI_PROJECT}_TYPE} ${_count}) + if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT}) -message("${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT_ID} added") -message("NCBITMP_PROJECT_SOURCES ${NCBITMP_PROJECT_SOURCES}") -message("NCBITMP_PROJECT_HEADERS ${NCBITMP_PROJECT_HEADERS}") -message("NCBITMP_PROJECT_RESOURCES ${NCBITMP_PROJECT_RESOURCES}") +message(" ADDED: ${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT_ID}") +message(" NCBITMP_PROJECT_SOURCES ${NCBITMP_PROJECT_SOURCES}") +message(" NCBITMP_PROJECT_HEADERS ${NCBITMP_PROJECT_HEADERS}") +message(" NCBITMP_PROJECT_RESOURCES ${NCBITMP_PROJECT_RESOURCES}") +endif() + + if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "CUSTOM") + + get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DIRECT_DEPS_${NCBI_PROJECT}) + if (NOT "${_prjdeps}" STREQUAL "") + add_dependencies(${NCBI_PROJECT} ${_prjdeps}) + endif() + + else() + target_include_directories(${NCBI_PROJECT} PRIVATE ${NCBITMP_INCLUDES}) + 
target_compile_definitions(${NCBI_PROJECT} PRIVATE ${NCBITMP_DEFINES}) + +if(OFF) +# this does not seem to have any effect + if (XCODE AND NOT BUILD_SHARED_LIBS) + set_target_properties(${NCBI_PROJECT} PROPERTIES XCODE_ATTRIBUTE_STANDARD_C_PLUS_PLUS_LIBRARY_TYPE "static") + set_target_properties(${NCBI_PROJECT} PROPERTIES XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "YES") + endif() endif() - target_include_directories(${NCBI_PROJECT} PRIVATE ${NCBITMP_INCLUDES}) - target_compile_definitions(${NCBI_PROJECT} PRIVATE ${NCBITMP_DEFINES}) #message("target_link_libraries: ${NCBI_PROJECT} ${NCBITMP_NCBILIB} ${NCBITMP_EXTLIB}") - target_link_libraries( ${NCBI_PROJECT} ${NCBITMP_NCBILIB} ${NCBITMP_EXTLIB}) + target_link_libraries( ${NCBI_PROJECT} ${NCBITMP_NCBILIB} ${NCBITMP_EXTLIB}) - if (DEFINED _suffix) - set_target_properties( ${NCBI_PROJECT} PROPERTIES PROJECT_LABEL ${NCBI_PROJECT}${_suffix}) + if (DEFINED _suffix) + set_target_properties( ${NCBI_PROJECT} PROPERTIES PROJECT_LABEL ${NCBI_PROJECT}${_suffix}) + endif() + NCBI_internal_define_precompiled_header_usage() endif() - NCBI_internal_define_precompiled_header_usage() if (DEFINED NCBI_ALLTESTS) foreach(_test IN LISTS NCBI_ALLTESTS) NCBI_internal_add_test(${_test}) endforeach() + unset(NCBI_ALLTESTS PARENT_SCOPE) + endif() + + if (NCBI_PTBCFG_DOINSTALL) + NCBI_internal_install_target() endif() if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT}) diff --git a/c++/src/build-system/cmake/CMake.NCBItoolkit.cmake b/c++/src/build-system/cmake/CMake.NCBItoolkit.cmake new file mode 100644 index 00000000..c5ee1d4e --- /dev/null +++ b/c++/src/build-system/cmake/CMake.NCBItoolkit.cmake @@ -0,0 +1,187 @@ +############################################################################# +# $Id: CMake.NCBItoolkit.cmake 576718 2018-12-19 20:56:03Z dicuccio $ +############################################################################# + +if("${CMAKE_GENERATOR}" STREQUAL "Xcode") + if(NOT DEFINED XCODE) + set(XCODE ON) 
+ endif() +endif() + +############################################################################### +# must be set to OFF on trunk +if(NOT DEFINED NCBI_EXPERIMENTAL) + if (DEFINED NCBI_EXTERNAL_TREE_ROOT) + set(NCBI_EXPERIMENTAL ON) + else() + set(NCBI_EXPERIMENTAL OFF) + endif() +endif() + +if("${NCBI_PTBCFG_PROJECT_LIST}" STREQUAL "") + unset(NCBI_PTBCFG_PROJECT_LIST) +endif() +if("${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "") + unset(NCBI_PTBCFG_PROJECT_TAGS) +endif() +if("${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "") + unset(NCBI_PTBCFG_PROJECT_TARGETS) +endif() + +# set(NCBI_PTBCFG_PROJECT_TAGS *;-test) +# set(NCBI_PTBCFG_PROJECT_TARGETS datatool;xcgi$) +# set(NCBI_PTBCFG_PROJECT_LIST corelib serial build-system) +# set(NCBI_VERBOSE_ALLPROJECTS OFF) +# set(NCBI_VERBOSE_PROJECTS xncbi variation_utils) + +if(DEFINED NCBI_VERBOSE_PROJECTS) + foreach(_prj IN LISTS NCBI_VERBOSE_PROJECTS) + set(NCBI_VERBOSE_PROJECT_${_prj} ON) + endforeach() +endif() + +cmake_policy(SET CMP0057 NEW) +if(NCBI_EXPERIMENTAL) + cmake_policy(SET CMP0054 NEW) + + set(NCBI_EXPERIMENTAL_CFG ON) + set(NCBI_EXPERIMENTAL_SUBDIRS ON) + set(NCBI_EXPERIMENTAL_DISABLE_HUNTER ON) + set(NCBI_VERBOSE_ALLPROJECTS OFF) + + if(BUILD_SHARED_LIBS) + if(WIN32 OR XCODE) + set(NCBI_PTBCFG_COMPOSITE_DLL dll) + endif() + endif() + +if(OFF) + if (DEFINED NCBI_PTBCFG_COMPOSITE_DLL + OR DEFINED NCBI_EXTERNAL_TREE_ROOT + OR DEFINED NCBI_VERBOSE_PROJECTS + OR NOT "${NCBI_PTBCFG_PROJECT_LIST}" STREQUAL "" + OR NOT "${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "" + OR NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "") + set(NCBI_PTBCFG_ENABLE_COLLECTOR ON) + endif() +else() + set(NCBI_PTBCFG_ENABLE_COLLECTOR ON) +endif() + + if (NOT "${NCBI_PTBCFG_INSTALL_PATH}" STREQUAL "") + set(NCBI_PTBCFG_DOINSTALL ON) + string(REPLACE "\\" "/" NCBI_PTBCFG_INSTALL_PATH ${NCBI_PTBCFG_INSTALL_PATH}) + set(CMAKE_INSTALL_PREFIX "${NCBI_PTBCFG_INSTALL_PATH}" CACHE STRING "Reset the installation destination" FORCE) + set(NCBI_PTBCFG_INSTALL_TAGS 
"*;-test;-demo") + endif() + set(NCBI_PTBCFG_INSTALL_EXPORT ncbi-cpp-toolkit) + +else() + cmake_policy(SET CMP0054 OLD) + + set(NCBI_EXPERIMENTAL_CFG OFF) + set(NCBI_EXPERIMENTAL_SUBDIRS OFF) + set(NCBI_EXPERIMENTAL_DISABLE_HUNTER OFF) + set(NCBI_VERBOSE_ALLPROJECTS OFF) + set(NCBI_PTBCFG_ENABLE_COLLECTOR OFF) + set(NCBI_PTBCFG_DOINSTALL OFF) +endif() + +if(DEFINED NCBI_PTBCFG_PROJECT_LIST AND EXISTS "${NCBI_PTBCFG_PROJECT_LIST}") + if (NOT IS_DIRECTORY "${NCBI_PTBCFG_PROJECT_LIST}") + file(STRINGS "${NCBI_PTBCFG_PROJECT_LIST}" NCBI_PTBCFG_PROJECT_LIST) + endif() +endif() + +if(DEFINED NCBI_PTBCFG_PROJECT_TAGS AND EXISTS "${NCBI_PTBCFG_PROJECT_TAGS}") + if (NOT IS_DIRECTORY "${NCBI_PTBCFG_PROJECT_TAGS}") + file(STRINGS "${NCBI_PTBCFG_PROJECT_TAGS}" NCBI_PTBCFG_PROJECT_TAGS) + endif() +endif() + +if(DEFINED NCBI_PTBCFG_PROJECT_TARGETS AND EXISTS "${NCBI_PTBCFG_PROJECT_TARGETS}") + if (NOT IS_DIRECTORY "${NCBI_PTBCFG_PROJECT_TARGETS}") + file(STRINGS "${NCBI_PTBCFG_PROJECT_TARGETS}" NCBI_PTBCFG_PROJECT_TARGETS) + endif() +endif() + +############################################################################### +if (OFF) +message("CMAKE_CONFIGURATION_TYPES: ${CMAKE_CONFIGURATION_TYPES}") +set(_cfg_types Debug Release MinSizeRel RelWithDebInfo) + +message("") +message("CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}") +foreach(_cfg ${_cfg_types}) + string(TOUPPER ${_cfg} _upname) + message("CMAKE_C_FLAGS_${_upname}: ${CMAKE_C_FLAGS_${_upname}}") +endforeach() + +message("") +message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") +foreach(_cfg ${_cfg_types}) + string(TOUPPER ${_cfg} _upname) + message("CMAKE_CXX_FLAGS_${_upname}: ${CMAKE_CXX_FLAGS_${_upname}}") +endforeach() + +message("") +message("CMAKE_STATIC_LINKER_FLAGS: ${CMAKE_STATIC_LINKER_FLAGS}") +foreach(_cfg ${_cfg_types}) + string(TOUPPER ${_cfg} _upname) + message("CMAKE_STATIC_LINKER_FLAGS_${_upname}: ${CMAKE_STATIC_LINKER_FLAGS_${_upname}}") +endforeach() + +message("") +message("CMAKE_EXE_LINKER_FLAGS: 
${CMAKE_EXE_LINKER_FLAGS}") +foreach(_cfg ${_cfg_types}) + string(TOUPPER ${_cfg} _upname) + message("CMAKE_EXE_LINKER_FLAGS_${_upname}: ${CMAKE_EXE_LINKER_FLAGS_${_upname}}") +endforeach() + +message("") +message("CMAKE_MODULE_LINKER_FLAGS: ${CMAKE_MODULE_LINKER_FLAGS}") +foreach(_cfg ${_cfg_types}) + string(TOUPPER ${_cfg} _upname) + message("CMAKE_MODULE_LINKER_FLAGS_${_upname}: ${CMAKE_MODULE_LINKER_FLAGS_${_upname}}") +endforeach() + +message("") +message("CMAKE_SHARED_LINKER_FLAGS: ${CMAKE_SHARED_LINKER_FLAGS}") +foreach(_cfg ${_cfg_types}) + string(TOUPPER ${_cfg} _upname) + message("CMAKE_SHARED_LINKER_FLAGS_${_upname}: ${CMAKE_SHARED_LINKER_FLAGS_${_upname}}") +endforeach() +#return() +endif() + +############################################################################### +## Initialize Hunter +## +if (NOT NCBI_EXPERIMENTAL_DISABLE_HUNTER) +if (WIN32) + if (NOT HUNTER_ROOT) + set(HUNTER_ROOT ${CMAKE_SOURCE_DIR}/../HunterPackages) + endif() + include(build-system/cmake/HunterGate.cmake) + HunterGate( + URL "https://github.com/ruslo/hunter/archive/v0.18.39.tar.gz" + SHA1 "a6fbc056c3d9d7acdaa0a07c575c9352951c2f6c" + ) +endif() +endif() + +if (DEFINED NCBI_EXTERNAL_TREE_ROOT) + include(${NCBI_EXTERNAL_TREE_ROOT}/src/build-system/cmake/CMakeMacros.cmake) + include(${NCBI_EXTERNAL_TREE_ROOT}/src/build-system/cmake/CMake.NCBIptb.cmake) + include(${NCBI_EXTERNAL_TREE_ROOT}/src/build-system/cmake/CMakeChecks.cmake) + if (EXISTS ${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}/${NCBI_PTBCFG_INSTALL_EXPORT}.cmake) + include(${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}/${NCBI_PTBCFG_INSTALL_EXPORT}.cmake) + else() + message(FATAL_ERROR "${NCBI_PTBCFG_INSTALL_EXPORT} was not found in ${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}") + endif() + NCBI_internal_import_hostinfo(${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}/${NCBI_PTBCFG_INSTALL_EXPORT}.hostinfo) +else() + include(build-system/cmake/CMakeMacros.cmake) + 
include(build-system/cmake/CMake.NCBIptb.cmake) + include(build-system/cmake/CMakeChecks.cmake) +endif() diff --git a/c++/src/build-system/cmake/CMakeChecks.basic-checks.cmake b/c++/src/build-system/cmake/CMakeChecks.basic-checks.cmake index 58bd6344..65994f92 100644 --- a/c++/src/build-system/cmake/CMakeChecks.basic-checks.cmake +++ b/c++/src/build-system/cmake/CMakeChecks.basic-checks.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeChecks.basic-checks.cmake 563666 2018-05-14 12:55:44Z gouriano $ +# $Id: CMakeChecks.basic-checks.cmake 576924 2018-12-26 20:15:29Z dicuccio $ ############################################################################# # Basic check: headers, functions and types @@ -129,6 +129,10 @@ check_function_exists(strdup HAVE_STRDUP) check_function_exists(strlcat HAVE_STRLCAT) check_function_exists(strlcpy HAVE_STRLCPY) check_function_exists(strndup HAVE_STRNDUP) +check_function_exists(strnlen HAVE_STRNLEN) +check_function_exists(strcasecmp HAVE_STRCASECMP) +check_function_exists(memcchr HAVE_MEMCCHR) +check_function_exists(memrchr HAVE_MEMRCHR) check_function_exists(strsep HAVE_STRSEP) check_function_exists(strtok_r HAVE_STRTOK_R) check_function_exists(timegm HAVE_TIMEGM) diff --git a/c++/src/build-system/cmake/CMakeChecks.cmake b/c++/src/build-system/cmake/CMakeChecks.cmake index 59ce7780..df51d8fd 100644 --- a/c++/src/build-system/cmake/CMakeChecks.cmake +++ b/c++/src/build-system/cmake/CMakeChecks.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeChecks.cmake 568535 2018-08-07 14:27:35Z gouriano $ +# $Id: CMakeChecks.cmake 581275 2019-02-27 15:01:26Z gouriano $ ############################################################################# # # Note: @@ -16,21 +16,60 @@ endif() ############################################################################# # Source tree description # +set(NCBI_DIRNAME_RUNTIME bin) 
+set(NCBI_DIRNAME_ARCHIVE lib) +if (WIN32) + set(NCBI_DIRNAME_SHARED ${NCBI_DIRNAME_RUNTIME}) +else() + set(NCBI_DIRNAME_SHARED ${NCBI_DIRNAME_ARCHIVE}) +endif() +set(NCBI_DIRNAME_SRC src) +set(NCBI_DIRNAME_INCLUDE include) +set(NCBI_DIRNAME_COMMON_INCLUDE common) +set(NCBI_DIRNAME_CFGINC inc) +set(NCBI_DIRNAME_INTERNAL internal) +set(NCBI_DIRNAME_BUILD build) +set(NCBI_DIRNAME_EXPORT cmake) +set(NCBI_DIRNAME_TESTING testing) +set(NCBI_DIRNAME_SCRIPTS scripts) +set(NCBI_DIRNAME_COMMON_SCRIPTS scripts/common) +set(NCBI_DIRNAME_BUILDCFG ${NCBI_DIRNAME_SRC}/build-system) +set(NCBI_DIRNAME_CMAKECFG ${NCBI_DIRNAME_SRC}/build-system/cmake) + + set(top_src_dir ${CMAKE_CURRENT_SOURCE_DIR}/..) set(abs_top_src_dir ${CMAKE_CURRENT_SOURCE_DIR}/..) - get_filename_component(top_src_dir "${top_src_dir}" ABSOLUTE) get_filename_component(abs_top_src_dir "${abs_top_src_dir}" ABSOLUTE) set(build_root ${CMAKE_BINARY_DIR}) set(builddir ${CMAKE_BINARY_DIR}) -set(includedir0 ${top_src_dir}/include) +set(includedir0 ${top_src_dir}/${NCBI_DIRNAME_INCLUDE}) set(includedir ${includedir0}) -set(incdir ${build_root}/inc) -set(incinternal ${includedir0}/internal) +set(incdir ${build_root}/${NCBI_DIRNAME_CFGINC}) +set(incinternal ${includedir0}/${NCBI_DIRNAME_INTERNAL}) + +set(NCBI_TREE_ROOT ${top_src_dir}) +set(NCBI_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) +set(NCBI_INC_ROOT ${includedir0}) + +if (DEFINED NCBI_EXTERNAL_TREE_ROOT) + string(FIND ${CMAKE_BINARY_DIR} ${NCBI_TREE_ROOT} _pos_root) + string(FIND ${CMAKE_BINARY_DIR} ${NCBI_SRC_ROOT} _pos_src) + if(NOT "${_pos_root}" LESS "0" AND "${_pos_src}" LESS "0" AND NOT "${CMAKE_BINARY_DIR}" STREQUAL "${NCBI_TREE_ROOT}") + get_filename_component(NCBI_BUILD_ROOT "${CMAKE_BINARY_DIR}/.." ABSOLUTE) + else() + get_filename_component(NCBI_BUILD_ROOT "${CMAKE_BINARY_DIR}" ABSOLUTE) + endif() +else() + get_filename_component(NCBI_BUILD_ROOT "${CMAKE_BINARY_DIR}/.." 
ABSOLUTE) +endif() + + +set(NCBI_CFGINC_ROOT ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_CFGINC}) if (NCBI_EXPERIMENTAL_CFG) - get_filename_component(incdir "${build_root}/../inc" REALPATH) + get_filename_component(incdir "${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_CFGINC}" ABSOLUTE) if (WIN32) set(incdir ${incdir}/\$\(Configuration\)) elseif (XCODE) @@ -40,31 +79,81 @@ endif() if (NOT IS_DIRECTORY ${incinternal}) set(incinternal "") endif() -set(NCBI_BUILD_ROOT ${CMAKE_BINARY_DIR}) -set(NCBI_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) -set(NCBI_INC_ROOT ${includedir0}) - -get_filename_component(top_src_dir "${top_src_dir}" REALPATH) -get_filename_component(abs_top_src_dir "${abs_top_src_dir}" REALPATH) -get_filename_component(build_root "${build_root}" REALPATH) -get_filename_component(includedir "${includedir}" REALPATH) if (NCBI_EXPERIMENTAL_CFG) - get_filename_component(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${NCBI_BUILD_ROOT}/../bin" REALPATH) - if (WIN32) - get_filename_component(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${NCBI_BUILD_ROOT}/../bin" REALPATH) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_RUNTIME}") + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_SHARED}") + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_ARCHIVE}") +else() + get_filename_component(EXECUTABLE_OUTPUT_PATH "${NCBI_BUILD_ROOT}/bin" ABSOLUTE) + get_filename_component(LIBRARY_OUTPUT_PATH "${NCBI_BUILD_ROOT}/lib" ABSOLUTE) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}") + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${LIBRARY_OUTPUT_PATH}") + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${LIBRARY_OUTPUT_PATH}") +endif() + +if (DEFINED NCBI_EXTERNAL_TREE_ROOT) + set(NCBI_TREE_BUILDCFG "${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_BUILDCFG}") + set(NCBI_TREE_CMAKECFG "${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_CMAKECFG}") + set(NCBI_TREE_COMMON_INCLUDE ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}/common) +else() + set(NCBI_TREE_BUILDCFG 
"${NCBI_TREE_ROOT}/${NCBI_DIRNAME_BUILDCFG}") + set(NCBI_TREE_CMAKECFG "${NCBI_TREE_ROOT}/${NCBI_DIRNAME_CMAKECFG}") + set(NCBI_TREE_COMMON_INCLUDE ${includedir}/common) +endif() + +############################################################################ +# OS-specific settings +include(${NCBI_TREE_CMAKECFG}/CMakeChecks.os.cmake) + +############################################################################# +# Build configurations and compiler definitions +include(${NCBI_TREE_CMAKECFG}/CMakeChecks.compiler.cmake) + +############################################################################# +if (DEFINED NCBI_EXTERNAL_TREE_ROOT) + set(_prebuilt_loc) + if ("${NCBI_COMPILER}" STREQUAL "MSVC") + set(_prebuilt_loc "CMake-vs") + if ("${NCBI_COMPILER_VERSION}" LESS "1900") + set(_prebuilt_loc ${_prebuilt_loc}2015) + else() + set(_prebuilt_loc ${_prebuilt_loc}2017) + endif() + if (BUILD_SHARED_LIBS) + set(_prebuilt_loc ${_prebuilt_loc}/dll) + else() + set(_prebuilt_loc ${_prebuilt_loc}/static) + endif() + elseif(XCODE) + set(_prebuilt_loc "CMake-Xcode${NCBI_COMPILER_VERSION}") + if (BUILD_SHARED_LIBS) + set(_prebuilt_loc ${_prebuilt_loc}/dll) + else() + set(_prebuilt_loc ${_prebuilt_loc}/static) + endif() else() - get_filename_component(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${NCBI_BUILD_ROOT}/../lib" REALPATH) + set(_prebuilt_loc "CMake-${NCBI_COMPILER}${NCBI_COMPILER_VERSION}-${CMAKE_BUILD_TYPE}") + if (BUILD_SHARED_LIBS) + set(_prebuilt_loc ${_prebuilt_loc}DLL) + endif() + endif() + set(NCBI_EXTERNAL_BUILD_ROOT ${NCBI_EXTERNAL_TREE_ROOT}/${_prebuilt_loc}) + + if (IS_DIRECTORY ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}) + set(_ext_includedir0 ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}) + if (IS_DIRECTORY ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}/${NCBI_DIRNAME_INTERNAL}) + set(_ext_incinternal ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}/${NCBI_DIRNAME_INTERNAL}) + endif() endif() - get_filename_component(CMAKE_ARCHIVE_OUTPUT_DIRECTORY 
"${NCBI_BUILD_ROOT}/../lib" REALPATH) + include_directories(${incdir} ${includedir0} ${incinternal} ${_ext_includedir0} ${_ext_incinternal}) else() - get_filename_component(EXECUTABLE_OUTPUT_PATH "${NCBI_BUILD_ROOT}/../bin" REALPATH) - get_filename_component(LIBRARY_OUTPUT_PATH "${NCBI_BUILD_ROOT}/../lib" REALPATH) + include_directories(${incdir} ${includedir0} ${incinternal}) endif() +include_regular_expression("^.*[.](h|hpp|c|cpp|inl|inc)$") -include_directories(${incdir} ${includedir0} ${incinternal}) - -set(CMAKE_MODULE_PATH "${NCBI_SRC_ROOT}/build-system/cmake/" ${CMAKE_MODULE_PATH}) +#set(CMAKE_MODULE_PATH "${NCBI_SRC_ROOT}/build-system/cmake/" ${CMAKE_MODULE_PATH}) +list(APPEND CMAKE_MODULE_PATH "${NCBI_TREE_CMAKECFG}") ############################################################################## # Find datatool app @@ -78,11 +167,10 @@ else() set(NCBI_DATATOOL_BASE "/net/snowman/vol/export2/win-coremake/App/Ncbi/cppcore/datatool/Linux64") endif() -if (EXISTS "${NCBI_SRC_ROOT}/build-system/datatool_version.txt") - FILE(READ "${NCBI_SRC_ROOT}/build-system/datatool_version.txt" _datatool_version) - string(REGEX MATCH "[0-9][0-9.]*[0-9]" _datatool_version "${_datatool_version}") +if (EXISTS "${NCBI_TREE_BUILDCFG}/datatool_version.txt") + FILE(STRINGS "${NCBI_TREE_BUILDCFG}/datatool_version.txt" _datatool_version) else() - set(_datatool_version "2.17.0") + set(_datatool_version "2.18.2") message(WARNING "Failed to find datatool_version.txt, defaulting to version ${_datatool_version})") endif() message(STATUS "Datatool version required by software: ${_datatool_version}") @@ -107,20 +195,12 @@ endif() ############################################################################# # Testing -set(NCBITEST_DRIVER "${NCBI_SRC_ROOT}/build-system/cmake/TestDriver.cmake") +set(NCBITEST_DRIVER "${NCBI_TREE_CMAKECFG}/TestDriver.cmake") enable_testing() -############################################################################ -# OS-specific settings 
-include(${top_src_dir}/src/build-system/cmake/CMakeChecks.os.cmake) - -############################################################################# -# Build configurations and compiler definitions -include(${top_src_dir}/src/build-system/cmake/CMakeChecks.compiler.cmake) - ############################################################################# # Basic checks -include(${top_src_dir}/src/build-system/cmake/CMakeChecks.basic-checks.cmake) +include(${NCBI_TREE_CMAKECFG}/CMakeChecks.basic-checks.cmake) ############################################################################# # Hunter packages for Windows @@ -141,7 +221,7 @@ endif() ############################################################################# # External libraries -include(${top_src_dir}/src/build-system/cmake/CMake.NCBIComponents.cmake) +include(${NCBI_TREE_CMAKECFG}/CMake.NCBIComponents.cmake) ############################################################################# # Generation of configuration files @@ -217,16 +297,18 @@ if (NCBI_EXPERIMENTAL_CFG) set(NCBI_SIGNATURE "${NCBI_COMPILER}_${NCBI_COMPILER_VERSION}-${_cfg}--${HOST}-${_local_host_name}") if (WIN32) - configure_file(${NCBI_SRC_ROOT}/build-system/cmake/ncbiconf_msvc_site.h.in ${NCBI_BUILD_ROOT}/../inc/${_cfg}/common/config/ncbiconf_msvc_site.h) + configure_file(${NCBI_TREE_CMAKECFG}/ncbiconf_msvc_site.h.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/config/ncbiconf_msvc_site.h) elseif (XCODE) - configure_file(${NCBI_SRC_ROOT}/build-system/cmake/ncbiconf_msvc_site.h.in ${NCBI_BUILD_ROOT}/../inc/${_cfg}/common/config/ncbiconf_xcode_site.h) + configure_file(${NCBI_TREE_CMAKECFG}/ncbiconf_msvc_site.h.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/config/ncbiconf_xcode_site.h) + endif() + if (EXISTS ${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in) + configure_file(${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/config/ncbicfg.cfg.c) endif() - configure_file(${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in 
${NCBI_BUILD_ROOT}/../inc/${_cfg}/common/config/ncbicfg.cfg.c) + configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/ncbi_build_ver.h) endforeach() - if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/corelib/ncbicfg.c) - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/corelib/ncbicfg.c "#include \n") + if(NOT EXISTS ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c) + file(WRITE ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c "#include \n") endif() - return() else() #Linux set(c_ncbi_runpath ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) @@ -234,8 +316,12 @@ if (NCBI_EXPERIMENTAL_CFG) set(SYBASE_PATH "") set(NCBI_SIGNATURE "${NCBI_COMPILER}_${NCBI_COMPILER_VERSION}-${NCBI_BUILD_TYPE}--${HOST_CPU}-${HOST_OS_WITH_VERSION}-${_local_host_name}") - configure_file(${NCBI_SRC_ROOT}/build-system/cmake/config.cmake.h.in ${NCBI_BUILD_ROOT}/../inc/ncbiconf_unix.h) - configure_file(${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_BUILD_ROOT}/corelib/ncbicfg.c) + configure_file(${NCBI_TREE_CMAKECFG}/config.cmake.h.in ${NCBI_CFGINC_ROOT}/ncbiconf_unix.h) + if (EXISTS ${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in) + configure_file(${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c) + endif() + + configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/common/ncbi_build_ver.h) endif() else (NCBI_EXPERIMENTAL_CFG) @@ -255,21 +341,21 @@ configure_file(${includedir}/common/ncbi_build_ver.h.in ${includedir}/common/ncb # OS-specific generated header configs if (UNIX) message(STATUS "Generating ${build_root}/inc/ncbiconf_unix.h...") - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/build-system/cmake/config.cmake.h.in ${build_root}/inc/ncbiconf_unix.h) + configure_file(${NCBI_TREE_CMAKECFG}/config.cmake.h.in ${build_root}/inc/ncbiconf_unix.h) set(_os_specific_config ${build_root}/inc/ncbiconf_unix.h) endif(UNIX) if (WIN32) message(STATUS "Generating ${build_root}/inc/ncbiconf_msvc.h...") - 
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/build-system/cmake/config.cmake.h.in ${build_root}/inc/ncbiconf_msvc.h) + configure_file(${NCBI_TREE_CMAKECFG}/config.cmake.h.in ${build_root}/inc/ncbiconf_msvc.h) message(STATUS "Generating ${includedir}/common/config/ncbiconf_msvc_site.h...") - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/build-system/cmake/ncbiconf_msvc_site.h.in ${includedir}/common/config/ncbiconf_msvc_site.h) + configure_file(${NCBI_TREE_CMAKECFG}/ncbiconf_msvc_site.h.in ${includedir}/common/config/ncbiconf_msvc_site.h) set(_os_specific_config ${build_root}/inc/ncbiconf_msvc.h ${includedir}/common/config/ncbiconf_msvc_site.h) endif (WIN32) if (APPLE AND NOT UNIX) #XXX message(STATUS "Generating ${build_root}/inc/ncbiconf_xcode.h...") - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/build-system/cmake/config.cmake.h.in ${build_root}/inc/ncbiconf_xcode.h) + configure_file(${NCBI_TREE_CMAKECFG}/config.cmake.h.in ${build_root}/inc/ncbiconf_xcode.h) set(_os_specific_config ${build_root}/inc/ncbiconf_xcode.h) endif (APPLE AND NOT UNIX) @@ -292,5 +378,5 @@ endif (NCBI_EXPERIMENTAL_CFG) # # Dump our final diagnostics -include(${top_src_dir}/src/build-system/cmake/CMakeChecks.final-message.cmake) +include(${NCBI_TREE_CMAKECFG}/CMakeChecks.final-message.cmake) diff --git a/c++/src/build-system/cmake/CMakeChecks.compiler.cmake b/c++/src/build-system/cmake/CMakeChecks.compiler.cmake index 6b20fd0e..cfc3620d 100644 --- a/c++/src/build-system/cmake/CMakeChecks.compiler.cmake +++ b/c++/src/build-system/cmake/CMakeChecks.compiler.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeChecks.compiler.cmake 574985 2018-11-21 14:54:35Z dicuccio $ +# $Id: CMakeChecks.compiler.cmake 576718 2018-12-19 20:56:03Z dicuccio $ ############################################################################# # # This config is designed to capture all compiler and linker definitions and search parameters @@ -25,15 +25,15 @@ if 
(WIN32) endif() set(CMAKE_CONFIGURATION_TYPES "${CMAKE_CONFIGURATION_TYPES}" CACHE STRING "Reset the configurations" FORCE) - set(CMAKE_CXX_FLAGS_DEBUGDLL "/MDd /Zi /Od /RTC1 /D_DEBUG") - set(CMAKE_CXX_FLAGS_DEBUGMT "/MTd /Zi /Od /RTC1 /D_DEBUG") - set(CMAKE_CXX_FLAGS_RELEASEDLL "/MD /Zi /O2 /Ob1 /DNDEBUG") - set(CMAKE_CXX_FLAGS_RELEASEMT "/MT /Zi /O2 /Ob1 /DNDEBUG") + set(CMAKE_CXX_FLAGS_DEBUGDLL "/MP /MDd /Zi /Od /RTC1 /D_DEBUG") + set(CMAKE_CXX_FLAGS_DEBUGMT "/MP /MTd /Zi /Od /RTC1 /D_DEBUG") + set(CMAKE_CXX_FLAGS_RELEASEDLL "/MP /MD /Zi /O2 /Ob1 /DNDEBUG") + set(CMAKE_CXX_FLAGS_RELEASEMT "/MP /MT /Zi /O2 /Ob1 /DNDEBUG") - set(CMAKE_C_FLAGS_DEBUGDLL "/MDd /Zi /Od /RTC1 /D_DEBUG") - set(CMAKE_C_FLAGS_DEBUGMT "/MTd /Zi /Od /RTC1 /D_DEBUG") - set(CMAKE_C_FLAGS_RELEASEDLL "/MD /Zi /O2 /Ob1 /DNDEBUG") - set(CMAKE_C_FLAGS_RELEASEMT "/MT /Zi /O2 /Ob1 /DNDEBUG") + set(CMAKE_C_FLAGS_DEBUGDLL "/MP /MDd /Zi /Od /RTC1 /D_DEBUG") + set(CMAKE_C_FLAGS_DEBUGMT "/MP /MTd /Zi /Od /RTC1 /D_DEBUG") + set(CMAKE_C_FLAGS_RELEASEDLL "/MP /MD /Zi /O2 /Ob1 /DNDEBUG") + set(CMAKE_C_FLAGS_RELEASEMT "/MP /MT /Zi /O2 /Ob1 /DNDEBUG") set(CMAKE_EXE_LINKER_FLAGS_DEBUGDLL "/DEBUG /INCREMENTAL:NO") set(CMAKE_EXE_LINKER_FLAGS_DEBUGMT "/DEBUG /INCREMENTAL:NO") @@ -52,8 +52,8 @@ if (WIN32) set(NCBI_DEFAULT_USEPCH ON) set(NCBI_DEFAULT_PCH_DEFINE "NCBI_USE_PCH") - set(NCBI_DEFAULT_RESOURCES "${NCBI_SRC_ROOT}/build-system/cmake/ncbi.rc") - set(NCBI_DEFAULT_DLLENTRY "${NCBI_SRC_ROOT}/build-system/cmake/dll_main.cpp") + set(NCBI_DEFAULT_RESOURCES "${NCBI_TREE_CMAKECFG}/ncbi.rc") + set(NCBI_DEFAULT_DLLENTRY "${NCBI_TREE_CMAKECFG}/dll_main.cpp") set(ORIG_LIBS ws2_32.lib) return() @@ -70,15 +70,15 @@ elseif (XCODE) endif() set(CMAKE_CONFIGURATION_TYPES "${CMAKE_CONFIGURATION_TYPES}" CACHE STRING "Reset the configurations" FORCE) - set(CMAKE_CXX_FLAGS_DEBUGDLL "-gdwarf-4 -ggdb3 -O0 -D_DEBUG") - set(CMAKE_CXX_FLAGS_DEBUGMT "-gdwarf-4 -ggdb3 -O0 -D_DEBUG") - set(CMAKE_CXX_FLAGS_RELEASEDLL "-gdwarf-4 -ggdb1 -O3 
-DNDEBUG") - set(CMAKE_CXX_FLAGS_RELEASEMT "-gdwarf-4 -ggdb1 -O3 -DNDEBUG") + set(CMAKE_CXX_FLAGS_DEBUGDLL "-g -gdwarf -ggdb3 -O0 -D_DEBUG") + set(CMAKE_CXX_FLAGS_DEBUGMT "-g -gdwarf -ggdb3 -O0 -D_DEBUG") + set(CMAKE_CXX_FLAGS_RELEASEDLL "-Os -DNDEBUG") + set(CMAKE_CXX_FLAGS_RELEASEMT "-Os -DNDEBUG") - set(CMAKE_C_FLAGS_DEBUGDLL "-gdwarf-4 -ggdb3 -g -O0 -D_DEBUG") - set(CMAKE_C_FLAGS_DEBUGMT "-gdwarf-4 -ggdb3 -g -O0 -D_DEBUG") - set(CMAKE_C_FLAGS_RELEASEDLL "-gdwarf-4 -ggdb1 -O3 -DNDEBUG") - set(CMAKE_C_FLAGS_RELEASEMT "-gdwarf-4 -ggdb1 -O3 -DNDEBUG") + set(CMAKE_C_FLAGS_DEBUGDLL "-g -gdwarf -ggdb3 -O0 -D_DEBUG") + set(CMAKE_C_FLAGS_DEBUGMT "-g -gdwarf -ggdb3 -O0 -D_DEBUG") + set(CMAKE_C_FLAGS_RELEASEDLL "-Os -DNDEBUG") + set(CMAKE_C_FLAGS_RELEASEMT "-Os -DNDEBUG") set(CMAKE_EXE_LINKER_FLAGS_DEBUGDLL "-stdlib=libc++ -framework CoreServices") set(CMAKE_EXE_LINKER_FLAGS_DEBUGMT "-stdlib=libc++ -framework CoreServices") @@ -93,6 +93,8 @@ elseif (XCODE) set(NCBI_POSIX_THREADS 1) endif (CMAKE_USE_PTHREADS_INIT) + set(NCBI_DEFAULT_USEPCH ON) + return() #---------------------------------------------------------------------------- diff --git a/c++/src/build-system/cmake/CMakeChecks.final-message.cmake b/c++/src/build-system/cmake/CMakeChecks.final-message.cmake index dde68774..249708c2 100644 --- a/c++/src/build-system/cmake/CMakeChecks.final-message.cmake +++ b/c++/src/build-system/cmake/CMakeChecks.final-message.cmake @@ -26,25 +26,33 @@ function(ShowMainBoilerplate) message("Shared Libs: ${BUILD_SHARED_LIBS}") message("Top Source Dir: ${top_src_dir}") message("Build Root: ${build_root}") - message("Executable Dir: ${EXECUTABLE_OUTPUT_PATH}") - message("Library Dir: ${LIBRARY_OUTPUT_PATH}") + if (NCBI_EXPERIMENTAL_CFG) + message("Executable Dir: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") + message("Archive Dir: ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}") + message("Library Dir: ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") + else() + message("Executable Dir: ${EXECUTABLE_OUTPUT_PATH}") + 
message("Library Dir: ${LIBRARY_OUTPUT_PATH}") + endif() message("C Compiler: ${CMAKE_C_COMPILER}") message("C++ Compiler: ${CMAKE_CXX_COMPILER}") - if (CMAKE_USE_DISTCC AND DISTCC_EXECUTABLE) message(" distcc: ${DISTCC_EXECUTABLE}") endif() if (CMAKE_USE_CCACHE AND CCACHE_EXECUTABLE) message(" ccache: ${CCACHE_EXECUTABLE}") endif() - message("CFLAGS: ${CMAKE_C_FLAGS}") message("CXXFLAGS: ${CMAKE_CXX_FLAGS}") message("Compile Flags: ${DefsStr}") message("DataTool Ver: ${_datatool_version}") message("DataTool Path: ${NCBI_DATATOOL}") message("") - message("Modules Found: ${MOD_STR}") + if (NCBI_EXPERIMENTAL_CFG) + message("Components Found: ${NCBI_ALL_COMPONENTS}") + else() + message("Modules Found: ${MOD_STR}") + endif() message("------------------------------------------------------------------------------") message("") @@ -52,7 +60,8 @@ endfunction() ShowMainBoilerplate() -add_custom_target(NAME show-config +if (NOT NCBI_EXPERIMENTAL_CFG) +add_custom_target(show-config COMMAND ${CMAKE_COMMAND} -e echo "ShowMainBoilerplate()" COMMAND ${CMAKE_COMMAND} -e echo "PCRE: ${PCRE_LIBRARIES}" COMMAND ${CMAKE_COMMAND} -e echo "Boost: ${Boost_INCLUDE_DIRS}" @@ -65,5 +74,4 @@ add_custom_target(NAME show-config COMMAND ${CMAKE_COMMAND} -e echo "GnuTLS include: ${GNUTLS_INCLUDE}" COMMAND ${CMAKE_COMMAND} -e echo "${EXTERNAL_LIBRARIES_COMMENT}" ) - - +endif() diff --git a/c++/src/build-system/cmake/FindBerkeleyDB.cmake b/c++/src/build-system/cmake/FindBerkeleyDB.cmake index 57a6f1db..53f58c99 100644 --- a/c++/src/build-system/cmake/FindBerkeleyDB.cmake +++ b/c++/src/build-system/cmake/FindBerkeleyDB.cmake @@ -10,6 +10,7 @@ FIND_PATH( BERKELEYDB_INCLUDE_DIR db.h PATHS ${NCBI_TOOLS_ROOT}/BerkeleyDB/include/ /usr/local/include /usr/include + NO_DEFAULT_PATH ) IF (BERKELEYDB_INCLUDE_DIR) @@ -18,6 +19,7 @@ IF (BERKELEYDB_INCLUDE_DIR) "${NCBI_TOOLS_ROOT}/BerkeleyDB/${BUILD_PREFIX}${CMAKE_BUILD_TYPE}/" /usr/local/lib /usr/lib + NO_DEFAULT_PATH ) IF (BERKELEYDB_LIBRARY) diff --git 
a/c++/src/build-system/cmake/TestDriver.cmake b/c++/src/build-system/cmake/TestDriver.cmake index 1fa6d526..e24c5743 100644 --- a/c++/src/build-system/cmake/TestDriver.cmake +++ b/c++/src/build-system/cmake/TestDriver.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: TestDriver.cmake 568535 2018-08-07 14:27:35Z gouriano $ +# $Id: TestDriver.cmake 576718 2018-12-19 20:56:03Z dicuccio $ ############################################################################# ############################################################################# ## @@ -15,8 +15,7 @@ if (NOT "${NCBITEST_ASSETS}" STREQUAL "") list(REMOVE_DUPLICATES NCBITEST_ASSETS) foreach(_res IN LISTS NCBITEST_ASSETS) if (NOT EXISTS ${NCBITEST_SOURCEDIR}/${_res}) - message(SEND_ERROR "Test ${NCBITEST_NAME} ERROR: asset ${NCBITEST_SOURCEDIR}/${_res} not found") - return() + message(WARNING "Test ${NCBITEST_NAME} WARNING: test asset ${NCBITEST_SOURCEDIR}/${_res} not found") endif() endforeach() endif() @@ -24,24 +23,62 @@ endif() string(RANDOM _subdir) set(_subdir ${NCBITEST_NAME}_${_subdir}) -file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${_subdir}) -set(_workdir ${CMAKE_CURRENT_BINARY_DIR}/${_subdir}) -set(_output ${CMAKE_CURRENT_BINARY_DIR}/_test_out.${NCBITEST_NAME}.txt) +file(MAKE_DIRECTORY ${NCBITEST_OUTDIR}/${_subdir}) +set(_workdir ${NCBITEST_OUTDIR}/${_subdir}) +set(_output ${NCBITEST_OUTDIR}/${NCBITEST_NAME}.output.txt) if(EXISTS ${_output}) file(REMOVE ${_output}) endif() foreach(_res IN LISTS NCBITEST_ASSETS) - file(COPY ${NCBITEST_SOURCEDIR}/${_res} DESTINATION ${_workdir}) + if (EXISTS ${NCBITEST_SOURCEDIR}/${_res}) + file(COPY ${NCBITEST_SOURCEDIR}/${_res} DESTINATION ${_workdir}) + endif() endforeach() if(WIN32) - string(REPLACE "/" "\\" NCBITEST_BINDIR ${NCBITEST_BINDIR}) - set(ENV{PATH} "${NCBITEST_BINDIR}\\${NCBITEST_CONFIG};$ENV{PATH}") + set(_scripts "\\\\snowman\\win-coremake\\Scripts\\internal_scripts\\cpp_common\\impl") + 
set(_testdata "\\\\snowman\\win-coremake\\scripts\\test_data") + string(REPLACE "/" "\\" _cfg_bin "${NCBITEST_BINDIR}") + set(_cfg_lib "${NCBITEST_BINDIR}\\..\\lib") + if(EXISTS "${_scripts}") + set(_scripts "${_scripts};") + else() + unset(_scripts) + endif() else() - set(ENV{PATH} ".:${NCBITEST_BINDIR}:$ENV{PATH}") + set(_scripts "/am/ncbiapdata/scripts/cpp_common/impl") + set(_testdata "/am/ncbiapdata/test_data") + set(_cfg_bin "${NCBITEST_BINDIR}") + set(_cfg_lib "${NCBITEST_BINDIR}/../lib") + if(EXISTS "${_scripts}") + set(_scripts "${_scripts}:") + else() + unset(_scripts) + endif() endif() +if(EXISTS "${_testdata}") + set(ENV{NCBI_TEST_DATA} "${_testdata}") +endif() + +if(WIN32) + set(ENV{PATH} "${_cfg_bin}\\${NCBITEST_CONFIG};${_cfg_lib}\\${NCBITEST_CONFIG};${_scripts}$ENV{PATH}") + set(ENV{CFG_BIN} "${_cfg_bin}\\${NCBITEST_CONFIG}") + set(ENV{CFG_LIB} "${_cfg_lib}\\${NCBITEST_CONFIG}") +elseif(XCODE) + set(ENV{PATH} ".:${_cfg_bin}/${NCBITEST_CONFIG}:${_cfg_lib}/${NCBITEST_CONFIG}:${_scripts}$ENV{PATH}") + set(ENV{CFG_BIN} "${_cfg_bin}/${NCBITEST_CONFIG}") + set(ENV{CFG_LIB} "${_cfg_lib}/${NCBITEST_CONFIG}") +# set(ENV{CHECK_EXEC} "time") +else() + set(ENV{PATH} ".:${_cfg_bin}:${_cfg_lib}:${_scripts}$ENV{PATH}") + set(ENV{CFG_BIN} "${_cfg_bin}") + set(ENV{CFG_LIB} "${_cfg_lib}") +# set(ENV{CHECK_EXEC} "time") +endif() +set(ENV{CHECK_EXEC} " ") + set(_result "1") execute_process( COMMAND ${NCBITEST_COMMAND} ${NCBITEST_ARGS} @@ -54,5 +91,5 @@ execute_process( file(REMOVE_RECURSE ${_workdir}) if (NOT ${_result} EQUAL "0") - message(SEND_ERROR "Test ${NCBITEST_NAME} failed") + message(SEND_ERROR "Test ${NCBITEST_NAME} failed (error=${_result})") endif() diff --git a/c++/src/build-system/cmake/cmake-cfg-unix.sh b/c++/src/build-system/cmake/cmake-cfg-unix.sh index 662d8939..a12290d3 100755 --- a/c++/src/build-system/cmake/cmake-cfg-unix.sh +++ b/c++/src/build-system/cmake/cmake-cfg-unix.sh @@ -1,7 +1,8 @@ #!/bin/sh 
############################################################################# -# $Id: cmake-cfg-unix.sh 568553 2018-08-07 16:41:47Z gouriano $ +# $Id: cmake-cfg-unix.sh 576718 2018-12-19 20:56:03Z dicuccio $ # Configure NCBI C++ toolkit using CMake build system. +# Author: Andrei Gourianov, gouriano@ncbi ############################################################################# initial_dir=`pwd` script_name=`basename $0` @@ -9,6 +10,10 @@ script_dir=`dirname $0` script_dir=`(cd "${script_dir}" ; pwd)` tree_root=`pwd` +host_os=`uname` +if test $host_os = "Darwin"; then + CMAKE_CMD=/Applications/CMake.app/Contents/bin/cmake +fi ############################################################################# if [ -z "${CMAKE_CMD}" ]; then CMAKE_CMD=`which cmake 2>/dev/null` @@ -30,8 +35,13 @@ if [ -z "$CXX" ]; then fi fi if [ -n "$CC" ]; then - CC_NAME=`$CC --version | awk 'NR==1{print $2}' | sed 's/[()]//g'` - CC_VERSION=`$CC --version | awk 'NR==1{print $3}' | sed 's/[.]//g'` + if test $host_os = "Darwin"; then + CC_NAME=`$CC --version 2>/dev/null | awk 'NR==1{print $2}'` + CC_VERSION=`$CC --version 2>/dev/null | awk 'NR==1{print $4}' | sed 's/[.]//g'` + else + CC_NAME=`$CC --version | awk 'NR==1{print $2}' | sed 's/[()]//g'` + CC_VERSION=`$CC --version | awk 'NR==1{print $3}' | sed 's/[.]//g'` + fi fi ############################################################################# @@ -57,14 +67,22 @@ OPTIONS: --without-dll -- build all libraries as static ones (default) --with-dll -- build all libraries as shared ones, unless explicitely requested otherwise + --with-projects="FILE" -- build projects listed in ${tree_root}/FILE + FILE can also be a list of subdirectories of ${tree_root}/src + examples: --with-projects="corelib$;serial" + --with-projects=scripts/projects/ncbi_cpp.lst + --with-tags="tags" -- build projects which have allowed tags only + examples: --with-tags="*;-test" + --with-targets="names" -- build projects which have allowed names only + examples: 
--with-targets="datatool;xcgi$" + --with-details="names" -- print detailed information about projects + examples: --with-details="datatool;test_hash" --with-ccache -- use ccache if available --with-distcc -- use distcc if available + --with-install="DIR" -- generate rules for installation into DIR directory + examples: --with-install="/usr/CPP_toolkit" --without-experimental -- disable experimental configuration - --with-projects='FILE' -- build projects listed in ${tree_root}/FILE - FILE can also be a list of subdirectories of ${tree_root}/src - examples: --with-projects='corelib$;serial' - --with-projects=scripts/projects/ncbi_cpp.lst - --with-generator='X' -- use generator X + --with-generator="X" -- use generator X EOF generatorfound="" @@ -95,15 +113,18 @@ Quote() { ############################################################################# # parse arguments +do_help="no" while [ $# != 0 ]; do case "$1" in - --help) - Usage - exit 0 + --help|-help|help) + do_help="yes" ;; - --srcdir=*) + --rootdir=*) tree_root=`(cd "${1#*=}" ; pwd)` ;; + --caller=*) + script_name=${1#*=} + ;; --with-static|--without-dll) BUILD_SHARED_LIBS=OFF ;; @@ -140,13 +161,47 @@ while [ $# != 0 ]; do project_list="${tree_root}/$project_list" fi ;; + --with-tags=*) + project_tags=${1#*=} + if [ -e "${tree_root}/$project_tags" ]; then + project_tags="${tree_root}/$project_tags" + fi + ;; + --with-targets=*) + project_targets=${1#*=} + if [ -e "${tree_root}/$project_targets" ]; then + project_targets="${tree_root}/$project_targets" + fi + ;; + --with-details=*) + project_details=${1#*=} + ;; + --with-install=*) + install_path=${1#*=} + ;; *) Error "unknown option: $1" ;; esac shift done +if [ $do_help = "yes" ]; then + Usage + exit 0 +fi +############################################################################# +if test "$generator" = "Xcode"; then + XC=`which xcodebuild 2>/dev/null` + if test $? 
-ne 0; then + echo ERROR: xcodebuild is not found + exit 1 + fi + CC_NAME=Xcode + CC_VERSION=`xcodebuild -version | awk 'NR==1{print $2}'` + CC= + CXX= +fi ############################################################################# CMAKE_ARGS=-DNCBI_EXPERIMENTAL=${NCBI_EXPERIMENTAL} @@ -161,15 +216,30 @@ if [ -n "$generator" ]; then CMAKE_ARGS="$CMAKE_ARGS -G $(Quote "$generator")" fi CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_LIST=$(Quote "${project_list}")" +CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_TAGS=$(Quote "${project_tags}")" +CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_TARGETS=$(Quote "${project_targets}")" +CMAKE_ARGS="$CMAKE_ARGS -DNCBI_VERBOSE_PROJECTS=$(Quote "${project_details}")" +if [ -n "$install_path" ]; then + CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_INSTALL_PATH=$(Quote "${install_path}")" +fi CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_BUILD_TYPE=${BUILD_TYPE}" CMAKE_ARGS="$CMAKE_ARGS -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS" -CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_USE_CCACHE=$USE_CCACHE" -CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_USE_DISTCC=$USE_DISTCC" -build_root=CMake-${CC_NAME}${CC_VERSION}-${BUILD_TYPE} -if [ "$BUILD_SHARED_LIBS" == "ON" ]; then - build_root="$build_root"DLL -fi +if test "$generator" = "Xcode"; then + build_root=CMake-${CC_NAME}${CC_VERSION} + if [ "$BUILD_SHARED_LIBS" == "ON" ]; then + build_root="$build_root"/dll + else + build_root="$build_root"/static + fi +else + CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_USE_CCACHE=$USE_CCACHE" + CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_USE_DISTCC=$USE_DISTCC" + build_root=CMake-${CC_NAME}${CC_VERSION}-${BUILD_TYPE} + if [ "$BUILD_SHARED_LIBS" == "ON" ]; then + build_root="$build_root"DLL + fi #build_root="$build_root"64 +fi mkdir -p ${tree_root}/${build_root}/build cd ${tree_root}/${build_root}/build @@ -181,6 +251,6 @@ cd ${tree_root}/${build_root}/build # rm CMakeCache.txt #fi -echo Running "${CMAKE_CMD}" ${CMAKE_ARGS} "${tree_root}/src" +#echo Running "${CMAKE_CMD}" ${CMAKE_ARGS} "${tree_root}/src" eval "${CMAKE_CMD}" 
${CMAKE_ARGS} "${tree_root}/src" cd ${initial_dir} diff --git a/c++/src/build-system/cmake/cmake-cfg-vs.bat b/c++/src/build-system/cmake/cmake-cfg-vs.bat index f1b38e12..49576f84 100644 --- a/c++/src/build-system/cmake/cmake-cfg-vs.bat +++ b/c++/src/build-system/cmake/cmake-cfg-vs.bat @@ -1,8 +1,9 @@ @echo off -setlocal +setlocal ENABLEDELAYEDEXPANSION REM ######################################################################### -REM $Id: cmake-cfg-vs.bat 568553 2018-08-07 16:41:47Z gouriano $ +REM $Id: cmake-cfg-vs.bat 576718 2018-12-19 20:56:03Z dicuccio $ REM Configure NCBI C++ toolkit for Visual Studio using CMake build system. +REM Author: Andrei Gourianov, gouriano@ncbi REM ######################################################################### set initial_dir=%CD% @@ -28,6 +29,7 @@ set VISUAL_STUDIO=2017 goto :RUN REM ######################################################################### +REM when specifying path, both "/" and "\" are allowed :USAGE echo USAGE: @@ -44,9 +46,17 @@ echo FILE can also be a list of subdirectories of echo %tree_root%\src echo examples: --with-projects="corelib$;serial" echo --with-projects=scripts/projects/ncbi_cpp.lst +echo --with-tags="tags" -- build projects which have allowed tags only +echo examples: --with-tags="*;-test" +echo --with-targets="names" -- build projects which have allowed names only +echo examples: --with-targets="datatool;xcgi$" +echo --with-details="names" -- print detailed information about projects +echo examples: --with-details="datatool;test_hash" echo --with-vs=N -- use Visual Studio N generator echo examples: --with-vs=2017 (default) echo --with-vs=2015 +echo --with-install="DIR" -- generate rules for installation into "DIR" directory +echo examples: --with-install="D:\CPP toolkit" echo --with-generator="X" -- use generator X echo: @@ -83,25 +93,35 @@ goto :eof REM ######################################################################### REM parse arguments +set do_help= set unknown= -set dest= 
:PARSEARGS if "%~1"=="" goto :ENDPARSEARGS -if "%dest%"=="lst" (set project_list=%~1& set dest=& goto :CONTINUEPARSEARGS) -if "%dest%"=="vs" (set VISUAL_STUDIO=%~1& set dest=& goto :CONTINUEPARSEARGS) -if "%dest%"=="gen" (set generator=%~1& set dest=& goto :CONTINUEPARSEARGS) -if "%1"=="--help" (call :USAGE& exit /b 0) -if "%1"=="--without-dll" (set BUILD_SHARED_LIBS=OFF& goto :CONTINUEPARSEARGS) -if "%1"=="--with-dll" (set BUILD_SHARED_LIBS=ON& goto :CONTINUEPARSEARGS) -if "%1"=="--with-projects" (set dest=lst& goto :CONTINUEPARSEARGS) -if "%1"=="--with-vs" (set dest=vs& goto :CONTINUEPARSEARGS) -if "%1"=="--with-generator" (set dest=gen& goto :CONTINUEPARSEARGS) +if "%1"=="--help" (set do_help=YES& goto :CONTINUEPARSEARGS) +if "%1"=="-help" (set do_help=YES& goto :CONTINUEPARSEARGS) +if "%1"=="help" (set do_help=YES& goto :CONTINUEPARSEARGS) +if "%1"=="--rootdir" (set tree_root=%~2& shift& goto :CONTINUEPARSEARGS) +if "%1"=="--caller" (set script_name=%~2& shift& goto :CONTINUEPARSEARGS) +if "%1"=="--without-dll" (set BUILD_SHARED_LIBS=OFF& goto :CONTINUEPARSEARGS) +if "%1"=="--with-dll" (set BUILD_SHARED_LIBS=ON& goto :CONTINUEPARSEARGS) +if "%1"=="--with-projects" (set project_list=%~2& shift& goto :CONTINUEPARSEARGS) +if "%1"=="--with-tags" (set project_tags=%~2& shift& goto :CONTINUEPARSEARGS) +if "%1"=="--with-targets" (set project_targets=%~2& shift& goto :CONTINUEPARSEARGS) +if "%1"=="--with-details" (set project_details=%~2& shift& goto :CONTINUEPARSEARGS) +if "%1"=="--with-vs" (set VISUAL_STUDIO=%~2& shift& goto :CONTINUEPARSEARGS) +if "%1"=="--with-install" (set INSTALL_PATH=%~2& shift& goto :CONTINUEPARSEARGS) +if "%1"=="--with-generator" (set generator=%~2& shift& goto :CONTINUEPARSEARGS) set unknown=%unknown% %1 :CONTINUEPARSEARGS shift goto :PARSEARGS :ENDPARSEARGS +if not "%do_help%"=="" ( + call :USAGE + goto :DONE +) + if not "%unknown%"=="" ( call :ERROR unknown option: %unknown% goto :DONE @@ -125,6 +145,16 @@ if not "%project_list%"=="" ( set 
project_list=%tree_root%\%project_list% ) ) +if not "%project_tags%"=="" ( + if exist "%tree_root%\%project_tags%" ( + set project_tags=%tree_root%\%project_tags% + ) +) +if not "%project_targets%"=="" ( + if exist "%tree_root%\%project_targets%" ( + set project_targets=%tree_root%\%project_targets% + ) +) REM ######################################################################### @@ -134,28 +164,27 @@ if not "%generator%"=="" ( set CMAKE_ARGS=%CMAKE_ARGS% -G "%generator%" ) set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_PROJECT_LIST="%project_list%" +set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_PROJECT_TAGS="%project_tags%" +set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_PROJECT_TARGETS="%project_targets%" +set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_VERBOSE_PROJECTS="%project_details%" +if not "%INSTALL_PATH%"=="" ( + set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_INSTALL_PATH="%INSTALL_PATH%" +) set CMAKE_ARGS=%CMAKE_ARGS% -DBUILD_SHARED_LIBS=%BUILD_SHARED_LIBS% -set build_root=compilers\CMake-%generator_name% +set build_root=CMake-%generator_name% if "%BUILD_SHARED_LIBS%"=="ON" ( set build_root=%build_root%\dll - set project_name=ncbi_cpp_dll ) else ( set build_root=%build_root%\static - set project_name=ncbi_cpp ) -set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_CMAKEPROJECT_NAME=%project_name% if not exist "%tree_root%\%build_root%\build" ( mkdir "%tree_root%\%build_root%\build" ) cd "%tree_root%\%build_root%\build" -REM if exist "CMakeCache.txt" ( -REM del CMakeCache.txt -REM ) - -echo Running "%CMAKE_CMD%" %CMAKE_ARGS% "%tree_root%\src" +REM echo Running "%CMAKE_CMD%" %CMAKE_ARGS% "%tree_root%\src" "%CMAKE_CMD%" %CMAKE_ARGS% "%tree_root%\src" :DONE diff --git a/c++/src/build-system/cmake/cmake-cfg-xcode.sh b/c++/src/build-system/cmake/cmake-cfg-xcode.sh new file mode 100755 index 00000000..2818d229 --- /dev/null +++ b/c++/src/build-system/cmake/cmake-cfg-xcode.sh @@ -0,0 +1,175 @@ +#!/bin/sh +############################################################################# +# $Id: 
cmake-cfg-xcode.sh 576718 2018-12-19 20:56:03Z dicuccio $ +# Configure NCBI C++ toolkit for XCode using CMake build system. +# Author: Andrei Gourianov, gouriano@ncbi +############################################################################# +initial_dir=`pwd` +script_name=`basename $0` +script_dir=`dirname $0` +script_dir=`(cd "${script_dir}" ; pwd)` +tree_root=`pwd` + +############################################################################# +if [ -z "${CMAKE_CMD}" ]; then + CMAKE_CMD=/Applications/CMake.app/Contents/bin/cmake +fi +if test ! -x $CMAKE_CMD; then + CMAKE_CMD=`which cmake 2>/dev/null` + if test $? -ne 0; then + echo ERROR: CMake is not found + exit 1 + fi +fi + +############################################################################# +# defaults + +BUILD_SHARED_LIBS="OFF" + +############################################################################# +Usage() +{ + cat <&2 +USAGE: + $script_name [OPTION]... +SYNOPSIS: + Configure NCBI C++ toolkit for XCode using CMake build system. 
+OPTIONS: + --help -- print Usage + --without-dll -- build all libraries as static ones (default) + --with-dll -- build all libraries as shared ones, + unless explicitely requested otherwise + --with-projects="FILE" -- build projects listed in ${tree_root}/FILE + FILE can also be a list of subdirectories of ${tree_root}/src + examples: --with-projects="corelib$;serial" + --with-projects=scripts/projects/ncbi_cpp.lst + --with-tags="tags" -- build projects which have allowed tags only + examples: --with-tags="*;-test" + --with-targets="names" -- build projects which have allowed names only + examples: --with-targets="datatool;xcgi$" + --with-details="names" -- print detailed information about projects + examples: --with-details="datatool;test_hash" + --with-install="DIR" -- generate rules for installation into DIR directory + examples: --with-install="/usr/CPP_toolkit" +EOF + + generatorfound="" + "${CMAKE_CMD}" --help | while IFS= read -r line; do + if [ -z $generatorfound ]; then + if [ "$line" = "Generators" ]; then + generatorfound=yes + fi + else + echo "$line" + fi + done +} + +# has one optional argument: error message +Error() +{ + Usage + test -z "$1" || echo ERROR: $1 1>&2 + cd "$initial_dir" + exit 1 +} + +Quote() { + echo "$1" | sed -e "s|'|'\\\\''|g; 1s/^/'/; \$s/\$/'/" +} + +############################################################################# +# parse arguments + +do_help="no" +generator=Xcode +while [ $# != 0 ]; do + case "$1" in + --help|-help|help) + do_help="yes" + ;; + --rootdir=*) + tree_root=`(cd "${1#*=}" ; pwd)` + ;; + --caller=*) + script_name=${1#*=} + ;; + --without-dll) + BUILD_SHARED_LIBS=OFF + ;; + --with-dll) + BUILD_SHARED_LIBS=ON + ;; + --with-projects=*) + project_list=${1#*=} + if [ -e "${tree_root}/$project_list" ]; then + project_list="${tree_root}/$project_list" + fi + ;; + --with-tags=*) + project_tags=${1#*=} + if [ -e "${tree_root}/$project_tags" ]; then + project_tags="${tree_root}/$project_tags" + fi + ;; + 
--with-targets=*) + project_targets=${1#*=} + if [ -e "${tree_root}/$project_targets" ]; then + project_targets="${tree_root}/$project_targets" + fi + ;; + --with-details=*) + project_details=${1#*=} + ;; + --with-install=*) + install_path=${1#*=} + ;; + *) + Error "unknown option: $1" + ;; + esac + shift +done +if [ $do_help = "yes" ]; then + Usage + exit 0 +fi + +############################################################################# +XC=`which xcodebuild 2>/dev/null` +if test $? -ne 0; then + echo ERROR: xcodebuild is not found + exit 1 +fi +CC_NAME=Xcode +CC_VERSION=`xcodebuild -version | awk 'NR==1{print $2}'` +############################################################################# + +CMAKE_ARGS="-DNCBI_EXPERIMENTAL=ON -G Xcode" + +CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_LIST=$(Quote "${project_list}")" +CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_TAGS=$(Quote "${project_tags}")" +CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_TARGETS=$(Quote "${project_targets}")" +CMAKE_ARGS="$CMAKE_ARGS -DNCBI_VERBOSE_PROJECTS=$(Quote "${project_details}")" +if [ -n "$install_path" ]; then + CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_INSTALL_PATH=$(Quote "${install_path}")" +fi +CMAKE_ARGS="$CMAKE_ARGS -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS" + +build_root=CMake-${CC_NAME}${CC_VERSION} +if [ "$BUILD_SHARED_LIBS" == "ON" ]; then + build_root="$build_root"/dll +else + build_root="$build_root"/static +fi + +if test ! 
-e "${tree_root}/${build_root}/build"; then + mkdir -p "${tree_root}/${build_root}/build" +fi +cd ${tree_root}/${build_root}/build + + +#echo Running "${CMAKE_CMD}" ${CMAKE_ARGS} "${tree_root}/src" +eval "${CMAKE_CMD}" ${CMAKE_ARGS} "${tree_root}/src" +cd ${initial_dir} diff --git a/c++/src/build-system/cmake/cmake-configure b/c++/src/build-system/cmake/cmake-configure index ed6af5e0..e4be127c 100755 --- a/c++/src/build-system/cmake/cmake-configure +++ b/c++/src/build-system/cmake/cmake-configure @@ -41,7 +41,7 @@ if [ -z "$CXX" ]; then CXX=`which g++` fi BUILD_TYPE="Debug" -BUILD_SHARED_LIBS=ON +BUILD_SHARED_LIBS= # NOTE: By default, not set and neither ON nor OFF! PREFIX=/usr/local LIBDIR= CMAKE_ARGS= @@ -239,6 +239,8 @@ add_gpipe_warnings() { #MAIN +DISTCC_OPTS="-DCMAKE_USE_DISTCC=ON" + while [ $# != 0 ]; do case "$1" in "--prefix="*) @@ -359,7 +361,10 @@ while [ $# != 0 ]; do CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_USE_CCACHE=ON";; "--with-distcc") - CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_USE_DISTCC=ON";; + ;; + + "--without-distcc") + DISTCC_OPTS="";; "--with-xcode") CMAKE_ARGS="$CMAKE_ARGS -G Xcode";; @@ -384,7 +389,7 @@ while [ $# != 0 ]; do ;; "--gpipe-cgi") - #EXPERIMENTAL=Int8GI + EXPERIMENTAL=Int8GI BUILD_TYPE="Release" BUILD_SHARED_LIBS=OFF CMAKE_ARGS="$CMAKE_ARGS -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS" @@ -467,6 +472,8 @@ if [ "x${LIBDIR}" = "x" ]; then LIBDIR="${PREFIX}/lib" fi +CMAKE_ARGS="$CMAKE_ARGS $DISTCC_OPTS" + # Unlike CFLAGS/CXXFLAGS/CC/CXX, LDFLAGS isn't handled by CMake, so we # need to parse it here. 
if [ "x${LDFLAGS}" != "x" ]; then @@ -478,9 +485,10 @@ fi if [ -z "$ROOT" ]; then if [ -z "$GPIPEROOT" ]; then ROOT="CMake-$BUILD_TYPE" - if [ "$BUILD_SHARED_LIBS" == "ON" ]; then - ROOT="$ROOT"DLL - fi + case "$BUILD_SHARED_LIBS" in + ON) ROOT="$ROOT"DLL ;; + OFF) ROOT="$ROOT"Static ;; + esac else ROOT=`cd "${TOP_SRCDIR}/../../../" && /bin/pwd` || exit 1 ROOT=`/usr/bin/dirname "$ROOT"` || exit 1 @@ -529,6 +537,4 @@ if [ -e CMakeCache.txt ]; then rm CMakeCache.txt fi -echo "${CMAKE_CMD}" ${TRACE_CMAKE} ${NINJA_FLAGS} "${TOP_SRCDIR}/../../" -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" -DCMAKE_INSTALL_PREFIX="${PREFIX}" -DCMAKE_INSTALL_LIBDIR="${LIBDIR}" ${CMAKE_ARGS} - eval "${CMAKE_CMD}" ${TRACE_CMAKE} ${NINJA_FLAGS} "${TOP_SRCDIR}/../../" -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" -DCMAKE_INSTALL_PREFIX="${PREFIX}" -DCMAKE_INSTALL_LIBDIR="${LIBDIR}" ${CMAKE_ARGS} diff --git a/c++/src/build-system/cmake/config.cmake.h.in b/c++/src/build-system/cmake/config.cmake.h.in index 18949268..c617f255 100644 --- a/c++/src/build-system/cmake/config.cmake.h.in +++ b/c++/src/build-system/cmake/config.cmake.h.in @@ -343,7 +343,7 @@ /* #undef HAVE_LIBOECHEM */ /* Define to 1 if libssl is available. */ -#define HAVE_LIBOPENSSL 1 +#cmakedefine HAVE_LIBOPENSSL 1 /* Define to 1 if you have libOSMesa. */ #define HAVE_LIBOSMESA 1 @@ -614,12 +614,24 @@ /* Define to 1 if you have the `strcasecmp' function. */ #cmakedefine HAVE_STRCASECMP 1 +/* Define to 1 if you have the `strncasecmp' function. */ +#cmakedefine HAVE_STRNCASECMP 1 + /* Define to 1 if strcasecmp treats letters as lowercase. */ #define HAVE_STRCASECMP_LC 1 /* Define to 1 if you have the `strdup' function. */ #cmakedefine HAVE_STRDUP 1 +/* Define to 1 if you have the `strndup' function. */ +#cmakedefine HAVE_STRNDUP 1 + +/* Define to 1 if you have the `memcchr' function. */ +#cmakedefine HAVE_MEMCCHR 1 + +/* Define to 1 if you have the `memrchr' function. */ +#cmakedefine HAVE_MEMRCHR 1 + /* Define to 1 if you have the header file. 
*/ #cmakedefine HAVE_STRINGS_H 1 diff --git a/c++/src/build-system/cmake/ncbi-defaults/ncbi-mongocxx-config.cmake b/c++/src/build-system/cmake/ncbi-defaults/ncbi-mongocxx-config.cmake index 513fcf0e..8075b5b1 100644 --- a/c++/src/build-system/cmake/ncbi-defaults/ncbi-mongocxx-config.cmake +++ b/c++/src/build-system/cmake/ncbi-defaults/ncbi-mongocxx-config.cmake @@ -61,7 +61,7 @@ set(MONGOCXX_LIBRARIES ) if (NOT BUILD_SHARED_LIBS) - set(MONGOCXX_LIBRARIES ${MONGOCXX_LIBRARIES} /usr/lib/libresolv.so) + set(MONGOCXX_LIBRARIES ${MONGOCXX_LIBRARIES} /lib64/libresolv-2.17.so) endif() diff --git a/c++/src/build-system/cmake/ncbiconf_msvc_site.h.in b/c++/src/build-system/cmake/ncbiconf_msvc_site.h.in index 7a3877d6..019e03e8 100644 --- a/c++/src/build-system/cmake/ncbiconf_msvc_site.h.in +++ b/c++/src/build-system/cmake/ncbiconf_msvc_site.h.in @@ -19,14 +19,14 @@ #define HAVE_LIBMIMETIC 1 /* #undef HAVE_LIBMONGODB */ /* #undef HAVE_LIBMUPARSER */ -#define HAVE_LIBOPENSSL 1 +#cmakedefine HAVE_LIBOPENSSL 1 #cmakedefine HAVE_LIBPNG 1 #cmakedefine HAVE_LIBSQLITE3 1 #cmakedefine HAVE_LIBSYBASE 1 #cmakedefine HAVE_LIBTIFF 1 #cmakedefine HAVE_LIBXML 1 #cmakedefine HAVE_LIBXSLT 1 -/* #undef HAVE_LOCAL_LBSM */ +#cmakedefine HAVE_LOCAL_LBSM 1 #cmakedefine HAVE_NCBI_VDB 1 #cmakedefine HAVE_ODBC 1 #cmakedefine HAVE_ODBCSS_H 1 diff --git a/c++/src/build-system/config.h.in b/c++/src/build-system/config.h.in index 788b07ac..cc17349b 100644 --- a/c++/src/build-system/config.h.in +++ b/c++/src/build-system/config.h.in @@ -357,6 +357,9 @@ /* Define to 1 if libgif is available. */ #undef HAVE_LIBGIF +/* Define to 1 if libgl2ps is available. */ +#undef HAVE_LIBGL2PS + /* Define to 1 if GLEW is available, either in its own library or as part of the standard libraries. 
*/ #undef HAVE_LIBGLEW diff --git a/c++/src/build-system/configure b/c++/src/build-system/configure index 27deecf4..f445e328 100755 --- a/c++/src/build-system/configure +++ b/c++/src/build-system/configure @@ -825,6 +825,8 @@ UNGIF_INCLUDE UNGIF_LIBS XPM_INCLUDE XPM_LIBS +GL2PS_INCLUDE +GL2PS_LIBS freetype_config FTGL_INCLUDE FTGL_LIBS @@ -1676,7 +1678,7 @@ geo included-geo vdb downloaded-vdb static-vdb libunwind libdw backward-cpp \ backward-cpp-sig \ z bz2 lzo pcre mbedtls gmp gcrypt nettle gnutls static-gnutls openssl krb5 \ sybase sybase-local sybase-new ftds mysql \ -orbacus freetype ftgl opengl mesa glut glew glew-mx \ +orbacus freetype ftgl opengl mesa glut glew glew-mx gl2ps \ bdb python perl jni sqlite3 icu boost boost-tag \ sp expat sablot libxml libxslt libexslt xerces xalan zorba \ oechem sge muparser hdf5 \ @@ -1744,6 +1746,7 @@ for x_arg in "$@" ; do | --with-gnutls=* | --with-openssl=* | --with-krb5=* \ | --with-sybase-local=* | --with-ftds=*/* | --with-mysql=* \ | --with-opengl=* | --with-mesa=* | --with-glut=* | --with-glew=* \ + | --with-gl2ps=* \ | --with-wxwidgets=* | --with-freetype=* | --with-ftgl=* \ | --with-fastcgi=*/* | --with-bdb=*/* | --with-orbacus=* \ | --with-odbc=* | --with-python=* | --with-perl=* | --with-jni=* \ @@ -1997,6 +2000,8 @@ Optional Packages: --without-glew do not use GLEW --with-glew=DIR use GLEW installation in DIR --with-glew-mx insist on a multi-context-ready GLEW installation + --without-gl2ps do not use GL2PS + --with-gl2ps=DIR use GL2PS installation in DIR --without-wxwidgets do not use wxWidgets (2.6+) --with-wxwidgets=DIR use wxWidgets installation in DIR --with-wxwidgets-ucs use Unicode builds of wxWidgets @@ -2421,6 +2426,12 @@ case "$with_3psw" in else with_glew=no fi + if test "${with_gl2ps-no}" != "no"; then + { echo "$as_me: error: incompatible options: --with-gl2ps but --without-3psw" >&2 + { (exit 1); exit 1; }; } + else + with_gl2ps=no + fi if test "${with_wxwidgets-no}" != "no"; then { echo "$as_me: 
error: incompatible options: --with-wxwidgets but --without-3psw" >&2 { (exit 1); exit 1; }; } @@ -3762,6 +3773,18 @@ if test "${with_glew_mx+set}" = set; then fi +# Check whether --with-gl2ps was given. +if test "${with_gl2ps+set}" = set; then + withval=$with_gl2ps; +fi + + +# Check whether --with-gl2ps was given. +if test "${with_gl2ps+set}" = set; then + withval=$with_gl2ps; +fi + + # Check whether --with-wxwidgets was given. if test "${with_wxwidgets+set}" = set; then withval=$with_wxwidgets; @@ -29105,9 +29128,9 @@ no_usr_lib="s,-L$usr_lib ,,g; s,-L/usr/lib/$multiarch ,,g" : ${with_libunwind:=no} if test "$with_libunwind" != "no"; then - case "$with_libunwind" in - yes | "" ) ;; - * ) LIBUNWIND_PATH=$with_libunwind ;; + case "$LIBUNWIND_PATH:$with_libunwind" in + *:yes | *: | $with_libunwind* ) ;; + * ) LIBUNWIND_PATH=$with_libunwind ;; esac if test "$LIBUNWIND_PATH" != /usr -a -d "$LIBUNWIND_PATH"; then in_path=" in $LIBUNWIND_PATH" @@ -29283,9 +29306,9 @@ LIBS="$LIBUNWIND_LIBS $LIBS" orig_LIBS="$LIBUNWIND_LIBS $orig_LIBS" if test "$with_libdw" != "no"; then - case "$with_libdw" in - yes | "" ) ;; - * ) LIBDW_PATH=$with_libdw ;; + case "$LIBDW_PATH:$with_libdw" in + *:yes | *: | $with_libdw* ) ;; + * ) LIBDW_PATH=$with_libdw ;; esac if test "$LIBDW_PATH" != /usr -a -d "$LIBDW_PATH"; then in_path=" in $LIBDW_PATH" @@ -29460,9 +29483,9 @@ LIBS="$LIBDW_LIBS $LIBS" orig_LIBS="$LIBDW_LIBS $orig_LIBS" if test "$with_backward_cpp" != "no"; then - case "$with_backward_cpp" in - yes | "" ) ;; - * ) BACKWARD_CPP_PATH=$with_backward_cpp ;; + case "$BACKWARD_CPP_PATH:$with_backward_cpp" in + *:yes | *: | $with_backward_cpp* ) ;; + * ) BACKWARD_CPP_PATH=$with_backward_cpp ;; esac if test "$BACKWARD_CPP_PATH" != /usr -a -d "$BACKWARD_CPP_PATH"; then in_path=" in $BACKWARD_CPP_PATH" @@ -29652,9 +29675,9 @@ _ACEOF fi if test "$with_z" != "no"; then - case "$with_z" in - yes | "" ) ;; - * ) Z_PATH=$with_z ;; + case "$Z_PATH:$with_z" in + *:yes | *: | $with_z* ) ;; + * ) 
Z_PATH=$with_z ;; esac if test "$Z_PATH" != /usr -a -d "$Z_PATH"; then in_path=" in $Z_PATH" @@ -29839,9 +29862,9 @@ echo "$as_me: using local zlib copy in $zlocal" >&6;} fi if test "$with_bz2" != "no"; then - case "$with_bz2" in - yes | "" ) ;; - * ) BZ2_PATH=$with_bz2 ;; + case "$BZ2_PATH:$with_bz2" in + *:yes | *: | $with_bz2* ) ;; + * ) BZ2_PATH=$with_bz2 ;; esac if test "$BZ2_PATH" != /usr -a -d "$BZ2_PATH"; then in_path=" in $BZ2_PATH" @@ -30047,9 +30070,9 @@ if test -d "$LZO_PATH"; then esac fi if test "$with_lzo" != "no"; then - case "$with_lzo" in - yes | "" ) ;; - * ) LZO_PATH=$with_lzo ;; + case "$LZO_PATH:$with_lzo" in + *:yes | *: | $with_lzo* ) ;; + * ) LZO_PATH=$with_lzo ;; esac if test "$LZO_PATH" != /usr -a -d "$LZO_PATH"; then in_path=" in $LZO_PATH" @@ -30220,15 +30243,20 @@ _ACEOF +if test -n "$LZO_LIBS" -a "x$with_bin_release" = xyes \ + -a \( -f "$LZO_PATH/lib$bit64_sfx/liblzo2-static.a" \ + -o -f "$LZO_PATH/lib/liblzo2-static.a" \); then + LZO_LIBS="$LZO_LIBPATH -llzo2-static" +fi if test -z "$PCRE_PATH" && pcre-config --version >/dev/null 2>&1; then p=`pcre-config --prefix` test "x$p" = "x/usr" || PCRE_PATH=$p fi if test "$with_pcre" != "no"; then - case "$with_pcre" in - yes | "" ) ;; - * ) PCRE_PATH=$with_pcre ;; + case "$PCRE_PATH:$with_pcre" in + *:yes | *: | $with_pcre* ) ;; + * ) PCRE_PATH=$with_pcre ;; esac if test "$PCRE_PATH" != /usr -a -d "$PCRE_PATH"; then in_path=" in $PCRE_PATH" @@ -30446,9 +30474,9 @@ if test "x${with_mbedtls-no}" != xno; then esac fi if test "$with_mbedtls" != "no"; then - case "$with_mbedtls" in - yes | "" ) ;; - * ) MBEDTLS_PATH=$with_mbedtls ;; + case "$MBEDTLS_PATH:$with_mbedtls" in + *:yes | *: | $with_mbedtls* ) ;; + * ) MBEDTLS_PATH=$with_mbedtls ;; esac if test "$MBEDTLS_PATH" != /usr -a -d "$MBEDTLS_PATH"; then in_path=" in $MBEDTLS_PATH" @@ -30662,9 +30690,9 @@ if test "x$with_gmp" != xno; then esac fi if test "$with_gmp" != "no"; then - case "$with_gmp" in - yes | "" ) ;; - * ) GMP_PATH=$with_gmp ;; 
+ case "$GMP_PATH:$with_gmp" in + *:yes | *: | $with_gmp* ) ;; + * ) GMP_PATH=$with_gmp ;; esac if test "$GMP_PATH" != /usr -a -d "$GMP_PATH"; then in_path=" in $GMP_PATH" @@ -30948,9 +30976,9 @@ else $as_unset GCRYPT_CONFIG_LIBS || test "${GCRYPT_CONFIG_LIBS+set}" != set || { GCRYPT_CONFIG_LIBS=; export GCRYPT_CONFIG_LIBS; } fi if test "$with_gcrypt" != "no"; then - case "$with_gcrypt" in - yes | "" ) ;; - * ) GCRYPT_PATH=$with_gcrypt ;; + case "$GCRYPT_PATH:$with_gcrypt" in + *:yes | *: | $with_gcrypt* ) ;; + * ) GCRYPT_PATH=$with_gcrypt ;; esac if test "$GCRYPT_PATH" != /usr -a -d "$GCRYPT_PATH"; then in_path=" in $GCRYPT_PATH" @@ -31236,9 +31264,9 @@ if test "x$with_nettle" != xno; then fi fi if test "$with_nettle" != "no"; then - case "$with_nettle" in - yes | "" ) ;; - * ) NETTLE_PATH=$with_nettle ;; + case "$NETTLE_PATH:$with_nettle" in + *:yes | *: | $with_nettle* ) ;; + * ) NETTLE_PATH=$with_nettle ;; esac if test "$NETTLE_PATH" != /usr -a -d "$NETTLE_PATH"; then in_path=" in $NETTLE_PATH" @@ -31570,9 +31598,9 @@ else $as_unset GNUTLS_CONFIG_LIBS || test "${GNUTLS_CONFIG_LIBS+set}" != set || { GNUTLS_CONFIG_LIBS=; export GNUTLS_CONFIG_LIBS; } fi if test "$with_gnutls" != "no"; then - case "$with_gnutls" in - yes | "" ) ;; - * ) GNUTLS_PATH=$with_gnutls ;; + case "$GNUTLS_PATH:$with_gnutls" in + *:yes | *: | $with_gnutls* ) ;; + * ) GNUTLS_PATH=$with_gnutls ;; esac if test "$GNUTLS_PATH" != /usr -a -d "$GNUTLS_PATH"; then in_path=" in $GNUTLS_PATH" @@ -32096,9 +32124,9 @@ done fi if test "$with_openssl" != "no"; then - case "$with_openssl" in - yes | "" ) ;; - * ) OPENSSL_PATH=$with_openssl ;; + case "$OPENSSL_PATH:$with_openssl" in + *:yes | *: | $with_openssl* ) ;; + * ) OPENSSL_PATH=$with_openssl ;; esac if test "$OPENSSL_PATH" != /usr -a -d "$OPENSSL_PATH"; then in_path=" in $OPENSSL_PATH" @@ -32342,9 +32370,9 @@ else KRB5_CONFIG_LIBS=$KRB5_LIBS fi if test "$with_krb5" != "no"; then - case "$with_krb5" in - yes | "" ) ;; - * ) KRB5_PATH=$with_krb5 ;; + 
case "$KRB5_PATH:$with_krb5" in + *:yes | *: | $with_krb5* ) ;; + * ) KRB5_PATH=$with_krb5 ;; esac if test "$KRB5_PATH" != /usr -a -d "$KRB5_PATH"; then in_path=" in $KRB5_PATH" @@ -32837,9 +32865,9 @@ fi if test "$with_curl" != "no"; then - case "$with_curl" in - yes | "" ) ;; - * ) CURL_PATH=$with_curl ;; + case "$CURL_PATH:$with_curl" in + *:yes | *: | $with_curl* ) ;; + * ) CURL_PATH=$with_curl ;; esac if test "$CURL_PATH" != /usr -a -d "$CURL_PATH"; then in_path=" in $CURL_PATH" @@ -40069,6 +40097,8 @@ if test "$with_opengl" != "no"; then case "$OSTYPE" in darwin) # Use native interface OPENGL_LIBS="-framework AGL -framework OpenGL" + # ... and its proprietary successor + OPENGL_LIBS="$OPENGL_LIBS -framework Metal -framework MetalKit" ;; # cygwin) ... ;; *) # Default -- assume X-based @@ -40246,6 +40276,11 @@ if test "$with_opengl" = "no" -o "$ncbi_cv_lib_opengl" = "no"; then echo "$as_me: error: --with-opengl explicitly specified, but no usable version found." >&2;} { (exit 1); exit 1; }; } fi + if test "${with_gl2ps:=no}" != no; then + { { echo "$as_me:$LINENO: error: --with-gl2ps explicitly specified, but no usable version found." >&5 +echo "$as_me: error: --with-gl2ps explicitly specified, but no usable version found." 
>&2;} + { (exit 1); exit 1; }; } + fi else WithPackages="$WithPackages${WithPackagesSep}OpenGL"; WithPackagesSep=" " @@ -42241,9 +42276,9 @@ fi ### XML/XSL libraries if test "$with_expat" != "no"; then - case "$with_expat" in - yes | "" ) ;; - * ) EXPAT_PATH=$with_expat ;; + case "$EXPAT_PATH:$with_expat" in + *:yes | *: | $with_expat* ) ;; + * ) EXPAT_PATH=$with_expat ;; esac if test "$EXPAT_PATH" != /usr -a -d "$EXPAT_PATH"; then in_path=" in $EXPAT_PATH" @@ -42438,9 +42473,9 @@ if test -d "$SABLOT_PATH"; then esac fi if test "$with_sablot" != "no"; then - case "$with_sablot" in - yes | "" ) ;; - * ) SABLOT_PATH=$with_sablot ;; + case "$SABLOT_PATH:$with_sablot" in + *:yes | *: | $with_sablot* ) ;; + * ) SABLOT_PATH=$with_sablot ;; esac if test "$SABLOT_PATH" != /usr -a -d "$SABLOT_PATH"; then in_path=" in $SABLOT_PATH" @@ -42812,9 +42847,9 @@ fi : ${LIBXSLT_PATH=$LIBXML_PATH} if test "$with_libxslt" != "no"; then - case "$with_libxslt" in - yes | "" ) ;; - * ) LIBXSLT_PATH=$with_libxslt ;; + case "$LIBXSLT_PATH:$with_libxslt" in + *:yes | *: | $with_libxslt* ) ;; + * ) LIBXSLT_PATH=$with_libxslt ;; esac if test "$LIBXSLT_PATH" != /usr -a -d "$LIBXSLT_PATH"; then in_path=" in $LIBXSLT_PATH" @@ -43047,9 +43082,9 @@ else exslt_autodep= fi if test "$with_libexslt" != "no"; then - case "$with_libexslt" in - yes | "" ) ;; - * ) LIBEXSLT_PATH=$with_libexslt ;; + case "$LIBEXSLT_PATH:$with_libexslt" in + *:yes | *: | $with_libexslt* ) ;; + * ) LIBEXSLT_PATH=$with_libexslt ;; esac if test "$LIBEXSLT_PATH" != /usr -a -d "$LIBEXSLT_PATH"; then in_path=" in $LIBEXSLT_PATH" @@ -43973,9 +44008,9 @@ ncbi_rp_L_flags= SQLITE3_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" fi if test "$with_sqlite3" != "no"; then - case "$with_sqlite3" in - yes | "" ) ;; - * ) SQLITE3_PATH=$with_sqlite3 ;; + case "$SQLITE3_PATH:$with_sqlite3" in + *:yes | *: | $with_sqlite3* ) ;; + * ) SQLITE3_PATH=$with_sqlite3 ;; esac if test "$SQLITE3_PATH" != /usr -a -d "$SQLITE3_PATH"; then in_path=" in 
$SQLITE3_PATH" @@ -44728,9 +44763,9 @@ fi # somewhat kludgish, as we now wanto to add in oeiupac and oedepict, # which depend on oechem.... if test "$with_oechem" != "no"; then - case "$with_oechem" in - yes | "" ) ;; - * ) OECHEM_PATH=$with_oechem ;; + case "$OECHEM_PATH:$with_oechem" in + *:yes | *: | $with_oechem* ) ;; + * ) OECHEM_PATH=$with_oechem ;; esac if test "$OECHEM_PATH" != /usr -a -d "$OECHEM_PATH"; then in_path=" in $OECHEM_PATH" @@ -44967,9 +45002,9 @@ if test -d "$SGE_PATH" -a -z "$SGE_LIBPATH"; then fi fi if test "$with_sge" != "no"; then - case "$with_sge" in - yes | "" ) ;; - * ) SGE_PATH=$with_sge ;; + case "$SGE_PATH:$with_sge" in + *:yes | *: | $with_sge* ) ;; + * ) SGE_PATH=$with_sge ;; esac if test "$SGE_PATH" != /usr -a -d "$SGE_PATH"; then in_path=" in $SGE_PATH" @@ -45172,9 +45207,9 @@ elif test -d "$MUPARSER_PATH/${compiler_pfx}${DEBUG_SFX}${bit64_sfx}/lib"; then fi if test "$with_muparser" != "no"; then - case "$with_muparser" in - yes | "" ) ;; - * ) MUPARSER_PATH=$with_muparser ;; + case "$MUPARSER_PATH:$with_muparser" in + *:yes | *: | $with_muparser* ) ;; + * ) MUPARSER_PATH=$with_muparser ;; esac if test "$MUPARSER_PATH" != /usr -a -d "$MUPARSER_PATH"; then in_path=" in $MUPARSER_PATH" @@ -45376,9 +45411,9 @@ if test -d "$HDF5_PATH"; then fi if test "$with_hdf5" != "no"; then - case "$with_hdf5" in - yes | "" ) ;; - * ) HDF5_PATH=$with_hdf5 ;; + case "$HDF5_PATH:$with_hdf5" in + *:yes | *: | $with_hdf5* ) ;; + * ) HDF5_PATH=$with_hdf5 ;; esac if test "$HDF5_PATH" != /usr -a -d "$HDF5_PATH"; then in_path=" in $HDF5_PATH" @@ -45555,9 +45590,9 @@ _ACEOF # : ${JPEG_PATH=/usr/sfw} # Grr... jpeglib.h isn't especially well-behaved. 
if test "$with_jpeg" != "no"; then - case "$with_jpeg" in - yes | "" ) ;; - * ) JPEG_PATH=$with_jpeg ;; + case "$JPEG_PATH:$with_jpeg" in + *:yes | *: | $with_jpeg* ) ;; + * ) JPEG_PATH=$with_jpeg ;; esac if test "$JPEG_PATH" != /usr -a -d "$JPEG_PATH"; then in_path=" in $JPEG_PATH" @@ -45738,9 +45773,9 @@ _ACEOF # : ${PNG_PATH=/usr/sfw} if test "$with_png" != "no"; then - case "$with_png" in - yes | "" ) ;; - * ) PNG_PATH=$with_png ;; + case "$PNG_PATH:$with_png" in + *:yes | *: | $with_png* ) ;; + * ) PNG_PATH=$with_png ;; esac if test "$PNG_PATH" != /usr -a -d "$PNG_PATH"; then in_path=" in $PNG_PATH" @@ -45916,9 +45951,9 @@ _ACEOF # : ${TIFF_PATH=/usr/sfw} if test "$with_tiff" != "no"; then - case "$with_tiff" in - yes | "" ) ;; - * ) TIFF_PATH=$with_tiff ;; + case "$TIFF_PATH:$with_tiff" in + *:yes | *: | $with_tiff* ) ;; + * ) TIFF_PATH=$with_tiff ;; esac if test "$TIFF_PATH" != /usr -a -d "$TIFF_PATH"; then in_path=" in $TIFF_PATH" @@ -46094,9 +46129,9 @@ _ACEOF # Paths? with_ungif=$with_gif if test "$with_gif" != "no"; then - case "$with_gif" in - yes | "" ) ;; - * ) GIF_PATH=$with_gif ;; + case "$GIF_PATH:$with_gif" in + *:yes | *: | $with_gif* ) ;; + * ) GIF_PATH=$with_gif ;; esac if test "$GIF_PATH" != /usr -a -d "$GIF_PATH"; then in_path=" in $GIF_PATH" @@ -46271,9 +46306,9 @@ _ACEOF if test "$with_gif" = "no"; then if test "$with_ungif" != "no"; then - case "$with_ungif" in - yes | "" ) ;; - * ) UNGIF_PATH=$with_ungif ;; + case "$UNGIF_PATH:$with_ungif" in + *:yes | *: | $with_ungif* ) ;; + * ) UNGIF_PATH=$with_ungif ;; esac if test "$UNGIF_PATH" != /usr -a -d "$UNGIF_PATH"; then in_path=" in $UNGIF_PATH" @@ -46458,9 +46493,9 @@ fi case "$x_libraries" in */*) : ${XPM_PATH=`dirname "$x_libraries"`} ;; esac if test "$with_xpm" != "no"; then - case "$with_xpm" in - yes | "" ) ;; - * ) XPM_PATH=$with_xpm ;; + case "$XPM_PATH:$with_xpm" in + *:yes | *: | $with_xpm* ) ;; + * ) XPM_PATH=$with_xpm ;; esac if test "$XPM_PATH" != /usr -a -d "$XPM_PATH"; then 
in_path=" in $XPM_PATH" @@ -46634,6 +46669,209 @@ _ACEOF # The use of X_CFLAGS is probably redundant, but shouldn't hurt. +if test "$with_gl2ps" != "no"; then + if test "${with_gl2ps-yes}" != "yes"; then + GL2PS_PATH=$with_gl2ps + fi + if test -d "$GL2PS_PATH"; then + if test -d "$GL2PS_PATH/$compiler_vpfx$DEBUG_SFX$bit64_sfx"; then + GL2PS_PATH=$GL2PS_PATH/$compiler_vpfx$DEBUG_SFX$bit64_sfx + elif test -d "$GL2PS_PATH/$compiler_pfx$DEBUG_SFX$bit64_sfx"; then + GL2PS_PATH=$GL2PS_PATH/$compiler_pfx$DEBUG_SFX$bit64_sfx + elif test -d "$GL2PS_PATH/$DEBUG_SFX$bit64_sfx"; then + GL2PS_PATH=$GL2PS_PATH/$DEBUG_SFX$bit64_sfx + fi + ncbi_fix_dir_tmp=`if cd $GL2PS_PATH; then $as_unset PWD || test "${PWD+set}" != set || { PWD=; export PWD; }; /bin/pwd; fi` + case "$ncbi_fix_dir_tmp" in + /.*) ncbi_fix_dir_tmp2=`cd $GL2PS_PATH && $smart_pwd 2>/dev/null` + if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then + GL2PS_PATH=$ncbi_fix_dir_tmp2 + else + case "$GL2PS_PATH" in + /*) ;; + * ) GL2PS_PATH=$ncbi_fix_dir_tmp ;; + esac + fi + ;; + /*) GL2PS_PATH=$ncbi_fix_dir_tmp ;; + esac + fi + if test "$with_gl2ps" != "no"; then + case "$GL2PS_PATH:$with_gl2ps" in + *:yes | *: | $with_gl2ps* ) ;; + * ) GL2PS_PATH=$with_gl2ps ;; + esac + if test "$GL2PS_PATH" != /usr -a -d "$GL2PS_PATH"; then + in_path=" in $GL2PS_PATH" + if test -z "$GL2PS_INCLUDE" -a -d "$GL2PS_PATH/include"; then + GL2PS_INCLUDE="-I$GL2PS_PATH/include" + fi + if test -n "$GL2PS_LIBPATH"; then + : + elif test -d "$GL2PS_PATH/lib${bit64_sfx}"; then + ncbi_rp_L_flags= + ncbi_rp_L_sep=$CONF_f_libpath + if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then + for x in $GL2PS_PATH/lib${bit64_sfx}; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + done + GL2PS_LIBPATH="${ncbi_rp_L_flags}" + else + ncbi_rp_R_flags= + ncbi_rp_R_sep=" $CONF_f_runpath" + for x in 
$GL2PS_PATH/lib${bit64_sfx}; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + x=`echo $x | sed -e "$ncbi_rpath_sed"` + ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x" + ncbi_rp_R_sep=: + done + GL2PS_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" + fi + elif test -d "$GL2PS_PATH/lib"; then + ncbi_rp_L_flags= + ncbi_rp_L_sep=$CONF_f_libpath + if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then + for x in $GL2PS_PATH/lib; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + done + GL2PS_LIBPATH="${ncbi_rp_L_flags}" + else + ncbi_rp_R_flags= + ncbi_rp_R_sep=" $CONF_f_runpath" + for x in $GL2PS_PATH/lib; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + x=`echo $x | sed -e "$ncbi_rpath_sed"` + ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x" + ncbi_rp_R_sep=: + done + GL2PS_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" + fi + fi + GL2PS_LIBS="$GL2PS_LIBPATH -lgl2ps " + else + GL2PS_INCLUDE="" + GL2PS_LIBS="-lgl2ps " + in_path= + fi + { echo "$as_me:$LINENO: checking for libgl2ps$in_path" >&5 +echo $ECHO_N "checking for libgl2ps$in_path... $ECHO_C" >&6; } +if test "${ncbi_cv_lib_gl2ps+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + CPPFLAGS="$OPENGL_INCLUDE $GL2PS_INCLUDE $orig_CPPFLAGS" + LIBS="$GL2PS_LIBS $OPENGL_LIBS $PNG_LIBS $Z_LIBS $X_ALL_LIBS $orig_LIBS" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +int +main () +{ +gl2psSpecial(GL2PS_SVG, "foo") + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ncbi_cv_lib_gl2ps=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ncbi_cv_lib_gl2ps=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ncbi_cv_lib_gl2ps" >&5 +echo "${ECHO_T}$ncbi_cv_lib_gl2ps" >&6; } + if test "$ncbi_cv_lib_gl2ps" = "no"; then + if test "${with_gl2ps:=no}" != no; then + { { echo "$as_me:$LINENO: error: --with-gl2ps explicitly specified, but no usable version found." >&5 +echo "$as_me: error: --with-gl2ps explicitly specified, but no usable version found." 
>&2;} + { (exit 1); exit 1; }; } + fi + fi + fi + if test "$with_gl2ps" = "no"; then + GL2PS_PATH="No_GL2PS" + GL2PS_INCLUDE= + GL2PS_LIBS= + else + WithPackages="$WithPackages${WithPackagesSep}GL2PS"; WithPackagesSep=" " + GL2PS_INCLUDE="$OPENGL_INCLUDE $GL2PS_INCLUDE" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_LIBGL2PS 1 +_ACEOF + + fi + + + +fi + ## FreeType and FTGL if test "$with_freetype" != "no" ; then : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin} @@ -46962,9 +47200,9 @@ ncbi_rp_L_flags= fi if test "$with_ftgl" != "no"; then - case "$with_ftgl" in - yes | "" ) ;; - * ) FTGL_PATH=$with_ftgl ;; + case "$FTGL_PATH:$with_ftgl" in + *:yes | *: | $with_ftgl* ) ;; + * ) FTGL_PATH=$with_ftgl ;; esac if test "$FTGL_PATH" != /usr -a -d "$FTGL_PATH"; then in_path=" in $FTGL_PATH" @@ -47138,9 +47376,9 @@ _ACEOF if test "$with_magic" != "no"; then - case "$with_magic" in - yes | "" ) ;; - * ) MAGIC_PATH=$with_magic ;; + case "$MAGIC_PATH:$with_magic" in + *:yes | *: | $with_magic* ) ;; + * ) MAGIC_PATH=$with_magic ;; esac if test "$MAGIC_PATH" != /usr -a -d "$MAGIC_PATH"; then in_path=" in $MAGIC_PATH" @@ -47378,9 +47616,9 @@ ncbi_rp_L_flags= fi if test "$with_mimetic" != "no"; then - case "$with_mimetic" in - yes | "" ) ;; - * ) MIMETIC_PATH=$with_mimetic ;; + case "$MIMETIC_PATH:$with_mimetic" in + *:yes | *: | $with_mimetic* ) ;; + * ) MIMETIC_PATH=$with_mimetic ;; esac if test "$MIMETIC_PATH" != /usr -a -d "$MIMETIC_PATH"; then in_path=" in $MIMETIC_PATH" @@ -47716,9 +47954,9 @@ fi if test "$with_gsoap" != "no"; then - case "$with_gsoap" in - yes | "" ) ;; - * ) GSOAP_PATH=$with_gsoap ;; + case "$GSOAP_PATH:$with_gsoap" in + *:yes | *: | $with_gsoap* ) ;; + * ) GSOAP_PATH=$with_gsoap ;; esac if test "$GSOAP_PATH" != /usr -a -d "$GSOAP_PATH"; then in_path=" in $GSOAP_PATH" @@ -47942,9 +48180,9 @@ if test "$with_avro" != no; then fi fi if test "$with_avro" != "no"; then - case "$with_avro" in - yes | "" ) ;; - * ) AVRO_PATH=$with_avro ;; + case "$AVRO_PATH:$with_avro" in + 
*:yes | *: | $with_avro* ) ;; + * ) AVRO_PATH=$with_avro ;; esac if test "$AVRO_PATH" != /usr -a -d "$AVRO_PATH"; then in_path=" in $AVRO_PATH" @@ -48274,9 +48512,9 @@ fi # SASL 2 if test "$with_sasl2" != "no"; then - case "$with_sasl2" in - yes | "" ) ;; - * ) SASL2_PATH=$with_sasl2 ;; + case "$SASL2_PATH:$with_sasl2" in + *:yes | *: | $with_sasl2* ) ;; + * ) SASL2_PATH=$with_sasl2 ;; esac if test "$SASL2_PATH" != /usr -a -d "$SASL2_PATH"; then in_path=" in $SASL2_PATH" @@ -48508,9 +48746,9 @@ ncbi_rp_L_flags= MONGODB_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" fi if test "$with_mongodb" != "no"; then - case "$with_mongodb" in - yes | "" ) ;; - * ) MONGODB_PATH=$with_mongodb ;; + case "$MONGODB_PATH:$with_mongodb" in + *:yes | *: | $with_mongodb* ) ;; + * ) MONGODB_PATH=$with_mongodb ;; esac if test "$MONGODB_PATH" != /usr -a -d "$MONGODB_PATH"; then in_path=" in $MONGODB_PATH" @@ -48752,9 +48990,9 @@ if $m3pc --exists >/dev/null 2>&1; then fi fi if test "$with_mongodb3" != "no"; then - case "$with_mongodb3" in - yes | "" ) ;; - * ) MONGODB3_PATH=$with_mongodb3 ;; + case "$MONGODB3_PATH:$with_mongodb3" in + *:yes | *: | $with_mongodb3* ) ;; + * ) MONGODB3_PATH=$with_mongodb3 ;; esac if test "$MONGODB3_PATH" != /usr -a -d "$MONGODB3_PATH"; then in_path=" in $MONGODB3_PATH" @@ -48928,7 +49166,9 @@ _ACEOF if test -n "$MONGODB3_LIBS"; then MONGODB3_INCLUDE=$MONGODB3_FULL_INCLUDE MONGODB3_LIBS=$MONGODB3_FULL_LIBS - if test -f $MONGODB3_LIBDIR/libmongocxx-static.a; then + if $m3pc-static --exists >/dev/null 2>&1; then + MONGODB3_STATIC_LIBS=`$m3pc-static --libs --static` + elif test -f $MONGODB3_LIBDIR/libmongocxx-static.a; then MONGODB3_STATIC_LIBS=`echo "$MONGODB3_LIBS" | \ sed -e 's/-lmongo[^ ]*/&-static/g; s/-lbson[^ ]*/&-static/g' \ -e 's/\(-l[^ ]*-static[^ ]*\)-static/\1/g'` @@ -49014,9 +49254,9 @@ ncbi_rp_L_flags= GMOCK_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" fi if test "$with_gmock" != "no"; then - case "$with_gmock" in - yes | "" ) ;; - * ) 
GMOCK_PATH=$with_gmock ;; + case "$GMOCK_PATH:$with_gmock" in + *:yes | *: | $with_gmock* ) ;; + * ) GMOCK_PATH=$with_gmock ;; esac if test "$GMOCK_PATH" != /usr -a -d "$GMOCK_PATH"; then in_path=" in $GMOCK_PATH" @@ -49487,9 +49727,9 @@ fi fi if test "$with_lapack" != "no"; then - case "$with_lapack" in - yes | "" ) ;; - * ) LAPACK_PATH=$with_lapack ;; + case "$LAPACK_PATH:$with_lapack" in + *:yes | *: | $with_lapack* ) ;; + * ) LAPACK_PATH=$with_lapack ;; esac if test "$LAPACK_PATH" != /usr -a -d "$LAPACK_PATH"; then in_path=" in $LAPACK_PATH" @@ -49663,9 +49903,9 @@ _ACEOF # LMDB if test "$with_lmdb" != "no"; then - case "$with_lmdb" in - yes | "" ) ;; - * ) LMDB_PATH=$with_lmdb ;; + case "$LMDB_PATH:$with_lmdb" in + *:yes | *: | $with_lmdb* ) ;; + * ) LMDB_PATH=$with_lmdb ;; esac if test "$LMDB_PATH" != /usr -a -d "$LMDB_PATH"; then in_path=" in $LMDB_PATH" @@ -49941,9 +50181,9 @@ ncbi_rp_L_flags= LIBUV_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" fi if test "$with_libuv" != "no"; then - case "$with_libuv" in - yes | "" ) ;; - * ) LIBUV_PATH=$with_libuv ;; + case "$LIBUV_PATH:$with_libuv" in + *:yes | *: | $with_libuv* ) ;; + * ) LIBUV_PATH=$with_libuv ;; esac if test "$LIBUV_PATH" != /usr -a -d "$LIBUV_PATH"; then in_path=" in $LIBUV_PATH" @@ -50121,6 +50361,10 @@ else fi # libssh2 +case "$with_libssh2" in + yes | no | '' ) ;; + * ) LIBSSH2_PATH=$with_libssh2 ;; +esac if test -d "$LIBSSH2_PATH"; then ncbi_fix_dir_tmp=`if cd $LIBSSH2_PATH; then $as_unset PWD || test "${PWD+set}" != set || { PWD=; export PWD; }; /bin/pwd; fi` case "$ncbi_fix_dir_tmp" in @@ -50192,9 +50436,9 @@ ncbi_rp_L_flags= LIBSSH2_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" fi if test "$with_libssh2" != "no"; then - case "$with_libssh2" in - yes | "" ) ;; - * ) LIBSSH2_PATH=$with_libssh2 ;; + case "$LIBSSH2_PATH:$with_libssh2" in + *:yes | *: | $with_libssh2* ) ;; + * ) LIBSSH2_PATH=$with_libssh2 ;; esac if test "$LIBSSH2_PATH" != /usr -a -d "$LIBSSH2_PATH"; then in_path=" in 
$LIBSSH2_PATH" @@ -50372,6 +50616,10 @@ else fi # Datastax Cassandra driver +case "$with_cassandra" in + yes | no | '' ) ;; + * ) CASSANDRA_PATH=$with_cassandra ;; +esac if test -d "$CASSANDRA_PATH"; then ncbi_fix_dir_tmp=`if cd $CASSANDRA_PATH; then $as_unset PWD || test "${PWD+set}" != set || { PWD=; export PWD; }; /bin/pwd; fi` case "$ncbi_fix_dir_tmp" in @@ -50412,9 +50660,9 @@ if test -d "$CASSANDRA_PATH"; then done fi if test "$with_cassandra" != "no"; then - case "$with_cassandra" in - yes | "" ) ;; - * ) CASSANDRA_PATH=$with_cassandra ;; + case "$CASSANDRA_PATH:$with_cassandra" in + *:yes | *: | $with_cassandra* ) ;; + * ) CASSANDRA_PATH=$with_cassandra ;; esac if test "$CASSANDRA_PATH" != /usr -a -d "$CASSANDRA_PATH"; then in_path=" in $CASSANDRA_PATH" @@ -50593,6 +50841,10 @@ else fi # HTTP/2 libraries +case "$with_nghttp2" in + yes | no | '' ) ;; + * ) NGHTTP2_PATH=$with_nghttp2 ;; +esac if test -d "$NGHTTP2_PATH"; then ncbi_fix_dir_tmp=`if cd $NGHTTP2_PATH; then $as_unset PWD || test "${PWD+set}" != set || { PWD=; export PWD; }; /bin/pwd; fi` case "$ncbi_fix_dir_tmp" in @@ -50663,9 +50915,9 @@ ncbi_rp_L_flags= NGHTTP2_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" fi if test "$with_nghttp2" != "no"; then - case "$with_nghttp2" in - yes | "" ) ;; - * ) NGHTTP2_PATH=$with_nghttp2 ;; + case "$NGHTTP2_PATH:$with_nghttp2" in + *:yes | *: | $with_nghttp2* ) ;; + * ) NGHTTP2_PATH=$with_nghttp2 ;; esac if test "$NGHTTP2_PATH" != /usr -a -d "$NGHTTP2_PATH"; then in_path=" in $NGHTTP2_PATH" @@ -50843,6 +51095,10 @@ else NGHTTP2_STATIC_LIBS=$NGHTTP2_LIBS fi +case "$with_h2o" in + yes | no | '' ) ;; + * ) H2O_PATH=$with_h2o ;; +esac if test -d "$H2O_PATH"; then ncbi_fix_dir_tmp=`if cd $H2O_PATH; then $as_unset PWD || test "${PWD+set}" != set || { PWD=; export PWD; }; /bin/pwd; fi` case "$ncbi_fix_dir_tmp" in @@ -50883,9 +51139,9 @@ if test -d "$H2O_PATH"; then done fi if test "$with_h2o" != "no"; then - case "$with_h2o" in - yes | "" ) ;; - * ) H2O_PATH=$with_h2o 
;; + case "$H2O_PATH:$with_h2o" in + *:yes | *: | $with_h2o* ) ;; + * ) H2O_PATH=$with_h2o ;; esac if test "$H2O_PATH" != /usr -a -d "$H2O_PATH"; then in_path=" in $H2O_PATH" @@ -51064,9 +51320,9 @@ else fi if test "$with_libxlsxwriter" != "no"; then - case "$with_libxlsxwriter" in - yes | "" ) ;; - * ) LIBXLSXWRITER_PATH=$with_libxlsxwriter ;; + case "$LIBXLSXWRITER_PATH:$with_libxlsxwriter" in + *:yes | *: | $with_libxlsxwriter* ) ;; + * ) LIBXLSXWRITER_PATH=$with_libxlsxwriter ;; esac if test "$LIBXLSXWRITER_PATH" != /usr -a -d "$LIBXLSXWRITER_PATH"; then in_path=" in $LIBXLSXWRITER_PATH" @@ -51244,6 +51500,10 @@ else LIBXLSXWRITER_STATIC_LIBS=$LIBXLSXWRITER_LIBS fi +case "$with_grpc" in + yes | no | '' ) ;; + * ) GRPC_PATH=$with_grpc ;; +esac if test -d "$GRPC_PATH"; then ncbi_fix_dir_tmp=`if cd $GRPC_PATH; then $as_unset PWD || test "${PWD+set}" != set || { PWD=; export PWD; }; /bin/pwd; fi` case "$ncbi_fix_dir_tmp" in @@ -51287,9 +51547,9 @@ fi : ${PROTOBUF_PATH=$GRPC_PATH} if test "$with_protobuf" != "no"; then - case "$with_protobuf" in - yes | "" ) ;; - * ) PROTOBUF_PATH=$with_protobuf ;; + case "$PROTOBUF_PATH:$with_protobuf" in + *:yes | *: | $with_protobuf* ) ;; + * ) PROTOBUF_PATH=$with_protobuf ;; esac if test "$PROTOBUF_PATH" != /usr -a -d "$PROTOBUF_PATH"; then in_path=" in $PROTOBUF_PATH" @@ -51477,9 +51737,9 @@ else fi if test "$with_grpc" != "no"; then - case "$with_grpc" in - yes | "" ) ;; - * ) GRPC_PATH=$with_grpc ;; + case "$GRPC_PATH:$with_grpc" in + *:yes | *: | $with_grpc* ) ;; + * ) GRPC_PATH=$with_grpc ;; esac if test "$GRPC_PATH" != /usr -a -d "$GRPC_PATH"; then in_path=" in $GRPC_PATH" @@ -51826,6 +52086,10 @@ echo "$as_me: error: --with-msgsl explicitly specified, but no usable version fo fi fi +case "$with_aws_sdk" in + yes | no | '' ) ;; + * ) AWS_SDK_PATH=$with_aws_sdk ;; +esac if test -d "$AWS_SDK_PATH"; then ncbi_fix_dir_tmp=`if cd $AWS_SDK_PATH; then $as_unset PWD || test "${PWD+set}" != set || { PWD=; export PWD; }; /bin/pwd; 
fi` case "$ncbi_fix_dir_tmp" in @@ -51866,9 +52130,9 @@ if test -d "$AWS_SDK_PATH"; then done fi if test "$with_aws_sdk" != "no"; then - case "$with_aws_sdk" in - yes | "" ) ;; - * ) AWS_SDK_PATH=$with_aws_sdk ;; + case "$AWS_SDK_PATH:$with_aws_sdk" in + *:yes | *: | $with_aws_sdk* ) ;; + * ) AWS_SDK_PATH=$with_aws_sdk ;; esac if test "$AWS_SDK_PATH" != /usr -a -d "$AWS_SDK_PATH"; then in_path=" in $AWS_SDK_PATH" @@ -52504,7 +52768,7 @@ for x in ChaosMonkey Int8GI StrictGI GCC KCC ICC VisualAge CompaqCompiler Cray W ;; esac done - for x in UUID FUSE Iconv LIBUNWIND LIBDW BACKWARD_CPP Z LocalZ BZ2 LocalBZ2 LZO PCRE LocalPCRE MBEDTLS GMP GCRYPT NETTLE GNUTLS OPENSSL KRB5 CURL Sybase DBLib FreeTDS MySQL BerkeleyDB BerkeleyDB++ ODBC PYTHON PYTHON25 PYTHON26 PYTHON27 PYTHON3 PERL Boost.Chrono Boost.Filesystem Boost.Iostreams Boost.Program-Options Boost.Regex Boost.Spirit Boost.System Boost.Test Boost.Test.Included Boost.Thread C-Toolkit OpenGL MESA GLUT GLEW wxWidgets wx2.8 Fast-CGI LocalSSS LocalMSGMAIL2 SSSUTILS LocalNCBILS NCBILS2 SSSDB SP ORBacus ICU EXPAT SABLOT LIBXML LIBXSLT LIBEXSLT Xerces Xalan Zorba SQLITE3 SQLITE3ASYNC VDB OECHEM SGE MUPARSER HDF5 JPEG PNG TIFF GIF UNGIF XPM FreeType FTGL MAGIC MIMETIC GSOAP AVRO Cereal SASL2 MONGODB MONGODB3 GMOCK LAPACK LMDB LocalLMDB LIBUV LIBSSH2 CASSANDRA NGHTTP2 H2O LIBXLSXWRITER PROTOBUF GRPC MSGSL AWS_SDK; do + for x in UUID FUSE Iconv LIBUNWIND LIBDW BACKWARD_CPP Z LocalZ BZ2 LocalBZ2 LZO PCRE LocalPCRE MBEDTLS GMP GCRYPT NETTLE GNUTLS OPENSSL KRB5 CURL Sybase DBLib FreeTDS MySQL BerkeleyDB BerkeleyDB++ ODBC PYTHON PYTHON25 PYTHON26 PYTHON27 PYTHON3 PERL Boost.Chrono Boost.Filesystem Boost.Iostreams Boost.Program-Options Boost.Regex Boost.Spirit Boost.System Boost.Test Boost.Test.Included Boost.Thread C-Toolkit OpenGL MESA GLUT GLEW wxWidgets wx2.8 Fast-CGI LocalSSS LocalMSGMAIL2 SSSUTILS LocalNCBILS NCBILS2 SSSDB SP ORBacus ICU EXPAT SABLOT LIBXML LIBXSLT LIBEXSLT Xerces Xalan Zorba SQLITE3 SQLITE3ASYNC VDB OECHEM 
SGE MUPARSER HDF5 JPEG PNG TIFF GIF UNGIF XPM GL2PS FreeType FTGL MAGIC MIMETIC GSOAP AVRO Cereal SASL2 MONGODB MONGODB3 GMOCK LAPACK LMDB LocalLMDB LIBUV LIBSSH2 CASSANDRA NGHTTP2 H2O LIBXLSXWRITER PROTOBUF GRPC MSGSL AWS_SDK; do case " $WithPackages " in *" $x "*) ;; *) WithoutPackages="$WithoutPackages$WithoutPackagesSep$x" @@ -52901,6 +53165,8 @@ c_ncbi_runpath=`echo "$ncbi_runpath" | sed -e 's:\\$\\$:\\$:g'` + + @@ -53880,6 +54146,8 @@ UNGIF_INCLUDE!$UNGIF_INCLUDE$ac_delim UNGIF_LIBS!$UNGIF_LIBS$ac_delim XPM_INCLUDE!$XPM_INCLUDE$ac_delim XPM_LIBS!$XPM_LIBS$ac_delim +GL2PS_INCLUDE!$GL2PS_INCLUDE$ac_delim +GL2PS_LIBS!$GL2PS_LIBS$ac_delim freetype_config!$freetype_config$ac_delim FTGL_INCLUDE!$FTGL_INCLUDE$ac_delim FTGL_LIBS!$FTGL_LIBS$ac_delim @@ -53952,8 +54220,6 @@ GCCPCH!$GCCPCH$ac_delim RUNPATH_ORIGIN!$RUNPATH_ORIGIN$ac_delim NO_STRICT_ALIASING!$NO_STRICT_ALIASING$ac_delim D_SFX!$D_SFX$ac_delim -DEBUG_SFX!$DEBUG_SFX$ac_delim -LIB_OR_DLL!$LIB_OR_DLL$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -53995,6 +54261,8 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +DEBUG_SFX!$DEBUG_SFX$ac_delim +LIB_OR_DLL!$LIB_OR_DLL$ac_delim FORCE_STATIC_LIB!$FORCE_STATIC_LIB$ac_delim APP_LIB_SETTING!$APP_LIB_SETTING$ac_delim APP_LIBS_SETTING!$APP_LIBS_SETTING$ac_delim @@ -54090,8 +54358,6 @@ MYSQL_LIBS!$MYSQL_LIBS$ac_delim BERKELEYDB_INCLUDE!$BERKELEYDB_INCLUDE$ac_delim BERKELEYDB_LIBS!$BERKELEYDB_LIBS$ac_delim BERKELEYDB_STATIC_LIBS!$BERKELEYDB_STATIC_LIBS$ac_delim -BERKELEYDB_CXX_LIBS!$BERKELEYDB_CXX_LIBS$ac_delim -BERKELEYDB_CXX_STATIC_LIBS!$BERKELEYDB_CXX_STATIC_LIBS$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -54133,6 +54399,8 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +BERKELEYDB_CXX_LIBS!$BERKELEYDB_CXX_LIBS$ac_delim 
+BERKELEYDB_CXX_STATIC_LIBS!$BERKELEYDB_CXX_STATIC_LIBS$ac_delim ODBC_INCLUDE!$ODBC_INCLUDE$ac_delim ODBC_LIBS!$ODBC_LIBS$ac_delim BOOST_INCLUDE!$BOOST_INCLUDE$ac_delim @@ -54228,8 +54496,6 @@ SQLITE3_STATIC_LIBS!$SQLITE3_STATIC_LIBS$ac_delim FREETYPE_INCLUDE!$FREETYPE_INCLUDE$ac_delim FREETYPE_LIBS!$FREETYPE_LIBS$ac_delim GSOAP_PATH!$GSOAP_PATH$ac_delim -AVRO_STATIC_LIBS!$AVRO_STATIC_LIBS$ac_delim -CEREAL_INCLUDE!$CEREAL_INCLUDE$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -54271,6 +54537,8 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +AVRO_STATIC_LIBS!$AVRO_STATIC_LIBS$ac_delim +CEREAL_INCLUDE!$CEREAL_INCLUDE$ac_delim MONGODB_STATIC_LIBS!$MONGODB_STATIC_LIBS$ac_delim MONGODB3_STATIC_LIBS!$MONGODB3_STATIC_LIBS$ac_delim LMDB_LIB!$LMDB_LIB$ac_delim @@ -54332,7 +54600,7 @@ LIBOBJS!$LIBOBJS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 59; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 61; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 diff --git a/c++/src/build-system/configure.ac b/c++/src/build-system/configure.ac index dae752e9..d60fde44 100644 --- a/c++/src/build-system/configure.ac +++ b/c++/src/build-system/configure.ac @@ -1,5 +1,5 @@ ############################################################################# -# $Id: configure.ac 574432 2018-11-13 14:37:02Z ivanov $ +# $Id: configure.ac 581628 2019-03-04 16:49:08Z ivanov $ # Derived from configure.in version 1.173. 
# ========================================================================== # @@ -67,7 +67,7 @@ case "$with_3psw" in m4_foreach(X, [sss, sssutils, sssdb, vdb, libunwind, z, bz2, lzo, pcre, mbedtls, gmp, gcrypt, nettle, gnutls, openssl, krb5, boost, lmdb, - sybase, ftds, mysql, opengl, mesa, glut, glew, + sybase, ftds, mysql, opengl, mesa, glut, glew, gl2ps, wxwidgets, freetype, ftgl, fastcgi, bdb, orbacus, odbc, python, perl, jni, sqlite3, mimetic, sge, icu, sp, expat, sablot, libxml, libxslt, libexslt, xerces, xalan, zorba, @@ -341,6 +341,10 @@ AC_ARG_WITH(glew, [ --with-glew=DIR use GLEW installation in DIR]) AC_ARG_WITH(glew-mx, [ --with-glew-mx insist on a multi-context-ready GLEW installation]) +AC_ARG_WITH(gl2ps, + [ --without-gl2ps do not use GL2PS]) +AC_ARG_WITH(gl2ps, + [ --with-gl2ps=DIR use GL2PS installation in DIR]) AC_ARG_WITH(wxwidgets, [ --without-wxwidgets do not use wxWidgets (2.6+)]) AC_ARG_WITH(wxwidgets, @@ -617,7 +621,7 @@ geo included-geo vdb downloaded-vdb static-vdb libunwind libdw backward-cpp \ backward-cpp-sig \ z bz2 lzo pcre mbedtls gmp gcrypt nettle gnutls static-gnutls openssl krb5 \ sybase sybase-local sybase-new ftds mysql \ -orbacus freetype ftgl opengl mesa glut glew glew-mx \ +orbacus freetype ftgl opengl mesa glut glew glew-mx gl2ps \ bdb python perl jni sqlite3 icu boost boost-tag \ sp expat sablot libxml libxslt libexslt xerces xalan zorba \ oechem sge muparser hdf5 \ @@ -684,6 +688,7 @@ for x_arg in "$@" ; do | --with-gnutls=* | --with-openssl=* | --with-krb5=* \ | --with-sybase-local=* | --with-ftds=*/* | --with-mysql=* \ | --with-opengl=* | --with-mesa=* | --with-glut=* | --with-glew=* \ + | --with-gl2ps=* \ | --with-wxwidgets=* | --with-freetype=* | --with-ftgl=* \ | --with-fastcgi=*/* | --with-bdb=*/* | --with-orbacus=* \ | --with-odbc=* | --with-python=* | --with-perl=* | --with-jni=* \ @@ -4465,6 +4470,11 @@ fi NCBI_CHECK_THIRD_PARTY_LIB_EX(lzo, LZO, lzo2, [[AC_LANG_PROGRAM([#include ], [[lzo_uint32 c = lzo_crc32(0, 
(const unsigned char*)"foo", 3);]])]]) +if test -n "$LZO_LIBS" -a "x$with_bin_release" = xyes \ + -a \( -f "$LZO_PATH/lib$bit64_sfx/liblzo2-static.a" \ + -o -f "$LZO_PATH/lib/liblzo2-static.a" \); then + LZO_LIBS="$LZO_LIBPATH -llzo2-static" +fi if test -z "$PCRE_PATH" && pcre-config --version >/dev/null 2>&1; then p=`pcre-config --prefix` @@ -6102,6 +6112,8 @@ if test "$with_opengl" != "no"; then case "$OSTYPE" in darwin) # Use native interface OPENGL_LIBS="-framework AGL -framework OpenGL" + # ... and its proprietary successor + OPENGL_LIBS="$OPENGL_LIBS -framework Metal -framework MetalKit" ;; # cygwin) ... ;; *) # Default -- assume X-based @@ -6156,6 +6168,7 @@ if test "$with_opengl" = "no" -o "$ncbi_cv_lib_opengl" = "no"; then GLEW_LIBS= GLEW_STATIC_LIBS= NCBI_MISSING_PACKAGE(opengl) + NCBI_MISSING_PACKAGE(gl2ps) else NCBI_PACKAGE(OpenGL) AC_DEFINE(HAVE_OPENGL, 1, [Define to 1 if you have OpenGL (-lGL).]) @@ -7481,6 +7494,26 @@ NCBI_CHECK_THIRD_PARTY_LIB(Xpm, $X_CFLAGS) # The use of X_CFLAGS is probably redundant, but shouldn't hurt. 
+if test "$with_gl2ps" != "no"; then + if test "${with_gl2ps-yes}" != "yes"; then + GL2PS_PATH=$with_gl2ps + fi + if test -d "$GL2PS_PATH"; then + if test -d "$GL2PS_PATH/$compiler_vpfx$DEBUG_SFX$bit64_sfx"; then + GL2PS_PATH=$GL2PS_PATH/$compiler_vpfx$DEBUG_SFX$bit64_sfx + elif test -d "$GL2PS_PATH/$compiler_pfx$DEBUG_SFX$bit64_sfx"; then + GL2PS_PATH=$GL2PS_PATH/$compiler_pfx$DEBUG_SFX$bit64_sfx + elif test -d "$GL2PS_PATH/$DEBUG_SFX$bit64_sfx"; then + GL2PS_PATH=$GL2PS_PATH/$DEBUG_SFX$bit64_sfx + fi + NCBI_FIX_DIR(GL2PS_PATH) + fi + NCBI_CHECK_THIRD_PARTY_LIB(gl2ps, + AC_LANG_PROGRAM([#include ], + [gl2psSpecial(GL2PS_SVG, "foo")]), + [], [$OPENGL_LIBS $PNG_LIBS $Z_LIBS $X_ALL_LIBS], [$OPENGL_INCLUDE]) +fi + ## FreeType and FTGL if test "$with_freetype" != "no" ; then : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin} @@ -7770,7 +7803,9 @@ NCBI_CHECK_THIRD_PARTY_LIB_EX(mongodb3, MONGODB3, mongocxx, if test -n "$MONGODB3_LIBS"; then MONGODB3_INCLUDE=$MONGODB3_FULL_INCLUDE MONGODB3_LIBS=$MONGODB3_FULL_LIBS - if test -f $MONGODB3_LIBDIR/libmongocxx-static.a; then + if $m3pc-static --exists >/dev/null 2>&1; then + MONGODB3_STATIC_LIBS=`$m3pc-static --libs --static` + elif test -f $MONGODB3_LIBDIR/libmongocxx-static.a; then MONGODB3_STATIC_LIBS=`echo "$MONGODB3_LIBS" | \ [sed -e 's/-lmongo[^ ]*/&-static/g; s/-lbson[^ ]*/&-static/g' \ -e 's/\(-l[^ ]*-static[^ ]*\)-static/\1/g']` @@ -7894,6 +7929,10 @@ else fi # libssh2 +case "$with_libssh2" in + yes | no | '' ) ;; + * ) LIBSSH2_PATH=$with_libssh2 ;; +esac if test -d "$LIBSSH2_PATH"; then NCBI_FIX_DIR(LIBSSH2_PATH) for d in "$LIBSSH2_PATH/$compiler_vpfx$DEBUG_SFX$bit64_sfx/lib" \ @@ -7920,6 +7959,10 @@ else fi # Datastax Cassandra driver +case "$with_cassandra" in + yes | no | '' ) ;; + * ) CASSANDRA_PATH=$with_cassandra ;; +esac if test -d "$CASSANDRA_PATH"; then NCBI_FIX_DIR(CASSANDRA_PATH) for d in "$CASSANDRA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ @@ -7945,6 +7988,10 @@ else fi # HTTP/2 libraries +case 
"$with_nghttp2" in + yes | no | '' ) ;; + * ) NGHTTP2_PATH=$with_nghttp2 ;; +esac if test -d "$NGHTTP2_PATH"; then NCBI_FIX_DIR(NGHTTP2_PATH) for d in "$NGHTTP2_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ @@ -7969,6 +8016,10 @@ else NGHTTP2_STATIC_LIBS=$NGHTTP2_LIBS fi +case "$with_h2o" in + yes | no | '' ) ;; + * ) H2O_PATH=$with_h2o ;; +esac if test -d "$H2O_PATH"; then NCBI_FIX_DIR(H2O_PATH) for d in "$H2O_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ @@ -8004,6 +8055,10 @@ else LIBXLSXWRITER_STATIC_LIBS=$LIBXLSXWRITER_LIBS fi +case "$with_grpc" in + yes | no | '' ) ;; + * ) GRPC_PATH=$with_grpc ;; +esac if test -d "$GRPC_PATH"; then NCBI_FIX_DIR(GRPC_PATH) for d in "$GRPC_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ @@ -8074,6 +8129,10 @@ else NCBI_MISSING_PACKAGE(msgsl) fi +case "$with_aws_sdk" in + yes | no | '' ) ;; + * ) AWS_SDK_PATH=$with_aws_sdk ;; +esac if test -d "$AWS_SDK_PATH"; then NCBI_FIX_DIR(AWS_SDK_PATH) for d in "$AWS_SDK_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ @@ -8772,6 +8831,8 @@ AC_SUBST(GLUT_LIBS) AC_SUBST(GLEW_INCLUDE) AC_SUBST(GLEW_LIBS) AC_SUBST(GLEW_STATIC_LIBS) +AC_SUBST(GL2PS_INCLUDE) +AC_SUBST(GL2PS_LIBS) AC_SUBST(WXWIDGETS_INCLUDE) AC_SUBST(WXWIDGETS_LIBS) AC_SUBST(WXWIDGETS_STATIC_LIBS) diff --git a/c++/src/build-system/install.sh.in b/c++/src/build-system/install.sh.in index f0d0e50b..bfbd4c7d 100644 --- a/c++/src/build-system/install.sh.in +++ b/c++/src/build-system/install.sh.in @@ -17,7 +17,7 @@ echo "[`date`]" -svn_location=`echo '$HeadURL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.8.1/c++/src/build-system/install.sh.in $' | sed "s%\\$[H]eadURL: *\\([^$][^$]*\\) \\$.*%\\1%"` +svn_location=`echo '$HeadURL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.9.0/c++/src/build-system/install.sh.in $' | sed "s%\\$[H]eadURL: *\\([^$][^$]*\\) \\$.*%\\1%"` svn_revision=`echo '$Revision: 541872 $' | sed "s%\\$[R]evision: *\\([^$][^$]*\\) \\$.*%\\1%"` script_name=`basename $0` diff --git 
a/c++/src/build-system/ncbi_package_version b/c++/src/build-system/ncbi_package_version index dbe59006..c8e38b61 100644 --- a/c++/src/build-system/ncbi_package_version +++ b/c++/src/build-system/ncbi_package_version @@ -1 +1 @@ -2.8.1 +2.9.0 diff --git a/c++/src/build-system/new_module.sh.in b/c++/src/build-system/new_module.sh.in index ab12d8c8..2c191310 100644 --- a/c++/src/build-system/new_module.sh.in +++ b/c++/src/build-system/new_module.sh.in @@ -1,6 +1,6 @@ @script_shell@ # -# $Id: new_module.sh.in 568728 2018-08-09 18:37:22Z ucko $ +# $Id: new_module.sh.in 581619 2019-03-04 16:45:46Z ivanov $ # Authors: Eugene Vasilchenko, NCBI; Aaron Ucko, NCBI top_srcdir="@abs_top_srcdir@" @@ -187,9 +187,9 @@ else # elif test -n "$NCBI"; then fi if test -z "$datatool"; then # keep looking - if test "`echo $bindir/*`" = "$bindir/*"; then - # empty or non-existent (--without-execopy?) - bindir=$build_root/build/serial/datatool + alt_bindir=$build_root/build/serial/datatool + if test -x $alt_bindir/datatool -a \! 
-x $bindir/datatool; then + bindir=$alt_bindir fi if test -x "$bindir/datatool"; then diff --git a/c++/src/build-system/project_tree_builder.ini b/c++/src/build-system/project_tree_builder.ini index d0434ae5..c8eaba1f 100644 --- a/c++/src/build-system/project_tree_builder.ini +++ b/c++/src/build-system/project_tree_builder.ini @@ -1,4 +1,4 @@ -# $Id: project_tree_builder.ini 573508 2018-10-29 17:38:56Z ivanov $ +# $Id: project_tree_builder.ini 581102 2019-02-25 19:12:59Z ivanov $ ############################################################################### @@ -23,7 +23,7 @@ XCode_UserInc = /usr/include #---------------------------------------------------------------------------- # Location of NCBI C toolkit # Macros used here should be defined either in this section, or in [msvcNNN] -ThirdParty_C_ncbi = \\\\snowman\\win-coremake\\Lib\\Ncbi\\C\\$(msvc_3rd)\\c.current +ThirdParty_C_ncbi = \\\\snowman\\win-coremake\\Lib\\Ncbi\\CXX\\$(msvc_3rd)\\cxx.SC-22-TRIAL\\C_TOOLKIT #---------------------------------------------------------------------------- # Location of custom code generators @@ -105,6 +105,7 @@ Defines = HAVE_BERKELEY_DB \ HAVE_LIBCONNEXT \ HAVE_LIBFASTCGI \ HAVE_LIBGIF \ + HAVE_LIBGL2PS \ HAVE_LIBGLEW \ HAVE_LIBGNUTLS \ HAVE_LIBJPEG \ @@ -208,6 +209,7 @@ ThirdParty_FreeType = $(ThirdPartyBasePath)\\freetype\\$(msvc_3rd)\\2.4.10 ThirdParty_FTGL = $(ThirdPartyBasePath)\\ftgl\\$(msvc_3rd)\\2.1.3-rc5 ThirdParty_GIF = $(ThirdPartyBasePath)\\gif\\$(msvc_3rd)\\4.1.3 ThirdParty_GLEW = $(ThirdPartyBasePath)\\glew\\$(msvc_3rd)\\1.5.8 +ThirdParty_GL2PS = $(ThirdPartyBasePath)\\gl2ps\\$(msvc_3rd)\\1.4.0 ThirdParty_GNUTLS = $(ThirdPartyBasePath)\\gnutls\\$(msvc_3rd)\\3.4.9 ThirdParty_ICU = $(ThirdPartyBasePath)\\icu\\$(msvc_3rd)\\3.2 ThirdParty_JPEG = $(ThirdPartyBasePath)\\jpeg\\$(msvc_3rd)\\6b @@ -233,7 +235,7 @@ ThirdParty_XML = $(ThirdPartyBasePath)\\xml\\$(msvc_3rd)\\2.7.8 ThirdParty_XSLT = $(ThirdPartyBasePath)\\xslt\\$(msvc_3rd)\\1.1.26 ThirdParty_Z = 
$(ThirdPartyBasePath)\\z\\$(msvc_3rd)\\1.2.8 ThirdParty_JDK = $(ThirdPartyBasePath)\\jdk\\1.6.0_25 -ThirdParty_VDB = $(ThirdPartyVDBBasePath)\\vdb\\vdb-versions\\2.9.3 +ThirdParty_VDB = $(ThirdPartyVDBBasePath)\\vdb\\vdb-versions\\2.9.4 ThirdParty_GRPC = $(ThirdPartyBasePath)\\grpc\\$(msvc_3rd)\\1.14.1 PYTHON_PATH = $(ThirdPartyAppsBasePath)\\Python252\\$(msvc_3rd) @@ -401,6 +403,7 @@ ThirdParty_FreeType = $(ThirdPartyBasePath)\\freetype\\vs2017.64\\2.4.10 ThirdParty_FTGL = $(ThirdPartyBasePath)\\ftgl\\vs2017.64\\2.1.3-rc5 ThirdParty_GIF = $(ThirdPartyBasePath)\\gif\\vs2017.64\\4.1.3 ThirdParty_GLEW = $(ThirdPartyBasePath)\\glew\\vs2017.64\\1.5.8 +ThirdParty_GL2PS = $(ThirdPartyBasePath)\\gl2ps\\vs2017.64\\1.4.0 ThirdParty_GNUTLS = $(ThirdPartyBasePath)\\gnutls\\vs2017.64\\3.4.9 ThirdParty_JPEG = $(ThirdPartyBasePath)\\jpeg\\vs2017.64\\9c ThirdParty_LZO = $(ThirdPartyBasePath)\\lzo\\vs2017.64\\2.10 @@ -423,7 +426,7 @@ ThirdParty_XML = $(ThirdPartyBasePath)\\xml\\vs2017.64\\2.7.8 ThirdParty_XSLT = $(ThirdPartyBasePath)\\xslt\\vs2017.64\\1.1.26 ThirdParty_Z = $(ThirdPartyBasePath)\\z\\vs2017.64\\1.2.11 ThirdParty_JDK = $(ThirdPartyBasePath)\\jdk\\1.6.0_25 -ThirdParty_VDB = $(ThirdPartyVDBBasePath)\\vdb\\vdb-versions\\2.9.3 +ThirdParty_VDB = $(ThirdPartyVDBBasePath)\\vdb\\vdb-versions\\2.9.4 #ThirdParty_GLEW = $(ThirdPartyBasePath)\\glew\\vs2017.64\\2.1.0 #ThirdParty_ICU = $(ThirdPartyBasePath)\\icu\\vs2017.64\\3.2 #ThirdParty_MongoDB = $(ThirdPartyBasePath)\\mongodb\\vs2017.64\\legacy_1.0.0-rc0 @@ -475,10 +478,11 @@ ThirdParty_SQLITE3 = $(XCode_ThirdPartyBasePath)/sqlite-3.8.10.1-ncbi1 ThirdParty_XML = $(XCode_ThirdPartyBasePath)/libxml-2.7.8 ThirdParty_XSLT = $(XCode_ThirdPartyBasePath)/libxml-2.7.8 ThirdParty_GLEW = $(XCode_ThirdPartyBasePath)/glew-1.5.8 +ThirdParty_GL2PS = $(XCode_ThirdPartyBasePath)/gl2ps-1.4.0 ThirdParty_wxWidgets = $(XCode_ThirdPartyBasePath)/wxWidgets-3.1.0-ncbi2 ThirdParty_FreeType = /opt/X11 ThirdParty_FTGL = 
$(XCode_ThirdPartyBasePath)/ftgl-2.1.3-rc5 -ThirdParty_VDB = $(XCode_ThirdPartyVDBBasePath)/vdb/vdb-versions/2.9.3 +ThirdParty_VDB = $(XCode_ThirdPartyVDBBasePath)/vdb/vdb-versions/2.9.4 ThirdParty_GMP = $(Xcode_ThirdPartyBasePath)/gmp-6.0.0a ThirdParty_Nettle = $(Xcode_ThirdPartyBasePath)/nettle-3.1.1 ThirdParty_GNUTLS = $(Xcode_ThirdPartyBasePath)/gnutls-3.4.0 @@ -575,6 +579,8 @@ FTDS_LIBS = FASTCGI_INCLUDE = $(ThirdParty_fastcgi)/include FREETYPE_INCLUDE = $(ThirdParty_FreeType)/include FTGL_INCLUDE = $(ThirdParty_FTGL)/include +GLEW_INCLUDE = $(ThirdParty_GLEW)/include +GL2PS_INCLUDE = $(ThirdParty_GL2PS)/include GNUTLS_INCLUDE = $(ThirdParty_GNUTLS)/include LIBUV_INCLUDE = $(ThirdParty_UV)/include LIBXML_INCLUDE = $(ThirdParty_XML)/include @@ -869,18 +875,18 @@ DEFINES = __WXMSW__,NCBI_WXWIN_USE_PCH,WXUSINGDLL=1 LIBPATH = $(ThirdParty_wxWidgets)u\\lib_dll\\releasedll [wxWidgets.xcode] -LIB = -lwx_osx_cocoa_gl-3.0 -lwx_osx_cocoa_richtext-3.0 -lwx_osx_cocoa_aui-3.0 -lwx_osx_cocoa_propgrid-3.0 -lwx_osx_cocoa_xrc-3.0 -lwx_osx_cocoa_qa-3.0 -lwx_osx_cocoa_html-3.0 -lwx_osx_cocoa_adv-3.0 -lwx_osx_cocoa_core-3.0 -lwx_base_xml-3.0 -lwx_base_net-3.0 -lwx_base-3.0 -framework Cocoa +LIB = -lwx_osx_cocoa_gl-3.1 -lwx_osx_cocoa_richtext-3.1 -lwx_osx_cocoa_aui-3.1 -lwx_osx_cocoa_propgrid-3.1 -lwx_osx_cocoa_xrc-3.1 -lwx_osx_cocoa_qa-3.1 -lwx_osx_cocoa_html-3.1 -lwx_osx_cocoa_adv-3.1 -lwx_osx_cocoa_core-3.1 -lwx_base_xml-3.1 -lwx_base_net-3.1 -lwx_base-3.1 -framework Cocoa [wxWidgets.xcode.debug.DebugMT] -INCLUDE = $(ThirdParty_wxWidgets)/Clang-Debug$(sfx64)MT/lib/wx/include/osx_cocoa-ansi-3.0 $(ThirdParty_wxWidgets)/include/wx-3.0 +INCLUDE = $(ThirdParty_wxWidgets)/Clang-Debug$(sfx64)MT/lib/wx/include/osx_cocoa-ansi-3.1 $(ThirdParty_wxWidgets)/include/wx-3.1 LIBPATH = $(ThirdParty_wxWidgets)/Clang-Debug$(sfx64)MT/lib [wxWidgets.xcode.debug.DebugDLL] -INCLUDE = $(ThirdParty_wxWidgets)/Clang-Debug$(sfx64)/lib/wx/include/osx_cocoa-ansi-3.0 $(ThirdParty_wxWidgets)/include/wx-3.0 
+INCLUDE = $(ThirdParty_wxWidgets)/Clang-Debug$(sfx64)/lib/wx/include/osx_cocoa-ansi-3.1 $(ThirdParty_wxWidgets)/include/wx-3.1 LIBPATH = $(ThirdParty_wxWidgets)/Clang-Debug$(sfx64)/lib [wxWidgets.xcode.release.ReleaseMT] -INCLUDE = $(ThirdParty_wxWidgets)/Clang-Release$(sfx64)MT/lib/wx/include/osx_cocoa-ansi-3.0 $(ThirdParty_wxWidgets)/include/wx-3.0 +INCLUDE = $(ThirdParty_wxWidgets)/Clang-Release$(sfx64)MT/lib/wx/include/osx_cocoa-ansi-3.1 $(ThirdParty_wxWidgets)/include/wx-3.1 LIBPATH = $(ThirdParty_wxWidgets)/Clang-Release$(sfx64)MT/lib [wxWidgets.xcode.release.ReleaseDLL] -INCLUDE = $(ThirdParty_wxWidgets)/Clang-Release$(sfx64)/lib/wx/include/osx_cocoa-ansi-3.0 $(ThirdParty_wxWidgets)/include/wx-3.0 +INCLUDE = $(ThirdParty_wxWidgets)/Clang-Release$(sfx64)/lib/wx/include/osx_cocoa-ansi-3.1 $(ThirdParty_wxWidgets)/include/wx-3.1 LIBPATH = $(ThirdParty_wxWidgets)/Clang-Release$(sfx64)/lib [wxWidgets.xcode.debug] @@ -969,6 +975,7 @@ DEFINES = GLEW_MX INCLUDE = $(ThirdParty_GLEW)/include LIB = -lGLEW LIBPATH = $(ThirdParty_GLEW)/lib$(sfx64) +DEFINES = GLEW_MX [HAVE_LIBGLEW] Component=GLEW @@ -977,6 +984,31 @@ Component=GLEW Component=GLEW +#---------------------------------------------------------------------------- +[GL2PS] +INCLUDE = $(ThirdParty_GL2PS)\\include +LIB = gl2ps.lib +CONFS = DebugDLL ReleaseDLL +[GL2PS.debug.DebugDLL] +LIBPATH = $(ThirdParty_GL2PS)\\lib_static\\debugdll +[GL2PS.release.ReleaseDLL] +LIBPATH = $(ThirdParty_GL2PS)\\lib_static\\releasedll + +[GL2PS.xcode] +INCLUDE = $(ThirdParty_GL2PS)/include +LIB = -lgl2ps +[GL2PS.xcode.debug] +LIBPATH = $(ThirdParty_GL2PS)/Debug/lib +[GL2PS.xcode.release] +LIBPATH = $(ThirdParty_GL2PS)/Release/lib + +[HAVE_LIBGL2PS] +Component=GL2PS + +[GL2PS_LIBS] +Component=GL2PS + + #---------------------------------------------------------------------------- #[GNUTLS] #INCLUDE = $(ThirdParty_GNUTLS)\\include diff --git a/c++/src/build-system/project_tree_builder/mac_prj_generator.cpp 
b/c++/src/build-system/project_tree_builder/mac_prj_generator.cpp index 908128ba..cabfe2a5 100644 --- a/c++/src/build-system/project_tree_builder/mac_prj_generator.cpp +++ b/c++/src/build-system/project_tree_builder/mac_prj_generator.cpp @@ -1,4 +1,4 @@ -/* $Id: mac_prj_generator.cpp 570296 2018-09-06 16:51:11Z ivanov $ +/* $Id: mac_prj_generator.cpp 576669 2018-12-19 13:11:04Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -202,7 +202,7 @@ void CMacProjectGenerator::Generate(const string& solution) AddString( *dataspec_dependencies, proj_dependency); } if (prj.m_ProjType != CProjKey::eLib && prj.m_ProjType != CProjKey::eDll && - prj.m_ProjType != CProjKey::eApp && prj.m_ProjType != CProjKey::eDataSpec && + prj.m_ProjType != CProjKey::eApp && //prj.m_ProjType != CProjKey::eDataSpec && prj.m_ProjType != CProjKey::eMsvc) { continue; } @@ -234,6 +234,12 @@ void CMacProjectGenerator::Generate(const string& solution) AddString( *file_groups, CreateProjectFileGroups(prj, prj_files, *dict_objects, *build_files)); + // project custom script phase + string proj_prebuild_script( + CreateProjectCustomScriptPhase(prj, prj_files, *dict_objects, "PreBuild")); + if (!proj_prebuild_script.empty()) { + AddString( *build_phases, proj_prebuild_script); + } // project script phase string proj_script( CreateProjectScriptPhase(prj, prj_files, *dict_objects)); @@ -248,7 +254,7 @@ void CMacProjectGenerator::Generate(const string& solution) } // project custom script phase string proj_cust_script( - CreateProjectCustomScriptPhase(prj, prj_files, *dict_objects)); + CreateProjectCustomScriptPhase(prj, prj_files, *dict_objects, "CustomScript")); if (!proj_cust_script.empty()) { AddString( *build_phases, proj_cust_script); } @@ -301,7 +307,11 @@ void CMacProjectGenerator::Generate(const string& solution) } else if (prj.m_ProjType == CProjKey::eApp) { AddString( *dict_product, "path", prj.m_ID); } else if (prj.m_ProjType == 
CProjKey::eLib) { - AddString( *dict_product, "path", string("lib") + prj.m_ID + string(".a")); + if (prj.m_IsMetallib) { + AddString( *dict_product, "path", prj.m_ID + string(".metallib")); + } else { + AddString( *dict_product, "path", string("lib") + prj.m_ID + string(".a")); + } } AddString( *dict_product, "sourceTree", "BUILT_PRODUCTS_DIR"); } @@ -622,6 +632,7 @@ string CMacProjectGenerator::CreateProjectScriptPhase( script += "if test $? -ne 0; then\n"; script += "cp -p " + infile + " " + outfile + "\n"; script += "fi\n"; + AddString( *outputs, outfile); } // datatool if (!prj.m_DatatoolSources.empty()) { @@ -657,6 +668,7 @@ string CMacProjectGenerator::CreateProjectScriptPhase( script += "export PTB_PLATFORM=\"$ARCHS\"\n"; if (spec_ext == ".proto") { script += "export GENERATOR_PATH=" + GetApp().GetSite().GetConfigureEntry("XCode_CustomCodeGenerator" + spec_ext) + "\n"; + script += "export PROJECT_REQUIRES=" + NStr::Join(prj.m_Requires,";") + "\n"; } else { script += "export DATATOOL_PATH=" + m_OutputDir + "../static/bin/ReleaseDLL\n"; } @@ -711,14 +723,14 @@ string CMacProjectGenerator::CreateProjectScriptPhase( string CMacProjectGenerator::CreateProjectCustomScriptPhase( const CProjItem& prj, const CProjectFileCollector& prj_files, - CDict& dict_objects) + CDict& dict_objects, const string& section) { SCustomScriptInfo info; - prj_files.GetProjectContext().GetMsvcProjectMakefile().GetCustomScriptInfo(info); + prj_files.GetProjectContext().GetMsvcProjectMakefile().GetCustomScriptInfo(info, section); if (!info.m_Script.empty()) { #if USE_VERBOSE_NAMES - string proj_script( GetProjId( prj) + "_cust_script"); + string proj_script( GetProjId( prj) + section); #else string proj_script( GetUUID()); #endif @@ -748,6 +760,7 @@ string CMacProjectGenerator::CreateProjectCustomScriptPhase( } AddString( *dict_script, "shellPath", info.m_Shell); AddString( *dict_script, "shellScript", + CDirEntry::IsAbsolutePath(info.m_Script) ? 
info.m_Script : GetRelativePath(CDirEntry::ConcatPath(script_loc,info.m_Script))); AddString( *dict_script, "showEnvVarsInLog", "0"); return proj_script; @@ -1235,7 +1248,8 @@ void CMacProjectGenerator::CreateProjectBuildSettings( AddString( *settings, "OTHER_LDFLAGS", ldlib); } - AddString( *settings, "PRODUCT_NAME", GetTargetName(prj)); +// AddString( *settings, "PRODUCT_NAME", GetTargetName(prj)); + AddString( *settings, "PRODUCT_NAME", prj.m_ID); if (prj.m_ProjType == CProjKey::eDataSpec || prj.m_ProjType == CProjKey::eMsvc) { return; } @@ -1490,6 +1504,8 @@ string CMacProjectGenerator::AddFile(CDict& dict, const string& name, bool style filetype = "text.xml"; } else if (ext == ".jsd") { filetype = "text.json"; + } else if (ext == ".metal") { + filetype = "sourcecode.metal"; } else { filetype = "text"; } @@ -1675,6 +1691,13 @@ string CMacProjectGenerator::GetTargetName( const CProjItem& prj) if (prj.m_ProjType == CProjKey::eLib) { return /*string("lib") +*/ prj.m_ID; } + if (prj.m_ProjType == CProjKey::eApp) { + CProjKey klib(CProjKey::eLib, prj.m_ID), kdll(CProjKey::eDll, prj.m_ID); + if (m_Projects_tree.m_Projects.find(klib) != m_Projects_tree.m_Projects.end() || + m_Projects_tree.m_Projects.find(kdll) != m_Projects_tree.m_Projects.end()) { + return string(prj.m_ID) + ".exe"; + } + } return prj.m_ID; } @@ -1704,7 +1727,11 @@ string CMacProjectGenerator::GetMachOType( const CProjItem& prj) string CMacProjectGenerator::GetProductType( const CProjItem& prj) { if (prj.m_ProjType == CProjKey::eLib) { - return "com.apple.product-type.library.static"; + if (prj.m_IsMetallib) { + return "com.apple.product-type.metal-library"; + } else { + return "com.apple.product-type.library.static"; + } } else if (prj.m_ProjType == CProjKey::eDll) { return "com.apple.product-type.library.dynamic"; } else if (prj.m_ProjType == CProjKey::eApp) { @@ -1718,7 +1745,11 @@ string CMacProjectGenerator::GetProductType( const CProjItem& prj) string CMacProjectGenerator::GetExplicitType( 
const CProjItem& prj) { if (prj.m_ProjType == CProjKey::eLib) { - return "archive.ar"; + if (prj.m_IsMetallib) { + return "archive.metal-library"; + } else { + return "archive.ar"; + } } else if (prj.m_ProjType == CProjKey::eDll) { return "compiled.mach-o.dylib"; } else if (prj.m_ProjType == CProjKey::eApp) { diff --git a/c++/src/build-system/project_tree_builder/mac_prj_generator.hpp b/c++/src/build-system/project_tree_builder/mac_prj_generator.hpp index e634ceae..656bb2b1 100644 --- a/c++/src/build-system/project_tree_builder/mac_prj_generator.hpp +++ b/c++/src/build-system/project_tree_builder/mac_prj_generator.hpp @@ -1,7 +1,7 @@ #ifndef PROJECT_TREE_BUILDER__MAC_PRJ_GENERATOR__HPP #define PROJECT_TREE_BUILDER__MAC_PRJ_GENERATOR__HPP -/* $Id: mac_prj_generator.hpp 431139 2014-04-01 19:11:21Z ucko $ +/* $Id: mac_prj_generator.hpp 576669 2018-12-19 13:11:04Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -79,7 +79,7 @@ private: CDict& dict_objects); string CreateProjectCustomScriptPhase( const CProjItem& prj, const CProjectFileCollector& prj_files, - CDict& dict_objects); + CDict& dict_objects, const string& section); string CreateProjectCopyBinScript( const CProjItem& prj, const CProjectFileCollector& prj_files, CDict& dict_objects); @@ -149,7 +149,7 @@ private: static string GetProjHeaders( const CProjItem& prj); static string GetProjDependency( const CProjItem& prj); static string GetProjContainer( const CProjItem& prj); - static string GetTargetName( const CProjItem& prj); + string GetTargetName( const CProjItem& prj); static string GetMachOType( const CProjItem& prj); static string GetProductType( const CProjItem& prj); diff --git a/c++/src/build-system/project_tree_builder/msvc_configure.cpp b/c++/src/build-system/project_tree_builder/msvc_configure.cpp index 431cddfc..934866c6 100644 --- a/c++/src/build-system/project_tree_builder/msvc_configure.cpp +++ 
b/c++/src/build-system/project_tree_builder/msvc_configure.cpp @@ -1,4 +1,4 @@ -/* $Id: msvc_configure.cpp 572337 2018-10-11 16:38:17Z ivanov $ +/* $Id: msvc_configure.cpp 576267 2018-12-12 17:34:36Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -311,7 +311,7 @@ void CMsvcConfigure::WriteBuildVer(CMsvcSite& site, const string& root_dir, cons if (!cfg.empty()) { NStr::ReplaceInPlace(cfg_root_inc, cfg, config.GetConfigFullName()); } - string extra = site.GetConfigureEntry("BuildVerPath"); + string extra = CDirEntry::ConvertToOSPath(site.GetConfigureEntry("BuildVerPath")); string filename = CDirEntry::ConcatPath(cfg_root_inc, extra); if (extra.empty()) { diff --git a/c++/src/build-system/project_tree_builder/msvc_makefile.cpp b/c++/src/build-system/project_tree_builder/msvc_makefile.cpp index 54d10c55..625a75e5 100644 --- a/c++/src/build-system/project_tree_builder/msvc_makefile.cpp +++ b/c++/src/build-system/project_tree_builder/msvc_makefile.cpp @@ -1,4 +1,4 @@ -/* $Id: msvc_makefile.cpp 485908 2015-11-30 14:28:08Z gouriano $ +/* $Id: msvc_makefile.cpp 576669 2018-12-19 13:11:04Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -443,7 +443,7 @@ void CMsvcProjectMakefile::GetAdditionalIncludeDirs(const SConfigInfo& config, string dirs_string = GetOpt(m_MakeFile, "AddToProject", "IncludeDirs", config); - NStr::Split(dirs_string, LIST_SEPARATOR, *dirs, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate); + NStr::Split(dirs_string, LIST_SEPARATOR, *dirs, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate | NStr::fSplit_CanQuote); } void CMsvcProjectMakefile::GetHeadersInInclude(const SConfigInfo& config, @@ -528,9 +528,8 @@ CMsvcProjectMakefile::GetCustomBuildInfo(list* info) const } void -CMsvcProjectMakefile::GetCustomScriptInfo(SCustomScriptInfo& info) const +CMsvcProjectMakefile::GetCustomScriptInfo(SCustomScriptInfo& info, const 
string& sec) const { - string sec("CustomScript"); info.m_Input = m_MakeFile.GetString(sec, "Input"); info.m_Output = m_MakeFile.GetString(sec, "Output"); info.m_Shell = m_MakeFile.GetString(sec, "Shell"); @@ -785,9 +784,9 @@ void CMsvcCombinedProjectMakefile::GetCustomBuildInfo } void CMsvcCombinedProjectMakefile::GetCustomScriptInfo - (SCustomScriptInfo& info) const + (SCustomScriptInfo& info, const string& section) const { - m_ProjectMakefile->GetCustomScriptInfo(info); + m_ProjectMakefile->GetCustomScriptInfo(info, section); } diff --git a/c++/src/build-system/project_tree_builder/msvc_makefile.hpp b/c++/src/build-system/project_tree_builder/msvc_makefile.hpp index 510cc58c..170e01a0 100644 --- a/c++/src/build-system/project_tree_builder/msvc_makefile.hpp +++ b/c++/src/build-system/project_tree_builder/msvc_makefile.hpp @@ -1,6 +1,6 @@ #ifndef PROJECT_TREE_BULDER__MSVC_MAKEFILE__HPP #define PROJECT_TREE_BULDER__MSVC_MAKEFILE__HPP -/* $Id: msvc_makefile.hpp 554978 2018-01-11 15:08:49Z gouriano $ +/* $Id: msvc_makefile.hpp 576669 2018-12-19 13:11:04Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -256,7 +256,7 @@ public: virtual void GetExtraFiles (map >* files) const; - void GetCustomScriptInfo (SCustomScriptInfo& info) const; + void GetCustomScriptInfo (SCustomScriptInfo& info, const string& section) const; string m_ProjectBaseDir; @@ -360,7 +360,7 @@ public: virtual void GetExtraFiles (map >* files) const; - void GetCustomScriptInfo (SCustomScriptInfo& info) const; + void GetCustomScriptInfo (SCustomScriptInfo& info, const string& section) const; private: typedef const CMsvcProjectMakefile* TProjectMakefile; diff --git a/c++/src/build-system/project_tree_builder/msvc_prj_generator.cpp b/c++/src/build-system/project_tree_builder/msvc_prj_generator.cpp index 54fc35ce..b3101faf 100644 --- a/c++/src/build-system/project_tree_builder/msvc_prj_generator.cpp +++ 
b/c++/src/build-system/project_tree_builder/msvc_prj_generator.cpp @@ -739,6 +739,7 @@ void s_CreateDatatoolCustomBuildInfo(const CProjItem& prj, if (ext == ".proto") { dt_path = GetApp().GetSite().GetConfigureEntry("CustomCodeGenerator" + ext); build_info->m_CommandLine = "set GENERATOR_PATH=" + dt_path + "\n"; + build_info->m_CommandLine += "set PROJECT_REQUIRES=" + NStr::Join(prj.m_Requires,";") + "\n"; } else { build_info->m_CommandLine = "set DATATOOL_PATH=" + dt_path + "\n"; } diff --git a/c++/src/build-system/project_tree_builder/msvc_prj_utils.cpp b/c++/src/build-system/project_tree_builder/msvc_prj_utils.cpp index 4050f31b..e80b2535 100644 --- a/c++/src/build-system/project_tree_builder/msvc_prj_utils.cpp +++ b/c++/src/build-system/project_tree_builder/msvc_prj_utils.cpp @@ -1,4 +1,4 @@ -/* $Id: msvc_prj_utils.cpp 568727 2018-08-09 18:33:41Z gouriano $ +/* $Id: msvc_prj_utils.cpp 576272 2018-12-12 17:36:12Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -445,6 +445,10 @@ string SourceFileExt(const string& file_path) if ( CFile(file).Exists() ) { return ".c.in"; } + file += ".metal"; + if ( CFile(file).Exists() ) { + return ".metal"; + } return ""; } diff --git a/c++/src/build-system/project_tree_builder/msvc_site.cpp b/c++/src/build-system/project_tree_builder/msvc_site.cpp index 78a25af6..7f5a346c 100644 --- a/c++/src/build-system/project_tree_builder/msvc_site.cpp +++ b/c++/src/build-system/project_tree_builder/msvc_site.cpp @@ -1,4 +1,4 @@ -/* $Id: msvc_site.cpp 485908 2015-11-30 14:28:08Z gouriano $ +/* $Id: msvc_site.cpp 576268 2018-12-12 17:35:08Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -334,7 +334,7 @@ void CMsvcSite::GetLibInfo(const string& lib, } else { string include_str = ToOSPath( ProcessMacros(GetOpt(m_Registry, section, "INCLUDE", config),false)); - NStr::Split(include_str, LIST_SEPARATOR, 
libinfo->m_IncludeDir, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate); + NStr::Split(include_str, LIST_SEPARATOR, libinfo->m_IncludeDir, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate | NStr::fSplit_CanQuote); string defines_str = GetOpt(m_Registry, section, "DEFINES", config); NStr::Split(defines_str, LIST_SEPARATOR, libinfo->m_LibDefines, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate); @@ -563,7 +563,7 @@ void CMsvcSite::GetLibChoiceIncludes( cpp_flags_define); //split on parts list parts; - NStr::Split(include_str, LIST_SEPARATOR, parts, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate); + NStr::Split(include_str, LIST_SEPARATOR, parts, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate | NStr::fSplit_CanQuote | NStr::fSplit_CanQuote); string lib_id; ITERATE(list, p, parts) { diff --git a/c++/src/build-system/project_tree_builder/prj_file_collector.cpp b/c++/src/build-system/project_tree_builder/prj_file_collector.cpp index 572aee47..8b39957b 100644 --- a/c++/src/build-system/project_tree_builder/prj_file_collector.cpp +++ b/c++/src/build-system/project_tree_builder/prj_file_collector.cpp @@ -1,4 +1,4 @@ -/* $Id: prj_file_collector.cpp 568727 2018-08-09 18:33:41Z gouriano $ +/* $Id: prj_file_collector.cpp 576272 2018-12-12 17:36:12Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -250,7 +250,7 @@ bool CProjectFileCollector::GetIncludeDirs(list& inc_dirs, const SConfig inc_dirs.clear(); string alldirs = m_ProjContext.AdditionalIncludeDirectories(cfg); list dirs; - NStr::Split(alldirs, LIST_SEPARATOR, dirs, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate); + NStr::Split(alldirs, LIST_SEPARATOR, dirs, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate | NStr::fSplit_CanQuote); ITERATE( list, i, dirs) { string dir; #ifdef PSEUDO_XCODE @@ -273,7 +273,7 @@ bool CProjectFileCollector::GetLibraryDirs(list& lib_dirs, const SConfig lib_dirs.clear(); string alldirs = 
m_ProjContext.AdditionalLibraryDirectories(cfg); list dirs; - NStr::Split(alldirs, LIST_SEPARATOR, dirs, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate); + NStr::Split(alldirs, LIST_SEPARATOR, dirs, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate | NStr::fSplit_CanQuote); ITERATE( list, i, dirs) { string dir; #ifdef PSEUDO_XCODE @@ -317,7 +317,7 @@ string CProjectFileCollector::GetFileExtension(const string& file) return ".cpp"; } } - string ext_in[] = {".cpp", ".cpp.in", ".c", ".c.in", kEmptyStr}; + string ext_in[] = {".cpp", ".cpp.in", ".c", ".c.in", ".metal", kEmptyStr}; for (int i=0; !ext_in[i].empty(); ++i) { if ( CFile(file + ext_in[i]).Exists() ) { return ext_in[i]; diff --git a/c++/src/build-system/project_tree_builder/proj_builder_app.cpp b/c++/src/build-system/project_tree_builder/proj_builder_app.cpp index e8986b67..03038627 100644 --- a/c++/src/build-system/project_tree_builder/proj_builder_app.cpp +++ b/c++/src/build-system/project_tree_builder/proj_builder_app.cpp @@ -1,4 +1,4 @@ -/* $Id: proj_builder_app.cpp 572337 2018-10-11 16:38:17Z ivanov $ +/* $Id: proj_builder_app.cpp 576669 2018-12-19 13:11:04Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -405,7 +405,7 @@ struct PIsExcludedByDisuse //----------------------------------------------------------------------------- CProjBulderApp::CProjBulderApp(void) { - SetVersion( CVersionInfo(4,6,2) ); + SetVersion( CVersionInfo(4,7,1) ); m_ScanningWholeTree = false; m_Dll = false; m_AddMissingLibs = false; @@ -1505,7 +1505,7 @@ void CProjBulderApp::GenerateUnixProjects(CProjectItemsTree& projects_tree) } ofs << " $(MAKE) $(MFLAGS)" << " APP_PROJ=" << target_app - << " LIB_PROJ=" << target_lib + << (p->second.m_IsMetallib ? 
" LIB_PROJ=\"\" METAL_PROJ=" : " LIB_PROJ=") << target_lib << " UNIX_PROJ=" << target_user << " $(MTARGET) $(SKIP_PRELIMINARIES)" << endl << endl; #endif diff --git a/c++/src/build-system/project_tree_builder/proj_item.cpp b/c++/src/build-system/project_tree_builder/proj_item.cpp index 8f4b7abd..b7d349eb 100644 --- a/c++/src/build-system/project_tree_builder/proj_item.cpp +++ b/c++/src/build-system/project_tree_builder/proj_item.cpp @@ -1,4 +1,4 @@ -/* $Id: proj_item.cpp 422076 2013-12-17 18:02:47Z gouriano $ +/* $Id: proj_item.cpp 576272 2018-12-12 17:36:12Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -167,6 +167,7 @@ CProjItem::CProjItem(TProjType type, m_MakeType(maketype), m_GUID(guid), m_IsBundle(false), + m_IsMetallib(false), m_External(false), m_StyleObjcpp(false) { @@ -184,6 +185,7 @@ void CProjItem::Clear(void) m_ProjType = CProjKey::eNoProj; m_MakeType = eMakeType_Undefined; m_IsBundle = false; + m_IsMetallib = false; m_External = false; m_StyleObjcpp = false; m_MkName.clear(); @@ -225,6 +227,7 @@ void CProjItem::SetFrom(const CProjItem& item) m_CustomBuild = item.m_CustomBuild; m_IsBundle = item.m_IsBundle; + m_IsMetallib = item.m_IsMetallib; m_External = item.m_External; m_StyleObjcpp = item.m_StyleObjcpp; m_MkName = item.m_MkName; diff --git a/c++/src/build-system/project_tree_builder/proj_item.hpp b/c++/src/build-system/project_tree_builder/proj_item.hpp index 17884c9b..c61225dd 100644 --- a/c++/src/build-system/project_tree_builder/proj_item.hpp +++ b/c++/src/build-system/project_tree_builder/proj_item.hpp @@ -1,7 +1,7 @@ #ifndef PROJECT_TREE_BUILDER__PROJ_ITEM__HPP #define PROJECT_TREE_BUILDER__PROJ_ITEM__HPP -/* $Id: proj_item.hpp 422076 2013-12-17 18:02:47Z gouriano $ +/* $Id: proj_item.hpp 576272 2018-12-12 17:36:12Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -144,6 +144,7 @@ public: list m_CustomBuild; 
mutable bool m_IsBundle; + bool m_IsMetallib; bool m_External; bool m_StyleObjcpp; string m_MkName; diff --git a/c++/src/build-system/project_tree_builder/proj_src_resolver.cpp b/c++/src/build-system/project_tree_builder/proj_src_resolver.cpp index f0ef21da..32f62a1c 100644 --- a/c++/src/build-system/project_tree_builder/proj_src_resolver.cpp +++ b/c++/src/build-system/project_tree_builder/proj_src_resolver.cpp @@ -1,4 +1,4 @@ -/* $Id: proj_src_resolver.cpp 568681 2018-08-09 14:49:09Z gouriano $ +/* $Id: proj_src_resolver.cpp 576272 2018-12-12 17:36:12Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -112,6 +112,8 @@ static bool s_SourceFileExists(const string& dir, const string& name) return true; if ( CDirEntry(path + ".c").Exists() ) return true; + if ( CDirEntry(path + ".metal").Exists() ) + return true; return false; } diff --git a/c++/src/build-system/project_tree_builder/proj_tree.cpp b/c++/src/build-system/project_tree_builder/proj_tree.cpp index 85e8a776..19bf8b85 100644 --- a/c++/src/build-system/project_tree_builder/proj_tree.cpp +++ b/c++/src/build-system/project_tree_builder/proj_tree.cpp @@ -1,4 +1,4 @@ -/* $Id: proj_tree.cpp 568727 2018-08-09 18:33:41Z gouriano $ +/* $Id: proj_tree.cpp 576272 2018-12-12 17:36:12Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -203,6 +203,12 @@ void CProjectItemsTree::CreateFrom(const string& root_src, applib_mfilepath, makemsvc, tree, info.m_MakeType); } + else if (info.m_Type == SMakeProjectT::SMakeInInfo::eMetal) { + SLibProjectT::DoCreate(source_base_dir, + proj_name, + applib_mfilepath, + makelib, tree, info.m_MakeType); + } } } } diff --git a/c++/src/build-system/project_tree_builder/proj_tree_builder.cpp b/c++/src/build-system/project_tree_builder/proj_tree_builder.cpp index cb8fb13f..659bfd05 100644 --- a/c++/src/build-system/project_tree_builder/proj_tree_builder.cpp +++ 
b/c++/src/build-system/project_tree_builder/proj_tree_builder.cpp @@ -1,4 +1,4 @@ -/* $Id: proj_tree_builder.cpp 568727 2018-08-09 18:33:41Z gouriano $ +/* $Id: proj_tree_builder.cpp 576272 2018-12-12 17:36:12Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -158,7 +158,8 @@ CProjItem::TProjType SMakeProjectT::GetProjType(const string& base_dir, return CProjKey::eApp; else if (CDirEntry(fname_msvc).Exists() || CDirEntry(fname_msvc2).Exists() ) return CProjKey::eMsvc; - + else if (CDirEntry(CDirEntry::ConcatPath(base_dir, fname + ".metal")).Exists() ) + return CProjKey::eLib; switch (type) { case SMakeInInfo::eApp: @@ -446,7 +447,7 @@ void SMakeProjectT::CreateIncludeDirs(const list& cpp_flags, GetApp().GetSite().ResolveDefine(CSymResolver::StripDefine(flag), dir_all); if ( !dir_all.empty() ) { list dir_list; - NStr::Split(dir_all, LIST_SEPARATOR, dir_list, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate); + NStr::Split(dir_all, LIST_SEPARATOR, dir_list, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate | NStr::fSplit_CanQuote); ITERATE(list, dir_item, dir_list) { const string& dir = *dir_item; if ( CDirEntry(dir).IsDir() ) { @@ -717,6 +718,12 @@ void SMakeProjectT::AnalyzeMakeIn max(makein_contents.GetMakeType(),eMakeType_Expendable))); } } + p = makein_contents.m_Contents.find("METAL_PROJ"); + if (p != makein_contents.m_Contents.end()) { + + info->push_back(SMakeInInfo(SMakeInInfo::eMetal, p->second, + makein_contents.GetMakeType())); + } } @@ -730,7 +737,7 @@ string SMakeProjectT::CreateMakeAppLibFileName string fname = "Makefile." + projname; switch (proj_type) { - case CProjKey::eLib: fname += ".lib"; break; + case CProjKey::eLib: fname += type == SMakeInInfo::eMetal ? 
".metal" : ".lib"; break; case CProjKey::eDll: fname += ".dll"; break; case CProjKey::eApp: fname += ".app"; break; case CProjKey::eMsvc: @@ -1737,6 +1744,9 @@ CProjKey SLibProjectT::DoCreate(const string& source_base_dir, (tree->m_Projects[proj_key]).m_StyleObjcpp = style_objcpp; (tree->m_Projects[proj_key]).m_MkName = applib_mfilepath; (tree->m_Projects[proj_key]).m_DataSource = CSimpleMakeFileContents(applib_mfilepath); + if (CDirEntry(full_makefile_name).GetExt() == ".metal") { + (tree->m_Projects[proj_key]).m_IsMetallib = true; + } k = m->second.m_Contents.find("HEADER_EXPORT"); if (k != m->second.m_Contents.end()) { @@ -2908,6 +2918,7 @@ void CProjectTreeBuilder::ProcessDir(const string& dir_name, map userprojects; vector ordered_subprojects; string topbuilddir; + bool has_metal = false; bool get_order = GetApp().IsScanningWholeTree(); if (is_root && get_order) { topbuilddir = GetApp().GetRegSettings().GetTopBuilddir(); @@ -2980,10 +2991,8 @@ void CProjectTreeBuilder::ProcessDir(const string& dir_name, "LIB_PROJ","EXPENDABLE_LIB_PROJ","POTENTIAL_LIB_PROJ",""}; EMakeFileType libtype[] = { eMakeType_Undefined, eMakeType_Undefined, eMakeType_Undefined, eMakeType_Undefined, - eMakeType_Undefined,eMakeType_Expendable,eMakeType_Potential}; - if (filter->ExcludePotential()) { - libtype[6] = eMakeType_Excluded; - } + eMakeType_Undefined, eMakeType_Undefined, + eMakeType_Undefined,eMakeType_Expendable, filter->ExcludePotential() ? 
eMakeType_Excluded : eMakeType_Potential}; for (j=0; !libproj[j].empty(); ++j) { k = makefile.m_Contents.find(libproj[j]); if (k != makefile.m_Contents.end()) { @@ -3001,10 +3010,8 @@ void CProjectTreeBuilder::ProcessDir(const string& dir_name, } } string dllproj[] = {"DLL_PROJ","EXPENDABLE_DLL_PROJ","POTENTIAL_DLL_PROJ",""}; - EMakeFileType dlltype[] = {eMakeType_Undefined,eMakeType_Expendable,eMakeType_Potential}; - if (filter->ExcludePotential()) { - dlltype[2] = eMakeType_Excluded; - } + EMakeFileType dlltype[] = {eMakeType_Undefined,eMakeType_Expendable, + filter->ExcludePotential() ? eMakeType_Excluded : eMakeType_Potential}; for (j=0; !dllproj[j].empty(); ++j) { k = makefile.m_Contents.find(dllproj[j]); if (k != makefile.m_Contents.end()) { @@ -3021,6 +3028,25 @@ void CProjectTreeBuilder::ProcessDir(const string& dir_name, } } } + string metallib[] = {"METAL_PROJ", ""}; + EMakeFileType metaltype[] = { eMakeType_Undefined}; + for (j=0; !metallib[j].empty(); ++j) { + k = makefile.m_Contents.find(metallib[j]); + if (k != makefile.m_Contents.end()) { + const list& values = k->second; + for (list::const_iterator i=values.begin(); i!=values.end(); ++i) { + if (i->at(0) == '#') { + break; + } + string mkname("Makefile." 
+ *i + ".metal"); + libprojects[mkname] = max(maketype, metaltype[j]); + if (get_order) { + s_WriteBuildOrder(dir_name,mkname); + } + has_metal = true; + } + } + } string appproj[] = {"APP_PROJ","EXPENDABLE_APP_PROJ","POTENTIAL_APP_PROJ",""}; EMakeFileType apptype[] = {eMakeType_Undefined,eMakeType_Expendable,eMakeType_Potential}; if (filter->ExcludePotential()) { @@ -3058,6 +3084,16 @@ void CProjectTreeBuilder::ProcessDir(const string& dir_name, ProcessMakeLibFile(dir_entry->GetPath(), makefiles, libprojects[name], mkin); } + if (has_metal) { + contents = dir.GetEntries("Makefile.*.metal"); + ITERATE(CDir::TEntries, p, contents) { + const AutoPtr& dir_entry = *p; + const string name = dir_entry->GetName(); + if (libprojects.find(name) != libprojects.end()) { + ProcessMakeLibFile(dir_entry->GetPath(), makefiles, libprojects[name], mkin); + } + } + } } // Process Makefile.*.dll if ( process_projects && !dllprojects.empty()) { diff --git a/c++/src/build-system/project_tree_builder/proj_tree_builder.hpp b/c++/src/build-system/project_tree_builder/proj_tree_builder.hpp index ee1b6d19..cdc27d39 100644 --- a/c++/src/build-system/project_tree_builder/proj_tree_builder.hpp +++ b/c++/src/build-system/project_tree_builder/proj_tree_builder.hpp @@ -1,7 +1,7 @@ #ifndef PROJECT_TREE_BUILDER__PROJ_TREE_BUILDER__HPP #define PROJECT_TREE_BUILDER__PROJ_TREE_BUILDER__HPP -/* $Id: proj_tree_builder.hpp 568727 2018-08-09 18:33:41Z gouriano $ +/* $Id: proj_tree_builder.hpp 576272 2018-12-12 17:36:12Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -68,6 +68,7 @@ struct SMakeProjectT eWSDL, eJSD, eProtobuf, + eMetal, eMsvc } TMakeinType; diff --git a/c++/src/build-system/ptb_version.txt b/c++/src/build-system/ptb_version.txt index c78c4964..7c66fca5 100644 --- a/c++/src/build-system/ptb_version.txt +++ b/c++/src/build-system/ptb_version.txt @@ -1 +1 @@ -4.6.2 +4.7.1 diff --git a/c++/src/build-system/relocate.sh.in 
b/c++/src/build-system/relocate.sh.in index cfa75f33..3ea058eb 100644 --- a/c++/src/build-system/relocate.sh.in +++ b/c++/src/build-system/relocate.sh.in @@ -1,6 +1,6 @@ @script_shell@ -# $Id: relocate.sh.in 575212 2018-11-26 16:42:37Z blastadm $ +# $Id: relocate.sh.in 582158 2019-03-11 19:08:29Z blastadm $ # Author: Denis Vakatov, NCBI # # Adjust paths to this build tree and the relevant source tree diff --git a/c++/src/connect/CMakeLists.xxconnect.lib.txt b/c++/src/connect/CMakeLists.xxconnect.lib.txt index 1c1874f2..77e5ab1e 100644 --- a/c++/src/connect/CMakeLists.xxconnect.lib.txt +++ b/c++/src/connect/CMakeLists.xxconnect.lib.txt @@ -1,12 +1,13 @@ ############################################################################# -# $Id: CMakeLists.xxconnect.lib.txt 564453 2018-05-29 13:28:08Z gouriano $ +# $Id: CMakeLists.xxconnect.lib.txt 577151 2018-12-31 15:16:55Z dicuccio $ ############################################################################# NCBI_begin_lib(xxconnect) NCBI_sources(${SRC_CXX}) NCBI_headers(*.hpp) NCBI_requires(TLS) - NCBI_uses_toolkit_libraries(connect xncbi) + NCBI_requires(NCBI_C) + NCBI_uses_toolkit_libraries(xncbi) NCBI_project_tags(core) NCBI_project_watchers(lavr mcelhany) NCBI_end_lib() diff --git a/c++/src/connect/ncbi_http_session.cpp b/c++/src/connect/ncbi_http_session.cpp index 3370415d..18dbacd0 100644 --- a/c++/src/connect/ncbi_http_session.cpp +++ b/c++/src/connect/ncbi_http_session.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbi_http_session.cpp 567636 2018-07-21 15:53:54Z mcelhany $ +/* $Id: ncbi_http_session.cpp 576759 2018-12-20 14:37:49Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -831,7 +831,9 @@ void CHttpRequest::x_InitConnection(bool use_form_data) bool CHttpRequest::x_CanSendData(void) const { - return m_Method == eReqMethod_Post || m_Method == eReqMethod_Put; + return m_Method == eReqMethod_Post || + m_Method == eReqMethod_Put || + m_Method == 
eReqMethod_Patch; } diff --git a/c++/src/connect/ncbi_ipv6.c b/c++/src/connect/ncbi_ipv6.c index 72a10a10..16b22d89 100644 --- a/c++/src/connect/ncbi_ipv6.c +++ b/c++/src/connect/ncbi_ipv6.c @@ -1,4 +1,4 @@ -/* $Id: ncbi_ipv6.c 565391 2018-06-11 18:09:29Z lavr $ +/* $Id: ncbi_ipv6.c 578143 2019-01-14 15:56:16Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -386,12 +386,8 @@ static char* x_IPv6ToString(char* buf, size_t bufsize, { char ipv6[64/*enough for sizeof(8 * "xxxx:")*/]; char ipv4[sizeof("255.255.255.255")]; + size_t i, n, pos, len, zpos, zlen; unsigned short word; - struct { - size_t pos; - size_t len; - } gap[sizeof(addr->octet) / sizeof(word)]; - size_t i, n, z, zlen; char* ptr = ipv6; if (x_NcbiIsIPv4(addr, 1/*compat*/)) { @@ -405,38 +401,37 @@ static char* x_IPv6ToString(char* buf, size_t bufsize, *ipv4 = '\0'; } - gap[0].pos = 0; - i = z = zlen = 0; - while (i <= n) { - memcpy(&word, &addr->octet[i * sizeof(word)], sizeof(word)); - if (i == n || word) { - size_t len = i - gap[z].pos; - if (len > 1) { /*RFC 5952 4.2.2*/ - gap[z++].len = len; - if (zlen < len) - zlen = len; /*RFC 5952 4.2.1*/ + pos = i = zpos = zlen = 0; + for (;;) { + if (i < n) { + memcpy(&word, &addr->octet[i * sizeof(word)], sizeof(word)); + if (!word) { + ++i; + continue; } - if (i == n) - break; - assert(z < sizeof(gap) / sizeof(gap[0])); - gap[z].pos = ++i; - } else - ++i; + } + len = i - pos; + if (len > 1) { /*RFC 5952 4.2.2*/ + if (zlen < len) { + zlen = len; /*RFC 5952 4.2.1*/ + zpos = pos; + } + } + if (i == n) + break; + pos = ++i; } - i = z = 0; + i = 0; while (i < n) { - if (zlen && gap[z].pos == i) { + if (zlen && zpos == i) { assert(zlen > 1); - if (zlen == gap[z].len) { + *ptr++ = ':'; + if (zlen == n - i) *ptr++ = ':'; - if (zlen == n - i) - *ptr++ = ':'; - i += zlen; - zlen = 0; /*RFC 5952 4.2.3*/ - continue; - } - z++; + i += zlen; + zlen = 0; /*RFC 5952 4.2.3*/ + continue; } memcpy(&word, 
&addr->octet[i * sizeof(word)], sizeof(word)); ptr += sprintf(ptr, &":%x"[!i], /*RFC 5952 4.1, 4.3*/ @@ -451,8 +446,8 @@ static char* x_IPv6ToString(char* buf, size_t bufsize, *ptr++ = ':'; } n = (size_t)(ptr - ipv6); - z = n + i; - if (z < bufsize) { + len = n + i; + if (len < bufsize) { memcpy(buf, ipv6, n); buf += n; memcpy(buf, ipv4, i); diff --git a/c++/src/connect/ncbi_linkerd.c b/c++/src/connect/ncbi_linkerd.c index cd922d03..7742c205 100644 --- a/c++/src/connect/ncbi_linkerd.c +++ b/c++/src/connect/ncbi_linkerd.c @@ -1,4 +1,4 @@ -/* $Id: ncbi_linkerd.c 570469 2018-09-10 13:46:13Z ivanov $ +/* $Id: ncbi_linkerd.c 579032 2019-01-29 19:06:41Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -446,8 +446,9 @@ static EEndpointStatus s_EndpointFromNamerd(SEndpoint* end, SERV_ITER iter) out: ConnNetInfo_Destroy(nd_net_info); SERV_Close(nd_iter); - if (nd_srv_info) + if (nd_srv_info && nd_srv_info != (SSERV_Info*)(-1L)) { free((void*)nd_srv_info); + } return retval; } diff --git a/c++/src/connect/ncbi_namerd.c b/c++/src/connect/ncbi_namerd.c index 7bc3812d..38b8c79b 100644 --- a/c++/src/connect/ncbi_namerd.c +++ b/c++/src/connect/ncbi_namerd.c @@ -1,4 +1,4 @@ -/* $Id: ncbi_namerd.c 570469 2018-09-10 13:46:13Z ivanov $ +/* $Id: ncbi_namerd.c 576145 2018-12-11 15:08:14Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -175,38 +175,6 @@ struct SNAMERD_Data { }; -/* Extra-verbose tracing to make following nested functions easier. */ -#define EXTRA_VERBOSE_DBG 1 -#if defined(EXTRA_VERBOSE_DBG) && defined(_DEBUG) && ! defined(NDEBUG) - -static int s_nest = 0; /* trace nest level */ - -#define NEST_PFX ".........................................................." -#define NEST_PFXX "**********************************************************" -#define MAX_NEST ((int)(sizeof(NEST_PFX)-1)) - -#define PFXI (s_nest++ > MAX_NEST ? 
NEST_PFXX : NEST_PFX + MAX_NEST - s_nest+1) -#define PFXO (--s_nest > MAX_NEST ? NEST_PFXX : NEST_PFX + MAX_NEST - s_nest ) - -#define TIN( fmt ) CORE_TRACEF(("%s[ " fmt, PFXI )); -#define TIN1(fmt, arg ) CORE_TRACEF(("%s[ " fmt, PFXI, arg )); -#define TIN2(fmt, arg1, arg2) CORE_TRACEF(("%s[ " fmt, PFXI, arg1, arg2)); - -#define TOUT( fmt ) CORE_TRACEF(("%s] " fmt, PFXO )); -#define TOUT1(fmt, arg ) CORE_TRACEF(("%s] " fmt, PFXO, arg )); - -#else - -#define TIN( fmt ) -#define TIN1(fmt, arg ) -#define TIN2(fmt, arg1, arg2) - -#define TOUT( fmt ) -#define TOUT1(fmt, arg ) - -#endif /* EXTRA_VERBOSE_DBG */ - - /* Some static variables needed only to support testing with mock data. Testing with mock data is currently limited to single-threaded tests. */ static int s_initialized = 0; @@ -293,7 +261,7 @@ static EIO_Status s_CONN_Create(SERV_ITER iter, CONNECTOR* c_p, CONN* conn_p) { EIO_Status status = eIO_Unknown; - TIN("s_CONN_Create()"); + CORE_TRACE("Entering s_CONN_Create()"); /* require valid, NULL pointers */ assert(c_p && ! *c_p ); @@ -315,7 +283,7 @@ static EIO_Status s_CONN_Create(SERV_ITER iter, CONNECTOR* c_p, CONN* conn_p) CORE_LOG_X(eNSub_Connect, eLOG_Error, "Unable to create connector."); } - TOUT("s_CONN_Create()"); + CORE_TRACE("Leaving s_CONN_Create()"); return status; } @@ -337,15 +305,15 @@ static void s_UpdateDtab(char** dest_dtab_p, char* src_dtab, int* success_p) char enc_dtab[MAX_QRY_STR_LEN + 1]; size_t new_size, src_size, enc_size; - TIN2("s_UpdateDtab(\"%s\") -- old dtab = \"%s\"", src_dtab, - *dest_dtab_p ? *dest_dtab_p : ""); + CORE_TRACEF(("Entering s_UpdateDtab(\"%s\") -- old dtab = \"%s\"", src_dtab, + *dest_dtab_p ? *dest_dtab_p : "")); if ( ! *success_p) { - TOUT("s_UpdateDtab() -- prior no success"); + CORE_TRACE("Leaving s_UpdateDtab() -- prior no success"); return; } if ( ! 
*src_dtab) { - TOUT("s_UpdateDtab() -- prior no dtab"); + CORE_TRACE("Leaving s_UpdateDtab() -- prior no dtab"); return; } @@ -381,14 +349,14 @@ static void s_UpdateDtab(char** dest_dtab_p, char* src_dtab, int* success_p) if ( ! new_dtab) { *success_p = 0; CORE_LOG_X(eNSub_Alloc, eLOG_Critical, "Couldn't alloc for dtab."); - TOUT("s_UpdateDtab() -- bad alloc"); + CORE_TRACE("Leaving s_UpdateDtab() -- bad alloc"); return; } /* Update the caller's pointer. */ *dest_dtab_p = new_dtab; - TOUT1("s_UpdateDtab() -- new dtab = \"%s\"", new_dtab); + CORE_TRACEF(("Leaving s_UpdateDtab() -- new dtab = \"%s\"", new_dtab)); } @@ -535,7 +503,7 @@ static int/*bool*/ s_AddServerInfo(struct SNAMERD_Data* data, SSERV_Info* info) && SERV_EqualInfo(info, data->cand[i].info)) { /* Replace older version */ - CORE_TRACE("Replaced older version."); + CORE_TRACE("Replaced older candidate version."); free((void*) data->cand[i].info); data->cand[i].info = info; data->cand[i].status = info->rate; @@ -704,7 +672,7 @@ static EIO_Status s_ReadFullResponse(CONN conn, char** bufp, int num_steps; EIO_Status status = eIO_Unknown; - TIN("s_ReadFullResponse()"); + CORE_TRACE("Entering s_ReadFullResponse()"); assert(bufp); assert(net_info); @@ -721,7 +689,7 @@ static EIO_Status s_ReadFullResponse(CONN conn, char** bufp, free(*bufp); *bufp = NULL; } - TOUT("s_ReadFullResponse() -- bad alloc"); + CORE_TRACE("Leaving s_ReadFullResponse() -- bad alloc"); return eIO_Unknown; } *bufp = new_buf; @@ -753,7 +721,7 @@ static EIO_Status s_ReadFullResponse(CONN conn, char** bufp, ("Read error: %s", IO_StatusStr(status))); free(*bufp); *bufp = NULL; - TOUT("s_ReadFullResponse() -- read problem"); + CORE_TRACE("Leaving s_ReadFullResponse() -- read problem 1"); return status; } @@ -772,7 +740,7 @@ static EIO_Status s_ReadFullResponse(CONN conn, char** bufp, CORE_LOG_X(eNSub_TooLong, eLOG_Error, "Insufficient buffer size."); free(*bufp); *bufp = NULL; - TOUT("s_ReadFullResponse() -- read problem"); + 
CORE_TRACE("Leaving s_ReadFullResponse() -- read problem 2"); return status; } @@ -788,7 +756,7 @@ static EIO_Status s_ReadFullResponse(CONN conn, char** bufp, CORE_TRACEF(("Got response: %s", *bufp)); - TOUT("s_ReadFullResponse()"); + CORE_TRACE("Leaving s_ReadFullResponse()"); return eIO_Success; } @@ -801,7 +769,7 @@ static int/*bool*/ s_ParseResponse(SERV_ITER iter, CONN conn) char* response = NULL; int/*bool*/ retval = 0; - TIN("s_ParseResponse()"); + CORE_TRACE("Entering s_ParseResponse()"); if (eIO_Success == s_ReadFullResponse(conn, &response, net_info)) { x_JSON_Object *root_obj; @@ -1129,7 +1097,7 @@ static int/*bool*/ s_ParseResponse(SERV_ITER iter, CONN conn) out: if (response) free(response); if (root_value) x_json_value_free(root_value); - TOUT("s_ParseResponse()"); + CORE_TRACE("Leaving s_ParseResponse()"); return retval; } @@ -1141,7 +1109,7 @@ static char* s_GetDtabHeaderFromBuf(const char* buf) char* end; char* dup_hdr; - TIN1("s_GetDtabHeaderFromBuf(\"%s\")", buf ? buf : ""); + CORE_TRACEF(("Entering s_GetDtabHeaderFromBuf(\"%s\")", buf ? buf : "")); if (start && strncasecmp(start, DTAB_HDR_FIELD_NAME ":", sizeof(DTAB_HDR_FIELD_NAME) + 1/*':'*/ - 1/*'\0'*/) == 0) @@ -1159,16 +1127,18 @@ static char* s_GetDtabHeaderFromBuf(const char* buf) if ( ! 
dup_hdr) { CORE_LOG_X(eNSub_Alloc, eLOG_Critical, "Couldn't alloc for dtab header value."); - TOUT("s_GetDtabHeaderFromBuf() -- bad alloc"); + CORE_TRACE("Leaving s_GetDtabHeaderFromBuf() -- bad alloc"); return NULL; } memcpy(dup_hdr, start, (size_t)(end - start)); dup_hdr[end - start] = NIL; - TOUT1("s_GetDtabHeaderFromBuf() -- got dtab header \"%s\"", dup_hdr); + CORE_TRACEF(( + "Leaving s_GetDtabHeaderFromBuf() -- got dtab header \"%s\"", + dup_hdr)); return dup_hdr; } - TOUT("s_GetDtabHeaderFromBuf()"); + CORE_TRACE("Leaving s_GetDtabHeaderFromBuf()"); return NULL; } @@ -1179,12 +1149,12 @@ static void s_UpdateDtabFromUserHeader(char** dtab_p, int* success_p, { char* dtab = NULL; - TIN2("s_UpdateDtabFromUserHeader(\"%s\") -- success=%d", + CORE_TRACEF(("Entering s_UpdateDtabFromUserHeader(\"%s\") -- success=%d", net_info->http_user_header ? net_info->http_user_header : "", - *success_p); + *success_p)); if ( ! *success_p) { - TOUT("s_UpdateDtabFromUserHeader() -- prior no success"); + CORE_TRACE("Leaving s_UpdateDtabFromUserHeader() -- prior no success"); return; } @@ -1195,7 +1165,7 @@ static void s_UpdateDtabFromUserHeader(char** dtab_p, int* success_p, free(dtab); } - TOUT("s_UpdateDtabFromUserHeader()"); + CORE_TRACE("Leaving s_UpdateDtabFromUserHeader()"); } @@ -1205,11 +1175,11 @@ static void s_UpdateDtabFromRegistry(char** dtab_p, int* success_p, { char val[MAX_QRY_STR_LEN + 1]; - TIN2("s_UpdateDtabFromRegistry(\"%s\") -- success=%d", - service ? service : "", *success_p); + CORE_TRACEF(("Entering s_UpdateDtabFromRegistry(\"%s\") -- success=%d", + service ? service : "", *success_p)); if ( ! 
*success_p) { - TOUT("s_UpdateDtabFromRegistry() -- prior no success"); + CORE_TRACE("Leaving s_UpdateDtabFromRegistry() -- prior no success"); return; } @@ -1220,13 +1190,13 @@ static void s_UpdateDtabFromRegistry(char** dtab_p, int* success_p, *success_p = 0; CORE_LOG_X(eNSub_Alloc, eLOG_Critical, "Couldn't alloc for dtab from registry."); - TOUT("s_UpdateDtabFromRegistry() -- bad alloc"); + CORE_TRACE("Leaving s_UpdateDtabFromRegistry() -- bad alloc"); return; } s_UpdateDtab(dtab_p, val, success_p); - TOUT("s_UpdateDtabFromRegistry()"); + CORE_TRACE("Leaving s_UpdateDtabFromRegistry()"); } @@ -1246,7 +1216,7 @@ static int/*bool*/ s_ProcessDtab(SConnNetInfo* net_info) int/*bool*/ success = 1; char* dtab = NULL; - TIN("s_ProcessDtab()"); + CORE_TRACE("Entering s_ProcessDtab()"); /* Dtab precedence (highest first): registry > user_header */ s_UpdateDtabFromRegistry(&dtab, &success, net_info->svc); @@ -1266,7 +1236,7 @@ static int/*bool*/ s_ProcessDtab(SConnNetInfo* net_info) if (dtab) free(dtab); - TOUT("s_ProcessDtab()"); + CORE_TRACE("Leaving s_ProcessDtab()"); return success; #undef DTAB_ARGS_SEP } @@ -1279,13 +1249,13 @@ static EHTTP_HeaderParse s_ParseHeader(const char* header, struct SNAMERD_Data* data = (struct SNAMERD_Data*)((SERV_ITER) iter)->data; int code = 0/*success code if any*/; - TIN1("s_ParseHeader(\"%s\")", header); + CORE_TRACEF(("Entering s_ParseHeader(\"%s\")", header)); if (server_error == 400 || server_error == 403 || server_error == 404) { data->fail = 1/*true*/; } else if (sscanf(header, "%*s %d", &code) < 1) { data->eof = 1/*true*/; - TOUT("s_ParseHeader() -- eof=true"); + CORE_TRACE("Leaving s_ParseHeader() -- eof=true"); return eHTTP_HeaderError; } @@ -1293,7 +1263,7 @@ static EHTTP_HeaderParse s_ParseHeader(const char* header, if (code == 204) data->eof = 1/*true*/; - TOUT("s_ParseHeader()"); + CORE_TRACE("Leaving s_ParseHeader()"); return eHTTP_HeaderSuccess; } @@ -1315,7 +1285,7 @@ static int/*bool*/ s_Resolve(SERV_ITER iter) CONN 
conn = NULL; int/*bool*/ retval = 0; - TIN("s_Resolve()"); + CORE_TRACE("Entering s_Resolve()"); assert( ! (data->eof | data->fail)); /* Handle DTAB, if present. */ @@ -1327,7 +1297,7 @@ static int/*bool*/ s_Resolve(SERV_ITER iter) } s_CONN_Destroy(&c, &conn); - TOUT("s_Resolve()"); + CORE_TRACE("Leaving s_Resolve()"); return retval; } @@ -1376,9 +1346,9 @@ static int/*bool*/ s_Update(SERV_ITER iter, const char* text, int code) /*struct SNAMERD_Data* data = (struct SNAMERD_Data*) iter->data;*/ int retval = 0; - TIN2("s_Update(\"%s\", %d)", text ? text : "", code); + CORE_TRACEF(("Entering s_Update(\"%s\", %d)", text ? text : "", code)); - TOUT1("s_Update() -- %supdated", retval ? "" : "not "); + CORE_TRACEF(("Leaving s_Update() -- %supdated", retval ? "" : "not ")); return retval; } @@ -1396,12 +1366,12 @@ static SSERV_Info* s_GetNextInfo(SERV_ITER iter, HOST_INFO* host_info) SSERV_Info* info; size_t n; - TIN("s_GetNextInfo()"); + CORE_TRACE("Entering s_GetNextInfo()"); assert(data); if (data->n_cand < 1 && data->done) { data->done = 0; - TOUT("s_GetNextInfo() -- end of candidates"); + CORE_TRACE("Leaving s_GetNextInfo() -- end of candidates"); return NULL; } @@ -1410,7 +1380,7 @@ static SSERV_Info* s_GetNextInfo(SERV_ITER iter, HOST_INFO* host_info) s_Resolve(iter); if (data->n_cand < 1) { data->done = 1; - TOUT("s_GetNextInfo() -- resolved no candidates"); + CORE_TRACE("Leaving s_GetNextInfo() -- resolved no candidates"); return NULL; } } @@ -1427,7 +1397,7 @@ static SSERV_Info* s_GetNextInfo(SERV_ITER iter, HOST_INFO* host_info) if (host_info) *host_info = NULL; - TOUT("s_GetNextInfo()"); + CORE_TRACE("Leaving s_GetNextInfo()"); return info; } @@ -1436,7 +1406,7 @@ static void s_Reset(SERV_ITER iter) { struct SNAMERD_Data* data = (struct SNAMERD_Data*) iter->data; - TIN("s_Reset()"); + CORE_TRACE("Entering s_Reset()"); if (data) { data->eof = data->fail = data->done = 0/*false*/; @@ -1452,7 +1422,7 @@ static void s_Reset(SERV_ITER iter) } } - TOUT("s_Reset()"); 
+ CORE_TRACE("Leaving s_Reset()"); } @@ -1460,7 +1430,7 @@ static void s_Close(SERV_ITER iter) { struct SNAMERD_Data* data = (struct SNAMERD_Data*) iter->data; - TIN("s_Close()"); + CORE_TRACE("Entering s_Close()"); /* Make sure s_Reset() has been called - it frees info structs. */ s_Reset(iter); @@ -1472,7 +1442,7 @@ static void s_Close(SERV_ITER iter) free(data); iter->data = NULL; - TOUT("s_Close()"); + CORE_TRACE("Leaving s_Close()"); } @@ -1487,7 +1457,7 @@ extern const SSERV_VTable* SERV_NAMERD_Open(SERV_ITER iter, struct SNAMERD_Data* data; char namerd_env[32]; - TIN1("SERV_NAMERD_Open(\"%s\")", iter->name); + CORE_TRACEF(("Entering SERV_NAMERD_Open(\"%s\")", iter->name)); s_Init(); @@ -1498,7 +1468,7 @@ extern const SSERV_VTable* SERV_NAMERD_Open(SERV_ITER iter, if ( ! iter->name) { CORE_LOG_X(eNSub_BadData, eLOG_Error, "\"iter->name\" is NULL, not able to continue SERV_NAMERD_Open"); - TOUT("SERV_NAMERD_Open() -- fail"); + CORE_TRACE("Leaving SERV_NAMERD_Open() -- fail, no service name"); return NULL; } assert(iter->name); @@ -1509,7 +1479,7 @@ extern const SSERV_VTable* SERV_NAMERD_Open(SERV_ITER iter, CORE_LOGF_X(eNSub_BadData, eLOG_Error, ("Invalid service name \"%s\" - must not begin with '/'.", iter->name)); - TOUT("SERV_NAMERD_Open() -- fail"); + CORE_TRACE("Leaving SERV_NAMERD_Open() -- fail, catalog prefix"); return NULL; } @@ -1517,14 +1487,14 @@ extern const SSERV_VTable* SERV_NAMERD_Open(SERV_ITER iter, if (iter->ismask) { CORE_LOG_X(eNSub_BadData, eLOG_Error, "NAMERD doesn't support masks."); - TOUT("SERV_NAMERD_Open() -- fail"); + CORE_TRACE("Leaving SERV_NAMERD_Open() -- fail, iter is a mask"); return NULL; } if ( ! 
(data = (struct SNAMERD_Data*) calloc(1, sizeof(*data)))) { CORE_LOG_X(eNSub_Alloc, eLOG_Critical, "Could not allocate for SNAMERD_Data."); - TOUT("SERV_NAMERD_Open() -- fail"); + CORE_TRACE("Leaving SERV_NAMERD_Open() -- fail, bad alloc"); return NULL; } iter->data = data; @@ -1536,7 +1506,7 @@ extern const SSERV_VTable* SERV_NAMERD_Open(SERV_ITER iter, if ( ! new_net_info) { CORE_LOG_X(eNSub_Alloc, eLOG_Critical, "Couldn't create net_info."); s_Close(iter); - TOUT("SERV_NAMERD_Open() -- fail"); + CORE_TRACE("Leaving SERV_NAMERD_Open() -- fail, no new net_info"); return NULL; } data->net_info = ConnNetInfo_Clone(new_net_info); @@ -1546,7 +1516,7 @@ extern const SSERV_VTable* SERV_NAMERD_Open(SERV_ITER iter, if ( ! data->net_info) { CORE_LOG_X(eNSub_Alloc, eLOG_Critical, "Couldn't clone net_info."); s_Close(iter); - TOUT("SERV_NAMERD_Open() -- fail"); + CORE_TRACE("Leaving SERV_NAMERD_Open() -- fail, no net_info clone"); return NULL; } if (new_net_info) { @@ -1557,7 +1527,7 @@ extern const SSERV_VTable* SERV_NAMERD_Open(SERV_ITER iter, CORE_LOG_X(eNSub_BadData, eLOG_Critical, "Couldn't set up standard args."); s_Close(iter); - TOUT("SERV_NAMERD_Open() -- fail"); + CORE_TRACE("Leaving SERV_NAMERD_Open() -- fail, standard args"); return NULL; } @@ -1578,7 +1548,7 @@ extern const SSERV_VTable* SERV_NAMERD_Open(SERV_ITER iter, &data->net_info->http_proxy_port)) { s_Close(iter); - TOUT("SERV_NAMERD_Open() -- fail"); + CORE_TRACE("Leaving SERV_NAMERD_Open() -- fail, http_proxy"); return NULL; } @@ -1627,14 +1597,14 @@ extern const SSERV_VTable* SERV_NAMERD_Open(SERV_ITER iter, || !(data->net_info->stateless && data->net_info->firewall))) { s_Close(iter); - TOUT("SERV_NAMERD_Open() -- fail"); + CORE_TRACE("Leaving SERV_NAMERD_Open() -- fail, stateless, firewall"); return NULL; } /* call GetNextInfo subsequently if info is actually needed */ if (info) *info = NULL; - TOUT("SERV_NAMERD_Open()"); + CORE_TRACE("Leaving SERV_NAMERD_Open()"); return &s_op; } diff --git 
a/c++/src/connect/services/json_over_uttp.cpp b/c++/src/connect/services/json_over_uttp.cpp index 058ae545..554c1fe8 100644 --- a/c++/src/connect/services/json_over_uttp.cpp +++ b/c++/src/connect/services/json_over_uttp.cpp @@ -1,4 +1,4 @@ -/* $Id: json_over_uttp.cpp 527718 2017-02-15 17:27:37Z sadyrovr $ +/* $Id: json_over_uttp.cpp 577577 2019-01-07 11:22:37Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -895,84 +895,90 @@ bool CJsonNode::AsBoolean() const m_Impl.GetPointerOrNull())->m_Boolean; } -static void s_Repr_Value(CNcbiOstrstream& oss, const CJsonNode& node); +static void s_Repr_Value(string& os, const CJsonNode& node); -static void s_Repr_Object(CNcbiOstrstream& oss, const CJsonNode& node) +static void s_Repr_Object(string& os, const CJsonNode& node) { CJsonIterator it = node.Iterate(); if (it) { - oss << '"' << it.GetKey() << "\": "; - s_Repr_Value(oss, *it); + os.append(1, '"') + .append(it.GetKey()) + .append("\": "); + s_Repr_Value(os, *it); while (++it) { - oss << ", \"" << it.GetKey() << "\": "; - s_Repr_Value(oss, *it); + os.append(", \"") + .append(it.GetKey()) + .append("\": "); + s_Repr_Value(os, *it); } } } -static void s_Repr_Array(CNcbiOstrstream& oss, const CJsonNode& node) +static void s_Repr_Array(string& os, const CJsonNode& node) { CJsonIterator it = node.Iterate(); if (it) { - s_Repr_Value(oss, *it); + s_Repr_Value(os, *it); while (++it) { - oss << ", "; - s_Repr_Value(oss, *it); + os.append(", "); + s_Repr_Value(os, *it); } } } -static void s_Repr_Value(CNcbiOstrstream& oss, const CJsonNode& node) +static void s_Repr_Value(string& os, const CJsonNode& node) { switch (node.GetNodeType()) { case CJsonNode::eObject: - oss << '{'; - s_Repr_Object(oss, node); - oss << '}'; + os.append(1, '{'); + s_Repr_Object(os, node); + os.append(1, '}'); break; case CJsonNode::eArray: - oss << '['; - s_Repr_Array(oss, node); - oss << ']'; + os.append(1, '['); + s_Repr_Array(os, node); + 
os.append(1, ']'); break; case CJsonNode::eString: - oss << '"' << NStr::PrintableString(node.AsString()) << '"'; + os.append(1, '"') + .append(NStr::PrintableString(node.AsString())) + .append(1, '"'); break; case CJsonNode::eInteger: - oss << node.AsInteger(); + os.append(NStr::NumericToString(node.AsInteger())); break; case CJsonNode::eDouble: - oss << node.AsDouble(); + os.append(NStr::DoubleToString(node.AsDouble())); break; case CJsonNode::eBoolean: - oss << (node.AsBoolean() ? "true" : "false"); + os.append(node.AsBoolean() ? "true" : "false"); break; default: /* case CJsonNode::eNull: */ - oss << "null"; + os.append("null"); } } string CJsonNode::Repr(TReprFlags flags) const { - CNcbiOstrstream oss; + string os; switch (GetNodeType()) { case CJsonNode::eObject: if (flags & fOmitOutermostBrackets) - s_Repr_Object(oss, *this); + s_Repr_Object(os, *this); else { - oss << '{'; - s_Repr_Object(oss, *this); - oss << '}'; + os.append(1, '{'); + s_Repr_Object(os, *this); + os.append(1, '}'); } break; case CJsonNode::eArray: if (flags & fOmitOutermostBrackets) - s_Repr_Array(oss, *this); + s_Repr_Array(os, *this); else { - oss << '['; - s_Repr_Array(oss, *this); - oss << ']'; + os.append(1, '['); + s_Repr_Array(os, *this); + os.append(1, ']'); } break; case CJsonNode::eString: @@ -981,10 +987,10 @@ string CJsonNode::Repr(TReprFlags flags) const m_Impl.GetPointerOrNull())->m_String); /* FALL THROUGH */ default: - s_Repr_Value(oss, *this); + s_Repr_Value(os, *this); } - return CNcbiOstrstreamToString(oss); + return os; } #define INVALID_FORMAT_ERROR() \ diff --git a/c++/src/corelib/ncbi_message.cpp b/c++/src/corelib/ncbi_message.cpp index 159a724f..af516277 100644 --- a/c++/src/corelib/ncbi_message.cpp +++ b/c++/src/corelib/ncbi_message.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbi_message.cpp 473941 2015-07-23 17:05:00Z grichenk $ +/* $Id: ncbi_message.cpp 579979 2019-02-08 14:16:50Z ivanov $ * =========================================================================== * * 
PUBLIC DOMAIN NOTICE diff --git a/c++/src/corelib/ncbiapp.cpp b/c++/src/corelib/ncbiapp.cpp index 49b0f14a..3feeaf41 100644 --- a/c++/src/corelib/ncbiapp.cpp +++ b/c++/src/corelib/ncbiapp.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbiapp.cpp 563865 2018-05-16 15:00:13Z ucko $ +/* $Id: ncbiapp.cpp 576975 2018-12-27 12:53:02Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1097,17 +1097,9 @@ void CNcbiApplication::SetStdioFlags(TStdioSetupFlags stdio_flags) void CNcbiApplication::x_SetupStdio(void) { -#if 1//!defined(NCBI_COMPILER_GCC) || NCBI_COMPILER_VERSION >= 411 - // CAUTION: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26777 - // fix applied Mar 29, 2006, scheduled for 4.1.1 - if ((m_StdioFlags & fDefault_SyncWithStdio) == 0) { - // SUN WorkShop STL stream library has significant performance loss - // (due to the multiple gratuitous lseeks() in std i/o) - // when sync_with_stdio is TRUE (default), - // so we turn off sync_with_stdio here. 
+ if ((m_StdioFlags & fNoSyncWithStdio) != 0) { IOS_BASE::sync_with_stdio(false); } -#endif if ((m_StdioFlags & fDefault_CinBufferSize) == 0 #ifdef NCBI_OS_UNIX diff --git a/c++/src/corelib/ncbidiag.cpp b/c++/src/corelib/ncbidiag.cpp index c1bf3b73..72ba6c33 100644 --- a/c++/src/corelib/ncbidiag.cpp +++ b/c++/src/corelib/ncbidiag.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbidiag.cpp 569131 2018-08-16 15:35:26Z grichenk $ +/* $Id: ncbidiag.cpp 575462 2018-11-29 12:51:01Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -78,7 +78,7 @@ DEFINE_STATIC_MUTEX(s_DiagPostMutex); static CSafeStatic s_DiagRWLock(CSafeStaticLifeSpan(CSafeStaticLifeSpan::eLifeSpan_Long, 1)); static CSafeStatic s_ReopenEntered; -DEFINE_STATIC_FAST_MUTEX(s_ApproveMutex); +DEFINE_STATIC_MUTEX(s_ApproveMutex); void g_Diag_Use_RWLock(bool enable) @@ -1268,7 +1268,7 @@ CDiagContext::~CDiagContext(void) void CDiagContext::ResetLogRates(void) { - CFastMutexGuard lock(s_ApproveMutex); + CMutexGuard lock(s_ApproveMutex); m_AppLogRC->Reset(GetLogRate_Limit(eLogRate_App), CTimeSpan((long)GetLogRate_Period(eLogRate_App)), CTimeSpan((long)0), @@ -1305,7 +1305,7 @@ unsigned int CDiagContext::GetLogRate_Limit(ELogRate_Type type) const void CDiagContext::SetLogRate_Limit(ELogRate_Type type, unsigned int limit) { - CFastMutexGuard lock(s_ApproveMutex); + CMutexGuard lock(s_ApproveMutex); switch ( type ) { case eLogRate_App: TAppLogRateLimitParam::SetDefault(limit); @@ -1359,7 +1359,7 @@ unsigned int CDiagContext::GetLogRate_Period(ELogRate_Type type) const void CDiagContext::SetLogRate_Period(ELogRate_Type type, unsigned int period) { - CFastMutexGuard lock(s_ApproveMutex); + CMutexGuard lock(s_ApproveMutex); switch ( type ) { case eLogRate_App: TAppLogRatePeriodParam::SetDefault(period); @@ -1405,7 +1405,7 @@ bool CDiagContext::ApproveMessage(SDiagMessage& msg, bool approved = true; if ( IsSetDiagPostFlag(eDPF_AppLog, msg.m_Flags) ) { if ( 
m_AppLogRC->IsEnabled() ) { - CFastMutexGuard lock(s_ApproveMutex); + CMutexGuard lock(s_ApproveMutex); approved = m_AppLogRC->Approve(); } if ( approved ) { @@ -1421,7 +1421,7 @@ bool CDiagContext::ApproveMessage(SDiagMessage& msg, case eDiag_Info: case eDiag_Trace: if ( m_TraceLogRC->IsEnabled() ) { - CFastMutexGuard lock(s_ApproveMutex); + CMutexGuard lock(s_ApproveMutex); approved = m_TraceLogRC->Approve(); } if ( approved ) { @@ -1434,7 +1434,7 @@ bool CDiagContext::ApproveMessage(SDiagMessage& msg, break; default: if ( m_ErrLogRC->IsEnabled() ) { - CFastMutexGuard lock(s_ApproveMutex); + CMutexGuard lock(s_ApproveMutex); approved = m_ErrLogRC->Approve(); } if ( approved ) { @@ -1523,12 +1523,12 @@ void CDiagContext::x_CreateUID(void) const } -DEFINE_STATIC_FAST_MUTEX(s_CreateGUIDMutex); +DEFINE_STATIC_MUTEX(s_CreateGUIDMutex); CDiagContext::TUID CDiagContext::GetUID(void) const { if ( !m_UID ) { - CFastMutexGuard guard(s_CreateGUIDMutex); + CMutexGuard guard(s_CreateGUIDMutex); if ( !m_UID ) { x_CreateUID(); } @@ -1702,12 +1702,12 @@ void CDiagContext::SetHostIP(const string& ip) } -DEFINE_STATIC_FAST_MUTEX(s_AppNameMutex); +DEFINE_STATIC_MUTEX(s_AppNameMutex); const string& CDiagContext::GetAppName(void) const { if ( !m_AppNameSet ) { - CFastMutexGuard guard(s_AppNameMutex); + CMutexGuard guard(s_AppNameMutex); if ( !m_AppNameSet ) { m_AppName->SetString(CNcbiApplication::GetAppName()); if (CNcbiApplication::Instance() && !m_AppName->IsEmpty()) { @@ -1735,7 +1735,7 @@ void CDiagContext::SetAppName(const string& app_name) ERR_POST("Application name cannot be changed."); return; } - CFastMutexGuard guard(s_AppNameMutex); + CMutexGuard guard(s_AppNameMutex); m_AppName->SetString(app_name); m_AppNameSet = true; if ( m_AppName->IsEncoded() ) { @@ -2763,11 +2763,11 @@ NCBI_PARAM_DEF_EX(string, Log, Session_Id, "", eParam_NoThread, typedef NCBI_PARAM_TYPE(Log, Session_Id) TParamDefaultSessionId; -DEFINE_STATIC_FAST_MUTEX(s_DefaultSidMutex); 
+DEFINE_STATIC_MUTEX(s_DefaultSidMutex); string CDiagContext::GetDefaultSessionID(void) const { - CFastMutexGuard lock(s_DefaultSidMutex); + CMutexGuard lock(s_DefaultSidMutex); if (m_DefaultSessionId.get() && !m_DefaultSessionId->IsEmpty()) { return m_DefaultSessionId->GetOriginalString(); } @@ -2791,7 +2791,7 @@ string CDiagContext::GetDefaultSessionID(void) const void CDiagContext::SetDefaultSessionID(const string& session_id) { - CFastMutexGuard lock(s_DefaultSidMutex); + CMutexGuard lock(s_DefaultSidMutex); if ( !m_DefaultSessionId.get() ) { m_DefaultSessionId.reset(new CEncodedString); } @@ -2816,7 +2816,7 @@ string CDiagContext::GetEncodedSessionID(void) const return rctx.GetEncodedSessionID(); } GetDefaultSessionID(); // Make sure the default value is initialized. - CFastMutexGuard lock(s_DefaultSidMutex); + CMutexGuard lock(s_DefaultSidMutex); _ASSERT(m_DefaultSessionId.get()); return m_DefaultSessionId->GetEncodedString(); } @@ -2853,7 +2853,7 @@ NCBI_PARAM_DEF_EX(string, Log, Hit_Id, "", eParam_NoThread, typedef NCBI_PARAM_TYPE(Log, Hit_Id) TParamHitId; -DEFINE_STATIC_FAST_MUTEX(s_DefaultHidMutex); +DEFINE_STATIC_MUTEX(s_DefaultHidMutex); bool CDiagContext::x_DiagAtApplicationLevel(void) const { @@ -2881,21 +2881,21 @@ void CDiagContext::x_LogHitID(void) const void CDiagContext::x_LogHitID_WithLock(void) const { - CFastMutexGuard guard(s_DefaultHidMutex); + CMutexGuard guard(s_DefaultHidMutex); x_LogHitID(); } bool CDiagContext::x_IsSetDefaultHitID(void) const { - CFastMutexGuard guard(s_DefaultHidMutex); + CMutexGuard guard(s_DefaultHidMutex); return m_DefaultHitId.get() && !m_DefaultHitId->Empty(); } CSharedHitId CDiagContext::x_GetDefaultHitID(EDefaultHitIDFlags flag) const { - CFastMutexGuard guard(s_DefaultHidMutex); + CMutexGuard guard(s_DefaultHidMutex); if (m_DefaultHitId.get() && !m_DefaultHitId->Empty()) { return *m_DefaultHitId; } @@ -2960,7 +2960,7 @@ CSharedHitId CDiagContext::x_GetDefaultHitID(EDefaultHitIDFlags flag) const void 
CDiagContext::SetDefaultHitID(const string& hit_id) { - CFastMutexGuard guard(s_DefaultHidMutex); + CMutexGuard guard(s_DefaultHidMutex); if ( !m_DefaultHitId.get() ) { m_DefaultHitId.reset(new CSharedHitId()); } diff --git a/c++/src/corelib/ncbierror.cpp b/c++/src/corelib/ncbierror.cpp index 24c5ae7c..cff1960e 100644 --- a/c++/src/corelib/ncbierror.cpp +++ b/c++/src/corelib/ncbierror.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbierror.cpp 510803 2016-08-16 13:57:32Z ivanov $ +/* $Id: ncbierror.cpp 579979 2019-02-08 14:16:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/src/corelib/ncbiobj.cpp b/c++/src/corelib/ncbiobj.cpp index e2059d6a..9bf43cab 100644 --- a/c++/src/corelib/ncbiobj.cpp +++ b/c++/src/corelib/ncbiobj.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbiobj.cpp 469441 2015-06-04 14:27:27Z vasilche $ +/* $Id: ncbiobj.cpp 580899 2019-02-22 13:04:26Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -182,7 +182,7 @@ static DECLARE_TLS_VAR(CAtomicCounter::TValue, s_LastNewType); typedef pair TLastNewPtrMultipleInfo; typedef vector TLastNewPtrMultiple; #ifdef NCBI_NO_THREADS -static TLastNewPtrMultiple s_LastNewPtrMultiple; +static TLastNewPtrMultiple* s_LastNewPtrMultiple_ptr; #else static TTlsKey s_LastNewPtrMultiple_key; #endif @@ -199,7 +199,11 @@ static TLastNewPtrMultiple& sx_GetLastNewPtrMultiple(void) { #ifdef NCBI_NO_THREADS - return s_LastNewPtrMultiple; + TLastNewPtrMultiple* set = s_LastNewPtrMultiple_ptr; + if ( !set ) { + s_LastNewPtrMultiple_ptr = set = new TLastNewPtrMultiple(); + } + return *set; #else if ( !s_LastNewPtrMultiple_key ) { DEFINE_STATIC_FAST_MUTEX(s_InitMutex); diff --git a/c++/src/corelib/ncbithr.cpp b/c++/src/corelib/ncbithr.cpp index 7b230ce0..10bfa507 100644 --- a/c++/src/corelib/ncbithr.cpp +++ b/c++/src/corelib/ncbithr.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbithr.cpp 560980 2018-03-29 19:14:24Z gouriano $ +/* 
$Id: ncbithr.cpp 579979 2019-02-08 14:16:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/src/corelib/ncbitime.cpp b/c++/src/corelib/ncbitime.cpp index 5f669b5e..6eabb486 100644 --- a/c++/src/corelib/ncbitime.cpp +++ b/c++/src/corelib/ncbitime.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbitime.cpp 563844 2018-05-16 11:33:29Z ivanov $ +/* $Id: ncbitime.cpp 579979 2019-02-08 14:16:50Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -76,9 +76,9 @@ BEGIN_NCBI_SCOPE // Protective mutex -DEFINE_STATIC_FAST_MUTEX(s_TimeMutex); -DEFINE_STATIC_FAST_MUTEX(s_TimeAdjustMutex); -DEFINE_STATIC_FAST_MUTEX(s_FastLocalTimeMutex); +DEFINE_STATIC_MUTEX(s_TimeMutex); +DEFINE_STATIC_MUTEX(s_TimeAdjustMutex); +DEFINE_STATIC_MUTEX(s_FastLocalTimeMutex); // Store global time/timespan formats in TLS static CStaticTls s_TlsFormatTime; @@ -1391,7 +1391,7 @@ bool CTime::IsDST(void) const NCBI_THROW(CTimeException, eArgument, "The date is empty"); } // MT-Safe protect - CFastMutexGuard LOCK(s_TimeMutex); + CMutexGuard LOCK(s_TimeMutex); return s_IsDST(*this); } @@ -1402,7 +1402,7 @@ time_t CTime::GetTimeT(void) const NCBI_THROW(CTimeException, eArgument, "The date is empty"); } // MT-Safe protect - CFastMutexGuard LOCK(s_TimeMutex); + CMutexGuard LOCK(s_TimeMutex); return s_GetTimeT(*this); } @@ -1523,7 +1523,7 @@ string CTime::AsString(const CTimeFormat& format, TSeconds out_tz) const } #if !defined(TIMEZONE_IS_UNDEFINED) // MT-Safe protect - CFastMutexGuard LOCK(s_TimeMutex); + CMutexGuard LOCK(s_TimeMutex); #endif const CTime* t = this; @@ -1645,7 +1645,7 @@ string CTime::AsString(const CTimeFormat& format, TSeconds out_tz) const CTime& CTime::x_SetTimeMTSafe(const time_t* value) { // MT-Safe protect - CFastMutexGuard LOCK(s_TimeMutex); + CMutexGuard LOCK(s_TimeMutex); x_SetTime(value); return *this; } @@ -2066,7 +2066,7 @@ CTime& CTime::ToTime(ETimeZone 
tz) return *this; // MT-Safe protect - CFastMutexGuard LOCK(s_TimeMutex); + CMutexGuard LOCK(s_TimeMutex); #if defined(HAVE_LOCALTIME_R) struct tm temp; @@ -2231,7 +2231,7 @@ string CTime::TimeZoneName(void) return kEmptyStr; } // MT-Safe protect - CFastMutexGuard LOCK(s_TimeMutex); + CMutexGuard LOCK(s_TimeMutex); struct tm* t; #if defined(HAVE_LOCALTIME_R) @@ -2353,7 +2353,7 @@ CTime& CTime::x_AdjustTimeImmediately(const CTime& from, bool shift_time) const int kShiftHours = 4; // MT-Safe protect - CFastMutexGuard LOCK(s_TimeAdjustMutex); + CMutexGuard LOCK(s_TimeAdjustMutex); // Special conversion from to CTime tmp(from); @@ -3903,7 +3903,7 @@ CFastLocalTime::CFastLocalTime(unsigned int sec_after_hour) { #if !defined(TIMEZONE_IS_UNDEFINED) // MT-Safe protect: use CTime locking mutex - CFastMutexGuard LOCK(s_TimeMutex); + CMutexGuard LOCK(s_TimeMutex); m_Timezone = (int)TimeZone(); m_Daylight = Daylight(); #endif @@ -3932,7 +3932,7 @@ bool CFastLocalTime::x_Tuneup(time_t timer, long nanosec) return false; // MT-Safe protect: use CTime locking mutex - CFastMutexGuard LOCK(s_TimeMutex); + CMutexGuard LOCK(s_TimeMutex); m_TunedTime.x_SetTime(&timer); m_TunedTime.SetNanoSecond(nanosec); @@ -3944,7 +3944,7 @@ bool CFastLocalTime::x_Tuneup(time_t timer, long nanosec) LOCK.Release(); // Copy tuned time to cached local time - CFastMutexGuard FLT_LOCK(s_FastLocalTimeMutex); + CMutexGuard FLT_LOCK(s_FastLocalTimeMutex); m_LastTuneupTime = timer; m_LocalTime = m_TunedTime; m_LastSysTime = m_LastTuneupTime; @@ -3958,7 +3958,7 @@ bool CFastLocalTime::x_Tuneup(time_t timer, long nanosec) CTime CFastLocalTime::GetLocalTime(void) { - CFastMutexGuard LOCK(eEmptyGuard); + CMutexGuard LOCK(eEmptyGuard); retry: // Get system time @@ -3975,7 +3975,7 @@ retry: int x_daylight = Daylight(); {{ // MT-Safe protect: use CTime locking mutex - CFastMutexGuard LOCK_TM(s_TimeMutex); + CMutexGuard LOCK_TM(s_TimeMutex); x_timezone = TimeZone(); x_daylight = Daylight(); }} @@ -4027,7 +4027,7 @@ 
int CFastLocalTime::GetLocalTimezone(void) int x_daylight = Daylight(); {{ // MT-Safe protect: use CTime locking mutex - CFastMutexGuard LOCK(s_TimeMutex); + CMutexGuard LOCK(s_TimeMutex); x_timezone = TimeZone(); x_daylight = Daylight(); }} diff --git a/c++/src/misc/CMakeLists.txt b/c++/src/misc/CMakeLists.txt index de9b08e8..641a0b87 100644 --- a/c++/src/misc/CMakeLists.txt +++ b/c++/src/misc/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeLists.txt 565539 2018-06-13 16:19:50Z gouriano $ +# $Id: CMakeLists.txt 581707 2019-03-05 13:56:05Z gouriano $ ############################################################################# @@ -7,8 +7,8 @@ NCBI_add_subdirectory( third_party third_party_static clog grid_cgi xmlwrapp eutils_client hydra_client discrepancy xmlreaders hgvs netstorage jsonwrapp biosample_util data_loaders_util - lapackwrapp cgi_redirect -# pmcidconv_client grpc_integration + lapackwrapp cgi_redirect pmcidconv_client +# grpc_integration ) if(OFF) diff --git a/c++/src/misc/Makefile.in b/c++/src/misc/Makefile.in index bc6e7cab..224289b2 100644 --- a/c++/src/misc/Makefile.in +++ b/c++/src/misc/Makefile.in @@ -1,10 +1,10 @@ -# $Id: Makefile.in 542115 2017-07-26 15:20:27Z ucko $ +# $Id: Makefile.in 581418 2019-02-28 18:50:25Z dobronad $ SUB_PROJ = third_party third_party_static clog grid_cgi xmlwrapp \ eutils_client hydra_client discrepancy xmlreaders \ hgvs netstorage jsonwrapp biosample_util \ data_loaders_util lapackwrapp pmcidconv_client \ - grpc_integration + grpc_integration fix_pub EXPENDABLE_SUB_PROJ = cgi_redirect srcdir = @srcdir@ diff --git a/c++/src/misc/jsonwrapp/CMakeLists.txt b/c++/src/misc/jsonwrapp/CMakeLists.txt new file mode 100644 index 00000000..f9063d58 --- /dev/null +++ b/c++/src/misc/jsonwrapp/CMakeLists.txt @@ -0,0 +1,14 @@ +############################################################################# +# $Id: CMakeLists.txt 565539 2018-06-13 16:19:50Z 
gouriano $ +############################################################################# + + +NCBI_add_subdirectory(test) + +# CMakeLists.txt autogenerated from /export/home/dicuccio/cpp-cmake/cpp-cmake.2015-01-24/src/misc/jsonwrapp/Makefile.in +# + +# Include projects from this directory + +# Recurse subdirectories +#add_subdirectory(test ) diff --git a/c++/src/misc/jsonwrapp/Makefile.in b/c++/src/misc/jsonwrapp/Makefile.in new file mode 100644 index 00000000..3afe5709 --- /dev/null +++ b/c++/src/misc/jsonwrapp/Makefile.in @@ -0,0 +1,6 @@ +# $Id: Makefile.in 407599 2013-07-23 13:07:19Z gouriano $ + +SUB_PROJ = test + +srcdir = @srcdir@ +include @builddir@/Makefile.meta diff --git a/c++/src/misc/jsonwrapp/test/CMakeLists.test_jsonwrapp.app.txt b/c++/src/misc/jsonwrapp/test/CMakeLists.test_jsonwrapp.app.txt new file mode 100644 index 00000000..ab3b8fcd --- /dev/null +++ b/c++/src/misc/jsonwrapp/test/CMakeLists.test_jsonwrapp.app.txt @@ -0,0 +1,29 @@ +############################################################################# +# $Id: CMakeLists.test_jsonwrapp.app.txt 565539 2018-06-13 16:19:50Z gouriano $ +############################################################################# + +NCBI_begin_app(test_jsonwrapp) + NCBI_sources(test_jsonwrapp) + NCBI_requires(Boost.Test.Included) + NCBI_uses_toolkit_libraries(test_boost) + NCBI_project_tags(test) + NCBI_project_watchers(gouriano) + NCBI_add_test() +NCBI_end_app() + +if(OFF) +# +# +# +add_executable(test_jsonwrapp-app + test_jsonwrapp +) + +set_target_properties(test_jsonwrapp-app PROPERTIES OUTPUT_NAME test_jsonwrapp) + + +target_link_libraries(test_jsonwrapp-app + test_boost +) +endif() + diff --git a/c++/src/misc/jsonwrapp/test/CMakeLists.test_jsonwrapp_old.app.txt b/c++/src/misc/jsonwrapp/test/CMakeLists.test_jsonwrapp_old.app.txt new file mode 100644 index 00000000..a082509d --- /dev/null +++ b/c++/src/misc/jsonwrapp/test/CMakeLists.test_jsonwrapp_old.app.txt @@ -0,0 +1,14 @@ +# +# +# 
+add_executable(test_jsonwrapp_old-app + test_jsonwrapp_old +) + +set_target_properties(test_jsonwrapp_old-app PROPERTIES OUTPUT_NAME test_jsonwrapp_old) + + +target_link_libraries(test_jsonwrapp_old-app + test_boost xncbi +) + diff --git a/c++/src/misc/jsonwrapp/test/CMakeLists.txt b/c++/src/misc/jsonwrapp/test/CMakeLists.txt new file mode 100644 index 00000000..ca08219f --- /dev/null +++ b/c++/src/misc/jsonwrapp/test/CMakeLists.txt @@ -0,0 +1,14 @@ +############################################################################# +# $Id: CMakeLists.txt 565539 2018-06-13 16:19:50Z gouriano $ +############################################################################# + +NCBI_add_app(test_jsonwrapp) + +if(OFF) +# CMakeLists.txt autogenerated from /export/home/dicuccio/cpp-cmake/cpp-cmake.2015-01-24/src/misc/jsonwrapp/test/Makefile.in +# + +# Include projects from this directory +include(CMakeLists.test_jsonwrapp.app.txt) +endif() + diff --git a/c++/src/misc/jsonwrapp/test/Makefile.in b/c++/src/misc/jsonwrapp/test/Makefile.in new file mode 100644 index 00000000..2e8d671c --- /dev/null +++ b/c++/src/misc/jsonwrapp/test/Makefile.in @@ -0,0 +1,6 @@ +# $Id: Makefile.in 528470 2017-02-23 14:33:44Z gouriano $ + +APP_PROJ = test_jsonwrapp test_jsonwrapp10 + +srcdir = @srcdir@ +include @builddir@/Makefile.meta diff --git a/c++/src/misc/jsonwrapp/test/Makefile.test_jsonwrapp.app b/c++/src/misc/jsonwrapp/test/Makefile.test_jsonwrapp.app new file mode 100644 index 00000000..460e69ad --- /dev/null +++ b/c++/src/misc/jsonwrapp/test/Makefile.test_jsonwrapp.app @@ -0,0 +1,12 @@ +APP = test_jsonwrapp +SRC = test_jsonwrapp + +CPPFLAGS = $(BOOST_INCLUDE) $(ORIG_CPPFLAGS) +LIB = test_boost xncbi + +REQUIRES = Boost.Test.Included + +PROJ_TAG = test +CHECK_CMD = + +WATCHERS = gouriano diff --git a/c++/src/misc/jsonwrapp/test/Makefile.test_jsonwrapp10.app b/c++/src/misc/jsonwrapp/test/Makefile.test_jsonwrapp10.app new file mode 100644 index 00000000..3a762712 --- /dev/null +++ 
b/c++/src/misc/jsonwrapp/test/Makefile.test_jsonwrapp10.app @@ -0,0 +1,12 @@ +APP = test_jsonwrapp10 +SRC = test_jsonwrapp10 + +CPPFLAGS = $(BOOST_INCLUDE) $(ORIG_CPPFLAGS) +LIB = test_boost xncbi + +REQUIRES = Boost.Test.Included + +PROJ_TAG = test +CHECK_CMD = + +WATCHERS = gouriano diff --git a/c++/src/misc/jsonwrapp/test/test_jsonwrapp.cpp b/c++/src/misc/jsonwrapp/test/test_jsonwrapp.cpp new file mode 100644 index 00000000..bc27f895 --- /dev/null +++ b/c++/src/misc/jsonwrapp/test/test_jsonwrapp.cpp @@ -0,0 +1,1050 @@ + +/* $Id: test_jsonwrapp.cpp 542033 2017-07-25 15:17:01Z gouriano $ +* =========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. 
+* +* =========================================================================== +* +* Author: Andrei Gourianov +* +* File Description: +* JSON Wrapper API test +*/ + +#include +#include +#include + +#define BOOST_AUTO_TEST_MAIN +#include + +#include /* This header must go last */ + + +USING_NCBI_SCOPE; + + +void Printout(size_t offset, const CJson_ConstNode& node) +{ + CJson_Node::EJsonType type = node.GetType(); + string soff(offset,' '); + + cout << "type: "; + switch (type) { + default: cout << "UNKNOWN";break; + case CJson_Node::eNull: cout << "null"; break; + case CJson_Node::eBool: cout << "bool"; break; + case CJson_Node::eObject: cout << "object"; break; + case CJson_Node::eArray: cout << "array"; break; + case CJson_Node::eString: cout << "string"; break; + case CJson_Node::eNumber: cout << "number"; break; + } + cout << "; "; + + cout << "value: "; + if (node.IsValue()) { + CJson_ConstValue value( node.GetValue() ); + cout << node.ToString(); + if (value.IsNull()) { + cout << "null"; + } + if (value.IsBool()) { + cout << (value.GetBool() ? 
"true" : "false"); + } + if (value.IsNumber()) { + cout << "number"; + } + if (value.IsInt4()) { + cout << " " << value.GetInt4(); + } + if (value.IsUint4()) { + cout << " " << value.GetUint4(); + } + if (value.IsInt8()) { + cout << " " << value.GetInt8(); + } + if (value.IsUint8()) { + cout << " " << value.GetUint8(); + } + if (value.IsDouble()) { + cout << " " << value.GetDouble(); + } + if (value.IsString()) { + cout << " " << value.GetString(); + } + } + if (node.IsObject()) { + cout << "{ " << endl; + CJson_ConstObject obj = node.GetObject(); + CJson_Object::const_iterator mi = obj.begin(); + CJson_Object::const_iterator me = obj.end(); + for( ; mi != me; ++mi) { + CJson_ConstNode tt( mi->value); +// tt.SetNull(); + cout << soff; + cout << "name: " << mi->name << "; "; + Printout( offset+2, tt ); + } + cout << "}"; + } + if (node.IsArray()) { + cout << "[ " << endl << soff; + CJson_ConstArray arr( node.GetArray()); + for (CJson_Array::const_iterator i = arr.begin(); + i != arr.end(); i++) { + Printout( offset+2, *i ); + } + for (size_t i = 0; i < arr.size(); i++) { + Printout( offset+2, arr[i] ); + } + cout << "]"; + } + cout << endl; +} + + +class CCrawler : public CJson_WalkHandler +{ +public: + CCrawler(int test=0) : m_TestNo(test) { + } + ~CCrawler() {} + + virtual bool BeginObject(const std::string& name) { + std::cout << "begin_object " << name + << std::endl + << ", JPath = " << GetCurrentJPath() + << ", JPointer = " << GetCurrentJPointer() + << std::endl; + if (m_TestNo == 1) + { + if (name == "obj3") { + CJson_Document da; + Read(da); + cout << da; + } + } + return true; + } + virtual bool BeginObjectMember(const std::string& name, + const std::string& member) { + std::cout << "begin_object_member " << name << "." 
<< member + << std::endl + << ", JPath = " << GetCurrentJPath() + << ", JPointer = " << GetCurrentJPointer() + << std::endl; + + return true; + } + virtual bool PlainMemberValue(const std::string& name, + const std::string& member, + const CJson_ConstValue& value) { + std::cout << "plain_member_value " << name << "." << member + << " = " << value.ToString() + << std::endl + << ", JPath = " << GetCurrentJPath() + << ", JPointer = " << GetCurrentJPointer() + << std::endl; + return true; + } + virtual bool EndObject(const std::string& name) { + std::cout << "end_object " << name + << std::endl + << ", JPath = " << GetCurrentJPath() + << ", JPointer = " << GetCurrentJPointer() + << std::endl; + return true; + } + + virtual bool BeginArray(const std::string& name) { + std::cout << "begin_array " << name + << std::endl + << ", JPath = " << GetCurrentJPath() + << ", JPointer = " << GetCurrentJPointer() + << std::endl; + if (m_TestNo == 2) + { + if (name == "array") { + CJson_Document da; + Read(da); + cout << da; + } + } + if (m_TestNo == 3) + { + // abort parsing + return false; + } + return true; + } + virtual bool BeginArrayElement(const std::string& name, + size_t index) { + std::cout << "begin_array_element " << name << "[" + << index << "]" + << std::endl + << ", JPath = " << GetCurrentJPath() + << ", JPointer = " << GetCurrentJPointer() + << std::endl; + return true; + } + virtual bool PlainElementValue(const std::string& name, + size_t index, + const CJson_ConstValue& value) { + std::cout << "plain_element_value " << name << "[" + << index << "] = " << value.ToString() + << std::endl + << ", JPath = " << GetCurrentJPath() + << ", JPointer = " << GetCurrentJPointer() + << std::endl; + return true; + } + virtual bool EndArray(const std::string& name) { + std::cout << "end_array " << name + << std::endl + << ", JPath = " << GetCurrentJPath() + << ", JPointer = " << GetCurrentJPointer() + << std::endl; + return true; + } +private: + int m_TestNo; +}; + 
+BOOST_AUTO_TEST_CASE(s_JsonWrapp) +{ + cout << "Size of data object is " << sizeof(rapidjson::Value) << endl; + + CJson_Document doc(CJson_Value::eObject); + CJson_Object obj( doc.SetObject()); +//will not compile +#if 0 + CJson_Node tn; + CJson_Value tv; + CJson_Array ta; + CJson_Object to; +#endif + +// -------------------------------------------------------------------------- +// add/delete elements into object + { + obj["bool"].SetValue().SetBool(true); + obj["int4"].SetValue().SetInt4(4); + BOOST_CHECK(obj.size() == 2); + obj.erase( obj.begin()); + BOOST_CHECK(obj.size() == 1); + obj.erase( obj.begin(), obj.end()); + BOOST_CHECK(obj.empty()); + + CJson_ConstObject o2(obj); +//will not compile +#if 0 + o2["bool"].IsValue(); +#endif + try { + o2.at("bool").IsValue(); + BOOST_CHECK(false); + } + catch (std::exception& e) { + cout << e.what() << endl;; + } + } + + +// -------------------------------------------------------------------------- +// add elements into object + obj.insert("null"); + BOOST_CHECK(obj["null"].IsNull()); + BOOST_CHECK(!obj.empty()); + BOOST_CHECK(obj.find("null") == obj.begin()); + + { + CJson_Node n1 = obj["null"]; + CJson_ConstNode n2( obj["null"]); + n2 = n1; +//will not compile +#if 0 + CJson_Node n3(n2); + n1 = n2; +#endif + } + + obj.insert("bool", true); + BOOST_CHECK(obj["bool"].IsValue()); + BOOST_CHECK(obj["bool"].GetValue().IsBool()); + BOOST_CHECK(obj["bool"].GetValue().GetBool()); + BOOST_CHECK(obj.size() == 2); + + obj.insert("int4", 4); + BOOST_CHECK(obj["int4"].IsValue()); + BOOST_CHECK(obj["int4"].GetValue().IsNumber()); + BOOST_CHECK(obj["int4"].GetValue().IsInt4()); + BOOST_CHECK(obj["int4"].GetValue().IsInt8()); + BOOST_CHECK(obj["int4"].GetValue().GetInt4() == 4); + + obj.insert("uint4", Uint4(4)); + BOOST_CHECK(obj["uint4"].IsValue()); + BOOST_CHECK(obj["uint4"].GetValue().IsNumber()); + BOOST_CHECK(obj["uint4"].GetValue().IsUint4()); + BOOST_CHECK(obj["uint4"].GetValue().IsUint8()); + 
BOOST_CHECK(obj["uint4"].GetValue().GetUint4() == 4); + + obj.insert("int8", Int8(-8)); + BOOST_CHECK(obj["int8"].IsValue()); + BOOST_CHECK(obj["int8"].GetValue().IsNumber()); + BOOST_CHECK(obj["int8"].GetValue().IsInt4()); + BOOST_CHECK(obj["int8"].GetValue().IsInt8()); + BOOST_CHECK(obj["int8"].GetValue().GetInt8() == -8); + + obj.insert("uint8", Uint8(8)); + BOOST_CHECK(obj["uint8"].IsValue()); + BOOST_CHECK(obj["uint8"].GetValue().IsNumber()); + BOOST_CHECK(obj["uint8"].GetValue().IsUint4()); + BOOST_CHECK(obj["uint8"].GetValue().IsUint8()); + BOOST_CHECK(obj["uint8"].GetValue().GetUint8() == 8); + + obj["double"].SetValue().SetDouble(12.34); + BOOST_CHECK(obj["double"].GetValue().IsNumber()); + BOOST_CHECK(obj["double"].GetValue().IsDouble()); + BOOST_CHECK(!obj["double"].GetValue().IsUint4()); + BOOST_CHECK(obj["double"].GetValue().GetDouble() == 12.34); + + obj.insert("float", float(34)); + BOOST_CHECK(obj["float"].GetValue().IsNumber()); + BOOST_CHECK(obj["float"].GetValue().IsDouble()); + BOOST_CHECK(!obj["float"].GetValue().IsInt4()); + + obj.insert("string", "stringvalue"); + BOOST_CHECK(!obj["string"].GetValue().IsNumber()); + BOOST_CHECK(obj["string"].GetValue().IsString()); + obj.erase("string"); + BOOST_CHECK(!obj.has("string")); + obj["string"].SetValue().SetString("stringvalue"); + BOOST_CHECK(obj.has("string")); + BOOST_CHECK(obj.size() == 9); + + BOOST_CHECK(obj.find("bool") != obj.end()); + BOOST_CHECK(obj.find("double") != obj.end()); + + { + CJson_Object o2 = obj.insert_object("obj2"); + o2.insert("one", 1); + + CJson_Object o3 = o2.insert_object("obj3"); + BOOST_CHECK(!o3.IsNull()); + o3.insert("two", 2); + CJson_Array a1 = o3.insert_array("array"); + BOOST_CHECK(!a1.IsNull()); + +// -------------------------------------------------------------------------- +// add/delete elements into array + a1.push_back(1); + a1.erase(a1.begin()); + BOOST_CHECK(a1.empty()); + a1.push_back(1); + a1.push_back("two"); + a1.push_back(false); + 
CJson_Array::iterator ee = a1.erase(a1.begin(), a1.begin()+2); + BOOST_CHECK(ee->IsValue()); + BOOST_CHECK(ee->GetValue().IsBool()); + a1.clear(); + BOOST_CHECK(a1.empty()); + +// -------------------------------------------------------------------------- +// add/ elements into array + a1.push_back(1); + a1.push_back("two"); + a1.push_back(false); + a1.push_back(); + CJson_Array a2 = a1.push_back_array(); + a2.push_back(2); + a2.push_back("three"); + + CJson_Object o4 = a1.push_back_object(); + o4["one"].SetValue().SetInt4(1); + o4["two"].SetValue().SetString("2"); + a1.push_back("last"); + } + + BOOST_CHECK(obj["obj2"].IsObject()); + BOOST_CHECK(obj.has("null")); + obj.erase("null"); + BOOST_CHECK(!obj.has("null")); + obj.insert("null"); + BOOST_CHECK(obj.has("null")); + +// -------------------------------------------------------------------------- +// object iterators + { + CJson_Object::iterator ci; + CJson_Object::iterator ci0 = obj.begin(); + ci = obj.end(); + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci = ci0; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + CJson_Object::iterator ci1(ci); + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci0 == ci1); + ++ci; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci--; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + + ci = ci0; + ci1 = ci++; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = ++ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + ci = ++ci0; + ci1 = ci--; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = --ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + } + { + CJson_Object::const_iterator ci; + CJson_Object::const_iterator ci0 = obj.begin(); + ci = obj.end(); + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci = ci0; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + CJson_Object::const_iterator ci1(ci); + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci0 == ci1); + ++ci; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != 
ci); + ci--; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + + ci = ci0; + ci1 = ci++; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = ++ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + ci = ++ci0; + ci1 = ci--; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = --ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + } + { + CJson_Object::const_iterator ci; + CJson_Object::iterator ci0 = obj.begin(); + ci = obj.end(); + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci = ci0; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + CJson_Object::const_iterator ci1(ci); +// will not compile +// CJson_Object::iterator ci2(ci); + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci0 == ci1); + ++ci; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci--; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + + ci = ci0; + ci1 = ci++; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = ++ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + ci = ++ci0; + ci1 = ci--; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = --ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + } + +// -------------------------------------------------------------------------- +// traversing object elements + for (CJson_Object::iterator i = obj.begin(); i != obj.end(); ++i) { + CJson_Node v = i->value; + cout << i->name << endl; + } + for (CJson_ConstObject::const_iterator i = obj.begin(); i != obj.end(); ++i) { + CJson_ConstNode vc = i->value; +// will not compile +// CJson_Node v = i->value; + cout << i->name << endl; + } + for (CJson_ConstObject::const_iterator j = obj.begin(); j != obj.end(); ++j) { + cout << j->name << ": " << j->value << endl; + } + ITERATE (CJson_ConstObject, j, obj) { + cout << j->name << ": " << j->value << endl; + } + ITERATE (CJson_Object, j, obj) { + cout << j->name << ": " << j->value << endl; + } + NON_CONST_ITERATE (CJson_ConstObject, j, obj) { + 
cout << j->name << ": " << j->value << endl; + } + NON_CONST_ITERATE (CJson_Object, j, obj) { + cout << j->name << ": " << j->value << endl; + } + { + CJson_ConstObject::const_iterator j = obj.find("bool"); + if (j != obj.end()) { + cout << j->name << ": " << j->value << endl; + } + } +#if NCBI_HAVE_CXX11 + for_each(obj.begin(), obj.end(), [](const CJson_ConstObject::const_iterator::pair& v) { + cout << v.name << ": " << v.value << endl; + }); + for_each(obj.begin(), obj.end(), [](const CJson_ConstObject_pair& v) { + cout << v.name << ": " << v.value << endl; + }); + for_each(obj.begin(), obj.end(), [](CJson_Object_pair& v) { + cout << v.name << ": " << v.value << endl; + }); + for(const CJson_ConstObject::const_iterator::pair& v : obj) { + cout << v.name << ": " << v.value << endl; + } + for(const CJson_ConstObject_pair& v : obj) { + cout << v.name << ": " << v.value << endl; + } + for(CJson_Object::iterator::pair& v : obj) { + cout << v.name << ": " << v.value << endl; + } + for(CJson_Object_pair& v : obj) { + cout << v.name << ": " << v.value << endl; + } +#endif + +// -------------------------------------------------------------------------- +// array element access + CJson_Array arr = + obj["obj2"].SetObject().at("obj3").SetObject().at("array").SetArray(); + + BOOST_CHECK(!arr.empty()); + BOOST_CHECK(arr.size() == 7); + BOOST_CHECK(arr.back().GetValue().IsString()); + arr.push_back(); + BOOST_CHECK(arr.size() == 8); + BOOST_CHECK(arr.back().IsNull()); + arr.pop_back(); + BOOST_CHECK(arr.size() == 7); + BOOST_CHECK(arr.back().GetValue().IsString()); + BOOST_CHECK(arr.front().GetValue().IsNumber()); + BOOST_CHECK(arr[2].GetValue().IsBool()); + BOOST_CHECK( + obj["obj2"].GetObject().at("obj3").GetObject().at("array"). 
+ GetArray().at(1).GetValue().IsString()); + BOOST_CHECK(obj["obj2"].SetObject()["obj3"].SetObject()["array"].GetArray()[1].GetValue().IsString()); + BOOST_CHECK(obj["obj2"].GetObject()["obj3"].GetObject()["array"].GetArray()[1].GetValue().IsString()); + try { + arr.at(14).IsObject(); + BOOST_CHECK(false); + } + catch (std::exception& e) { + cout << e.what() << endl;; + } + +// -------------------------------------------------------------------------- +// array iterators + { + CJson_Array::iterator ci; + CJson_Array::iterator ci0 = arr.begin(); + ci = ci0 + 2; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci - ci0 == 2); + ci -= 2; + BOOST_CHECK(ci == ci0); + ci = arr.end(); + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci = ci0; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + CJson_Array::iterator ci1(ci); + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci0 == ci1); + ++ci; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci--; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + + ci = ci0; + ci1 = ci++; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = ++ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + ci = ++ci0; + ci1 = ci--; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = --ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + } + { + CJson_Array::const_iterator ci; + CJson_Array::const_iterator ci0 = arr.begin(); + ci = ci0 + 2; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci - ci0 == 2); + ci -= 2; + BOOST_CHECK(ci == ci0); + ci = arr.end(); + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci = ci0; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + CJson_Array::const_iterator ci1(ci); + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci0 == ci1); + ++ci; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci--; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + + ci = ci0; + ci1 = ci++; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = ++ci; + BOOST_CHECK(ci1 != 
ci0); + BOOST_CHECK(ci1 == ci); + ci = ++ci0; + ci1 = ci--; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = --ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + } + { + CJson_Array::const_iterator ci; + CJson_Array::iterator ci0 = arr.begin(); + ci = ci0 + 2; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci - ci0 == 2); + ci -= 2; + BOOST_CHECK(ci == ci0); + ci = arr.end(); + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci = ci0; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + CJson_Array::const_iterator ci1(ci); +// will not compile +// CJson_Array::iterator ci2(ci); + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci0 == ci1); + ++ci; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci--; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + + ci = ci0; + ci1 = ci++; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = ++ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + ci = ++ci0; + ci1 = ci--; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = --ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); +} + +// -------------------------------------------------------------------------- +// traversing array elements + for (CJson_Array::iterator i = arr.begin(); i != arr.end(); ++i) { + CJson_Node v = *i; + if (i->IsValue() && i->GetValue().IsString()) { + cout << v.GetValue().GetString() << endl; + } + } + for (CJson_ConstArray::const_iterator i = arr.begin(); i != arr.end(); ++i) { +// will not compile + //CJson_Node v = *i; + if (i->IsValue() && i->GetValue().IsString()) { + cout << i->GetValue().GetString() << endl; + } + } + ITERATE (CJson_ConstArray, i, arr) { + cout << *i; + } + ERASE_ITERATE (CJson_ConstArray, i, arr) { + cout << *i; + } + ITERATE (CJson_Array, i, arr) { + cout << *i; + } + NON_CONST_ITERATE (CJson_ConstArray, i, arr) { + cout << *i; + } + NON_CONST_ITERATE (CJson_Array, i, arr) { + cout << *i; + } +#if NCBI_HAVE_CXX11 + for_each(arr.begin(), arr.end(), 
[](const CJson_ConstNode& v) { + Printout(0,v); + }); + for(const CJson_ConstNode& v: arr) { + cout << v; + } + for(CJson_Node& v: arr) { + cout << v; + } +#endif + + + { +// -------------------------------------------------------------------------- +// new document from UTF8 string + CJson_Document docs("{\"null\": null, \"bool\": true, \"str\": \"str\"}"); + cout << docs; + BOOST_CHECK(docs.ReadSucceeded()); + BOOST_CHECK(docs.IsObject()); + BOOST_CHECK(docs.GetObject().at("bool").IsValue()); + BOOST_CHECK(docs.GetObject().at("bool").GetValue().IsBool()); + BOOST_CHECK(docs.GetObject().at("bool").GetValue().GetBool()); + + docs.ParseString("[\"utf8 string expected\", false, null ]"); + cout << docs; + BOOST_CHECK(docs.ReadSucceeded()); + BOOST_CHECK(docs.IsArray()); + BOOST_CHECK(docs.GetArray().size() == 3); + BOOST_CHECK(docs.GetArray().at(1).IsValue()); + BOOST_CHECK(docs.GetArray().at(1).GetValue().IsBool()); + BOOST_CHECK(!docs.GetArray().at(1).GetValue().GetBool()); + +// -------------------------------------------------------------------------- +// new document from array + CJson_Document doc0(arr); + cout << arr; + cout << doc0; + +// -------------------------------------------------------------------------- +// copy array into another array + CJson_Document doc1(CJson_Value::eArray); + CJson_Array arrDst = doc1.SetArray(); + copy(arr.begin(), arr.end(), back_inserter(arrDst)); + BOOST_CHECK(arr == arrDst); + BOOST_CHECK(arrDst == arr); + + CJson_Document doc2(CJson_Value::eArray); + CJson_Array arrDst2 = doc2.SetArray(); + arrDst2.AssignCopy(arr); + BOOST_CHECK(arr == arrDst2); + BOOST_CHECK(arrDst == arrDst2); + +// -------------------------------------------------------------------------- +// find an element in array and change it + CJson_Array::iterator fi = find(arr.begin(), arr.end(), + CJson_Document(CJson_Node::eBool).SetValue().SetBool(false)); + BOOST_CHECK(fi != arr.end()); + BOOST_CHECK(fi->IsValue()); + BOOST_CHECK(fi->GetValue().IsBool()); + 
BOOST_CHECK(!fi->GetValue().GetBool()); + fi->SetValue().SetBool(true); + BOOST_CHECK(fi->GetValue().GetBool()); + + BOOST_CHECK(1 == count(arr.begin(), arr.end(), + CJson_Document(CJson_Node::eBool).SetValue().SetBool(true))); +#if NCBI_HAVE_CXX11 + for_each(arrDst2.begin(), arrDst2.end(), [](CJson_Node& v) { + v.SetNull(); + }); + BOOST_CHECK(arrDst2.size() == + count(arrDst2.begin(), arrDst2.end(), CJson_Document(CJson_Node::eNull))); + BOOST_CHECK(all_of(arrDst2.begin(), arrDst2.end(), + [](const CJson_ConstNode& v)->bool {return v.IsNull();})); +#endif +// -------------------------------------------------------------------------- +// push_back array into array + arrDst.push_back( arr); + cout << arr; + cout << arrDst; + +// -------------------------------------------------------------------------- +// copy object into another object + CJson_Document doc3(CJson_Value::eObject); + doc3.SetObject().AssignCopy( doc.GetObject()); + BOOST_CHECK(doc == doc3); + BOOST_CHECK(doc.SetObject() == doc3.SetObject()); + BOOST_CHECK(doc.GetObject() == doc3.SetObject()); + cout << doc; + cout << doc3; + +// -------------------------------------------------------------------------- +// create copy of a document + CJson_Document cpy(doc); + BOOST_CHECK(doc == cpy); + BOOST_CHECK(doc.SetObject() == cpy.SetObject()); + BOOST_CHECK(doc.GetObject() == cpy.GetObject()); + + CJson_Document cpy2; + cpy2 = doc; + BOOST_CHECK(cpy2 == cpy); + +// -------------------------------------------------------------------------- +// insert array into object + cpy.SetObject().insert("arr", arr); + cout << cpy; + cout << arr; + +// -------------------------------------------------------------------------- +// insert object into object + cpy.SetObject().insert("ooo", cpy.GetObject()); + cout << cpy; + } + +// -------------------------------------------------------------------------- +// serialization + string filename( CDirEntry::GetTmpName() ); + for (CJson_Array::const_iterator i = arr.begin(); i 
!= arr.end(); ++i) { + CJson_Document doct(*i); + ofstream ofs(filename.c_str()); + ofs << doct; + ofs.close(); + ifstream ifs(filename.c_str()); + if (ifs.is_open()) { + CJson_Document doct2; + ifs >> doct2; + BOOST_CHECK(doct == doct2); + } + } + { + ofstream ofs(filename.c_str()); + ofs << doc; + } + +// -------------------------------------------------------------------------- +// SAX parsing + { + CCrawler wlk; + doc.Walk(wlk); + } + { + ifstream ifs(filename.c_str()); + if (ifs.is_open()) { + CCrawler wlk2(0); + CJson_Document::Walk(ifs,wlk2); + } + } + { + ifstream ifs(filename.c_str()); + if (ifs.is_open()) { + CCrawler wlk2(1); + CJson_Document::Walk(ifs,wlk2); + } + } + { + ifstream ifs(filename.c_str()); + if (ifs.is_open()) { + CCrawler wlk2(2); + CJson_Document::Walk(ifs,wlk2); + } + } + { + ifstream ifs(filename.c_str()); + if (ifs.is_open()) { + CCrawler wlk3(3); + CJson_Document::Walk(ifs,wlk3); + } + } + +// -------------------------------------------------------------------------- +// serialization + { + ifstream ifs(filename.c_str()); + if (ifs.is_open()) { + CJson_Document doc2; + ifs >> doc2; + } + } + CFile(filename).Remove(); + +// -------------------------------------------------------------------------- +// JSON schema + { + string str_schema( + "{\"$schema\": \"http://json-schema.org/draft-04/schema#\",\"title\": \"Product\",\"type\": \"object\",\"properties\": {" + "\"id\": {\"type\": \"integer\"}," + "\"name\": {\"type\": \"string\"}," + "\"price\":{\"type\": \"number\", \"minimum\": 0, \"exclusiveMinimum\": true}," + "\"tags\": {\"type\": \"array\",\"items\": {\"type\": \"string\"},\"minItems\": 1,\"uniqueItems\": true }" + "},\"required\": [\"id\", \"name\", \"price\"]}" + ); + CJson_Document sd(str_schema); + CJson_Schema sc(sd); + CJson_Document testdoc; + testdoc.SetObject().insert("id", 1); + testdoc.SetObject().insert("name", "objname"); + testdoc.SetObject().insert("price", 1.23); + CJson_Array ar = 
testdoc.SetObject().insert_array("tags"); + ar.push_back("one"); + ar.push_back("two"); + cout << testdoc << endl; + + BOOST_CHECK(sc.Validate(testdoc)); + if (!sc.IsValid()) { + cout << sc.GetValidationError() << endl; + } + BOOST_CHECK(testdoc.Write(filename, sc)); + BOOST_CHECK(testdoc.Read(filename, sc)); + cout << testdoc << endl; + + testdoc.SetObject().at("id").SetValue().SetBool(true); + BOOST_CHECK(!sc.Validate(testdoc)); + cout << sc.GetValidationError() << endl; + BOOST_CHECK(!testdoc.Write(filename, sc)); + cout << sc.GetValidationError() << endl; + + testdoc.SetObject().at("id").SetValue().SetInt4(1); + testdoc.SetObject().at("tags").SetArray().push_back(true); + BOOST_CHECK(!sc.Validate(testdoc)); + cout << sc.GetValidationError() << endl; + BOOST_CHECK(!testdoc.Write(filename, sc)); + cout << sc.GetValidationError() << endl; + + testdoc.SetObject().at("tags").SetArray().pop_back(); + testdoc.SetObject().at("tags").SetArray().push_back("one"); + BOOST_CHECK(!sc.Validate(testdoc)); + cout << sc.GetValidationError() << endl; + BOOST_CHECK(!testdoc.Write(filename, sc)); + cout << sc.GetValidationError() << endl; + } + CFile(filename).Remove(); + +// -------------------------------------------------------------------------- +// JSON pointer + BOOST_CHECK(doc.HasNode("#/obj2/one")); + CJson_ConstNode node = doc.GetNode("#/obj2/one"); + BOOST_CHECK(node.IsValue()); + BOOST_CHECK(node.GetValue().IsInt4()); + BOOST_CHECK(node.GetValue().GetInt4() == 1); + + BOOST_CHECK(doc.HasNode("#/obj2/obj3/array/4/1")); + node = doc.GetNode("#/obj2/obj3/array/4/1"); + BOOST_CHECK(node.IsValue()); + BOOST_CHECK(node.GetValue().IsString()); + BOOST_CHECK(node.GetValue().GetString() == "three"); + + node = doc.GetNode("/obj2/obj3/array/5/one"); + BOOST_CHECK(node.IsValue()); + BOOST_CHECK(node.GetValue().IsInt4()); + BOOST_CHECK(node.GetValue().GetInt4() == 1); + + node = doc.GetNode("/string"); + BOOST_CHECK(node.IsValue()); + BOOST_CHECK(node.GetValue().IsString()); + 
BOOST_CHECK(node.GetValue().GetString() == "stringvalue"); + + { + CJson_Document testdoc(CJson_Node::eObject); + BOOST_CHECK(!doc.HasNode("/id")); + testdoc.SetNode("/id").SetValue().SetInt4(1); + testdoc.SetNode("/name").SetValue().SetString("objname"); +#if 0 + testdoc.SetNode("/tags/0").SetValue().SetString("one"); + testdoc.SetNode("/tags/1").SetValue().SetString("two"); +#else + CJson_Array ar = testdoc.SetNode("/tags").ResetArray(); + ar.SetNode("/0").SetValue().SetString("one"); + ar.SetNode("/1").SetValue().SetString("two"); + testdoc.SetNode("/tags/2").SetValue().SetInt4(3); + testdoc.SetNode("/tags/3").SetValue().SetInt4(4); +#endif + cout << testdoc << endl; + } + { + CJson_Document testdoc(CJson_Node::eObject); + BOOST_CHECK(!doc.HasNode("/id/one/int")); + testdoc.SetNode("/id/one/int").SetValue().SetInt4(1); + testdoc.SetNode("/id/one/name").SetValue().SetString("one"); + BOOST_CHECK(testdoc.GetNode("/id/one").IsObject()); + testdoc.SetNode("/id/one").SetObject().insert("extra", true); + BOOST_CHECK(testdoc.SetNode("/id").IsObject()); + testdoc.SetNode("/id").SetObject().insert("extra", false); + testdoc.SetNode("/id/two/int").SetValue().SetInt4(2); + testdoc.SetNode("/id/two/name").SetValue().SetString("two"); + testdoc.SetNode("/id/three/0").SetValue().SetInt4(3); + testdoc.SetNode("/id/three/1").SetValue().SetString("three"); + BOOST_CHECK(testdoc.SetNode("/id/three").IsArray()); + testdoc.SetNode("/id/three").SetArray().push_back("tres"); + cout << testdoc << endl; + cout << testdoc.GetNode("/id/one/name").ToString() << endl; + cout << testdoc.GetNode("/id/two").ToString() << endl; + try { + cout << testdoc.GetNode("/id/two/num").ToString() << endl; + BOOST_CHECK(false); + } + catch (std::exception& e) { + cout << e.what() << endl;; + } + } +} diff --git a/c++/src/misc/jsonwrapp/test/test_jsonwrapp10.cpp b/c++/src/misc/jsonwrapp/test/test_jsonwrapp10.cpp new file mode 100644 index 00000000..26797ef3 --- /dev/null +++ 
b/c++/src/misc/jsonwrapp/test/test_jsonwrapp10.cpp @@ -0,0 +1,908 @@ + +/* $Id: test_jsonwrapp10.cpp 528470 2017-02-23 14:33:44Z gouriano $ +* =========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. 
+* +* =========================================================================== +* +* Author: Andrei Gourianov +* +* File Description: +* JSON Wrapper API test +*/ + +#include +#include +#include + +#define BOOST_AUTO_TEST_MAIN +#include + +#include /* This header must go last */ + + +USING_NCBI_SCOPE; + + +void Printout(size_t offset, const CJson_ConstNode& node) +{ + CJson_Node::EJsonType type = node.GetType(); + string soff(offset,' '); + + cout << "type: "; + switch (type) { + default: cout << "UNKNOWN";break; + case CJson_Node::eNull: cout << "null"; break; + case CJson_Node::eBool: cout << "bool"; break; + case CJson_Node::eObject: cout << "object"; break; + case CJson_Node::eArray: cout << "array"; break; + case CJson_Node::eString: cout << "string"; break; + case CJson_Node::eNumber: cout << "number"; break; + } + cout << "; "; + + cout << "value: "; + if (node.IsValue()) { + CJson_ConstValue value( node.GetValue() ); + cout << node.ToString(); + if (value.IsNull()) { + cout << "null"; + } + if (value.IsBool()) { + cout << (value.GetBool() ? 
"true" : "false"); + } + if (value.IsNumber()) { + cout << "number"; + } + if (value.IsInt4()) { + cout << " " << value.GetInt4(); + } + if (value.IsUint4()) { + cout << " " << value.GetUint4(); + } + if (value.IsInt8()) { + cout << " " << value.GetInt8(); + } + if (value.IsUint8()) { + cout << " " << value.GetUint8(); + } + if (value.IsDouble()) { + cout << " " << value.GetDouble(); + } + if (value.IsString()) { + cout << " " << value.GetString(); + } + } + if (node.IsObject()) { + cout << "{ " << endl; + CJson_ConstObject obj = node.GetObject(); + CJson_Object::const_iterator mi = obj.begin(); + CJson_Object::const_iterator me = obj.end(); + for( ; mi != me; ++mi) { + CJson_ConstNode tt( mi->value); +// tt.SetNull(); + cout << soff; + cout << "name: " << mi->name << "; "; + Printout( offset+2, tt ); + } + cout << "}"; + } + if (node.IsArray()) { + cout << "[ " << endl << soff; + CJson_ConstArray arr( node.GetArray()); + for (CJson_Array::const_iterator i = arr.begin(); + i != arr.end(); i++) { + Printout( offset+2, *i ); + } + for (size_t i = 0; i < arr.size(); i++) { + Printout( offset+2, arr[i] ); + } + cout << "]"; + } + cout << endl; +} + + +class CCrawler : public CJson_WalkHandler +{ +public: + CCrawler(int test=0) : m_TestNo(test) { + } + ~CCrawler() {} + + virtual bool BeginObject(const std::string& name) { + std::cout << "begin_object " << name << ", JPath = " + << GetCurrentJPath() << std::endl; + if (m_TestNo == 1) + { + if (name == "obj3") { + CJson_Document da; + Read(da); + cout << da; + } + } + return true; + } + virtual bool BeginObjectMember(const std::string& name, + const std::string& member) { + std::cout << "begin_object_member " << name << "." + << member << ", JPath = " + << GetCurrentJPath() << std::endl; + + return true; + } + virtual bool PlainMemberValue(const std::string& name, + const std::string& member, + const CJson_ConstValue& /*value*/) { + std::cout << "plain_member_value " << name + << "." 
<< member << ", JPath = " + << GetCurrentJPath() << std::endl; + return true; + } + virtual bool EndObject(const std::string& name) { + std::cout << "end_object " << name << ", JPath = " + << GetCurrentJPath() << std::endl; + return true; + } + + virtual bool BeginArray(const std::string& name) { + std::cout << "begin_array " << name << ", JPath = " + << GetCurrentJPath() << std::endl; + if (m_TestNo == 2) + { + if (name == "array") { + CJson_Document da; + Read(da); + cout << da; + } + } + if (m_TestNo == 3) + { + // abort parsing + return false; + } + return true; + } + virtual bool BeginArrayElement(const std::string& name, + size_t index) { + std::cout << "begin_array_element " << name << "[" + << index << "], JPath = " + << GetCurrentJPath() << std::endl; + return true; + } + virtual bool PlainElementValue(const std::string& name, + size_t index, + const CJson_ConstValue& /*value*/) { + std::cout << "plain_element_value " << name << "[" + << index << "], JPath = " + << GetCurrentJPath() << std::endl; + return true; + } + virtual bool EndArray(const std::string& name) { + std::cout << "end_array " << name << ", JPath = " + << GetCurrentJPath() << std::endl; + return true; + } +private: + int m_TestNo; +}; + +BOOST_AUTO_TEST_CASE(s_JsonWrapp) +{ + cout << "Size of data object is " << sizeof(rapidjson::Value) << endl; + + CJson_Document doc(CJson_Value::eObject); + CJson_Object obj( doc.SetObject()); +//will not compile +#if 0 + CJson_Node tn; + CJson_Value tv; + CJson_Array ta; + CJson_Object to; +#endif + +// -------------------------------------------------------------------------- +// add/delete elements into object + { + obj["bool"].SetValue().SetBool(true); + obj["int4"].SetValue().SetInt4(4); + BOOST_CHECK(obj.size() == 2); + obj.erase( obj.begin()); + BOOST_CHECK(obj.size() == 1); + obj.erase( obj.begin(), obj.end()); + BOOST_CHECK(obj.empty()); + + CJson_ConstObject o2(obj); +//will not compile +#if 0 + o2["bool"].IsValue(); +#endif + try { + 
o2.at("bool").IsValue(); + BOOST_CHECK(false); + } + catch (std::exception& e) { + cout << e.what() << endl;; + } + } + + +// -------------------------------------------------------------------------- +// add elements into object + obj.insert("null"); + BOOST_CHECK(obj["null"].IsNull()); + BOOST_CHECK(!obj.empty()); + BOOST_CHECK(obj.find("null") == obj.begin()); + + { + CJson_Node n1 = obj["null"]; + CJson_ConstNode n2( obj["null"]); + n2 = n1; +//will not compile +#if 0 + CJson_Node n3(n2); + n1 = n2; +#endif + } + + obj.insert("bool", true); + BOOST_CHECK(obj["bool"].IsValue()); + BOOST_CHECK(obj["bool"].GetValue().IsBool()); + BOOST_CHECK(obj["bool"].GetValue().GetBool()); + BOOST_CHECK(obj.size() == 2); + + obj.insert("int4", 4); + BOOST_CHECK(obj["int4"].IsValue()); + BOOST_CHECK(obj["int4"].GetValue().IsNumber()); + BOOST_CHECK(obj["int4"].GetValue().IsInt4()); + BOOST_CHECK(obj["int4"].GetValue().IsInt8()); + BOOST_CHECK(obj["int4"].GetValue().GetInt4() == 4); + + obj.insert("uint4", Uint4(4)); + BOOST_CHECK(obj["uint4"].IsValue()); + BOOST_CHECK(obj["uint4"].GetValue().IsNumber()); + BOOST_CHECK(obj["uint4"].GetValue().IsUint4()); + BOOST_CHECK(obj["uint4"].GetValue().IsUint8()); + BOOST_CHECK(obj["uint4"].GetValue().GetUint4() == 4); + + obj.insert("int8", Int8(-8)); + BOOST_CHECK(obj["int8"].IsValue()); + BOOST_CHECK(obj["int8"].GetValue().IsNumber()); + BOOST_CHECK(obj["int8"].GetValue().IsInt4()); + BOOST_CHECK(obj["int8"].GetValue().IsInt8()); + BOOST_CHECK(obj["int8"].GetValue().GetInt8() == -8); + + obj.insert("uint8", Uint8(8)); + BOOST_CHECK(obj["uint8"].IsValue()); + BOOST_CHECK(obj["uint8"].GetValue().IsNumber()); + BOOST_CHECK(obj["uint8"].GetValue().IsUint4()); + BOOST_CHECK(obj["uint8"].GetValue().IsUint8()); + BOOST_CHECK(obj["uint8"].GetValue().GetUint8() == 8); + + obj["double"].SetValue().SetDouble(12.34); + BOOST_CHECK(obj["double"].GetValue().IsNumber()); + BOOST_CHECK(obj["double"].GetValue().IsDouble()); + 
BOOST_CHECK(!obj["double"].GetValue().IsUint4()); + BOOST_CHECK(obj["double"].GetValue().GetDouble() == 12.34); + + obj.insert("float", float(34)); + BOOST_CHECK(obj["float"].GetValue().IsNumber()); + BOOST_CHECK(obj["float"].GetValue().IsDouble()); + BOOST_CHECK(!obj["float"].GetValue().IsInt4()); + + obj.insert("string", "stringvalue"); + BOOST_CHECK(!obj["string"].GetValue().IsNumber()); + BOOST_CHECK(obj["string"].GetValue().IsString()); + obj.erase("string"); + BOOST_CHECK(!obj.has("string")); + obj["string"].SetValue().SetString("stringvalue"); + BOOST_CHECK(obj.has("string")); + BOOST_CHECK(obj.size() == 9); + + BOOST_CHECK(obj.find("bool") != obj.end()); + BOOST_CHECK(obj.find("double") != obj.end()); + + { + CJson_Object o2 = obj.insert_object("obj2"); + o2.insert("one", 1); + + CJson_Object o3 = o2.insert_object("obj3"); + BOOST_CHECK(!o3.IsNull()); + o3.insert("two", 2); + CJson_Array a1 = o3.insert_array("array"); + BOOST_CHECK(!a1.IsNull()); + +// -------------------------------------------------------------------------- +// add/delete elements into array + a1.push_back(1); + a1.erase(a1.begin()); + BOOST_CHECK(a1.empty()); + a1.push_back(1); + a1.push_back("two"); + a1.push_back(false); + CJson_Array::iterator ee = a1.erase(a1.begin(), a1.begin()+2); + BOOST_CHECK(ee->IsValue()); + BOOST_CHECK(ee->GetValue().IsBool()); + a1.clear(); + BOOST_CHECK(a1.empty()); + +// -------------------------------------------------------------------------- +// add/ elements into array + a1.push_back(1); + a1.push_back("two"); + a1.push_back(false); + a1.push_back(); + CJson_Array a2 = a1.push_back_array(); + a2.push_back(2); + a2.push_back("three"); + + CJson_Object o4 = a1.push_back_object(); + o4["one"].SetValue().SetInt4(1); + o4["two"].SetValue().SetString("2"); + a1.push_back("last"); + } + + BOOST_CHECK(obj["obj2"].IsObject()); + BOOST_CHECK(obj.has("null")); + obj.erase("null"); + BOOST_CHECK(!obj.has("null")); + obj.insert("null"); + 
BOOST_CHECK(obj.has("null")); + +// -------------------------------------------------------------------------- +// object iterators + { + CJson_Object::iterator ci; + CJson_Object::iterator ci0 = obj.begin(); + ci = obj.end(); + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci = ci0; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + CJson_Object::iterator ci1(ci); + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci0 == ci1); + ++ci; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci--; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + + ci = ci0; + ci1 = ci++; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = ++ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + ci = ++ci0; + ci1 = ci--; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = --ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + } + { + CJson_Object::const_iterator ci; + CJson_Object::const_iterator ci0 = obj.begin(); + ci = obj.end(); + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci = ci0; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + CJson_Object::const_iterator ci1(ci); + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci0 == ci1); + ++ci; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci--; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + + ci = ci0; + ci1 = ci++; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = ++ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + ci = ++ci0; + ci1 = ci--; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = --ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + } + { + CJson_Object::const_iterator ci; + CJson_Object::iterator ci0 = obj.begin(); + ci = obj.end(); + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci = ci0; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + CJson_Object::const_iterator ci1(ci); +// will not compile +// CJson_Object::iterator ci2(ci); + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci0 == 
ci1); + ++ci; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci--; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + + ci = ci0; + ci1 = ci++; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = ++ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + ci = ++ci0; + ci1 = ci--; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = --ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + } + +// -------------------------------------------------------------------------- +// traversing object elements + for (CJson_Object::iterator i = obj.begin(); i != obj.end(); ++i) { + CJson_Node v = i->value; + cout << i->name << endl; + } + for (CJson_ConstObject::const_iterator i = obj.begin(); i != obj.end(); ++i) { + CJson_ConstNode vc = i->value; +// will not compile +// CJson_Node v = i->value; + cout << i->name << endl; + } + for (CJson_ConstObject::const_iterator j = obj.begin(); j != obj.end(); ++j) { + cout << j->name << ": " << j->value << endl; + } + ITERATE (CJson_ConstObject, j, obj) { + cout << j->name << ": " << j->value << endl; + } + ITERATE (CJson_Object, j, obj) { + cout << j->name << ": " << j->value << endl; + } + NON_CONST_ITERATE (CJson_ConstObject, j, obj) { + cout << j->name << ": " << j->value << endl; + } + NON_CONST_ITERATE (CJson_Object, j, obj) { + cout << j->name << ": " << j->value << endl; + } + { + CJson_ConstObject::const_iterator j = obj.find("bool"); + if (j != obj.end()) { + cout << j->name << ": " << j->value << endl; + } + } +#if NCBI_HAVE_CXX11 + for_each(obj.begin(), obj.end(), [](const CJson_ConstObject::const_iterator::pair& v) { + cout << v.name << ": " << v.value << endl; + }); + for_each(obj.begin(), obj.end(), [](const CJson_ConstObject_pair& v) { + cout << v.name << ": " << v.value << endl; + }); + for_each(obj.begin(), obj.end(), [](CJson_Object_pair& v) { + cout << v.name << ": " << v.value << endl; + }); + for(const CJson_ConstObject::const_iterator::pair& v : obj) { + 
cout << v.name << ": " << v.value << endl; + } + for(const CJson_ConstObject_pair& v : obj) { + cout << v.name << ": " << v.value << endl; + } + for(CJson_Object::iterator::pair& v : obj) { + cout << v.name << ": " << v.value << endl; + } + for(CJson_Object_pair& v : obj) { + cout << v.name << ": " << v.value << endl; + } +#endif + +// -------------------------------------------------------------------------- +// array element access + CJson_Array arr = + obj["obj2"].SetObject().at("obj3").SetObject().at("array").SetArray(); + + BOOST_CHECK(!arr.empty()); + BOOST_CHECK(arr.size() == 7); + BOOST_CHECK(arr.back().GetValue().IsString()); + arr.push_back(); + BOOST_CHECK(arr.size() == 8); + BOOST_CHECK(arr.back().IsNull()); + arr.pop_back(); + BOOST_CHECK(arr.size() == 7); + BOOST_CHECK(arr.back().GetValue().IsString()); + BOOST_CHECK(arr.front().GetValue().IsNumber()); + BOOST_CHECK(arr[2].GetValue().IsBool()); + BOOST_CHECK( + obj["obj2"].GetObject().at("obj3").GetObject().at("array"). + GetArray().at(1).GetValue().IsString()); + BOOST_CHECK(obj["obj2"].SetObject()["obj3"].SetObject()["array"].GetArray()[1].GetValue().IsString()); + BOOST_CHECK(obj["obj2"].GetObject()["obj3"].GetObject()["array"].GetArray()[1].GetValue().IsString()); + try { + arr.at(14).IsObject(); + BOOST_CHECK(false); + } + catch (std::exception& e) { + cout << e.what() << endl;; + } + +// -------------------------------------------------------------------------- +// array iterators + { + CJson_Array::iterator ci; + CJson_Array::iterator ci0 = arr.begin(); + ci = ci0 + 2; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci - ci0 == 2); + ci -= 2; + BOOST_CHECK(ci == ci0); + ci = arr.end(); + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci = ci0; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + CJson_Array::iterator ci1(ci); + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci0 == ci1); + ++ci; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci--; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); 
+ + ci = ci0; + ci1 = ci++; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = ++ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + ci = ++ci0; + ci1 = ci--; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = --ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + } + { + CJson_Array::const_iterator ci; + CJson_Array::const_iterator ci0 = arr.begin(); + ci = ci0 + 2; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci - ci0 == 2); + ci -= 2; + BOOST_CHECK(ci == ci0); + ci = arr.end(); + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci = ci0; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + CJson_Array::const_iterator ci1(ci); + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci0 == ci1); + ++ci; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci--; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + + ci = ci0; + ci1 = ci++; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = ++ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + ci = ++ci0; + ci1 = ci--; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = --ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + } + { + CJson_Array::const_iterator ci; + CJson_Array::iterator ci0 = arr.begin(); + ci = ci0 + 2; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci - ci0 == 2); + ci -= 2; + BOOST_CHECK(ci == ci0); + ci = arr.end(); + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci = ci0; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + CJson_Array::const_iterator ci1(ci); +// will not compile +// CJson_Array::iterator ci2(ci); + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci0 == ci1); + ++ci; + BOOST_CHECK(ci != ci0); + BOOST_CHECK(ci0 != ci); + ci--; + BOOST_CHECK(ci == ci0); + BOOST_CHECK(ci0 == ci); + + ci = ci0; + ci1 = ci++; + BOOST_CHECK(ci1 == ci0); + BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = ++ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); + ci = ++ci0; + ci1 = ci--; + BOOST_CHECK(ci1 == ci0); 
+ BOOST_CHECK(ci != ci0); + ci = ci0; + ci1 = --ci; + BOOST_CHECK(ci1 != ci0); + BOOST_CHECK(ci1 == ci); +} + +// -------------------------------------------------------------------------- +// traversing array elements + for (CJson_Array::iterator i = arr.begin(); i != arr.end(); ++i) { + CJson_Node v = *i; + if (i->IsValue() && i->GetValue().IsString()) { + cout << v.GetValue().GetString() << endl; + } + } + for (CJson_ConstArray::const_iterator i = arr.begin(); i != arr.end(); ++i) { +// will not compile + //CJson_Node v = *i; + if (i->IsValue() && i->GetValue().IsString()) { + cout << i->GetValue().GetString() << endl; + } + } + ITERATE (CJson_ConstArray, i, arr) { + cout << *i; + } + ERASE_ITERATE (CJson_ConstArray, i, arr) { + cout << *i; + } + ITERATE (CJson_Array, i, arr) { + cout << *i; + } + NON_CONST_ITERATE (CJson_ConstArray, i, arr) { + cout << *i; + } + NON_CONST_ITERATE (CJson_Array, i, arr) { + cout << *i; + } +#if NCBI_HAVE_CXX11 + for_each(arr.begin(), arr.end(), [](const CJson_ConstNode& v) { + Printout(0,v); + }); + for(const CJson_ConstNode& v: arr) { + cout << v; + } + for(CJson_Node& v: arr) { + cout << v; + } +#endif + + + { +// -------------------------------------------------------------------------- +// new document from UTF8 string + CJson_Document docs("{\"null\": null, \"bool\": true, \"str\": \"str\"}"); + cout << docs; + BOOST_CHECK(docs.ReadSucceeded()); + BOOST_CHECK(docs.IsObject()); + BOOST_CHECK(docs.GetObject().at("bool").IsValue()); + BOOST_CHECK(docs.GetObject().at("bool").GetValue().IsBool()); + BOOST_CHECK(docs.GetObject().at("bool").GetValue().GetBool()); + + docs.ParseString("[\"utf8 string expected\", false, null ]"); + cout << docs; + BOOST_CHECK(docs.ReadSucceeded()); + BOOST_CHECK(docs.IsArray()); + BOOST_CHECK(docs.GetArray().size() == 3); + BOOST_CHECK(docs.GetArray().at(1).IsValue()); + BOOST_CHECK(docs.GetArray().at(1).GetValue().IsBool()); + BOOST_CHECK(!docs.GetArray().at(1).GetValue().GetBool()); + +// 
-------------------------------------------------------------------------- +// new document from array + CJson_Document doc0(arr); + cout << arr; + cout << doc0; + +// -------------------------------------------------------------------------- +// copy array into another array + CJson_Document doc1(CJson_Value::eArray); + CJson_Array arrDst = doc1.SetArray(); + copy(arr.begin(), arr.end(), back_inserter(arrDst)); + BOOST_CHECK(arr == arrDst); + BOOST_CHECK(arrDst == arr); + + CJson_Document doc2(CJson_Value::eArray); + CJson_Array arrDst2 = doc2.SetArray(); + arrDst2.AssignCopy(arr); + BOOST_CHECK(arr == arrDst2); + BOOST_CHECK(arrDst == arrDst2); + +// -------------------------------------------------------------------------- +// find an element in array and change it + CJson_Array::iterator fi = find(arr.begin(), arr.end(), + CJson_Document(CJson_Node::eBool).SetValue().SetBool(false)); + BOOST_CHECK(fi != arr.end()); + BOOST_CHECK(fi->IsValue()); + BOOST_CHECK(fi->GetValue().IsBool()); + BOOST_CHECK(!fi->GetValue().GetBool()); + fi->SetValue().SetBool(true); + BOOST_CHECK(fi->GetValue().GetBool()); + + BOOST_CHECK(1 == count(arr.begin(), arr.end(), + CJson_Document(CJson_Node::eBool).SetValue().SetBool(true))); +#if NCBI_HAVE_CXX11 + for_each(arrDst2.begin(), arrDst2.end(), [](CJson_Node& v) { + v.SetNull(); + }); + BOOST_CHECK(arrDst2.size() == + count(arrDst2.begin(), arrDst2.end(), CJson_Document(CJson_Node::eNull))); + BOOST_CHECK(all_of(arrDst2.begin(), arrDst2.end(), + [](const CJson_ConstNode& v)->bool {return v.IsNull();})); +#endif +// -------------------------------------------------------------------------- +// push_back array into array + arrDst.push_back( arr); + cout << arr; + cout << arrDst; + +// -------------------------------------------------------------------------- +// copy object into another object + CJson_Document doc3(CJson_Value::eObject); + doc3.SetObject().AssignCopy( doc.GetObject()); + BOOST_CHECK(doc == doc3); + 
BOOST_CHECK(doc.SetObject() == doc3.SetObject()); + BOOST_CHECK(doc.GetObject() == doc3.SetObject()); + cout << doc; + cout << doc3; + +// -------------------------------------------------------------------------- +// create copy of a document + CJson_Document cpy(doc); + BOOST_CHECK(doc == cpy); + BOOST_CHECK(doc.SetObject() == cpy.SetObject()); + BOOST_CHECK(doc.GetObject() == cpy.GetObject()); + + CJson_Document cpy2; + cpy2 = doc; + BOOST_CHECK(cpy2 == cpy); + +// -------------------------------------------------------------------------- +// insert array into object + cpy.SetObject().insert("arr", arr); + cout << cpy; + cout << arr; + +// -------------------------------------------------------------------------- +// insert object into object + cpy.SetObject().insert("ooo", cpy.GetObject()); + cout << cpy; + } + +// -------------------------------------------------------------------------- +// serialization + string filename( CDirEntry::GetTmpName() ); + for (CJson_Array::const_iterator i = arr.begin(); i != arr.end(); ++i) { + CJson_Document doct(*i); + ofstream ofs(filename.c_str()); + ofs << doct; + ofs.close(); + ifstream ifs(filename.c_str()); + if (ifs.is_open()) { + CJson_Document doct2; + ifs >> doct2; + BOOST_CHECK(doct == doct2); + } + } + { + ofstream ofs(filename.c_str()); + ofs << doc; + } + +// -------------------------------------------------------------------------- +// SAX parsing + { + CCrawler wlk; + doc.Walk(wlk); + } + { + ifstream ifs(filename.c_str()); + if (ifs.is_open()) { + CCrawler wlk2(0); + CJson_Document::Walk(ifs,wlk2); + } + } + { + ifstream ifs(filename.c_str()); + if (ifs.is_open()) { + CCrawler wlk2(1); + CJson_Document::Walk(ifs,wlk2); + } + } + { + ifstream ifs(filename.c_str()); + if (ifs.is_open()) { + CCrawler wlk2(2); + CJson_Document::Walk(ifs,wlk2); + } + } + { + ifstream ifs(filename.c_str()); + if (ifs.is_open()) { + CCrawler wlk3(3); + CJson_Document::Walk(ifs,wlk3); + } + } + +// 
-------------------------------------------------------------------------- +// serialization + { + ifstream ifs(filename.c_str()); + if (ifs.is_open()) { + CJson_Document doc2; + ifs >> doc2; + } + } + CFile(filename).Remove(); +} + diff --git a/c++/src/objects/general/Dbtag.cpp b/c++/src/objects/general/Dbtag.cpp index 309660e6..776eedcf 100644 --- a/c++/src/objects/general/Dbtag.cpp +++ b/c++/src/objects/general/Dbtag.cpp @@ -1,4 +1,4 @@ -/* $Id: Dbtag.cpp 572067 2018-10-09 13:30:09Z ivanov $ +/* $Id: Dbtag.cpp 576574 2018-12-18 14:11:23Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -142,6 +142,7 @@ static const TDbxrefPair kApprovedDbXrefs[] = { { "PeptideAtlas", CDbtag::eDbtagType_PeptideAtlas }, { "Phytozome", CDbtag::eDbtagType_Phytozome }, { "PomBase", CDbtag::eDbtagType_PomBase }, + { "PseudoCAP", CDbtag::eDbtagType_PseudoCAP }, { "PseudoCap", CDbtag::eDbtagType_PseudoCap }, { "RAP-DB", CDbtag::eDbtagType_RAP_DB }, { "RATMAP", CDbtag::eDbtagType_RATMAP }, @@ -654,7 +655,7 @@ static const TDbtUrl sc_url_prefix[] = { { CDbtag::eDbtagType_PGN, "http://pgn.cornell.edu/cgi-bin/search/seq_search_result.pl?identifier=" }, // http page states info no longer avail at this website, includes links to look for a new location tested 7/13/2016 { CDbtag::eDbtagType_Phytozome, "https://phytozome.jgi.doe.gov/pz/portal.html#!results?search=0&crown=1&star=1&method=0&searchText=" }, { CDbtag::eDbtagType_PomBase, "http://www.pombase.org/spombe/result/" }, // https not available tested 7/13/2016 - { CDbtag::eDbtagType_PseudoCap, "http://www.pseudomonas.com/getAnnotation.do?locusID=" }, // url not found tested 7/13/2016 + { CDbtag::eDbtagType_PseudoCap, "http://www.pseudomonas.com/primarySequenceFeature/list?c1=name&e1=1&v1=" }, // url not found tested 7/13/2016 { CDbtag::eDbtagType_RAP_DB, "http://rapdb.dna.affrc.go.jp/cgi-bin/gbrowse_details/latest?name=" }, // https appears available, domain appears to exist 
but http “page not found” with note about release of a major update tested 7/13/2016 { CDbtag::eDbtagType_RATMAP, "https://ratmap.gen.gu.se/ShowSingleLocus.htm?accno=" }, { CDbtag::eDbtagType_RBGE_garden, "https://data.rbge.org.uk/living/" }, @@ -720,6 +721,7 @@ static const TDbtUrl sc_url_prefix[] = { { CDbtag::eDbtagType_PeptideAtlas, "https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/Search?action=GO&search_key=" }, { CDbtag::eDbtagType_EPDnew, "http://epd.vital-it.ch/cgi-bin/get_doc?format=genome&entry=" }, { CDbtag::eDbtagType_Ensembl, "https://www.ensembl.org/id/" }, // url seems incorrect, includes msg user has been redirected and “Error 404 Page not found” tested 7/13/2016 + { CDbtag::eDbtagType_PseudoCAP, "http://www.pseudomonas.com/primarySequenceFeature/list?c1=name&e1=1&v1=" }, // url not found tested 7/13/2016 }; typedef CStaticPairArrayMap TUrlPrefixMap; diff --git a/c++/src/objects/genomecoll/genome_collection.asn b/c++/src/objects/genomecoll/genome_collection.asn index a4366640..4100ddd3 100644 --- a/c++/src/objects/genomecoll/genome_collection.asn +++ b/c++/src/objects/genomecoll/genome_collection.asn @@ -1,4 +1,4 @@ --- $Id: genome_collection.asn 574129 2018-11-06 16:43:37Z ivanov $ +-- $Id: genome_collection.asn 577592 2019-01-07 15:42:24Z ivanov $ -- ********************************************************************** -- -- NCBI Genome Collections @@ -136,6 +136,7 @@ GC-AssemblySet ::= SEQUENCE { -- second part of diploid; linked to another haploid full-assembly -- this one is alt-loci or second full-haploid alternate-haplotype(5), + alternate-pseudohaplotype(6), -- assembly is a collection for annotation annotation-target-set(101), -- analysis set used for sequencing by alignments diff --git a/c++/src/objects/seqfeat/BioSource.cpp b/c++/src/objects/seqfeat/BioSource.cpp index b0f41a7a..87c4a4dd 100644 --- a/c++/src/objects/seqfeat/BioSource.cpp +++ b/c++/src/objects/seqfeat/BioSource.cpp @@ -1,4 +1,4 @@ -/* $Id: BioSource.cpp 546450 
2017-09-18 18:15:29Z bollin $ +/* $Id: BioSource.cpp 576575 2018-12-18 14:11:38Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -741,14 +741,29 @@ void CBioSource::UpdateWithBioSample(const CBioSource& biosample, bool force, bo } catch (...) { try { CSubSource::TSubtype subtype = CSubSource::GetSubtypeValue((*it)->GetFieldName()); - if (!NStr::IsBlank((*it)->GetSrcVal())) { - RemoveSubSource(subtype, (*it)->GetSrcVal()); + if (CSubSource::NeedsNoText(subtype)) { + // process diff that involve NeedsNoText subtypes + if (NStr::EqualNocase((*it)->GetSrcVal(), "true")) { + RemoveSubSource(subtype); + } + if (NStr::EqualNocase(sample_val, "true")) { + CRef sub(new CSubSource()); + sub->SetSubtype(subtype); + sub->SetName(""); + subtypes.push_back(sub); + } } - if (!NStr::IsBlank(sample_val)) { - CRef sub(new CSubSource()); - sub->SetSubtype(subtype); - sub->SetName(sample_val); - subtypes.push_back(sub); + else { + // process all other subtypes + if (!NStr::IsBlank((*it)->GetSrcVal())) { + RemoveSubSource(subtype, (*it)->GetSrcVal()); + } + if (!NStr::IsBlank(sample_val)) { + CRef sub(new CSubSource()); + sub->SetSubtype(subtype); + sub->SetName(sample_val); + subtypes.push_back(sub); + } } } catch (...) 
{ NCBI_THROW(CException, eUnknown, "Unknown field name"); diff --git a/c++/src/objects/seqfeat/Genetic_code_table.cpp b/c++/src/objects/seqfeat/Genetic_code_table.cpp index 12b30471..b3c5d5b4 100644 --- a/c++/src/objects/seqfeat/Genetic_code_table.cpp +++ b/c++/src/objects/seqfeat/Genetic_code_table.cpp @@ -1,4 +1,4 @@ -/* $Id: Genetic_code_table.cpp 552836 2017-12-07 23:12:16Z kans $ +/* $Id: Genetic_code_table.cpp 581624 2019-03-04 16:47:19Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -275,7 +275,7 @@ private: TTransTablesById m_TransTablesById; // local copy of genetic code table ASN.1 - static const char * sm_GenCodeTblMemStr []; + static const char * const sm_GenCodeTblMemStr []; }; // single instance of implementation class is initialized before Main @@ -442,6 +442,9 @@ CGen_code_table_imp::CGen_code_table_imp(void) string str; for (size_t i = 0; sm_GenCodeTblMemStr [i]; i++) { str += sm_GenCodeTblMemStr [i]; + if (sm_GenCodeTblMemStr[i][0] == '}') { + break; + } } // create an in memory stream on sm_GenCodeTblMemStr @@ -675,89 +678,10 @@ First T C A G Third */ // local copy of gc.prt genetic code table ASN.1 -const char * CGen_code_table_imp::sm_GenCodeTblMemStr [] = -{ - "Genetic-code-table ::= {\n", - "{ name \"Standard\" , name \"SGC0\" , id 1 ,\n", - "ncbieaa \"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"---M------**--*----M---------------M----------------------------\" } ,\n", - "{ name \"Vertebrate Mitochondrial\" , name \"SGC1\" , id 2 ,\n", - "ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG\",\n", - "sncbieaa \"----------**--------------------MMMM----------**---M------------\" } ,\n", - "{ name \"Yeast Mitochondrial\" , name \"SGC2\" , id 3 ,\n", - "ncbieaa \"FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"----------**----------------------MM----------------------------\" } 
,\n", - "{ name \"Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate\n", - "Mitochondrial; Mycoplasma; Spiroplasma\" ,\n", - "name \"SGC3\" , id 4 ,\n", - "ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"--MM------**-------M------------MMMM---------------M------------\" } ,\n", - "{ name \"Invertebrate Mitochondrial\" , name \"SGC4\" , id 5 ,\n", - "ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"---M------**--------------------MMMM---------------M------------\" } ,\n", - "{ name \"Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear\" ,\n", - "name \"SGC5\" , id 6 ,\n", - "ncbieaa \"FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"--------------*--------------------M----------------------------\" } ,\n", - "{ name \"Echinoderm Mitochondrial; Flatworm Mitochondrial\" , name \"SGC8\" , id 9 ,\n", - "ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"----------**-----------------------M---------------M------------\" } ,\n", - "{ name \"Euplotid Nuclear\" , name \"SGC9\" , id 10 ,\n", - "ncbieaa \"FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"----------**-----------------------M----------------------------\" } ,\n", - "{ name \"Bacterial, Archaeal and Plant Plastid\" , id 11 ,\n", - "ncbieaa \"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"---M------**--*----M------------MMMM---------------M------------\" } ,\n", - "{ name \"Alternative Yeast Nuclear\" , id 12 ,\n", - "ncbieaa \"FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"----------**--*----M---------------M----------------------------\" } ,\n", - "{ name \"Ascidian Mitochondrial\" , id 13 ,\n", - "ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG\",\n", - "sncbieaa 
\"---M------**----------------------MM---------------M------------\" } ,\n", - "{ name \"Alternative Flatworm Mitochondrial\" , id 14 ,\n", - "ncbieaa \"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"-----------*-----------------------M----------------------------\" } ,\n", - "{ name \"Blepharisma Macronuclear\" , id 15 ,\n", - "ncbieaa \"FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"----------*---*--------------------M----------------------------\" } ,\n", - "{ name \"Chlorophycean Mitochondrial\" , id 16 ,\n", - "ncbieaa \"FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"----------*---*--------------------M----------------------------\" } ,\n", - "{ name \"Trematode Mitochondrial\" , id 21 ,\n", - "ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"----------**-----------------------M---------------M------------\" } ,\n", - "{ name \"Scenedesmus obliquus Mitochondrial\" , id 22 ,\n", - "ncbieaa \"FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"------*---*---*--------------------M----------------------------\" } ,\n", - "{ name \"Thraustochytrium Mitochondrial\" , id 23 ,\n", - "ncbieaa \"FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"--*-------**--*-----------------M--M---------------M------------\" } ,\n", - "{ name \"Pterobranchia Mitochondrial\" , id 24 ,\n", - "ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"---M------**-------M---------------M---------------M------------\" } ,\n", - "{ name \"Candidate Division SR1 and Gracilibacteria\" , id 25 ,\n", - "ncbieaa \"FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"---M------**-----------------------M---------------M------------\" } ,\n", - "{ name \"Pachysolen tannophilus Nuclear\" , id 26 
,\n", - "ncbieaa \"FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"----------**--*----M---------------M----------------------------\" } ,\n", - "{ name \"Karyorelict Nuclear\" , id 27 ,\n", - "ncbieaa \"FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"--------------*--------------------M----------------------------\" } ,\n", - "{ name \"Condylostoma Nuclear\" , id 28 ,\n", - "ncbieaa \"FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"----------**--*--------------------M----------------------------\" } ,\n", - "{ name \"Mesodinium Nuclear\" , id 29 ,\n", - "ncbieaa \"FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"--------------*--------------------M----------------------------\" } ,\n", - "{ name \"Peritrich Nuclear\" , id 30 ,\n", - "ncbieaa \"FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"--------------*--------------------M----------------------------\" } ,\n", - "{ name \"Blastocrithidia Nuclear\" , id 31 ,\n", - "ncbieaa \"FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",\n", - "sncbieaa \"----------**-----------------------M----------------------------\" } };\n", - 0 // to indicate that there is no more data -}; +#define static +#define s_GenCodeTblMemStr CGen_code_table_imp::sm_GenCodeTblMemStr +#include "gc.inc" +#undef static END_objects_SCOPE // namespace ncbi::objects:: diff --git a/c++/src/objects/seqfeat/OrgMod.cpp b/c++/src/objects/seqfeat/OrgMod.cpp index 8e299b28..dca15305 100644 --- a/c++/src/objects/seqfeat/OrgMod.cpp +++ b/c++/src/objects/seqfeat/OrgMod.cpp @@ -1,4 +1,4 @@ -/* $Id: OrgMod.cpp 567323 2018-07-17 17:58:04Z bollin $ +/* $Id: OrgMod.cpp 576749 2018-12-20 13:14:57Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -225,6 +225,10 @@ 
DEFINE_STATIC_FAST_MUTEX(s_InstitutionCollectionCodeMutex); static void s_ProcessInstitutionCollectionCodeLine(const CTempString& line) { + if (NStr::StartsWith(line, "#")) { + // ignore line, this is a comment + return; + } vector tokens; NStr::Split(line, "\t", tokens); if (tokens.size() < 3) { @@ -276,7 +280,7 @@ static void s_InitializeInstitutionCollectionCodeMaps(void) } string file = g_FindDataFile("institution_codes.txt"); CRef lr; - if ( !file.empty() ) { + if ( !file.empty() && !g_IsDataFileOld(file, kInstitutionCollectionCodeList[0])) { try { lr = ILineReader::New(file); } NCBI_CATCH("s_InitializeInstitutionCollectionCodeMaps") diff --git a/c++/src/objects/seqfeat/Prot_ref.cpp b/c++/src/objects/seqfeat/Prot_ref.cpp index 10ca1fff..b7c53edf 100644 --- a/c++/src/objects/seqfeat/Prot_ref.cpp +++ b/c++/src/objects/seqfeat/Prot_ref.cpp @@ -1,4 +1,4 @@ -/* $Id: Prot_ref.cpp 567323 2018-07-17 17:58:04Z bollin $ +/* $Id: Prot_ref.cpp 580137 2019-02-11 16:14:07Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -208,13 +208,18 @@ const string& CProt_ref::GetECNumberReplacement(const string& old_ecno) } TECNumberReplacementMap::const_iterator it = s_ECNumberReplacementMap.find(old_ecno); - if (it == s_ECNumberReplacementMap.end()) { - NCBI_THROW(CCoreException, eInvalidArg, - "No replacement defined for EC number " + old_ecno); - // alternatively, could return old_ecno or kEmptyStr - } else { + if (it != s_ECNumberReplacementMap.end()) { + // see if this number has also been replaced + auto other_it = s_ECNumberReplacementMap.find(it->second); + while (other_it != s_ECNumberReplacementMap.end()) { + it = other_it; + other_it = s_ECNumberReplacementMap.find(it->second); + } return it->second; + } else { + return kEmptyStr; } + } diff --git a/c++/src/objects/seqfeat/SeqFeatData.cpp b/c++/src/objects/seqfeat/SeqFeatData.cpp index a0c5ce50..68b3aa44 100644 --- a/c++/src/objects/seqfeat/SeqFeatData.cpp 
+++ b/c++/src/objects/seqfeat/SeqFeatData.cpp @@ -1,4 +1,4 @@ -/* $Id: SeqFeatData.cpp 568924 2018-08-13 19:53:53Z kans $ +/* $Id: SeqFeatData.cpp 581300 2019-02-27 16:29:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -3031,8 +3031,13 @@ START_SUBTYPE(propeptide_aa) ADD_QUAL(pseudogene); ADD_QUAL(standard_name); ADD_QUAL(usedin); -END_SUBTYPE + END_SUBTYPE +START_SUBTYPE(any) + for (size_t it = eQual_allele; it <= eQual_whole_replicon; ++it) { + quals.push_back((EQualifier)it); + } +END_SUBTYPE #undef START_SUBTYPE #undef ADD_QUAL #undef END_SUBTYPE diff --git a/c++/src/objects/seqfeat/SubSource.cpp b/c++/src/objects/seqfeat/SubSource.cpp index 06a7e3f8..1869b976 100644 --- a/c++/src/objects/seqfeat/SubSource.cpp +++ b/c++/src/objects/seqfeat/SubSource.cpp @@ -1,4 +1,4 @@ -/* $Id: SubSource.cpp 572070 2018-10-09 13:31:06Z ivanov $ +/* $Id: SubSource.cpp 581646 2019-03-04 17:38:22Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1268,7 +1268,7 @@ static string s_InsertSpacesBetweenTokens(const string &old_str) if (sym < 0x80) { char c = static_cast(sym); - if (!isalpha(c) && !isdigit(c) && c != '.') + if (!isalpha(c) && !isdigit(c) && c != '.' && c != '-' && c != '+') { new_str += ' '; } @@ -1292,6 +1292,42 @@ static string s_InsertSpacesBetweenTokens(const string &old_str) return new_str; } +static string s_RemoveSpacesWithinNumbers(const string &old_str) +{ + string new_str; + bool is_number = true; + for (string::const_iterator i = old_str.begin(); i != old_str.end(); ++i) + { + TUnicodeSymbol sym = CUtf8::Decode(i); + if (sym < 0x80) + { + char c = static_cast(sym); + size_t j = new_str.size(); + if (j >= 4 && new_str[j-1] == ' ' && new_str[j-2] == '.' 
&& new_str[j-3] == ' ' && isdigit(new_str[j-4]) && isdigit(c)) + { + new_str.pop_back(); + new_str.pop_back(); + new_str.pop_back(); + new_str += '.'; + } + new_str += c; + if (!isdigit(c) && c != '+' && c != '-' && c != '.' && !isspace(c)) + is_number = false; + } + else + { + new_str += ' '; + is_number = false; + } + } + if (is_number) + { + NStr::ReplaceInPlace(new_str, "+", " +"); + NStr::ReplaceInPlace(new_str, "-", " -"); + } + return new_str; +} + static bool s_IsNumber(const string &token, double *result = NULL) { double num = NStr::StringToDouble(token, NStr::fConvErr_NoThrow); @@ -1304,7 +1340,7 @@ static bool s_IsNumber(const string &token, double *result = NULL) return true; } -static string s_NormalizeTokens(vector &tokens, vector &numbers, vector &precision, vector &lat_long, vector &nsew) +static string s_NormalizeTokens(vector &tokens, vector &numbers, vector &anum, vector &precision, vector &lat_long, vector &nsew) { vector pattern; for (size_t i = 0; i < tokens.size(); i++) @@ -1315,6 +1351,7 @@ static string s_NormalizeTokens(vector &tokens, vector &numbers, if (s_IsNumber(token, &num)) { numbers.push_back(num); + anum.push_back(token); pattern.push_back("1"); precision.push_back(0); if (NStr::Find(token, ".") != NPOS && !NStr::EndsWith(token, ".")) @@ -1331,12 +1368,15 @@ static string s_NormalizeTokens(vector &tokens, vector &numbers, if (tmp.size() == 3 && s_IsNumber(tmp[0], &num0) && s_IsNumber(tmp[1], &num1) && s_IsNumber(tmp[2], &num2)) { numbers.push_back(num0); + anum.push_back(tmp[0]); pattern.push_back("1"); precision.push_back(0); numbers.push_back(num1); + anum.push_back(tmp[1]); pattern.push_back("1"); precision.push_back(0); numbers.push_back(num2); + anum.push_back(tmp[2]); pattern.push_back("1"); precision.push_back(0); continue; @@ -1348,30 +1388,31 @@ static string s_NormalizeTokens(vector &tokens, vector &numbers, token = "\""; } - if (NStr::EqualNocase(token, "degrees") || NStr::EqualNocase(token, "deg") || 
NStr::EqualNocase(token, "degree")) + if (NStr::EqualNocase(token, "degrees") || NStr::EqualNocase(token, "deg") || NStr::EqualNocase(token, "deg.") || NStr::EqualNocase(token, "degree")) { token = "degrees"; pattern.push_back("degrees"); } - else if ( token == "\'" || NStr::EqualNocase(token, "min") || NStr::EqualNocase(token, "minute") || NStr::EqualNocase(token, "minutes")) + else if ( token == "\'" || NStr::EqualNocase(token, "min") || NStr::EqualNocase(token, "min.") || NStr::EqualNocase(token, "minute") || NStr::EqualNocase(token, "minutes")) { token = "\'"; pattern.push_back("\'"); } - else if (token == "\"" || NStr::EqualNocase(token, "sec") || NStr::EqualNocase(token, "second") || NStr::EqualNocase(token, "seconds") || token == "#") + else if (token == "\"" || NStr::EqualNocase(token, "sec") || NStr::EqualNocase(token, "sec.") || NStr::EqualNocase(token, "second") || NStr::EqualNocase(token, "seconds")) { token = "\""; pattern.push_back("\""); } - else if (token == "," || token == ":" || token == "_" || token == "&" || token == "." || token == ";" || NStr::EqualNocase(token, "and")) + else if (token == "," || token == ":" || token == "_" || token == "&" || token == "." 
|| token == ";" || token == "#" || NStr::EqualNocase(token, "and")) { } - else if (NStr::EqualNocase(token, "lattitude") || NStr::EqualNocase(token, "lat")) + else if (NStr::EqualNocase(token, "lattitude") || NStr::EqualNocase(token, "latitude") || NStr::EqualNocase(token, "lat") || NStr::EqualNocase(token, "lat.")) { pattern.push_back("lat"); lat_long.push_back("lat"); } - else if (NStr::EqualNocase(token, "longitude") || NStr::EqualNocase(token, "lo") || NStr::EqualNocase(token, "lon") || NStr::EqualNocase(token, "long")) + else if (NStr::EqualNocase(token, "longitude") || NStr::EqualNocase(token, "lo") || NStr::EqualNocase(token, "lon") || NStr::EqualNocase(token, "long") + || NStr::EqualNocase(token, "lo.") || NStr::EqualNocase(token, "lon.") || NStr::EqualNocase(token, "long.")) { pattern.push_back("lat"); lat_long.push_back("long"); @@ -1391,7 +1432,7 @@ static string s_NormalizeTokens(vector &tokens, vector &numbers, pattern.push_back("N"); nsew.push_back("E"); } - else if (token == "W" || NStr::EqualNocase(token, "west")) + else if (token == "W" || NStr::EqualNocase(token, "west") || token == "Wdeg") { pattern.push_back("N"); nsew.push_back("W"); @@ -1427,7 +1468,7 @@ static string s_NormalizeTokens(vector &tokens, vector &numbers, return NStr::Join(pattern, " "); } -static void s_ReorderNorthSouthEastWest(vector &numbers, vector &precision, const vector &lat_long, const vector &nsew) +static void s_ReorderNorthSouthEastWest(vector &numbers, vector &precision, const vector &lat_long, vector &nsew) { if (numbers.size() != 2) { @@ -1440,6 +1481,8 @@ static void s_ReorderNorthSouthEastWest(vector &numbers, vector &pr { swap(numbers[0], numbers[1]); swap(precision[0], precision[1]); + if (nsew.size() == 2) + swap(nsew[0], nsew[1]); } } else if (!lat_long.empty()) @@ -1449,19 +1492,36 @@ static void s_ReorderNorthSouthEastWest(vector &numbers, vector &pr } if (nsew.size() == 2) { + if ((nsew[0] == "E" || nsew[0] == "W") && + (nsew[1] == "N" || nsew[1] == "S")) + 
{ + swap(numbers[0], numbers[1]); + swap(precision[0], precision[1]); + swap(nsew[0], nsew[1]); + } if (nsew[0] == "N") + { numbers[0] = fabs(numbers[0]); + } else if (nsew[0] == "S") - numbers[0] = -fabs(numbers[0]); + { + if (numbers[0] != 0) + numbers[0] = -fabs(numbers[0]); + } else { numbers.clear(); return; } if (nsew[1] == "E") + { numbers[1] = fabs(numbers[1]); + } else if (nsew[1] == "W") - numbers[1] = -fabs(numbers[1]); + { + if (numbers[1] != 0) + numbers[1] = -fabs(numbers[1]); + } else { numbers.clear(); @@ -1492,7 +1552,8 @@ static void s_GetLatLong(const string &new_str, vector &numbers, vector< NStr::Split(new_str, " ", tokens, NStr::fSplit_Tokenize); vector lat_long; vector nsew; - string pattern = s_NormalizeTokens(tokens, numbers, precision, lat_long, nsew); + vector anum; + string pattern = s_NormalizeTokens(tokens, numbers, anum, precision, lat_long, nsew); if (pattern.empty()) { numbers.clear(); @@ -1500,10 +1561,15 @@ static void s_GetLatLong(const string &new_str, vector &numbers, vector< } vector degrees(2, 0); vector prec(2, 0); + int sign1 = 1; + int sign2 = 1; if ( pattern == "1 1" || pattern == "1 N 1 N" || + pattern == "N 1 N 1" || pattern == "1 degrees N 1 degrees N" || - pattern == "lat 1 lat 1") + pattern == "lat 1 lat 1" || + pattern == "1 N lat 1 N lat" || + pattern == "1 degrees N lat 1 degrees N lat") { degrees[0] = numbers[0]; degrees[1] = numbers[1]; @@ -1512,25 +1578,33 @@ static void s_GetLatLong(const string &new_str, vector &numbers, vector< } else if ((pattern == "1 1 \" 1 1 '" || pattern == "1 degrees 1 \" N 1 degrees 1 ' N") - && numbers[1] < 60 && numbers[3] < 60) + && numbers[1] < 60 && numbers[3] < 60 + && numbers[1] >= 0 && numbers[3] >= 0) { - degrees[0] = numbers[0] + numbers[1] / 3600; - degrees[1] = numbers[2] + numbers[3] / 60; + sign1 = anum[0][0] == '-' ? -1 : 1; + sign2 = anum[2][0] == '-' ? 
-1 : 1; + degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 3600); + degrees[1] = sign2*(fabs(numbers[2]) + numbers[3] / 60); prec[0] = max(precision[0], precision[1] + 4); prec[1] = max(precision[2], precision[3] + 2); } - else if (pattern == "1 1 ' 1" - && numbers[1] < 60) + else if ( (pattern == "1 1 ' 1" || + pattern == "1 degrees 1 ' N 1 degrees N") + && numbers[1] < 60 + && numbers[1] >= 0) { - degrees[0] = numbers[0] + numbers[1] / 60; + sign1 = anum[0][0] == '-' ? -1 : 1; + degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60); degrees[1] = numbers[2]; prec[0] = max(precision[0], precision[1] + 2); prec[1] = precision[2]; } else if (pattern == "1 1 ' 1 \" 1" - && numbers[1] < 60 && numbers[2] < 60) + && numbers[1] < 60 && numbers[2] < 60 + && numbers[1] >= 0 && numbers[2] >= 0) { - degrees[0] = numbers[0] + numbers[1] / 60 + numbers[2] / 3600; + sign1 = anum[0][0] == '-' ? -1 : 1; + degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600); degrees[1] = numbers[3]; prec[0] = max(max(precision[0], precision[1] + 2), precision[2] + 4); prec[1] = precision[3]; @@ -1538,10 +1612,13 @@ static void s_GetLatLong(const string &new_str, vector &numbers, vector< else if ((pattern == "1 1 ' 1 \" 1 1 '" || pattern == "1 1 1 N 1 1 N" || pattern == "1 degrees 1 ' 1 \" N 1 degrees 1 ' N") - && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60) + && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60 + && numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0) { - degrees[0] = numbers[0] + numbers[1] / 60 + numbers[2] / 3600; - degrees[1] = numbers[3] + numbers[4] / 60; + sign1 = anum[0][0] == '-' ? -1 : 1; + sign2 = anum[3][0] == '-' ? 
-1 : 1; + degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600); + degrees[1] = sign2*(fabs(numbers[3]) + numbers[4] / 60); prec[0] = max(max(precision[0], precision[1] + 2), precision[2] + 4); prec[1] = max(precision[3], precision[4] + 2); } @@ -1552,54 +1629,78 @@ static void s_GetLatLong(const string &new_str, vector &numbers, vector< pattern == "1 degrees 1 ' 1 N 1 degrees 1 ' 1 N" || pattern == "1 degrees 1 1 N 1 degrees 1 1 N" || pattern == "1 1 1 N 1 1 1 N") - && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60 && numbers[5] < 60) + && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60 && numbers[5] < 60 + && numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0 && numbers[5] >= 0) { - degrees[0] = numbers[0] + numbers[1] / 60 + numbers[2] / 3600; - degrees[1] = numbers[3] + numbers[4] / 60 + numbers[5] / 3600; + sign1 = anum[0][0] == '-' ? -1 : 1; + sign2 = anum[3][0] == '-' ? -1 : 1; + degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600); + degrees[1] = sign2*(fabs(numbers[3]) + numbers[4] / 60 + numbers[5] / 3600); prec[0] = max(max(precision[0], precision[1] + 2), precision[2] + 4); prec[1] = max(max(precision[3], precision[4] + 2), precision[5] + 4); } else if (( pattern == "1 1 ' 1 1 '" || pattern == "1 1 N 1 1 N" || + pattern == "1 1 ' N 1 1 ' N" || pattern == "1 degrees 1 ' N 1 degrees 1 ' N" || + pattern == "lat 1 degrees 1 ' N lat 1 degrees 1 ' N" || pattern == "1 degrees 1 N 1 degrees 1 N" || - pattern == "1 degrees 1 N 1 degrees 1 ' N") - && numbers[1] < 60 && numbers[3] < 60) + pattern == "1 degrees 1 N 1 degrees 1 ' N" || + pattern == "1 degrees 1 ' N 1 degrees 1 N" || + pattern == "N 1 degrees 1 ' N 1 degrees 1" || + pattern == "N 1 degrees 1 ' N 1 degrees 1 '" || + pattern == "N 1 degrees 1 ' N 1 1 '") + && numbers[1] < 60 && numbers[3] < 60 + && numbers[1] >= 0 && numbers[3] >= 0) { - degrees[0] = numbers[0] + numbers[1] / 60; - degrees[1] = numbers[2] + numbers[3] / 60; + sign1 = 
anum[0][0] == '-' ? -1 : 1; + sign2 = anum[2][0] == '-' ? -1 : 1; + degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60); + degrees[1] = sign2*(fabs(numbers[2]) + numbers[3] / 60); prec[0] = max(precision[0], precision[1] + 2); prec[1] = max(precision[2], precision[3] + 2); } - else if (pattern == "1 N 1 1 N" - && numbers[2] < 60) + else if ((pattern == "1 N 1 1 N" || + pattern == "1 degrees N 1 degrees 1 ' N") + && numbers[2] < 60 + && numbers[2] >= 0) { + sign2 = anum[1][0] == '-' ? -1 : 1; degrees[0] = numbers[0]; - degrees[1] = numbers[1] + numbers[2] / 60; + degrees[1] = sign2*(fabs(numbers[1]) + numbers[2] / 60); prec[0] = precision[0]; prec[1] = max(precision[1], precision[2] + 2); } - else if (pattern == "1 degrees 1 ' 1 degrees 1 ' 1 \"" - && numbers[1] < 60 && numbers[3] < 60 && numbers[4] < 60) + else if ((pattern == "1 degrees 1 ' 1 degrees 1 ' 1 \"" || + pattern == "N 1 1 N 1 1 1") + && numbers[1] < 60 && numbers[3] < 60 && numbers[4] < 60 + && numbers[1] >= 0 && numbers[3] >= 0 && numbers[4] >= 0) { - degrees[0] = numbers[0] + numbers[1] / 60; - degrees[1] = numbers[2] + numbers[3] / 60 + numbers[4] / 3600; + sign1 = anum[0][0] == '-' ? -1 : 1; + sign2 = anum[2][0] == '-' ? -1 : 1; + degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60); + degrees[1] = sign2*(fabs(numbers[2]) + numbers[3] / 60 + numbers[4] / 3600); prec[0] = max(precision[0], precision[1] + 2); prec[1] = max(max(precision[2], precision[3] + 2), precision[4] + 4); } else if (pattern == "1 degrees 1 degrees 1 ' 1 \"" - && numbers[2] < 60 && numbers[3] < 60) + && numbers[2] < 60 && numbers[3] < 60 + && numbers[2] >= 0 && numbers[3] >= 0) { + sign2 = anum[1][0] == '-' ? 
-1 : 1; degrees[0] = numbers[0]; - degrees[1] = numbers[1] + numbers[2] / 60 + numbers[3] / 3600; + degrees[1] = sign2*(fabs(numbers[1]) + numbers[2] / 60 + numbers[3] / 3600); prec[0] = precision[0]; prec[1] = max(max(precision[1], precision[2] + 2), precision[3] + 4); } else if (pattern == "1 degrees 1 ' 1 \" N 1 degrees 1 \" N" - && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60) + && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60 + && numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0) { - degrees[0] = numbers[0] + numbers[1] / 60 + numbers[2] / 3600; - degrees[1] = numbers[3] + numbers[4] / 3600; + sign1 = anum[0][0] == '-' ? -1 : 1; + sign2 = anum[3][0] == '-' ? -1 : 1; + degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600); + degrees[1] = sign2*(fabs(numbers[3]) + numbers[4] / 3600); prec[0] = max(max(precision[0], precision[1] + 2), precision[2] + 4); prec[1] = max(precision[3], precision[4] + 4); } @@ -1625,23 +1726,24 @@ string CSubSource::FixLatLonFormat (string orig_lat_lon, bool guess) NStr::TrimSuffixInPlace(old_str, "\""); } NStr::ReplaceInPlace(old_str, "\'\'", "\""); - NStr::ReplaceInPlace(old_str, ". 
", "."); - NStr::ReplaceInPlace(old_str, " .", "."); - string new_str = s_InsertSpacesBetweenTokens(old_str); + string fixed_str = s_RemoveSpacesWithinNumbers(old_str); + string new_str = s_InsertSpacesBetweenTokens(fixed_str); NStr::Sanitize(new_str); vector numbers; vector precision; s_GetLatLong(new_str, numbers, precision); string res; if (!numbers.empty()) + { res = MakeLatLon(numbers[0], numbers[1], precision[0], precision[1]); + } //cout << "After: " << res << endl; return res; } string CSubSource::MakeLatLon(double lat_value, double lon_value, int lat_precision, int lon_precision ) -{ +{ char ns = 'N'; if (lat_value < 0) { ns = 'S'; @@ -1788,6 +1890,7 @@ static const TWaterPairElem k_water_pair_map[] = { {"East Siberian Sea", "Arctic Ocean"}, {"English Channel", "Atlantic Ocean"}, {"Erythraean Sea", "Indian Ocean"}, + {"Golfo de California", "Pacific Ocean"}, {"Greenland Sea", "Arctic Ocean"}, {"Gulf of Mexico", "Atlantic Ocean"}, {"Gulf of Thailand", "Pacific Ocean"}, @@ -1948,15 +2051,26 @@ string CSubSource::ValidateLatLonCountry (const string& input_countryname, strin string wguess = id->GetGuessWater(); string cguess = id->GetGuessCountry(); + // special case where subsection of country has been identified but is not in coordinates of country + // VR-840 + if (province.empty() && NStr::Equal(cguess, country)) { + delete id; + return kEmptyStr; + } + + if (NStr::EqualNocase (country, "China") && NStr::EqualNocase (cguess, "Hong Kong")) { + delete id; return kEmptyStr; } if (NStr::EqualNocase (country, "USA") && NStr::EqualNocase (cguess, "Puerto Rico")) { + delete id; return kEmptyStr; } if (NStr::EqualNocase (country, "State of Palestine") && (NStr::EqualNocase (cguess, "Gaza Strip") || NStr::EqualNocase (cguess, "West Bank"))) { + delete id; return kEmptyStr; } diff --git a/c++/src/objects/seqfeat/common_tax.inc b/c++/src/objects/seqfeat/common_tax.inc index a7a99496..502c5dda 100644 --- a/c++/src/objects/seqfeat/common_tax.inc +++ 
b/c++/src/objects/seqfeat/common_tax.inc @@ -1,4 +1,4 @@ -/* $Id: common_tax.inc 523138 2016-12-28 19:53:42Z bollin $ +/* $Id: common_tax.inc 580656 2019-02-19 12:40:38Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -31,904 +31,904 @@ */ static const char* const kOrgRefList[] = { - "Acacia koa\t-\t1\t1\t11\t468172\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Mimosoideae; Acacieae; Acacia", - "Acanthisitta chloris\trifleman\t1\t2\t-\t57068\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Acanthisittidae; Acanthisitta", - "Acanthoscurria geniculata\t-\t1\t5\t-\t575412\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Araneae; Mygalomorphae; Theraphosidae; Acanthoscurria", - "Acinetobacter baumannii\t-\t11\t0\t-\t470\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Moraxellaceae; Acinetobacter; Acinetobacter calcoaceticus/baumannii complex", - "Acinetobacter pittii\t-\t11\t0\t-\t48296\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Moraxellaceae; Acinetobacter; Acinetobacter calcoaceticus/baumannii complex", - "Acinonyx jubatus\tcheetah\t1\t2\t-\t32536\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; 
Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Feliformia; Felidae; Acinonychinae; Acinonyx", - "Acropora cervicornis\t-\t1\t4\t-\t6130\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Acroporidae; Acropora", - "Acropora digitifera\t-\t1\t4\t-\t70779\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Acroporidae; Acropora", - "Acropora millepora\t-\t1\t4\t-\t45264\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Acroporidae; Acropora", - "Actias selene\tIndian moon moth\t1\t5\t-\t37776\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Saturniidae; Saturniinae; Saturniini; Actias", - "Acyrthosiphon pisum\tpea aphid\t1\t5\t-\t7029\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aphidiformes; Aphidomorpha; Aphidoidea; Aphididae; Aphidinae; Macrosiphini; Acyrthosiphon", - "Adineta vaga\t-\t1\t5\t-\t104782\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Rotifera; Bdelloidea; Adinetida; Adinetidae; Adineta", - "Aedes aegypti\tyellow fever mosquito\t1\t5\t-\t7159\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; 
Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Nematocera; Culicomorpha; Culicoidea; Culicidae; Culicinae; Aedini; Aedes; Stegomyia", - "Aedes albopictus\tAsian tiger mosquito\t1\t5\t-\t7160\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Nematocera; Culicomorpha; Culicoidea; Culicidae; Culicinae; Aedini; Aedes; Stegomyia", - "Aegilops tauschii\t-\t1\t1\t11\t37682\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Aegilops", - "Aethina tumida\tsmall hive beetle\t1\t5\t-\t116153\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Cucujiformia; Cucujoidea; Nitidulidae; Nitidulinae; Aethina", - "Agapanthus praecox subsp. 
orientalis\t-\t1\t1\t11\t547170\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Amaryllidaceae; Agapanthoideae; Agapanthus; Agapanthus praecox", - "Agave deserti\t-\t1\t1\t11\t382119\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Asparagaceae; Agavoideae; Agave", - "Agave tequilana\t-\t1\t1\t11\t386106\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Asparagaceae; Agavoideae; Agave", - "Agrilus planipennis\temerald ash borer\t1\t5\t-\t224129\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Elateriformia; Buprestoidea; Buprestidae; Agrilinae; Agrilus", - "Agrotis segetum\tturnip moth\t1\t5\t-\t47767\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Noctuoidea; Noctuidae; Noctuinae; Agrotis", - "Ailuropoda melanoleuca\tgiant panda\t1\t2\t-\t9646\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; 
Carnivora; Caniformia; Ursidae; Ailuropoda", - "Alexandrium tamarense\t-\t1\t4\t11\t2926\tPLN\tcellular organisms; Eukaryota; Alveolata; Dinophyceae; Gonyaulacales; Gonyaulacaceae; Alexandrium", - "Alligator mississippiensis\tAmerican alligator\t1\t2\t-\t8496\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Crocodylia; Alligatoridae; Alligatorinae; Alligator", - "Alligator sinensis\tChinese alligator\t1\t2\t-\t38654\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Crocodylia; Alligatoridae; Alligatorinae; Alligator", - "Allium cepa\tonion\t1\t1\t11\t4679\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Amaryllidaceae; Allioideae; Allieae; Allium", - "Allium fistulosum\tWelsh onion\t1\t1\t11\t35875\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Amaryllidaceae; Allioideae; Allieae; Allium", - "Allium sativum\tgarlic\t1\t1\t11\t4682\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Amaryllidaceae; Allioideae; Allieae; Allium", - "Amaranthus tricolor\t-\t1\t1\t11\t29722\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; 
Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Amaranthaceae; Amaranthus", - "Amazona vittata\tPuerto Rican parrot\t1\t2\t-\t241585\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Psittaciformes; Psittacidae; Amazona", - "Ameiurus nebulosus\tbrown bullhead\t1\t2\t-\t27778\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Characiphysae; Siluriformes; Siluroidei; Ictaluridae; Ameiurus", - "Amoebidium parasiticum JAP-7-2\t-\t1\t4\t-\t1069442\tINV\tcellular organisms; Eukaryota; Opisthokonta; Opisthokonta incertae sedis; Ichthyosporea; Ichthyophonida; Amoebidiaceae; Amoebidium; Amoebidium parasiticum", - "Anas platyrhynchos\tmallard\t1\t2\t-\t8839\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Anseriformes; Anatidae; Anas", - "Ancylostoma ceylanicum\t-\t1\t5\t-\t53326\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Ancylostomatoidea; Ancylostomatidae; Ancylostomatinae; Ancylostoma", - "Ancylostoma 
duodenale\t-\t1\t5\t-\t51022\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Ancylostomatoidea; Ancylostomatidae; Ancylostomatinae; Ancylostoma", - "Anguilla anguilla\tEuropean eel\t1\t2\t-\t7936\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Elopocephalai; Elopocephala; Elopomorpha; Anguilliformes; Anguillidae; Anguilla", - "Anguilla japonica\tJapanese eel\t1\t2\t-\t7937\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Elopocephalai; Elopocephala; Elopomorpha; Anguilliformes; Anguillidae; Anguilla", - "Anisakis simplex\therring worm\t1\t5\t-\t6269\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Ascaridida; Ascaridoidea; Anisakidae; Anisakis; Anisakis simplex complex", - "Anneissia japonica\t-\t1\t9\t-\t1529436\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Pelmatozoa; Crinoidea; Articulata; Comatulida; Comatulidae; Comatulinae; Anneissia", - "Annulipalpia sp. 
AD-2013\t-\t1\t5\t-\t1499517\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Trichoptera; Annulipalpia; Unclassified Annulipalpia", - "Anolis carolinensis\tgreen anole\t1\t2\t-\t28377\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Iguania; Iguanidae; Polychrotinae; Anolis", - "Anopheles funestus\tAfrican malaria mosquito\t1\t5\t-\t62324\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Nematocera; Culicomorpha; Culicoidea; Culicidae; Anophelinae; Anopheles; Cellia; Myzomyia; funestus group; funestus subgroup", - "Anopheles gambiae\tAfrican malaria mosquito\t1\t5\t-\t7165\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Nematocera; Culicomorpha; Culicoidea; Culicidae; Anophelinae; Anopheles; Cellia; Pyretophorus; gambiae species complex", - "Anopheles sinensis\t-\t1\t5\t-\t74873\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Nematocera; Culicomorpha; Culicoidea; Culicidae; Anophelinae; Anopheles; Anopheles; Laticorn; Myzorhynchus; hyrcanus group", - 
"Anoplophora glabripennis\tAsian longhorned beetle\t1\t5\t-\t217634\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Cucujiformia; Chrysomeloidea; Cerambycidae; Lamiinae; Lamiini; Anoplophora", - "Anoplopoma fimbria\tsablefish\t1\t2\t-\t229290\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Perciformes; Cottioidei; Anoplopomatales; Anoplopomatidae; Anoplopoma", - "Anser cygnoides domesticus\t-\t1\t2\t-\t381198\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Anseriformes; Anatidae; Anser; Anser cygnoides", - "Antheraea assama\tIndian muga silkmoth\t1\t5\t-\t91021\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Saturniidae; Saturniinae; Saturniini; Antheraea", - "Antheraea pernyi\tChinese oak silkmoth\t1\t5\t-\t7119\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; 
Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Saturniidae; Saturniinae; Saturniini; Antheraea", - "Antheraea yamamai\tJapanese oak silkmoth\t1\t5\t-\t7121\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Saturniidae; Saturniinae; Saturniini; Antheraea", - "Anthopleura elegantissima\tclonal anemone\t1\t4\t-\t6110\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Nynantheae; Actiniidae; Anthopleura", - "Anthoxanthum odoratum\t-\t1\t1\t11\t29661\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 1 (Aveneae type); Anthoxanthinae; Anthoxanthum", - "Anthurium andraeanum\t-\t1\t1\t11\t226677\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Alismatales; Araceae; Pothoideae; Potheae; Anthurium", - "Aotus nancymaae\tMa's night monkey\t1\t2\t-\t37293\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Platyrrhini; Aotidae; Aotus", - "Apaloderma 
vittatum\tbar-tailed trogon\t1\t2\t-\t57397\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Trogoniformes; Trogonidae; Apaloderma", - "Aphelocoma californica obscura\t-\t1\t2\t-\t947029\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Corvoidea; Corvidae; Aphelocoma; Aphelocoma californica", - "Aphyosemion striatum\t-\t1\t2\t-\t60296\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Aphyosemion", - "Apis cerana\tAsiatic honeybee\t1\t5\t-\t7461\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Apinae; Apini; Apis", - "Apis florea\tlittle honeybee\t1\t5\t-\t7463\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; 
Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Apinae; Apini; Apis", - "Apis mellifera\thoney bee\t1\t5\t-\t7460\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Apinae; Apini; Apis", - "Aplysia californica\tCalifornia sea hare\t1\t5\t-\t6500\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Euopisthobranchia; Aplysiomorpha; Aplysioidea; Aplysiidae; Aplysia", - "Apostichopus japonicus\tJapanese sea cucumber\t1\t9\t-\t307972\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Holothuroidea; Aspidochirotacea; Aspidochirotida; Stichopodidae; Apostichopus", - "Apteryx australis mantelli\t-\t1\t2\t-\t202946\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Palaeognathae; Apterygiformes; Apterygidae; Apteryx; Apteryx australis", - "Aquila chrysaetos canadensis\t-\t1\t2\t-\t216574\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Falconiformes; Accipitridae; Accipitrinae; Aquila; Aquila chrysaetos", - "Ara macao\tscarlet 
macaw\t1\t2\t-\t176014\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Psittaciformes; Psittacidae; Ara", - "Arabidopsis lyrata subsp. lyrata\t-\t1\t1\t11\t81972\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Arabidopsis; Arabidopsis lyrata", - "Arabidopsis thaliana\tthale cress\t1\t1\t11\t3702\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Arabidopsis", - "Arachis duranensis\t-\t1\t1\t11\t130453\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Dalbergieae; Arachis", - "Arachis hypogaea\tpeanut\t1\t1\t11\t3818\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Dalbergieae; Arachis", - "Arachis hypogaea var. 
vulgaris\t-\t1\t1\t11\t925390\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Dalbergieae; Arachis; Arachis hypogaea; Arachis hypogaea subsp. fastigiata", - "Arachis ipaensis\t-\t1\t1\t11\t130454\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Dalbergieae; Arachis", - "Araucaria cunninghamii\t-\t1\t1\t11\t56994\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Araucariales; Araucariaceae; Araucaria", - "Archaeopsylla erinacei\t-\t1\t5\t-\t48909\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Siphonaptera; Pulicomorpha; Pulicoidea; Pulicidae; Archaeopsyllinae; Archaeopsylla", - "Aretaon asperrimus\tthorny stick insect\t1\t5\t-\t173775\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Orthopteroidea; Phasmatodea; Verophasmatodea; Areolatae; Bacilloidea; Heteropterygidae; Obriminae; Obrimini; Aretaon", - "Argochrysis armilla\t-\t1\t5\t-\t1317734\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; 
Aculeata; Chrysidoidea; Chrysididae; Chrysidinae; Chrysidini; Argochrysis", - "Argulus siamensis\t-\t1\t5\t-\t1167309\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Branchiura; Arguloida; Argulidae; Argulus", - "Arion vulgaris\t-\t1\t5\t-\t1028688\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Eupulmonata; Stylommatophora; Sigmurethra; Arionoidea; Arionidae; Arion", - "Artemisia annua\tsweet wormwood\t1\t1\t11\t35608\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Anthemideae; Artemisiinae; Artemisia", - "artificial sequences\t-\t11\t0\t11\t81077\tSYN\tother sequences", - "Arundo donax\tgiant reed\t1\t1\t11\t35708\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Arundinoideae; Arundineae; Arundo", - "Ascaris suum\tpig roundworm\t1\t5\t-\t6253\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Ascaridida; Ascaridoidea; Ascarididae; Ascaris", - "Aspergillus oryzae\t-\t1\t4\t-\t5062\tPLN\tcellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Ascomycota; saccharomyceta; Pezizomycotina; leotiomyceta; Eurotiomycetes; Eurotiomycetidae; Eurotiales; Aspergillaceae; Aspergillus", - "Aspidistra saxicola\t-\t1\t1\t11\t1197444\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; 
Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Asparagaceae; Nolinoideae; Aspidistra", - "Astacus astacus\tbroad-fingered crayfish\t1\t5\t-\t6715\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Astacoidea; Astacidae; Astacus", - "Astacus leptodactylus\tnarrow-clawed crayfish\t1\t5\t-\t6717\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Astacoidea; Astacidae; Astacus", - "Asterias amurensis\t-\t1\t9\t-\t7602\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Asterias", - "Asterias forbesi\tForbes's starfish\t1\t9\t-\t7603\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Asterias", - "Asterias rubens\tEuropean starfish\t1\t9\t-\t7604\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Asterias", - "Astyanax mexicanus\tMexican tetra\t1\t2\t-\t7994\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Characiphysae; Characiformes; Characoidei; Characidae; 
Characidae incertae sedis; Astyanax clade; Astyanax", - "Athetis lepigone\t-\t1\t5\t-\t1223490\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Noctuoidea; Noctuidae; Noctuinae; Athetis", - "Atractaspis aterrima\tmole viper\t1\t2\t-\t1355159\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Lamprophiidae; Atractaspidinae; Atractaspis", - "Aurelia aurita\tmoon jelly\t1\t4\t-\t6145\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Scyphozoa; Semaeostomeae; Ulmaridae; Aurelia", - "Austrofundulus limnaeus\t-\t1\t2\t-\t52670\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Rivulidae; Austrofundulus", - "Avena sativa\toat\t1\t1\t11\t4498\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Aveninae; Avena", - "Avicennia marina\t-\t1\t1\t11\t82927\tPLN\tcellular organisms; Eukaryota; 
Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Acanthaceae; Avicennioideae; Avicennia", - "Bacillus cereus\t-\t11\t0\t-\t1396\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Bacillales; Bacillaceae; Bacillus; Bacillus cereus group", - "Bacillus licheniformis\t-\t11\t0\t-\t1402\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Bacillales; Bacillaceae; Bacillus; Bacillus subtilis group", - "Bactrocera cucurbitae\tmelon fly\t1\t5\t-\t28588\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Dacinae; Dacini; Bactrocera; Zeugodacus", - "Bactrocera dorsalis\toriental fruit fly\t1\t5\t-\t27457\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Dacinae; Dacini; Bactrocera; Bactrocera; Bactrocera dorsalis species complex", - "Bactrocera latifrons\t-\t1\t5\t-\t174628\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Dacinae; Dacini; Bactrocera; Bactrocera", - "Bactrocera minax\tOriental citrus 
fly\t1\t5\t-\t104690\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Dacinae; Dacini; Bactrocera; Tetradacus", - "Bactrocera oleae\tolive fruit fly\t1\t5\t-\t104688\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Dacinae; Dacini; Bactrocera; Daculus", - "Balaenoptera acutorostrata scammoni\t-\t1\t2\t-\t310752\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Mysticeti; Balaenopteridae; Balaenoptera; Balaenoptera acutorostrata", - "Balaenoptera bonaerensis\tAntarctic minke whale\t1\t2\t-\t33556\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Mysticeti; Balaenopteridae; Balaenoptera", - "Balearica regulorum gibbericeps\tEast African grey crowned-crane\t1\t2\t-\t100784\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; 
Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Gruidae; Balearica; Balearica regulorum", - "Banksia hookeriana\t-\t1\t1\t11\t199770\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; stem eudicotyledons; Proteales; Proteaceae; Banksia", - "Bdellocephala annandalei\t-\t1\t9\t-\t1421413\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Rhabditophora; Seriata; Tricladida; Continenticola; Planarioidea; Dendrocoelidae; Bdellocephala", - "Bemisia tabaci\t-\t1\t5\t-\t7038\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aleyrodiformes; Aleyrodoidea; Aleyrodidae; Aleyrodinae; Bemisia", - "Beta vulgaris\t-\t1\t1\t11\t161934\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Betoideae; Beta", - "Beta vulgaris subsp. 
vulgaris\t-\t1\t1\t11\t3555\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Betoideae; Beta; Beta vulgaris", - "Betula platyphylla\t-\t1\t1\t11\t78630\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Betulaceae; Betula", - "Biomphalaria glabrata\t-\t1\t5\t-\t6526\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Hygrophila; Planorboidea; Planorbidae; Biomphalaria", - "Bison bison bison\t-\t1\t2\t-\t43346\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bison; Bison bison", - "Bithynia siamensis goniomphalos\t-\t1\t5\t-\t479249\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Caenogastropoda; Hypsogastropoda; Littorinimorpha; Truncatelloidea; Bithyniidae; Bithynia; Bithynia siamensis", - "Boechera gunnisoniana\t-\t1\t1\t11\t93888\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Boechereae; Boechera", - "Bombina bombina\tfire-bellied toad\t1\t2\t-\t8345\tVRT\tcellular organisms; Eukaryota; 
Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Bombinatoridae; Bombina", - "Bombina orientalis\t-\t1\t2\t-\t8346\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Bombinatoridae; Bombina", - "Bombina variegata scabra\t-\t1\t2\t-\t251232\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Bombinatoridae; Bombina; Bombina variegata", - "Bombina variegata variegata\t-\t1\t2\t-\t191472\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Bombinatoridae; Bombina; Bombina variegata", - "Bombus impatiens\tcommon eastern bumble bee\t1\t5\t-\t132113\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Bombinae; Bombini; Bombus; Pyrobombus", - "Bombus insularis\t-\t1\t5\t-\t207637\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Bombinae; Bombini; Bombus; Psithyrus", - "Bombus terrestris\tbuff-tailed 
bumblebee\t1\t5\t-\t30195\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Bombinae; Bombini; Bombus; Bombus", - "Bombyx mori\tdomestic silkworm\t1\t5\t-\t7091\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Bombycidae; Bombycinae; Bombyx", - "Bordetella pertussis\t-\t11\t0\t-\t520\tBCT\tcellular organisms; Bacteria; Proteobacteria; Betaproteobacteria; Burkholderiales; Alcaligenaceae; Bordetella", - "Bos mutus\twild yak\t1\t2\t-\t72004\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bos", - "Bos taurus\tcattle\t1\t2\t-\t9913\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bos", - "Botryococcus braunii\t-\t1\t1\t11\t38881\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Chlorophyta; Trebouxiophyceae; Trebouxiophyceae incertae sedis; Botryococcaceae; Botryococcus", - "Brachionus calyciflorus\t-\t1\t5\t-\t104777\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; 
Bilateria; Protostomia; Lophotrochozoa; Rotifera; Monogononta; Pseudotrocha; Ploima; Brachionidae; Brachionus", - "Brachycistis timberlakei\t-\t1\t5\t-\t1317728\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Tiphiidae; Brachycistidinae; Brachycistis", - "Brachypodium distachyon\tstiff brome\t1\t1\t11\t15368\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Brachypodieae; Brachypodium", - "Bragasellus peltatus\t-\t1\t5\t-\t1282048\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Bragasellus", - "Branchiostoma lanceolatum\tamphioxus\t1\t5\t-\t7740\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Cephalochordata; Branchiostomidae; Branchiostoma", - "Brassica juncea\t-\t1\t1\t11\t3707\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica", - "Brassica napus\trape\t1\t1\t11\t3708\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica", - "Brassica oleracea 
var. capitata\tcabbage\t1\t1\t11\t3716\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica; Brassica oleracea", - "Brassica oleracea var. oleracea\t-\t1\t1\t11\t109376\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica; Brassica oleracea", - "Brassica rapa\tfield mustard\t1\t1\t11\t3711\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica", - "Brassica rapa subsp. 
pekinensis\tChinese cabbage\t1\t1\t11\t51351\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica; Brassica rapa", - "Brassica rapa x Raphanus sativus\t-\t1\t1\t11\t1417620\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica x Raphanus", - "Brassicogethes aeneus\t-\t1\t5\t-\t1431903\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Cucujiformia; Cucujoidea; Nitidulidae; Meligethinae; Brassicogethes", - "Brugia malayi\t-\t1\t5\t-\t6279\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Spirurida; Filarioidea; Onchocercidae; Brugia", - "Bubalus bubalis\twater buffalo\t1\t2\t-\t89462\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bubalus", - "Buceros rhinoceros silvestris\t-\t1\t2\t-\t175836\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; 
Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Bucerotiformes; Bucerotidae; Buceros; Buceros rhinoceros", - "Burkholderia ubonensis\t-\t11\t0\t-\t101571\tBCT\tcellular organisms; Bacteria; Proteobacteria; Betaproteobacteria; Burkholderiales; Burkholderiaceae; Burkholderia", - "Caenorhabditis elegans\t-\t1\t5\t-\t6239\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis", - "Caenorhabditis remanei\t-\t1\t5\t-\t31234\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis", - "Cajanus cajan\tpigeon pea\t1\t1\t11\t3821\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Phaseoleae; Cajanus", - "Calanus finmarchicus\t-\t1\t5\t-\t6837\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Copepoda; Neocopepoda; Gymnoplea; Calanoida; Calanidae; Calanus", - "Calanus glacialis\t-\t1\t5\t-\t113644\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Copepoda; Neocopepoda; Gymnoplea; Calanoida; Calanidae; Calanus", - "Calidris pugnax\truff\t1\t2\t-\t198806\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; 
Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Charadriiformes; Scolopacidae; Calidris", - "Caligus rogercresseyi\t-\t1\t5\t-\t217165\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Copepoda; Neocopepoda; Podoplea; Siphonostomatoida; Caligidae; Caligus", - "Callithrix jacchus\twhite-tufted-ear marmoset\t1\t2\t-\t9483\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Platyrrhini; Cebidae; Callitrichinae; Callithrix", - "Callorhinchus milii\telephant shark\t1\t2\t-\t7868\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Chondrichthyes; Holocephali; Chimaeriformes; Callorhinchidae; Callorhinchus", - "Calypte anna\tAnna's hummingbird\t1\t2\t-\t9244\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Trochiliformes; Trochilidae; Calypte", - "Camelina sativa\tfalse flax\t1\t1\t11\t90675\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Camelina", - "Camellia oleifera\t-\t1\t1\t11\t385388\tPLN\tcellular organisms; Eukaryota; Viridiplantae; 
Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Theaceae; Camellia", - "Camellia sinensis\t-\t1\t1\t11\t4442\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Theaceae; Camellia", - "Camellia sinensis var. sinensis\t-\t1\t1\t11\t542762\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Theaceae; Camellia; Camellia sinensis", - "Camelus bactrianus\tBactrian camel\t1\t2\t-\t9837\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Camelus", - "Camelus dromedarius\tArabian camel\t1\t2\t-\t9838\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Camelus", - "Camelus ferus\tWild Bactrian camel\t1\t2\t-\t419612\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Camelus", - 
"Camponotus floridanus\tFlorida carpenter ant\t1\t5\t-\t104421\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Camponotini; Camponotus", - "Camptotheca acuminata\t-\t1\t1\t11\t16922\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; Cornales; Nyssaceae; Camptotheca", - "Campylobacter coli\t-\t11\t0\t-\t195\tBCT\tcellular organisms; Bacteria; Proteobacteria; delta/epsilon subdivisions; Epsilonproteobacteria; Campylobacterales; Campylobacteraceae; Campylobacter", - "Campylobacter jejuni\t-\t11\t0\t-\t197\tBCT\tcellular organisms; Bacteria; Proteobacteria; delta/epsilon subdivisions; Epsilonproteobacteria; Campylobacterales; Campylobacteraceae; Campylobacter", - "Canis lupus familiaris\tdog\t1\t2\t-\t9615\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Canidae; Canis; Canis lupus", - "Cannabis sativa\themp\t1\t1\t11\t3483\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Cannabis", - "Capra hircus\tgoat\t1\t2\t-\t9925\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; 
Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Caprinae; Capra", - "Caprimulgus carolinensis\tchuck-will's-widow\t1\t2\t-\t279965\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Caprimulgiformes; Caprimulgidae; Caprimulginae; Caprimulgus", - "Capsella rubella\t-\t1\t1\t11\t81985\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Capsella", - "Capsicum annuum\t-\t1\t1\t11\t4072\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Capsiceae; Capsicum", - "Carabus granulatus\t-\t1\t5\t-\t118799\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Adephaga; Caraboidea; Carabidae; Carabinae; Carabini; Carabina; Carabus; Carabus", - "Caragana korshinskii\t-\t1\t1\t11\t220689\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Caraganeae; Caragana", - "Cariama 
cristata\tred-legged seriema\t1\t2\t-\t54380\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Cariamidae; Cariama", - "Carlito syrichta\tPhilippine tarsier\t1\t2\t-\t1868482\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Tarsiiformes; Tarsiidae; Carlito", - "Catharanthus roseus\tMadagascar periwinkle\t1\t1\t11\t4058\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Apocynaceae; Rauvolfioideae; Vinceae; Catharanthinae; Catharanthus", - "Cathartes aura\tturkey vulture\t1\t2\t-\t43455\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Ciconiiformes; Cathartidae; Cathartes", - "Cavia porcellus\tdomestic guinea pig\t1\t2\t-\t10141\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; 
Hystricognathi; Caviidae; Cavia", - "Cebus capucinus imitator\t-\t1\t2\t-\t1737458\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Platyrrhini; Cebidae; Cebinae; Cebus; Cebus capucinus", - "Cecidomyiidae sp. BOLD-2016\t-\t1\t5\t-\t1881751\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Nematocera; Bibionomorpha; Sciaroidea; Cecidomyiidae; unclassified Cecidomyiidae", - "Cenchrus americanus\t-\t1\t1\t11\t4543\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Cenchrinae; Cenchrus", - "Centris flavifrons\t-\t1\t5\t-\t360639\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Anthophorinae; Centridini; Centris", - "Cephalotaxus hainanensis\t-\t1\t1\t11\t191701\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Cupressales; Taxaceae; Cephalotaxus", - "Cerapachys biroi\tclonal raider ant\t1\t5\t-\t443821\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; 
Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Cerapachyinae; Cerapachyini; Cerapachys", - "Ceratina calcarata\t-\t1\t5\t-\t156304\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Xylocopinae; Ceratinini; Ceratina; Zadontomerus", - "Ceratitis capitata\tMediterranean fruit fly\t1\t5\t-\t7213\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Dacinae; Ceratitidini; Ceratitis; Ceratitis", - "Ceratotherium simum simum\tsouthern white rhinoceros\t1\t2\t-\t73337\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Perissodactyla; Rhinocerotidae; Ceratotherium; Ceratotherium simum", - "Cercis gigantea\t-\t1\t1\t11\t183790\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Cercideae; Cercis", - "Cercocebus atys\tsooty mangabey\t1\t2\t-\t9531\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; 
Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Cercocebus", - "Chaetura pelagica\tchimney swift\t1\t2\t-\t8897\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Apodiformes; Apodidae; Chaetura", - "Charadrius vociferus\tkilldeer\t1\t2\t-\t50402\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Charadriiformes; Charadriidae; Charadrius", - "Chelonia mydas\tgreen sea turtle\t1\t2\t-\t8469\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Testudines; Cryptodira; Durocryptodira; Americhelydia; Chelonioidea; Cheloniidae; Chelonia", - "Cherax quadricarinatus\t-\t1\t5\t-\t27406\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Parastacoidea; Parastacidae; Cherax", - "Chilo suppressalis\tstriped riceborer\t1\t5\t-\t168631\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; 
Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Pyraloidea; Crambidae; Crambinae; Chilo", - "Chinavia ubica\t-\t1\t5\t-\t1497372\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Neohemiptera; Prosorrhyncha; Heteroptera; Euheteroptera; Neoheteroptera; Panheteroptera; Pentatomomorpha; Pentatomoidea; Pentatomidae; Pentatominae; Chinavia", - "Chinchilla lanigera\tlong-tailed chinchilla\t1\t2\t-\t34839\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Hystricognathi; Chinchillidae; Chinchilla", - "Chlamydia trachomatis\t-\t11\t0\t-\t813\tBCT\tcellular organisms; Bacteria; PVC group; Chlamydiae; Chlamydiia; Chlamydiales; Chlamydiaceae; Chlamydia/Chlamydophila group; Chlamydia", - "Chlamydotis macqueenii\tMacqueen's bustard\t1\t2\t-\t187382\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Otididae; Chlamydotis", - "Chlorella sorokiniana\t-\t1\t1\t11\t3076\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Chlorophyta; Trebouxiophyceae; Chlorellales; Chlorellaceae; Chlorella", - "Chloris chloris\tEuropean greenfinch\t1\t2\t-\t37601\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; 
Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passeroidea; Fringillidae; Carduelinae; Chloris", - "Chlorocebus sabaeus\tgreen monkey\t1\t2\t-\t60711\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Chlorocebus", - "Chorispora bungeana\t-\t1\t1\t11\t238895\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Chorisporeae; Chorispora", - "Chromolaena odorata\t-\t1\t1\t11\t103745\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Heliantheae alliance; Eupatorieae; Chromolaena", - "Chrysemys picta bellii\twestern painted turtle\t1\t2\t-\t8478\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Testudines; Cryptodira; Durocryptodira; Testudinoidea; Emydidae; Chrysemys; Chrysemys picta", - "Chrysochloris asiatica\tCape golden mole\t1\t2\t-\t185453\tMAM\tcellular organisms; Eukaryota; 
Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Afrotheria; Chrysochloridae; Chrysochlorinae; Chrysochloris", - "Chrysopa pallens\t-\t1\t5\t-\t417485\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Neuropterida; Neuroptera; Chrysopidae; Chrysopa", - "Chyphotes mellipes\t-\t1\t5\t-\t292179\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Bradynobaenidae; Chyphotinae; Chyphotes", - "Cicer arietinum\tchickpea\t1\t1\t11\t3827\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Cicereae; Cicer", - "Ciona intestinalis\tvase tunicate\t1\t13\t-\t7719\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Tunicata; Ascidiacea; Enterogona; Phlebobranchia; Cionidae; Ciona", - "Citrus clementina\t-\t1\t1\t11\t85681\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Rutaceae; Aurantioideae; Citrus", - "Citrus sinensis\tsweet orange\t1\t1\t11\t2711\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; 
Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Rutaceae; Aurantioideae; Citrus", - "Clostridioides difficile\t-\t11\t0\t-\t1496\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Clostridia; Clostridiales; Peptostreptococcaceae; Clostridioides", - "Clostridium botulinum\t-\t11\t0\t-\t1491\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Clostridia; Clostridiales; Clostridiaceae; Clostridium", - "Clupea harengus\tAtlantic herring\t1\t2\t-\t7950\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Clupei; Clupeiformes; Clupeoidei; Clupeidae; Clupeinae; Clupea", - "Cocos nucifera\tcoconut palm\t1\t1\t11\t13894\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Arecales; Arecaceae; Arecoideae; Cocoseae; Attaleinae; Cocos", - "Colaphellus bowringi\t-\t1\t5\t-\t561076\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Cucujiformia; Chrysomeloidea; Chrysomelidae; Chrysomelinae; Chrysomelini; Colaphellus", - "Colius striatus\tspeckled mousebird\t1\t2\t-\t57412\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; 
Coliiformes; Coliidae; Colius", - "Colobus angolensis palliatus\t-\t1\t2\t-\t336983\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Colobinae; Colobus; Colobus angolensis", - "Columba livia\trock pigeon\t1\t2\t-\t8932\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Columbiformes; Columbidae; Columba", - "Condylura cristata\tstar-nosed mole\t1\t2\t-\t143302\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Insectivora; Talpidae; Condylura", - "Copidosoma floridanum\t-\t1\t5\t-\t29053\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Chaldicoidea group; Chalcidoidea; Encyrtidae; Encyrtinae; Copidosoma", - "Corchorus capsularis\t-\t1\t1\t11\t210143\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Grewioideae; Apeibeae; Corchorus", - 
"Corvus brachyrhynchos\tAmerican crow\t1\t2\t-\t85066\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Corvoidea; Corvidae; Corvus", - "Corvus cornix cornix\t-\t1\t2\t-\t932674\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Corvoidea; Corvidae; Corvus; Corvus cornix", - "Corydalinae sp. KMRSPBM-2012\t-\t1\t5\t-\t1247484\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Neuropterida; Megaloptera; Corydalidae; Corydalinae; unclassified Corydalinae", - "Corylus avellana\t-\t1\t1\t11\t13451\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Betulaceae; Corylus", - "Costus pictus\t-\t1\t1\t11\t168183\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Zingiberales; Costaceae; Costus", - "Coturnix japonica\tJapanese quail\t1\t2\t-\t93934\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; 
Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Phasianidae; Perdicinae; Coturnix", - "Crassostrea angulata\t-\t1\t5\t-\t558553\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Ostreoida; Ostreoidea; Ostreidae; Crassostrea", - "Crassostrea gigas\tPacific oyster\t1\t5\t-\t29159\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Ostreoida; Ostreoidea; Ostreidae; Crassostrea", - "Crataegus pinnatifida\t-\t1\t1\t11\t510735\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Maleae; Crataegus", - "Crepidula fornicata\t-\t1\t5\t-\t176853\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Caenogastropoda; Hypsogastropoda; Littorinimorpha; Calyptraeoidea; Calyptraeidae; Crepidula", - "Cricetulus griseus\tChinese hamster\t1\t2\t-\t10029\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Cricetidae; Cricetinae; Cricetulus", - "Crioscolia alcione\t-\t1\t5\t-\t1317732\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; 
Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Scoliidae; Crioscolia", - "Ctenomys sociabilis\tsocial tuco-tuco\t1\t2\t-\t43321\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Hystricognathi; Ctenomyidae; Ctenomys", - "Cucumis melo\tmuskmelon\t1\t1\t11\t3656\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Benincaseae; Cucumis", - "Cucumis sativus\tcucumber\t1\t1\t11\t3659\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Benincaseae; Cucumis", - "Cunninghamia lanceolata\t-\t1\t1\t11\t28977\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Cupressales; Cupressaceae; Cunninghamia", - "Curcuma longa\tturmeric\t1\t1\t11\t136217\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Zingiberales; Zingiberaceae; Curcuma", - "Cuscuta pentagona\t-\t1\t1\t11\t112407\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; 
Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Cuscuteae; Cuscuta; Grammica; Cuscuta sect. Cleistogrammica", - "Cylicostephanus goldi\t-\t1\t5\t-\t71465\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Strongyloidea; Strongylidae; Cyathostominae; Cylicostephanus", - "Cynoglossus semilaevis\ttongue sole\t1\t2\t-\t244447\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Carangaria; Pleuronectiformes; Pleuronectoidei; Cynoglossidae; Cynoglossinae; Cynoglossus", - "Cynopterus sphinx\tIndian short-nosed fruit bat\t1\t2\t-\t9400\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Cynopterus", - "Cypridininae sp. 
BMR-2011\t-\t1\t5\t-\t1032739\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Ostracoda; Myodocopa; Myodocopida; Cypridinoidea; Cypridinidae; unclassified Cypridinidae", - "Cyprinodon variegatus\tsheepshead minnow\t1\t2\t-\t28743\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Cyprinodontidae; Cyprinodontinae; Cyprinodontini; Cyprinodon", - "Cyprinus carpio\tcommon carp\t1\t2\t-\t7962\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Cyprinus", - "Dahlia pinnata\t-\t1\t1\t11\t101596\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Heliantheae alliance; Coreopsideae; Dahlia", - "Danio rerio\tzebrafish\t1\t2\t-\t7955\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; 
Cyprinidae; Danio", - "Daphnia magna\t-\t1\t5\t-\t35525\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Branchiopoda; Phyllopoda; Diplostraca; Cladocera; Anomopoda; Daphniidae; Daphnia", - "Dastarcus helophoroides\t-\t1\t5\t-\t1169899\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Cucujiformia; Cucujoidea; Bothrideridae; Dastarcus", - "Dasypus novemcinctus\tnine-banded armadillo\t1\t2\t-\t9361\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Xenarthra; Cingulata; Dasypodidae; Dasypus", - "Daucus carota subsp. sativus\t-\t1\t1\t11\t79200\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Apiales; Apiineae; Apiaceae; Apioideae; Scandiceae; Daucinae; Daucus; Daucus sect. 
Daucus; Daucus carota", - "Dendroctonus ponderosae\tmountain pine beetle\t1\t5\t-\t77166\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Cucujiformia; Curculionoidea; Curculionidae; Scolytinae; Dendroctonus", - "Dermacentor variabilis\tAmerican dog tick\t1\t5\t-\t34621\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Rhipicephalinae; Dermacentor", - "Dermanyssus gallinae\t-\t1\t5\t-\t34641\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Mesostigmata; Monogynaspida; Gamasina; Dermanyssoidea; Dermanyssidae; Dermanyssus", - "Dianthus caryophyllus\tclove pink\t1\t1\t11\t3570\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Caryophyllaceae; Caryophylleae; Dianthus", - "Diaphorina citri\tAsian citrus psyllid\t1\t5\t-\t121845\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Psylliformes; Psylloidea; Psyllidae; Diaphorina", - "Dicrocoelium dendriticum\t-\t1\t9\t-\t57078\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Plagiorchiida; Xiphidiata; Plagiorchioidea; Dicrocoeliidae; Dicrocoelium", - "Dinobryon sp. 
LO226KS\t-\t1\t1\t11\t1825119\tPLN\tcellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; Chromulinales; Dinobryaceae; Dinobryon", - "Dinoponera quadriceps\t-\t1\t5\t-\t609295\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Ponerinae; Ponerini; Dinoponera", - "Diospyros lotus\t-\t1\t1\t11\t55363\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Ebenaceae; Diospyros", - "Diphyllobothrium latum\t-\t1\t9\t-\t60516\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Cestoda; Eucestoda; Diphyllobothriidea; Diphyllobothriidae; Diphyllobothrium", - "Dipodomys ordii\tOrd's kangaroo rat\t1\t2\t-\t10020\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Heteromyidae; Dipodomyinae; Dipodomys", - "Dorcoceras hygrometricum\t-\t1\t1\t11\t472368\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Gesneriaceae; Didymocarpoideae; Trichosporeae; Loxocarpinae; Dorcoceras", - "Drosophila ananassae\t-\t1\t5\t-\t7217\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; 
Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; ananassae subgroup; ananassae species complex", - "Drosophila grimshawi\t-\t1\t5\t-\t7222\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Hawaiian Drosophila; picture wing clade; grimshawi clade; grimshawi group; grimshawi subgroup", - "Drosophila melanogaster\tfruit fly\t1\t5\t-\t7227\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; melanogaster subgroup", - "Drosophila miranda\t-\t1\t5\t-\t7229\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; obscura group; pseudoobscura subgroup", - "Drosophila mojavensis\t-\t1\t5\t-\t7230\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; 
Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Drosophila; repleta group; mulleri subgroup; mojavensis species complex", - "Drosophila persimilis\t-\t1\t5\t-\t7234\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; obscura group; pseudoobscura subgroup", - "Drosophila pseudoobscura pseudoobscura\t-\t1\t5\t-\t46245\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; obscura group; pseudoobscura subgroup; Drosophila pseudoobscura", - "Drosophila rhopaloa\t-\t1\t5\t-\t1041015\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; rhopaloa subgroup", - "Drosophila sechellia\t-\t1\t5\t-\t7238\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; 
Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; melanogaster subgroup", - "Drosophila simulans\t-\t1\t5\t-\t7240\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; melanogaster subgroup", - "Drosophila suzukii\t-\t1\t5\t-\t28584\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; suzukii subgroup", - "Drosophila virilis\t-\t1\t5\t-\t7244\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Drosophila; virilis group", - "Drosophila willistoni\t-\t1\t5\t-\t7260\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; 
Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; willistoni group; willistoni subgroup", - "Drosophila yakuba\t-\t1\t5\t-\t7245\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; melanogaster subgroup", - "Dugesia japonica\t-\t1\t9\t-\t6161\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Rhabditophora; Seriata; Tricladida; Continenticola; Geoplanoidea; Dugesiidae; Dugesia", - "Echinarachnius parma\t-\t1\t9\t-\t869203\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Gnathostomata; Clypeasteroida; Echinarachniidae; Echinarachnius", - "Echinaster spinulosus\t-\t1\t9\t-\t1451296\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Spinulosacea; Spinulosida; Echinasteridae; Echinaster", - "Echinops telfairi\tsmall Madagascar hedgehog\t1\t2\t-\t9371\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Afrotheria; Tenrecidae; Tenrecinae; Echinops", - "Echinostoma caproni\t-\t1\t9\t-\t27848\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Plagiorchiida; Echinostomata; Echinostomatoidea; Echinostomatidae; Echinostoma", - "Eidolon 
helvum\tstraw-colored fruit bat\t1\t2\t-\t77214\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Eidolon", - "Eimeria mitis\t-\t1\t4\t4\t44415\tINV\tcellular organisms; Eukaryota; Alveolata; Apicomplexa; Conoidasida; Coccidia; Eucoccidiorida; Eimeriorina; Eimeriidae; Eimeria", - "Elaeis guineensis\tAfrican oil palm\t1\t1\t11\t51953\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Arecales; Arecaceae; Arecoideae; Cocoseae; Elaeidinae; Elaeis", - "Elephantulus edwardii\tCape elephant shrew\t1\t2\t-\t28737\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Afrotheria; Macroscelidea; Macroscelididae; Elephantulus", - "Elliptio complanata\teastern elliptio\t1\t5\t-\t55832\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Palaeoheterodonta; Unionoida; Unionoidea; Unionidae; Ambleminae; Elliptio", - "Elodea nuttallii\t-\t1\t1\t11\t55313\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Alismatales; Hydrocharitaceae; Elodea", - "Emiliania huxleyi CCMP1516\t-\t1\t4\t11\t280463\tPLN\tcellular organisms; Eukaryota; Haptophyceae; Isochrysidales; Noelaerhabdaceae; Emiliania; Emiliania huxleyi", - "Enterobacter 
cloacae\t-\t11\t0\t-\t550\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Enterobacter; Enterobacter cloacae complex", - "Enterococcus faecalis\t-\t11\t0\t-\t1351\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Enterococcaceae; Enterococcus", - "Enterococcus faecium\t-\t11\t0\t-\t1352\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Enterococcaceae; Enterococcus", - "Epipyxis sp. PR26KG\t-\t1\t1\t11\t1825121\tPLN\tcellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; Chromulinales; Dinobryaceae; Epipyxis", - "Eptesicus fuscus\tbig brown bat\t1\t2\t-\t29078\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Eptesicus", - "Equus asinus\tass\t1\t2\t-\t9793\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Perissodactyla; Equidae; Equus; Asinus", - "Equus caballus\thorse\t1\t2\t-\t9796\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Perissodactyla; Equidae; Equus; Equus", - "Equus przewalskii\tPrzewalski's horse\t1\t2\t-\t9798\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; 
Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Perissodactyla; Equidae; Equus; Equus", - "Erinaceus europaeus\twestern European hedgehog\t1\t2\t-\t9365\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Insectivora; Erinaceidae; Erinaceinae; Erinaceus", - "Eriocheir sinensis\tChinese mitten crab\t1\t5\t-\t95602\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Brachyura; Eubrachyura; Thoracotremata; Grapsoidea; Varunidae; Eriocheir", - "Erythranthe guttata\tspotted monkey flower\t1\t1\t11\t4155\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Phrymaceae; Erythranthe", - "Escherichia coli\t-\t11\t0\t-\t562\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Escherichia", - "Esox lucius\tnorthern pike\t1\t2\t-\t8010\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Protacanthopterygii; Esociformes; Esocidae; Esox", - "Eucalyptus camaldulensis\tMurray red gum\t1\t1\t11\t34316\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; 
Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Myrtaceae; Myrtoideae; Eucalypteae; Eucalyptus", - "Eucalyptus grandis\t-\t1\t1\t11\t71139\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Myrtaceae; Myrtoideae; Eucalypteae; Eucalyptus", - "Eucidaris tribuloides\t-\t1\t9\t-\t7632\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Perischoechinoidea; Cidaroida; Cidaridae; Eucidaris", - "Eucyclops serrulatus\t-\t1\t5\t-\t84317\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Copepoda; Neocopepoda; Podoplea; Cyclopoida; Cyclopidae; Eucyclops", - "Eufriesea mexicana\t-\t1\t5\t-\t516756\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Bombinae; Euglossini; Eufriesea", - "Eurypyga helias\tsunbittern\t1\t2\t-\t54383\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Eurypygidae; Eurypyga", - "Eustoma exaltatum subsp. 
russellianum\t-\t1\t1\t11\t52518\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Gentianaceae; Chironieae; Eustoma; Eustoma exaltatum", - "Evechinus chloroticus\t-\t1\t9\t-\t137513\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Echinometridae; Evechinus", - "Exaiptasia pallida\t-\t1\t4\t-\t1720309\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Aiptasiidae; Exaiptasia", - "Exoneura robusta\t-\t1\t5\t-\t175328\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Xylocopinae; Allodapini; Exoneura", - "Extatosoma tiaratum\tgiant prickly stick insect\t1\t5\t-\t7024\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Orthopteroidea; Phasmatodea; Verophasmatodea; Anareolatae; Phasmatidae; Tropidoderinae; Extatosoma", - "Fagopyrum esculentum\tcommon buckwheat\t1\t1\t11\t3617\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Polygonaceae; Polygonoideae; Fagopyreae; Fagopyrum", - "Falco cherrug\tSaker falcon\t1\t2\t-\t345164\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; 
Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Falconiformes; Falconidae; Falco", - "Falco peregrinus\tperegrine falcon\t1\t2\t-\t8954\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Falconiformes; Falconidae; Falco", - "Fasciola hepatica\tliver fluke\t1\t9\t-\t6192\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Plagiorchiida; Echinostomata; Echinostomatoidea; Fasciolidae; Fasciola", - "Felis catus\tdomestic cat\t1\t2\t-\t9685\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Feliformia; Felidae; Felinae; Felis", - "Ficedula albicollis\tcollared flycatcher\t1\t2\t-\t59894\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Muscicapidae; Ficedula", - "Ficus carica\tcommon fig\t1\t1\t11\t3494\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; 
eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Moraceae; Ficus", + "Acanthisitta chloris\trifleman\t1\t2\t-\t57068\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Acanthisittidae; Acanthisitta", + "Acanthoscurria geniculata\t-\t1\t5\t-\t575412\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Araneae; Mygalomorphae; Theraphosidae; Acanthoscurria", + "Acartia tonsa\t-\t1\t5\t-\t136180\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Hexanauplia; Copepoda; Calanoida; Acartiidae; Acartia", + "Acidobacteria bacterium\t-\t11\t0\t-\t1978231\tBCT\tBacteria; Acidobacteria", + "Acinetobacter\t-\t11\t0\t-\t469\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Moraxellaceae; Acinetobacter", + "Acinetobacter baumannii\t-\t11\t0\t-\t470\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Moraxellaceae; Acinetobacter; Acinetobacter calcoaceticus/baumannii complex", + "Acropora cervicornis\t-\t1\t4\t-\t6130\tINV\tEukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Acroporidae; Acropora", + "Acropora millepora\t-\t1\t4\t-\t45264\tINV\tEukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Acroporidae; Acropora", + "Acropora tenuis\tpurple tipped acropora\t1\t4\t-\t70783\tINV\tEukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Acroporidae; Acropora", + "Actinidia\t-\t1\t1\t11\t3624\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Actinidiaceae; Actinidia", + "Acyrthosiphon\t-\t1\t5\t-\t7028\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aphidomorpha; 
Aphidoidea; Aphididae; Macrosiphini; Acyrthosiphon", + "Acyrthosiphon pisum\tpea aphid\t1\t5\t-\t7029\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aphidomorpha; Aphidoidea; Aphididae; Macrosiphini; Acyrthosiphon", + "Aedes\t-\t1\t5\t-\t7158\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Culicinae; Aedini; Aedes", + "Aedes aegypti\tyellow fever mosquito\t1\t5\t-\t7159\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Culicinae; Aedini; Aedes; Stegomyia", + "Aedes albopictus\tAsian tiger mosquito\t1\t5\t-\t7160\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Culicinae; Aedini; Aedes; Stegomyia", + "Aegilops tauschii\t-\t1\t1\t11\t37682\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Aegilops", + "Aegilops tauschii subsp. 
tauschii\t-\t1\t1\t11\t169297\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Aegilops", + "Agave deserti\t-\t1\t1\t11\t382119\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Asparagaceae; Agavoideae; Agave", + "Agave tequilana\t-\t1\t1\t11\t386106\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Asparagaceae; Agavoideae; Agave", + "Ailuropoda melanoleuca\tgiant panda\t1\t2\t-\t9646\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Ursidae; Ailuropoda", + "Alexandrium tamarense\t-\t1\t4\t11\t2926\tPLN\tEukaryota; Alveolata; Dinophyceae; Gonyaulacales; Gonyaulacaceae; Alexandrium", + "Allium cepa\tonion\t1\t1\t11\t4679\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Amaryllidaceae; Allioideae; Allieae; Allium", + "Allium sativum\tgarlic\t1\t1\t11\t4682\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Amaryllidaceae; Allioideae; Allieae; Allium", + "Alvinella\t-\t1\t5\t-\t6375\tINV\tEukaryota; Metazoa; Lophotrochozoa; Annelida; Polychaeta; Scolecida; Terebellida; Alvinellidae; Alvinella", + "Alvinella pompejana\t-\t1\t5\t-\t6376\tINV\tEukaryota; Metazoa; Lophotrochozoa; Annelida; Polychaeta; Scolecida; Terebellida; Alvinellidae; Alvinella", + "Amaranthus tricolor\t-\t1\t1\t11\t29722\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Amaranthaceae; Amaranthus", + "Amborella\t-\t1\t1\t11\t13332\tPLN\tEukaryota; Viridiplantae; 
Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Amborellales; Amborellaceae; Amborella", + "Amborella trichopoda\t-\t1\t1\t11\t13333\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Amborellales; Amborellaceae; Amborella", + "Ameiurus nebulosus\tbrown bullhead\t1\t2\t-\t27778\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Siluriformes; Ictaluridae; Ameiurus", + "Amoebidium parasiticum JAP-7-2\t-\t1\t4\t-\t1069442\tINV\tEukaryota; Ichthyosporea; Ichthyophonida; Amoebidiaceae; Amoebidium", + "Amphimedon queenslandica\t-\t1\t4\t-\t400682\tINV\tEukaryota; Metazoa; Porifera; Demospongiae; Heteroscleromorpha; Haplosclerida; Niphatidae; Amphimedon", + "Anas platyrhynchos\tmallard\t1\t2\t-\t8839\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Anseriformes; Anatidae; Anatinae; Anas", + "Ancylostoma\t-\t1\t5\t-\t29169\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Strongyloidea; Ancylostomatidae; Ancylostomatinae; Ancylostoma", + "Ancylostoma caninum\tdog hookworm\t1\t5\t-\t29170\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Strongyloidea; Ancylostomatidae; Ancylostomatinae; Ancylostoma", + "Ancylostoma ceylanicum\t-\t1\t5\t-\t53326\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Strongyloidea; Ancylostomatidae; Ancylostomatinae; Ancylostoma", + "Anguilla anguilla\tEuropean eel\t1\t2\t-\t7936\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Anguilliformes; Anguillidae; Anguilla", + "Anguilla japonica\tJapanese eel\t1\t2\t-\t7937\tVRT\tEukaryota; Metazoa; Chordata; 
Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Anguilliformes; Anguillidae; Anguilla", + "Annulipalpia sp. AD-2013\t-\t1\t5\t-\t1499517\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Trichoptera; Annulipalpia; unclassified Annulipalpia", + "Anolis\t-\t1\t2\t-\t28376\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Iguania; Dactyloidae; Anolis", + "Anolis carolinensis\tgreen anole\t1\t2\t-\t28377\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Iguania; Dactyloidae; Anolis", + "Anopheles\t-\t1\t5\t-\t7164\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Anophelinae; Anopheles", + "Anopheles gambiae\tAfrican malaria mosquito\t1\t5\t-\t7165\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Anophelinae; Anopheles", + "Anoplopoma fimbria\tsablefish\t1\t2\t-\t229290\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Perciformes; Cottioidei; Anoplopomatales; Anoplopomatidae; Anoplopoma", + "Anthopleura elegantissima\tclonal anemone\t1\t4\t-\t6110\tINV\tEukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Actiniidae; Anthopleura", + "Antrostomus carolinensis\tchuck-will's-widow\t1\t2\t-\t279965\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Caprimulgiformes; Caprimulgidae; Antrostomus", + "Aotus nancymaae\tMa's night monkey\t1\t2\t-\t37293\tPRI\tEukaryota; Metazoa; Chordata; 
Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Platyrrhini; Aotidae; Aotus", + "Apaloderma vittatum\tbar-tailed trogon\t1\t2\t-\t57397\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Trogoniformes; Trogonidae; Apaloderma", + "Aphanomyces astaci\t-\t1\t1\t11\t112090\tPLN\tEukaryota; Stramenopiles; Oomycetes; Saprolegniales; Saprolegniaceae; Aphanomyces", + "Aphis\t-\t1\t5\t-\t80764\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aphidomorpha; Aphidoidea; Aphididae; Aphidini; Aphis", + "Aphis gossypii\tcotton aphid\t1\t5\t-\t80765\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aphidomorpha; Aphidoidea; Aphididae; Aphidini; Aphis; Aphis", + "Apis\t-\t1\t5\t-\t7459\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Apis", + "Apis mellifera\thoney bee\t1\t5\t-\t7460\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Apis", + "Apis mellifera carnica\tCarniolan honeybee\t1\t5\t-\t88217\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Apis", + "Aplysia\t-\t1\t5\t-\t6499\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Euopisthobranchia; Aplysiida; Aplysioidea; Aplysiidae; Aplysia", + "Aplysia californica\tCalifornia sea hare\t1\t5\t-\t6500\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Euopisthobranchia; Aplysiida; Aplysioidea; Aplysiidae; Aplysia", + 
"Apostichopus japonicus\tJapanese sea cucumber\t1\t9\t-\t307972\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Holothuroidea; Aspidochirotacea; Aspidochirotida; Stichopodidae; Apostichopus", + "Apteryx australis mantelli\t-\t1\t2\t-\t202946\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Palaeognathae; Apterygiformes; Apterygidae; Apteryx", + "Aquilegia\t-\t1\t1\t11\t3450\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Ranunculales; Ranunculaceae; Thalictroideae; Aquilegia", + "Aquilegia coerulea\t-\t1\t1\t11\t218851\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Ranunculales; Ranunculaceae; Thalictroideae; Aquilegia", + "Aquilegia formosa x Aquilegia pubescens\t-\t1\t1\t11\t338618\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Ranunculales; Ranunculaceae; Thalictroideae; Aquilegia", + "Ara macao\tScarlet macaw\t1\t2\t-\t176014\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Psittaciformes; Psittacidae; Ara", + "Arabidopsis\t-\t1\t1\t11\t3701\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Arabidopsis", + "Arabidopsis thaliana\tthale cress\t1\t1\t11\t3702\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Arabidopsis", + "Arachis\t-\t1\t1\t11\t3817\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; 
eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; dalbergioids sensu lato; Dalbergieae; Pterocarpus clade; Arachis", + "Arachis duranensis\t-\t1\t1\t11\t130453\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; dalbergioids sensu lato; Dalbergieae; Pterocarpus clade; Arachis", + "Arachis hypogaea\tpeanut\t1\t1\t11\t3818\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; dalbergioids sensu lato; Dalbergieae; Pterocarpus clade; Arachis", + "Arachis hypogaea var. vulgaris\t-\t1\t1\t11\t925390\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; dalbergioids sensu lato; Dalbergieae; Pterocarpus clade; Arachis", + "Arachis ipaensis\t-\t1\t1\t11\t130454\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; dalbergioids sensu lato; Dalbergieae; Pterocarpus clade; Arachis", + "Aretaon asperrimus\tthorny stick insect\t1\t5\t-\t173775\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Phasmatodea; Verophasmatodea; Areolatae; Bacilloidea; Heteropterygidae; Obriminae; Obrimini; Aretaon", + "Arion vulgaris\t-\t1\t5\t-\t1028688\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Eupulmonata; Stylommatophora; Sigmurethra; Arionoidea; Arionidae; Arion", + 
"Artemisia\t-\t1\t1\t11\t4219\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Anthemideae; Artemisiinae; Artemisia", + "Artemisia annua\tsweet wormwood\t1\t1\t11\t35608\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Anthemideae; Artemisiinae; Artemisia", + "Arundo donax\tgiant reed\t1\t1\t11\t35708\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Arundinoideae; Arundineae; Arundo", + "Ascaris\t-\t1\t5\t-\t6251\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Spirurina; Ascaridomorpha; Ascaridoidea; Ascarididae; Ascaris", + "Ascaris suum\tpig roundworm\t1\t5\t-\t6253\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Spirurina; Ascaridomorpha; Ascaridoidea; Ascarididae; Ascaris", + "Aspergillus\t-\t1\t4\t-\t5052\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Eurotiomycetes; Eurotiomycetidae; Eurotiales; Aspergillaceae; Aspergillus", + "Astacus leptodactylus\tnarrow-clawed crayfish\t1\t5\t-\t6717\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Astacoidea; Astacidae; Astacus", + "Astyanax\t-\t1\t2\t-\t7993\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Characiformes; Characoidei; Characidae; Characidae incertae sedis; Astyanax clade; Astyanax", + "Astyanax mexicanus\tMexican tetra\t1\t2\t-\t7994\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; 
Characiformes; Characoidei; Characidae; Characidae incertae sedis; Astyanax clade; Astyanax", + "Athetis lepigone\t-\t1\t5\t-\t1223490\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Noctuoidea; Noctuidae; Noctuinae; Athetis", + "Atractaspis aterrima\tmole viper\t1\t2\t-\t1355159\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Lamprophiidae; Atractaspidinae; Atractaspis", + "Aurelia aurita\tmoon jelly\t1\t4\t-\t6145\tINV\tEukaryota; Metazoa; Cnidaria; Scyphozoa; Semaeostomeae; Ulmaridae; Aurelia", + "Avicennia marina\t-\t1\t1\t11\t82927\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Acanthaceae; Avicennioideae; Avicennia", + "Bacillus cereus\t-\t11\t0\t-\t1396\tBCT\tBacteria; Firmicutes; Bacilli; Bacillales; Bacillaceae; Bacillus; Bacillus cereus group", + "Bacillus thuringiensis\t-\t11\t0\t-\t1428\tBCT\tBacteria; Firmicutes; Bacilli; Bacillales; Bacillaceae; Bacillus; Bacillus cereus group", + "bacterium\t-\t11\t0\t-\t1869227\tBCT\tBacteria", + "Balaenoptera bonaerensis\tAntarctic minke whale\t1\t2\t-\t33556\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Mysticeti; Balaenopteridae; Balaenoptera", + "Balearica regulorum gibbericeps\tEast African grey crowned-crane\t1\t2\t-\t100784\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Gruidae; Balearica", + "Bemisia tabaci\t-\t1\t5\t-\t7038\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; 
Aleyrodoidea; Aleyrodidae; Aleyrodinae; Bemisia", + "Beta\t-\t1\t1\t11\t3554\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Betoideae; Beta", + "Beta vulgaris\t-\t1\t1\t11\t161934\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Betoideae; Beta", + "Beta vulgaris subsp. vulgaris\t-\t1\t1\t11\t3555\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Betoideae; Beta", + "Betula platyphylla\t-\t1\t1\t11\t78630\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Betulaceae; Betula", + "Bicyclus\tbush browns\t1\t5\t-\t110367\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Papilionoidea; Nymphalidae; Satyrinae; Satyrini; Mycalesina; Bicyclus", + "Bicyclus anynana\tsquinting bush brown\t1\t5\t-\t110368\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Papilionoidea; Nymphalidae; Satyrinae; Satyrini; Mycalesina; Bicyclus", + "Biomphalaria glabrata\t-\t1\t5\t-\t6526\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Hygrophila; Lymnaeoidea; Planorbidae; Biomphalaria", + "Bison bison bison\t-\t1\t2\t-\t43346\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bison", + "Bithynia siamensis goniomphalos\t-\t1\t5\t-\t479249\tINV\tEukaryota; Metazoa; 
Lophotrochozoa; Mollusca; Gastropoda; Caenogastropoda; Littorinimorpha; Truncatelloidea; Bithyniidae; Bithynia", + "Boechera\t-\t1\t1\t11\t76872\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Boechereae; Boechera", + "Boechera gunnisoniana\t-\t1\t1\t11\t93888\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Boechereae; Boechera", + "Boechera stricta\t-\t1\t1\t11\t72658\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Boechereae; Boechera", + "Bombina bombina\tfire-bellied toad\t1\t2\t-\t8345\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Anura; Bombinatoridae; Bombina", + "Bombina variegata variegata\t-\t1\t2\t-\t191472\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Anura; Bombinatoridae; Bombina", + "Bombus terrestris\tbuff-tailed bumblebee\t1\t5\t-\t30195\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Bombus; Bombus", + "Bombyx\t-\t1\t5\t-\t7090\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Bombycoidea; Bombycidae; Bombycinae; Bombyx", + "Bombyx mori\tdomestic silkworm\t1\t5\t-\t7091\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Bombycoidea; Bombycidae; Bombycinae; Bombyx", + "Bordetella pertussis\t-\t11\t0\t-\t520\tBCT\tBacteria; Proteobacteria; Betaproteobacteria; 
Burkholderiales; Alcaligenaceae; Bordetella", + "Bos\toxen, cattle\t1\t2\t-\t9903\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bos", + "Bos mutus\twild yak\t1\t2\t-\t72004\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bos", + "Bos taurus\tcattle\t1\t2\t-\t9913\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bos", + "Botryllus\t-\t1\t13\t-\t30300\tINV\tEukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Stolidobranchia; Styelidae; Botryllus", + "Botryllus schlosseri\t-\t1\t13\t-\t30301\tINV\tEukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Stolidobranchia; Styelidae; Botryllus", + "Botryococcus\t-\t1\t1\t11\t38880\tPLN\tEukaryota; Viridiplantae; Chlorophyta; Trebouxiophyceae; Elliptochloris clade; Botryococcus", + "Botryococcus braunii\t-\t1\t1\t11\t38881\tPLN\tEukaryota; Viridiplantae; Chlorophyta; Trebouxiophyceae; Elliptochloris clade; Botryococcus", + "Brachypodium\tfalse bromes\t1\t1\t11\t15367\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Brachypodieae; Brachypodium", + "Brachypodium distachyon\tstiff brome\t1\t1\t11\t15368\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Brachypodieae; Brachypodium", + "Bragasellus molinai\t-\t1\t5\t-\t1281925\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Bragasellus", + "Bragasellus peltatus\t-\t1\t5\t-\t1282048\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; 
Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Bragasellus", + "Branchiostoma\t-\t1\t5\t-\t7737\tINV\tEukaryota; Metazoa; Chordata; Cephalochordata; Branchiostomidae; Branchiostoma", + "Branchiostoma floridae\tFlorida lancelet\t1\t5\t-\t7739\tINV\tEukaryota; Metazoa; Chordata; Cephalochordata; Branchiostomidae; Branchiostoma", + "Brassica\t-\t1\t1\t11\t3705\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica", + "Brassica juncea\t-\t1\t1\t11\t3707\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica", + "Brassica napus\trape\t1\t1\t11\t3708\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica", + "Brassica oleracea\twild cabbage\t1\t1\t11\t3712\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica", + "Brassica oleracea var. oleracea\t-\t1\t1\t11\t109376\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica", + "Brassica oleracea var. 
viridis\tkale\t1\t1\t11\t3713\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica", + "Brassica rapa\tfield mustard\t1\t1\t11\t3711\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica", + "Brassica rapa subsp. pekinensis\tChinese cabbage\t1\t1\t11\t51351\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica", + "Breviolum\t-\t1\t4\t11\t2499524\tPLN\tEukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Breviolum", + "Breviolum minutum\t-\t1\t4\t11\t2499525\tPLN\tEukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Breviolum; Breviolum minutum", + "Brugia malayi\t-\t1\t5\t-\t6279\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Spirurina; Spiruromorpha; Filarioidea; Onchocercidae; Brugia", + "Bubalus bubalis\twater buffalo\t1\t2\t-\t89462\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bubalus", + "Buceros rhinoceros silvestris\t-\t1\t2\t-\t175836\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Bucerotiformes; Bucerotidae; Buceros", + "Burkholderia cenocepacia\t-\t11\t0\t-\t95486\tBCT\tBacteria; Proteobacteria; Betaproteobacteria; Burkholderiales; Burkholderiaceae; Burkholderia; Burkholderia cepacia complex", + "Burkholderia pseudomallei\t-\t11\t0\t-\t28450\tBCT\tBacteria; Proteobacteria; Betaproteobacteria; Burkholderiales; 
Burkholderiaceae; Burkholderia; pseudomallei group", + "Caenorhabditis\t-\t1\t5\t-\t6237\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis", + "Caenorhabditis elegans\t-\t1\t5\t-\t6239\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis", + "Cajanus\t-\t1\t1\t11\t3820\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Cajanus", + "Cajanus cajan\tpigeon pea\t1\t1\t11\t3821\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Cajanus", + "Calanus finmarchicus\t-\t1\t5\t-\t6837\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Hexanauplia; Copepoda; Calanoida; Calanidae; Calanus", + "Calanus glacialis\t-\t1\t5\t-\t113644\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Hexanauplia; Copepoda; Calanoida; Calanidae; Calanus", + "Callithrix\t-\t1\t2\t-\t9481\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Platyrrhini; Cebidae; Callitrichinae; Callithrix", + "Callithrix jacchus\twhite-tufted-ear marmoset\t1\t2\t-\t9483\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Platyrrhini; Cebidae; Callitrichinae; Callithrix; Callithrix", + "Callorhinchus\t-\t1\t2\t-\t7866\tVRT\tEukaryota; Metazoa; Chordata; Craniata; 
Vertebrata; Chondrichthyes; Holocephali; Chimaeriformes; Callorhinchidae; Callorhinchus", + "Callorhinchus milii\telephant shark\t1\t2\t-\t7868\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Chondrichthyes; Holocephali; Chimaeriformes; Callorhinchidae; Callorhinchus", + "Calvadosia cruxmelitensis\t-\t1\t4\t-\t1843192\tINV\tEukaryota; Metazoa; Cnidaria; Staurozoa; Stauromedusae; Amyostaurida; Kishinouyeidae; Calvadosia", + "Camelina sativa\tfalse flax\t1\t1\t11\t90675\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Camelina", + "Camellia sinensis\t-\t1\t1\t11\t4442\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Theaceae; Camellia", + "Camellia sinensis var. sinensis\t-\t1\t1\t11\t542762\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Theaceae; Camellia", + "Camelus bactrianus\tBactrian camel\t1\t2\t-\t9837\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Camelus", + "Camelus dromedarius\tArabian camel\t1\t2\t-\t9838\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Camelus", + "Camptotheca acuminata\t-\t1\t1\t11\t16922\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; Cornales; Nyssaceae; Camptotheca", + "Campylobacter jejuni\t-\t11\t0\t-\t197\tBCT\tBacteria; Proteobacteria; Epsilonproteobacteria; Campylobacterales; Campylobacteraceae; Campylobacter", + 
"Canis\t-\t1\t2\t-\t9611\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Canidae; Canis", + "Canis lupus dingo\tdingo\t1\t2\t-\t286419\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Canidae; Canis", + "Canis lupus familiaris\tdog\t1\t2\t-\t9615\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Canidae; Canis", + "Capitella\t-\t1\t5\t-\t51293\tINV\tEukaryota; Metazoa; Lophotrochozoa; Annelida; Polychaeta; Scolecida; Capitellida; Capitellidae; Capitella", + "Capitella teleta\t-\t1\t5\t-\t283909\tINV\tEukaryota; Metazoa; Lophotrochozoa; Annelida; Polychaeta; Scolecida; Capitellida; Capitellidae; Capitella", + "Capra hircus\tgoat\t1\t2\t-\t9925\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Caprinae; Capra", + "Capsicum\tpeppers\t1\t1\t11\t4071\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Capsiceae; Capsicum", + "Capsicum annuum\t-\t1\t1\t11\t4072\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Capsiceae; Capsicum", + "Carassius auratus\tgoldfish\t1\t2\t-\t7957\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Carassius", + "Cariama cristata\tRed-legged seriema\t1\t2\t-\t54380\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; 
Coelurosauria; Aves; Neognathae; Cariamiformes; Cariamidae; Cariama", + "Carica\t-\t1\t1\t11\t3648\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Caricaceae; Carica", + "Carica papaya\tpapaya\t1\t1\t11\t3649\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Caricaceae; Carica", + "Caridina multidentata\t-\t1\t5\t-\t293153\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Caridea; Atyoidea; Atyidae; Caridina", + "Carlito syrichta\tPhilippine tarsier\t1\t2\t-\t1868482\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Tarsiiformes; Tarsiidae; Carlito", + "Catharanthus roseus\tMadagascar periwinkle\t1\t1\t11\t4058\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Apocynaceae; Rauvolfioideae; Vinceae; Catharanthinae; Catharanthus", + "Cavia porcellus\tdomestic guinea pig\t1\t2\t-\t10141\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Hystricomorpha; Caviidae; Cavia", + "Cenchrus americanus\t-\t1\t1\t11\t4543\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Cenchrinae; Cenchrus", + "Centaurea\t-\t1\t1\t11\t41503\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Carduoideae; Cardueae; 
Centaureinae; Centaurea", + "Ceratotherium simum simum\tsouthern white rhinoceros\t1\t2\t-\t73337\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Perissodactyla; Rhinocerotidae; Ceratotherium", + "Cercocebus atys\tsooty mangabey\t1\t2\t-\t9531\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Cercocebus", + "Chelonia\t-\t1\t2\t-\t8468\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Testudines; Cryptodira; Durocryptodira; Americhelydia; Chelonioidea; Cheloniidae; Chelonia", + "Chelonia mydas\tGreen sea turtle\t1\t2\t-\t8469\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Testudines; Cryptodira; Durocryptodira; Americhelydia; Chelonioidea; Cheloniidae; Chelonia", + "Cherax quadricarinatus\t-\t1\t5\t-\t27406\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Parastacoidea; Parastacidae; Cherax", + "Chinchilla lanigera\tlong-tailed chinchilla\t1\t2\t-\t34839\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Hystricomorpha; Chinchillidae; Chinchilla", + "Chlamydia trachomatis\t-\t11\t0\t-\t813\tBCT\tBacteria; Chlamydiae; Chlamydiales; Chlamydiaceae; Chlamydia/Chlamydophila group; Chlamydia", + "Chlamydomonas\t-\t1\t1\t11\t3052\tPLN\tEukaryota; Viridiplantae; Chlorophyta; Chlorophyceae; Chlamydomonadales; Chlamydomonadaceae; Chlamydomonas", + "Chlamydomonas reinhardtii\t-\t1\t1\t11\t3055\tPLN\tEukaryota; Viridiplantae; Chlorophyta; Chlorophyceae; Chlamydomonadales; Chlamydomonadaceae; Chlamydomonas", + "Chlamydotis macqueenii\tMacqueen's bustard\t1\t2\t-\t187382\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; 
Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Otididae; Chlamydotis", + "Chlorocebus\t-\t1\t2\t-\t392815\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Chlorocebus", + "Chlorocebus aethiops\tgrivet\t1\t2\t-\t9534\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Chlorocebus", + "Chlorocebus sabaeus\tgreen monkey\t1\t2\t-\t60711\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Chlorocebus", + "Chorispora bungeana\t-\t1\t1\t11\t238895\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Chorisporeae; Chorispora", + "Chromolaena odorata\t-\t1\t1\t11\t103745\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Heliantheae alliance; Eupatorieae; Chromolaena", + "Chrysemys picta bellii\twestern painted turtle\t1\t2\t-\t8478\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Testudines; Cryptodira; Durocryptodira; Testudinoidea; Emydidae; Chrysemys", + "Chrysochloris asiatica\tCape golden mole\t1\t2\t-\t185453\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Afrotheria; Chrysochloridae; Chrysochlorinae; Chrysochloris", + "Cicer\t-\t1\t1\t11\t3826\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; 
Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Cicereae; Cicer", + "Cicer arietinum\tchickpea\t1\t1\t11\t3827\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Cicereae; Cicer", + "Ciona\t-\t1\t13\t-\t7718\tINV\tEukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Enterogona; Phlebobranchia; Cionidae; Ciona", + "Ciona intestinalis\tvase tunicate\t1\t13\t-\t7719\tINV\tEukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Enterogona; Phlebobranchia; Cionidae; Ciona", + "Ciona savignyi\tPacific transparent sea squirt\t1\t13\t-\t51511\tINV\tEukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Enterogona; Phlebobranchia; Cionidae; Ciona", + "Citrus\t-\t1\t1\t11\t2706\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Rutaceae; Aurantioideae; Citrus", + "Citrus clementina\t-\t1\t1\t11\t85681\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Rutaceae; Aurantioideae; Citrus", + "Citrus sinensis\tsweet orange\t1\t1\t11\t2711\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Rutaceae; Aurantioideae; Citrus", + "Clonorchis\t-\t1\t9\t-\t79922\tINV\tEukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Opisthorchiida; Opisthorchiata; Opisthorchiidae; Clonorchis", + "Clonorchis sinensis\t-\t1\t9\t-\t79923\tINV\tEukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Opisthorchiida; Opisthorchiata; Opisthorchiidae; 
Clonorchis", + "Clostridioides difficile\t-\t11\t0\t-\t1496\tBCT\tBacteria; Firmicutes; Clostridia; Clostridiales; Peptostreptococcaceae; Clostridioides", + "Clytia\t-\t1\t4\t-\t13436\tINV\tEukaryota; Metazoa; Cnidaria; Hydrozoa; Hydroidolina; Leptothecata; Campanulariidae; Clytia", + "Clytia hemisphaerica\t-\t1\t4\t-\t252671\tINV\tEukaryota; Metazoa; Cnidaria; Hydrozoa; Hydroidolina; Leptothecata; Campanulariidae; Clytia", + "Coccidioides\t-\t1\t4\t-\t5500\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Eurotiomycetes; Eurotiomycetidae; Onygenales; Onygenales incertae sedis; Coccidioides", + "Coccidioides posadasii\t-\t1\t4\t-\t199306\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Eurotiomycetes; Eurotiomycetidae; Onygenales; Onygenales incertae sedis; Coccidioides", + "Cocos nucifera\tcoconut palm\t1\t1\t11\t13894\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Arecaceae; Arecoideae; Cocoseae; Attaleinae; Cocos", + "Coffea\t-\t1\t1\t11\t13442\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Rubiaceae; Ixoroideae; Gardenieae complex; Bertiereae - Coffeeae clade; Coffeeae; Coffea", + "Coffea arabica\tcoffee\t1\t1\t11\t13443\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Rubiaceae; Ixoroideae; Gardenieae complex; Bertiereae - Coffeeae clade; Coffeeae; Coffea", + "Coffea canephora\t-\t1\t1\t11\t49390\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Rubiaceae; Ixoroideae; Gardenieae complex; Bertiereae - Coffeeae clade; Coffeeae; Coffea", + "Colius striatus\tspeckled 
mousebird\t1\t2\t-\t57412\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Coliiformes; Coliidae; Colius", + "Colletotrichum\t-\t1\t4\t-\t5455\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Hypocreomycetidae; Glomerellales; Glomerellaceae; Colletotrichum", + "Colletotrichum graminicola\t-\t1\t4\t-\t31870\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Hypocreomycetidae; Glomerellales; Glomerellaceae; Colletotrichum", + "Condylura cristata\tstar-nosed mole\t1\t2\t-\t143302\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Eulipotyphla; Talpidae; Condylura", + "Coptotermes\t-\t1\t5\t-\t36986\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Dictyoptera; Blattodea; Blattoidea; Termitoidae; Rhinotermitidae; Coptotermes", + "Coptotermes formosanus\tFormosan subterranean termite\t1\t5\t-\t36987\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Dictyoptera; Blattodea; Blattoidea; Termitoidae; Rhinotermitidae; Coptotermes", + "Crassostrea\t-\t1\t5\t-\t6564\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Ostreoida; Ostreoidea; Ostreidae; Crassostrea", + "Crassostrea gigas\tPacific oyster\t1\t5\t-\t29159\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Ostreoida; Ostreoidea; Ostreidae; Crassostrea", + "Cricetulus griseus\tChinese hamster\t1\t2\t-\t10029\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Cricetidae; Cricetinae; Cricetulus", + "Cryptomeria\t-\t1\t1\t11\t3368\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Cupressales; 
Cupressaceae; Cryptomeria", + "Cryptomeria japonica\tJapanese cedar\t1\t1\t11\t3369\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Cupressales; Cupressaceae; Cryptomeria", + "Cryptosporidium\t-\t1\t4\t4\t5806\tINV\tEukaryota; Alveolata; Apicomplexa; Conoidasida; Coccidia; Eucoccidiorida; Eimeriorina; Cryptosporidiidae; Cryptosporidium", + "Cryptotermes secundus\t-\t1\t5\t-\t105785\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Dictyoptera; Blattodea; Blattoidea; Termitoidae; Kalotermitidae; Cryptotermitinae; Cryptotermes", + "Cucumis\t-\t1\t1\t11\t3655\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Benincaseae; Cucumis", + "Cucumis melo\tmuskmelon\t1\t1\t11\t3656\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Benincaseae; Cucumis", + "Cucumis sativus\tcucumber\t1\t1\t11\t3659\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Benincaseae; Cucumis", + "Culex\t-\t1\t5\t-\t7174\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Culicinae; Culicini; Culex", + "Culex quinquefasciatus\tsouthern house mosquito\t1\t5\t-\t7176\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Culicinae; Culicini; Culex; Culex", + "Cuscuta pentagona\t-\t1\t1\t11\t112407\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; 
eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Cuscuteae; Cuscuta; Grammica; Cuscuta sect. Cleistogrammica", + "Cynara cardunculus var. scolymus\t-\t1\t1\t11\t59895\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Carduoideae; Cardueae; Carduinae; Cynara", + "Cynoglossus semilaevis\ttongue sole\t1\t2\t-\t244447\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Carangaria; Pleuronectiformes; Pleuronectoidei; Cynoglossidae; Cynoglossinae; Cynoglossus", + "Cynopterus sphinx\tIndian short-nosed fruit bat\t1\t2\t-\t9400\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Cynopterus", + "Cyprinus\t-\t1\t2\t-\t7961\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Cyprinus", + "Cyprinus carpio\tcommon carp\t1\t2\t-\t7962\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Cyprinus", + "Danio\t-\t1\t2\t-\t7954\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Danio", + "Danio rerio\tzebrafish\t1\t2\t-\t7955\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Danio", + "Daphnia\tcommon water fleas\t1\t5\t-\t6668\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Branchiopoda; Diplostraca; Cladocera; Anomopoda; Daphniidae; Daphnia", + "Daphnia magna\t-\t1\t5\t-\t35525\tINV\tEukaryota; 
Metazoa; Ecdysozoa; Arthropoda; Crustacea; Branchiopoda; Diplostraca; Cladocera; Anomopoda; Daphniidae; Daphnia", + "Daphnia pulex\tcommon water flea\t1\t5\t-\t6669\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Branchiopoda; Diplostraca; Cladocera; Anomopoda; Daphniidae; Daphnia", + "Dasypus novemcinctus\tnine-banded armadillo\t1\t2\t-\t9361\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Xenarthra; Cingulata; Dasypodidae; Dasypus", + "Dendrobium catenatum\t-\t1\t1\t11\t906689\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae; Epidendroideae; Malaxideae; Dendrobiinae; Dendrobium", + "Dendroctonus\t-\t1\t5\t-\t77156\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Coleoptera; Polyphaga; Cucujiformia; Curculionidae; Scolytinae; Dendroctonus", + "Dendroctonus ponderosae\tmountain pine beetle\t1\t5\t-\t77166\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Coleoptera; Polyphaga; Cucujiformia; Curculionidae; Scolytinae; Dendroctonus", + "Diacamma sp. 
Okinawa-2006a\t-\t1\t5\t-\t655490\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Ponerinae; Ponerini; Diacamma", + "Diaphorina citri\tAsian citrus psyllid\t1\t5\t-\t121845\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Psylloidea; Liviidae; Diaphorina", + "Dicentrarchus labrax\tEuropean seabass\t1\t2\t-\t13489\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Moronidae; Dicentrarchus", + "Dicrocoelium dendriticum\t-\t1\t9\t-\t57078\tINV\tEukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Plagiorchiida; Xiphidiata; Gorgoderoidea; Dicrocoeliidae; Dicrocoelium", + "Dictyostelium\t-\t1\t1\t11\t5782\tINV\tEukaryota; Amoebozoa; Mycetozoa; Dictyostelids; Dictyosteliales; Dictyosteliaceae; Dictyostelium", + "Dictyostelium discoideum\t-\t1\t1\t11\t44689\tINV\tEukaryota; Amoebozoa; Mycetozoa; Dictyostelids; Dictyosteliales; Dictyosteliaceae; Dictyostelium", + "Diospyros lotus\t-\t1\t1\t11\t55363\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Ebenaceae; Diospyros", + "Diploscapter\t-\t1\t5\t-\t55799\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Rhabditoidea; Rhabditidae; Diploscapter", + "Diploscapter coronatus\t-\t1\t5\t-\t288516\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Rhabditoidea; Rhabditidae; Diploscapter", + "Dipodomys ordii\tOrd's kangaroo rat\t1\t2\t-\t10020\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Castorimorpha; Heteromyidae; Dipodomyinae; Dipodomys", + 
"Dorcoceras hygrometricum\t-\t1\t1\t11\t472368\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Gesneriaceae; Didymocarpoideae; Trichosporeae; Loxocarpinae; Dorcoceras", + "Drosophila\tfruit flies\t1\t5\t-\t7215\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Ephydroidea; Drosophilidae; Drosophila", + "Drosophila ananassae\t-\t1\t5\t-\t7217\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Ephydroidea; Drosophilidae; Drosophila; Sophophora", + "Drosophila melanogaster\tfruit fly\t1\t5\t-\t7227\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Ephydroidea; Drosophilidae; Drosophila; Sophophora", + "Drosophila sechellia\t-\t1\t5\t-\t7238\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Ephydroidea; Drosophilidae; Drosophila; Sophophora", + "Drosophila simulans\t-\t1\t5\t-\t7240\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Ephydroidea; Drosophilidae; Drosophila; Sophophora", + "Dugesia japonica\t-\t1\t9\t-\t6161\tINV\tEukaryota; Metazoa; Platyhelminthes; Rhabditophora; Seriata; Tricladida; Continenticola; Geoplanoidea; Dugesiidae; Dugesia", + "Echinarachnius parma\t-\t1\t9\t-\t869203\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Gnathostomata; Clypeasteroida; Echinarachniidae; Echinarachnius", + "Echinaster spinulosus\t-\t1\t9\t-\t1451296\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Spinulosacea; Spinulosida; Echinasteridae; 
Echinaster", + "Echinococcus multilocularis\t-\t1\t9\t-\t6211\tINV\tEukaryota; Metazoa; Platyhelminthes; Cestoda; Eucestoda; Cyclophyllidea; Taeniidae; Echinococcus", + "Echinops telfairi\tsmall Madagascar hedgehog\t1\t2\t-\t9371\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Afrotheria; Tenrecidae; Tenrecinae; Echinops", + "Eimeria\t-\t1\t4\t4\t5800\tINV\tEukaryota; Alveolata; Apicomplexa; Conoidasida; Coccidia; Eucoccidiorida; Eimeriorina; Eimeriidae; Eimeria", + "Elaeis guineensis\tAfrican oil palm\t1\t1\t11\t51953\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Arecaceae; Arecoideae; Cocoseae; Elaeidinae; Elaeis", + "Elephantulus edwardii\tCape elephant shrew\t1\t2\t-\t28737\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Afrotheria; Macroscelidea; Macroscelididae; Elephantulus", + "Elliptio complanata\teastern elliptio\t1\t5\t-\t55832\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Bivalvia; Palaeoheterodonta; Unionoida; Unionoidea; Unionidae; Ambleminae; Elliptio", + "Elodea nuttallii\t-\t1\t1\t11\t55313\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Hydrocharitaceae; Elodea", + "Emiliania\t-\t1\t4\t11\t2902\tPLN\tEukaryota; Haptophyceae; Isochrysidales; Noelaerhabdaceae; Emiliania", + "Emiliania huxleyi\t-\t1\t4\t11\t2903\tPLN\tEukaryota; Haptophyceae; Isochrysidales; Noelaerhabdaceae; Emiliania", + "Entamoeba\t-\t1\t1\t11\t5758\tINV\tEukaryota; Amoebozoa; Archamoebae; Entamoebidae; Entamoeba", + "Entamoeba histolytica\t-\t1\t1\t11\t5759\tINV\tEukaryota; Amoebozoa; Archamoebae; Entamoebidae; Entamoeba", + "Enterococcus faecalis\t-\t11\t0\t-\t1351\tBCT\tBacteria; Firmicutes; Bacilli; Lactobacillales; Enterococcaceae; Enterococcus", + "Enterococcus faecium\t-\t11\t0\t-\t1352\tBCT\tBacteria; Firmicutes; Bacilli; Lactobacillales; 
Enterococcaceae; Enterococcus", + "Eptesicus fuscus\tbig brown bat\t1\t2\t-\t29078\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Eptesicus", + "Equus caballus\thorse\t1\t2\t-\t9796\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Perissodactyla; Equidae; Equus", + "Equus przewalskii\tPrzewalski's horse\t1\t2\t-\t9798\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Perissodactyla; Equidae; Equus", + "Erinaceus europaeus\twestern European hedgehog\t1\t2\t-\t9365\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Eulipotyphla; Erinaceidae; Erinaceinae; Erinaceus", + "Eriocheir sinensis\tChinese mitten crab\t1\t5\t-\t95602\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Brachyura; Eubrachyura; Grapsoidea; Varunidae; Eriocheir", + "Erythranthe\t-\t1\t1\t11\t1502711\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Phrymaceae; Erythranthe", + "Erythranthe guttata\tspotted monkey flower\t1\t1\t11\t4155\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Phrymaceae; Erythranthe", + "Escherichia\t-\t11\t0\t-\t561\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Escherichia", + "Escherichia coli\t-\t11\t0\t-\t562\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Escherichia", + "Escherichia coli O26:H11\t-\t11\t0\t-\t244319\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; 
Enterobacterales; Enterobacteriaceae; Escherichia", + "Esox lucius\tnorthern pike\t1\t2\t-\t8010\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Esociformes; Esocidae; Esox", + "Eucalyptus\t-\t1\t1\t11\t3932\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Myrtaceae; Myrtoideae; Eucalypteae; Eucalyptus", + "Eucalyptus camaldulensis\tMurray red gum\t1\t1\t11\t34316\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Myrtaceae; Myrtoideae; Eucalypteae; Eucalyptus", + "Eucalyptus grandis\t-\t1\t1\t11\t71139\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Myrtaceae; Myrtoideae; Eucalypteae; Eucalyptus", + "Eucidaris tribuloides\t-\t1\t9\t-\t7632\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Perischoechinoidea; Cidaroida; Cidaridae; Eucidaris", + "Eufriesea mexicana\t-\t1\t5\t-\t516756\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Eufriesea", + "Eurypyga helias\tsunbittern\t1\t2\t-\t54383\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Eurypygidae; Eurypyga", + "Exaiptasia pallida\t-\t1\t4\t-\t1720309\tINV\tEukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Aiptasiidae; Exaiptasia", + "Extatosoma tiaratum\tgiant prickly stick insect\t1\t5\t-\t7024\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; 
Polyneoptera; Phasmatodea; Verophasmatodea; Anareolatae; Phasmatidae; Tropidoderinae; Extatosoma", + "Fagopyrum esculentum\tcommon buckwheat\t1\t1\t11\t3617\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Polygonaceae; Polygonoideae; Fagopyreae; Fagopyrum", + "Fagus sylvatica\tEuropean beech\t1\t1\t11\t28930\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Fagaceae; Fagus", + "Falco\tfalcons\t1\t2\t-\t8952\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Falconiformes; Falconidae; Falco", + "Fasciola hepatica\tliver fluke\t1\t9\t-\t6192\tINV\tEukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Plagiorchiida; Echinostomata; Echinostomatoidea; Fasciolidae; Fasciola", + "Felis catus\tdomestic cat\t1\t2\t-\t9685\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Feliformia; Felidae; Felinae; Felis", + "Festuca\t-\t1\t1\t11\t4605\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Loliinae; Festuca", + "Festuca arundinacea\t-\t1\t1\t11\t4606\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Loliinae; Festuca", + "Ficus carica\tcommon fig\t1\t1\t11\t3494\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Moraceae; Ficus", "fish 
metagenome\t-\t11\t2\t11\t496924\tENV\tunclassified sequences; metagenomes; organismal metagenomes", - "Folsomia candida\t-\t1\t5\t-\t158441\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Collembola; Collembola; Entomobryomorpha; Isotomoidea; Isotomidae; Proisotominae; Folsomia", - "Fopius arisanus\t-\t1\t5\t-\t64838\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Ichneumonoidea; Braconidae; Opiinae; Fopius", - "Formica aquilonia\t-\t1\t5\t-\t258703\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica", - "Formica cinerea\t-\t1\t5\t-\t609761\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica", - "Formica exsecta\t-\t1\t5\t-\t72781\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica", - "Formica fusca\t-\t1\t5\t-\t72779\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; 
Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica", - "Formica pratensis\t-\t1\t5\t-\t221681\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica", - "Formica pressilabris\t-\t1\t5\t-\t609858\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica", - "Formica truncorum\t-\t1\t5\t-\t72783\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica", - "Fragaria vesca subsp. 
vesca\t-\t1\t1\t11\t101020\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Rosoideae; Potentilleae; Fragariinae; Fragaria; Fragaria vesca", - "Fragaria x ananassa\tstrawberry\t1\t1\t11\t3747\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Rosoideae; Potentilleae; Fragariinae; Fragaria", - "Frankliniella occidentalis\twestern flower thrips\t1\t5\t-\t133901\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Thysanoptera; Terebrantia; Thripoidea; Thripidae; Thripinae; Frankliniella", - "Fraxinus excelsior\tEuropean ash\t1\t1\t11\t38873\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Oleaceae; Oleeae; Fraxinus", - "Fukomys damarensis\tDamara mole-rat\t1\t2\t-\t885580\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Hystricognathi; Bathyergidae; Fukomys", - "Fulmarus glacialis\tnorthern fulmar\t1\t2\t-\t30455\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; 
Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Procellariiformes; Procellariidae; Procellariinae; Fulmarus", - "Fundulus grandis\tGulf killifish\t1\t2\t-\t34779\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Fundulidae; Fundulus", - "Fundulus heteroclitus\tmummichog\t1\t2\t-\t8078\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Fundulidae; Fundulus", - "Gadus morhua\tAtlantic cod\t1\t2\t-\t8049\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Paracanthomorphacea; Zeiogadaria; Gadariae; Gadiformes; Gadoidei; Gadidae; Gadus", - "Galeopterus variegatus\tSunda flying lemur\t1\t2\t-\t482537\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; 
Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Dermoptera; Cynocephalidae; Galeopterus", - "Gallus gallus\tchicken\t1\t2\t-\t9031\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Phasianidae; Phasianinae; Gallus", - "Gammarus chevreuxi\t-\t1\t5\t-\t732109\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Amphipoda; Senticaudata; Gammarida; Gammaridira; Gammaroidea; Gammaridae; Gammarus", - "Gardenia jasminoides\t-\t1\t1\t11\t114476\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Rubiaceae; Ixoroideae; Gardenieae; Gardenia", - "Gavia stellata\tred-throated loon\t1\t2\t-\t37040\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gaviiformes; Gaviidae; Gavia", - "Gavialis gangeticus\tGharial\t1\t2\t-\t94835\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; 
Archelosauria; Archosauria; Crocodylia; Longirostres; Gavialidae; Gavialinae; Gavialis", - "Gekko japonicus\t-\t1\t2\t-\t146911\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Lepidosauria; Squamata; Bifurcata; Gekkota; Gekkonidae; Gekkoninae; Gekko", - "Geminigera cryophila\t-\t1\t1\t11\t46947\tPLN\tcellular organisms; Eukaryota; Cryptophyta; Pyrenomonadales; Geminigeraceae; Geminigera", + "Formica aquilonia\t-\t1\t5\t-\t258703\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica", + "Formica cinerea\t-\t1\t5\t-\t609761\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica", + "Formica exsecta\t-\t1\t5\t-\t72781\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica", + "Formica fusca\t-\t1\t5\t-\t72779\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica", + "Formica pratensis\t-\t1\t5\t-\t221681\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica", + "Formica pressilabris\t-\t1\t5\t-\t609858\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica", + "Formica truncorum\t-\t1\t5\t-\t72783\tINV\tEukaryota; Metazoa; 
Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica", + "Fragaria x ananassa\tstrawberry\t1\t1\t11\t3747\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Rosoideae; Potentilleae; Fragariinae; Fragaria", + "Frankliniella occidentalis\twestern flower thrips\t1\t5\t-\t133901\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Thysanoptera; Terebrantia; Thripoidea; Thripidae; Frankliniella", + "Fukomys damarensis\tDamara mole-rat\t1\t2\t-\t885580\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Hystricomorpha; Bathyergidae; Fukomys", + "Fulmarus glacialis\tNorthern fulmar\t1\t2\t-\t30455\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Procellariiformes; Procellariidae; Fulmarus", + "Fundulus\t-\t1\t2\t-\t8077\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Fundulidae; Fundulus", + "Fundulus grandis\tGulf killifish\t1\t2\t-\t34779\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Fundulidae; Fundulus", + "Fundulus heteroclitus\tmummichog\t1\t2\t-\t8078\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Fundulidae; Fundulus", + "Fusarium\t-\t1\t4\t-\t5506\tPLN\tEukaryota; 
Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Hypocreomycetidae; Hypocreales; Nectriaceae; Fusarium", + "Fusarium verticillioides\t-\t1\t4\t-\t117187\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Hypocreomycetidae; Hypocreales; Nectriaceae; Fusarium; Fusarium fujikuroi species complex", + "Gadus\t-\t1\t2\t-\t8048\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Zeiogadaria; Gadariae; Gadiformes; Gadoidei; Gadidae; Gadus", + "Gadus morhua\tAtlantic cod\t1\t2\t-\t8049\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Zeiogadaria; Gadariae; Gadiformes; Gadoidei; Gadidae; Gadus", + "Galeopterus variegatus\tSunda flying lemur\t1\t2\t-\t482537\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Dermoptera; Cynocephalidae; Galeopterus", + "Gallus\t-\t1\t2\t-\t9030\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Phasianidae; Phasianinae; Gallus", + "Gallus gallus\tchicken\t1\t2\t-\t9031\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Phasianidae; Phasianinae; Gallus", + "Gardenia jasminoides\t-\t1\t1\t11\t114476\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Rubiaceae; Ixoroideae; Gardenieae complex; Gardenieae - Pavetteae clade; Gardenieae; Gardenia", + "Gasterosteus\t-\t1\t2\t-\t69292\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; 
Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Perciformes; Cottioidei; Gasterosteales; Gasterosteidae; Gasterosteus", + "Gasterosteus aculeatus\tthree-spined stickleback\t1\t2\t-\t69293\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Perciformes; Cottioidei; Gasterosteales; Gasterosteidae; Gasterosteus", + "Gavia stellata\tred-throated loon\t1\t2\t-\t37040\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gaviiformes; Gaviidae; Gavia", + "Gekko japonicus\t-\t1\t2\t-\t146911\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Gekkota; Gekkonidae; Gekkoninae; Gekko", "Gene trapping vector VICTR76\t-\t11\t0\t11\t447635\tSYN\tother sequences; artificial sequences; vectors", - "Gentiana macrophylla\t-\t1\t1\t11\t50765\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Gentianaceae; Gentianeae; Gentiana", - "Geospiza fortis\tmedium ground-finch\t1\t2\t-\t48883\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Thraupidae; Geospiza", - "Gerbera hybrid cultivar\t-\t1\t1\t11\t18101\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; 
Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Mutisioideae; Mutisieae; Gerbera", - "Gigaspora margarita\t-\t1\t4\t-\t4874\tPLN\tcellular organisms; Eukaryota; Opisthokonta; Fungi; Mucoromycota; Glomeromycotina; Glomeromycetes; Diversisporales; Gigasporaceae; Gigaspora", - "Glomeris pustulata\t-\t1\t5\t-\t1288506\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Myriapoda; Diplopoda; Pentazonia; Glomerida; Glomeridae; Glomeris", - "Glossoscolex paulistus\t-\t1\t5\t-\t1046353\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Annelida; Clitellata; Oligochaeta; Haplotaxida; Lumbricina; Glossoscolecidae; Glossoscolex", - "Glycera dibranchiata\t-\t1\t5\t-\t6350\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Annelida; Polychaeta; Palpata; Aciculata; Phyllodocida; Glyceridae; Glycera", - "Glycine max\tsoybean\t1\t1\t11\t3847\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Phaseoleae; Glycine; Soja", - "Gongylonema pulchrum\t-\t1\t5\t-\t637853\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Spirurida; Spiruroidea; Gongylonematidae; Gongylonema", - "Gorilla gorilla gorilla\twestern lowland gorilla\t1\t2\t-\t9595\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; 
Hominoidea; Hominidae; Homininae; Gorilla; Gorilla gorilla", - "Gossypium arboreum\t-\t1\t1\t11\t29729\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium", - "Gossypium hirsutum\t-\t1\t1\t11\t3635\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium", - "Gossypium raimondii\t-\t1\t1\t11\t29730\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium", - "Graminella nigrifrons\t-\t1\t5\t-\t30127\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Clypeorrhyncha; Membracoidea; Cicadellidae; Deltocephalinae; Graminella", + "Geranium pyrenaicum\t-\t1\t1\t11\t379953\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Geraniales; Geraniaceae; Geranium", + "Geranium robertianum\t-\t1\t1\t11\t122183\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Geraniales; Geraniaceae; Geranium", + "Gigaspora margarita\t-\t1\t4\t-\t4874\tPLN\tEukaryota; Fungi; Fungi incertae sedis; Mucoromycota; Glomeromycotina; Glomeromycetes; 
Diversisporales; Gigasporaceae; Gigaspora", + "Glossoscolex paulistus\t-\t1\t5\t-\t1046353\tINV\tEukaryota; Metazoa; Lophotrochozoa; Annelida; Clitellata; Oligochaeta; Haplotaxida; Lumbricina; Glossoscolecidae; Glossoscolex", + "Glycera dibranchiata\t-\t1\t5\t-\t6350\tINV\tEukaryota; Metazoa; Lophotrochozoa; Annelida; Polychaeta; Palpata; Aciculata; Phyllodocida; Glyceridae; Glycera", + "Glycine\t-\t1\t1\t11\t3846\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine", + "Glycine canescens\t-\t1\t1\t11\t48924\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine", + "Glycine cyrtoloba\t-\t1\t1\t11\t45689\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine", + "Glycine dolichocarpa\t-\t1\t1\t11\t82538\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine", + "Glycine falcata\t-\t1\t1\t11\t45690\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; 
indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine", + "Glycine max\tsoybean\t1\t1\t11\t3847\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Soja", + "Glycine soja\t-\t1\t1\t11\t3848\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Soja", + "Glycine stenophita\t-\t1\t1\t11\t96944\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine", + "Glycine syndetika\t-\t1\t1\t11\t713886\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine", + "Glycine tomentella\t-\t1\t1\t11\t44015\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine", + "Gorilla gorilla gorilla\twestern lowland gorilla\t1\t2\t-\t9595\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Gorilla", + 
"Gossypium\t-\t1\t1\t11\t3633\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium", + "Gossypium arboreum\t-\t1\t1\t11\t29729\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium", + "Gossypium hirsutum\tcotton\t1\t1\t11\t3635\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium", + "Gossypium raimondii\t-\t1\t1\t11\t29730\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium", "gut metagenome\t-\t11\t2\t11\t749906\tENV\tunclassified sequences; metagenomes; organismal metagenomes", - "Habropoda laboriosa\t-\t1\t5\t-\t597456\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Anthophorinae; Anthophorini; Habropoda", - "Haliaeetus albicilla\twhite-tailed eagle\t1\t2\t-\t8969\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Falconiformes; Accipitridae; Accipitrinae; Haliaeetus", - "Halyomorpha halys\tbrown marmorated stink bug\t1\t5\t-\t286706\tINV\tcellular 
organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Neohemiptera; Prosorrhyncha; Heteroptera; Euheteroptera; Neoheteroptera; Panheteroptera; Pentatomomorpha; Pentatomoidea; Pentatomidae; Pentatominae; Halyomorpha", - "Hammondia hammondi\t-\t1\t4\t4\t99158\tINV\tcellular organisms; Eukaryota; Alveolata; Apicomplexa; Conoidasida; Coccidia; Eucoccidiorida; Eimeriorina; Sarcocystidae; Hammondia", - "Haplochromis burtoni\tBurton's mouthbrooder\t1\t2\t-\t8153\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; Haplochromis", - "Harpegnathos saltator\tJerdon's jumping ant\t1\t5\t-\t610380\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Ponerinae; Ponerini; Harpegnathos", - "Helicobacter pylori\t-\t11\t0\t-\t210\tBCT\tcellular organisms; Bacteria; Proteobacteria; delta/epsilon subdivisions; Epsilonproteobacteria; Campylobacterales; Helicobacteraceae; Helicobacter", - "Helicoverpa assulta\t-\t1\t5\t-\t52344\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; 
Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Noctuoidea; Noctuidae; Heliothinae; Helicoverpa", - "Heligmosomoides polygyrus\t-\t1\t5\t-\t6339\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Trichostrongyloidea; Heligmosomatidae; Heligmosomoides", - "Henricia sp. AR-2014\t-\t1\t9\t-\t1462731\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Spinulosacea; Spinulosida; Echinasteridae; Henricia", + "Haemophilus influenzae\t-\t11\t0\t-\t727\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Pasteurellales; Pasteurellaceae; Haemophilus", + "Haliaeetus albicilla\twhite-tailed eagle\t1\t2\t-\t8969\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Falconiformes; Accipitridae; Accipitrinae; Haliaeetus", + "Haliclystus sanjuanensis\t-\t1\t4\t-\t168739\tINV\tEukaryota; Metazoa; Cnidaria; Staurozoa; Stauromedusae; Myostaurida; Haliclystidae; Haliclystus", + "Halocynthia\t-\t1\t13\t-\t7728\tINV\tEukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Stolidobranchia; Pyuridae; Halocynthia", + "Halocynthia roretzi\t-\t1\t13\t-\t7729\tINV\tEukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Stolidobranchia; Pyuridae; Halocynthia", + "Haplochromis burtoni\tBurton's mouthbrooder\t1\t2\t-\t8153\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; Haplochromis", + "Helianthus\t-\t1\t1\t11\t4231\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; 
Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Heliantheae alliance; Heliantheae; Helianthus", + "Helianthus annuus\tcommon sunflower\t1\t1\t11\t4232\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Heliantheae alliance; Heliantheae; Helianthus", + "Helicobacter pylori\t-\t11\t0\t-\t210\tBCT\tBacteria; Proteobacteria; Epsilonproteobacteria; Campylobacterales; Helicobacteraceae; Helicobacter", + "Helicoverpa armigera\tcotton bollworm\t1\t5\t-\t29058\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Noctuoidea; Noctuidae; Heliothinae; Helicoverpa", + "Helobdella\t-\t1\t5\t-\t6411\tINV\tEukaryota; Metazoa; Lophotrochozoa; Annelida; Clitellata; Hirudinea; Hirudinida; Glossiphoniiformes; Glossiphoniidae; Helobdella", + "Helobdella robusta\t-\t1\t5\t-\t6412\tINV\tEukaryota; Metazoa; Lophotrochozoa; Annelida; Clitellata; Hirudinea; Hirudinida; Glossiphoniiformes; Glossiphoniidae; Helobdella", + "Hemicentrotus pulcherrimus\t-\t1\t9\t-\t7650\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Strongylocentrotidae; Hemicentrotus", + "Henricia sp. 
AR-2014\t-\t1\t9\t-\t1462731\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Spinulosacea; Spinulosida; Echinasteridae; Henricia", + "Hepacivirus C\t-\t1\t0\t-\t11103\tVRL\tViruses; ssRNA viruses; ssRNA positive-strand viruses, no DNA stage; Flaviviridae; Hepacivirus", "Hepatitis B virus\t-\t1\t0\t-\t10407\tVRL\tViruses; Retro-transcribing viruses; Hepadnaviridae; Orthohepadnavirus", - "Hepatitis C virus\t-\t1\t0\t-\t11103\tVRL\tViruses; ssRNA viruses; ssRNA positive-strand viruses, no DNA stage; Flaviviridae; Hepacivirus", - "Heterocephalus glaber\tnaked mole-rat\t1\t2\t-\t10181\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Hystricognathi; Bathyergidae; Heterocephalus", - "Heterodera glycines\tsoybean cyst nematode\t1\t5\t-\t51029\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Tylenchida; Tylenchina; Tylenchoidea; Heteroderidae; Heteroderinae; Heterodera", - "Hevea brasiliensis\t-\t1\t1\t11\t3981\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Micrandreae; Hevea", - "Homalodisca liturata\t-\t1\t5\t-\t320908\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Clypeorrhyncha; Membracoidea; Cicadellidae; Cicadellinae; unclassified Cicadellinae; Homalodisca", - "Homalodisca 
vitripennis\tglassy-winged sharpshooter\t1\t5\t-\t197043\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Clypeorrhyncha; Membracoidea; Cicadellidae; Cicadellinae; unclassified Cicadellinae; Homalodisca", - "Homo sapiens\thuman\t1\t2\t-\t9606\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Homininae; Homo", - "Hordeum pubiflorum\t-\t1\t1\t11\t112521\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum", - "Hordeum vulgare\t-\t1\t1\t11\t4513\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum", - "Hordeum vulgare subsp. 
vulgare\tdomesticated barley\t1\t1\t11\t112509\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum; Hordeum vulgare", + "Heterocephalus glaber\tnaked mole-rat\t1\t2\t-\t10181\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Hystricomorpha; Bathyergidae; Heterocephalus", + "Hevea brasiliensis\trubber tree\t1\t1\t11\t3981\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Micrandreae; Hevea", + "Hippocampus comes\ttiger tail seahorse\t1\t2\t-\t109280\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Syngnathiaria; Syngnathiformes; Syngnathoidei; Syngnathidae; Hippocampus", + "Hirudo\t-\t1\t5\t-\t6420\tINV\tEukaryota; Metazoa; Lophotrochozoa; Annelida; Clitellata; Hirudinea; Hirudinida; Hirudiniformes; Hirudinidae; Hirudo", + "Hirudo medicinalis\tmedicinal leech\t1\t5\t-\t6421\tINV\tEukaryota; Metazoa; Lophotrochozoa; Annelida; Clitellata; Hirudinea; Hirudinida; Hirudiniformes; Hirudinidae; Hirudo", + "Homo\t-\t1\t2\t-\t9605\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Homo", + "Homo sapiens\thuman\t1\t2\t-\t9606\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Homo", + "Hordeum\t-\t1\t1\t11\t4512\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; 
Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum", + "Hordeum pubiflorum\t-\t1\t1\t11\t112521\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum", + "Hordeum vulgare\t-\t1\t1\t11\t4513\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum", + "Hordeum vulgare subsp. vulgare\tdomesticated barley\t1\t1\t11\t112509\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum", + "Hucho taimen\ttaimen\t1\t2\t-\t201498\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Hucho", "human gut metagenome\t-\t11\t2\t11\t408170\tENV\tunclassified sequences; metagenomes; organismal metagenomes", - "Human immunodeficiency virus 1\t-\t1\t0\t-\t11676\tVRL\tViruses; Retro-transcribing viruses; Retroviridae; Orthoretrovirinae; Lentivirus; Primate lentivirus group", - "Humulus lupulus\tEuropean hop\t1\t1\t11\t3486\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Humulus", - "Humulus lupulus var. 
cordifolius\t-\t1\t1\t11\t278022\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Humulus; Humulus lupulus", - "Humulus lupulus var. lupulus\t-\t1\t1\t11\t1571165\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Humulus; Humulus lupulus", - "Hyalella azteca\t-\t1\t5\t-\t294128\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Amphipoda; Senticaudata; Talitrida; Talitroidea; Hyalellidae; Hyalella", - "Hyas araneus\t-\t1\t5\t-\t361634\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Brachyura; Eubrachyura; Heterotremata; Majoidea; Majidae; Hyas", - "Hydra vulgaris\t-\t1\t4\t-\t6087\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Hydrozoa; Hydroidolina; Anthoathecata; Aplanulata; Hydridae; Hydra", - "Hydractinia symbiolongicarpus\t-\t1\t4\t-\t13093\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Hydrozoa; Hydroidolina; Anthoathecata; Filifera; Hydractiniidae; Hydractinia", - "Hynobius chinensis\tChinese salamander\t1\t2\t-\t288313\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; 
Caudata; Cryptobranchoidea; Hynobiidae; Hynobius; Hynobius", - "Hynobius retardatus\tHokkaido salamander\t1\t2\t-\t36312\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Caudata; Cryptobranchoidea; Hynobiidae; Hynobius; Satobius", - "Hypsizygus marmoreus\t-\t1\t4\t-\t39966\tPLN\tcellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Basidiomycota; Agaricomycotina; Agaricomycetes; Agaricomycetidae; Agaricales; Lyophyllaceae; Hypsizygus", - "Ictalurus punctatus\tchannel catfish\t1\t2\t-\t7998\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Characiphysae; Siluriformes; Siluroidei; Ictaluridae; Ictalurus", - "Ictidomys tridecemlineatus\tthirteen-lined ground squirrel\t1\t2\t-\t43179\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Sciuridae; Xerinae; Marmotini; Ictidomys", - "Ipomoea batatas\tsweet potato\t1\t1\t11\t4120\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea", - "Ipomoea nil\tJapanese morning glory\t1\t1\t11\t35883\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; 
Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea", - "Ipomoea purpurea\tcommon morning-glory\t1\t1\t11\t4121\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea", - "Ipomoea trifida\t-\t1\t1\t11\t35884\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea", - "Ixodes ricinus\tcastor bean tick\t1\t5\t-\t34613\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Ixodinae; Ixodes", - "Ixodes scapularis\tblack-legged tick\t1\t5\t-\t6945\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Ixodinae; Ixodes", - "Jaculus jaculus\tlesser Egyptian jerboa\t1\t2\t-\t51337\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Dipodidae; Dipodinae; Jaculus", - "Jatropha curcas\t-\t1\t1\t11\t180498\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; 
Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Jatropheae; Jatropha", - "Juglans regia\tEnglish walnut\t1\t1\t11\t51240\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Juglandaceae; Juglans", - "Karelinia caspia\t-\t1\t1\t11\t313960\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Inuleae; Plucheinae; Karelinia", - "Kerria lacca\tcommon lac scale\t1\t5\t-\t473130\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aphidiformes; Coccoidea; Kerriidae; Kerria", - "Klebsiella pneumoniae\t-\t11\t0\t-\t573\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Klebsiella", - "Kryptolebias marmoratus\tmangrove rivulus\t1\t2\t-\t37003\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Rivulidae; Kryptolebias", - "Lactuca sativa\t-\t1\t1\t11\t4236\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; 
Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Cichorioideae; Cichorieae; Lactucinae; Lactuca", - "Lactuca serriola\t-\t1\t1\t11\t75943\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Cichorioideae; Cichorieae; Lactucinae; Lactuca", - "Lagenaria siceraria\twhite-flowered gourd\t1\t1\t11\t3668\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Benincaseae; Lagenaria", - "Landoltia punctata\t-\t1\t1\t11\t50518\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Alismatales; Araceae; Lemnoideae; Landoltia", - "Larimichthys crocea\tlarge yellow croaker\t1\t2\t-\t215358\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Eupercaria incertae sedis; Sciaenidae; Larimichthys", - "Larix kaempferi\t-\t1\t1\t11\t54800\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Pinales; Pinaceae; Larix", - "Lasius neglectus\t-\t1\t5\t-\t111072\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; 
Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Lasiini; Lasius; Lasius", - "Lasius turcicus\t-\t1\t5\t-\t235463\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Lasiini; Lasius; Lasius", - "Lates calcarifer\tbarramundi perch\t1\t2\t-\t8187\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Carangaria; Carangiaria incertae sedis; Centropomidae; Lates", - "Lathyrus sativus\t-\t1\t1\t11\t3860\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Fabeae; Lathyrus", - "Latimeria chalumnae\tcoelacanth\t1\t2\t-\t7897\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Coelacanthimorpha; Coelacanthiformes; Coelacanthidae; Latimeria", - "Latimeria menadoensis\tMenado coelacanth\t1\t2\t-\t106881\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Coelacanthimorpha; 
Coelacanthiformes; Coelacanthidae; Latimeria", - "Latrodectus hesperus\twestern black widow\t1\t5\t-\t256737\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Araneae; Araneomorphae; Entelegynae; Orbiculariae; Araneoidea; Theridiidae; Latrodectus", - "Legionella pneumophila\t-\t11\t0\t-\t446\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Legionellales; Legionellaceae; Legionella", - "Lepeophtheirus salmonis\tsalmon louse\t1\t5\t-\t72036\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Copepoda; Neocopepoda; Podoplea; Siphonostomatoida; Caligidae; Lepeophtheirus", - "Lepidonotothen nudifrons\tyellowfin notie\t1\t2\t-\t83203\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Perciformes; Notothenioidei; Nototheniidae; Lepidonotothen", - "Lepidothrix coronata\tblue-crowned manakin\t1\t2\t-\t321398\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Pipridae; Lepidothrix", - "Lepisosteus oculatus\tspotted gar\t1\t2\t-\t7918\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; 
Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Holostei; Semionotiformes; Lepisosteidae; Lepisosteus", - "Leptasterias sp. AR-2014\t-\t1\t9\t-\t1462732\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Leptasterias", - "Leptonychotes weddellii\tWeddell seal\t1\t2\t-\t9713\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Phocidae; Leptonychotes", - "Leptosomus discolor\tcuckoo roller\t1\t2\t-\t188344\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Coraciiformes; Leptosomidae; Leptosomus", - "Leptospira interrogans\t-\t11\t0\t-\t173\tBCT\tcellular organisms; Bacteria; Spirochaetes; Spirochaetia; Leptospirales; Leptospiraceae; Leptospira", - "Limnephilus lunatus\t-\t1\t5\t-\t1218281\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Trichoptera; Integripalpia; Plenitentoria; Limnephiloidea; Limnephilidae; Limnephilinae; Limnephilini; Limnephilus", - "Limnoperna fortunei\t-\t1\t5\t-\t356393\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Mytiloida; Mytiloidea; Mytilidae; Mytilinae; 
Limnoperna", - "Limulus polyphemus\tAtlantic horseshoe crab\t1\t5\t-\t6850\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Merostomata; Xiphosura; Limulidae; Limulus", - "Linepithema humile\tArgentine ant\t1\t5\t-\t83485\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Dolichoderinae; Linepithema", - "Lingula anatina\t-\t1\t5\t-\t7574\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Brachiopoda; Linguliformea; Lingulata; Lingulida; Linguloidea; Lingulidae; Lingula", - "Lingulodinium polyedrum\t-\t1\t4\t11\t160621\tPLN\tcellular organisms; Eukaryota; Alveolata; Dinophyceae; Gonyaulacales; Lingulodinium", - "Lipotes vexillifer\tYangtze River dolphin\t1\t2\t-\t118797\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Odontoceti; Lipotidae; Lipotes", - "Listeria monocytogenes\t-\t11\t0\t-\t1639\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Bacillales; Listeriaceae; Listeria", - "Litchi chinensis\t-\t1\t1\t11\t151069\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Sapindaceae; Litchi", - "Litopenaeus vannamei\tPacific white shrimp\t1\t5\t-\t6689\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; 
Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Dendrobranchiata; Penaeoidea; Penaeidae; Litopenaeus", - "Loa loa\teye worm\t1\t5\t-\t7209\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Spirurida; Filarioidea; Onchocercidae; Loa", - "Lolium perenne\t-\t1\t1\t11\t4522\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Loliinae; Lolium", - "Lotus corniculatus\t-\t1\t1\t11\t47247\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Loteae; Lotus", - "Loxodonta africana\tAfrican savanna elephant\t1\t2\t-\t9785\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Afrotheria; Proboscidea; Elephantidae; Loxodonta", - "Luidia clathrata\t-\t1\t9\t-\t133437\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Valvatacea; Paxillosida; Luidiidae; Luidia", - "Lupinus angustifolius\tnarrow-leaved blue lupine\t1\t1\t11\t3871\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; 
fabids; Fabales; Fabaceae; Papilionoideae; Genisteae; Lupinus", - "Lygodium japonicum\t-\t1\t1\t11\t13824\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Moniliformopses; Polypodiidae; Schizaeales; Lygodiaceae; Lygodium", - "Lygus hesperus\tlygus bug\t1\t5\t-\t30085\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Neohemiptera; Prosorrhyncha; Heteroptera; Euheteroptera; Neoheteroptera; Panheteroptera; Cimicomorpha; Cimicoidea; Miridae; Mirinae; Mirini; Lygus", - "Lymnaea stagnalis\tgreat pond snail\t1\t5\t-\t6523\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Hygrophila; Lymnaeoidea; Lymnaeidae; Lymnaea", - "Lynx pardinus\tSpanish lynx\t1\t2\t-\t191816\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Feliformia; Felidae; Felinae; Lynx", - "Lytechinus variegatus\tgreen sea urchin\t1\t9\t-\t7654\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Temnopleuroida; Toxopneustidae; Lytechinus", - "Macaca fascicularis\tcrab-eating macaque\t1\t2\t-\t9541\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; 
Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Macaca", - "Macaca mulatta\tRhesus monkey\t1\t2\t-\t9544\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Macaca", - "Macaca nemestrina\tpig-tailed macaque\t1\t2\t-\t9545\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Macaca", - "Macropus eugenii\ttammar wallaby\t1\t2\t-\t9315\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Metatheria; Diprotodontia; Macropodidae; Macropus", - "Malus domestica\tapple\t1\t1\t11\t3750\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Maleae; Malus", - "Manacus vitellinus\tgolden-collared manakin\t1\t2\t-\t328815\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; 
Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Pipridae; Manacus", - "Mandrillus leucophaeus\tdrill\t1\t2\t-\t9568\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Mandrillus", - "Mangifera indica\tmango\t1\t1\t11\t29780\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Anacardiaceae; Mangifera", - "Manihot esculenta\tcassava\t1\t1\t11\t3983\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Manihoteae; Manihot", - "Manis javanica\tMalayan pangolin\t1\t2\t-\t9974\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Pholidota; Manidae; Manis", + "Human immunodeficiency virus 1\t-\t1\t0\t-\t11676\tVRL\tViruses; Ortervirales; Retroviridae; Orthoretrovirinae; Lentivirus", + "Humulus lupulus\tEuropean hop\t1\t1\t11\t3486\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Humulus", + "Humulus 
lupulus var. cordifolius\t-\t1\t1\t11\t278022\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Humulus", + "Humulus lupulus var. lupulus\t-\t1\t1\t11\t1571165\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Humulus", + "Hydra\t-\t1\t4\t-\t6083\tINV\tEukaryota; Metazoa; Cnidaria; Hydrozoa; Hydroidolina; Anthoathecata; Aplanulata; Hydridae; Hydra", + "Hydra vulgaris\t-\t1\t4\t-\t6087\tINV\tEukaryota; Metazoa; Cnidaria; Hydrozoa; Hydroidolina; Anthoathecata; Aplanulata; Hydridae; Hydra", + "Hynobius chinensis\tChinese salamander\t1\t2\t-\t288313\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Caudata; Cryptobranchoidea; Hynobiidae; Hynobius; Hynobius", + "Hynobius retardatus\tHokkaido salamander\t1\t2\t-\t36312\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Caudata; Cryptobranchoidea; Hynobiidae; Hynobius; Satobius", + "Ictalurus\t-\t1\t2\t-\t7997\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Siluriformes; Ictaluridae; Ictalurus", + "Ictalurus furcatus\tblue catfish\t1\t2\t-\t66913\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Siluriformes; Ictaluridae; Ictalurus", + "Ictalurus punctatus\tchannel catfish\t1\t2\t-\t7998\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Siluriformes; Ictaluridae; Ictalurus", + "Ictidomys tridecemlineatus\tthirteen-lined ground squirrel\t1\t2\t-\t43179\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; 
Euarchontoglires; Glires; Rodentia; Sciuromorpha; Sciuridae; Xerinae; Marmotini; Ictidomys", + "Influenza A virus\t-\t1\t0\t-\t11320\tVRL\tViruses; ssRNA viruses; ssRNA negative-strand viruses; Negarnaviricota; Polyploviricotina; Insthoviricetes; Articulavirales; Orthomyxoviridae; Alphainfluenzavirus", + "Ipomoea\t-\t1\t1\t11\t4119\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea", + "Ipomoea batatas\tsweet potato\t1\t1\t11\t4120\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea", + "Ipomoea nil\tJapanese morning glory\t1\t1\t11\t35883\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea", + "Ipomoea purpurea\tcommon morning-glory\t1\t1\t11\t4121\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea", + "Ipomoea trifida\t-\t1\t1\t11\t35884\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea", + "Ixodes\t-\t1\t5\t-\t6944\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Ixodinae; Ixodes", + "Ixodes scapularis\tblack-legged tick\t1\t5\t-\t6945\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Ixodinae; Ixodes", + "Jaculus jaculus\tlesser 
Egyptian jerboa\t1\t2\t-\t51337\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Dipodoidea; Dipodidae; Dipodinae; Jaculus", + "Jatropha curcas\t-\t1\t1\t11\t180498\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Jatropheae; Jatropha", + "Juglans regia\tEnglish walnut\t1\t1\t11\t51240\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Juglandaceae; Juglans", + "Karelinia caspia\t-\t1\t1\t11\t313960\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Inuleae; Plucheinae; Karelinia", + "Kerria lacca\tcommon lac scale\t1\t5\t-\t473130\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Coccoidea; Kerriidae; Kerria", + "Klebsiella pneumoniae\t-\t11\t0\t-\t573\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Klebsiella", + "Klebsiella pneumoniae subsp. 
pneumoniae\t-\t11\t0\t-\t72407\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Klebsiella", + "Lactuca\t-\t1\t1\t11\t4235\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Cichorioideae; Cichorieae; Lactucinae; Lactuca", + "Lactuca sativa\t-\t1\t1\t11\t4236\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Cichorioideae; Cichorieae; Lactucinae; Lactuca", + "Lactuca serriola\t-\t1\t1\t11\t75943\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Cichorioideae; Cichorieae; Lactucinae; Lactuca", + "Lasius neglectus\t-\t1\t5\t-\t111072\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Lasius; Lasius", + "Lasius turcicus\t-\t1\t5\t-\t235463\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Lasius; Lasius", + "Lates calcarifer\tbarramundi perch\t1\t2\t-\t8187\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Carangaria; Carangaria incertae sedis; Centropomidae; Lates", + "Lathyrus sativus\t-\t1\t1\t11\t3860\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Fabeae; Lathyrus", + 
"Latimeria chalumnae\tcoelacanth\t1\t2\t-\t7897\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Coelacanthiformes; Coelacanthidae; Latimeria", + "Latrodectus hesperus\twestern black widow\t1\t5\t-\t256737\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Araneae; Araneomorphae; Entelegynae; Araneoidea; Theridiidae; Latrodectus", + "Lepeophtheirus\t-\t1\t5\t-\t72035\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Hexanauplia; Copepoda; Siphonostomatoida; Caligidae; Lepeophtheirus", + "Lepeophtheirus salmonis\tsalmon louse\t1\t5\t-\t72036\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Hexanauplia; Copepoda; Siphonostomatoida; Caligidae; Lepeophtheirus", + "Lepidonotothen nudifrons\tyellowfin notie\t1\t2\t-\t83203\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Perciformes; Notothenioidei; Nototheniidae; Lepidonotothen", + "Lepisosteus oculatus\tspotted gar\t1\t2\t-\t7918\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Holostei; Semionotiformes; Lepisosteidae; Lepisosteus", + "Leptasterias sp. 
AR-2014\t-\t1\t9\t-\t1462732\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Leptasterias", + "Leptonychotes weddellii\tWeddell seal\t1\t2\t-\t9713\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Phocidae; Leptonychotes", + "Leptosomus discolor\tcuckoo roller\t1\t2\t-\t188344\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Coraciiformes; Leptosomidae; Leptosomus", + "Ligia exotica\t-\t1\t5\t-\t142080\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Oniscidea; Diplocheta; Ligiidae; Ligia", + "Limulus polyphemus\tAtlantic horseshoe crab\t1\t5\t-\t6850\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Merostomata; Xiphosura; Limulidae; Limulus", + "Linepithema humile\tArgentine ant\t1\t5\t-\t83485\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Dolichoderinae; Linepithema", + "Lingulodinium polyedra\t-\t1\t4\t11\t160621\tPLN\tEukaryota; Alveolata; Dinophyceae; Gonyaulacales; Lingulodinium", + "Linum\t-\t1\t1\t11\t4005\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Linaceae; Linum", + "Linum usitatissimum\tflax\t1\t1\t11\t4006\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Linaceae; Linum", + "Listeria monocytogenes\t-\t11\t0\t-\t1639\tBCT\tBacteria; Firmicutes; Bacilli; Bacillales; Listeriaceae; Listeria", + "Litchi 
chinensis\t-\t1\t1\t11\t151069\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Sapindaceae; Litchi", + "Lolium\t-\t1\t1\t11\t4520\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Loliinae; Lolium", + "Lolium perenne\t-\t1\t1\t11\t4522\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Loliinae; Lolium", + "Lottia\t-\t1\t5\t-\t72691\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Patellogastropoda; Lottioidea; Lottiidae; Lottia", + "Lottia gigantea\towl limpet\t1\t5\t-\t225164\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Patellogastropoda; Lottioidea; Lottiidae; Lottia", + "Lotus\t-\t1\t1\t11\t3867\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; robinioid clade; Loteae; Lotus", + "Lotus japonicus\t-\t1\t1\t11\t34305\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; robinioid clade; Loteae; Lotus", + "Lucernaria quadricornis\t-\t1\t4\t-\t1843199\tINV\tEukaryota; Metazoa; Cnidaria; Staurozoa; Stauromedusae; Myostaurida; Lucernariidae; Lucernaria", + "Luidia clathrata\t-\t1\t9\t-\t133437\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Valvatacea; Paxillosida; Luidiidae; Luidia", + 
"Lupinus angustifolius\tnarrow-leaved blue lupine\t1\t1\t11\t3871\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; genistoids sensu lato; core genistoids; Genisteae; Lupinus", + "Lygus hesperus\tlygus bug\t1\t5\t-\t30085\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Heteroptera; Panheteroptera; Cimicomorpha; Miridae; Mirini; Lygus", + "Lymnaea stagnalis\tgreat pond snail\t1\t5\t-\t6523\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Hygrophila; Lymnaeoidea; Lymnaeidae; Lymnaea", + "Lytechinus variegatus\tgreen sea urchin\t1\t9\t-\t7654\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Temnopleuroida; Toxopneustidae; Lytechinus", + "Macaca\tmacaques\t1\t2\t-\t9539\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Macaca", + "Macaca fascicularis\tcrab-eating macaque\t1\t2\t-\t9541\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Macaca", + "Macaca fuscata fuscata\tJapanese macaque\t1\t2\t-\t9543\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Macaca", + "Macaca mulatta\tRhesus monkey\t1\t2\t-\t9544\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Macaca", + "Macaca nemestrina\tpig-tailed 
macaque\t1\t2\t-\t9545\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Macaca", + "Machilis hrabei\t-\t1\t5\t-\t438506\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Monocondylia; Archaeognatha; Machilidae; Machilis", + "Magnaporthe\t-\t1\t4\t-\t148303\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Sordariomycetidae; Magnaporthales; Magnaporthaceae; Magnaporthe", + "Magnaporthe grisea\t-\t1\t4\t-\t148305\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Sordariomycetidae; Magnaporthales; Magnaporthaceae; Magnaporthe", + "Malus\t-\t1\t1\t11\t3749\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Amygdaloideae; Maleae; Malus", + "Malus domestica\tapple\t1\t1\t11\t3750\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Amygdaloideae; Maleae; Malus", + "Manacus vitellinus\tgolden-collared manakin\t1\t2\t-\t328815\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Pipridae; Manacus", + "Mangifera indica\tmango\t1\t1\t11\t29780\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Anacardiaceae; Mangifera", + "Manihot\t-\t1\t1\t11\t3982\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Manihoteae; Manihot", + 
"Manihot esculenta\tcassava\t1\t1\t11\t3983\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Manihoteae; Manihot", + "Manis javanica\tMalayan pangolin\t1\t2\t-\t9974\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Pholidota; Manidae; Manis", "marine metagenome\t-\t11\t2\t11\t408172\tENV\tunclassified sequences; metagenomes; ecological metagenomes", "marine sediment metagenome\t-\t11\t2\t11\t412755\tENV\tunclassified sequences; metagenomes; ecological metagenomes", - "Marmota marmota marmota\tAlpine marmot\t1\t2\t-\t9994\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Sciuridae; Xerinae; Marmotini; Marmota; Marmota marmota", - "Marthasterias glacialis\tspiny starfish\t1\t9\t-\t7609\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Marthasterias", - "Maylandia zebra\tzebra mbuna\t1\t2\t-\t106582\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; Maylandia; Maylandia zebra complex", - "Medauroidea 
extradentata\t-\t1\t5\t-\t614211\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Orthopteroidea; Phasmatodea; Verophasmatodea; Anareolatae; Phasmatidae; Phasmatinae; Clitumnini; Medauroidea", - "Medicago sativa\t-\t1\t1\t11\t3879\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Medicago", - "Medicago truncatula\tbarrel medic\t1\t1\t11\t3880\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Medicago", - "Megachile rotundata\talfalfa leafcutting bee\t1\t5\t-\t143995\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Megachilidae; Megachilinae; Megachilini; Megachile", - "Megaderma lyra\tIndian false vampire\t1\t2\t-\t9413\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Megadermatidae; Megaderma", - "Megajapyx sp. 
UVienna-2012\t-\t1\t5\t-\t1136246\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Diplura; Diplura; Dicellurata; Japygoidea; Japygidae; Japyginae; Megajapyx", - "Megaselia scalaris\t-\t1\t5\t-\t36166\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Aschiza; Platypezoidea; Phoridae; Metopininae; Megaseliini; Megaselia", - "Meleagris gallopavo\tturkey\t1\t2\t-\t9103\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Phasianidae; Meleagridinae; Meleagris", - "Melopsittacus undulatus\tbudgerigar\t1\t2\t-\t13146\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Psittaciformes; Psittaculidae; Melopsittacus", - "Mengenilla moldrzyki\t-\t1\t5\t-\t1155016\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Strepsiptera; Mengenillidia; Mengenilloidea; Mengenillidae; Mengenilla", - "Meretrix meretrix\tAsiatic hard clam\t1\t5\t-\t291251\tINV\tcellular 
organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Heteroconchia; Euheterodonta; Veneroida; Veneroidea; Veneridae; Meretrix", - "Merops nubicus\tcarmine bee-eater\t1\t2\t-\t57421\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Coraciiformes; Meropidae; Merops", - "Mesembryanthemum crystallinum\tcommon iceplant\t1\t1\t11\t3544\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Aizoaceae; Mesembryanthemum; Cryophytum", - "Mesitornis unicolor\tbrown roatelo\t1\t2\t-\t54374\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Mesitornithidae; Mesitornis", - "Mesocricetus auratus\tgolden hamster\t1\t2\t-\t10036\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Cricetidae; Cricetinae; Mesocricetus", - "Metaseiulus occidentalis\twestern predatory mite\t1\t5\t-\t34638\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; 
Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Mesostigmata; Monogynaspida; Gamasina; Phytoseioidea; Phytoseiidae; Typhlodrominae; Metaseiulus", - "Microcebus murinus\tgray mouse lemur\t1\t2\t-\t30608\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Strepsirrhini; Lemuriformes; Cheirogaleidae; Microcebus", - "Microplitis demolitor\t-\t1\t5\t-\t69319\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Ichneumonoidea; Braconidae; Microgastrinae; Microplitis", - "Micropterix calthella\t-\t1\t5\t-\t41027\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Zeugloptera; Micropterigidae; Micropterix", - "Micropterus floridanus\tFlorida bass\t1\t2\t-\t225391\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Centrarchiformes; Centrarchoidei; Centrarchidae; Micropterus", - "Micropterus salmoides\tlargemouth bass\t1\t2\t-\t27706\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; 
Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Centrarchiformes; Centrarchoidei; Centrarchidae; Micropterus", - "Micropterus salmoides salmoides\tnorthern largemouth bass\t1\t2\t-\t489037\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Centrarchiformes; Centrarchoidei; Centrarchidae; Micropterus; Micropterus salmoides", - "Microtus ochrogaster\tprairie vole\t1\t2\t-\t79684\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Cricetidae; Arvicolinae; Microtus", - "Miichthys miiuy\tMi-iuy croaker\t1\t2\t-\t240162\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Eupercaria incertae sedis; Sciaenidae; Miichthys", - "mine drainage metagenome\t-\t11\t2\t11\t410659\tENV\tunclassified sequences; metagenomes; ecological metagenomes", - "Mischocyttarus flavitarsis\t-\t1\t5\t-\t231975\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; 
Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Vespidae; Polistinae; Mischocyttarini; Mischocyttarus", - "Momordica charantia\t-\t1\t1\t11\t3673\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Momordiceae; Momordica", - "Moniezia expansa\tsheep tapeworm\t1\t9\t-\t28841\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Cestoda; Eucestoda; Cyclophyllidea; Anoplocephalidae; Moniezia", - "Monodelphis domestica\tgray short-tailed opossum\t1\t2\t-\t13616\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Metatheria; Didelphimorphia; Didelphidae; Didelphinae; Monodelphis", - "Monomorium chinense\t-\t1\t5\t-\t482359\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Solenopsidini; Monomorium", - "Monomorium pharaonis\tpharaoh ant\t1\t5\t-\t307658\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Solenopsidini; Monomorium", - "Morone chrysops\twhite bass\t1\t2\t-\t46259\tVRT\tcellular 
organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Eupercaria incertae sedis; Moronidae; Morone", - "Morone saxatilis\tstriped sea-bass\t1\t2\t-\t34816\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Eupercaria incertae sedis; Moronidae; Morone", - "Morus notabilis\t-\t1\t1\t11\t981085\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Moraceae; Morus", - "Mus musculus\thouse mouse\t1\t2\t-\t10090\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Muridae; Murinae; Mus; Mus", - "Musa ABB Group\t-\t1\t1\t11\t214693\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Zingiberales; Musaceae; Musa; Musa x paradisiaca", - "Musa acuminata AAA Group\tdessert banana\t1\t1\t11\t214697\tPLN\tcellular organisms; 
Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Zingiberales; Musaceae; Musa; Musa acuminata", - "Musa acuminata subsp. malaccensis\twild Malaysian banana\t1\t1\t11\t214687\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Zingiberales; Musaceae; Musa; Musa acuminata", - "Musca domestica\thouse fly\t1\t5\t-\t7370\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Calyptratae; Muscoidea; Muscidae; Muscinae; Muscini; Musca; Musca", - "Mustela putorius furo\tdomestic ferret\t1\t2\t-\t9669\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Mustelidae; Mustelinae; Mustela; Mustela putorius", - "Mycobacterium abscessus\t-\t11\t0\t-\t36809\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Actinobacteria; Actinobacteria; Corynebacteriales; Mycobacteriaceae; Mycobacterium; Mycobacterium chelonae group; Mycobacterium abscessus subgroup", - "Mycobacterium tuberculosis\t-\t11\t0\t-\t1773\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Actinobacteria; Actinobacteria; Corynebacteriales; Mycobacteriaceae; Mycobacterium; Mycobacterium tuberculosis complex", - "Myotis brandtii\tBrandt's bat\t1\t2\t-\t109478\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; 
Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis", - "Myotis davidii\t-\t1\t2\t-\t225400\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis", - "Myotis lucifugus\tlittle brown bat\t1\t2\t-\t59463\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis", - "Myotis ricketti\tRickett's big-footed Myotis\t1\t2\t-\t203696\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis", - "Myrionecta rubra\t-\t1\t4\t11\t283649\tINV\tcellular organisms; Eukaryota; Alveolata; Ciliophora; Intramacronucleata; Litostomatea; Haptoria; Cyclotrichida; Mesodiniidae; Myrionecta", - "Myrmica rubra\t-\t1\t5\t-\t106198\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Myrmicini; Myrmica", - 
"Myrmica ruginodis\t-\t1\t5\t-\t34708\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Myrmicini; Myrmica", - "Myrmica sulcinodis\t-\t1\t5\t-\t229918\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Myrmicini; Myrmica", - "Mytilus galloprovincialis\tMediterranean mussel\t1\t5\t-\t29158\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Mytiloida; Mytiloidea; Mytilidae; Mytilinae; Mytilus", - "Nannochorista philpotti\t-\t1\t5\t-\t1260225\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Mecoptera; Nannochoristidae; Nannochorista", - "Nannospalax galili\tUpper Galilee mountains blind mole rat\t1\t2\t-\t1026970\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Spalacidae; Spalacinae; Nannospalax", - "Nanorana parkeri\t-\t1\t2\t-\t125878\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; 
Tetrapoda; Amphibia; Batrachia; Anura; Neobatrachia; Ranoidea; Dicroglossidae; Dicroglossinae; Nanorana", - "Nasonia vitripennis\tjewel wasp\t1\t5\t-\t7425\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Chaldicoidea group; Chalcidoidea; Pteromalidae; Pteromalinae; Nasonia", - "Necator americanus\t-\t1\t5\t-\t51031\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Ancylostomatoidea; Ancylostomatidae; Bunostominae; Necator", - "Neisseria gonorrhoeae\t-\t11\t0\t-\t485\tBCT\tcellular organisms; Bacteria; Proteobacteria; Betaproteobacteria; Neisseriales; Neisseriaceae; Neisseria", - "Neisseria meningitidis\t-\t11\t0\t-\t487\tBCT\tcellular organisms; Bacteria; Proteobacteria; Betaproteobacteria; Neisseriales; Neisseriaceae; Neisseria", - "Nelumbo nucifera\tsacred lotus\t1\t1\t11\t4432\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; stem eudicotyledons; Proteales; Nelumbonaceae; Nelumbo", - "Nematostella vectensis\tstarlet sea anemone\t1\t4\t-\t45351\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Edwardsiidae; Nematostella", - "Neolamarckia cadamba\t-\t1\t1\t11\t153762\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Rubiaceae; Cinchonoideae; Naucleeae; Neolamarckia", - "Neolamprologus brichardi\t-\t1\t2\t-\t32507\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; 
Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Lamprologini; Neolamprologus", - "Nestor notabilis\tKea\t1\t2\t-\t176057\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Psittaciformes; Psittacidae; Nestor", - "Nicotiana attenuata\t-\t1\t1\t11\t49451\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana", - "Nicotiana benthamiana\t-\t1\t1\t11\t4100\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana", - "Nicotiana sylvestris\twood tobacco\t1\t1\t11\t4096\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana", - "Nicotiana tabacum\tcommon tobacco\t1\t1\t11\t4097\tPLN\tcellular organisms; Eukaryota; Viridiplantae; 
Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana", - "Nicotiana tomentosiformis\t-\t1\t1\t11\t4098\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana", - "Nicrophorus vespilloides\t-\t1\t5\t-\t110193\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Staphyliniformia; Staphylinoidea; Silphidae; Nicrophorinae; Nicrophorus", - "Nilaparvata lugens\tbrown planthopper\t1\t5\t-\t108931\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Neohemiptera; Archaeorrhyncha; Fulgoroidea; Delphacidae; Delphacinae; Nilaparvata", - "Nipponia nippon\tcrested ibis\t1\t2\t-\t128390\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Threskiornithidae; Nipponia", - "Nitella hyalina\t-\t1\t1\t11\t181804\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Charophyceae; Charales; Characeae; Nitella", - "Nitella 
mirabilis\t-\t1\t1\t11\t231897\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Charophyceae; Charales; Characeae; Nitella", - "Noccaea caerulescens\t-\t1\t1\t11\t107243\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Coluteocarpeae; Noccaea", - "Nomascus leucogenys\tnorthern white-cheeked gibbon\t1\t2\t-\t61853\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hylobatidae; Nomascus", - "Nothobranchius furzeri\tturquoise killifish\t1\t2\t-\t105023\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Nothobranchius", - "Nothobranchius kadleci\t-\t1\t2\t-\t1051664\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Nothobranchius", - 
"Nothobranchius korthausae\t-\t1\t2\t-\t1143690\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Nothobranchius", - "Nothobranchius kuhntae\tBeira killifish\t1\t2\t-\t321403\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Nothobranchius", - "Nothobranchius pienaari\t-\t1\t2\t-\t704102\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Nothobranchius", - "Nothobranchius rachovii\tbluefin notho\t1\t2\t-\t451742\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; 
Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Nothobranchius", - "Notholithocarpus densiflorus\t-\t1\t1\t11\t165545\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Fagaceae; Notholithocarpus", - "Notothenia coriiceps\tblack rockcod\t1\t2\t-\t8208\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Perciformes; Notothenioidei; Nototheniidae; Notothenia", - "Numida meleagris\thelmeted guineafowl\t1\t2\t-\t8996\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Numididae; Numida", - "Nylanderia pubens\t-\t1\t5\t-\t613973\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Lasiini; Nylanderia", - "Ochotona princeps\tAmerican pika\t1\t2\t-\t9978\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; 
Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Lagomorpha; Ochotonidae; Ochotona", - "Ochromonas sp. LO244K-D\t-\t1\t1\t11\t1825117\tPLN\tcellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; Chromulinales; Chromulinaceae; Ochromonas", - "Octodon degus\tdegu\t1\t2\t-\t10160\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Hystricognathi; Octodontidae; Octodon", - "Octopus bimaculoides\t-\t1\t5\t-\t37653\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Cephalopoda; Coleoidea; Neocoleoidea; Octopodiformes; Octopoda; Incirrata; Octopodidae; Octopus", - "Odobenus rosmarus divergens\tPacific walrus\t1\t2\t-\t9708\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Odobenidae; Odobenus; Odobenus rosmarus", - "Oenococcus oeni\t-\t11\t0\t-\t1247\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Leuconostocaceae; Oenococcus", - "Oesophagostomum dentatum\t-\t1\t5\t-\t61180\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Strongyloidea; Cloacinidae; Oesophagostomum", - "Olavius algarvensis\t-\t1\t5\t-\t188229\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Annelida; Clitellata; Oligochaeta; Haplotaxida; Tubificina; Tubificidae; Phallodrilinae; 
Olavius", - "Olea europaea\tcommon olive\t1\t1\t11\t4146\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Oleaceae; Oleeae; Olea", - "Onchocerca flexuosa\t-\t1\t5\t-\t387005\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Spirurida; Filarioidea; Onchocercidae; Onchocerca", - "Oncidium hybrid cultivar\t-\t1\t1\t11\t141207\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Orchidaceae; Epidendroideae; Cymbidieae; Oncidiinae; Oncidium", - "Oncorhynchus masou masou\tcherry salmon\t1\t2\t-\t90313\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus; Oncorhynchus masou", - "Oncorhynchus mykiss\trainbow trout\t1\t2\t-\t8022\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus", - "Onthophagus nigriventris\t-\t1\t5\t-\t476074\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; 
Polyphaga; Scarabaeiformia; Scarabaeoidea; Scarabaeidae; Scarabaeinae; Scarabaeinae incertae sedis; Onthophagus", - "Ophiocoma echinata\t-\t1\t9\t-\t331088\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Ophiuroidea; Ophiuridea; Ophiurida; Ophiurina; Gnathophiurina; Ophiocomidae; Ophiocominae; Ophiocoma", - "Ophthalmotilapia ventralis\t-\t1\t2\t-\t27755\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Ectodini; Ophthalmotilapia", - "Opisthorchis viverrini\t-\t1\t9\t-\t6198\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Opisthorchiida; Opisthorchiata; Opisthorchiidae; Opisthorchis", - "Orchesella cincta\t-\t1\t5\t-\t48709\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Collembola; Collembola; Entomobryomorpha; Entomobryoidea; Entomobryidae; Orchesellinae; Orchesella", - "Oreochromis niloticus\tNile tilapia\t1\t2\t-\t8128\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; 
Oreochromini; Oreochromis", - "Ornithorhynchus anatinus\tplatypus\t1\t2\t-\t9258\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Prototheria; Monotremata; Ornithorhynchidae; Ornithorhynchus", - "Oropsylla silantiewi\t-\t1\t5\t-\t1461318\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Siphonaptera; Ceratophyllomorpha; Ceratophylloidea; Ceratophyllidae; Oropsylla", - "Orycteropus afer afer\t-\t1\t2\t-\t1230840\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Afrotheria; Tubulidentata; Orycteropodidae; Orycteropus; Orycteropus afer", - "Oryctolagus cuniculus\trabbit\t1\t2\t-\t9986\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Lagomorpha; Leporidae; Oryctolagus", - "Oryza brachyantha\tmalo sina\t1\t1\t11\t4533\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza", - "Oryza sativa\trice\t1\t1\t11\t4530\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; 
Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza", - "Oryza sativa Indica Group\tlong-grained rice\t1\t1\t11\t39946\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza sativa", - "Oryza sativa Japonica Group\tJapanese rice\t1\t1\t11\t39947\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza sativa", - "Oryzias latipes\tJapanese medaka\t1\t2\t-\t8090\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Beloniformes; Adrianichthyoidei; Adrianichthyidae; Oryziinae; Oryzias", - "Osmia cornuta\t-\t1\t5\t-\t185587\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Megachilidae; Megachilinae; Osmiini; Osmia", - "Ostrinia furnacalis\tAsian corn borer\t1\t5\t-\t93504\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; 
Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Pyraloidea; Crambidae; Pyraustinae; Ostrinia", - "Ostrinia nubilalis\tEuropean corn borer\t1\t5\t-\t29057\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Pyraloidea; Crambidae; Pyraustinae; Ostrinia", - "Otolemur garnettii\tsmall-eared galago\t1\t2\t-\t30611\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Strepsirrhini; Lorisiformes; Galagidae; Otolemur", - "Ovis aries\tsheep\t1\t2\t-\t9940\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Caprinae; Ovis", - "Ovis aries musimon\tmouflon\t1\t2\t-\t9938\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Caprinae; Ovis; Ovis aries", - "Pachycladon fastigiatum\t-\t1\t1\t11\t106774\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; 
eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Microlepidieae; Pachycladon", - "Pachypsylla venusta\thackberry petiole gall psyllid\t1\t5\t-\t38123\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Psylliformes; Psylloidea; Psyllidae; Pachypsylla", - "Paeonia lactiflora\tChinese peony\t1\t1\t11\t35924\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Saxifragales; Paeoniaceae; Paeonia", - "Pan paniscus\tpygmy chimpanzee\t1\t2\t-\t9597\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Homininae; Pan", - "Pan troglodytes\tchimpanzee\t1\t2\t-\t9598\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Homininae; Pan", - "Pan troglodytes troglodytes\t-\t1\t2\t-\t37011\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; 
Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Homininae; Pan; Pan troglodytes", - "Pan troglodytes verus\t-\t1\t2\t-\t37012\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Homininae; Pan; Pan troglodytes", - "Panax ginseng\t-\t1\t1\t11\t4054\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Apiales; Apiineae; Araliaceae; Panax", - "Panicum hallii var. filipes\t-\t1\t1\t11\t907226\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Panicinae; Panicum; Panicum hallii", - "Panthera tigris altaica\tAmur tiger\t1\t2\t-\t74533\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Feliformia; Felidae; Pantherinae; Panthera; Panthera tigris", - "Pantholops hodgsonii\tchiru\t1\t2\t-\t59538\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; 
Bovidae; Antilopinae; Pantholops", - "Papilio machaon\tcommon yellow swallowtail\t1\t5\t-\t76193\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Papilionoidea; Papilionidae; Papilioninae; Papilionini; Papilio", - "Papilio xuthus\tAsian swallowtail\t1\t5\t-\t66420\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Papilionoidea; Papilionidae; Papilioninae; Papilionini; Papilio", - "Papio anubis\tolive baboon\t1\t2\t-\t9555\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Papio", - "Papio hamadryas\thamadryas baboon\t1\t2\t-\t9557\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Papio", - "Paramecium tetraurelia strain d4-2\t-\t6\t4\t11\t412030\tINV\tcellular organisms; Eukaryota; Alveolata; Ciliophora; Intramacronucleata; Oligohymenophorea; Peniculida; Parameciidae; 
Paramecium; Paramecium tetraurelia", - "Parascaris equorum\t-\t1\t5\t-\t6256\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Ascaridida; Ascaridoidea; Ascarididae; Parascaris", - "Parasteatoda tepidariorum\tcommon house spider\t1\t5\t-\t114398\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Araneae; Araneomorphae; Entelegynae; Orbiculariae; Araneoidea; Theridiidae; Parasteatoda", - "Parus major\tGreat Tit\t1\t2\t-\t9157\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Paridae; Parus", - "Patiria miniata\tbat star\t1\t9\t-\t46514\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Valvatacea; Valvatida; Asterinidae; Patiria", - "Patiria pectinifera\t-\t1\t9\t-\t7594\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Valvatacea; Valvatida; Asterinidae; Patiria", - "Pecten maximus\t-\t1\t5\t-\t6579\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Pectinoida; Pectinoidea; Pectinidae; Pecten", - "Pedospumella encystans\t-\t1\t1\t11\t1117030\tPLN\tcellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; Chromulinales; Chromulinaceae; Pedospumella", - "Pelecanus crispus\tDalmatian pelican\t1\t2\t-\t36300\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; 
Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Pelecanidae; Pelecanus", - "Pelodiscus sinensis\tChinese soft-shelled turtle\t1\t2\t-\t13735\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Testudines; Cryptodira; Trionychia; Trionychidae; Pelodiscus", - "Penaeus monodon\tblack tiger shrimp\t1\t5\t-\t6687\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Dendrobranchiata; Penaeoidea; Penaeidae; Penaeus", - "Pepsis grossa\t-\t1\t5\t-\t1317727\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Pompiloidea; Pompilidae; Pepsinae; Pepsis", - "Perkinsus marinus ATCC 50983\t-\t1\t4\t11\t423536\tINV\tcellular organisms; Eukaryota; Alveolata; Perkinsea; Perkinsida; Perkinsidae; Perkinsus; Perkinsus marinus", - "Peromyscus maniculatus bairdii\tprairie deer mouse\t1\t2\t-\t230844\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Cricetidae; Neotominae; Peromyscus; Peromyscus maniculatus", - 
"Persicaria minor\t-\t1\t1\t11\t488003\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Polygonaceae; Polygonoideae; Persicarieae; Persicaria", - "Petunia integrifolia subsp. inflata\t-\t1\t1\t11\t212142\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Petunioideae; Petunia; Petunia integrifolia", - "Phaethon lepturus\twhite-tailed tropicbird\t1\t2\t-\t97097\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Phaethontidae; Phaethon", - "Phalacrocorax carbo\tgreat cormorant\t1\t2\t-\t9209\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Phalacrocoracidae; Phalacrocorax", - "Phalaenopsis aphrodite\t-\t1\t1\t11\t212056\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Orchidaceae; Epidendroideae; Vandeae; Aeridinae; Phalaenopsis", - "Phaseolus vulgaris\t-\t1\t1\t11\t3885\tPLN\tcellular organisms; Eukaryota; Viridiplantae; 
Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Phaseoleae; Phaseolus", - "Phoca largha\tspotted seal\t1\t2\t-\t39090\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Phocidae; Phoca", - "Phoenicopterus ruber ruber\t-\t1\t2\t-\t9218\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Phoenicopteriformes; Phoenicopteridae; Phoenicopterus; Phoenicopterus ruber", - "Phoenix dactylifera\tdate palm\t1\t1\t11\t42345\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Arecales; Arecaceae; Coryphoideae; Phoeniceae; Phoenix", - "Physalis alkekengi var. 
franchetii\t-\t1\t1\t11\t221454\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Physaleae; Physalis; Physalis alkekengi", - "Physalis peruviana\t-\t1\t1\t11\t126903\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Physaleae; Physalis", - "Physcomitrella patens\t-\t1\t1\t11\t3218\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Bryophyta; Bryophytina; Bryopsida; Funariidae; Funariales; Funariaceae; Physcomitrella", - "Physeter catodon\tsperm whale\t1\t2\t-\t9755\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Odontoceti; Physeteridae; Physeter", - "Phytophthora cambivora\t-\t1\t1\t11\t53983\tPLN\tcellular organisms; Eukaryota; Stramenopiles; Oomycetes; Peronosporales; Phytophthora", - "Phytophthora cinnamomi\t-\t1\t1\t11\t4785\tPLN\tcellular organisms; Eukaryota; Stramenopiles; Oomycetes; Peronosporales; Phytophthora", - "Phytophthora x alni\t-\t1\t1\t11\t299392\tPLN\tcellular organisms; Eukaryota; Stramenopiles; Oomycetes; Peronosporales; Phytophthora", - "Picea glauca\twhite spruce\t1\t1\t11\t3330\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Pinales; Pinaceae; Picea", - "Picoides pubescens\tdowny 
woodpecker\t1\t2\t-\t118200\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Piciformes; Picidae; Picoides", - "Pinus massoniana\t-\t1\t1\t11\t88730\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Pinales; Pinaceae; Pinus; Pinus", - "Pinus sylvestris\tScots pine\t1\t1\t11\t3349\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Pinales; Pinaceae; Pinus; Pinus", - "Pinus taeda\tloblolly pine\t1\t1\t11\t3352\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Pinales; Pinaceae; Pinus; Pinus", - "Pisaster ochraceus\tpurple sea star\t1\t9\t-\t7612\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Pisaster", - "Pisum sativum\tpea\t1\t1\t11\t3888\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Fabeae; Pisum", - "Pisum sativum subsp. 
sativum\t-\t1\t1\t11\t208194\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Fabeae; Pisum; Pisum sativum", - "Plakobranchus ocellatus\t-\t1\t5\t-\t259542\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Sacoglossa; Placobranchoidea; Placobranchidae; Plakobranchus", - "Plasmodium falciparum\tmalaria parasite P. falciparum\t1\t4\t11\t5833\tINV\tcellular organisms; Eukaryota; Alveolata; Apicomplexa; Aconoidasida; Haemosporida; Plasmodiidae; Plasmodium; Plasmodium (Laverania)", - "Plecoglossus altivelis\tayu\t1\t2\t-\t61084\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Stomiatii; Osmeriformes; Plecoglossidae; Plecoglossus", - "Plecoglossus altivelis altivelis\t-\t1\t2\t-\t281464\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Stomiatii; Osmeriformes; Plecoglossidae; Plecoglossus; Plecoglossus altivelis", - "Plukenetia volubilis\t-\t1\t1\t11\t316893\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Acalyphoideae; Plukenetieae; Plukenetia", - "Plutella xylostella\tdiamondback 
moth\t1\t5\t-\t51655\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Yponomeutoidea; Plutellidae; Plutella", - "Poa infirma\t-\t1\t1\t11\t165094\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Poinae; Poa", - "Poa supina\t-\t1\t1\t11\t289064\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Poinae; Poa", - "Podiceps cristatus\tgreat crested grebe\t1\t2\t-\t345573\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Podicipediformes; Podicipedidae; Podiceps", - "Poecilia formosa\tAmazon molly\t1\t2\t-\t48698\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; 
Cyprinodontoidei; Poeciliidae; Poeciliinae; Poecilia", - "Poecilia latipinna\tsailfin molly\t1\t2\t-\t48699\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Poeciliidae; Poeciliinae; Poecilia", - "Poecilia mexicana\t-\t1\t2\t-\t48701\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Poeciliidae; Poeciliinae; Poecilia", - "Poecilia reticulata\tguppy\t1\t2\t-\t8081\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Poeciliidae; Poeciliinae; Poecilia", - "Poeciliopsis prolifica\tblackstripe livebearer\t1\t2\t-\t188132\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; 
Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Poeciliidae; Poeciliinae; Poeciliopsis", - "Pogonus chalceus\t-\t1\t5\t-\t235516\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Adephaga; Caraboidea; Carabidae; Trechinae; Pogonini; Pogonus", - "Pohlia nutans\t-\t1\t1\t11\t140635\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Bryophyta; Bryophytina; Bryopsida; Bryidae; Bryanae; Bryales; Mniaceae; Pohlia", - "Polistes canadensis\t-\t1\t5\t-\t91411\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Vespidae; Polistinae; Polistini; Polistes", - "Polistes metricus\t-\t1\t5\t-\t91422\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Vespidae; Polistinae; Polistini; Polistes", - "Pongo abelii\tSumatran orangutan\t1\t2\t-\t9601\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Ponginae; Pongo", - "Populus euphratica\tEuphrates poplar\t1\t1\t11\t75702\tPLN\tcellular organisms; Eukaryota; Viridiplantae; 
Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Salicaceae; Saliceae; Populus", - "Populus trichocarpa\tblack cottonwood\t1\t1\t11\t3694\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Salicaceae; Saliceae; Populus", - "Porites australiensis\t-\t1\t4\t-\t51061\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Fungiina; Poritidae; Porites", - "Poterioochromonas sp. DS\t-\t1\t1\t11\t519425\tPLN\tcellular organisms; Eukaryota; Stramenopiles; Synurophyceae; Ochromonadales; Ochromonadaceae; Poterioochromonas", - "Priapulus caudatus\t-\t1\t5\t-\t37621\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Scalidophora; Priapulida; Priapulidae; Priapulus", - "Proasellus aragonensis\t-\t1\t5\t-\t1281939\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus arthrodilus\t-\t1\t5\t-\t1281940\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus assaforensis\t-\t1\t5\t-\t1282049\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; 
Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus cantabricus\t-\t1\t5\t-\t1281948\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus cavaticus\t-\t1\t5\t-\t1281949\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus coiffaiti\t-\t1\t5\t-\t1281953\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus coxalis\t-\t1\t5\t-\t63229\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus ebrensis\t-\t1\t5\t-\t1281961\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus escolai\t-\t1\t5\t-\t1281963\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus grafi\t-\t1\t5\t-\t1281973\tINV\tcellular 
organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus granadensis\t-\t1\t5\t-\t1281974\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus hercegovinensis\t-\t1\t5\t-\t1281977\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus ibericus\t-\t1\t5\t-\t1281981\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus jaloniacus\t-\t1\t5\t-\t1281986\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus karamani\t-\t1\t5\t-\t1281987\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus margalefi\t-\t1\t5\t-\t1281998\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; 
Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus meridianus\t-\t1\t5\t-\t1282001\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus ortizi\t-\t1\t5\t-\t1282012\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus racovitzai\t-\t1\t5\t-\t1282023\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus rectus\t-\t1\t5\t-\t1282025\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus solanasi\t-\t1\t5\t-\t1282031\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", - "Proasellus spelaeus\t-\t1\t5\t-\t1282033\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; 
Asellidae; Proasellus", - "Procambarus clarkii\tred swamp crayfish\t1\t5\t-\t6728\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Astacoidea; Cambaridae; Cambarinae; Procambarus", - "Procotyla fluviatilis\t-\t1\t9\t-\t231627\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Rhabditophora; Seriata; Tricladida; Continenticola; Planarioidea; Dendrocoelidae; Procotyla", - "Propithecus coquereli\tCoquerel's sifaka\t1\t2\t-\t379532\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Strepsirrhini; Lemuriformes; Indriidae; Propithecus", - "Prosopis alba\t-\t1\t1\t11\t207710\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Mimosoideae; Mimoseae; Prosopis", - "Protobothrops mucrosquamatus\t-\t1\t2\t-\t103944\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Viperidae; Crotalinae; Protobothrops", - "Protopolystoma xenopodis\t-\t1\t9\t-\t117903\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Monogenea; Polyopisthocotylea; Polystomatidae; Protopolystoma", - 
"Prunus armeniaca\tapricot\t1\t1\t11\t36596\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Amygdaleae; Prunus", - "Prunus mume\tJapanese apricot\t1\t1\t11\t102107\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Amygdaleae; Prunus", - "Prunus persica\tpeach\t1\t1\t11\t3760\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Amygdaleae; Prunus", - "Prymnesium parvum\t-\t1\t1\t11\t97485\tPLN\tcellular organisms; Eukaryota; Haptophyceae; Prymnesiales; Prymnesiaceae; Prymnesium", - "Pseudacris regilla\tPacific treefrog\t1\t2\t-\t47562\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Neobatrachia; Hyloidea; Hylidae; Hylinae; Hylini; Pseudacris", - "Pseudodiploria strigosa\t-\t1\t4\t-\t1428006\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Faviina; Mussidae; Faviinae; Pseudodiploria", - "Pseudomasaris vespoides\t-\t1\t5\t-\t1317726\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; 
Vespoidea; Vespidae; Masarinae; Masarini; Pseudomasaris", - "Pseudomonas aeruginosa\t-\t11\t0\t-\t287\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Pseudomonadaceae; Pseudomonas; Pseudomonas aeruginosa group", - "Pseudopodoces humilis\tTibetan ground-tit\t1\t2\t-\t181119\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Paridae; Pseudopodoces", - "Pseudotsuga menziesii var. menziesii\t-\t1\t1\t11\t278161\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Pinales; Pinaceae; Pseudotsuga; Pseudotsuga menziesii", - "Pteridium aquilinum subsp. 
aquilinum\t-\t1\t1\t11\t104588\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Moniliformopses; Polypodiidae; Polypodiales; Dennstaedtiaceae; Pteridium; Pteridium aquilinum", - "Pterocles gutturalis\tyellow-throated sandgrouse\t1\t2\t-\t240206\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Ciconiiformes; Pteroclidae; Pterocles", - "Pteronotus parnellii\tParnell's mustached bat\t1\t2\t-\t59476\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Mormoopidae; Pteronotus", - "Pteropus alecto\tblack flying fox\t1\t2\t-\t9402\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Pteropus", - "Pteropus vampyrus\tlarge flying fox\t1\t2\t-\t132908\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Pteropus", - "Ptychodera flava\t-\t1\t9\t-\t63121\tINV\tcellular organisms; Eukaryota; 
Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Hemichordata; Enteropneusta; Ptychoderidae; Ptychodera", - "Puccinia psidii\t-\t1\t4\t-\t181123\tPLN\tcellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Basidiomycota; Pucciniomycotina; Pucciniomycetes; Pucciniales; Pucciniaceae; Puccinia", - "Pundamilia nyererei\t-\t1\t2\t-\t303518\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; Pundamilia", - "Punica granatum\tpomegranate\t1\t1\t11\t22663\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Lythraceae; Punica", - "Pygocentrus nattereri\tred-bellied piranha\t1\t2\t-\t42514\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Characiphysae; Characiformes; Characoidei; Serrasalmidae; Pygocentrus", - "Pygoscelis adeliae\tAdelie penguin\t1\t2\t-\t9238\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Sphenisciformes; 
Spheniscidae; Pygoscelis", - "Pyrodinium bahamense var. compressum\t-\t1\t4\t11\t73916\tPLN\tcellular organisms; Eukaryota; Alveolata; Dinophyceae; Gonyaulacales; Goniodomataceae; Pyrodinium; Pyrodinium bahamense", - "Pyrus communis\tpear\t1\t1\t11\t23211\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Maleae; Pyrus", - "Pyrus x bretschneideri\tChinese white pear\t1\t1\t11\t225117\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Maleae; Pyrus", - "Python bivittatus\tBurmese python\t1\t2\t-\t176946\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Henophidia; Pythonidae; Python", - "Quercus suber\t-\t1\t1\t11\t58331\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Fagaceae; Quercus", - "Ramulus artemis\t-\t1\t5\t-\t1390046\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Orthopteroidea; Phasmatodea; Verophasmatodea; Anareolatae; Phasmatidae; Phasmatinae; Clitumnini; Ramulus", - "Rana clamitans\tbronze 
frog\t1\t2\t-\t145282\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Neobatrachia; Ranoidea; Ranidae; Rana; Aquarana", - "Raphanus sativus\tradish\t1\t1\t11\t3726\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Raphanus", - "Rattus norvegicus\tNorway rat\t1\t2\t-\t10116\tROD\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Muridae; Murinae; Rattus", - "Rauvolfia serpentina\tserpentwood\t1\t1\t11\t4060\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Apocynaceae; Rauvolfioideae; Vinceae; Rauvolfiinae; Rauvolfia", - "Reaumuria trigyna\t-\t1\t1\t11\t1091135\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Tamaricaceae; Reaumuria", - "Rhagoletis zephyria\tsnowberry fruit fly\t1\t5\t-\t28612\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; 
Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Trypetinae; Carpomyini; Carpomyina; Rhagoletis", - "Rhinolophus ferrumequinum\tgreater horseshoe bat\t1\t2\t-\t59479\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Rhinolophidae; Rhinolophinae; Rhinolophus", - "Rhinopithecus bieti\tblack snub-nosed monkey\t1\t2\t-\t61621\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Colobinae; Rhinopithecus", - "Rhinopithecus roxellana\tgolden snub-nosed monkey\t1\t2\t-\t61622\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Colobinae; Rhinopithecus", - "Rhipicephalus sanguineus\tbrown dog tick\t1\t5\t-\t34632\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Rhipicephalinae; Rhipicephalus; Rhipicephalus; Rhipicephalus sanguineus group", - "Rhizoctonia solani AG-1 IA\t-\t1\t4\t-\t983506\tPLN\tcellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; 
Basidiomycota; Agaricomycotina; Agaricomycetes; Agaricomycetes incertae sedis; Cantharellales; Ceratobasidiaceae; Rhizoctonia; Rhizoctonia solani", - "Rhodinia newara\t-\t1\t5\t-\t1579501\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Saturniidae; Saturniinae; Saturniini; Rhodinia", - "Ricinus communis\tcastor bean\t1\t1\t11\t3988\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Acalyphoideae; Acalypheae; Ricinus", - "Romanomermis culicivorax\t-\t1\t5\t-\t13658\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Enoplea; Dorylaimia; Mermithida; Mermithoidea; Mermithidae; Romanomermis", - "Rousettus aegyptiacus\tEgyptian rousette\t1\t2\t-\t9407\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Rousettus", - "Rubus hybrid cultivar\t-\t1\t1\t11\t564016\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Rosoideae; Rosoideae unplaced; Rubus", - "Rumex palustris\t-\t1\t1\t11\t50298\tPLN\tcellular organisms; Eukaryota; Viridiplantae; 
Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Polygonaceae; Polygonoideae; Rumiceae; Rumex", - "Saccharomyces cerevisiae\tbaker's yeast\t1\t3\t-\t4932\tPLN\tcellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Ascomycota; saccharomyceta; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces", - "Saccharomyces pastorianus\t-\t1\t3\t-\t27292\tPLN\tcellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Ascomycota; saccharomyceta; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces", - "Saccoglossus kowalevskii\t-\t1\t9\t-\t10224\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Hemichordata; Enteropneusta; Harrimaniidae; Saccoglossus", - "Saimiri boliviensis boliviensis\tBolivian squirrel monkey\t1\t2\t-\t39432\tPRI\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Platyrrhini; Cebidae; Saimiriinae; Saimiri; Saimiri boliviensis", - "Salicornia europaea\t-\t1\t1\t11\t206448\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Salicornioideae; Salicornia", - "Salmo salar\tAtlantic salmon\t1\t2\t-\t8030\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; 
Euteleosteomorpha; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Salmo", - "Salmonella enterica\t-\t11\t0\t-\t28901\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella", - "Salmonella enterica subsp. enterica serovar Typhi\t-\t11\t0\t-\t90370\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella; Salmonella enterica; Salmonella enterica subsp. enterica", - "Salmonella enterica subsp. enterica serovar Typhimurium\t-\t11\t0\t-\t90371\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella; Salmonella enterica; Salmonella enterica subsp. enterica", - "Salmonella enterica subsp. enterica serovar Typhimurium str. DT104\t-\t11\t0\t-\t85569\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella; Salmonella enterica; Salmonella enterica subsp. enterica; Salmonella enterica subsp. 
enterica serovar Typhimurium", - "Samia ricini\tIndian eri silkmoth\t1\t5\t-\t63990\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Saturniidae; Saturniinae; Attacini; Samia", - "Sarcophilus harrisii\tTasmanian devil\t1\t2\t-\t9305\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Metatheria; Dasyuromorphia; Dasyuridae; Sarcophilus", - "Sarsinebalia urgorrii\t-\t1\t5\t-\t1032695\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Phyllocarida; Leptostraca; Nebaliidae; Sarsinebalia", - "Saussurea involucrata\t-\t1\t1\t11\t200489\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Carduoideae; Cardueae; Carduinae; Saussurea", - "Sceliphron caementarium\t-\t1\t5\t-\t253855\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Sphecoidea; Sphecidae; Sphecinae; Sceliphrini; Sceliphrina; Sceliphron", - "Schistocephalus solidus\t-\t1\t9\t-\t70667\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Cestoda; Eucestoda; 
Diphyllobothriidea; Diphyllobothriidae; Schistocephalus", - "Schistosoma curassoni\t-\t1\t9\t-\t6186\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma", - "Schistosoma haematobium\t-\t1\t9\t-\t6185\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma", - "Schistosoma japonicum\t-\t1\t9\t-\t6182\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma", - "Schistosoma mansoni\t-\t1\t9\t-\t6183\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma", - "Schistosoma mattheei\t-\t1\t9\t-\t31246\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma", - "Schistosoma rodhaini\t-\t1\t9\t-\t6188\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma", - "Schmidtea mediterranea\t-\t1\t9\t-\t79327\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Rhabditophora; Seriata; Tricladida; Continenticola; Geoplanoidea; Dugesiidae; Schmidtea", - "Sclerodactyla briareus\t-\t1\t9\t-\t7710\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Holothuroidea; Dendrochirotacea; Dendrochirotida; Sclerodactylidae; Sclerodactyla", - "Scleropages formosus\tAsian bonytongue\t1\t2\t-\t113540\tVRT\tcellular organisms; 
Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Osteoglossocephala; Osteoglossomorpha; Osteoglossiformes; Osteoglossidae; Scleropages", - "Sclerotinia homoeocarpa\t-\t1\t4\t-\t38483\tPLN\tcellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Ascomycota; saccharomyceta; Pezizomycotina; leotiomyceta; sordariomyceta; Leotiomycetes; Helotiales; Sclerotiniaceae; Sclerotinia", - "Scophthalmus maximus\tturbot\t1\t2\t-\t52904\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Carangaria; Pleuronectiformes; Pleuronectoidei; Scophthalmidae; Scophthalmus", - "Scylla olivacea\torange mud crab\t1\t5\t-\t85551\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Brachyura; Eubrachyura; Heterotremata; Portunoidea; Portunidae; Scylla", - "Sebastes nigrocinctus\ttiger rockfish\t1\t2\t-\t72089\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Perciformes; Scorpaenoidei; Sebastidae; Sebastinae; Sebastes", - "Secale cereale\trye\t1\t1\t11\t4550\tPLN\tcellular organisms; Eukaryota; 
Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Secale", - "Sedum alfredii\t-\t1\t1\t11\t439688\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Saxifragales; Crassulaceae; Sedum", - "Selaginella moellendorffii\t-\t1\t1\t11\t88036\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Lycopodiidae; Selaginellales; Selaginellaceae; Selaginella", - "Serinus canaria\tcommon canary\t1\t2\t-\t9135\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passeroidea; Fringillidae; Carduelinae; Serinus", - "Sesamum indicum\tsesame\t1\t1\t11\t4182\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Pedaliaceae; Sesamum", - "Setaria italica\tfoxtail millet\t1\t1\t11\t4555\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Cenchrinae; Setaria", - "Shigella sonnei\t-\t11\t0\t-\t624\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; 
Enterobacterales; Enterobacteriaceae; Shigella", - "Silene latifolia\t-\t1\t1\t11\t37657\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Caryophyllaceae; Sileneae; Silene", - "Simian immunodeficiency virus\t-\t1\t0\t-\t11723\tVRL\tViruses; Retro-transcribing viruses; Retroviridae; Orthoretrovirinae; Lentivirus; Primate lentivirus group", - "Sinocyclocheilus angustiporus\t-\t1\t2\t-\t307947\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Sinocyclocheilus", - "Sinocyclocheilus anophthalmus\teyeless golden-line fish\t1\t2\t-\t307955\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Sinocyclocheilus", - "Sinocyclocheilus anshuiensis\t-\t1\t2\t-\t1608454\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Sinocyclocheilus", - "Sinocyclocheilus grahami\t-\t1\t2\t-\t75366\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; 
Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Sinocyclocheilus", - "Sinocyclocheilus rhinocerous\t-\t1\t2\t-\t307959\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Sinocyclocheilus", - "Sinopodophyllum hexandrum\t-\t1\t1\t11\t93608\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; stem eudicotyledons; Ranunculales; Berberidaceae; Podophylloideae; Sinopodophyllum", - "Sipyloidea sipylus\t-\t1\t5\t-\t202427\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Orthopteroidea; Phasmatodea; Verophasmatodea; Anareolatae; Diapheromeridae; Necrosciinae; Sipyloidea", - "soil metagenome\t-\t11\t2\t11\t410658\tENV\tunclassified sequences; metagenomes; ecological metagenomes", - "Solanum chacoense\tChaco potato\t1\t1\t11\t4108\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum", - "Solanum lycopersicum\ttomato\t1\t1\t11\t4081\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; 
eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum; Lycopersicon", - "Solanum melongena\teggplant\t1\t1\t11\t4111\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum", - "Solanum pennellii\t-\t1\t1\t11\t28526\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum; Lycopersicon", - "Solanum torvum\t-\t1\t1\t11\t119830\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum", - "Solanum tuberosum\tpotato\t1\t1\t11\t4113\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum", - "Solenopsis invicta\tred fire ant\t1\t5\t-\t13686\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Solenopsidini; Solenopsis", - "Sorex araneus\tEuropean shrew\t1\t2\t-\t42254\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; 
Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Insectivora; Soricidae; Soricinae; Sorex", - "Sorghum bicolor\tsorghum\t1\t1\t11\t4558\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Sorghinae; Sorghum", - "Speleonectes cf. tulumensis BMR-2011\t-\t1\t5\t-\t1032549\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Remipedia; Nectiopoda; Speleonectidae; Speleonectes", - "Sphaerechinus granularis\t-\t1\t9\t-\t39374\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Temnopleuroida; Toxopneustidae; Sphaerechinus", - "Sphaeroforma arctica JP610\t-\t1\t1\t-\t667725\tINV\tcellular organisms; Eukaryota; Opisthokonta; Opisthokonta incertae sedis; Ichthyosporea; Ichthyophonida; Sphaeroforma; Sphaeroforma arctica", - "Sphaeropthalma orestes\t-\t1\t5\t-\t374941\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Pompiloidea; Mutillidae; Sphaeropthalminae; Sphaeropthalma", - "Spinacia oleracea\tspinach\t1\t1\t11\t3562\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; 
Chenopodioideae; Anserineae; Spinacia", - "Spirometra erinaceieuropaei\t-\t1\t9\t-\t99802\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Cestoda; Eucestoda; Diphyllobothriidea; Diphyllobothriidae; Spirometra", - "Spodoptera exigua\tbeet armyworm\t1\t5\t-\t7107\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Noctuoidea; Noctuidae; Amphipyrinae; Spodoptera", - "Spodoptera frugiperda\tfall armyworm\t1\t5\t-\t7108\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Noctuoidea; Noctuidae; Amphipyrinae; Spodoptera", - "Spumella vulgaris\t-\t1\t1\t11\t1117031\tPLN\tcellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; Chromulinales; Chromulinaceae; Spumella", - "Spumella-like flagellate JBC/S23\t-\t1\t1\t11\t293195\tPLN\tcellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; unclassified Chrysophyceae; Spumella-like flagellate JB", - "Spumella-like flagellate JBNZ39\t-\t1\t1\t11\t293202\tPLN\tcellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; unclassified Chrysophyceae; Spumella-like flagellate JB", - "Staphylococcus aureus\t-\t11\t0\t-\t1280\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Bacillales; Staphylococcaceae; Staphylococcus", - "Staphylococcus epidermidis\t-\t11\t0\t-\t1282\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Bacillales; Staphylococcaceae; Staphylococcus", - "Stegastes partitus\tbicolor 
damselfish\t1\t2\t-\t144197\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Ovalentaria incertae sedis; Pomacentridae; Stegastes", - "Stegodyphus mimosarum\t-\t1\t5\t-\t407821\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Araneae; Araneomorphae; Entelegynae; Eresoidea; Eresidae; Stegodyphus", - "Stenotrophomonas maltophilia\t-\t11\t0\t-\t40324\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Xanthomonadales; Xanthomonadaceae; Stenotrophomonas; Stenotrophomonas maltophilia group", - "Stigmatomma oregonense\t-\t1\t5\t-\t602440\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Amblyoponinae; Stigmatomma", - "Stomoxys calcitrans\tstable fly\t1\t5\t-\t35570\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Calyptratae; Muscoidea; Muscidae; Muscinae; Stomoxyini; Stomoxys", - "Streptococcus agalactiae\t-\t11\t0\t-\t1311\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus", - "Streptococcus equi subsp. 
equi\t-\t11\t0\t-\t148942\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus; Streptococcus dysgalactiae group; Streptococcus equi", - "Streptococcus pneumoniae\t-\t11\t0\t-\t1313\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus", - "Streptococcus suis\t-\t11\t0\t-\t1307\tBCT\tcellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus", - "Strongylocentrotus purpuratus\tpurple sea urchin\t1\t9\t-\t7668\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Strongylocentrotidae; Strongylocentrotus", - "Strongylus vulgaris\t-\t1\t5\t-\t40348\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Strongyloidea; Strongylidae; Strongylinae; Strongylus", - "Struthio camelus australis\t-\t1\t2\t-\t441894\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Palaeognathae; Struthioniformes; Struthionidae; Struthio; Struthio camelus", - "Stylophora pistillata\t-\t1\t4\t-\t50429\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Pocilloporidae; Stylophora", - "Sus scrofa\tpig\t1\t2\t-\t9823\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; 
Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Suina; Suidae; Sus", - "Symbiodinium sp. A1\t-\t1\t4\t11\t503409\tPLN\tcellular organisms; Eukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Symbiodinium; Symbiodinium sp. clades; Symbiodinium sp. clade A", - "Symbiodinium sp. A2\t-\t1\t4\t11\t765178\tPLN\tcellular organisms; Eukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Symbiodinium; Symbiodinium sp. clades; Symbiodinium sp. clade A", - "Symbiodinium sp. B2\t-\t1\t4\t11\t154560\tPLN\tcellular organisms; Eukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Symbiodinium; Symbiodinium sp. clades; Symbiodinium sp. clade B", - "Symphylella vulgaris\t-\t1\t5\t-\t1288507\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Myriapoda; Symphyla; Scolopendrellidae; Symphylella", + "Marthasterias glacialis\tspiny starfish\t1\t9\t-\t7609\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Marthasterias", + "Maylandia zebra\tzebra mbuna\t1\t2\t-\t106582\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; Maylandia; Maylandia zebra complex", + "Medauroidea extradentata\t-\t1\t5\t-\t614211\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Phasmatodea; Verophasmatodea; Anareolatae; Phasmatidae; Phasmatinae; Clitumnini; Medauroidea", + "Medicago\tmedics\t1\t1\t11\t3877\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb 
inversion clade; NPAAA clade; Hologalegina; IRL clade; Trifolieae; Medicago", + "Medicago sativa\t-\t1\t1\t11\t3879\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Trifolieae; Medicago", + "Medicago truncatula\tbarrel medic\t1\t1\t11\t3880\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Trifolieae; Medicago", + "Megaderma lyra\tIndian false vampire\t1\t2\t-\t9413\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Megadermatidae; Megaderma", + "Megaselia scalaris\t-\t1\t5\t-\t36166\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Platypezoidea; Phoridae; Megaseliini; Megaselia", + "Melampyrum\t-\t1\t1\t11\t52724\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Rhinantheae; Melampyrum", + "Melampyrum roseum\t-\t1\t1\t11\t321415\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Rhinantheae; Melampyrum", + "Meleagris gallopavo\tturkey\t1\t2\t-\t9103\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Phasianidae; Meleagridinae; Meleagris", + 
"Meloidogyne\t-\t1\t5\t-\t189290\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Tylenchina; Tylenchomorpha; Tylenchoidea; Meloidogynidae; Meloidogyninae; Meloidogyne", + "Mengenilla moldrzyki\t-\t1\t5\t-\t1155016\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Strepsiptera; Mengenillidia; Mengenillidae; Mengenilla", + "Meriones unguiculatus\tMongolian gerbil\t1\t2\t-\t10047\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Gerbillinae; Meriones", + "Merops nubicus\tcarmine bee-eater\t1\t2\t-\t57421\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Coraciiformes; Meropidae; Merops", + "Mesembryanthemum crystallinum\tcommon iceplant\t1\t1\t11\t3544\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Aizoaceae; Mesembryanthemum; Cryophytum", + "Mesitornis unicolor\tbrown roatelo\t1\t2\t-\t54374\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Mesitornithidae; Mesitornis", + "Mesocricetus auratus\tgolden hamster\t1\t2\t-\t10036\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Cricetidae; Cricetinae; Mesocricetus", + "Mesorhizobium\t-\t11\t0\t-\t68287\tBCT\tBacteria; Proteobacteria; Alphaproteobacteria; Rhizobiales; Phyllobacteriaceae; Mesorhizobium", + "Micoletzkya japonica\t-\t1\t5\t-\t1250332\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Diplogasteromorpha; Diplogasteroidea; Diplogasteridae; 
Micoletzkya", + "Microcebus murinus\tgray mouse lemur\t1\t2\t-\t30608\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Strepsirrhini; Lemuriformes; Cheirogaleidae; Microcebus", + "Microplitis demolitor\t-\t1\t5\t-\t69319\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Parasitoida; Ichneumonoidea; Braconidae; Microgastrinae; Microplitis", + "Micropterix calthella\t-\t1\t5\t-\t41027\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Zeugloptera; Micropterigidae; Micropterix", + "Micropterus floridanus\tFlorida bass\t1\t2\t-\t225391\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Centrarchiformes; Centrarchoidei; Centrarchidae; Micropterus", + "Micropterus salmoides\tlargemouth bass\t1\t2\t-\t27706\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Centrarchiformes; Centrarchoidei; Centrarchidae; Micropterus", + "Micropterus salmoides salmoides\tnorthern largemouth bass\t1\t2\t-\t489037\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Centrarchiformes; Centrarchoidei; Centrarchidae; Micropterus", + "Micrurus corallinus\tpainted coral snake\t1\t2\t-\t54390\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Elapidae; Elapinae; Micrurus", + "Micrurus lemniscatus carvalhoi\t-\t1\t2\t-\t129465\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; 
Serpentes; Colubroidea; Elapidae; Elapinae; Micrurus", + "Micrurus lemniscatus lemniscatus\t-\t1\t2\t-\t129467\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Elapidae; Elapinae; Micrurus", + "Mizuhopecten yessoensis\tYesso scallop\t1\t5\t-\t6573\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Pectinoida; Pectinoidea; Pectinidae; Mizuhopecten", + "Molgula\t-\t1\t13\t-\t27574\tINV\tEukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Stolidobranchia; Molgulidae; Molgula", + "Molgula tectiformis\t-\t1\t13\t-\t30286\tINV\tEukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Stolidobranchia; Molgulidae; Molgula", + "Momordica charantia\tbitter melon\t1\t1\t11\t3673\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Momordiceae; Momordica", + "Monomorium chinense\t-\t1\t5\t-\t482359\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Myrmicinae; Monomorium", + "Monomorium pharaonis\tpharaoh ant\t1\t5\t-\t307658\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Myrmicinae; Monomorium", + "Monosiga\t-\t1\t1\t-\t81525\tINV\tEukaryota; Choanoflagellida; Craspedida; Salpingoecidae; Monosiga", + "Monosiga ovata\t-\t1\t1\t-\t81526\tINV\tEukaryota; Choanoflagellida; Craspedida; Salpingoecidae; Monosiga", + "Morone chrysops\twhite bass\t1\t2\t-\t46259\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Moronidae; Morone", + "Morone saxatilis\tstriped 
sea-bass\t1\t2\t-\t34816\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Moronidae; Morone", + "Morus notabilis\t-\t1\t1\t11\t981085\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Moraceae; Morus", + "Mus\t-\t1\t2\t-\t10088\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Murinae; Mus", + "Mus musculus\thouse mouse\t1\t2\t-\t10090\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Murinae; Mus; Mus", + "Mus musculus domesticus\twestern European house mouse\t1\t2\t-\t10092\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Murinae; Mus; Mus", + "Mus musculus molossinus\tJapanese wild mouse\t1\t2\t-\t57486\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Murinae; Mus; Mus", + "Musa acuminata AAA Group\tdessert banana\t1\t1\t11\t214697\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Zingiberales; Musaceae; Musa", + "Musa acuminata subsp. 
malaccensis\twild Malaysian banana\t1\t1\t11\t214687\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Zingiberales; Musaceae; Musa", + "Musca domestica\thouse fly\t1\t5\t-\t7370\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Muscoidea; Muscidae; Musca", + "Mustela putorius furo\tdomestic ferret\t1\t2\t-\t9669\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Mustelidae; Mustelinae; Mustela", + "Mycobacterium tuberculosis\t-\t11\t0\t-\t1773\tBCT\tBacteria; Actinobacteria; Corynebacteriales; Mycobacteriaceae; Mycobacterium; Mycobacterium tuberculosis complex", + "Mycobacteroides abscessus subsp. abscessus\t-\t11\t0\t-\t1185650\tBCT\tBacteria; Actinobacteria; Corynebacteriales; Mycobacteriaceae; Mycobacteroides; Mycobacteroides abscessus", + "Myotis brandtii\tBrandt's bat\t1\t2\t-\t109478\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis", + "Myotis davidii\t-\t1\t2\t-\t225400\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis", + "Myotis ricketti\tRickett's big-footed Myotis\t1\t2\t-\t203696\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis", + "Myrmica rubra\t-\t1\t5\t-\t106198\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Myrmicinae; Myrmica", + "Mytilus galloprovincialis\tMediterranean mussel\t1\t5\t-\t29158\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; 
Mytiloida; Mytiloidea; Mytilidae; Mytilinae; Mytilus", + "Nannospalax galili\tUpper Galilee mountains blind mole rat\t1\t2\t-\t1026970\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Spalacidae; Spalacinae; Nannospalax", + "Nasonia\t-\t1\t5\t-\t7424\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Parasitoida; Chalcidoidea; Pteromalidae; Pteromalinae; Nasonia", + "Nasonia vitripennis\tjewel wasp\t1\t5\t-\t7425\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Parasitoida; Chalcidoidea; Pteromalidae; Pteromalinae; Nasonia", + "Neisseria gonorrhoeae\t-\t11\t0\t-\t485\tBCT\tBacteria; Proteobacteria; Betaproteobacteria; Neisseriales; Neisseriaceae; Neisseria", + "Neisseria meningitidis\t-\t11\t0\t-\t487\tBCT\tBacteria; Proteobacteria; Betaproteobacteria; Neisseriales; Neisseriaceae; Neisseria", + "Nematostella\t-\t1\t4\t-\t45350\tINV\tEukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Edwardsiidae; Nematostella", + "Nematostella vectensis\tstarlet sea anemone\t1\t4\t-\t45351\tINV\tEukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Edwardsiidae; Nematostella", + "Neolamprologus brichardi\t-\t1\t2\t-\t32507\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Lamprologini; Neolamprologus", + "Neopelma\t-\t1\t2\t-\t114328\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Pipridae; Neopelma", + "Neopelma chrysocephalum\tsaffron-crested tyrant-manakin\t1\t2\t-\t114329\tVRT\tEukaryota; 
Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Pipridae; Neopelma", + "Neurospora\t-\t1\t4\t-\t5140\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Sordariomycetidae; Sordariales; Sordariaceae; Neurospora", + "Neurospora crassa\t-\t1\t4\t-\t5141\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Sordariomycetidae; Sordariales; Sordariaceae; Neurospora", + "Nicotiana\t-\t1\t1\t11\t4085\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana", + "Nicotiana attenuata\t-\t1\t1\t11\t49451\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana", + "Nicotiana benthamiana\t-\t1\t1\t11\t4100\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana", + "Nicotiana sylvestris\twood tobacco\t1\t1\t11\t4096\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana", + "Nicotiana tabacum\tcommon tobacco\t1\t1\t11\t4097\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana", + "Nicotiana tomentosiformis\t-\t1\t1\t11\t4098\tPLN\tEukaryota; Viridiplantae; Streptophyta; 
Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana", + "Nilaparvata\t-\t1\t5\t-\t108930\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Auchenorrhyncha; Fulgoroidea; Delphacidae; Delphacinae; Nilaparvata", + "Nilaparvata lugens\tbrown planthopper\t1\t5\t-\t108931\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Auchenorrhyncha; Fulgoroidea; Delphacidae; Delphacinae; Nilaparvata", + "Nitella\t-\t1\t1\t11\t3148\tPLN\tEukaryota; Viridiplantae; Streptophyta; Charophyceae; Charales; Characeae; Nitella", + "Nitella hyalina\t-\t1\t1\t11\t181804\tPLN\tEukaryota; Viridiplantae; Streptophyta; Charophyceae; Charales; Characeae; Nitella", + "Noccaea caerulescens\t-\t1\t1\t11\t107243\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Coluteocarpeae; Noccaea", + "Notamacropus\t-\t1\t2\t-\t1960649\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Metatheria; Diprotodontia; Macropodidae; Notamacropus", + "Notamacropus eugenii\ttammar wallaby\t1\t2\t-\t9315\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Metatheria; Diprotodontia; Macropodidae; Notamacropus", + "Notechis scutatus\tmainland tiger snake\t1\t2\t-\t8663\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Elapidae; Acanthophiinae; Notechis", + "Nothobranchius\t-\t1\t2\t-\t28779\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; 
Cyprinodontiformes; Nothobranchiidae; Nothobranchius", + "Nothobranchius furzeri\tturquoise killifish\t1\t2\t-\t105023\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Nothobranchiidae; Nothobranchius", + "Numida meleagris\thelmeted guineafowl\t1\t2\t-\t8996\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Numididae; Numida", + "Ochotona princeps\tAmerican pika\t1\t2\t-\t9978\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Lagomorpha; Ochotonidae; Ochotona", + "Octodon degus\tdegu\t1\t2\t-\t10160\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Hystricomorpha; Octodontidae; Octodon", + "Octopus bimaculoides\t-\t1\t5\t-\t37653\tINV\tEukaryota; Metazoa; Lophotrochozoa; Mollusca; Cephalopoda; Coleoidea; Neocoleoidea; Octopodiformes; Octopoda; Incirrata; Octopodidae; Octopus", + "Oenococcus oeni\t-\t11\t0\t-\t1247\tBCT\tBacteria; Firmicutes; Bacilli; Lactobacillales; Leuconostocaceae; Oenococcus", + "Oikopleura\t-\t1\t5\t-\t34763\tINV\tEukaryota; Metazoa; Chordata; Tunicata; Appendicularia; Copelata; Oikopleuridae; Oikopleura", + "Oikopleura dioica\t-\t1\t5\t-\t34765\tINV\tEukaryota; Metazoa; Chordata; Tunicata; Appendicularia; Copelata; Oikopleuridae; Oikopleura", + "Olavius algarvensis\t-\t1\t5\t-\t188229\tINV\tEukaryota; Metazoa; Lophotrochozoa; Annelida; Clitellata; Oligochaeta; Haplotaxida; Tubificina; Naididae; Phallodrilinae; Olavius", + "Olea europaea\tcommon olive\t1\t1\t11\t4146\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; 
asterids; lamiids; Lamiales; Oleaceae; Oleeae; Olea", + "Olea europaea var. sylvestris\t-\t1\t1\t11\t158386\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Oleaceae; Oleeae; Olea", + "Oncorhynchus\t-\t1\t2\t-\t8016\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus", + "Oncorhynchus kisutch\tcoho salmon\t1\t2\t-\t8019\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus", + "Oncorhynchus masou masou\tcherry salmon\t1\t2\t-\t90313\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus", + "Oncorhynchus mykiss\trainbow trout\t1\t2\t-\t8022\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus", + "Oncorhynchus tshawytscha\tChinook salmon\t1\t2\t-\t74940\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus", + "Ophiocoma echinata\t-\t1\t9\t-\t331088\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Ophiuroidea; Ophiuridea; Ophiurida; Ophiurina; Gnathophiurina; Ophiocomidae; Ophiocoma", + "Opisthorchis viverrini\t-\t1\t9\t-\t6198\tINV\tEukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Opisthorchiida; Opisthorchiata; Opisthorchiidae; Opisthorchis", + "Oreochromis\t-\t1\t2\t-\t8139\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; 
Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Oreochromini; Oreochromis", + "Oreochromis niloticus\tNile tilapia\t1\t2\t-\t8128\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Oreochromini; Oreochromis", + "Ornithorhynchus anatinus\tplatypus\t1\t2\t-\t9258\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Monotremata; Ornithorhynchidae; Ornithorhynchus", + "Orobanche\t-\t1\t1\t11\t36747\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Orobancheae; Orobanche", + "Orobanche minor\t-\t1\t1\t11\t36748\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Orobancheae; Orobanche", + "Orycteropus afer afer\t-\t1\t2\t-\t1230840\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Afrotheria; Tubulidentata; Orycteropodidae; Orycteropus", + "Oryctolagus cuniculus\trabbit\t1\t2\t-\t9986\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Lagomorpha; Leporidae; Oryctolagus", + "Oryza\t-\t1\t1\t11\t4527\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza", + "Oryza alta\t-\t1\t1\t11\t52545\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; 
Oryza", + "Oryza australiensis\t-\t1\t1\t11\t4532\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza", + "Oryza brachyantha\tmalo sina\t1\t1\t11\t4533\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza", + "Oryza coarctata\t-\t1\t1\t11\t77588\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza", + "Oryza glaberrima\tAfrican rice\t1\t1\t11\t4538\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza", + "Oryza longistaminata\t-\t1\t1\t11\t4528\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza", + "Oryza meyeriana var. 
granulata\t-\t1\t1\t11\t110450\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza meyeriana", + "Oryza minuta\t-\t1\t1\t11\t63629\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza", + "Oryza officinalis\t-\t1\t1\t11\t4535\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza", + "Oryza ridleyi\t-\t1\t1\t11\t83308\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza", + "Oryza rufipogon\t-\t1\t1\t11\t4529\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza", + "Oryza sativa\trice\t1\t1\t11\t4530\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza sativa", + "Oryza sativa f. 
spontanea\t-\t1\t1\t11\t4536\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza sativa", + "Oryza sativa Indica Group\tlong-grained rice\t1\t1\t11\t39946\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza sativa", + "Oryza sativa Japonica Group\tJapanese rice\t1\t1\t11\t39947\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza sativa", + "Oryzias\t-\t1\t2\t-\t8089\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Beloniformes; Adrianichthyidae; Oryziinae; Oryzias", + "Oryzias latipes\tJapanese medaka\t1\t2\t-\t8090\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Beloniformes; Adrianichthyidae; Oryziinae; Oryzias", + "Osmia cornuta\t-\t1\t5\t-\t185587\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Megachilidae; Megachilinae; Osmia", + "Otolemur garnettii\tsmall-eared galago\t1\t2\t-\t30611\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Strepsirrhini; Lorisiformes; Galagidae; Otolemur", + "Ovis\t-\t1\t2\t-\t9935\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Caprinae; Ovis", + "Ovis aries\tsheep\t1\t2\t-\t9940\tMAM\tEukaryota; Metazoa; Chordata; 
Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Caprinae; Ovis", + "Pachypsylla venusta\thackberry petiole gall psyllid\t1\t5\t-\t38123\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Psylloidea; Aphalaridae; Pachypsylla", + "Pagrus\t-\t1\t2\t-\t8172\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Spariformes; Sparidae; Pagrus", + "Pagrus major\tred seabream\t1\t2\t-\t143350\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Spariformes; Sparidae; Pagrus", + "Pan troglodytes\tchimpanzee\t1\t2\t-\t9598\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Pan", + "Pan troglodytes troglodytes\t-\t1\t2\t-\t37011\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Pan", + "Pan troglodytes verus\t-\t1\t2\t-\t37012\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Pan", + "Panagrolaimus\t-\t1\t5\t-\t55784\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Tylenchina; Panagrolaimomorpha; Panagrolaimoidea; Panagrolaimidae; Panagrolaimus", + "Panagrolaimus davidi\t-\t1\t5\t-\t227884\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Tylenchina; Panagrolaimomorpha; Panagrolaimoidea; Panagrolaimidae; Panagrolaimus", + "Panax ginseng\t-\t1\t1\t11\t4054\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; 
Gunneridae; Pentapetalae; asterids; campanulids; Apiales; Araliaceae; Panax", + "Panicum\t-\t1\t1\t11\t4539\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Panicinae; Panicum", + "Panicum hallii\t-\t1\t1\t11\t206008\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Panicinae; Panicum; Panicum sect. Panicum", + "Panicum hallii var. hallii\t-\t1\t1\t11\t1504633\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Panicinae; Panicum; Panicum sect. Panicum", + "Panicum virgatum\tswitchgrass\t1\t1\t11\t38727\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Panicinae; Panicum; Panicum sect. 
Hiantes", + "Panthera pardus\tleopard\t1\t2\t-\t9691\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Feliformia; Felidae; Pantherinae; Panthera", + "Papaver somniferum\topium poppy\t1\t1\t11\t3469\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Ranunculales; Papaveraceae; Papaveroideae; Papaver", + "Papilio\t-\t1\t5\t-\t7145\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Papilionoidea; Papilionidae; Papilioninae; Papilio", + "Papilio polytes\tcommon Mormon\t1\t5\t-\t76194\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Papilionoidea; Papilionidae; Papilioninae; Papilio", + "Papilio xuthus\tAsian swallowtail\t1\t5\t-\t66420\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Papilionoidea; Papilionidae; Papilioninae; Papilio", + "Papio\tbaboons\t1\t2\t-\t9554\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Papio", + "Papio anubis\tolive baboon\t1\t2\t-\t9555\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Papio", + "Paracentrotus\t-\t1\t9\t-\t7655\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Echinidae; Paracentrotus", + "Paracentrotus lividus\tcommon urchin\t1\t9\t-\t7656\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Echinidae; Paracentrotus", + "Paramecium 
tetraurelia\t-\t6\t4\t11\t5888\tINV\tEukaryota; Alveolata; Ciliophora; Intramacronucleata; Oligohymenophorea; Peniculida; Parameciidae; Paramecium", + "Parasteatoda tepidariorum\tcommon house spider\t1\t5\t-\t114398\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Araneae; Araneomorphae; Entelegynae; Araneoidea; Theridiidae; Parasteatoda", + "Patiria miniata\tbat star\t1\t9\t-\t46514\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Valvatacea; Valvatida; Asterinidae; Patiria", + "Patiria pectinifera\t-\t1\t9\t-\t7594\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Valvatacea; Valvatida; Asterinidae; Patiria", + "Pedicularis\tlouseworts\t1\t1\t11\t43174\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Pedicularideae; Pedicularis", + "Pedicularis keiskei\t-\t1\t1\t11\t1392158\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Pedicularideae; Pedicularis", + "Pelecanus crispus\tDalmatian pelican\t1\t2\t-\t36300\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Pelecanidae; Pelecanus", + "Pelodiscus sinensis\tChinese soft-shelled turtle\t1\t2\t-\t13735\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Testudines; Cryptodira; Trionychia; Trionychidae; Pelodiscus", + "Penaeus\t-\t1\t5\t-\t133894\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Dendrobranchiata; Penaeoidea; Penaeidae; Penaeus", + "Penaeus monodon\tblack tiger shrimp\t1\t5\t-\t6687\tINV\tEukaryota; Metazoa; 
Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Dendrobranchiata; Penaeoidea; Penaeidae; Penaeus", + "Penaeus vannamei\tPacific white shrimp\t1\t5\t-\t6689\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Dendrobranchiata; Penaeoidea; Penaeidae; Penaeus", + "Penium margaritaceum\t-\t1\t1\t11\t102169\tPLN\tEukaryota; Viridiplantae; Streptophyta; Zygnemophyceae; Desmidiales; Peniaceae; Penium", + "Peromyscus\t-\t1\t2\t-\t10040\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Cricetidae; Neotominae; Peromyscus", + "Peromyscus maniculatus bairdii\tprairie deer mouse\t1\t2\t-\t230844\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Cricetidae; Neotominae; Peromyscus", + "Persicaria minor\t-\t1\t1\t11\t488003\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Polygonaceae; Polygonoideae; Persicarieae; Persicaria", + "Petrolisthes\t-\t1\t5\t-\t84661\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Anomura; Galatheoidea; Porcellanidae; Petrolisthes", + "Petrolisthes cinctipes\tflat porcelain crab\t1\t5\t-\t88211\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Anomura; Galatheoidea; Porcellanidae; Petrolisthes", + "Petromyzon\t-\t1\t2\t-\t7756\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Cyclostomata; Hyperoartia; Petromyzontiformes; Petromyzontidae; Petromyzon", + "Petromyzon marinus\tsea lamprey\t1\t2\t-\t7757\tVRT\tEukaryota; Metazoa; Chordata; 
Craniata; Vertebrata; Cyclostomata; Hyperoartia; Petromyzontiformes; Petromyzontidae; Petromyzon", + "Petunia integrifolia subsp. inflata\t-\t1\t1\t11\t212142\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Petunioideae; Petunia", + "Phaeodactylum\t-\t1\t1\t11\t2849\tPLN\tEukaryota; Stramenopiles; Bacillariophyta; Bacillariophyceae; Bacillariophycidae; Naviculales; Phaeodactylaceae; Phaeodactylum", + "Phaeodactylum tricornutum\t-\t1\t1\t11\t2850\tPLN\tEukaryota; Stramenopiles; Bacillariophyta; Bacillariophyceae; Bacillariophycidae; Naviculales; Phaeodactylaceae; Phaeodactylum", + "Phaethon lepturus\tWhite-tailed tropicbird\t1\t2\t-\t97097\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Phaethontidae; Phaethon", + "Phalacrocorax carbo\tgreat cormorant\t1\t2\t-\t9209\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Phalacrocoracidae; Phalacrocorax", + "Phalaenopsis aphrodite\t-\t1\t1\t11\t212056\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae; Epidendroideae; Vandeae; Aeridinae; Phalaenopsis", + "Phalaenopsis equestris\t-\t1\t1\t11\t78828\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae; Epidendroideae; Vandeae; Aeridinae; Phalaenopsis", + "Phaseolus\t-\t1\t1\t11\t3883\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; 
NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Phaseolus", + "Phaseolus coccineus\t-\t1\t1\t11\t3886\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Phaseolus", + "Phaseolus vulgaris\t-\t1\t1\t11\t3885\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Phaseolus", + "Phoca largha\tspotted seal\t1\t2\t-\t39090\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Phocidae; Phoca", + "Phoenix dactylifera\tdate palm\t1\t1\t11\t42345\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Arecaceae; Coryphoideae; Phoeniceae; Phoenix", + "Photinus pyralis\tcommon eastern firefly\t1\t5\t-\t7054\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Coleoptera; Polyphaga; Elateriformia; Elateroidea; Lampyridae; Lampyrinae; Photinus", + "Physalis peruviana\t-\t1\t1\t11\t126903\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Physaleae; Physalis", + "Physcomitrella\t-\t1\t1\t11\t3217\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Bryophyta; Bryophytina; Bryopsida; Funariidae; Funariales; Funariaceae; Physcomitrella", + "Physcomitrella patens\t-\t1\t1\t11\t3218\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Bryophyta; Bryophytina; Bryopsida; Funariidae; Funariales; 
Funariaceae; Physcomitrella", + "Physeter catodon\tsperm whale\t1\t2\t-\t9755\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Odontoceti; Physeteridae; Physeter", + "Phytophthora\t-\t1\t1\t11\t4783\tPLN\tEukaryota; Stramenopiles; Oomycetes; Peronosporales; Peronosporaceae; Phytophthora", + "Phytophthora infestans\tpotato late blight agent\t1\t1\t11\t4787\tPLN\tEukaryota; Stramenopiles; Oomycetes; Peronosporales; Peronosporaceae; Phytophthora", + "Picea\t-\t1\t1\t11\t3328\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Pinales; Pinaceae; Picea", + "Picea glauca\twhite spruce\t1\t1\t11\t3330\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Pinales; Pinaceae; Picea", + "Picea sitchensis\tSitka spruce\t1\t1\t11\t3332\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Pinales; Pinaceae; Picea", + "Piliocolobus tephrosceles\tUgandan red Colobus\t1\t2\t-\t591936\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Colobinae; Piliocolobus", + "Pimephales\t-\t1\t2\t-\t51137\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Pimephales", + "Pimephales promelas\tfathead minnow\t1\t2\t-\t90988\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Pimephales", + "Pinus\t-\t1\t1\t11\t139271\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Pinales; Pinaceae; Pinus; Pinus", + "Pinus sylvestris\tScots pine\t1\t1\t11\t3349\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; 
Spermatophyta; Pinidae; Pinales; Pinaceae; Pinus; Pinus", + "Pinus taeda\tloblolly pine\t1\t1\t11\t3352\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Pinales; Pinaceae; Pinus; Pinus", + "Pisum sativum\tpea\t1\t1\t11\t3888\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Fabeae; Pisum", + "Pisum sativum subsp. sativum\t-\t1\t1\t11\t208194\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Fabeae; Pisum", + "Plasmodium\t-\t1\t4\t11\t5820\tINV\tEukaryota; Alveolata; Apicomplexa; Aconoidasida; Haemosporida; Plasmodiidae; Plasmodium", + "Plasmodium falciparum\tmalaria parasite P. 
falciparum\t1\t4\t11\t5833\tINV\tEukaryota; Alveolata; Apicomplexa; Aconoidasida; Haemosporida; Plasmodiidae; Plasmodium; Plasmodium (Laverania)", + "Platynereis dumerilii\tDumeril's clam worm\t1\t5\t-\t6359\tINV\tEukaryota; Metazoa; Lophotrochozoa; Annelida; Polychaeta; Palpata; Aciculata; Phyllodocida; Nereididae; Platynereis", + "Plutella xylostella\tdiamondback moth\t1\t5\t-\t51655\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Yponomeutoidea; Plutellidae; Plutella", + "Poeciliopsis prolifica\tblackstripe livebearer\t1\t2\t-\t188132\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Poeciliidae; Poeciliinae; Poeciliopsis", + "Pogonomyrmex californicus\t-\t1\t5\t-\t144036\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Myrmicinae; Pogonomyrmex", + "Pongo abelii\tSumatran orangutan\t1\t2\t-\t9601\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Pongo", + "Populus\tpoplars\t1\t1\t11\t3689\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Salicaceae; Saliceae; Populus", + "Populus euphratica\tEuphrates poplar\t1\t1\t11\t75702\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Salicaceae; Saliceae; Populus", + "Populus trichocarpa\tblack cottonwood\t1\t1\t11\t3694\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; 
eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Salicaceae; Saliceae; Populus", + "Porphyridium\t-\t1\t1\t11\t2791\tPLN\tEukaryota; Rhodophyta; Bangiophyceae; Porphyridiales; Porphyridiaceae; Porphyridium", + "Porphyridium purpureum\t-\t1\t1\t11\t35688\tPLN\tEukaryota; Rhodophyta; Bangiophyceae; Porphyridiales; Porphyridiaceae; Porphyridium", + "Priapulus caudatus\t-\t1\t5\t-\t37621\tINV\tEukaryota; Metazoa; Ecdysozoa; Scalidophora; Priapulida; Priapulimorpha; Priapulimorphida; Priapulidae; Priapulus", + "Pristionchus\t-\t1\t5\t-\t54125\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Diplogasteromorpha; Diplogasteroidea; Neodiplogasteridae; Pristionchus", + "Pristionchus mayeri\t-\t1\t5\t-\t1317129\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Diplogasteromorpha; Diplogasteroidea; Neodiplogasteridae; Pristionchus", + "Pristionchus pacificus\t-\t1\t5\t-\t54126\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Diplogasteromorpha; Diplogasteroidea; Neodiplogasteridae; Pristionchus", + "Proasellus aragonensis\t-\t1\t5\t-\t1281939\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus arthrodilus\t-\t1\t5\t-\t1281940\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus assaforensis\t-\t1\t5\t-\t1282049\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus beticus\t-\t1\t5\t-\t1281946\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; 
Proasellus", + "Proasellus cantabricus\t-\t1\t5\t-\t1281948\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus cavaticus\t-\t1\t5\t-\t1281949\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus coiffaiti\t-\t1\t5\t-\t1281953\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus coxalis\t-\t1\t5\t-\t63229\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus ebrensis\t-\t1\t5\t-\t1281961\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus escolai\t-\t1\t5\t-\t1281963\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus grafi\t-\t1\t5\t-\t1281973\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus granadensis\t-\t1\t5\t-\t1281974\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus hercegovinensis\t-\t1\t5\t-\t1281977\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus 
ibericus\t-\t1\t5\t-\t1281981\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus jaloniacus\t-\t1\t5\t-\t1281986\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus karamani\t-\t1\t5\t-\t1281987\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus margalefi\t-\t1\t5\t-\t1281998\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus meridianus\t-\t1\t5\t-\t1282001\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus ortizi\t-\t1\t5\t-\t1282012\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus parvulus\t-\t1\t5\t-\t1282015\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus racovitzai\t-\t1\t5\t-\t1282023\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus rectus\t-\t1\t5\t-\t1282025\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Proasellus 
solanasi\t-\t1\t5\t-\t1282031\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus", + "Procambarus clarkii\tred swamp crayfish\t1\t5\t-\t6728\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Astacoidea; Cambaridae; Procambarus", + "Protobothrops mucrosquamatus\t-\t1\t2\t-\t103944\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Viperidae; Crotalinae; Protobothrops", + "Protophormia terraenovae\tnorthern blowfly\t1\t5\t-\t34676\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Oestroidea; Calliphoridae; Chrysomyinae; Protophormia", + "Prunus\t-\t1\t1\t11\t3754\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Amygdaloideae; Amygdaleae; Prunus", + "Prunus armeniaca\tapricot\t1\t1\t11\t36596\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Amygdaloideae; Amygdaleae; Prunus", + "Prunus persica\tpeach\t1\t1\t11\t3760\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Amygdaloideae; Amygdaleae; Prunus", + "Pseudodiploria strigosa\t-\t1\t4\t-\t1428006\tINV\tEukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Faviina; Mussidae; Faviinae; Pseudodiploria", + "Pseudomonas aeruginosa\t-\t11\t0\t-\t287\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; 
Pseudomonadales; Pseudomonadaceae; Pseudomonas", + "Pseudomonas sp. 286\t-\t11\t0\t-\t1705700\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Pseudomonadaceae; Pseudomonas", + "Pseudorca crassidens\tfalse killer whale\t1\t2\t-\t82174\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Odontoceti; Delphinidae; Pseudorca", + "Pterocles gutturalis\tyellow-throated sandgrouse\t1\t2\t-\t240206\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Ciconiiformes; Pteroclidae; Pterocles", + "Pteronotus parnellii\tParnell's mustached bat\t1\t2\t-\t59476\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Mormoopidae; Pteronotus", + "Pteropus alecto\tblack flying fox\t1\t2\t-\t9402\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Pteropus", + "Pteropus vampyrus\tlarge flying fox\t1\t2\t-\t132908\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Pteropus", + "Ptychodera flava\t-\t1\t9\t-\t63121\tINV\tEukaryota; Metazoa; Hemichordata; Enteropneusta; Ptychoderidae; Ptychodera", + "Pundamilia nyererei\t-\t1\t2\t-\t303518\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; Pundamilia", + "Punica granatum\tpomegranate\t1\t1\t11\t22663\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; 
Pentapetalae; rosids; malvids; Myrtales; Lythraceae; Punica", + "Pygocentrus nattereri\tred-bellied piranha\t1\t2\t-\t42514\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Characiformes; Characoidei; Pygocentrus", + "Pyrus x bretschneideri\tChinese white pear\t1\t1\t11\t225117\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Amygdaloideae; Maleae; Pyrus", + "Pythium\t-\t1\t1\t11\t4797\tPLN\tEukaryota; Stramenopiles; Oomycetes; Pythiales; Pythiaceae; Pythium", + "Pythium ultimum DAOM BR144\t-\t1\t1\t11\t431595\tPLN\tEukaryota; Stramenopiles; Oomycetes; Pythiales; Pythiaceae; Pythium", + "Python bivittatus\tBurmese python\t1\t2\t-\t176946\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Henophidia; Pythonidae; Python", + "Quercus\t-\t1\t1\t11\t3511\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Fagaceae; Quercus", + "Quercus robur\t-\t1\t1\t11\t38942\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Fagaceae; Quercus", + "Quercus suber\t-\t1\t1\t11\t58331\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Fagaceae; Quercus", + "Ramazzottius varieornatus\t-\t1\t5\t-\t947166\tINV\tEukaryota; Metazoa; Ecdysozoa; Tardigrada; Eutardigrada; Parachela; Ramazzottiidae; Ramazzottius", + "Ramulus artemis\t-\t1\t5\t-\t1390046\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; 
Polyneoptera; Phasmatodea; Verophasmatodea; Anareolatae; Phasmatidae; Phasmatinae; Clitumnini; Ramulus", + "Rana catesbeiana\tAmerican bullfrog\t1\t2\t-\t8400\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Anura; Neobatrachia; Ranoidea; Ranidae; Rana; Aquarana", + "Raphanus\t-\t1\t1\t11\t3725\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Raphanus", + "Raphanus sativus\tradish\t1\t1\t11\t3726\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Raphanus", + "Rattus\t-\t1\t2\t-\t10114\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Murinae; Rattus", + "Rattus norvegicus\tNorway rat\t1\t2\t-\t10116\tROD\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Murinae; Rattus", + "Rauvolfia serpentina\tserpentwood\t1\t1\t11\t4060\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Apocynaceae; Rauvolfioideae; Vinceae; Rauvolfiinae; Rauvolfia", + "Reaumuria trigyna\t-\t1\t1\t11\t1091135\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Tamaricaceae; Reaumuria", + "Rhagoletis zephyria\tsnowberry fruit fly\t1\t5\t-\t28612\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Tephritoidea; Tephritidae; Rhagoletis", 
+ "Rhincodon typus\twhale shark\t1\t2\t-\t259920\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Chondrichthyes; Elasmobranchii; Galeomorphii; Galeoidea; Orectolobiformes; Rhincodontidae; Rhincodon", + "Rhinolophus sinicus\tChinese rufous horseshoe bat\t1\t2\t-\t89399\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Rhinolophidae; Rhinolophinae; Rhinolophus", + "Rhinopithecus bieti\tblack snub-nosed monkey\t1\t2\t-\t61621\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Colobinae; Rhinopithecus", + "Rhinopithecus roxellana\tgolden snub-nosed monkey\t1\t2\t-\t61622\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Colobinae; Rhinopithecus", + "Rhipicephalus\t-\t1\t5\t-\t34630\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Rhipicephalinae; Rhipicephalus", + "Rhipicephalus microplus\tsouthern cattle tick\t1\t5\t-\t6941\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Rhipicephalinae; Rhipicephalus; Boophilus", + "Rhizobiales bacterium\t-\t11\t0\t-\t1909294\tBCT\tBacteria; Proteobacteria; Alphaproteobacteria; Rhizobiales", + "Ricinus communis\tcastor bean\t1\t1\t11\t3988\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Acalyphoideae; Acalypheae; Ricinus", + "Saccharomyces cerevisiae\tbaker's yeast\t1\t3\t-\t4932\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces", + "Saccharomyces 
pastorianus\t-\t1\t3\t-\t27292\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces", + "Saccharum\t-\t1\t1\t11\t4546\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Saccharinae; Saccharum", + "Saccharum hybrid cultivar SP80-3280\t-\t1\t1\t11\t193079\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Saccharinae; Saccharum; Saccharum officinarum complex", + "Saccoglossus\t-\t1\t5\t-\t10222\tINV\tEukaryota; Metazoa; Hemichordata; Enteropneusta; Harrimaniidae; Saccoglossus", + "Saccoglossus kowalevskii\t-\t1\t9\t-\t10224\tINV\tEukaryota; Metazoa; Hemichordata; Enteropneusta; Harrimaniidae; Saccoglossus", + "Saimiri boliviensis boliviensis\tBolivian squirrel monkey\t1\t2\t-\t39432\tPRI\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Platyrrhini; Cebidae; Saimiriinae; Saimiri", + "Salicornia europaea\t-\t1\t1\t11\t206448\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Salicornioideae; Salicornia; Salicornia subg. 
Salicornia", + "Salmo\t-\t1\t2\t-\t8028\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Salmo", + "Salmo salar\tAtlantic salmon\t1\t2\t-\t8030\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Salmo", + "Salmonella enterica\t-\t11\t0\t-\t28901\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella", + "Salmonella enterica subsp. enterica serovar Typhi\t-\t11\t0\t-\t90370\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella", + "Salmonella enterica subsp. enterica serovar Typhimurium\t-\t11\t0\t-\t90371\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella", + "Salvelinus alpinus\tArctic char\t1\t2\t-\t8036\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Salvelinus", + "Sarcophilus harrisii\tTasmanian devil\t1\t2\t-\t9305\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Metatheria; Dasyuromorphia; Dasyuridae; Sarcophilus", + "Schistosoma\t-\t1\t9\t-\t6181\tINV\tEukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma", + "Schistosoma japonicum\t-\t1\t9\t-\t6182\tINV\tEukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma", + "Schistosoma mansoni\t-\t1\t9\t-\t6183\tINV\tEukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma", + "Schizosaccharomyces\t-\t1\t4\t-\t4895\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Taphrinomycotina; 
Schizosaccharomycetes; Schizosaccharomycetales; Schizosaccharomycetaceae; Schizosaccharomyces", + "Schizosaccharomyces pombe\tfission yeast\t1\t4\t-\t4896\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Taphrinomycotina; Schizosaccharomycetes; Schizosaccharomycetales; Schizosaccharomycetaceae; Schizosaccharomyces", + "Schmidtea mediterranea\t-\t1\t9\t-\t79327\tINV\tEukaryota; Metazoa; Platyhelminthes; Rhabditophora; Seriata; Tricladida; Continenticola; Geoplanoidea; Dugesiidae; Schmidtea", + "Scylla olivacea\torange mud crab\t1\t5\t-\t85551\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Brachyura; Eubrachyura; Portunoidea; Portunidae; Scylla", + "Selaginella\t-\t1\t1\t11\t3246\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Lycopodiopsida; Selaginellales; Selaginellaceae; Selaginella", + "Selaginella moellendorffii\t-\t1\t1\t11\t88036\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Lycopodiopsida; Selaginellales; Selaginellaceae; Selaginella", + "Serinus canaria\tcommon canary\t1\t2\t-\t9135\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passeroidea; Fringillidae; Carduelinae; Serinus", + "Seriola dumerili\tgreater amberjack\t1\t2\t-\t41447\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Carangaria; Carangiformes; Carangidae; Seriola", + "Seriola lalandi dorsalis\t-\t1\t2\t-\t1841481\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Carangaria; Carangiformes; Carangidae; Seriola", + "Serratia marcescens\t-\t11\t0\t-\t615\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Yersiniaceae; 
Serratia", + "Sesamum indicum\tsesame\t1\t1\t11\t4182\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Pedaliaceae; Sesamum", + "Setaria\t-\t1\t1\t11\t4554\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Cenchrinae; Setaria", + "Setaria italica\tfoxtail millet\t1\t1\t11\t4555\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Cenchrinae; Setaria", + "Setaria viridis\t-\t1\t1\t11\t4556\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Cenchrinae; Setaria", + "Shigella flexneri\t-\t11\t0\t-\t623\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Shigella", + "Shigella sonnei\t-\t11\t0\t-\t624\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Shigella", + "Sinocyclocheilus angustiporus\t-\t1\t2\t-\t307947\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Sinocyclocheilus", + "Sinocyclocheilus anophthalmus\teyeless golden-line fish\t1\t2\t-\t307955\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Sinocyclocheilus", + "Sinocyclocheilus anshuiensis\t-\t1\t2\t-\t1608454\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Sinocyclocheilus", + "Sinocyclocheilus 
grahami\t-\t1\t2\t-\t75366\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Sinocyclocheilus", + "Sinocyclocheilus rhinocerous\t-\t1\t2\t-\t307959\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Sinocyclocheilus", + "Sipyloidea sipylus\t-\t1\t5\t-\t202427\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Phasmatodea; Verophasmatodea; Anareolatae; Lonchodidae; Necrosciinae; Sipyloidea", + "Sisymbrium\t-\t1\t1\t11\t3729\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Sisymbrieae; Sisymbrium", + "Sisymbrium irio\t-\t1\t1\t11\t3730\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Sisymbrieae; Sisymbrium", + "Solanum\t-\t1\t1\t11\t4107\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum", + "Solanum lycopersicum\ttomato\t1\t1\t11\t4081\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum; Lycopersicon", + "Solanum melongena\teggplant\t1\t1\t11\t4111\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum", + "Solanum 
phureja\t-\t1\t1\t11\t172790\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum", + "Solanum torvum\t-\t1\t1\t11\t119830\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum", + "Solanum tuberosum\tpotato\t1\t1\t11\t4113\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum", + "Solenopsis invicta\tred fire ant\t1\t5\t-\t13686\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Myrmicinae; Solenopsis", + "Sorex araneus\tEuropean shrew\t1\t2\t-\t42254\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Eulipotyphla; Soricidae; Soricinae; Sorex", + "Sorghum\t-\t1\t1\t11\t4557\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Sorghinae; Sorghum", + "Sorghum bicolor\tsorghum\t1\t1\t11\t4558\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Sorghinae; Sorghum", + "Sparus aurata\tgilthead seabream\t1\t2\t-\t8175\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Spariformes; Sparidae; Sparus", + "Speleonectes cf. 
tulumensis BMR-2011\t-\t1\t5\t-\t1032549\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Remipedia; Nectiopoda; Speleonectidae; Speleonectes", + "Sphaerechinus granularis\t-\t1\t9\t-\t39374\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Temnopleuroida; Toxopneustidae; Sphaerechinus", + "Spinacia oleracea\tspinach\t1\t1\t11\t3562\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Chenopodioideae; Anserineae; Spinacia", + "Spirometra erinaceieuropaei\t-\t1\t9\t-\t99802\tINV\tEukaryota; Metazoa; Platyhelminthes; Cestoda; Eucestoda; Diphyllobothriidea; Diphyllobothriidae; Spirometra", + "Spodoptera\t-\t1\t5\t-\t7106\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Noctuoidea; Noctuidae; Amphipyrinae; Spodoptera", + "Spodoptera exigua\tbeet armyworm\t1\t5\t-\t7107\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Noctuoidea; Noctuidae; Amphipyrinae; Spodoptera", + "Spodoptera frugiperda\tfall armyworm\t1\t5\t-\t7108\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Noctuoidea; Noctuidae; Amphipyrinae; Spodoptera", + "Squalus acanthias\tspiny dogfish\t1\t2\t-\t7797\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Chondrichthyes; Elasmobranchii; Squalimorphii; Squaliformes; Squalidae; Squalus", + "Staphylococcus aureus\t-\t11\t0\t-\t1280\tBCT\tBacteria; Firmicutes; Bacilli; Bacillales; Staphylococcaceae; Staphylococcus", + "Staphylococcus epidermidis\t-\t11\t0\t-\t1282\tBCT\tBacteria; Firmicutes; Bacilli; Bacillales; Staphylococcaceae; Staphylococcus", + "Stegodyphus mimosarum\t-\t1\t5\t-\t407821\tINV\tEukaryota; 
Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Araneae; Araneomorphae; Entelegynae; Eresoidea; Eresidae; Stegodyphus", + "Stenotrophomonas maltophilia\t-\t11\t0\t-\t40324\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Xanthomonadales; Xanthomonadaceae; Stenotrophomonas; Stenotrophomonas maltophilia group", + "Streptococcus agalactiae\t-\t11\t0\t-\t1311\tBCT\tBacteria; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus", + "Streptococcus pneumoniae\t-\t11\t0\t-\t1313\tBCT\tBacteria; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus", + "Streptococcus suis\t-\t11\t0\t-\t1307\tBCT\tBacteria; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus", + "Strongylocentrotus\t-\t1\t9\t-\t7664\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Strongylocentrotidae; Strongylocentrotus", + "Strongylocentrotus purpuratus\tpurple sea urchin\t1\t9\t-\t7668\tINV\tEukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Strongylocentrotidae; Strongylocentrotus", + "Stylophora pistillata\t-\t1\t4\t-\t50429\tINV\tEukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Pocilloporidae; Stylophora", + "Sus\t-\t1\t2\t-\t9822\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; Suidae; Sus", + "Sus scrofa\tpig\t1\t2\t-\t9823\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; Suidae; Sus", + "Symbiodinium sp. A1\t-\t1\t4\t11\t503409\tPLN\tEukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Symbiodinium; Symbiodinium sp. clade A", + "Symbiodinium sp. B2\t-\t1\t4\t11\t154560\tPLN\tEukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Symbiodinium; Symbiodinium sp. 
clade B", "synthetic construct\t-\t11\t0\t11\t32630\tSYN\tother sequences; artificial sequences", - "Synura sp. LO234KE\t-\t1\t1\t11\t1825120\tPLN\tcellular organisms; Eukaryota; Stramenopiles; Synurophyceae; Synurales; Mallomonadaceae; Synura", - "Taeniopygia guttata\tzebra finch\t1\t2\t-\t59729\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passeroidea; Estrildidae; Estrildinae; Taeniopygia", - "Takifugu rubripes\ttorafugu\t1\t2\t-\t31033\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Tetraodontiformes; Tetraodontoidei; Tetradontoidea; Tetraodontidae; Takifugu", - "Tarenaya hassleriana\t-\t1\t1\t11\t28532\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Cleomaceae; Tarenaya", - "Tauraco erythrolophus\tred-crested turaco\t1\t2\t-\t121530\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Musophagiformes; Musophagidae; Tauraco", - "Taxus wallichiana 
var. chinensis\t-\t1\t1\t11\t29808\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Cupressales; Taxaceae; Taxus; Taxus wallichiana", - "Telenomus podisi\t-\t1\t5\t-\t408256\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Platygastroidea; Scelionidae; Telenominae; Telenomus", - "Teleogryllus commodus\t-\t1\t5\t-\t672150\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Orthopteroidea; Orthoptera; Ensifera; Grylloidea; Gryllidae; Gryllinae; Teleogryllus", - "Teleopsis dalmanni\t-\t1\t5\t-\t139649\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Diopsoidea; Diopsidae; Teleopsis", - "Teleopsis whitei\t-\t1\t5\t-\t139651\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Diopsoidea; Diopsidae; Teleopsis", - "Termitomyces clypeatus MTCC 5091\t-\t1\t4\t-\t1282671\tPLN\tcellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Basidiomycota; Agaricomycotina; Agaricomycetes; Agaricomycetidae; Agaricales; Lyophyllaceae; Termitomyces; Termitomyces clypeatus", - "Tetramorium 
bicarinatum\t-\t1\t5\t-\t219812\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Tetramoriini; Tetramorium", - "Tetranychus urticae\ttwo-spotted spider mite\t1\t5\t-\t32264\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Acariformes; Trombidiformes; Prostigmata; Eleutherengona; Raphignathae; Tetranychoidea; Tetranychidae; Tetranychus", - "Tetraodon nigroviridis\tspotted green pufferfish\t1\t2\t-\t99883\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Tetraodontiformes; Tetraodontoidei; Tetradontoidea; Tetraodontidae; Tetraodon", - "Tetraselmis subcordiformis\t-\t1\t1\t11\t3161\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Chlorophyta; Chlorodendrophyceae; Chlorodendrales; Chlorodendraceae; Tetraselmis", - "Thamnophis sirtalis\t-\t1\t2\t-\t35019\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Colubridae; Natricinae; Thamnophis", - "Theobroma cacao\tcacao\t1\t1\t11\t3641\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; 
Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Byttnerioideae; Theobroma", - "Thermobia domestica\tfirebrat\t1\t5\t-\t89055\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Zygentoma; Lepismatidae; Thermobia", - "Tigriopus californicus\t-\t1\t5\t-\t6832\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Copepoda; Neocopepoda; Podoplea; Harpacticoida; Harpacticidae; Tigriopus", - "Tinamus guttatus\twhite-throated tinamou\t1\t2\t-\t94827\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Palaeognathae; Tinamiformes; Tinamidae; Tinamus", - "Trachemys scripta elegans\t-\t1\t2\t-\t31138\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Testudines; Cryptodira; Durocryptodira; Testudinoidea; Emydidae; Trachemys; Trachemys scripta", - "Trachymyrmex cornetzi\t-\t1\t5\t-\t471704\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Attini; Trachymyrmex", - "Trematomus bernacchii\temerald 
rockcod\t1\t2\t-\t40690\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Perciformes; Notothenioidei; Nototheniidae; Trematomus", - "Trichechus manatus latirostris\tFlorida manatee\t1\t2\t-\t127582\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Afrotheria; Sirenia; Trichechidae; Trichechus; Trichechus manatus", - "Trichinella spiralis\t-\t1\t5\t-\t6334\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Enoplea; Dorylaimia; Trichocephalida; Trichinellidae; Trichinella", - "Trichobilharzia regenti\t-\t1\t9\t-\t157069\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Trichobilharzia", - "Trichomonas vaginalis G3\t-\t1\t0\t11\t412133\tINV\tcellular organisms; Eukaryota; Parabasalia; Trichomonadida; Trichomonadidae; Trichomonas; Trichomonas vaginalis", - "Trichoplusia ni\tcabbage looper\t1\t5\t-\t7111\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Noctuoidea; Noctuidae; Plusiinae; Trichoplusia", - "Trifolium pratense\t-\t1\t1\t11\t57577\tPLN\tcellular organisms; Eukaryota; 
Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium", - "Tripterygion delaisi\t-\t1\t2\t-\t57862\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Blenniimorphae; Blenniiformes; Blennioidei; Tripterygiidae; Tripterygiinae; Tripterygion", - "Tripterygium wilfordii\t-\t1\t1\t11\t458696\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Celastrales; Celastraceae; Tripterygium", - "Triticum aestivum\tbread wheat\t1\t1\t11\t4565\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum", - "Triticum turgidum\t-\t1\t1\t11\t4571\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum", - "Triticum urartu\t-\t1\t1\t11\t4572\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; 
Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum", - "Trypanosoma cruzi strain CL Brener\t-\t1\t4\t11\t353153\tINV\tcellular organisms; Eukaryota; Euglenozoa; Kinetoplastida; Trypanosomatidae; Trypanosoma; Schizotrypanum; Trypanosoma cruzi", - "Tupaia chinensis\tChinese tree shrew\t1\t2\t-\t246437\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Scandentia; Tupaiidae; Tupaia", - "Turritopsis sp. SK-2016\t-\t1\t4\t-\t1784781\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Hydrozoa; Hydroidolina; Anthoathecata; Filifera; Oceaniidae; Turritopsis", - "Tursiops truncatus\tbottlenosed dolphin\t1\t2\t-\t9739\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Odontoceti; Delphinidae; Tursiops", - "Tyto alba\tbarn owl\t1\t2\t-\t56313\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Strigiformes; Tytonidae; Tyto", - "uncultured archaeon\t-\t11\t0\t-\t115547\tENV\tcellular organisms; Archaea; environmental samples", - "uncultured bacterium\t-\t11\t0\t-\t77133\tENV\tcellular organisms; Bacteria; environmental samples", - "uncultured eukaryote\t-\t1\t1\t11\t100272\tENV\tcellular organisms; Eukaryota; 
environmental samples", - "uncultured fungus\t-\t1\t4\t-\t175245\tENV\tcellular organisms; Eukaryota; Opisthokonta; Fungi; environmental samples", + "Taeniopygia\t-\t1\t2\t-\t59728\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passeroidea; Estrildidae; Estrildinae; Taeniopygia", + "Taeniopygia guttata\tzebra finch\t1\t2\t-\t59729\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passeroidea; Estrildidae; Estrildinae; Taeniopygia", + "Takifugu\t-\t1\t2\t-\t31032\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Tetraodontiformes; Tetradontoidea; Tetraodontidae; Takifugu", + "Takifugu rubripes\ttorafugu\t1\t2\t-\t31033\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Tetraodontiformes; Tetradontoidea; Tetraodontidae; Takifugu", + "Tauraco erythrolophus\tred-crested turaco\t1\t2\t-\t121530\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Musophagiformes; Musophagidae; Tauraco", + "Teladorsagia circumcincta\t-\t1\t5\t-\t45464\tINV\tEukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Strongylida; Trichostrongyloidea; Haemonchidae; Teladorsagia", + "Teleopsis dalmanni\t-\t1\t5\t-\t139649\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Diopsoidea; Diopsidae; Teleopsis", + "Terrapene mexicana triunguis\tThree-toed box turtle\t1\t2\t-\t1415176\tVRT\tEukaryota; Metazoa; Chordata; Craniata; 
Vertebrata; Euteleostomi; Archelosauria; Testudines; Cryptodira; Durocryptodira; Testudinoidea; Emydidae; Terrapene", + "Tetragonula carbonaria\t-\t1\t5\t-\t148810\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Tetragonula", + "Tetrahymena\t-\t6\t4\t11\t5890\tINV\tEukaryota; Alveolata; Ciliophora; Intramacronucleata; Oligohymenophorea; Hymenostomatida; Tetrahymenina; Tetrahymenidae; Tetrahymena", + "Tetrahymena thermophila\t-\t6\t4\t11\t5911\tINV\tEukaryota; Alveolata; Ciliophora; Intramacronucleata; Oligohymenophorea; Hymenostomatida; Tetrahymenina; Tetrahymenidae; Tetrahymena", + "Tetranychus urticae\ttwo-spotted spider mite\t1\t5\t-\t32264\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Acari; Acariformes; Trombidiformes; Prostigmata; Eleutherengona; Raphignathae; Tetranychoidea; Tetranychidae; Tetranychus", + "Tetraodon nigroviridis\tspotted green pufferfish\t1\t2\t-\t99883\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Tetraodontiformes; Tetradontoidea; Tetraodontidae; Tetraodon", + "Theobroma\t-\t1\t1\t11\t3640\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Byttnerioideae; Theobroma", + "Theobroma cacao\tcacao\t1\t1\t11\t3641\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Byttnerioideae; Theobroma", + "Tinamus guttatus\twhite-throated tinamou\t1\t2\t-\t94827\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Palaeognathae; Tinamiformes; Tinamidae; 
Tinamus", + "Toxoplasma\t-\t1\t4\t4\t5810\tINV\tEukaryota; Alveolata; Apicomplexa; Conoidasida; Coccidia; Eucoccidiorida; Eimeriorina; Sarcocystidae; Toxoplasma", + "Toxoplasma gondii\t-\t1\t4\t4\t5811\tINV\tEukaryota; Alveolata; Apicomplexa; Conoidasida; Coccidia; Eucoccidiorida; Eimeriorina; Sarcocystidae; Toxoplasma", + "Trachemys scripta elegans\t-\t1\t2\t-\t31138\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Testudines; Cryptodira; Durocryptodira; Testudinoidea; Emydidae; Trachemys", + "Trematomus bernacchii\temerald rockcod\t1\t2\t-\t40690\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Perciformes; Notothenioidei; Nototheniidae; Trematomus", + "Tribolium\t-\t1\t1\t11\t89526\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Danthonioideae; Danthonieae; Tribolium", + "Tribolium castaneum\tred flour beetle\t1\t5\t-\t7070\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Coleoptera; Polyphaga; Cucujiformia; Tenebrionidae; Tenebrionidae incertae sedis; Tribolium", + "Trichechus manatus latirostris\tFlorida manatee\t1\t2\t-\t127582\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Afrotheria; Sirenia; Trichechidae; Trichechus", + "Trichoderma\t-\t1\t4\t-\t5543\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Hypocreomycetidae; Hypocreales; Hypocreaceae; Trichoderma", + "Trichomonas vaginalis G3\t-\t1\t0\t11\t412133\tINV\tEukaryota; Parabasalia; Trichomonadida; Trichomonadidae; Trichomonas", + "Trichoplusia ni\tcabbage looper\t1\t5\t-\t7111\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Noctuoidea; Noctuidae; 
Plusiinae; Trichoplusia", + "Trichosurus\tbrush-tailed possums\t1\t2\t-\t9336\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Metatheria; Diprotodontia; Phalangeridae; Trichosurus", + "Trichosurus vulpecula\tcommon brushtail\t1\t2\t-\t9337\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Metatheria; Diprotodontia; Phalangeridae; Trichosurus", + "Trifolium\t-\t1\t1\t11\t3898\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Trifolieae; Trifolium", + "Trifolium pratense\t-\t1\t1\t11\t57577\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Trifolieae; Trifolium", + "Triphysaria\t-\t1\t1\t11\t64092\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Pedicularideae; Castillejinae; Triphysaria", + "Tripterygion delaisi\t-\t1\t2\t-\t57862\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Blenniimorphae; Blenniiformes; Blennioidei; Tripterygiidae; Tripterygiinae; Tripterygion", + "Triticum\t-\t1\t1\t11\t4564\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum", + "Triticum aestivum\tbread wheat\t1\t1\t11\t4565\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; 
Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum", + "Triticum turgidum\t-\t1\t1\t11\t4571\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum", + "Triticum urartu\t-\t1\t1\t11\t4572\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum", + "Trypanosoma\t-\t1\t4\t11\t5690\tINV\tEukaryota; Euglenozoa; Kinetoplastida; Trypanosomatidae; Trypanosoma", + "Trypanosoma brucei\t-\t1\t4\t11\t5691\tINV\tEukaryota; Euglenozoa; Kinetoplastida; Trypanosomatidae; Trypanosoma", + "Trypanosoma cruzi\t-\t1\t4\t11\t5693\tINV\tEukaryota; Euglenozoa; Kinetoplastida; Trypanosomatidae; Trypanosoma; Schizotrypanum", + "Trypoxylus dichotomus\t-\t1\t5\t-\t273928\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Coleoptera; Polyphaga; Scarabaeiformia; Scarabaeidae; Dynastinae; Trypoxylus", + "Tuber\ttruffles\t1\t4\t-\t36048\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Pezizomycetes; Pezizales; Tuberaceae; Tuber", + "Tuber melanosporum\tPerigord truffle\t1\t4\t-\t39416\tPLN\tEukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Pezizomycetes; Pezizales; Tuberaceae; Tuber", + "Tupaia\t-\t1\t2\t-\t9394\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Scandentia; Tupaiidae; Tupaia", + "Tupaia chinensis\tChinese tree shrew\t1\t2\t-\t246437\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Scandentia; Tupaiidae; Tupaia", + "Turritopsis sp. 
SK-2016\t-\t1\t4\t-\t1784781\tINV\tEukaryota; Metazoa; Cnidaria; Hydrozoa; Hydroidolina; Anthoathecata; Filifera; Oceaniidae; Turritopsis", + "Tursiops truncatus\tbottlenose dolphin\t1\t2\t-\t9739\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Odontoceti; Delphinidae; Tursiops", + "Tyto alba\tBarn owl\t1\t2\t-\t56313\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Strigiformes; Tytonidae; Tyto", + "uncultured archaeon\t-\t11\t0\t-\t115547\tENV\tArchaea; environmental samples", + "uncultured bacterium\t-\t11\t0\t-\t77133\tENV\tBacteria; environmental samples", + "uncultured eukaryote\t-\t1\t1\t11\t100272\tENV\tEukaryota; environmental samples", + "uncultured fungus\t-\t1\t4\t-\t175245\tENV\tEukaryota; Fungi; environmental samples", "uncultured microorganism\t-\t11\t2\t11\t358574\tENV\tunclassified sequences; environmental samples", - "uncultured Neocallimastigales\t-\t1\t0\t-\t325898\tENV\tcellular organisms; Eukaryota; Opisthokonta; Fungi; Neocallimastigomycota; Neocallimastigomycetes; Neocallimastigales; environmental samples", + "uncultured Neocallimastigales\t-\t1\t0\t-\t325898\tENV\tEukaryota; Fungi; Fungi incertae sedis; Chytridiomycota; Neocallimastigomycetes; Neocallimastigales; environmental samples", "uncultured organism\t-\t11\t2\t11\t155900\tENV\tunclassified sequences; environmental samples", - "uncultured prokaryote\t-\t11\t0\t11\t198431\tENV\tunclassified sequences; environmental samples; prokaryotic environmental samples", - "uncultured Pseudomonas sp.\t-\t11\t0\t-\t114707\tENV\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Pseudomonadaceae; Pseudomonas; environmental samples", - "uncultured soil bacterium\t-\t11\t0\t-\t164851\tENV\tcellular organisms; Bacteria; environmental samples", + "uncultured 
prokaryote\t-\t11\t0\t11\t198431\tENV\tunclassified sequences; environmental samples", "unidentified\t-\t1\t2\t11\t32644\tUNA\tunclassified sequences", - "unidentified bacterium\t-\t11\t0\t-\t1826778\tBCT\tcellular organisms; Bacteria; unclassified Bacteria; unclassified Bacteria (miscellaneous)", - "Ursus maritimus\tpolar bear\t1\t2\t-\t29073\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Ursidae; Ursus", - "Vaccinium macrocarpon\t-\t1\t1\t11\t13750\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Ericaceae; Vaccinioideae; Vaccinieae; Vaccinium", - "Vibrio cholerae\t-\t11\t0\t-\t666\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Vibrionales; Vibrionaceae; Vibrio", - "Vibrio parahaemolyticus\t-\t11\t0\t-\t670\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Vibrionales; Vibrionaceae; Vibrio; Vibrio harveyi group", - "Vicia faba\tfava bean\t1\t1\t11\t3906\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Fabeae; Vicia", - "Vicugna pacos\talpaca\t1\t2\t-\t30538\tMAM\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; 
Camelidae; Vicugna", - "Vigna angularis\tadzuki bean\t1\t1\t11\t3914\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Phaseoleae; Vigna", - "Vigna radiata\t-\t1\t1\t11\t157791\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Phaseoleae; Vigna", - "Vigna radiata var. radiata\tmung bean\t1\t1\t11\t3916\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Phaseoleae; Vigna; Vigna radiata", - "Villosa lienosa\t-\t1\t5\t-\t326719\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Palaeoheterodonta; Unionoida; Unionoidea; Unionidae; Unioninae; Villosa", - "Vitis vinifera\twine grape\t1\t1\t11\t29760\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; rosids incertae sedis; Vitales; Vitaceae; Vitis", - "Vollenhovia emeryi\t-\t1\t5\t-\t411798\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Stenammini; Vollenhovia", - "Wasmannia auropunctata\tlittle fire 
ant\t1\t5\t-\t64793\tINV\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Blepharidattini; Wasmannia", - "Withania somnifera\t-\t1\t1\t11\t126910\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Physaleae; Withania", - "Xanthomonas citri pv. citri\t-\t11\t0\t-\t434928\tBCT\tcellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Xanthomonadales; Xanthomonadaceae; Xanthomonas; Xanthomonas citri group; Xanthomonas citri", - "Xenopus laevis\tAfrican clawed frog\t1\t2\t-\t8355\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Pipoidea; Pipidae; Xenopodinae; Xenopus; Xenopus", - "Xenopus tropicalis\ttropical clawed frog\t1\t2\t-\t8364\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Pipoidea; Pipidae; Xenopodinae; Xenopus; Silurana", - "Xiphophorus maculatus\tsouthern platyfish\t1\t2\t-\t8083\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; 
Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Poeciliidae; Poeciliinae; Xiphophorus", - "Zantedeschia aethiopica\t-\t1\t1\t11\t69721\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Alismatales; Araceae; Philodendroideae; Zantedeschieae; Zantedeschia", - "Zea mays\t-\t1\t1\t11\t4577\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Tripsacinae; Zea", - "Zea mays subsp. mays\tmaize\t1\t1\t11\t381124\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Tripsacinae; Zea; Zea mays", - "Ziziphus jujuba\t-\t1\t1\t11\t326968\tPLN\tcellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rhamnaceae; Paliureae; Ziziphus", - "Zonotrichia albicollis\twhite-throated sparrow\t1\t2\t-\t44394\tVRT\tcellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passerellidae; Zonotrichia", - "Zostera noltei\t-\t1\t1\t11\t55326\tPLN\tcellular organisms; 
Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Alismatales; Zosteraceae; Zostera" + "Ursus maritimus\tpolar bear\t1\t2\t-\t29073\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Ursidae; Ursus", + "Ustilago\t-\t1\t4\t-\t5269\tPLN\tEukaryota; Fungi; Dikarya; Basidiomycota; Ustilaginomycotina; Ustilaginomycetes; Ustilaginales; Ustilaginaceae; Ustilago", + "Ustilago maydis\t-\t1\t4\t-\t5270\tPLN\tEukaryota; Fungi; Dikarya; Basidiomycota; Ustilaginomycotina; Ustilaginomycetes; Ustilaginales; Ustilaginaceae; Ustilago", + "Vibrio cholerae\t-\t11\t0\t-\t666\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Vibrionales; Vibrionaceae; Vibrio", + "Vibrio parahaemolyticus\t-\t11\t0\t-\t670\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Vibrionales; Vibrionaceae; Vibrio", + "Vicugna pacos\talpaca\t1\t2\t-\t30538\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Vicugna", + "Vigna\t-\t1\t1\t11\t3913\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Vigna", + "Vigna angularis\tadzuki bean\t1\t1\t11\t3914\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Vigna", + "Vigna unguiculata\tcowpea\t1\t1\t11\t3917\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; 
Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Vigna", + "Vitis\t-\t1\t1\t11\t3603\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; Vitales; Vitaceae; Viteae; Vitis", + "Vitis rotundifolia\t-\t1\t1\t11\t103349\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; Vitales; Vitaceae; Viteae; Vitis", + "Vitis vinifera\twine grape\t1\t1\t11\t29760\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; Vitales; Vitaceae; Viteae; Vitis", + "Volvox\t-\t1\t1\t11\t3066\tPLN\tEukaryota; Viridiplantae; Chlorophyta; Chlorophyceae; Chlamydomonadales; Volvocaceae; Volvox", + "Volvox carteri f. nagariensis\t-\t1\t1\t11\t3068\tPLN\tEukaryota; Viridiplantae; Chlorophyta; Chlorophyceae; Chlamydomonadales; Volvocaceae; Volvox", + "Vulpes vulpes\tred fox\t1\t2\t-\t9627\tMAM\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Canidae; Vulpes", + "Wasmannia auropunctata\tlittle fire ant\t1\t5\t-\t64793\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Myrmicinae; Wasmannia", + "Xanthomonas\t-\t11\t0\t-\t338\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Xanthomonadales; Xanthomonadaceae; Xanthomonas", + "Xanthomonas oryzae pv. 
oryzae\t-\t11\t0\t-\t64187\tBCT\tBacteria; Proteobacteria; Gammaproteobacteria; Xanthomonadales; Xanthomonadaceae; Xanthomonas", + "Xenopus\t-\t1\t2\t-\t8353\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Anura; Pipoidea; Pipidae; Xenopodinae; Xenopus", + "Xenopus laevis\tAfrican clawed frog\t1\t2\t-\t8355\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Anura; Pipoidea; Pipidae; Xenopodinae; Xenopus; Xenopus", + "Xenopus tropicalis\ttropical clawed frog\t1\t2\t-\t8364\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Anura; Pipoidea; Pipidae; Xenopodinae; Xenopus; Silurana", + "Zantedeschia aethiopica\t-\t1\t1\t11\t69721\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Araceae; Philodendroideae; Zantedeschieae; Zantedeschia", + "Zea\t-\t1\t1\t11\t4575\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Tripsacinae; Zea", + "Zea mays\t-\t1\t1\t11\t4577\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Tripsacinae; Zea", + "Zea mays subsp. 
mays\tmaize\t1\t1\t11\t381124\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Tripsacinae; Zea", + "Zonotrichia albicollis\twhite-throated sparrow\t1\t2\t-\t44394\tVRT\tEukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passerellidae; Zonotrichia", + "Zootermopsis nevadensis\t-\t1\t5\t-\t136037\tINV\tEukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Dictyoptera; Blattodea; Blattoidea; Termitoidae; Termopsidae; Zootermopsis", + "Zostera\t-\t1\t1\t11\t27257\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Zosteraceae; Zostera", + "Zostera marina\t-\t1\t1\t11\t29655\tPLN\tEukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Zosteraceae; Zostera" }; diff --git a/c++/src/objects/seqfeat/common_tax.txt b/c++/src/objects/seqfeat/common_tax.txt index 5e0a3622..206deffc 100644 --- a/c++/src/objects/seqfeat/common_tax.txt +++ b/c++/src/objects/seqfeat/common_tax.txt @@ -1,900 +1,900 @@ -Acacia koa - 1 1 11 468172 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Mimosoideae; Acacieae; Acacia -Acanthisitta chloris rifleman 1 2 - 57068 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; 
Aves; Neognathae; Passeriformes; Acanthisittidae; Acanthisitta -Acanthoscurria geniculata - 1 5 - 575412 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Araneae; Mygalomorphae; Theraphosidae; Acanthoscurria -Acinetobacter baumannii - 11 0 - 470 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Moraxellaceae; Acinetobacter; Acinetobacter calcoaceticus/baumannii complex -Acinetobacter pittii - 11 0 - 48296 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Moraxellaceae; Acinetobacter; Acinetobacter calcoaceticus/baumannii complex -Acinonyx jubatus cheetah 1 2 - 32536 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Feliformia; Felidae; Acinonychinae; Acinonyx -Acropora cervicornis - 1 4 - 6130 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Acroporidae; Acropora -Acropora digitifera - 1 4 - 70779 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Acroporidae; Acropora -Acropora millepora - 1 4 - 45264 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Acroporidae; Acropora -Actias selene Indian moon moth 1 5 - 37776 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; 
Ditrysia; Obtectomera; Bombycoidea; Saturniidae; Saturniinae; Saturniini; Actias -Acyrthosiphon pisum pea aphid 1 5 - 7029 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aphidiformes; Aphidomorpha; Aphidoidea; Aphididae; Aphidinae; Macrosiphini; Acyrthosiphon -Adineta vaga - 1 5 - 104782 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Rotifera; Bdelloidea; Adinetida; Adinetidae; Adineta -Aedes aegypti yellow fever mosquito 1 5 - 7159 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Nematocera; Culicomorpha; Culicoidea; Culicidae; Culicinae; Aedini; Aedes; Stegomyia -Aedes albopictus Asian tiger mosquito 1 5 - 7160 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Nematocera; Culicomorpha; Culicoidea; Culicidae; Culicinae; Aedini; Aedes; Stegomyia -Aegilops tauschii - 1 1 11 37682 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Aegilops -Aethina tumida small hive beetle 1 5 - 116153 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; 
Polyphaga; Cucujiformia; Cucujoidea; Nitidulidae; Nitidulinae; Aethina -Agapanthus praecox subsp. orientalis - 1 1 11 547170 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Amaryllidaceae; Agapanthoideae; Agapanthus; Agapanthus praecox -Agave deserti - 1 1 11 382119 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Asparagaceae; Agavoideae; Agave -Agave tequilana - 1 1 11 386106 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Asparagaceae; Agavoideae; Agave -Agrilus planipennis emerald ash borer 1 5 - 224129 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Elateriformia; Buprestoidea; Buprestidae; Agrilinae; Agrilus -Agrotis segetum turnip moth 1 5 - 47767 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Noctuoidea; Noctuidae; Noctuinae; Agrotis -Ailuropoda melanoleuca giant panda 1 2 - 9646 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; 
Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Ursidae; Ailuropoda -Alexandrium tamarense - 1 4 11 2926 PLN cellular organisms; Eukaryota; Alveolata; Dinophyceae; Gonyaulacales; Gonyaulacaceae; Alexandrium -Alligator mississippiensis American alligator 1 2 - 8496 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Crocodylia; Alligatoridae; Alligatorinae; Alligator -Alligator sinensis Chinese alligator 1 2 - 38654 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Crocodylia; Alligatoridae; Alligatorinae; Alligator -Allium cepa onion 1 1 11 4679 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Amaryllidaceae; Allioideae; Allieae; Allium -Allium fistulosum Welsh onion 1 1 11 35875 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Amaryllidaceae; Allioideae; Allieae; Allium -Allium sativum garlic 1 1 11 4682 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Amaryllidaceae; Allioideae; Allieae; Allium -Amaranthus tricolor - 1 1 11 29722 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; 
Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Amaranthaceae; Amaranthus -Amazona vittata Puerto Rican parrot 1 2 - 241585 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Psittaciformes; Psittacidae; Amazona -Ameiurus nebulosus brown bullhead 1 2 - 27778 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Characiphysae; Siluriformes; Siluroidei; Ictaluridae; Ameiurus -Amoebidium parasiticum JAP-7-2 - 1 4 - 1069442 INV cellular organisms; Eukaryota; Opisthokonta; Opisthokonta incertae sedis; Ichthyosporea; Ichthyophonida; Amoebidiaceae; Amoebidium; Amoebidium parasiticum -Anas platyrhynchos mallard 1 2 - 8839 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Anseriformes; Anatidae; Anas -Ancylostoma ceylanicum - 1 5 - 53326 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Ancylostomatoidea; Ancylostomatidae; Ancylostomatinae; Ancylostoma -Ancylostoma duodenale - 1 5 - 51022 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; 
Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Ancylostomatoidea; Ancylostomatidae; Ancylostomatinae; Ancylostoma -Anguilla anguilla European eel 1 2 - 7936 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Elopocephalai; Elopocephala; Elopomorpha; Anguilliformes; Anguillidae; Anguilla -Anguilla japonica Japanese eel 1 2 - 7937 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Elopocephalai; Elopocephala; Elopomorpha; Anguilliformes; Anguillidae; Anguilla -Anisakis simplex herring worm 1 5 - 6269 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Ascaridida; Ascaridoidea; Anisakidae; Anisakis; Anisakis simplex complex -Anneissia japonica - 1 9 - 1529436 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Pelmatozoa; Crinoidea; Articulata; Comatulida; Comatulidae; Comatulinae; Anneissia -Annulipalpia sp. 
AD-2013 - 1 5 - 1499517 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Trichoptera; Annulipalpia; Unclassified Annulipalpia -Anolis carolinensis green anole 1 2 - 28377 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Iguania; Iguanidae; Polychrotinae; Anolis -Anopheles funestus African malaria mosquito 1 5 - 62324 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Nematocera; Culicomorpha; Culicoidea; Culicidae; Anophelinae; Anopheles; Cellia; Myzomyia; funestus group; funestus subgroup -Anopheles gambiae African malaria mosquito 1 5 - 7165 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Nematocera; Culicomorpha; Culicoidea; Culicidae; Anophelinae; Anopheles; Cellia; Pyretophorus; gambiae species complex -Anopheles sinensis - 1 5 - 74873 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Nematocera; Culicomorpha; Culicoidea; Culicidae; Anophelinae; Anopheles; Anopheles; Laticorn; Myzorhynchus; hyrcanus group -Anoplophora glabripennis Asian longhorned beetle 1 5 - 
217634 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Cucujiformia; Chrysomeloidea; Cerambycidae; Lamiinae; Lamiini; Anoplophora -Anoplopoma fimbria sablefish 1 2 - 229290 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Perciformes; Cottioidei; Anoplopomatales; Anoplopomatidae; Anoplopoma -Anser cygnoides domesticus - 1 2 - 381198 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Anseriformes; Anatidae; Anser; Anser cygnoides -Antheraea assama Indian muga silkmoth 1 5 - 91021 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Saturniidae; Saturniinae; Saturniini; Antheraea -Antheraea pernyi Chinese oak silkmoth 1 5 - 7119 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; 
Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Saturniidae; Saturniinae; Saturniini; Antheraea -Antheraea yamamai Japanese oak silkmoth 1 5 - 7121 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Saturniidae; Saturniinae; Saturniini; Antheraea -Anthopleura elegantissima clonal anemone 1 4 - 6110 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Nynantheae; Actiniidae; Anthopleura -Anthoxanthum odoratum - 1 1 11 29661 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 1 (Aveneae type); Anthoxanthinae; Anthoxanthum -Anthurium andraeanum - 1 1 11 226677 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Alismatales; Araceae; Pothoideae; Potheae; Anthurium -Aotus nancymaae Ma's night monkey 1 2 - 37293 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Platyrrhini; Aotidae; Aotus -Apaloderma vittatum bar-tailed trogon 1 2 - 57397 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; 
Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Trogoniformes; Trogonidae; Apaloderma -Aphelocoma californica obscura - 1 2 - 947029 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Corvoidea; Corvidae; Aphelocoma; Aphelocoma californica -Aphyosemion striatum - 1 2 - 60296 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Aphyosemion -Apis cerana Asiatic honeybee 1 5 - 7461 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Apinae; Apini; Apis -Apis florea little honeybee 1 5 - 7463 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Apinae; Apini; Apis -Apis mellifera honey bee 1 5 - 7460 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; 
Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Apinae; Apini; Apis -Aplysia californica California sea hare 1 5 - 6500 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Euopisthobranchia; Aplysiomorpha; Aplysioidea; Aplysiidae; Aplysia -Apostichopus japonicus Japanese sea cucumber 1 9 - 307972 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Holothuroidea; Aspidochirotacea; Aspidochirotida; Stichopodidae; Apostichopus -Apteryx australis mantelli - 1 2 - 202946 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Palaeognathae; Apterygiformes; Apterygidae; Apteryx; Apteryx australis -Aquila chrysaetos canadensis - 1 2 - 216574 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Falconiformes; Accipitridae; Accipitrinae; Aquila; Aquila chrysaetos -Ara macao scarlet macaw 1 2 - 176014 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; 
Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Psittaciformes; Psittacidae; Ara -Arabidopsis lyrata subsp. lyrata - 1 1 11 81972 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Arabidopsis; Arabidopsis lyrata -Arabidopsis thaliana thale cress 1 1 11 3702 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Arabidopsis -Arachis duranensis - 1 1 11 130453 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Dalbergieae; Arachis -Arachis hypogaea peanut 1 1 11 3818 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Dalbergieae; Arachis -Arachis hypogaea var. vulgaris - 1 1 11 925390 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Dalbergieae; Arachis; Arachis hypogaea; Arachis hypogaea subsp. 
fastigiata -Arachis ipaensis - 1 1 11 130454 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Dalbergieae; Arachis -Araucaria cunninghamii - 1 1 11 56994 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Araucariales; Araucariaceae; Araucaria -Archaeopsylla erinacei - 1 5 - 48909 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Siphonaptera; Pulicomorpha; Pulicoidea; Pulicidae; Archaeopsyllinae; Archaeopsylla -Aretaon asperrimus thorny stick insect 1 5 - 173775 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Orthopteroidea; Phasmatodea; Verophasmatodea; Areolatae; Bacilloidea; Heteropterygidae; Obriminae; Obrimini; Aretaon -Argochrysis armilla - 1 5 - 1317734 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Chrysidoidea; Chrysididae; Chrysidinae; Chrysidini; Argochrysis -Argulus siamensis - 1 5 - 1167309 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Branchiura; Arguloida; Argulidae; Argulus -Arion vulgaris - 1 5 - 1028688 INV cellular organisms; Eukaryota; Opisthokonta; 
Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Eupulmonata; Stylommatophora; Sigmurethra; Arionoidea; Arionidae; Arion -Artemisia annua sweet wormwood 1 1 11 35608 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Anthemideae; Artemisiinae; Artemisia -artificial sequences - 11 0 11 81077 SYN other sequences -Arundo donax giant reed 1 1 11 35708 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Arundinoideae; Arundineae; Arundo -Ascaris suum pig roundworm 1 5 - 6253 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Ascaridida; Ascaridoidea; Ascarididae; Ascaris -Aspergillus oryzae - 1 4 - 5062 PLN cellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Ascomycota; saccharomyceta; Pezizomycotina; leotiomyceta; Eurotiomycetes; Eurotiomycetidae; Eurotiales; Aspergillaceae; Aspergillus -Aspidistra saxicola - 1 1 11 1197444 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Asparagaceae; Nolinoideae; Aspidistra -Astacus astacus broad-fingered crayfish 1 5 - 6715 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Astacoidea; Astacidae; Astacus -Astacus leptodactylus 
narrow-clawed crayfish 1 5 - 6717 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Astacoidea; Astacidae; Astacus -Asterias amurensis - 1 9 - 7602 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Asterias -Asterias forbesi Forbes's starfish 1 9 - 7603 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Asterias -Asterias rubens European starfish 1 9 - 7604 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Asterias -Astyanax mexicanus Mexican tetra 1 2 - 7994 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Characiphysae; Characiformes; Characoidei; Characidae; Characidae incertae sedis; Astyanax clade; Astyanax -Athetis lepigone - 1 5 - 1223490 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Noctuoidea; Noctuidae; Noctuinae; Athetis -Atractaspis aterrima mole viper 1 2 - 1355159 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; 
Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Lamprophiidae; Atractaspidinae; Atractaspis -Aurelia aurita moon jelly 1 4 - 6145 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Scyphozoa; Semaeostomeae; Ulmaridae; Aurelia -Austrofundulus limnaeus - 1 2 - 52670 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Rivulidae; Austrofundulus -Avena sativa oat 1 1 11 4498 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Aveninae; Avena -Avicennia marina - 1 1 11 82927 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Acanthaceae; Avicennioideae; Avicennia -Bacillus cereus - 11 0 - 1396 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Bacillales; Bacillaceae; Bacillus; Bacillus cereus group -Bacillus licheniformis - 11 0 - 1402 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Bacillales; Bacillaceae; Bacillus; Bacillus subtilis group -Bactrocera cucurbitae melon fly 1 5 - 28588 INV cellular organisms; 
Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Dacinae; Dacini; Bactrocera; Zeugodacus -Bactrocera dorsalis oriental fruit fly 1 5 - 27457 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Dacinae; Dacini; Bactrocera; Bactrocera; Bactrocera dorsalis species complex -Bactrocera latifrons - 1 5 - 174628 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Dacinae; Dacini; Bactrocera; Bactrocera -Bactrocera minax Oriental citrus fly 1 5 - 104690 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Dacinae; Dacini; Bactrocera; Tetradacus -Bactrocera oleae olive fruit fly 1 5 - 104688 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; 
Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Dacinae; Dacini; Bactrocera; Daculus -Balaenoptera acutorostrata scammoni - 1 2 - 310752 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Mysticeti; Balaenopteridae; Balaenoptera; Balaenoptera acutorostrata -Balaenoptera bonaerensis Antarctic minke whale 1 2 - 33556 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Mysticeti; Balaenopteridae; Balaenoptera -Balearica regulorum gibbericeps East African grey crowned-crane 1 2 - 100784 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Gruidae; Balearica; Balearica regulorum -Banksia hookeriana - 1 1 11 199770 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; stem eudicotyledons; Proteales; Proteaceae; Banksia -Bdellocephala annandalei - 1 9 - 1421413 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Rhabditophora; Seriata; Tricladida; Continenticola; Planarioidea; Dendrocoelidae; Bdellocephala -Bemisia tabaci - 1 5 - 7038 INV cellular organisms; Eukaryota; 
Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aleyrodiformes; Aleyrodoidea; Aleyrodidae; Aleyrodinae; Bemisia -Beta vulgaris - 1 1 11 161934 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Betoideae; Beta -Beta vulgaris subsp. vulgaris - 1 1 11 3555 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Betoideae; Beta; Beta vulgaris -Betula platyphylla - 1 1 11 78630 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Betulaceae; Betula -Biomphalaria glabrata - 1 5 - 6526 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Hygrophila; Planorboidea; Planorbidae; Biomphalaria -Bison bison bison - 1 2 - 43346 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bison; Bison bison -Bithynia siamensis goniomphalos - 1 5 - 479249 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; 
Gastropoda; Caenogastropoda; Hypsogastropoda; Littorinimorpha; Truncatelloidea; Bithyniidae; Bithynia; Bithynia siamensis -Boechera gunnisoniana - 1 1 11 93888 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Boechereae; Boechera -Bombina bombina fire-bellied toad 1 2 - 8345 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Bombinatoridae; Bombina -Bombina orientalis - 1 2 - 8346 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Bombinatoridae; Bombina -Bombina variegata scabra - 1 2 - 251232 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Bombinatoridae; Bombina; Bombina variegata -Bombina variegata variegata - 1 2 - 191472 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Bombinatoridae; Bombina; Bombina variegata -Bombus impatiens common eastern bumble bee 1 5 - 132113 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; 
Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Bombinae; Bombini; Bombus; Pyrobombus -Bombus insularis - 1 5 - 207637 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Bombinae; Bombini; Bombus; Psithyrus -Bombus terrestris buff-tailed bumblebee 1 5 - 30195 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Bombinae; Bombini; Bombus; Bombus -Bombyx mori domestic silkworm 1 5 - 7091 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Bombycidae; Bombycinae; Bombyx -Bordetella pertussis - 11 0 - 520 BCT cellular organisms; Bacteria; Proteobacteria; Betaproteobacteria; Burkholderiales; Alcaligenaceae; Bordetella -Bos mutus wild yak 1 2 - 72004 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bos -Bos taurus cattle 1 2 - 9913 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; 
Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bos -Botryococcus braunii - 1 1 11 38881 PLN cellular organisms; Eukaryota; Viridiplantae; Chlorophyta; Trebouxiophyceae; Trebouxiophyceae incertae sedis; Botryococcaceae; Botryococcus -Brachionus calyciflorus - 1 5 - 104777 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Rotifera; Monogononta; Pseudotrocha; Ploima; Brachionidae; Brachionus -Brachycistis timberlakei - 1 5 - 1317728 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Tiphiidae; Brachycistidinae; Brachycistis -Brachypodium distachyon stiff brome 1 1 11 15368 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Brachypodieae; Brachypodium -Bragasellus peltatus - 1 5 - 1282048 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Bragasellus -Branchiostoma lanceolatum amphioxus 1 5 - 7740 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Cephalochordata; Branchiostomidae; Branchiostoma -Brassica juncea - 1 1 11 3707 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica -Brassica 
napus rape 1 1 11 3708 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica -Brassica oleracea var. capitata cabbage 1 1 11 3716 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica; Brassica oleracea -Brassica oleracea var. oleracea - 1 1 11 109376 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica; Brassica oleracea -Brassica rapa field mustard 1 1 11 3711 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica -Brassica rapa subsp. 
pekinensis Chinese cabbage 1 1 11 51351 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica; Brassica rapa -Brassica rapa x Raphanus sativus - 1 1 11 1417620 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica x Raphanus -Brassicogethes aeneus - 1 5 - 1431903 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Cucujiformia; Cucujoidea; Nitidulidae; Meligethinae; Brassicogethes -Brugia malayi - 1 5 - 6279 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Spirurida; Filarioidea; Onchocercidae; Brugia -Bubalus bubalis water buffalo 1 2 - 89462 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bubalus -Buceros rhinoceros silvestris - 1 2 - 175836 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; 
Neognathae; Bucerotiformes; Bucerotidae; Buceros; Buceros rhinoceros -Burkholderia ubonensis - 11 0 - 101571 BCT cellular organisms; Bacteria; Proteobacteria; Betaproteobacteria; Burkholderiales; Burkholderiaceae; Burkholderia -Caenorhabditis elegans - 1 5 - 6239 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis -Caenorhabditis remanei - 1 5 - 31234 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis -Cajanus cajan pigeon pea 1 1 11 3821 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Phaseoleae; Cajanus -Calanus finmarchicus - 1 5 - 6837 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Copepoda; Neocopepoda; Gymnoplea; Calanoida; Calanidae; Calanus -Calanus glacialis - 1 5 - 113644 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Copepoda; Neocopepoda; Gymnoplea; Calanoida; Calanidae; Calanus -Calidris pugnax ruff 1 2 - 198806 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Charadriiformes; Scolopacidae; Calidris -Caligus 
rogercresseyi - 1 5 - 217165 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Copepoda; Neocopepoda; Podoplea; Siphonostomatoida; Caligidae; Caligus -Callithrix jacchus white-tufted-ear marmoset 1 2 - 9483 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Platyrrhini; Cebidae; Callitrichinae; Callithrix -Callorhinchus milii elephant shark 1 2 - 7868 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Chondrichthyes; Holocephali; Chimaeriformes; Callorhinchidae; Callorhinchus -Calypte anna Anna's hummingbird 1 2 - 9244 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Trochiliformes; Trochilidae; Calypte -Camelina sativa false flax 1 1 11 90675 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Camelina -Camellia oleifera - 1 1 11 385388 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Theaceae; Camellia 
-Camellia sinensis - 1 1 11 4442 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Theaceae; Camellia -Camellia sinensis var. sinensis - 1 1 11 542762 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Theaceae; Camellia; Camellia sinensis -Camelus bactrianus Bactrian camel 1 2 - 9837 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Camelus -Camelus dromedarius Arabian camel 1 2 - 9838 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Camelus -Camelus ferus Wild Bactrian camel 1 2 - 419612 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Camelus -Camponotus floridanus Florida carpenter ant 1 5 - 104421 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; 
Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Camponotini; Camponotus -Camptotheca acuminata - 1 1 11 16922 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; Cornales; Nyssaceae; Camptotheca -Campylobacter coli - 11 0 - 195 BCT cellular organisms; Bacteria; Proteobacteria; delta/epsilon subdivisions; Epsilonproteobacteria; Campylobacterales; Campylobacteraceae; Campylobacter -Campylobacter jejuni - 11 0 - 197 BCT cellular organisms; Bacteria; Proteobacteria; delta/epsilon subdivisions; Epsilonproteobacteria; Campylobacterales; Campylobacteraceae; Campylobacter -Canis lupus familiaris dog 1 2 - 9615 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Canidae; Canis; Canis lupus -Cannabis sativa hemp 1 1 11 3483 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Cannabis -Capra hircus goat 1 2 - 9925 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Caprinae; Capra -Caprimulgus carolinensis chuck-will's-widow 1 2 - 279965 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; 
Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Caprimulgiformes; Caprimulgidae; Caprimulginae; Caprimulgus -Capsella rubella - 1 1 11 81985 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Capsella -Capsicum annuum - 1 1 11 4072 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Capsiceae; Capsicum -Carabus granulatus - 1 5 - 118799 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Adephaga; Caraboidea; Carabidae; Carabinae; Carabini; Carabina; Carabus; Carabus -Caragana korshinskii - 1 1 11 220689 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Caraganeae; Caragana -Cariama cristata red-legged seriema 1 2 - 54380 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Cariamidae; 
Cariama -Carlito syrichta Philippine tarsier 1 2 - 1868482 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Tarsiiformes; Tarsiidae; Carlito -Catharanthus roseus Madagascar periwinkle 1 1 11 4058 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Apocynaceae; Rauvolfioideae; Vinceae; Catharanthinae; Catharanthus -Cathartes aura turkey vulture 1 2 - 43455 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Ciconiiformes; Cathartidae; Cathartes -Cavia porcellus domestic guinea pig 1 2 - 10141 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Hystricognathi; Caviidae; Cavia -Cebus capucinus imitator - 1 2 - 1737458 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Platyrrhini; Cebidae; Cebinae; Cebus; Cebus capucinus 
-Cecidomyiidae sp. BOLD-2016 - 1 5 - 1881751 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Nematocera; Bibionomorpha; Sciaroidea; Cecidomyiidae; unclassified Cecidomyiidae -Cenchrus americanus - 1 1 11 4543 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Cenchrinae; Cenchrus -Centris flavifrons - 1 5 - 360639 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Anthophorinae; Centridini; Centris -Cephalotaxus hainanensis - 1 1 11 191701 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Cupressales; Taxaceae; Cephalotaxus -Cerapachys biroi clonal raider ant 1 5 - 443821 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Cerapachyinae; Cerapachyini; Cerapachys -Ceratina calcarata - 1 5 - 156304 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Xylocopinae; Ceratinini; Ceratina; 
Zadontomerus -Ceratitis capitata Mediterranean fruit fly 1 5 - 7213 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Dacinae; Ceratitidini; Ceratitis; Ceratitis -Ceratotherium simum simum southern white rhinoceros 1 2 - 73337 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Perissodactyla; Rhinocerotidae; Ceratotherium; Ceratotherium simum -Cercis gigantea - 1 1 11 183790 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Cercideae; Cercis -Cercocebus atys sooty mangabey 1 2 - 9531 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Cercocebus -Chaetura pelagica chimney swift 1 2 - 8897 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; 
Apodiformes; Apodidae; Chaetura -Charadrius vociferus killdeer 1 2 - 50402 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Charadriiformes; Charadriidae; Charadrius -Chelonia mydas green sea turtle 1 2 - 8469 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Testudines; Cryptodira; Durocryptodira; Americhelydia; Chelonioidea; Cheloniidae; Chelonia -Cherax quadricarinatus - 1 5 - 27406 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Parastacoidea; Parastacidae; Cherax -Chilo suppressalis striped riceborer 1 5 - 168631 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Pyraloidea; Crambidae; Crambinae; Chilo -Chinavia ubica - 1 5 - 1497372 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Neohemiptera; Prosorrhyncha; Heteroptera; Euheteroptera; Neoheteroptera; Panheteroptera; Pentatomomorpha; Pentatomoidea; Pentatomidae; 
Pentatominae; Chinavia -Chinchilla lanigera long-tailed chinchilla 1 2 - 34839 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Hystricognathi; Chinchillidae; Chinchilla -Chlamydia trachomatis - 11 0 - 813 BCT cellular organisms; Bacteria; PVC group; Chlamydiae; Chlamydiia; Chlamydiales; Chlamydiaceae; Chlamydia/Chlamydophila group; Chlamydia -Chlamydotis macqueenii Macqueen's bustard 1 2 - 187382 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Otididae; Chlamydotis -Chlorella sorokiniana - 1 1 11 3076 PLN cellular organisms; Eukaryota; Viridiplantae; Chlorophyta; Trebouxiophyceae; Chlorellales; Chlorellaceae; Chlorella -Chloris chloris European greenfinch 1 2 - 37601 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passeroidea; Fringillidae; Carduelinae; Chloris -Chlorocebus sabaeus green monkey 1 2 - 60711 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; 
Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Chlorocebus -Chorispora bungeana - 1 1 11 238895 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Chorisporeae; Chorispora -Chromolaena odorata - 1 1 11 103745 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Heliantheae alliance; Eupatorieae; Chromolaena -Chrysemys picta bellii western painted turtle 1 2 - 8478 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Testudines; Cryptodira; Durocryptodira; Testudinoidea; Emydidae; Chrysemys; Chrysemys picta -Chrysochloris asiatica Cape golden mole 1 2 - 185453 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Afrotheria; Chrysochloridae; Chrysochlorinae; Chrysochloris -Chrysopa pallens - 1 5 - 417485 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Neuropterida; Neuroptera; Chrysopidae; Chrysopa -Chyphotes mellipes - 1 5 - 292179 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; 
Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Bradynobaenidae; Chyphotinae; Chyphotes -Cicer arietinum chickpea 1 1 11 3827 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Cicereae; Cicer -Ciona intestinalis vase tunicate 1 13 - 7719 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Tunicata; Ascidiacea; Enterogona; Phlebobranchia; Cionidae; Ciona -Citrus clementina - 1 1 11 85681 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Rutaceae; Aurantioideae; Citrus -Citrus sinensis sweet orange 1 1 11 2711 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Rutaceae; Aurantioideae; Citrus -Clostridioides difficile - 11 0 - 1496 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Clostridia; Clostridiales; Peptostreptococcaceae; Clostridioides -Clostridium botulinum - 11 0 - 1491 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Clostridia; Clostridiales; Clostridiaceae; Clostridium -Clupea harengus Atlantic herring 1 2 - 7950 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Clupei; 
Clupeiformes; Clupeoidei; Clupeidae; Clupeinae; Clupea -Cocos nucifera coconut palm 1 1 11 13894 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Arecales; Arecaceae; Arecoideae; Cocoseae; Attaleinae; Cocos -Colaphellus bowringi - 1 5 - 561076 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Cucujiformia; Chrysomeloidea; Chrysomelidae; Chrysomelinae; Chrysomelini; Colaphellus -Colius striatus speckled mousebird 1 2 - 57412 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Coliiformes; Coliidae; Colius -Colobus angolensis palliatus - 1 2 - 336983 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Colobinae; Colobus; Colobus angolensis -Columba livia rock pigeon 1 2 - 8932 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; 
Columbiformes; Columbidae; Columba -Condylura cristata star-nosed mole 1 2 - 143302 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Insectivora; Talpidae; Condylura -Copidosoma floridanum - 1 5 - 29053 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Chaldicoidea group; Chalcidoidea; Encyrtidae; Encyrtinae; Copidosoma -Corchorus capsularis - 1 1 11 210143 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Grewioideae; Apeibeae; Corchorus -Corvus brachyrhynchos American crow 1 2 - 85066 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Corvoidea; Corvidae; Corvus -Corvus cornix cornix - 1 2 - 932674 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Corvoidea; Corvidae; Corvus; Corvus cornix -Corydalinae sp. 
KMRSPBM-2012 - 1 5 - 1247484 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Neuropterida; Megaloptera; Corydalidae; Corydalinae; unclassified Corydalinae -Corylus avellana - 1 1 11 13451 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Betulaceae; Corylus -Costus pictus - 1 1 11 168183 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Zingiberales; Costaceae; Costus -Coturnix japonica Japanese quail 1 2 - 93934 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Phasianidae; Perdicinae; Coturnix -Crassostrea angulata - 1 5 - 558553 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Ostreoida; Ostreoidea; Ostreidae; Crassostrea -Crassostrea gigas Pacific oyster 1 5 - 29159 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Ostreoida; Ostreoidea; Ostreidae; Crassostrea -Crataegus pinnatifida - 1 1 11 510735 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; 
Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Maleae; Crataegus -Crepidula fornicata - 1 5 - 176853 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Caenogastropoda; Hypsogastropoda; Littorinimorpha; Calyptraeoidea; Calyptraeidae; Crepidula -Cricetulus griseus Chinese hamster 1 2 - 10029 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Cricetidae; Cricetinae; Cricetulus -Crioscolia alcione - 1 5 - 1317732 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Scoliidae; Crioscolia -Ctenomys sociabilis social tuco-tuco 1 2 - 43321 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Hystricognathi; Ctenomyidae; Ctenomys -Cucumis melo muskmelon 1 1 11 3656 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Benincaseae; Cucumis -Cucumis sativus cucumber 1 1 11 3659 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; 
Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Benincaseae; Cucumis -Cunninghamia lanceolata - 1 1 11 28977 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Cupressales; Cupressaceae; Cunninghamia -Curcuma longa turmeric 1 1 11 136217 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Zingiberales; Zingiberaceae; Curcuma -Cuscuta pentagona - 1 1 11 112407 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Cuscuteae; Cuscuta; Grammica; Cuscuta sect. 
Cleistogrammica -Cylicostephanus goldi - 1 5 - 71465 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Strongyloidea; Strongylidae; Cyathostominae; Cylicostephanus -Cynoglossus semilaevis tongue sole 1 2 - 244447 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Carangaria; Pleuronectiformes; Pleuronectoidei; Cynoglossidae; Cynoglossinae; Cynoglossus -Cynopterus sphinx Indian short-nosed fruit bat 1 2 - 9400 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Cynopterus -Cypridininae sp. 
BMR-2011 - 1 5 - 1032739 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Ostracoda; Myodocopa; Myodocopida; Cypridinoidea; Cypridinidae; unclassified Cypridinidae -Cyprinodon variegatus sheepshead minnow 1 2 - 28743 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Cyprinodontidae; Cyprinodontinae; Cyprinodontini; Cyprinodon -Cyprinus carpio common carp 1 2 - 7962 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Cyprinus -Dahlia pinnata - 1 1 11 101596 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Heliantheae alliance; Coreopsideae; Dahlia -Danio rerio zebrafish 1 2 - 7955 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Danio -Daphnia magna - 1 5 - 35525 INV 
cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Branchiopoda; Phyllopoda; Diplostraca; Cladocera; Anomopoda; Daphniidae; Daphnia -Dastarcus helophoroides - 1 5 - 1169899 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Cucujiformia; Cucujoidea; Bothrideridae; Dastarcus -Dasypus novemcinctus nine-banded armadillo 1 2 - 9361 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Xenarthra; Cingulata; Dasypodidae; Dasypus -Daucus carota subsp. sativus - 1 1 11 79200 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Apiales; Apiineae; Apiaceae; Apioideae; Scandiceae; Daucinae; Daucus; Daucus sect. 
Daucus; Daucus carota -Dendroctonus ponderosae mountain pine beetle 1 5 - 77166 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Cucujiformia; Curculionoidea; Curculionidae; Scolytinae; Dendroctonus -Dermacentor variabilis American dog tick 1 5 - 34621 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Rhipicephalinae; Dermacentor -Dermanyssus gallinae - 1 5 - 34641 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Mesostigmata; Monogynaspida; Gamasina; Dermanyssoidea; Dermanyssidae; Dermanyssus -Dianthus caryophyllus clove pink 1 1 11 3570 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Caryophyllaceae; Caryophylleae; Dianthus -Diaphorina citri Asian citrus psyllid 1 5 - 121845 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Psylliformes; Psylloidea; Psyllidae; Diaphorina -Dicrocoelium dendriticum - 1 9 - 57078 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Plagiorchiida; Xiphidiata; Plagiorchioidea; Dicrocoeliidae; Dicrocoelium -Dinobryon sp. 
LO226KS - 1 1 11 1825119 PLN cellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; Chromulinales; Dinobryaceae; Dinobryon -Dinoponera quadriceps - 1 5 - 609295 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Ponerinae; Ponerini; Dinoponera -Diospyros lotus - 1 1 11 55363 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Ebenaceae; Diospyros -Diphyllobothrium latum - 1 9 - 60516 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Cestoda; Eucestoda; Diphyllobothriidea; Diphyllobothriidae; Diphyllobothrium -Dipodomys ordii Ord's kangaroo rat 1 2 - 10020 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Heteromyidae; Dipodomyinae; Dipodomys -Dorcoceras hygrometricum - 1 1 11 472368 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Gesneriaceae; Didymocarpoideae; Trichosporeae; Loxocarpinae; Dorcoceras -Drosophila ananassae - 1 5 - 7217 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; 
Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; ananassae subgroup; ananassae species complex -Drosophila grimshawi - 1 5 - 7222 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Hawaiian Drosophila; picture wing clade; grimshawi clade; grimshawi group; grimshawi subgroup -Drosophila melanogaster fruit fly 1 5 - 7227 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; melanogaster subgroup -Drosophila miranda - 1 5 - 7229 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; obscura group; pseudoobscura subgroup -Drosophila mojavensis - 1 5 - 7230 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; 
Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Drosophila; repleta group; mulleri subgroup; mojavensis species complex -Drosophila persimilis - 1 5 - 7234 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; obscura group; pseudoobscura subgroup -Drosophila pseudoobscura pseudoobscura - 1 5 - 46245 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; obscura group; pseudoobscura subgroup; Drosophila pseudoobscura -Drosophila rhopaloa - 1 5 - 1041015 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; rhopaloa subgroup -Drosophila sechellia - 1 5 - 7238 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; 
Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; melanogaster subgroup -Drosophila simulans - 1 5 - 7240 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; melanogaster subgroup -Drosophila suzukii - 1 5 - 28584 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; suzukii subgroup -Drosophila virilis - 1 5 - 7244 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Drosophila; virilis group -Drosophila willistoni - 1 5 - 7260 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; willistoni group; willistoni subgroup -Drosophila yakuba - 1 5 - 7245 INV cellular 
organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Ephydroidea; Drosophilidae; Drosophilinae; Drosophilini; Drosophila; Sophophora; melanogaster group; melanogaster subgroup -Dugesia japonica - 1 9 - 6161 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Rhabditophora; Seriata; Tricladida; Continenticola; Geoplanoidea; Dugesiidae; Dugesia -Echinarachnius parma - 1 9 - 869203 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Gnathostomata; Clypeasteroida; Echinarachniidae; Echinarachnius -Echinaster spinulosus - 1 9 - 1451296 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Spinulosacea; Spinulosida; Echinasteridae; Echinaster -Echinops telfairi small Madagascar hedgehog 1 2 - 9371 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Afrotheria; Tenrecidae; Tenrecinae; Echinops -Echinostoma caproni - 1 9 - 27848 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Plagiorchiida; Echinostomata; Echinostomatoidea; Echinostomatidae; Echinostoma -Eidolon helvum straw-colored fruit bat 1 2 - 77214 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; 
Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Eidolon -Eimeria mitis - 1 4 4 44415 INV cellular organisms; Eukaryota; Alveolata; Apicomplexa; Conoidasida; Coccidia; Eucoccidiorida; Eimeriorina; Eimeriidae; Eimeria -Elaeis guineensis African oil palm 1 1 11 51953 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Arecales; Arecaceae; Arecoideae; Cocoseae; Elaeidinae; Elaeis -Elephantulus edwardii Cape elephant shrew 1 2 - 28737 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Afrotheria; Macroscelidea; Macroscelididae; Elephantulus -Elliptio complanata eastern elliptio 1 5 - 55832 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Palaeoheterodonta; Unionoida; Unionoidea; Unionidae; Ambleminae; Elliptio -Elodea nuttallii - 1 1 11 55313 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Alismatales; Hydrocharitaceae; Elodea -Emiliania huxleyi CCMP1516 - 1 4 11 280463 PLN cellular organisms; Eukaryota; Haptophyceae; Isochrysidales; Noelaerhabdaceae; Emiliania; Emiliania huxleyi -Enterobacter cloacae - 11 0 - 550 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Enterobacter; Enterobacter cloacae complex -Enterococcus faecalis - 11 0 - 1351 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Enterococcaceae; Enterococcus -Enterococcus faecium - 
11 0 - 1352 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Enterococcaceae; Enterococcus -Epipyxis sp. PR26KG - 1 1 11 1825121 PLN cellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; Chromulinales; Dinobryaceae; Epipyxis -Eptesicus fuscus big brown bat 1 2 - 29078 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Eptesicus -Equus asinus ass 1 2 - 9793 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Perissodactyla; Equidae; Equus; Asinus -Equus caballus horse 1 2 - 9796 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Perissodactyla; Equidae; Equus; Equus -Equus przewalskii Przewalski's horse 1 2 - 9798 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Perissodactyla; Equidae; Equus; Equus -Erinaceus europaeus western European hedgehog 1 2 - 9365 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; 
Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Insectivora; Erinaceidae; Erinaceinae; Erinaceus -Eriocheir sinensis Chinese mitten crab 1 5 - 95602 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Brachyura; Eubrachyura; Thoracotremata; Grapsoidea; Varunidae; Eriocheir -Erythranthe guttata spotted monkey flower 1 1 11 4155 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Phrymaceae; Erythranthe -Escherichia coli - 11 0 - 562 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Escherichia -Esox lucius northern pike 1 2 - 8010 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Protacanthopterygii; Esociformes; Esocidae; Esox -Eucalyptus camaldulensis Murray red gum 1 1 11 34316 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Myrtaceae; Myrtoideae; Eucalypteae; Eucalyptus -Eucalyptus grandis - 1 1 11 71139 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Myrtaceae; Myrtoideae; Eucalypteae; Eucalyptus 
-Eucidaris tribuloides - 1 9 - 7632 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Perischoechinoidea; Cidaroida; Cidaridae; Eucidaris -Eucyclops serrulatus - 1 5 - 84317 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Copepoda; Neocopepoda; Podoplea; Cyclopoida; Cyclopidae; Eucyclops -Eufriesea mexicana - 1 5 - 516756 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Bombinae; Euglossini; Eufriesea -Eurypyga helias sunbittern 1 2 - 54383 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Eurypygidae; Eurypyga -Eustoma exaltatum subsp. 
russellianum - 1 1 11 52518 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Gentianaceae; Chironieae; Eustoma; Eustoma exaltatum -Evechinus chloroticus - 1 9 - 137513 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Echinometridae; Evechinus -Exaiptasia pallida - 1 4 - 1720309 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Aiptasiidae; Exaiptasia -Exoneura robusta - 1 5 - 175328 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Xylocopinae; Allodapini; Exoneura -Extatosoma tiaratum giant prickly stick insect 1 5 - 7024 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Orthopteroidea; Phasmatodea; Verophasmatodea; Anareolatae; Phasmatidae; Tropidoderinae; Extatosoma -Fagopyrum esculentum common buckwheat 1 1 11 3617 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Polygonaceae; Polygonoideae; Fagopyreae; Fagopyrum -Falco cherrug Saker falcon 1 2 - 345164 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; 
Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Falconiformes; Falconidae; Falco -Falco peregrinus peregrine falcon 1 2 - 8954 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Falconiformes; Falconidae; Falco -Fasciola hepatica liver fluke 1 9 - 6192 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Plagiorchiida; Echinostomata; Echinostomatoidea; Fasciolidae; Fasciola -Felis catus domestic cat 1 2 - 9685 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Feliformia; Felidae; Felinae; Felis -Ficedula albicollis collared flycatcher 1 2 - 59894 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Muscicapidae; Ficedula -Ficus carica common fig 1 1 11 3494 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Moraceae; Ficus +Acanthisitta chloris rifleman 1 2 - 57068 VRT 
Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Acanthisittidae; Acanthisitta +Acanthoscurria geniculata - 1 5 - 575412 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Araneae; Mygalomorphae; Theraphosidae; Acanthoscurria +Acartia tonsa - 1 5 - 136180 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Hexanauplia; Copepoda; Calanoida; Acartiidae; Acartia +Acidobacteria bacterium - 11 0 - 1978231 BCT Bacteria; Acidobacteria +Acinetobacter - 11 0 - 469 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Moraxellaceae; Acinetobacter +Acinetobacter baumannii - 11 0 - 470 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Moraxellaceae; Acinetobacter; Acinetobacter calcoaceticus/baumannii complex +Acropora cervicornis - 1 4 - 6130 INV Eukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Acroporidae; Acropora +Acropora millepora - 1 4 - 45264 INV Eukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Acroporidae; Acropora +Acropora tenuis purple tipped acropora 1 4 - 70783 INV Eukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Acroporidae; Acropora +Actinidia - 1 1 11 3624 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Actinidiaceae; Actinidia +Acyrthosiphon - 1 5 - 7028 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aphidomorpha; Aphidoidea; Aphididae; Macrosiphini; Acyrthosiphon +Acyrthosiphon pisum pea aphid 1 5 - 7029 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aphidomorpha; Aphidoidea; Aphididae; 
Macrosiphini; Acyrthosiphon +Aedes - 1 5 - 7158 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Culicinae; Aedini; Aedes +Aedes aegypti yellow fever mosquito 1 5 - 7159 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Culicinae; Aedini; Aedes; Stegomyia +Aedes albopictus Asian tiger mosquito 1 5 - 7160 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Culicinae; Aedini; Aedes; Stegomyia +Aegilops tauschii - 1 1 11 37682 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Aegilops +Aegilops tauschii subsp. tauschii - 1 1 11 169297 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Aegilops +Agave deserti - 1 1 11 382119 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Asparagaceae; Agavoideae; Agave +Agave tequilana - 1 1 11 386106 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Asparagaceae; Agavoideae; Agave +Ailuropoda melanoleuca giant panda 1 2 - 9646 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Ursidae; Ailuropoda +Alexandrium tamarense - 1 4 11 2926 PLN Eukaryota; Alveolata; Dinophyceae; Gonyaulacales; Gonyaulacaceae; Alexandrium +Allium cepa onion 1 1 11 4679 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; 
Asparagales; Amaryllidaceae; Allioideae; Allieae; Allium +Allium sativum garlic 1 1 11 4682 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Amaryllidaceae; Allioideae; Allieae; Allium +Alvinella - 1 5 - 6375 INV Eukaryota; Metazoa; Lophotrochozoa; Annelida; Polychaeta; Scolecida; Terebellida; Alvinellidae; Alvinella +Alvinella pompejana - 1 5 - 6376 INV Eukaryota; Metazoa; Lophotrochozoa; Annelida; Polychaeta; Scolecida; Terebellida; Alvinellidae; Alvinella +Amaranthus tricolor - 1 1 11 29722 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Amaranthaceae; Amaranthus +Amborella - 1 1 11 13332 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Amborellales; Amborellaceae; Amborella +Amborella trichopoda - 1 1 11 13333 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Amborellales; Amborellaceae; Amborella +Ameiurus nebulosus brown bullhead 1 2 - 27778 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Siluriformes; Ictaluridae; Ameiurus +Amoebidium parasiticum JAP-7-2 - 1 4 - 1069442 INV Eukaryota; Ichthyosporea; Ichthyophonida; Amoebidiaceae; Amoebidium +Amphimedon queenslandica - 1 4 - 400682 INV Eukaryota; Metazoa; Porifera; Demospongiae; Heteroscleromorpha; Haplosclerida; Niphatidae; Amphimedon +Anas platyrhynchos mallard 1 2 - 8839 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Anseriformes; Anatidae; Anatinae; Anas +Ancylostoma - 1 5 - 29169 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Strongyloidea; Ancylostomatidae; 
Ancylostomatinae; Ancylostoma +Ancylostoma caninum dog hookworm 1 5 - 29170 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Strongyloidea; Ancylostomatidae; Ancylostomatinae; Ancylostoma +Ancylostoma ceylanicum - 1 5 - 53326 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Strongyloidea; Ancylostomatidae; Ancylostomatinae; Ancylostoma +Anguilla anguilla European eel 1 2 - 7936 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Anguilliformes; Anguillidae; Anguilla +Anguilla japonica Japanese eel 1 2 - 7937 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Anguilliformes; Anguillidae; Anguilla +Annulipalpia sp. AD-2013 - 1 5 - 1499517 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Trichoptera; Annulipalpia; unclassified Annulipalpia +Anolis - 1 2 - 28376 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Iguania; Dactyloidae; Anolis +Anolis carolinensis green anole 1 2 - 28377 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Iguania; Dactyloidae; Anolis +Anopheles - 1 5 - 7164 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Anophelinae; Anopheles +Anopheles gambiae African malaria mosquito 1 5 - 7165 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Anophelinae; Anopheles +Anoplopoma fimbria sablefish 1 2 - 229290 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; 
Neoteleostei; Acanthomorphata; Eupercaria; Perciformes; Cottioidei; Anoplopomatales; Anoplopomatidae; Anoplopoma +Anthopleura elegantissima clonal anemone 1 4 - 6110 INV Eukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Actiniidae; Anthopleura +Antrostomus carolinensis chuck-will's-widow 1 2 - 279965 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Caprimulgiformes; Caprimulgidae; Antrostomus +Aotus nancymaae Ma's night monkey 1 2 - 37293 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Platyrrhini; Aotidae; Aotus +Apaloderma vittatum bar-tailed trogon 1 2 - 57397 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Trogoniformes; Trogonidae; Apaloderma +Aphanomyces astaci - 1 1 11 112090 PLN Eukaryota; Stramenopiles; Oomycetes; Saprolegniales; Saprolegniaceae; Aphanomyces +Aphis - 1 5 - 80764 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aphidomorpha; Aphidoidea; Aphididae; Aphidini; Aphis +Aphis gossypii cotton aphid 1 5 - 80765 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aphidomorpha; Aphidoidea; Aphididae; Aphidini; Aphis; Aphis +Apis - 1 5 - 7459 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Apis +Apis mellifera honey bee 1 5 - 7460 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Apis +Apis mellifera carnica Carniolan honeybee 1 5 - 88217 INV Eukaryota; Metazoa; 
Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Apis +Aplysia - 1 5 - 6499 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Euopisthobranchia; Aplysiida; Aplysioidea; Aplysiidae; Aplysia +Aplysia californica California sea hare 1 5 - 6500 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Euopisthobranchia; Aplysiida; Aplysioidea; Aplysiidae; Aplysia +Apostichopus japonicus Japanese sea cucumber 1 9 - 307972 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Holothuroidea; Aspidochirotacea; Aspidochirotida; Stichopodidae; Apostichopus +Apteryx australis mantelli - 1 2 - 202946 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Palaeognathae; Apterygiformes; Apterygidae; Apteryx +Aquilegia - 1 1 11 3450 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Ranunculales; Ranunculaceae; Thalictroideae; Aquilegia +Aquilegia coerulea - 1 1 11 218851 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Ranunculales; Ranunculaceae; Thalictroideae; Aquilegia +Aquilegia formosa x Aquilegia pubescens - 1 1 11 338618 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Ranunculales; Ranunculaceae; Thalictroideae; Aquilegia +Ara macao Scarlet macaw 1 2 - 176014 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Psittaciformes; Psittacidae; Ara +Arabidopsis - 1 1 11 3701 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; 
Camelineae; Arabidopsis +Arabidopsis thaliana thale cress 1 1 11 3702 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Arabidopsis +Arachis - 1 1 11 3817 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; dalbergioids sensu lato; Dalbergieae; Pterocarpus clade; Arachis +Arachis duranensis - 1 1 11 130453 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; dalbergioids sensu lato; Dalbergieae; Pterocarpus clade; Arachis +Arachis hypogaea peanut 1 1 11 3818 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; dalbergioids sensu lato; Dalbergieae; Pterocarpus clade; Arachis +Arachis hypogaea var. 
vulgaris - 1 1 11 925390 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; dalbergioids sensu lato; Dalbergieae; Pterocarpus clade; Arachis +Arachis ipaensis - 1 1 11 130454 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; dalbergioids sensu lato; Dalbergieae; Pterocarpus clade; Arachis +Aretaon asperrimus thorny stick insect 1 5 - 173775 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Phasmatodea; Verophasmatodea; Areolatae; Bacilloidea; Heteropterygidae; Obriminae; Obrimini; Aretaon +Arion vulgaris - 1 5 - 1028688 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Eupulmonata; Stylommatophora; Sigmurethra; Arionoidea; Arionidae; Arion +Artemisia - 1 1 11 4219 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Anthemideae; Artemisiinae; Artemisia +Artemisia annua sweet wormwood 1 1 11 35608 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Anthemideae; Artemisiinae; Artemisia +Arundo donax giant reed 1 1 11 35708 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Arundinoideae; Arundineae; Arundo +Ascaris - 1 5 - 6251 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Spirurina; Ascaridomorpha; Ascaridoidea; Ascarididae; 
Ascaris +Ascaris suum pig roundworm 1 5 - 6253 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Spirurina; Ascaridomorpha; Ascaridoidea; Ascarididae; Ascaris +Aspergillus - 1 4 - 5052 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Eurotiomycetes; Eurotiomycetidae; Eurotiales; Aspergillaceae; Aspergillus +Astacus leptodactylus narrow-clawed crayfish 1 5 - 6717 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Astacoidea; Astacidae; Astacus +Astyanax - 1 2 - 7993 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Characiformes; Characoidei; Characidae; Characidae incertae sedis; Astyanax clade; Astyanax +Astyanax mexicanus Mexican tetra 1 2 - 7994 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Characiformes; Characoidei; Characidae; Characidae incertae sedis; Astyanax clade; Astyanax +Athetis lepigone - 1 5 - 1223490 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Noctuoidea; Noctuidae; Noctuinae; Athetis +Atractaspis aterrima mole viper 1 2 - 1355159 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Lamprophiidae; Atractaspidinae; Atractaspis +Aurelia aurita moon jelly 1 4 - 6145 INV Eukaryota; Metazoa; Cnidaria; Scyphozoa; Semaeostomeae; Ulmaridae; Aurelia +Avicennia marina - 1 1 11 82927 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Acanthaceae; Avicennioideae; Avicennia +Bacillus cereus - 11 0 - 1396 BCT Bacteria; Firmicutes; Bacilli; Bacillales; Bacillaceae; 
Bacillus; Bacillus cereus group +Bacillus thuringiensis - 11 0 - 1428 BCT Bacteria; Firmicutes; Bacilli; Bacillales; Bacillaceae; Bacillus; Bacillus cereus group +bacterium - 11 0 - 1869227 BCT Bacteria +Balaenoptera bonaerensis Antarctic minke whale 1 2 - 33556 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Mysticeti; Balaenopteridae; Balaenoptera +Balearica regulorum gibbericeps East African grey crowned-crane 1 2 - 100784 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Gruidae; Balearica +Bemisia tabaci - 1 5 - 7038 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aleyrodoidea; Aleyrodidae; Aleyrodinae; Bemisia +Beta - 1 1 11 3554 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Betoideae; Beta +Beta vulgaris - 1 1 11 161934 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Betoideae; Beta +Beta vulgaris subsp. 
vulgaris - 1 1 11 3555 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Betoideae; Beta +Betula platyphylla - 1 1 11 78630 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Betulaceae; Betula +Bicyclus bush browns 1 5 - 110367 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Papilionoidea; Nymphalidae; Satyrinae; Satyrini; Mycalesina; Bicyclus +Bicyclus anynana squinting bush brown 1 5 - 110368 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Papilionoidea; Nymphalidae; Satyrinae; Satyrini; Mycalesina; Bicyclus +Biomphalaria glabrata - 1 5 - 6526 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Hygrophila; Lymnaeoidea; Planorbidae; Biomphalaria +Bison bison bison - 1 2 - 43346 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bison +Bithynia siamensis goniomphalos - 1 5 - 479249 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Caenogastropoda; Littorinimorpha; Truncatelloidea; Bithyniidae; Bithynia +Boechera - 1 1 11 76872 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Boechereae; Boechera +Boechera gunnisoniana - 1 1 11 93888 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Boechereae; Boechera +Boechera 
stricta - 1 1 11 72658 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Boechereae; Boechera +Bombina bombina fire-bellied toad 1 2 - 8345 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Anura; Bombinatoridae; Bombina +Bombina variegata variegata - 1 2 - 191472 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Anura; Bombinatoridae; Bombina +Bombus terrestris buff-tailed bumblebee 1 5 - 30195 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Bombus; Bombus +Bombyx - 1 5 - 7090 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Bombycoidea; Bombycidae; Bombycinae; Bombyx +Bombyx mori domestic silkworm 1 5 - 7091 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Bombycoidea; Bombycidae; Bombycinae; Bombyx +Bordetella pertussis - 11 0 - 520 BCT Bacteria; Proteobacteria; Betaproteobacteria; Burkholderiales; Alcaligenaceae; Bordetella +Bos oxen, cattle 1 2 - 9903 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bos +Bos mutus wild yak 1 2 - 72004 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bos +Bos taurus cattle 1 2 - 9913 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bos +Botryllus - 1 13 - 30300 INV Eukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; 
Stolidobranchia; Styelidae; Botryllus +Botryllus schlosseri - 1 13 - 30301 INV Eukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Stolidobranchia; Styelidae; Botryllus +Botryococcus - 1 1 11 38880 PLN Eukaryota; Viridiplantae; Chlorophyta; Trebouxiophyceae; Elliptochloris clade; Botryococcus +Botryococcus braunii - 1 1 11 38881 PLN Eukaryota; Viridiplantae; Chlorophyta; Trebouxiophyceae; Elliptochloris clade; Botryococcus +Brachypodium false bromes 1 1 11 15367 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Brachypodieae; Brachypodium +Brachypodium distachyon stiff brome 1 1 11 15368 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Brachypodieae; Brachypodium +Bragasellus molinai - 1 5 - 1281925 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Bragasellus +Bragasellus peltatus - 1 5 - 1282048 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Bragasellus +Branchiostoma - 1 5 - 7737 INV Eukaryota; Metazoa; Chordata; Cephalochordata; Branchiostomidae; Branchiostoma +Branchiostoma floridae Florida lancelet 1 5 - 7739 INV Eukaryota; Metazoa; Chordata; Cephalochordata; Branchiostomidae; Branchiostoma +Brassica - 1 1 11 3705 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica +Brassica juncea - 1 1 11 3707 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; 
Brassica +Brassica napus rape 1 1 11 3708 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica +Brassica oleracea wild cabbage 1 1 11 3712 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica +Brassica oleracea var. oleracea - 1 1 11 109376 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica +Brassica oleracea var. viridis kale 1 1 11 3713 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica +Brassica rapa field mustard 1 1 11 3711 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica +Brassica rapa subsp. 
pekinensis Chinese cabbage 1 1 11 51351 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Brassica +Breviolum - 1 4 11 2499524 PLN Eukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Breviolum +Breviolum minutum - 1 4 11 2499525 PLN Eukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Breviolum; Breviolum minutum +Brugia malayi - 1 5 - 6279 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Spirurina; Spiruromorpha; Filarioidea; Onchocercidae; Brugia +Bubalus bubalis water buffalo 1 2 - 89462 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Bovinae; Bubalus +Buceros rhinoceros silvestris - 1 2 - 175836 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Bucerotiformes; Bucerotidae; Buceros +Burkholderia cenocepacia - 11 0 - 95486 BCT Bacteria; Proteobacteria; Betaproteobacteria; Burkholderiales; Burkholderiaceae; Burkholderia; Burkholderia cepacia complex +Burkholderia pseudomallei - 11 0 - 28450 BCT Bacteria; Proteobacteria; Betaproteobacteria; Burkholderiales; Burkholderiaceae; Burkholderia; pseudomallei group +Caenorhabditis - 1 5 - 6237 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis +Caenorhabditis elegans - 1 5 - 6239 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis +Cajanus - 1 1 11 3820 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; 
Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Cajanus +Cajanus cajan pigeon pea 1 1 11 3821 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Cajanus +Calanus finmarchicus - 1 5 - 6837 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Hexanauplia; Copepoda; Calanoida; Calanidae; Calanus +Calanus glacialis - 1 5 - 113644 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Hexanauplia; Copepoda; Calanoida; Calanidae; Calanus +Callithrix - 1 2 - 9481 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Platyrrhini; Cebidae; Callitrichinae; Callithrix +Callithrix jacchus white-tufted-ear marmoset 1 2 - 9483 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Platyrrhini; Cebidae; Callitrichinae; Callithrix; Callithrix +Callorhinchus - 1 2 - 7866 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Chondrichthyes; Holocephali; Chimaeriformes; Callorhinchidae; Callorhinchus +Callorhinchus milii elephant shark 1 2 - 7868 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Chondrichthyes; Holocephali; Chimaeriformes; Callorhinchidae; Callorhinchus +Calvadosia cruxmelitensis - 1 4 - 1843192 INV Eukaryota; Metazoa; Cnidaria; Staurozoa; Stauromedusae; Amyostaurida; Kishinouyeidae; Calvadosia +Camelina sativa false flax 1 1 11 90675 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Camelina +Camellia sinensis - 1 1 11 4442 PLN Eukaryota; 
Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Theaceae; Camellia +Camellia sinensis var. sinensis - 1 1 11 542762 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Theaceae; Camellia +Camelus bactrianus Bactrian camel 1 2 - 9837 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Camelus +Camelus dromedarius Arabian camel 1 2 - 9838 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Camelus +Camptotheca acuminata - 1 1 11 16922 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; Cornales; Nyssaceae; Camptotheca +Campylobacter jejuni - 11 0 - 197 BCT Bacteria; Proteobacteria; Epsilonproteobacteria; Campylobacterales; Campylobacteraceae; Campylobacter +Canis - 1 2 - 9611 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Canidae; Canis +Canis lupus dingo dingo 1 2 - 286419 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Canidae; Canis +Canis lupus familiaris dog 1 2 - 9615 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Canidae; Canis +Capitella - 1 5 - 51293 INV Eukaryota; Metazoa; Lophotrochozoa; Annelida; Polychaeta; Scolecida; Capitellida; Capitellidae; Capitella +Capitella teleta - 1 5 - 283909 INV Eukaryota; Metazoa; Lophotrochozoa; Annelida; Polychaeta; Scolecida; Capitellida; Capitellidae; Capitella +Capra hircus goat 1 2 - 9925 MAM 
Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Caprinae; Capra +Capsicum peppers 1 1 11 4071 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Capsiceae; Capsicum +Capsicum annuum - 1 1 11 4072 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Capsiceae; Capsicum +Carassius auratus goldfish 1 2 - 7957 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Carassius +Cariama cristata Red-legged seriema 1 2 - 54380 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Cariamiformes; Cariamidae; Cariama +Carica - 1 1 11 3648 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Caricaceae; Carica +Carica papaya papaya 1 1 11 3649 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Caricaceae; Carica +Caridina multidentata - 1 5 - 293153 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Caridea; Atyoidea; Atyidae; Caridina +Carlito syrichta Philippine tarsier 1 2 - 1868482 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Tarsiiformes; Tarsiidae; Carlito +Catharanthus roseus 
Madagascar periwinkle 1 1 11 4058 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Apocynaceae; Rauvolfioideae; Vinceae; Catharanthinae; Catharanthus +Cavia porcellus domestic guinea pig 1 2 - 10141 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Hystricomorpha; Caviidae; Cavia +Cenchrus americanus - 1 1 11 4543 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Cenchrinae; Cenchrus +Centaurea - 1 1 11 41503 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Carduoideae; Cardueae; Centaureinae; Centaurea +Ceratotherium simum simum southern white rhinoceros 1 2 - 73337 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Perissodactyla; Rhinocerotidae; Ceratotherium +Cercocebus atys sooty mangabey 1 2 - 9531 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Cercocebus +Chelonia - 1 2 - 8468 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Testudines; Cryptodira; Durocryptodira; Americhelydia; Chelonioidea; Cheloniidae; Chelonia +Chelonia mydas Green sea turtle 1 2 - 8469 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Testudines; Cryptodira; Durocryptodira; Americhelydia; Chelonioidea; Cheloniidae; Chelonia +Cherax quadricarinatus - 1 5 - 27406 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; 
Decapoda; Pleocyemata; Astacidea; Parastacoidea; Parastacidae; Cherax +Chinchilla lanigera long-tailed chinchilla 1 2 - 34839 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Hystricomorpha; Chinchillidae; Chinchilla +Chlamydia trachomatis - 11 0 - 813 BCT Bacteria; Chlamydiae; Chlamydiales; Chlamydiaceae; Chlamydia/Chlamydophila group; Chlamydia +Chlamydomonas - 1 1 11 3052 PLN Eukaryota; Viridiplantae; Chlorophyta; Chlorophyceae; Chlamydomonadales; Chlamydomonadaceae; Chlamydomonas +Chlamydomonas reinhardtii - 1 1 11 3055 PLN Eukaryota; Viridiplantae; Chlorophyta; Chlorophyceae; Chlamydomonadales; Chlamydomonadaceae; Chlamydomonas +Chlamydotis macqueenii Macqueen's bustard 1 2 - 187382 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Otididae; Chlamydotis +Chlorocebus - 1 2 - 392815 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Chlorocebus +Chlorocebus aethiops grivet 1 2 - 9534 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Chlorocebus +Chlorocebus sabaeus green monkey 1 2 - 60711 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Chlorocebus +Chorispora bungeana - 1 1 11 238895 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Chorisporeae; Chorispora +Chromolaena odorata - 1 1 11 103745 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; 
Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Heliantheae alliance; Eupatorieae; Chromolaena +Chrysemys picta bellii western painted turtle 1 2 - 8478 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Testudines; Cryptodira; Durocryptodira; Testudinoidea; Emydidae; Chrysemys +Chrysochloris asiatica Cape golden mole 1 2 - 185453 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Afrotheria; Chrysochloridae; Chrysochlorinae; Chrysochloris +Cicer - 1 1 11 3826 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Cicereae; Cicer +Cicer arietinum chickpea 1 1 11 3827 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Cicereae; Cicer +Ciona - 1 13 - 7718 INV Eukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Enterogona; Phlebobranchia; Cionidae; Ciona +Ciona intestinalis vase tunicate 1 13 - 7719 INV Eukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Enterogona; Phlebobranchia; Cionidae; Ciona +Ciona savignyi Pacific transparent sea squirt 1 13 - 51511 INV Eukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Enterogona; Phlebobranchia; Cionidae; Ciona +Citrus - 1 1 11 2706 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Rutaceae; Aurantioideae; Citrus +Citrus clementina - 1 1 11 85681 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; 
Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Rutaceae; Aurantioideae; Citrus +Citrus sinensis sweet orange 1 1 11 2711 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Rutaceae; Aurantioideae; Citrus +Clonorchis - 1 9 - 79922 INV Eukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Opisthorchiida; Opisthorchiata; Opisthorchiidae; Clonorchis +Clonorchis sinensis - 1 9 - 79923 INV Eukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Opisthorchiida; Opisthorchiata; Opisthorchiidae; Clonorchis +Clostridioides difficile - 11 0 - 1496 BCT Bacteria; Firmicutes; Clostridia; Clostridiales; Peptostreptococcaceae; Clostridioides +Clytia - 1 4 - 13436 INV Eukaryota; Metazoa; Cnidaria; Hydrozoa; Hydroidolina; Leptothecata; Campanulariidae; Clytia +Clytia hemisphaerica - 1 4 - 252671 INV Eukaryota; Metazoa; Cnidaria; Hydrozoa; Hydroidolina; Leptothecata; Campanulariidae; Clytia +Coccidioides - 1 4 - 5500 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Eurotiomycetes; Eurotiomycetidae; Onygenales; Onygenales incertae sedis; Coccidioides +Coccidioides posadasii - 1 4 - 199306 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Eurotiomycetes; Eurotiomycetidae; Onygenales; Onygenales incertae sedis; Coccidioides +Cocos nucifera coconut palm 1 1 11 13894 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Arecaceae; Arecoideae; Cocoseae; Attaleinae; Cocos +Coffea - 1 1 11 13442 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Rubiaceae; Ixoroideae; Gardenieae complex; Bertiereae - Coffeeae clade; Coffeeae; Coffea +Coffea arabica coffee 1 1 11 13443 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; 
Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Rubiaceae; Ixoroideae; Gardenieae complex; Bertiereae - Coffeeae clade; Coffeeae; Coffea +Coffea canephora - 1 1 11 49390 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Rubiaceae; Ixoroideae; Gardenieae complex; Bertiereae - Coffeeae clade; Coffeeae; Coffea +Colius striatus speckled mousebird 1 2 - 57412 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Coliiformes; Coliidae; Colius +Colletotrichum - 1 4 - 5455 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Hypocreomycetidae; Glomerellales; Glomerellaceae; Colletotrichum +Colletotrichum graminicola - 1 4 - 31870 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Hypocreomycetidae; Glomerellales; Glomerellaceae; Colletotrichum +Condylura cristata star-nosed mole 1 2 - 143302 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Eulipotyphla; Talpidae; Condylura +Coptotermes - 1 5 - 36986 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Dictyoptera; Blattodea; Blattoidea; Termitoidae; Rhinotermitidae; Coptotermes +Coptotermes formosanus Formosan subterranean termite 1 5 - 36987 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Dictyoptera; Blattodea; Blattoidea; Termitoidae; Rhinotermitidae; Coptotermes +Crassostrea - 1 5 - 6564 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Ostreoida; Ostreoidea; Ostreidae; Crassostrea +Crassostrea gigas Pacific oyster 1 5 - 29159 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Bivalvia; 
Pteriomorphia; Ostreoida; Ostreoidea; Ostreidae; Crassostrea +Cricetulus griseus Chinese hamster 1 2 - 10029 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Cricetidae; Cricetinae; Cricetulus +Cryptomeria - 1 1 11 3368 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Cupressales; Cupressaceae; Cryptomeria +Cryptomeria japonica Japanese cedar 1 1 11 3369 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Cupressales; Cupressaceae; Cryptomeria +Cryptosporidium - 1 4 4 5806 INV Eukaryota; Alveolata; Apicomplexa; Conoidasida; Coccidia; Eucoccidiorida; Eimeriorina; Cryptosporidiidae; Cryptosporidium +Cryptotermes secundus - 1 5 - 105785 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Dictyoptera; Blattodea; Blattoidea; Termitoidae; Kalotermitidae; Cryptotermitinae; Cryptotermes +Cucumis - 1 1 11 3655 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Benincaseae; Cucumis +Cucumis melo muskmelon 1 1 11 3656 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Benincaseae; Cucumis +Cucumis sativus cucumber 1 1 11 3659 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Benincaseae; Cucumis +Culex - 1 5 - 7174 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Culicinae; Culicini; Culex +Culex quinquefasciatus southern house mosquito 1 5 - 
7176 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Nematocera; Culicoidea; Culicidae; Culicinae; Culicini; Culex; Culex +Cuscuta pentagona - 1 1 11 112407 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Cuscuteae; Cuscuta; Grammica; Cuscuta sect. Cleistogrammica +Cynara cardunculus var. scolymus - 1 1 11 59895 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Carduoideae; Cardueae; Carduinae; Cynara +Cynoglossus semilaevis tongue sole 1 2 - 244447 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Carangaria; Pleuronectiformes; Pleuronectoidei; Cynoglossidae; Cynoglossinae; Cynoglossus +Cynopterus sphinx Indian short-nosed fruit bat 1 2 - 9400 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Cynopterus +Cyprinus - 1 2 - 7961 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Cyprinus +Cyprinus carpio common carp 1 2 - 7962 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Cyprinus +Danio - 1 2 - 7954 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Danio +Danio rerio zebrafish 1 2 - 7955 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; 
Cyprinidae; Danio +Daphnia common water fleas 1 5 - 6668 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Branchiopoda; Diplostraca; Cladocera; Anomopoda; Daphniidae; Daphnia +Daphnia magna - 1 5 - 35525 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Branchiopoda; Diplostraca; Cladocera; Anomopoda; Daphniidae; Daphnia +Daphnia pulex common water flea 1 5 - 6669 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Branchiopoda; Diplostraca; Cladocera; Anomopoda; Daphniidae; Daphnia +Dasypus novemcinctus nine-banded armadillo 1 2 - 9361 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Xenarthra; Cingulata; Dasypodidae; Dasypus +Dendrobium catenatum - 1 1 11 906689 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae; Epidendroideae; Malaxideae; Dendrobiinae; Dendrobium +Dendroctonus - 1 5 - 77156 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Coleoptera; Polyphaga; Cucujiformia; Curculionidae; Scolytinae; Dendroctonus +Dendroctonus ponderosae mountain pine beetle 1 5 - 77166 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Coleoptera; Polyphaga; Cucujiformia; Curculionidae; Scolytinae; Dendroctonus +Diacamma sp. 
Okinawa-2006a - 1 5 - 655490 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Ponerinae; Ponerini; Diacamma +Diaphorina citri Asian citrus psyllid 1 5 - 121845 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Psylloidea; Liviidae; Diaphorina +Dicentrarchus labrax European seabass 1 2 - 13489 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Moronidae; Dicentrarchus +Dicrocoelium dendriticum - 1 9 - 57078 INV Eukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Plagiorchiida; Xiphidiata; Gorgoderoidea; Dicrocoeliidae; Dicrocoelium +Dictyostelium - 1 1 11 5782 INV Eukaryota; Amoebozoa; Mycetozoa; Dictyostelids; Dictyosteliales; Dictyosteliaceae; Dictyostelium +Dictyostelium discoideum - 1 1 11 44689 INV Eukaryota; Amoebozoa; Mycetozoa; Dictyostelids; Dictyosteliales; Dictyosteliaceae; Dictyostelium +Diospyros lotus - 1 1 11 55363 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Ebenaceae; Diospyros +Diploscapter - 1 5 - 55799 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Rhabditoidea; Rhabditidae; Diploscapter +Diploscapter coronatus - 1 5 - 288516 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Rhabditomorpha; Rhabditoidea; Rhabditidae; Diploscapter +Dipodomys ordii Ord's kangaroo rat 1 2 - 10020 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Castorimorpha; Heteromyidae; Dipodomyinae; Dipodomys +Dorcoceras hygrometricum - 1 1 11 472368 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; 
Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Gesneriaceae; Didymocarpoideae; Trichosporeae; Loxocarpinae; Dorcoceras +Drosophila fruit flies 1 5 - 7215 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Ephydroidea; Drosophilidae; Drosophila +Drosophila ananassae - 1 5 - 7217 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Ephydroidea; Drosophilidae; Drosophila; Sophophora +Drosophila melanogaster fruit fly 1 5 - 7227 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Ephydroidea; Drosophilidae; Drosophila; Sophophora +Drosophila sechellia - 1 5 - 7238 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Ephydroidea; Drosophilidae; Drosophila; Sophophora +Drosophila simulans - 1 5 - 7240 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Ephydroidea; Drosophilidae; Drosophila; Sophophora +Dugesia japonica - 1 9 - 6161 INV Eukaryota; Metazoa; Platyhelminthes; Rhabditophora; Seriata; Tricladida; Continenticola; Geoplanoidea; Dugesiidae; Dugesia +Echinarachnius parma - 1 9 - 869203 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Gnathostomata; Clypeasteroida; Echinarachniidae; Echinarachnius +Echinaster spinulosus - 1 9 - 1451296 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Spinulosacea; Spinulosida; Echinasteridae; Echinaster +Echinococcus multilocularis - 1 9 - 6211 INV Eukaryota; Metazoa; Platyhelminthes; Cestoda; Eucestoda; Cyclophyllidea; Taeniidae; Echinococcus +Echinops telfairi small Madagascar hedgehog 1 2 
- 9371 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Afrotheria; Tenrecidae; Tenrecinae; Echinops +Eimeria - 1 4 4 5800 INV Eukaryota; Alveolata; Apicomplexa; Conoidasida; Coccidia; Eucoccidiorida; Eimeriorina; Eimeriidae; Eimeria +Elaeis guineensis African oil palm 1 1 11 51953 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Arecaceae; Arecoideae; Cocoseae; Elaeidinae; Elaeis +Elephantulus edwardii Cape elephant shrew 1 2 - 28737 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Afrotheria; Macroscelidea; Macroscelididae; Elephantulus +Elliptio complanata eastern elliptio 1 5 - 55832 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Bivalvia; Palaeoheterodonta; Unionoida; Unionoidea; Unionidae; Ambleminae; Elliptio +Elodea nuttallii - 1 1 11 55313 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Hydrocharitaceae; Elodea +Emiliania - 1 4 11 2902 PLN Eukaryota; Haptophyceae; Isochrysidales; Noelaerhabdaceae; Emiliania +Emiliania huxleyi - 1 4 11 2903 PLN Eukaryota; Haptophyceae; Isochrysidales; Noelaerhabdaceae; Emiliania +Entamoeba - 1 1 11 5758 INV Eukaryota; Amoebozoa; Archamoebae; Entamoebidae; Entamoeba +Entamoeba histolytica - 1 1 11 5759 INV Eukaryota; Amoebozoa; Archamoebae; Entamoebidae; Entamoeba +Enterococcus faecalis - 11 0 - 1351 BCT Bacteria; Firmicutes; Bacilli; Lactobacillales; Enterococcaceae; Enterococcus +Enterococcus faecium - 11 0 - 1352 BCT Bacteria; Firmicutes; Bacilli; Lactobacillales; Enterococcaceae; Enterococcus +Eptesicus fuscus big brown bat 1 2 - 29078 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Eptesicus +Equus caballus horse 1 2 - 9796 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; 
Eutheria; Laurasiatheria; Perissodactyla; Equidae; Equus +Equus przewalskii Przewalski's horse 1 2 - 9798 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Perissodactyla; Equidae; Equus +Erinaceus europaeus western European hedgehog 1 2 - 9365 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Eulipotyphla; Erinaceidae; Erinaceinae; Erinaceus +Eriocheir sinensis Chinese mitten crab 1 5 - 95602 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Brachyura; Eubrachyura; Grapsoidea; Varunidae; Eriocheir +Erythranthe - 1 1 11 1502711 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Phrymaceae; Erythranthe +Erythranthe guttata spotted monkey flower 1 1 11 4155 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Phrymaceae; Erythranthe +Escherichia - 11 0 - 561 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Escherichia +Escherichia coli - 11 0 - 562 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Escherichia +Escherichia coli O26:H11 - 11 0 - 244319 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Escherichia +Esox lucius northern pike 1 2 - 8010 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Esociformes; Esocidae; Esox +Eucalyptus - 1 1 11 3932 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Myrtaceae; Myrtoideae; 
Eucalypteae; Eucalyptus +Eucalyptus camaldulensis Murray red gum 1 1 11 34316 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Myrtaceae; Myrtoideae; Eucalypteae; Eucalyptus +Eucalyptus grandis - 1 1 11 71139 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Myrtaceae; Myrtoideae; Eucalypteae; Eucalyptus +Eucidaris tribuloides - 1 9 - 7632 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Perischoechinoidea; Cidaroida; Cidaridae; Eucidaris +Eufriesea mexicana - 1 5 - 516756 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Eufriesea +Eurypyga helias sunbittern 1 2 - 54383 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Eurypygidae; Eurypyga +Exaiptasia pallida - 1 4 - 1720309 INV Eukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Aiptasiidae; Exaiptasia +Extatosoma tiaratum giant prickly stick insect 1 5 - 7024 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Phasmatodea; Verophasmatodea; Anareolatae; Phasmatidae; Tropidoderinae; Extatosoma +Fagopyrum esculentum common buckwheat 1 1 11 3617 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Polygonaceae; Polygonoideae; Fagopyreae; Fagopyrum +Fagus sylvatica European beech 1 1 11 28930 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; 
Fagaceae; Fagus +Falco falcons 1 2 - 8952 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Falconiformes; Falconidae; Falco +Fasciola hepatica liver fluke 1 9 - 6192 INV Eukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Plagiorchiida; Echinostomata; Echinostomatoidea; Fasciolidae; Fasciola +Felis catus domestic cat 1 2 - 9685 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Feliformia; Felidae; Felinae; Felis +Festuca - 1 1 11 4605 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Loliinae; Festuca +Festuca arundinacea - 1 1 11 4606 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Loliinae; Festuca +Ficus carica common fig 1 1 11 3494 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Moraceae; Ficus fish metagenome - 11 2 11 496924 ENV unclassified sequences; metagenomes; organismal metagenomes -Folsomia candida - 1 5 - 158441 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Collembola; Collembola; Entomobryomorpha; Isotomoidea; Isotomidae; Proisotominae; Folsomia -Fopius arisanus - 1 5 - 64838 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; 
Ichneumonoidea; Braconidae; Opiinae; Fopius -Formica aquilonia - 1 5 - 258703 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica -Formica cinerea - 1 5 - 609761 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica -Formica exsecta - 1 5 - 72781 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica -Formica fusca - 1 5 - 72779 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica -Formica pratensis - 1 5 - 221681 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica -Formica pressilabris - 1 5 - 609858 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; 
Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica -Formica truncorum - 1 5 - 72783 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Formicini; Formica -Fragaria vesca subsp. vesca - 1 1 11 101020 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Rosoideae; Potentilleae; Fragariinae; Fragaria; Fragaria vesca -Fragaria x ananassa strawberry 1 1 11 3747 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Rosoideae; Potentilleae; Fragariinae; Fragaria -Frankliniella occidentalis western flower thrips 1 5 - 133901 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Thysanoptera; Terebrantia; Thripoidea; Thripidae; Thripinae; Frankliniella -Fraxinus excelsior European ash 1 1 11 38873 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Oleaceae; Oleeae; Fraxinus -Fukomys damarensis Damara mole-rat 1 2 - 885580 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; 
Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Hystricognathi; Bathyergidae; Fukomys -Fulmarus glacialis northern fulmar 1 2 - 30455 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Procellariiformes; Procellariidae; Procellariinae; Fulmarus -Fundulus grandis Gulf killifish 1 2 - 34779 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Fundulidae; Fundulus -Fundulus heteroclitus mummichog 1 2 - 8078 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Fundulidae; Fundulus -Gadus morhua Atlantic cod 1 2 - 8049 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; 
Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Paracanthomorphacea; Zeiogadaria; Gadariae; Gadiformes; Gadoidei; Gadidae; Gadus -Galeopterus variegatus Sunda flying lemur 1 2 - 482537 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Dermoptera; Cynocephalidae; Galeopterus -Gallus gallus chicken 1 2 - 9031 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Phasianidae; Phasianinae; Gallus -Gammarus chevreuxi - 1 5 - 732109 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Amphipoda; Senticaudata; Gammarida; Gammaridira; Gammaroidea; Gammaridae; Gammarus -Gardenia jasminoides - 1 1 11 114476 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Rubiaceae; Ixoroideae; Gardenieae; Gardenia -Gavia stellata red-throated loon 1 2 - 37040 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; 
Aves; Neognathae; Gaviiformes; Gaviidae; Gavia -Gavialis gangeticus Gharial 1 2 - 94835 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Crocodylia; Longirostres; Gavialidae; Gavialinae; Gavialis -Gekko japonicus - 1 2 - 146911 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Lepidosauria; Squamata; Bifurcata; Gekkota; Gekkonidae; Gekkoninae; Gekko -Geminigera cryophila - 1 1 11 46947 PLN cellular organisms; Eukaryota; Cryptophyta; Pyrenomonadales; Geminigeraceae; Geminigera +Formica aquilonia - 1 5 - 258703 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica +Formica cinerea - 1 5 - 609761 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica +Formica exsecta - 1 5 - 72781 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica +Formica fusca - 1 5 - 72779 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica +Formica pratensis - 1 5 - 221681 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica +Formica pressilabris - 1 5 - 609858 INV 
Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica +Formica truncorum - 1 5 - 72783 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Formica +Fragaria x ananassa strawberry 1 1 11 3747 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Rosoideae; Potentilleae; Fragariinae; Fragaria +Frankliniella occidentalis western flower thrips 1 5 - 133901 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Thysanoptera; Terebrantia; Thripoidea; Thripidae; Frankliniella +Fukomys damarensis Damara mole-rat 1 2 - 885580 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Hystricomorpha; Bathyergidae; Fukomys +Fulmarus glacialis Northern fulmar 1 2 - 30455 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Procellariiformes; Procellariidae; Fulmarus +Fundulus - 1 2 - 8077 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Fundulidae; Fundulus +Fundulus grandis Gulf killifish 1 2 - 34779 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Fundulidae; Fundulus +Fundulus heteroclitus mummichog 1 2 - 8078 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; 
Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Fundulidae; Fundulus +Fusarium - 1 4 - 5506 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Hypocreomycetidae; Hypocreales; Nectriaceae; Fusarium +Fusarium verticillioides - 1 4 - 117187 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Hypocreomycetidae; Hypocreales; Nectriaceae; Fusarium; Fusarium fujikuroi species complex +Gadus - 1 2 - 8048 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Zeiogadaria; Gadariae; Gadiformes; Gadoidei; Gadidae; Gadus +Gadus morhua Atlantic cod 1 2 - 8049 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Zeiogadaria; Gadariae; Gadiformes; Gadoidei; Gadidae; Gadus +Galeopterus variegatus Sunda flying lemur 1 2 - 482537 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Dermoptera; Cynocephalidae; Galeopterus +Gallus - 1 2 - 9030 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Phasianidae; Phasianinae; Gallus +Gallus gallus chicken 1 2 - 9031 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Phasianidae; Phasianinae; Gallus +Gardenia jasminoides - 1 1 11 114476 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Rubiaceae; Ixoroideae; Gardenieae complex; Gardenieae - Pavetteae clade; Gardenieae; Gardenia +Gasterosteus - 1 2 - 69292 VRT Eukaryota; 
Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Perciformes; Cottioidei; Gasterosteales; Gasterosteidae; Gasterosteus +Gasterosteus aculeatus three-spined stickleback 1 2 - 69293 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Perciformes; Cottioidei; Gasterosteales; Gasterosteidae; Gasterosteus +Gavia stellata red-throated loon 1 2 - 37040 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gaviiformes; Gaviidae; Gavia +Gekko japonicus - 1 2 - 146911 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Gekkota; Gekkonidae; Gekkoninae; Gekko Gene trapping vector VICTR76 - 11 0 11 447635 SYN other sequences; artificial sequences; vectors -Gentiana macrophylla - 1 1 11 50765 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Gentianaceae; Gentianeae; Gentiana -Geospiza fortis medium ground-finch 1 2 - 48883 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Thraupidae; Geospiza -Gerbera hybrid cultivar - 1 1 11 18101 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; 
asterids; campanulids; Asterales; Asteraceae; Mutisioideae; Mutisieae; Gerbera -Gigaspora margarita - 1 4 - 4874 PLN cellular organisms; Eukaryota; Opisthokonta; Fungi; Mucoromycota; Glomeromycotina; Glomeromycetes; Diversisporales; Gigasporaceae; Gigaspora -Glomeris pustulata - 1 5 - 1288506 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Myriapoda; Diplopoda; Pentazonia; Glomerida; Glomeridae; Glomeris -Glossoscolex paulistus - 1 5 - 1046353 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Annelida; Clitellata; Oligochaeta; Haplotaxida; Lumbricina; Glossoscolecidae; Glossoscolex -Glycera dibranchiata - 1 5 - 6350 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Annelida; Polychaeta; Palpata; Aciculata; Phyllodocida; Glyceridae; Glycera -Glycine max soybean 1 1 11 3847 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Phaseoleae; Glycine; Soja -Gongylonema pulchrum - 1 5 - 637853 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Spirurida; Spiruroidea; Gongylonematidae; Gongylonema -Gorilla gorilla gorilla western lowland gorilla 1 2 - 9595 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Homininae; Gorilla; Gorilla gorilla -Gossypium arboreum - 1 1 11 29729 PLN 
cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium -Gossypium hirsutum - 1 1 11 3635 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium -Gossypium raimondii - 1 1 11 29730 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium -Graminella nigrifrons - 1 5 - 30127 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Clypeorrhyncha; Membracoidea; Cicadellidae; Deltocephalinae; Graminella +Geranium pyrenaicum - 1 1 11 379953 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Geraniales; Geraniaceae; Geranium +Geranium robertianum - 1 1 11 122183 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Geraniales; Geraniaceae; Geranium +Gigaspora margarita - 1 4 - 4874 PLN Eukaryota; Fungi; Fungi incertae sedis; Mucoromycota; Glomeromycotina; Glomeromycetes; Diversisporales; Gigasporaceae; Gigaspora +Glossoscolex paulistus - 1 5 - 1046353 INV Eukaryota; Metazoa; Lophotrochozoa; Annelida; Clitellata; Oligochaeta; Haplotaxida; 
Lumbricina; Glossoscolecidae; Glossoscolex +Glycera dibranchiata - 1 5 - 6350 INV Eukaryota; Metazoa; Lophotrochozoa; Annelida; Polychaeta; Palpata; Aciculata; Phyllodocida; Glyceridae; Glycera +Glycine - 1 1 11 3846 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine +Glycine canescens - 1 1 11 48924 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine +Glycine cyrtoloba - 1 1 11 45689 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine +Glycine dolichocarpa - 1 1 11 82538 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine +Glycine falcata - 1 1 11 45690 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine +Glycine max soybean 1 1 11 3847 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; 
Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Soja +Glycine soja - 1 1 11 3848 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Soja +Glycine stenophita - 1 1 11 96944 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine +Glycine syndetika - 1 1 11 713886 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine +Glycine tomentella - 1 1 11 44015 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Glycine; Glycine +Gorilla gorilla gorilla western lowland gorilla 1 2 - 9595 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Gorilla +Gossypium - 1 1 11 3633 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium +Gossypium arboreum - 1 1 11 29729 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; 
Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium +Gossypium hirsutum cotton 1 1 11 3635 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium +Gossypium raimondii - 1 1 11 29730 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Malvoideae; Gossypium gut metagenome - 11 2 11 749906 ENV unclassified sequences; metagenomes; organismal metagenomes -Habropoda laboriosa - 1 5 - 597456 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Anthophorinae; Anthophorini; Habropoda -Haliaeetus albicilla white-tailed eagle 1 2 - 8969 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Falconiformes; Accipitridae; Accipitrinae; Haliaeetus -Halyomorpha halys brown marmorated stink bug 1 5 - 286706 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Neohemiptera; Prosorrhyncha; Heteroptera; Euheteroptera; Neoheteroptera; Panheteroptera; Pentatomomorpha; Pentatomoidea; Pentatomidae; Pentatominae; Halyomorpha -Hammondia hammondi - 1 4 4 99158 INV 
cellular organisms; Eukaryota; Alveolata; Apicomplexa; Conoidasida; Coccidia; Eucoccidiorida; Eimeriorina; Sarcocystidae; Hammondia -Haplochromis burtoni Burton's mouthbrooder 1 2 - 8153 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; Haplochromis -Harpegnathos saltator Jerdon's jumping ant 1 5 - 610380 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Ponerinae; Ponerini; Harpegnathos -Helicobacter pylori - 11 0 - 210 BCT cellular organisms; Bacteria; Proteobacteria; delta/epsilon subdivisions; Epsilonproteobacteria; Campylobacterales; Helicobacteraceae; Helicobacter -Helicoverpa assulta - 1 5 - 52344 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Noctuoidea; Noctuidae; Heliothinae; Helicoverpa -Heligmosomoides polygyrus - 1 5 - 6339 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Trichostrongyloidea; Heligmosomatidae; Heligmosomoides -Henricia sp. 
AR-2014 - 1 9 - 1462731 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Spinulosacea; Spinulosida; Echinasteridae; Henricia +Haemophilus influenzae - 11 0 - 727 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Pasteurellales; Pasteurellaceae; Haemophilus +Haliaeetus albicilla white-tailed eagle 1 2 - 8969 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Falconiformes; Accipitridae; Accipitrinae; Haliaeetus +Haliclystus sanjuanensis - 1 4 - 168739 INV Eukaryota; Metazoa; Cnidaria; Staurozoa; Stauromedusae; Myostaurida; Haliclystidae; Haliclystus +Halocynthia - 1 13 - 7728 INV Eukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Stolidobranchia; Pyuridae; Halocynthia +Halocynthia roretzi - 1 13 - 7729 INV Eukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Stolidobranchia; Pyuridae; Halocynthia +Haplochromis burtoni Burton's mouthbrooder 1 2 - 8153 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; Haplochromis +Helianthus - 1 1 11 4231 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Heliantheae alliance; Heliantheae; Helianthus +Helianthus annuus common sunflower 1 1 11 4232 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Heliantheae alliance; Heliantheae; Helianthus +Helicobacter pylori - 11 0 - 210 BCT Bacteria; Proteobacteria; Epsilonproteobacteria; 
Campylobacterales; Helicobacteraceae; Helicobacter +Helicoverpa armigera cotton bollworm 1 5 - 29058 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Noctuoidea; Noctuidae; Heliothinae; Helicoverpa +Helobdella - 1 5 - 6411 INV Eukaryota; Metazoa; Lophotrochozoa; Annelida; Clitellata; Hirudinea; Hirudinida; Glossiphoniiformes; Glossiphoniidae; Helobdella +Helobdella robusta - 1 5 - 6412 INV Eukaryota; Metazoa; Lophotrochozoa; Annelida; Clitellata; Hirudinea; Hirudinida; Glossiphoniiformes; Glossiphoniidae; Helobdella +Hemicentrotus pulcherrimus - 1 9 - 7650 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Strongylocentrotidae; Hemicentrotus +Henricia sp. AR-2014 - 1 9 - 1462731 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Spinulosacea; Spinulosida; Echinasteridae; Henricia +Hepacivirus C - 1 0 - 11103 VRL Viruses; ssRNA viruses; ssRNA positive-strand viruses, no DNA stage; Flaviviridae; Hepacivirus Hepatitis B virus - 1 0 - 10407 VRL Viruses; Retro-transcribing viruses; Hepadnaviridae; Orthohepadnavirus -Hepatitis C virus - 1 0 - 11103 VRL Viruses; ssRNA viruses; ssRNA positive-strand viruses, no DNA stage; Flaviviridae; Hepacivirus -Heterocephalus glaber naked mole-rat 1 2 - 10181 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Hystricognathi; Bathyergidae; Heterocephalus -Heterodera glycines soybean cyst nematode 1 5 - 51029 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Tylenchida; Tylenchina; Tylenchoidea; Heteroderidae; Heteroderinae; Heterodera -Hevea 
brasiliensis - 1 1 11 3981 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Micrandreae; Hevea -Homalodisca liturata - 1 5 - 320908 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Clypeorrhyncha; Membracoidea; Cicadellidae; Cicadellinae; unclassified Cicadellinae; Homalodisca -Homalodisca vitripennis glassy-winged sharpshooter 1 5 - 197043 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Clypeorrhyncha; Membracoidea; Cicadellidae; Cicadellinae; unclassified Cicadellinae; Homalodisca -Homo sapiens human 1 2 - 9606 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Homininae; Homo -Hordeum pubiflorum - 1 1 11 112521 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum -Hordeum vulgare - 1 1 11 4513 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; 
Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum -Hordeum vulgare subsp. vulgare domesticated barley 1 1 11 112509 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum; Hordeum vulgare +Heterocephalus glaber naked mole-rat 1 2 - 10181 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Hystricomorpha; Bathyergidae; Heterocephalus +Hevea brasiliensis rubber tree 1 1 11 3981 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Micrandreae; Hevea +Hippocampus comes tiger tail seahorse 1 2 - 109280 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Syngnathiaria; Syngnathiformes; Syngnathoidei; Syngnathidae; Hippocampus +Hirudo - 1 5 - 6420 INV Eukaryota; Metazoa; Lophotrochozoa; Annelida; Clitellata; Hirudinea; Hirudinida; Hirudiniformes; Hirudinidae; Hirudo +Hirudo medicinalis medicinal leech 1 5 - 6421 INV Eukaryota; Metazoa; Lophotrochozoa; Annelida; Clitellata; Hirudinea; Hirudinida; Hirudiniformes; Hirudinidae; Hirudo +Homo - 1 2 - 9605 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Homo +Homo sapiens human 1 2 - 9606 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Homo +Hordeum - 1 1 11 4512 PLN 
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum +Hordeum pubiflorum - 1 1 11 112521 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum +Hordeum vulgare - 1 1 11 4513 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum +Hordeum vulgare subsp. vulgare domesticated barley 1 1 11 112509 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Hordeum +Hucho taimen taimen 1 2 - 201498 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Hucho human gut metagenome - 11 2 11 408170 ENV unclassified sequences; metagenomes; organismal metagenomes -Human immunodeficiency virus 1 - 1 0 - 11676 VRL Viruses; Retro-transcribing viruses; Retroviridae; Orthoretrovirinae; Lentivirus; Primate lentivirus group -Humulus lupulus European hop 1 1 11 3486 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Humulus -Humulus lupulus var. 
cordifolius - 1 1 11 278022 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Humulus; Humulus lupulus -Humulus lupulus var. lupulus - 1 1 11 1571165 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Humulus; Humulus lupulus -Hyalella azteca - 1 5 - 294128 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Amphipoda; Senticaudata; Talitrida; Talitroidea; Hyalellidae; Hyalella -Hyas araneus - 1 5 - 361634 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Brachyura; Eubrachyura; Heterotremata; Majoidea; Majidae; Hyas -Hydra vulgaris - 1 4 - 6087 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Hydrozoa; Hydroidolina; Anthoathecata; Aplanulata; Hydridae; Hydra -Hydractinia symbiolongicarpus - 1 4 - 13093 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Hydrozoa; Hydroidolina; Anthoathecata; Filifera; Hydractiniidae; Hydractinia -Hynobius chinensis Chinese salamander 1 2 - 288313 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Caudata; Cryptobranchoidea; Hynobiidae; Hynobius; Hynobius -Hynobius 
retardatus Hokkaido salamander 1 2 - 36312 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Caudata; Cryptobranchoidea; Hynobiidae; Hynobius; Satobius -Hypsizygus marmoreus - 1 4 - 39966 PLN cellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Basidiomycota; Agaricomycotina; Agaricomycetes; Agaricomycetidae; Agaricales; Lyophyllaceae; Hypsizygus -Ictalurus punctatus channel catfish 1 2 - 7998 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Characiphysae; Siluriformes; Siluroidei; Ictaluridae; Ictalurus -Ictidomys tridecemlineatus thirteen-lined ground squirrel 1 2 - 43179 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Sciuridae; Xerinae; Marmotini; Ictidomys -Ipomoea batatas sweet potato 1 1 11 4120 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea -Ipomoea nil Japanese morning glory 1 1 11 35883 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; 
Convolvulaceae; Ipomoeeae; Ipomoea -Ipomoea purpurea common morning-glory 1 1 11 4121 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea -Ipomoea trifida - 1 1 11 35884 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea -Ixodes ricinus castor bean tick 1 5 - 34613 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Ixodinae; Ixodes -Ixodes scapularis black-legged tick 1 5 - 6945 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Ixodinae; Ixodes -Jaculus jaculus lesser Egyptian jerboa 1 2 - 51337 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Dipodidae; Dipodinae; Jaculus -Jatropha curcas - 1 1 11 180498 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Jatropheae; Jatropha -Juglans regia English walnut 1 1 11 51240 PLN cellular 
organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Juglandaceae; Juglans -Karelinia caspia - 1 1 11 313960 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Inuleae; Plucheinae; Karelinia -Kerria lacca common lac scale 1 5 - 473130 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Aphidiformes; Coccoidea; Kerriidae; Kerria -Klebsiella pneumoniae - 11 0 - 573 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Klebsiella -Kryptolebias marmoratus mangrove rivulus 1 2 - 37003 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Rivulidae; Kryptolebias -Lactuca sativa - 1 1 11 4236 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Cichorioideae; Cichorieae; Lactucinae; Lactuca -Lactuca serriola - 1 1 11 75943 PLN cellular organisms; Eukaryota; 
Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Cichorioideae; Cichorieae; Lactucinae; Lactuca -Lagenaria siceraria white-flowered gourd 1 1 11 3668 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Benincaseae; Lagenaria -Landoltia punctata - 1 1 11 50518 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Alismatales; Araceae; Lemnoideae; Landoltia -Larimichthys crocea large yellow croaker 1 2 - 215358 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Eupercaria incertae sedis; Sciaenidae; Larimichthys -Larix kaempferi - 1 1 11 54800 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Pinales; Pinaceae; Larix -Lasius neglectus - 1 5 - 111072 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Lasiini; Lasius; Lasius -Lasius turcicus - 1 5 - 235463 INV cellular organisms; Eukaryota; 
Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Lasiini; Lasius; Lasius -Lates calcarifer barramundi perch 1 2 - 8187 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Carangaria; Carangiaria incertae sedis; Centropomidae; Lates -Lathyrus sativus - 1 1 11 3860 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Fabeae; Lathyrus -Latimeria chalumnae coelacanth 1 2 - 7897 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Coelacanthimorpha; Coelacanthiformes; Coelacanthidae; Latimeria -Latimeria menadoensis Menado coelacanth 1 2 - 106881 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Coelacanthimorpha; Coelacanthiformes; Coelacanthidae; Latimeria -Latrodectus hesperus western black widow 1 5 - 256737 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Araneae; Araneomorphae; Entelegynae; Orbiculariae; Araneoidea; Theridiidae; Latrodectus -Legionella pneumophila - 11 0 - 446 
BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Legionellales; Legionellaceae; Legionella -Lepeophtheirus salmonis salmon louse 1 5 - 72036 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Copepoda; Neocopepoda; Podoplea; Siphonostomatoida; Caligidae; Lepeophtheirus -Lepidonotothen nudifrons yellowfin notie 1 2 - 83203 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Perciformes; Notothenioidei; Nototheniidae; Lepidonotothen -Lepidothrix coronata blue-crowned manakin 1 2 - 321398 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Pipridae; Lepidothrix -Lepisosteus oculatus spotted gar 1 2 - 7918 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Holostei; Semionotiformes; Lepisosteidae; Lepisosteus -Leptasterias sp. 
AR-2014 - 1 9 - 1462732 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Leptasterias -Leptonychotes weddellii Weddell seal 1 2 - 9713 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Phocidae; Leptonychotes -Leptosomus discolor cuckoo roller 1 2 - 188344 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Coraciiformes; Leptosomidae; Leptosomus -Leptospira interrogans - 11 0 - 173 BCT cellular organisms; Bacteria; Spirochaetes; Spirochaetia; Leptospirales; Leptospiraceae; Leptospira -Limnephilus lunatus - 1 5 - 1218281 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Trichoptera; Integripalpia; Plenitentoria; Limnephiloidea; Limnephilidae; Limnephilinae; Limnephilini; Limnephilus -Limnoperna fortunei - 1 5 - 356393 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Mytiloida; Mytiloidea; Mytilidae; Mytilinae; Limnoperna -Limulus polyphemus Atlantic horseshoe crab 1 5 - 6850 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; 
Chelicerata; Merostomata; Xiphosura; Limulidae; Limulus -Linepithema humile Argentine ant 1 5 - 83485 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Dolichoderinae; Linepithema -Lingula anatina - 1 5 - 7574 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Brachiopoda; Linguliformea; Lingulata; Lingulida; Linguloidea; Lingulidae; Lingula -Lingulodinium polyedrum - 1 4 11 160621 PLN cellular organisms; Eukaryota; Alveolata; Dinophyceae; Gonyaulacales; Lingulodinium -Lipotes vexillifer Yangtze River dolphin 1 2 - 118797 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Odontoceti; Lipotidae; Lipotes -Listeria monocytogenes - 11 0 - 1639 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Bacillales; Listeriaceae; Listeria -Litchi chinensis - 1 1 11 151069 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Sapindaceae; Litchi -Litopenaeus vannamei Pacific white shrimp 1 5 - 6689 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Dendrobranchiata; Penaeoidea; Penaeidae; Litopenaeus -Loa loa eye worm 1 5 - 7209 INV cellular organisms; Eukaryota; 
Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Spirurida; Filarioidea; Onchocercidae; Loa -Lolium perenne - 1 1 11 4522 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Loliinae; Lolium -Lotus corniculatus - 1 1 11 47247 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Loteae; Lotus -Loxodonta africana African savanna elephant 1 2 - 9785 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Afrotheria; Proboscidea; Elephantidae; Loxodonta -Luidia clathrata - 1 9 - 133437 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Valvatacea; Paxillosida; Luidiidae; Luidia -Lupinus angustifolius narrow-leaved blue lupine 1 1 11 3871 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Genisteae; Lupinus -Lygodium japonicum - 1 1 11 13824 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Moniliformopses; Polypodiidae; Schizaeales; Lygodiaceae; Lygodium -Lygus hesperus lygus bug 1 5 - 30085 INV cellular 
organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Neohemiptera; Prosorrhyncha; Heteroptera; Euheteroptera; Neoheteroptera; Panheteroptera; Cimicomorpha; Cimicoidea; Miridae; Mirinae; Mirini; Lygus -Lymnaea stagnalis great pond snail 1 5 - 6523 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Hygrophila; Lymnaeoidea; Lymnaeidae; Lymnaea -Lynx pardinus Spanish lynx 1 2 - 191816 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Feliformia; Felidae; Felinae; Lynx -Lytechinus variegatus green sea urchin 1 9 - 7654 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Temnopleuroida; Toxopneustidae; Lytechinus -Macaca fascicularis crab-eating macaque 1 2 - 9541 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Macaca -Macaca mulatta Rhesus monkey 1 2 - 9544 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; 
Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Macaca -Macaca nemestrina pig-tailed macaque 1 2 - 9545 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Macaca -Macropus eugenii tammar wallaby 1 2 - 9315 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Metatheria; Diprotodontia; Macropodidae; Macropus -Malus domestica apple 1 1 11 3750 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Maleae; Malus -Manacus vitellinus golden-collared manakin 1 2 - 328815 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Pipridae; Manacus -Mandrillus leucophaeus drill 1 2 - 9568 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; 
Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Mandrillus -Mangifera indica mango 1 1 11 29780 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Anacardiaceae; Mangifera -Manihot esculenta cassava 1 1 11 3983 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Manihoteae; Manihot -Manis javanica Malayan pangolin 1 2 - 9974 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Pholidota; Manidae; Manis +Human immunodeficiency virus 1 - 1 0 - 11676 VRL Viruses; Ortervirales; Retroviridae; Orthoretrovirinae; Lentivirus +Humulus lupulus European hop 1 1 11 3486 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Humulus +Humulus lupulus var. cordifolius - 1 1 11 278022 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Humulus +Humulus lupulus var. 
lupulus - 1 1 11 1571165 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Cannabaceae; Humulus +Hydra - 1 4 - 6083 INV Eukaryota; Metazoa; Cnidaria; Hydrozoa; Hydroidolina; Anthoathecata; Aplanulata; Hydridae; Hydra +Hydra vulgaris - 1 4 - 6087 INV Eukaryota; Metazoa; Cnidaria; Hydrozoa; Hydroidolina; Anthoathecata; Aplanulata; Hydridae; Hydra +Hynobius chinensis Chinese salamander 1 2 - 288313 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Caudata; Cryptobranchoidea; Hynobiidae; Hynobius; Hynobius +Hynobius retardatus Hokkaido salamander 1 2 - 36312 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Caudata; Cryptobranchoidea; Hynobiidae; Hynobius; Satobius +Ictalurus - 1 2 - 7997 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Siluriformes; Ictaluridae; Ictalurus +Ictalurus furcatus blue catfish 1 2 - 66913 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Siluriformes; Ictaluridae; Ictalurus +Ictalurus punctatus channel catfish 1 2 - 7998 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Siluriformes; Ictaluridae; Ictalurus +Ictidomys tridecemlineatus thirteen-lined ground squirrel 1 2 - 43179 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Sciuromorpha; Sciuridae; Xerinae; Marmotini; Ictidomys +Influenza A virus - 1 0 - 11320 VRL Viruses; ssRNA viruses; ssRNA negative-strand viruses; Negarnaviricota; Polyploviricotina; Insthoviricetes; Articulavirales; Orthomyxoviridae; Alphainfluenzavirus +Ipomoea - 1 1 11 4119 PLN Eukaryota; Viridiplantae; Streptophyta; 
Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea +Ipomoea batatas sweet potato 1 1 11 4120 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea +Ipomoea nil Japanese morning glory 1 1 11 35883 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea +Ipomoea purpurea common morning-glory 1 1 11 4121 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea +Ipomoea trifida - 1 1 11 35884 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Convolvulaceae; Ipomoeeae; Ipomoea +Ixodes - 1 5 - 6944 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Ixodinae; Ixodes +Ixodes scapularis black-legged tick 1 5 - 6945 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Ixodinae; Ixodes +Jaculus jaculus lesser Egyptian jerboa 1 2 - 51337 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Dipodoidea; Dipodidae; Dipodinae; Jaculus +Jatropha curcas - 1 1 11 180498 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Jatropheae; 
Jatropha +Juglans regia English walnut 1 1 11 51240 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Juglandaceae; Juglans +Karelinia caspia - 1 1 11 313960 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Asteroideae; Inuleae; Plucheinae; Karelinia +Kerria lacca common lac scale 1 5 - 473130 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Coccoidea; Kerriidae; Kerria +Klebsiella pneumoniae - 11 0 - 573 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Klebsiella +Klebsiella pneumoniae subsp. pneumoniae - 11 0 - 72407 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Klebsiella +Lactuca - 1 1 11 4235 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Cichorioideae; Cichorieae; Lactucinae; Lactuca +Lactuca sativa - 1 1 11 4236 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Cichorioideae; Cichorieae; Lactucinae; Lactuca +Lactuca serriola - 1 1 11 75943 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Cichorioideae; Cichorieae; Lactucinae; Lactuca +Lasius neglectus - 1 5 - 111072 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; 
Lasius; Lasius +Lasius turcicus - 1 5 - 235463 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Formicinae; Lasius; Lasius +Lates calcarifer barramundi perch 1 2 - 8187 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Carangaria; Carangaria incertae sedis; Centropomidae; Lates +Lathyrus sativus - 1 1 11 3860 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Fabeae; Lathyrus +Latimeria chalumnae coelacanth 1 2 - 7897 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Coelacanthiformes; Coelacanthidae; Latimeria +Latrodectus hesperus western black widow 1 5 - 256737 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Araneae; Araneomorphae; Entelegynae; Araneoidea; Theridiidae; Latrodectus +Lepeophtheirus - 1 5 - 72035 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Hexanauplia; Copepoda; Siphonostomatoida; Caligidae; Lepeophtheirus +Lepeophtheirus salmonis salmon louse 1 5 - 72036 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Hexanauplia; Copepoda; Siphonostomatoida; Caligidae; Lepeophtheirus +Lepidonotothen nudifrons yellowfin notie 1 2 - 83203 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Perciformes; Notothenioidei; Nototheniidae; Lepidonotothen +Lepisosteus oculatus spotted gar 1 2 - 7918 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Holostei; Semionotiformes; Lepisosteidae; Lepisosteus +Leptasterias sp. 
AR-2014 - 1 9 - 1462732 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Leptasterias +Leptonychotes weddellii Weddell seal 1 2 - 9713 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Phocidae; Leptonychotes +Leptosomus discolor cuckoo roller 1 2 - 188344 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Coraciiformes; Leptosomidae; Leptosomus +Ligia exotica - 1 5 - 142080 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Oniscidea; Diplocheta; Ligiidae; Ligia +Limulus polyphemus Atlantic horseshoe crab 1 5 - 6850 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Merostomata; Xiphosura; Limulidae; Limulus +Linepithema humile Argentine ant 1 5 - 83485 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Dolichoderinae; Linepithema +Lingulodinium polyedra - 1 4 11 160621 PLN Eukaryota; Alveolata; Dinophyceae; Gonyaulacales; Lingulodinium +Linum - 1 1 11 4005 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Linaceae; Linum +Linum usitatissimum flax 1 1 11 4006 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Linaceae; Linum +Listeria monocytogenes - 11 0 - 1639 BCT Bacteria; Firmicutes; Bacilli; Bacillales; Listeriaceae; Listeria +Litchi chinensis - 1 1 11 151069 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; 
eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Sapindaceae; Litchi +Lolium - 1 1 11 4520 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Loliinae; Lolium +Lolium perenne - 1 1 11 4522 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Loliinae; Lolium +Lottia - 1 5 - 72691 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Patellogastropoda; Lottioidea; Lottiidae; Lottia +Lottia gigantea owl limpet 1 5 - 225164 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Patellogastropoda; Lottioidea; Lottiidae; Lottia +Lotus - 1 1 11 3867 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; robinioid clade; Loteae; Lotus +Lotus japonicus - 1 1 11 34305 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; robinioid clade; Loteae; Lotus +Lucernaria quadricornis - 1 4 - 1843199 INV Eukaryota; Metazoa; Cnidaria; Staurozoa; Stauromedusae; Myostaurida; Lucernariidae; Lucernaria +Luidia clathrata - 1 9 - 133437 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Valvatacea; Paxillosida; Luidiidae; Luidia +Lupinus angustifolius narrow-leaved blue lupine 1 1 11 3871 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; 
Fabaceae; Papilionoideae; 50 kb inversion clade; genistoids sensu lato; core genistoids; Genisteae; Lupinus +Lygus hesperus lygus bug 1 5 - 30085 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Heteroptera; Panheteroptera; Cimicomorpha; Miridae; Mirini; Lygus +Lymnaea stagnalis great pond snail 1 5 - 6523 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Hygrophila; Lymnaeoidea; Lymnaeidae; Lymnaea +Lytechinus variegatus green sea urchin 1 9 - 7654 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Temnopleuroida; Toxopneustidae; Lytechinus +Macaca macaques 1 2 - 9539 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Macaca +Macaca fascicularis crab-eating macaque 1 2 - 9541 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Macaca +Macaca fuscata fuscata Japanese macaque 1 2 - 9543 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Macaca +Macaca mulatta Rhesus monkey 1 2 - 9544 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Macaca +Macaca nemestrina pig-tailed macaque 1 2 - 9545 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Macaca +Machilis hrabei - 1 5 - 438506 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Monocondylia; Archaeognatha; Machilidae; 
Machilis +Magnaporthe - 1 4 - 148303 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Sordariomycetidae; Magnaporthales; Magnaporthaceae; Magnaporthe +Magnaporthe grisea - 1 4 - 148305 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Sordariomycetidae; Magnaporthales; Magnaporthaceae; Magnaporthe +Malus - 1 1 11 3749 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Amygdaloideae; Maleae; Malus +Malus domestica apple 1 1 11 3750 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Amygdaloideae; Maleae; Malus +Manacus vitellinus golden-collared manakin 1 2 - 328815 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Pipridae; Manacus +Mangifera indica mango 1 1 11 29780 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Sapindales; Anacardiaceae; Mangifera +Manihot - 1 1 11 3982 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Manihoteae; Manihot +Manihot esculenta cassava 1 1 11 3983 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Crotonoideae; Manihoteae; Manihot +Manis javanica Malayan pangolin 1 2 - 9974 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Pholidota; Manidae; Manis 
marine metagenome - 11 2 11 408172 ENV unclassified sequences; metagenomes; ecological metagenomes marine sediment metagenome - 11 2 11 412755 ENV unclassified sequences; metagenomes; ecological metagenomes -Marmota marmota marmota Alpine marmot 1 2 - 9994 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Sciuridae; Xerinae; Marmotini; Marmota; Marmota marmota -Marthasterias glacialis spiny starfish 1 9 - 7609 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Marthasterias -Maylandia zebra zebra mbuna 1 2 - 106582 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; Maylandia; Maylandia zebra complex -Medauroidea extradentata - 1 5 - 614211 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Orthopteroidea; Phasmatodea; Verophasmatodea; Anareolatae; Phasmatidae; Phasmatinae; Clitumnini; Medauroidea -Medicago sativa - 1 1 11 3879 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; 
Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Medicago -Medicago truncatula barrel medic 1 1 11 3880 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Medicago -Megachile rotundata alfalfa leafcutting bee 1 5 - 143995 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Megachilidae; Megachilinae; Megachilini; Megachile -Megaderma lyra Indian false vampire 1 2 - 9413 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Megadermatidae; Megaderma -Megajapyx sp. 
UVienna-2012 - 1 5 - 1136246 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Diplura; Diplura; Dicellurata; Japygoidea; Japygidae; Japyginae; Megajapyx -Megaselia scalaris - 1 5 - 36166 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Aschiza; Platypezoidea; Phoridae; Metopininae; Megaseliini; Megaselia -Meleagris gallopavo turkey 1 2 - 9103 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Phasianidae; Meleagridinae; Meleagris -Melopsittacus undulatus budgerigar 1 2 - 13146 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Psittaciformes; Psittaculidae; Melopsittacus -Mengenilla moldrzyki - 1 5 - 1155016 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Strepsiptera; Mengenillidia; Mengenilloidea; Mengenillidae; Mengenilla -Meretrix meretrix Asiatic hard clam 1 5 - 291251 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; 
Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Heteroconchia; Euheterodonta; Veneroida; Veneroidea; Veneridae; Meretrix -Merops nubicus carmine bee-eater 1 2 - 57421 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Coraciiformes; Meropidae; Merops -Mesembryanthemum crystallinum common iceplant 1 1 11 3544 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Aizoaceae; Mesembryanthemum; Cryophytum -Mesitornis unicolor brown roatelo 1 2 - 54374 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Mesitornithidae; Mesitornis -Mesocricetus auratus golden hamster 1 2 - 10036 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Cricetidae; Cricetinae; Mesocricetus -Metaseiulus occidentalis western predatory mite 1 5 - 34638 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Mesostigmata; Monogynaspida; Gamasina; 
Phytoseioidea; Phytoseiidae; Typhlodrominae; Metaseiulus -Microcebus murinus gray mouse lemur 1 2 - 30608 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Strepsirrhini; Lemuriformes; Cheirogaleidae; Microcebus -Microplitis demolitor - 1 5 - 69319 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Ichneumonoidea; Braconidae; Microgastrinae; Microplitis -Micropterix calthella - 1 5 - 41027 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Zeugloptera; Micropterigidae; Micropterix -Micropterus floridanus Florida bass 1 2 - 225391 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Centrarchiformes; Centrarchoidei; Centrarchidae; Micropterus -Micropterus salmoides largemouth bass 1 2 - 27706 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; 
Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Centrarchiformes; Centrarchoidei; Centrarchidae; Micropterus -Micropterus salmoides salmoides northern largemouth bass 1 2 - 489037 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Centrarchiformes; Centrarchoidei; Centrarchidae; Micropterus; Micropterus salmoides -Microtus ochrogaster prairie vole 1 2 - 79684 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Cricetidae; Arvicolinae; Microtus -Miichthys miiuy Mi-iuy croaker 1 2 - 240162 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Eupercaria incertae sedis; Sciaenidae; Miichthys -mine drainage metagenome - 11 2 11 410659 ENV unclassified sequences; metagenomes; ecological metagenomes -Mischocyttarus flavitarsis - 1 5 - 231975 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Vespidae; Polistinae; 
Mischocyttarini; Mischocyttarus -Momordica charantia - 1 1 11 3673 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Momordiceae; Momordica -Moniezia expansa sheep tapeworm 1 9 - 28841 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Cestoda; Eucestoda; Cyclophyllidea; Anoplocephalidae; Moniezia -Monodelphis domestica gray short-tailed opossum 1 2 - 13616 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Metatheria; Didelphimorphia; Didelphidae; Didelphinae; Monodelphis -Monomorium chinense - 1 5 - 482359 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Solenopsidini; Monomorium -Monomorium pharaonis pharaoh ant 1 5 - 307658 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Solenopsidini; Monomorium -Morone chrysops white bass 1 2 - 46259 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; 
Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Eupercaria incertae sedis; Moronidae; Morone -Morone saxatilis striped sea-bass 1 2 - 34816 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Eupercaria incertae sedis; Moronidae; Morone -Morus notabilis - 1 1 11 981085 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Moraceae; Morus -Mus musculus house mouse 1 2 - 10090 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Muridae; Murinae; Mus; Mus -Musa ABB Group - 1 1 11 214693 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Zingiberales; Musaceae; Musa; Musa x paradisiaca -Musa acuminata AAA Group dessert banana 1 1 11 214697 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Zingiberales; Musaceae; Musa; Musa acuminata -Musa acuminata subsp. 
malaccensis wild Malaysian banana 1 1 11 214687 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Zingiberales; Musaceae; Musa; Musa acuminata -Musca domestica house fly 1 5 - 7370 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Calyptratae; Muscoidea; Muscidae; Muscinae; Muscini; Musca; Musca -Mustela putorius furo domestic ferret 1 2 - 9669 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Mustelidae; Mustelinae; Mustela; Mustela putorius -Mycobacterium abscessus - 11 0 - 36809 BCT cellular organisms; Bacteria; Terrabacteria group; Actinobacteria; Actinobacteria; Corynebacteriales; Mycobacteriaceae; Mycobacterium; Mycobacterium chelonae group; Mycobacterium abscessus subgroup -Mycobacterium tuberculosis - 11 0 - 1773 BCT cellular organisms; Bacteria; Terrabacteria group; Actinobacteria; Actinobacteria; Corynebacteriales; Mycobacteriaceae; Mycobacterium; Mycobacterium tuberculosis complex -Myotis brandtii Brandt's bat 1 2 - 109478 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis -Myotis davidii - 1 2 - 225400 MAM cellular 
organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis -Myotis lucifugus little brown bat 1 2 - 59463 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis -Myotis ricketti Rickett's big-footed Myotis 1 2 - 203696 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis -Myrionecta rubra - 1 4 11 283649 INV cellular organisms; Eukaryota; Alveolata; Ciliophora; Intramacronucleata; Litostomatea; Haptoria; Cyclotrichida; Mesodiniidae; Myrionecta -Myrmica rubra - 1 5 - 106198 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Myrmicini; Myrmica -Myrmica ruginodis - 1 5 - 34708 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Myrmicini; Myrmica -Myrmica sulcinodis - 1 5 - 
229918 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Myrmicini; Myrmica -Mytilus galloprovincialis Mediterranean mussel 1 5 - 29158 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Mytiloida; Mytiloidea; Mytilidae; Mytilinae; Mytilus -Nannochorista philpotti - 1 5 - 1260225 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Mecoptera; Nannochoristidae; Nannochorista -Nannospalax galili Upper Galilee mountains blind mole rat 1 2 - 1026970 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Spalacidae; Spalacinae; Nannospalax -Nanorana parkeri - 1 2 - 125878 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Neobatrachia; Ranoidea; Dicroglossidae; Dicroglossinae; Nanorana -Nasonia vitripennis jewel wasp 1 5 - 7425 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Chaldicoidea group; Chalcidoidea; 
Pteromalidae; Pteromalinae; Nasonia -Necator americanus - 1 5 - 51031 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Ancylostomatoidea; Ancylostomatidae; Bunostominae; Necator -Neisseria gonorrhoeae - 11 0 - 485 BCT cellular organisms; Bacteria; Proteobacteria; Betaproteobacteria; Neisseriales; Neisseriaceae; Neisseria -Neisseria meningitidis - 11 0 - 487 BCT cellular organisms; Bacteria; Proteobacteria; Betaproteobacteria; Neisseriales; Neisseriaceae; Neisseria -Nelumbo nucifera sacred lotus 1 1 11 4432 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; stem eudicotyledons; Proteales; Nelumbonaceae; Nelumbo -Nematostella vectensis starlet sea anemone 1 4 - 45351 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Edwardsiidae; Nematostella -Neolamarckia cadamba - 1 1 11 153762 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Rubiaceae; Cinchonoideae; Naucleeae; Neolamarckia -Neolamprologus brichardi - 1 2 - 32507 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Lamprologini; Neolamprologus -Nestor notabilis Kea 1 2 - 176057 VRT cellular organisms; Eukaryota; 
Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Psittaciformes; Psittacidae; Nestor -Nicotiana attenuata - 1 1 11 49451 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana -Nicotiana benthamiana - 1 1 11 4100 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana -Nicotiana sylvestris wood tobacco 1 1 11 4096 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana -Nicotiana tabacum common tobacco 1 1 11 4097 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana -Nicotiana tomentosiformis - 1 1 11 4098 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; 
Nicotiana -Nicrophorus vespilloides - 1 5 - 110193 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Staphyliniformia; Staphylinoidea; Silphidae; Nicrophorinae; Nicrophorus -Nilaparvata lugens brown planthopper 1 5 - 108931 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Euhemiptera; Neohemiptera; Archaeorrhyncha; Fulgoroidea; Delphacidae; Delphacinae; Nilaparvata -Nipponia nippon crested ibis 1 2 - 128390 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Threskiornithidae; Nipponia -Nitella hyalina - 1 1 11 181804 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Charophyceae; Charales; Characeae; Nitella -Nitella mirabilis - 1 1 11 231897 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Charophyceae; Charales; Characeae; Nitella -Noccaea caerulescens - 1 1 11 107243 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Coluteocarpeae; Noccaea -Nomascus leucogenys northern white-cheeked gibbon 1 2 - 61853 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; 
Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hylobatidae; Nomascus -Nothobranchius furzeri turquoise killifish 1 2 - 105023 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Nothobranchius -Nothobranchius kadleci - 1 2 - 1051664 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Nothobranchius -Nothobranchius korthausae - 1 2 - 1143690 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Nothobranchius -Nothobranchius kuhntae Beira killifish 1 2 - 321403 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; 
Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Nothobranchius -Nothobranchius pienaari - 1 2 - 704102 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Nothobranchius -Nothobranchius rachovii bluefin notho 1 2 - 451742 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Aplocheiloidei; Nothobranchiidae; Nothobranchius -Notholithocarpus densiflorus - 1 1 11 165545 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Fagaceae; Notholithocarpus -Notothenia coriiceps black rockcod 1 2 - 8208 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; 
Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Perciformes; Notothenioidei; Nototheniidae; Notothenia -Numida meleagris helmeted guineafowl 1 2 - 8996 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Numididae; Numida -Nylanderia pubens - 1 5 - 613973 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Formicinae; Lasiini; Nylanderia -Ochotona princeps American pika 1 2 - 9978 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Lagomorpha; Ochotonidae; Ochotona -Ochromonas sp. 
LO244K-D - 1 1 11 1825117 PLN cellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; Chromulinales; Chromulinaceae; Ochromonas -Octodon degus degu 1 2 - 10160 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Hystricognathi; Octodontidae; Octodon -Octopus bimaculoides - 1 5 - 37653 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Cephalopoda; Coleoidea; Neocoleoidea; Octopodiformes; Octopoda; Incirrata; Octopodidae; Octopus -Odobenus rosmarus divergens Pacific walrus 1 2 - 9708 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Odobenidae; Odobenus; Odobenus rosmarus -Oenococcus oeni - 11 0 - 1247 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Leuconostocaceae; Oenococcus -Oesophagostomum dentatum - 1 5 - 61180 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Strongyloidea; Cloacinidae; Oesophagostomum -Olavius algarvensis - 1 5 - 188229 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Annelida; Clitellata; Oligochaeta; Haplotaxida; Tubificina; Tubificidae; Phallodrilinae; Olavius -Olea europaea common olive 1 1 11 4146 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; 
eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Oleaceae; Oleeae; Olea -Onchocerca flexuosa - 1 5 - 387005 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Spirurida; Filarioidea; Onchocercidae; Onchocerca -Oncidium hybrid cultivar - 1 1 11 141207 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Orchidaceae; Epidendroideae; Cymbidieae; Oncidiinae; Oncidium -Oncorhynchus masou masou cherry salmon 1 2 - 90313 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus; Oncorhynchus masou -Oncorhynchus mykiss rainbow trout 1 2 - 8022 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus -Onthophagus nigriventris - 1 5 - 476074 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Polyphaga; Scarabaeiformia; Scarabaeoidea; Scarabaeidae; Scarabaeinae; Scarabaeinae incertae sedis; Onthophagus -Ophiocoma echinata - 1 9 - 331088 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; 
Ophiuroidea; Ophiuridea; Ophiurida; Ophiurina; Gnathophiurina; Ophiocomidae; Ophiocominae; Ophiocoma -Ophthalmotilapia ventralis - 1 2 - 27755 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Ectodini; Ophthalmotilapia -Opisthorchis viverrini - 1 9 - 6198 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Opisthorchiida; Opisthorchiata; Opisthorchiidae; Opisthorchis -Orchesella cincta - 1 5 - 48709 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Collembola; Collembola; Entomobryomorpha; Entomobryoidea; Entomobryidae; Orchesellinae; Orchesella -Oreochromis niloticus Nile tilapia 1 2 - 8128 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Oreochromini; Oreochromis -Ornithorhynchus anatinus platypus 1 2 - 9258 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Prototheria; Monotremata; 
Ornithorhynchidae; Ornithorhynchus -Oropsylla silantiewi - 1 5 - 1461318 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Siphonaptera; Ceratophyllomorpha; Ceratophylloidea; Ceratophyllidae; Oropsylla -Orycteropus afer afer - 1 2 - 1230840 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Afrotheria; Tubulidentata; Orycteropodidae; Orycteropus; Orycteropus afer -Oryctolagus cuniculus rabbit 1 2 - 9986 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Lagomorpha; Leporidae; Oryctolagus -Oryza brachyantha malo sina 1 1 11 4533 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza -Oryza sativa rice 1 1 11 4530 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza -Oryza sativa Indica Group long-grained rice 1 1 11 39946 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; 
Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza sativa -Oryza sativa Japonica Group Japanese rice 1 1 11 39947 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza sativa -Oryzias latipes Japanese medaka 1 2 - 8090 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Beloniformes; Adrianichthyoidei; Adrianichthyidae; Oryziinae; Oryzias -Osmia cornuta - 1 5 - 185587 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea; Megachilidae; Megachilinae; Osmiini; Osmia -Ostrinia furnacalis Asian corn borer 1 5 - 93504 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Pyraloidea; Crambidae; Pyraustinae; Ostrinia -Ostrinia nubilalis European corn borer 1 5 - 29057 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; 
Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Pyraloidea; Crambidae; Pyraustinae; Ostrinia -Otolemur garnettii small-eared galago 1 2 - 30611 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Strepsirrhini; Lorisiformes; Galagidae; Otolemur -Ovis aries sheep 1 2 - 9940 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Caprinae; Ovis -Ovis aries musimon mouflon 1 2 - 9938 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Caprinae; Ovis; Ovis aries -Pachycladon fastigiatum - 1 1 11 106774 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Microlepidieae; Pachycladon -Pachypsylla venusta hackberry petiole gall psyllid 1 5 - 38123 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Psylliformes; Psylloidea; Psyllidae; Pachypsylla -Paeonia lactiflora 
Chinese peony 1 1 11 35924 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Saxifragales; Paeoniaceae; Paeonia -Pan paniscus pygmy chimpanzee 1 2 - 9597 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Homininae; Pan -Pan troglodytes chimpanzee 1 2 - 9598 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Homininae; Pan -Pan troglodytes troglodytes - 1 2 - 37011 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Homininae; Pan; Pan troglodytes -Pan troglodytes verus - 1 2 - 37012 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Homininae; Pan; Pan troglodytes -Panax ginseng - 1 1 11 4054 PLN cellular 
organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Apiales; Apiineae; Araliaceae; Panax -Panicum hallii var. filipes - 1 1 11 907226 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Panicinae; Panicum; Panicum hallii -Panthera tigris altaica Amur tiger 1 2 - 74533 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Feliformia; Felidae; Pantherinae; Panthera; Panthera tigris -Pantholops hodgsonii chiru 1 2 - 59538 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Antilopinae; Pantholops -Papilio machaon common yellow swallowtail 1 5 - 76193 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Papilionoidea; Papilionidae; Papilioninae; Papilionini; Papilio -Papilio xuthus Asian swallowtail 1 5 - 66420 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; 
Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Papilionoidea; Papilionidae; Papilioninae; Papilionini; Papilio -Papio anubis olive baboon 1 2 - 9555 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Papio -Papio hamadryas hamadryas baboon 1 2 - 9557 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Cercopithecinae; Papio -Paramecium tetraurelia strain d4-2 - 6 4 11 412030 INV cellular organisms; Eukaryota; Alveolata; Ciliophora; Intramacronucleata; Oligohymenophorea; Peniculida; Parameciidae; Paramecium; Paramecium tetraurelia -Parascaris equorum - 1 5 - 6256 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Ascaridida; Ascaridoidea; Ascarididae; Parascaris -Parasteatoda tepidariorum common house spider 1 5 - 114398 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Araneae; Araneomorphae; Entelegynae; Orbiculariae; Araneoidea; Theridiidae; Parasteatoda -Parus major Great Tit 1 2 - 9157 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; 
Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Paridae; Parus -Patiria miniata bat star 1 9 - 46514 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Valvatacea; Valvatida; Asterinidae; Patiria -Patiria pectinifera - 1 9 - 7594 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Valvatacea; Valvatida; Asterinidae; Patiria -Pecten maximus - 1 5 - 6579 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Pectinoida; Pectinoidea; Pectinidae; Pecten -Pedospumella encystans - 1 1 11 1117030 PLN cellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; Chromulinales; Chromulinaceae; Pedospumella -Pelecanus crispus Dalmatian pelican 1 2 - 36300 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Pelecanidae; Pelecanus -Pelodiscus sinensis Chinese soft-shelled turtle 1 2 - 13735 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Testudines; Cryptodira; Trionychia; Trionychidae; Pelodiscus -Penaeus monodon black tiger shrimp 1 5 - 6687 INV cellular organisms; Eukaryota; 
Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Dendrobranchiata; Penaeoidea; Penaeidae; Penaeus -Pepsis grossa - 1 5 - 1317727 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Pompiloidea; Pompilidae; Pepsinae; Pepsis -Perkinsus marinus ATCC 50983 - 1 4 11 423536 INV cellular organisms; Eukaryota; Alveolata; Perkinsea; Perkinsida; Perkinsidae; Perkinsus; Perkinsus marinus -Peromyscus maniculatus bairdii prairie deer mouse 1 2 - 230844 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Cricetidae; Neotominae; Peromyscus; Peromyscus maniculatus -Persicaria minor - 1 1 11 488003 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Polygonaceae; Polygonoideae; Persicarieae; Persicaria -Petunia integrifolia subsp. 
inflata - 1 1 11 212142 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Petunioideae; Petunia; Petunia integrifolia -Phaethon lepturus white-tailed tropicbird 1 2 - 97097 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Phaethontidae; Phaethon -Phalacrocorax carbo great cormorant 1 2 - 9209 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Phalacrocoracidae; Phalacrocorax -Phalaenopsis aphrodite - 1 1 11 212056 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; Asparagales; Orchidaceae; Epidendroideae; Vandeae; Aeridinae; Phalaenopsis -Phaseolus vulgaris - 1 1 11 3885 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Phaseoleae; Phaseolus -Phoca largha spotted seal 1 2 - 39090 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; 
Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Phocidae; Phoca -Phoenicopterus ruber ruber - 1 2 - 9218 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Phoenicopteriformes; Phoenicopteridae; Phoenicopterus; Phoenicopterus ruber -Phoenix dactylifera date palm 1 1 11 42345 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Arecales; Arecaceae; Coryphoideae; Phoeniceae; Phoenix -Physalis alkekengi var. franchetii - 1 1 11 221454 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Physaleae; Physalis; Physalis alkekengi -Physalis peruviana - 1 1 11 126903 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Physaleae; Physalis -Physcomitrella patens - 1 1 11 3218 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Bryophyta; Bryophytina; Bryopsida; Funariidae; Funariales; Funariaceae; Physcomitrella -Physeter catodon sperm whale 1 2 - 9755 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; 
Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Odontoceti; Physeteridae; Physeter -Phytophthora cambivora - 1 1 11 53983 PLN cellular organisms; Eukaryota; Stramenopiles; Oomycetes; Peronosporales; Phytophthora -Phytophthora cinnamomi - 1 1 11 4785 PLN cellular organisms; Eukaryota; Stramenopiles; Oomycetes; Peronosporales; Phytophthora -Phytophthora x alni - 1 1 11 299392 PLN cellular organisms; Eukaryota; Stramenopiles; Oomycetes; Peronosporales; Phytophthora -Picea glauca white spruce 1 1 11 3330 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Pinales; Pinaceae; Picea -Picoides pubescens downy woodpecker 1 2 - 118200 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Piciformes; Picidae; Picoides -Pinus massoniana - 1 1 11 88730 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Pinales; Pinaceae; Pinus; Pinus -Pinus sylvestris Scots pine 1 1 11 3349 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Pinales; Pinaceae; Pinus; Pinus -Pinus taeda loblolly pine 1 1 11 3352 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Pinales; Pinaceae; Pinus; Pinus -Pisaster 
ochraceus purple sea star 1 9 - 7612 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Pisaster -Pisum sativum pea 1 1 11 3888 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Fabeae; Pisum -Pisum sativum subsp. sativum - 1 1 11 208194 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Fabeae; Pisum; Pisum sativum -Plakobranchus ocellatus - 1 5 - 259542 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Gastropoda; Heterobranchia; Euthyneura; Panpulmonata; Sacoglossa; Placobranchoidea; Placobranchidae; Plakobranchus -Plasmodium falciparum malaria parasite P. 
falciparum 1 4 11 5833 INV cellular organisms; Eukaryota; Alveolata; Apicomplexa; Aconoidasida; Haemosporida; Plasmodiidae; Plasmodium; Plasmodium (Laverania) -Plecoglossus altivelis ayu 1 2 - 61084 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Stomiatii; Osmeriformes; Plecoglossidae; Plecoglossus -Plecoglossus altivelis altivelis - 1 2 - 281464 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Stomiatii; Osmeriformes; Plecoglossidae; Plecoglossus; Plecoglossus altivelis -Plukenetia volubilis - 1 1 11 316893 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Acalyphoideae; Plukenetieae; Plukenetia -Plutella xylostella diamondback moth 1 5 - 51655 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Yponomeutoidea; Plutellidae; Plutella -Poa infirma - 1 1 11 165094 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Poinae; 
Poa -Poa supina - 1 1 11 289064 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Poodae; Poeae; Poeae Chloroplast Group 2 (Poeae type); Poinae; Poa -Podiceps cristatus great crested grebe 1 2 - 345573 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Podicipediformes; Podicipedidae; Podiceps -Poecilia formosa Amazon molly 1 2 - 48698 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Poeciliidae; Poeciliinae; Poecilia -Poecilia latipinna sailfin molly 1 2 - 48699 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Poeciliidae; Poeciliinae; Poecilia -Poecilia mexicana - 1 2 - 48701 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; 
Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Poeciliidae; Poeciliinae; Poecilia -Poecilia reticulata guppy 1 2 - 8081 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Poeciliidae; Poeciliinae; Poecilia -Poeciliopsis prolifica blackstripe livebearer 1 2 - 188132 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Poeciliidae; Poeciliinae; Poeciliopsis -Pogonus chalceus - 1 5 - 235516 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Coleoptera; Adephaga; Caraboidea; Carabidae; Trechinae; Pogonini; Pogonus -Pohlia nutans - 1 1 11 140635 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Bryophyta; Bryophytina; Bryopsida; Bryidae; Bryanae; Bryales; Mniaceae; Pohlia -Polistes canadensis - 1 5 - 91411 INV cellular organisms; Eukaryota; 
Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Vespidae; Polistinae; Polistini; Polistes -Polistes metricus - 1 5 - 91422 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Vespidae; Polistinae; Polistini; Polistes -Pongo abelii Sumatran orangutan 1 2 - 9601 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Hominoidea; Hominidae; Ponginae; Pongo -Populus euphratica Euphrates poplar 1 1 11 75702 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Salicaceae; Saliceae; Populus -Populus trichocarpa black cottonwood 1 1 11 3694 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Salicaceae; Saliceae; Populus -Porites australiensis - 1 4 - 51061 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Fungiina; Poritidae; Porites -Poterioochromonas sp. 
DS - 1 1 11 519425 PLN cellular organisms; Eukaryota; Stramenopiles; Synurophyceae; Ochromonadales; Ochromonadaceae; Poterioochromonas -Priapulus caudatus - 1 5 - 37621 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Scalidophora; Priapulida; Priapulidae; Priapulus -Proasellus aragonensis - 1 5 - 1281939 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus arthrodilus - 1 5 - 1281940 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus assaforensis - 1 5 - 1282049 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus cantabricus - 1 5 - 1281948 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus cavaticus - 1 5 - 1281949 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus coiffaiti - 1 5 - 1281953 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; 
Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus coxalis - 1 5 - 63229 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus ebrensis - 1 5 - 1281961 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus escolai - 1 5 - 1281963 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus grafi - 1 5 - 1281973 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus granadensis - 1 5 - 1281974 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus hercegovinensis - 1 5 - 1281977 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus ibericus - 1 5 - 1281981 INV 
cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus jaloniacus - 1 5 - 1281986 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus karamani - 1 5 - 1281987 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus margalefi - 1 5 - 1281998 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus meridianus - 1 5 - 1282001 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus ortizi - 1 5 - 1282012 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus racovitzai - 1 5 - 1282023 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; 
Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus rectus - 1 5 - 1282025 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus solanasi - 1 5 - 1282031 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Proasellus spelaeus - 1 5 - 1282033 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus -Procambarus clarkii red swamp crayfish 1 5 - 6728 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Astacoidea; Cambaridae; Cambarinae; Procambarus -Procotyla fluviatilis - 1 9 - 231627 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Rhabditophora; Seriata; Tricladida; Continenticola; Planarioidea; Dendrocoelidae; Procotyla -Propithecus coquereli Coquerel's sifaka 1 2 - 379532 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Strepsirrhini; Lemuriformes; Indriidae; Propithecus -Prosopis alba - 1 1 11 207710 PLN cellular 
organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Mimosoideae; Mimoseae; Prosopis -Protobothrops mucrosquamatus - 1 2 - 103944 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Viperidae; Crotalinae; Protobothrops -Protopolystoma xenopodis - 1 9 - 117903 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Monogenea; Polyopisthocotylea; Polystomatidae; Protopolystoma -Prunus armeniaca apricot 1 1 11 36596 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Amygdaleae; Prunus -Prunus mume Japanese apricot 1 1 11 102107 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Amygdaleae; Prunus -Prunus persica peach 1 1 11 3760 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Amygdaleae; Prunus -Prymnesium parvum - 1 1 11 97485 PLN cellular organisms; Eukaryota; Haptophyceae; Prymnesiales; Prymnesiaceae; Prymnesium -Pseudacris regilla Pacific treefrog 1 2 - 
47562 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Neobatrachia; Hyloidea; Hylidae; Hylinae; Hylini; Pseudacris -Pseudodiploria strigosa - 1 4 - 1428006 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Faviina; Mussidae; Faviinae; Pseudodiploria -Pseudomasaris vespoides - 1 5 - 1317726 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Vespidae; Masarinae; Masarini; Pseudomasaris -Pseudomonas aeruginosa - 11 0 - 287 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Pseudomonadaceae; Pseudomonas; Pseudomonas aeruginosa group -Pseudopodoces humilis Tibetan ground-tit 1 2 - 181119 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Paridae; Pseudopodoces -Pseudotsuga menziesii var. menziesii - 1 1 11 278161 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Pinales; Pinaceae; Pseudotsuga; Pseudotsuga menziesii -Pteridium aquilinum subsp. 
aquilinum - 1 1 11 104588 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Moniliformopses; Polypodiidae; Polypodiales; Dennstaedtiaceae; Pteridium; Pteridium aquilinum -Pterocles gutturalis yellow-throated sandgrouse 1 2 - 240206 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Ciconiiformes; Pteroclidae; Pterocles -Pteronotus parnellii Parnell's mustached bat 1 2 - 59476 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Mormoopidae; Pteronotus -Pteropus alecto black flying fox 1 2 - 9402 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Pteropus -Pteropus vampyrus large flying fox 1 2 - 132908 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Pteropus -Ptychodera flava - 1 9 - 63121 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; 
Hemichordata; Enteropneusta; Ptychoderidae; Ptychodera -Puccinia psidii - 1 4 - 181123 PLN cellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Basidiomycota; Pucciniomycotina; Pucciniomycetes; Pucciniales; Pucciniaceae; Puccinia -Pundamilia nyererei - 1 2 - 303518 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; Pundamilia -Punica granatum pomegranate 1 1 11 22663 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Lythraceae; Punica -Pygocentrus nattereri red-bellied piranha 1 2 - 42514 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Characiphysae; Characiformes; Characoidei; Serrasalmidae; Pygocentrus -Pygoscelis adeliae Adelie penguin 1 2 - 9238 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Sphenisciformes; Spheniscidae; Pygoscelis -Pyrodinium bahamense var. 
compressum - 1 4 11 73916 PLN cellular organisms; Eukaryota; Alveolata; Dinophyceae; Gonyaulacales; Goniodomataceae; Pyrodinium; Pyrodinium bahamense -Pyrus communis pear 1 1 11 23211 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Maleae; Pyrus -Pyrus x bretschneideri Chinese white pear 1 1 11 225117 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Maloideae; Maleae; Pyrus -Python bivittatus Burmese python 1 2 - 176946 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Henophidia; Pythonidae; Python -Quercus suber - 1 1 11 58331 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Fagaceae; Quercus -Ramulus artemis - 1 5 - 1390046 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Orthopteroidea; Phasmatodea; Verophasmatodea; Anareolatae; Phasmatidae; Phasmatinae; Clitumnini; Ramulus -Rana clamitans bronze frog 1 2 - 145282 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; 
Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Neobatrachia; Ranoidea; Ranidae; Rana; Aquarana -Raphanus sativus radish 1 1 11 3726 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Raphanus -Rattus norvegicus Norway rat 1 2 - 10116 ROD cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Glires; Rodentia; Sciurognathi; Muroidea; Muridae; Murinae; Rattus -Rauvolfia serpentina serpentwood 1 1 11 4060 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Apocynaceae; Rauvolfioideae; Vinceae; Rauvolfiinae; Rauvolfia -Reaumuria trigyna - 1 1 11 1091135 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Tamaricaceae; Reaumuria -Rhagoletis zephyria snowberry fruit fly 1 5 - 28612 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Tephritoidea; Tephritidae; Trypetinae; Carpomyini; Carpomyina; Rhagoletis -Rhinolophus ferrumequinum greater horseshoe bat 
1 2 - 59479 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Microchiroptera; Rhinolophidae; Rhinolophinae; Rhinolophus -Rhinopithecus bieti black snub-nosed monkey 1 2 - 61621 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Colobinae; Rhinopithecus -Rhinopithecus roxellana golden snub-nosed monkey 1 2 - 61622 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Catarrhini; Cercopithecoidea; Cercopithecidae; Colobinae; Rhinopithecus -Rhipicephalus sanguineus brown dog tick 1 5 - 34632 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Rhipicephalinae; Rhipicephalus; Rhipicephalus; Rhipicephalus sanguineus group -Rhizoctonia solani AG-1 IA - 1 4 - 983506 PLN cellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Basidiomycota; Agaricomycotina; Agaricomycetes; Agaricomycetes incertae sedis; Cantharellales; Ceratobasidiaceae; Rhizoctonia; Rhizoctonia solani -Rhodinia newara - 1 5 - 1579501 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; 
Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Saturniidae; Saturniinae; Saturniini; Rhodinia -Ricinus communis castor bean 1 1 11 3988 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Acalyphoideae; Acalypheae; Ricinus -Romanomermis culicivorax - 1 5 - 13658 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Enoplea; Dorylaimia; Mermithida; Mermithoidea; Mermithidae; Romanomermis -Rousettus aegyptiacus Egyptian rousette 1 2 - 9407 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Rousettus -Rubus hybrid cultivar - 1 1 11 564016 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Rosoideae; Rosoideae unplaced; Rubus -Rumex palustris - 1 1 11 50298 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Polygonaceae; Polygonoideae; Rumiceae; Rumex -Saccharomyces cerevisiae baker's yeast 1 3 - 4932 PLN cellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; 
Ascomycota; saccharomyceta; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces -Saccharomyces pastorianus - 1 3 - 27292 PLN cellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Ascomycota; saccharomyceta; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces -Saccoglossus kowalevskii - 1 9 - 10224 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Hemichordata; Enteropneusta; Harrimaniidae; Saccoglossus -Saimiri boliviensis boliviensis Bolivian squirrel monkey 1 2 - 39432 PRI cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Primates; Haplorrhini; Simiiformes; Platyrrhini; Cebidae; Saimiriinae; Saimiri; Saimiri boliviensis -Salicornia europaea - 1 1 11 206448 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Salicornioideae; Salicornia -Salmo salar Atlantic salmon 1 2 - 8030 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Salmo -Salmonella enterica - 11 0 - 28901 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella -Salmonella enterica subsp. 
enterica serovar Typhi - 11 0 - 90370 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella; Salmonella enterica; Salmonella enterica subsp. enterica -Salmonella enterica subsp. enterica serovar Typhimurium - 11 0 - 90371 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella; Salmonella enterica; Salmonella enterica subsp. enterica -Salmonella enterica subsp. enterica serovar Typhimurium str. DT104 - 11 0 - 85569 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella; Salmonella enterica; Salmonella enterica subsp. enterica; Salmonella enterica subsp. enterica serovar Typhimurium -Samia ricini Indian eri silkmoth 1 5 - 63990 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Bombycoidea; Saturniidae; Saturniinae; Attacini; Samia -Sarcophilus harrisii Tasmanian devil 1 2 - 9305 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Metatheria; Dasyuromorphia; Dasyuridae; Sarcophilus -Sarsinebalia urgorrii - 1 5 - 1032695 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Phyllocarida; Leptostraca; Nebaliidae; Sarsinebalia -Saussurea involucrata - 1 1 11 200489 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; 
Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Asterales; Asteraceae; Carduoideae; Cardueae; Carduinae; Saussurea -Sceliphron caementarium - 1 5 - 253855 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Sphecoidea; Sphecidae; Sphecinae; Sceliphrini; Sceliphrina; Sceliphron -Schistocephalus solidus - 1 9 - 70667 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Cestoda; Eucestoda; Diphyllobothriidea; Diphyllobothriidae; Schistocephalus -Schistosoma curassoni - 1 9 - 6186 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma -Schistosoma haematobium - 1 9 - 6185 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma -Schistosoma japonicum - 1 9 - 6182 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma -Schistosoma mansoni - 1 9 - 6183 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma -Schistosoma mattheei - 1 9 - 31246 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma -Schistosoma rodhaini - 1 9 - 6188 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; 
Schistosomatoidea; Schistosomatidae; Schistosoma -Schmidtea mediterranea - 1 9 - 79327 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Rhabditophora; Seriata; Tricladida; Continenticola; Geoplanoidea; Dugesiidae; Schmidtea -Sclerodactyla briareus - 1 9 - 7710 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Holothuroidea; Dendrochirotacea; Dendrochirotida; Sclerodactylidae; Sclerodactyla -Scleropages formosus Asian bonytongue 1 2 - 113540 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Osteoglossocephala; Osteoglossomorpha; Osteoglossiformes; Osteoglossidae; Scleropages -Sclerotinia homoeocarpa - 1 4 - 38483 PLN cellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Ascomycota; saccharomyceta; Pezizomycotina; leotiomyceta; sordariomyceta; Leotiomycetes; Helotiales; Sclerotiniaceae; Sclerotinia -Scophthalmus maximus turbot 1 2 - 52904 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Carangaria; Pleuronectiformes; Pleuronectoidei; Scophthalmidae; Scophthalmus -Scylla olivacea orange mud crab 1 5 - 85551 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Brachyura; Eubrachyura; Heterotremata; Portunoidea; Portunidae; Scylla -Sebastes 
nigrocinctus tiger rockfish 1 2 - 72089 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Perciformes; Scorpaenoidei; Sebastidae; Sebastinae; Sebastes -Secale cereale rye 1 1 11 4550 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Hordeinae; Secale -Sedum alfredii - 1 1 11 439688 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Saxifragales; Crassulaceae; Sedum -Selaginella moellendorffii - 1 1 11 88036 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Lycopodiidae; Selaginellales; Selaginellaceae; Selaginella -Serinus canaria common canary 1 2 - 9135 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passeroidea; Fringillidae; Carduelinae; Serinus -Sesamum indicum sesame 1 1 11 4182 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; 
lamiids; Lamiales; Pedaliaceae; Sesamum -Setaria italica foxtail millet 1 1 11 4555 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Cenchrinae; Setaria -Shigella sonnei - 11 0 - 624 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Shigella -Silene latifolia - 1 1 11 37657 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Caryophyllaceae; Sileneae; Silene -Simian immunodeficiency virus - 1 0 - 11723 VRL Viruses; Retro-transcribing viruses; Retroviridae; Orthoretrovirinae; Lentivirus; Primate lentivirus group -Sinocyclocheilus angustiporus - 1 2 - 307947 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Sinocyclocheilus -Sinocyclocheilus anophthalmus eyeless golden-line fish 1 2 - 307955 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Sinocyclocheilus -Sinocyclocheilus anshuiensis - 1 2 - 1608454 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; 
Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Sinocyclocheilus -Sinocyclocheilus grahami - 1 2 - 75366 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Sinocyclocheilus -Sinocyclocheilus rhinocerous - 1 2 - 307959 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Otomorpha; Ostariophysi; Otophysi; Cypriniphysae; Cypriniformes; Cyprinoidea; Cyprinidae; Sinocyclocheilus -Sinopodophyllum hexandrum - 1 1 11 93608 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; stem eudicotyledons; Ranunculales; Berberidaceae; Podophylloideae; Sinopodophyllum -Sipyloidea sipylus - 1 5 - 202427 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Orthopteroidea; Phasmatodea; Verophasmatodea; Anareolatae; Diapheromeridae; Necrosciinae; Sipyloidea -soil metagenome - 11 2 11 410658 ENV unclassified sequences; metagenomes; ecological metagenomes -Solanum chacoense Chaco potato 1 1 11 4108 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; 
Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum -Solanum lycopersicum tomato 1 1 11 4081 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum; Lycopersicon -Solanum melongena eggplant 1 1 11 4111 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum -Solanum pennellii - 1 1 11 28526 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum; Lycopersicon -Solanum torvum - 1 1 11 119830 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum -Solanum tuberosum potato 1 1 11 4113 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum -Solenopsis invicta red fire ant 1 5 - 13686 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; 
Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Solenopsidini; Solenopsis -Sorex araneus European shrew 1 2 - 42254 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Insectivora; Soricidae; Soricinae; Sorex -Sorghum bicolor sorghum 1 1 11 4558 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Sorghinae; Sorghum -Speleonectes cf. tulumensis BMR-2011 - 1 5 - 1032549 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Remipedia; Nectiopoda; Speleonectidae; Speleonectes -Sphaerechinus granularis - 1 9 - 39374 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Temnopleuroida; Toxopneustidae; Sphaerechinus -Sphaeroforma arctica JP610 - 1 1 - 667725 INV cellular organisms; Eukaryota; Opisthokonta; Opisthokonta incertae sedis; Ichthyosporea; Ichthyophonida; Sphaeroforma; Sphaeroforma arctica -Sphaeropthalma orestes - 1 5 - 374941 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Pompiloidea; Mutillidae; Sphaeropthalminae; Sphaeropthalma -Spinacia oleracea spinach 1 1 11 3562 PLN cellular organisms; Eukaryota; 
Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Chenopodioideae; Anserineae; Spinacia -Spirometra erinaceieuropaei - 1 9 - 99802 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Cestoda; Eucestoda; Diphyllobothriidea; Diphyllobothriidae; Spirometra -Spodoptera exigua beet armyworm 1 5 - 7107 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Noctuoidea; Noctuidae; Amphipyrinae; Spodoptera -Spodoptera frugiperda fall armyworm 1 5 - 7108 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Noctuoidea; Noctuidae; Amphipyrinae; Spodoptera -Spumella vulgaris - 1 1 11 1117031 PLN cellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; Chromulinales; Chromulinaceae; Spumella -Spumella-like flagellate JBC/S23 - 1 1 11 293195 PLN cellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; unclassified Chrysophyceae; Spumella-like flagellate JB -Spumella-like flagellate JBNZ39 - 1 1 11 293202 PLN cellular organisms; Eukaryota; Stramenopiles; Chrysophyceae; unclassified Chrysophyceae; Spumella-like flagellate JB -Staphylococcus aureus - 11 0 - 1280 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Bacillales; Staphylococcaceae; Staphylococcus -Staphylococcus epidermidis - 11 0 - 1282 BCT cellular organisms; Bacteria; 
Terrabacteria group; Firmicutes; Bacilli; Bacillales; Staphylococcaceae; Staphylococcus -Stegastes partitus bicolor damselfish 1 2 - 144197 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Ovalentaria incertae sedis; Pomacentridae; Stegastes -Stegodyphus mimosarum - 1 5 - 407821 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Araneae; Araneomorphae; Entelegynae; Eresoidea; Eresidae; Stegodyphus -Stenotrophomonas maltophilia - 11 0 - 40324 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Xanthomonadales; Xanthomonadaceae; Stenotrophomonas; Stenotrophomonas maltophilia group -Stigmatomma oregonense - 1 5 - 602440 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Amblyoponinae; Stigmatomma -Stomoxys calcitrans stable fly 1 5 - 35570 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Calyptratae; Muscoidea; Muscidae; Muscinae; Stomoxyini; Stomoxys -Streptococcus agalactiae - 11 0 - 1311 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus -Streptococcus equi subsp. 
equi - 11 0 - 148942 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus; Streptococcus dysgalactiae group; Streptococcus equi -Streptococcus pneumoniae - 11 0 - 1313 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus -Streptococcus suis - 11 0 - 1307 BCT cellular organisms; Bacteria; Terrabacteria group; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus -Strongylocentrotus purpuratus purple sea urchin 1 9 - 7668 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Strongylocentrotidae; Strongylocentrotus -Strongylus vulgaris - 1 5 - 40348 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Strongylida; Strongyloidea; Strongylidae; Strongylinae; Strongylus -Struthio camelus australis - 1 2 - 441894 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Palaeognathae; Struthioniformes; Struthionidae; Struthio; Struthio camelus -Stylophora pistillata - 1 4 - 50429 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Pocilloporidae; Stylophora -Sus scrofa pig 1 2 - 9823 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; 
Laurasiatheria; Cetartiodactyla; Suina; Suidae; Sus -Symbiodinium sp. A1 - 1 4 11 503409 PLN cellular organisms; Eukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Symbiodinium; Symbiodinium sp. clades; Symbiodinium sp. clade A -Symbiodinium sp. A2 - 1 4 11 765178 PLN cellular organisms; Eukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Symbiodinium; Symbiodinium sp. clades; Symbiodinium sp. clade A -Symbiodinium sp. B2 - 1 4 11 154560 PLN cellular organisms; Eukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Symbiodinium; Symbiodinium sp. clades; Symbiodinium sp. clade B -Symphylella vulgaris - 1 5 - 1288507 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Myriapoda; Symphyla; Scolopendrellidae; Symphylella +Marthasterias glacialis spiny starfish 1 9 - 7609 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Forcipulatacea; Forcipulatida; Asteriidae; Marthasterias +Maylandia zebra zebra mbuna 1 2 - 106582 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; Maylandia; Maylandia zebra complex +Medauroidea extradentata - 1 5 - 614211 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Phasmatodea; Verophasmatodea; Anareolatae; Phasmatidae; Phasmatinae; Clitumnini; Medauroidea +Medicago medics 1 1 11 3877 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Trifolieae; Medicago +Medicago sativa - 1 1 11 3879 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; 
Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Trifolieae; Medicago +Medicago truncatula barrel medic 1 1 11 3880 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Trifolieae; Medicago +Megaderma lyra Indian false vampire 1 2 - 9413 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Megadermatidae; Megaderma +Megaselia scalaris - 1 5 - 36166 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Platypezoidea; Phoridae; Megaseliini; Megaselia +Melampyrum - 1 1 11 52724 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Rhinantheae; Melampyrum +Melampyrum roseum - 1 1 11 321415 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Rhinantheae; Melampyrum +Meleagris gallopavo turkey 1 2 - 9103 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Phasianidae; Meleagridinae; Meleagris +Meloidogyne - 1 5 - 189290 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Tylenchina; Tylenchomorpha; Tylenchoidea; Meloidogynidae; Meloidogyninae; Meloidogyne +Mengenilla moldrzyki - 1 5 - 1155016 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; 
Insecta; Pterygota; Neoptera; Holometabola; Strepsiptera; Mengenillidia; Mengenillidae; Mengenilla +Meriones unguiculatus Mongolian gerbil 1 2 - 10047 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Gerbillinae; Meriones +Merops nubicus carmine bee-eater 1 2 - 57421 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Coraciiformes; Meropidae; Merops +Mesembryanthemum crystallinum common iceplant 1 1 11 3544 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Aizoaceae; Mesembryanthemum; Cryophytum +Mesitornis unicolor brown roatelo 1 2 - 54374 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Gruiformes; Mesitornithidae; Mesitornis +Mesocricetus auratus golden hamster 1 2 - 10036 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Cricetidae; Cricetinae; Mesocricetus +Mesorhizobium - 11 0 - 68287 BCT Bacteria; Proteobacteria; Alphaproteobacteria; Rhizobiales; Phyllobacteriaceae; Mesorhizobium +Micoletzkya japonica - 1 5 - 1250332 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Diplogasteromorpha; Diplogasteroidea; Diplogasteridae; Micoletzkya +Microcebus murinus gray mouse lemur 1 2 - 30608 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Strepsirrhini; Lemuriformes; Cheirogaleidae; Microcebus +Microplitis demolitor - 1 5 - 69319 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; 
Hymenoptera; Apocrita; Parasitoida; Ichneumonoidea; Braconidae; Microgastrinae; Microplitis +Micropterix calthella - 1 5 - 41027 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Zeugloptera; Micropterigidae; Micropterix +Micropterus floridanus Florida bass 1 2 - 225391 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Centrarchiformes; Centrarchoidei; Centrarchidae; Micropterus +Micropterus salmoides largemouth bass 1 2 - 27706 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Centrarchiformes; Centrarchoidei; Centrarchidae; Micropterus +Micropterus salmoides salmoides northern largemouth bass 1 2 - 489037 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Centrarchiformes; Centrarchoidei; Centrarchidae; Micropterus +Micrurus corallinus painted coral snake 1 2 - 54390 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Elapidae; Elapinae; Micrurus +Micrurus lemniscatus carvalhoi - 1 2 - 129465 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Elapidae; Elapinae; Micrurus +Micrurus lemniscatus lemniscatus - 1 2 - 129467 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Elapidae; Elapinae; Micrurus +Mizuhopecten yessoensis Yesso scallop 1 5 - 6573 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Pectinoida; Pectinoidea; Pectinidae; 
Mizuhopecten +Molgula - 1 13 - 27574 INV Eukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Stolidobranchia; Molgulidae; Molgula +Molgula tectiformis - 1 13 - 30286 INV Eukaryota; Metazoa; Chordata; Tunicata; Ascidiacea; Stolidobranchia; Molgulidae; Molgula +Momordica charantia bitter melon 1 1 11 3673 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Cucurbitales; Cucurbitaceae; Momordiceae; Momordica +Monomorium chinense - 1 5 - 482359 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Myrmicinae; Monomorium +Monomorium pharaonis pharaoh ant 1 5 - 307658 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Myrmicinae; Monomorium +Monosiga - 1 1 - 81525 INV Eukaryota; Choanoflagellida; Craspedida; Salpingoecidae; Monosiga +Monosiga ovata - 1 1 - 81526 INV Eukaryota; Choanoflagellida; Craspedida; Salpingoecidae; Monosiga +Morone chrysops white bass 1 2 - 46259 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Moronidae; Morone +Morone saxatilis striped sea-bass 1 2 - 34816 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Moronidae; Morone +Morus notabilis - 1 1 11 981085 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Moraceae; Morus +Mus - 1 2 - 10088 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Murinae; Mus +Mus 
musculus house mouse 1 2 - 10090 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Murinae; Mus; Mus +Mus musculus domesticus western European house mouse 1 2 - 10092 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Murinae; Mus; Mus +Mus musculus molossinus Japanese wild mouse 1 2 - 57486 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Murinae; Mus; Mus +Musa acuminata AAA Group dessert banana 1 1 11 214697 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Zingiberales; Musaceae; Musa +Musa acuminata subsp. malaccensis wild Malaysian banana 1 1 11 214687 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Zingiberales; Musaceae; Musa +Musca domestica house fly 1 5 - 7370 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Muscoidea; Muscidae; Musca +Mustela putorius furo domestic ferret 1 2 - 9669 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Mustelidae; Mustelinae; Mustela +Mycobacterium tuberculosis - 11 0 - 1773 BCT Bacteria; Actinobacteria; Corynebacteriales; Mycobacteriaceae; Mycobacterium; Mycobacterium tuberculosis complex +Mycobacteroides abscessus subsp. 
abscessus - 11 0 - 1185650 BCT Bacteria; Actinobacteria; Corynebacteriales; Mycobacteriaceae; Mycobacteroides; Mycobacteroides abscessus +Myotis brandtii Brandt's bat 1 2 - 109478 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis +Myotis davidii - 1 2 - 225400 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis +Myotis ricketti Rickett's big-footed Myotis 1 2 - 203696 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Vespertilionidae; Myotis +Myrmica rubra - 1 5 - 106198 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Myrmicinae; Myrmica +Mytilus galloprovincialis Mediterranean mussel 1 5 - 29158 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Mytiloida; Mytiloidea; Mytilidae; Mytilinae; Mytilus +Nannospalax galili Upper Galilee mountains blind mole rat 1 2 - 1026970 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Spalacidae; Spalacinae; Nannospalax +Nasonia - 1 5 - 7424 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Parasitoida; Chalcidoidea; Pteromalidae; Pteromalinae; Nasonia +Nasonia vitripennis jewel wasp 1 5 - 7425 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Parasitoida; Chalcidoidea; Pteromalidae; Pteromalinae; Nasonia +Neisseria gonorrhoeae - 11 0 - 485 BCT Bacteria; Proteobacteria; Betaproteobacteria; Neisseriales; Neisseriaceae; Neisseria +Neisseria meningitidis - 11 0 - 
487 BCT Bacteria; Proteobacteria; Betaproteobacteria; Neisseriales; Neisseriaceae; Neisseria +Nematostella - 1 4 - 45350 INV Eukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Edwardsiidae; Nematostella +Nematostella vectensis starlet sea anemone 1 4 - 45351 INV Eukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Actiniaria; Edwardsiidae; Nematostella +Neolamprologus brichardi - 1 2 - 32507 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Lamprologini; Neolamprologus +Neopelma - 1 2 - 114328 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Pipridae; Neopelma +Neopelma chrysocephalum saffron-crested tyrant-manakin 1 2 - 114329 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Pipridae; Neopelma +Neurospora - 1 4 - 5140 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Sordariomycetidae; Sordariales; Sordariaceae; Neurospora +Neurospora crassa - 1 4 - 5141 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Sordariomycetidae; Sordariales; Sordariaceae; Neurospora +Nicotiana - 1 1 11 4085 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana +Nicotiana attenuata - 1 1 11 49451 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana 
+Nicotiana benthamiana - 1 1 11 4100 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana +Nicotiana sylvestris wood tobacco 1 1 11 4096 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana +Nicotiana tabacum common tobacco 1 1 11 4097 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana +Nicotiana tomentosiformis - 1 1 11 4098 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Nicotianoideae; Nicotianeae; Nicotiana +Nilaparvata - 1 5 - 108930 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Auchenorrhyncha; Fulgoroidea; Delphacidae; Delphacinae; Nilaparvata +Nilaparvata lugens brown planthopper 1 5 - 108931 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Auchenorrhyncha; Fulgoroidea; Delphacidae; Delphacinae; Nilaparvata +Nitella - 1 1 11 3148 PLN Eukaryota; Viridiplantae; Streptophyta; Charophyceae; Charales; Characeae; Nitella +Nitella hyalina - 1 1 11 181804 PLN Eukaryota; Viridiplantae; Streptophyta; Charophyceae; Charales; Characeae; Nitella +Noccaea caerulescens - 1 1 11 107243 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Coluteocarpeae; Noccaea +Notamacropus 
- 1 2 - 1960649 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Metatheria; Diprotodontia; Macropodidae; Notamacropus +Notamacropus eugenii tammar wallaby 1 2 - 9315 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Metatheria; Diprotodontia; Macropodidae; Notamacropus +Notechis scutatus mainland tiger snake 1 2 - 8663 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Elapidae; Acanthophiinae; Notechis +Nothobranchius - 1 2 - 28779 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Nothobranchiidae; Nothobranchius +Nothobranchius furzeri turquoise killifish 1 2 - 105023 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Nothobranchiidae; Nothobranchius +Numida meleagris helmeted guineafowl 1 2 - 8996 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Galloanserae; Galliformes; Numididae; Numida +Ochotona princeps American pika 1 2 - 9978 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Lagomorpha; Ochotonidae; Ochotona +Octodon degus degu 1 2 - 10160 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Hystricomorpha; Octodontidae; Octodon +Octopus bimaculoides - 1 5 - 37653 INV Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Cephalopoda; Coleoidea; Neocoleoidea; Octopodiformes; Octopoda; Incirrata; Octopodidae; Octopus +Oenococcus oeni - 11 0 - 1247 BCT Bacteria; 
Firmicutes; Bacilli; Lactobacillales; Leuconostocaceae; Oenococcus +Oikopleura - 1 5 - 34763 INV Eukaryota; Metazoa; Chordata; Tunicata; Appendicularia; Copelata; Oikopleuridae; Oikopleura +Oikopleura dioica - 1 5 - 34765 INV Eukaryota; Metazoa; Chordata; Tunicata; Appendicularia; Copelata; Oikopleuridae; Oikopleura +Olavius algarvensis - 1 5 - 188229 INV Eukaryota; Metazoa; Lophotrochozoa; Annelida; Clitellata; Oligochaeta; Haplotaxida; Tubificina; Naididae; Phallodrilinae; Olavius +Olea europaea common olive 1 1 11 4146 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Oleaceae; Oleeae; Olea +Olea europaea var. sylvestris - 1 1 11 158386 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Oleaceae; Oleeae; Olea +Oncorhynchus - 1 2 - 8016 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus +Oncorhynchus kisutch coho salmon 1 2 - 8019 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus +Oncorhynchus masou masou cherry salmon 1 2 - 90313 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus +Oncorhynchus mykiss rainbow trout 1 2 - 8022 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus +Oncorhynchus tshawytscha Chinook salmon 1 2 - 74940 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; 
Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Oncorhynchus +Ophiocoma echinata - 1 9 - 331088 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Ophiuroidea; Ophiuridea; Ophiurida; Ophiurina; Gnathophiurina; Ophiocomidae; Ophiocoma +Opisthorchis viverrini - 1 9 - 6198 INV Eukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Opisthorchiida; Opisthorchiata; Opisthorchiidae; Opisthorchis +Oreochromis - 1 2 - 8139 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Oreochromini; Oreochromis +Oreochromis niloticus Nile tilapia 1 2 - 8128 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Oreochromini; Oreochromis +Ornithorhynchus anatinus platypus 1 2 - 9258 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Monotremata; Ornithorhynchidae; Ornithorhynchus +Orobanche - 1 1 11 36747 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Orobancheae; Orobanche +Orobanche minor - 1 1 11 36748 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Orobancheae; Orobanche +Orycteropus afer afer - 1 2 - 1230840 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Afrotheria; Tubulidentata; Orycteropodidae; Orycteropus +Oryctolagus cuniculus rabbit 1 2 - 9986 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; 
Mammalia; Eutheria; Euarchontoglires; Glires; Lagomorpha; Leporidae; Oryctolagus +Oryza - 1 1 11 4527 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza +Oryza alta - 1 1 11 52545 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza +Oryza australiensis - 1 1 11 4532 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza +Oryza brachyantha malo sina 1 1 11 4533 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza +Oryza coarctata - 1 1 11 77588 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza +Oryza glaberrima African rice 1 1 11 4538 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza +Oryza longistaminata - 1 1 11 4528 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza +Oryza meyeriana var. 
granulata - 1 1 11 110450 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza meyeriana +Oryza minuta - 1 1 11 63629 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza +Oryza officinalis - 1 1 11 4535 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza +Oryza ridleyi - 1 1 11 83308 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza +Oryza rufipogon - 1 1 11 4529 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza +Oryza sativa rice 1 1 11 4530 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza sativa +Oryza sativa f. 
spontanea - 1 1 11 4536 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza sativa +Oryza sativa Indica Group long-grained rice 1 1 11 39946 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza sativa +Oryza sativa Japonica Group Japanese rice 1 1 11 39947 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza; Oryza sativa +Oryzias - 1 2 - 8089 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Beloniformes; Adrianichthyidae; Oryziinae; Oryzias +Oryzias latipes Japanese medaka 1 2 - 8090 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Beloniformes; Adrianichthyidae; Oryziinae; Oryzias +Osmia cornuta - 1 5 - 185587 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Megachilidae; Megachilinae; Osmia +Otolemur garnettii small-eared galago 1 2 - 30611 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Strepsirrhini; Lorisiformes; Galagidae; Otolemur +Ovis - 1 2 - 9935 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; Pecora; Bovidae; Caprinae; Ovis +Ovis aries sheep 1 2 - 9940 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Ruminantia; 
Pecora; Bovidae; Caprinae; Ovis +Pachypsylla venusta hackberry petiole gall psyllid 1 5 - 38123 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Paraneoptera; Hemiptera; Sternorrhyncha; Psylloidea; Aphalaridae; Pachypsylla +Pagrus - 1 2 - 8172 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Spariformes; Sparidae; Pagrus +Pagrus major red seabream 1 2 - 143350 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Spariformes; Sparidae; Pagrus +Pan troglodytes chimpanzee 1 2 - 9598 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Pan +Pan troglodytes troglodytes - 1 2 - 37011 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Pan +Pan troglodytes verus - 1 2 - 37012 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Pan +Panagrolaimus - 1 5 - 55784 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Tylenchina; Panagrolaimomorpha; Panagrolaimoidea; Panagrolaimidae; Panagrolaimus +Panagrolaimus davidi - 1 5 - 227884 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Tylenchina; Panagrolaimomorpha; Panagrolaimoidea; Panagrolaimidae; Panagrolaimus +Panax ginseng - 1 1 11 4054 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; campanulids; Apiales; Araliaceae; Panax +Panicum - 1 1 11 4539 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; 
Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Panicinae; Panicum +Panicum hallii - 1 1 11 206008 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Panicinae; Panicum; Panicum sect. Panicum +Panicum hallii var. hallii - 1 1 11 1504633 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Panicinae; Panicum; Panicum sect. Panicum +Panicum virgatum switchgrass 1 1 11 38727 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Panicinae; Panicum; Panicum sect. Hiantes +Panthera pardus leopard 1 2 - 9691 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Feliformia; Felidae; Pantherinae; Panthera +Papaver somniferum opium poppy 1 1 11 3469 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Ranunculales; Papaveraceae; Papaveroideae; Papaver +Papilio - 1 5 - 7145 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Papilionoidea; Papilionidae; Papilioninae; Papilio +Papilio polytes common Mormon 1 5 - 76194 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Papilionoidea; Papilionidae; Papilioninae; Papilio +Papilio xuthus Asian swallowtail 1 5 - 66420 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Papilionoidea; Papilionidae; Papilioninae; Papilio +Papio baboons 1 2 - 9554 PRI Eukaryota; Metazoa; Chordata; 
Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Papio +Papio anubis olive baboon 1 2 - 9555 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Cercopithecinae; Papio +Paracentrotus - 1 9 - 7655 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Echinidae; Paracentrotus +Paracentrotus lividus common urchin 1 9 - 7656 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Echinidae; Paracentrotus +Paramecium tetraurelia - 6 4 11 5888 INV Eukaryota; Alveolata; Ciliophora; Intramacronucleata; Oligohymenophorea; Peniculida; Parameciidae; Paramecium +Parasteatoda tepidariorum common house spider 1 5 - 114398 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Araneae; Araneomorphae; Entelegynae; Araneoidea; Theridiidae; Parasteatoda +Patiria miniata bat star 1 9 - 46514 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Valvatacea; Valvatida; Asterinidae; Patiria +Patiria pectinifera - 1 9 - 7594 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Asterozoa; Asteroidea; Valvatacea; Valvatida; Asterinidae; Patiria +Pedicularis louseworts 1 1 11 43174 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Pedicularideae; Pedicularis +Pedicularis keiskei - 1 1 11 1392158 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Pedicularideae; Pedicularis +Pelecanus crispus Dalmatian pelican 1 2 - 36300 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; 
Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Pelecanidae; Pelecanus +Pelodiscus sinensis Chinese soft-shelled turtle 1 2 - 13735 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Testudines; Cryptodira; Trionychia; Trionychidae; Pelodiscus +Penaeus - 1 5 - 133894 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Dendrobranchiata; Penaeoidea; Penaeidae; Penaeus +Penaeus monodon black tiger shrimp 1 5 - 6687 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Dendrobranchiata; Penaeoidea; Penaeidae; Penaeus +Penaeus vannamei Pacific white shrimp 1 5 - 6689 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Dendrobranchiata; Penaeoidea; Penaeidae; Penaeus +Penium margaritaceum - 1 1 11 102169 PLN Eukaryota; Viridiplantae; Streptophyta; Zygnemophyceae; Desmidiales; Peniaceae; Penium +Peromyscus - 1 2 - 10040 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Cricetidae; Neotominae; Peromyscus +Peromyscus maniculatus bairdii prairie deer mouse 1 2 - 230844 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Cricetidae; Neotominae; Peromyscus +Persicaria minor - 1 1 11 488003 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Polygonaceae; Polygonoideae; Persicarieae; Persicaria +Petrolisthes - 1 5 - 84661 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Anomura; 
Galatheoidea; Porcellanidae; Petrolisthes +Petrolisthes cinctipes flat porcelain crab 1 5 - 88211 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Anomura; Galatheoidea; Porcellanidae; Petrolisthes +Petromyzon - 1 2 - 7756 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Cyclostomata; Hyperoartia; Petromyzontiformes; Petromyzontidae; Petromyzon +Petromyzon marinus sea lamprey 1 2 - 7757 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Cyclostomata; Hyperoartia; Petromyzontiformes; Petromyzontidae; Petromyzon +Petunia integrifolia subsp. inflata - 1 1 11 212142 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Petunioideae; Petunia +Phaeodactylum - 1 1 11 2849 PLN Eukaryota; Stramenopiles; Bacillariophyta; Bacillariophyceae; Bacillariophycidae; Naviculales; Phaeodactylaceae; Phaeodactylum +Phaeodactylum tricornutum - 1 1 11 2850 PLN Eukaryota; Stramenopiles; Bacillariophyta; Bacillariophyceae; Bacillariophycidae; Naviculales; Phaeodactylaceae; Phaeodactylum +Phaethon lepturus White-tailed tropicbird 1 2 - 97097 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Phaethontidae; Phaethon +Phalacrocorax carbo great cormorant 1 2 - 9209 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Pelecaniformes; Phalacrocoracidae; Phalacrocorax +Phalaenopsis aphrodite - 1 1 11 212056 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae; Epidendroideae; Vandeae; Aeridinae; Phalaenopsis +Phalaenopsis equestris - 1 1 11 78828 
PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae; Epidendroideae; Vandeae; Aeridinae; Phalaenopsis +Phaseolus - 1 1 11 3883 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Phaseolus +Phaseolus coccineus - 1 1 11 3886 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Phaseolus +Phaseolus vulgaris - 1 1 11 3885 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Phaseolus +Phoca largha spotted seal 1 2 - 39090 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Phocidae; Phoca +Phoenix dactylifera date palm 1 1 11 42345 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Arecaceae; Coryphoideae; Phoeniceae; Phoenix +Photinus pyralis common eastern firefly 1 5 - 7054 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Coleoptera; Polyphaga; Elateriformia; Elateroidea; Lampyridae; Lampyrinae; Photinus +Physalis peruviana - 1 1 11 126903 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; 
Physaleae; Physalis +Physcomitrella - 1 1 11 3217 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Bryophyta; Bryophytina; Bryopsida; Funariidae; Funariales; Funariaceae; Physcomitrella +Physcomitrella patens - 1 1 11 3218 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Bryophyta; Bryophytina; Bryopsida; Funariidae; Funariales; Funariaceae; Physcomitrella +Physeter catodon sperm whale 1 2 - 9755 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Odontoceti; Physeteridae; Physeter +Phytophthora - 1 1 11 4783 PLN Eukaryota; Stramenopiles; Oomycetes; Peronosporales; Peronosporaceae; Phytophthora +Phytophthora infestans potato late blight agent 1 1 11 4787 PLN Eukaryota; Stramenopiles; Oomycetes; Peronosporales; Peronosporaceae; Phytophthora +Picea - 1 1 11 3328 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Pinales; Pinaceae; Picea +Picea glauca white spruce 1 1 11 3330 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Pinales; Pinaceae; Picea +Picea sitchensis Sitka spruce 1 1 11 3332 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Pinales; Pinaceae; Picea +Piliocolobus tephrosceles Ugandan red Colobus 1 2 - 591936 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Colobinae; Piliocolobus +Pimephales - 1 2 - 51137 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Pimephales +Pimephales promelas fathead minnow 1 2 - 90988 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Pimephales +Pinus - 1 1 11 139271 PLN Eukaryota; 
Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Pinales; Pinaceae; Pinus; Pinus +Pinus sylvestris Scots pine 1 1 11 3349 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Pinales; Pinaceae; Pinus; Pinus +Pinus taeda loblolly pine 1 1 11 3352 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Pinidae; Pinales; Pinaceae; Pinus; Pinus +Pisum sativum pea 1 1 11 3888 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Fabeae; Pisum +Pisum sativum subsp. sativum - 1 1 11 208194 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Fabeae; Pisum +Plasmodium - 1 4 11 5820 INV Eukaryota; Alveolata; Apicomplexa; Aconoidasida; Haemosporida; Plasmodiidae; Plasmodium +Plasmodium falciparum malaria parasite P. 
falciparum 1 4 11 5833 INV Eukaryota; Alveolata; Apicomplexa; Aconoidasida; Haemosporida; Plasmodiidae; Plasmodium; Plasmodium (Laverania) +Platynereis dumerilii Dumeril's clam worm 1 5 - 6359 INV Eukaryota; Metazoa; Lophotrochozoa; Annelida; Polychaeta; Palpata; Aciculata; Phyllodocida; Nereididae; Platynereis +Plutella xylostella diamondback moth 1 5 - 51655 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Yponomeutoidea; Plutellidae; Plutella +Poeciliopsis prolifica blackstripe livebearer 1 2 - 188132 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Poeciliidae; Poeciliinae; Poeciliopsis +Pogonomyrmex californicus - 1 5 - 144036 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Myrmicinae; Pogonomyrmex +Pongo abelii Sumatran orangutan 1 2 - 9601 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae; Pongo +Populus poplars 1 1 11 3689 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Salicaceae; Saliceae; Populus +Populus euphratica Euphrates poplar 1 1 11 75702 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Salicaceae; Saliceae; Populus +Populus trichocarpa black cottonwood 1 1 11 3694 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Salicaceae; Saliceae; Populus 
+Porphyridium - 1 1 11 2791 PLN Eukaryota; Rhodophyta; Bangiophyceae; Porphyridiales; Porphyridiaceae; Porphyridium +Porphyridium purpureum - 1 1 11 35688 PLN Eukaryota; Rhodophyta; Bangiophyceae; Porphyridiales; Porphyridiaceae; Porphyridium +Priapulus caudatus - 1 5 - 37621 INV Eukaryota; Metazoa; Ecdysozoa; Scalidophora; Priapulida; Priapulimorpha; Priapulimorphida; Priapulidae; Priapulus +Pristionchus - 1 5 - 54125 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Diplogasteromorpha; Diplogasteroidea; Neodiplogasteridae; Pristionchus +Pristionchus mayeri - 1 5 - 1317129 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Diplogasteromorpha; Diplogasteroidea; Neodiplogasteridae; Pristionchus +Pristionchus pacificus - 1 5 - 54126 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Rhabditida; Rhabditina; Diplogasteromorpha; Diplogasteroidea; Neodiplogasteridae; Pristionchus +Proasellus aragonensis - 1 5 - 1281939 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus arthrodilus - 1 5 - 1281940 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus assaforensis - 1 5 - 1282049 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus beticus - 1 5 - 1281946 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus cantabricus - 1 5 - 1281948 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus 
+Proasellus cavaticus - 1 5 - 1281949 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus coiffaiti - 1 5 - 1281953 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus coxalis - 1 5 - 63229 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus ebrensis - 1 5 - 1281961 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus escolai - 1 5 - 1281963 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus grafi - 1 5 - 1281973 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus granadensis - 1 5 - 1281974 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus hercegovinensis - 1 5 - 1281977 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus ibericus - 1 5 - 1281981 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus jaloniacus - 1 5 - 1281986 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; 
Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus karamani - 1 5 - 1281987 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus margalefi - 1 5 - 1281998 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus meridianus - 1 5 - 1282001 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus ortizi - 1 5 - 1282012 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus parvulus - 1 5 - 1282015 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus racovitzai - 1 5 - 1282023 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus rectus - 1 5 - 1282025 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Proasellus solanasi - 1 5 - 1282031 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Peracarida; Isopoda; Asellota; Aselloidea; Asellidae; Proasellus +Procambarus clarkii red swamp crayfish 1 5 - 6728 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Astacidea; Astacoidea; Cambaridae; Procambarus +Protobothrops mucrosquamatus - 1 
2 - 103944 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Colubroidea; Viperidae; Crotalinae; Protobothrops +Protophormia terraenovae northern blowfly 1 5 - 34676 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Oestroidea; Calliphoridae; Chrysomyinae; Protophormia +Prunus - 1 1 11 3754 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Amygdaloideae; Amygdaleae; Prunus +Prunus armeniaca apricot 1 1 11 36596 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Amygdaloideae; Amygdaleae; Prunus +Prunus persica peach 1 1 11 3760 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Amygdaloideae; Amygdaleae; Prunus +Pseudodiploria strigosa - 1 4 - 1428006 INV Eukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Faviina; Mussidae; Faviinae; Pseudodiploria +Pseudomonas aeruginosa - 11 0 - 287 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Pseudomonadaceae; Pseudomonas +Pseudomonas sp. 
286 - 11 0 - 1705700 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Pseudomonadaceae; Pseudomonas +Pseudorca crassidens false killer whale 1 2 - 82174 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Odontoceti; Delphinidae; Pseudorca +Pterocles gutturalis yellow-throated sandgrouse 1 2 - 240206 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Ciconiiformes; Pteroclidae; Pterocles +Pteronotus parnellii Parnell's mustached bat 1 2 - 59476 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Microchiroptera; Mormoopidae; Pteronotus +Pteropus alecto black flying fox 1 2 - 9402 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Pteropus +Pteropus vampyrus large flying fox 1 2 - 132908 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Chiroptera; Megachiroptera; Pteropodidae; Pteropodinae; Pteropus +Ptychodera flava - 1 9 - 63121 INV Eukaryota; Metazoa; Hemichordata; Enteropneusta; Ptychoderidae; Ptychodera +Pundamilia nyererei - 1 2 - 303518 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Cichlomorphae; Cichliformes; Cichlidae; African cichlids; Pseudocrenilabrinae; Haplochromini; Pundamilia +Punica granatum pomegranate 1 1 11 22663 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Myrtales; Lythraceae; Punica +Pygocentrus nattereri red-bellied piranha 1 2 - 42514 VRT Eukaryota; Metazoa; Chordata; Craniata; 
Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Characiformes; Characoidei; Pygocentrus +Pyrus x bretschneideri Chinese white pear 1 1 11 225117 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rosaceae; Amygdaloideae; Maleae; Pyrus +Pythium - 1 1 11 4797 PLN Eukaryota; Stramenopiles; Oomycetes; Pythiales; Pythiaceae; Pythium +Pythium ultimum DAOM BR144 - 1 1 11 431595 PLN Eukaryota; Stramenopiles; Oomycetes; Pythiales; Pythiaceae; Pythium +Python bivittatus Burmese python 1 2 - 176946 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; Henophidia; Pythonidae; Python +Quercus - 1 1 11 3511 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Fagaceae; Quercus +Quercus robur - 1 1 11 38942 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Fagaceae; Quercus +Quercus suber - 1 1 11 58331 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Fagaceae; Quercus +Ramazzottius varieornatus - 1 5 - 947166 INV Eukaryota; Metazoa; Ecdysozoa; Tardigrada; Eutardigrada; Parachela; Ramazzottiidae; Ramazzottius +Ramulus artemis - 1 5 - 1390046 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Phasmatodea; Verophasmatodea; Anareolatae; Phasmatidae; Phasmatinae; Clitumnini; Ramulus +Rana catesbeiana American bullfrog 1 2 - 8400 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Anura; Neobatrachia; 
Ranoidea; Ranidae; Rana; Aquarana +Raphanus - 1 1 11 3725 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Raphanus +Raphanus sativus radish 1 1 11 3726 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Brassiceae; Raphanus +Rattus - 1 2 - 10114 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Murinae; Rattus +Rattus norvegicus Norway rat 1 2 - 10116 ROD Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha; Muroidea; Muridae; Murinae; Rattus +Rauvolfia serpentina serpentwood 1 1 11 4060 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Gentianales; Apocynaceae; Rauvolfioideae; Vinceae; Rauvolfiinae; Rauvolfia +Reaumuria trigyna - 1 1 11 1091135 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Tamaricaceae; Reaumuria +Rhagoletis zephyria snowberry fruit fly 1 5 - 28612 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Tephritoidea; Tephritidae; Rhagoletis +Rhincodon typus whale shark 1 2 - 259920 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Chondrichthyes; Elasmobranchii; Galeomorphii; Galeoidea; Orectolobiformes; Rhincodontidae; Rhincodon +Rhinolophus sinicus Chinese rufous horseshoe bat 1 2 - 89399 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; 
Laurasiatheria; Chiroptera; Microchiroptera; Rhinolophidae; Rhinolophinae; Rhinolophus +Rhinopithecus bieti black snub-nosed monkey 1 2 - 61621 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Colobinae; Rhinopithecus +Rhinopithecus roxellana golden snub-nosed monkey 1 2 - 61622 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Cercopithecidae; Colobinae; Rhinopithecus +Rhipicephalus - 1 5 - 34630 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Rhipicephalinae; Rhipicephalus +Rhipicephalus microplus southern cattle tick 1 5 - 6941 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Acari; Parasitiformes; Ixodida; Ixodoidea; Ixodidae; Rhipicephalinae; Rhipicephalus; Boophilus +Rhizobiales bacterium - 11 0 - 1909294 BCT Bacteria; Proteobacteria; Alphaproteobacteria; Rhizobiales +Ricinus communis castor bean 1 1 11 3988 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Malpighiales; Euphorbiaceae; Acalyphoideae; Acalypheae; Ricinus +Saccharomyces cerevisiae baker's yeast 1 3 - 4932 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces +Saccharomyces pastorianus - 1 3 - 27292 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces +Saccharum - 1 1 11 4546 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Saccharinae; Saccharum +Saccharum hybrid cultivar SP80-3280 - 1 1 11 193079 PLN 
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Saccharinae; Saccharum; Saccharum officinarum complex +Saccoglossus - 1 5 - 10222 INV Eukaryota; Metazoa; Hemichordata; Enteropneusta; Harrimaniidae; Saccoglossus +Saccoglossus kowalevskii - 1 9 - 10224 INV Eukaryota; Metazoa; Hemichordata; Enteropneusta; Harrimaniidae; Saccoglossus +Saimiri boliviensis boliviensis Bolivian squirrel monkey 1 2 - 39432 PRI Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; Platyrrhini; Cebidae; Saimiriinae; Saimiri +Salicornia europaea - 1 1 11 206448 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Salicornioideae; Salicornia; Salicornia subg. Salicornia +Salmo - 1 2 - 8028 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Salmo +Salmo salar Atlantic salmon 1 2 - 8030 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Salmo +Salmonella enterica - 11 0 - 28901 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella +Salmonella enterica subsp. enterica serovar Typhi - 11 0 - 90370 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella +Salmonella enterica subsp. 
enterica serovar Typhimurium - 11 0 - 90371 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Salmonella +Salvelinus alpinus Arctic char 1 2 - 8036 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Protacanthopterygii; Salmoniformes; Salmonidae; Salmoninae; Salvelinus +Sarcophilus harrisii Tasmanian devil 1 2 - 9305 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Metatheria; Dasyuromorphia; Dasyuridae; Sarcophilus +Schistosoma - 1 9 - 6181 INV Eukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma +Schistosoma japonicum - 1 9 - 6182 INV Eukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma +Schistosoma mansoni - 1 9 - 6183 INV Eukaryota; Metazoa; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Schistosoma +Schizosaccharomyces - 1 4 - 4895 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Taphrinomycotina; Schizosaccharomycetes; Schizosaccharomycetales; Schizosaccharomycetaceae; Schizosaccharomyces +Schizosaccharomyces pombe fission yeast 1 4 - 4896 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Taphrinomycotina; Schizosaccharomycetes; Schizosaccharomycetales; Schizosaccharomycetaceae; Schizosaccharomyces +Schmidtea mediterranea - 1 9 - 79327 INV Eukaryota; Metazoa; Platyhelminthes; Rhabditophora; Seriata; Tricladida; Continenticola; Geoplanoidea; Dugesiidae; Schmidtea +Scylla olivacea orange mud crab 1 5 - 85551 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Multicrustacea; Malacostraca; Eumalacostraca; Eucarida; Decapoda; Pleocyemata; Brachyura; Eubrachyura; Portunoidea; Portunidae; Scylla +Selaginella - 1 1 11 3246 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Lycopodiopsida; Selaginellales; Selaginellaceae; Selaginella +Selaginella 
moellendorffii - 1 1 11 88036 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Lycopodiopsida; Selaginellales; Selaginellaceae; Selaginella +Serinus canaria common canary 1 2 - 9135 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passeroidea; Fringillidae; Carduelinae; Serinus +Seriola dumerili greater amberjack 1 2 - 41447 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Carangaria; Carangiformes; Carangidae; Seriola +Seriola lalandi dorsalis - 1 2 - 1841481 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Carangaria; Carangiformes; Carangidae; Seriola +Serratia marcescens - 11 0 - 615 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Yersiniaceae; Serratia +Sesamum indicum sesame 1 1 11 4182 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Pedaliaceae; Sesamum +Setaria - 1 1 11 4554 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Cenchrinae; Setaria +Setaria italica foxtail millet 1 1 11 4555 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Cenchrinae; Setaria +Setaria viridis - 1 1 11 4556 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Panicodae; Paniceae; Cenchrinae; Setaria +Shigella flexneri - 11 0 - 623 BCT Bacteria; 
Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Shigella +Shigella sonnei - 11 0 - 624 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; Enterobacteriaceae; Shigella +Sinocyclocheilus angustiporus - 1 2 - 307947 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Sinocyclocheilus +Sinocyclocheilus anophthalmus eyeless golden-line fish 1 2 - 307955 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Sinocyclocheilus +Sinocyclocheilus anshuiensis - 1 2 - 1608454 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Sinocyclocheilus +Sinocyclocheilus grahami - 1 2 - 75366 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Sinocyclocheilus +Sinocyclocheilus rhinocerous - 1 2 - 307959 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Ostariophysi; Cypriniformes; Cyprinidae; Sinocyclocheilus +Sipyloidea sipylus - 1 5 - 202427 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Phasmatodea; Verophasmatodea; Anareolatae; Lonchodidae; Necrosciinae; Sipyloidea +Sisymbrium - 1 1 11 3729 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Sisymbrieae; Sisymbrium +Sisymbrium irio - 1 1 11 3730 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Sisymbrieae; Sisymbrium 
+Solanum - 1 1 11 4107 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum +Solanum lycopersicum tomato 1 1 11 4081 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum; Lycopersicon +Solanum melongena eggplant 1 1 11 4111 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum +Solanum phureja - 1 1 11 172790 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum +Solanum torvum - 1 1 11 119830 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum +Solanum tuberosum potato 1 1 11 4113 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Solaneae; Solanum +Solenopsis invicta red fire ant 1 5 - 13686 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Myrmicinae; Solenopsis +Sorex araneus European shrew 1 2 - 42254 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Eulipotyphla; Soricidae; Soricinae; Sorex +Sorghum - 1 1 11 4557 PLN Eukaryota; Viridiplantae; 
Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Sorghinae; Sorghum +Sorghum bicolor sorghum 1 1 11 4558 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Sorghinae; Sorghum +Sparus aurata gilthead seabream 1 2 - 8175 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Spariformes; Sparidae; Sparus +Speleonectes cf. tulumensis BMR-2011 - 1 5 - 1032549 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Crustacea; Remipedia; Nectiopoda; Speleonectidae; Speleonectes +Sphaerechinus granularis - 1 9 - 39374 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Temnopleuroida; Toxopneustidae; Sphaerechinus +Spinacia oleracea spinach 1 1 11 3562 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; Caryophyllales; Chenopodiaceae; Chenopodioideae; Anserineae; Spinacia +Spirometra erinaceieuropaei - 1 9 - 99802 INV Eukaryota; Metazoa; Platyhelminthes; Cestoda; Eucestoda; Diphyllobothriidea; Diphyllobothriidae; Spirometra +Spodoptera - 1 5 - 7106 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Noctuoidea; Noctuidae; Amphipyrinae; Spodoptera +Spodoptera exigua beet armyworm 1 5 - 7107 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Noctuoidea; Noctuidae; Amphipyrinae; Spodoptera +Spodoptera frugiperda fall armyworm 1 5 - 7108 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; 
Lepidoptera; Glossata; Ditrysia; Noctuoidea; Noctuidae; Amphipyrinae; Spodoptera +Squalus acanthias spiny dogfish 1 2 - 7797 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Chondrichthyes; Elasmobranchii; Squalimorphii; Squaliformes; Squalidae; Squalus +Staphylococcus aureus - 11 0 - 1280 BCT Bacteria; Firmicutes; Bacilli; Bacillales; Staphylococcaceae; Staphylococcus +Staphylococcus epidermidis - 11 0 - 1282 BCT Bacteria; Firmicutes; Bacilli; Bacillales; Staphylococcaceae; Staphylococcus +Stegodyphus mimosarum - 1 5 - 407821 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Araneae; Araneomorphae; Entelegynae; Eresoidea; Eresidae; Stegodyphus +Stenotrophomonas maltophilia - 11 0 - 40324 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Xanthomonadales; Xanthomonadaceae; Stenotrophomonas; Stenotrophomonas maltophilia group +Streptococcus agalactiae - 11 0 - 1311 BCT Bacteria; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus +Streptococcus pneumoniae - 11 0 - 1313 BCT Bacteria; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus +Streptococcus suis - 11 0 - 1307 BCT Bacteria; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; Streptococcus +Strongylocentrotus - 1 9 - 7664 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Strongylocentrotidae; Strongylocentrotus +Strongylocentrotus purpuratus purple sea urchin 1 9 - 7668 INV Eukaryota; Metazoa; Echinodermata; Eleutherozoa; Echinozoa; Echinoidea; Euechinoidea; Echinacea; Echinoida; Strongylocentrotidae; Strongylocentrotus +Stylophora pistillata - 1 4 - 50429 INV Eukaryota; Metazoa; Cnidaria; Anthozoa; Hexacorallia; Scleractinia; Astrocoeniina; Pocilloporidae; Stylophora +Sus - 1 2 - 9822 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; Suidae; Sus +Sus scrofa pig 1 2 - 9823 MAM Eukaryota; Metazoa; 
Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; Suidae; Sus +Symbiodinium sp. A1 - 1 4 11 503409 PLN Eukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Symbiodinium; Symbiodinium sp. clade A +Symbiodinium sp. B2 - 1 4 11 154560 PLN Eukaryota; Alveolata; Dinophyceae; Suessiales; Symbiodiniaceae; Symbiodinium; Symbiodinium sp. clade B synthetic construct - 11 0 11 32630 SYN other sequences; artificial sequences -Synura sp. LO234KE - 1 1 11 1825120 PLN cellular organisms; Eukaryota; Stramenopiles; Synurophyceae; Synurales; Mallomonadaceae; Synura -Taeniopygia guttata zebra finch 1 2 - 59729 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passeroidea; Estrildidae; Estrildinae; Taeniopygia -Takifugu rubripes torafugu 1 2 - 31033 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Tetraodontiformes; Tetraodontoidei; Tetradontoidea; Tetraodontidae; Takifugu -Tarenaya hassleriana - 1 1 11 28532 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Cleomaceae; Tarenaya -Tauraco erythrolophus red-crested turaco 1 2 - 121530 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; 
Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Musophagiformes; Musophagidae; Tauraco -Taxus wallichiana var. chinensis - 1 1 11 29808 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Acrogymnospermae; Pinidae; Cupressales; Taxaceae; Taxus; Taxus wallichiana -Telenomus podisi - 1 5 - 408256 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Platygastroidea; Scelionidae; Telenominae; Telenomus -Teleogryllus commodus - 1 5 - 672150 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Orthopteroidea; Orthoptera; Ensifera; Grylloidea; Gryllidae; Gryllinae; Teleogryllus -Teleopsis dalmanni - 1 5 - 139649 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Diopsoidea; Diopsidae; Teleopsis -Teleopsis whitei - 1 5 - 139651 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha; Eremoneura; Cyclorrhapha; Schizophora; Acalyptratae; Diopsoidea; Diopsidae; Teleopsis 
-Termitomyces clypeatus MTCC 5091 - 1 4 - 1282671 PLN cellular organisms; Eukaryota; Opisthokonta; Fungi; Dikarya; Basidiomycota; Agaricomycotina; Agaricomycetes; Agaricomycetidae; Agaricales; Lyophyllaceae; Termitomyces; Termitomyces clypeatus -Tetramorium bicarinatum - 1 5 - 219812 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Tetramoriini; Tetramorium -Tetranychus urticae two-spotted spider mite 1 5 - 32264 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Chelicerata; Arachnida; Acari; Acariformes; Trombidiformes; Prostigmata; Eleutherengona; Raphignathae; Tetranychoidea; Tetranychidae; Tetranychus -Tetraodon nigroviridis spotted green pufferfish 1 2 - 99883 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Tetraodontiformes; Tetraodontoidei; Tetradontoidea; Tetraodontidae; Tetraodon -Tetraselmis subcordiformis - 1 1 11 3161 PLN cellular organisms; Eukaryota; Viridiplantae; Chlorophyta; Chlorodendrophyceae; Chlorodendrales; Chlorodendraceae; Tetraselmis -Thamnophis sirtalis - 1 2 - 35019 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Lepidosauria; Squamata; Bifurcata; Unidentata; Episquamata; Toxicofera; Serpentes; 
Colubroidea; Colubridae; Natricinae; Thamnophis -Theobroma cacao cacao 1 1 11 3641 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Byttnerioideae; Theobroma -Thermobia domestica firebrat 1 5 - 89055 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Zygentoma; Lepismatidae; Thermobia -Tigriopus californicus - 1 5 - 6832 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Crustacea; Maxillopoda; Copepoda; Neocopepoda; Podoplea; Harpacticoida; Harpacticidae; Tigriopus -Tinamus guttatus white-throated tinamou 1 2 - 94827 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Palaeognathae; Tinamiformes; Tinamidae; Tinamus -Trachemys scripta elegans - 1 2 - 31138 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Testudines; Cryptodira; Durocryptodira; Testudinoidea; Emydidae; Trachemys; Trachemys scripta -Trachymyrmex cornetzi - 1 5 - 471704 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; 
Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Attini; Trachymyrmex -Trematomus bernacchii emerald rockcod 1 2 - 40690 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Eupercaria; Perciformes; Notothenioidei; Nototheniidae; Trematomus -Trichechus manatus latirostris Florida manatee 1 2 - 127582 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Afrotheria; Sirenia; Trichechidae; Trichechus; Trichechus manatus -Trichinella spiralis - 1 5 - 6334 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Nematoda; Enoplea; Dorylaimia; Trichocephalida; Trichinellidae; Trichinella -Trichobilharzia regenti - 1 9 - 157069 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Platyhelminthes; Trematoda; Digenea; Strigeidida; Schistosomatoidea; Schistosomatidae; Trichobilharzia -Trichomonas vaginalis G3 - 1 0 11 412133 INV cellular organisms; Eukaryota; Parabasalia; Trichomonadida; Trichomonadidae; Trichomonas; Trichomonas vaginalis -Trichoplusia ni cabbage looper 1 5 - 7111 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Amphiesmenoptera; Lepidoptera; Glossata; Neolepidoptera; Heteroneura; Ditrysia; Obtectomera; Noctuoidea; Noctuidae; Plusiinae; Trichoplusia -Trifolium pratense 
- 1 1 11 57577 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium -Tripterygion delaisi - 1 2 - 57862 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Blenniimorphae; Blenniiformes; Blennioidei; Tripterygiidae; Tripterygiinae; Tripterygion -Tripterygium wilfordii - 1 1 11 458696 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Celastrales; Celastraceae; Tripterygium -Triticum aestivum bread wheat 1 1 11 4565 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum -Triticum turgidum - 1 1 11 4571 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum -Triticum urartu - 1 1 11 4572 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; 
Petrosaviidae; commelinids; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum -Trypanosoma cruzi strain CL Brener - 1 4 11 353153 INV cellular organisms; Eukaryota; Euglenozoa; Kinetoplastida; Trypanosomatidae; Trypanosoma; Schizotrypanum; Trypanosoma cruzi -Tupaia chinensis Chinese tree shrew 1 2 - 246437 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Euarchontoglires; Scandentia; Tupaiidae; Tupaia -Turritopsis sp. SK-2016 - 1 4 - 1784781 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Cnidaria; Hydrozoa; Hydroidolina; Anthoathecata; Filifera; Oceaniidae; Turritopsis -Tursiops truncatus bottlenosed dolphin 1 2 - 9739 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Odontoceti; Delphinidae; Tursiops -Tyto alba barn owl 1 2 - 56313 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Strigiformes; Tytonidae; Tyto -uncultured archaeon - 11 0 - 115547 ENV cellular organisms; Archaea; environmental samples -uncultured bacterium - 11 0 - 77133 ENV cellular organisms; Bacteria; environmental samples -uncultured eukaryote - 1 1 11 100272 ENV cellular organisms; Eukaryota; environmental samples -uncultured fungus - 1 4 - 175245 ENV cellular organisms; Eukaryota; 
Opisthokonta; Fungi; environmental samples +Taeniopygia - 1 2 - 59728 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passeroidea; Estrildidae; Estrildinae; Taeniopygia +Taeniopygia guttata zebra finch 1 2 - 59729 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passeroidea; Estrildidae; Estrildinae; Taeniopygia +Takifugu - 1 2 - 31032 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Tetraodontiformes; Tetradontoidea; Tetraodontidae; Takifugu +Takifugu rubripes torafugu 1 2 - 31033 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Tetraodontiformes; Tetradontoidea; Tetraodontidae; Takifugu +Tauraco erythrolophus red-crested turaco 1 2 - 121530 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Musophagiformes; Musophagidae; Tauraco +Teladorsagia circumcincta - 1 5 - 45464 INV Eukaryota; Metazoa; Ecdysozoa; Nematoda; Chromadorea; Strongylida; Trichostrongyloidea; Haemonchidae; Teladorsagia +Teleopsis dalmanni - 1 5 - 139649 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Diptera; Brachycera; Muscomorpha; Diopsoidea; Diopsidae; Teleopsis +Terrapene mexicana triunguis Three-toed box turtle 1 2 - 1415176 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Testudines; Cryptodira; Durocryptodira; Testudinoidea; Emydidae; Terrapene +Tetragonula carbonaria - 1 5 - 148810 INV Eukaryota; Metazoa; Ecdysozoa; 
Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Apoidea; Apidae; Tetragonula +Tetrahymena - 6 4 11 5890 INV Eukaryota; Alveolata; Ciliophora; Intramacronucleata; Oligohymenophorea; Hymenostomatida; Tetrahymenina; Tetrahymenidae; Tetrahymena +Tetrahymena thermophila - 6 4 11 5911 INV Eukaryota; Alveolata; Ciliophora; Intramacronucleata; Oligohymenophorea; Hymenostomatida; Tetrahymenina; Tetrahymenidae; Tetrahymena +Tetranychus urticae two-spotted spider mite 1 5 - 32264 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Chelicerata; Arachnida; Acari; Acariformes; Trombidiformes; Prostigmata; Eleutherengona; Raphignathae; Tetranychoidea; Tetranychidae; Tetranychus +Tetraodon nigroviridis spotted green pufferfish 1 2 - 99883 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Tetraodontiformes; Tetradontoidea; Tetraodontidae; Tetraodon +Theobroma - 1 1 11 3640 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Byttnerioideae; Theobroma +Theobroma cacao cacao 1 1 11 3641 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Malvales; Malvaceae; Byttnerioideae; Theobroma +Tinamus guttatus white-throated tinamou 1 2 - 94827 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Palaeognathae; Tinamiformes; Tinamidae; Tinamus +Toxoplasma - 1 4 4 5810 INV Eukaryota; Alveolata; Apicomplexa; Conoidasida; Coccidia; Eucoccidiorida; Eimeriorina; Sarcocystidae; Toxoplasma +Toxoplasma gondii - 1 4 4 5811 INV Eukaryota; Alveolata; Apicomplexa; Conoidasida; Coccidia; Eucoccidiorida; Eimeriorina; 
Sarcocystidae; Toxoplasma +Trachemys scripta elegans - 1 2 - 31138 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Testudines; Cryptodira; Durocryptodira; Testudinoidea; Emydidae; Trachemys +Trematomus bernacchii emerald rockcod 1 2 - 40690 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Eupercaria; Perciformes; Notothenioidei; Nototheniidae; Trematomus +Tribolium - 1 1 11 89526 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Danthonioideae; Danthonieae; Tribolium +Tribolium castaneum red flour beetle 1 5 - 7070 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Coleoptera; Polyphaga; Cucujiformia; Tenebrionidae; Tenebrionidae incertae sedis; Tribolium +Trichechus manatus latirostris Florida manatee 1 2 - 127582 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Afrotheria; Sirenia; Trichechidae; Trichechus +Trichoderma - 1 4 - 5543 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Sordariomycetes; Hypocreomycetidae; Hypocreales; Hypocreaceae; Trichoderma +Trichomonas vaginalis G3 - 1 0 11 412133 INV Eukaryota; Parabasalia; Trichomonadida; Trichomonadidae; Trichomonas +Trichoplusia ni cabbage looper 1 5 - 7111 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Lepidoptera; Glossata; Ditrysia; Noctuoidea; Noctuidae; Plusiinae; Trichoplusia +Trichosurus brush-tailed possums 1 2 - 9336 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Metatheria; Diprotodontia; Phalangeridae; Trichosurus +Trichosurus vulpecula common brushtail 1 2 - 9337 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Metatheria; Diprotodontia; Phalangeridae; Trichosurus 
+Trifolium - 1 1 11 3898 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Trifolieae; Trifolium +Trifolium pratense - 1 1 11 57577 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; Hologalegina; IRL clade; Trifolieae; Trifolium +Triphysaria - 1 1 11 64092 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Lamiales; Orobanchaceae; Pedicularideae; Castillejinae; Triphysaria +Tripterygion delaisi - 1 2 - 57862 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Neoteleostei; Acanthomorphata; Ovalentaria; Blenniimorphae; Blenniiformes; Blennioidei; Tripterygiidae; Tripterygiinae; Tripterygion +Triticum - 1 1 11 4564 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum +Triticum aestivum bread wheat 1 1 11 4565 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum +Triticum turgidum - 1 1 11 4571 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum +Triticum urartu - 1 1 11 4572 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; 
BOP clade; Pooideae; Triticodae; Triticeae; Triticinae; Triticum +Trypanosoma - 1 4 11 5690 INV Eukaryota; Euglenozoa; Kinetoplastida; Trypanosomatidae; Trypanosoma +Trypanosoma brucei - 1 4 11 5691 INV Eukaryota; Euglenozoa; Kinetoplastida; Trypanosomatidae; Trypanosoma +Trypanosoma cruzi - 1 4 11 5693 INV Eukaryota; Euglenozoa; Kinetoplastida; Trypanosomatidae; Trypanosoma; Schizotrypanum +Trypoxylus dichotomus - 1 5 - 273928 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Coleoptera; Polyphaga; Scarabaeiformia; Scarabaeidae; Dynastinae; Trypoxylus +Tuber truffles 1 4 - 36048 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Pezizomycetes; Pezizales; Tuberaceae; Tuber +Tuber melanosporum Perigord truffle 1 4 - 39416 PLN Eukaryota; Fungi; Dikarya; Ascomycota; Pezizomycotina; Pezizomycetes; Pezizales; Tuberaceae; Tuber +Tupaia - 1 2 - 9394 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Scandentia; Tupaiidae; Tupaia +Tupaia chinensis Chinese tree shrew 1 2 - 246437 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Scandentia; Tupaiidae; Tupaia +Turritopsis sp. 
SK-2016 - 1 4 - 1784781 INV Eukaryota; Metazoa; Cnidaria; Hydrozoa; Hydroidolina; Anthoathecata; Filifera; Oceaniidae; Turritopsis +Tursiops truncatus bottlenose dolphin 1 2 - 9739 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Cetacea; Odontoceti; Delphinidae; Tursiops +Tyto alba Barn owl 1 2 - 56313 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Strigiformes; Tytonidae; Tyto +uncultured archaeon - 11 0 - 115547 ENV Archaea; environmental samples +uncultured bacterium - 11 0 - 77133 ENV Bacteria; environmental samples +uncultured eukaryote - 1 1 11 100272 ENV Eukaryota; environmental samples +uncultured fungus - 1 4 - 175245 ENV Eukaryota; Fungi; environmental samples uncultured microorganism - 11 2 11 358574 ENV unclassified sequences; environmental samples -uncultured Neocallimastigales - 1 0 - 325898 ENV cellular organisms; Eukaryota; Opisthokonta; Fungi; Neocallimastigomycota; Neocallimastigomycetes; Neocallimastigales; environmental samples +uncultured Neocallimastigales - 1 0 - 325898 ENV Eukaryota; Fungi; Fungi incertae sedis; Chytridiomycota; Neocallimastigomycetes; Neocallimastigales; environmental samples uncultured organism - 11 2 11 155900 ENV unclassified sequences; environmental samples -uncultured prokaryote - 11 0 11 198431 ENV unclassified sequences; environmental samples; prokaryotic environmental samples -uncultured Pseudomonas sp. 
- 11 0 - 114707 ENV cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Pseudomonadales; Pseudomonadaceae; Pseudomonas; environmental samples -uncultured soil bacterium - 11 0 - 164851 ENV cellular organisms; Bacteria; environmental samples +uncultured prokaryote - 11 0 11 198431 ENV unclassified sequences; environmental samples unidentified - 1 2 11 32644 UNA unclassified sequences -unidentified bacterium - 11 0 - 1826778 BCT cellular organisms; Bacteria; unclassified Bacteria; unclassified Bacteria (miscellaneous) -Ursus maritimus polar bear 1 2 - 29073 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Carnivora; Caniformia; Ursidae; Ursus -Vaccinium macrocarpon - 1 1 11 13750 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; Ericales; Ericaceae; Vaccinioideae; Vaccinieae; Vaccinium -Vibrio cholerae - 11 0 - 666 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Vibrionales; Vibrionaceae; Vibrio -Vibrio parahaemolyticus - 11 0 - 670 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Vibrionales; Vibrionaceae; Vibrio; Vibrio harveyi group -Vicia faba fava bean 1 1 11 3906 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Fabeae; Vicia -Vicugna pacos alpaca 1 2 - 30538 MAM cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; 
Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Mammalia; Theria; Eutheria; Boreoeutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Vicugna -Vigna angularis adzuki bean 1 1 11 3914 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Phaseoleae; Vigna -Vigna radiata - 1 1 11 157791 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Phaseoleae; Vigna -Vigna radiata var. radiata mung bean 1 1 11 3916 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; Phaseoleae; Vigna; Vigna radiata -Villosa lienosa - 1 5 - 326719 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Palaeoheterodonta; Unionoida; Unionoidea; Unionidae; Unioninae; Villosa -Vitis vinifera wine grape 1 1 11 29760 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; rosids incertae sedis; Vitales; Vitaceae; Vitis -Vollenhovia emeryi - 1 5 - 411798 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; 
Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Stenammini; Vollenhovia -Wasmannia auropunctata little fire ant 1 5 - 64793 INV cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Protostomia; Ecdysozoa; Panarthropoda; Arthropoda; Mandibulata; Pancrustacea; Hexapoda; Insecta; Dicondylia; Pterygota; Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Vespoidea; Formicidae; Myrmicinae; Blepharidattini; Wasmannia -Withania somnifera - 1 1 11 126910 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; asterids; lamiids; Solanales; Solanaceae; Solanoideae; Physaleae; Withania -Xanthomonas citri pv. citri - 11 0 - 434928 BCT cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Xanthomonadales; Xanthomonadaceae; Xanthomonas; Xanthomonas citri group; Xanthomonas citri -Xenopus laevis African clawed frog 1 2 - 8355 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Pipoidea; Pipidae; Xenopodinae; Xenopus; Xenopus -Xenopus tropicalis tropical clawed frog 1 2 - 8364 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amphibia; Batrachia; Anura; Pipoidea; Pipidae; Xenopodinae; Xenopus; Silurana -Xiphophorus maculatus southern platyfish 1 2 - 8083 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Actinopterygii; Actinopteri; Neopterygii; Teleostei; Osteoglossocephalai; Clupeocephala; Euteleosteomorpha; 
Neoteleostei; Eurypterygia; Ctenosquamata; Acanthomorphata; Euacanthomorphacea; Percomorphaceae; Ovalentaria; Atherinomorphae; Cyprinodontiformes; Cyprinodontoidei; Poeciliidae; Poeciliinae; Xiphophorus -Zantedeschia aethiopica - 1 1 11 69721 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Alismatales; Araceae; Philodendroideae; Zantedeschieae; Zantedeschia -Zea mays - 1 1 11 4577 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Tripsacinae; Zea -Zea mays subsp. mays maize 1 1 11 381124 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Petrosaviidae; commelinids; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Tripsacinae; Zea; Zea mays -Ziziphus jujuba - 1 1 11 326968 PLN cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Rosales; Rhamnaceae; Paliureae; Ziziphus -Zonotrichia albicollis white-throated sparrow 1 2 - 44394 VRT cellular organisms; Eukaryota; Opisthokonta; Metazoa; Eumetazoa; Bilateria; Deuterostomia; Chordata; Craniata; Vertebrata; Gnathostomata; Teleostomi; Euteleostomi; Sarcopterygii; Dipnotetrapodomorpha; Tetrapoda; Amniota; Sauropsida; Sauria; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passerellidae; Zonotrichia -Zostera noltei - 1 1 11 55326 PLN cellular organisms; Eukaryota; Viridiplantae; 
Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; Liliopsida; Alismatales; Zosteraceae; Zostera +Ursus maritimus polar bear 1 2 - 29073 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Ursidae; Ursus +Ustilago - 1 4 - 5269 PLN Eukaryota; Fungi; Dikarya; Basidiomycota; Ustilaginomycotina; Ustilaginomycetes; Ustilaginales; Ustilaginaceae; Ustilago +Ustilago maydis - 1 4 - 5270 PLN Eukaryota; Fungi; Dikarya; Basidiomycota; Ustilaginomycotina; Ustilaginomycetes; Ustilaginales; Ustilaginaceae; Ustilago +Vibrio cholerae - 11 0 - 666 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Vibrionales; Vibrionaceae; Vibrio +Vibrio parahaemolyticus - 11 0 - 670 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Vibrionales; Vibrionaceae; Vibrio +Vicugna pacos alpaca 1 2 - 30538 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Tylopoda; Camelidae; Vicugna +Vigna - 1 1 11 3913 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Vigna +Vigna angularis adzuki bean 1 1 11 3914 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; Phaseoleae; Vigna +Vigna unguiculata cowpea 1 1 11 3917 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; 50 kb inversion clade; NPAAA clade; indigoferoid/millettioid clade; 
Phaseoleae; Vigna +Vitis - 1 1 11 3603 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; Vitales; Vitaceae; Viteae; Vitis +Vitis rotundifolia - 1 1 11 103349 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; Vitales; Vitaceae; Viteae; Vitis +Vitis vinifera wine grape 1 1 11 29760 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; Vitales; Vitaceae; Viteae; Vitis +Volvox - 1 1 11 3066 PLN Eukaryota; Viridiplantae; Chlorophyta; Chlorophyceae; Chlamydomonadales; Volvocaceae; Volvox +Volvox carteri f. nagariensis - 1 1 11 3068 PLN Eukaryota; Viridiplantae; Chlorophyta; Chlorophyceae; Chlamydomonadales; Volvocaceae; Volvox +Vulpes vulpes red fox 1 2 - 9627 MAM Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Carnivora; Caniformia; Canidae; Vulpes +Wasmannia auropunctata little fire ant 1 5 - 64793 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Holometabola; Hymenoptera; Apocrita; Aculeata; Formicoidea; Formicidae; Myrmicinae; Wasmannia +Xanthomonas - 11 0 - 338 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Xanthomonadales; Xanthomonadaceae; Xanthomonas +Xanthomonas oryzae pv. 
oryzae - 11 0 - 64187 BCT Bacteria; Proteobacteria; Gammaproteobacteria; Xanthomonadales; Xanthomonadaceae; Xanthomonas +Xenopus - 1 2 - 8353 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Anura; Pipoidea; Pipidae; Xenopodinae; Xenopus +Xenopus laevis African clawed frog 1 2 - 8355 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Anura; Pipoidea; Pipidae; Xenopodinae; Xenopus; Xenopus +Xenopus tropicalis tropical clawed frog 1 2 - 8364 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Amphibia; Batrachia; Anura; Pipoidea; Pipidae; Xenopodinae; Xenopus; Silurana +Zantedeschia aethiopica - 1 1 11 69721 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Araceae; Philodendroideae; Zantedeschieae; Zantedeschia +Zea - 1 1 11 4575 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Tripsacinae; Zea +Zea mays - 1 1 11 4577 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Tripsacinae; Zea +Zea mays subsp. 
mays maize 1 1 11 381124 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; PACMAD clade; Panicoideae; Andropogonodae; Andropogoneae; Tripsacinae; Zea +Zonotrichia albicollis white-throated sparrow 1 2 - 44394 VRT Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archelosauria; Archosauria; Dinosauria; Saurischia; Theropoda; Coelurosauria; Aves; Neognathae; Passeriformes; Passerellidae; Zonotrichia +Zootermopsis nevadensis - 1 5 - 136037 INV Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera; Polyneoptera; Dictyoptera; Blattodea; Blattoidea; Termitoidae; Termopsidae; Zootermopsis +Zostera - 1 1 11 27257 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Zosteraceae; Zostera +Zostera marina - 1 1 11 29655 PLN Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; Liliopsida; Zosteraceae; Zostera diff --git a/c++/src/objects/seqfeat/ecnum_ambiguous.inc b/c++/src/objects/seqfeat/ecnum_ambiguous.inc index 74b0aa0c..2ddc6451 100644 --- a/c++/src/objects/seqfeat/ecnum_ambiguous.inc +++ b/c++/src/objects/seqfeat/ecnum_ambiguous.inc @@ -1,4 +1,4 @@ -/* $Id: ecnum_ambiguous.inc 573605 2018-10-30 11:56:53Z ivanov $ +/* $Id: ecnum_ambiguous.inc 578297 2019-01-16 16:37:19Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -795,14 +795,18 @@ static const char* const kECNum_ambiguous[] = { "7.1.1.n\tHydron translocation or charge separation linked to oxidoreductase reactions", "7.1.2.-\tHydron translocation linked to the hydrolysis of a nucleoside triphosphate", "7.1.2.n\tHydron translocation linked to the hydrolysis of a nucleoside triphosphate", + "7.1.3.-\tHydron translocation linked to the hydrolysis of diphosphate", + "7.1.3.n\tHydron translocation linked to the hydrolysis of 
diphosphate", "7.2.-.-\tCatalysing the translocation of inorganic cations", "7.2.n.n\tCatalysing the translocation of inorganic cations", "7.2.1.-\tLinked to oxidoreductase reactions", "7.2.1.n\tLinked to oxidoreductase reactions", "7.2.2.-\tLinked to the hydrolysis of a nucleoside triphosphate", "7.2.2.n\tLinked to the hydrolysis of a nucleoside triphosphate", - "7.2.3.-\tLinked to decarboxylation", - "7.2.3.n\tLinked to decarboxylation", + "7.2.3.-\tLinked to the hydrolysis of diphosphate", + "7.2.3.n\tLinked to the hydrolysis of diphosphate", + "7.2.4.-\tLinked to decarboxylation", + "7.2.4.n\tLinked to decarboxylation", "7.3.-.-\tCatalysing the translocation of inorganic anions and their chelates", "7.3.n.n\tCatalysing the translocation of inorganic anions and their chelates", "7.3.2.-\tLinked to the hydrolysis of a nucleoside triphosphate", diff --git a/c++/src/objects/seqfeat/ecnum_ambiguous.txt b/c++/src/objects/seqfeat/ecnum_ambiguous.txt index e9e75aad..08af4f2c 100644 --- a/c++/src/objects/seqfeat/ecnum_ambiguous.txt +++ b/c++/src/objects/seqfeat/ecnum_ambiguous.txt @@ -762,14 +762,18 @@ 7.1.1.n Hydron translocation or charge separation linked to oxidoreductase reactions 7.1.2.- Hydron translocation linked to the hydrolysis of a nucleoside triphosphate 7.1.2.n Hydron translocation linked to the hydrolysis of a nucleoside triphosphate +7.1.3.- Hydron translocation linked to the hydrolysis of diphosphate +7.1.3.n Hydron translocation linked to the hydrolysis of diphosphate 7.2.-.- Catalysing the translocation of inorganic cations 7.2.n.n Catalysing the translocation of inorganic cations 7.2.1.- Linked to oxidoreductase reactions 7.2.1.n Linked to oxidoreductase reactions 7.2.2.- Linked to the hydrolysis of a nucleoside triphosphate 7.2.2.n Linked to the hydrolysis of a nucleoside triphosphate -7.2.3.- Linked to decarboxylation -7.2.3.n Linked to decarboxylation +7.2.3.- Linked to the hydrolysis of diphosphate +7.2.3.n Linked to the hydrolysis of diphosphate 
+7.2.4.- Linked to decarboxylation +7.2.4.n Linked to decarboxylation 7.3.-.- Catalysing the translocation of inorganic anions and their chelates 7.3.n.n Catalysing the translocation of inorganic anions and their chelates 7.3.2.- Linked to the hydrolysis of a nucleoside triphosphate diff --git a/c++/src/objects/seqfeat/ecnum_deleted.inc b/c++/src/objects/seqfeat/ecnum_deleted.inc index 90a86d7c..21411acc 100644 --- a/c++/src/objects/seqfeat/ecnum_deleted.inc +++ b/c++/src/objects/seqfeat/ecnum_deleted.inc @@ -1,4 +1,4 @@ -/* $Id: ecnum_deleted.inc 572069 2018-10-09 13:30:48Z ivanov $ +/* $Id: ecnum_deleted.inc 580651 2019-02-19 12:38:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -52,6 +52,7 @@ static const char* const kECNum_deleted[] = { "1.5.3.3", "1.6.2.3", "1.6.5.1", + "1.6.6.9", "1.7.1.8", "1.7.99.2", "1.8.1.1", @@ -169,6 +170,7 @@ static const char* const kECNum_deleted[] = { "3.4.99.40", "3.4.99.42", "3.5.1.27", + "3.6.4.11", "3.13.1.2", "4.1.1.13", "4.1.2.3", diff --git a/c++/src/objects/seqfeat/ecnum_deleted.txt b/c++/src/objects/seqfeat/ecnum_deleted.txt index eaa35b85..1a669f8b 100644 --- a/c++/src/objects/seqfeat/ecnum_deleted.txt +++ b/c++/src/objects/seqfeat/ecnum_deleted.txt @@ -19,6 +19,7 @@ 1.5.3.3 1.6.2.3 1.6.5.1 +1.6.6.9 1.7.1.8 1.7.99.2 1.8.1.1 @@ -136,6 +137,7 @@ 3.4.99.40 3.4.99.42 3.5.1.27 +3.6.4.11 3.13.1.2 4.1.1.13 4.1.2.3 diff --git a/c++/src/objects/seqfeat/ecnum_replaced.inc b/c++/src/objects/seqfeat/ecnum_replaced.inc index 4e404ff0..71582355 100644 --- a/c++/src/objects/seqfeat/ecnum_replaced.inc +++ b/c++/src/objects/seqfeat/ecnum_replaced.inc @@ -1,4 +1,4 @@ -/* $Id: ecnum_replaced.inc 573605 2018-10-30 11:56:53Z ivanov $ +/* $Id: ecnum_replaced.inc 580651 2019-02-19 12:38:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -178,6 +178,7 @@ static const char* const kECNum_replaced[] = { 
"1.5.99.9\t1.5.98.1", "1.5.99.10\t1.5.8.1", "1.5.99.11\t1.5.98.2", + "1.6.1.5\t7.1.1.1", "1.6.2.1\t1.6.99.3", "1.6.4.1\t1.8.1.6", "1.6.4.2\t1.8.1.7", @@ -189,6 +190,8 @@ static const char* const kECNum_replaced[] = { "1.6.4.8\t1.8.1.12", "1.6.4.9\t1.8.1.13", "1.6.4.10\t1.8.1.14", + "1.6.5.3\t7.1.1.2", + "1.6.5.8\t7.2.1.1", "1.6.6.1\t1.7.1.1", "1.6.6.2\t1.7.1.2", "1.6.6.3\t1.7.1.3", @@ -231,7 +234,12 @@ static const char* const kECNum_replaced[] = { "1.9.99.1\t1.9.98.1", "1.10.3.7\t1.21.3.4", "1.10.3.8\t1.21.3.5", - "1.10.99.1\t1.10.9.1", + "1.10.3.10\t7.1.1.3", + "1.10.3.12\t7.1.1.5", + "1.10.3.13\t7.1.1.4", + "1.10.3.14\t7.1.1.7", + "1.10.9.1\t7.1.1.6", + "1.10.99.1\t7.1.1.6", "1.10.99.2\t1.10.5.1", "1.10.99.3\t1.23.5.1", "1.11.1.4\t1.13.11.11", @@ -358,6 +366,7 @@ static const char* const kECNum_replaced[] = { "1.14.13.121\t1.14.14.151", "1.14.13.123\t1.14.14.95", "1.14.13.124\t1.14.14.40", + "1.14.13.125\t1.14.14.156", "1.14.13.126\t1.14.15.16", "1.14.13.129\t1.14.15.24", "1.14.13.132\t1.14.14.17", @@ -365,6 +374,7 @@ static const char* const kECNum_replaced[] = { "1.14.13.134\t1.14.14.152", "1.14.13.136\t1.14.14.87", "1.14.13.137\t1.14.14.153", + "1.14.13.138\t1.14.14.157", "1.14.13.139\t1.14.14.109", "1.14.13.140\t1.14.14.110", "1.14.13.141\t1.14.15.29", @@ -411,7 +421,7 @@ static const char* const kECNum_replaced[] = { "1.14.13.214\t1.14.14.132", "1.14.13.221\t1.14.15.28", "1.14.13.n1\t1.14.14.40", - "1.14.13.n2\t1.14.13.125", + "1.14.13.n2\t1.14.14.156", "1.14.13.n3\t1.14.13.127", "1.14.13.n4\t1.14.15.16", "1.14.13.n5\t1.14.14.42", @@ -464,11 +474,13 @@ static const char* const kECNum_replaced[] = { "1.14.99.41\t1.13.11.75", "1.14.99.42\t1.13.11.84", "1.14.99.43\t1.14.14.134", + "1.14.99.45\t1.14.14.158", "1.14.99.49\t1.14.15.31", "1.14.99.n1\t1.13.11.75", "1.14.99.n2\t1.13.11.71", "1.14.99.n3\t1.13.11.84", "1.14.99.n5\t1.13.11.70", + "1.16.5.1\t7.2.1.3", "1.16.98.1\t1.16.9.1", "1.17.1.2\t1.17.7.4", "1.17.1.6\t1.1.1.395", @@ -477,6 +489,7 @@ static const 
char* const kECNum_replaced[] = { "1.17.98.1\t1.1.1.395", "1.17.99.1\t1.17.9.1", "1.17.99.5\t1.1.1.395", + "1.18.1.8\t7.2.1.2", "1.18.2.1\t1.18.6.1", "1.18.3.1\t1.12.7.2", "1.18.96.1\t1.15.1.2", @@ -551,6 +564,8 @@ static const char* const kECNum_replaced[] = { "2.3.1.128\t2.3.1.267", "2.3.1.154\t2.3.1.176", "2.3.1.n1\t2.3.1.191", + "2.3.1.n2\t2.3.1.274", + "2.3.1.n3\t2.3.1.275", "2.3.1.n8\t2.3.1.199", "2.3.1.n9\t2.3.1.211", "2.3.1.n10\t2.3.1.222", @@ -603,6 +618,7 @@ static const char* const kECNum_replaced[] = { "2.5.1.37\t4.4.1.20", "2.5.1.40\t4.2.3.9", "2.5.1.64\t2.2.1.9\t4.2.99.20", + "2.5.1.77\t2.5.1.147", "2.5.1.n1\t2.2.1.9", "2.5.1.n2\t2.5.1.81", "2.5.1.n3\t2.5.1.73", @@ -976,8 +992,8 @@ static const char* const kECNum_replaced[] = { "3.6.1.30\t3.6.1.59\t3.6.1.62", "3.6.1.32\t3.6.4.1", "3.6.1.33\t3.6.4.2", - "3.6.1.34\t3.6.3.14", - "3.6.1.35\t3.6.3.6", + "3.6.1.34\t7.1.2.2", + "3.6.1.35\t7.1.2.1", "3.6.1.36\t3.6.3.10", "3.6.1.37\t3.6.3.9", "3.6.1.38\t3.6.3.8", @@ -989,18 +1005,53 @@ static const char* const kECNum_replaced[] = { "3.6.1.51\t3.6.5.6", "3.6.1.n4\t3.6.1.67", "3.6.1.n5\t3.6.1.54", + "3.6.3.1\t7.6.2.1", + "3.6.3.6\t7.1.2.1", + "3.6.3.7\t7.2.2.3", + "3.6.3.12\t7.2.2.6", "3.6.3.13\t3.6.3.1", + "3.6.3.14\t7.1.2.2", + "3.6.3.15\t7.2.2.1", + "3.6.3.18\t7.5.2.2", + "3.6.3.19\t7.5.2.1", + "3.6.3.21\t7.4.2.1", + "3.6.3.22\t7.4.2.2", + "3.6.3.23\t7.4.2.6", + "3.6.3.25\t7.3.2.3", + "3.6.3.26\t7.3.2.4", + "3.6.3.27\t7.3.2.1", + "3.6.3.28\t7.3.2.2", + "3.6.3.29\t7.3.2.5", + "3.6.3.30\t7.2.2.7", + "3.6.3.35\t7.2.2.5", + "3.6.3.36\t7.6.2.7", + "3.6.3.37\t7.6.2.6", + "3.6.3.39\t7.5.2.5", + "3.6.3.40\t7.5.2.4", + "3.6.3.41\t7.6.2.5", + "3.6.3.42\t7.5.2.3", + "3.6.3.43\t7.4.2.5", + "3.6.3.44\t7.6.2.2", "3.6.3.45\t3.6.3.44", - "3.6.3.n1\t3.6.3.54", + "3.6.3.46\t7.2.2.2", + "3.6.3.47\t7.6.2.4", + "3.6.3.48\t7.4.2.7", + "3.6.3.51\t7.4.2.3", + "3.6.3.52\t7.4.2.4", + "3.6.3.54\t7.2.2.8", + "3.6.3.n1\t7.2.2.8", + "3.6.4.3\t5.6.1.1", "3.7.1.15\t4.2.1.138", 
"3.7.1.16\t3.3.2.12", "3.7.1.n1\t3.7.1.14", "3.7.1.n2\t3.7.1.22", "3.8.1.4\t1.21.99.4", "3.8.2.1\t3.1.8.2", + "4.1.1.3\t4.1.1.112", "4.1.1.10\t4.1.1.12", "4.1.1.26\t4.1.1.28", "4.1.1.27\t4.1.1.28", + "4.1.1.41\t7.2.4.3", "4.1.1.n1\t4.1.1.97", "4.1.1.n2\t4.1.1.94", "4.1.2.1\t4.1.3.16", @@ -1104,6 +1155,7 @@ static const char* const kECNum_replaced[] = { "4.3.1.26\t1.21.98.2", "4.3.3.n1\t4.1.99.20", "4.3.99.1\t4.2.1.104", + "4.3.99.2\t7.2.4.1", "4.4.1.7\t2.5.1.18", "4.4.1.18\t1.8.3.5", "4.4.1.27\t3.13.1.5", diff --git a/c++/src/objects/seqfeat/ecnum_replaced.txt b/c++/src/objects/seqfeat/ecnum_replaced.txt index 4b807bf1..10d58996 100644 --- a/c++/src/objects/seqfeat/ecnum_replaced.txt +++ b/c++/src/objects/seqfeat/ecnum_replaced.txt @@ -145,6 +145,7 @@ 1.5.99.9 1.5.98.1 1.5.99.10 1.5.8.1 1.5.99.11 1.5.98.2 +1.6.1.5 7.1.1.1 1.6.2.1 1.6.99.3 1.6.4.1 1.8.1.6 1.6.4.2 1.8.1.7 @@ -156,6 +157,8 @@ 1.6.4.8 1.8.1.12 1.6.4.9 1.8.1.13 1.6.4.10 1.8.1.14 +1.6.5.3 7.1.1.2 +1.6.5.8 7.2.1.1 1.6.6.1 1.7.1.1 1.6.6.2 1.7.1.2 1.6.6.3 1.7.1.3 @@ -198,7 +201,12 @@ 1.9.99.1 1.9.98.1 1.10.3.7 1.21.3.4 1.10.3.8 1.21.3.5 -1.10.99.1 1.10.9.1 +1.10.3.10 7.1.1.3 +1.10.3.12 7.1.1.5 +1.10.3.13 7.1.1.4 +1.10.3.14 7.1.1.7 +1.10.9.1 7.1.1.6 +1.10.99.1 7.1.1.6 1.10.99.2 1.10.5.1 1.10.99.3 1.23.5.1 1.11.1.4 1.13.11.11 @@ -325,6 +333,7 @@ 1.14.13.121 1.14.14.151 1.14.13.123 1.14.14.95 1.14.13.124 1.14.14.40 +1.14.13.125 1.14.14.156 1.14.13.126 1.14.15.16 1.14.13.129 1.14.15.24 1.14.13.132 1.14.14.17 @@ -332,6 +341,7 @@ 1.14.13.134 1.14.14.152 1.14.13.136 1.14.14.87 1.14.13.137 1.14.14.153 +1.14.13.138 1.14.14.157 1.14.13.139 1.14.14.109 1.14.13.140 1.14.14.110 1.14.13.141 1.14.15.29 @@ -378,7 +388,7 @@ 1.14.13.214 1.14.14.132 1.14.13.221 1.14.15.28 1.14.13.n1 1.14.14.40 -1.14.13.n2 1.14.13.125 +1.14.13.n2 1.14.14.156 1.14.13.n3 1.14.13.127 1.14.13.n4 1.14.15.16 1.14.13.n5 1.14.14.42 @@ -431,11 +441,13 @@ 1.14.99.41 1.13.11.75 1.14.99.42 1.13.11.84 1.14.99.43 1.14.14.134 +1.14.99.45 1.14.14.158 
1.14.99.49 1.14.15.31 1.14.99.n1 1.13.11.75 1.14.99.n2 1.13.11.71 1.14.99.n3 1.13.11.84 1.14.99.n5 1.13.11.70 +1.16.5.1 7.2.1.3 1.16.98.1 1.16.9.1 1.17.1.2 1.17.7.4 1.17.1.6 1.1.1.395 @@ -444,6 +456,7 @@ 1.17.98.1 1.1.1.395 1.17.99.1 1.17.9.1 1.17.99.5 1.1.1.395 +1.18.1.8 7.2.1.2 1.18.2.1 1.18.6.1 1.18.3.1 1.12.7.2 1.18.96.1 1.15.1.2 @@ -518,6 +531,8 @@ 2.3.1.128 2.3.1.267 2.3.1.154 2.3.1.176 2.3.1.n1 2.3.1.191 +2.3.1.n2 2.3.1.274 +2.3.1.n3 2.3.1.275 2.3.1.n8 2.3.1.199 2.3.1.n9 2.3.1.211 2.3.1.n10 2.3.1.222 @@ -570,6 +585,7 @@ 2.5.1.37 4.4.1.20 2.5.1.40 4.2.3.9 2.5.1.64 2.2.1.9 4.2.99.20 +2.5.1.77 2.5.1.147 2.5.1.n1 2.2.1.9 2.5.1.n2 2.5.1.81 2.5.1.n3 2.5.1.73 @@ -943,8 +959,8 @@ 3.6.1.30 3.6.1.59 3.6.1.62 3.6.1.32 3.6.4.1 3.6.1.33 3.6.4.2 -3.6.1.34 3.6.3.14 -3.6.1.35 3.6.3.6 +3.6.1.34 7.1.2.2 +3.6.1.35 7.1.2.1 3.6.1.36 3.6.3.10 3.6.1.37 3.6.3.9 3.6.1.38 3.6.3.8 @@ -956,18 +972,53 @@ 3.6.1.51 3.6.5.6 3.6.1.n4 3.6.1.67 3.6.1.n5 3.6.1.54 +3.6.3.1 7.6.2.1 +3.6.3.6 7.1.2.1 +3.6.3.7 7.2.2.3 +3.6.3.12 7.2.2.6 3.6.3.13 3.6.3.1 +3.6.3.14 7.1.2.2 +3.6.3.15 7.2.2.1 +3.6.3.18 7.5.2.2 +3.6.3.19 7.5.2.1 +3.6.3.21 7.4.2.1 +3.6.3.22 7.4.2.2 +3.6.3.23 7.4.2.6 +3.6.3.25 7.3.2.3 +3.6.3.26 7.3.2.4 +3.6.3.27 7.3.2.1 +3.6.3.28 7.3.2.2 +3.6.3.29 7.3.2.5 +3.6.3.30 7.2.2.7 +3.6.3.35 7.2.2.5 +3.6.3.36 7.6.2.7 +3.6.3.37 7.6.2.6 +3.6.3.39 7.5.2.5 +3.6.3.40 7.5.2.4 +3.6.3.41 7.6.2.5 +3.6.3.42 7.5.2.3 +3.6.3.43 7.4.2.5 +3.6.3.44 7.6.2.2 3.6.3.45 3.6.3.44 -3.6.3.n1 3.6.3.54 +3.6.3.46 7.2.2.2 +3.6.3.47 7.6.2.4 +3.6.3.48 7.4.2.7 +3.6.3.51 7.4.2.3 +3.6.3.52 7.4.2.4 +3.6.3.54 7.2.2.8 +3.6.3.n1 7.2.2.8 +3.6.4.3 5.6.1.1 3.7.1.15 4.2.1.138 3.7.1.16 3.3.2.12 3.7.1.n1 3.7.1.14 3.7.1.n2 3.7.1.22 3.8.1.4 1.21.99.4 3.8.2.1 3.1.8.2 +4.1.1.3 4.1.1.112 4.1.1.10 4.1.1.12 4.1.1.26 4.1.1.28 4.1.1.27 4.1.1.28 +4.1.1.41 7.2.4.3 4.1.1.n1 4.1.1.97 4.1.1.n2 4.1.1.94 4.1.2.1 4.1.3.16 @@ -1071,6 +1122,7 @@ 4.3.1.26 1.21.98.2 4.3.3.n1 4.1.99.20 4.3.99.1 4.2.1.104 +4.3.99.2 7.2.4.1 4.4.1.7 2.5.1.18 4.4.1.18 1.8.3.5 
4.4.1.27 3.13.1.5 diff --git a/c++/src/objects/seqfeat/ecnum_specific.inc b/c++/src/objects/seqfeat/ecnum_specific.inc index d1c7038e..19f615ea 100644 --- a/c++/src/objects/seqfeat/ecnum_specific.inc +++ b/c++/src/objects/seqfeat/ecnum_specific.inc @@ -1,4 +1,4 @@ -/* $Id: ecnum_specific.inc 573605 2018-10-30 11:56:53Z ivanov $ +/* $Id: ecnum_specific.inc 580651 2019-02-19 12:38:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -133,7 +133,7 @@ static const char* const kECNum_specific[] = { "1.1.1.106\tPantoate 4-dehydrogenase", "1.1.1.107\tPyridoxal 4-dehydrogenase", "1.1.1.108\tCarnitine 3-dehydrogenase", - "1.1.1.110\tIndolelactate dehydrogenase", + "1.1.1.110\tAromatic 2-oxoacid reductase", "1.1.1.111\t3-(imidazol-5-yl)lactate dehydrogenase", "1.1.1.112\tIndanol dehydrogenase", "1.1.1.113\tL-xylose 1-dehydrogenase", @@ -423,6 +423,8 @@ static const char* const kECNum_specific[] = { "1.1.1.411\tL-threonate 2-dehydrogenase", "1.1.1.412\t2-alkyl-3-oxoalkanoate reductase", "1.1.1.413\tA-factor type gamma-butyrolactone 1'-reductase (1S-forming)", + "1.1.1.414\tL-galactonate 5-dehydrogenase", + "1.1.1.415\tNoscapine synthase", "1.1.1.n4\t(-)-trans-carveol dehydrogenase", "1.1.1.n5\t3-methylmalate dehydrogenase", "1.1.1.n11\tSuccinic semialdehyde reductase", @@ -605,6 +607,7 @@ static const char* const kECNum_specific[] = { "1.2.1.98\t2-hydroxy-2-methylpropanal dehydrogenase", "1.2.1.99\t4-(gamma-glutamylamino)butanal dehydrogenase", "1.2.1.100\t5-formyl-3-hydroxy-2-methylpyridine 4-carboxylate 5-dehydrogenase", + "1.2.1.101\tL-tyrosine reductase", "1.2.1.n2\tFatty acyl-CoA reductase", "1.2.2.1\tFormate dehydrogenase (cytochrome)", "1.2.2.4\tCarbon-monoxide dehydrogenase (cytochrome b-561)", @@ -745,6 +748,8 @@ static const char* const kECNum_specific[] = { "1.3.1.115\t3-oxocholoyl-CoA 4-desaturase", "1.3.1.116\t7-beta-hydroxy-3-oxochol-24-oyl-CoA 4-desaturase", "1.3.1.117\tHydroxycinnamoyl-CoA 
reductase", + "1.3.1.118\tMeromycolic acid enoyl-[acyl-carrier-protein] reductase", + "1.3.1.119\tChlorobenzene dihydrodiol dehydrogenase", "1.3.1.n3\tCurcumin reductase", "1.3.2.3\tL-galactonolactone dehydrogenase", "1.3.3.3\tCoproporphyrinogen oxidase", @@ -923,6 +928,7 @@ static const char* const kECNum_specific[] = { "1.5.1.49\t1-pyrroline-2-carboxylate reductase (NAD(P)H)", "1.5.1.50\tDihydromonapterin reductase", "1.5.1.51\tN-((2S)-2-amino-2-carboxyethyl)-L-glutamate dehydrogenase", + "1.5.1.52\tStaphylopine dehydrogenase", "1.5.3.1\tSarcosine oxidase", "1.5.3.2\tN-methyl-L-amino-acid oxidase", "1.5.3.4\tN(6)-methyl-lysine oxidase", @@ -967,7 +973,6 @@ static const char* const kECNum_specific[] = { "1.6.1.2\tNAD(P)(+) transhydrogenase (Re/Si-specific)", "1.6.1.3\tNAD(P)(+) transhydrogenase", "1.6.1.4\tNAD(P)(+) transhydrogenase (ferredoxin)", - "1.6.1.5\tProton-translocating NAD(P)(+) transhydrogenase", "1.6.2.2\tCytochrome-b5 reductase", "1.6.2.4\tNADPH--hemoprotein reductase", "1.6.2.5\tNADPH--cytochrome-c2 reductase", @@ -978,17 +983,14 @@ static const char* const kECNum_specific[] = { "1.6.3.4\tNADH oxidase (H(2)O-forming)", "1.6.3.5\tRenalase", "1.6.5.2\tNAD(P)H dehydrogenase (quinone)", - "1.6.5.3\tNADH:ubiquinone reductase (H(+)-translocating)", "1.6.5.4\tMonodehydroascorbate reductase (NADH)", "1.6.5.5\tNADPH:quinone reductase", "1.6.5.6\tp-benzoquinone reductase (NADPH)", "1.6.5.7\t2-hydroxy-1,4-benzoquinone reductase", - "1.6.5.8\tNADH:ubiquinone reductase (Na(+)-transporting)", "1.6.5.9\tNADH:ubiquinone reductase (non-electrogenic)", "1.6.5.10\tNADPH dehydrogenase (quinone)", "1.6.5.11\tNADH dehydrogenase (quinone)", "1.6.5.12\tDemethylphylloquinone reductase", - "1.6.6.9\tTrimethylamine-N-oxide reductase", "1.6.99.1\tNADPH dehydrogenase", "1.6.99.3\tNADH dehydrogenase", "1.6.99.n1\tNADPH dehydrogenase (coenzyme F420 dependent)", @@ -1074,7 +1076,7 @@ static const char* const kECNum_specific[] = { "1.8.4.14\tL-methionine (R)-S-oxide reductase", 
"1.8.5.1\tGlutathione dehydrogenase (ascorbate)", "1.8.5.2\tThiosulfate dehydrogenase (quinone)", - "1.8.5.3\tDimethylsulfoxide reductase", + "1.8.5.3\tRespiratory dimethylsulfoxide reductase", "1.8.5.4\tBacterial sulfide:quinone reductase", "1.8.5.5\tThiosulfate reductase (quinone)", "1.8.5.6\tSulfite dehydrogenase (quinone)", @@ -1104,15 +1106,10 @@ static const char* const kECNum_specific[] = { "1.10.3.5\t3-hydroxyanthranilate oxidase", "1.10.3.6\tRifamycin-B oxidase", "1.10.3.9\tPhotosystem II", - "1.10.3.10\tUbiquinol oxidase (H(+)-transporting)", "1.10.3.11\tUbiquinol oxidase (non-electrogenic)", - "1.10.3.12\tMenaquinol oxidase (H(+)-transporting)", - "1.10.3.13\tCaldariellaquinol oxidase (H(+)-transporting)", - "1.10.3.14\tUbiquinol oxidase (electrogenic, non H(+)-transporting)", "1.10.3.15\tGrixazone synthase", "1.10.3.16\tDihydrophenazinedicarboxylate synthase", "1.10.5.1\tRibosyldihydronicotinamide dehydrogenase (quinone)", - "1.10.9.1\tPlastoquinol--plastocyanin reductase", "1.11.1.1\tNADH peroxidase", "1.11.1.2\tNADPH peroxidase", "1.11.1.3\tFatty-acid peroxidase", @@ -1231,6 +1228,8 @@ static const char* const kECNum_specific[] = { "1.13.11.83\t4-hydroxy-3-prenylphenylpyruvate oxygenase", "1.13.11.84\tCrocetin dialdehyde synthase", "1.13.11.85\tExo-cleaving rubber dioxygenase", + "1.13.11.86\t5-aminosalicylate 1,2-dioxygenase", + "1.13.11.87\tEndo-cleaving rubber dioxygenase", "1.13.12.1\tArginine 2-monooxygenase", "1.13.12.2\tLysine 2-monooxygenase", "1.13.12.3\tTryptophan 2-monooxygenase", @@ -1305,6 +1304,7 @@ static const char* const kECNum_specific[] = { "1.14.11.57\tL-proline trans-4-hydroxylase", "1.14.11.58\tOrnithine lipid ester-linked acyl 2-hydroxylase", "1.14.11.59\t2,4-dihydroxy-1,4-benzoxazin-3-one-glucoside dioxygenase", + "1.14.11.60\tScopoletin 8-hydroxylase", "1.14.11.n2\tMethylcytosine dioxygenase", "1.14.11.n4\tAnkyrin-repeat-histidine dioxagenase", "1.14.12.1\tAnthranilate 1,2-dioxygenase (deaminating, decarboxylating)", @@ 
-1326,6 +1326,7 @@ static const char* const kECNum_specific[] = { "1.14.12.23\tNitroarene dioxygenase", "1.14.12.24\t2,4-dinitrotoluene dioxygenase", "1.14.12.25\tp-cumate 2,3-dioxygenase", + "1.14.12.26\tChlorobenzene dioxygenase", "1.14.13.1\tSalicylate 1-monooxygenase", "1.14.13.2\t4-hydroxybenzoate 3-monooxygenase", "1.14.13.4\tMelilotate 3-monooxygenase", @@ -1382,13 +1383,11 @@ static const char* const kECNum_specific[] = { "1.14.13.114\t6-hydroxynicotinate 3-monooxygenase", "1.14.13.116\tGeranylhydroquinone 3''-hydroxylase", "1.14.13.122\tChlorophyllide a oxygenase", - "1.14.13.125\tTryptophan N-monooxygenase", "1.14.13.127\t3-(3-hydroxy-phenyl)propanoic acid hydroxylase", "1.14.13.128\t7-methylxanthine demethylase", "1.14.13.130\tPyrrole-2-carboxylate monooxygenase", - "1.14.13.131\tDimethyl-sulfide monooxygenase", + "1.14.13.131\tDissimilatory dimethyl-sulfide monooxygenase", "1.14.13.135\t1-hydroxy-2-naphthoate hydroxylase", - "1.14.13.138\tIndolin-2-one monooxygenase", "1.14.13.146\tTaxoid 14-beta-hydroxylase", "1.14.13.147\tTaxoid 7-beta-hydroxylase", "1.14.13.148\tTrimethylamine monooxygenase", @@ -1600,6 +1599,17 @@ static const char* const kECNum_specific[] = { "1.14.14.152\tBeta-amyrin 11-oxidase", "1.14.14.153\tIndole-2-monooxygenase", "1.14.14.154\tSterol 14-alpha-demethylase", + "1.14.14.155\t3,6-diketocamphane 1,2-monooxygenase", + "1.14.14.156\tTryptophan N-monooxygenase", + "1.14.14.157\tIndolin-2-one monooxygenase", + "1.14.14.158\tCarotene epsilon-hydroxylase", + "1.14.14.159\tDolabradiene monooxygenase", + "1.14.14.160\tZealexin A1 synthase", + "1.14.14.161\tNepetalactol monooxygenase", + "1.14.14.162\tFlavanone 2-hydroxylase", + "1.14.14.163\t(S)-1-hydroxy-N-methylcanadine 13-hydroxylase", + "1.14.14.164\tFraxetin 5-hydroxylase", + "1.14.14.165\tIndole-3-carbonyl nitrile 4-hydroxylase", "1.14.15.1\tCamphor 5-monooxygenase", "1.14.15.3\tAlkane 1-monooxygenase", "1.14.15.4\tSteroid 11-beta-monooxygenase", @@ -1724,6 +1734,8 @@ static const 
char* const kECNum_specific[] = { "1.14.19.71\tFumitremorgin C synthase", "1.14.19.72\t(-)-pluviatolide synthase", "1.14.19.73\t(S)-nandinine synthase", + "1.14.19.74\t(+)-piperitol/(+)-sesamin synthase", + "1.14.19.75\tVery-long-chain acyl-lipid omega-9 desaturase", "1.14.19.n4\tStearoyl-CoA 9-desaturase", "1.14.19.n5\tVersicolorin B desaturase", "1.14.20.1\tDeacetoxycephalosporin-C synthase", @@ -1738,6 +1750,8 @@ static const char* const kECNum_specific[] = { "1.14.20.11\t3-((Z)-2-isocyanoethenyl)-1H-indole synthase", "1.14.20.12\t3-((E)-2-isocyanoethenyl)-1H-indole synthase", "1.14.20.13\t6-beta-hydroxyhyoscyamine epoxidase", + "1.14.20.14\tHapalindole-type alkaloid chlorinase", + "1.14.20.15\tL-threonyl-[L-threonyl-carrier protein] 4-chlorinase", "1.14.99.1\tProstaglandin-endoperoxide synthase", "1.14.99.2\tKynurenine 7,8-hydroxylase", "1.14.99.4\tProgesterone monooxygenase", @@ -1759,7 +1773,6 @@ static const char* const kECNum_specific[] = { "1.14.99.38\tCholesterol 25-hydroxylase", "1.14.99.39\tAmmonia monooxygenase", "1.14.99.44\tDiapolycopene oxygenase", - "1.14.99.45\tCarotene epsilon-monooxygenase", "1.14.99.46\tPyrimidine monooxygenase", "1.14.99.47\t(+)-larreatricin hydroxylase", "1.14.99.48\tHeme oxygenase (staphylobilin-producing)", @@ -1774,6 +1787,10 @@ static const char* const kECNum_specific[] = { "1.14.99.58\tHeme oxygenase (biliverdin-IX-beta and delta-forming)", "1.14.99.59\tTryptamine 4-monooxygenase", "1.14.99.60\t3-demethoxyubiquinol 3-hydroxylase", + "1.14.99.61\tCyclooctat-9-en-7-ol 5-monooxygenase", + "1.14.99.62\tCyclooctatin synthase", + "1.14.99.63\tBeta-carotene 4-ketolase", + "1.14.99.64\tZeaxanthin 4-ketolase", "1.14.99.n4\tCarotenoid 9,10-dioxygenase", "1.15.1.1\tSuperoxide dismutase", "1.15.1.2\tSuperoxide reductase", @@ -1790,7 +1807,6 @@ static const char* const kECNum_specific[] = { "1.16.3.1\tFerroxidase", "1.16.3.2\tBacterial non-heme ferritin", "1.16.3.3\tManganese oxidase", - "1.16.5.1\tAscorbate ferrireductase 
(transmembrane)", "1.16.8.1\tCob(II)yrinic acid a,c-diamide reductase", "1.16.9.1\tIron:rusticyanin reductase", "1.17.1.1\tCDP-4-dehydro-6-deoxyglucose reductase", @@ -1835,7 +1851,6 @@ static const char* const kECNum_specific[] = { "1.18.1.5\tPutidaredoxin--NAD(+) reductase", "1.18.1.6\tAdrenodoxin-NADP(+) reductase", "1.18.1.7\tFerredoxin--NAD(P)(+) reductase (naphthalene dioxygenase ferredoxin-specific)", - "1.18.1.8\tFerredoxin-NAD(+) oxidoreductase (Na(+)-transporting)", "1.18.6.1\tNitrogenase", "1.18.6.2\tVanadium-dependent nitrogenase", "1.19.1.1\tFlavodoxin--NADP(+) reductase", @@ -1858,13 +1873,14 @@ static const char* const kECNum_specific[] = { "1.21.3.6\tAureusidin synthase", "1.21.3.7\tTetrahydrocannabinolic acid synthase", "1.21.3.8\tCannabidiolic acid synthase", - "1.21.4.1\tD-proline reductase (dithiol)", + "1.21.4.1\tD-proline reductase", "1.21.4.2\tGlycine reductase", "1.21.4.3\tSarcosine reductase", "1.21.4.4\tBetaine reductase", "1.21.98.1\tCyclic dehypoxanthinyl futalosine synthase", "1.21.98.2\tDichlorochromopyrrolate synthase", "1.21.98.3\tAnaerobic magnesium-protoporphyrin IX monomethyl ester cyclase", + "1.21.98.4\tPqqA peptide cyclase", "1.21.99.1\tBeta-cyclopiazonate dehydrogenase", "1.21.99.3\tThyroxine 5-deiodinase", "1.21.99.4\tThyroxine 5'-deiodinase", @@ -1957,7 +1973,7 @@ static const char* const kECNum_specific[] = { "2.1.1.90\tMethanol--corrinoid protein Co-methyltransferase", "2.1.1.91\tIsobutyraldoxime O-methyltransferase", "2.1.1.94\tTabersonine 16-O-methyltransferase", - "2.1.1.95\tTocopherol O-methyltransferase", + "2.1.1.95\tTocopherol C-methyltransferase", "2.1.1.96\tThioether S-methyltransferase", "2.1.1.97\t3-hydroxyanthranilate 4-C-methyltransferase", "2.1.1.98\tDiphthine synthase", @@ -2203,6 +2219,8 @@ static const char* const kECNum_specific[] = { "2.1.1.346\tU6 snRNA m(6)A methyltransferase", "2.1.1.347\t(+)-O-methylkolavelool synthase", "2.1.1.348\tmRNA m(6)A methyltransferase", + "2.1.1.349\tToxoflavin synthase", + 
"2.1.1.350\tMenaquinone C(8)-methyltransferase", "2.1.1.n1\tResorcinol O-methyltransferase", "2.1.1.n4\tThiocyanate methyltransferase", "2.1.1.n7\t5-pentadecatrienyl resorcinol O-methyltransferase", @@ -2329,7 +2347,7 @@ static const char* const kECNum_specific[] = { "2.3.1.83\tPhosphatidylcholine--dolichol O-acyltransferase", "2.3.1.84\tAlcohol O-acetyltransferase", "2.3.1.85\tFatty-acid synthase", - "2.3.1.86\tFatty-acyl-CoA synthase", + "2.3.1.86\tFatty-acyl-CoA synthase system", "2.3.1.87\tAralkylamine N-acetyltransferase", "2.3.1.89\tTetrahydrodipicolinate N-acetyltransferase", "2.3.1.90\tBeta-glucogallin O-galloyltransferase", @@ -2508,8 +2526,10 @@ static const char* const kECNum_specific[] = { "2.3.1.270\tLyso-ornithine lipid O-acyltransferase", "2.3.1.271\tL-glutamate-5-semialdehyde N-acetyltransferase", "2.3.1.272\t2-acetylphloroglucinol acetyltransferase", - "2.3.1.n2\tPhosphate acyltransferase", - "2.3.1.n3\tGlycerol-3-phosphate acyltransferase (acyl-phosphate transferring)", + "2.3.1.273\tDiglucosylglycerate octanoyltransferase", + "2.3.1.274\tPhosphate acyltransferase", + "2.3.1.275\tAcyl phosphate:glycerol-3-phosphate acyltransferase", + "2.3.1.276\tGalactosamine-1-phosphate N-acetyltransferase", "2.3.1.n4\t1-acyl-sn-glycerol-3-phosphate acyltransferase", "2.3.1.n5\tGlycerol-3-phosphate acyltransferase (acyl-[acyl-carrier-protein]-transferring)", "2.3.1.n6\t1-acylglycerophosphoserine O-acyltransferase", @@ -2889,6 +2909,8 @@ static const char* const kECNum_specific[] = { "2.4.1.355\tPoly(ribitol-phosphate) beta-N-acetylglucosaminyltransferase", "2.4.1.356\tGlucosyl-dolichyl phosphate glucuronosyltransferase", "2.4.1.357\tPhlorizin synthase", + "2.4.1.358\tAcylphloroglucinol glucosyltransferase", + "2.4.1.359\tGlucosylglycerol phosphorylase (configuration-retaining)", "2.4.1.n2\tLoliose synthase", "2.4.2.1\tPurine-nucleoside phosphorylase", "2.4.2.2\tPyrimidine-nucleoside phosphorylase", @@ -2947,6 +2969,7 @@ static const char* const kECNum_specific[] 
= { "2.4.2.58\tHydroxyproline O-arabinosyltransferase", "2.4.2.59\tSulfide-dependent adenosine diphosphate thiazole synthase", "2.4.2.60\tCysteine-dependent adenosine diphosphate thiazole synthase", + "2.4.2.61\tAlpha-dystroglycan beta-1,4-xylosyltransferase", "2.4.2.n2\tGlucoside xylosyltransferase", "2.4.2.n3\tXyloside xylosyltransferase", "2.4.99.1\tBeta-galactoside alpha-(2,6)-sialyltransferase", @@ -2980,7 +3003,7 @@ static const char* const kECNum_specific[] = { "2.5.1.10\t(2E,6E)-farnesyl diphosphate synthase", "2.5.1.15\tDihydropteroate synthase", "2.5.1.16\tSpermidine synthase", - "2.5.1.17\tCob(I)yrinic acid a,c-diamide adenosyltransferase", + "2.5.1.17\tCorrinoid adenosyltransferase", "2.5.1.18\tGlutathione transferase", "2.5.1.19\t3-phosphoshikimate 1-carboxyvinyltransferase", "2.5.1.20\tRubber cis-polyprenylcistransferase", @@ -3036,7 +3059,6 @@ static const char* const kECNum_specific[] = { "2.5.1.74\t1,4-dihydroxy-2-naphthoate polyprenyltransferase", "2.5.1.75\ttRNA dimethylallyltransferase", "2.5.1.76\tCysteate synthase", - "2.5.1.77\t7,8-didemethyl-8-hydroxy-5-deazariboflavin synthase", "2.5.1.78\t6,7-dimethyl-8-ribityllumazine synthase", "2.5.1.79\tThermospermine synthase", "2.5.1.80\t7-dimethylallyltryptophan synthase", @@ -3105,6 +3127,7 @@ static const char* const kECNum_specific[] = { "2.5.1.144\tS-sulfo-L-cysteine synthase (O-acetyl-L-serine-dependent)", "2.5.1.145\tPhosphatidylglycerol--prolipoprotein diacylglyceryl transferase", "2.5.1.146\t3-geranyl-3-((Z)-2-isocyanoethenyl)indole synthase", + "2.5.1.147\t5-amino-6-(D-ribitylamino)uracil--L-tyrosine 4-hydroxyphenyl transferase", "2.5.1.n9\tHeptaprenylglyceryl phosphate synthase", "2.6.1.1\tAspartate transaminase", "2.6.1.2\tAlanine transaminase", @@ -3404,6 +3427,8 @@ static const char* const kECNum_specific[] = { "2.7.1.220\tD-erythronate 4-kinase", "2.7.1.221\tN-acetylmuramate 1-kinase", "2.7.1.222\t4-hydroxytryptamine kinase", + "2.7.1.223\tAminoimidazole riboside kinase", + 
"2.7.1.224\tCytidine diphosphoramidate kinase", "2.7.2.1\tAcetate kinase", "2.7.2.2\tCarbamate kinase", "2.7.2.3\tPhosphoglycerate kinase", @@ -3550,7 +3575,10 @@ static const char* const kECNum_specific[] = { "2.7.7.97\t3-hydroxy-4-methylanthranilate adenylyltransferase", "2.7.7.99\tN-acetyl-alpha-D-muramate 1-phosphate uridylyltransferase", "2.7.7.100\tSAMP-activating enzyme", - "2.7.7.n1\tAdenosine monophosphate-protein transferase", + "2.7.7.101\tDNA primase DnaG", + "2.7.7.102\tDNA primase AEP", + "2.7.7.103\tL-glutamine-phosphate cytidylyltransferase", + "2.7.7.n1\tProtein adenylyltransferase", "2.7.7.n6\tGuanine phosphate-protein transferase", "2.7.8.1\tEthanolaminephosphotransferase", "2.7.8.2\tDiacylglycerol cholinephosphotransferase", @@ -3728,6 +3756,7 @@ static const char* const kECNum_specific[] = { "2.8.4.4\t[Ribosomal protein S12] (aspartate(89)-C(3))-methylthiotransferase", "2.8.4.5\ttRNA (N(6)-L-threonylcarbamoyladenosine(37)-C(2))-methylthiotransferase", "2.8.5.1\tS-sulfo-L-cysteine synthase (3-phospho-L-serine-dependent)", + "2.8.5.2\tL-cysteine S-thiosulfotransferase", "2.9.1.1\tL-seryl-tRNA(Sec) selenium transferase", "2.9.1.2\tO-phospho-L-seryl-tRNA(Sec):L-selenocysteinyl-tRNA synthase", "2.10.1.1\tMolybdopterin molybdotransferase", @@ -3827,6 +3856,7 @@ static const char* const kECNum_specific[] = { "3.1.1.101\tPoly(ethylene terephthalate) hydrolase", "3.1.1.102\tMono(ethylene terephthalate) hydrolase", "3.1.1.103\tTeichoic acid D-alanine hydrolase", + "3.1.1.104\t5-phospho-D-xylono-1,4-lactonase", "3.1.1.n2\tProtein-S-isoprenylcysteine alpha-carbonyl methylesterase", "3.1.2.1\tAcetyl-CoA hydrolase", "3.1.2.2\tPalmitoyl-CoA hydrolase", @@ -4267,6 +4297,7 @@ static const char* const kECNum_specific[] = { "3.2.1.205\tIsomaltose glucohydrolase", "3.2.1.206\tOleuropein beta-glucosidase", "3.2.1.207\tMannosyl-oligosaccharide alpha-1,3-glucosidase", + "3.2.1.208\tGlucosylglycerate hydrolase", "3.2.1.n1\tBlood group B branched chain 
alpha-1,3-galactosidase", "3.2.1.n2\tBlood group B linear chain alpha-1,3-galactosidase", "3.2.1.n3\tDictyostelium lysozyme A", @@ -4395,7 +4426,7 @@ static const char* const kECNum_specific[] = { "3.4.19.5\tBeta-aspartyl-peptidase", "3.4.19.6\tPyroglutamyl-peptidase II", "3.4.19.7\tN-formylmethionyl-peptidase", - "3.4.19.9\tGamma-glutamyl hydrolase", + "3.4.19.9\tFolate gamma-glutamyl hydrolase", "3.4.19.11\tGamma-D-glutamyl-meso-diaminopimelate peptidase", "3.4.19.12\tUbiquitinyl hydrolase 1", "3.4.19.13\tGlutathione hydrolase", @@ -4806,6 +4837,7 @@ static const char* const kECNum_specific[] = { "3.5.1.126\tOxamate amidohydrolase", "3.5.1.127\tJasmonoyl-L-amino acid hydrolase", "3.5.1.128\tDeaminated glutathione amidase", + "3.5.1.129\tN(5)-(cytidine 5'-diphosphoramidyl)-L-glutamine hydrolase", "3.5.1.n3\t4-deoxy-4-formamido-L-arabinose-phosphoundecaprenol deformylase", "3.5.2.1\tBarbiturase", "3.5.2.2\tDihydropyrimidinase", @@ -4969,62 +5001,29 @@ static const char* const kECNum_specific[] = { "3.6.1.n3\tL-cysteinyl-tRNA(Cys) hydrolase", "3.6.2.1\tAdenylylsulfatase", "3.6.2.2\tPhosphoadenylylsulfatase", - "3.6.3.1\tPhospholipid-translocating ATPase", "3.6.3.2\tMagnesium-importing ATPase", "3.6.3.3\tCadmium-exporting ATPase", "3.6.3.4\tCu(2+)-exporting ATPase", "3.6.3.5\tZinc-exporting ATPase", - "3.6.3.6\tProton-exporting ATPase", - "3.6.3.7\tSodium-exporting ATPase", "3.6.3.8\tCalcium-transporting ATPase", "3.6.3.9\tSodium/potassium-exchanging ATPase", "3.6.3.10\tHydrogen/potassium-exchanging ATPase", "3.6.3.11\tChloride-transporting ATPase", - "3.6.3.12\tPotassium-transporting ATPase", - "3.6.3.14\tH(+)-transporting two-sector ATPase", - "3.6.3.15\tSodium-transporting two-sector ATPase", "3.6.3.16\tArsenite-transporting ATPase", "3.6.3.17\tMonosaccharide-transporting ATPase", - "3.6.3.18\tOligosaccharide-transporting ATPase", - "3.6.3.19\tMaltose-transporting ATPase", "3.6.3.20\tGlycerol-3-phosphate-transporting ATPase", - 
"3.6.3.21\tPolar-amino-acid-transporting ATPase", - "3.6.3.22\tNonpolar-amino-acid-transporting ATPase", - "3.6.3.23\tOligopeptide-transporting ATPase", "3.6.3.24\tNickel-transporting ATPase", - "3.6.3.25\tSulfate-transporting ATPase", - "3.6.3.26\tNitrate-transporting ATPase", - "3.6.3.27\tPhosphate-transporting ATPase", - "3.6.3.28\tPhosphonate-transporting ATPase", - "3.6.3.29\tMolybdate-transporting ATPase", - "3.6.3.30\tFe(3+)-transporting ATPase", "3.6.3.31\tPolyamine-transporting ATPase", "3.6.3.32\tQuaternary-amine-transporting ATPase", "3.6.3.33\tVitamin B12-transporting ATPase", "3.6.3.34\tIron-chelate-transporting ATPase", - "3.6.3.35\tManganese-transporting ATPase", - "3.6.3.36\tTaurine-transporting ATPase", - "3.6.3.37\tGuanine-transporting ATPase", "3.6.3.38\tCapsular-polysaccharide-transporting ATPase", - "3.6.3.39\tLipopolysaccharide-transporting ATPase", - "3.6.3.40\tTeichoic-acid-transporting ATPase", - "3.6.3.41\tHeme-transporting ATPase", - "3.6.3.42\tBeta-glucan-transporting ATPase", - "3.6.3.43\tPeptide-transporting ATPase", - "3.6.3.44\tXenobiotic-transporting ATPase", - "3.6.3.46\tCadmium-transporting ATPase", - "3.6.3.47\tFatty-acyl-CoA-transporting ATPase", - "3.6.3.48\tAlpha-factor-transporting ATPase", "3.6.3.49\tChannel-conductance-controlling ATPase", "3.6.3.50\tProtein-secreting ATPase", - "3.6.3.51\tMitochondrial protein-transporting ATPase", - "3.6.3.52\tChloroplast protein-transporting ATPase", "3.6.3.53\tAg(+)-exporting ATPase", - "3.6.3.54\tCu(+) exporting ATPase", "3.6.3.55\tTungstate-importing ATPase", "3.6.4.1\tMyosin ATPase", "3.6.4.2\tDynein ATPase", - "3.6.4.3\tMicrotubule-severing ATPase", "3.6.4.4\tPlus-end-directed kinesin ATPase", "3.6.4.5\tMinus-end-directed kinesin ATPase", "3.6.4.6\tVesicle-fusing ATPase", @@ -5032,7 +5031,6 @@ static const char* const kECNum_specific[] = { "3.6.4.8\tProteasome ATPase", "3.6.4.9\tChaperonin ATPase", "3.6.4.10\tNon-chaperonin molecular chaperone ATPase", - "3.6.4.11\tNucleoplasmin 
ATPase", "3.6.4.12\tDNA helicase", "3.6.4.13\tRNA helicase", "3.6.5.1\tHeterotrimeric G-protein GTPase", @@ -5091,10 +5089,9 @@ static const char* const kECNum_specific[] = { "3.13.1.8\tS-adenosyl-L-methionine hydrolase (adenosine-forming)", "4.1.1.1\tPyruvate decarboxylase", "4.1.1.2\tOxalate decarboxylase", - "4.1.1.3\tOxaloacetate decarboxylase", "4.1.1.4\tAcetoacetate decarboxylase", "4.1.1.5\tAcetolactate decarboxylase", - "4.1.1.6\tAconitate decarboxylase", + "4.1.1.6\tCis-aconitate decarboxylase", "4.1.1.7\tBenzoylformate decarboxylase", "4.1.1.8\tOxalyl-CoA decarboxylase", "4.1.1.9\tMalonyl-CoA decarboxylase", @@ -5125,7 +5122,6 @@ static const char* const kECNum_specific[] = { "4.1.1.38\tPhosphoenolpyruvate carboxykinase (diphosphate)", "4.1.1.39\tRibulose-bisphosphate carboxylase", "4.1.1.40\tHydroxypyruvate decarboxylase", - "4.1.1.41\tMethylmalonyl-CoA decarboxylase", "4.1.1.42\tCarnitine decarboxylase", "4.1.1.43\tPhenylpyruvate decarboxylase", "4.1.1.44\t4-carboxymuconolactone decarboxylase", @@ -5196,6 +5192,8 @@ static const char* const kECNum_specific[] = { "4.1.1.109\tPhenylacetaldehyde synthase", "4.1.1.110\tBisphosphomevalonate decarboxylase", "4.1.1.111\tSiroheme decarboxylase", + "4.1.1.112\tOxaloacetate decarboxylase", + "4.1.1.113\tTrans-aconitate decarboxylase", "4.1.2.2\tKetotetrose-phosphate aldolase", "4.1.2.4\tDeoxyribose-phosphate aldolase", "4.1.2.5\tL-threonine aldolase", @@ -5288,6 +5286,8 @@ static const char* const kECNum_specific[] = { "4.1.99.20\t3-amino-4-hydroxybenzoate synthase", "4.1.99.22\tGTP 3',8-cyclase", "4.1.99.23\t5-hydroxybenzimidazole synthase", + "4.1.99.24\tL-tyrosine isonitrile synthase", + "4.1.99.25\tL-tryptophan isonitrile synthase", "4.2.1.1\tCarbonic anhydrase", "4.2.1.2\tFumarate hydratase", "4.2.1.3\tAconitate hydratase", @@ -5695,6 +5695,7 @@ static const char* const kECNum_specific[] = { "4.3.1.29\tD-glucosaminate-6-phosphate ammonia lyase", "4.3.1.30\tdTDP-4-amino-4,6-dideoxy-D-glucose ammonia-lyase", 
"4.3.1.31\tL-tryptophan ammonia lyase", + "4.3.1.32\t7,8-didemethyl-8-hydroxy-5-deazariboflavin synthase", "4.3.2.1\tArgininosuccinate lyase", "4.3.2.2\tAdenylosuccinate lyase", "4.3.2.3\tUreidoglycolate lyase", @@ -5704,6 +5705,7 @@ static const char* const kECNum_specific[] = { "4.3.2.7\tGlutathione-specific gamma-glutamylcyclotransferase", "4.3.2.8\tGamma-glutamylamine cyclotransferase", "4.3.2.9\tGamma-glutamylcyclotransferase", + "4.3.2.10\tImidazole glycerol-phosphate synthase", "4.3.3.1\t3-ketovalidoxylamine C-N-lyase", "4.3.3.2\tStrictosidine synthase", "4.3.3.3\tDeacetylisoipecoside synthase", @@ -5711,7 +5713,6 @@ static const char* const kECNum_specific[] = { "4.3.3.5\t4'-demethylrebeccamycin synthase", "4.3.3.6\tPyridoxal 5'-phosphate synthase (glutamine hydrolyzing)", "4.3.3.7\t4-hydroxy-tetrahydrodipicolinate synthase", - "4.3.99.2\tCarboxybiotin decarboxylase", "4.3.99.3\t7-carboxy-7-deazaguanine synthase", "4.3.99.4\tCholine trimethylamine-lyase", "4.4.1.1\tCystathionine gamma-lyase", @@ -5843,6 +5844,7 @@ static const char* const kECNum_specific[] = { "5.1.3.38\tD-erythrulose 1-phosphate 3-epimerase", "5.1.3.40\tD-tagatose 6-phosphate 4-epimerase", "5.1.3.41\tFructoselysine 3-epimerase", + "5.1.3.42\tD-glucosamine-6-phosphate 4-epimerase", "5.1.99.1\tMethylmalonyl-CoA epimerase", "5.1.99.2\t16-hydroxysteroid epimerase", "5.1.99.3\tAllantoin racemase", @@ -5944,6 +5946,7 @@ static const char* const kECNum_specific[] = { "5.4.2.10\tPhosphoglucosamine mutase", "5.4.2.11\tPhosphoglycerate mutase (2,3-diphosphoglycerate-dependent)", "5.4.2.12\tPhosphoglycerate mutase (2,3-diphosphoglycerate-independent)", + "5.4.2.13\tPhosphogalactosamine mutase", "5.4.3.2\tLysine 2,3-aminomutase", "5.4.3.3\tLysine 5,6-aminomutase", "5.4.3.4\tD-lysine 5,6-aminomutase", @@ -6058,6 +6061,7 @@ static const char* const kECNum_specific[] = { "5.5.1.31\tHapalindole H synthase", "5.5.1.32\t12-epi-hapalindole U synthase", "5.5.1.33\t12-epi-fischerindole U synthase", + 
"5.6.1.1\tMicrotubule-severing ATPase", "5.99.1.1\tThiocyanate isomerase", "5.99.1.2\tDNA topoisomerase", "5.99.1.3\tDNA topoisomerase (ATP-hydrolyzing)", @@ -6091,7 +6095,7 @@ static const char* const kECNum_specific[] = { "6.1.2.2\tNebramycin 5' synthase", "6.1.3.1\tOlefin beta-lactone synthetase", "6.2.1.1\tAcetate--CoA ligase", - "6.2.1.2\tButyrate--CoA ligase", + "6.2.1.2\tMedium-chain acyl-CoA ligase", "6.2.1.3\tLong-chain-fatty-acid--CoA ligase", "6.2.1.4\tSuccinate--CoA ligase (GDP-forming)", "6.2.1.5\tSuccinate--CoA ligase (ADP-forming)", @@ -6269,5 +6273,54 @@ static const char* const kECNum_specific[] = { "6.5.1.7\tDNA ligase (ATP, ADP or GTP)", "6.5.1.8\t3'-phosphate/5'-hydroxy nucleic acid ligase", "6.6.1.1\tMagnesium chelatase", - "6.6.1.2\tCobaltochelatase" + "6.6.1.2\tCobaltochelatase", + "7.1.1.1\tProton-translocating NAD(P)(+) transhydrogenase", + "7.1.1.2\tNADH:ubiquinone reductase (H(+)-translocating)", + "7.1.1.3\tUbiquinol oxidase (H(+)-transporting)", + "7.1.1.4\tCaldariellaquinol oxidase (H(+)-transporting)", + "7.1.1.5\tMenaquinol oxidase (H(+)-transporting)", + "7.1.1.6\tPlastoquinol--plastocyanin reductase", + "7.1.1.7\tUbiquinol oxidase (electrogenic, proton-motive force generating)", + "7.1.2.1\tP-type H(+)-exporting transporter", + "7.1.2.2\tH(+)-transporting two-sector ATPase", + "7.1.3.1\tH(+)-exporting diphosphatase", + "7.2.1.1\tNADH:ubiquinone reductase (Na(+)-transporting)", + "7.2.1.2\tFerredoxin--NAD(+) oxidoreductase (Na(+)-transporting)", + "7.2.1.3\tAscorbate ferrireductase (transmembrane)", + "7.2.2.1\tNa(+)-transporting two-sector ATPase", + "7.2.2.2\tABC-type Cd(2+) transporter", + "7.2.2.3\tP-type Na(+) transporter", + "7.2.2.4\tABC-type Na(+) transporter", + "7.2.2.5\tABC-type Mn(2+) transporter", + "7.2.2.6\tP-type K(+) transporter", + "7.2.2.7\tABC-type Fe(3+) transporter", + "7.2.2.8\tP-type Cu(+) transporter", + "7.2.4.1\tCarboxybiotin decarboxylase", + "7.2.4.2\tOxaloacetate decarboxylase (Na(+) extruding)", + 
"7.2.4.3\t(S)-methylmalonyl-CoA decarboxylase (sodium-transporting)", + "7.3.2.1\tABC-type phosphate transporter", + "7.3.2.2\tABC-type phosphonate transporter", + "7.3.2.3\tABC-type sulfate transporter", + "7.3.2.4\tABC-type nitrate transporter", + "7.3.2.5\tABC-type molybdate transporter", + "7.4.2.1\tABC-type polar-amino-acid transporter", + "7.4.2.2\tABC-type nonpolar-amino-acid transporter", + "7.4.2.3\tMitochondrial protein-transporting ATPase", + "7.4.2.4\tChloroplast protein-transporting ATPase", + "7.4.2.5\tABC-type protein transporter", + "7.4.2.6\tABC-type oligopeptide transporter", + "7.4.2.7\tABC-type alpha-factor-pheromone transporter", + "7.5.2.1\tABC-type maltose transporter", + "7.5.2.2\tABC-type oligosaccharide transporter", + "7.5.2.3\tABC-type beta-glucan transporter", + "7.5.2.4\tABC-type teichoic-acid transporter", + "7.5.2.5\tABC-type lipopolysaccharide transporter", + "7.5.2.6\tABC-type lipid A-core oligosaccharide transporter", + "7.6.2.1\tP-type phospholipid transporter", + "7.6.2.2\tABC-type xenobiotic transporter", + "7.6.2.3\tABC-type glutathione-S-conjugate transporter", + "7.6.2.4\tABC-type fatty-acyl-CoA transporter", + "7.6.2.5\tABC-type heme transporter", + "7.6.2.6\tABC-type guanine transporter", + "7.6.2.7\tABC-type taurine transporter" }; diff --git a/c++/src/objects/seqfeat/ecnum_specific.txt b/c++/src/objects/seqfeat/ecnum_specific.txt index 3c3c2df1..b2d463e2 100644 --- a/c++/src/objects/seqfeat/ecnum_specific.txt +++ b/c++/src/objects/seqfeat/ecnum_specific.txt @@ -100,7 +100,7 @@ 1.1.1.106 Pantoate 4-dehydrogenase 1.1.1.107 Pyridoxal 4-dehydrogenase 1.1.1.108 Carnitine 3-dehydrogenase -1.1.1.110 Indolelactate dehydrogenase +1.1.1.110 Aromatic 2-oxoacid reductase 1.1.1.111 3-(imidazol-5-yl)lactate dehydrogenase 1.1.1.112 Indanol dehydrogenase 1.1.1.113 L-xylose 1-dehydrogenase @@ -390,6 +390,8 @@ 1.1.1.411 L-threonate 2-dehydrogenase 1.1.1.412 2-alkyl-3-oxoalkanoate reductase 1.1.1.413 A-factor type gamma-butyrolactone 
1'-reductase (1S-forming) +1.1.1.414 L-galactonate 5-dehydrogenase +1.1.1.415 Noscapine synthase 1.1.1.n4 (-)-trans-carveol dehydrogenase 1.1.1.n5 3-methylmalate dehydrogenase 1.1.1.n11 Succinic semialdehyde reductase @@ -572,6 +574,7 @@ 1.2.1.98 2-hydroxy-2-methylpropanal dehydrogenase 1.2.1.99 4-(gamma-glutamylamino)butanal dehydrogenase 1.2.1.100 5-formyl-3-hydroxy-2-methylpyridine 4-carboxylate 5-dehydrogenase +1.2.1.101 L-tyrosine reductase 1.2.1.n2 Fatty acyl-CoA reductase 1.2.2.1 Formate dehydrogenase (cytochrome) 1.2.2.4 Carbon-monoxide dehydrogenase (cytochrome b-561) @@ -712,6 +715,8 @@ 1.3.1.115 3-oxocholoyl-CoA 4-desaturase 1.3.1.116 7-beta-hydroxy-3-oxochol-24-oyl-CoA 4-desaturase 1.3.1.117 Hydroxycinnamoyl-CoA reductase +1.3.1.118 Meromycolic acid enoyl-[acyl-carrier-protein] reductase +1.3.1.119 Chlorobenzene dihydrodiol dehydrogenase 1.3.1.n3 Curcumin reductase 1.3.2.3 L-galactonolactone dehydrogenase 1.3.3.3 Coproporphyrinogen oxidase @@ -890,6 +895,7 @@ 1.5.1.49 1-pyrroline-2-carboxylate reductase (NAD(P)H) 1.5.1.50 Dihydromonapterin reductase 1.5.1.51 N-((2S)-2-amino-2-carboxyethyl)-L-glutamate dehydrogenase +1.5.1.52 Staphylopine dehydrogenase 1.5.3.1 Sarcosine oxidase 1.5.3.2 N-methyl-L-amino-acid oxidase 1.5.3.4 N(6)-methyl-lysine oxidase @@ -934,7 +940,6 @@ 1.6.1.2 NAD(P)(+) transhydrogenase (Re/Si-specific) 1.6.1.3 NAD(P)(+) transhydrogenase 1.6.1.4 NAD(P)(+) transhydrogenase (ferredoxin) -1.6.1.5 Proton-translocating NAD(P)(+) transhydrogenase 1.6.2.2 Cytochrome-b5 reductase 1.6.2.4 NADPH--hemoprotein reductase 1.6.2.5 NADPH--cytochrome-c2 reductase @@ -945,17 +950,14 @@ 1.6.3.4 NADH oxidase (H(2)O-forming) 1.6.3.5 Renalase 1.6.5.2 NAD(P)H dehydrogenase (quinone) -1.6.5.3 NADH:ubiquinone reductase (H(+)-translocating) 1.6.5.4 Monodehydroascorbate reductase (NADH) 1.6.5.5 NADPH:quinone reductase 1.6.5.6 p-benzoquinone reductase (NADPH) 1.6.5.7 2-hydroxy-1,4-benzoquinone reductase -1.6.5.8 NADH:ubiquinone reductase (Na(+)-transporting) 
1.6.5.9 NADH:ubiquinone reductase (non-electrogenic) 1.6.5.10 NADPH dehydrogenase (quinone) 1.6.5.11 NADH dehydrogenase (quinone) 1.6.5.12 Demethylphylloquinone reductase -1.6.6.9 Trimethylamine-N-oxide reductase 1.6.99.1 NADPH dehydrogenase 1.6.99.3 NADH dehydrogenase 1.6.99.n1 NADPH dehydrogenase (coenzyme F420 dependent) @@ -1041,7 +1043,7 @@ 1.8.4.14 L-methionine (R)-S-oxide reductase 1.8.5.1 Glutathione dehydrogenase (ascorbate) 1.8.5.2 Thiosulfate dehydrogenase (quinone) -1.8.5.3 Dimethylsulfoxide reductase +1.8.5.3 Respiratory dimethylsulfoxide reductase 1.8.5.4 Bacterial sulfide:quinone reductase 1.8.5.5 Thiosulfate reductase (quinone) 1.8.5.6 Sulfite dehydrogenase (quinone) @@ -1071,15 +1073,10 @@ 1.10.3.5 3-hydroxyanthranilate oxidase 1.10.3.6 Rifamycin-B oxidase 1.10.3.9 Photosystem II -1.10.3.10 Ubiquinol oxidase (H(+)-transporting) 1.10.3.11 Ubiquinol oxidase (non-electrogenic) -1.10.3.12 Menaquinol oxidase (H(+)-transporting) -1.10.3.13 Caldariellaquinol oxidase (H(+)-transporting) -1.10.3.14 Ubiquinol oxidase (electrogenic, non H(+)-transporting) 1.10.3.15 Grixazone synthase 1.10.3.16 Dihydrophenazinedicarboxylate synthase 1.10.5.1 Ribosyldihydronicotinamide dehydrogenase (quinone) -1.10.9.1 Plastoquinol--plastocyanin reductase 1.11.1.1 NADH peroxidase 1.11.1.2 NADPH peroxidase 1.11.1.3 Fatty-acid peroxidase @@ -1198,6 +1195,8 @@ 1.13.11.83 4-hydroxy-3-prenylphenylpyruvate oxygenase 1.13.11.84 Crocetin dialdehyde synthase 1.13.11.85 Exo-cleaving rubber dioxygenase +1.13.11.86 5-aminosalicylate 1,2-dioxygenase +1.13.11.87 Endo-cleaving rubber dioxygenase 1.13.12.1 Arginine 2-monooxygenase 1.13.12.2 Lysine 2-monooxygenase 1.13.12.3 Tryptophan 2-monooxygenase @@ -1272,6 +1271,7 @@ 1.14.11.57 L-proline trans-4-hydroxylase 1.14.11.58 Ornithine lipid ester-linked acyl 2-hydroxylase 1.14.11.59 2,4-dihydroxy-1,4-benzoxazin-3-one-glucoside dioxygenase +1.14.11.60 Scopoletin 8-hydroxylase 1.14.11.n2 Methylcytosine dioxygenase 1.14.11.n4 
Ankyrin-repeat-histidine dioxagenase 1.14.12.1 Anthranilate 1,2-dioxygenase (deaminating, decarboxylating) @@ -1293,6 +1293,7 @@ 1.14.12.23 Nitroarene dioxygenase 1.14.12.24 2,4-dinitrotoluene dioxygenase 1.14.12.25 p-cumate 2,3-dioxygenase +1.14.12.26 Chlorobenzene dioxygenase 1.14.13.1 Salicylate 1-monooxygenase 1.14.13.2 4-hydroxybenzoate 3-monooxygenase 1.14.13.4 Melilotate 3-monooxygenase @@ -1349,13 +1350,11 @@ 1.14.13.114 6-hydroxynicotinate 3-monooxygenase 1.14.13.116 Geranylhydroquinone 3''-hydroxylase 1.14.13.122 Chlorophyllide a oxygenase -1.14.13.125 Tryptophan N-monooxygenase 1.14.13.127 3-(3-hydroxy-phenyl)propanoic acid hydroxylase 1.14.13.128 7-methylxanthine demethylase 1.14.13.130 Pyrrole-2-carboxylate monooxygenase -1.14.13.131 Dimethyl-sulfide monooxygenase +1.14.13.131 Dissimilatory dimethyl-sulfide monooxygenase 1.14.13.135 1-hydroxy-2-naphthoate hydroxylase -1.14.13.138 Indolin-2-one monooxygenase 1.14.13.146 Taxoid 14-beta-hydroxylase 1.14.13.147 Taxoid 7-beta-hydroxylase 1.14.13.148 Trimethylamine monooxygenase @@ -1567,6 +1566,17 @@ 1.14.14.152 Beta-amyrin 11-oxidase 1.14.14.153 Indole-2-monooxygenase 1.14.14.154 Sterol 14-alpha-demethylase +1.14.14.155 3,6-diketocamphane 1,2-monooxygenase +1.14.14.156 Tryptophan N-monooxygenase +1.14.14.157 Indolin-2-one monooxygenase +1.14.14.158 Carotene epsilon-hydroxylase +1.14.14.159 Dolabradiene monooxygenase +1.14.14.160 Zealexin A1 synthase +1.14.14.161 Nepetalactol monooxygenase +1.14.14.162 Flavanone 2-hydroxylase +1.14.14.163 (S)-1-hydroxy-N-methylcanadine 13-hydroxylase +1.14.14.164 Fraxetin 5-hydroxylase +1.14.14.165 Indole-3-carbonyl nitrile 4-hydroxylase 1.14.15.1 Camphor 5-monooxygenase 1.14.15.3 Alkane 1-monooxygenase 1.14.15.4 Steroid 11-beta-monooxygenase @@ -1691,6 +1701,8 @@ 1.14.19.71 Fumitremorgin C synthase 1.14.19.72 (-)-pluviatolide synthase 1.14.19.73 (S)-nandinine synthase +1.14.19.74 (+)-piperitol/(+)-sesamin synthase +1.14.19.75 Very-long-chain acyl-lipid omega-9 desaturase 
1.14.19.n4 Stearoyl-CoA 9-desaturase 1.14.19.n5 Versicolorin B desaturase 1.14.20.1 Deacetoxycephalosporin-C synthase @@ -1705,6 +1717,8 @@ 1.14.20.11 3-((Z)-2-isocyanoethenyl)-1H-indole synthase 1.14.20.12 3-((E)-2-isocyanoethenyl)-1H-indole synthase 1.14.20.13 6-beta-hydroxyhyoscyamine epoxidase +1.14.20.14 Hapalindole-type alkaloid chlorinase +1.14.20.15 L-threonyl-[L-threonyl-carrier protein] 4-chlorinase 1.14.99.1 Prostaglandin-endoperoxide synthase 1.14.99.2 Kynurenine 7,8-hydroxylase 1.14.99.4 Progesterone monooxygenase @@ -1726,7 +1740,6 @@ 1.14.99.38 Cholesterol 25-hydroxylase 1.14.99.39 Ammonia monooxygenase 1.14.99.44 Diapolycopene oxygenase -1.14.99.45 Carotene epsilon-monooxygenase 1.14.99.46 Pyrimidine monooxygenase 1.14.99.47 (+)-larreatricin hydroxylase 1.14.99.48 Heme oxygenase (staphylobilin-producing) @@ -1741,6 +1754,10 @@ 1.14.99.58 Heme oxygenase (biliverdin-IX-beta and delta-forming) 1.14.99.59 Tryptamine 4-monooxygenase 1.14.99.60 3-demethoxyubiquinol 3-hydroxylase +1.14.99.61 Cyclooctat-9-en-7-ol 5-monooxygenase +1.14.99.62 Cyclooctatin synthase +1.14.99.63 Beta-carotene 4-ketolase +1.14.99.64 Zeaxanthin 4-ketolase 1.14.99.n4 Carotenoid 9,10-dioxygenase 1.15.1.1 Superoxide dismutase 1.15.1.2 Superoxide reductase @@ -1757,7 +1774,6 @@ 1.16.3.1 Ferroxidase 1.16.3.2 Bacterial non-heme ferritin 1.16.3.3 Manganese oxidase -1.16.5.1 Ascorbate ferrireductase (transmembrane) 1.16.8.1 Cob(II)yrinic acid a,c-diamide reductase 1.16.9.1 Iron:rusticyanin reductase 1.17.1.1 CDP-4-dehydro-6-deoxyglucose reductase @@ -1802,7 +1818,6 @@ 1.18.1.5 Putidaredoxin--NAD(+) reductase 1.18.1.6 Adrenodoxin-NADP(+) reductase 1.18.1.7 Ferredoxin--NAD(P)(+) reductase (naphthalene dioxygenase ferredoxin-specific) -1.18.1.8 Ferredoxin-NAD(+) oxidoreductase (Na(+)-transporting) 1.18.6.1 Nitrogenase 1.18.6.2 Vanadium-dependent nitrogenase 1.19.1.1 Flavodoxin--NADP(+) reductase @@ -1825,13 +1840,14 @@ 1.21.3.6 Aureusidin synthase 1.21.3.7 Tetrahydrocannabinolic acid 
synthase 1.21.3.8 Cannabidiolic acid synthase -1.21.4.1 D-proline reductase (dithiol) +1.21.4.1 D-proline reductase 1.21.4.2 Glycine reductase 1.21.4.3 Sarcosine reductase 1.21.4.4 Betaine reductase 1.21.98.1 Cyclic dehypoxanthinyl futalosine synthase 1.21.98.2 Dichlorochromopyrrolate synthase 1.21.98.3 Anaerobic magnesium-protoporphyrin IX monomethyl ester cyclase +1.21.98.4 PqqA peptide cyclase 1.21.99.1 Beta-cyclopiazonate dehydrogenase 1.21.99.3 Thyroxine 5-deiodinase 1.21.99.4 Thyroxine 5'-deiodinase @@ -1924,7 +1940,7 @@ 2.1.1.90 Methanol--corrinoid protein Co-methyltransferase 2.1.1.91 Isobutyraldoxime O-methyltransferase 2.1.1.94 Tabersonine 16-O-methyltransferase -2.1.1.95 Tocopherol O-methyltransferase +2.1.1.95 Tocopherol C-methyltransferase 2.1.1.96 Thioether S-methyltransferase 2.1.1.97 3-hydroxyanthranilate 4-C-methyltransferase 2.1.1.98 Diphthine synthase @@ -2170,6 +2186,8 @@ 2.1.1.346 U6 snRNA m(6)A methyltransferase 2.1.1.347 (+)-O-methylkolavelool synthase 2.1.1.348 mRNA m(6)A methyltransferase +2.1.1.349 Toxoflavin synthase +2.1.1.350 Menaquinone C(8)-methyltransferase 2.1.1.n1 Resorcinol O-methyltransferase 2.1.1.n4 Thiocyanate methyltransferase 2.1.1.n7 5-pentadecatrienyl resorcinol O-methyltransferase @@ -2296,7 +2314,7 @@ 2.3.1.83 Phosphatidylcholine--dolichol O-acyltransferase 2.3.1.84 Alcohol O-acetyltransferase 2.3.1.85 Fatty-acid synthase -2.3.1.86 Fatty-acyl-CoA synthase +2.3.1.86 Fatty-acyl-CoA synthase system 2.3.1.87 Aralkylamine N-acetyltransferase 2.3.1.89 Tetrahydrodipicolinate N-acetyltransferase 2.3.1.90 Beta-glucogallin O-galloyltransferase @@ -2475,8 +2493,10 @@ 2.3.1.270 Lyso-ornithine lipid O-acyltransferase 2.3.1.271 L-glutamate-5-semialdehyde N-acetyltransferase 2.3.1.272 2-acetylphloroglucinol acetyltransferase -2.3.1.n2 Phosphate acyltransferase -2.3.1.n3 Glycerol-3-phosphate acyltransferase (acyl-phosphate transferring) +2.3.1.273 Diglucosylglycerate octanoyltransferase +2.3.1.274 Phosphate acyltransferase +2.3.1.275 
Acyl phosphate:glycerol-3-phosphate acyltransferase +2.3.1.276 Galactosamine-1-phosphate N-acetyltransferase 2.3.1.n4 1-acyl-sn-glycerol-3-phosphate acyltransferase 2.3.1.n5 Glycerol-3-phosphate acyltransferase (acyl-[acyl-carrier-protein]-transferring) 2.3.1.n6 1-acylglycerophosphoserine O-acyltransferase @@ -2856,6 +2876,8 @@ 2.4.1.355 Poly(ribitol-phosphate) beta-N-acetylglucosaminyltransferase 2.4.1.356 Glucosyl-dolichyl phosphate glucuronosyltransferase 2.4.1.357 Phlorizin synthase +2.4.1.358 Acylphloroglucinol glucosyltransferase +2.4.1.359 Glucosylglycerol phosphorylase (configuration-retaining) 2.4.1.n2 Loliose synthase 2.4.2.1 Purine-nucleoside phosphorylase 2.4.2.2 Pyrimidine-nucleoside phosphorylase @@ -2914,6 +2936,7 @@ 2.4.2.58 Hydroxyproline O-arabinosyltransferase 2.4.2.59 Sulfide-dependent adenosine diphosphate thiazole synthase 2.4.2.60 Cysteine-dependent adenosine diphosphate thiazole synthase +2.4.2.61 Alpha-dystroglycan beta-1,4-xylosyltransferase 2.4.2.n2 Glucoside xylosyltransferase 2.4.2.n3 Xyloside xylosyltransferase 2.4.99.1 Beta-galactoside alpha-(2,6)-sialyltransferase @@ -2947,7 +2970,7 @@ 2.5.1.10 (2E,6E)-farnesyl diphosphate synthase 2.5.1.15 Dihydropteroate synthase 2.5.1.16 Spermidine synthase -2.5.1.17 Cob(I)yrinic acid a,c-diamide adenosyltransferase +2.5.1.17 Corrinoid adenosyltransferase 2.5.1.18 Glutathione transferase 2.5.1.19 3-phosphoshikimate 1-carboxyvinyltransferase 2.5.1.20 Rubber cis-polyprenylcistransferase @@ -3003,7 +3026,6 @@ 2.5.1.74 1,4-dihydroxy-2-naphthoate polyprenyltransferase 2.5.1.75 tRNA dimethylallyltransferase 2.5.1.76 Cysteate synthase -2.5.1.77 7,8-didemethyl-8-hydroxy-5-deazariboflavin synthase 2.5.1.78 6,7-dimethyl-8-ribityllumazine synthase 2.5.1.79 Thermospermine synthase 2.5.1.80 7-dimethylallyltryptophan synthase @@ -3072,6 +3094,7 @@ 2.5.1.144 S-sulfo-L-cysteine synthase (O-acetyl-L-serine-dependent) 2.5.1.145 Phosphatidylglycerol--prolipoprotein diacylglyceryl transferase 2.5.1.146 
3-geranyl-3-((Z)-2-isocyanoethenyl)indole synthase +2.5.1.147 5-amino-6-(D-ribitylamino)uracil--L-tyrosine 4-hydroxyphenyl transferase 2.5.1.n9 Heptaprenylglyceryl phosphate synthase 2.6.1.1 Aspartate transaminase 2.6.1.2 Alanine transaminase @@ -3371,6 +3394,8 @@ 2.7.1.220 D-erythronate 4-kinase 2.7.1.221 N-acetylmuramate 1-kinase 2.7.1.222 4-hydroxytryptamine kinase +2.7.1.223 Aminoimidazole riboside kinase +2.7.1.224 Cytidine diphosphoramidate kinase 2.7.2.1 Acetate kinase 2.7.2.2 Carbamate kinase 2.7.2.3 Phosphoglycerate kinase @@ -3517,7 +3542,10 @@ 2.7.7.97 3-hydroxy-4-methylanthranilate adenylyltransferase 2.7.7.99 N-acetyl-alpha-D-muramate 1-phosphate uridylyltransferase 2.7.7.100 SAMP-activating enzyme -2.7.7.n1 Adenosine monophosphate-protein transferase +2.7.7.101 DNA primase DnaG +2.7.7.102 DNA primase AEP +2.7.7.103 L-glutamine-phosphate cytidylyltransferase +2.7.7.n1 Protein adenylyltransferase 2.7.7.n6 Guanine phosphate-protein transferase 2.7.8.1 Ethanolaminephosphotransferase 2.7.8.2 Diacylglycerol cholinephosphotransferase @@ -3695,6 +3723,7 @@ 2.8.4.4 [Ribosomal protein S12] (aspartate(89)-C(3))-methylthiotransferase 2.8.4.5 tRNA (N(6)-L-threonylcarbamoyladenosine(37)-C(2))-methylthiotransferase 2.8.5.1 S-sulfo-L-cysteine synthase (3-phospho-L-serine-dependent) +2.8.5.2 L-cysteine S-thiosulfotransferase 2.9.1.1 L-seryl-tRNA(Sec) selenium transferase 2.9.1.2 O-phospho-L-seryl-tRNA(Sec):L-selenocysteinyl-tRNA synthase 2.10.1.1 Molybdopterin molybdotransferase @@ -3794,6 +3823,7 @@ 3.1.1.101 Poly(ethylene terephthalate) hydrolase 3.1.1.102 Mono(ethylene terephthalate) hydrolase 3.1.1.103 Teichoic acid D-alanine hydrolase +3.1.1.104 5-phospho-D-xylono-1,4-lactonase 3.1.1.n2 Protein-S-isoprenylcysteine alpha-carbonyl methylesterase 3.1.2.1 Acetyl-CoA hydrolase 3.1.2.2 Palmitoyl-CoA hydrolase @@ -4234,6 +4264,7 @@ 3.2.1.205 Isomaltose glucohydrolase 3.2.1.206 Oleuropein beta-glucosidase 3.2.1.207 Mannosyl-oligosaccharide alpha-1,3-glucosidase 
+3.2.1.208 Glucosylglycerate hydrolase 3.2.1.n1 Blood group B branched chain alpha-1,3-galactosidase 3.2.1.n2 Blood group B linear chain alpha-1,3-galactosidase 3.2.1.n3 Dictyostelium lysozyme A @@ -4362,7 +4393,7 @@ 3.4.19.5 Beta-aspartyl-peptidase 3.4.19.6 Pyroglutamyl-peptidase II 3.4.19.7 N-formylmethionyl-peptidase -3.4.19.9 Gamma-glutamyl hydrolase +3.4.19.9 Folate gamma-glutamyl hydrolase 3.4.19.11 Gamma-D-glutamyl-meso-diaminopimelate peptidase 3.4.19.12 Ubiquitinyl hydrolase 1 3.4.19.13 Glutathione hydrolase @@ -4773,6 +4804,7 @@ 3.5.1.126 Oxamate amidohydrolase 3.5.1.127 Jasmonoyl-L-amino acid hydrolase 3.5.1.128 Deaminated glutathione amidase +3.5.1.129 N(5)-(cytidine 5'-diphosphoramidyl)-L-glutamine hydrolase 3.5.1.n3 4-deoxy-4-formamido-L-arabinose-phosphoundecaprenol deformylase 3.5.2.1 Barbiturase 3.5.2.2 Dihydropyrimidinase @@ -4936,62 +4968,29 @@ 3.6.1.n3 L-cysteinyl-tRNA(Cys) hydrolase 3.6.2.1 Adenylylsulfatase 3.6.2.2 Phosphoadenylylsulfatase -3.6.3.1 Phospholipid-translocating ATPase 3.6.3.2 Magnesium-importing ATPase 3.6.3.3 Cadmium-exporting ATPase 3.6.3.4 Cu(2+)-exporting ATPase 3.6.3.5 Zinc-exporting ATPase -3.6.3.6 Proton-exporting ATPase -3.6.3.7 Sodium-exporting ATPase 3.6.3.8 Calcium-transporting ATPase 3.6.3.9 Sodium/potassium-exchanging ATPase 3.6.3.10 Hydrogen/potassium-exchanging ATPase 3.6.3.11 Chloride-transporting ATPase -3.6.3.12 Potassium-transporting ATPase -3.6.3.14 H(+)-transporting two-sector ATPase -3.6.3.15 Sodium-transporting two-sector ATPase 3.6.3.16 Arsenite-transporting ATPase 3.6.3.17 Monosaccharide-transporting ATPase -3.6.3.18 Oligosaccharide-transporting ATPase -3.6.3.19 Maltose-transporting ATPase 3.6.3.20 Glycerol-3-phosphate-transporting ATPase -3.6.3.21 Polar-amino-acid-transporting ATPase -3.6.3.22 Nonpolar-amino-acid-transporting ATPase -3.6.3.23 Oligopeptide-transporting ATPase 3.6.3.24 Nickel-transporting ATPase -3.6.3.25 Sulfate-transporting ATPase -3.6.3.26 Nitrate-transporting ATPase -3.6.3.27 
Phosphate-transporting ATPase -3.6.3.28 Phosphonate-transporting ATPase -3.6.3.29 Molybdate-transporting ATPase -3.6.3.30 Fe(3+)-transporting ATPase 3.6.3.31 Polyamine-transporting ATPase 3.6.3.32 Quaternary-amine-transporting ATPase 3.6.3.33 Vitamin B12-transporting ATPase 3.6.3.34 Iron-chelate-transporting ATPase -3.6.3.35 Manganese-transporting ATPase -3.6.3.36 Taurine-transporting ATPase -3.6.3.37 Guanine-transporting ATPase 3.6.3.38 Capsular-polysaccharide-transporting ATPase -3.6.3.39 Lipopolysaccharide-transporting ATPase -3.6.3.40 Teichoic-acid-transporting ATPase -3.6.3.41 Heme-transporting ATPase -3.6.3.42 Beta-glucan-transporting ATPase -3.6.3.43 Peptide-transporting ATPase -3.6.3.44 Xenobiotic-transporting ATPase -3.6.3.46 Cadmium-transporting ATPase -3.6.3.47 Fatty-acyl-CoA-transporting ATPase -3.6.3.48 Alpha-factor-transporting ATPase 3.6.3.49 Channel-conductance-controlling ATPase 3.6.3.50 Protein-secreting ATPase -3.6.3.51 Mitochondrial protein-transporting ATPase -3.6.3.52 Chloroplast protein-transporting ATPase 3.6.3.53 Ag(+)-exporting ATPase -3.6.3.54 Cu(+) exporting ATPase 3.6.3.55 Tungstate-importing ATPase 3.6.4.1 Myosin ATPase 3.6.4.2 Dynein ATPase -3.6.4.3 Microtubule-severing ATPase 3.6.4.4 Plus-end-directed kinesin ATPase 3.6.4.5 Minus-end-directed kinesin ATPase 3.6.4.6 Vesicle-fusing ATPase @@ -4999,7 +4998,6 @@ 3.6.4.8 Proteasome ATPase 3.6.4.9 Chaperonin ATPase 3.6.4.10 Non-chaperonin molecular chaperone ATPase -3.6.4.11 Nucleoplasmin ATPase 3.6.4.12 DNA helicase 3.6.4.13 RNA helicase 3.6.5.1 Heterotrimeric G-protein GTPase @@ -5058,10 +5056,9 @@ 3.13.1.8 S-adenosyl-L-methionine hydrolase (adenosine-forming) 4.1.1.1 Pyruvate decarboxylase 4.1.1.2 Oxalate decarboxylase -4.1.1.3 Oxaloacetate decarboxylase 4.1.1.4 Acetoacetate decarboxylase 4.1.1.5 Acetolactate decarboxylase -4.1.1.6 Aconitate decarboxylase +4.1.1.6 Cis-aconitate decarboxylase 4.1.1.7 Benzoylformate decarboxylase 4.1.1.8 Oxalyl-CoA decarboxylase 4.1.1.9 Malonyl-CoA 
decarboxylase @@ -5092,7 +5089,6 @@ 4.1.1.38 Phosphoenolpyruvate carboxykinase (diphosphate) 4.1.1.39 Ribulose-bisphosphate carboxylase 4.1.1.40 Hydroxypyruvate decarboxylase -4.1.1.41 Methylmalonyl-CoA decarboxylase 4.1.1.42 Carnitine decarboxylase 4.1.1.43 Phenylpyruvate decarboxylase 4.1.1.44 4-carboxymuconolactone decarboxylase @@ -5163,6 +5159,8 @@ 4.1.1.109 Phenylacetaldehyde synthase 4.1.1.110 Bisphosphomevalonate decarboxylase 4.1.1.111 Siroheme decarboxylase +4.1.1.112 Oxaloacetate decarboxylase +4.1.1.113 Trans-aconitate decarboxylase 4.1.2.2 Ketotetrose-phosphate aldolase 4.1.2.4 Deoxyribose-phosphate aldolase 4.1.2.5 L-threonine aldolase @@ -5255,6 +5253,8 @@ 4.1.99.20 3-amino-4-hydroxybenzoate synthase 4.1.99.22 GTP 3',8-cyclase 4.1.99.23 5-hydroxybenzimidazole synthase +4.1.99.24 L-tyrosine isonitrile synthase +4.1.99.25 L-tryptophan isonitrile synthase 4.2.1.1 Carbonic anhydrase 4.2.1.2 Fumarate hydratase 4.2.1.3 Aconitate hydratase @@ -5662,6 +5662,7 @@ 4.3.1.29 D-glucosaminate-6-phosphate ammonia lyase 4.3.1.30 dTDP-4-amino-4,6-dideoxy-D-glucose ammonia-lyase 4.3.1.31 L-tryptophan ammonia lyase +4.3.1.32 7,8-didemethyl-8-hydroxy-5-deazariboflavin synthase 4.3.2.1 Argininosuccinate lyase 4.3.2.2 Adenylosuccinate lyase 4.3.2.3 Ureidoglycolate lyase @@ -5671,6 +5672,7 @@ 4.3.2.7 Glutathione-specific gamma-glutamylcyclotransferase 4.3.2.8 Gamma-glutamylamine cyclotransferase 4.3.2.9 Gamma-glutamylcyclotransferase +4.3.2.10 Imidazole glycerol-phosphate synthase 4.3.3.1 3-ketovalidoxylamine C-N-lyase 4.3.3.2 Strictosidine synthase 4.3.3.3 Deacetylisoipecoside synthase @@ -5678,7 +5680,6 @@ 4.3.3.5 4'-demethylrebeccamycin synthase 4.3.3.6 Pyridoxal 5'-phosphate synthase (glutamine hydrolyzing) 4.3.3.7 4-hydroxy-tetrahydrodipicolinate synthase -4.3.99.2 Carboxybiotin decarboxylase 4.3.99.3 7-carboxy-7-deazaguanine synthase 4.3.99.4 Choline trimethylamine-lyase 4.4.1.1 Cystathionine gamma-lyase @@ -5810,6 +5811,7 @@ 5.1.3.38 D-erythrulose 1-phosphate 
3-epimerase 5.1.3.40 D-tagatose 6-phosphate 4-epimerase 5.1.3.41 Fructoselysine 3-epimerase +5.1.3.42 D-glucosamine-6-phosphate 4-epimerase 5.1.99.1 Methylmalonyl-CoA epimerase 5.1.99.2 16-hydroxysteroid epimerase 5.1.99.3 Allantoin racemase @@ -5911,6 +5913,7 @@ 5.4.2.10 Phosphoglucosamine mutase 5.4.2.11 Phosphoglycerate mutase (2,3-diphosphoglycerate-dependent) 5.4.2.12 Phosphoglycerate mutase (2,3-diphosphoglycerate-independent) +5.4.2.13 Phosphogalactosamine mutase 5.4.3.2 Lysine 2,3-aminomutase 5.4.3.3 Lysine 5,6-aminomutase 5.4.3.4 D-lysine 5,6-aminomutase @@ -6025,6 +6028,7 @@ 5.5.1.31 Hapalindole H synthase 5.5.1.32 12-epi-hapalindole U synthase 5.5.1.33 12-epi-fischerindole U synthase +5.6.1.1 Microtubule-severing ATPase 5.99.1.1 Thiocyanate isomerase 5.99.1.2 DNA topoisomerase 5.99.1.3 DNA topoisomerase (ATP-hydrolyzing) @@ -6058,7 +6062,7 @@ 6.1.2.2 Nebramycin 5' synthase 6.1.3.1 Olefin beta-lactone synthetase 6.2.1.1 Acetate--CoA ligase -6.2.1.2 Butyrate--CoA ligase +6.2.1.2 Medium-chain acyl-CoA ligase 6.2.1.3 Long-chain-fatty-acid--CoA ligase 6.2.1.4 Succinate--CoA ligase (GDP-forming) 6.2.1.5 Succinate--CoA ligase (ADP-forming) @@ -6237,3 +6241,52 @@ 6.5.1.8 3'-phosphate/5'-hydroxy nucleic acid ligase 6.6.1.1 Magnesium chelatase 6.6.1.2 Cobaltochelatase +7.1.1.1 Proton-translocating NAD(P)(+) transhydrogenase +7.1.1.2 NADH:ubiquinone reductase (H(+)-translocating) +7.1.1.3 Ubiquinol oxidase (H(+)-transporting) +7.1.1.4 Caldariellaquinol oxidase (H(+)-transporting) +7.1.1.5 Menaquinol oxidase (H(+)-transporting) +7.1.1.6 Plastoquinol--plastocyanin reductase +7.1.1.7 Ubiquinol oxidase (electrogenic, proton-motive force generating) +7.1.2.1 P-type H(+)-exporting transporter +7.1.2.2 H(+)-transporting two-sector ATPase +7.1.3.1 H(+)-exporting diphosphatase +7.2.1.1 NADH:ubiquinone reductase (Na(+)-transporting) +7.2.1.2 Ferredoxin--NAD(+) oxidoreductase (Na(+)-transporting) +7.2.1.3 Ascorbate ferrireductase (transmembrane) +7.2.2.1 Na(+)-transporting 
two-sector ATPase +7.2.2.2 ABC-type Cd(2+) transporter +7.2.2.3 P-type Na(+) transporter +7.2.2.4 ABC-type Na(+) transporter +7.2.2.5 ABC-type Mn(2+) transporter +7.2.2.6 P-type K(+) transporter +7.2.2.7 ABC-type Fe(3+) transporter +7.2.2.8 P-type Cu(+) transporter +7.2.4.1 Carboxybiotin decarboxylase +7.2.4.2 Oxaloacetate decarboxylase (Na(+) extruding) +7.2.4.3 (S)-methylmalonyl-CoA decarboxylase (sodium-transporting) +7.3.2.1 ABC-type phosphate transporter +7.3.2.2 ABC-type phosphonate transporter +7.3.2.3 ABC-type sulfate transporter +7.3.2.4 ABC-type nitrate transporter +7.3.2.5 ABC-type molybdate transporter +7.4.2.1 ABC-type polar-amino-acid transporter +7.4.2.2 ABC-type nonpolar-amino-acid transporter +7.4.2.3 Mitochondrial protein-transporting ATPase +7.4.2.4 Chloroplast protein-transporting ATPase +7.4.2.5 ABC-type protein transporter +7.4.2.6 ABC-type oligopeptide transporter +7.4.2.7 ABC-type alpha-factor-pheromone transporter +7.5.2.1 ABC-type maltose transporter +7.5.2.2 ABC-type oligosaccharide transporter +7.5.2.3 ABC-type beta-glucan transporter +7.5.2.4 ABC-type teichoic-acid transporter +7.5.2.5 ABC-type lipopolysaccharide transporter +7.5.2.6 ABC-type lipid A-core oligosaccharide transporter +7.6.2.1 P-type phospholipid transporter +7.6.2.2 ABC-type xenobiotic transporter +7.6.2.3 ABC-type glutathione-S-conjugate transporter +7.6.2.4 ABC-type fatty-acyl-CoA transporter +7.6.2.5 ABC-type heme transporter +7.6.2.6 ABC-type guanine transporter +7.6.2.7 ABC-type taurine transporter diff --git a/c++/src/objects/seqfeat/gc.inc b/c++/src/objects/seqfeat/gc.inc new file mode 100644 index 00000000..f533a038 --- /dev/null +++ b/c++/src/objects/seqfeat/gc.inc @@ -0,0 +1,208 @@ +/* $Id: gc.inc 581624 2019-03-04 16:47:19Z ivanov $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" 
under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Author: Jonathan Kans et al. + * + * File Description: + * Built-in copy of gc.prt. 
+ * + */ + +static const char* const s_GenCodeTblMemStr[] = { + "", + "Genetic-code-table ::= {", + " {", + " name \"Standard\" ,", + " name \"SGC0\" ,", + " id 1 ,", + " ncbieaa \"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"---M------**--*----M---------------M----------------------------\"", + " },", + " {", + " name \"Vertebrate Mitochondrial\" ,", + " name \"SGC1\" ,", + " id 2 ,", + " ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG\",", + " sncbieaa \"----------**--------------------MMMM----------**---M------------\"", + " },", + " {", + " name \"Yeast Mitochondrial\" ,", + " name \"SGC2\" ,", + " id 3 ,", + " ncbieaa \"FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"----------**----------------------MM---------------M------------\"", + " },", + " {", + " name \"Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate", + " Mitochondrial; Mycoplasma; Spiroplasma\" ,", + " name \"SGC3\" ,", + " id 4 ,", + " ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"--MM------**-------M------------MMMM---------------M------------\"", + " },", + " {", + " name \"Invertebrate Mitochondrial\" ,", + " name \"SGC4\" ,", + " id 5 ,", + " ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG\",", + " sncbieaa \"---M------**--------------------MMMM---------------M------------\"", + " },", + " {", + " name \"Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear\" ,", + " name \"SGC5\" ,", + " id 6 ,", + " ncbieaa \"FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"--------------*--------------------M----------------------------\"", + " },", + " {", + " name \"Echinoderm Mitochondrial; Flatworm Mitochondrial\" ,", + " name \"SGC8\" ,", + " id 9 ,", + " ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG\",", + " sncbieaa 
\"----------**-----------------------M---------------M------------\"", + " },", + " {", + " name \"Euplotid Nuclear\" ,", + " name \"SGC9\" ,", + " id 10 ,", + " ncbieaa \"FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"----------**-----------------------M----------------------------\"", + " },", + " {", + " name \"Bacterial, Archaeal and Plant Plastid\" ,", + " id 11 ,", + " ncbieaa \"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"---M------**--*----M------------MMMM---------------M------------\"", + " },", + " {", + " name \"Alternative Yeast Nuclear\" ,", + " id 12 ,", + " ncbieaa \"FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"----------**--*----M---------------M----------------------------\"", + " },", + " {", + " name \"Ascidian Mitochondrial\" ,", + " id 13 ,", + " ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG\",", + " sncbieaa \"---M------**----------------------MM---------------M------------\"", + " },", + " {", + " name \"Alternative Flatworm Mitochondrial\" ,", + " id 14 ,", + " ncbieaa \"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG\",", + " sncbieaa \"-----------*-----------------------M----------------------------\"", + " } ,", + " {", + " name \"Blepharisma Macronuclear\" ,", + " id 15 ,", + " ncbieaa \"FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"----------*---*--------------------M----------------------------\"", + " } ,", + " {", + " name \"Chlorophycean Mitochondrial\" ,", + " id 16 ,", + " ncbieaa \"FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"----------*---*--------------------M----------------------------\"", + " } ,", + " {", + " name \"Trematode Mitochondrial\" ,", + " id 21 ,", + " ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG\",", + " sncbieaa 
\"----------**-----------------------M---------------M------------\"", + " } ,", + " {", + " name \"Scenedesmus obliquus Mitochondrial\" ,", + " id 22 ,", + " ncbieaa \"FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"------*---*---*--------------------M----------------------------\"", + " } ,", + " {", + " name \"Thraustochytrium Mitochondrial\" ,", + " id 23 ,", + " ncbieaa \"FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"--*-------**--*-----------------M--M---------------M------------\"", + " } ,", + " {", + " name \"Pterobranchia Mitochondrial\" ,", + " id 24 ,", + " ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG\",", + " sncbieaa \"---M------**-------M---------------M---------------M------------\"", + " } ,", + " {", + " name \"Candidate Division SR1 and Gracilibacteria\" ,", + " id 25 ,", + " ncbieaa \"FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"---M------**-----------------------M---------------M------------\"", + " } ,", + " {", + " name \"Pachysolen tannophilus Nuclear\" ,", + " id 26 ,", + " ncbieaa \"FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"----------**--*----M---------------M----------------------------\"", + " } ,", + " {", + " name \"Karyorelict Nuclear\" ,", + " id 27 ,", + " ncbieaa \"FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"--------------*--------------------M----------------------------\"", + " } ,", + " {", + " name \"Condylostoma Nuclear\" ,", + " id 28 ,", + " ncbieaa \"FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"----------**--*--------------------M----------------------------\"", + " } ,", + " {", + " name \"Mesodinium Nuclear\" ,", + " id 29 ,", + " ncbieaa \"FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa 
\"--------------*--------------------M----------------------------\"", + " } ,", + " {", + " name \"Peritrich Nuclear\" ,", + " id 30 ,", + " ncbieaa \"FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"--------------*--------------------M----------------------------\"", + " } ,", + " {", + " name \"Blastocrithidia Nuclear\" ,", + " id 31 ,", + " ncbieaa \"FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"----------**-----------------------M----------------------------\"", + " } ,", + " {", + " name \"Balanophoraceae Plastid\" ,", + " id 32 ,", + " ncbieaa \"FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG\",", + " sncbieaa \"---M------*---*----M------------MMMM---------------M------------\"", + " } ,", + " {", + " name \"Cephalodiscidae Mitochondrial\" ,", + " id 33 ,", + " ncbieaa \"FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG\",", + " sncbieaa \"---M-------*-------M---------------M---------------M------------\"", + " }", + "}" +}; diff --git a/c++/src/objects/seqfeat/gc.prt b/c++/src/objects/seqfeat/gc.prt new file mode 100644 index 00000000..cf9a30a8 --- /dev/null +++ b/c++/src/objects/seqfeat/gc.prt @@ -0,0 +1,354 @@ +--************************************************************************** +-- This is the NCBI genetic code table +-- Initial base data set from Andrzej Elzanowski while at PIR International +-- Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI +-- Base 1-3 of each codon have been added as comments to facilitate +-- readability at the suggestion of Peter Rice, EMBL +-- Later additions by Taxonomy Group staff at NCBI +-- +-- Version 4.5 +-- Added Cephalodiscidae mitochondrial genetic code 33 +-- +-- Version 4.4 +-- Added GTG as start codon for genetic code 3 +-- Added Balanophoraceae plastid genetic code 32 +-- +-- Version 4.3 +-- Change to CTG -> Leu in genetic codes 27, 28, 29, 30 +-- +-- Version 4.2 +-- Added Karyorelict 
nuclear genetic code 27 +-- Added Condylostoma nuclear genetic code 28 +-- Added Mesodinium nuclear genetic code 29 +-- Added Peritrich nuclear genetic code 30 +-- Added Blastocrithidia nuclear genetic code 31 +-- +-- Version 4.1 +-- Added Pachysolen tannophilus nuclear genetic code 26 +-- +-- Version 4.0 +-- Updated version to reflect numerous undocumented changes: +-- Corrected start codons for genetic code 25 +-- Name of new genetic code is Candidate Division SR1 and Gracilibacteria +-- Added candidate division SR1 nuclear genetic code 25 +-- Added GTG as start codon for genetic code 24 +-- Corrected Pterobranchia Mitochondrial genetic code (24) +-- Added genetic code 24, Pterobranchia Mitochondrial +-- Genetic code 11 is now Bacterial, Archaeal and Plant Plastid +-- Fixed capitalization of mitochondrial in codes 22 and 23 +-- Added GTG, ATA, and TTG as alternative start codons to code 13 +-- +-- Version 3.9 +-- Code 14 differs from code 9 only by translating UAA to Tyr rather than +-- STOP. A recent study (Telford et al, 2000) has found no evidence that +-- the codon UAA codes for Tyr in the flatworms, but other opinions exist. +-- There are very few GenBank records that are translated with code 14, +-- but a test translation shows that retranslating these records with code +-- 9 can cause premature terminations. Therefore, GenBank will maintain +-- code 14 until further information becomes available. +-- +-- Version 3.8 +-- Added GTG start to Echinoderm mitochondrial code, code 9 +-- +-- Version 3.7 +-- Added code 23 Thraustochytrium mitochondrial code +-- formerly OGMP code 93 +-- submitted by Gertraude Berger, Ph.D. +-- +-- Version 3.6 +-- Added code 22 TAG-Leu, TCA-stop +-- found in mitochondrial DNA of Scenedesmus obliquus +-- submitted by Gertraude Berger, Ph.D. 
+-- Organelle Genome Megasequencing Program, Univ Montreal +-- +-- Version 3.5 +-- Added code 21, Trematode Mitochondrial +-- (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990) +-- Added code 16, Chlorophycean Mitochondrial +-- (TAG can be translated to Leucine instead of STOP in chlorophyceans +-- and fungi) +-- +-- Version 3.4 +-- Added CTG,TTG as allowed alternate start codons in Standard code. +-- Prats et al. 1989, Hann et al. 1992 +-- +-- Version 3.3 - 10/13/95 +-- Added alternate initiation codon ATC to code 5 +-- based on complete mitochondrial genome of honeybee +-- Crozier and Crozier (1993) +-- +-- Version 3.2 - 6/24/95 +-- Code Comments +-- 10 Alternative Ciliate Macronuclear renamed to Euplotid Macro... +-- 15 Blepharisma Macro.. code added +-- 5 Invertebrate Mito.. GTG allowed as alternate initiator +-- 11 Eubacterial renamed to Bacterial as most alternate starts +-- have been found in Archaea +-- +-- +-- Version 3.1 - 1995 +-- Updated as per Andrzej Elzanowski at NCBI +-- Complete documentation in NCBI toolkit documentation +-- Note: 2 genetic codes have been deleted +-- +-- Old id Use id - Notes +-- +-- id 7 id 4 - Kinetoplast code now merged in code id 4 +-- id 8 id 1 - all plant chloroplast differences due to RNA edit +-- +--************************************************************************* + +Genetic-code-table ::= { + { + name "Standard" , + name "SGC0" , + id 1 , + ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa
"----------**--------------------MMMM----------**---M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Yeast Mitochondrial" , + name "SGC2" , + id 3 , + ncbieaa "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------**----------------------MM---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate + Mitochondrial; Mycoplasma; Spiroplasma" , + name "SGC3" , + id 4 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "--MM------**-------M------------MMMM---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Invertebrate Mitochondrial" , + name "SGC4" , + id 5 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG", + sncbieaa "---M------**--------------------MMMM---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" , + name "SGC5" , + id 6 , + ncbieaa "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "--------------*--------------------M----------------------------" + -- Base1 
TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Echinoderm Mitochondrial; Flatworm Mitochondrial" , + name "SGC8" , + id 9 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", + sncbieaa "----------**-----------------------M---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Euplotid Nuclear" , + name "SGC9" , + id 10 , + ncbieaa "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------**-----------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Bacterial, Archaeal and Plant Plastid" , + id 11 , + ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "---M------**--*----M------------MMMM---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Alternative Yeast Nuclear" , + id 12 , + ncbieaa "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------**--*----M---------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 
TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Ascidian Mitochondrial" , + id 13 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG", + sncbieaa "---M------**----------------------MM---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Alternative Flatworm Mitochondrial" , + id 14 , + ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", + sncbieaa "-----------*-----------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Blepharisma Macronuclear" , + id 15 , + ncbieaa "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------*---*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Chlorophycean Mitochondrial" , + id 16 , + ncbieaa "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------*---*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Trematode Mitochondrial" , + id 21 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG", + sncbieaa 
"----------**-----------------------M---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Scenedesmus obliquus Mitochondrial" , + id 22 , + ncbieaa "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "------*---*---*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Thraustochytrium Mitochondrial" , + id 23 , + ncbieaa "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "--*-------**--*-----------------M--M---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Pterobranchia Mitochondrial" , + id 24 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", + sncbieaa "---M------**-------M---------------M---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Candidate Division SR1 and Gracilibacteria" , + id 25 , + ncbieaa "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "---M------**-----------------------M---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 
TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Pachysolen tannophilus Nuclear" , + id 26 , + ncbieaa "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------**--*----M---------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Karyorelict Nuclear" , + id 27 , + ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "--------------*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Condylostoma Nuclear" , + id 28 , + ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------**--*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Mesodinium Nuclear" , + id 29 , + ncbieaa "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "--------------*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Peritrich Nuclear" , + id 30 , + ncbieaa 
"FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "--------------*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Blastocrithidia Nuclear" , + id 31 , + ncbieaa "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------**-----------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Balanophoraceae Plastid" , + id 32 , + ncbieaa "FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "---M------*---*----M------------MMMM---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Cephalodiscidae Mitochondrial" , + id 33 , + ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", + sncbieaa "---M-------*-------M---------------M---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } +} diff --git a/c++/src/objects/seqfeat/institution_codes.inc b/c++/src/objects/seqfeat/institution_codes.inc index e729a236..98b741cd 100644 --- a/c++/src/objects/seqfeat/institution_codes.inc +++ b/c++/src/objects/seqfeat/institution_codes.inc @@ -1,4 +1,4 @@ -/* $Id: institution_codes.inc 
575197 2018-11-26 16:14:43Z ivanov $ +/* $Id: institution_codes.inc 581627 2019-03-04 16:48:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -31,6 +31,7 @@ */ static const char* const kInstitutionCollectionCodeList[] = { +"# $Id: institution_codes.inc 581627 2019-03-04 16:48:48Z ivanov $", "A\ts\tArnold Arboretum, Harvard University\t\t\t", "AA\ts\tMinistry of Science, Academy of Sciences\t\t\t", "AAC\tc\tArignar Anna College\t\t\t", @@ -94,6 +95,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "ACNS\ts\tAgriculture Canada Nova Scotia\t\t\t", "ACOI\tc\tCoimbra Collection of Algae\t\t\t", "ACOR\ts\tUniversidad Nacional de Cordoba\t\t\t", +"ACSSI\tc\tAlgal Collection of Soil Science Institute\t\t\t", "ACTC\ts\tAustin College\t\t\t", "ACU\ts\tAbilene Christian University\t\t\t", "ACUNHC\ts\tAbilene Christian University, Natural History Collection\t\t\t", @@ -232,6 +234,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "ANDES\ts\tUniversidad de Los Andes\t\t\t", "ANDES:A\ts\tUniversidad de Los Andes, Amphibian Collection\t\t", "ANDES:O\ts\tUniversidad de Los Andes, Ornithology Collection\t\t", +"ANDES:T\tsb\tUniversidad de Los Andes, Tissue Collection\t", "ANES\ts\tAnadolu University, Biology Department\t\t\t", "ANFC\ts\tAustralian National Fish Collection\t\t\t", "ANFM\ts\tAssociazione Naturalisti Forlivesi Pro Museo\t\t\t", @@ -260,6 +263,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "ANUG\ts\tAnhui Normal University, Geography Department\t\t\t", "ANWC\ts\tAustralian National Wildlife Collection\t\t\t", "AO\ts\tMuseo Regionale di Scienze Naturali della Valle d'Aosta\t\t\t", +"AORI\ts\tAtmosphere and Ocean Research Institute\t\t\t", "APCC\tc\tAntarctic Protistan Culture Collection (Woods Hole Oceanographic Institution)\t\t\t", "APCR\ts\tArkansas Tech University, Biological Sciences Department\t\t\t", "APEI\ts\tAgriculture Canada Research 
Station\t\t\t", @@ -392,7 +396,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "BAI\ts\tInstituto Forestal Nacional (IFONA), Centro Forestal Castelar\t\t\t", "BAIL\ts\tConservatoire Botanique National de Bailleul\t\t\t", "BAJ\ts\tInstituto Municipal de Botanica, Parque Pte. Dr. Nicolas Avellaneda\t\t\t", -"BAK\ts\tAcademy of Sciences of Azerbaijan\t\t\t", +"BAK\ts\tAzerbaijan National Academy of Sciences (ANAS)\t\t\t", "BAL\ts\tINTA, EEA Balcarce, Catedra de Botanica Agricola\t\t\t", "BALT\ts\tTowson University, Department of Biological Sciences\t\t\t", "BAMU\ts\tDr. Babasaheb Ambedkar Marathwada University\t\t\t", @@ -740,7 +744,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "BU\ts\tBrock University\t\t\t", "BUA\ts\tUniversity of Baghdad, Plant Protection Department\t\t\t", "BUAG\ts\tUniversity of Agronomical Sciences and Veterinary Medicine, Botany and Plant Physiology Department\t\t\t", -"BUC\ts\tUniversitatea din Bucuresti\t\t\t", +"BUC\ts\tGradina Botanica D. 
Brandza\t\t\t", "BUCA\ts\tInstitute of Biology, Romanian Academy\t\t\t", "BUCF\ts\tForest Research and Management Institute\t\t\t", "BUCM\ts\tInstitute of Biology Bucharest, Romanian Academy (Mycology Herbarium)\t\t\t", @@ -818,6 +822,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "CANL\ts\tCanadian Museum of Nature, Lichenology Section\t\t\t", "CANM\ts\tCanadian Museum of Nature, Bryology Section\t\t\t", "CANT\ts\tSouth China Agricultural University, Forestry Department\t\t\t", +"CANTE\tsb\tJardin Botanico El Charco del Ingenio\t\t\t", "CANTY\ts\tCanterbury Museum\t\t\t", "CANU\ts\tUniversity of Canterbury, Department of Plant and Microbial Sciences\t\t\t", "CAPM\tc\tCollection of Animal Pathogenic Microorganisms\t\t\t", @@ -1029,7 +1034,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "CFUA\ts\tUniversidad Austral de Chile\t\t\t", "CG\tc\tEmbrapa Collection of Fungi of Invertebrates\t\t\t", "CGC\tb\tCaenorhabditis Genetics Center\t\t\t", -"CGE\ts\tUniversity of Cambridge, Department of Plant Sciences\t\t\t", +"CGE\ts\tCambridge University, Department of Plant Sciences\t\t\t", "CGEC\ts\tChina Entomological Research Institute\t\t\t", "CGG\ts\tCambridge University Botanic Garden\t\t\t", "CGH\ts\tNational Museum of Prague\t\t\t", @@ -2123,13 +2128,13 @@ static const char* const kInstitutionCollectionCodeList[] = { "GAM\tc\tGrupo Actinomicetales Merida Facultad de Medicina\t\t\t", "GAP\ts\tConservatoire Botanique National Alpin\t\t\t", "GAS\ts\tGeorgia Southern University, Department of Biology\t\t\t", -"GAT\ts\tInstitute of Plant Genetics and Crop Plant Research\t\t\t", +"GAT\ts\tLeibniz Institute of Plant Genetics and Crop Plant Research (IPK)\t\t\t", "GAUA\ts\tGuangxi University\t\t\t", "GAUBA\ts\tAustralian National University, Division of Botany and Zoology\t\t\t", "GAUF\ts\tGansu Agricultural University\t\t\t", "GAW\ts\tEastern Botanical Society of Glasgow\t\t\t", "GAZI\ts\tGazi Ueniversitesi, Biyoloji Boeluemue\t\t\t", 
-"GB\ts\tGoeteborg University, Department of Plant and Environmental Sciences\t\t\t", +"GB\ts\tGoteborg University, Department of Plant and Environmental Sciences\t\t\t", "GBFM\ts\tUniversidad de Panama\t\t\t", "GBG\tb\tGotheburg Botanical Garden\t\t\t", "GBH\ts\tHerbarium of Geo. B. Hinton\t\t\t", @@ -2322,9 +2327,10 @@ static const char* const kInstitutionCollectionCodeList[] = { "GXNM\ts\tGuangxi Natural History Museum, Herbarium\t\t\t", "GXSP\ts\tGuangxi School of Pharmacy\t\t\t", "GZAC\ts\tGuizhou Agricultural College, College of Life Science\t\t\t", +"GZCC\tc\tGuizhou Culture Collection\t\t\t", "GZM\ts\tGiessener Zoologisches Museum\t\t\t", "GZTM\ts\tGuizhou Institute of Traditional Chinese Medicine\t\t\t", -"GZU\ts\tKarl-Franzens-Universitaet Graz\t\t\t", +"GZU\ts\tKarl-Franzens-Universitat Graz\t\t\t", "GZUH\ts\tGuizhou University Herbarium\t\t\t", "H\ts\tUniversity of Helsinki\t\t\t", "H-GSP\ts\tHoward University-Geological Survey of Pakistan Project\t\t\t", @@ -2343,7 +2349,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "HAJU\ts\tHerbario Dr. 
Armando Jesus Urquiola\t\t\t", "HAK\ts\tHokkaido University, Faculty of Fisheries\t\t\t", "HAKS\ts\tHakgala Botanic Gardens\t\t\t", -"HAL\ts\tMartin-Luther-Universitaet\t\t\t", +"HAL\ts\tMartin-Luther-Universitat\t\t\t", "HALA\ts\tUniversity of Alabama, Biological Sciences Department\t\t\t", "HALE\ts\tHaleakala National Park\t\t\t", "HALLE\ts\tZoologisches Institut der Martin-Luther Universitaet\t\t\t", @@ -2727,7 +2733,9 @@ static const char* const kInstitutionCollectionCodeList[] = { "IAUM\ts\tIslamic Azad University of Mashhad\t\t\t", "IAUNT\ts\tIslamica Azad University, North Tehran Branch\t\t\t", "IAV\tsb\tInstitut Agronomique et Veterinaire Hassan II, Departement d'Ecologie Vegetale\t\t\t", -"IAVH\ts\tInstituto de Ivestigacion de los Recursos Biologicos Alexander von Humboldt\t\t\t", +"IAVH\tsb\tInstituto de Ivestigacion de los Recursos Biologicos Alexander von Humboldt\t\t\t", +"IAVH:A\tsb\tInstituto de Ivestigacion de los Recursos Biologicos Alexander von Humboldt, Ornithological Collection\t", +"IAVH:CT\tsb\tInstituto de Ivestigacion de los Recursos Biologicos Alexander von Humboldt, Tissue Collection\t", "IB\ts\tUniversitat Innsbruck\t\t\t", "IBA\ts\tInstituto Asturiano de Taxonomia y Ecologia Vegetal\t\t\t", "IBA\tc\tCollection of Microorganisms Producing Antibiotics\t\t\t", @@ -2798,7 +2806,8 @@ static const char* const kInstitutionCollectionCodeList[] = { "ICM\ts\tInstituto de Ciencias del Mar\tICMD\t\t", "ICMP\tc\tInternational Collection of Microorganisms from Plants\tPDDCC \thttp://scd.landcareresearch.co.nz/Specimen/ICMP_&spec;\t", "ICN\ts\tUniversidade Federal do Rio Grande do Sul, Departamento de Botanica\t\t\t", -"ICN\ts\tInstituto de Ciencias Naturales, Museo de Historia Natural\t\t\t", +"ICN\tsb\tInstituto de Ciencias Naturales, Museo de Historia Natural\t\t\t", +"ICN:Aves\tsb\tInstituto de Ciencias Naturales, Museo de Historia Natural, Ornithological collection\t", "ICP\ts\tIslamia College, University of Peshawar, Botany Department\t\t\t", 
"ICPB\tc\tInternational Collection of Phytopathogenic Bacteria\t\t\t", "ICPPB\ts\tInternational Collection of Plant Pathogenic Bacteria\t\t\t", @@ -2813,6 +2822,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "ICVI\ts\tThe Volcani Center\t\t\t", "ID\ts\tUniversity of Idaho, Biological Sciences Department\t\t\t", "IDAC\tcb\tThe International Depositary Authority of Canada\t\t\t", +"IDB\tsb\tKoltzov Institute of Developmental Biology Russian Academy of Sciences\t\t\t", "IDEA\ts\tInstituto de Agronomia\t\t\t", "IDF\ts\tUniversity of Idaho\t\t\t", "IDS\ts\tIdaho State University, Biological Sciences Department\t\t\t", @@ -3838,7 +3848,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "MAIC\ts\tMediterranean Agronomic Institute of Chania, Department of Natural Products\t\t\t", "MAINE\ts\tUniversity of Maine, Department of Biological Sciences\t\t\t", "MAIS\ts\tInstitut d'Elevage et de Medecine Veterinaire des Pays Tropicaux, Departement de Botanique\t\t\t", -"MAK\ts\tTokyo Metropolitan University\t\t\t", +"MAK\ts\tTokyo Metropolitan University, Makino Herbarium\t\t\t", "MAKAR\ts\tInstitut Planina i More\t\t\t", "MAKFUNGI\ts\tMacedonian Collection of Fungi,Institute of Biology\tFungi Macedonici\t\t", "MAL\ts\tBotanic Gardens of Malawi\t\t\t", @@ -3851,7 +3861,6 @@ static const char* const kInstitutionCollectionCodeList[] = { "MAND\ts\tAgricultural College and Research Institute\t\t\t", "MANK\ts\tMinnesota State University-Mankato, Department of Biological Sciences\t\t\t", "MAO\tc\tMircen Afrique Ouest\t\t\t", -"MAPA\ts\tMuseu Anchieta Porto Alegra\t\t\t", "MAPR\ts\tUniversity of Puerto Rico, Mayagueez Campus, Biology Department\t\t\t", "MAR\tc\tGrasslands Rhizobium Collection\t\t\t", "MARDI\ts\tMalaysian Agricultural Research and Development Institute\tMDI\t\t", @@ -4038,7 +4047,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "MG\ts\tMuseum of Zoology\t\t\t", "MGA\ts\tInstituto Pedagogico de Varones\t\t\t", 
"MGAB\ts\tMuzeul de Istorie Naturala \"Grigore Antipa\"\t\t\t", -"MGAP\ts\tMuseu Anchieta\t\t\t", +"MGAP\ts\tMuseu Anchieta\tMAPA\t\t", "MGB\ts\tMuseo de Geologia (del Seminario Diocesano) de Barcelona\t\t\t", "MGC\ts\tUniversidad de Malaga, Departamento de Biologia Vegetal\t\t\t", "MGDL\ts\tMuseum d'Histoire Naturalle du Grand-Duchy de Luxembourg\t\t\t", @@ -4409,7 +4418,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "MSPP\tc\tMycology Section, Plant Pathology and Microbiology Division, Department of Agricultural Science\t\t\t", "MSSC\ts\tMidwestern State University\t\t\t", "MSTFM\ts\tMiddle School of the Third Factory Machinery\t\t\t", -"MSTR\ts\tWestfaelisches Museum fuer Naturkunde\t\t\t", +"MSTR\ts\tWestfalisches Museum fur Naturkunde\t\t\t", "MSU\tc\tAcetobacter\t\t\t", "MSU\ts\tMichigan State University Museum\tMSUC,MSUM\t\t", "MSUD\ts\tI. I. Mecynikov State University of Odessa, Department of Morphology and Systematics of Plants\t\t\t", @@ -4441,7 +4450,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "MTSU\ts\tMiddle Tennessee State University, Biology Department\t\t\t", "MTUF\ts\tUniversity Museum, Tokyo University of Fisheries\t\t\t", "MU\tc\tMugla Sitki Kocman University\t\t\t", -"MU\ts\tMiami University, Botany Department, Willard Sherman Turrell Herbarium\t\t\t", +"MU\ts\tMiami University, Botany Department, Willard Sherman Turrell Herbarium\t\thttp://herbarium.muohio.edu/herbariummu/\t", "MU\ts\tMidwestern University\t\t\t", "MUACC\tc\tMurdoch University Algal Culture Collection\t\t\t", "MUAF\tc\tCulture collection of Mendel University of Agriculture and Forestry in Brno\t\t\t", @@ -6192,6 +6201,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "SMR\ts\tSamara State University, Department for Ecology, Botany, and Nature Protection\t\t\t", "SMRG\tc\tSoil Microbiology Research Group, Division of Soil Science, Department of Agriculture\t\t\t", "SMRS\ts\tStavropol Museum of Regional Studies\t\t\t", 
+"SMRU\ts\tShoklo Malaria Research Unit\t\t\t", "SMS\ts\tMissouri State University, Department of Biology\t\t\t", "SMTP\ts\tSwedish Malaise Trap Project\t\t\t", "SMTWA\tc\tSchool of Medical Technology Western Australia\t\t\t", @@ -6349,7 +6359,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "STPE\ts\tFlorida Marine Research Institute, Florida Department of Environmental Protection\t\t\t", "STPH\ts\tDireccao Geral do Ambiente, Cabinet of Environment, Ministry of Natural Resources and Environment\t\t\t", "STPS\ts\tSt. Paul's School\t\t\t", -"STR\ts\tInstitut de Botanique\t\t\t", +"STR\ts\tUniversity of Strasbourg\t\t\t", "STRI\tsc\tSmithsonian Tropical Research Institute\tSCZ\t\t", "STRI:ICBG-Panama\tc\tSmithsonian Tropical Research Institute, International Cooperative Biodiversity Groups\t\t", "STS\ts\tStromness Museum\t\t\t", @@ -6690,7 +6700,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "TU:Invertebrates\ts\tTulane University, Museum of Natural History, Invertebrate Collection\t\t", "TU:Mammals\ts\tTulane University, Museum of Natural History, Mammal Collection\t\t", "TUAT\ts\tTokyo University of Agriculture\t\t\t", -"TUB\ts\tEberhard-Karls-Universitaet Tuebingen, Institut fuer Biologie I\t\t\t", +"TUB\ts\tEberhard-Karls-Universitat Tubingen, Institut fur Biologie I\t\t\t", "TUBSB\tb\tTohoku University Brassica Seed Bank\t\t\t", "TUC\ts\tUniversity of Arizona, Ecology and Evolutionary Biology Department\t\t\t", "TUCH\ts\tTribhuvan University, Central Department of Botany\t\t\t", @@ -6755,6 +6765,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "UAM:Fish\ts\tUniversity of Alaska, Museum of the North, Fish Collection\t", "UAM:Herb\ts\tUniversity of Alaska, Museum of the North, UAM Herbarium\t\t", "UAM:Herp\ts\tUniversity of Alaska, Museum of the North, Amphibian and Reptile Collection\t", +"UAM:Inv\ts\tUniversity of Alaska, Museum of the North, Invertebrate Collection\t", "UAM:Mamm\ts\tUniversity of Alaska, 
Museum of the North, Mammal Collection\t", "UAM:Moll\ts\tUniversity of Alaska, Museum of the North, Mollusc Collection\t\t", "UAM:Paleo\ts\tUniversity of Alaska, Museum of the North, paleontology collection\t\t", @@ -6900,7 +6911,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "UFRJ:POR\ts\tDepartramento de Zoologia, Universidade Federal do Rio de Janeiro, Porifera collection\t\t", "UFRJIM\tc\tDepartamento de Microbiologia Medica\t\t\t", "UFRN\ts\tUniversidade Federal do Rio Grande do Norte\t\t\t", -"UFRN:Fungos\ts\tUniversidade Federal do Rio Grande do Norte, Fungal collection\t\t", +"UFRN:Fungos\ts\tUniversidade Federal do Rio Grande do Norte, Fungal collection\t", "UFRR\ts\tUniversidade Federal de Roraima\t\t\t", "UFS\ts\tNyabyeya Forestry College, Department of Environmental Forestry\t\t\t", "UFSC\ts\tUniversidade Federal de Santa Catarina\t\t\t", @@ -7202,6 +7213,8 @@ static const char* const kInstitutionCollectionCodeList[] = { "USNM:ENT\ts\tNational Museum of Natural History, Smithsonian Institution, Entomology Collection\t", "USNM:FISH\ts\tNational Museum of Natural History, Smithsonian Institution, National Fish Collection\t", "USNM:Herp\ts\tNational Museum of Natural History, Smithsonian Institution, Division of Amphibians and Reptiles\t", +"USNM:Herp Image\ts\tNational Museum of Natural History, Smithsonian Institution, Herpetology image collection\t", +"USNM:Herp Tissue\ts\tNational Museum of Natural History, Smithsonian Institution, Herpetology tissue collection\t", "USNM:IZ\ts\tNational Museum of Natural History, Smithsonian Institution, Department of Invertebrate Zoology\t", "USNM:LAB\tb\tNational Museum of Natural History, Smithsonian Institution, Laboratories of Analytical Biology\t\t", "USNM:MAMM\ts\tNational Museum of Natural History, Smithsonian Institution, Division of Mammals\t", @@ -7343,7 +7356,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "VDAM\ts\tInstitute of Plant Science\t\t\t", 
"VDB\ts\tVanderbilt University, Department of Biological Sciences\t\t\t", "VECTOR\tsc\tState Research Center of Virology and Biotechnology\t\t\t", -"VEN\ts\tFundacion Instituto Botanico de Venezuela Dr. Tobias Lasser\t\t\t", +"VEN\ts\tHerbario Nacional de Venezuela, Instituto Experimental Jardin Botanico Dr. Tobias Lasser\t\t\t", "VENDA\ts\tThohoyandou Botanical Gardens, Department of Agriculture, Land & Environment\t\t\t", "VER\ts\tHerbario, Museo Civico di Storia Naturale, Verona\t\t\t", "VETMED\ts\tUniversity of Veterinary Medicine\t\t\t", @@ -7596,7 +7609,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "WTU\ts\tUniversity of Washington\t\t\t", "WTU:F\ts\tUniversity of Washington, Fungal Collection\t", "WTUH\ts\tUniversity of Washington Botanic Gardens, College of Forest Resources\t\t\t", -"WU\ts\tUniversitaet Wien\t\t\t", +"WU\ts\tUniversitat Wien\t\t\t", "WU\ts\tWayland University\t\t\t", "WUD\ts\tWayne State University, Biological Sciences Department\t\t\t", "WUH\ts\tWuhu School of Traditional Chinese Medicine\t\t\t", @@ -7711,7 +7724,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "YUTO\ts\tYork University, Biology Department\t\t\t", "YXDC\ts\tYuxi District Institute for Drug Control\t\t\t", "YZU\ts\tYuzhou University\t\t\t", -"Z\ts\tUniversitaet Zuerich\t\t\t", +"Z\tsb\tInstitut fur Systematische Botanik Universitat Zurich\t\t\t", "ZA\ts\tUniversity of Zagreb, Botany Department\tZAHO\t\t", "ZAD\ts\tMount Makulu Research Station\t\t\t", "ZAGR\ts\tUniversity of Zagreb\t\t\t", diff --git a/c++/src/objects/seqfeat/institution_codes.txt b/c++/src/objects/seqfeat/institution_codes.txt index 466b84c7..fa6c61d9 100644 --- a/c++/src/objects/seqfeat/institution_codes.txt +++ b/c++/src/objects/seqfeat/institution_codes.txt @@ -1,3 +1,4 @@ +# $Id: institution_codes.txt 581627 2019-03-04 16:48:48Z ivanov $ A s Arnold Arboretum, Harvard University AA s Ministry of Science, Academy of Sciences AAC c Arignar Anna College @@ -61,6 +62,7 
@@ ACNB s Agriculture Canada Research Station ACNS s Agriculture Canada Nova Scotia ACOI c Coimbra Collection of Algae ACOR s Universidad Nacional de Cordoba +ACSSI c Algal Collection of Soil Science Institute ACTC s Austin College ACU s Abilene Christian University ACUNHC s Abilene Christian University, Natural History Collection @@ -199,6 +201,7 @@ ANDA s Andalas University ANDES s Universidad de Los Andes ANDES:A s Universidad de Los Andes, Amphibian Collection ANDES:O s Universidad de Los Andes, Ornithology Collection +ANDES:T sb Universidad de Los Andes, Tissue Collection ANES s Anadolu University, Biology Department ANFC s Australian National Fish Collection ANFM s Associazione Naturalisti Forlivesi Pro Museo @@ -227,6 +230,7 @@ ANUC s Australian National University, Chemistry Department ANUG s Anhui Normal University, Geography Department ANWC s Australian National Wildlife Collection AO s Museo Regionale di Scienze Naturali della Valle d'Aosta +AORI s Atmosphere and Ocean Research Institute APCC c Antarctic Protistan Culture Collection (Woods Hole Oceanographic Institution) APCR s Arkansas Tech University, Biological Sciences Department APEI s Agriculture Canada Research Station @@ -359,7 +363,7 @@ BAH s Biologische Anstalt Helgoland Marine Station BAI s Instituto Forestal Nacional (IFONA), Centro Forestal Castelar BAIL s Conservatoire Botanique National de Bailleul BAJ s Instituto Municipal de Botanica, Parque Pte. Dr. Nicolas Avellaneda -BAK s Academy of Sciences of Azerbaijan +BAK s Azerbaijan National Academy of Sciences (ANAS) BAL s INTA, EEA Balcarce, Catedra de Botanica Agricola BALT s Towson University, Department of Biological Sciences BAMU s Dr. 
Babasaheb Ambedkar Marathwada University @@ -707,7 +711,7 @@ BTU s Technische Universitaet Berlin BU s Brock University BUA s University of Baghdad, Plant Protection Department BUAG s University of Agronomical Sciences and Veterinary Medicine, Botany and Plant Physiology Department -BUC s Universitatea din Bucuresti +BUC s Gradina Botanica D. Brandza BUCA s Institute of Biology, Romanian Academy BUCF s Forest Research and Management Institute BUCM s Institute of Biology Bucharest, Romanian Academy (Mycology Herbarium) @@ -785,6 +789,7 @@ CANI s Canisius College, Biology Department CANL s Canadian Museum of Nature, Lichenology Section CANM s Canadian Museum of Nature, Bryology Section CANT s South China Agricultural University, Forestry Department +CANTE sb Jardin Botanico El Charco del Ingenio CANTY s Canterbury Museum CANU s University of Canterbury, Department of Plant and Microbial Sciences CAPM c Collection of Animal Pathogenic Microorganisms @@ -996,7 +1001,7 @@ CFSHB s North Coast Regional Botanic Gardens CFUA s Universidad Austral de Chile CG c Embrapa Collection of Fungi of Invertebrates CGC b Caenorhabditis Genetics Center -CGE s University of Cambridge, Department of Plant Sciences +CGE s Cambridge University, Department of Plant Sciences CGEC s China Entomological Research Institute CGG s Cambridge University Botanic Garden CGH s National Museum of Prague @@ -2090,13 +2095,13 @@ GAM s University of Georgia GAM c Grupo Actinomicetales Merida Facultad de Medicina GAP s Conservatoire Botanique National Alpin GAS s Georgia Southern University, Department of Biology -GAT s Institute of Plant Genetics and Crop Plant Research +GAT s Leibniz Institute of Plant Genetics and Crop Plant Research (IPK) GAUA s Guangxi University GAUBA s Australian National University, Division of Botany and Zoology GAUF s Gansu Agricultural University GAW s Eastern Botanical Society of Glasgow GAZI s Gazi Ueniversitesi, Biyoloji Boeluemue -GB s Goeteborg University, Department of 
Plant and Environmental Sciences +GB s Goteborg University, Department of Plant and Environmental Sciences GBFM s Universidad de Panama GBG b Gotheburg Botanical Garden GBH s Herbarium of Geo. B. Hinton @@ -2289,9 +2294,10 @@ GXMI s Guangxi Institute of Traditional Medical and Pharmaceutical Sciences GXNM s Guangxi Natural History Museum, Herbarium GXSP s Guangxi School of Pharmacy GZAC s Guizhou Agricultural College, College of Life Science +GZCC c Guizhou Culture Collection GZM s Giessener Zoologisches Museum GZTM s Guizhou Institute of Traditional Chinese Medicine -GZU s Karl-Franzens-Universitaet Graz +GZU s Karl-Franzens-Universitat Graz GZUH s Guizhou University Herbarium H s University of Helsinki H-GSP s Howard University-Geological Survey of Pakistan Project @@ -2310,7 +2316,7 @@ HAJB s Jardin Botanico Nacional HAJU s Herbario Dr. Armando Jesus Urquiola HAK s Hokkaido University, Faculty of Fisheries HAKS s Hakgala Botanic Gardens -HAL s Martin-Luther-Universitaet +HAL s Martin-Luther-Universitat HALA s University of Alabama, Biological Sciences Department HALE s Haleakala National Park HALLE s Zoologisches Institut der Martin-Luther Universitaet @@ -2694,7 +2700,9 @@ IAUH s Islamic Azad University (Avicenna Herbarium) IAUM s Islamic Azad University of Mashhad IAUNT s Islamica Azad University, North Tehran Branch IAV sb Institut Agronomique et Veterinaire Hassan II, Departement d'Ecologie Vegetale -IAVH s Instituto de Ivestigacion de los Recursos Biologicos Alexander von Humboldt +IAVH sb Instituto de Ivestigacion de los Recursos Biologicos Alexander von Humboldt +IAVH:A sb Instituto de Ivestigacion de los Recursos Biologicos Alexander von Humboldt, Ornithological Collection +IAVH:CT sb Instituto de Ivestigacion de los Recursos Biologicos Alexander von Humboldt, Tissue Collection IB s Universitat Innsbruck IBA s Instituto Asturiano de Taxonomia y Ecologia Vegetal IBA c Collection of Microorganisms Producing Antibiotics @@ -2765,7 +2773,8 @@ ICIS s Idaho 
Museum of Natural History ICM s Instituto de Ciencias del Mar ICMD ICMP c International Collection of Microorganisms from Plants PDDCC http://scd.landcareresearch.co.nz/Specimen/ICMP_&spec; ICN s Universidade Federal do Rio Grande do Sul, Departamento de Botanica -ICN s Instituto de Ciencias Naturales, Museo de Historia Natural +ICN sb Instituto de Ciencias Naturales, Museo de Historia Natural +ICN:Aves sb Instituto de Ciencias Naturales, Museo de Historia Natural, Ornithological collection ICP s Islamia College, University of Peshawar, Botany Department ICPB c International Collection of Phytopathogenic Bacteria ICPPB s International Collection of Plant Pathogenic Bacteria @@ -2780,6 +2789,7 @@ ICUI s University of Iowa ICVI s The Volcani Center ID s University of Idaho, Biological Sciences Department IDAC cb The International Depositary Authority of Canada +IDB sb Koltzov Institute of Developmental Biology Russian Academy of Sciences IDEA s Instituto de Agronomia IDF s University of Idaho IDS s Idaho State University, Biological Sciences Department @@ -3805,7 +3815,7 @@ MAIA s Instituto Nacional de Investigaciones Agrarias, Departamento de Ecologia MAIC s Mediterranean Agronomic Institute of Chania, Department of Natural Products MAINE s University of Maine, Department of Biological Sciences MAIS s Institut d'Elevage et de Medecine Veterinaire des Pays Tropicaux, Departement de Botanique -MAK s Tokyo Metropolitan University +MAK s Tokyo Metropolitan University, Makino Herbarium MAKAR s Institut Planina i More MAKFUNGI s Macedonian Collection of Fungi,Institute of Biology Fungi Macedonici MAL s Botanic Gardens of Malawi @@ -3818,7 +3828,6 @@ MANCH s University of Manchester MAND s Agricultural College and Research Institute MANK s Minnesota State University-Mankato, Department of Biological Sciences MAO c Mircen Afrique Ouest -MAPA s Museu Anchieta Porto Alegra MAPR s University of Puerto Rico, Mayagueez Campus, Biology Department MAR c Grasslands Rhizobium 
Collection MARDI s Malaysian Agricultural Research and Development Institute MDI @@ -4005,7 +4014,7 @@ MG s Museu Paraense Emilio Goeldi, Departamento de Botanica MG s Museum of Zoology MGA s Instituto Pedagogico de Varones MGAB s Muzeul de Istorie Naturala "Grigore Antipa" -MGAP s Museu Anchieta +MGAP s Museu Anchieta MAPA MGB s Museo de Geologia (del Seminario Diocesano) de Barcelona MGC s Universidad de Malaga, Departamento de Biologia Vegetal MGDL s Museum d'Histoire Naturalle du Grand-Duchy de Luxembourg @@ -4376,7 +4385,7 @@ MSPC s Museo di Storia Naturale "Pietro Calderini" MSPP c Mycology Section, Plant Pathology and Microbiology Division, Department of Agricultural Science MSSC s Midwestern State University MSTFM s Middle School of the Third Factory Machinery -MSTR s Westfaelisches Museum fuer Naturkunde +MSTR s Westfalisches Museum fur Naturkunde MSU c Acetobacter MSU s Michigan State University Museum MSUC,MSUM MSUD s I. I. Mecynikov State University of Odessa, Department of Morphology and Systematics of Plants @@ -4408,7 +4417,7 @@ MTSN s Trento Museum of Natural Sciences MTSU s Middle Tennessee State University, Biology Department MTUF s University Museum, Tokyo University of Fisheries MU c Mugla Sitki Kocman University -MU s Miami University, Botany Department, Willard Sherman Turrell Herbarium +MU s Miami University, Botany Department, Willard Sherman Turrell Herbarium http://herbarium.muohio.edu/herbariummu/ MU s Midwestern University MUACC c Murdoch University Algal Culture Collection MUAF c Culture collection of Mendel University of Agriculture and Forestry in Brno @@ -6159,6 +6168,7 @@ SMP s The State Museum of Pennsylvania SMR s Samara State University, Department for Ecology, Botany, and Nature Protection SMRG c Soil Microbiology Research Group, Division of Soil Science, Department of Agriculture SMRS s Stavropol Museum of Regional Studies +SMRU s Shoklo Malaria Research Unit SMS s Missouri State University, Department of Biology SMTP s Swedish 
Malaise Trap Project SMTWA c School of Medical Technology Western Australia @@ -6316,7 +6326,7 @@ STPCM s Island Museum, Candie Gardens STPE s Florida Marine Research Institute, Florida Department of Environmental Protection STPH s Direccao Geral do Ambiente, Cabinet of Environment, Ministry of Natural Resources and Environment STPS s St. Paul's School -STR s Institut de Botanique +STR s University of Strasbourg STRI sc Smithsonian Tropical Research Institute SCZ STRI:ICBG-Panama c Smithsonian Tropical Research Institute, International Cooperative Biodiversity Groups STS s Stromness Museum @@ -6657,7 +6667,7 @@ TU:Herptiles s Tulane University, Museum of Natural History, Amphibian A TU:Invertebrates s Tulane University, Museum of Natural History, Invertebrate Collection TU:Mammals s Tulane University, Museum of Natural History, Mammal Collection TUAT s Tokyo University of Agriculture -TUB s Eberhard-Karls-Universitaet Tuebingen, Institut fuer Biologie I +TUB s Eberhard-Karls-Universitat Tubingen, Institut fur Biologie I TUBSB b Tohoku University Brassica Seed Bank TUC s University of Arizona, Ecology and Evolutionary Biology Department TUCH s Tribhuvan University, Central Department of Botany @@ -6722,6 +6732,7 @@ UAM:ES s University of Alaska, Museum of the North, Earth Science UAM:Fish s University of Alaska, Museum of the North, Fish Collection UAM:Herb s University of Alaska, Museum of the North, UAM Herbarium UAM:Herp s University of Alaska, Museum of the North, Amphibian and Reptile Collection +UAM:Inv s University of Alaska, Museum of the North, Invertebrate Collection UAM:Mamm s University of Alaska, Museum of the North, Mammal Collection UAM:Moll s University of Alaska, Museum of the North, Mollusc Collection UAM:Paleo s University of Alaska, Museum of the North, paleontology collection @@ -6867,7 +6878,7 @@ UFRJ s Departramento de Zoologia, Universidade Federal do Rio de Janeiro UFRJ:POR s Departramento de Zoologia, Universidade Federal do Rio de Janeiro, 
Porifera collection UFRJIM c Departamento de Microbiologia Medica UFRN s Universidade Federal do Rio Grande do Norte -UFRN:Fungos s Universidade Federal do Rio Grande do Norte, Fungal collection +UFRN:Fungos s Universidade Federal do Rio Grande do Norte, Fungal collection UFRR s Universidade Federal de Roraima UFS s Nyabyeya Forestry College, Department of Environmental Forestry UFSC s Universidade Federal de Santa Catarina @@ -7169,6 +7180,8 @@ USNM:Birds s National Museum of Natural History, Smithsonian Institution, Divisi USNM:ENT s National Museum of Natural History, Smithsonian Institution, Entomology Collection USNM:FISH s National Museum of Natural History, Smithsonian Institution, National Fish Collection USNM:Herp s National Museum of Natural History, Smithsonian Institution, Division of Amphibians and Reptiles +USNM:Herp Image s National Museum of Natural History, Smithsonian Institution, Herpetology image collection +USNM:Herp Tissue s National Museum of Natural History, Smithsonian Institution, Herpetology tissue collection USNM:IZ s National Museum of Natural History, Smithsonian Institution, Department of Invertebrate Zoology USNM:LAB b National Museum of Natural History, Smithsonian Institution, Laboratories of Analytical Biology USNM:MAMM s National Museum of Natural History, Smithsonian Institution, Division of Mammals @@ -7310,7 +7323,7 @@ VDAC s Virginia Department of Agriculture and Consumer Services VDAM s Institute of Plant Science VDB s Vanderbilt University, Department of Biological Sciences VECTOR sc State Research Center of Virology and Biotechnology -VEN s Fundacion Instituto Botanico de Venezuela Dr. Tobias Lasser +VEN s Herbario Nacional de Venezuela, Instituto Experimental Jardin Botanico Dr. 
Tobias Lasser VENDA s Thohoyandou Botanical Gardens, Department of Agriculture, Land & Environment VER s Herbario, Museo Civico di Storia Naturale, Verona VETMED s University of Veterinary Medicine @@ -7563,7 +7576,7 @@ WTSU s West Texas A&M University, Natural History Collection WTU s University of Washington WTU:F s University of Washington, Fungal Collection WTUH s University of Washington Botanic Gardens, College of Forest Resources -WU s Universitaet Wien +WU s Universitat Wien WU s Wayland University WUD s Wayne State University, Biological Sciences Department WUH s Wuhu School of Traditional Chinese Medicine @@ -7678,7 +7691,7 @@ YUO s Youngstown State University, Biological Sciences Department YUTO s York University, Biology Department YXDC s Yuxi District Institute for Drug Control YZU s Yuzhou University -Z s Universitaet Zuerich +Z sb Institut fur Systematische Botanik Universitat Zurich ZA s University of Zagreb, Botany Department ZAHO ZAD s Mount Makulu Research Station ZAGR s University of Zagreb diff --git a/c++/src/objects/seqfeat/prepare_taxtable.sh b/c++/src/objects/seqfeat/prepare_taxtable.sh index fcbe44d3..e2679a8b 100644 --- a/c++/src/objects/seqfeat/prepare_taxtable.sh +++ b/c++/src/objects/seqfeat/prepare_taxtable.sh @@ -1,4 +1,5 @@ #! 
/bin/sh +cut -f 3,7 top900_orgn_nucl.txt > lineages.txt cut -f 3 top900_orgn_nucl.txt > taxids.txt cat taxids.txt | epost -db taxonomy | @@ -19,7 +20,9 @@ sed \ -e 's/Viruses<\/Division>/VRL<\/Division>/g' \ > taxa.xml cat taxa.xml | -xtract -pattern Taxon -COMM "(-)" -COMM GenbankCommonName -PGCODE "(-)" \ +xtract -transform lineages.txt -pattern Taxon -COMM "(-)" -COMM GenbankCommonName -PGCODE "(-)" \ -block Property -if PropName -equals pgcode -PGCODE PropValueInt \ - -block Taxon -element ScientificName "&COMM" GCId MGCId "&PGCODE" TaxId Division Lineage > tax_table.txt -cat tax_table.txt | sort -f > sorted_taxlist.txt + -block Taxon -element ScientificName "&COMM" GCId MGCId "&PGCODE" TaxId Division \ + -translate TaxId > tax_table.txt +cat tax_table.txt | sort -f > common_tax.txt +/am/ncbiapdata/scripts/misc/txt2inc.sh common_tax.txt diff --git a/c++/src/objects/seqloc/PDB_seq_id.cpp b/c++/src/objects/seqloc/PDB_seq_id.cpp index c8666922..a582fd76 100644 --- a/c++/src/objects/seqloc/PDB_seq_id.cpp +++ b/c++/src/objects/seqloc/PDB_seq_id.cpp @@ -1,4 +1,4 @@ -/* $Id: PDB_seq_id.cpp 567352 2018-07-18 00:36:08Z kans $ +/* $Id: PDB_seq_id.cpp 578935 2019-01-28 18:19:28Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -58,7 +58,7 @@ bool CPDB_seq_id::Match(const CPDB_seq_id& psip2) const } } if (IsSetChain_id() && psip2.IsSetChain_id()) { - if (! 
PCase().Compare(GetChain_id(), psip2.GetChain_id())) { + if ( PCase().Compare(GetChain_id(), psip2.GetChain_id())) { return false; } } diff --git a/c++/src/objects/seqloc/Seq_id.cpp b/c++/src/objects/seqloc/Seq_id.cpp index 2ada6189..1c919ecd 100644 --- a/c++/src/objects/seqloc/Seq_id.cpp +++ b/c++/src/objects/seqloc/Seq_id.cpp @@ -1,4 +1,4 @@ -/* $Id: Seq_id.cpp 574526 2018-11-14 19:49:28Z ivanov $ +/* $Id: Seq_id.cpp 582149 2019-03-11 18:03:06Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -63,6 +63,10 @@ #include #include #include +#include +#include +#include +#include #include "accguide.inc" @@ -1275,12 +1279,17 @@ CSeq_id::x_IdentifyAccession(const CTempString& main_acc, TParseFlags flags, const unsigned char* ucdata = (const unsigned char*)main_acc.data(); if (non_dig_pos != NPOS && (flags & fParse_RawText) != 0) { if ( !has_version && digit_pos == 0 && main_size >= 4 - && (main_size <= 7 || strchr("|-_", main_acc[4])) - && isalnum(ucdata[1]) && isalnum(ucdata[2]) - && isalnum(ucdata[3])) { + && non_dig_pos < 5 && isalnum(ucdata[1]) + && isalnum(ucdata[2]) && isalnum(ucdata[3])) { // Possible PDB (always unversioned); examine further // to avoid false positives. - switch (main_size) { + if (main_size > 4 && main_size <= 17 + && strchr("|-_", main_acc[4]) + && (main_size <= 6 || isalnum(ucdata[5]))) { + // Conventionally delimited + return eAcc_pdb; + } else switch (main_size) { + /* case 7: if ((main_acc[5] != main_acc[6] && (main_acc[5] != 'V' || main_acc[6] != 'B')) @@ -1291,8 +1300,7 @@ CSeq_id::x_IdentifyAccession(const CTempString& main_acc, TParseFlags flags, // Be extra strict when the potential molecule ID // could simply be a year. (NB: *insisting* on a // non-digit would rule out 1914|A, gi 157829621.) 
- if ((non_dig_pos < 4 && ispunct(ucdata[4])) - || strchr("|-_", main_acc[4])) { + if ((non_dig_pos < 4 && ispunct(ucdata[4]))) { return eAcc_pdb; } break; @@ -1300,12 +1308,10 @@ CSeq_id::x_IdentifyAccession(const CTempString& main_acc, TParseFlags flags, if ((flags & fParse_ValidLocal) == 0) { break; } // else fall through + */ case 4: return eAcc_pdb; } - if (strchr("|-_", main_acc[4])) { - return eAcc_pdb; - } } if (digit_pos == 1 && main_size == 6 && (main_acc[0] == 'O' || main_acc[0] == 'P' @@ -1653,7 +1659,31 @@ void CSeq_id::GetLabel(string* label, ELabelType type, TLabelFlags flags) const case eBoth: x_GetLabel_Type(*this, label, flags); *label += "|"; - x_GetLabel_Content(*this, label, flags, NULL); + if (flags & fLabel_UpperCase) { + NStr::ToUpper(*label); + // ID-5290 : This function may be called for primary or secondary + // Seq-ids (e.g. gis), so need to check both primary and secondary id + // values returned from the ComposeOSLT function. In the latter case, + // always look at the first secondary ID in the list (there's almost + // always just one anyway). + // CXX-10440 : Original default version of ComposeOSLT function returns + // empty string for local ids, but in this context local Seq-ids must + // be parsed, hence use a special flag. + string primary_id; + list secondary_id_list; + primary_id = ComposeOSLT(&secondary_id_list, fAllowLocalId); + if (!primary_id.empty()) + *label += primary_id; + else if (secondary_id_list.size() > 0) + *label += *secondary_id_list.begin(); + if (flags & fLabel_Version) { + const CTextseq_id* tsid = GetTextseq_Id(); + if (tsid && tsid->IsSetVersion()) + *label += "." 
+ NStr::IntToString(tsid->GetVersion()); + } + } else { + x_GetLabel_Content(*this, label, flags, NULL); + } break; case eType: @@ -1664,8 +1694,14 @@ void CSeq_id::GetLabel(string* label, ELabelType type, TLabelFlags flags) const x_GetLabel_Content(*this, label, flags, NULL); break; } -} + if ((flags & fLabel_Trimmed) != 0 + && (type == eFasta || type == eFastaContent)) { + while ((*label)[label->size() - 1] == '|') { + label->resize(label->size() - 1); + } + } +} void CSeq_id::GetLabel(string* label, int* version, ELabelType type) const { @@ -1675,9 +1711,17 @@ void CSeq_id::GetLabel(string* label, int* version, ELabelType type) const switch (type) { case eFasta: - *label = AsFastaString(); + *label += AsFastaString(); break; + case eFastaContent: + { + CNcbiOstrstream oss; + x_WriteContentAsFasta(oss); + *label += CNcbiOstrstreamToString(oss); + break; + } + case eBoth: x_GetLabel_Type(*this, label, 0); *label += "|"; @@ -1725,12 +1769,15 @@ void CSeq_id::WriteAsFasta(ostream& out) the_type = e_not_set; if (IsPatent() && !GetPatent().GetCit().GetId().IsNumber() ) { - out << "pgp|"; + const char pgp[] = "pgp|"; + out.write(pgp, sizeof(pgp) - 1); } else if (IsSwissprot() && GetSwissprot().IsSetRelease() && GetSwissprot().GetRelease() == "unreviewed") { - out << "tr|"; + const char tr[] = "tr|"; + out.write(tr, sizeof(tr) - 1); } else { - out << s_TextId[the_type] << '|'; + out.write(s_TextId[the_type], strlen(s_TextId[the_type])); + out.put('|'); } x_WriteContentAsFasta(out); @@ -1816,10 +1863,18 @@ void CSeq_id::x_WriteContentAsFasta(ostream& out) const } } - const string CSeq_id::AsFastaString(void) const { +#ifdef HAVE_THREAD_LOCAL + thread_local static CNcbiOstrstream str; + str.seekp(0); + + // VS2017 needs this call presumably because the first time seekp(0) is + // called on an empty stream and thus a failbit is set. 
+ str.clear(); +#else CNcbiOstrstream str; +#endif WriteAsFasta(str); return CNcbiOstrstreamToString(str); } @@ -2834,6 +2889,129 @@ bool CSeq_id::AvoidGi(void) } +string CSeq_id::ComposeOSLT(list* secondary_id_list, + TComposeOSLTFlags parse_flags) const +{ + string primary_id; + string secondary_id; + E_Choice seqid_type = Which(); + + switch (seqid_type) { + case e_Giim: + primary_id = NStr::IntToString(GetGiim().GetId()); + break; + case e_Gibbsq: + primary_id = NStr::IntToString(GetGibbsq()); + break; + case e_Gibbmt: + primary_id = NStr::IntToString(GetGibbmt()); + break; + case e_Pir: + case e_Prf: + { + // This is a Textseq-id, however primary id is normally stored in the + // name field. + // For PIR, if name is empty, id is allowed to be placed in the accession field; + // For PRF only name is allowed! + const CTextseq_id* tsid = GetTextseq_Id(); + if (tsid->CanGetName()) + primary_id = tsid->GetName(); + else if (seqid_type == e_Pir && tsid->CanGetAccession()) + primary_id = tsid->GetAccession(); + break; + } + case e_Patent: + if (secondary_id_list) { + // All patents have GenBank Seq-ids, so id string derived from a patent + // seqid is always secondary + const CId_pat& pat = GetPatent().GetCit(); + secondary_id = pat.GetCountry() + "|" + + (pat.GetId().IsNumber() ? + pat.GetId().GetNumber() : pat.GetId().GetApp_number()) + "|" + + NStr::IntToString(GetPatent().GetSeqid()); + } + break; + case e_Pdb: + { + const CPDB_seq_id& pdb = GetPdb(); + primary_id = pdb.GetMol().Get(); + if (pdb.IsSetChain_id()) { + const char* ptr = pdb.GetChain_id().c_str(); + char buf[256]; + size_t pos = 0; + while (*ptr != '\0') { + buf[pos++] = toupper(*ptr); + if (islower(*ptr)) + buf[pos++] = '+'; + ++ptr; + } + buf[pos] = '\0'; + primary_id += string(buf); + } else if (pdb.IsSetChain() && pdb.GetChain() != ' ') { + // Old style single-character chain. 
For lower case, append a '+' sign + char chain = pdb.GetChain(); + primary_id += string(1, chain); + if (islower(chain)) + primary_id += "+"; + } + break; + } + case e_General: + { + // General ids are always secondary! + if (secondary_id_list) { + const CObject_id& dbtag = GetGeneral().GetTag(); + secondary_id = GetGeneral().GetDb() + "|"; + if (dbtag.IsId()) + secondary_id += NStr::IntToString(dbtag.GetId()); + else + secondary_id += dbtag.GetStr(); + } + break; + } + case e_Gi: + // GIs are always secondary + if (secondary_id_list) { + secondary_id = NStr::NumericToString(GetGi()); + } + break; + case CSeq_id::e_Local: + { + if ((parse_flags & fAllowLocalId) != 0 && secondary_id_list) { + const CObject_id& oid = GetLocal(); + if (oid.IsId()) { + secondary_id = NStr::IntToString(oid.GetId()); + } else if (oid.IsStr()) { + secondary_id = oid.GetStr(); + } + } + break; + } + default: + { + // In the logic below, any Textseq-id is treated as primary. However a + // Bioseq object may contain multiple Textseq-ids in its list of Seq-ids, + // e.g. when RefSeq takes over a preexisting GPIPE record. 
+ const CTextseq_id* tsid = GetTextseq_Id(); + if (tsid) { + if (tsid->CanGetAccession()) + primary_id = tsid->GetAccession(); + if (secondary_id_list && tsid->CanGetName() && !tsid->GetName().empty()) + secondary_id = tsid->GetName(); + } + break; + } + } + + NStr::ToUpper(primary_id); + if (secondary_id_list && !secondary_id.empty()) { + NStr::ToUpper(secondary_id); + secondary_id_list->emplace_back(secondary_id); + } + return primary_id; +} + + SSeqIdRange::SSeqIdRange(const CTempString& s, TFlags flags) : start(0), stop(0), digits(0), acc_info(CSeq_id::eAcc_unknown) { diff --git a/c++/src/objects/seqloc/Seq_loc.cpp b/c++/src/objects/seqloc/Seq_loc.cpp index f44074fd..1dd60db0 100644 --- a/c++/src/objects/seqloc/Seq_loc.cpp +++ b/c++/src/objects/seqloc/Seq_loc.cpp @@ -1,4 +1,4 @@ -/* $Id: Seq_loc.cpp 556251 2018-01-29 14:20:19Z grichenk $ +/* $Id: Seq_loc.cpp 581839 2019-03-06 16:08:55Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -4417,8 +4417,9 @@ void x_PushRange(CSeq_loc& dst, } } else if ( rg.GetLength() == 1 && - ( !rg.IsSetFuzzFrom() || !rg.IsSetFuzzTo() || - rg.GetFuzzFrom().Equals(rg.GetFuzzTo()) ) ) + rg.IsSetFuzzFrom() == rg.IsSetFuzzTo() && + ( !rg.IsSetFuzzFrom() || + rg.GetFuzzFrom().Equals(rg.GetFuzzTo()) ) ) { // Preserve points CRef pnt(new CSeq_point); diff --git a/c++/src/objects/seqloc/Textseq_id.cpp b/c++/src/objects/seqloc/Textseq_id.cpp index 0b22cb90..801c6e39 100644 --- a/c++/src/objects/seqloc/Textseq_id.cpp +++ b/c++/src/objects/seqloc/Textseq_id.cpp @@ -1,4 +1,4 @@ -/* $Id: Textseq_id.cpp 408266 2013-07-29 03:59:33Z vakatov $ +/* $Id: Textseq_id.cpp 578921 2019-01-28 18:16:38Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -219,7 +219,7 @@ int CTextseq_id::Compare(const CTextseq_id& tsip2) const return diff; } } - + // All checks failed to distinguish Seq-ids. 
return 0; } @@ -233,12 +233,13 @@ ostream& CTextseq_id::AsFastaString(ostream& s, bool allow_version) const if (allow_version && IsSetVersion()) { int version = GetVersion(); if (version) { - s << '.' << version; + s.put('.'); + s << version; } } } - s << '|'; + s.put('|'); if ( IsSetName() ) { s << GetName(); // no Upcase per Ostell - Karl 7/2001 } diff --git a/c++/src/objects/seqloc/accguide.inc b/c++/src/objects/seqloc/accguide.inc index 70212361..4d201cf4 100644 --- a/c++/src/objects/seqloc/accguide.inc +++ b/c++/src/objects/seqloc/accguide.inc @@ -1,4 +1,4 @@ -/* $Id: accguide.inc 574610 2018-11-15 18:32:35Z ucko $ +/* $Id: accguide.inc 582108 2019-03-11 14:36:11Z ucko $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -31,7 +31,7 @@ */ static const char* const kBuiltInGuide[] = { - "# $Id: accguide.inc 574610 2018-11-15 18:32:35Z ucko $", + "# $Id: accguide.inc 582108 2019-03-11 14:36:11Z ucko $", "version 1 # of file format", "", "# three-letter-prefix protein accessions (traditionally with five digits)", @@ -54,7 +54,8 @@ static const char* const kBuiltInGuide[] = { "3+9 D?? gb_tpa_prot", "3+11 D?? gb_tpa_prot", "3+5 E?? gb_wgs_prot", - "3+7 E?? gb_prot", + "3+7 E?? gb_wgs_prot # fallback to placate older clients", + "3+7 E?? gb_wgsv_prot", "3+9 E?? gb_prot", "3+11 E?? gb_prot", "3+5 F?? ddbj_tpa_prot", @@ -67,7 +68,8 @@ static const char* const kBuiltInGuide[] = { "3+11 G?? ddbj_prot", "3+5 H?? gb_tpa_wgs_prot # fallback to placate older clients", "3+5 H?? gb_tpa_wgsv_prot", - "3+7 H?? gb_tpa_prot", + "3+7 H?? gb_tpa_wgs_prot # fallback to placate older clients", + "3+7 H?? gb_tpa_wgsv_prot", "3+9 H?? gb_tpa_prot", "3+11 H?? gb_tpa_prot", "3+5 I?? ddbj_tpa_prot # fallback to placate older clients", @@ -89,12 +91,14 @@ static const char* const kBuiltInGuide[] = { "3+11 L?? ddbj_prot", "3+5 M?? gb_wgs_prot # fallback to placate older clients", "3+5 M?? gb_wgsv_prot", - "3+7 M?? gb_prot", + "3+7 M?? 
gb_wgs_prot # fallback to placate older clients", + "3+7 M?? gb_wgsv_prot", "3+9 M?? gb_prot", "3+11 M?? gb_prot", "3+5 N?? gb_wgs_prot # fallback to placate older clients", "3+5 N?? gb_wgsv_prot", - "3+7 N?? gb_prot", + "3+7 N?? gb_wgs_prot # fallback to placate older clients", + "3+7 N?? gb_wgsv_prot", "3+9 N?? gb_prot", "3+11 N?? gb_prot", "3+5 O?? gb_wgs_prot", @@ -117,7 +121,7 @@ static const char* const kBuiltInGuide[] = { "3+7 S?? embl_prot", "3+9 S?? embl_prot", "3+11 S?? embl_prot", - "3+5 T?? gb_prot", + "3+5 T?? gb_wgs_prot", "3+7 T?? gb_prot", "3+9 T?? gb_prot", "3+11 T?? gb_prot", @@ -249,6 +253,9 @@ static const char* const kBuiltInGuide[] = { "6+9 C????? embl_wgs_nuc", "6+10 C????? embl_wgs_nuc", "6+11 C????? embl_wgs_nuc", + "6+9 D????? gb_tpa_wgs_nuc", + "6+10 D????? gb_tpa_wgs_nuc", + "6+11 D????? gb_tpa_wgs_nuc", "6+9 ?????? unreserved_nuc", "6+10 ?????? unreserved_nuc", "6+11 ?????? unreserved_nuc", @@ -279,6 +286,9 @@ static const char* const kBuiltInGuide[] = { "7+8 NZ_???? refseq_wgs_nuc", "7+9 NZ_???? refseq_wgs_nuc", "7+10 NZ_???? refseq_wgs_nuc", + "9+9 NZ_?????? refseq_wgs_nuc", + "9+10 NZ_?????? refseq_wgs_nuc", + "9+11 NZ_?????? refseq_wgs_nuc", "3+9 WC_ refseq_chromosome # fallback to placate older clients", "3+9 WC_ refseq_chromosome_ncbo", "3+9 WP_ refseq_prot # fallback to placate older clients", diff --git a/c++/src/objects/seqloc/accguide.txt b/c++/src/objects/seqloc/accguide.txt index f912c84a..47a4c973 100644 --- a/c++/src/objects/seqloc/accguide.txt +++ b/c++/src/objects/seqloc/accguide.txt @@ -1,4 +1,4 @@ -# $Id: accguide.txt 574610 2018-11-15 18:32:35Z ucko $ +# $Id: accguide.txt 582108 2019-03-11 14:36:11Z ucko $ version 1 # of file format # three-letter-prefix protein accessions (traditionally with five digits) @@ -21,7 +21,8 @@ version 1 # of file format 3+9 D?? gb_tpa_prot 3+11 D?? gb_tpa_prot 3+5 E?? gb_wgs_prot -3+7 E?? gb_prot +3+7 E?? gb_wgs_prot # fallback to placate older clients +3+7 E?? gb_wgsv_prot 3+9 E?? 
gb_prot 3+11 E?? gb_prot 3+5 F?? ddbj_tpa_prot @@ -34,7 +35,8 @@ version 1 # of file format 3+11 G?? ddbj_prot 3+5 H?? gb_tpa_wgs_prot # fallback to placate older clients 3+5 H?? gb_tpa_wgsv_prot -3+7 H?? gb_tpa_prot +3+7 H?? gb_tpa_wgs_prot # fallback to placate older clients +3+7 H?? gb_tpa_wgsv_prot 3+9 H?? gb_tpa_prot 3+11 H?? gb_tpa_prot 3+5 I?? ddbj_tpa_prot # fallback to placate older clients @@ -56,12 +58,14 @@ version 1 # of file format 3+11 L?? ddbj_prot 3+5 M?? gb_wgs_prot # fallback to placate older clients 3+5 M?? gb_wgsv_prot -3+7 M?? gb_prot +3+7 M?? gb_wgs_prot # fallback to placate older clients +3+7 M?? gb_wgsv_prot 3+9 M?? gb_prot 3+11 M?? gb_prot 3+5 N?? gb_wgs_prot # fallback to placate older clients 3+5 N?? gb_wgsv_prot -3+7 N?? gb_prot +3+7 N?? gb_wgs_prot # fallback to placate older clients +3+7 N?? gb_wgsv_prot 3+9 N?? gb_prot 3+11 N?? gb_prot 3+5 O?? gb_wgs_prot @@ -84,7 +88,7 @@ version 1 # of file format 3+7 S?? embl_prot 3+9 S?? embl_prot 3+11 S?? embl_prot -3+5 T?? gb_prot +3+5 T?? gb_wgs_prot 3+7 T?? gb_prot 3+9 T?? gb_prot 3+11 T?? gb_prot @@ -216,6 +220,9 @@ version 1 # of file format 6+9 C????? embl_wgs_nuc 6+10 C????? embl_wgs_nuc 6+11 C????? embl_wgs_nuc +6+9 D????? gb_tpa_wgs_nuc +6+10 D????? gb_tpa_wgs_nuc +6+11 D????? gb_tpa_wgs_nuc 6+9 ?????? unreserved_nuc 6+10 ?????? unreserved_nuc 6+11 ?????? unreserved_nuc @@ -246,6 +253,9 @@ version 1 # of file format 7+8 NZ_???? refseq_wgs_nuc 7+9 NZ_???? refseq_wgs_nuc 7+10 NZ_???? refseq_wgs_nuc +9+9 NZ_?????? refseq_wgs_nuc +9+10 NZ_?????? refseq_wgs_nuc +9+11 NZ_?????? 
refseq_wgs_nuc 3+9 WC_ refseq_chromosome # fallback to placate older clients 3+9 WC_ refseq_chromosome_ncbo 3+9 WP_ refseq_prot # fallback to placate older clients diff --git a/c++/src/objects/taxon1/cache.cpp b/c++/src/objects/taxon1/cache.cpp index 238a9471..b2dfbfe2 100644 --- a/c++/src/objects/taxon1/cache.cpp +++ b/c++/src/objects/taxon1/cache.cpp @@ -1,4 +1,4 @@ -/* $Id: cache.cpp 555946 2018-01-24 17:15:32Z domrach $ +/* $Id: cache.cpp 578162 2019-01-14 19:38:55Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -159,6 +159,7 @@ COrgRefCache::LookupAndAdd( TTaxId tax_id, CTaxon1Node** ppData ) pIt->AddChild( pNode ); pIt->GoNode( pNode ); } + delete pIt; } else { // Store secondary in index m_ppEntries[ tax_id ] = pNode; } diff --git a/c++/src/objects/valerr/ValidErrItem.cpp b/c++/src/objects/valerr/ValidErrItem.cpp index b40a6a0f..6ac43c0d 100644 --- a/c++/src/objects/valerr/ValidErrItem.cpp +++ b/c++/src/objects/valerr/ValidErrItem.cpp @@ -1,4 +1,4 @@ -/* $Id: ValidErrItem.cpp 572668 2018-10-17 17:13:44Z ivanov $ +/* $Id: ValidErrItem.cpp 581296 2019-02-27 16:27:53Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -956,6 +956,9 @@ static const TErrTypStrs sc_ErrStrs[] = { { eErr_SEQ_DESCR_TaxonomyEmptyInput, { "TaxonomyEmptyInput", "No valid input to perform Taxonomy request" } } , + { eErr_SEQ_DESCR_ChromosomeWithoutLocation, + {"ChromosomeWithoutLocation", + "Location should be chromosome if chromosome name is present" } }, /* SEQ_GENERIC */ diff --git a/c++/src/objects/valid/validrules.inc b/c++/src/objects/valid/validrules.inc index 144efa13..be094411 100644 --- a/c++/src/objects/valid/validrules.inc +++ b/c++/src/objects/valid/validrules.inc @@ -1,4 +1,4 @@ -/* $Id: validrules.inc 559269 2018-03-08 17:28:50Z bollin $ +/* $Id: validrules.inc 580316 2019-02-13 13:45:43Z ivanov $ * 
=========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1159,6 +1159,481 @@ static const char* const s_Defaultvalidrules[] = { " field-name \"Other Catalog Numbers\" ,", " required FALSE }", " }", + " },", + " {", + " prefix \"##Evidence-For-Name-Assignment-START##\" ,", + " fields {", + " {", + " field-name \"Evidence Category\" ,", + " required TRUE ,", + " severity error } ,", + " {", + " field-name \"Evidence Accession\" ,", + " required TRUE ,", + " severity error } ,", + " {", + " field-name \"Evidence Source\" ,", + " required TRUE ,", + " severity error } ,", + " {", + " field-name \"Source Identifier\" ,", + " required FALSE } ,", + " {", + " field-name \"Comment\" ,", + " required FALSE }", + " } ,", + " require-order FALSE ,", + " allow-unlisted TRUE", + " },", + " {", + " prefix \"MIMARKS:5.0-Data-START\" ,", + " fields {", + " {", + " field-name \"investigation_type\" ,", + " match-expression \"^mimarks-survey\\|mimarks-specimen\\|$\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"project_name\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"lat_lon\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"geo_loc_name\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"collection_date\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_broad_scale\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_local_scale\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_medium\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"seq_meth\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"target_gene\" ,", + " required TRUE ,", + " severity info }", + " } ,", + " require-order FALSE ,", + " allow-unlisted TRUE ,", + " dependent-rules {", + " {", + " match-name 
\"investigation_type\" ,", + " value-constraint \"^mimarks-specimen$\" ,", + " other-fields {", + " {", + " field-name \"isol_growth_condt\" ,", + " required TRUE ,", + " severity info } ", + " }", + " }", + " } ", + " },", + " {", + " prefix \"MIMS:5.0-Data-START\" ,", + " fields {", + " {", + " field-name \"investigation_type\" ,", + " match-expression \"^metagenome$\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"project_name\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"lat_lon\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"geo_loc_name\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"collection_date\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_broad_scale\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_local_scale\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_medium\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"seq_meth\" ,", + " required TRUE ,", + " severity info }", + " } ,", + " require-order FALSE ,", + " allow-unlisted TRUE", + " },", + " {", + " prefix \"MISAG:5.0-Data-START\" ,", + " fields {", + " {", + " field-name \"investigation_type\" ,", + " match-expression \"^misag$\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"project_name\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"lat_lon\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"geo_loc_name\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"collection_date\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_broad_scale\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_local_scale\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_medium\" ,", + " 
required TRUE ,", + " severity info } ,", + " {", + " field-name \"seq_meth\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"assembly_qual\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"tax_ident\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"assembly_software\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"compl_score\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"compl_software\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"contam_score\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"sort_tech\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"single_cell_lysis_appr\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"wga_amp_appr\" ,", + " required TRUE ,", + " severity info }", + " } ,", + " require-order FALSE ,", + " allow-unlisted TRUE", + " },", + " {", + " prefix \"MIGS:5.0-Data-START\" ,", + " fields {", + " {", + " field-name \"investigation_type\" ,", + " match-expression \"^eukaryote\\|bacteria_archaea\\|plasmid\\|virus\\|organelle\\|$\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"project_name\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"lat_lon\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"geo_loc_name\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"collection_date\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_broad_scale\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_local_scale\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_medium\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"seq_meth\" ,", + " required TRUE ,", + " severity info } ,", + " {", + 
" field-name \"isol_growth_condt\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"assembly_software\" ,", + " required TRUE ,", + " severity info }", + " } ,", + " require-order FALSE ,", + " allow-unlisted TRUE ,", + " dependent-rules {", + " {", + " match-name \"investigation_type\" ,", + " value-constraint \"^eukaryote$\" ,", + " other-fields {", + " {", + " field-name \"assembly_qual\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"number_contig\" ,", + " required TRUE ,", + " severity info } ", + " }", + " },", + " {", + " match-name \"investigation_type\" ,", + " value-constraint \"^bacteria_archaea$\" ,", + " other-fields {", + " {", + " field-name \"num_replicons\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"ref_biomaterial\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"assembly_qual\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"number_contig\" ,", + " required TRUE ,", + " severity info } ", + " }", + " },", + " {", + " match-name \"investigation_type\" ,", + " value-constraint \"^plasmid$\" ,", + " other-fields {", + " {", + " field-name \"propagation\" ,", + " required TRUE ,", + " severity info } ", + " }", + " },", + " {", + " match-name \"investigation_type\" ,", + " value-constraint \"^virus$\" ,", + " other-fields {", + " {", + " field-name \"propagation\" ,", + " required TRUE ,", + " severity info } ", + " }", + " }", + " } ", + " },", + " {", + " prefix \"MIUVIG:5.0-Data-START\" ,", + " fields {", + " {", + " field-name \"investigation_type\" ,", + " match-expression \"^miuvig$\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"project_name\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"lat_lon\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"geo_loc_name\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " 
field-name \"collection_date\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_broad_scale\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_local_scale\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_medium\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"seq_meth\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"assembly_qual\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"source_uvig\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"virus_enrich_appr\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"assembly_software\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"number_contig\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"vir_ident_software\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"pred_genome_type\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"pred_genome_struc\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"detec_type\" ,", + " required TRUE ,", + " severity info }", + " } ,", + " require-order FALSE ,", + " allow-unlisted TRUE", + " },", + " {", + " prefix \"MIMAG:5.0-Data-START\" ,", + " fields {", + " {", + " field-name \"investigation_type\" ,", + " match-expression \"^mimag$\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"project_name\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"lat_lon\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"geo_loc_name\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"collection_date\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_broad_scale\" ,", + " required TRUE ,", + " severity info } ,", + " 
{", + " field-name \"env_local_scale\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"env_medium\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"seq_meth\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"assembly_qual\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"tax_ident\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"assembly_software\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"compl_score\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"compl_software\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"contam_score\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"bin_param\" ,", + " required TRUE ,", + " severity info } ,", + " {", + " field-name \"bin_software\" ,", + " required TRUE ,", + " severity info }", + " } ,", + " require-order FALSE ,", + " allow-unlisted TRUE", " }", "}", "" diff --git a/c++/src/objects/valid/validrules.prt b/c++/src/objects/valid/validrules.prt index 1c1d397d..a38c4346 100644 --- a/c++/src/objects/valid/validrules.prt +++ b/c++/src/objects/valid/validrules.prt @@ -1126,6 +1126,481 @@ AR\|APR\|MAY\|JUN\|JUL\|AUG\|SEP\|OCT\|NOV\|DEC\)-\(19\|20\)\(0\|1\|2\|3\|4\|5 field-name "Other Catalog Numbers" , required FALSE } } + }, + { + prefix "##Evidence-For-Name-Assignment-START##" , + fields { + { + field-name "Evidence Category" , + required TRUE , + severity error } , + { + field-name "Evidence Accession" , + required TRUE , + severity error } , + { + field-name "Evidence Source" , + required TRUE , + severity error } , + { + field-name "Source Identifier" , + required FALSE } , + { + field-name "Comment" , + required FALSE } + } , + require-order FALSE , + allow-unlisted TRUE + }, + { + prefix "MIMARKS:5.0-Data-START" , + fields { + { + field-name "investigation_type" , + 
match-expression "^mimarks-survey\|mimarks-specimen\|$" , + required TRUE , + severity info } , + { + field-name "project_name" , + required TRUE , + severity info } , + { + field-name "lat_lon" , + required TRUE , + severity info } , + { + field-name "geo_loc_name" , + required TRUE , + severity info } , + { + field-name "collection_date" , + required TRUE , + severity info } , + { + field-name "env_broad_scale" , + required TRUE , + severity info } , + { + field-name "env_local_scale" , + required TRUE , + severity info } , + { + field-name "env_medium" , + required TRUE , + severity info } , + { + field-name "seq_meth" , + required TRUE , + severity info } , + { + field-name "target_gene" , + required TRUE , + severity info } + } , + require-order FALSE , + allow-unlisted TRUE , + dependent-rules { + { + match-name "investigation_type" , + value-constraint "^mimarks-specimen$" , + other-fields { + { + field-name "isol_growth_condt" , + required TRUE , + severity info } + } + } + } + }, + { + prefix "MIMS:5.0-Data-START" , + fields { + { + field-name "investigation_type" , + match-expression "^metagenome$" , + required TRUE , + severity info } , + { + field-name "project_name" , + required TRUE , + severity info } , + { + field-name "lat_lon" , + required TRUE , + severity info } , + { + field-name "geo_loc_name" , + required TRUE , + severity info } , + { + field-name "collection_date" , + required TRUE , + severity info } , + { + field-name "env_broad_scale" , + required TRUE , + severity info } , + { + field-name "env_local_scale" , + required TRUE , + severity info } , + { + field-name "env_medium" , + required TRUE , + severity info } , + { + field-name "seq_meth" , + required TRUE , + severity info } + } , + require-order FALSE , + allow-unlisted TRUE + }, + { + prefix "MISAG:5.0-Data-START" , + fields { + { + field-name "investigation_type" , + match-expression "^misag$" , + required TRUE , + severity info } , + { + field-name "project_name" , + required 
TRUE , + severity info } , + { + field-name "lat_lon" , + required TRUE , + severity info } , + { + field-name "geo_loc_name" , + required TRUE , + severity info } , + { + field-name "collection_date" , + required TRUE , + severity info } , + { + field-name "env_broad_scale" , + required TRUE , + severity info } , + { + field-name "env_local_scale" , + required TRUE , + severity info } , + { + field-name "env_medium" , + required TRUE , + severity info } , + { + field-name "seq_meth" , + required TRUE , + severity info } , + { + field-name "assembly_qual" , + required TRUE , + severity info } , + { + field-name "tax_ident" , + required TRUE , + severity info } , + { + field-name "assembly_software" , + required TRUE , + severity info } , + { + field-name "compl_score" , + required TRUE , + severity info } , + { + field-name "compl_software" , + required TRUE , + severity info } , + { + field-name "contam_score" , + required TRUE , + severity info } , + { + field-name "sort_tech" , + required TRUE , + severity info } , + { + field-name "single_cell_lysis_appr" , + required TRUE , + severity info } , + { + field-name "wga_amp_appr" , + required TRUE , + severity info } + } , + require-order FALSE , + allow-unlisted TRUE + }, + { + prefix "MIGS:5.0-Data-START" , + fields { + { + field-name "investigation_type" , + match-expression "^eukaryote\|bacteria_archaea\|plasmid\|virus\|organelle\|$" , + required TRUE , + severity info } , + { + field-name "project_name" , + required TRUE , + severity info } , + { + field-name "lat_lon" , + required TRUE , + severity info } , + { + field-name "geo_loc_name" , + required TRUE , + severity info } , + { + field-name "collection_date" , + required TRUE , + severity info } , + { + field-name "env_broad_scale" , + required TRUE , + severity info } , + { + field-name "env_local_scale" , + required TRUE , + severity info } , + { + field-name "env_medium" , + required TRUE , + severity info } , + { + field-name "seq_meth" , + required 
TRUE , + severity info } , + { + field-name "isol_growth_condt" , + required TRUE , + severity info } , + { + field-name "assembly_software" , + required TRUE , + severity info } + } , + require-order FALSE , + allow-unlisted TRUE , + dependent-rules { + { + match-name "investigation_type" , + value-constraint "^eukaryote$" , + other-fields { + { + field-name "assembly_qual" , + required TRUE , + severity info } , + { + field-name "number_contig" , + required TRUE , + severity info } + } + }, + { + match-name "investigation_type" , + value-constraint "^bacteria_archaea$" , + other-fields { + { + field-name "num_replicons" , + required TRUE , + severity info } , + { + field-name "ref_biomaterial" , + required TRUE , + severity info } , + { + field-name "assembly_qual" , + required TRUE , + severity info } , + { + field-name "number_contig" , + required TRUE , + severity info } + } + }, + { + match-name "investigation_type" , + value-constraint "^plasmid$" , + other-fields { + { + field-name "propagation" , + required TRUE , + severity info } + } + }, + { + match-name "investigation_type" , + value-constraint "^virus$" , + other-fields { + { + field-name "propagation" , + required TRUE , + severity info } + } + } + } + }, + { + prefix "MIUVIG:5.0-Data-START" , + fields { + { + field-name "investigation_type" , + match-expression "^miuvig$" , + required TRUE , + severity info } , + { + field-name "project_name" , + required TRUE , + severity info } , + { + field-name "lat_lon" , + required TRUE , + severity info } , + { + field-name "geo_loc_name" , + required TRUE , + severity info } , + { + field-name "collection_date" , + required TRUE , + severity info } , + { + field-name "env_broad_scale" , + required TRUE , + severity info } , + { + field-name "env_local_scale" , + required TRUE , + severity info } , + { + field-name "env_medium" , + required TRUE , + severity info } , + { + field-name "seq_meth" , + required TRUE , + severity info } , + { + field-name 
"assembly_qual" , + required TRUE , + severity info } , + { + field-name "source_uvig" , + required TRUE , + severity info } , + { + field-name "virus_enrich_appr" , + required TRUE , + severity info } , + { + field-name "assembly_software" , + required TRUE , + severity info } , + { + field-name "number_contig" , + required TRUE , + severity info } , + { + field-name "vir_ident_software" , + required TRUE , + severity info } , + { + field-name "pred_genome_type" , + required TRUE , + severity info } , + { + field-name "pred_genome_struc" , + required TRUE , + severity info } , + { + field-name "detec_type" , + required TRUE , + severity info } + } , + require-order FALSE , + allow-unlisted TRUE + }, + { + prefix "MIMAG:5.0-Data-START" , + fields { + { + field-name "investigation_type" , + match-expression "^mimag$" , + required TRUE , + severity info } , + { + field-name "project_name" , + required TRUE , + severity info } , + { + field-name "lat_lon" , + required TRUE , + severity info } , + { + field-name "geo_loc_name" , + required TRUE , + severity info } , + { + field-name "collection_date" , + required TRUE , + severity info } , + { + field-name "env_broad_scale" , + required TRUE , + severity info } , + { + field-name "env_local_scale" , + required TRUE , + severity info } , + { + field-name "env_medium" , + required TRUE , + severity info } , + { + field-name "seq_meth" , + required TRUE , + severity info } , + { + field-name "assembly_qual" , + required TRUE , + severity info } , + { + field-name "tax_ident" , + required TRUE , + severity info } , + { + field-name "assembly_software" , + required TRUE , + severity info } , + { + field-name "compl_score" , + required TRUE , + severity info } , + { + field-name "compl_software" , + required TRUE , + severity info } , + { + field-name "contam_score" , + required TRUE , + severity info } , + { + field-name "bin_param" , + required TRUE , + severity info } , + { + field-name "bin_software" , + required TRUE , 
+ severity info } + } , + require-order FALSE , + allow-unlisted TRUE } } diff --git a/c++/src/objmgr/CMakeLists.objmgr.lib.txt b/c++/src/objmgr/CMakeLists.objmgr.lib.txt index ab4b8ec3..93964b9c 100644 --- a/c++/src/objmgr/CMakeLists.objmgr.lib.txt +++ b/c++/src/objmgr/CMakeLists.objmgr.lib.txt @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeLists.objmgr.lib.txt 565603 2018-06-14 13:44:37Z gouriano $ +# $Id: CMakeLists.objmgr.lib.txt 576254 2018-12-12 14:44:36Z dicuccio $ ############################################################################# NCBI_begin_lib(xobjmgr) @@ -22,7 +22,7 @@ NCBI_begin_lib(xobjmgr) edit_saver unsupp_editsaver edits_db_engine edits_db_saver annot_finder gc_assembly_parser split_parser seq_id_sort ) - NCBI_uses_toolkit_libraries(genome_collection seqedit seqsplit) + NCBI_uses_toolkit_libraries(genome_collection seqedit seqsplit submit) NCBI_project_watchers(vasilche) NCBI_end_lib() diff --git a/c++/src/objmgr/annot_collector.cpp b/c++/src/objmgr/annot_collector.cpp index 36ede2e9..e7a08f31 100644 --- a/c++/src/objmgr/annot_collector.cpp +++ b/c++/src/objmgr/annot_collector.cpp @@ -1,4 +1,4 @@ -/* $Id: annot_collector.cpp 564208 2018-05-23 13:47:16Z vasilche $ +/* $Id: annot_collector.cpp 579017 2019-01-29 16:46:41Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -3060,6 +3060,7 @@ void CAnnot_Collector::x_SearchRange(const CTSE_Handle& tseh, break; } gfx::timsort(it.second.begin(), it.second.end()); + it.second.erase(unique(it.second.begin(), it.second.end()), it.second.end()); it.first->LoadChunks(it.second); } tse.UpdateAnnotIndex(id); @@ -3571,9 +3572,9 @@ void CAnnot_Collector::x_SearchAll(const CSeq_entry_Info& entry_info) { {{ entry_info.UpdateAnnotIndex(); - CConstRef base = entry_info.m_Contents; + const CBioseq_Base_Info& base = entry_info.x_GetBaseInfo(); // Collect all annotations from the entry - 
ITERATE( CBioseq_Base_Info::TAnnot, ait, base->GetAnnot() ) { + ITERATE( CBioseq_Base_Info::TAnnot, ait, base.GetAnnot() ) { x_SearchAll(**ait); if ( x_NoMoreObjects() ) return; diff --git a/c++/src/objmgr/bioseq_base_info.cpp b/c++/src/objmgr/bioseq_base_info.cpp index af46c666..fb20d0df 100644 --- a/c++/src/objmgr/bioseq_base_info.cpp +++ b/c++/src/objmgr/bioseq_base_info.cpp @@ -1,4 +1,4 @@ -/* $Id: bioseq_base_info.cpp 511088 2016-08-18 15:44:06Z vasilche $ +/* $Id: bioseq_base_info.cpp 576406 2018-12-14 15:28:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -289,6 +289,25 @@ CRef CBioseq_Base_Info::RemoveSeqdesc(const CSeqdesc& d) } +CRef CBioseq_Base_Info::ReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) +{ + x_Update(fNeedUpdate_descr); + if ( !IsSetDescr() ) { + return CRef(0); + } + TDescr::Tdata& s = x_SetDescr().Set(); + NON_CONST_ITERATE ( TDescr::Tdata, it, s ) { + if ( it->GetPointer() == &old_desc ) { + // Lock the object to prevent destruction + CRef desc_nc = *it; + *it = &new_desc; + return desc_nc; + } + } + return CRef(0); +} + + void CBioseq_Base_Info::AddSeq_descr(const TDescr& v) { TDescr::Tdata& s = x_SetDescr().Set(); diff --git a/c++/src/objmgr/bioseq_handle.cpp b/c++/src/objmgr/bioseq_handle.cpp index 8708c87a..d2324adb 100644 --- a/c++/src/objmgr/bioseq_handle.cpp +++ b/c++/src/objmgr/bioseq_handle.cpp @@ -1,4 +1,4 @@ -/* $Id: bioseq_handle.cpp 571924 2018-10-04 17:41:25Z ivanov $ +/* $Id: bioseq_handle.cpp 576406 2018-12-14 15:28:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -831,6 +831,12 @@ CRef CBioseq_EditHandle::RemoveSeqdesc(const CSeqdesc& d) const } +CRef CBioseq_EditHandle::ReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) const +{ + return x_RealReplaceSeqdesc(old_desc, new_desc); +} + + void CBioseq_EditHandle::AddSeq_descr(TDescr& v) const { typedef 
CAddDescr_EditCommand TCommand; @@ -1037,6 +1043,12 @@ CRef CBioseq_EditHandle::x_RealRemoveSeqdesc(const CSeqdesc& d) const } +CRef CBioseq_EditHandle::x_RealReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) const +{ + return x_GetInfo().ReplaceSeqdesc(old_desc, new_desc); +} + + void CBioseq_EditHandle::x_RealAddSeq_descr(TDescr& v) const { x_GetInfo().AddSeq_descr(v); diff --git a/c++/src/objmgr/bioseq_info.cpp b/c++/src/objmgr/bioseq_info.cpp index 5e0af0aa..d552b8e6 100644 --- a/c++/src/objmgr/bioseq_info.cpp +++ b/c++/src/objmgr/bioseq_info.cpp @@ -1,4 +1,4 @@ -/* $Id: bioseq_info.cpp 513580 2016-09-13 11:58:16Z ivanov $ +/* $Id: bioseq_info.cpp 577156 2018-12-31 16:20:57Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -157,7 +157,10 @@ void CBioseq_Info::x_DoUpdate(TNeedUpdateFlags flags) x_LoadChunks(m_Seq_dataChunks); } if ( m_SeqMap ) { - m_SeqMap->x_UpdateSeq_inst(m_Object->SetInst()); + CFastMutexGuard guard(m_SeqMap_Mtx); + if ( m_SeqMap ) { + m_SeqMap->x_UpdateSeq_inst(m_Object->SetInst()); + } } } if ( flags & fNeedUpdate_assembly ) { diff --git a/c++/src/objmgr/bioseq_set_handle.cpp b/c++/src/objmgr/bioseq_set_handle.cpp index 2f4124bf..826351b3 100644 --- a/c++/src/objmgr/bioseq_set_handle.cpp +++ b/c++/src/objmgr/bioseq_set_handle.cpp @@ -1,4 +1,4 @@ -/* $Id: bioseq_set_handle.cpp 571924 2018-10-04 17:41:25Z ivanov $ +/* $Id: bioseq_set_handle.cpp 576406 2018-12-14 15:28:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -546,6 +546,12 @@ CRef CBioseq_set_EditHandle::RemoveSeqdesc(const CSeqdesc& d) const } +CRef CBioseq_set_EditHandle::ReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) const +{ + return x_RealReplaceSeqdesc(old_desc, new_desc); +} + + void CBioseq_set_EditHandle::AddSeq_descr(TDescr& v) const { typedef CAddDescr_EditCommand TCommand; @@ -708,6 +714,12 @@ CRef 
CBioseq_set_EditHandle::x_RealRemoveSeqdesc(const CSeqdesc& d) co } +CRef CBioseq_set_EditHandle::x_RealReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) const +{ + return x_GetInfo().ReplaceSeqdesc(old_desc, new_desc); +} + + void CBioseq_set_EditHandle::x_RealAddSeq_descr(TDescr& v) const { x_GetInfo().AddSeq_descr(v); diff --git a/c++/src/objmgr/scope.cpp b/c++/src/objmgr/scope.cpp index 41cce1e3..daafc4e5 100644 --- a/c++/src/objmgr/scope.cpp +++ b/c++/src/objmgr/scope.cpp @@ -1,4 +1,4 @@ -/* $Id: scope.cpp 554030 2017-12-27 16:11:11Z gouriano $ +/* $Id: scope.cpp 576250 2018-12-12 13:23:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -525,6 +525,13 @@ CBioseq_Handle CScope::AddBioseq(const CBioseq& bioseq, } +CSeq_entry_Handle CScope::AddSeq_submit(CSeq_submit& submit, + TPriority priority) +{ + return m_Impl->AddSeq_submit(submit, priority); +} + + CSeq_entry_Handle CScope::AddGC_Assembly(const CGC_Assembly& gc_assembly, CGC_Assembly_Parser::FParserFlags flags, TPriority pri, diff --git a/c++/src/objmgr/scope_impl.cpp b/c++/src/objmgr/scope_impl.cpp index 5d032e67..d44a1182 100644 --- a/c++/src/objmgr/scope_impl.cpp +++ b/c++/src/objmgr/scope_impl.cpp @@ -1,4 +1,4 @@ -/* $Id: scope_impl.cpp 563184 2018-05-03 18:15:18Z vasilche $ +/* $Id: scope_impl.cpp 576250 2018-12-12 13:23:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -65,6 +65,7 @@ #include #include #include +#include #include #include @@ -288,6 +289,7 @@ CBioseq_Handle CScope_Impl::AddBioseq(CBioseq& bioseq, CRef ds_info = GetEditDS(priority); CRef entry = x_MakeDummyTSE(bioseq); CTSE_Lock tse_lock = ds_info->GetDataSource().AddStaticTSE(*entry); + const_cast(*tse_lock).SetTopLevelObjectType(CTSE_Handle::eTopLevel_Bioseq); x_ClearCacheOnNewData(*tse_lock); return x_GetBioseqHandle(tse_lock->GetSeq(), @@ -319,6 +321,7 @@ CBioseq_Handle 
CScope_Impl::AddSharedBioseq(const CBioseq& bioseq, CRef ds_info = GetConstDS(priority); CRef entry = x_MakeDummyTSE(const_cast(bioseq)); CTSE_Lock tse_lock = ds_info->GetDataSource().AddStaticTSE(*entry); + const_cast(*tse_lock).SetTopLevelObjectType(CTSE_Handle::eTopLevel_Bioseq); #endif _ASSERT(tse_lock->IsSeq() && tse_lock->GetSeq().GetBioseqCore() == &bioseq); @@ -347,6 +350,7 @@ CSeq_annot_Handle CScope_Impl::AddSeq_annot(CSeq_annot& annot, CRef ds_info = GetEditDS(priority); CRef entry = x_MakeDummyTSE(annot); CTSE_Lock tse_lock = ds_info->GetDataSource().AddStaticTSE(*entry); + const_cast(*tse_lock).SetTopLevelObjectType(CTSE_Handle::eTopLevel_Seq_annot); x_ClearCacheOnNewAnnot(*tse_lock); return CSeq_annot_Handle(*tse_lock->GetSet().GetAnnot()[0], @@ -378,6 +382,7 @@ CSeq_annot_Handle CScope_Impl::AddSharedSeq_annot(const CSeq_annot& annot, CRef ds_info = GetConstDS(priority); CRef entry = x_MakeDummyTSE(const_cast(annot)); CTSE_Lock tse_lock = ds_info->GetDataSource().AddStaticTSE(*entry); + const_cast(*tse_lock).SetTopLevelObjectType(CTSE_Handle::eTopLevel_Seq_annot); #endif _ASSERT(tse_lock->IsSet() && tse_lock->GetSet().IsSetAnnot() && @@ -389,6 +394,21 @@ CSeq_annot_Handle CScope_Impl::AddSharedSeq_annot(const CSeq_annot& annot, } +CSeq_entry_Handle CScope_Impl::AddSeq_submit(CSeq_submit& submit, + TPriority priority) +{ + TConfWriteLockGuard guard(m_ConfLock); + + CRef ds_info = GetEditDS(priority); + CRef entry = x_MakeDummyTSE(submit); + CTSE_Lock tse_lock = ds_info->GetDataSource().AddStaticTSE(*entry); + const_cast(*tse_lock).SetTopLevelObject(CTSE_Handle::eTopLevel_Seq_submit, &submit); + x_ClearCacheOnNewAnnot(*tse_lock); + + return CSeq_entry_Handle(*tse_lock, *ds_info->GetTSE_Lock(tse_lock)); +} + + namespace { class CClearCacheOnRemoveGuard { @@ -1529,6 +1549,24 @@ CRef CScope_Impl::x_MakeDummyTSE(CSeq_annot& annot) const } +CRef CScope_Impl::x_MakeDummyTSE(CSeq_submit& submit) const +{ + CRef entry(new CSeq_entry); + 
entry->SetSet().SetSeq_set(); // it's not optional + switch ( submit.GetData().Which() ) { + case CSeq_submit::TData::e_Entrys: + entry->SetSet().SetSeq_set() = submit.GetData().GetEntrys(); + break; + case CSeq_submit::TData::e_Annots: + entry->SetSet().SetAnnot() = submit.GetData().GetAnnots(); + break; + default: // no data to add + break; + } + return entry; +} + + bool CScope_Impl::x_IsDummyTSE(const CTSE_Info& tse, const CBioseq_Info& seq) const { diff --git a/c++/src/objmgr/seq_entry_handle.cpp b/c++/src/objmgr/seq_entry_handle.cpp index 9a413f63..86d1f006 100644 --- a/c++/src/objmgr/seq_entry_handle.cpp +++ b/c++/src/objmgr/seq_entry_handle.cpp @@ -1,4 +1,4 @@ -/* $Id: seq_entry_handle.cpp 194592 2010-06-15 18:54:05Z vasilche $ +/* $Id: seq_entry_handle.cpp 576406 2018-12-14 15:28:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -126,12 +126,36 @@ CSeq_entry_Handle CSeq_entry_Handle::GetParentEntry(void) const } +bool CSeq_entry_Handle::IsTopLevelEntry(void) const +{ + return !x_GetInfo().HasParent_Info(); +} + + CSeq_entry_Handle CSeq_entry_Handle::GetTopLevelEntry(void) const { return GetTSE_Handle(); } +bool CSeq_entry_Handle::IsTopLevelSeq_submit(void) const +{ + return IsTopLevelEntry() && GetTSE_Handle().IsTopLevelSeq_submit(); +} + + +const CSeq_submit& CSeq_entry_Handle::GetTopLevelSeq_submit() const +{ + return GetTSE_Handle().GetTopLevelSeq_submit(); +} + + +const CSubmit_block& CSeq_entry_Handle::GetTopLevelSubmit_block() const +{ + return GetTSE_Handle().GetTopLevelSubmit_block(); +} + + CBioseq_Handle CSeq_entry_Handle::GetBioseqHandle(const CSeq_id& id) const { return GetTSE_Handle().GetBioseqHandle(id); @@ -244,6 +268,18 @@ CSeq_entry_EditHandle::CSeq_entry_EditHandle(CSeq_entry_Info& info, } +CSubmit_block& CSeq_entry_EditHandle::SetTopLevelSubmit_block(void) const +{ + return GetTSE_Handle().SetTopLevelSubmit_block(); +} + + +void 
CSeq_entry_EditHandle::SetTopLevelSubmit_block(CSubmit_block& sub) const +{ + GetTSE_Handle().SetTopLevelSubmit_block(sub); +} + + CSeq_entry_EditHandle CSeq_entry_EditHandle::GetParentEntry(void) const { CSeq_entry_EditHandle ret; @@ -348,6 +384,11 @@ CRef CSeq_entry_EditHandle::RemoveSeqdesc(const CSeqdesc& v) const } +CRef CSeq_entry_EditHandle::ReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) const +{ + return x_RealReplaceSeqdesc(old_desc, new_desc); +} + CBioseq_EditHandle CSeq_entry_EditHandle::AttachBioseq(CBioseq& seq, int index) const @@ -708,6 +749,12 @@ CRef CSeq_entry_EditHandle::x_RealRemoveSeqdesc(const CSeqdesc& v) con } +CRef CSeq_entry_EditHandle::x_RealReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) const +{ + return x_GetInfo().ReplaceSeqdesc(old_desc, new_desc); +} + + void CSeq_entry_EditHandle::x_RealAddSeq_descr(TDescr& v) const { x_GetInfo().AddSeq_descr(v); diff --git a/c++/src/objmgr/seq_entry_info.cpp b/c++/src/objmgr/seq_entry_info.cpp index 480c8b63..e03d52fa 100644 --- a/c++/src/objmgr/seq_entry_info.cpp +++ b/c++/src/objmgr/seq_entry_info.cpp @@ -1,4 +1,4 @@ -/* $Id: seq_entry_info.cpp 517902 2016-10-28 16:56:25Z vasilche $ +/* $Id: seq_entry_info.cpp 576406 2018-12-14 15:28:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -535,6 +535,13 @@ CRef CSeq_entry_Info::RemoveSeqdesc(const CSeqdesc& d) return m_Contents->RemoveSeqdesc(d); } + +CRef CSeq_entry_Info::ReplaceSeqdesc(const CSeqdesc& old_desc, CSeqdesc& new_desc) +{ + x_Update(fNeedUpdate_descr); + return m_Contents->ReplaceSeqdesc(old_desc, new_desc); +} + /* void CSeq_entry_Info::AddDescr(CSeq_entry_Info& src) { diff --git a/c++/src/objmgr/tse_handle.cpp b/c++/src/objmgr/tse_handle.cpp index 2bee7806..dd29d25a 100644 --- a/c++/src/objmgr/tse_handle.cpp +++ b/c++/src/objmgr/tse_handle.cpp @@ -1,4 +1,4 @@ -/* $Id: tse_handle.cpp 544387 2017-08-22 19:28:06Z vasilche $ +/* $Id: 
tse_handle.cpp 576250 2018-12-12 13:23:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -38,6 +38,7 @@ #include #include #include +#include BEGIN_NCBI_SCOPE BEGIN_SCOPE(objects) @@ -231,6 +232,50 @@ bool CTSE_Handle::CanBeEdited(void) const } +CTSE_Handle::ETopLevelObjectType CTSE_Handle::GetTopLevelObjectType() const +{ + return x_GetTSE_Info().GetTopLevelObjectType(); +} + + +const CSeq_submit& CTSE_Handle::GetTopLevelSeq_submit() const +{ + return x_GetTSE_Info().GetTopLevelSeq_submit(); +} + + +bool CTSE_Handle::IsTopLevelSeq_submit() const +{ + return x_GetTSE_Info().IsTopLevelSeq_submit(); +} + + +const CSubmit_block& CTSE_Handle::GetTopLevelSubmit_block() const +{ + return x_GetTSE_Info().GetTopLevelSubmit_block(); +} + + +CSubmit_block& CTSE_Handle::SetTopLevelSubmit_block() const +{ + if ( !CanBeEdited() ) { + NCBI_THROW(CObjMgrException, eModifyDataError, + "CTSE_Handle::SetTopLevelSubmit_block: entry cannot be edited"); + } + return x_GetTSE_Info().SetTopLevelSubmit_block(); +} + + +void CTSE_Handle::SetTopLevelSubmit_block(CSubmit_block& sub) const +{ + if ( !CanBeEdited() ) { + NCBI_THROW(CObjMgrException, eModifyDataError, + "CTSE_Handle::SetTopLevelSubmit_block: entry cannot be edited"); + } + return x_GetTSE_Info().SetTopLevelSubmit_block(sub); +} + + //////////////////////////////////////////////////////////////////////////// // CHandleInfo_Base //////////////////////////////////////////////////////////////////////////// diff --git a/c++/src/objmgr/tse_info.cpp b/c++/src/objmgr/tse_info.cpp index cdfae76d..3e1c9e93 100644 --- a/c++/src/objmgr/tse_info.cpp +++ b/c++/src/objmgr/tse_info.cpp @@ -1,4 +1,4 @@ -/* $Id: tse_info.cpp 544387 2017-08-22 19:28:06Z vasilche $ +/* $Id: tse_info.cpp 576250 2018-12-12 13:23:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -47,6 +47,7 @@ #include #include +#include #include 
#include @@ -190,6 +191,7 @@ CTSE_Info::CTSE_Info(const CTSE_Lock& tse) x_Initialize(); m_BlobState = tse->m_BlobState; + m_TopLevelObjectType = tse->m_TopLevelObjectType; m_Name = tse->m_Name; m_UsedMemory = tse->m_UsedMemory; m_LoadState = eLoaded; @@ -231,6 +233,7 @@ CTSE_Info& CTSE_Info::Assign(const CTSE_Lock& tse) { // m_BaseTSE.reset(new SBaseTSE(tse)); m_BlobState = tse->m_BlobState; + m_TopLevelObjectType = tse->m_TopLevelObjectType; m_Name = tse->m_Name; m_UsedMemory = tse->m_UsedMemory; @@ -254,6 +257,7 @@ CTSE_Info& CTSE_Info::Assign(const CTSE_Lock& tse, // CRef listener) { m_BlobState = tse->m_BlobState; + m_TopLevelObjectType = tse->m_TopLevelObjectType; m_Name = tse->m_Name; m_UsedMemory = tse->m_UsedMemory; @@ -287,6 +291,7 @@ void CTSE_Info::x_Initialize(void) m_DataSource = 0; m_BlobVersion = -1; m_BlobState = CBioseq_Handle::fState_none; + m_TopLevelObjectType = CTSE_Handle::eTopLevel_Seq_entry; m_UsedMemory = 0; m_LoadState = eNotLoaded; m_CacheState = eNotInCache; @@ -1988,6 +1993,101 @@ string CTSE_Info::GetDescription(void) const } +CTSE_Info::ETopLevelObjectType CTSE_Info::GetTopLevelObjectType(void) const +{ + return m_TopLevelObjectType; +} + + +const CSerialObject* CTSE_Info::GetTopLevelObjectPtr(void) const +{ + return m_TopLevelObjectPtr.GetPointerOrNull(); +} + + +void CTSE_Info::SetTopLevelObject(ETopLevelObjectType type, CSerialObject* ptr) +{ + m_TopLevelObjectType = type; + m_TopLevelObjectPtr = ptr; +} + + +bool CTSE_Info::IsTopLevelSeq_submit() const +{ + return GetTopLevelObjectType() == CTSE_Handle::eTopLevel_Seq_submit; +} + + +CSeq_submit& CTSE_Info::x_GetTopLevelSeq_submit() const +{ + if ( !IsTopLevelSeq_submit() ) { + NCBI_THROW(CObjMgrException, eInvalidHandle, + "CTSE_Handle::GetTopLevelSeq_submit: " + "Top level object is not Seq-submit"); + } + CSeq_submit* submit = dynamic_cast(m_TopLevelObjectPtr.GetNCPointerOrNull()); + if ( !submit ) { + NCBI_THROW(CObjMgrException, eInvalidHandle, + 
"CTSE_Handle::GetTopLevelSeq_submit: " + "Top level object is not Seq-submit"); + } + return *submit; +} + + +const CSeq_submit& CTSE_Info::GetTopLevelSeq_submit() const +{ + CSeq_submit& submit = x_GetTopLevelSeq_submit(); + if ( IsSet() ) { + const TSet& set = GetSet(); + // update entry/annot lists + if ( set.IsSetSeq_set() && !set.GetSeq_set().empty() ) { + submit.SetData().SetEntrys() = set.GetBioseq_setCore()->GetSeq_set(); + } + else if ( set.IsSetAnnot() && !set.GetAnnot().empty() ) { + submit.SetData().SetAnnots() = set.GetBioseq_setCore()->GetAnnot(); + } + else { + switch ( submit.GetData().Which() ) { + case CSeq_submit::TData::e_Entrys: + submit.SetData().SetEntrys().clear(); + break; + case CSeq_submit::TData::e_Annots: + submit.SetData().SetAnnots().clear(); + break; + default: + break; + } + } + } + return submit; +} + + +const CSubmit_block& CTSE_Info::GetTopLevelSubmit_block() const +{ + return x_GetTopLevelSeq_submit().GetSub(); +} + + +CSubmit_block& CTSE_Info::SetTopLevelSubmit_block() const +{ + return x_GetTopLevelSeq_submit().SetSub(); +} + + +void CTSE_Info::SetTopLevelSubmit_block(CSubmit_block& sub) const +{ + x_GetTopLevelSeq_submit().SetSub(sub); +} + + +void CTSE_Info::SetTopLevelObjectType(ETopLevelObjectType type) +{ + SetTopLevelObject(type, 0); +} + + CTSE_SetObjectInfo::CTSE_SetObjectInfo(void) { } diff --git a/c++/src/objmgr/util/autodef.cpp b/c++/src/objmgr/util/autodef.cpp index 2206b8cc..893cc569 100644 --- a/c++/src/objmgr/util/autodef.cpp +++ b/c++/src/objmgr/util/autodef.cpp @@ -1,4 +1,4 @@ -/* $Id: autodef.cpp 572646 2018-10-17 16:58:07Z ivanov $ +/* $Id: autodef.cpp 578992 2019-01-29 13:03:37Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -196,9 +196,8 @@ struct SAutoDefModifierComboSort { -CAutoDefModifierCombo * CAutoDef::FindBestModifierCombo() +CRef CAutoDef::FindBestModifierCombo() { - CAutoDefModifierCombo *best = NULL; TModifierComboVector 
combo_list; combo_list.clear(); @@ -207,7 +206,7 @@ CAutoDefModifierCombo * CAutoDef::FindBestModifierCombo() TModifierComboVector tmp, add_list; TModifierComboVector::iterator it; - CAutoDefSourceDescription::TModifierVector mod_list, mods_to_try; + CAutoDefSourceDescription::TModifierVector mod_list; bool stop = false; unsigned int k; @@ -230,7 +229,7 @@ CAutoDefModifierCombo * CAutoDef::FindBestModifierCombo() } it = combo_list.erase (it); } else { - it++; + ++it; } tmp.clear(); } @@ -248,7 +247,7 @@ CAutoDefModifierCombo * CAutoDef::FindBestModifierCombo() mod_list.push_back (CAutoDefSourceModifierInfo(*it)); } - return combo_list[0].Release(); + return combo_list[0]; } @@ -291,9 +290,9 @@ CAutoDefModifierCombo* CAutoDef::GetEmptyCombo() } -string CAutoDef::GetOneSourceDescription(CBioseq_Handle bh) +string CAutoDef::GetOneSourceDescription(const CBioseq_Handle& bh) { - CAutoDefModifierCombo *best = FindBestModifierCombo(); + CRef best = FindBestModifierCombo(); if (best == NULL) { return ""; } @@ -306,131 +305,7 @@ string CAutoDef::GetOneSourceDescription(CBioseq_Handle bh) } -// Some misc_RNA clauses have a comment that actually lists multiple -// features. These functions create a clause for each element in the -// comment. 
- -bool CAutoDef::x_AddMiscRNAFeatures(CBioseq_Handle bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, CAutoDefFeatureClause_Base &main_clause) -{ - string comment = ""; - string::size_type pos; - - if (cf.GetData().Which() == CSeqFeatData::e_Rna) { - comment = cf.GetNamedQual("product"); - if (NStr::IsBlank(comment) - && cf.IsSetData() - && cf.GetData().IsRna() - && cf.GetData().GetRna().IsSetExt()) { - if (cf.GetData().GetRna().GetExt().IsName()) { - comment = cf.GetData().GetRna().GetExt().GetName(); - } else if (cf.GetData().GetRna().GetExt().IsGen() - && cf.GetData().GetRna().GetExt().GetGen().IsSetProduct()) { - comment = cf.GetData().GetRna().GetExt().GetGen().GetProduct(); - } - } - } - - if ((NStr::Equal (comment, "misc_RNA") || NStr::IsBlank (comment)) && cf.CanGetComment()) { - comment = cf.GetComment(); - } - if (NStr::IsBlank(comment)) { - return false; - } - - pos = NStr::Find(comment, "spacer"); - if (pos == NCBI_NS_STD::string::npos) { - return false; - } - - bool is_region = false; - - if (NStr::StartsWith (comment, "contains ")) { - comment = comment.substr(9); - } else if (NStr::StartsWith (comment, "may contain ")) { - comment = comment.substr(12); - is_region = true; - } - - pos = NStr::Find(comment, ";"); - if (pos != string::npos) { - comment = comment.substr(0, pos); - } - - if (is_region) { - main_clause.AddSubclause(new CAutoDefParsedRegionClause(bh, cf, mapped_loc, comment)); - } else { - vector elements = CAutoDefFeatureClause_Base::GetMiscRNAElements(comment); - if (!elements.empty()) { - ITERATE(vector, s, elements) { - CAutoDefParsedClause *new_clause = new CAutoDefParsedClause(bh, cf, mapped_loc, - (*s == elements.front()), (*s == elements.back())); - new_clause->SetMiscRNAWord(*s); - main_clause.AddSubclause(new_clause); - } - } else { - elements = CAutoDefFeatureClause_Base::GetTrnaIntergenicSpacerClausePhrases(comment); - if (!elements.empty()) { - ITERATE(vector, s, elements) { - size_t pos = NStr::Find(*s, "intergenic 
spacer"); - if (pos != string::npos) { - CAutoDefParsedIntergenicSpacerClause *spacer = - new CAutoDefParsedIntergenicSpacerClause(bh, - cf, - mapped_loc, - (*s), - (*s == elements.front()), - (*s == elements.back())); - main_clause.AddSubclause(spacer); - } else { - CAutoDefFeatureClause *gene = s_tRNAClauseFromNote(bh, cf, mapped_loc, *s, (*s == elements.front()), (*s == elements.back())); - main_clause.AddSubclause(gene); - } - } - } else { - CAutoDefParsedIntergenicSpacerClause *spacer = - new CAutoDefParsedIntergenicSpacerClause(bh, - cf, - mapped_loc, - comment, - true, - true); - main_clause.AddSubclause(spacer); - } - } - } - return true; -} - - - - - -bool CAutoDef::x_AddtRNAAndOther(CBioseq_Handle bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, CAutoDefFeatureClause_Base &main_clause) -{ - if (cf.GetData().GetSubtype() != CSeqFeatData::eSubtype_misc_feature || - !cf.IsSetComment()) { - return false; - } - - vector phrases = CAutoDefFeatureClause_Base::GetFeatureClausePhrases(cf.GetComment()); - if (phrases.size() < 2) { - return false; - } - - bool first = true; - string last = phrases.back(); - phrases.pop_back(); - ITERATE(vector, it, phrases) { - main_clause.AddSubclause(CAutoDefFeatureClause_Base::ClauseFromPhrase(*it, bh, cf, mapped_loc, first, false)); - first = false; - } - main_clause.AddSubclause(CAutoDefFeatureClause_Base::ClauseFromPhrase(last, bh, cf, mapped_loc, first, true)); - - return true; -} - - -void CAutoDef::x_RemoveOptionalFeatures(CAutoDefFeatureClause_Base *main_clause, CBioseq_Handle bh) +void CAutoDef::x_RemoveOptionalFeatures(CAutoDefFeatureClause_Base *main_clause, const CBioseq_Handle& bh) { // remove optional features that have not been requested if (main_clause == NULL) { @@ -508,7 +383,7 @@ bool CAutoDef::x_IsFeatureSuppressed(CSeqFeatData::ESubtype subtype) } -void CAutoDef::SuppressFeature(objects::CFeatListItem feat) +void CAutoDef::SuppressFeature(const objects::CFeatListItem& feat) { if (feat.GetType() == 
CSeqFeatData::e_not_set) { m_Options.SuppressAllFeatures(); @@ -524,7 +399,7 @@ void CAutoDef::SuppressFeature(objects::CSeqFeatData::ESubtype subtype) } -bool CAutoDef::IsSegment(CBioseq_Handle bh) +bool CAutoDef::IsSegment(const CBioseq_Handle& bh) { CSeq_entry_Handle seh = bh.GetParentEntry(); @@ -654,7 +529,7 @@ bool s_HasPromoter(CBioseq_Handle bh) } -string CAutoDef::x_GetFeatureClauses(CBioseq_Handle bh) +string CAutoDef::x_GetFeatureClauses(const CBioseq_Handle& bh) { const string& custom = m_Options.GetCustomFeatureClause(); if (!NStr::IsBlank(custom)) { @@ -671,7 +546,6 @@ string CAutoDef::x_GetFeatureClauses(CBioseq_Handle bh) CAutoDefFeatureClause_Base main_clause; - CAutoDefFeatureClause *new_clause; CRange range; CBioseq_Handle master_bh = bh; @@ -690,9 +564,9 @@ string CAutoDef::x_GetFeatureClauses(CBioseq_Handle bh) fake_promoter->SetLocation(*fake_promoter_loc); - main_clause.AddSubclause (new CAutoDefFakePromoterClause (master_bh, + main_clause.AddSubclause (CRef(new CAutoDefFakePromoterClause (master_bh, *fake_promoter, - *fake_promoter_loc)); + *fake_promoter_loc))); } // now create clauses for real features @@ -706,75 +580,21 @@ string CAutoDef::x_GetFeatureClauses(CBioseq_Handle bh) while (feat_ci) { - const CSeq_feat& cf = feat_ci->GetOriginalFeature(); - const CSeq_loc& mapped_loc = feat_ci->GetMappedFeature().GetLocation(); - unsigned int subtype = cf.GetData().GetSubtype(); - unsigned int stop = mapped_loc.GetStop(eExtreme_Positional); - new_clause = NULL; - // unless it's a gene, don't use it unless it ends in the sequence we're looking at - if ((subtype == CSeqFeatData::eSubtype_gene - || subtype == CSeqFeatData::eSubtype_mRNA - || subtype == CSeqFeatData::eSubtype_cdregion - || (stop >= range.GetFrom() && stop <= range.GetTo())) - && !x_IsFeatureSuppressed(cf.GetData().GetSubtype())) { - - // some clauses can be created differently just knowing the subtype - if (subtype == CSeqFeatData::eSubtype_gene) { - new_clause = new 
CAutoDefGeneClause(bh, cf, mapped_loc, m_Options.GetSuppressLocusTags()); - } else if (subtype == CSeqFeatData::eSubtype_ncRNA) { - new_clause = new CAutoDefNcRNAClause(bh, cf, mapped_loc, m_Options.GetUseNcRNAComment()); - } else if (subtype == CSeqFeatData::eSubtype_mobile_element) { - new_clause = new CAutoDefMobileElementClause(bh, cf, mapped_loc); - } else if (CAutoDefFeatureClause::IsSatellite(cf)) { - new_clause = new CAutoDefSatelliteClause(bh, cf, mapped_loc); - } else if (subtype == CSeqFeatData::eSubtype_otherRNA - || subtype == CSeqFeatData::eSubtype_misc_RNA - || subtype == CSeqFeatData::eSubtype_rRNA) { - if (!x_AddMiscRNAFeatures(bh, cf, mapped_loc, main_clause)) { - new_clause = new CAutoDefFeatureClause(bh, cf, mapped_loc); + vector > fclause = FeatureClauseFactory(bh, feat_ci->GetOriginalFeature(), feat_ci->GetMappedFeature().GetLocation(), m_Options, is_single_misc_feat); + for (auto it : fclause) { + if (it && + (it->IsRecognizedFeature() || + (m_Options.GetKeepRepeatRegion() && + (it->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_repeat_region || + it->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_LTR)))) { + if (it->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_exon || + it->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_intron) { + it->Label(m_Options.GetSuppressAlleles()); } - } else if (CAutoDefFeatureClause::IsPromoter(cf)) { - new_clause = new CAutoDefPromoterClause(bh, cf, mapped_loc); - } else if (CAutoDefFeatureClause::IsGeneCluster(cf)) { - new_clause = new CAutoDefGeneClusterClause(bh, cf, mapped_loc); - } else if (CAutoDefFeatureClause::IsControlRegion(cf)) { - new_clause = new CAutoDefFeatureClause(bh, cf, mapped_loc); - } else if ((subtype == CSeqFeatData::eSubtype_misc_feature || subtype == CSeqFeatData::eSubtype_otherRNA) && - (x_AddMiscRNAFeatures(bh, cf, mapped_loc, main_clause) || x_AddtRNAAndOther(bh, cf, mapped_loc, main_clause))) { - // special misc_features and misc_RNA features - } else if (subtype == 
CSeqFeatData::eSubtype_misc_feature) { - // some misc-features may require more parsing - new_clause = new CAutoDefFeatureClause(bh, cf, mapped_loc); - if (!is_single_misc_feat && - (m_Options.GetMiscFeatRule() == CAutoDefOptions::eDelete - || (m_Options.GetMiscFeatRule() == CAutoDefOptions::eNoncodingProductFeat && !new_clause->IsNoncodingProductFeat()))) { - delete new_clause; - new_clause = NULL; - } else if (m_Options.GetMiscFeatRule() == CAutoDefOptions::eCommentFeat) { - delete new_clause; - new_clause = NULL; - if (cf.CanGetComment() && !NStr::IsBlank(cf.GetComment())) { - new_clause = new CAutoDefMiscCommentClause(bh, cf, mapped_loc); - } - } - } else { - new_clause = new CAutoDefFeatureClause(bh, cf, mapped_loc); - } - - if (new_clause != NULL && - (new_clause->IsRecognizedFeature() || - (m_Options.GetKeepRepeatRegion() && - (new_clause->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_repeat_region || - new_clause->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_LTR)))) { - if (new_clause->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_exon || - new_clause->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_intron) { - new_clause->Label(m_Options.GetSuppressAlleles()); - } - main_clause.AddSubclause(new_clause); - } else if (new_clause != NULL) { - delete new_clause; + main_clause.AddSubclause(it); } } + ++feat_ci; } @@ -1166,7 +986,7 @@ string CAutoDef::GetKeywordPrefix(CBioseq_Handle bh) } -string CAutoDef::GetOneDefLine(CAutoDefModifierCombo *mod_combo, CBioseq_Handle bh) +string CAutoDef::GetOneDefLine(CAutoDefModifierCombo *mod_combo, const CBioseq_Handle& bh) { // for protein sequences, use sequence::GetTitle if (bh.CanGetInst() && bh.GetInst().CanGetMol() && bh.GetInst().GetMol() == CSeq_inst::eMol_aa) { @@ -1203,7 +1023,7 @@ string CAutoDef::GetOneDefLine(CAutoDefModifierCombo *mod_combo, CBioseq_Handle // use internal settings to create mod combo -string CAutoDef::GetOneDefLine(CBioseq_Handle bh) +string CAutoDef::GetOneDefLine(const 
CBioseq_Handle& bh) { // for protein sequences, use sequence::GetTitle if (bh.CanGetInst() && bh.GetInst().CanGetMol() && bh.GetInst().GetMol() == CSeq_inst::eMol_aa) { @@ -1447,7 +1267,7 @@ CRef CAutoDef::CreateIDOptions(CSeq_entry_Handle seh) CAutoDef ad; ad.AddSources(seh); - CAutoDefModifierCombo* src_combo = ad.FindBestModifierCombo(); + CRef src_combo = ad.FindBestModifierCombo(); CAutoDefSourceDescription::TAvailableModifierVector modifiers; src_combo->GetAvailableModifiers(modifiers); diff --git a/c++/src/objmgr/util/autodef_feature_clause.cpp b/c++/src/objmgr/util/autodef_feature_clause.cpp index a827e17a..2afec2f1 100644 --- a/c++/src/objmgr/util/autodef_feature_clause.cpp +++ b/c++/src/objmgr/util/autodef_feature_clause.cpp @@ -1,4 +1,4 @@ -/* $Id: autodef_feature_clause.cpp 556773 2018-02-05 14:58:19Z bollin $ +/* $Id: autodef_feature_clause.cpp 580319 2019-02-13 13:46:55Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -31,6 +31,7 @@ */ #include +#include #include #include #include @@ -694,13 +695,29 @@ bool CAutoDefFeatureClause::x_GetProductName(string &product_name) } else { string label; - if (m_MainFeat.CanGetProduct()) { - CConstRef prot = GetBestOverlappingFeat(m_MainFeat.GetProduct(), - CSeqFeatData::e_Prot, - eOverlap_Simple, - m_BH.GetScope()); - if (prot) { - feature::GetLabel(*prot, &label, feature::fFGL_Content); + if (subtype == CSeqFeatData::eSubtype_cdregion && m_MainFeat.IsSetProduct()) { + const CSeq_loc& product_loc = m_MainFeat.GetProduct(); + CBioseq_Handle prot_h = m_BH.GetScope().GetBioseqHandle(product_loc); + if (prot_h) { + CFeat_CI prot_f(prot_h, CSeqFeatData::eSubtype_prot); + if (prot_f) { + feature::GetLabel(*(prot_f->GetSeq_feat()), &label, feature::fFGL_Content); + if (m_MainFeat.IsSetPartial() && m_MainFeat.GetPartial()) { + CFeat_CI mat_pi(prot_h, CSeqFeatData::eSubtype_mat_peptide_aa); + if (mat_pi && mat_pi->GetData().GetProt().IsSetName()) { + const 
string& m_name = mat_pi->GetData().GetProt().GetName().front(); + ++mat_pi; + if (!mat_pi && !m_name.empty()) { + if (label.empty()) { + label = m_name; + } + else { + label += ", " + m_name + " region,"; + } + } + } + } + } } } @@ -735,8 +752,6 @@ bool CAutoDefFeatureClause::x_GetProductName(string &product_name) bool CAutoDefFeatureClause::x_GetExonDescription(string &description) { - CSeqFeatData::ESubtype subtype = m_MainFeat.GetData().GetSubtype(); - if (m_MainFeat.IsSetQual()) { ITERATE(CSeq_feat::TQual, it, m_MainFeat.GetQual()) { if ((*it)->IsSetQual() && (*it)->IsSetVal() @@ -911,8 +926,8 @@ void CAutoDefFeatureClause::x_GetOperonSubfeatures(string &interval) { bool has_promoter = false; - ITERATE(TClauseList, it, m_ClauseList) { - if ((*it)->IsPromoter()) { + for (auto it : m_ClauseList) { + if (it->IsPromoter()) { has_promoter = true; break; } @@ -931,9 +946,7 @@ void CAutoDefFeatureClause::x_GetOperonSubfeatures(string &interval) * last two items if the feature is a CDS. */ bool CAutoDefFeatureClause::x_GetGenericInterval (string &interval, bool suppress_allele) -{ - unsigned int k; - +{ interval = ""; if (m_IsUnknown) { return false; @@ -958,7 +971,7 @@ bool CAutoDefFeatureClause::x_GetGenericInterval (string &interval, bool suppres return false; } - CAutoDefFeatureClause_Base *utr3 = NULL; + CRef utr3; if (subtype == CSeqFeatData::eSubtype_operon) { // suppress subclauses except promoters @@ -966,15 +979,19 @@ bool CAutoDefFeatureClause::x_GetGenericInterval (string &interval, bool suppres } else if (!m_SuppressSubfeatures) { // label subclauses // check to see if 3'UTR is present, and whether there are any other features - for (k = 0; k < m_ClauseList.size(); k++) { - m_ClauseList[k]->Label(suppress_allele); - if (m_ClauseList[k]->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_3UTR && subtype == CSeqFeatData::eSubtype_cdregion) { - utr3 = m_ClauseList[k]; - for (unsigned int j = k + 1; j < m_ClauseList.size(); j++) { - m_ClauseList[j-1] = 
m_ClauseList[j]; + auto it = m_ClauseList.begin(); + while (it != m_ClauseList.end()) { + if (*it) { + (*it)->Label(suppress_allele); + if ((*it)->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_3UTR && subtype == CSeqFeatData::eSubtype_cdregion) { + utr3 = *it; + it = m_ClauseList.erase(it); + } + else { + ++it; } - m_ClauseList[m_ClauseList.size() - 1] = NULL; - m_ClauseList.pop_back(); + } else { + it = m_ClauseList.erase(it); } } @@ -1423,18 +1440,7 @@ void CAutoDefFeatureClause::ReverseCDSClauseLists() ENa_strand this_strand = m_ClauseLocation->GetStrand(); if (this_strand == eNa_strand_minus && GetMainFeatureSubtype() == CSeqFeatData::eSubtype_cdregion) { - TClauseList tmp; - tmp.clear(); - for (size_t k = m_ClauseList.size(); k > 0; k--) { - tmp.push_back(m_ClauseList[k - 1]); - m_ClauseList[k - 1] = NULL; - } - m_ClauseList.clear(); - for (size_t k = 0; k < tmp.size(); k++) { - m_ClauseList.push_back(tmp[k]); - tmp[k] = NULL; - } - tmp.clear(); + std::reverse(m_ClauseList.begin(), m_ClauseList.end()); } for (unsigned int k = 0; k < m_ClauseList.size(); k++) { @@ -2159,5 +2165,208 @@ CAutoDefFeatureClause::EClauseType CAutoDefFeatureClause::GetClauseType() } +// Some misc_RNA clauses have a comment that actually lists multiple +// features. These functions create a clause for each element in the +// comment. 
+ +vector > AddMiscRNAFeatures(const CBioseq_Handle& bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc) +{ + vector > rval; + string comment; + string::size_type pos; + + if (cf.GetData().Which() == CSeqFeatData::e_Rna) { + comment = cf.GetNamedQual("product"); + if (NStr::IsBlank(comment) + && cf.IsSetData() + && cf.GetData().IsRna() + && cf.GetData().GetRna().IsSetExt()) { + if (cf.GetData().GetRna().GetExt().IsName()) { + comment = cf.GetData().GetRna().GetExt().GetName(); + } + else if (cf.GetData().GetRna().GetExt().IsGen() + && cf.GetData().GetRna().GetExt().GetGen().IsSetProduct()) { + comment = cf.GetData().GetRna().GetExt().GetGen().GetProduct(); + } + } + } + + if ((NStr::Equal(comment, "misc_RNA") || NStr::IsBlank(comment)) && cf.CanGetComment()) { + comment = cf.GetComment(); + } + if (NStr::IsBlank(comment)) { + return rval; + } + + pos = NStr::Find(comment, "spacer"); + if (pos == NPOS) { + return rval; + } + + bool is_region = false; + + NStr::TrimPrefixInPlace(comment, "contains "); + if (NStr::StartsWith(comment, "may contain ")) { + NStr::TrimPrefixInPlace(comment, "may contain "); + is_region = true; + } + + pos = NStr::Find(comment, ";"); + if (pos != string::npos) { + comment = comment.substr(0, pos); + } + + if (is_region) { + rval.push_back(CRef(new CAutoDefParsedRegionClause(bh, cf, mapped_loc, comment))); + } else { + vector elements = CAutoDefFeatureClause::GetMiscRNAElements(comment); + if (!elements.empty()) { + for (auto s : elements) { + CRef new_clause(new CAutoDefParsedClause(bh, cf, mapped_loc, + (s == elements.front()), (s == elements.back()))); + new_clause->SetMiscRNAWord(s); + rval.push_back(new_clause); + } + } else { + elements = CAutoDefFeatureClause::GetTrnaIntergenicSpacerClausePhrases(comment); + if (!elements.empty()) { + for (auto s : elements) { + size_t pos = NStr::Find(s, "intergenic spacer"); + if (pos != string::npos) { + rval.push_back(CRef(new CAutoDefParsedIntergenicSpacerClause(bh, + cf, + mapped_loc, + (s), + 
(s == elements.front()), + (s == elements.back())))); + } else { + rval.push_back(CRef(s_tRNAClauseFromNote(bh, cf, mapped_loc, s, (s == elements.front()), (s == elements.back())))); + } + } + } else { + rval.push_back(CRef(new CAutoDefParsedIntergenicSpacerClause(bh, + cf, + mapped_loc, + comment, + true, + true))); + } + } + } + return rval; +} + + +vector > AddtRNAAndOther(const CBioseq_Handle& bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc) +{ + vector > rval; + if (cf.GetData().GetSubtype() != CSeqFeatData::eSubtype_misc_feature || + !cf.IsSetComment()) { + return rval; + } + + vector phrases = CAutoDefFeatureClause_Base::GetFeatureClausePhrases(cf.GetComment()); + if (phrases.size() < 2) { + return rval; + } + + bool first = true; + string last = phrases.back(); + phrases.pop_back(); + ITERATE(vector, it, phrases) { + rval.push_back(CRef(CAutoDefFeatureClause_Base::ClauseFromPhrase(*it, bh, cf, mapped_loc, first, false))); + first = false; + } + rval.push_back(CRef(CAutoDefFeatureClause_Base::ClauseFromPhrase(last, bh, cf, mapped_loc, first, true))); + + return rval; +} + + +vector > FeatureClauseFactory(CBioseq_Handle bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, const CAutoDefOptions& opts, bool is_single_misc_feat) +{ + vector > rval; + + auto subtype = cf.GetData().GetSubtype(); + + if (opts.IsFeatureSuppressed(subtype)) { + return rval; + } + + if (subtype == CSeqFeatData::eSubtype_gene) { + rval.push_back(CRef(new CAutoDefGeneClause(bh, cf, mapped_loc, opts.GetSuppressLocusTags()))); + } else if (subtype == CSeqFeatData::eSubtype_ncRNA) { + rval.push_back(CRef(new CAutoDefNcRNAClause(bh, cf, mapped_loc, opts.GetUseNcRNAComment()))); + } else if (subtype == CSeqFeatData::eSubtype_mobile_element) { + rval.push_back(CRef(new CAutoDefMobileElementClause(bh, cf, mapped_loc))); + } else if (CAutoDefFeatureClause::IsSatellite(cf)) { + rval.push_back(CRef(new CAutoDefSatelliteClause(bh, cf, mapped_loc))); + } else if (subtype == 
CSeqFeatData::eSubtype_otherRNA + || subtype == CSeqFeatData::eSubtype_misc_RNA + || subtype == CSeqFeatData::eSubtype_rRNA) { + auto misc_rna = AddMiscRNAFeatures(bh, cf, mapped_loc); + if (misc_rna.empty()) { + rval.push_back(CRef(new CAutoDefFeatureClause(bh, cf, mapped_loc))); + } else { + for (auto it : misc_rna) { + rval.push_back(it); + } + } + } else if (CAutoDefFeatureClause::IsPromoter(cf)) { + rval.push_back(CRef(new CAutoDefPromoterClause(bh, cf, mapped_loc))); + } else if (CAutoDefFeatureClause::IsGeneCluster(cf)) { + rval.push_back(CRef(new CAutoDefGeneClusterClause(bh, cf, mapped_loc))); + } else if (CAutoDefFeatureClause::IsControlRegion(cf)) { + rval.push_back(CRef(new CAutoDefFeatureClause(bh, cf, mapped_loc))); + } else if (subtype == CSeqFeatData::eSubtype_otherRNA) { + auto misc_rna = AddMiscRNAFeatures(bh, cf, mapped_loc); + if (misc_rna.empty()) { + // try to make trna clauses + misc_rna = AddtRNAAndOther(bh, cf, mapped_loc); + } + if (misc_rna.empty()) { + rval.push_back(CRef(new CAutoDefFeatureClause(bh, cf, mapped_loc))); + } else { + for (auto it : misc_rna) { + rval.push_back(it); + } + } + + } else if (subtype == CSeqFeatData::eSubtype_misc_feature) { + auto misc_rna = AddMiscRNAFeatures(bh, cf, mapped_loc); + if (misc_rna.empty()) { + // try to make trna clauses + misc_rna = AddtRNAAndOther(bh, cf, mapped_loc); + } + if (misc_rna.empty()) { + // some misc-features may require more parsing + CRef new_clause(new CAutoDefFeatureClause(bh, cf, mapped_loc)); + if (!is_single_misc_feat && + (opts.GetMiscFeatRule() == CAutoDefOptions::eDelete + || (opts.GetMiscFeatRule() == CAutoDefOptions::eNoncodingProductFeat && !new_clause->IsNoncodingProductFeat()))) { + // do not create a clause at all + new_clause.Reset(NULL); + } else if (opts.GetMiscFeatRule() == CAutoDefOptions::eCommentFeat) { + new_clause.Reset(NULL); + if (cf.CanGetComment() && !NStr::IsBlank(cf.GetComment())) { + misc_rna.push_back(CRef(new CAutoDefMiscCommentClause(bh, cf, 
mapped_loc))); + } + } else { + misc_rna.push_back(new_clause); + } + } + if (!misc_rna.empty()) { + for (auto it : misc_rna) { + rval.push_back(it); + } + } + + } else { + rval.push_back(CRef(new CAutoDefFeatureClause(bh, cf, mapped_loc))); + } + return rval; +} + + END_SCOPE(objects) END_NCBI_SCOPE diff --git a/c++/src/objmgr/util/autodef_feature_clause_base.cpp b/c++/src/objmgr/util/autodef_feature_clause_base.cpp index 6440b4af..40b6ef51 100644 --- a/c++/src/objmgr/util/autodef_feature_clause_base.cpp +++ b/c++/src/objmgr/util/autodef_feature_clause_base.cpp @@ -1,4 +1,4 @@ -/* $Id: autodef_feature_clause_base.cpp 572646 2018-10-17 16:58:07Z ivanov $ +/* $Id: autodef_feature_clause_base.cpp 578992 2019-01-29 13:03:37Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -67,9 +67,6 @@ CAutoDefFeatureClause_Base::CAutoDefFeatureClause_Base() : CAutoDefFeatureClause_Base::~CAutoDefFeatureClause_Base() { - for (auto it : m_ClauseList) { - delete it; - } } @@ -99,7 +96,7 @@ bool CAutoDefFeatureClause_Base::IsuORF(const string& product) } -void CAutoDefFeatureClause_Base::AddSubclause (CAutoDefFeatureClause_Base *subclause) +void CAutoDefFeatureClause_Base::AddSubclause (CRef subclause) { if (subclause) { m_ClauseList.push_back(subclause); @@ -188,7 +185,7 @@ string CAutoDefFeatureClause_Base::PrintClause(bool print_typeword, bool typewor } -bool CAutoDefFeatureClause_Base::DisplayAlleleName () +bool CAutoDefFeatureClause_Base::DisplayAlleleName () const { if (NStr::IsBlank(m_AlleleName)) { return false; @@ -341,6 +338,21 @@ void CAutoDefFeatureClause_Base::RemoveGenesMentionedElsewhere() } +bool HasAlleleChange(const CAutoDefFeatureClause_Base& c1, const CAutoDefFeatureClause_Base& c2) +{ + bool has_allele_1 = c1.DisplayAlleleName(); + bool has_allele_2 = c2.DisplayAlleleName(); + if ((has_allele_1 && !has_allele_2) || + (!has_allele_1 && has_allele_2) || + (has_allele_1 && has_allele_2)) { + return 
true; + } else { + return false; + } +} + + + string CAutoDefFeatureClause_Base::ListClauses(bool allow_semicolons, bool suppress_final_and, bool suppress_allele) { if (m_ClauseList.size() < 1) { @@ -397,8 +409,7 @@ string CAutoDefFeatureClause_Base::ListClauses(bool allow_semicolons, bool suppr } if (onebefore_has_typeword_change || onebefore_has_interval_change || - (m_ClauseList[k-1]->DisplayAlleleName() && - m_ClauseList[k]->DisplayAlleleName())) { + HasAlleleChange(*m_ClauseList[k-1], *m_ClauseList[k])) { onebefore_has_detail_change = true; } } @@ -414,8 +425,7 @@ string CAutoDefFeatureClause_Base::ListClauses(bool allow_semicolons, bool suppr } if (oneafter_has_typeword_change || oneafter_has_interval_change || - (m_ClauseList[k+1]->DisplayAlleleName() && - m_ClauseList[k]->DisplayAlleleName())) { + HasAlleleChange(*m_ClauseList[k], *m_ClauseList[k+1])) { oneafter_has_detail_change = true; } } @@ -593,7 +603,7 @@ size_t CAutoDefFeatureClause_Base::x_LastIntervalChangeBeforeEnd () const if (m_ClauseList.size() < 2) { return 0; } - string last_interval = m_ClauseList[m_ClauseList.size() - 1]->GetInterval(); + string last_interval = m_ClauseList.back()->GetInterval(); for (size_t k = m_ClauseList.size() - 2; k > 0; k--) { if (!NStr::Equal(m_ClauseList[k]->GetInterval(), last_interval) @@ -667,33 +677,15 @@ void CAutoDefFeatureClause_Base::PluralizeDescription() void CAutoDefFeatureClause_Base::RemoveDeletedSubclauses() { - unsigned int k, j; - k = 0; - while (k < m_ClauseList.size()) { - j = k; - while (j < m_ClauseList.size() && (m_ClauseList[j] == NULL || m_ClauseList[j]->IsMarkedForDeletion())) { - if (m_ClauseList[j] != NULL) { - delete m_ClauseList[j]; - } - j++; - } - if (j > k) { - unsigned int num_removed = j - k; - while (j < m_ClauseList.size()) { - m_ClauseList[j - num_removed] = m_ClauseList[j]; - j++; - } - while (num_removed > 0) { - m_ClauseList[m_ClauseList.size() - 1] = NULL; - m_ClauseList.pop_back(); - num_removed --; - } - } - while (k < 
m_ClauseList.size() && m_ClauseList[k] != NULL && !m_ClauseList[k]->IsMarkedForDeletion()) { - m_ClauseList[k]->RemoveDeletedSubclauses(); - k++; + auto it = m_ClauseList.begin(); + while (it != m_ClauseList.end()) { + if (!(*it) || (*it)->IsMarkedForDeletion()) { + it = m_ClauseList.erase(it); + } else { + (*it)->RemoveDeletedSubclauses(); + ++it; } - } + } } @@ -1138,7 +1130,7 @@ void CAutoDefFeatureClause_Base::SetProductName(string product_name) void CAutoDefFeatureClause_Base::CountUnknownGenes() { - CAutoDefUnknownGeneList *unknown_list = new CAutoDefUnknownGeneList(); + CRef unknown_list(new CAutoDefUnknownGeneList()); bool any_found = false; for (unsigned int k = 0; k < m_ClauseList.size(); k++) { @@ -1154,8 +1146,6 @@ void CAutoDefFeatureClause_Base::CountUnknownGenes() if (any_found) { AddSubclause(unknown_list); - } else { - delete unknown_list; } } @@ -1235,7 +1225,6 @@ void CAutoDefFeatureClause_Base::ExpandExonLists() TClauseList subclauses; subclauses.clear(); m_ClauseList[k]->TransferSubclauses(subclauses); - delete m_ClauseList[k]; for (unsigned int j = 0; j < subclauses.size(); j++) { if (k + j < m_ClauseList.size()) { m_ClauseList[k + j] = subclauses[j]; @@ -1535,7 +1524,7 @@ void CAutoDefFeatureClause_Base::RemoveuORFs() void CAutoDefFeatureClause_Base::RemoveOptionalMobileElements() { for (unsigned int k = 0; k < m_ClauseList.size(); k++) { - CAutoDefMobileElementClause* clause = dynamic_cast(m_ClauseList[k]); + CAutoDefMobileElementClause* clause = dynamic_cast(m_ClauseList[k].GetPointer()); if (clause && clause->IsOptional()) { m_ClauseList[k]->MarkForDeletion(); } else { @@ -1624,7 +1613,7 @@ CRef CAutoDefExonListClause::SeqLocIntersect (CRef loc1, CRe } -void CAutoDefExonListClause::AddSubclause (CAutoDefFeatureClause_Base *subclause) +void CAutoDefExonListClause::AddSubclause (CRef subclause) { CAutoDefFeatureClause_Base::AddSubclause(subclause); if (m_ClauseList.size() == 1) { @@ -1998,22 +1987,22 @@ vector 
CAutoDefFeatureClause_Base::GetFeatureClausePhrases(string commen } -CAutoDefFeatureClause_Base * CAutoDefFeatureClause_Base::ClauseFromPhrase(const string& phrase, CBioseq_Handle bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, bool first, bool last) +CRef CAutoDefFeatureClause_Base::ClauseFromPhrase(const string& phrase, CBioseq_Handle bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, bool first, bool last) { if (NStr::Equal(phrase, "control region") || NStr::Equal(phrase, "D-loop")) { // create a clause of the appropriate type - CAutoDefParsedClause* other = new CAutoDefParsedClause(bh, cf, mapped_loc, first, last); + CAutoDefParsedClause * other(new CAutoDefParsedClause(bh, cf, mapped_loc, first, last)); other->SetTypeword(phrase); other->SetTypewordFirst(false); - return other; + return CRef< CAutoDefFeatureClause> (other); } else if (x_GetRnaMiscWordType(phrase) != eMiscRnaWordType_Unrecognized) { CAutoDefParsedClause *new_clause = new CAutoDefParsedClause(bh, cf, mapped_loc, first, last); new_clause->SetMiscRNAWord(phrase); - return new_clause; + return CRef< CAutoDefFeatureClause>(new_clause); } else { CAutoDefParsedtRNAClause* trna = s_tRNAClauseFromNote(bh, cf, mapped_loc, phrase, first, last); - return trna; + return CRef< CAutoDefFeatureClause>(trna); } } diff --git a/c++/src/objmgr/util/autodef_mod_combo.cpp b/c++/src/objmgr/util/autodef_mod_combo.cpp index 9022b6ce..07a4894c 100644 --- a/c++/src/objmgr/util/autodef_mod_combo.cpp +++ b/c++/src/objmgr/util/autodef_mod_combo.cpp @@ -1,4 +1,4 @@ -/* $Id: autodef_mod_combo.cpp 572646 2018-10-17 16:58:07Z ivanov $ +/* $Id: autodef_mod_combo.cpp 575412 2018-11-28 17:54:57Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -78,9 +78,8 @@ CAutoDefModifierCombo::CAutoDefModifierCombo(CAutoDefModifierCombo *orig) m_GroupList.clear(); m_Modifiers.clear(); - ITERATE (TGroupListVector, it, orig->GetGroupList()) { - CAutoDefSourceGroup * g = new 
CAutoDefSourceGroup(*it); - m_GroupList.push_back (g); + for (auto it : orig->GetGroupList()) { + m_GroupList.emplace_back (new CAutoDefSourceGroup(*it)); } ITERATE (CAutoDefSourceDescription::TModifierVector, it, orig->GetModifiers()) { m_Modifiers.push_back (CAutoDefSourceModifierInfo(*it)); @@ -111,9 +110,6 @@ CAutoDefModifierCombo::CAutoDefModifierCombo(CAutoDefModifierCombo *orig) CAutoDefModifierCombo::~CAutoDefModifierCombo() { - for (unsigned int k = 0; k < m_GroupList.size(); k++) { - delete m_GroupList[k]; - } } @@ -223,21 +219,21 @@ bool CAutoDefModifierCombo::HasOrgMod(COrgMod::ESubtype st) } -void CAutoDefModifierCombo::AddSource(const CBioSource& bs, string feature_clauses) +void CAutoDefModifierCombo::AddSource(const CBioSource& bs, const string& feature_clauses) { - CAutoDefSourceDescription src(bs, feature_clauses); + CRef src(new CAutoDefSourceDescription(bs, feature_clauses)); bool found = false; - NON_CONST_ITERATE (TGroupListVector, it, m_GroupList) { - if ((*it)->GetSrcList().size() > 0 - && src.Compare (*((*it)->GetSrcList().begin())) == 0) { - (*it)->AddSource (&src); + for (auto it : m_GroupList) { + if (it->GetSrcList().size() > 0 + && src->Compare (**(it->GetSrcList().begin())) == 0) { + it->AddSource (src); found = true; } } if (!found) { - CAutoDefSourceGroup * g = new CAutoDefSourceGroup(); - g->AddSource (&src); + CRef g(new CAutoDefSourceGroup()); + g->AddSource (src); m_GroupList.push_back (g); } } @@ -371,7 +367,7 @@ bool CAutoDefModifierCombo::x_AddSubsourceString (string &source_description, co val = val.substr(0, pos); } } else if (st == CSubSource::eSubtype_plasmid_name && NStr::EqualNocase(val, "unnamed")) { - val = ""; + val.clear(); } if (!NStr::IsBlank(val)) { source_description += " " + val; @@ -420,7 +416,6 @@ bool CAutoDefModifierCombo::IsModifierInString(const string& find_this, const st bool CAutoDefModifierCombo::x_AddOrgModString (string &source_description, const CBioSource& bsrc, COrgMod::ESubtype st) { bool used = 
false; - string val; if (!bsrc.IsSetOrg() || !bsrc.GetOrg().IsSetOrgname() || !bsrc.GetOrg().GetOrgname().IsSetMod()) { return false; @@ -480,8 +475,6 @@ bool CAutoDefModifierCombo::HasTrickyHIV() void CAutoDefModifierCombo::x_AddHIVModifiers(TExtraOrgMods& extra_orgmods, TExtraSubSrcs& extra_subsrcs, const CBioSource& bsrc) { - string clone_text = ""; - string isolate_text = ""; bool src_has_clone = false; bool src_has_isolate = false; bool src_has_strain = false; @@ -1027,45 +1020,43 @@ int CAutoDefModifierCombo::Compare(const CAutoDefModifierCombo& other) const } -struct SAutoDefSourceGroupByStrings { - bool operator()(const CAutoDefSourceGroup& s1, - const CAutoDefSourceGroup& s2) const - { - return (s1 < s2); - } -}; +bool CompareAutoDefSourceGroupByStrings(CRef s1, + CRef s2) +{ + return (*s1 < *s2); +} bool CAutoDefModifierCombo::AddQual (bool IsOrgMod, int subtype, bool even_if_not_uniquifying) { bool added = false, rval = false; - vector new_groups; + vector > new_groups; new_groups.clear(); NON_CONST_ITERATE (TGroupListVector, it, m_GroupList) { - added |= (*it)->AddQual (IsOrgMod, subtype, m_KeepAfterSemicolon); - } - - if (added) { - NON_CONST_ITERATE (TGroupListVector, it, m_GroupList) { - vector tmp = (*it)->RemoveNonMatchingDescriptions(); - while (!tmp.empty()) { - new_groups.push_back (tmp[tmp.size() - 1]); - tmp.pop_back(); + if ((*it)->AddQual(IsOrgMod, subtype, m_KeepAfterSemicolon)) { + (*it)->SortDescriptions(); + auto split = (*it)->SplitGroup(); + while (split) { rval = true; + new_groups.emplace_back(split); + // further split group if necessary + split = split->SplitGroup(); } } } + // NOTE - need to put groups from non-matching descriptions and put them in a new_groups list // in order to avoid processing them twice - while (!new_groups.empty()) { - m_GroupList.push_back (new_groups[new_groups.size() - 1]); - new_groups.pop_back(); + if (!new_groups.empty()) { + m_GroupList.insert(m_GroupList.end(), new_groups.begin(), new_groups.end()); 
+ rval = true; } + if (rval || even_if_not_uniquifying) { m_Modifiers.push_back (CAutoDefSourceModifierInfo (IsOrgMod, subtype, "")); - std::sort (m_GroupList.begin(), m_GroupList.end(), SAutoDefSourceGroupByStrings()); + std::sort (m_GroupList.begin(), m_GroupList.end(), CompareAutoDefSourceGroupByStrings); if (IsOrgMod) { m_OrgMods.push_back ((COrgMod_Base::ESubtype)subtype); } else { @@ -1094,16 +1085,15 @@ vector> CAutoDefModifierCombo::ExpandByAnyPresent() vector> expanded; expanded.clear(); - NON_CONST_ITERATE (TGroupListVector, it, m_GroupList) { - if ((*it)->GetSrcList().size() == 1) { + for (auto it :m_GroupList) { + if (it->GetSrcList().size() == 1) { continue; } - mods = (*it)->GetModifiersPresentForAny(); - ITERATE (CAutoDefSourceDescription::TModifierVector, mod_it, mods) { - expanded.emplace_back (new CAutoDefModifierCombo (this)); - if (!expanded[expanded.size() - 1]->AddQual (mod_it->IsOrgMod(), mod_it->GetSubtype())) { - expanded.pop_back (); - RemoveQual(mod_it->IsOrgMod(), mod_it->GetSubtype()); + mods = it->GetModifiersPresentForAny(); + for (auto mod_it : mods) { + CRef cpy(new CAutoDefModifierCombo(this)); + if (cpy->AddQual(mod_it.IsOrgMod(), mod_it.GetSubtype())) { + expanded.emplace_back(cpy); } } if (!expanded.empty()) { @@ -1123,7 +1113,7 @@ bool CAutoDefModifierCombo::AreFeatureClausesUnique() CAutoDefSourceGroup::TSourceDescriptionVector::iterator s = src_list.begin(); while (s != src_list.end()) { clauses.push_back((*s)->GetFeatureClauses()); - s++; + ++s; } } if (clauses.size() < 2) { @@ -1133,14 +1123,14 @@ bool CAutoDefModifierCombo::AreFeatureClausesUnique() bool unique = true; vector::iterator sit = clauses.begin(); string prev = *sit; - sit++; + ++sit; while (sit != clauses.end() && unique) { if (NStr::Equal(prev, *sit)) { unique = false; } else { prev = *sit; } - sit++; + ++sit; } return unique; } diff --git a/c++/src/objmgr/util/autodef_source_group.cpp b/c++/src/objmgr/util/autodef_source_group.cpp index 0fa2ab0b..74f261cc 
100644 --- a/c++/src/objmgr/util/autodef_source_group.cpp +++ b/c++/src/objmgr/util/autodef_source_group.cpp @@ -1,4 +1,4 @@ -/* $Id: autodef_source_group.cpp 530329 2017-03-14 11:07:17Z bollin $ +/* $Id: autodef_source_group.cpp 575411 2018-11-28 17:54:35Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -63,23 +63,19 @@ CAutoDefSourceGroup::CAutoDefSourceGroup(CAutoDefSourceGroup *other) m_SourceList.clear(); for (index = 0; index < other->GetNumDescriptions(); index++) { - m_SourceList.push_back(new CAutoDefSourceDescription(other->GetSourceDescription(index))); + m_SourceList.push_back(CRef(new CAutoDefSourceDescription(other->GetSourceDescription(index)))); } } CAutoDefSourceGroup::~CAutoDefSourceGroup() { - unsigned int k; - for (k = 0; k < m_SourceList.size(); k++) { - delete (m_SourceList[k]); - } } -void CAutoDefSourceGroup::AddSource (CAutoDefSourceDescription *src) +void CAutoDefSourceGroup::AddSource (CRef src) { - m_SourceList.push_back (new CAutoDefSourceDescription(src)); + m_SourceList.push_back(src); } @@ -108,16 +104,45 @@ bool CAutoDefSourceGroup::RemoveQual (bool IsOrgMod, int subtype) struct SAutoDefSourceDescByStrings { - bool operator()(const CAutoDefSourceDescription& s1, - const CAutoDefSourceDescription& s2) const + bool operator()(CRef s1, + CRef s2) const { - return (s1 < s2); + return (*s1 < *s2); } }; -vector CAutoDefSourceGroup::RemoveNonMatchingDescriptions () + +void CAutoDefSourceGroup::SortDescriptions() +{ + std::sort(m_SourceList.begin(), m_SourceList.end(), SAutoDefSourceDescByStrings()); +} + + +// this function will make a new group out of any source descriptions that don't match the first one in the list +CRef CAutoDefSourceGroup::SplitGroup() +{ + CRef g(NULL); + auto it = m_SourceList.begin(); + it++; + while (it != m_SourceList.end() && (*it)->Compare(*m_SourceList[0]) == 0) { + it++; + } + if (it != m_SourceList.end()) { + g.Reset(new 
CAutoDefSourceGroup()); + while (it != m_SourceList.end()) { + g->AddSource(*it); + it = m_SourceList.erase(it); + } + } + return g; +} + + +// After adding a qualifier, some descriptions should no longer match, so they should be +// part of new source groups +vector > CAutoDefSourceGroup::RemoveNonMatchingDescriptions () { - vector group_list; + vector > group_list; TSourceDescriptionVector::iterator it; group_list.clear(); @@ -132,7 +157,7 @@ vector CAutoDefSourceGroup::RemoveNonMatchingDescriptions it++; } while (it != m_SourceList.end()) { - CAutoDefSourceGroup *g = new CAutoDefSourceGroup(); + CRef g(new CAutoDefSourceGroup()); g->AddSource (*it); it = m_SourceList.erase(it); while (it != m_SourceList.end() @@ -247,7 +272,7 @@ CAutoDefSourceDescription *CAutoDefSourceGroup::GetSourceDescription(unsigned in } -void CAutoDefSourceGroup::AddSourceDescription(CAutoDefSourceDescription *tmp) +void CAutoDefSourceGroup::AddSourceDescription(CRef tmp) { if (tmp == NULL) { return; diff --git a/c++/src/objmgr/util/create_defline.cpp b/c++/src/objmgr/util/create_defline.cpp index 40a4cffe..6b5b7f91 100644 --- a/c++/src/objmgr/util/create_defline.cpp +++ b/c++/src/objmgr/util/create_defline.cpp @@ -531,6 +531,7 @@ void CDeflineGenerator::x_SetFlagsIdx ( m_UnverifiedPrefix.clear(); if (m_IsUnverified) { int unverified_count = 0; + m_UnverifiedPrefix = "UNVERIFIED: "; if (bsx->IsUnverifiedOrganism()) { m_UnverifiedPrefix = "UNVERIFIED_ORG: "; unverified_count++; diff --git a/c++/src/objmgr/util/indexer.cpp b/c++/src/objmgr/util/indexer.cpp index 7d7044f1..ee3069d6 100644 --- a/c++/src/objmgr/util/indexer.cpp +++ b/c++/src/objmgr/util/indexer.cpp @@ -1055,8 +1055,13 @@ void CBioseqIndex::x_InitGaps (void) SSeqMapSelector sel; + size_t resolveCount = 1; + if (m_Policy == CSeqEntryIndex::eInternal) { + resolveCount = 0; + } + sel.SetFlags(CSeqMap::fFindGap) - .SetResolveCount(1); + .SetResolveCount(resolveCount); // explore gaps, pass original target BioseqHandle if using 
Bioseq sublocation for (CSeqMap_CI gap_it(m_OrigBsh, sel); gap_it; ++gap_it) { @@ -1855,13 +1860,24 @@ void CBioseqIndex::x_InitFeats (void) sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_STS); } if ((m_Flags & CSeqEntryIndex::fHideExonFeats) != 0) { + sel.ExcludeNamedAnnots("Exon"); sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon); } if ((m_Flags & CSeqEntryIndex::fHideIntronFeats) != 0) { sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron); } if ((m_Flags & CSeqEntryIndex::fHideMiscFeats) != 0) { + sel.ExcludeFeatType(CSeqFeatData::e_Site); + sel.ExcludeFeatType(CSeqFeatData::e_Bond); + sel.ExcludeFeatType(CSeqFeatData::e_Region); + sel.ExcludeFeatType(CSeqFeatData::e_Comment); sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature); + sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_preprotein); + } + + bool onlyGeneRNACDS = false; + if ((m_Flags & CSeqEntryIndex::fGeneRNACDSOnly) != 0) { + onlyGeneRNACDS = true; } // additional common settings @@ -1909,6 +1925,17 @@ void CBioseqIndex::x_InitFeats (void) // iterate features on Bioseq for (CFeat_CI feat_it(m_Bsh, sel); feat_it; ++feat_it) { const CMappedFeat mf = *feat_it; + + if (onlyGeneRNACDS) { + const CSeqFeatData& data = mf.GetData(); + CSeqFeatData::E_Choice type = data.Which(); + if (type != CSeqFeatData::e_Gene && + type != CSeqFeatData::e_Rna && + type != CSeqFeatData::e_Cdregion) { + continue; + } + } + CSeq_feat_Handle hdl = mf.GetSeq_feat_Handle(); CRef sfx(new CFeatureIndex(hdl, mf, *this)); @@ -2871,7 +2898,7 @@ CFeatureIndex::CFeatureIndex (CSeq_feat_Handle sfh, m_Mf(mf), m_Bsx(&bsx) { - const CSeqFeatData& data = m_Mf.GetData(); + const CSeqFeatData& data = m_Mf.GetData(); m_Type = data.Which(); m_Subtype = data.GetSubtype(); const CSeq_feat& mpd = m_Mf.GetMappedFeature(); diff --git a/c++/src/objmgr/util/objutil.cpp b/c++/src/objmgr/util/objutil.cpp index 65054aae..cb25ee0b 100644 --- a/c++/src/objmgr/util/objutil.cpp +++ b/c++/src/objmgr/util/objutil.cpp @@ -1,4 +1,4 @@ -/* $Id: 
objutil.cpp 573600 2018-10-30 11:55:27Z ivanov $ +/* $Id: objutil.cpp 580156 2019-02-11 16:24:38Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -253,11 +253,25 @@ string ConvertQuotes(const string& str) // Strips all spaces in string in following manner. If the function // meet several spaces (spaces and tabs) in succession it replaces them // with one space. Strips all spaces after '(' and before ( ')' or ',' ). -void StripSpaces(string& str) +bool StripSpaces(string& str) { if (str.empty()) { - return; + return false; + } + auto orig_len = str.length(); + + NStr::ReplaceInPlace(str, "\t", " "); + auto this_len = str.length(); + NStr::ReplaceInPlace(str, " ", " "); + while (str.length() != this_len) { + this_len = str.length(); + NStr::ReplaceInPlace(str, " ", " "); } + NStr::ReplaceInPlace(str, "( ", "("); + NStr::ReplaceInPlace(str, " )", ")"); + NStr::ReplaceInPlace(str, " ,", ","); + +#if 0 string::iterator end = str.end(); string::iterator it = str.begin(); @@ -277,6 +291,8 @@ void StripSpaces(string& str) } } str.erase(new_str, str.end()); +#endif + return (orig_len != str.length()); } diff --git a/c++/src/objtools/align_format/align_format_util.cpp b/c++/src/objtools/align_format/align_format_util.cpp index 0e22a4aa..1782dd35 100644 --- a/c++/src/objtools/align_format/align_format_util.cpp +++ b/c++/src/objtools/align_format/align_format_util.cpp @@ -1,4 +1,4 @@ -/* $Id: align_format_util.cpp 568472 2018-08-06 16:28:20Z zaretska $ +/* $Id: align_format_util.cpp 577752 2019-01-08 18:07:51Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2875,7 +2875,6 @@ CRange CAlignFormatUtil::GetSeqAlignCoverageParams(const CSeq_align_set return subjectRange; } - CRef CAlignFormatUtil::SortSeqalignForSortableFormat(CCgiContext& ctx, CScope& scope, @@ -2902,45 +2901,77 @@ CAlignFormatUtil::SortSeqalignForSortableFormat(CCgiContext& ctx, 
}else { seqalign_vec[0] = const_cast(&aln_set); } - + + ITERATE(vector< CRef >, iter, seqalign_vec){ - list< CRef > seqalign_hit_list; - HspListToHitList(seqalign_hit_list, **iter); - - if (hit_sort == eTotalScore) { - seqalign_hit_list.sort(SortHitByTotalScoreDescending); - } else if (hit_sort == eHighestScore) { - seqalign_hit_list.sort(CAlignFormatUtil::SortHitByScoreDescending); - } else if (hit_sort == ePercentIdentity) { + list< CRef > one_seqalign_hit_total_list = SortOneSeqalignForSortableFormat(**iter, + nuc_to_nuc_translation, + hit_sort, + hsp_sort); + + seqalign_hit_total_list.splice(seqalign_hit_total_list.end(),one_seqalign_hit_total_list); + + } + + return HitListToHspList(seqalign_hit_total_list); +} +list< CRef > +CAlignFormatUtil::SortOneSeqalignForSortableFormat(const CSeq_align_set& source, + bool nuc_to_nuc_translation, + int hit_sort, + int hsp_sort) +{ + list< CRef > seqalign_hit_total_list; + list< CRef > seqalign_hit_list; + HspListToHitList(seqalign_hit_list, source); - SortHitByPercentIdentityDescending(seqalign_hit_list, + if (hit_sort == eTotalScore) { + seqalign_hit_list.sort(SortHitByTotalScoreDescending); + } else if (hit_sort == eHighestScore) { + seqalign_hit_list.sort(CAlignFormatUtil::SortHitByScoreDescending); + } else if (hit_sort == ePercentIdentity) { + SortHitByPercentIdentityDescending(seqalign_hit_list, nuc_to_nuc_translation); - } else if (hit_sort == eQueryCoverage) { - seqalign_hit_list.sort(SortHitByMasterCoverageDescending); - } + } else if (hit_sort == eQueryCoverage) { + seqalign_hit_list.sort(SortHitByMasterCoverageDescending); + } + + ITERATE(list< CRef >, iter2, seqalign_hit_list) { + CRef temp(*iter2); + if (hsp_sort == eQueryStart) { + temp->Set().sort(SortHspByMasterStartAscending); + } else if (hsp_sort == eHspPercentIdentity) { + temp->Set().sort(SortHspByPercentIdentityDescending); + } else if (hsp_sort == eScore) { + temp->Set().sort(SortHspByScoreDescending); + } else if (hsp_sort == eSubjectStart) { + 
temp->Set().sort(SortHspBySubjectStartAscending); + + } + seqalign_hit_total_list.push_back(temp); + } + return seqalign_hit_total_list; +} - ITERATE(list< CRef >, iter2, seqalign_hit_list) { - CRef temp(*iter2); - if (hsp_sort == eQueryStart) { - temp->Set().sort(SortHspByMasterStartAscending); - } else if (hsp_sort == eHspPercentIdentity) { - temp->Set().sort(SortHspByPercentIdentityDescending); - - } else if (hsp_sort == eScore) { - temp->Set().sort(SortHspByScoreDescending); - - } else if (hsp_sort == eSubjectStart) { - temp->Set().sort(SortHspBySubjectStartAscending); - - } - - seqalign_hit_total_list.push_back(temp); - } +CRef +CAlignFormatUtil::SortSeqalignForSortableFormat(CSeq_align_set& aln_set, + bool nuc_to_nuc_translation, + int hit_sort, + int hsp_sort) { + + if (hit_sort <= eEvalue && hsp_sort <= eHspEvalue) { + return (CRef) &aln_set; } - + +// seqalign_vec[0] = const_cast(&aln_set); + list< CRef > seqalign_hit_total_list = SortOneSeqalignForSortableFormat(aln_set, + nuc_to_nuc_translation, + hit_sort, + hsp_sort); return HitListToHspList(seqalign_hit_total_list); } + CRef CAlignFormatUtil::FilterSeqalignByEval(CSeq_align_set& source_aln, double evalueLow, double evalueHigh) diff --git a/c++/src/objtools/align_format/format_flags.cpp b/c++/src/objtools/align_format/format_flags.cpp index e27892a5..6d92d830 100644 --- a/c++/src/objtools/align_format/format_flags.cpp +++ b/c++/src/objtools/align_format/format_flags.cpp @@ -1,4 +1,4 @@ -/* $Id: format_flags.cpp 516923 2016-10-19 14:23:23Z fongah2 $ +/* $Id: format_flags.cpp 577748 2019-01-08 18:06:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -222,6 +222,11 @@ const size_t kDfltArgNumAlignments = 250; const string kArgProduceHtml("html"); const bool kDfltArgProduceHtml = false; const size_t kDfltLineLength = 60; +const string kArgAlignSeqList("alignseqlist"); +const string kArgMetadata("searchmetadata"); +const string 
kArgQueryIndex("queryindex"); +const string kArgSortHits("sorthits"); +const string kArgSortHSPs("sorthsps"); const size_t kNumSAMOutputFormatSpecifiers = 2; const SSAMFormatSpec sc_SAMFormatSpecifiers[kNumSAMOutputFormatSpecifiers] = { diff --git a/c++/src/objtools/align_format/showalign.cpp b/c++/src/objtools/align_format/showalign.cpp index ff99a334..30e8a698 100644 --- a/c++/src/objtools/align_format/showalign.cpp +++ b/c++/src/objtools/align_format/showalign.cpp @@ -1,4 +1,4 @@ -/* $Id: showalign.cpp 565464 2018-06-12 17:53:45Z zaretska $ +/* $Id: showalign.cpp 581160 2019-02-26 13:09:53Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -87,8 +87,8 @@ static const string k_FrameConversion[k_NumFrame] = {"+1", "+2", "+3", "-1", "-2", "-3"}; static const int k_GetSubseqThreshhold = 10000; -///threshhold to color mismatch. 98 means 98% -static const int k_ColorMismatchIdentity = 0; +///threshhold to color mismatch. 98 means 98% +static const int k_ColorMismatchIdentity = 0; static const int k_GetDynamicFeatureSeqLength = 200000; static const string k_DumpGnlUrl = "/blast/dumpgnl.cgi"; static const int k_FeatureIdLen = 16; @@ -103,14 +103,14 @@ static const int k_StartSequenceMargin = 2; static const int k_AlignStatsMargin = 2; static const int k_SequencePropertyLabelMargin = 2; -const string k_DefaultAnchorTempl = ">"; +const string k_DefaultAnchorTempl = ">"; const string k_DefaultAnchorWithPosTempl = "_<@id_lbl@>>"; static const string k_DefaultSpaceMaintainerTempl = "<@chkbox@>"; static const string k_DefaultCheckboxTempl = "\" onClick=\"synchronizeCheck(this.value, 'getSeqAlignment<@queryNumber@>', 'getSeqGi', this.checked)\">"; static const string k_DefaultCheckboxExTempl = "\" checked=\"checked\" onClick=\"synchAl(this);\">"; - + //highlight the seqid for pairwise-with-identity format -const string k_DefaultPairwiseWithIdntTempl = "<@alndata@>";//k_ColorRed +const string 
k_DefaultPairwiseWithIdntTempl = "<@alndata@>";//k_ColorRed const string k_DefaultFeaturesTempl = "<@alndata@>";//k_ColorPink const string k_DefaultMaskSeqLocTempl = "\"><@alndata@>"; @@ -123,7 +123,7 @@ onClick=\"finalSubmit(%d, 'getSeqAlignment%d', 'getSeqGi', '%s%d', %d)\">", - + "
>* mask_seqloc, + list >* mask_seqloc, list * external_feature, const char* matrix_name /* = BLAST_DEFAULT_MATRIX */) : m_SeqalignSetRef(&seqalign), @@ -180,20 +180,20 @@ CDisplaySeqalign::CDisplaySeqalign(const CSeq_align_set& seqalign, m_MidLineStyle = eBar; m_ConfigFile = NULL; m_Reg = NULL; - m_DynamicFeature = NULL; + m_DynamicFeature = NULL; m_MasterGeneticCode = 1; m_SlaveGeneticCode = 1; m_AlignTemplates = NULL; m_Ctx = NULL; - m_Matrix = NULL; //-RMH- + m_Matrix = NULL; //-RMH- m_DomainInfo = NULL; - m_SeqPropertyLabel = new vector; + m_SeqPropertyLabel.reset(new vector); m_TranslatedFrameForLocalSeq = eFirst; m_ResultPositionIndex = -1; m_currAlignSeqListIndex = 1; CNcbiMatrix mtx; - CAlignFormatUtil::GetAsciiProteinMatrix(matrix_name - ? matrix_name + CAlignFormatUtil::GetAsciiProteinMatrix(matrix_name + ? matrix_name : BLAST_DEFAULT_MATRIX, mtx); // Use default score matrix if one with the provided name was not found. @@ -233,21 +233,21 @@ CDisplaySeqalign::~CDisplaySeqalign() delete [] m_Matrix; if (m_ConfigFile) { delete m_ConfigFile; - } + } if (m_Reg) { delete m_Reg; } - + if(m_DynamicFeature){ delete m_DynamicFeature; } - } + } } //8.Display Identities,positives,frames etc string CDisplaySeqalign::x_FormatIdentityInfo(string alignInfo, SAlnInfo* aln_vec_info) { - int aln_stop = (int)m_AV->GetAlnStop(); + int aln_stop = (int)m_AV->GetAlnStop(); int master_strand = m_AV->StrandSign(0); int slave_strand = m_AV->StrandSign(1); int master_frame = aln_vec_info->alnRowInfo->frame[0]; @@ -256,38 +256,38 @@ string CDisplaySeqalign::x_FormatIdentityInfo(string alignInfo, SAlnInfo* aln_ve string alignParams = alignInfo;//Some already filled in x_DisplayAlignInfo - - + + alignParams = CAlignFormatUtil::MapTemplate(alignParams, "aln_match",NStr::IntToString(aln_vec_info->match) + "/"+ NStr::IntToString(aln_stop+1)); alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_ident",aln_vec_info->identity); - - if(aln_is_prot){ + + if(aln_is_prot){ alignParams 
= CAlignFormatUtil::MapTemplate(alignParams,"aln_pos",NStr::IntToString(aln_vec_info->positive + aln_vec_info->match) + "/" + NStr::IntToString(aln_stop+1)); alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_pos_prc",NStr::IntToString(((aln_vec_info->positive + aln_vec_info->match)*100)/(aln_stop+1))); } else { alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_strand",(master_strand==1 ? "Plus" : "Minus")+ (string)"/"+ (slave_strand==1? "Plus" : "Minus")); } - + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_gaps",NStr::IntToString(aln_vec_info->gap) + "/" + NStr::IntToString(aln_stop+1)); alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_gaps_prc",NStr::IntToString((aln_vec_info->gap*100)/(aln_stop+1))); alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_params_frame",(master_frame != 0 || slave_frame != 0) ? m_AlignTemplates->alignInfoFrameTmpl: ""); - if(master_frame != 0 && slave_frame != 0) { - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame",((master_frame > 0) ? "+" : "") + NStr::IntToString(master_frame) + if(master_frame != 0 && slave_frame != 0) { + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame",((master_frame > 0) ? "+" : "") + NStr::IntToString(master_frame) + (string)"/"+((slave_frame > 0) ? "+" : "") + NStr::IntToString(slave_frame)); - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","shown"); - } else if (master_frame != 0){ + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","shown"); + } else if (master_frame != 0){ alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame",((master_frame > 0) ? 
"+" : "") + NStr::IntToString(master_frame)); - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","shown"); - } else if (slave_frame != 0){ + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","shown"); + } else if (slave_frame != 0){ alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame",((slave_frame > 0) ? "+" : "") + NStr::IntToString(slave_frame)) ; - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","shown"); + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","shown"); } else { - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame",""); - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show",""); - } + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame",""); + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show",""); + } return alignParams; } @@ -305,10 +305,10 @@ string CDisplaySeqalign::x_FormatIdentityInfo(string alignInfo, SAlnInfo* aln_ve ///@param slave_frame: frame for slave ///@param aln_is_prot: is protein alignment? /// -static void s_DisplayIdentityInfo(CNcbiOstream& out, int aln_stop, +static void s_DisplayIdentityInfo(CNcbiOstream& out, int aln_stop, int identity, int positive, int match, - int gap, int master_strand, - int slave_strand, int master_frame, + int gap, int master_strand, + int slave_strand, int master_frame, int slave_frame, bool aln_is_prot) { out<<" Identities = "< 0) ? "+" : "") + out <<" Frame = " << ((master_frame > 0) ? "+" : "") + << master_frame <<"/"<<((slave_frame > 0) ? "+" : "") << slave_frame<<"\n"; } else if (master_frame != 0){ - out <<" Frame = " << ((master_frame > 0) ? "+" : "") + out <<" Frame = " << ((master_frame > 0) ? "+" : "") << master_frame << "\n"; } else if (slave_frame != 0){ - out <<" Frame = " << ((slave_frame > 0) ? "+" : "") + out <<" Frame = " << ((slave_frame > 0) ? 
"+" : "") << slave_frame <<"\n"; - } + } out<<"\n"; - + } ///wrap line @@ -350,10 +350,10 @@ static void s_WrapOutputLine(CNcbiOstream& out, const string& str) for (int i = 0; i < length; i ++){ if(i > 0 && i % line_len == 0){ do_wrap = true; - } + } out << str[i]; if(do_wrap && isspace((unsigned char) str[i])){ - out << "\n"; + out << "\n"; do_wrap = false; } } @@ -373,20 +373,20 @@ static void s_WrapOutputLine(CNcbiOstream& out, const string& str) /// //This function appends seq[currIndex] to styledSeqStr if startStyledOutput==true or !styledSeqStr.empty() && !stopStyledOutput //If stopStyledOutput==true or it is the end of the seq and styledSeqStr has data, -//Template like "<@alndata@>" or <@alndata@> is applied to styledSeqStr +//Template like "<@alndata@>" or <@alndata@> is applied to styledSeqStr // and output to CNcbiOstream static bool s_ProcessStyledContent(string& seq, int currIndex, bool startStyledOutput, bool stopStyledOutput, string tmpl,string &styledSeqStr,CNcbiOstream& out) -{ +{ bool isStyled = false; - if(startStyledOutput || (!styledSeqStr.empty() && !stopStyledOutput)){ - styledSeqStr += seq[currIndex]; + if(startStyledOutput || (!styledSeqStr.empty() && !stopStyledOutput)){ + styledSeqStr += seq[currIndex]; isStyled = true; - } + } if(!styledSeqStr.empty() && (stopStyledOutput || currIndex == (int)seq.size() - 1) ) { - styledSeqStr = CAlignFormatUtil::MapTemplate(tmpl,"alndata",styledSeqStr); + styledSeqStr = CAlignFormatUtil::MapTemplate(tmpl,"alndata",styledSeqStr); out << styledSeqStr; - styledSeqStr = ""; - } + styledSeqStr = ""; + } return isStyled; } @@ -396,12 +396,12 @@ static bool s_ProcessStyledContent(string& seq, int currIndex, bool startStyledO ///@param out: output stream /// static void s_ColorDifferentBases(string& seq, char identity_char, - CNcbiOstream& out){ - std::string colorSeqStr; - for(int i = 0; i < (int)seq.size(); i ++){ + CNcbiOstream& out){ + std::string colorSeqStr; + for(int i = 0; i < (int)seq.size(); i ++){ 
bool isStyled = s_ProcessStyledContent(seq,i,seq[i] != identity_char,seq[i] == identity_char,k_DefaultPairwiseWithIdntTempl,colorSeqStr,out); if(!isStyled) out << seq[i]; - } + } } ///return the frame for a given strand @@ -412,8 +412,8 @@ static void s_ColorDifferentBases(string& seq, char identity_char, ///@param scope: the scope ///@return: the frame /// -static int s_GetFrame (int start, ENa_strand strand, const CSeq_id& id, - CScope& sp) +static int s_GetFrame (int start, ENa_strand strand, const CSeq_id& id, + CScope& sp) { int frame = 0; if (strand == eNa_strand_plus) { @@ -421,7 +421,7 @@ static int s_GetFrame (int start, ENa_strand strand, const CSeq_id& id, } else if (strand == eNa_strand_minus) { frame = -(((int)sp.GetBioseqHandle(id).GetBioseqLength() - start - 1) % 3 + 1); - + } return frame; } @@ -450,11 +450,11 @@ static int s_GetStdsegMasterFrame(const CStd_seg& ss, CScope& scope) ///@param range: the range list of seqloc ///@param total_coding_len: the total exon length excluding intron ///@param raw_cdr_product: the raw protein sequence -///@return: the concatenated exon sequences with amino acid aligned to +///@return: the concatenated exon sequences with amino acid aligned to ///to the second base of a codon /// -static string s_GetConcatenatedExon(CFeat_CI& feat, - ENa_strand feat_strand, +static string s_GetConcatenatedExon(CFeat_CI& feat, + ENa_strand feat_strand, list >& range, TSeqPos total_coding_len, string& raw_cdr_product, TSeqPos frame_adj) @@ -473,14 +473,14 @@ static string s_GetConcatenatedExon(CFeat_CI& feat, coding_start_base = total_coding_len - 1 - (frame -1) - frame_adj; num_base = total_coding_len - 1; num_coding_base = 0; - + } else { - coding_start_base = 0; + coding_start_base = 0; coding_start_base += frame - 1 + frame_adj; num_base = 0; num_coding_base = 0; } - + ITERATE(list >, iter, range){ //note that feature on minus strand needs to be //filled backward. 
@@ -494,34 +494,34 @@ static string s_GetConcatenatedExon(CFeat_CI& feat, //make sure the coding region is no //more than the protein seq as there //could errors in ncbi record - concat_exon[num_base] + concat_exon[num_base] = raw_cdr_product[num_coding_base / 3]; - } + } } } num_base ++; - } + } } else { - + for(TSeqPos i = 0; i < iter->GetLength() && num_base >= 0; i ++){ if((TSeqPos)num_base <= coding_start_base){ num_coding_base ++; if(num_coding_base % 3 == 2){ //a.a to the 2nd base - if(num_coding_base / 3 < + if(num_coding_base / 3 < raw_cdr_product.size() && coding_start_base >= num_coding_base){ //make sure the coding region is no //more than the protein seq as there //could errors in ncbi record - concat_exon[num_base] + concat_exon[num_base] = raw_cdr_product[num_coding_base / 3]; - } + } } } num_base --; - } + } } } return concat_exon; @@ -540,7 +540,7 @@ static void s_MapSlaveFeatureToMaster(list >& master_feat_range, ENa_strand& master_feat_strand, CFeat_CI& feat, list& slave_feat_range, ENa_strand slave_feat_strand, - CAlnVec* av, + CAlnVec* av, int row, TSeqPos frame_adj) { TSeqPos trans_frame = 1; @@ -553,88 +553,88 @@ static void s_MapSlaveFeatureToMaster(list >& master_feat_range, TSeqPos prev_exon_len = 0; bool is_first_in_range = true; - if ((av->IsPositiveStrand(1) && slave_feat_strand == eNa_strand_plus) || + if ((av->IsPositiveStrand(1) && slave_feat_strand == eNa_strand_plus) || (av->IsNegativeStrand(1) && slave_feat_strand == eNa_strand_minus)) { master_feat_strand = eNa_strand_plus; } else { master_feat_strand = eNa_strand_minus; } - + list acutal_slave_feat_range = slave_feat_range; ITERATE(list, iter_temp, acutal_slave_feat_range){ CRange actual_feat_seq_range = av->GetSeqRange(row). 
- IntersectionWith(*iter_temp); + IntersectionWith(*iter_temp); if(!actual_feat_seq_range.Empty()){ TSeqPos slave_aln_from = 0, slave_aln_to = 0; TSeqPos frame_offset = 0; int curr_exon_leading_len = 0; - //adjust frame - if (is_first_in_range) { + //adjust frame + if (is_first_in_range) { if (slave_feat_strand == eNa_strand_plus) { - curr_exon_leading_len + curr_exon_leading_len = actual_feat_seq_range.GetFrom() - iter_temp->GetFrom(); - + } else { - curr_exon_leading_len + curr_exon_leading_len = iter_temp->GetTo() - actual_feat_seq_range.GetTo(); } is_first_in_range = false; frame_offset = (3 - (prev_exon_len + curr_exon_leading_len)%3 + (trans_frame - 1)) % 3; } - - if (av->IsPositiveStrand(1) && + + if (av->IsPositiveStrand(1) && slave_feat_strand == eNa_strand_plus) { - slave_aln_from - = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetFrom() + - frame_offset, CAlnMap::eRight ); - + slave_aln_from + = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetFrom() + + frame_offset, CAlnMap::eRight ); + slave_aln_to = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo(), CAlnMap::eLeft); - } else if (av->IsNegativeStrand(1) && + } else if (av->IsNegativeStrand(1) && slave_feat_strand == eNa_strand_plus) { - - slave_aln_from + + slave_aln_from = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo(), - CAlnMap::eRight); - + CAlnMap::eRight); + slave_aln_to = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetFrom() + frame_offset, CAlnMap::eLeft); - } else if (av->IsPositiveStrand(1) && + } else if (av->IsPositiveStrand(1) && slave_feat_strand == eNa_strand_minus) { - slave_aln_from + slave_aln_from = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetFrom(), - CAlnMap::eRight); - + CAlnMap::eRight); + slave_aln_to = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo() - frame_offset, CAlnMap::eLeft); - } else if (av->IsNegativeStrand(1) && + } else if (av->IsNegativeStrand(1) && slave_feat_strand == eNa_strand_minus){ - slave_aln_from 
- = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo() - - frame_offset, CAlnMap::eRight ); - + slave_aln_from + = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo() - + frame_offset, CAlnMap::eRight ); + slave_aln_to = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetFrom(), CAlnMap::eLeft); } - - TSeqPos master_from = + + TSeqPos master_from = av->GetSeqPosFromAlnPos(0, slave_aln_from, CAlnMap::eRight); - - TSeqPos master_to = + + TSeqPos master_to = av->GetSeqPosFromAlnPos(0, slave_aln_to, CAlnMap::eLeft); - + CRange master_range(master_from, master_to); - master_feat_range.push_back(master_range); - + master_feat_range.push_back(master_range); + } prev_exon_len += iter_temp->GetLength(); } @@ -654,27 +654,27 @@ static void s_MapSlaveFeatureToMaster(list >& master_feat_range, ///@param mix_loc: is this seqloc mixed with other seqid? ///@return: the encoded protein sequence /// -static string s_GetCdsSequence(int genetic_code, CFeat_CI& feat, +static string s_GetCdsSequence(int genetic_code, CFeat_CI& feat, CScope& scope, list >& range, - const CBioseq_Handle& handle, + const CBioseq_Handle& handle, ENa_strand feat_strand, string& feat_id, TSeqPos frame_adj, bool mix_loc) { string raw_cdr_product = NcbiEmptyString; if(feat->IsSetProduct() && feat->GetProduct().IsWhole() && !mix_loc){ //show actual aa if there is a cds product - - const CSeq_id& productId = + + const CSeq_id& productId = feat->GetProduct().GetWhole(); - const CBioseq_Handle& productHandle + const CBioseq_Handle& productHandle = scope.GetBioseqHandle(productId ); - feat_id = "CDS:" + + feat_id = "CDS:" + CDeflineGenerator().GenerateDefline(productHandle).substr(0, k_FeatureIdLen); productHandle. GetSeqVector(CBioseq_Handle::eCoding_Iupac). GetSeqData(0, productHandle. 
GetBioseqLength(), raw_cdr_product); - } else { + } else { CSeq_loc isolated_loc; ITERATE(list >, iter, range){ TSeqPos from = iter->GetFrom(); @@ -682,12 +682,12 @@ static string s_GetCdsSequence(int genetic_code, CFeat_CI& feat, if(feat_strand == eNa_strand_plus){ isolated_loc. Add(*(handle.GetRangeSeq_loc(from + frame_adj, - to, + to, feat_strand))); } else { isolated_loc. Add(*(handle.GetRangeSeq_loc(from, - to - frame_adj, + to - frame_adj, feat_strand))); } } @@ -716,35 +716,35 @@ static string s_GetCdsSequence(int genetic_code, CFeat_CI& feat, static void s_FillCdsStartPosition(string& line, string& concat_exon, size_t length_per_line, TSeqPos feat_aln_start_totalexon, - ENa_strand seq_strand, + ENa_strand seq_strand, ENa_strand feat_strand, list& start) { size_t actual_line_len = 0; size_t aln_len = line.size(); TSeqPos previous_num_letter = 0; - + //the number of amino acids preceeding this exon start position for (size_t i = 0; i <= feat_aln_start_totalexon; i ++){ if(feat_strand == eNa_strand_minus){ //remember the amino acid in this case goes backward //therefore we count backward too - + int pos = concat_exon.size() -1 - i; if(pos >= 0 && isalpha((unsigned char) concat_exon[pos])){ previous_num_letter ++; } - + } else { if(isalpha((unsigned char) concat_exon[i])){ previous_num_letter ++; } } } - - + + TSeqPos prev_num = 0; - //go through the entire feature line and get the amino acid position + //go through the entire feature line and get the amino acid position //for each line for(size_t i = 0; i < aln_len; i += actual_line_len){ //handle the last row which may be shorter @@ -756,7 +756,7 @@ static void s_FillCdsStartPosition(string& line, string& concat_exon, //the number of amino acids on this row TSeqPos cur_num = 0; bool has_intron = false; - + //go through each character on a row for(size_t j = i; j < actual_line_len + i; j ++){ //don't count gap @@ -766,20 +766,20 @@ static void s_FillCdsStartPosition(string& line, string& concat_exon, 
has_intron = true; } } - + if(cur_num > 0){ if(seq_strand == eNa_strand_plus){ if(feat_strand == eNa_strand_minus) { - start.push_back(previous_num_letter - prev_num); + start.push_back(previous_num_letter - prev_num); } else { - start.push_back(previous_num_letter + prev_num); + start.push_back(previous_num_letter + prev_num); } } else { if(feat_strand == eNa_strand_minus) { - start.push_back(previous_num_letter + prev_num); + start.push_back(previous_num_letter + prev_num); } else { - start.push_back(previous_num_letter - prev_num); - } + start.push_back(previous_num_letter - prev_num); + } } } else if (has_intron) { start.push_back(0); //sentinal for no show @@ -797,11 +797,11 @@ static void s_FillCdsStartPosition(string& line, string& concat_exon, /// static CRef s_MakeNewMasterSeq(list > >& feat_range, list& feat_seq_strand, - const CBioseq_Handle& handle) + const CBioseq_Handle& handle) { CRef obj; obj = CObjectManager::GetInstance(); - CGBDataLoader::RegisterInObjectManager(*obj); + CGBDataLoader::RegisterInObjectManager(*obj); CRef scope (new CScope(*obj)); scope->AddDefaults(); CRef cbsp(new CBioseq()); @@ -822,7 +822,7 @@ static CRef s_MakeNewMasterSeq(list > >& feat_range seq_feat->SetData(*feat_data); seq_feat->SetComment("Putative " + NStr::IntToString(counter)); CRef seq_loc (new CSeq_loc); - + ITERATE(list >, iter2, *iter) { seq_loc->Add(*(handle.GetRangeSeq_loc(iter2->GetFrom(), iter2->GetTo(), @@ -836,12 +836,12 @@ static CRef s_MakeNewMasterSeq(list > >& feat_range CRef entry(new CSeq_entry()); entry->SetSeq(*cbsp); scope->AddTopLevelSeqEntry(*entry); - + return scope; } //output feature lines -//@param reference_feat_line: the master feature line to be compared +//@param reference_feat_line: the master feature line to be compared //for coloring //@param feat_line: the slave feature line //@param color_feat_mismatch: color or not @@ -849,11 +849,11 @@ static CRef s_MakeNewMasterSeq(list > >& feat_range //@param len: the length per line //@param 
out: stream for output // -static void s_OutputFeature(string& reference_feat_line, +static void s_OutputFeature(string& reference_feat_line, string& feat_line, bool color_feat_mismatch, int start, - int len, + int len, CNcbiOstream& out, bool is_html) { @@ -863,14 +863,14 @@ static void s_OutputFeature(string& reference_feat_line, if(reference_feat_line != NcbiEmptyString){ actual_reference_feat = reference_feat_line.substr(start, len); } - if(color_feat_mismatch + if(color_feat_mismatch && actual_reference_feat != NcbiEmptyString && - !NStr::IsBlank(actual_reference_feat)){ + !NStr::IsBlank(actual_reference_feat)){ string styledSequenceStr; for(int i = 0; i < (int)actual_feat.size() && i < (int)actual_reference_feat.size(); i ++){ - bool styledOutput = actual_feat[i] != actual_reference_feat[i] && - (actual_feat[i] != ' ' && actual_feat[i] != k_IntronChar && actual_reference_feat[i] != k_IntronChar); + bool styledOutput = actual_feat[i] != actual_reference_feat[i] && + (actual_feat[i] != ' ' && actual_feat[i] != k_IntronChar && actual_reference_feat[i] != k_IntronChar); bool stopStyledOutput = (actual_feat[i] == actual_reference_feat[i]) && actual_feat[i] != ' '; bool isStyled = s_ProcessStyledContent(actual_feat,i,styledOutput,stopStyledOutput, k_DefaultFeaturesTempl,styledSequenceStr,out); if(!isStyled) out << actual_feat[i]; @@ -879,19 +879,19 @@ static void s_OutputFeature(string& reference_feat_line, out << (is_html?CHTMLHelper::HTMLEncode(actual_feat):actual_feat); } } - + } void CDisplaySeqalign::x_PrintFeatures(SAlnRowInfo *alnRoInfo, - int row, + int row, string& master_feat_str, CNcbiOstream& out) { TSAlnFeatureInfoList& feature = alnRoInfo->bioseqFeature[row]; CAlnMap::TSignedRange alignment_range = alnRoInfo->currRange; int aln_start = alnRoInfo->currPrintSegment; - int line_length = alnRoInfo->currActualLineLen; + int line_length = alnRoInfo->currActualLineLen; int start_length = alnRoInfo->maxStartLen; int id_length = alnRoInfo->maxIdLen; if 
(alnRoInfo->show_align_stats) { @@ -899,17 +899,17 @@ void CDisplaySeqalign::x_PrintFeatures(SAlnRowInfo *alnRoInfo, } if (alnRoInfo->show_seq_property_label){ id_length += alnRoInfo->max_seq_property_label + k_SequencePropertyLabelMargin; - } + } NON_CONST_ITERATE(TSAlnFeatureInfoList, iter, feature) { - //check blank string for cases where CDS is in range - //but since it must align with the 2nd codon and is + //check blank string for cases where CDS is in range + //but since it must align with the 2nd codon and is //actually not in range - if (alignment_range.IntersectingWith((*iter)->aln_range) && + if (alignment_range.IntersectingWith((*iter)->aln_range) && !(NStr::IsBlank((*iter)->feature_string. substr(aln_start, line_length)) && - m_AlignOption & eShowCdsFeature)){ + m_AlignOption & eShowCdsFeature)){ if((m_AlignOption&eHtml)&&(m_AlignOption&eMergeAlign) - && (m_AlignOption&eSequenceRetrieval && m_CanRetrieveSeq)){ + && (m_AlignOption&eSequenceRetrieval && m_CanRetrieveSeq)){ string checkboxBuf = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox",""); out << checkboxBuf; } @@ -936,22 +936,22 @@ void CDisplaySeqalign::x_PrintFeatures(SAlnRowInfo *alnRoInfo, +start_length + k_StartSequenceMargin -(*iter)->feature->feature_id.size()); } - + (*iter)->feature_start.pop_front(); } - bool color_cds_mismatch = false; - if((m_AlignOption & eHtml) && + bool color_cds_mismatch = false; + if((m_AlignOption & eHtml) && (m_AlignOption & eShowCdsFeature) && row > 0){ //only for slaves, only for cds feature color_cds_mismatch = true; - } else if((m_AlignOption & eHtml) && + } else if((m_AlignOption & eHtml) && !(m_AlignOption & eShowCdsFeature) && (m_AlignOption & eShowTranslationForLocalSeq) && row > 0){ //mostly for igblast //only for slave color_cds_mismatch = true; } - s_OutputFeature(master_feat_str, + s_OutputFeature(master_feat_str, (*iter)->feature_string, color_cds_mismatch, aln_start, line_length, out, (m_AlignOption & eHtml)); @@ -961,14 
+961,14 @@ void CDisplaySeqalign::x_PrintFeatures(SAlnRowInfo *alnRoInfo, out<<"\n"; } } - + } - + string CDisplaySeqalign::x_HTMLSeqIDLink(SAlnRowInfo *alnRoInfo, int row,TGi giToUse) { - const CBioseq_Handle& bsp_handle = m_AV->GetBioseqHandle(row); + const CBioseq_Handle& bsp_handle = m_AV->GetBioseqHandle(row); string urlLink = NcbiEmptyString; - const list >& ids = bsp_handle.GetBioseqCore()->GetId(); + const list >& ids = bsp_handle.GetBioseqCore()->GetId(); CAlignFormatUtil::SSeqURLInfo *seqUrlInfo = x_InitSeqUrl(giToUse,alnRoInfo->seqidArray[row],alnRoInfo->taxid[row],ids); if(m_AlignOption & eShowInfoOnMouseOverSeqid) { seqUrlInfo->defline = sequence::CDeflineGenerator().GenerateDefline(bsp_handle); @@ -982,32 +982,32 @@ string CDisplaySeqalign::x_HTMLSeqIDLink(SAlnRowInfo *alnRoInfo, int row,TGi giT CAlignFormatUtil::SSeqURLInfo *CDisplaySeqalign::x_InitSeqUrl(TGi giToUse,string accession, int taxid,const list >& ids) { - string idString = m_AV->GetSeqId(1).GetSeqIdString(); + string idString = m_AV->GetSeqId(1).GetSeqIdString(); CRange range = (m_AlnLinksParams.count(idString) > 0 && m_AlnLinksParams[idString].subjRange) ? CRange(m_AlnLinksParams[idString].subjRange->GetFrom() + 1,m_AlnLinksParams[idString].subjRange->GetTo() + 1) : - CRange(0,0); - bool flip = (m_AlnLinksParams.count(idString) > 0) ? m_AlnLinksParams[idString].flip : false; - string user_url= (!m_BlastType.empty()) ? m_Reg->Get(m_BlastType, "TOOL_URL") : ""; - giToUse = (giToUse == ZERO_GI) ? CAlignFormatUtil::GetGiForSeqIdList(ids):giToUse; + CRange(0,0); + bool flip = (m_AlnLinksParams.count(idString) > 0) ? m_AlnLinksParams[idString].flip : false; + string user_url= (!m_BlastType.empty()) ? m_Reg->Get(m_BlastType, "TOOL_URL") : ""; + giToUse = (giToUse == ZERO_GI) ? 
CAlignFormatUtil::GetGiForSeqIdList(ids):giToUse; CAlignFormatUtil::SSeqURLInfo *seqUrlInfo = new CAlignFormatUtil::SSeqURLInfo(user_url,m_BlastType,m_IsDbNa,m_DbName,m_Rid, m_QueryNumber, giToUse, - accession, + accession, 0,// linkout not used any more in seqUrl m_cur_align, true, (m_AlignOption & eNewTargetWindow) ? true : false, range, - flip, + flip, taxid, (m_AlignOption & eShowInfoOnMouseOverSeqid) ? true : false); - seqUrlInfo->resourcesUrl = (!m_BlastType.empty()) ? m_Reg->Get(m_BlastType, "RESOURCE_URL") : ""; - seqUrlInfo->advancedView = seqUrlInfo->useTemplates = m_AlignTemplates != NULL; + seqUrlInfo->resourcesUrl = (!m_BlastType.empty()) ? m_Reg->Get(m_BlastType, "RESOURCE_URL") : ""; + seqUrlInfo->advancedView = seqUrlInfo->useTemplates = m_AlignTemplates != NULL; return seqUrlInfo; } -void CDisplaySeqalign::x_InitAlignLinks(SAlnDispParams *alnDispParams, - const list< CRef< CBlast_def_line > > &bdl_list, +void CDisplaySeqalign::x_InitAlignLinks(SAlnDispParams *alnDispParams, + const list< CRef< CBlast_def_line > > &bdl_list, int lnkDispParams) { CAlignFormatUtil::SSeqURLInfo *seqUrlInfo = alnDispParams->seqUrlInfo; @@ -1015,35 +1015,35 @@ void CDisplaySeqalign::x_InitAlignLinks(SAlnDispParams *alnDispParams, CRef seqID = alnDispParams->seqID; if(lnkDispParams & eDisplayResourcesLinks) { seqUrlInfo->segs = (lnkDispParams & eDisplayDownloadLink) ? x_GetSegs(1) : ""; - int customLinkTypes = (lnkDispParams & eDisplayDownloadLink) ? CAlignFormatUtil::eDownLoadSeq : CAlignFormatUtil::eLinkTypeDefault; + int customLinkTypes = (lnkDispParams & eDisplayDownloadLink) ? 
CAlignFormatUtil::eDownLoadSeq : CAlignFormatUtil::eLinkTypeDefault; m_CustomLinksList = CAlignFormatUtil::GetCustomLinksList(seqUrlInfo, *seqID, - m_Scope, - customLinkTypes); + m_Scope, + customLinkTypes); + + m_HSPLinksList = CAlignFormatUtil::GetSeqLinksList(seqUrlInfo,true); - m_HSPLinksList = CAlignFormatUtil::GetSeqLinksList(seqUrlInfo,true); - //URL tp FASTA representation, includes genbank, trace and SNP m_FASTAlinkUrl = CAlignFormatUtil::GetFASTALinkURL(seqUrlInfo,*seqID, m_Scope); //URL to FASTA for all regions m_AlignedRegionsUrl = CAlignFormatUtil::GetAlignedRegionsURL(seqUrlInfo,*seqID, m_Scope); - - if(m_AlignOption&eLinkout && (seqUrlInfo->hasTextSeqID)){ + + if(m_AlignOption&eLinkout && (seqUrlInfo->hasTextSeqID)){ m_LinkoutInfo.cur_align = m_cur_align; m_LinkoutInfo.taxid = seqUrlInfo->taxid; - m_LinkoutInfo.subjRange = seqUrlInfo->seqRange; + m_LinkoutInfo.subjRange = seqUrlInfo->seqRange; if(bdl_list.size() > 0) { m_LinkoutList = CAlignFormatUtil::GetFullLinkoutUrl(bdl_list,m_LinkoutInfo); } - else { + else { m_LinkoutList = CAlignFormatUtil::GetFullLinkoutUrl(alnDispParams->ids,m_LinkoutInfo,false); } - - } - } -} + + } + } +} void @@ -1056,7 +1056,7 @@ CDisplaySeqalign::SetSubjectMasks(const TSeqLocInfoVector& masks) } //align translation to 2nd base -static string s_GetFinalTranslatedString(const CSeq_loc& loc, CScope& scope, +static string s_GetFinalTranslatedString(const CSeq_loc& loc, CScope& scope, int first_encoding_base, int align_length, const string& translation, const string& sequence, char gap_char){ @@ -1064,12 +1064,12 @@ static string s_GetFinalTranslatedString(const CSeq_loc& loc, CScope& scope, string feat(align_length, ' '); int num_base = 0; int j = 0; - - for (int i = first_encoding_base; i < (int) feat.size() && + + for (int i = first_encoding_base; i < (int) feat.size() && j < (int)translation.size(); i ++) { if (sequence[i] != gap_char) { num_base ++; - + //aa residue to 2nd nuc position if (num_base%3 == 2) { feat[i] = 
translation[j]; @@ -1077,21 +1077,21 @@ static string s_GetFinalTranslatedString(const CSeq_loc& loc, CScope& scope, } } } - return feat; + return feat; } void CDisplaySeqalign::x_AddTranslationForLocalSeq(vector& retval, vector& sequence) const { if (m_AV->IsPositiveStrand(0) && m_AV->IsPositiveStrand(1)) { - + //find the first aln pos that both seq has no gaps for 3 consecutive pos. int non_gap_aln_pos = 0; CAlnVec::TResidue gap_char = m_AV->GetGapChar(0); int num_consecutive = 0; for (int i =0; i < (int) sequence[0].size(); i ++) { - if (sequence[0][i] != gap_char && + if (sequence[0][i] != gap_char && sequence[1][i] != gap_char) { - + num_consecutive ++; if (num_consecutive >=3) { non_gap_aln_pos = i - 2; @@ -1101,15 +1101,15 @@ void CDisplaySeqalign::x_AddTranslationForLocalSeq(vector& num_consecutive = 0; } } - - + + //master int master_frame_extra = m_AV->GetSeqPosFromAlnPos(0, non_gap_aln_pos)%3; int master_frame_start; //= m_AV->GetSeqPosFromSeqPos(0, 1, subject_frame_start); - master_frame_start = m_AV->GetSeqPosFromAlnPos(0, non_gap_aln_pos) + + master_frame_start = m_AV->GetSeqPosFromAlnPos(0, non_gap_aln_pos) + (3 - (master_frame_extra - m_TranslatedFrameForLocalSeq))%3; - + CRef master_loc(new CSeq_loc((CSeq_loc::TId &) m_AV->GetSeqId(0), master_frame_start, m_AV->GetSeqStop(0))); @@ -1120,17 +1120,17 @@ void CDisplaySeqalign::x_AddTranslationForLocalSeq(vector& m_Scope, master_translation); int master_first_encoding_base = m_AV->GetAlnPosFromSeqPos(0, master_frame_start); - string master_feat = s_GetFinalTranslatedString(*master_loc, m_Scope, + string master_feat = s_GetFinalTranslatedString(*master_loc, m_Scope, master_first_encoding_base, m_AV->GetAlnStop() + 1, - master_translation, + master_translation, sequence[0], gap_char); - + CRef master_featInfo(new SAlnFeatureInfo); - - x_SetFeatureInfo(master_featInfo, *master_loc, 0, m_AV->GetAlnStop(), + + x_SetFeatureInfo(master_featInfo, *master_loc, 0, m_AV->GetAlnStop(), m_AV->GetAlnStop(), ' ', - " 
", master_feat); + " ", master_feat); retval[0].push_back(master_featInfo); @@ -1147,24 +1147,24 @@ void CDisplaySeqalign::x_AddTranslationForLocalSeq(vector& m_Scope, subject_translation); int subject_first_encoding_base = m_AV->GetAlnPosFromSeqPos(1, subject_frame_start); - string subject_feat = s_GetFinalTranslatedString(*subject_loc, m_Scope, + string subject_feat = s_GetFinalTranslatedString(*subject_loc, m_Scope, subject_first_encoding_base, m_AV->GetAlnStop() + 1, - subject_translation, + subject_translation, sequence[1], gap_char); - + CRef subject_featInfo(new SAlnFeatureInfo); - - x_SetFeatureInfo(subject_featInfo, *subject_loc, 0, m_AV->GetAlnStop(), + + x_SetFeatureInfo(subject_featInfo, *subject_loc, 0, m_AV->GetAlnStop(), m_AV->GetAlnStop(), ' ', - " ", subject_feat); + " ", subject_feat); retval[1].push_back(subject_featInfo); } } -//this is a special function to calculate pert_identity between master and a given row +//this is a special function to calculate pert_identity between master and a given row //for multiple alignment. Excluding leading and trailing gaps. 
void s_CalculateIdentity(const string& sequence_standard, const string& sequence , char gap_char, @@ -1179,24 +1179,24 @@ void s_CalculateIdentity(const string& sequence_standard, break; } } - + for(int i = (int)sequence.size() - 1; i > 0; i--){ if (sequence[i] != gap_char){ end = i; break; } } - - + + for(int i = start; i <= end && i < (int)sequence.size() && i < (int)sequence_standard.size(); i++){ if(sequence[i] == gap_char && sequence_standard[i] == gap_char) { //skip } else { - if (sequence_standard[i]==sequence[i]){ + if (sequence_standard[i]==sequence[i]){ match ++; } align_length ++; - } + } } } @@ -1205,7 +1205,7 @@ CDisplaySeqalign::SAlnRowInfo *CDisplaySeqalign::x_PrepareRowData(void) size_t maxIdLen=0, maxStartLen=0; //, startLen=0, actualLineLen=0; //size_t aln_stop=m_AV->GetAlnStop(); - const int rowNum=m_AV->GetNumRows(); + const int rowNum=m_AV->GetNumRows(); if(m_AlignOption & eMasterAnchored){ m_AV->SetAnchor(0); } @@ -1216,7 +1216,7 @@ CDisplaySeqalign::SAlnRowInfo *CDisplaySeqalign::x_PrepareRowData(void) } else { m_AV->SetEndChar(' '); - } + } vector sequence(rowNum); vector seqStarts(rowNum); vector seqStops(rowNum); @@ -1252,16 +1252,16 @@ CDisplaySeqalign::SAlnRowInfo *CDisplaySeqalign::x_PrepareRowData(void) const CSeq_id& id = m_AV->GetSeqId(row); x_FillLocList(masked_regions[row], &m_SubjectMasks[id]); } - - //prepare data for each row + + //prepare data for each row list > > feat_seq_range; list feat_seq_strand; for (int row=0; rowGetSeqId(row), m_Scope); @@ -1273,54 +1273,54 @@ CDisplaySeqalign::SAlnRowInfo *CDisplaySeqalign::x_PrepareRowData(void) taxid[row] = 0; } rowRng[row] = m_AV->GetSeqAlnRange(row); - frame[row] = (m_AV->GetWidth(row) == 3 ? - s_GetFrame(m_AV->IsPositiveStrand(row) ? - m_AV->GetSeqStart(row) : - m_AV->GetSeqStop(row), - m_AV->IsPositiveStrand(row) ? - eNa_strand_plus : eNa_strand_minus, - m_AV->GetSeqId(row), m_Scope) : 0); + frame[row] = (m_AV->GetWidth(row) == 3 ? + s_GetFrame(m_AV->IsPositiveStrand(row) ? 
+ m_AV->GetSeqStart(row) : + m_AV->GetSeqStop(row), + m_AV->IsPositiveStrand(row) ? + eNa_strand_plus : eNa_strand_minus, + m_AV->GetSeqId(row), m_Scope) : 0); //make sequence m_AV->GetWholeAlnSeqString(row, sequence[row], &insertAlnStart[row], &insertStart[row], &insertLength[row], (int)m_LineLen, &seqStarts[row], &seqStops[row]); if(row > 0 && m_AlignOption & eShowAlignStatsForMultiAlignView && m_AlignOption&eMergeAlign && m_AV->GetWidth(row) != 3) { - - s_CalculateIdentity(sequence[0], sequence[row], m_AV->GetGapChar(row), - match[row-1], align_length[row-1]); - + + s_CalculateIdentity(sequence[0], sequence[row], m_AV->GetGapChar(row), + match[row-1], align_length[row-1]); + if (align_length[row-1] > 0 ){ percent_ident[row-1] = ((double)match[row-1])/align_length[row-1]*100; - align_stats[row-1] = NStr::DoubleToString(percent_ident[row-1], 1, 0) + + align_stats[row-1] = NStr::DoubleToString(percent_ident[row-1], 1, 0) + "% (" + NStr::IntToString(match[row-1]) + "/" + NStr::IntToString(align_length[row-1]) + ")" ; } else {//something is wrong percent_ident[row - 1] = 0; align_stats[row-1] = "0"; } - + max_align_stats = max(max_align_stats, (int)align_stats[row-1].size()); } - + //seq property label - if(row > 0 && + if(row > 0 && m_AlignOption & eShowSequencePropertyLabel && m_AlignOption&eMergeAlign && m_AV->GetWidth(row) != 3) { - + if((int)m_SeqPropertyLabel->size() >= row -1){ seq_property_label[row-1] = (*m_SeqPropertyLabel)[row]; //skip the first one which is for query } else {//something is wrong seq_property_label[row-1] = NcbiEmptyString; - } - + } + max_seq_property_label = max(max_seq_property_label, (int)seq_property_label[row-1].size()); } - if (row == 1 && eShowTranslationForLocalSeq & m_AlignOption - && m_AV->GetWidth(row) != 3 + if (row == 1 && eShowTranslationForLocalSeq & m_AlignOption + && m_AV->GetWidth(row) != 3 && !(m_AlignType & eProt)) { x_AddTranslationForLocalSeq(bioseqFeature, sequence); } @@ -1331,20 +1331,20 @@ 
CDisplaySeqalign::SAlnRowInfo *CDisplaySeqalign::x_PrepareRowData(void) if(m_AlignOption & eShowCdsFeature){ TGi master_gi = FindGi(m_AV->GetBioseqHandle(0). GetBioseqCore()->GetId()); - x_GetFeatureInfo(bioseqFeature[row], *m_featScope, + x_GetFeatureInfo(bioseqFeature[row], *m_featScope, CSeqFeatData::e_Cdregion, row, sequence[row], feat_seq_range, feat_seq_strand, row == 1 && !(master_gi > ZERO_GI) ? true : false); - + if(!(feat_seq_range.empty()) && row == 1) { //make a new copy of master bioseq and add the feature from - //slave to make putative cds feature - CRef master_scope_with_feat = + //slave to make putative cds feature + CRef master_scope_with_feat = s_MakeNewMasterSeq(feat_seq_range, feat_seq_strand, m_AV->GetBioseqHandle(0)); //make feature string for master bioseq list > > temp_holder; - x_GetFeatureInfo(bioseqFeature[0], *master_scope_with_feat, + x_GetFeatureInfo(bioseqFeature[0], *master_scope_with_feat, CSeqFeatData::e_Cdregion, 0, sequence[0], temp_holder, feat_seq_strand, false); } @@ -1361,7 +1361,7 @@ CDisplaySeqalign::SAlnRowInfo *CDisplaySeqalign::x_PrepareRowData(void) size_t maxCood=max(m_AV->GetSeqStart(row), m_AV->GetSeqStop(row)); maxStartLen = max(NStr::SizetToString(maxCood).size(), maxStartLen); } - for(int i = 0; i < rowNum; i ++){//adjust max id length for feature id + for(int i = 0; i < rowNum; i ++){//adjust max id length for feature id int num_feature = 0; ITERATE(TSAlnFeatureInfoList, iter, bioseqFeature[i]) { maxIdLen=max((*iter)->feature->feature_id.size(), maxIdLen); @@ -1388,7 +1388,7 @@ CDisplaySeqalign::SAlnRowInfo *CDisplaySeqalign::x_PrepareRowData(void) alnRoInfo->seqidArray = seqidArray; alnRoInfo->maxIdLen = maxIdLen; alnRoInfo->maxStartLen = maxStartLen; - alnRoInfo->max_feature_num = max_feature_num; + alnRoInfo->max_feature_num = max_feature_num; alnRoInfo->colorMismatch = false; alnRoInfo->rowNum = rowNum; alnRoInfo->match = match; @@ -1407,10 +1407,10 @@ string CDisplaySeqalign::x_DisplayRowData(SAlnRowInfo 
*alnRoInfo) int rowNum = alnRoInfo->rowNum; vector prev_stop(rowNum); CNcbiOstrstream out; - + //only for untranslated alignment alnRoInfo->show_align_stats = (m_AlignOption&eShowAlignStatsForMultiAlignView && - m_AlignOption&eMergeAlign && + m_AlignOption&eMergeAlign && m_AV->GetWidth(0) != 3 && m_AV->GetWidth(1) != 3) ? true : false; //only for untranslated alignment @@ -1418,12 +1418,12 @@ string CDisplaySeqalign::x_DisplayRowData(SAlnRowInfo *alnRoInfo) m_AlignOption&eMergeAlign && m_AV->GetWidth(0) != 3 && m_AV->GetWidth(1) != 3) ? true : false; - //output rows + //output rows string formattedString; for(int j=0; j<=(int)aln_stop; j+=(int)m_LineLen){ string rowdata = x_DisplayRowDataSet(alnRoInfo,j, prev_stop); - formattedString += rowdata; - }//end of displaying rows + formattedString += rowdata; + }//end of displaying rows return formattedString; } @@ -1432,10 +1432,10 @@ void CDisplaySeqalign::x_DisplayRowData(SAlnRowInfo *alnRoInfo,CNcbiOstream& out size_t aln_stop=m_AV->GetAlnStop(); int rowNum = alnRoInfo->rowNum; vector prev_stop(rowNum); - + //only for untranslated alignment alnRoInfo->show_align_stats = (m_AlignOption&eShowAlignStatsForMultiAlignView && - m_AlignOption&eMergeAlign && + m_AlignOption&eMergeAlign && m_AV->GetWidth(0) != 3 && m_AV->GetWidth(1) != 3) ? true : false; //only for untranslated alignment @@ -1443,11 +1443,11 @@ void CDisplaySeqalign::x_DisplayRowData(SAlnRowInfo *alnRoInfo,CNcbiOstream& out m_AlignOption&eMergeAlign && m_AV->GetWidth(0) != 3 && m_AV->GetWidth(1) != 3) ? 
true : false; - //output rows + //output rows for(int j=0; j<=(int)aln_stop; j+=(int)m_LineLen){ string rowdata = x_DisplayRowDataSet(alnRoInfo,j, prev_stop); - out << rowdata; - }//end of displaying rows + out << rowdata; + }//end of displaying rows } @@ -1458,10 +1458,10 @@ string CDisplaySeqalign::x_DisplayRowDataSet(SAlnRowInfo *alnRoInfo,int aln_star string master_feat_str = NcbiEmptyString; size_t aln_stop=m_AV->GetAlnStop(); - int rowNum = alnRoInfo->rowNum; + int rowNum = alnRoInfo->rowNum; CNcbiOstrstream out; - - + + //output according to aln coordinates if(aln_stop-aln_start+1currRange = curRange; //here is each row for (int row=0; rowrowRng[row]); } //only output rows that have sequence - if (hasSequence){ - int end = alnRoInfo->seqStops[row].front() + 1; + if (hasSequence){ + int end = alnRoInfo->seqStops[row].front() + 1; bool has_mismatch = false; - //change the alignment line to identity style + //change the alignment line to identity style if (row>0 && m_AlignOption & eShowIdentity){//check usage - pairwise - only - for (int index = aln_start; index < aln_start + (int)actualLineLen && + for (int index = aln_start; index < aln_start + (int)actualLineLen && index < (int)alnRoInfo->sequence[row].size(); index ++){ if (alnRoInfo->sequence[row][index] == alnRoInfo->sequence[0][index] && isalpha((unsigned char) alnRoInfo->sequence[row][index])) { - alnRoInfo->sequence[row][index] = k_IdentityChar; + alnRoInfo->sequence[row][index] = k_IdentityChar; } else if (!has_mismatch) { has_mismatch = true; - } + } } } //feature for query - if(row == 0){ - x_PrintFeatures(alnRoInfo, row, master_feat_str, out); + if(row == 0){ + x_PrintFeatures(alnRoInfo, row, master_feat_str, out); } - if((m_AlignOption & eMergeAlign) || (m_AlignOption & eHyperLinkMasterSeqid) || (m_AlignOption & eHyperLinkSlaveSeqid)) { + if((m_AlignOption & eMergeAlign) || (m_AlignOption & eHyperLinkMasterSeqid) || (m_AlignOption & eHyperLinkSlaveSeqid)) { 
x_DisplaySequenceIDForQueryAnchored(alnRoInfo,row,out); } else { @@ -1509,26 +1509,26 @@ string CDisplaySeqalign::x_DisplayRowDataSet(SAlnRowInfo *alnRoInfo,int aln_star if(m_AlignOption & eMasterAnchored){ //inserts for anchored view x_DisplayInsertsForQueryAnchored(alnRoInfo,row,out); - } + } //display subject sequence feature. - if(row > 0){ + if(row > 0){ x_PrintFeatures(alnRoInfo, row, master_feat_str, out); } //display middle line for pairwise if (row == 0 && ((m_AlignOption & eShowMiddleLine)) && !(m_AlignOption&eMergeAlign)) { - x_DisplayMiddLine(alnRoInfo, row,out); + x_DisplayMiddLine(alnRoInfo, row,out); } - prev_stop[row] = end; + prev_stop[row] = end; } if(!alnRoInfo->seqStarts[row].empty()){ //shouldn't need this check alnRoInfo->seqStarts[row].pop_front(); } if(!alnRoInfo->seqStops[row].empty()){ alnRoInfo->seqStops[row].pop_front(); - } - }//end of displaying rows + } + }//end of displaying rows out<<"\n"; - string formattedString = CNcbiOstrstreamToString(out); + string formattedString = CNcbiOstrstreamToString(out); return formattedString; } @@ -1538,12 +1538,12 @@ void CDisplaySeqalign::x_DisplaySequenceLine(SAlnRowInfo *alnRoInfo, int row, in int start = alnRoInfo->seqStarts[row].front() + 1; //+1 for 1 based int end = alnRoInfo->seqStops[row].front() + 1; int j = alnRoInfo->currPrintSegment; - int actualLineLen = alnRoInfo->currActualLineLen; + int actualLineLen = alnRoInfo->currActualLineLen; //print out sequence line //adjust space between id and start CAlignFormatUtil::AddSpace(out, alnRoInfo->maxIdLen-alnRoInfo->seqidArray[row].size() + k_IdStartMargin); //not to display start and stop number for empty row - if ((j > 0 && end == prev_stop) + if ((j > 0 && end == prev_stop) || (j == 0 && start == 1 && end == 1)) { startLen = 0; } else { @@ -1552,9 +1552,9 @@ void CDisplaySeqalign::x_DisplaySequenceLine(SAlnRowInfo *alnRoInfo, int row, in } CAlignFormatUtil::AddSpace(out, alnRoInfo->maxStartLen-startLen + k_StartSequenceMargin); - 
x_OutputSeq(alnRoInfo->sequence[row], m_AV->GetSeqId(row), j, + x_OutputSeq(alnRoInfo->sequence[row], m_AV->GetSeqId(row), j, (int)actualLineLen, alnRoInfo->frame[row], row, - (row > 0 && alnRoInfo->colorMismatch)?true:false, + (row > 0 && alnRoInfo->colorMismatch)?true:false, alnRoInfo->masked_regions[row], out); CAlignFormatUtil::AddSpace(out, k_SeqStopMargin); @@ -1562,31 +1562,31 @@ void CDisplaySeqalign::x_DisplaySequenceLine(SAlnRowInfo *alnRoInfo, int row, in if (!(j > 0 && end == prev_stop) && !(j == 0 && start == 1 && end == 1)) { out << end; - } - out<<"\n"; + } + out<<"\n"; } void CDisplaySeqalign::x_DisplayInsertsForQueryAnchored(SAlnRowInfo *alnRoInfo, int row,CNcbiOstrstream &out) { list inserts; - string insertPosString; //the one with "\" to indicate insert + string insertPosString; //the one with "\" to indicate insert TSInsertInformationList insertList; int j = alnRoInfo->currPrintSegment; - CAlnMap::TSignedRange curRange = alnRoInfo->currRange; - x_GetInserts(insertList, alnRoInfo->insertAlnStart[row], - alnRoInfo->insertStart[row], alnRoInfo->insertLength[row], + CAlnMap::TSignedRange curRange = alnRoInfo->currRange; + x_GetInserts(insertList, alnRoInfo->insertAlnStart[row], + alnRoInfo->insertStart[row], alnRoInfo->insertLength[row], j + (int)m_LineLen); - x_FillInserts(row, curRange, j, inserts, insertPosString, insertList); + x_FillInserts(row, curRange, j, inserts, insertPosString, insertList); bool insertAlready = false; - for(list::iterator iter = inserts.begin(); - iter != inserts.end(); iter ++){ + for(list::iterator iter = inserts.begin(); + iter != inserts.end(); iter ++){ if(!insertAlready){ - if((m_AlignOption&eHtml) &&(m_AlignOption&eMergeAlign) - && (m_AlignOption&eSequenceRetrieval && m_CanRetrieveSeq)){ + if((m_AlignOption&eHtml) &&(m_AlignOption&eMergeAlign) + && (m_AlignOption&eSequenceRetrieval && m_CanRetrieveSeq)){ string checkboxBuf = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox",""); out << 
checkboxBuf; } - + int base_margin = alnRoInfo->maxIdLen + k_IdStartMargin + alnRoInfo->maxStartLen + k_StartSequenceMargin; if (alnRoInfo->show_align_stats) { @@ -1598,12 +1598,12 @@ void CDisplaySeqalign::x_DisplayInsertsForQueryAnchored(SAlnRowInfo *alnRoInfo, CAlignFormatUtil::AddSpace(out, base_margin); out << insertPosString<<"\n"; } - if((m_AlignOption&eHtml) &&(m_AlignOption&eMergeAlign) && (m_AlignOption&eSequenceRetrieval && m_CanRetrieveSeq)){ + if((m_AlignOption&eHtml) &&(m_AlignOption&eMergeAlign) && (m_AlignOption&eSequenceRetrieval && m_CanRetrieveSeq)){ string checkboxBuf = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox",""); out << checkboxBuf; } int base_margin = alnRoInfo->maxIdLen + k_IdStartMargin + alnRoInfo->maxStartLen + k_StartSequenceMargin; - + if (alnRoInfo->show_align_stats) { base_margin += alnRoInfo->max_align_stats_len + k_AlignStatsMargin; } @@ -1620,15 +1620,15 @@ void CDisplaySeqalign::x_DisplaySequenceIDForPairwise(SAlnRowInfo *alnRoInfo, in { //highlight the seqid for pairwise-with-identity format if(row>0 && m_AlignOption&eHtml && !(m_AlignOption&eMergeAlign) - && m_AlignOption&eShowIdentity && has_mismatch && - (m_AlignOption & eColorDifferentBases)){ + && m_AlignOption&eShowIdentity && has_mismatch && + (m_AlignOption & eColorDifferentBases)){ //highlight the seqid for pairwise-with-identity format string alnStr = CAlignFormatUtil::MapTemplate(k_DefaultPairwiseWithIdntTempl,"alndata",alnRoInfo->seqidArray[row]); - out<< alnStr; + out<< alnStr; } else { - out<seqidArray[row]; - } + out<seqidArray[row]; + } } void CDisplaySeqalign::x_DisplaySequenceIDForQueryAnchored(SAlnRowInfo *alnRoInfo, int row, CNcbiOstrstream &out) @@ -1636,7 +1636,7 @@ void CDisplaySeqalign::x_DisplaySequenceIDForQueryAnchored(SAlnRowInfo *alnRoInf string urlLink = NcbiEmptyString; //setup url link for seqid TGi gi = ZERO_GI; - if(m_AlignOption & eHtml){ + if(m_AlignOption & eHtml){ if(m_AV->GetSeqId(row).Which() == CSeq_id::e_Gi){ 
gi = m_AV->GetSeqId(row).GetGi(); } @@ -1644,51 +1644,51 @@ void CDisplaySeqalign::x_DisplaySequenceIDForQueryAnchored(SAlnRowInfo *alnRoInf gi = CAlignFormatUtil::GetGiForSeqIdList(m_AV->GetBioseqHandle(row). GetBioseqCore()->GetId()); } - string anchorTmpl,checkBoxTmpl,id_lbl; + string anchorTmpl,checkBoxTmpl,id_lbl; bool showAnchor = (row == 0 && (m_AlignOption & eHyperLinkMasterSeqid)) || (row > 0 && (m_AlignOption & eHyperLinkSlaveSeqid)); - bool showCheckbox = ((m_AlignOption & eMergeAlign) && (m_AlignOption & eSequenceRetrieval) && m_CanRetrieveSeq) || + bool showCheckbox = ((m_AlignOption & eMergeAlign) && (m_AlignOption & eSequenceRetrieval) && m_CanRetrieveSeq) || (m_AlignOption & eShowCheckBox); if(showAnchor){ anchorTmpl = (m_ResultPositionIndex >= 0) ? k_DefaultAnchorWithPosTempl : k_DefaultAnchorTempl; if (m_ResultPositionIndex >= 0){ - anchorTmpl = CAlignFormatUtil::MapTemplate(anchorTmpl,"resultPositionIndex",m_ResultPositionIndex); - } + anchorTmpl = CAlignFormatUtil::MapTemplate(anchorTmpl,"resultPositionIndex",m_ResultPositionIndex); + } anchorTmpl = CAlignFormatUtil::MapTemplate(anchorTmpl,"id_lbl",gi > ZERO_GI ? - NStr::NumericToString(gi):alnRoInfo->seqidArray[row]); - } + NStr::NumericToString(gi):alnRoInfo->seqidArray[row]); + } //get sequence checkbox if(showCheckbox) { - checkBoxTmpl = !(m_AlignOption & eShowCheckBox) ? - ((row == 0) ? "" : k_DefaultCheckboxTempl) : k_DefaultCheckboxExTempl; - - checkBoxTmpl = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox",checkBoxTmpl); - checkBoxTmpl = CAlignFormatUtil::MapTemplate(checkBoxTmpl,"queryNumber",NStr::IntToString(m_QueryNumber)); + checkBoxTmpl = !(m_AlignOption & eShowCheckBox) ? + ((row == 0) ? 
"" : k_DefaultCheckboxTempl) : k_DefaultCheckboxExTempl; + + checkBoxTmpl = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox",checkBoxTmpl); + checkBoxTmpl = CAlignFormatUtil::MapTemplate(checkBoxTmpl,"queryNumber",NStr::IntToString(m_QueryNumber)); if(m_AlignOption & eShowCheckBox) { const CRef seqID = FindBestChoice(m_AV->GetBioseqHandle(row).GetBioseqCore()->GetId(), CSeq_id::WorstRank); id_lbl = CAlignFormatUtil::GetLabel(seqID,CSeq_id::eContent); if(seqID->IsLocal()) { - id_lbl = "lcl|" + id_lbl; - } + id_lbl = "lcl|" + id_lbl; + } } - } - if(showCheckbox || showAnchor) { + } + if(showCheckbox || showAnchor) { id_lbl = id_lbl.empty() ? ((gi > ZERO_GI) ? NStr::NumericToString(gi) : alnRoInfo->seqidArray[row]) : id_lbl; - string displString = CAlignFormatUtil::MapTemplate(anchorTmpl + checkBoxTmpl,"id_lbl",id_lbl); + string displString = CAlignFormatUtil::MapTemplate(anchorTmpl + checkBoxTmpl,"id_lbl",id_lbl); out << displString; } } - + if(alnRoInfo->show_seq_property_label){ if (row > 0){ - + out<seq_property_label[row-1]; CAlignFormatUtil::AddSpace(out, alnRoInfo->max_seq_property_label - (int)alnRoInfo->seq_property_label[row-1].size() + k_SequencePropertyLabelMargin); } else { CAlignFormatUtil::AddSpace(out, alnRoInfo->max_seq_property_label + k_SequencePropertyLabelMargin); } - } - + } + if(alnRoInfo->show_align_stats){ if (row > 0){ out<align_stats[row-1]; @@ -1698,19 +1698,19 @@ void CDisplaySeqalign::x_DisplaySequenceIDForQueryAnchored(SAlnRowInfo *alnRoInf CAlignFormatUtil::AddSpace(out, alnRoInfo->max_align_stats_len + k_AlignStatsMargin); } } - if(m_AlignOption & eHtml){ + if(m_AlignOption & eHtml){ if((row == 0 && (m_AlignOption & eHyperLinkMasterSeqid)) || - (row > 0 && (m_AlignOption & eHyperLinkSlaveSeqid))){ - m_cur_align = row; - urlLink = x_HTMLSeqIDLink(alnRoInfo, row,gi); - } - } + (row > 0 && (m_AlignOption & eHyperLinkSlaveSeqid))){ + m_cur_align = row; + urlLink = x_HTMLSeqIDLink(alnRoInfo, row,gi); + } + } 
if(!urlLink.empty()) { - out << urlLink; + out << urlLink; } else { - out<seqidArray[row]; - } + out<seqidArray[row]; + } } void CDisplaySeqalign::x_DisplayMiddLine(SAlnRowInfo *alnRoInfo, int row, CNcbiOstrstream &out) @@ -1726,15 +1726,15 @@ void CDisplaySeqalign::x_DisplayMiddLine(SAlnRowInfo *alnRoInfo, int row, CNcbiO void CDisplaySeqalign::x_PrepareIdentityInfo(SAlnInfo* aln_vec_info) { size_t aln_stop=m_AV->GetAlnStop(); - + aln_vec_info->match = 0; aln_vec_info->positive = 0; aln_vec_info->gap = 0; aln_vec_info->identity = 0; - x_FillIdentityInfo(aln_vec_info->alnRowInfo->sequence[0], - aln_vec_info->alnRowInfo->sequence[1], - aln_vec_info->match, - aln_vec_info->positive, + x_FillIdentityInfo(aln_vec_info->alnRowInfo->sequence[0], + aln_vec_info->alnRowInfo->sequence[1], + aln_vec_info->match, + aln_vec_info->positive, aln_vec_info->alnRowInfo->middleLine); if(m_AlignOption & eShowBlastInfo){ aln_vec_info->identity = CAlignFormatUtil::GetPercentMatch(aln_vec_info->match, (int)aln_stop+1); @@ -1742,64 +1742,64 @@ void CDisplaySeqalign::x_PrepareIdentityInfo(SAlnInfo* aln_vec_info) (m_AlignOption & eColorDifferentBases)){ aln_vec_info->alnRowInfo->colorMismatch = true; } - aln_vec_info->gap = x_GetNumGaps(); + aln_vec_info->gap = x_GetNumGaps(); } } void CDisplaySeqalign::x_DisplayAlnvec(CNcbiOstream& out) -{ - SAlnRowInfo *alnRoInfo = x_PrepareRowData(); +{ + SAlnRowInfo *alnRoInfo = x_PrepareRowData(); - x_DisplayRowData(alnRoInfo,out); + x_DisplayRowData(alnRoInfo,out); delete alnRoInfo; } CRef CDisplaySeqalign::x_GetAlnVecForSeqalign(const CSeq_align& align) { - + //make alnvector CRef avRef; CConstRef finalAln; if (align.GetSegs().Which() == CSeq_align::C_Segs::e_Std) { CRef densegAln = align.CreateDensegFromStdseg(); - if (m_AlignOption & eTranslateNucToNucAlignment) { + if (m_AlignOption & eTranslateNucToNucAlignment) { finalAln = densegAln->CreateTranslatedDensegFromNADenseg(); } else { finalAln = densegAln; - } - } else if(align.GetSegs().Which() == 
+ } + } else if(align.GetSegs().Which() == CSeq_align::C_Segs::e_Denseg){ - if (m_AlignOption & eTranslateNucToNucAlignment) { + if (m_AlignOption & eTranslateNucToNucAlignment) { finalAln = align.CreateTranslatedDensegFromNADenseg(); } else { finalAln = &align; } - } else if(align.GetSegs().Which() == + } else if(align.GetSegs().Which() == CSeq_align::C_Segs::e_Dendiag){ - CRef densegAln = + CRef densegAln = CAlignFormatUtil::CreateDensegFromDendiag(align); - if (m_AlignOption & eTranslateNucToNucAlignment) { + if (m_AlignOption & eTranslateNucToNucAlignment) { finalAln = densegAln->CreateTranslatedDensegFromNADenseg(); } else { finalAln = densegAln; } } else { - NCBI_THROW(CException, eUnknown, + NCBI_THROW(CException, eUnknown, "Seq-align should be Denseg, Stdseg or Dendiag!"); } CRef finalDenseg(new CDense_seg); const CTypeConstIterator ds = ConstBegin(*finalAln); - if((ds->IsSetStrands() - && ds->GetStrands().front()==eNa_strand_minus) + if((ds->IsSetStrands() + && ds->GetStrands().front()==eNa_strand_minus) && !(ds->IsSetWidths() && ds->GetWidths()[0] == 3)){ //show plus strand if master is minus for non-translated case finalDenseg->Assign(*ds); finalDenseg->Reverse(); - avRef = new CAlnVec(*finalDenseg, m_Scope); + avRef = new CAlnVec(*finalDenseg, m_Scope); } else { avRef = new CAlnVec(*ds, m_Scope); - } - + } + avRef->SetAaCoding(CSeq_data::e_Ncbieaa); return avRef; @@ -1809,20 +1809,20 @@ CRef CDisplaySeqalign::x_GetAlnVecForSeqalign(const CSeq_align& align) void CDisplaySeqalign::x_InitAlignParams(CSeq_align_set &actual_aln_list) { //scope for feature fetching - if(!(m_AlignOption & eMasterAnchored) - && (m_AlignOption & eShowCdsFeature || m_AlignOption + if(!(m_AlignOption & eMasterAnchored) + && (m_AlignOption & eShowCdsFeature || m_AlignOption & eShowGeneFeature)){ m_FeatObj = CObjectManager::GetInstance(); CGBDataLoader::RegisterInObjectManager(*m_FeatObj); m_featScope = new CScope(*m_FeatObj); //for seq feature fetch string name = 
CGBDataLoader::GetLoaderNameFromArgs(); m_featScope->AddDataLoader(name); - } + } m_CanRetrieveSeq = CAlignFormatUtil::GetDbType(actual_aln_list,m_Scope) == CAlignFormatUtil::eDbTypeNotSet ? false : true; if(m_AlignOption & eHtml || m_AlignOption & eDynamicFeature){ //set config file m_ConfigFile = new CNcbiIfstream(".ncbirc"); - m_Reg = new CNcbiRegistry(*m_ConfigFile); + m_Reg = new CNcbiRegistry(*m_ConfigFile); if(!m_BlastType.empty()) m_LinkoutOrder = m_Reg->Get(m_BlastType,"LINKOUT_ORDER"); m_LinkoutOrder = (!m_LinkoutOrder.empty()) ? m_LinkoutOrder : kLinkoutOrderStr; @@ -1835,57 +1835,57 @@ void CDisplaySeqalign::x_InitAlignParams(CSeq_align_set &actual_aln_list) } } if(m_AlignOption&eLinkout) { - string user_url = (!m_BlastType.empty()) ? m_Reg->Get(m_BlastType, "TOOL_URL") : ""; - + string user_url = (!m_BlastType.empty()) ? m_Reg->Get(m_BlastType, "TOOL_URL") : ""; + m_LinkoutInfo.Init(m_Rid, m_CddRid, m_EntrezTerm, m_IsDbNa, m_DbName, m_QueryNumber, user_url, m_PreComputedResID, m_LinkoutOrder, m_LinkoutDB, m_MapViewerBuildName); - - CRef wid = FindBestChoice(m_Scope.GetBioseqHandle(actual_aln_list.Get().front()->GetSeq_id(0)).GetBioseqCore()->GetId(), CSeq_id::WorstRank); - wid->GetLabel(&m_LinkoutInfo.queryID, CSeq_id::eContent); + + CRef wid = FindBestChoice(m_Scope.GetBioseqHandle(actual_aln_list.Get().front()->GetSeq_id(0)).GetBioseqCore()->GetId(), CSeq_id::WorstRank); + wid->GetLabel(&m_LinkoutInfo.queryID, CSeq_id::eContent); } } void CDisplaySeqalign::DisplaySeqalign(CNcbiOstream& out) -{ +{ CSeq_align_set actual_aln_list; - CAlignFormatUtil::ExtractSeqalignSetFromDiscSegs(actual_aln_list, + CAlignFormatUtil::ExtractSeqalignSetFromDiscSegs(actual_aln_list, *m_SeqalignSetRef); if (actual_aln_list.Get().empty()){ return; } - + //inits m_FeatObj,m_featScope,m_CanRetrieveSeq,m_ConfigFile,m_Reg,m_LinkoutOrder,m_DynamicFeature - x_InitAlignParams(actual_aln_list); - + x_InitAlignParams(actual_aln_list); + //get sequence - 
if(m_AlignOption&eSequenceRetrieval && m_AlignOption&eHtml && m_CanRetrieveSeq){ + if(m_AlignOption&eSequenceRetrieval && m_AlignOption&eHtml && m_CanRetrieveSeq){ out<<"\n"; } //begin to display int num_align = 0; m_cur_align = m_currAlignSeqListIndex - 1; - m_currAlignHsp = 0; + m_currAlignHsp = 0; auto_ptr out2(CObjectOStream::Open(eSerial_AsnText, out)); - //*out2 << *m_SeqalignSetRef; + //*out2 << *m_SeqalignSetRef; //get segs first and get hsp number - m_segs,m_Hsp,m_subjRange x_PreProcessSeqAlign(actual_aln_list); - if(!(m_AlignOption&eMergeAlign)){ + if(!(m_AlignOption&eMergeAlign)){ /*pairwise alignment. Note we can't just show each alnment as we go because we will need seg information form all hsp's with the same id - for genome url link. As a result we show hsp's with the same id + for genome url link. As a result we show hsp's with the same id as a group*/ - + CConstRef previousId, subid; - for (CSeq_align_set::Tdata::const_iterator - iter = actual_aln_list.Get().begin(); - iter != actual_aln_list.Get().end() + for (CSeq_align_set::Tdata::const_iterator + iter = actual_aln_list.Get().begin(); + iter != actual_aln_list.Get().end() && num_align avRef = x_GetAlnVecForSeqalign(**iter); - + if(!(avRef.Empty())){ //Note: do not switch the set order per calnvec specs. 
avRef->SetGenCode(m_SlaveGeneticCode); @@ -1893,60 +1893,60 @@ void CDisplaySeqalign::DisplaySeqalign(CNcbiOstream& out) try{ const CBioseq_Handle& handle = avRef->GetBioseqHandle(1); if(handle){ - + //save the current alnment regardless CRef alnvecInfo(new SAlnInfo); int num_ident; - CAlignFormatUtil::GetAlnScores(**iter, - alnvecInfo->score, - alnvecInfo->bits, - alnvecInfo->evalue, - alnvecInfo->sum_n, + CAlignFormatUtil::GetAlnScores(**iter, + alnvecInfo->score, + alnvecInfo->bits, + alnvecInfo->evalue, + alnvecInfo->sum_n, num_ident, alnvecInfo->use_this_seqid, alnvecInfo->comp_adj_method); alnvecInfo->alnvec = avRef; - + subid=&(avRef->GetSeqId(1)); bool showDefLine = previousId.Empty() || !subid->Match(*previousId); - x_DisplayAlnvecInfo(out, alnvecInfo,showDefLine); - + x_DisplayAlnvecInfo(out, alnvecInfo,showDefLine); + previousId = subid; - } + } } catch (const CException&){ out << "Sequence with id " - << (avRef->GetSeqId(1)).GetSeqIdString().c_str() + << (avRef->GetSeqId(1)).GetSeqIdString().c_str() <<" no longer exists in database...alignment skipped\n"; continue; } } - } + } } else if(m_AlignOption&eMergeAlign){ //multiple alignment - vector< CRef > mix(k_NumFrame); + vector< CRef > mix(k_NumFrame); //each for one frame for translated alignment for(int i = 0; i < k_NumFrame; i++){ mix[i] = new CAlnMix(m_Scope); - } + } num_align = 0; vector > alnVector(k_NumFrame); for(int i = 0; i < k_NumFrame; i ++){ alnVector[i] = new CSeq_align_set; } - for (CSeq_align_set::Tdata::const_iterator - alnIter = actual_aln_list.Get().begin(); - alnIter != actual_aln_list.Get().end() + for (CSeq_align_set::Tdata::const_iterator + alnIter = actual_aln_list.Get().begin(); + alnIter != actual_aln_list.Get().end() && num_alignGetSeq_id(1)); if(subj_handle){ //need to convert to denseg for stdseg if((*alnIter)->GetSegs().Which() == CSeq_align::C_Segs::e_Std) { - CTypeConstIterator ss = ConstBegin(**alnIter); - CRef convertedDs = + CTypeConstIterator ss = 
ConstBegin(**alnIter); + CRef convertedDs = (*alnIter)->CreateDensegFromStdseg(); - if((convertedDs->GetSegs().GetDenseg().IsSetWidths() + if((convertedDs->GetSegs().GetDenseg().IsSetWidths() && convertedDs->GetSegs().GetDenseg().GetWidths()[0] == 3) || m_AlignOption & eTranslateNucToNucAlignment){ //only do this for translated master @@ -1985,24 +1985,24 @@ void CDisplaySeqalign::DisplaySeqalign(CNcbiOstream& out) alnVector[0]->Set().\ push_back(CAlignFormatUtil::CreateDensegFromDendiag(**alnIter)); } else { - NCBI_THROW(CException, eUnknown, + NCBI_THROW(CException, eUnknown, "Input Seq-align should be Denseg, Stdseg or Dendiag!"); } } } for(int i = 0; i < (int)alnVector.size(); i ++){ bool hasAln = false; - for(CTypeConstIterator + for(CTypeConstIterator alnRef = ConstBegin(*alnVector[i]); alnRef; ++alnRef){ CTypeConstIterator ds = ConstBegin(*alnRef); - //*out2 << *ds; + //*out2 << *ds; try{ - if (m_AlignOption & eTranslateNucToNucAlignment) { + if (m_AlignOption & eTranslateNucToNucAlignment) { mix[i]->Add(*ds, CAlnMix::fForceTranslation); } else { if (ds->IsSetWidths() && - ds->GetWidths()[0] == 3 && - ds->IsSetStrands() && + ds->GetWidths()[0] == 3 && + ds->IsSetStrands() && ds->GetStrands().front()==eNa_strand_minus){ mix[i]->Add(*ds, CAlnMix::fNegativeStrand); } else { @@ -2017,13 +2017,13 @@ void CDisplaySeqalign::DisplaySeqalign(CNcbiOstream& out) } if(hasAln){ // *out2<<*alnVector[i]; - mix[i]->Merge(CAlnMix::fMinGap - | CAlnMix::fQuerySeqMergeOnly - | CAlnMix::fFillUnalignedRegions); + mix[i]->Merge(CAlnMix::fMinGap + | CAlnMix::fQuerySeqMergeOnly + | CAlnMix::fFillUnalignedRegions); //*out2<GetDenseg(); } } - + int numDistinctFrames = 0; for(int i = 0; i < (int)alnVector.size(); i ++){ if(!alnVector[i]->Get().empty()){ @@ -2033,33 +2033,33 @@ void CDisplaySeqalign::DisplaySeqalign(CNcbiOstream& out) out<<"\n"; for(int i = 0; i < k_NumFrame; i ++){ try{ - CRef avRef (new CAlnVec (mix[i]->GetDenseg(), + CRef avRef (new CAlnVec (mix[i]->GetDenseg(), 
m_Scope)); avRef->SetAaCoding(CSeq_data::e_Ncbieaa); avRef->SetGenCode(m_SlaveGeneticCode); avRef->SetGenCode(m_MasterGeneticCode, 0); m_AV = avRef; - + if(numDistinctFrames > 1){ - out << "For reading frame " << k_FrameConversion[i] + out << "For reading frame " << k_FrameConversion[i] << " of query sequence:\n\n"; } x_DisplayAlnvec(out); } catch (CException e){ continue; } - } + } } if(m_AlignOption&eSequenceRetrieval && m_AlignOption&eHtml && m_CanRetrieveSeq){ - out<<"\n"; + out<<"\n"; } } void CDisplaySeqalign::x_FillIdentityInfo(const string& sequence_standard, - const string& sequence , - int& match, int& positive, - string& middle_line) + const string& sequence , + int& match, int& positive, + string& middle_line) { match = 0; positive = 0; @@ -2078,8 +2078,8 @@ void CDisplaySeqalign::x_FillIdentityInfo(const string& sequence_standard, } match ++; } else { - if ((m_AlignType&eProt) - && m_Matrix[(int)sequence_standard[i]][(int)sequence[i]] > 0){ + if ((m_AlignType&eProt) + && m_Matrix[(int)sequence_standard[i]][(int)sequence[i]] > 0){ positive ++; if(m_AlignOption & eShowMiddleLine){ if (m_MidLineStyle == eChar){ @@ -2090,26 +2090,26 @@ void CDisplaySeqalign::x_FillIdentityInfo(const string& sequence_standard, if (m_AlignOption & eShowMiddleLine){ middle_line[i] = ' '; } - } + } } - } + } } int CDisplaySeqalign::x_GetLinkout(const objects::CSeq_id & id) { int linkout = 0; - if(m_AlignOption & eLinkout) { + if(m_AlignOption & eLinkout) { try { - linkout = m_LinkoutDB + linkout = m_LinkoutDB ? 
m_LinkoutDB->GetLinkout(id,m_MapViewerBuildName) - : 0; + : 0; } - catch (const CException & e) { - ERR_POST("Problem with linkoutdb: " + e.GetMsg()); + catch (const CException & e) { + ERR_POST("Problem with linkoutdb: " + e.GetMsg()); m_AlignOption &= ~eLinkout; //Remove linkout bit for the rest of sequences - linkout = 0; - } + linkout = 0; + } } return linkout; } @@ -2117,35 +2117,35 @@ int CDisplaySeqalign::x_GetLinkout(const objects::CSeq_id & id) CDisplaySeqalign::SAlnDispParams *CDisplaySeqalign::x_FillAlnDispParams(const CRef< CBlast_def_line > &bdl, - const CBioseq_Handle& bsp_handle, + const CBioseq_Handle& bsp_handle, list &use_this_seqid, TGi firstGi, - int deflineNum) + int deflineNum) { SAlnDispParams *alnDispParams = NULL; const int kMaxDeflineNum = 10; bool isNa = bsp_handle.GetBioseqCore()->IsNa(); - int seqLength = (int)bsp_handle.GetBioseqLength(); + int seqLength = (int)bsp_handle.GetBioseqLength(); const list > ids = bdl->GetSeqid(); TGi gi = CAlignFormatUtil::GetGiForSeqIdList(ids); - CRef wid = FindBestChoice(ids, CSeq_id::WorstRank); - TGi gi_in_use_this_gi = ZERO_GI; + CRef wid = FindBestChoice(ids, CSeq_id::WorstRank); + TGi gi_in_use_this_gi = ZERO_GI; bool isGiList = false; - bool match = CAlignFormatUtil::MatchSeqInSeqList(gi, wid, use_this_seqid,&isGiList); + bool match = CAlignFormatUtil::MatchSeqInSeqList(gi, wid, use_this_seqid,&isGiList); if(match && isGiList) gi_in_use_this_gi = gi; - + if(use_this_seqid.empty() || match) { firstGi = (firstGi == ZERO_GI) ? 
gi_in_use_this_gi : firstGi; alnDispParams = new SAlnDispParams(); alnDispParams->gi = gi; - alnDispParams->seqID = FindBestChoice(ids, CSeq_id::WorstRank); //change to use use_this_seq + alnDispParams->seqID = FindBestChoice(ids, CSeq_id::WorstRank); //change to use use_this_seq alnDispParams->hasTextSeqID = CAlignFormatUtil::GetTextSeqID(alnDispParams->seqID); alnDispParams->ids = bsp_handle.GetBioseqCore()->GetId(); alnDispParams->label = CAlignFormatUtil::GetLabel(alnDispParams->seqID,CSeq_id::eContent);//Just accession without db part like ref| or pdbd| - + if(m_AlignOption&eHtml){ int taxid = 0; string type_temp = m_BlastType; @@ -2153,70 +2153,70 @@ CDisplaySeqalign::SAlnDispParams *CDisplaySeqalign::x_FillAlnDispParams(const CR if(bdl->IsSetTaxid() && bdl->CanGetTaxid()){ taxid = bdl->GetTaxid(); } - - alnDispParams->seqUrlInfo = x_InitSeqUrl(gi_in_use_this_gi,alnDispParams->label,taxid,ids); + + alnDispParams->seqUrlInfo = x_InitSeqUrl(gi_in_use_this_gi,alnDispParams->label,taxid,ids); alnDispParams->id_url = CAlignFormatUtil::GetIDUrl(alnDispParams->seqUrlInfo,&ids); - } - if(m_AlignOption&eLinkout && m_AlignTemplates == NULL){ + } + if(m_AlignOption&eLinkout && m_AlignTemplates == NULL){ int linkout = 0; - if (alnDispParams->hasTextSeqID) { - linkout = (deflineNum < kMaxDeflineNum) ? CAlignFormatUtil::GetSeqLinkoutInfo((CBioseq::TId &)ids, + if (alnDispParams->hasTextSeqID) { + linkout = (deflineNum < kMaxDeflineNum) ? 
CAlignFormatUtil::GetSeqLinkoutInfo((CBioseq::TId &)ids, &m_LinkoutDB, m_MapViewerBuildName, alnDispParams->gi) : 0; if(!m_LinkoutDB) m_AlignOption &= ~eLinkout; - } - + } + if(linkout != 0) { list linkout_url = CAlignFormatUtil:: GetLinkoutUrl(linkout, ids, m_Rid, m_CddRid, m_EntrezTerm, - isNa, + isNa, firstGi, - false, true, m_cur_align,m_PreComputedResID); + false, true, m_cur_align,m_PreComputedResID); ITERATE(list, iter_linkout, linkout_url){ alnDispParams->linkoutStr += *iter_linkout; } } if(seqLength > k_GetSubseqThreshhold){ - alnDispParams->dumpGnlUrl = x_GetDumpgnlLink(ids); + alnDispParams->dumpGnlUrl = x_GetDumpgnlLink(ids); } - + } if(bdl->IsSetTitle()){ alnDispParams->title = bdl->GetTitle(); } if(alnDispParams->title.empty()) { - alnDispParams->title = CDeflineGenerator().GenerateDefline(bsp_handle); + alnDispParams->title = CDeflineGenerator().GenerateDefline(bsp_handle); } - } + } return alnDispParams; } -CDisplaySeqalign::SAlnDispParams *CDisplaySeqalign::x_FillAlnDispParams(const CBioseq_Handle& bsp_handle) +CDisplaySeqalign::SAlnDispParams *CDisplaySeqalign::x_FillAlnDispParams(const CBioseq_Handle& bsp_handle) { SAlnDispParams *alnDispParams = new SAlnDispParams(); alnDispParams->gi = FindGi(bsp_handle.GetBioseqCore()->GetId()); alnDispParams->seqID = FindBestChoice(bsp_handle.GetBioseqCore()->GetId(),CSeq_id::WorstRank); alnDispParams->label = CAlignFormatUtil::GetLabel(alnDispParams->seqID,CSeq_id::eContent); - if(m_AlignOption&eHtml){ + if(m_AlignOption&eHtml){ alnDispParams->ids = bsp_handle.GetBioseqCore()->GetId(); - alnDispParams->seqUrlInfo = x_InitSeqUrl(alnDispParams->gi,alnDispParams->label,0,alnDispParams->ids); - alnDispParams->id_url = CAlignFormatUtil::GetIDUrl(alnDispParams->seqUrlInfo,&alnDispParams->ids); - } - alnDispParams->title = CDeflineGenerator().GenerateDefline(bsp_handle); + alnDispParams->seqUrlInfo = x_InitSeqUrl(alnDispParams->gi,alnDispParams->label,0,alnDispParams->ids); + alnDispParams->id_url = 
CAlignFormatUtil::GetIDUrl(alnDispParams->seqUrlInfo,&alnDispParams->ids); + } + alnDispParams->title = CDeflineGenerator().GenerateDefline(bsp_handle); alnDispParams->hasTextSeqID = CAlignFormatUtil::GetTextSeqID(alnDispParams->seqID); return alnDispParams; } string CDisplaySeqalign::x_PrintDefLine(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_vec_info) - + { - CNcbiOstrstream out; + CNcbiOstrstream out; CNcbiEnvironment env; /* Facilitates comparing formatted output using diff */ static string kLengthString("Length="); @@ -2229,49 +2229,50 @@ CDisplaySeqalign::x_PrintDefLine(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_ value_set = true; } #endif /* CTOOLKIT_COMPATIBLE */ - + if(bsp_handle){ const CRef wid = - FindBestChoice(bsp_handle.GetBioseqCore()->GetId(), + FindBestChoice(bsp_handle.GetBioseqCore()->GetId(), CSeq_id::WorstRank); - - const CRef bdlRef - = CSeqDB::ExtractBlastDefline(bsp_handle); + + const CRef bdlRef + = CSeqDB::ExtractBlastDefline(bsp_handle); const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get(); bool isFirst = true; TGi firstGi = ZERO_GI; m_cur_align++; - + if(bdl.empty()){ //no blast defline struct, should be no such case now //actually not so fast...as we now fetch from entrez even when it's not in blast db //there is no blast defline in such case. - SAlnDispParams *alnDispParams = x_FillAlnDispParams(bsp_handle); + CRef alnDispParams + (x_FillAlnDispParams(bsp_handle)); out << ">"; if ((m_AlignOption&eSequenceRetrieval) && (m_AlignOption&eHtml) && m_CanRetrieveSeq && isFirst) { - string buf = CAlignFormatUtil::MapTemplate(k_DefaultCheckboxTempl,"queryNumber",NStr::IntToString(m_QueryNumber)); + string buf = CAlignFormatUtil::MapTemplate(k_DefaultCheckboxTempl,"queryNumber",NStr::IntToString(m_QueryNumber)); buf = CAlignFormatUtil::MapTemplate(buf,"id_lbl", alnDispParams->gi > ZERO_GI ? 
- NStr::NumericToString(alnDispParams->gi) : alnDispParams->label); + NStr::NumericToString(alnDispParams->gi) : alnDispParams->label); out << buf; } - - if(m_AlignOption&eHtml){ - + + if(m_AlignOption&eHtml){ + aln_vec_info->id_label = (alnDispParams->gi != ZERO_GI) ? - NStr::NumericToString(alnDispParams->gi) : alnDispParams->label; + NStr::NumericToString(alnDispParams->gi) : alnDispParams->label; out<id_url; } - + if(m_AlignOption&eShowGi && alnDispParams->gi > ZERO_GI && !alnDispParams->seqID->IsGi()){ out<<"gi|"<gi<<"|"; - } + } if(!((alnDispParams->seqID->AsFastaString().find("gnl|BL_ORD_ID") != string::npos) || alnDispParams->seqID->AsFastaString().find("lcl|Subject_") != string::npos)){ - if (strncmp(alnDispParams->seqID->AsFastaString().c_str(), "lcl|", 4) == 0) + if (strncmp(alnDispParams->seqID->AsFastaString().c_str(), "lcl|", 4) == 0) out << alnDispParams->label; else { if (m_UseLongSeqIds || ((m_AlignOption & eShowGi) && @@ -2289,72 +2290,71 @@ CDisplaySeqalign::x_PrintDefLine(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_ out<<""; } if(alnDispParams->gi != ZERO_GI){ - out<<""; + out<<""; } else { - out<<""; + out<<""; } } out <<" "; - s_WrapOutputLine(out, (m_AlignOption&eHtml) ? + s_WrapOutputLine(out, (m_AlignOption&eHtml) ? CHTMLHelper::HTMLEncode(alnDispParams->title) : - alnDispParams->title); - + alnDispParams->title); + out<<"\n"; - + } else { - //print each defline + //print each defline bool bMultipleDeflines = false; int numBdl = 0; int maxNumBdl = (aln_vec_info->use_this_seqid.empty()) ? 
bdl.size() : aln_vec_info->use_this_seqid.size(); - for(list< CRef< CBlast_def_line > >::const_iterator + for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin(); iter != bdl.end(); iter++){ - - SAlnDispParams *alnDispParams = x_FillAlnDispParams(*iter, - bsp_handle, - aln_vec_info->use_this_seqid, - firstGi, - numBdl); - - + + CRef alnDispParams + (x_FillAlnDispParams(*iter, bsp_handle, + aln_vec_info->use_this_seqid, + firstGi, numBdl)); + + if(alnDispParams) { numBdl++; if(isFirst){ - out << ">"; + out << ">"; } else{ out << " "; - if (m_AlignOption&eHtml && (int)(maxNumBdl) > k_MaxDeflinesToShow && numBdl == k_MinDeflinesToShow + 1){ + if (m_AlignOption&eHtml && (int)(maxNumBdl) > k_MaxDeflinesToShow && numBdl == k_MinDeflinesToShow + 1){ //Show first 3 deflines out of 8 or more, hide the rest string mdlTag = aln_vec_info->id_label; - //string mdlTag = id_label + "_" + NStr::IntToString(m_cur_align); + //string mdlTag = id_label + "_" + NStr::IntToString(m_cur_align); out << "" << maxNumBdl - k_MinDeflinesToShow << " more sequence titles" << "\n"; - + out << "
"; bMultipleDeflines = true; } } - + if(isFirst){ firstGi = alnDispParams->gi; } if ((m_AlignOption&eSequenceRetrieval) && (m_AlignOption&eHtml) && m_CanRetrieveSeq && isFirst) { - string buf = CAlignFormatUtil::MapTemplate(k_DefaultCheckboxTempl,"queryNumber",NStr::IntToString(m_QueryNumber)); + string buf = CAlignFormatUtil::MapTemplate(k_DefaultCheckboxTempl,"queryNumber",NStr::IntToString(m_QueryNumber)); buf = CAlignFormatUtil::MapTemplate(buf,"id_lbl", alnDispParams->gi > ZERO_GI ? - NStr::NumericToString(alnDispParams->gi) : alnDispParams->label); + NStr::NumericToString(alnDispParams->gi) : alnDispParams->label); out << buf; } - + if(m_AlignOption&eHtml){ out<< alnDispParams->id_url; } - + if(m_AlignOption&eShowGi && alnDispParams->gi > ZERO_GI && !alnDispParams->seqID->IsGi()){ out<<"gi|"<gi<<"|"; - } + } if(!(alnDispParams->seqID->AsFastaString().find("gnl|BL_ORD_ID") != string::npos) || alnDispParams->seqID->AsFastaString().find("lcl|Subject_") != string::npos){ if (strncmp(alnDispParams->seqID->AsFastaString().c_str(), "lcl|", 4) == 0) { @@ -2385,49 +2385,50 @@ CDisplaySeqalign::x_PrintDefLine(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_ aln_vec_info->id_label = alnDispParams->label; } if(m_AlignOption&eLinkout){ - + out <<" "; out << alnDispParams->linkoutStr; - if(!alnDispParams->dumpGnlUrl.empty()) { - + if(!alnDispParams->dumpGnlUrl.empty()) { + out<dumpGnlUrl; } } } - - out <<" "; - if(!alnDispParams->title.empty()) { - s_WrapOutputLine(out, (m_AlignOption&eHtml) ? + + if (out.tellp() > 1L) { + out << " "; + } + if(!alnDispParams->title.empty()) { + s_WrapOutputLine(out, (m_AlignOption&eHtml) ? CHTMLHelper:: HTMLEncode(alnDispParams->title) : - alnDispParams->title); + alnDispParams->title); } out<<"\n"; isFirst = false; - delete alnDispParams; } } if(m_AlignOption&eHtml && bMultipleDeflines) { out << "
"; - } + } } - } - out< start); list > actualSeqloc; string actualSeq = sequence.substr(start, len); - + if(id.Which() != CSeq_id::e_not_set){ /*only do this for sequence but not for others like middle line, features*/ @@ -2435,15 +2436,15 @@ void CDisplaySeqalign::x_OutputSeq(string& sequence, const CSeq_id& id, int from=(*iter)->aln_range.GetFrom(); int to=(*iter)->aln_range.GetTo(); int locFrame = (*iter)->seqloc->GetFrame(); - if(id.Match((*iter)->seqloc->GetInterval().GetId()) + if(id.Match((*iter)->seqloc->GetInterval().GetId()) && locFrame == frame){ bool isFirstChar = true; CRange eachSeqloc(0, 0); //go through each residule and mask it - for (int i=max(from, start); + for (int i=max(from, start); i<=min(to, start+len -1); i++){ //store seqloc start for font tag below - if ((m_AlignOption & eHtml) && isFirstChar){ + if ((m_AlignOption & eHtml) && isFirstChar){ isFirstChar = false; eachSeqloc.Set(i, eachSeqloc.GetTo()); } @@ -2457,8 +2458,8 @@ void CDisplaySeqalign::x_OutputSeq(string& sequence, const CSeq_id& id, actualSeq[i-start]=tolower((unsigned char) actualSeq[i-start]); } //store seqloc start for font tag below - if ((m_AlignOption & eHtml) - && i == min(to, start+len)){ + if ((m_AlignOption & eHtml) + && i == min(to, start+len)){ eachSeqloc.Set(eachSeqloc.GetFrom(), i); } } @@ -2468,40 +2469,40 @@ void CDisplaySeqalign::x_OutputSeq(string& sequence, const CSeq_id& id, } } } - + if(actualSeqloc.empty()){//no need to add font tag if((m_AlignOption & eColorDifferentBases) && (m_AlignOption & eHtml) && color_mismatch && (m_AlignOption & eShowIdentity)){ - //color the mismatches. Only for rows without mask. + //color the mismatches. Only for rows without mask. //Otherwise it may confilicts with mask font tag. 
s_ColorDifferentBases(actualSeq, k_IdentityChar, out); } else { out< >::iterator iter=actualSeqloc.begin(); + for (list >::iterator iter=actualSeqloc.begin(); iter!=actualSeqloc.end(); iter++){ int from = (*iter).GetFrom() - start; int to = (*iter).GetTo() - start; //start tag - if(from == i){ + if(from == i){ frontTag = true; } if(to == i && to > 0){ endTag = true; - } + } } startStyledOutput = frontTag; - stopStyledOutput = endTag && frontTag; + stopStyledOutput = endTag && frontTag; bool isStyled = s_ProcessStyledContent(actualSeq,i,startStyledOutput,stopStyledOutput,styledSqLocTmpl ,refStr,out); if(!isStyled) out<GetNumRows(); row++) { - CRef chunk_vec + CRef chunk_vec = m_AV->GetAlnChunks(row, m_AV->GetSeqAlnRange(0)); for (int i=0; isize(); i++) { CConstRef chunk = (*chunk_vec)[i]; if (chunk->IsGap()) { - gap += (chunk->GetAlnRange().GetTo() + gap += (chunk->GetAlnRange().GetTo() - chunk->GetAlnRange().GetFrom() + 1); } } @@ -2529,16 +2530,16 @@ int CDisplaySeqalign::x_GetNumGaps() void CDisplaySeqalign::x_GetFeatureInfo(TSAlnFeatureInfoList& feature, - CScope& scope, + CScope& scope, CSeqFeatData::E_Choice choice, int row, string& sequence, list > >& feat_range_list, list& feat_seq_strand, - bool fill_feat_range ) const + bool fill_feat_range ) const { //Only fetch features for seq that has a gi unless it's master seq const CSeq_id& id = m_AV->GetSeqId(row); - + TGi gi_temp = FindGi(m_AV->GetBioseqHandle(row).GetBioseqCore()->GetId()); if(gi_temp > ZERO_GI || row == 0){ const CBioseq_Handle& handle = scope.GetBioseqHandle(id); @@ -2551,7 +2552,7 @@ void CDisplaySeqalign::x_GetFeatureInfo(TSAlnFeatureInfoList& feature, max(seq_start, seq_stop)); SAnnotSelector sel(choice); sel.SetAdaptiveDepth().SetResolveAll(); - + for (CFeat_CI feat(scope, *loc_ref, sel); feat; ++feat) { const CSeq_loc& loc = feat->GetLocation(); bool has_id = false; @@ -2575,21 +2576,21 @@ void CDisplaySeqalign::x_GetFeatureInfo(TSAlnFeatureInfoList& feature, has_id = true; 
if(loc_it.IsSetStrand()){ feat_strand = loc_it.GetStrand(); - if(feat_strand != eNa_strand_plus && + if(feat_strand != eNa_strand_plus && feat_strand != eNa_strand_minus){ feat_strand = eNa_strand_plus; } } else { feat_strand = eNa_strand_plus; } - + if(!first_loc && prev_strand != feat_strand){ mixed_strand = true; } first_loc = false; prev_strand = feat_strand; } else { - //if seqloc has other seqids then need to remove other + //if seqloc has other seqids then need to remove other //seqid encoded amino acids in the front later if (first_loc) { other_seqloc_length += loc_it.GetRange().GetLength(); @@ -2601,45 +2602,45 @@ void CDisplaySeqalign::x_GetFeatureInfo(TSAlnFeatureInfoList& feature, if(!has_id || mixed_strand){ continue; } - + string featLable = NcbiEmptyString; string featId; char feat_char = ' '; - string alternativeFeatStr = NcbiEmptyString; + string alternativeFeatStr = NcbiEmptyString; TSeqPos feat_aln_from = 0; TSeqPos feat_aln_to = 0; TSeqPos actual_feat_seq_start = 0, actual_feat_seq_stop = 0; - feature::GetLabel(feat->GetOriginalFeature(), &featLable, + feature::GetLabel(feat->GetOriginalFeature(), &featLable, feature::fFGL_Both, &scope); featId = featLable.substr(0, k_FeatureIdLen); //default - TSeqPos aln_stop = m_AV->GetAlnStop(); + TSeqPos aln_stop = m_AV->GetAlnStop(); CRef featInfo; - - //find the actual feature sequence start and stop + + //find the actual feature sequence start and stop if(m_AV->IsPositiveStrand(row)){ - actual_feat_seq_start = + actual_feat_seq_start = max(feat_seq_range.GetFrom(), seq_start); - actual_feat_seq_stop = + actual_feat_seq_stop = min(feat_seq_range.GetTo(), seq_stop); - + } else { - actual_feat_seq_start = + actual_feat_seq_start = min(feat_seq_range.GetTo(), seq_start); actual_feat_seq_stop = max(feat_seq_range.GetFrom(), seq_stop); } //the feature alignment positions - feat_aln_from = + feat_aln_from = m_AV->GetAlnPosFromSeqPos(row, actual_feat_seq_start); - feat_aln_to = + feat_aln_to = 
m_AV->GetAlnPosFromSeqPos(row, actual_feat_seq_stop); if(choice == CSeqFeatData::e_Gene){ - featInfo.Reset(new SAlnFeatureInfo); + featInfo.Reset(new SAlnFeatureInfo); feat_char = '^'; - + } else if(choice == CSeqFeatData::e_Cdregion){ - - string raw_cdr_product = + + string raw_cdr_product = s_GetCdsSequence(m_SlaveGeneticCode, feat, scope, isolated_range, handle, feat_strand, featId, other_seqloc_length%3 == 0 ? @@ -2649,38 +2650,38 @@ void CDisplaySeqalign::x_GetFeatureInfo(TSAlnFeatureInfoList& feature, continue; } featInfo.Reset(new SAlnFeatureInfo); - - //line represents the amino acid line starting covering + + //line represents the amino acid line starting covering //the whole alignment. The idea is if there is no feature //in some range, then fill it with space and this won't - //be shown - - string line(aln_stop+1, ' '); + //be shown + + string line(aln_stop+1, ' '); //pre-fill all cds region with intron char for (TSeqPos i = feat_aln_from; i <= feat_aln_to; i ++){ line[i] = k_IntronChar; } - + //get total coding length TSeqPos total_coding_len = 0; ITERATE(list, iter, isolated_range){ - total_coding_len += iter->GetLength(); + total_coding_len += iter->GetLength(); } - + //fill concatenated exon (excluding intron) //with product //this is will be later used to //fill the feature line char gap_char = m_AV->GetGapChar(row); - string concat_exon = - s_GetConcatenatedExon(feat, feat_strand, + string concat_exon = + s_GetConcatenatedExon(feat, feat_strand, isolated_range, total_coding_len, raw_cdr_product, other_seqloc_length%3 == 0 ? 0 : 3 - other_seqloc_length%3); - - + + //fill slave feature info to make putative feature for //master sequence if (fill_feat_range) { @@ -2690,51 +2691,51 @@ void CDisplaySeqalign::x_GetFeatureInfo(TSAlnFeatureInfoList& feature, feat, isolated_range, feat_strand, m_AV, row, other_seqloc_length%3 == 0 ? 
- 0 : + 0 : 3 - other_seqloc_length%3); if(!(master_feat_range.empty())) { - feat_range_list.push_back(master_feat_range); + feat_range_list.push_back(master_feat_range); feat_seq_strand.push_back(master_strand); - } + } } - - + + TSeqPos feat_aln_start_totalexon = 0; TSeqPos prev_feat_aln_start_totalexon = 0; TSeqPos prev_feat_seq_stop = 0; TSeqPos intron_size = 0; bool is_first = true; bool is_first_exon_start = true; - + //here things get complicated a bit. The idea is fill the //whole feature line in alignment coordinates with //amino acid on the second base of a condon //go through the feature seqloc and fill the feature line - + //Need to reverse the seqloc order for minus strand if(feat_strand == eNa_strand_minus){ - isolated_range.reverse(); + isolated_range.reverse(); } - + ITERATE(list, iter, isolated_range){ //intron refers to the distance between two exons //i.e. each seqloc is an exon //intron needs to be skipped if(!is_first){ - intron_size += iter->GetFrom() + intron_size += iter->GetFrom() - prev_feat_seq_stop - 1; } CRange actual_feat_seq_range = loc_ref->GetTotalRange(). 
- IntersectionWith(*iter); + IntersectionWith(*iter); if(!actual_feat_seq_range.Empty()){ //the sequence start position in aln coordinates //that has a feature TSeqPos feat_aln_start; TSeqPos feat_aln_stop; if(m_AV->IsPositiveStrand(row)){ - feat_aln_start = + feat_aln_start = m_AV-> GetAlnPosFromSeqPos (row, actual_feat_seq_range.GetFrom()); @@ -2742,7 +2743,7 @@ void CDisplaySeqalign::x_GetFeatureInfo(TSAlnFeatureInfoList& feature, = m_AV->GetAlnPosFromSeqPos (row, actual_feat_seq_range.GetTo()); } else { - feat_aln_start = + feat_aln_start = m_AV-> GetAlnPosFromSeqPos (row, actual_feat_seq_range.GetTo()); @@ -2751,49 +2752,49 @@ void CDisplaySeqalign::x_GetFeatureInfo(TSAlnFeatureInfoList& feature, (row, actual_feat_seq_range.GetFrom()); } //put actual amino acid on feature line - //in aln coord + //in aln coord for (TSeqPos i = feat_aln_start; - i <= feat_aln_stop; i ++){ + i <= feat_aln_stop; i ++){ if(sequence[i] != gap_char){ - //the amino acid position in + //the amino acid position in //concatanated exon that corresponds //to the sequence position //note intron needs to be skipped //as it does not have cds feature TSeqPos product_adj_seq_pos - = m_AV->GetSeqPosFromAlnPos(row, i) - + = m_AV->GetSeqPosFromAlnPos(row, i) - intron_size - feat_seq_range.GetFrom(); - if(product_adj_seq_pos < + if(product_adj_seq_pos < concat_exon.size()){ //fill the cds feature line with //actual amino acids - line[i] = + line[i] = concat_exon[product_adj_seq_pos]; //get the exon start position //note minus strand needs to be //counted backward if(m_AV->IsPositiveStrand(row)){ - //don't count gap + //don't count gap if(is_first_exon_start && isalpha((unsigned char) line[i])){ - if(feat_strand == eNa_strand_minus){ - feat_aln_start_totalexon = + if(feat_strand == eNa_strand_minus){ + feat_aln_start_totalexon = concat_exon.size() - product_adj_seq_pos + 1; is_first_exon_start = false; - + } else { - feat_aln_start_totalexon = + feat_aln_start_totalexon = product_adj_seq_pos; 
is_first_exon_start = false; } } - + } else { - if(feat_strand == eNa_strand_minus){ - if(is_first_exon_start && + if(feat_strand == eNa_strand_minus){ + if(is_first_exon_start && isalpha((unsigned char) line[i])){ - feat_aln_start_totalexon = + feat_aln_start_totalexon = concat_exon.size() - product_adj_seq_pos + 1; is_first_exon_start = false; @@ -2807,55 +2808,55 @@ void CDisplaySeqalign::x_GetFeatureInfo(TSAlnFeatureInfoList& feature, //reversed feat_aln_start_totalexon = min(TSeqPos(concat_exon.size() - - product_adj_seq_pos + 1), + - product_adj_seq_pos + 1), prev_feat_aln_start_totalexon); prev_feat_aln_start_totalexon = - feat_aln_start_totalexon; + feat_aln_start_totalexon; } } else { - feat_aln_start_totalexon = + feat_aln_start_totalexon = max(prev_feat_aln_start_totalexon, - product_adj_seq_pos); - + product_adj_seq_pos); + prev_feat_aln_start_totalexon = feat_aln_start_totalexon; } } } } else { //adding gap - line[i] = ' '; - } - - } + line[i] = ' '; + } + + } } - - prev_feat_seq_stop = iter->GetTo(); + + prev_feat_seq_stop = iter->GetTo(); is_first = false; - } + } alternativeFeatStr = line; s_FillCdsStartPosition(line, concat_exon, m_LineLen, feat_aln_start_totalexon, m_AV->IsPositiveStrand(row) ? 
eNa_strand_plus : eNa_strand_minus, - feat_strand, featInfo->feature_start); - + feat_strand, featInfo->feature_start); + } - + if(featInfo){ x_SetFeatureInfo(featInfo, *loc_ref, - feat_aln_from, feat_aln_to, aln_stop, - feat_char, featId, alternativeFeatStr); + feat_aln_from, feat_aln_to, aln_stop, + feat_char, featId, alternativeFeatStr); feature.push_back(featInfo); } } - } + } } } -void CDisplaySeqalign::x_SetFeatureInfo(CRef feat_info, - const CSeq_loc& seqloc, int aln_from, - int aln_to, int aln_stop, +void CDisplaySeqalign::x_SetFeatureInfo(CRef feat_info, + const CSeq_loc& seqloc, int aln_from, + int aln_to, int aln_stop, char pattern_char, string pattern_id, string& alternative_feat_str) const { @@ -2863,7 +2864,7 @@ void CDisplaySeqalign::x_SetFeatureInfo(CRef feat_info, feat->seqloc = &seqloc; feat->feature_char = pattern_char; feat->feature_id = pattern_id; - + if(alternative_feat_str != NcbiEmptyString){ feat_info->feature_string = alternative_feat_str; } else { @@ -2874,8 +2875,8 @@ void CDisplaySeqalign::x_SetFeatureInfo(CRef feat_info, } feat_info->feature_string = line; } - - feat_info->aln_range.Set(aln_from, aln_to); + + feat_info->aln_range.Set(aln_from, aln_to); feat_info->feature = feat; } @@ -2909,22 +2910,22 @@ static int x_AddBar(string& seq, int insert_alnpos, int aln_start){ ///@param aln_start: alnment start ///@return: the updated insert end position /// -static int s_AdjustInsert(string& cur_insert, string& new_insert, +static int s_AdjustInsert(string& cur_insert, string& new_insert, int insert_alnpos, int aln_start) { int insertEnd = 0; int curInsertSize = (int)cur_insert.size(); - int insertLeftSpace = insert_alnpos - aln_start - curInsertSize + 2; + int insertLeftSpace = insert_alnpos - aln_start - curInsertSize + 2; //plus2 because insert is put after the position if(curInsertSize > 0){ _ASSERT(insertLeftSpace >= 2); } - int newInsertSize = (int)new_insert.size(); - if(insertLeftSpace - newInsertSize >= 1){ + int newInsertSize = 
(int)new_insert.size(); + if(insertLeftSpace - newInsertSize >= 1){ //can insert with the end position right below the bar string spacer(insertLeftSpace - newInsertSize, ' '); cur_insert += spacer + new_insert; - + } else { //Need to insert beyond the insert postion if(curInsertSize > 0){ cur_insert += " " + new_insert; @@ -2937,26 +2938,26 @@ static int s_AdjustInsert(string& cur_insert, string& new_insert, } -void CDisplaySeqalign::x_DoFills(int row, CAlnMap::TSignedRange& aln_range, - int aln_start, - TSInsertInformationList& insert_list, +void CDisplaySeqalign::x_DoFills(int row, CAlnMap::TSignedRange& aln_range, + int aln_start, + TSInsertInformationList& insert_list, list& inserts) const { if(!insert_list.empty()){ string bar(aln_range.GetLength(), ' '); - + string seq; TSInsertInformationList leftOverInsertList; bool isFirstInsert = true; int curInsertAlnStart = 0; int prvsInsertAlnEnd = 0; - - //go through each insert and fills the seq if it can + + //go through each insert and fills the seq if it can //be filled on the same line. If not, go to the next line NON_CONST_ITERATE(TSInsertInformationList, iter, insert_list) { curInsertAlnStart = (*iter)->aln_start; //always fill the first insert. 
Also fill if there is enough space if(isFirstInsert || curInsertAlnStart - prvsInsertAlnEnd >= 1){ - bar[curInsertAlnStart-aln_start+1] = '|'; + bar[curInsertAlnStart-aln_start+1] = '|'; int seqStart = (*iter)->seq_start; int seqEnd = seqStart + (*iter)->insert_len - 1; string newInsert; @@ -2965,13 +2966,13 @@ void CDisplaySeqalign::x_DoFills(int row, CAlnMap::TSignedRange& aln_range, prvsInsertAlnEnd = s_AdjustInsert(seq, newInsert, curInsertAlnStart, aln_start); isFirstInsert = false; - } else { //if no space, save the chunk and go to next line - bar[curInsertAlnStart-aln_start+1] = '|'; + } else { //if no space, save the chunk and go to next line + bar[curInsertAlnStart-aln_start+1] = '|'; //indicate insert goes to the next line - prvsInsertAlnEnd += x_AddBar(seq, curInsertAlnStart, aln_start); - //May need to add a bar after the current insert sequence + prvsInsertAlnEnd += x_AddBar(seq, curInsertAlnStart, aln_start); + //May need to add a bar after the current insert sequence //to indicate insert goes to the next line. - leftOverInsertList.push_back(*iter); + leftOverInsertList.push_back(*iter); } } //save current insert. 
Note that each insert has a bar and sequence @@ -2981,41 +2982,41 @@ void CDisplaySeqalign::x_DoFills(int row, CAlnMap::TSignedRange& aln_range, //here recursively fill the chunk that don't have enough space x_DoFills(row, aln_range, aln_start, leftOverInsertList, inserts); } - + } void CDisplaySeqalign::x_FillInserts(int row, CAlnMap::TSignedRange& aln_range, int aln_start, list& inserts, - string& insert_pos_string, + string& insert_pos_string, TSInsertInformationList& insert_list) const { - + string line(aln_range.GetLength(), ' '); - + ITERATE(TSInsertInformationList, iter, insert_list){ int from = (*iter)->aln_start; line[from - aln_start + 1] = '\\'; } - insert_pos_string = line; + insert_pos_string = line; //this is the line with "\" right after each insert position - + //here fills the insert sequence x_DoFills(row, aln_range, aln_start, insert_list, inserts); } void CDisplaySeqalign::x_GetInserts(TSInsertInformationList& insert_list, - CAlnMap::TSeqPosList& insert_aln_start, - CAlnMap::TSeqPosList& insert_seq_start, - CAlnMap::TSeqPosList& insert_length, + CAlnMap::TSeqPosList& insert_aln_start, + CAlnMap::TSeqPosList& insert_seq_start, + CAlnMap::TSeqPosList& insert_length, int line_aln_stop) { - while(!insert_aln_start.empty() + while(!insert_aln_start.empty() && (int)insert_aln_start.front() < line_aln_stop){ CRef insert(new SInsertInformation); - insert->aln_start = insert_aln_start.front() - 1; + insert->aln_start = insert_aln_start.front() - 1; //Need to minus one as we are inserting after this position insert->seq_start = insert_seq_start.front(); insert->insert_len = insert_length.front(); @@ -3024,33 +3025,33 @@ void CDisplaySeqalign::x_GetInserts(TSInsertInformationList& insert_list, insert_seq_start.pop_front(); insert_length.pop_front(); } - + } -string CDisplaySeqalign::x_GetSegs(int row) const +string CDisplaySeqalign::x_GetSegs(int row) const { string segs = NcbiEmptyString; if(m_AlignOption & eMergeAlign){ //only show this hsp segs = 
NStr::IntToString(m_AV->GetSeqStart(row)) + "-" + NStr::IntToString(m_AV->GetSeqStop(row)); } else { //for all segs - string idString = m_AV->GetSeqId(1).GetSeqIdString(); - map::const_iterator iter = m_AlnLinksParams.find(idString); + string idString = m_AV->GetSeqId(1).GetSeqIdString(); + map::const_iterator iter = m_AlnLinksParams.find(idString); if ( iter != m_AlnLinksParams.end() ){ segs = iter->second.segs; - } + } } return segs; } - - + + string CDisplaySeqalign::x_GetDumpgnlLink(const list >& ids) const { string dowloadUrl; - string segs = x_GetSegs(1); //row=1 - string label = CAlignFormatUtil::GetLabel(FindBestChoice(ids, CSeq_id::WorstRank)); + string segs = x_GetSegs(1); //row=1 + string label = CAlignFormatUtil::GetLabel(FindBestChoice(ids, CSeq_id::WorstRank)); string url_with_parameters = CAlignFormatUtil::BuildUserUrl(ids, 0, kDownloadUrl, m_DbName, m_IsDbNa, m_Rid, m_QueryNumber, @@ -3059,22 +3060,22 @@ string CDisplaySeqalign::x_GetDumpgnlLink(const list >& ids) const dowloadUrl = CAlignFormatUtil::MapTemplate(kDownloadLink,"download_url",url_with_parameters); dowloadUrl = CAlignFormatUtil::MapTemplate(dowloadUrl,"segs",segs); dowloadUrl = CAlignFormatUtil::MapTemplate(dowloadUrl,"lnk_displ",kDownloadImg); - dowloadUrl = CAlignFormatUtil::MapTemplate(dowloadUrl,"label",label); + dowloadUrl = CAlignFormatUtil::MapTemplate(dowloadUrl,"label",label); } return dowloadUrl; } -CRef -CDisplaySeqalign::PrepareBlastUngappedSeqalign(const CSeq_align_set& alnset) +CRef +CDisplaySeqalign::PrepareBlastUngappedSeqalign(const CSeq_align_set& alnset) { CRef alnSetRef(new CSeq_align_set); ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()){ const CSeq_align::TSegs& seg = (*iter)->GetSegs(); if(seg.Which() == CSeq_align::C_Segs::e_Std){ - if(seg.GetStd().size() > 1){ - //has more than one stdseg. Need to seperate as each + if(seg.GetStd().size() > 1){ + //has more than one stdseg. 
Need to seperate as each //is a distinct HSP ITERATE (CSeq_align::C_Segs::TStd, iterStdseg, seg.GetStd()){ CRef aln(new CSeq_align); @@ -3084,12 +3085,12 @@ CDisplaySeqalign::PrepareBlastUngappedSeqalign(const CSeq_align_set& alnset) aln->SetSegs().SetStd().push_back(*iterStdseg); alnSetRef->Set().push_back(aln); } - + } else { alnSetRef->Set().push_back(*iter); } } else if(seg.Which() == CSeq_align::C_Segs::e_Dendiag){ - if(seg.GetDendiag().size() > 1){ + if(seg.GetDendiag().size() > 1){ //has more than one dendiag. Need to seperate as each is //a distinct HSP ITERATE (CSeq_align::C_Segs::TDendiag, iterDendiag, @@ -3105,22 +3106,22 @@ CDisplaySeqalign::PrepareBlastUngappedSeqalign(const CSeq_align_set& alnset) alnSetRef->Set().push_back(aln); } - + } else { alnSetRef->Set().push_back(*iter); } } else { //Denseg, doing nothing. - + alnSetRef->Set().push_back(*iter); } } - + return alnSetRef; } -CRef -CDisplaySeqalign::PrepareBlastUngappedSeqalignEx(const CSeq_align_set& alnset) +CRef +CDisplaySeqalign::PrepareBlastUngappedSeqalignEx(const CSeq_align_set& alnset) { CRef alnSetRef(new CSeq_align_set); @@ -3146,17 +3147,17 @@ CDisplaySeqalign::PrepareBlastUngappedSeqalignEx(const CSeq_align_set& alnset) alnSetRef->Set().push_back(aln); } } else { //Denseg, doing nothing. - + alnSetRef->Set().push_back(*iter); } } - + return alnSetRef; } -// this version will set aggregate scores -CRef -CDisplaySeqalign::PrepareBlastUngappedSeqalignEx2(CSeq_align_set& alnset) +// this version will set aggregate scores +CRef +CDisplaySeqalign::PrepareBlastUngappedSeqalignEx2(CSeq_align_set& alnset) { CRef alnSetRef(new CSeq_align_set); @@ -3225,11 +3226,11 @@ CDisplaySeqalign::PrepareBlastUngappedSeqalignEx2(CSeq_align_set& alnset) alnSetRef->Set().push_back(aln); } } else { //Denseg, doing nothing. 
- + alnSetRef->Set().push_back(*iter); } } - + return alnSetRef; } @@ -3249,13 +3250,13 @@ bool CDisplaySeqalign::x_IsGeneInfoAvailable(SAlnInfo* aln_vec_info) return false; } - const CRef bdlRef - = CSeqDB::ExtractBlastDefline(bsp_handle); + const CRef bdlRef + = CSeqDB::ExtractBlastDefline(bsp_handle); const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get(); ITERATE(CBlast_def_line_set::Tdata, iter, bdl) { - int linkout = x_GetLinkout(*(*iter)->GetSeqid().front()); + int linkout = x_GetLinkout(*(*iter)->GetSeqid().front()); if (linkout & eGene) { return true; @@ -3271,7 +3272,7 @@ string CDisplaySeqalign::x_GetGeneLinkUrl(int gene_id) string strGeneLinkUrl = CAlignFormatUtil::GetURLFromRegistry("GENE_INFO"); AutoPtr > buf (new char[strGeneLinkUrl.size() + 1024]); - sprintf(buf.get(), strGeneLinkUrl.c_str(), + sprintf(buf.get(), strGeneLinkUrl.c_str(), gene_id, m_Rid.c_str(), m_IsDbNa ? "nucl" : "prot", @@ -3310,7 +3311,7 @@ string CDisplaySeqalign::x_DisplayGeneInfo(const CBioseq_Handle& bsp_handle,SAln string strInfo; info->ToString(strInfo, true, strUrl); out << strInfo << "\n"; - } + } } } catch (CException& e) @@ -3322,87 +3323,87 @@ string CDisplaySeqalign::x_DisplayGeneInfo(const CBioseq_Handle& bsp_handle,SAln { out << "(Gene info extraction error)" << "\n"; } - string formattedString = CNcbiOstrstreamToString(out); + string formattedString = CNcbiOstrstreamToString(out); return formattedString; } void CDisplaySeqalign::x_DisplayAlignSortInfo(CNcbiOstream& out,string id_label) { - string query_buf; + string query_buf; map< string, string> parameters_to_change; parameters_to_change.insert(map::value_type("HSP_SORT", "")); CAlignFormatUtil::BuildFormatQueryString(*m_Ctx,parameters_to_change,query_buf); out << "\n"; - CAlignFormatUtil::AddSpace(out, 57); + CAlignFormatUtil::AddSpace(out, 57); out << "Sort alignments for this subject sequence by:\n"; - CAlignFormatUtil::AddSpace(out, 59); - + 
CAlignFormatUtil::AddSpace(out, 59); + string hsp_sort_value = m_Ctx->GetRequestValue("HSP_SORT").GetValue(); int hsp_sort = hsp_sort_value == NcbiEmptyString ? 0 : NStr::StringToInt(hsp_sort_value); - + if (hsp_sort != CAlignFormatUtil::eEvalue) { - out << ""; } - + out << "E value"; if (hsp_sort != CAlignFormatUtil::eEvalue) { - out << ""; + out << ""; } - + CAlignFormatUtil::AddSpace(out, 2); if (hsp_sort != CAlignFormatUtil::eScore) { - out << ""; } - + out << "Score"; if (hsp_sort != CAlignFormatUtil::eScore) { - out << ""; + out << ""; } - + CAlignFormatUtil::AddSpace(out, 2); if (hsp_sort != CAlignFormatUtil::eHspPercentIdentity) { - out << ""; } - out << "Percent identity"; + out << "Percent identity"; if (hsp_sort != CAlignFormatUtil::eHspPercentIdentity) { - out << ""; + out << ""; } out << "\n"; - CAlignFormatUtil::AddSpace(out, 59); + CAlignFormatUtil::AddSpace(out, 59); if (hsp_sort != CAlignFormatUtil::eQueryStart) { - out << ""; - } + } out << "Query start position"; if (hsp_sort != CAlignFormatUtil::eQueryStart) { - out << ""; + out << ""; } CAlignFormatUtil::AddSpace(out, 2); - + if (hsp_sort != CAlignFormatUtil::eSubjectStart) { - out << ""; - } + } out << "Subject start position"; if (hsp_sort != CAlignFormatUtil::eSubjectStart) { - out << ""; + out << ""; } - + out << "\n"; } @@ -3411,17 +3412,17 @@ string CDisplaySeqalign::x_FormatAlignSortInfo() string alignSort = m_AlignTemplates->sortInfoTmpl; alignSort = CAlignFormatUtil::MapTemplate(alignSort,"id_label",m_CurrAlnID_DbLbl); alignSort = CAlignFormatUtil::MapTemplate(alignSort,"alnSeqGi",m_CurrAlnID_Lbl); - - string hsp_sort_value = m_Ctx->GetRequestValue("HSP_SORT").GetValue(); + + string hsp_sort_value = m_Ctx ? m_Ctx->GetRequestValue("HSP_SORT").GetValue() : kEmptyStr; int hsp_sort = hsp_sort_value == NcbiEmptyString ? 
0 : NStr::StringToInt(hsp_sort_value); for(int i = 0; i < 5; i++) { if(hsp_sort == i) { - alignSort = CAlignFormatUtil::MapTemplate(alignSort,"sorted_" + NStr::IntToString(hsp_sort),"sortAlnArrowLinkW"); + alignSort = CAlignFormatUtil::MapTemplate(alignSort,"sorted_" + NStr::IntToString(hsp_sort),"sortAlnArrowLinkW"); } else { - alignSort = CAlignFormatUtil::MapTemplate(alignSort,"sorted_" + NStr::IntToString(i),""); + alignSort = CAlignFormatUtil::MapTemplate(alignSort,"sorted_" + NStr::IntToString(i),""); } - } + } return alignSort; } @@ -3431,12 +3432,12 @@ void CDisplaySeqalign::x_DisplayBl2SeqLink(CNcbiOstream& out) const CBioseq_Handle& subject_handle=m_AV->GetBioseqHandle(1); CSeq_id_Handle query_seqid = GetId(query_handle, eGetId_Best); CSeq_id_Handle subject_seqid = GetId(subject_handle, eGetId_Best); - TGi query_gi = FindGi(query_handle.GetBioseqCore()->GetId()); + TGi query_gi = FindGi(query_handle.GetBioseqCore()->GetId()); TGi subject_gi = FindGi(subject_handle.GetBioseqCore()->GetId()); - - string url_link = CAlignFormatUtil::MapTemplate(kBl2seqUrl,"query",query_gi); - url_link = CAlignFormatUtil::MapTemplate(url_link,"subject",subject_gi); - + + string url_link = CAlignFormatUtil::MapTemplate(kBl2seqUrl,"query",query_gi); + url_link = CAlignFormatUtil::MapTemplate(url_link,"subject",subject_gi); + out << url_link << "\n"; } @@ -3446,10 +3447,10 @@ void CDisplaySeqalign::x_DisplayMpvAnchor(CNcbiOstream& out,SAlnInfo* aln_vec_in //add id anchor for mapviewer link string type_temp = m_BlastType; type_temp = NStr::TruncateSpaces(NStr::ToLower(type_temp)); - if(m_AlignOption&eHtml && + if(m_AlignOption&eHtml && (type_temp.find("genome") != string::npos || - type_temp == "mapview" || - type_temp == "mapview_prev" || + type_temp == "mapview" || + type_temp == "mapview_prev" || type_temp == "gsfasta" || type_temp == "gsfasta_prev")){ string subj_id_str; char buffer[126]; @@ -3457,29 +3458,29 @@ void CDisplaySeqalign::x_DisplayMpvAnchor(CNcbiOstream& 
out,SAlnInfo* aln_vec_in int master_stop = m_AV->GetSeqStop(0) + 1; int subject_start = m_AV->GetSeqStart(1) + 1; int subject_stop = m_AV->GetSeqStop(1) + 1; - + m_AV->GetSeqId(1).GetLabel(&subj_id_str, CSeq_id::eContent); - + sprintf(buffer, "", subj_id_str.c_str(), aln_vec_info->score, min(master_start, master_stop), max(master_start, master_stop), min(subject_start, subject_stop), max(subject_start, subject_stop)); - - out << buffer << "\n"; + + out << buffer << "\n"; } } string CDisplaySeqalign::x_FormatAlnBlastInfo(SAlnInfo* aln_vec_info) { string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf; - CAlignFormatUtil::GetScoreString(aln_vec_info->evalue, - aln_vec_info->bits, 0, 0, evalue_buf, + CAlignFormatUtil::GetScoreString(aln_vec_info->evalue, + aln_vec_info->bits, 0, 0, evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf); string alignParams = m_AlignTemplates->alignInfoTmpl; - + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_curr_num",NStr::IntToString(m_currAlignHsp + 1)); alignParams = CAlignFormatUtil::MapTemplate(alignParams,"alnSeqGi",m_CurrAlnID_Lbl);//not used now @@ -3492,32 +3493,32 @@ string CDisplaySeqalign::x_FormatAlnBlastInfo(SAlnInfo* aln_vec_info) hideNextNaviagtion = "disabled=\"disabled\""; } - const CRange& range = m_AV->GetSeqRange(1); + const CRange& range = m_AV->GetSeqRange(1); TSeqPos from = (range.GetFrom()> range.GetTo()) ? range.GetTo() : range.GetFrom() + 1; TSeqPos to = (range.GetFrom()> range.GetTo()) ? 
range.GetFrom() : range.GetTo() + 1; alignParams = CAlignFormatUtil::MapTemplate(alignParams,"fromHSP",from); - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"toHSP",to); + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"toHSP",to); alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_hide_prev",hidePrevNaviagtion); alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_hide_next",hideNextNaviagtion); alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_hide_fst",hideFirstNavigation); - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"firstSeqID",m_CurrAlnAccession);//displays the first accession if multiple + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"firstSeqID",m_CurrAlnAccession);//displays the first accession if multiple //current segment number = m_currAlignHsp + 1 alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_next_num",NStr::IntToString(m_currAlignHsp + 2)); alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_prev_num",NStr::IntToString(m_currAlignHsp)); - - if (m_SeqalignSetRef->Get().front()->CanGetType() && + + if (m_SeqalignSetRef->Get().front()->CanGetType() && m_SeqalignSetRef->Get().front()->GetType() == CSeq_align_Base::eType_global) { alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_score",aln_vec_info->score); } else { - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_score",bit_score_buf); - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_score_bits",aln_vec_info->score); + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_score",bit_score_buf); + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_score_bits",aln_vec_info->score); alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_eval",evalue_buf); - if (aln_vec_info->sum_n > 0) { + if (aln_vec_info->sum_n > 0) { alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_sumN",aln_vec_info->sum_n); alignParams = 
CAlignFormatUtil::MapTemplate(alignParams,"sumNshow","shown"); } @@ -3526,36 +3527,36 @@ string CDisplaySeqalign::x_FormatAlnBlastInfo(SAlnInfo* aln_vec_info) alignParams = CAlignFormatUtil::MapTemplate(alignParams,"sumNshow",""); } - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_params_method",(aln_vec_info->comp_adj_method == 1 || aln_vec_info->comp_adj_method == 2) ? m_AlignTemplates->alignInfoMethodTmpl: ""); - if (aln_vec_info->comp_adj_method == 1){ + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_params_method",(aln_vec_info->comp_adj_method == 1 || aln_vec_info->comp_adj_method == 2) ? m_AlignTemplates->alignInfoMethodTmpl: ""); + if (aln_vec_info->comp_adj_method == 1){ alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth","Composition-based stats."); - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth_hide","");//???? is that the same for all aligns??? + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth_hide","");//???? is that the same for all aligns??? } - else if (aln_vec_info->comp_adj_method == 2){ + else if (aln_vec_info->comp_adj_method == 2){ alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth","Compositional matrix adjust."); - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth_hide","");//???? is that the same for all aligns??? + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth_hide","");//???? is that the same for all aligns??? } else { - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth_hide","hidden");//???? is that the same for all aligns??? + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth_hide","hidden");//???? is that the same for all aligns??? 
alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth",""); - } - } - return alignParams; + } + } + return alignParams; } //sumN - hidden, cbs_md - shown, aln_frame - hidden -void CDisplaySeqalign::x_DisplayAlignInfo(CNcbiOstream& out, +void CDisplaySeqalign::x_DisplayAlignInfo(CNcbiOstream& out, SAlnInfo* aln_vec_info) { string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf; - CAlignFormatUtil::GetScoreString(aln_vec_info->evalue, - aln_vec_info->bits, 0, 0, evalue_buf, + CAlignFormatUtil::GetScoreString(aln_vec_info->evalue, + aln_vec_info->bits, 0, 0, evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf); - + CRef first_aln = m_SeqalignSetRef->Get().front(); - if (m_SeqalignSetRef->Get().front()->CanGetType() && + if (m_SeqalignSetRef->Get().front()->CanGetType() && m_SeqalignSetRef->Get().front()->GetType() == CSeq_align_Base::eType_global) { out<<" NW Score = "<< aln_vec_info->score; @@ -3564,10 +3565,10 @@ void CDisplaySeqalign::x_DisplayAlignInfo(CNcbiOstream& out, { // Disable bits score/evalue fields and only show raw // score for RMBlastN -RMH- - if ( m_AlignOption & eShowRawScoreOnly ) + if ( m_AlignOption & eShowRawScoreOnly ) { out<<" Score = "<score<<"\n"; - }else + }else { out<<" Score = "<score<<"),"<<" "; @@ -3585,27 +3586,27 @@ void CDisplaySeqalign::x_DisplayAlignInfo(CNcbiOstream& out, out << "\n"; } -//1. Display defline(s) +//1. Display defline(s) //2. Display Gene info //3. Display Bl2Seq TBLASTX link //4. add id anchor for mapviewer link -void CDisplaySeqalign::x_ShowAlnvecInfo(CNcbiOstream& out, +void CDisplaySeqalign::x_ShowAlnvecInfo(CNcbiOstream& out, SAlnInfo* aln_vec_info, - bool show_defline) + bool show_defline) { bool showSortControls = false; - if(show_defline) { - const CBioseq_Handle& bsp_handle=m_AV->GetBioseqHandle(1); + if(show_defline) { + const CBioseq_Handle& bsp_handle=m_AV->GetBioseqHandle(1); if(m_AlignOption&eShowBlastInfo) { if(!(m_AlignOption & eShowNoDeflineInfo)){ - //1. 
Display defline(s),Gene info + //1. Display defline(s),Gene info string deflines = x_PrintDefLine(bsp_handle, aln_vec_info); out<< deflines; //2. Format Gene info string geneInfo = x_DisplayGeneInfo(bsp_handle,aln_vec_info); - out<< geneInfo; - } - + out<< geneInfo; + } + if((m_AlignOption&eHtml) && (m_AlignOption&eShowBlastInfo) && (m_AlignOption&eShowBl2seqLink)) { //3. Display Bl2Seq TBLASTX link @@ -3617,12 +3618,12 @@ void CDisplaySeqalign::x_ShowAlnvecInfo(CNcbiOstream& out, } if (m_AlignOption&eShowBlastInfo) { //4. add id anchor for mapviewer link - x_DisplayMpvAnchor(out,aln_vec_info); + x_DisplayMpvAnchor(out,aln_vec_info); } - + //Displays sorting controls, features, Score, Expect, Idnt,Gaps,strand,positives,frames etc x_DisplaySingleAlignParams(out, aln_vec_info,showSortControls); - x_DisplayRowData(aln_vec_info->alnRowInfo,out); + x_DisplayRowData(aln_vec_info->alnRowInfo,out); } @@ -3632,7 +3633,7 @@ string CDisplaySeqalign::x_MapDefLine(SAlnDispParams *alnDispParams,bool isFirst, bool linkout,bool hideDefline,int seqLength) { /* - string firstSeqClassInfo = (isFirst) ? "" : "hidden"; //hide ">" sign if not first seq align + string firstSeqClassInfo = (isFirst) ? "" : "hidden"; //hide ">" sign if not first seq align string alnDefLine = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alnDefLineTmpl,"alnSeqSt",firstSeqClassInfo); */ string alnDefLine = m_AlignTemplates->alnDefLineTmpl; @@ -3640,9 +3641,9 @@ CDisplaySeqalign::x_MapDefLine(SAlnDispParams *alnDispParams,bool isFirst, bool string alnGi = (m_AlignOption&eShowGi && alnDispParams->gi > ZERO_GI) ? 
"gi|" + NStr::NumericToString(alnDispParams->gi) + "|" : ""; - string seqid; - if(!(alnDispParams->seqID->AsFastaString().find("gnl|BL_ORD_ID") != string::npos) || - alnDispParams->seqID->AsFastaString().find("lcl|Subject_") != string::npos){ + string seqid; + if(!(alnDispParams->seqID->AsFastaString().find("gnl|BL_ORD_ID") != string::npos) || + alnDispParams->seqID->AsFastaString().find("lcl|Subject_") != string::npos){ if (m_UseLongSeqIds) { seqid = alnDispParams->seqID->AsFastaString(); } @@ -3650,7 +3651,7 @@ CDisplaySeqalign::x_MapDefLine(SAlnDispParams *alnDispParams,bool isFirst, bool seqid = CAlignFormatUtil::GetBareId(*alnDispParams->seqID); } } - + if(alnDispParams->id_url != NcbiEmptyString) { string seqInfo = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alnSeqInfoTmpl,"aln_url",alnDispParams->id_url); string trgt = (m_AlignOption & eNewTargetWindow) ? "TARGET=\"EntrezView\"" : ""; @@ -3658,18 +3659,18 @@ CDisplaySeqalign::x_MapDefLine(SAlnDispParams *alnDispParams,bool isFirst, bool seqInfo = CAlignFormatUtil::MapTemplate(seqInfo,"aln_target",trgt); seqInfo = CAlignFormatUtil::MapTemplate(seqInfo,"aln_rid",m_Rid); - alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"seq_info",seqInfo); - alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"aln_gi",alnGi); - alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"aln_seqid",seqid); + alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"seq_info",seqInfo); + alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"aln_gi",alnGi); + alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"aln_seqid",seqid); } else { - alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"seq_info",alnGi + seqid); + alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"seq_info",alnGi + seqid); } string hspNum,isFirstDflAttr; if(isFirst) { - string totalHsps = m_Ctx->GetRequestValue("TOTAL_HSPS").GetValue(); //Future use - m_TotalHSPNum = totalHsps.empty() ? 
m_AlnLinksParams[m_AV->GetSeqId(1).GetSeqIdString()].hspNumber : NStr::StringToInt(totalHsps); - hspNum = (m_TotalHSPNum != 0) ? NStr::IntToString(m_TotalHSPNum) : ""; + string totalHsps = m_Ctx ? m_Ctx->GetRequestValue("TOTAL_HSPS").GetValue() : kEmptyStr; //Future use + m_TotalHSPNum = totalHsps.empty() ? m_AlnLinksParams[m_AV->GetSeqId(1).GetSeqIdString()].hspNumber : NStr::StringToInt(totalHsps); + hspNum = (m_TotalHSPNum != 0) ? NStr::IntToString(m_TotalHSPNum) : ""; } else { isFirstDflAttr = "hidden"; @@ -3682,41 +3683,41 @@ CDisplaySeqalign::x_MapDefLine(SAlnDispParams *alnDispParams,bool isFirst, bool alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"alnIdLbl",alnIdLbl); string linkoutStr, dnldLinkStr; if (linkout) { - linkoutStr = (!alnDispParams->linkoutStr.empty()) ? alnDispParams->linkoutStr : ""; + linkoutStr = (!alnDispParams->linkoutStr.empty()) ? alnDispParams->linkoutStr : ""; dnldLinkStr = alnDispParams->dumpGnlUrl; - } + } alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine ,"alnLinkout",linkoutStr); - alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine ,"dndlLinkt",dnldLinkStr); - alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"alnTitle",alnDispParams->title); + alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine ,"dndlLinkt",dnldLinkStr); + alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"alnTitle",alnDispParams->title); return alnDefLine; } string alnTitlesLinkTmpl; ///< Template for displaying link for more defline titles string alnTitlesTmpl; ///< Template for displaying multiple defline titles - + string CDisplaySeqalign::x_InitDefLinesHeader(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_vec_info) { - string deflines; + string deflines; string firstDefline; CNcbiEnvironment env; - list& use_this_seqid = aln_vec_info->use_this_seqid; - if(bsp_handle){ - const CRef bdlRef = CSeqDB::ExtractBlastDefline(bsp_handle); + list& use_this_seqid = aln_vec_info->use_this_seqid; + if(bsp_handle){ + const CRef bdlRef = 
CSeqDB::ExtractBlastDefline(bsp_handle); const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get(); bool isFirst = true; TGi firstGi = ZERO_GI; m_NumBlastDefLines = 0; - m_cur_align++; + m_cur_align++; SAlnDispParams *alnDispParams; //fill length - int seqLength = bsp_handle.GetBioseqLength(); + int seqLength = bsp_handle.GetBioseqLength(); if(bdl.empty()){ //no blast defline struct, should be no such case now //actually not so fast...as we now fetch from entrez even when it's not in blast db //there is no blast defline in such case. alnDispParams = x_FillAlnDispParams(bsp_handle); string alnDefLine = x_MapDefLine(alnDispParams,isFirst,false,false,seqLength); m_CurrAlnID_Lbl = (alnDispParams->gi != ZERO_GI) ? - NStr::NumericToString(alnDispParams->gi) : alnDispParams->label; + NStr::NumericToString(alnDispParams->gi) : alnDispParams->label; if (m_UseLongSeqIds || alnDispParams->seqID->IsLocal()) { m_CurrAlnAccession = alnDispParams->seqID->AsFastaString(); } @@ -3731,18 +3732,18 @@ CDisplaySeqalign::x_InitDefLinesHeader(const CBioseq_Handle& bsp_handle,SAlnInfo firstDefline = alnDefLine; m_NumBlastDefLines++; } else { - //format each defline - int numBdl = 0; - for(list< CRef< CBlast_def_line > >::const_iterator - iter = bdl.begin(); iter != bdl.end(); iter++){ - alnDispParams = x_FillAlnDispParams(*iter,bsp_handle,use_this_seqid,firstGi,numBdl); + //format each defline + int numBdl = 0; + for(list< CRef< CBlast_def_line > >::const_iterator + iter = bdl.begin(); iter != bdl.end(); iter++){ + alnDispParams = x_FillAlnDispParams(*iter,bsp_handle,use_this_seqid,firstGi,numBdl); if(alnDispParams) { - numBdl++; - bool hideDefline = (numBdl > 1)? true : false; - string alnDefLine = x_MapDefLine(alnDispParams,isFirst,m_AlignOption&eLinkout,hideDefline,seqLength); + numBdl++; + bool hideDefline = (numBdl > 1)? 
true : false; + string alnDefLine = x_MapDefLine(alnDispParams,isFirst,m_AlignOption&eLinkout,hideDefline,seqLength); if(isFirst){ const CSeq_id& aln_id = m_AV->GetSeqId(1); - TGi alnGi; + TGi alnGi; CRef dispId = CAlignFormatUtil::GetDisplayIds(bsp_handle,aln_id,use_this_seqid,&alnGi); m_CurrAlnID_Lbl = (alnGi == ZERO_GI) ? CAlignFormatUtil::GetLabel(dispId) : NStr::NumericToString(alnGi); if(alnGi == ZERO_GI) { @@ -3751,9 +3752,9 @@ CDisplaySeqalign::x_InitDefLinesHeader(const CBioseq_Handle& bsp_handle,SAlnInfo else { m_CurrAlnID_DbLbl = m_CurrAlnID_Lbl; } - + firstGi = alnGi; - + //This should probably change on dispId if (m_UseLongSeqIds) { m_CurrAlnAccession = @@ -3765,51 +3766,51 @@ CDisplaySeqalign::x_InitDefLinesHeader(const CBioseq_Handle& bsp_handle,SAlnInfo *alnDispParams->seqID); } if(m_CurrAlnAccession.find("gnl|BL_ORD_ID") != string::npos || - m_CurrAlnAccession.find("lcl|Subject_") != string::npos){ + m_CurrAlnAccession.find("lcl|Subject_") != string::npos){ ///Get first token of the title vector parts; NStr::Split(alnDispParams->title," ",parts); if(parts.size() > 0) { - m_CurrAlnAccession = parts[0]; + m_CurrAlnAccession = parts[0]; } - } - } + } + } //1. isFirst && firstGi == ZERO_GI - covers resource links for non-gis databases - //2. alnDispParams->gi == firstGi - covers resource links for gi databases/ + //2. 
alnDispParams->gi == firstGi - covers resource links for gi databases/ if( (isFirst && firstGi == ZERO_GI) || (alnDispParams->gi == firstGi && firstGi != ZERO_GI) ) { - //Get custom links only for the first gi + //Get custom links only for the first gi int linksDisplayOption = eDisplayResourcesLinks; if(seqLength > k_GetSubseqThreshhold) { linksDisplayOption += eDisplayDownloadLink; - } + } x_InitAlignLinks(alnDispParams,bdl,linksDisplayOption); firstDefline = alnDefLine; } - else { + else { deflines += alnDefLine; //this contains all deflines except the first one - } + } if(isFirst) { - isFirst = false; + isFirst = false; } if(m_AlignTemplates->alnTitlesTmpl.empty() && !firstDefline.empty()) { m_NumBlastDefLines = 1; break; } - + delete alnDispParams; } - } - m_NumBlastDefLines = numBdl; - } + } + m_NumBlastDefLines = numBdl; + } if(m_NumBlastDefLines == 1) { - deflines = firstDefline; + deflines = firstDefline; } else { string alnTitles = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alnTitlesTmpl,"seqTitles",deflines); string alnTitleslnk = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alnTitlesLinkTmpl,"titleNum",NStr::IntToString(m_NumBlastDefLines - 1)); - deflines = firstDefline + alnTitleslnk + alnTitles; + deflines = firstDefline + alnTitleslnk + alnTitles; } - } + } return deflines; } @@ -3817,25 +3818,25 @@ CDisplaySeqalign::x_InitDefLinesHeader(const CBioseq_Handle& bsp_handle,SAlnInfo string CDisplaySeqalign::x_FormatDefLinesHeader(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_vec_info) { - CNcbiOstrstream out; + CNcbiOstrstream out; string deflines, linkOutStr,customLinkStr; list linkoutStr; m_CurrAlnID_DbLbl = ""; - if(bsp_handle){ + if(bsp_handle){ deflines = x_InitDefLinesHeader(bsp_handle,aln_vec_info); - - if(m_CustomLinksList.size() > 0) { + + if(m_CustomLinksList.size() > 0) { ITERATE(list, iter_custList, m_CustomLinksList){ customLinkStr += *iter_custList; } } - if(m_LinkoutList.size() > 0) { + if(m_LinkoutList.size() > 0) { ITERATE(list, 
iter_List, m_LinkoutList){ linkOutStr += *iter_List; - } - } - } + } + } + } //fill deflines string alignInfo = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alignHeaderTmpl,"aln_deflines",deflines); @@ -3843,98 +3844,98 @@ CDisplaySeqalign::x_FormatDefLinesHeader(const CBioseq_Handle& bsp_handle,SAlnIn int alnSeqTitlesNum = (m_NumBlastDefLines > k_MaxDeflinesToShow) ? m_NumBlastDefLines - k_MinDeflinesToShow : 0; string alnSeqTitlesShow = (m_NumBlastDefLines > k_MaxDeflinesToShow) ? "" : "hidden"; alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqTitlesNum", NStr::IntToString(alnSeqTitlesNum)); - alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqTitlesShow",alnSeqTitlesShow); + alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqTitlesShow",alnSeqTitlesShow); + + - - //fill sequence checkbox string seqRetrieval = ((m_AlignOption&eSequenceRetrieval) && m_CanRetrieveSeq) ? "" : "hidden"; alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqGi",m_CurrAlnID_Lbl); - alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnQueryNum",NStr::IntToString(m_QueryNumber)); + alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnQueryNum",NStr::IntToString(m_QueryNumber)); alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqRet",seqRetrieval); - + alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnLinkOutLinks",linkOutStr); alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnCustomLinks",customLinkStr); - //fill id info - alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"firstSeqID",m_CurrAlnAccession); + //fill id info + alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"firstSeqID",m_CurrAlnAccession); - string isGenbankAttr = (NStr::Find(customLinkStr,"GenBank") == NPOS && NStr::Find(customLinkStr,"GenPept") == NPOS)? "hidden" : ""; + string isGenbankAttr = (NStr::Find(customLinkStr,"GenBank") == NPOS && NStr::Find(customLinkStr,"GenPept") == NPOS)? 
"hidden" : ""; alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"dwGnbn",isGenbankAttr); - + string hideDndl = (m_BlastType == "sra")? "hidden":""; alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"hideDndl",hideDndl); alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"aln_ord_pos",NStr::IntToString(m_cur_align)); - + //The next two lines are not used for now //alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnFASTA",m_FASTAlinkUrl); //alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnRegFASTA",m_AlignedRegionsUrl); - + //fill sort info - string sortInfo; - if(m_TotalHSPNum > 1) { + string sortInfo; + if(m_TotalHSPNum > 1) { //3. Display sort info - sortInfo = x_FormatAlignSortInfo(); + sortInfo = x_FormatAlignSortInfo(); } alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"sortInfo",sortInfo); - + return alignInfo; } -//1. Display defline(s) +//1. Display defline(s) //2. Display Gene info //3. Display Bl2Seq TBLASTX link -void CDisplaySeqalign::x_ShowAlnvecInfoTemplate(CNcbiOstream& out, +void CDisplaySeqalign::x_ShowAlnvecInfoTemplate(CNcbiOstream& out, SAlnInfo* aln_vec_info, - bool show_defline) + bool show_defline) { string alignHeader; - string sortOneAln = m_Ctx ? m_Ctx->GetRequestValue("SORT_ONE_ALN").GetValue() : kEmptyStr; - if(show_defline) { - const CBioseq_Handle& bsp_handle=m_AV->GetBioseqHandle(1); + string sortOneAln = m_Ctx ? m_Ctx->GetRequestValue("SORT_ONE_ALN").GetValue() : kEmptyStr; + if(show_defline) { + const CBioseq_Handle& bsp_handle=m_AV->GetBioseqHandle(1); //1. Display defline(s),Gene info string alignHeader = x_FormatDefLinesHeader(bsp_handle, aln_vec_info); /**2. Format Gene info - string geneInfo = x_DisplayGeneInfo(bsp_handle,aln_vec_info); + string geneInfo = x_DisplayGeneInfo(bsp_handle,aln_vec_info); alignHeader = CAlignFormatUtil::MapTemplate(alignHeader,"aln_gene_info",geneInfo); **/ if(sortOneAln.empty()) { - out<< alignHeader; + out<< alignHeader; if(m_AlignOption&eShowBl2seqLink) { //3. 
Display Bl2Seq TBLASTX link x_DisplayBl2SeqLink(out); - } + } } //start counting hsp - string currHsp = m_Ctx->GetRequestValue("HSP_START").GetValue(); - m_currAlignHsp = currHsp.empty() ? 0: NStr::StringToInt(currHsp); - } + string currHsp = m_Ctx ? m_Ctx->GetRequestValue("HSP_START").GetValue() : kEmptyStr; + m_currAlignHsp = currHsp.empty() ? 0: NStr::StringToInt(currHsp); + } if (m_AlignOption&eShowBlastInfo) { //4. add id anchor for mapviewer link - x_DisplayMpvAnchor(out,aln_vec_info); + x_DisplayMpvAnchor(out,aln_vec_info); } - - //Displays sorting controls, features, Score, Expect, Idnt,Gaps,strand,positives,frames etc + + //Displays sorting controls, features, Score, Expect, Idnt,Gaps,strand,positives,frames etc string alignInfo = x_FormatSingleAlign(aln_vec_info); - out << alignInfo; + out << alignInfo; } -void CDisplaySeqalign::x_DisplayAlnvecInfo(CNcbiOstream& out, +void CDisplaySeqalign::x_DisplayAlnvecInfo(CNcbiOstream& out, SAlnInfo* aln_vec_info, - bool show_defline) + bool show_defline) { m_AV = aln_vec_info->alnvec; - //Calculate Dynamic Features in aln_vec_info - x_PrepareDynamicFeatureInfo(aln_vec_info); + //Calculate Dynamic Features in aln_vec_info + x_PrepareDynamicFeatureInfo(aln_vec_info); //Calculate row data for actual alignment display aln_vec_info->alnRowInfo = x_PrepareRowData(); - //Calculate indentity data in aln_vec_info + //Calculate indentity data in aln_vec_info if((m_AlignOption & eShowBlastInfo) || (m_AlignOption & eShowMiddleLine)){ x_PrepareIdentityInfo(aln_vec_info); } @@ -3943,8 +3944,8 @@ void CDisplaySeqalign::x_DisplayAlnvecInfo(CNcbiOstream& out, } else { x_ShowAlnvecInfoTemplate(out,aln_vec_info,show_defline); - } - + } + delete aln_vec_info->alnRowInfo; out<<"\n"; @@ -3952,13 +3953,13 @@ void CDisplaySeqalign::x_DisplayAlnvecInfo(CNcbiOstream& out, //Displays features, Score Expect, Idnt,Gaps,strand -void CDisplaySeqalign::x_DisplaySingleAlignParams(CNcbiOstream& out, +void 
CDisplaySeqalign::x_DisplaySingleAlignParams(CNcbiOstream& out, SAlnInfo* aln_vec_info, - bool showSortControls) + bool showSortControls) { - if (m_AlignOption&eShowBlastInfo) { - - if(showSortControls && m_AlignOption&eHtml && + if (m_AlignOption&eShowBlastInfo) { + + if(showSortControls && m_AlignOption&eHtml && m_AlnLinksParams[m_AV->GetSeqId(1).GetSeqIdString()].hspNumber > 1 && m_AlignOption & eShowSortControls){ //3. Display sort info @@ -3966,30 +3967,30 @@ void CDisplaySeqalign::x_DisplaySingleAlignParams(CNcbiOstream& out, } //output dynamic feature lines - if(aln_vec_info->feat_list.size() > 0 || aln_vec_info->feat5 || aln_vec_info->feat3 ){ + if(aln_vec_info->feat_list.size() > 0 || aln_vec_info->feat5 || aln_vec_info->feat3 ){ //6. Display Dynamic Features - x_PrintDynamicFeatures(out,aln_vec_info); + x_PrintDynamicFeatures(out,aln_vec_info); } - + //7. Display score,bits,expect,method x_DisplayAlignInfo(out,aln_vec_info); } - + if((m_AlignOption & eShowBlastInfo) || (m_AlignOption & eShowMiddleLine)){ //8.Display Identities,positives,strand, frames etc //x_DisplayIdentityInfo(aln_vec_info->alnRowInfo, out); - s_DisplayIdentityInfo(out, - (int)m_AV->GetAlnStop(), - aln_vec_info->identity, - aln_vec_info->positive, - aln_vec_info->match, + s_DisplayIdentityInfo(out, + (int)m_AV->GetAlnStop(), + aln_vec_info->identity, + aln_vec_info->positive, + aln_vec_info->match, aln_vec_info->gap, - m_AV->StrandSign(0), + m_AV->StrandSign(0), m_AV->StrandSign(1), - aln_vec_info->alnRowInfo->frame[0], - aln_vec_info->alnRowInfo->frame[1], + aln_vec_info->alnRowInfo->frame[0], + aln_vec_info->alnRowInfo->frame[1], ((m_AlignType & eProt) != 0 ? true : false)); - } + } } //
<@alnHSPLinks@>
@@ -3997,67 +3998,67 @@ string CDisplaySeqalign:: x_FormatAlnHSPLinks(string &alignInfo) { string hspLinks; - if(m_HSPLinksList.size() > 0) { - const CRange& range = m_AV->GetSeqRange(1); + if(m_HSPLinksList.size() > 0) { + const CRange& range = m_AV->GetSeqRange(1); TSeqPos from = (range.GetFrom()> range.GetTo()) ? range.GetTo() : range.GetFrom() + 1; TSeqPos to = (range.GetFrom()> range.GetTo()) ? range.GetFrom() : range.GetTo() + 1; - int addToRange = (int)((to - from) * 0.05);//add 5% to each side - int fromAdjust = max(0,(int)from - addToRange); - int toAdjust = to + addToRange; + int addToRange = (int)((to - from) * 0.05);//add 5% to each side + int fromAdjust = max(0,(int)from - addToRange); + int toAdjust = to + addToRange; string customLinkStr; ITERATE(list, iter_custList, m_HSPLinksList){ - string singleLink = CAlignFormatUtil::MapTemplate(*iter_custList,"from",fromAdjust); + string singleLink = CAlignFormatUtil::MapTemplate(*iter_custList,"from",fromAdjust); singleLink = CAlignFormatUtil::MapTemplate(singleLink,"to",toAdjust); singleLink = CAlignFormatUtil::MapTemplate(singleLink,"fromHSP",from); - singleLink = CAlignFormatUtil::MapTemplate(singleLink,"toHSP",to); + singleLink = CAlignFormatUtil::MapTemplate(singleLink,"toHSP",to); hspLinks += singleLink; - } + } alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"fromHSP",from); - alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"toHSP",to); - } - string multiHSP = (hspLinks.empty()) ? "hidden" : "" ; + alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"toHSP",to); + } + string multiHSP = (hspLinks.empty()) ? 
"hidden" : "" ; + - alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnHSPLinks",hspLinks); alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"multiHSP",multiHSP); alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"firstSeqID",m_CurrAlnAccession); - return alignInfo; + return alignInfo; } //Displays features, Score Expect, Idnt,Gaps,strand -string CDisplaySeqalign::x_FormatSingleAlign(SAlnInfo* aln_vec_info) +string CDisplaySeqalign::x_FormatSingleAlign(SAlnInfo* aln_vec_info) { string alignInfo; - - if (m_AlignOption&eShowBlastInfo) { - + + if (m_AlignOption&eShowBlastInfo) { + //7. Display score,bits,expect,method alignInfo = x_FormatAlnBlastInfo(aln_vec_info); - //8.Display Identities,positives,strands, frames etc + //8.Display Identities,positives,strands, frames etc alignInfo = x_FormatIdentityInfo(alignInfo, aln_vec_info); - + //output dynamic feature lines //only for aln_vec_info->feat_list.size() > 0 || aln_vec_info->feat5 || aln_vec_info->feat3 - //6. Display Dynamic Features - alignInfo = x_FormatDynamicFeaturesInfo(alignInfo, aln_vec_info); - } - - alignInfo = (alignInfo.empty()) ? m_AlignTemplates->alignInfoTmpl : alignInfo; - alignInfo = x_FormatAlnHSPLinks(alignInfo); - - m_currAlignHsp++; + //6. Display Dynamic Features + alignInfo = x_FormatDynamicFeaturesInfo(alignInfo, aln_vec_info); + } + + alignInfo = (alignInfo.empty()) ? m_AlignTemplates->alignInfoTmpl : alignInfo; + alignInfo = x_FormatAlnHSPLinks(alignInfo); + + m_currAlignHsp++; string alignRowsTemplate = (m_currAlignHsp == m_TotalHSPNum) ? 
m_AlignTemplates->alignRowTmplLast : m_AlignTemplates->alignRowTmpl; - + string alignRows = x_DisplayRowData(aln_vec_info->alnRowInfo); alignRows = CAlignFormatUtil::MapTemplate(alignRowsTemplate,"align_rows",alignRows); alignRows = CAlignFormatUtil::MapTemplate(alignRows,"aln_curr_num",NStr::IntToString(m_currAlignHsp)); alignRows = CAlignFormatUtil::MapTemplate(alignRows,"alnSeqGi",m_CurrAlnID_Lbl); alignInfo += alignRows; - return alignInfo; + return alignInfo; } @@ -4066,10 +4067,10 @@ void CDisplaySeqalign::x_PrepareDynamicFeatureInfo(SAlnInfo* aln_vec_info) { aln_vec_info->feat5 = NULL; aln_vec_info->feat3 = NULL; - aln_vec_info->feat_list.clear(); - //Calculate Dynamic Features in aln_vec_info - if((m_AlignOption&eDynamicFeature) - && (int)m_AV->GetBioseqHandle(1).GetBioseqLength() + aln_vec_info->feat_list.clear(); + //Calculate Dynamic Features in aln_vec_info + if((m_AlignOption&eDynamicFeature) + && (int)m_AV->GetBioseqHandle(1).GetBioseqLength() >= k_GetDynamicFeatureSeqLength){ if(m_DynamicFeature){ const CSeq_id& subject_seqid = m_AV->GetSeqId(1); @@ -4083,38 +4084,38 @@ void CDisplaySeqalign::x_PrepareDynamicFeatureInfo(SAlnInfo* aln_vec_info) const CBioseq_Handle& subject_handle=m_AV->GetBioseqHandle(1); aln_vec_info->subject_gi = FindGi(subject_handle.GetBioseqCore()->GetId()); aln_vec_info->feat_list = m_DynamicFeature->GetFeatInfo(id_str, aln_vec_info->actual_range, aln_vec_info->feat5, aln_vec_info->feat3, 2); - } + } } } -static string s_MapFeatureURL(string viewerURL, +static string s_MapFeatureURL(string viewerURL, string textSeqID, - string db, - int fromRange, + string db, + int fromRange, int toRange, string rid) -{ - string url_link = CAlignFormatUtil::MapTemplate(viewerURL,"db",db); - url_link = CAlignFormatUtil::MapTemplate(url_link,"gi",textSeqID); - url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",rid); - url_link = CAlignFormatUtil::MapTemplate(url_link,"from",fromRange); - url_link = 
CAlignFormatUtil::MapTemplate(url_link,"to",toRange); +{ + string url_link = CAlignFormatUtil::MapTemplate(viewerURL,"db",db); + url_link = CAlignFormatUtil::MapTemplate(url_link,"gi",textSeqID); + url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",rid); + url_link = CAlignFormatUtil::MapTemplate(url_link,"from",fromRange); + url_link = CAlignFormatUtil::MapTemplate(url_link,"to",toRange); return url_link; } string CDisplaySeqalign::x_FormatOneDynamicFeature(string viewerURL, - TGi subject_gi, - int fromRange, + TGi subject_gi, + int fromRange, int toRange, string featText) { string alignFeature = m_AlignTemplates->alignFeatureTmpl; string textSeqID; - + if(subject_gi > ZERO_GI) { - //if(CAlignFormatUtil::GetTextSeqID((CConstRef)&m_AV->GetSeqId(1))) { - alignFeature = CAlignFormatUtil::MapTemplate(alignFeature,"aln_feat_info",m_AlignTemplates->alignFeatureLinkTmpl); - string url = s_MapFeatureURL(viewerURL, + //if(CAlignFormatUtil::GetTextSeqID((CConstRef)&m_AV->GetSeqId(1))) { + alignFeature = CAlignFormatUtil::MapTemplate(alignFeature,"aln_feat_info",m_AlignTemplates->alignFeatureLinkTmpl); + string url = s_MapFeatureURL(viewerURL, m_CurrAlnAccession, string(m_IsDbNa ? "nucleotide" : "protein"), fromRange + 1, @@ -4131,34 +4132,34 @@ string CDisplaySeqalign::x_FormatOneDynamicFeature(string viewerURL, //6. 
Display Dynamic Features -string CDisplaySeqalign::x_FormatDynamicFeaturesInfo(string alignInfo, SAlnInfo* aln_vec_info) +string CDisplaySeqalign::x_FormatDynamicFeaturesInfo(string alignInfo, SAlnInfo* aln_vec_info) { string alignParams = alignInfo; //string alignFeature = m_AlignTemplates->alignFeatureTmpl; - - + + string viewerURL = CAlignFormatUtil::GetURLFromRegistry("ENTREZ_SUBSEQ_TM"); string allAlnFeatures = ""; if(aln_vec_info->feat_list.size() > 0) { //has feature in this range - ITERATE(vector, iter, aln_vec_info->feat_list){ - + ITERATE(vector, iter, aln_vec_info->feat_list){ + string alignFeature = x_FormatOneDynamicFeature(viewerURL, aln_vec_info->subject_gi, (*iter)->range.GetFrom(), (*iter)->range.GetTo(), (*iter)->feat_str); - - ///TO DO: NO hyperlink if aln_vec_info->subject_gi == 0 - - allAlnFeatures += alignFeature; + + ///TO DO: NO hyperlink if aln_vec_info->subject_gi == 0 + + allAlnFeatures += alignFeature; } } else { //show flank features - if(aln_vec_info->feat5 || aln_vec_info->feat3){ + if(aln_vec_info->feat5 || aln_vec_info->feat3){ //TO DO: Check if we need that //out << " Features flanking this part of subject sequence:" << "\n"; } - if(aln_vec_info->feat5){ + if(aln_vec_info->feat5){ string alignFeature = x_FormatOneDynamicFeature(viewerURL, aln_vec_info->subject_gi, aln_vec_info->feat5->range.GetFrom(), @@ -4166,8 +4167,8 @@ string CDisplaySeqalign::x_FormatDynamicFeaturesInfo(string alignInfo, SAlnInfo* NStr::IntToString(aln_vec_info->actual_range.GetFrom() - aln_vec_info->feat5->range.GetTo()) + (string)" bp at 5' side: " + aln_vec_info->feat5->feat_str); allAlnFeatures += alignFeature; } - if(aln_vec_info->feat3){ - + if(aln_vec_info->feat3){ + string alignFeature = x_FormatOneDynamicFeature(viewerURL, aln_vec_info->subject_gi, aln_vec_info->feat3->range.GetFrom(), @@ -4176,18 +4177,18 @@ string CDisplaySeqalign::x_FormatDynamicFeaturesInfo(string alignInfo, SAlnInfo* allAlnFeatures += alignFeature; } } - 
if(!allAlnFeatures.empty()) { + if(!allAlnFeatures.empty()) { alignParams = CAlignFormatUtil::MapTemplate(alignParams,"all_aln_features",allAlnFeatures); - alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_feat_show",""); + alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_feat_show",""); } else { alignParams = CAlignFormatUtil::MapTemplate(alignParams,"all_aln_features",""); alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_feat_show","hidden"); - } + } return alignParams; } -void CDisplaySeqalign::x_PrintDynamicFeatures(CNcbiOstream& out,SAlnInfo* aln_vec_info) +void CDisplaySeqalign::x_PrintDynamicFeatures(CNcbiOstream& out,SAlnInfo* aln_vec_info) { string l_EntrezSubseqUrl = CAlignFormatUtil::GetURLFromRegistry("ENTREZ_SUBSEQ"); @@ -4196,60 +4197,60 @@ void CDisplaySeqalign::x_PrintDynamicFeatures(CNcbiOstream& out,SAlnInfo* aln_ve ITERATE(vector, iter, aln_vec_info->feat_list){ out << " "; if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){ - string featStr = s_MapFeatureURL(l_EntrezSubseqUrl, - NStr::NumericToString(aln_vec_info->subject_gi), - m_IsDbNa ? "nucleotide" : "protein", - (*iter)->range.GetFrom() +1 , + string featStr = s_MapFeatureURL(l_EntrezSubseqUrl, + NStr::NumericToString(aln_vec_info->subject_gi), + m_IsDbNa ? "nucleotide" : "protein", + (*iter)->range.GetFrom() +1 , (*iter)->range.GetTo() + 1, - m_Rid); + m_Rid); out << featStr; - } + } out << (*iter)->feat_str; if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){ out << ""; - } + } out << "\n"; } } else { //show flank features - if(aln_vec_info->feat5 || aln_vec_info->feat3){ + if(aln_vec_info->feat5 || aln_vec_info->feat3){ out << " Features flanking this part of subject sequence:" << "\n"; } if(aln_vec_info->feat5){ out << " "; if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){ - string featStr = s_MapFeatureURL(l_EntrezSubseqUrl, - NStr::NumericToString(aln_vec_info->subject_gi), - m_IsDbNa ? 
"nucleotide" : "protein", - aln_vec_info->feat5->range.GetFrom() + 1 , + string featStr = s_MapFeatureURL(l_EntrezSubseqUrl, + NStr::NumericToString(aln_vec_info->subject_gi), + m_IsDbNa ? "nucleotide" : "protein", + aln_vec_info->feat5->range.GetFrom() + 1 , aln_vec_info->feat5->range.GetTo() + 1, m_Rid); out << featStr; - } - out << aln_vec_info->actual_range.GetFrom() - aln_vec_info->feat5->range.GetTo() + } + out << aln_vec_info->actual_range.GetFrom() - aln_vec_info->feat5->range.GetTo() << " bp at 5' side: " << aln_vec_info->feat5->feat_str; if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){ out << ""; - } + } out << "\n"; } if(aln_vec_info->feat3){ out << " "; if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){ - string featStr = s_MapFeatureURL(l_EntrezSubseqUrl, - NStr::NumericToString(aln_vec_info->subject_gi), - m_IsDbNa ? "nucleotide" : "protein", - aln_vec_info->feat3->range.GetFrom() + 1 , + string featStr = s_MapFeatureURL(l_EntrezSubseqUrl, + NStr::NumericToString(aln_vec_info->subject_gi), + m_IsDbNa ? 
"nucleotide" : "protein", + aln_vec_info->feat3->range.GetFrom() + 1 , aln_vec_info->feat3->range.GetTo() + 1, m_Rid); out << featStr; } - out << aln_vec_info->feat3->range.GetFrom() - aln_vec_info->actual_range.GetTo() + out << aln_vec_info->feat3->range.GetFrom() - aln_vec_info->actual_range.GetTo() << " bp at 3' side: " << aln_vec_info->feat3->feat_str; if(m_AlignOption&eHtml){ out << ""; - } + } out << "\n"; } } @@ -4258,8 +4259,8 @@ void CDisplaySeqalign::x_PrintDynamicFeatures(CNcbiOstream& out,SAlnInfo* aln_ve } } -void -CDisplaySeqalign::x_FillLocList(TSAlnSeqlocInfoList& loc_list, +void +CDisplaySeqalign::x_FillLocList(TSAlnSeqlocInfoList& loc_list, const list< CRef >* masks) const { if ( !masks ) { @@ -4277,30 +4278,30 @@ CDisplaySeqalign::x_FillLocList(TSAlnSeqlocInfoList& loc_list, int actualAlnStart = 0, actualAlnStop = 0; if(m_AV->IsPositiveStrand(i)){ actualAlnStart = - m_AV->GetAlnPosFromSeqPos(i, + m_AV->GetAlnPosFromSeqPos(i, interval.GetFrom(), CAlnMap::eBackwards, true); actualAlnStop = - m_AV->GetAlnPosFromSeqPos(i, + m_AV->GetAlnPosFromSeqPos(i, interval.GetTo(), CAlnMap::eBackwards, true); } else { actualAlnStart = - m_AV->GetAlnPosFromSeqPos(i, + m_AV->GetAlnPosFromSeqPos(i, interval.GetTo(), CAlnMap::eBackwards, true); actualAlnStop = - m_AV->GetAlnPosFromSeqPos(i, + m_AV->GetAlnPosFromSeqPos(i, interval.GetFrom(), CAlnMap::eBackwards, true); } - alnloc->aln_range.Set(actualAlnStart, actualAlnStop); + alnloc->aln_range.Set(actualAlnStart, actualAlnStop); has_valid_loc = true; break; } } if (has_valid_loc) { - alnloc->seqloc = *iter; + alnloc->seqloc = *iter; loc_list.push_back(alnloc); } } @@ -4309,26 +4310,26 @@ CDisplaySeqalign::x_FillLocList(TSAlnSeqlocInfoList& loc_list, void CDisplaySeqalign::x_GetQueryFeatureList(int row_num, int aln_stop, - vector& retval) + vector& retval) const { retval.clear(); retval.resize(row_num); //list* bioseqFeature= new list[row_num]; if(m_QueryFeature){ - for (list::iterator iter=m_QueryFeature->begin(); 
+ for (list::iterator iter=m_QueryFeature->begin(); iter!=m_QueryFeature->end(); iter++){ for(int i = 0; i < row_num; i++){ if((*iter)->seqloc->GetInt().GetId().Match(m_AV->GetSeqId(i))){ int actualSeqStart = 0, actualSeqStop = 0; if(m_AV->IsPositiveStrand(i)){ - if((*iter)->seqloc->GetInt().GetFrom() + if((*iter)->seqloc->GetInt().GetFrom() < m_AV->GetSeqStart(i)){ actualSeqStart = m_AV->GetSeqStart(i); } else { actualSeqStart = (*iter)->seqloc->GetInt().GetFrom(); } - + if((*iter)->seqloc->GetInt().GetTo() > m_AV->GetSeqStop(i)){ actualSeqStop = m_AV->GetSeqStop(i); @@ -4336,14 +4337,14 @@ CDisplaySeqalign::x_GetQueryFeatureList(int row_num, int aln_stop, actualSeqStop = (*iter)->seqloc->GetInt().GetTo(); } } else { - if((*iter)->seqloc->GetInt().GetFrom() + if((*iter)->seqloc->GetInt().GetFrom() < m_AV->GetSeqStart(i)){ actualSeqStart = (*iter)->seqloc->GetInt().GetFrom(); } else { actualSeqStart = m_AV->GetSeqStart(i); } - - if((*iter)->seqloc->GetInt().GetTo() > + + if((*iter)->seqloc->GetInt().GetTo() > m_AV->GetSeqStop(i)){ actualSeqStop = (*iter)->seqloc->GetInt().GetTo(); } else { @@ -4352,13 +4353,13 @@ CDisplaySeqalign::x_GetQueryFeatureList(int row_num, int aln_stop, } int alnFrom = m_AV->GetAlnPosFromSeqPos(i, actualSeqStart); int alnTo = m_AV->GetAlnPosFromSeqPos(i, actualSeqStop); - + CRef featInfo(new SAlnFeatureInfo); string tempFeat = NcbiEmptyString; if (alnTo - alnFrom >= 0){ - x_SetFeatureInfo(featInfo, *((*iter)->seqloc), alnFrom, + x_SetFeatureInfo(featInfo, *((*iter)->seqloc), alnFrom, alnTo, aln_stop, (*iter)->feature_char, - (*iter)->feature_id, tempFeat); + (*iter)->feature_id, tempFeat); retval[i].push_back(featInfo); } } @@ -4369,11 +4370,11 @@ CDisplaySeqalign::x_GetQueryFeatureList(int row_num, int aln_stop, static void s_MakeDomainString(int aln_from, int aln_to, const string& domain_name, string& final_domain) { - + string domain_string(aln_to - aln_from + 1, ' '); - + if (domain_string.size() > 2){ - + for (int i = 0; i < 
(int)domain_string.size(); i++){ domain_string[i] = '-'; } @@ -4383,12 +4384,12 @@ static void s_MakeDomainString(int aln_from, int aln_to, const string& domain_na int midpoint = domain_string.size()/2; int first_possible_pos = 1; int actual_first_pos = max(first_possible_pos, midpoint - ((int)domain_name.size())/2); - + for (int i = actual_first_pos, j = 0; i < domain_string.size() - 1 && j < domain_name.size(); i ++, j ++){ domain_string[i] = domain_name[j]; } } - + for (int i = 0; i < (int)domain_string.size(); i++){ final_domain[i + aln_from] = domain_string[i]; } @@ -4397,30 +4398,30 @@ static void s_MakeDomainString(int aln_from, int aln_to, const string& domain_na void CDisplaySeqalign::x_GetDomainInfo(int row_num, int aln_stop, vector& retval) const { - + if(m_DomainInfo && !m_DomainInfo->empty()){ string final_domain (m_AV->GetAlnStop() + 1, ' '); - int last_aln_to = m_AV->GetAlnStop(); - for (list >::iterator iter=m_DomainInfo->begin(); + int last_aln_to = m_AV->GetAlnStop(); + for (list >::iterator iter=m_DomainInfo->begin(); iter!=m_DomainInfo->end(); iter++){ if((*iter)->seqloc->GetInt().GetId().Match(m_AV->GetSeqId(0))){ int actualSeqStart = 0, actualSeqStop = 0; - if(m_AV->IsPositiveStrand(0)){ //only show domain on positive strand + if(m_AV->IsPositiveStrand(0)){ //only show domain on positive strand actualSeqStart = max((int)m_AV->GetSeqStart(0), (int)(*iter)->seqloc->GetInt().GetFrom()); - + actualSeqStop = min((int)m_AV->GetSeqStop(0), (int)(*iter)->seqloc->GetInt().GetTo()); - + int alnFrom = m_AV->GetAlnPosFromSeqPos(0, actualSeqStart); //check if there is gap between this and last seq position on master if (actualSeqStart > 0 && (*iter)->is_subject_start_valid) { - if (alnFrom - + if (alnFrom - m_AV->GetAlnPosFromSeqPos(0, actualSeqStart - 1) > 1) { //if so then use subject seq to get domain boundary - alnFrom = m_AV->GetAlnPosFromSeqPos(1, - (int)(*iter)->subject_seqloc->GetInt().GetFrom()); - } + alnFrom = m_AV->GetAlnPosFromSeqPos(1, + 
(int)(*iter)->subject_seqloc->GetInt().GetFrom()); + } } int alnTo = m_AV->GetAlnPosFromSeqPos(0, actualSeqStop); @@ -4429,9 +4430,9 @@ void CDisplaySeqalign::x_GetDomainInfo(int row_num, int aln_stop, (*iter)->is_subject_stop_valid) { if (m_AV->GetAlnPosFromSeqPos(0, actualSeqStop + 1) - alnTo > 1) { //if so then use subject seq to get domain boundary - alnTo = m_AV->GetAlnPosFromSeqPos(1, - (int)(*iter)->subject_seqloc->GetInt().GetTo()); - } + alnTo = m_AV->GetAlnPosFromSeqPos(1, + (int)(*iter)->subject_seqloc->GetInt().GetTo()); + } } int actual_aln_from = min(alnFrom,last_aln_to +1); if (actual_aln_from > alnTo) { @@ -4439,9 +4440,9 @@ void CDisplaySeqalign::x_GetDomainInfo(int row_num, int aln_stop, return; } s_MakeDomainString(actual_aln_from, alnTo, (*iter)->domain_name, final_domain); - + last_aln_to = alnTo; - + } } } @@ -4449,9 +4450,9 @@ void CDisplaySeqalign::x_GetDomainInfo(int row_num, int aln_stop, CRef seqloc(new CSeq_loc((CSeq_loc::TId &) m_DomainInfo->front()->seqloc->GetInt().GetId(), (CSeq_loc::TPoint) 0, (CSeq_loc::TPoint) aln_stop)); - x_SetFeatureInfo(featInfo, *(seqloc), 0, + x_SetFeatureInfo(featInfo, *(seqloc), 0, aln_stop, aln_stop, ' ', - " ", final_domain); + " ", final_domain); retval[0].push_back(featInfo); } } @@ -4478,7 +4479,7 @@ void CDisplaySeqalign::x_FillSeqid(string& id, int row) const id=kQuery; } else {//hits if (!(m_AlignOption&eMergeAlign)){ - //hits for pairwise + //hits for pairwise id=kSubject; } else { if(m_AlignOption&eShowGi){ @@ -4493,19 +4494,19 @@ void CDisplaySeqalign::x_FillSeqid(string& id, int row) const if(gi > ZERO_GI){ id=NStr::NumericToString(gi); } else { - const CRef wid + const CRef wid = FindBestChoice(m_AV->GetBioseqHandle(row).\ - GetBioseqCore()->GetId(), + GetBioseqCore()->GetId(), CSeq_id::WorstRank); id = CAlignFormatUtil::GetLabel(wid,CSeq_id::eContent).c_str(); } } else { - const CRef wid + const CRef wid = FindBestChoice(m_AV->GetBioseqHandle(row).\ - GetBioseqCore()->GetId(), + 
GetBioseqCore()->GetId(), CSeq_id::WorstRank); id = CAlignFormatUtil::GetLabel(wid,CSeq_id::eContent).c_str(); - } + } } } } else { @@ -4521,19 +4522,19 @@ void CDisplaySeqalign::x_FillSeqid(string& id, int row) const if(gi > ZERO_GI){ id=NStr::NumericToString(gi); } else { - const CRef wid + const CRef wid = FindBestChoice(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId(), CSeq_id::WorstRank); id = CAlignFormatUtil::GetLabel(wid,CSeq_id::eContent).c_str(); } } else { - const CRef wid + const CRef wid = FindBestChoice(m_AV->GetBioseqHandle(row).\ - GetBioseqCore()->GetId(), + GetBioseqCore()->GetId(), CSeq_id::WorstRank); id = CAlignFormatUtil::GetLabel(wid,CSeq_id::eContent).c_str(); - } + } } } @@ -4548,12 +4549,12 @@ void CDisplaySeqalign::x_PreProcessSeqAlign(CSeq_align_set &actual_aln_list) } if( // Calculate m_AlnLinksParams->segs,hspNum, subjRange only for the following conditions (!(m_AlignOption & eMergeAlign) && - (toolUrl.find("dumpgnl.cgi") != string::npos + (toolUrl.find("dumpgnl.cgi") != string::npos || (m_AlignOption & eLinkout) || (m_AlignOption & eHtml && m_AlignOption & eShowBlastInfo)))) { /*need to construct segs for dumpgnl and get sub-sequence for long sequences*/ - + for (CSeq_align_set::Tdata::const_iterator iter = actual_aln_list.Get().begin(); iter != actual_aln_list.Get().end() @@ -4562,8 +4563,8 @@ void CDisplaySeqalign::x_PreProcessSeqAlign(CSeq_align_set &actual_aln_list) CConstRef subid; subid = &((*iter)->GetSeq_id(1)); string idString = subid->GetSeqIdString(); - - x_CalcUrlLinksParams(**iter,idString,toolUrl);//sets m_AlnLinksParams->segs,hspNum, subjRange + + x_CalcUrlLinksParams(**iter,idString,toolUrl);//sets m_AlnLinksParams->segs,hspNum, subjRange } } } @@ -4572,12 +4573,12 @@ void CDisplaySeqalign::x_PreProcessSeqAlign(CSeq_align_set &actual_aln_list) void CDisplaySeqalign::x_CalcUrlLinksParams(const CSeq_align& align, string idString,string toolUrl) { - //make alnvector - CRef avRef = x_GetAlnVecForSeqalign(align); - - 
bool first = m_AlnLinksParams.count(idString) == 0; + //make alnvector + CRef avRef = x_GetAlnVecForSeqalign(align); + + bool first = m_AlnLinksParams.count(idString) == 0; struct SAlnLinksParams *alnLinksParam = first ? new SAlnLinksParams : &m_AlnLinksParams[idString]; - + if (toolUrl.find("dumpgnl.cgi") != string::npos || (m_AlignOption & eLinkout)) { if(!first){ @@ -4585,30 +4586,30 @@ void CDisplaySeqalign::x_CalcUrlLinksParams(const CSeq_align& align, string idSt } alnLinksParam->segs += NStr::IntToString(avRef->GetSeqStart(1)) + "-" + - NStr::IntToString(avRef->GetSeqStop(1)); + NStr::IntToString(avRef->GetSeqStop(1)); } - - + + TSeqPos from = (avRef->GetSeqStart(1)> avRef->GetSeqStop(1)) ? avRef->GetSeqStop(1) : avRef->GetSeqStart(1); - TSeqPos to = (avRef->GetSeqStart(1)> avRef->GetSeqStop(1)) ? avRef->GetSeqStart(1) : avRef->GetSeqStop(1); + TSeqPos to = (avRef->GetSeqStart(1)> avRef->GetSeqStop(1)) ? avRef->GetSeqStart(1) : avRef->GetSeqStop(1); if(first) { alnLinksParam->subjRange = new CRange(from,to); - alnLinksParam->flip = avRef->StrandSign(0) != avRef->StrandSign(1); + alnLinksParam->flip = avRef->StrandSign(0) != avRef->StrandSign(1); } else{ TSeqPos currFrom = alnLinksParam->subjRange->GetFrom(); TSeqPos currTo = alnLinksParam->subjRange->GetTo(); alnLinksParam->subjRange->SetFrom(min(from,currFrom)); - alnLinksParam->subjRange->SetTo(max(to,currTo)); - } + alnLinksParam->subjRange->SetTo(max(to,currTo)); + } + - if (m_AlignOption & eHtml && m_AlignOption & eShowBlastInfo) { alnLinksParam->hspNumber = (!first) ? 
alnLinksParam->hspNumber + 1 : 1; } - - if(first){ - m_AlnLinksParams.insert(map::value_type(idString,*alnLinksParam)); + + if(first){ + m_AlnLinksParams.insert(map::value_type(idString,*alnLinksParam)); } } @@ -4623,24 +4624,24 @@ void CDisplaySeqalign::x_PreProcessSingleAlign(CSeq_align_set::Tdata::const_iter string toolUrl; if(multipleSeqs && (m_AlignOption & eHtml)) { //actually this is needed for long sequences only - toolUrl = m_Reg->Get(m_BlastType, "TOOL_URL"); + toolUrl = m_Reg->Get(m_BlastType, "TOOL_URL"); } - + string idString, prevIdString; - for (CSeq_align_set::Tdata::const_iterator - iter = currSeqAlignIter; + for (CSeq_align_set::Tdata::const_iterator + iter = currSeqAlignIter; iter != actual_aln_list.Get().end();iter++) { subid = &((*iter)->GetSeq_id(1)); idString = subid->GetSeqIdString(); if(prevIdString.empty() || prevIdString == idString) { - x_CalcUrlLinksParams(**iter,idString,toolUrl);//sets m_AlnLinksParams->segs,hspNum, subjRange - } + x_CalcUrlLinksParams(**iter,idString,toolUrl);//sets m_AlnLinksParams->segs,hspNum, subjRange + } else { - break; + break; } - prevIdString = idString; - } + prevIdString = idString; + } } @@ -4651,65 +4652,65 @@ void CDisplaySeqalign::DisplayPairwiseSeqalign(CNcbiOstream& out,unordered_set < CSeq_align_set actual_aln_list; //Not sure we need this - check with Jean - CAlignFormatUtil::ExtractSeqalignSetFromDiscSegs(actual_aln_list, + CAlignFormatUtil::ExtractSeqalignSetFromDiscSegs(actual_aln_list, *m_SeqalignSetRef); if (actual_aln_list.Get().empty()){ return; } //scope for feature fetching //sets m_featScope, m_CanRetrieveSeq,m_DynamicFeature - x_InitAlignParams(actual_aln_list); + x_InitAlignParams(actual_aln_list); CConstRef previousId, subid; - + int idCount = 0; m_currAlignHsp = 0; - bool showBlastDefline = false; - for (CSeq_align_set::Tdata::const_iterator - iter = actual_aln_list.Get().begin(); + bool showBlastDefline = false; + for (CSeq_align_set::Tdata::const_iterator + iter = 
actual_aln_list.Get().begin(); iter != actual_aln_list.Get().end();iter++) { - subid = &((*iter)->GetSeq_id(1)); - + subid = &((*iter)->GetSeq_id(1)); + string currID; - if(subid->Which() == CSeq_id::e_Gi) { - TGi currGi = subid->GetGi(); + if(subid->Which() == CSeq_id::e_Gi) { + TGi currGi = subid->GetGi(); currID = NStr::NumericToString(currGi); } - else { - subid->GetLabel(&currID, CSeq_id::eContent); + else { + subid->GetLabel(&currID, CSeq_id::eContent); } idsIter = selectedIDs.find(currID); - //seqid from seqalign not found in input seq list + //seqid from seqalign not found in input seq list if(idsIter == selectedIDs.end() && idCount < (int)selectedIDs.size()) continue; - if(idsIter == selectedIDs.end() && idCount >= (int)selectedIDs.size()) break; - + if(idsIter == selectedIDs.end() && idCount >= (int)selectedIDs.size()) break; + //reach here if currID from seqalign found in selectedIDs list if(previousId.Empty() || !subid->Match(*previousId)){ idCount++; - - + + //Calculates m_HSPNum for showing sorting links //If getSegs = true calculates m_segs for showing download chicklet for large seqs x_PreProcessSingleAlign(iter,actual_aln_list,selectedIDs.size() > 1); showBlastDefline = true; - + } else { showBlastDefline = false; } - if(!previousId.Empty() && + if(!previousId.Empty() && !subid->Match(*previousId)){ - m_Scope.RemoveFromHistory(m_Scope.GetBioseqHandle(*previousId)); //release memory + m_Scope.RemoveFromHistory(m_Scope.GetBioseqHandle(*previousId)); //release memory } previousId = subid; //make alnvector CRef avRef = x_GetAlnVecForSeqalign(**iter); - + if(!(avRef.Empty())){ //Note: do not switch the set order per calnvec specs. 
avRef->SetGenCode(m_SlaveGeneticCode); @@ -4719,27 +4720,27 @@ void CDisplaySeqalign::DisplayPairwiseSeqalign(CNcbiOstream& out,unordered_set < if(handle){ //save the current alnment regardless CRef alnvecInfo(new SAlnInfo); - + int num_ident; - CAlignFormatUtil::GetAlnScores(**iter, - alnvecInfo->score, - alnvecInfo->bits, - alnvecInfo->evalue, - alnvecInfo->sum_n, + CAlignFormatUtil::GetAlnScores(**iter, + alnvecInfo->score, + alnvecInfo->bits, + alnvecInfo->evalue, + alnvecInfo->sum_n, num_ident, alnvecInfo->use_this_seqid, alnvecInfo->comp_adj_method); - + alnvecInfo->alnvec = avRef; - - x_DisplayAlnvecInfo(out,alnvecInfo,showBlastDefline); - } + + x_DisplayAlnvecInfo(out,alnvecInfo,showBlastDefline); + } } catch (const CException&){ out << "Sequence with id " - << (avRef->GetSeqId(1)).GetSeqIdString().c_str() - <<" no longer exists in database...alignment skipped\n"; + << (avRef->GetSeqId(1)).GetSeqIdString().c_str() + <<" no longer exists in database...alignment skipped\n"; } - } + } } } diff --git a/c++/src/objtools/align_format/showdefline.cpp b/c++/src/objtools/align_format/showdefline.cpp index 120c85b6..e9675302 100644 --- a/c++/src/objtools/align_format/showdefline.cpp +++ b/c++/src/objtools/align_format/showdefline.cpp @@ -1,4 +1,4 @@ -/* $Id: showdefline.cpp 557027 2018-02-08 15:51:06Z zaretska $ +/* $Id: showdefline.cpp 577752 2019-01-08 18:07:51Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -74,7 +74,7 @@ static string kTwoSpaceMargin = " "; static const string kHeader = "Sequences producing significant alignments:"; static const string kScore = "Score"; static const string kE = "E"; -static const string kBits = +static const string kBits = (getenv("CTOOLKIT_COMPATIBLE") ? 
"(bits)" : "(Bits)"); static const string kEvalue = "E value"; static const string kValue = "Value"; @@ -92,6 +92,12 @@ static const string kQuery = "Query"; static const string kCoverage = "Query coverage"; static const string kEllipsis = "..."; +static const string kIdent = "Max"; +static const string kIdentLine2 = "ident"; +static const string kTotalLine2 = "score"; +static const string kQueryCov = "Query"; +static const string kQueryCovLine2 = "cover"; + //psiblast related static const string kPsiblastNewSeqGif = "\"New"; @@ -117,7 +123,7 @@ ecked_GI\" VALUE=\"%d\"> "; //Max length of title string for the the link static const int kMaxDescrLength = 4096; -string +string CShowBlastDefline::GetSeqIdListString(const list >& id, bool show_gi) { @@ -139,42 +145,42 @@ CShowBlastDefline::GetSeqIdListString(const list >& id, if (best_id.NotEmpty() && !best_id->IsGi() ) { if (found_gi) id_string += "|"; - + if (best_id->IsLocal()) { string id_token; best_id->GetLabel(&id_token, CSeq_id::eContent, 0); id_string += id_token; } - else + else id_string += best_id->AsFastaString(); } return id_string; } -void +void CShowBlastDefline::GetSeqIdList(const objects::CBioseq_Handle& bh, list >& ids) { ids.clear(); - + vector< CConstRef > original_seqids; - + ITERATE(CBioseq_Handle::TId, itr, bh.GetId()) { - original_seqids.push_back(itr->GetSeqId()); - } + original_seqids.push_back(itr->GetSeqId()); + } // Check for ids of type "gnl|BL_ORD_ID". These are the artificial ids // created in a BLAST database when it is formatted without indexing. - // For such ids, create new fake local Seq-ids, saving the first token of + // For such ids, create new fake local Seq-ids, saving the first token of // the Bioseq's title, if it's available. 
GetSeqIdList(bh,original_seqids,ids); } -void +void CShowBlastDefline::GetSeqIdList(const objects::CBioseq_Handle& bh, vector< CConstRef > &original_seqids, list >& ids) @@ -183,18 +189,18 @@ CShowBlastDefline::GetSeqIdList(const objects::CBioseq_Handle& bh, ITERATE(vector< CConstRef >, itr, original_seqids) { CRef next_seqid(new CSeq_id()); string id_token = NcbiEmptyString; - + if (((*itr)->IsGeneral() && - (*itr)->AsFastaString().find("gnl|BL_ORD_ID") + (*itr)->AsFastaString().find("gnl|BL_ORD_ID") != string::npos) || (*itr)->AsFastaString().find("lcl|Subject_") != string::npos) { vector title_tokens; - id_token = + id_token = NStr::Split(sequence::CDeflineGenerator().GenerateDefline(bh), " ", title_tokens)[0]; } if (id_token != NcbiEmptyString) { // Create a new local id with a label containing the extracted - // token and save it in the next_seqid instead of the original + // token and save it in the next_seqid instead of the original // id. CObject_id* obj_id = new CObject_id(); obj_id->SetStr(id_token); @@ -209,22 +215,22 @@ CShowBlastDefline::GetSeqIdList(const objects::CBioseq_Handle& bh, void CShowBlastDefline::GetBioseqHandleDeflineAndId(const CBioseq_Handle& handle, list& use_this_gi, - string& seqid, string& defline, + string& seqid, string& defline, bool show_gi /* = true */, TGi this_gi_first /* = -1 */) { // Retrieve the CBlast_def_line_set object and save in a CRef, preventing // its destruction; then extract the list of CBlast_def_line objects. if( !handle ) return; // No bioseq for this handle ( deleted accession ? 
) - CRef bdlRef = - CSeqDB::ExtractBlastDefline(handle); + CRef bdlRef = + CSeqDB::ExtractBlastDefline(handle); - if(bdlRef.Empty()){ + if(bdlRef.Empty()){ list > ids; GetSeqIdList(handle, ids); seqid = GetSeqIdListString(ids, show_gi); defline = sequence::CDeflineGenerator().GenerateDefline(handle); - } else { + } else { bdlRef->PutTargetGiFirst(this_gi_first); const list< CRef< CBlast_def_line > >& bdl = bdlRef->Get(); bool is_first = true; @@ -234,7 +240,7 @@ CShowBlastDefline::GetBioseqHandleDeflineAndId(const CBioseq_Handle& handle, TGi gi_in_use_this_gi = ZERO_GI; ITERATE(list, iter_gi, use_this_gi){ if(cur_gi == *iter_gi){ - gi_in_use_this_gi = *iter_gi; + gi_in_use_this_gi = *iter_gi; break; } } @@ -247,15 +253,15 @@ CShowBlastDefline::GetBioseqHandleDeflineAndId(const CBioseq_Handle& handle, defline = (*iter)->GetTitle(); } else { string concat_acc; - CConstRef wid = + CConstRef wid = FindBestChoice(cur_id, CSeq_id::WorstRank); wid->GetLabel(&concat_acc, CSeq_id::eFasta, 0); if( show_gi && cur_gi > ZERO_GI){ - defline = defline + " >" + "gi|" + - NStr::NumericToString(cur_gi) + "|" + + defline = defline + " >" + "gi|" + + NStr::NumericToString(cur_gi) + "|" + concat_acc + " " + (*iter)->GetTitle(); } else { - defline = defline + " >" + concat_acc + " " + + defline = defline + " >" + concat_acc + " " + (*iter)->GetTitle(); } } @@ -269,19 +275,19 @@ CShowBlastDefline::GetBioseqHandleDeflineAndId(const CBioseq_Handle& handle, static void s_LimitDescrLength(string &descr) { if(descr.length() > kMaxDescrLength) { - descr = descr.substr(0,kMaxDescrLength); + descr = descr.substr(0,kMaxDescrLength); size_t end = NStr::Find(descr," ",NStr::eNocase,NStr::eReverseSearch); - - if(end != NPOS) { - descr = descr.substr(0,end); + + if(end != NPOS) { + descr = descr.substr(0,end); descr += "..."; - } - } - + } + } + } void CShowBlastDefline::x_InitLinkOutInfo(SDeflineInfo* sdl, - CBioseq::TId& cur_id, + CBioseq::TId& cur_id, int blast_rank, bool getIdentProteins) { @@ 
-291,10 +297,10 @@ void CShowBlastDefline::x_InitLinkOutInfo(SDeflineInfo* sdl, string linkout_list; - - sdl->linkout = CAlignFormatUtil::GetSeqLinkoutInfo(cur_id, - &m_LinkoutDB, - m_MapViewerBuildName, + + sdl->linkout = CAlignFormatUtil::GetSeqLinkoutInfo(cur_id, + &m_LinkoutDB, + m_MapViewerBuildName, sdl->gi); if(!m_LinkoutDB) { m_Option &= ~eLinkout; @@ -304,7 +310,7 @@ void CShowBlastDefline::x_InitLinkOutInfo(SDeflineInfo* sdl, if(m_LinkoutOrder.empty()) { m_ConfigFile.reset(new CNcbiIfstream(".ncbirc")); - m_Reg.reset(new CNcbiRegistry(*m_ConfigFile)); + m_Reg.reset(new CNcbiRegistry(*m_ConfigFile)); if(!m_BlastType.empty()) m_LinkoutOrder = m_Reg->Get(m_BlastType,"LINKOUT_ORDER"); m_LinkoutOrder = (!m_LinkoutOrder.empty()) ? m_LinkoutOrder : kLinkoutOrderStr; } @@ -312,32 +318,32 @@ void CShowBlastDefline::x_InitLinkOutInfo(SDeflineInfo* sdl, if(m_Option & eRealtedInfoLinks){ string user_url = m_Reg.get() ? m_Reg->Get(m_BlastType, "TOOL_URL") : kEmptyStr; sdl->linkout_list = CAlignFormatUtil::GetFullLinkoutUrl(cur_id, - m_Rid, - m_CddRid, - m_EntrezTerm, + m_Rid, + m_CddRid, + m_EntrezTerm, m_IsDbNa, false, - true, + true, blast_rank, m_LinkoutOrder, sdl->taxid, m_Database, - m_QueryNumber, + m_QueryNumber, user_url, m_PreComputedResID, - m_LinkoutDB, + m_LinkoutDB, m_MapViewerBuildName, getIdentProteins); } else { sdl->linkout_list = CAlignFormatUtil::GetLinkoutUrl(sdl->linkout, - cur_id, - m_Rid, - m_CddRid, - m_EntrezTerm, + cur_id, + m_Rid, + m_CddRid, + m_EntrezTerm, m_IsDbNa, - ZERO_GI, - true, + ZERO_GI, + true, false, blast_rank, m_PreComputedResID); @@ -356,24 +362,24 @@ void CShowBlastDefline::x_FillDeflineAndId(const CBioseq_Handle& handle, const CRef bdlRef = CSeqDB::ExtractBlastDefline(handle); const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? 
list< CRef< CBlast_def_line > >() : bdlRef->Get(); - - CRef wid; + + CRef wid; sdl->defline = NcbiEmptyString; - + sdl->gi = ZERO_GI; sdl->id_url = NcbiEmptyString; - sdl->score_url = NcbiEmptyString; + sdl->score_url = NcbiEmptyString; sdl->linkout = 0; sdl->is_new = false; sdl->was_checked = false; - sdl->taxid = 0; + sdl->taxid = 0; //get psiblast stuff - + if(m_SeqStatus){ string aln_id_str; aln_id.GetLabel(&aln_id_str, CSeq_id::eContent); PsiblastSeqStatus seq_status = eUnknown; - + TIdString2SeqStatus::const_iterator itr = m_SeqStatus->find(aln_id_str); if ( itr != m_SeqStatus->end() ){ seq_status = itr->second; @@ -386,34 +392,34 @@ void CShowBlastDefline::x_FillDeflineAndId(const CBioseq_Handle& handle, } if(seq_status & eCheckedSeq){ sdl->was_checked = true; - } + } } - } - //get id (sdl->id, sdl-gi) + } + //get id (sdl->id, sdl-gi) sdl->id = CAlignFormatUtil::GetDisplayIds(handle,aln_id,use_this_seqid,&sdl->gi,&sdl->taxid,&sdl->textSeqID); sdl->alnIDFasta = aln_id.AsFastaString(); //get linkout**** - if((m_Option & eLinkout)){ + if((m_Option & eLinkout)){ bool getIdentProteins = !m_IsDbNa && bdl.size() > 1; for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin(); iter != bdl.end(); iter++){ CBioseq::TId& cur_id = (CBioseq::TId &)(*iter)->GetSeqid(); - TGi cur_gi = FindGi(cur_id); + TGi cur_gi = FindGi(cur_id); bool match = false; if(!use_this_seqid.empty()){ wid = FindBestChoice(cur_id, CSeq_id::WorstRank); match = CAlignFormatUtil::MatchSeqInSeqList(cur_gi, wid, use_this_seqid); } - if((use_this_seqid.empty() && sdl->gi == cur_gi) || match) { + if((use_this_seqid.empty() && sdl->gi == cur_gi) || match) { x_InitLinkOutInfo(sdl,cur_id,blast_rank,getIdentProteins); //only initialized if !(m_DeflineTemplates->advancedView && !is_mixed_database) break; - } + } } } - + //get score and id url - if(m_Option & eHtml){ + if(m_Option & eHtml){ bool useTemplates = m_DeflineTemplates != NULL; bool advancedView = (m_DeflineTemplates != NULL) ? 
m_DeflineTemplates->advancedView : false; string accession; @@ -422,19 +428,19 @@ void CShowBlastDefline::x_FillDeflineAndId(const CBioseq_Handle& handle, if (!useTemplates && m_PositionIndex >= 0) { sdl->score_url += "_" + NStr::IntToString(m_PositionIndex) + "_"; } - sdl->score_url += sdl->gi == ZERO_GI ? accession : + sdl->score_url += sdl->gi == ZERO_GI ? accession : NStr::NumericToString(sdl->gi); sdl->score_url += !useTemplates ? ">" : ""; - string user_url = m_Reg.get() ? m_Reg->Get(m_BlastType, "TOOL_URL") : kEmptyStr; + string user_url = m_Reg.get() ? m_Reg->Get(m_BlastType, "TOOL_URL") : kEmptyStr; //blast_rank = num_align + 1 CRange seqRange = ((int)m_ScoreList.size() >= blast_rank)? m_ScoreList[blast_rank - 1]->subjRange : CRange(0,0); - bool flip = ((int)m_ScoreList.size() >= blast_rank) ? m_ScoreList[blast_rank - 1]->flip : false; + bool flip = ((int)m_ScoreList.size() >= blast_rank) ? m_ScoreList[blast_rank - 1]->flip : false; CAlignFormatUtil::SSeqURLInfo seqUrlInfo(user_url,m_BlastType,m_IsDbNa,m_Database,m_Rid, m_QueryNumber,sdl->gi, accession, 0, //linkout = 0, not used any more - blast_rank,false,(m_Option & eNewTargetWindow) ? true : false,seqRange,flip); + blast_rank,false,(m_Option & eNewTargetWindow) ? true : false,seqRange,flip); seqUrlInfo.resourcesUrl = m_Reg.get() ? 
m_Reg->Get(m_BlastType, "RESOURCE_URL") : kEmptyStr; - seqUrlInfo.useTemplates = useTemplates; + seqUrlInfo.useTemplates = useTemplates; seqUrlInfo.advancedView = advancedView; if(sdl->id->Which() == CSeq_id::e_Local && (m_Option & eHtml)){ @@ -446,21 +452,21 @@ void CShowBlastDefline::x_FillDeflineAndId(const CBioseq_Handle& handle, } } } - sdl->id_url = CAlignFormatUtil::GetIDUrl(&seqUrlInfo,aln_id,*m_ScopeRef); + sdl->id_url = CAlignFormatUtil::GetIDUrl(&seqUrlInfo,aln_id,*m_ScopeRef); } //get defline sdl->defline = CDeflineGenerator().GenerateDefline(m_ScopeRef->GetBioseqHandle(*(sdl->id))); sdl->fullDefline = sdl->defline; - if (!(bdl.empty())) { + if (!(bdl.empty())) { for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin(); iter != bdl.end(); iter++){ const CBioseq::TId& cur_id = (*iter)->GetSeqid(); TGi cur_gi = FindGi(cur_id); wid = FindBestChoice(cur_id, CSeq_id::WorstRank); - bool match = CAlignFormatUtil::MatchSeqInSeqList(cur_gi, wid, use_this_seqid); + bool match = CAlignFormatUtil::MatchSeqInSeqList(cur_gi, wid, use_this_seqid); if(use_this_seqid.empty() || match) { - + if((*iter)->IsSetTitle()){ bool id_used_already = false; ITERATE(CBioseq::TId, iter_id, cur_id) { @@ -474,15 +480,15 @@ void CShowBlastDefline::x_FillDeflineAndId(const CBioseq_Handle& handle, wid = FindBestChoice(cur_id, CSeq_id::WorstRank); wid->GetLabel(&concat_acc, CSeq_id::eFasta, 0); if( (m_Option & eShowGi) && cur_gi > ZERO_GI){ - sdl->fullDefline = sdl->fullDefline + " >" + "gi|" + - NStr::NumericToString(cur_gi) + "|" + + sdl->fullDefline = sdl->fullDefline + " >" + "gi|" + + NStr::NumericToString(cur_gi) + "|" + concat_acc + " " + (*iter)->GetTitle(); } else { sdl->fullDefline = sdl->fullDefline + " >" + concat_acc + - " " + + " " + (*iter)->GetTitle(); } - if(sdl->fullDefline.length() > kMaxDescrLength) { + if(sdl->fullDefline.length() > kMaxDescrLength) { break; } } @@ -499,8 +505,8 @@ CShowBlastDefline::CShowBlastDefline(const CSeq_align_set& seqalign, 
size_t num_defline_to_show, bool translated_nuc_alignment, CRange* master_range): - - m_AlnSetRef(&seqalign), + + m_AlnSetRef(&seqalign), m_ScopeRef(&scope), m_LineLen(line_length), m_NumToShow(num_defline_to_show), @@ -510,7 +516,7 @@ CShowBlastDefline::CShowBlastDefline(const CSeq_align_set& seqalign, m_MasterRange(master_range), m_LinkoutDB(NULL) { - + m_Option = 0; m_EntrezTerm = NcbiEmptyString; m_QueryNumber = 0; @@ -534,19 +540,24 @@ CShowBlastDefline::CShowBlastDefline(const CSeq_align_set& seqalign, } CShowBlastDefline::~CShowBlastDefline() -{ +{ ITERATE(vector, iter, m_ScoreList){ delete *iter; } + + ITERATE(vector, iter, m_SdlFormatInfoVec){ + delete *iter; + } + } void CShowBlastDefline::Init(void) { if (m_DeflineTemplates != NULL) { - x_InitDeflineTable(); + x_InitDeflineTable(); } - else { + else { x_InitDefline(); } } @@ -554,10 +565,10 @@ void CShowBlastDefline::Init(void) void CShowBlastDefline::Display(CNcbiOstream & out) { - if (m_DeflineTemplates != NULL) { - x_DisplayDeflineTableTemplate(out); + if (m_DeflineTemplates != NULL) { + x_DisplayDeflineTableTemplate(out); } - else { + else { x_DisplayDefline(out); } } @@ -571,7 +582,7 @@ bool CShowBlastDefline::x_CheckForStructureLink() ITERATE(vector, iter, m_ScoreList) { const CBioseq_Handle& handle = m_ScopeRef->GetBioseqHandle(*(*iter)->id); if( !handle ) continue; // invalid handle. - const CRef bdlRef = CSeqDB::ExtractBlastDefline(handle); + const CRef bdlRef = CSeqDB::ExtractBlastDefline(handle); const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? 
list< CRef< CBlast_def_line > >() : bdlRef->Get(); for(list< CRef< CBlast_def_line > >::const_iterator bdl_iter = bdl.begin(); bdl_iter != bdl.end() && struct_linkout == false; bdl_iter++){ @@ -600,9 +611,9 @@ bool CShowBlastDefline::x_CheckForStructureLink() //bool m_StructureLinkout void CShowBlastDefline::x_InitDefline(void) { - /*Note we can't just show each alnment as we go because we will + /*Note we can't just show each alnment as we go because we will need to show defline only once for all hsp's with the same id*/ - + bool is_first_aln = true; size_t num_align = 0; CConstRef previous_id, subid; @@ -610,19 +621,24 @@ void CShowBlastDefline::x_InitDefline(void) m_MaxScoreLen = kBits.size(); m_MaxEvalueLen = kValue.size(); m_MaxSumNLen =1; + + m_MaxPercentIdentityLen = kIdentity.size(); + m_MaxQueryCoverLen = kCoverage.size(); + m_MaxTotalScoreLen = kTotal.size(); + if(m_Option & eHtml){ m_ConfigFile.reset(new CNcbiIfstream(".ncbirc")); - m_Reg.reset(new CNcbiRegistry(*m_ConfigFile)); + m_Reg.reset(new CNcbiRegistry(*m_ConfigFile)); } bool master_is_na = false; //prepare defline int ialn = 0; - for (CSeq_align_set::Tdata::const_iterator + for (CSeq_align_set::Tdata::const_iterator iter = m_AlnSetRef->Get().begin(); - iter != m_AlnSetRef->Get().end() && num_align < m_NumToShow; + iter != m_AlnSetRef->Get().end() && num_align < m_NumToShow; iter++, ialn++){ if (ialn < m_SkipTo && ialn >= m_SkipFrom) continue; @@ -634,7 +650,7 @@ void CShowBlastDefline::x_InitDefline(void) } subid = &((*iter)->GetSeq_id(1)); if(is_first_aln || (!is_first_aln && !subid->Match(*previous_id))) { - SScoreInfo* sci = x_GetScoreInfo(**iter, num_align); + SScoreInfo* sci = x_GetScoreInfo(**iter, num_align); if(sci){ m_ScoreList.push_back(sci); if(m_MaxScoreLen < sci->bit_string.size()){ @@ -644,6 +660,14 @@ void CShowBlastDefline::x_InitDefline(void) m_MaxEvalueLen = sci->evalue_string.size(); } + if(m_MaxTotalScoreLen < sci->total_bit_string.size()){ + m_MaxTotalScoreLen = 
sci->total_bit_string.size(); + } + int percent_identity = CAlignFormatUtil::GetPercentMatch(sci->match,sci->align_length); + if(m_MaxPercentIdentityLen < NStr::IntToString(percent_identity).size()) { + m_MaxPercentIdentityLen = NStr::IntToString(percent_identity).size(); + } + if( m_MaxSumNLen < NStr::IntToString(sci->sum_n).size()){ m_MaxSumNLen = NStr::IntToString(sci->sum_n).size(); } @@ -652,10 +676,10 @@ void CShowBlastDefline::x_InitDefline(void) } is_first_aln = false; previous_id = subid; - + } - + if((m_Option & eLinkout) && (m_Option & eHtml) && !m_IsDbNa && !master_is_na) m_StructureLinkout = x_CheckForStructureLink(); } @@ -683,8 +707,29 @@ void CShowBlastDefline::x_DisplayDefline(CNcbiOstream & out) out << kScore; CAlignFormatUtil::AddSpace(out, m_MaxScoreLen - kScore.size()); CAlignFormatUtil::AddSpace(out, kTwoSpaceMargin.size()); + + + if (m_Option & eShowTotalScore) { + out << kTotal; + CAlignFormatUtil::AddSpace(out, m_MaxTotalScoreLen - kTotal.size()); + CAlignFormatUtil::AddSpace(out, kTwoSpaceMargin.size()); + } + if (m_Option & eShowQueryCoverage) { + out << kQueryCov; + CAlignFormatUtil::AddSpace(out, kTwoSpaceMargin.size()); + } + CAlignFormatUtil::AddSpace(out, 2); //E align to l of value - out << kE; + out << kE; + + if (m_Option & eShowPercentIdent) { + CAlignFormatUtil::AddSpace(out, m_MaxEvalueLen - kValue.size()); + CAlignFormatUtil::AddSpace(out, kTwoSpaceMargin.size()); + CAlignFormatUtil::AddSpace(out, kTwoSpaceMargin.size()); + CAlignFormatUtil::AddSpace(out, kOneSpaceMargin.size()); + out << kIdent;//"Max" - "ident" -second line + } + out << "\n"; out << kHeader; if(m_Option & eHtml){ @@ -701,14 +746,33 @@ void CShowBlastDefline::x_DisplayDefline(CNcbiOstream & out) CAlignFormatUtil::AddSpace(out, kOneSpaceMargin.size()); out << kBits; //in case m_MaxScoreLen > kBits.size() - CAlignFormatUtil::AddSpace(out, m_MaxScoreLen - kBits.size()); + CAlignFormatUtil::AddSpace(out, m_MaxScoreLen - kBits.size()); 
CAlignFormatUtil::AddSpace(out, kTwoSpaceMargin.size()); + + + if (m_Option & eShowTotalScore) { + CAlignFormatUtil::AddSpace(out, kOneSpaceMargin.size()); + out << kTotalLine2;//"score" + CAlignFormatUtil::AddSpace(out, m_MaxTotalScoreLen - kTotal.size()); + CAlignFormatUtil::AddSpace(out, kTwoSpaceMargin.size()); + } + if (m_Option & eShowQueryCoverage) { + out << kQueryCovLine2;//"cov" + CAlignFormatUtil::AddSpace(out, kTwoSpaceMargin.size()); + CAlignFormatUtil::AddSpace(out, kOneSpaceMargin.size()); + } + out << kValue; - if(m_Option & eShowSumN){ + if((m_Option & eShowSumN) || (m_Option & eShowPercentIdent)){ CAlignFormatUtil::AddSpace(out, m_MaxEvalueLen - kValue.size()); - CAlignFormatUtil::AddSpace(out, kTwoSpaceMargin.size()); + CAlignFormatUtil::AddSpace(out, kTwoSpaceMargin.size()); + } + if(m_Option & eShowSumN){ out << kN; } + if (m_Option & eShowPercentIdent) { + out << kIdentLine2;//"ident" + } out << "\n"; } if(m_PsiblastStatus == eRepeatPass){ @@ -719,34 +783,34 @@ void CShowBlastDefline::x_DisplayDefline(CNcbiOstream & out) } out << "\n"; } - + bool first_new =true; ITERATE(vector, iter, m_ScoreList){ SDeflineInfo* sdl = x_GetDeflineInfo((*iter)->id, (*iter)->use_this_seqid, (*iter)->blast_rank); size_t line_length = 0; string line_component; if ((m_Option & eHtml) && (sdl->gi > ZERO_GI)){ - if((m_Option & eShowNewSeqGif)) { + if((m_Option & eShowNewSeqGif)) { if (sdl->is_new) { if (first_new) { first_new = false; out << kPsiblastEvalueLink; } out << kPsiblastNewSeqGif; - + } else { out << kPsiblastNewSeqBackgroundGif; } if (sdl->was_checked) { out << kPsiblastCheckedGif; - + } else { out << kPsiblastCheckedBackgroundGif; } } char buf[256]; if((m_Option & eCheckboxChecked)){ - sprintf(buf, kPsiblastCheckboxChecked.c_str(), sdl->gi, + sprintf(buf, kPsiblastCheckboxChecked.c_str(), sdl->gi, sdl->gi); out << buf; } else if (m_Option & eCheckbox) { @@ -754,8 +818,8 @@ void CShowBlastDefline::x_DisplayDefline(CNcbiOstream & out) out << buf; } } - - + 
+ if((m_Option & eHtml) && (sdl->id_url != NcbiEmptyString)) { out << sdl->id_url; } @@ -767,14 +831,14 @@ void CShowBlastDefline::x_DisplayDefline(CNcbiOstream & out) } } if(!sdl->id.Empty()){ - if(!(sdl->id->AsFastaString().find("gnl|BL_ORD_ID") != string::npos || + if(!(sdl->id->AsFastaString().find("gnl|BL_ORD_ID") != string::npos || sdl->id->AsFastaString().find("lcl|Subject_") != string::npos)){ string idStr; if (use_long_seqids || ((m_Option & eShowGi) && !sdl->id->IsGi())) { idStr = sdl->id->AsFastaString(); } else { - idStr = CAlignFormatUtil::GetBareId(*sdl->id); + idStr = CAlignFormatUtil::GetBareId(*sdl->id) + " "; } if (strncmp(idStr.c_str(), "lcl|", 4) == 0) { idStr = sdl->id->AsFastaString().substr(4); @@ -785,42 +849,66 @@ void CShowBlastDefline::x_DisplayDefline(CNcbiOstream & out) } if((m_Option & eHtml) && (sdl->id_url != NcbiEmptyString)) { out << ""; - } - line_component = " " + sdl->defline; + } + line_component = (line_component.empty() ? "" : " ") + sdl->defline; string actual_line_component; if(line_component.size()+line_length > m_LineLen){ - actual_line_component = line_component.substr(0, m_LineLen - + actual_line_component = line_component.substr(0, m_LineLen - line_length - 3); actual_line_component += kEllipsis; } else { - actual_line_component = line_component.substr(0, m_LineLen - + actual_line_component = line_component.substr(0, m_LineLen - line_length); } if (m_Option & eHtml) { out << CHTMLHelper::HTMLEncode(actual_line_component); } else { - out << actual_line_component; + out << actual_line_component; } line_length += actual_line_component.size(); //pad the short lines CAlignFormatUtil::AddSpace(out, m_LineLen - line_length); out << kTwoSpaceMargin; - + if((m_Option & eHtml) && (sdl->score_url != NcbiEmptyString)) { out << sdl->score_url; } out << (*iter)->bit_string; if((m_Option & eHtml) && (sdl->score_url != NcbiEmptyString)) { out << ""; - } + } CAlignFormatUtil::AddSpace(out, m_MaxScoreLen - (*iter)->bit_string.size()); + 
if (m_Option & eShowTotalScore) { + out << kTwoSpaceMargin << kOneSpaceMargin << (*iter)->total_bit_string; + CAlignFormatUtil::AddSpace(out, m_MaxTotalScoreLen - + (*iter)->total_bit_string.size()); + } + + if (m_Option & eShowQueryCoverage) { + //int percent_coverage = 100*(*iter)->master_covered_length/m_QueryLength; + int percent_coverage = (*iter)->percent_coverage; + + out << kTwoSpaceMargin << percent_coverage << "%"; + //minus one due to % sign + CAlignFormatUtil::AddSpace(out, kQueryCov.size() - + NStr::IntToString(percent_coverage).size() - 1); + } + + out << kTwoSpaceMargin << (*iter)->evalue_string; CAlignFormatUtil::AddSpace(out, m_MaxEvalueLen - (*iter)->evalue_string.size()); + if(m_Option & eShowSumN){ out << kTwoSpaceMargin << (*iter)->sum_n; CAlignFormatUtil::AddSpace(out, m_MaxSumNLen - NStr::IntToString((*iter)->sum_n).size()); } + if(m_Option & eShowPercentIdent){ + int percent_identity =(*iter)->percent_identity; + out << kTwoSpaceMargin << percent_identity <<"%"; + CAlignFormatUtil::AddSpace(out, m_MaxPercentIdentityLen - + NStr::IntToString(percent_identity).size()); + } if((m_Option & eLinkout) && (m_Option & eHtml)){ bool is_first = true; ITERATE(list, iter_linkout, sdl->linkout_list){ @@ -838,61 +926,61 @@ void CShowBlastDefline::x_DisplayDefline(CNcbiOstream & out) void CShowBlastDefline::DisplayBlastDefline(CNcbiOstream & out) { - x_InitDefline(); - if(m_StructureLinkout){ + x_InitDeflineTable(); + if(m_StructureLinkout){ char buf[512]; - string mapCDDParams = (NStr::Find(m_CddRid,"data_cache") != NPOS) ? "" : "blast_CD_RID=" + m_CddRid; + string mapCDDParams = (NStr::Find(m_CddRid,"data_cache") != NPOS) ? "" : "blast_CD_RID=" + m_CddRid; sprintf(buf, kStructure_Overview, m_Rid.c_str(), 0, 0, mapCDDParams.c_str(), "overview", m_EntrezTerm == NcbiEmptyString ? 
"none": m_EntrezTerm.c_str()); - out << buf <<"\n\n"; + out << buf <<"\n\n"; } - x_DisplayDefline(out); + x_DisplayDefline(out); } static void s_DisplayDescrColumnHeader(CNcbiOstream & out, int currDisplaySort, - string query_buf, + string query_buf, int columnDisplSort, int columnHspSort, string columnText, int max_data_len, bool html) - + { - if (html) { + if (html) { if(currDisplaySort == columnDisplSort) { - out << ""; + out << ""; } else { out << ""; - } - + } + out << ""; - + } out << columnText; - if (html) { + if (html) { out << "\n"; } else { CAlignFormatUtil::AddSpace(out, max_data_len - columnText.size()); CAlignFormatUtil::AddSpace(out, kTwoSpaceMargin.size()); } - + } void CShowBlastDefline::x_InitDeflineTable(void) { - /*Note we can't just show each alnment as we go because we will + /*Note we can't just show each alnment as we go because we will need to show defline only once for all hsp's with the same id*/ - + bool is_first_aln = true; size_t num_align = 0; CConstRef previous_id, subid; @@ -902,11 +990,12 @@ void CShowBlastDefline::x_InitDeflineTable(void) m_MaxTotalScoreLen = kTotal.size(); m_MaxPercentIdentityLen = kIdentity.size(); int percent_identity = 0; - m_MaxQueryCoverLen = kCoverage.size(); + m_MaxQueryCoverLen = kCoverage.size(); + if(m_Option & eHtml){ m_ConfigFile.reset(new CNcbiIfstream(".ncbirc")); - m_Reg.reset(new CNcbiRegistry(*m_ConfigFile)); + m_Reg.reset(new CNcbiRegistry(*m_ConfigFile)); if(!m_BlastType.empty()) m_LinkoutOrder = m_Reg->Get(m_BlastType,"LINKOUT_ORDER"); m_LinkoutOrder = (!m_LinkoutOrder.empty()) ? 
m_LinkoutOrder : kLinkoutOrderStr; } @@ -917,14 +1006,14 @@ void CShowBlastDefline::x_InitDeflineTable(void) //prepare defline int ialn = 0; - for (CSeq_align_set::Tdata::const_iterator + for (CSeq_align_set::Tdata::const_iterator iter = m_AlnSetRef->Get().begin(); - iter != m_AlnSetRef->Get().end() && num_align < m_NumToShow; + iter != m_AlnSetRef->Get().end() && num_align < m_NumToShow; iter++, ialn++){ if (ialn < m_SkipTo && ialn >= m_SkipFrom) continue; if (is_first_aln) { - m_QueryLength = m_MasterRange ? + m_QueryLength = m_MasterRange ? m_MasterRange->GetLength() : m_ScopeRef->GetBioseqHandle((*iter)->GetSeq_id(0)).GetBioseqLength(); master_is_na = m_ScopeRef->GetBioseqHandle((*iter)->GetSeq_id(0)). @@ -932,7 +1021,7 @@ void CShowBlastDefline::x_InitDeflineTable(void) } subid = &((*iter)->GetSeq_id(1)); - + // This if statement is working on the last CSeq_align_set, stored in "hit" // This is confusing and the loop should probably be restructured at some point. if(!is_first_aln && !(subid->Match(*previous_id))) { @@ -952,13 +1041,13 @@ void CShowBlastDefline::x_InitDeflineTable(void) if(m_MaxEvalueLen < sci->evalue_string.size()){ m_MaxEvalueLen = sci->evalue_string.size(); } - + if( m_MaxSumNLen < NStr::IntToString(sci->sum_n).size()){ m_MaxSumNLen = NStr::IntToString(sci->sum_n).size(); } hit.Set().clear(); } - + num_align++; // Only increment if new subject ID found. 
} if (num_align < m_NumToShow) { //no adding if number to show already reached @@ -985,7 +1074,7 @@ void CShowBlastDefline::x_InitDeflineTable(void) if(m_MaxEvalueLen < sci->evalue_string.size()){ m_MaxEvalueLen = sci->evalue_string.size(); } - + if( m_MaxSumNLen < NStr::IntToString(sci->sum_n).size()){ m_MaxSumNLen = NStr::IntToString(sci->sum_n).size(); } @@ -994,12 +1083,12 @@ void CShowBlastDefline::x_InitDeflineTable(void) if((m_Option & eLinkout) && (m_Option & eHtml) && !m_IsDbNa && !master_is_na) m_StructureLinkout = x_CheckForStructureLink(); - + } void CShowBlastDefline::x_DisplayDeflineTable(CNcbiOstream & out) { - //This is max number of columns in the table - later should be probably put in enum DisplayOption + //This is max number of columns in the table - later should be probably put in enum DisplayOption if((m_PsiblastStatus == eFirstPass) || (m_PsiblastStatus == eRepeatPass)){ @@ -1013,41 +1102,41 @@ void CShowBlastDefline::x_DisplayDeflineTable(CNcbiOstream & out) out << kPsiblastCheckedBackgroundGif; } } - //This is done instead of code displaying titles + //This is done instead of code displaying titles if(!(m_Option & eNoShowHeader)) { - + if(m_Option & eHtml){ - + out << ""; } out << kHeader << "\n"; if(m_Option & eHtml){ - out << ""; + out << ""; out << "(Click headers to sort columns)\n"; } } if(m_Option & eHtml){ out << "
" << "" << "\n" << "" << "\n"; - out << "" << "\n" << "" << "\n" << "" << "\n"; + out << "" << "\n" << "" << "\n" << "" << "\n"; } - - string query_buf; + + string query_buf; map< string, string> parameters_to_change; parameters_to_change.insert(map:: value_type("DISPLAY_SORT", "")); parameters_to_change.insert(map:: value_type("HSP_SORT", "")); - CAlignFormatUtil::BuildFormatQueryString(*m_Ctx, + CAlignFormatUtil::BuildFormatQueryString(*m_Ctx, parameters_to_change, query_buf); - + parameters_to_change.clear(); string display_sort_value = m_Ctx->GetRequestValue("DISPLAY_SORT"). GetValue(); - int display_sort = display_sort_value == NcbiEmptyString ? + int display_sort = display_sort_value == NcbiEmptyString ? CAlignFormatUtil::eEvalue : NStr::StringToInt(display_sort_value); - + s_DisplayDescrColumnHeader(out,display_sort,query_buf,CAlignFormatUtil::eHighestScore,CAlignFormatUtil::eScore,kMaxScore,m_MaxScoreLen,m_Option & eHtml); s_DisplayDescrColumnHeader(out,display_sort,query_buf,CAlignFormatUtil::eTotalScore,CAlignFormatUtil::eScore,kTotalScore,m_MaxTotalScoreLen,m_Option & eHtml); @@ -1055,26 +1144,26 @@ void CShowBlastDefline::x_DisplayDeflineTable(CNcbiOstream & out) s_DisplayDescrColumnHeader(out,display_sort,query_buf,CAlignFormatUtil::eEvalue,CAlignFormatUtil::eHspEvalue,kEvalue,m_MaxEvalueLen,m_Option & eHtml); if(m_Option & eShowPercentIdent){ s_DisplayDescrColumnHeader(out,display_sort,query_buf,CAlignFormatUtil::ePercentIdentity,CAlignFormatUtil::eHspPercentIdentity,kIdentity,m_MaxPercentIdentityLen,m_Option & eHtml); - }else { + }else { } - - if(m_Option & eShowSumN){ + + if(m_Option & eShowSumN){ out << "" << "\n"; - - } + + } if (m_Option & eLinkout) { out << "\n"; out << "\n"; out << "\n"; } - } - + } + if (m_Option & eHtml) { out << "\n"; } - + x_DisplayDeflineTableBody(out); - + if (m_Option & eHtml) { out << "\n
AccessionDescription
AccessionDescription" << kN << "Links
\n"; } @@ -1083,13 +1172,13 @@ void CShowBlastDefline::x_DisplayDeflineTable(CNcbiOstream & out) void CShowBlastDefline::x_DisplayDeflineTableBody(CNcbiOstream & out) { int percent_identity = 0; - int tableColNumber = (m_Option & eShowPercentIdent) ? 9 : 8; + int tableColNumber = (m_Option & eShowPercentIdent) ? 9 : 8; bool first_new =true; int prev_database_type = 0, cur_database_type = 0; bool is_first = true; // Mixed db is genomic + transcript and this does not apply to proteins. bool is_mixed_database = false; - if (m_IsDbNa == true) + if (m_IsDbNa == true) is_mixed_database = CAlignFormatUtil::IsMixedDatabase(*m_Ctx); map< string, string> parameters_to_change; @@ -1097,7 +1186,7 @@ void CShowBlastDefline::x_DisplayDeflineTableBody(CNcbiOstream & out) if (is_mixed_database && m_Option & eHtml) { parameters_to_change.insert(map:: value_type("DATABASE_SORT", "")); - CAlignFormatUtil::BuildFormatQueryString(*m_Ctx, + CAlignFormatUtil::BuildFormatQueryString(*m_Ctx, parameters_to_change, query_buf); } @@ -1112,32 +1201,32 @@ void CShowBlastDefline::x_DisplayDeflineTableBody(CNcbiOstream & out) out << "\n"; } if (cur_database_type) { - out << "Genomic sequences"; + out << "Genomic sequences"; } else { - out << "Transcripts"; + out << "Transcripts"; } - if (!(m_Option & eHtml)) { + if (!(m_Option & eHtml)) { out << ":\n"; } if (m_Option & eHtml) { out << "\n"; } - } else if (prev_database_type != cur_database_type) { + } else if (prev_database_type != cur_database_type) { if (m_Option & eHtml) { out << "\n"; } if (cur_database_type) { - out << "Genomic sequences"; + out << "Genomic sequences"; } else { out << "Transcripts"; - } + } if (m_Option & eHtml) { out << "" << " [\n"; } - } + } } prev_database_type = cur_database_type; is_first = false; @@ -1158,27 +1247,27 @@ void CShowBlastDefline::x_DisplayDeflineTableBody(CNcbiOstream & out) out << "\n"; } if ((m_Option & eHtml) && (sdl->gi > ZERO_GI)){ - if((m_Option & eShowNewSeqGif)) { + if((m_Option & 
eShowNewSeqGif)) { if (sdl->is_new) { if (first_new) { first_new = false; out << kPsiblastEvalueLink; } out << kPsiblastNewSeqGif; - + } else { out << kPsiblastNewSeqBackgroundGif; } if (sdl->was_checked) { out << kPsiblastCheckedGif; - + } else { out << kPsiblastCheckedBackgroundGif; } } char buf[256]; if((m_Option & eCheckboxChecked)){ - sprintf(buf, kPsiblastCheckboxChecked.c_str(), sdl->gi, + sprintf(buf, kPsiblastCheckboxChecked.c_str(), sdl->gi, sdl->gi); out << buf; } else if (m_Option & eCheckbox) { @@ -1186,8 +1275,8 @@ void CShowBlastDefline::x_DisplayDeflineTableBody(CNcbiOstream & out) out << buf; } } - - + + if((m_Option & eHtml) && (sdl->id_url != NcbiEmptyString)) { out << sdl->id_url; } @@ -1199,7 +1288,7 @@ void CShowBlastDefline::x_DisplayDeflineTableBody(CNcbiOstream & out) } } if(!sdl->id.Empty()){ - if(!(sdl->id->AsFastaString().find("gnl|BL_ORD_ID") != string::npos || + if(!(sdl->id->AsFastaString().find("gnl|BL_ORD_ID") != string::npos || sdl->id->AsFastaString().find("lcl|Subject_") != string::npos)){ string id_str; sdl->id->GetLabel(&id_str, CSeq_id::eContent); @@ -1213,24 +1302,24 @@ void CShowBlastDefline::x_DisplayDeflineTableBody(CNcbiOstream & out) if (m_Option & eHtml) { out << "
"; } - line_component = " " + sdl->defline; + line_component = " " + sdl->defline; string actual_line_component; actual_line_component = line_component; - + if (m_Option & eHtml) { out << CHTMLHelper::HTMLEncode(actual_line_component); out << "
"; } else { - out << actual_line_component; + out << actual_line_component; } - + if((m_Option & eHtml) && (sdl->score_url != NcbiEmptyString)) { out << sdl->score_url; } out << (*iter)->bit_string; if((m_Option & eHtml) && (sdl->score_url != NcbiEmptyString)) { out << "
"; - } + } if(m_Option & eHtml) { out << ""; out << "" << (*iter)->total_bit_string << ""; @@ -1239,57 +1328,57 @@ void CShowBlastDefline::x_DisplayDeflineTableBody(CNcbiOstream & out) CAlignFormatUtil::AddSpace(out, m_MaxScoreLen - (*iter)->bit_string.size()); out << kTwoSpaceMargin << kOneSpaceMargin << (*iter)->total_bit_string; - CAlignFormatUtil::AddSpace(out, m_MaxTotalScoreLen - + CAlignFormatUtil::AddSpace(out, m_MaxTotalScoreLen - (*iter)->total_bit_string.size()); } - + int percent_coverage = 100*(*iter)->master_covered_length/m_QueryLength; if (m_Option & eHtml) { out << "" << percent_coverage << "%"; } else { out << kTwoSpaceMargin << percent_coverage << "%"; - + //minus one due to % sign - CAlignFormatUtil::AddSpace(out, m_MaxQueryCoverLen - + CAlignFormatUtil::AddSpace(out, m_MaxQueryCoverLen - NStr::IntToString(percent_coverage).size() - 1); } - if (m_Option & eHtml) { + if (m_Option & eHtml) { out << "" << (*iter)->evalue_string << ""; } else { out << kTwoSpaceMargin << (*iter)->evalue_string; CAlignFormatUtil::AddSpace(out, m_MaxEvalueLen - (*iter)->evalue_string.size()); } - if(m_Option & eShowPercentIdent){ + if(m_Option & eShowPercentIdent){ percent_identity = CAlignFormatUtil::GetPercentMatch((*iter)->match,(*iter)->align_length); - if (m_Option & eHtml) { + if (m_Option & eHtml) { out << "" << percent_identity << "%"; } else { out << kTwoSpaceMargin << percent_identity <<"%"; - - CAlignFormatUtil::AddSpace(out, m_MaxPercentIdentityLen - + + CAlignFormatUtil::AddSpace(out, m_MaxPercentIdentityLen - NStr::IntToString(percent_identity).size()); } } //??? 
- if(m_Option & eShowSumN){ + if(m_Option & eShowSumN){ if (m_Option & eHtml) { out << ""; } - out << kTwoSpaceMargin << (*iter)->sum_n; + out << kTwoSpaceMargin << (*iter)->sum_n; if (m_Option & eHtml) { out << ""; } else { - CAlignFormatUtil::AddSpace(out, m_MaxSumNLen - + CAlignFormatUtil::AddSpace(out, m_MaxSumNLen - NStr::IntToString((*iter)->sum_n).size()); } } - + if((m_Option & eLinkout) && (m_Option & eHtml)){ - + out << ""; bool first_time = true; ITERATE(list, iter_linkout, sdl->linkout_list){ @@ -1304,7 +1393,7 @@ void CShowBlastDefline::x_DisplayDeflineTableBody(CNcbiOstream & out) if (m_Option & eHtml) { out << ""; } - if (!(m_Option & eHtml)) { + if (!(m_Option & eHtml)) { out <<"\n"; } delete sdl; @@ -1315,18 +1404,18 @@ void CShowBlastDefline::x_DisplayDeflineTableBody(CNcbiOstream & out) void CShowBlastDefline::DisplayBlastDeflineTable(CNcbiOstream & out) { x_InitDeflineTable(); - if(m_StructureLinkout){ + if(m_StructureLinkout){ char buf[512]; sprintf(buf, kStructure_Overview, m_Rid.c_str(), 0, 0, m_CddRid.c_str(), "overview", m_EntrezTerm == NcbiEmptyString ? 
"none": m_EntrezTerm.c_str()); - out << buf <<"\n\n"; - } + out << buf <<"\n\n"; + } x_DisplayDeflineTable(out); } -CShowBlastDefline::SScoreInfo* +CShowBlastDefline::SScoreInfo* CShowBlastDefline::x_GetScoreInfo(const CSeq_align& aln, int blast_rank) { string evalue_buf, bit_score_buf, total_bit_score_buf, raw_score_buf; @@ -1334,11 +1423,11 @@ CShowBlastDefline::x_GetScoreInfo(const CSeq_align& aln, int blast_rank) double bits = 0; double evalue = 0; int sum_n = 0; - int num_ident = 0; + int num_ident = 0; list use_this_seq; - use_this_seq.clear(); - CAlignFormatUtil::GetAlnScores(aln, score, bits, evalue, sum_n, + use_this_seq.clear(); + CAlignFormatUtil::GetAlnScores(aln, score, bits, evalue, sum_n, num_ident, use_this_seq); CAlignFormatUtil::GetScoreString(evalue, bits, 0, score, @@ -1355,21 +1444,21 @@ CShowBlastDefline::x_GetScoreInfo(const CSeq_align& aln, int blast_rank) score_info->raw_score_string = raw_score_buf; score_info->evalue_string = evalue_buf; score_info->id = &(aln.GetSeq_id(1)); - score_info->blast_rank = blast_rank+1; - score_info->subjRange = CRange(0,0); + score_info->blast_rank = blast_rank+1; + score_info->subjRange = CRange(0,0); score_info->flip = false; return score_info.release(); } -CShowBlastDefline::SScoreInfo* +CShowBlastDefline::SScoreInfo* CShowBlastDefline::x_GetScoreInfoForTable(const CSeq_align_set& aln, int blast_rank) { string evalue_buf, bit_score_buf, total_bit_score_buf, raw_score_buf; - + if(aln.Get().empty()) return NULL; - auto_ptr score_info(new SScoreInfo); + auto_ptr score_info(new SScoreInfo); CAlignFormatUtil::SSeqAlignSetCalcParams* seqSetInfo = CAlignFormatUtil::GetSeqAlignSetCalcParamsFromASN(aln); if(seqSetInfo->hspNum == 0) {//calulated params are not in ASN - calculate now @@ -1381,53 +1470,53 @@ CShowBlastDefline::x_GetScoreInfoForTable(const CSeq_align_set& aln, int blast_r raw_score_buf); score_info->id = seqSetInfo->id; - score_info->total_bit_string = total_bit_score_buf; - score_info->bit_string = 
bit_score_buf; + score_info->total_bit_string = total_bit_score_buf; + score_info->bit_string = bit_score_buf; score_info->evalue_string = evalue_buf; score_info->percent_coverage = seqSetInfo->percent_coverage; score_info->percent_identity = seqSetInfo->percent_identity; score_info->hspNum = seqSetInfo->hspNum; score_info->totalLen = seqSetInfo->totalLen; - + score_info->use_this_seqid = seqSetInfo->use_this_seq; score_info->sum_n = seqSetInfo->sum_n == -1 ? 1:seqSetInfo->sum_n ; score_info->raw_score_string = raw_score_buf;//check if used - score_info->match = seqSetInfo->match; //check if used + score_info->match = seqSetInfo->match; //check if used score_info->align_length = seqSetInfo->align_length;//check if used score_info->master_covered_length = seqSetInfo->master_covered_length;//check if used - + score_info->subjRange = seqSetInfo->subjRange; //check if used - score_info->flip = seqSetInfo->flip;//check if used + score_info->flip = seqSetInfo->flip;//check if used - score_info->blast_rank = blast_rank+1; + score_info->blast_rank = blast_rank+1; return score_info.release(); } -vector +vector CShowBlastDefline::GetDeflineInfo(vector< CConstRef > &seqIds) { vector sdlVec; for(size_t i = 0; i < seqIds.size(); i++) { list use_this_seq; CShowBlastDefline::SDeflineInfo* sdl = x_GetDeflineInfo(seqIds[i], use_this_seq, i + 1 ); - sdlVec.push_back(sdl); + sdlVec.push_back(sdl); } return sdlVec; } -CShowBlastDefline::SDeflineInfo* +CShowBlastDefline::SDeflineInfo* CShowBlastDefline::x_GetDeflineInfo(CConstRef id, list &use_this_seqid, int blast_rank) { SDeflineInfo* sdl = NULL; sdl = new SDeflineInfo; sdl->id = id; sdl->defline = "Unknown"; - + try{ const CBioseq_Handle& handle = m_ScopeRef->GetBioseqHandle(*id); x_FillDeflineAndId(handle, *id, use_this_seqid, sdl, blast_rank); @@ -1436,10 +1525,10 @@ CShowBlastDefline::x_GetDeflineInfo(CConstRef id, list &use_thi sdl->is_new = false; sdl->was_checked = false; sdl->linkout = 0; - + if((*id).Which() == 
CSeq_id::e_Gi){ sdl->gi = (*id).GetGi(); - } else { + } else { sdl->gi = ZERO_GI; } sdl->id = id; @@ -1452,71 +1541,71 @@ CShowBlastDefline::x_GetDeflineInfo(CConstRef id, list &use_thi CRange seqRange(0,0); CAlignFormatUtil::SSeqURLInfo seqUrlInfo(user_url,m_BlastType,m_IsDbNa,m_Database,m_Rid, m_QueryNumber,sdl->gi,accession,0,blast_rank,false,(m_Option & eNewTargetWindow) ? true : false,seqRange,false,0); - sdl->id_url = CAlignFormatUtil::GetIDUrl(&seqUrlInfo,*id,*m_ScopeRef); + sdl->id_url = CAlignFormatUtil::GetIDUrl(&seqUrlInfo,*id,*m_ScopeRef); sdl->score_url = NcbiEmptyString; } } - + return sdl; } -void CShowBlastDefline::x_DisplayDeflineTableTemplate(CNcbiOstream & out) +void CShowBlastDefline::x_DisplayDeflineTableTemplate(CNcbiOstream & out) { bool first_new =true; int prev_database_type = 0, cur_database_type = 0; bool is_first = true; - // Mixed db is genomic + transcript and this does not apply to proteins. - bool is_mixed_database = (m_IsDbNa == true)? CAlignFormatUtil::IsMixedDatabase(*m_Ctx): false; + // Mixed db is genomic + transcript and this does not apply to proteins. + bool is_mixed_database = (m_Ctx && m_IsDbNa == true)? CAlignFormatUtil::IsMixedDatabase(*m_Ctx): false; string rowType = "odd"; - string subHeaderID; + string subHeaderID; ITERATE(vector, iter, m_ScoreList){ SDeflineInfo* sdl = x_GetDeflineInfo((*iter)->id, (*iter)->use_this_seqid, (*iter)->blast_rank); cur_database_type = (sdl->linkout & eGenomicSeq); - string subHeader; + string subHeader; bool formatHeaderSort = !is_first && (prev_database_type != cur_database_type); - if (is_mixed_database && (is_first || formatHeaderSort)) { + if (is_mixed_database && (is_first || formatHeaderSort)) { subHeader = x_FormatSeqSetHeaders(cur_database_type, formatHeaderSort); subHeaderID = cur_database_type ? 
"GnmSeq" : "Transcr"; //This is done for 508 complience - subHeader = CAlignFormatUtil::MapTemplate(subHeader,"defl_header_id",subHeaderID); - } + subHeader = CAlignFormatUtil::MapTemplate(subHeader,"defl_header_id",subHeaderID); + } prev_database_type = cur_database_type; - + string defLine = x_FormatDeflineTableLine(sdl,*iter,first_new); //This is done for 508 complience - defLine = CAlignFormatUtil::MapTemplate(defLine,"defl_header_id",subHeaderID); + defLine = CAlignFormatUtil::MapTemplate(defLine,"defl_header_id",subHeaderID); string firstSeq = (is_first) ? "firstSeq" : ""; - defLine = CAlignFormatUtil::MapTemplate(defLine,"firstSeq",firstSeq); - defLine = CAlignFormatUtil::MapTemplate(defLine,"trtp",rowType); + defLine = CAlignFormatUtil::MapTemplate(defLine,"firstSeq",firstSeq); + defLine = CAlignFormatUtil::MapTemplate(defLine,"trtp",rowType); rowType = (rowType == "odd") ? "even" : "odd"; - + if(!subHeader.empty()) { defLine = subHeader + defLine; } is_first = false; out << defLine; - + delete sdl; } } string CShowBlastDefline::x_FormatSeqSetHeaders(int isGenomicSeq, bool formatHeaderSort) { - string seqSetType = isGenomicSeq ? "Genomic sequences" : "Transcripts"; + string seqSetType = isGenomicSeq ? "Genomic sequences" : "Transcripts"; string subHeader = CAlignFormatUtil::MapTemplate(m_DeflineTemplates->subHeaderTmpl,"defl_seqset_type",seqSetType); if (formatHeaderSort) { int database_sort = isGenomicSeq ? 
CAlignFormatUtil::eGenomicFirst : CAlignFormatUtil::eNonGenomicFirst; string deflnSubHeaderSort = CAlignFormatUtil::MapTemplate(m_DeflineTemplates->subHeaderSort,"database_sort",database_sort); - subHeader = CAlignFormatUtil::MapTemplate(subHeader,"defl_header_sort",deflnSubHeaderSort); + subHeader = CAlignFormatUtil::MapTemplate(subHeader,"defl_header_sort",deflnSubHeaderSort); } else { subHeader = CAlignFormatUtil::MapTemplate(subHeader,"defl_header_sort",""); } - return subHeader; + return subHeader; } @@ -1528,56 +1617,56 @@ string CShowBlastDefline::x_FormatDeflineTableLine(SDeflineInfo* sdl,SScoreInfo* if(!sdl->id.Empty()){ if(!(sdl->id->AsFastaString().find("gnl|BL_ORD_ID") != string::npos || sdl->id->AsFastaString().find("lcl|Subject_") != string::npos)) { - sdl->id->GetLabel(&seqid, CSeq_id::eContent); + sdl->id->GetLabel(&seqid, CSeq_id::eContent); } - } - - if(sdl->id_url != NcbiEmptyString) { + } + + if(sdl->id_url != NcbiEmptyString) { string seqInfo = CAlignFormatUtil::MapTemplate(m_DeflineTemplates->seqInfoTmpl,"dfln_url",sdl->id_url); string trgt = (m_Option & eNewTargetWindow) ? "TARGET=\"EntrezView\"" : ""; seqInfo = CAlignFormatUtil::MapTemplate(seqInfo,"dfln_target",trgt); - defLine = CAlignFormatUtil::MapTemplate(defLine,"seq_info",seqInfo); - defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_gi",dflGi); - defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_seqid",seqid); + defLine = CAlignFormatUtil::MapTemplate(defLine,"seq_info",seqInfo); + defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_gi",dflGi); + defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_seqid",seqid); } - else { - defLine = CAlignFormatUtil::MapTemplate(defLine,"seq_info",dflGi + seqid); + else { + defLine = CAlignFormatUtil::MapTemplate(defLine,"seq_info",dflGi + seqid); } - string descr = (!sdl->defline.empty()) ? sdl->defline : "None provided"; + string descr = (!sdl->defline.empty()) ? 
sdl->defline : "None provided"; s_LimitDescrLength(descr); - defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_defline",CHTMLHelper::HTMLEncode(descr)); + defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_defline",CHTMLHelper::HTMLEncode(descr)); - descr = (!sdl->fullDefline.empty()) ? sdl->fullDefline : seqid; + descr = (!sdl->fullDefline.empty()) ? sdl->fullDefline : seqid; s_LimitDescrLength(descr); defLine = CAlignFormatUtil::MapTemplate(defLine,"full_dfln_defline",CHTMLHelper::HTMLEncode(descr)); - if(sdl->score_url != NcbiEmptyString) { + if(sdl->score_url != NcbiEmptyString) { string scoreInfo = CAlignFormatUtil::MapTemplate(m_DeflineTemplates->scoreInfoTmpl,"score_url",sdl->score_url); scoreInfo = CAlignFormatUtil::MapTemplate(scoreInfo,"bit_string",iter->bit_string); scoreInfo = CAlignFormatUtil::MapTemplate(scoreInfo,"score_seqid",seqid); - defLine = CAlignFormatUtil::MapTemplate(defLine,"score_info",scoreInfo); + defLine = CAlignFormatUtil::MapTemplate(defLine,"score_info",scoreInfo); } - else { - defLine = CAlignFormatUtil::MapTemplate(defLine,"score_info",iter->bit_string); + else { + defLine = CAlignFormatUtil::MapTemplate(defLine,"score_info",iter->bit_string); } - /*****************This block of code is for future use with AJAX begin***************************/ - string deflId,deflFrmID,deflFastaSeq,deflAccs; - if(sdl->gi == ZERO_GI) { + /*****************This block of code is for future use with AJAX begin***************************/ + string deflId,deflFrmID,deflFastaSeq,deflAccs; + if(sdl->gi == ZERO_GI) { sdl->id->GetLabel(& deflId, CSeq_id::eContent); - deflFrmID = CAlignFormatUtil::GetLabel(sdl->id);//Just accession without db part like GNOMON: or ti: + deflFrmID = CAlignFormatUtil::GetLabel(sdl->id);//Just accession without db part like GNOMON: or ti: deflFastaSeq = NStr::TruncateSpaces(sdl->alnIDFasta); deflAccs = sdl->id->AsFastaString(); } - else { - deflFrmID = deflId = NStr::NumericToString(sdl->gi); + else { + deflFrmID = deflId 
= NStr::NumericToString(sdl->gi); deflFastaSeq = "gi|" + NStr::NumericToString(sdl->gi); - deflFastaSeq = NStr::TruncateSpaces(sdl->alnIDFasta); + deflFastaSeq = NStr::TruncateSpaces(sdl->alnIDFasta); sdl->id->GetLabel(&deflAccs, CSeq_id::eContent); } //Setup applog info structure if(m_AppLogInfo && (m_AppLogInfo->currInd < m_AppLogInfo->topMatchesNum)) { m_AppLogInfo->deflIdVec.push_back(deflId); - m_AppLogInfo->accVec.push_back(deflAccs); + m_AppLogInfo->accVec.push_back(deflAccs); m_AppLogInfo->taxidVec.push_back(NStr::IntToString(sdl->taxid)); m_AppLogInfo->queryCoverageVec.push_back(NStr::IntToString(iter->percent_coverage)); m_AppLogInfo->percentIdentityVec.push_back(NStr::IntToString(iter->percent_identity)); @@ -1586,83 +1675,203 @@ string CShowBlastDefline::x_FormatDeflineTableLine(SDeflineInfo* sdl,SScoreInfo* //If gi - deflFrmID and deflId are the same and equal to gi "555",deflFastaSeq will have "gi|555" //If gnl - deflFrmID=number, deflId=ti:number,deflFastaSeq=gnl|xxx - //like "268252125","ti:268252125","gnl|ti|961433.m" or "961433.m" and "GNOMON:961433.m" "gnl|GNOMON|961433.m" + //like "268252125","ti:268252125","gnl|ti|961433.m" or "961433.m" and "GNOMON:961433.m" "gnl|GNOMON|961433.m" defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_id",deflId); - defLine = CAlignFormatUtil::MapTemplate(defLine,"dflnFrm_id",deflFrmID); + defLine = CAlignFormatUtil::MapTemplate(defLine,"dflnFrm_id",deflFrmID); defLine = CAlignFormatUtil::MapTemplate(defLine,"dflnFASTA_id",deflFastaSeq); - defLine = CAlignFormatUtil::MapTemplate(defLine,"dflnAccs",deflAccs); - defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_rid",m_Rid); - defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_hspnum",iter->hspNum); - defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_alnLen",iter->totalLen); - defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_blast_rank",m_StartIndex + iter->blast_rank); - - /*****************This block of code is for future use with AJAX 
end***************************/ + defLine = CAlignFormatUtil::MapTemplate(defLine,"dflnAccs",deflAccs); + defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_rid",m_Rid); + defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_hspnum",iter->hspNum); + defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_alnLen",iter->totalLen); + defLine = CAlignFormatUtil::MapTemplate(defLine,"dfln_blast_rank",m_StartIndex + iter->blast_rank); + + /*****************This block of code is for future use with AJAX end***************************/ defLine = CAlignFormatUtil::MapTemplate(defLine,"total_bit_string",iter->total_bit_string); - - + + defLine = CAlignFormatUtil::MapTemplate(defLine,"percent_coverage",NStr::IntToString(iter->percent_coverage)); defLine = CAlignFormatUtil::MapTemplate(defLine,"evalue_string",iter->evalue_string); - if(m_Option & eShowPercentIdent){ + if(m_Option & eShowPercentIdent){ defLine = CAlignFormatUtil::MapTemplate(defLine,"percent_identity",NStr::IntToString(iter->percent_identity)); } - - if(m_Option & eShowSumN){ + + if(m_Option & eShowSumN){ defLine = CAlignFormatUtil::MapTemplate(defLine,"sum_n",NStr::IntToString(iter->sum_n)); } string links; //sdl->linkout_list may contain linkouts + mapview link + seqview link - ITERATE(list, iter_linkout, sdl->linkout_list){ + ITERATE(list, iter_linkout, sdl->linkout_list){ links += *iter_linkout; - } - defLine = CAlignFormatUtil::MapTemplate(defLine,"linkout",links); - + } + defLine = CAlignFormatUtil::MapTemplate(defLine,"linkout",links); + return defLine; } + string CShowBlastDefline::x_FormatPsi(SDeflineInfo* sdl, bool &first_new) { string defline = m_DeflineTemplates->defLineTmpl; - string show_new,psi_new,psi_new_accesible,show_checked,replaceBy; - if((m_Option & eShowNewSeqGif)) { + string show_new,psi_new,psi_new_accesible,show_checked,replaceBy; + if((m_Option & eShowNewSeqGif)) { replaceBy = (sdl->is_new && first_new) ? 
m_DeflineTemplates->psiFirstNewAnchorTmpl : ""; first_new = (sdl->is_new && first_new) ? false : first_new; - if (!sdl->is_new) { - show_new = "hidden"; + if (!sdl->is_new) { + show_new = "hidden"; } - if (sdl->is_new && m_StepNumber > 1) { - psi_new = "psi_new"; + if (sdl->is_new && m_StepNumber > 1) { + psi_new = "psi_new"; psi_new_accesible = "psiNw"; } - + if(!sdl->was_checked) { - show_checked = "hidden"; + show_checked = "hidden"; } - defline = CAlignFormatUtil::MapTemplate(defline,"first_new",replaceBy); + defline = CAlignFormatUtil::MapTemplate(defline,"first_new",replaceBy); defline = CAlignFormatUtil::MapTemplate(defline,"psi_new_gi",show_new); defline = CAlignFormatUtil::MapTemplate(defline,"psi_new_gi_hl",psi_new); defline = CAlignFormatUtil::MapTemplate(defline,"psi_new_gi_accs",psi_new_accesible);//insert for accesibilty - defline = CAlignFormatUtil::MapTemplate(defline,"psi_checked_gi",show_checked); + defline = CAlignFormatUtil::MapTemplate(defline,"psi_checked_gi",show_checked); } - replaceBy = (m_Option & eCheckboxChecked) ? m_DeflineTemplates->psiGoodGiHiddenTmpl : "";//<@psi_good_gi@> + replaceBy = (m_Option & eCheckboxChecked) ? m_DeflineTemplates->psiGoodGiHiddenTmpl : "";//<@psi_good_gi@> defline = CAlignFormatUtil::MapTemplate(defline,"psi_good_gi",replaceBy); replaceBy = (m_Option & eCheckboxChecked) ? 
"checked=\"checked\"" : ""; - defline = CAlignFormatUtil::MapTemplate(defline,"gi_checked",replaceBy); + defline = CAlignFormatUtil::MapTemplate(defline,"gi_checked",replaceBy); if(sdl->gi > ZERO_GI) { defline = CAlignFormatUtil::MapTemplate(defline,"psiGi",NStr::NumericToString(sdl->gi)); } else { defline = CAlignFormatUtil::MapTemplate(defline,"psiGi",sdl->textSeqID); } - + return defline; } + + + +void CShowBlastDefline::x_InitFormattingInfo(SScoreInfo* sci) +{ + SDeflineFormattingInfo* sdlFormatInfo = new SDeflineFormattingInfo; + SDeflineInfo* sdl = x_GetDeflineInfo(sci->id, sci->use_this_seqid, sci->blast_rank); + + string dflGi = (m_Option & eShowGi) && (sdl->gi > ZERO_GI) ? "gi|" + NStr::NumericToString(sdl->gi) + "|" : ""; + string seqid; + if(!sdl->id.Empty()){ + if(!(sdl->id->AsFastaString().find("gnl|BL_ORD_ID") != string::npos || sdl->id->AsFastaString().find("lcl|Subject_") != string::npos)) { + sdl->id->GetLabel(&seqid, CSeq_id::eContent); + } + } + + sdlFormatInfo->dfln_url = sdl->id_url; + + sdlFormatInfo->dfln_rid = m_Rid; + sdlFormatInfo->dfln_gi = dflGi; + sdlFormatInfo->dfln_seqid = seqid; + + + string descr = (!sdl->defline.empty()) ? sdl->defline : "None provided"; + s_LimitDescrLength(descr); + + sdlFormatInfo->dfln_defline = CHTMLHelper::HTMLEncode(descr); + + descr = (!sdl->fullDefline.empty()) ? 
sdl->fullDefline : seqid; + s_LimitDescrLength(descr); + sdlFormatInfo->full_dfln_defline = CHTMLHelper::HTMLEncode(descr); + + + + string deflId,deflFrmID,deflFastaSeq,deflAccs; + if(sdl->gi == ZERO_GI) { + sdl->id->GetLabel(& deflId, CSeq_id::eContent); + deflFrmID = CAlignFormatUtil::GetLabel(sdl->id);//Just accession without db part like GNOMON: or ti: + deflFastaSeq = NStr::TruncateSpaces(sdl->alnIDFasta); + deflAccs = sdl->id->AsFastaString(); + } + else { + deflFrmID = deflId = NStr::NumericToString(sdl->gi); + deflFastaSeq = "gi|" + NStr::NumericToString(sdl->gi); + deflFastaSeq = NStr::TruncateSpaces(sdl->alnIDFasta); + sdl->id->GetLabel(&deflAccs, CSeq_id::eContent); + } + + sdlFormatInfo->dfln_id = deflId; + sdlFormatInfo->dflnFrm_id = deflFrmID; + sdlFormatInfo->dflnFASTA_id = deflFastaSeq; + sdlFormatInfo->dflnAccs =deflAccs; + + sdlFormatInfo->score_info = sci->bit_string; + sdlFormatInfo->dfln_hspnum = NStr::IntToString(sci->hspNum); + sdlFormatInfo->dfln_alnLen = NStr::NumericToString(sci->totalLen); + sdlFormatInfo->dfln_blast_rank = NStr::IntToString(m_StartIndex + sci->blast_rank); + sdlFormatInfo->total_bit_string = sci->total_bit_string; + sdlFormatInfo->percent_coverage = NStr::IntToString(sci->percent_coverage); + sdlFormatInfo->evalue_string = sci->evalue_string; + sdlFormatInfo->percent_identity = NStr::IntToString(sci->percent_identity); + m_SdlFormatInfoVec.push_back(sdlFormatInfo); +} + + +vector CShowBlastDefline::GetFormattingInfo(void) +{ + /*Note we can't just show each alnment as we go because we will + need to show defline only once for all hsp's with the same id*/ + + bool is_first_aln = true; + size_t num_align = 0; + CConstRef previous_id, subid; + + CSeq_align_set hit; + m_QueryLength = 1; + + //prepare defline + + + for (CSeq_align_set::Tdata::const_iterator iter = m_AlnSetRef->Get().begin(); + iter != m_AlnSetRef->Get().end() && num_align < m_NumToShow; + iter++) + { + + if (is_first_aln) { + m_QueryLength = m_MasterRange ? 
m_MasterRange->GetLength() : m_ScopeRef->GetBioseqHandle((*iter)->GetSeq_id(0)).GetBioseqLength(); + } + + subid = &((*iter)->GetSeq_id(1)); + + if(!is_first_aln && !(subid->Match(*previous_id))) { + + SScoreInfo* sci = x_GetScoreInfoForTable(hit, num_align); + if(sci) { + x_InitFormattingInfo(sci); + hit.Set().clear(); + } + + num_align++; // Only increment if new subject ID found. + } + if (num_align < m_NumToShow) { //no adding if number to show already reached + hit.Set().push_back(*iter); + } + is_first_aln = false; + previous_id = subid; + } + //the last hit + SScoreInfo* sci = x_GetScoreInfoForTable(hit, num_align); + + if(sci) { + x_InitFormattingInfo(sci); + hit.Set().clear(); + } + + return m_SdlFormatInfoVec; +} + + END_SCOPE(align_format) END_NCBI_SCOPE diff --git a/c++/src/objtools/align_format/tabular.cpp b/c++/src/objtools/align_format/tabular.cpp index d6ffe7e1..eae5bf0d 100644 --- a/c++/src/objtools/align_format/tabular.cpp +++ b/c++/src/objtools/align_format/tabular.cpp @@ -1,4 +1,4 @@ -/* $Id: tabular.cpp 573450 2018-10-29 11:26:05Z ivanov $ +/* $Id: tabular.cpp 577758 2019-01-08 18:09:41Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -113,16 +113,16 @@ void CBlastTabularInfo::x_ResetFields() m_QueryCovSeqalign = -1; } -void CBlastTabularInfo::x_SetFieldDelimiter(EFieldDelimiter delim) +void CBlastTabularInfo::x_SetFieldDelimiter(EFieldDelimiter delim,string customDelim) { switch (delim) { - case eSpace: m_FieldDelimiter = ' '; break; - case eComma: m_FieldDelimiter = ','; break; - default: m_FieldDelimiter = '\t'; break; // eTab or unsupported value + case eSpace: m_FieldDelimiter = " "; break; + case eComma: m_FieldDelimiter = ","; break; + case eCustom: m_FieldDelimiter = customDelim; break; + default: m_FieldDelimiter = "\t"; break; // eTab or unsupported value } } - void CBlastTabularInfo::x_CheckTaxDB() { if( x_IsFieldRequested(eSubjectSciNames) || diff --git 
a/c++/src/objtools/alnmgr/score_builder_base.cpp b/c++/src/objtools/alnmgr/score_builder_base.cpp index 08470b6e..0c8a9e2b 100644 --- a/c++/src/objtools/alnmgr/score_builder_base.cpp +++ b/c++/src/objtools/alnmgr/score_builder_base.cpp @@ -1,4 +1,4 @@ -/* $Id: score_builder_base.cpp 495880 2016-03-22 14:14:37Z chetvern $ +/* $Id: score_builder_base.cpp 576144 2018-12-11 15:05:52Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -536,7 +536,7 @@ static bool s_IsProteinToGenomic(CScope& scope, static void s_GetPercentCoverage(CScope& scope, const CSeq_align& align, const CRangeCollection& ranges, - double* pct_coverage) + double* pct_coverage, unsigned query = 0) { if (!ranges.empty() && ranges.begin()->IsWhole() && align.GetNamedScore(CSeq_align::eScore_PercentCoverage, @@ -573,7 +573,17 @@ static void s_GetPercentCoverage(CScope& scope, const CSeq_align& align, } if ( !seq_len ) { - seq_len = scope.GetSequenceLength(align.GetSeq_id(0)); + const auto &query_id = align.GetSeq_id(query); + const objects::CBioseq_Handle& bsh_seq = scope.GetBioseqHandle(query_id); + if (!bsh_seq) { + *pct_coverage = 0; + NCBI_THROW(CSeqalignException, eInvalidSeqId, + "Can't get sequence data for " + query_id.AsFastaString() + + " in order to calculate coverage"); + } + seq_len = bsh_seq.GetBioseqLength(); + + //seq_len = align.GetSeqRange(query).GetLength(); // // determine if the alignment is protein-to-genomic @@ -686,30 +696,34 @@ double CScoreBuilderBase::GetPercentIdentity(CScope& scope, double CScoreBuilderBase::GetPercentCoverage(CScope& scope, - const CSeq_align& align) + const CSeq_align& align, + unsigned query) { double pct_coverage = 0; s_GetPercentCoverage(scope, align, CRangeCollection(TSeqRange::GetWhole()), - &pct_coverage); + &pct_coverage, + query); return pct_coverage; } double CScoreBuilderBase::GetPercentCoverage(CScope& scope, const CSeq_align& align, - const TSeqRange& range) + const TSeqRange& 
range, + unsigned query) { double pct_coverage = 0; - s_GetPercentCoverage(scope, align, CRangeCollection(range), &pct_coverage); + s_GetPercentCoverage(scope, align, CRangeCollection(range), &pct_coverage, query); return pct_coverage; } double CScoreBuilderBase::GetPercentCoverage(CScope& scope, const CSeq_align& align, - const CRangeCollection& ranges) + const CRangeCollection& ranges, + unsigned query) { double pct_coverage = 0; - s_GetPercentCoverage(scope, align, ranges, &pct_coverage); + s_GetPercentCoverage(scope, align, ranges, &pct_coverage, query); return pct_coverage; } diff --git a/c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp b/c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp index 62b54684..be2f021a 100644 --- a/c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp +++ b/c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp @@ -1,4 +1,4 @@ -/* $Id: blastdb_dataextract.cpp 574128 2018-11-06 16:43:02Z ivanov $ +/* $Id: blastdb_dataextract.cpp 581743 2019-03-05 16:46:27Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -798,22 +798,6 @@ void CBlastDBExtractor::SetConfig(TSeqRange range, objects::ENa_strand strand, m_FiltAlgoId = filt_algo_id; } -static bool s_MatchPDBId(const CSeq_id & target_id, const CSeq_id & defline_id) -{ - if(defline_id.IsPdb()) { - if(target_id.GetPdb().IsSetChain()) { - if(defline_id.GetPdb().IsSetChain()) { - return ((target_id.GetPdb().GetChain() == defline_id.GetPdb().GetChain()) && - PNocase().Equals(target_id.GetPdb().GetMol(), defline_id.GetPdb().GetMol())); - } - } - else { - return PNocase().Equals(target_id.GetPdb().GetMol(), defline_id.GetPdb().GetMol()); - } - } - return false; -} - void CBlastDeflineUtil::ExtractDataFromBlastDeflineSet(const CBlast_def_line_set & dl_set, vector & results, BlastDeflineFields fields, @@ -823,11 +807,9 @@ void CBlastDeflineUtil::ExtractDataFromBlastDeflineSet(const 
CBlast_def_line_set CSeq_id target_seq_id (target_id, CSeq_id::fParse_PartialOK | CSeq_id::fParse_Default); Int8 num_id = NStr::StringToNumeric(target_id, NStr::fConvErr_NoThrow); bool can_be_gi = errno ? false: true; - bool isPDBId = target_seq_id.IsPdb(); ITERATE(CBlast_def_line_set::Tdata, itr, dl_set.Get()) { ITERATE(CBlast_def_line::TSeqid, id, (*itr)->GetSeqid()) { - if ((*id)->Match(target_seq_id) || (can_be_gi && (*id)->IsGi() && ((*id)->GetGi() == num_id)) - || (isPDBId && s_MatchPDBId(target_seq_id, **id))) { + if ((*id)->Match(target_seq_id) || (can_be_gi && (*id)->IsGi() && ((*id)->GetGi() == num_id))) { CBlastDeflineUtil::ExtractDataFromBlastDefline( **itr, results, fields, use_long_id); return; } diff --git a/c++/src/objtools/blast/blastdb_format/unit_test/data/pdb_ids.test b/c++/src/objtools/blast/blastdb_format/unit_test/data/pdb_ids.test new file mode 100644 index 00000000..a0779463 --- /dev/null +++ b/c++/src/objtools/blast/blastdb_format/unit_test/data/pdb_ids.test @@ -0,0 +1,15 @@ +0 5B64_B 9606 +1 4X65_m 10480 +1 6CAP_MM 562 +6 2KA9_C 10480 +8 2W80_eF 9234 +10 2V64_B1 43148 +10 2V64_b 4530 +10 2V64_i4 7819 +10 1KLQ_B 562 +13 1CLH_AA 4530 +14 1K8A_1 9606 +14 1KC8_12 10480 +14 1Q82_1b 60011 +14 1W2B_YY 7819 +14 3OW2_Y 10480 diff --git a/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pdb b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pdb new file mode 100644 index 00000000..c0a8503f Binary files /dev/null and b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pdb differ diff --git a/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.phr b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.phr new file mode 100644 index 00000000..2578737b Binary files /dev/null and b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.phr differ diff --git a/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pin 
b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pin new file mode 100644 index 00000000..93c233bd Binary files /dev/null and b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pin differ diff --git a/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pog b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pog new file mode 100644 index 00000000..7c3fdc5a Binary files /dev/null and b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pog differ diff --git a/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pos b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pos new file mode 100644 index 00000000..8010728d Binary files /dev/null and b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pos differ diff --git a/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pot b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pot new file mode 100644 index 00000000..f0ec3167 Binary files /dev/null and b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pot differ diff --git a/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.psq b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.psq new file mode 100644 index 00000000..bf1a1ead Binary files /dev/null and b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.psq differ diff --git a/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.ptf b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.ptf new file mode 100644 index 00000000..7bc7b83d Binary files /dev/null and b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.ptf differ diff --git a/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pto b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pto new file mode 100644 index 00000000..7358ecbd Binary files /dev/null and 
b/c++/src/objtools/blast/blastdb_format/unit_test/data/test_pdb_v5.pto differ diff --git a/c++/src/objtools/blast/blastdb_format/unit_test/seq_formatter_unit_test.cpp b/c++/src/objtools/blast/blastdb_format/unit_test/seq_formatter_unit_test.cpp index 7009af86..08c94852 100644 --- a/c++/src/objtools/blast/blastdb_format/unit_test/seq_formatter_unit_test.cpp +++ b/c++/src/objtools/blast/blastdb_format/unit_test/seq_formatter_unit_test.cpp @@ -383,4 +383,43 @@ BOOST_AUTO_TEST_CASE(TestMaskedSequenceData) } } + + +BOOST_AUTO_TEST_CASE(TestPDBIds) +{ + CTmpFile tmpfile; + const string& tmp_fname = tmpfile.GetFileName(); + CSeqDB db("data/test_pdb_v5", CSeqDB::eProtein); + const string format_spec("%i %T"); + ofstream out(tmp_fname.c_str()); + CBlastDB_SeqFormatter f(format_spec, db, out); + CBlastDB_FormatterConfig config; + CNcbiIfstream in_ids("data/pdb_ids.test"); + string line(kEmptyStr); + while (getline(in_ids, line)) { + vector oids; + vector d; + NStr::Split(line, " ", d); + db.AccessionToOids(d[1], oids); + BOOST_REQUIRE_EQUAL(oids.size(), 1); + BOOST_REQUIRE_EQUAL(oids[0], NStr::StringToInt(d[0])); + f.Write(oids[0], config, d[1]); + } + out.close(); + + CNcbiIfstream in(tmp_fname.c_str()); + while (getline(in, line)) { + vector t; + NStr::Split(line, " ", t); + BOOST_REQUIRE_EQUAL(t[0].find("pdb|"),0); + } + + vector oids; + db.AccessionToOids("2V64", oids); + BOOST_REQUIRE_EQUAL(oids.size(), 2); + BOOST_REQUIRE_EQUAL(oids[0], 8); + BOOST_REQUIRE_EQUAL(oids[1], 10); + +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/c++/src/objtools/blast/gene_info_reader/file_utils.cpp b/c++/src/objtools/blast/gene_info_reader/file_utils.cpp index 06d0676f..a7f5bf37 100644 --- a/c++/src/objtools/blast/gene_info_reader/file_utils.cpp +++ b/c++/src/objtools/blast/gene_info_reader/file_utils.cpp @@ -1,4 +1,4 @@ -/* $Id: file_utils.cpp 523368 2017-01-03 13:12:11Z madden $ +/* $Id: file_utils.cpp 579191 2019-01-31 13:34:46Z ivanov $ * 
=========================================================================== * * PUBLIC DOMAIN NOTICE @@ -147,9 +147,9 @@ void CGeneFileUtils::ReadGeneInfo(CNcbiIfstream& in, } int nBufSize = k_nGeneAllDataLineMax; - char* pBuf = new char[nBufSize + 1]; - in.getline(pBuf, nBufSize); - string strBuf = string(pBuf); + AutoArray pBuf(nBufSize + 1); + in.getline(pBuf.get(), nBufSize); + CTempString strBuf(pBuf.get()); if (strBuf.length() < k_nGeneAllDataLineMin) { @@ -157,7 +157,7 @@ void CGeneFileUtils::ReadGeneInfo(CNcbiIfstream& in, "Gene data line appears to be too short: " + strBuf); } - vector strItems; + vector strItems; NStr::SplitByPattern(strBuf, "\t", strItems); if (strItems.size() != k_nGeneAllDataNumItems) diff --git a/c++/src/objtools/blast/seqdb_reader/seqdb.cpp b/c++/src/objtools/blast/seqdb_reader/seqdb.cpp index 74a47fa1..22d7bb21 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdb.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdb.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdb.cpp 559974 2018-03-16 15:23:03Z fongah2 $ +/* $Id: seqdb.cpp 579716 2019-02-05 16:53:21Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -112,7 +112,7 @@ s_SeqDBInit(const string & dbname, char prot_nucl, int oid_begin, int oid_end, - bool use_mmap, + bool use_atlas_lock, CSeqDBGiList * gi_list = NULL, CSeqDBNegativeList * neg_list = NULL, CSeqDBIdSet idset = CSeqDBIdSet()) @@ -126,10 +126,10 @@ s_SeqDBInit(const string & dbname, prot_nucl, oid_begin, oid_end, - use_mmap, gi_list, neg_list, - idset); + idset, + use_atlas_lock); } catch(CSeqDBException &) { prot_nucl = 'n'; @@ -141,10 +141,10 @@ s_SeqDBInit(const string & dbname, prot_nucl, oid_begin, oid_end, - use_mmap, gi_list, neg_list, - idset); + idset, + use_atlas_lock); } _ASSERT(impl); @@ -154,7 +154,9 @@ s_SeqDBInit(const string & dbname, CSeqDB::CSeqDB(const string & dbname, ESeqType seqtype, - CSeqDBGiList * gi_list) + CSeqDBGiList * gi_list, + bool 
use_atlas_lock) + { if (dbname.size() == 0) { NCBI_THROW(CSeqDBException, @@ -168,7 +170,7 @@ CSeqDB::CSeqDB(const string & dbname, seq_type, 0, 0, - true, + use_atlas_lock, gi_list); ////m_Impl->Verify(); @@ -184,17 +186,69 @@ CSeqDB::CSeqDB(const string & dbname, "Database name is required."); } + const bool kUseAtlasLock = true; m_Impl = s_SeqDBInit(dbname, s_GetSeqTypeChar(seqtype), 0, 0, - true, + kUseAtlasLock, NULL, nlist); ////m_Impl->Verify(); } +CSeqDB::CSeqDB(const string & dbname, + ESeqType seqtype, + CSeqDBGiList * gi_list, + CSeqDBNegativeList * nlist) +{ + if (dbname.size() == 0) { + NCBI_THROW(CSeqDBException, + eArgErr, + "Database name is required."); + } + + char seq_type = s_GetSeqTypeChar(seqtype); + + m_Impl = s_SeqDBInit(dbname, + seq_type, + 0, + 0, + true, + gi_list, + nlist); + + ////m_Impl->Verify(); +} + +CSeqDB::CSeqDB(const string & dbname, + ESeqType seqtype, + int oid_begin, + int oid_end, + CSeqDBGiList * gi_list, + CSeqDBNegativeList * nlist) +{ + if (dbname.size() == 0) { + NCBI_THROW(CSeqDBException, + eArgErr, + "Database name is required."); + } + + char seq_type = s_GetSeqTypeChar(seqtype); + + m_Impl = s_SeqDBInit(dbname, + seq_type, + oid_begin, + oid_end, + true, + gi_list, + nlist); + + ////m_Impl->Verify(); +} + + void CSeqDB::AccessionsToOids(const vector& accs, vector& oids) const { m_Impl->AccessionsToOids(accs, oids); @@ -244,11 +298,12 @@ CSeqDB::CSeqDB(const string & dbname, ESeqType seqtype, CSeqDBIdSet ids) } } + const bool kUseAtlasLock = true; m_Impl = s_SeqDBInit(dbname, s_GetSeqTypeChar(seqtype), 0, 0, - true, + kUseAtlasLock, pos.GetPointerOrNull(), neg.GetPointerOrNull(), ids); @@ -269,11 +324,12 @@ CSeqDB::CSeqDB(const vector & dbs, "Database name is required."); } + const bool kUseAtlasLock = true; m_Impl = s_SeqDBInit(dbname, s_GetSeqTypeChar(seqtype), 0, 0, - true, + kUseAtlasLock, gi_list); ////m_Impl->Verify(); @@ -292,11 +348,12 @@ CSeqDB::CSeqDB(const string & dbname, "Database name is required."); 
} + const bool kUseAtlasLock = true; m_Impl = s_SeqDBInit(dbname, s_GetSeqTypeChar(seqtype), oid_begin, oid_end, - use_mmap, + kUseAtlasLock, gi_list); ////m_Impl->Verify(); @@ -318,11 +375,12 @@ CSeqDB::CSeqDB(const vector & dbs, "Database name is required."); } + const bool kUseAtlasLock = true; m_Impl = s_SeqDBInit(dbname, s_GetSeqTypeChar(seqtype), oid_begin, oid_end, - use_mmap, + kUseAtlasLock, gi_list); ////m_Impl->Verify(); @@ -330,7 +388,7 @@ CSeqDB::CSeqDB(const vector & dbs, CSeqDB::CSeqDB() { - m_Impl = new CSeqDBImpl; + m_Impl = new CSeqDBImpl(); ////m_Impl->Verify(); } diff --git a/c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp b/c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp index 8b5f2a3a..37201c84 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdb_lmdb.cpp 559974 2018-03-16 15:23:03Z fongah2 $ +/* $Id: seqdb_lmdb.cpp 581731 2019-03-05 16:42:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -124,8 +124,8 @@ CBlastLMDBManager::~CBlastLMDBManager() { NON_CONST_ITERATE(list , itr, m_EnvList) { delete *itr; - itr = m_EnvList.erase(itr); } + m_EnvList.clear(); } CSeqDBLMDB::CSeqDBLMDB(const string & fname) @@ -517,6 +517,7 @@ void CSeqDBLMDB::GetOidsForTaxIds(const set & tax_ids, vector& tax_ids, vector } } +void CSeqDBLMDB::GetTaxIdsForOids(const vector & oids, set & tax_ids) const +{ + CMemoryFile oid_file(m_Oid2TaxIdsFile); + CLookupTaxIds lookup(oid_file); + for(unsigned int i=0; i < oids.size(); i++) { + vector taxid_list; + lookup.GetTaxIdListForOid(oids[i], taxid_list); + tax_ids.insert(taxid_list.begin(), taxid_list.end()); + } +} + + string BuildLMDBFileName(const string& basename, bool is_protein, bool use_index, unsigned int index) { if (basename.empty()) { diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbalias.cpp 
b/c++/src/objtools/blast/seqdb_reader/seqdbalias.cpp index 3803db9d..01d7d013 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbalias.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbalias.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbalias.cpp 536658 2017-05-22 15:48:20Z zaretska $ +/* $Id: seqdbalias.cpp 581731 2019-03-05 16:42:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1491,6 +1491,10 @@ public: m_NeedScan = true; return true; } + if (vars.find("TAXIDLIST") != vars.end()) { + m_NeedScan = true; + return true; + } // If none of those conditions is met, traversal proceeds. return false; @@ -1877,6 +1881,7 @@ void CSeqDBAliasNode::ComputeMasks(bool & has_filters) TVarList::iterator f_oid_iter = m_Values.find(string("FIRST_OID")); TVarList::iterator l_oid_iter = m_Values.find(string("LAST_OID")); TVarList::iterator mbit_iter = m_Values.find(string("MEMB_BIT")); + TVarList::iterator taxid_iter = m_Values.find(string("TAXIDLIST")); if (! 
m_DBList.empty()) { if (oid_iter != m_Values.end() || @@ -1885,7 +1890,8 @@ void CSeqDBAliasNode::ComputeMasks(bool & has_filters) sil_iter != m_Values.end() || f_oid_iter != m_Values.end() || l_oid_iter != m_Values.end() || - mbit_iter != m_Values.end()) { + mbit_iter != m_Values.end() || + taxid_iter != m_Values.end()) { has_filters = true; @@ -1982,6 +1988,20 @@ void CSeqDBAliasNode::ComputeMasks(bool & has_filters) m_NodeMasks.push_back(mask); } + if (taxid_iter != m_Values.end()) { + const string & taxid_name = taxid_iter->second; + if (taxid_name.find(" ") != taxid_name.npos) { + string msg = string("Alias file (") + m_DBPath.GetDirNameS() + + ") has multiple Tax ids lists (" + taxid_name + ")."; + NCBI_THROW(CSeqDBException, eFileErr, msg); + } + + CSeqDB_FileName lst(taxid_name); + CSeqDB_Path lst_path(m_DBPath, lst); + + CRef mask(new TMask(TMask::eTaxIdList, lst_path)); + m_NodeMasks.push_back(mask); + } } } diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbatlas.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbatlas.cpp index bef4cd3b..f21125e8 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbatlas.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbatlas.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbatlas.cpp 536658 2017-05-22 15:48:20Z zaretska $ +/* $Id: seqdbatlas.cpp 579717 2019-02-05 16:53:45Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -183,32 +183,19 @@ TOut SeqDB_CheckLength(TIn value) return result; } -CSeqDBAtlas::CSeqDBAtlas(bool use_mmap) - :m_CurAlloc (0), - m_LastFID (0), +CSeqDBAtlas::CSeqDBAtlas(bool use_atlas_lock) + :m_UseLock (use_atlas_lock), + m_CurAlloc (0), + m_Alloc (false), m_MaxFileSize (0), m_SearchPath (GenerateSearchPath()) { - m_Alloc = false; m_OpenedFilesCount = 0; m_MaxOpenedFilesCount = 0; } CSeqDBAtlas::~CSeqDBAtlas() { - //int openedFilesCount = GetOpenedFilseCount(); - //cerr << "********Before Cleaning: openedFilesCount=" << openedFilesCount << endl; - 
for (map::iterator it=m_FileMemMap.begin(); it!=m_FileMemMap.end(); ++it) { - string filename = it->first; - it->second->Unmap(); - //ChangeOpenedFilseCount(eFileCounterDecrement); - //cerr << "********Cleaning:Unmap CMemoryFile:" << filename << endl; - delete it->second; - } - //openedFilesCount = GetOpenedFilseCount(); - //cerr << "********After Cleaning: openedFilesCount=" << openedFilesCount << " maxOpenedFilesCount="<< m_MaxOpenedFilesCount << endl; - - // For now, and maybe permanently, enforce balance. _ASSERT(m_Pool.size() == 0); @@ -216,13 +203,36 @@ CSeqDBAtlas::~CSeqDBAtlas() // Erase 'manually allocated' elements - In debug mode, this will // not execute, because of the above test. - for(TPoolIter i = m_Pool.begin(); i != m_Pool.end(); i++) { + for(auto i = m_Pool.begin(); i != m_Pool.end(); i++) { delete[] (char*)((*i).first); } m_Pool.clear(); } +CMemoryFile* CSeqDBAtlas::GetMemoryFile(const string& fileName) +{ + { + std::lock_guard guard(m_FileMemMapMutex); + auto it = m_FileMemMap.find(fileName); + if (it != m_FileMemMap.end()) + return it->second.get(); + } + + unique_ptr file(new CMemoryFile(fileName)); + + { + std::lock_guard guard(m_FileMemMapMutex); + auto it = m_FileMemMap.find(fileName); + if (it != m_FileMemMap.end()) + return it->second.get(); + CMemoryFile* memFile = file.release(); + m_FileMemMap[fileName].reset(memFile); + ChangeOpenedFilseCount(CSeqDBAtlas::eFileCounterIncrement); + return memFile; + } +} + bool CSeqDBAtlas::DoesFileExist(const string & fname, CSeqDBLockHold & locked) { TIndx length(0); @@ -239,37 +249,40 @@ bool CSeqDBAtlas::GetFileSize(const string & fname, return GetFileSizeL(fname, length); } -bool CSeqDBAtlas::GetFileSizeL(const string & fname, - TIndx & length) +bool CSeqDBAtlas::GetFileSizeL(const string & fname, TIndx &length) { + { + std::lock_guard guard(m_FileSizeMutex); + auto it = m_FileSize.find(fname); + if (it != m_FileSize.end()) { + length = it->second.second; + return it->second.first; + } + } - // 
Fields: file-exists, file-length - pair data; - - map< string, pair >::iterator i = - m_FileSize.find(fname); + pair val; + CFile whole(fname); + Int8 file_length = whole.GetLength(); - if (i == m_FileSize.end()) { - CFile whole(fname); - Int8 file_length = whole.GetLength(); + if (file_length >= 0) { + val.first = true; + val.second = SeqDB_CheckLength(file_length); + } + else { + val.first = false; + val.second = 0; + } - if (file_length >= 0) { - data.first = true; - data.second = SeqDB_CheckLength(file_length); - if ((Uint8)file_length > m_MaxFileSize) m_MaxFileSize = file_length; - } else { - data.first = false; - data.second = 0; - } + { + std::lock_guard guard(m_FileSizeMutex); + m_FileSize[fname] = val; - m_FileSize[fname] = data; - } else { - data = (*i).second; + if (file_length >= 0 && (Uint8)file_length > m_MaxFileSize) + m_MaxFileSize = file_length; } - - length = data.second; - return data.first; + length = val.second; + return val.first; } /// Simple idiom for RIIA with malloc + free. 
@@ -378,7 +391,7 @@ void CSeqDBAtlas::Free(const char * freeme, CSeqDBLockHold & locked) bool CSeqDBAtlas::x_Free(const char * freeme) { if(!m_Alloc) return true; - TPoolIter i = m_Pool.find((const char*) freeme); + auto i = m_Pool.find((const char*) freeme); if (i == m_Pool.end()) { return false; @@ -423,20 +436,36 @@ void CSeqDBAtlas::UnregisterExternal(CSeqDBMemReg & memreg) -CSeqDBAtlasHolder::CSeqDBAtlasHolder(bool use_mmap, - CSeqDBLockHold * lockedp) +CSeqDBAtlasHolder::CSeqDBAtlasHolder(CSeqDBLockHold * lockedp, + bool use_atlas_lock) { {{ CFastMutexGuard guard(m_Lock); if (m_Count == 0) { - m_Atlas = new CSeqDBAtlas(use_mmap); + m_Atlas = new CSeqDBAtlas(use_atlas_lock); + } + m_Count ++; + }} +} + + +// FIXME: This constrctor is deprecated +CSeqDBAtlasHolder::CSeqDBAtlasHolder(bool use_atlas_lock, + CSeqDBLockHold* locdep) +{ + {{ + CFastMutexGuard guard(m_Lock); + + if (m_Count == 0) { + m_Atlas = new CSeqDBAtlas(use_atlas_lock); } m_Count ++; }} } + DEFINE_CLASS_STATIC_FAST_MUTEX(CSeqDBAtlasHolder::m_Lock); CSeqDBAtlasHolder::~CSeqDBAtlasHolder() diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbcol.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbcol.cpp index f8fe1dd1..c8062e97 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbcol.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbcol.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbcol.cpp 536658 2017-05-22 15:48:20Z zaretska $ +/* $Id: seqdbcol.cpp 579716 2019-02-05 16:53:21Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -103,7 +103,7 @@ CSeqDBColumn::CSeqDBColumn(const string & basename, const string & index_extn, const string & data_extn, CSeqDBLockHold * lockedp) - : m_AtlasHolder (true, lockedp), + : m_AtlasHolder (lockedp, true), m_Atlas (m_AtlasHolder.Get()), m_IndexFile (m_Atlas), m_DataFile (m_Atlas), diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp index 
9503e41a..efdca535 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbcommon.cpp 559887 2018-03-15 17:48:06Z fongah2 $ +/* $Id: seqdbcommon.cpp 581731 2019-03-05 16:42:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -545,6 +545,20 @@ public: } }; +class CSeqDB_SortPigLessThan { +public: + /// Test whether lhs is less than (occurs before) rhs. + /// @param lhs Left hand side of less-than operator. [in] + /// @param rhs Right hand side of less-than operator. [in] + /// @return True if lhs has a lower GI than rhs. + int operator()(const CSeqDBGiList::SPigOid & lhs, + const CSeqDBGiList::SPigOid & rhs) + { + return lhs.pig < rhs.pig; + } +}; + + /// Compare SGiOid structs by GI. class CSeqDB_SortTiLessThan { @@ -627,6 +641,7 @@ void CSeqDBGiList::InsureOrder(ESortOrder order) s_InsureOrder(m_GisOids); s_InsureOrder(m_TisOids); s_InsureOrder(m_SisOids); + s_InsureOrder(m_PigsOids); break; default: @@ -767,6 +782,16 @@ CSeqDBGiList::GetGiList(vector& gis) const } } +void +CSeqDBGiList::GetPigList(vector& pigs) const +{ + pigs.clear(); + pigs.reserve(GetNumPigs()); + + ITERATE(vector, itr, m_PigsOids) { + pigs.push_back(itr->pig); + } +} void CSeqDBGiList::GetTiList(vector& tis) const @@ -865,6 +890,41 @@ bool s_SeqDB_IsBinaryNumericList(const char* fbeginp, const char* fendp, return retval; } +int s_ReadDigit(const char d, const string & list_type) +{ + switch(d) { + case '0': + return 0; + case '1': + return 1; + case '2': + return 2; + case '3': + return 3; + case '4': + return 4; + case '5': + return 5; + case '6': + return 6; + case '7': + return 7; + case '8': + return 8; + case '9': + return 9; + case ' ': + case '\n': + case '\r': + return -1; + default: + { + string msg = string("Invalid byte in text" + list_type + " list [") + + NStr::UIntToString((unsigned char) d) + "]."; + 
NCBI_THROW(CSeqDBException, eFileErr, msg); + } + } +} void SeqDB_ReadMemoryGiList(const char * fbeginp, const char * fendp, @@ -930,70 +990,135 @@ void SeqDB_ReadMemoryGiList(const char * fbeginp, gis.reserve((int) (file_size / 7)); Uint4 elem(0); + const string list_type("GI"); for(const char * p = fbeginp; p < fendp; p ++) { - Uint4 dig = 0; + int dig = s_ReadDigit(*p, list_type); + if (dig == -1) { + if (elem != 0) { + gis.push_back(GI_FROM(Uint4, elem)); + } + elem = 0; + continue; + } + elem *= 10; + elem += dig; + } + } +} - switch(*p) { - case '0': - dig = 0; - break; +void SeqDB_ReadMemoryPigList(const char * fbeginp, + const char * fendp, + vector & pigs, + bool * in_order) +{ + bool long_ids = false; + Int8 file_size = fendp - fbeginp; - case '1': - dig = 1; - break; + if (s_SeqDB_IsBinaryNumericList(fbeginp, fendp, long_ids)) { + Uint4* bbeginp = (Uint4*) fbeginp; + Uint4* bendp = (Uint4*) fendp; - case '2': - dig = 2; - break; + Int4 num_pigs = (Int4) (bendp - bbeginp) - 2; - case '3': - dig = 3; - break; + pigs.clear(); - case '4': - dig = 4; - break; + if (((bendp - bbeginp) < 2U) + || (bbeginp[0] != 0xFFFFFFFFU) + || (SeqDB_GetStdOrd(bbeginp + 1) != (Uint4) num_pigs)) { + NCBI_THROW(CSeqDBException, + eFileErr, + "Specified file is not a valid binary IPG file."); + } - case '5': - dig = 5; - break; + pigs.reserve(num_pigs); - case '6': - dig = 6; - break; + if (in_order) { + TPig prev_pig = 0; + bool sorted = true; - case '7': - dig = 7; - break; + Uint4* elem = bbeginp + 2; + while(elem < bendp) { + TPig this_pig = SeqDB_GetStdOrd(elem); + pigs.push_back(this_pig); - case '8': - dig = 8; - break; + if (prev_pig > this_pig) { + sorted = false; + break; + } + prev_pig = this_pig; + elem++; + } - case '9': - dig = 9; - break; + while(elem < bendp) { + pigs.push_back(SeqDB_GetStdOrd(elem++)); + } - case '#': - case '\n': - case '\r': + *in_order = sorted; + } else { + for(Uint4 * elem = (bbeginp + 2); elem < bendp; ++elem) { + 
pigs.push_back(SeqDB_GetStdOrd(elem)); + } + } + } else { + pigs.reserve((int) (file_size / 7)); + + Uint4 elem(0); + const string list_type("IPG"); + + for(const char * p = fbeginp; p < fendp; p ++) { + int dig = s_ReadDigit(*p, list_type); + if (dig == -1) { // Skip blank lines or comments by ignoring zero. if (elem != 0) { - gis.push_back(GI_FROM(Uint4, elem)); + pigs.push_back(elem); } elem = 0; continue; + } + elem *= 10; + elem += dig; + } + } +} + +void SeqDB_ReadMemoryTaxIdList(const char * fbeginp, + const char * fendp, + CSeqDBGiList::STaxIdsOids & taxids) +{ + bool long_ids = false; + if (s_SeqDB_IsBinaryNumericList(fbeginp, fendp, long_ids)) { + Int4* bbeginp = (Int4*) fbeginp; + Int4* bendp = (Int4*) fendp; + + Int4 num_taxids = (Int4) (bendp - bbeginp) - 2; + + taxids.tax_ids.clear(); + taxids.oids.clear(); + + if (((bendp - bbeginp) < 2) || (bbeginp[0] != 0xFFFFFFFF) + || (SeqDB_GetStdOrd(bbeginp + 1) != (Int4) num_taxids)) { + NCBI_THROW(CSeqDBException, eFileErr, + "Specified file is not a valid binary Tax Id List file."); + } + + for(Int4 * elem = (bbeginp + 2); elem < bendp; ++elem) { + taxids.tax_ids.insert(SeqDB_GetStdOrd(elem)); + } + } else { + Int4 elem(0); + const string list_type("TAXID"); - default: - { - string msg = string("Invalid byte in text GI list [") + - NStr::UIntToString((unsigned char)(*p)) + " at location " + - NStr::NumericToString(p-fbeginp) + "]."; - NCBI_THROW(CSeqDBException, eFileErr, msg); + for(const char * p = fbeginp; p < fendp; p ++) { + int dig = s_ReadDigit(*p, list_type); + if (dig == -1) { + // Skip blank lines or comments by ignoring zero. 
+ if (elem != 0) { + taxids.tax_ids.insert(elem); } + elem = 0; + continue; } - elem *= 10; elem += dig; } @@ -1144,71 +1269,17 @@ void SeqDB_ReadMemoryTiList(const char * fbeginp, tis.reserve(int(file_size / 7)); Int8 elem(0); + const string list_type("TI"); for(const char * p = fbeginp; p < fendp; p ++) { - Uint4 dig = 0; - - switch(*p) { - case '0': - dig = 0; - break; - - case '1': - dig = 1; - break; - - case '2': - dig = 2; - break; - - case '3': - dig = 3; - break; - - case '4': - dig = 4; - break; - - case '5': - dig = 5; - break; - - case '6': - dig = 6; - break; - - case '7': - dig = 7; - break; - - case '8': - dig = 8; - break; - - case '9': - dig = 9; - break; - - case '#': - case '\n': - case '\r': - // Skip blank lines and comments by ignoring zero. + int dig = s_ReadDigit(*p, list_type); + if (dig == -1) { if (elem != 0) { tis.push_back(elem); } elem = 0; continue; - - default: - { - string msg = string("Invalid byte in text TI list [") + - NStr::UIntToString((unsigned char)(*p)) + " at location " + - NStr::NumericToString(p-fbeginp) + "]."; - - NCBI_THROW(CSeqDBException, eFileErr, msg); - } } - elem *= 10; elem += dig; } @@ -1371,6 +1442,28 @@ void SeqDB_ReadMixList(const string & fname, vector & gis, SeqDB_ReadMemoryMixList(fbeginp, fendp, gis, tis, sis, in_order); } +void SeqDB_ReadPigList(const string & fname, vector & pigs, bool * in_order) +{ + CMemoryFile mfile(SeqDB_MakeOSPath(fname)); + + Int8 file_size = mfile.GetSize(); + const char * fbeginp = (char*) mfile.GetPtr(); + const char * fendp = fbeginp + file_size; + + SeqDB_ReadMemoryPigList(fbeginp, fendp, pigs, in_order); +} + +void SeqDB_ReadTaxIdList(const string & fname, CSeqDBGiList::STaxIdsOids & taxids) +{ + CMemoryFile mfile(SeqDB_MakeOSPath(fname)); + + Int8 file_size = mfile.GetSize(); + const char * fbeginp = (char*) mfile.GetPtr(); + const char * fendp = fbeginp + file_size; + + SeqDB_ReadMemoryTaxIdList(fbeginp, fendp, taxids); +} + void SeqDB_ReadGiList(const string & fname, 
vector & gis, bool * in_order) { typedef vector TPairList; @@ -1592,6 +1685,13 @@ CSeqDBFileGiList::CSeqDBFileGiList(const string & fname, EIdType idtype) case eMixList: SeqDB_ReadMixList(fname, m_GisOids, m_TisOids, m_SisOids, & in_order); break; + case ePigList: + SeqDB_ReadPigList(fname, m_PigsOids, & in_order); + break; + case eTaxIdList: + SeqDB_ReadTaxIdList(fname, m_TaxIdsOids); + in_order = true; + break; } m_CurrentOrder = in_order ? eGi : eNone; } diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbfilter.hpp b/c++/src/objtools/blast/seqdb_reader/seqdbfilter.hpp index bbbd0ec5..a8f4f585 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbfilter.hpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbfilter.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS_SEQDB__SEQDBFILTER_HPP #define OBJTOOLS_READERS_SEQDB__SEQDBFILTER_HPP -/* $Id: seqdbfilter.hpp 515860 2016-10-06 12:29:07Z camacho $ +/* $Id: seqdbfilter.hpp 581731 2019-03-05 16:42:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -52,12 +52,13 @@ class CSeqDB_AliasMask : public CObject { public: /// Various types of masking. enum EMaskType { - eGiList, ///< GI list. - eTiList, ///< TI list. - eSiList, ///< SI list. - eOidList, ///< OID list. - eOidRange,///< OID Range [start, end). - eMemBit ///< MEMBIT filter. + eGiList, ///< GI list. + eTiList, ///< TI list. + eSiList, ///< SI list. + eOidList, ///< OID list. + eOidRange, ///< OID Range [start, end). + eMemBit, ///< MEMBIT filter. + eTaxIdList ///< Taxonomay Id List. }; /// Constructor for file-based filtering. 
@@ -108,7 +109,8 @@ public: case eOidRange: p="eOidRange"; r = true; break; - case eMemBit: p="eMemBit"; break; + case eMemBit: p="eMemBit"; break; + case eTaxIdList: p="eTaxIdList"; break; } string oss; diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp index 09c493db..60144025 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbgilistset.cpp 553487 2017-12-18 14:23:38Z fongah2 $ +/* $Id: seqdbgilistset.cpp 579228 2019-01-31 16:32:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -110,6 +110,10 @@ public: break; case CSeqDBGiListSet::eSiList: SeqDB_ReadMemorySiList(fbeginp, fendp, m_SisOids, & in_order); + break; + case CSeqDBGiListSet::ePigList: + SeqDB_ReadMemoryPigList(fbeginp, fendp, m_PigsOids, & in_order); + break; } if (in_order) { @@ -160,19 +164,13 @@ void s_VerifySeqidlist(const SBlastSeqIdListInfo & list_info, const CSeqDBVolSet return; } - -CSeqDBGiListSet::CSeqDBGiListSet(CSeqDBAtlas & atlas, - const CSeqDBVolSet & volset, - CRef user_list, - CRef neg_list, - CSeqDBLockHold & locked, - const CSeqDBLMDBSet & lmdb_set) - : m_Atlas (atlas), - m_UserList (user_list), - m_NegativeList (neg_list) +void +CSeqDBGiListSet::x_ResolvePositiveList(CSeqDBAtlas & atlas, + const CSeqDBVolSet & volset, + CRef user_list, + CSeqDBLockHold & locked, + const CSeqDBLMDBSet & lmdb_set) { - _ASSERT(user_list.Empty() || neg_list.Empty()); - if (m_UserList.NotEmpty() && m_UserList->NotEmpty()) { if(user_list->GetNumSis() > 0) { s_VerifySeqidlist(user_list->GetListInfo(), volset, lmdb_set); @@ -196,7 +194,8 @@ CSeqDBGiListSet::CSeqDBGiListSet(CSeqDBAtlas & atlas, set & tax_ids = user_list->GetTaxIdsList(); lmdb_set.TaxIdsToOids(tax_ids, oids); } - if((user_list->GetNumGis() == 0) && (user_list->GetNumTis() == 0)) { + 
if((user_list->GetNumGis() == 0) && (user_list->GetNumTis() == 0) && + (user_list->GetNumPigs() == 0)) { return; } } @@ -237,7 +236,18 @@ CSeqDBGiListSet::CSeqDBGiListSet(CSeqDBAtlas & atlas, vol->Vol()->IdsToOids(*m_UserList, locked); } - } else if (m_NegativeList.NotEmpty() && m_NegativeList->NotEmpty()) { + } +} + + +void +CSeqDBGiListSet::x_ResolveNegativeList(CSeqDBAtlas & atlas, + const CSeqDBVolSet & volset, + CRef neg_list, + CSeqDBLockHold & locked, + const CSeqDBLMDBSet & lmdb_set) +{ + if (m_NegativeList.NotEmpty() && m_NegativeList->NotEmpty()) { // We don't bother to sort these since every ISAM mapping must // be examined for the negative ID list case. if(m_NegativeList->GetNumSis() > 0) { @@ -261,7 +271,8 @@ CSeqDBGiListSet::CSeqDBGiListSet(CSeqDBAtlas & atlas, lmdb_set.NegativeTaxIdsToOids(tax_ids, oids); } - if((m_NegativeList->GetNumGis() == 0) && (m_NegativeList->GetNumTis() == 0)) { + if((m_NegativeList->GetNumGis() == 0) && (m_NegativeList->GetNumTis() == 0) && + (m_NegativeList->GetNumPigs() == 0)) { return; } } @@ -269,6 +280,27 @@ CSeqDBGiListSet::CSeqDBGiListSet(CSeqDBAtlas & atlas, if((m_NegativeList->GetNumSis() > 0) && !(lmdb_set.IsBlastDBVersion5())) { m_NegativeList->PreprocessIdsForISAMSiLookup(); } + + if (m_NegativeList->GetNumPigs() > 0) { + CSeqDBGiList pigs; + pigs.ReservePigs(m_NegativeList->GetNumPigs()); + ITERATE(vector, p, m_NegativeList->GetPigList()){ + pigs.AddPig(*p); + } + for(int v = 0; v < volset.GetNumVols(); v++) { + const CSeqDBVolEntry * vol = volset.GetVolEntry(v); + vol->Vol()->IdsToOids(pigs, locked); + } + + vector & exclude_oid_list = m_NegativeList->SetExcludedOids(); + for(int o=0; o < pigs.GetNumPigs(); o++) { + const CSeqDBGiList::SPigOid & pig = pigs.GetPigOid(o); + if(pig.oid != -1) { + exclude_oid_list.push_back(pig.oid); + } + } + + } for(int i = 0; i < volset.GetNumVols(); i++) { const CSeqDBVolEntry * vol = volset.GetVolEntry(i); @@ -279,6 +311,20 @@ CSeqDBGiListSet::CSeqDBGiListSet(CSeqDBAtlas & 
atlas, } } +CSeqDBGiListSet::CSeqDBGiListSet(CSeqDBAtlas & atlas, + const CSeqDBVolSet & volset, + CRef user_list, + CRef neg_list, + CSeqDBLockHold & locked, + const CSeqDBLMDBSet & lmdb_set) + : m_Atlas (atlas), + m_UserList (user_list), + m_NegativeList (neg_list) +{ + x_ResolvePositiveList(atlas, volset, user_list, locked, lmdb_set); + x_ResolveNegativeList(atlas, volset, neg_list, locked, lmdb_set); +} + CRef CSeqDBGiListSet::GetNodeIdList(const CSeqDB_Path & filename, const CSeqDBVol * volp, diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.hpp b/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.hpp index 5c0d1cbc..e1a57fa6 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.hpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS_SEQDB__SEQDBGILISTSET_HPP #define OBJTOOLS_READERS_SEQDB__SEQDBGILISTSET_HPP -/* $Id: seqdbgilistset.hpp 553487 2017-12-18 14:23:38Z fongah2 $ +/* $Id: seqdbgilistset.hpp 579228 2019-01-31 16:32:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -61,7 +61,8 @@ public: enum EGiListType { eGiList, eTiList, - eSiList + eSiList, + ePigList }; /// Constructor @@ -153,6 +154,20 @@ private: /// @param gilist The volume GI list. void x_TranslateTisFromUserList(CSeqDBGiList & gilist); + void x_TranslatePigsFromUserList(CSeqDBGiList & gilist); + + void x_ResolveNegativeList(CSeqDBAtlas & atlas, + const CSeqDBVolSet & volset, + CRef neg_list, + CSeqDBLockHold & locked, + const CSeqDBLMDBSet & lmdb_set); + + void x_ResolvePositiveList(CSeqDBAtlas & atlas, + const CSeqDBVolSet & volset, + CRef user_list, + CSeqDBLockHold & locked, + const CSeqDBLMDBSet & lmdb_set); + /// Memory management layer object. CSeqDBAtlas & m_Atlas; @@ -173,6 +188,8 @@ private: /// Map of filenames to alias node specified SI lists. 
TNodeListMap m_SINodeListMap; + + TNodeListMap m_PIGNodeListMap; }; END_NCBI_SCOPE diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp index 9c8c9154..c2e161c9 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbimpl.cpp 560817 2018-03-27 17:24:45Z rackerst $ +/* $Id: seqdbimpl.cpp 579716 2019-02-05 16:53:21Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -44,11 +44,11 @@ CSeqDBImpl::CSeqDBImpl(const string & db_name_list, char prot_nucl, int oid_begin, int oid_end, - bool use_mmap, CSeqDBGiList * gi_list, CSeqDBNegativeList * neg_list, - CSeqDBIdSet idset) - : m_AtlasHolder (use_mmap, NULL), + CSeqDBIdSet idset, + bool use_atlas_lock) + : m_AtlasHolder (NULL, use_atlas_lock), m_Atlas (m_AtlasHolder.Get()), m_DBNames (db_name_list), m_Aliases (m_Atlas, db_name_list, prot_nucl), @@ -88,7 +88,6 @@ CSeqDBImpl::CSeqDBImpl(const string & db_name_list, m_GiMask.Reset(new CSeqDBGiMask(m_Atlas, mask_list)); } - _ASSERT((! gi_list) || (! neg_list)); m_VolSet.OptimizeGiLists(); @@ -150,8 +149,8 @@ CSeqDBImpl::CSeqDBImpl(const string & db_name_list, CHECK_MARKER(); } -CSeqDBImpl::CSeqDBImpl() - : m_AtlasHolder (false, NULL), +CSeqDBImpl::CSeqDBImpl(bool use_atlas_lock) + : m_AtlasHolder (NULL, use_atlas_lock), m_Atlas (m_AtlasHolder.Get()), m_Aliases (m_Atlas, "", '-'), m_RestrictBegin (0), @@ -1314,12 +1313,26 @@ void CSeqDBImpl::TaxIdsToOids(set& tax_ids, vector& rv) return; } -void CSeqDBImpl::GetDBTaxIds(set & tax_ids) const +void CSeqDBImpl::GetDBTaxIds(set & tax_ids) { CHECK_MARKER(); + CSeqDBLockHold locked(m_Atlas); + + if (! 
m_OidListSetup) { + x_GetOidList(locked); + } tax_ids.clear(); if (m_LMDBSet.IsBlastDBVersion5()) { - m_LMDBSet.GetDBTaxIds(tax_ids); + if(m_OIDList.NotEmpty()){ + vector oids; + for(int oid = 0; CheckOrFindOID(oid); oid++) { + oids.push_back(oid); + } + m_LMDBSet.GetTaxIdsForOids(oids, tax_ids); + } + else { + m_LMDBSet.GetDBTaxIds(tax_ids); + } } else { NCBI_THROW(CSeqDBException, eArgErr, @@ -1494,7 +1507,8 @@ CSeqDBImpl::FindVolumePaths(const string & dbname, bool recursive, bool expand_links) { - CSeqDBAtlasHolder AH(true, NULL); + bool use_atlas_lock = true; + CSeqDBAtlasHolder AH(NULL, use_atlas_lock); CSeqDBAtlas & atlas(AH.Get()); // This constructor handles its own locking. @@ -1943,7 +1957,7 @@ void CSeqDBImpl::ListColumns(vector & titles) m_VolSet.GetVolNonConst(vol_idx)->ListColumns(all, locked); } - titles.resize(SeqDB_VectorAssign(all, titles)); + titles.assign(all.begin(), all.end()); } int CSeqDBImpl::GetColumnId(const string & title) diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp b/c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp index 21e04afb..73f2e898 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS_SEQDB__SEQDBIMPL_HPP #define OBJTOOLS_READERS_SEQDB__SEQDBIMPL_HPP -/* $Id: seqdbimpl.hpp 553487 2017-12-18 14:23:38Z fongah2 $ +/* $Id: seqdbimpl.hpp 579716 2019-02-05 16:53:21Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -158,31 +158,30 @@ public: /// found in the OID lists (if any) will be returned. /// @param oid_end /// Iterator will return up to (but not including) this OID. - /// @param use_mmap - /// If kSeqDBMMap is specified (the default), memory mapping is - /// attempted. If kSeqDBNoMMap is specified, memory mapping - /// fails, or this platform does not support it, the less - /// efficient read and write calls are used instead. 
/// @param gi_list /// If not null, specifies included deflines and OIDs. /// @param neg_list /// If not null, specifies excluded deflines and OIDs. /// @param idset /// Specifies included or excluded deflines and OIDs. + /// @param use_atlas_lock + /// If true, a single atlas lock will be used to protect most of the + /// critical regions. It can be set to false only if each thread + /// accesses a different database volume. CSeqDBImpl(const string & db_name_list, char prot_nucl, int oid_begin, int oid_end, - bool use_mmap, CSeqDBGiList * gi_list, CSeqDBNegativeList * neg_list, - CSeqDBIdSet idset); + CSeqDBIdSet idset, + bool use_atlas_lock); /// Default Constructor /// /// This builds a null version of the CSeqDBImpl object. It is in /// support of the default constructor for the CSeqDBExpert class. - CSeqDBImpl(); + CSeqDBImpl(bool use_atlas_lock = true); /// Destructor ~CSeqDBImpl(); @@ -1064,7 +1063,7 @@ public: /// Get all unique tax ids from db /// @param tax_ids return taxonomy ids in db - void GetDBTaxIds(set & tax_ids) const; + void GetDBTaxIds(set & tax_ids); private: CLASS_MARKER_FIELD("IMPL") diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbisam.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbisam.cpp index af6c3738..96cca224 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbisam.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbisam.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbisam.cpp 536658 2017-05-22 15:48:20Z zaretska $ +/* $Id: seqdbisam.cpp 579218 2019-01-31 16:18:47Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1436,6 +1436,10 @@ void CSeqDBIsam::IdsToOids(int vol_start, x_TranslateGiList(vol_start, ids); break; + case ePigId: + x_TranslateGiList(vol_start, ids); + break; + default: NCBI_THROW(CSeqDBException, eArgErr, diff --git a/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp b/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp index 55d18058..7074e4b2 100644 --- 
a/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp @@ -92,7 +92,7 @@ void CSeqDBLMDBEntry::x_AdjustOidsOffset(vector & oids) const if (oids[i] != kSeqDBEntryNotFound) { TOid skipped_oids = 0; for(unsigned int j=0;j < m_VolInfo.size(); j++) { - if(oids[i] > m_VolInfo[j].max_oid) { + if(oids[i] >= m_VolInfo[j].max_oid) { skipped_oids += m_VolInfo[j].skipped_oids; } else { @@ -124,7 +124,7 @@ void CSeqDBLMDBEntry::x_AdjustOidsOffset_TaxList(vector & oids) const for(unsigned int i=0; i < oids.size(); i++) { TOid skipped_oids = 0; for(unsigned int j=0;j < m_VolInfo.size(); j++) { - if(oids[i] > m_VolInfo[j].max_oid) { + if(oids[i] >= m_VolInfo[j].max_oid) { skipped_oids += m_VolInfo[j].skipped_oids; } else { @@ -178,6 +178,27 @@ void CSeqDBLMDBEntry::GetDBTaxIds(vector & tax_ids) const m_LMDB->GetDBTaxIds(tax_ids); } +void CSeqDBLMDBEntry::GetTaxIdsForOids(const vector & oids, set & tax_ids) const +{ + if(m_isPartial) { + vector tmp; + TOid skipped_oids = 0; + unsigned int j=0; + for(unsigned int i=0; i < oids.size(); i++) { + while(j < m_VolInfo.size() && + (m_VolInfo[j].skipped_oids > 0 || oids[i] + skipped_oids >= m_VolInfo[j].max_oid)){ + skipped_oids += m_VolInfo[j].skipped_oids; + j++; + } + + tmp.push_back(oids[i] + skipped_oids) ; + } + m_LMDB->GetTaxIdsForOids(tmp, tax_ids); + } + else { + m_LMDB->GetTaxIdsForOids(oids, tax_ids); + } +} CSeqDBLMDBSet::CSeqDBLMDBSet() { @@ -342,4 +363,35 @@ void CSeqDBLMDBSet::GetDBTaxIds(set & tax_ids) const tax_ids.insert(t.begin(), t.end()); } } + + +void CSeqDBLMDBSet::GetTaxIdsForOids(const vector & oids, set & tax_ids) const +{ + if (m_LMDBEntrySet.size() > 1) { + vector t; + int j =0; + for(unsigned int i =0; i < oids.size(); i++){ + if (oids[i] >= m_LMDBEntrySet[j]->GetOIDEnd()){ + if (t.size() > 0){ + set t_set; + m_LMDBEntrySet[j]->GetTaxIdsForOids(t, t_set); + t.clear(); + tax_ids.insert(t_set.begin(), t_set.end()); + } + j++; + } + t.push_back(oids[i] - 
m_LMDBEntrySet[j]->GetOIDStart()); + } + if (t.size() > 0){ + set t_set; + m_LMDBEntrySet[j]->GetTaxIdsForOids(t, t_set); + tax_ids.insert(t_set.begin(), t_set.end()); + } + } + else { + m_LMDBEntrySet[0]->GetTaxIdsForOids(oids, tax_ids); + } + + +} END_NCBI_SCOPE diff --git a/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp b/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp index d05a1f6e..0eb441bd 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp @@ -106,6 +106,8 @@ public: void GetDBTaxIds(vector & tax_ids) const; + void GetTaxIdsForOids(const vector & oids, set & tax_ids) const; + private: void x_AdjustOidsOffset(vector & oids) const; void x_AdjustOidsOffset_TaxList(vector & oids) const; @@ -175,6 +177,8 @@ public: void GetDBTaxIds(set & tax_ids) const; + void GetTaxIdsForOids(const vector & oids, set & tax_ids) const; + private: vector > m_LMDBEntrySet; diff --git a/c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp b/c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp index 9636be7e..d156e129 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdboidlist.cpp 553487 2017-12-18 14:23:38Z fongah2 $ +/* $Id: seqdboidlist.cpp 581734 2019-03-05 16:42:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -99,15 +99,17 @@ void CSeqDBOIDList::x_Setup(const CSeqDBVolSet & volset, } if (lmdb_set.IsBlastDBVersion5() && filters.HasFilter()) { - CSeqDB_BitSet si_bits(0, m_NumOIDs); - si_bits.AssignBitRange(0, m_NumOIDs, true); - x_ComputeFilters(volset, filters, lmdb_set, si_bits); - m_AllBits->IntersectWith(si_bits, true); + CSeqDB_BitSet f_bits(0, m_NumOIDs); + f_bits.AssignBitRange(0, m_NumOIDs, true); + if(x_ComputeFilters(volset, filters, lmdb_set, f_bits, gi_list, neg_list)) { + m_AllBits->IntersectWith(f_bits, true); + 
} } if (gi_list.NotEmpty()) { x_ApplyUserGiList(*gi_list); - } else if (neg_list.NotEmpty()) { + } + if (neg_list.NotEmpty()) { x_ApplyNegativeList(*neg_list, lmdb_set.IsBlastDBVersion5()); } @@ -217,7 +219,8 @@ CSeqDBOIDList::x_ComputeFilters(const CSeqDB_FilterTree & filters, if ((mask.GetType() == CSeqDB_AliasMask::eOidRange) || (mask.GetType() == CSeqDB_AliasMask::eMemBit) - || (isBlastDBv5 && (mask.GetType() == CSeqDB_AliasMask::eSiList))) { + || (isBlastDBv5 && (mask.GetType() == CSeqDB_AliasMask::eSiList)) + || (mask.GetType() == CSeqDB_AliasMask::eTaxIdList)) { continue; } @@ -254,6 +257,7 @@ CSeqDBOIDList::x_ComputeFilters(const CSeqDB_FilterTree & filters, case CSeqDB_AliasMask::eOidRange: case CSeqDB_AliasMask::eMemBit: + case CSeqDB_AliasMask::eTaxIdList: // these should have been handled in the previous loop. break; @@ -283,61 +287,72 @@ void CSeqDBOIDList::x_ApplyUserGiList(CSeqDBGiList & gis) // included OID, and then scan the vector sequentially. This // technique also uniqifies the set, which is desireable here. 
- CRef gilist_oids(new CSeqDB_BitSet(0, m_NumOIDs)); int j = 0; - for(j = 0; j < gis.GetNumGis(); j++) { - int oid = gis.GetGiOid(j).oid; - - if ((oid != -1) && (oid < m_NumOIDs)) { - gilist_oids->SetBit(oid); + if (gis.GetNumGis() || gis.GetNumSis() || gis.GetNumTis() || gis.GetNumPigs()){ + CRef gilist_oids(new CSeqDB_BitSet(0, m_NumOIDs)); + if (gis.GetNumGis()) { + for(j = 0; j < gis.GetNumGis(); j++) { + int oid = gis.GetGiOid(j).oid; + if ((oid != -1) && (oid < m_NumOIDs)) { + gilist_oids->SetBit(oid); + } } } - for(j = 0; j < gis.GetNumSis(); j++) { - int oid = gis.GetSiOid(j).oid; - - if ((oid != -1) && (oid < m_NumOIDs)) { - gilist_oids->SetBit(oid); + if(gis.GetNumSis()) { + for(j = 0; j < gis.GetNumSis(); j++) { + int oid = gis.GetSiOid(j).oid; + if ((oid != -1) && (oid < m_NumOIDs)) { + gilist_oids->SetBit(oid); + } } } - for(j = 0; j < gis.GetNumTis(); j++) { - int oid = gis.GetTiOid(j).oid; - - if ((oid != -1) && (oid < m_NumOIDs)) { - gilist_oids->SetBit(oid); + if(gis.GetNumTis()) { + for(j = 0; j < gis.GetNumTis(); j++) { + int oid = gis.GetTiOid(j).oid; + if ((oid != -1) && (oid < m_NumOIDs)) { + gilist_oids->SetBit(oid); + } } } + if(gis.GetNumPigs()) { + for(j = 0; j < gis.GetNumPigs(); j++) { + int oid = gis.GetPigOid(j).oid; + if ((oid != -1) && (oid < m_NumOIDs)) { + gilist_oids->SetBit(oid); + } + } + } + m_AllBits->IntersectWith(*gilist_oids, true); + } const vector & oids_tax = gis.GetOidsForTaxIdsList(); - for(unsigned int k = 0; k < oids_tax.size(); k++) { - if (oids_tax[k] < m_NumOIDs) { - gilist_oids->SetBit(oids_tax[k]); + if(oids_tax.size()) { + CRef taxlist_oids(new CSeqDB_BitSet(0, m_NumOIDs)); + for(unsigned int k = 0; k < oids_tax.size(); k++) { + if (oids_tax[k] < m_NumOIDs) { + taxlist_oids->SetBit(oids_tax[k]); + } } + m_AllBits->IntersectWith(*taxlist_oids, true); } - // Intersect the user GI list with the OID bit map. 
- m_AllBits->IntersectWith(*gilist_oids, true); } void CSeqDBOIDList::x_ApplyNegativeList(CSeqDBNegativeList & nlist, bool is_v5) { // We require a normalized list in order to turn bits off. - m_AllBits->Normalize(); - if(is_v5 && ((nlist.GetNumSis() > 0 )|| (nlist.GetNumTaxIds() > 0))){ - const vector & excluded_oids = nlist.GetExcludedOids(); - for(unsigned int i=0; i < excluded_oids.size(); i++) { - m_AllBits->ClearBit(excluded_oids[i]); - } - - if(nlist.GetNumGis() == 0 && nlist.GetNumTis() == 0) { - return; - } + m_AllBits->Normalize(); + const vector & excluded_oids = nlist.GetExcludedOids(); + for(unsigned int i=0; i < excluded_oids.size(); i++) { + m_AllBits->ClearBit(excluded_oids[i]); } - //m_Atlas.Lock(locked); + + if((!is_v5 && nlist.GetNumSis() > 0) || nlist.GetNumGis() > 0 || nlist.GetNumTis() > 0) { // Intersect the user GI list with the OID bit map. @@ -363,6 +378,7 @@ void CSeqDBOIDList::x_ApplyNegativeList(CSeqDBNegativeList & nlist, bool is_v5) m_AllBits->ClearBit(oid); } } + } } @@ -459,7 +475,6 @@ CSeqDBOIDList::x_GetOidMask(const CSeqDB_Path & fn, bitend = bitmap + (((num_oids + 31) / 32) * 4); } - CRef bitset(new CSeqDB_BitSet(vol_start, vol_end, bitmap, bitend)); @@ -512,46 +527,93 @@ s_IsOidInFilteredVol(blastdb::TOid oid, vector & exclu return false; } -void -CSeqDBOIDList::x_ComputeFilters(const CSeqDBVolSet & volset, - const CSeqDB_FilterTree & filters, - const CSeqDBLMDBSet & lmdb_set, - CSeqDB_BitSet & si_bit) +void s_AddFilterFile(string & name, const string & vn, vector & fnames, vector > & fnames_vols) { - vector si_fnames; - vector< vector > si_fnames_vols; + unsigned int j=0; + for(; j < fnames.size(); j++) { + if(fnames[j] == name) { + fnames_vols[j].push_back(vn); + break; + } + } + if( fnames.size() == j) { + vector p(1,vn); + fnames.push_back(name); + fnames_vols.push_back(p); + } +} - for(int i = 0; i < volset.GetNumVols(); i++) { - const CSeqDBVolEntry & vol = *(volset.GetVolEntry(i)); - const string & vn = 
vol.Vol()->GetVolName(); - CRef ft = filters.Specialize(vn); - ITERATE(CSeqDB_FilterTree::TFilters, itr, ft->GetFilters()){ - if((*itr)->GetType() == CSeqDB_AliasMask::eSiList){ - string si_name = (*itr)->GetPath().GetPathS(); - unsigned int j=0; - for(; j < si_fnames.size(); j++) { - if(si_fnames[j] == si_name) { - si_fnames_vols[j].push_back(vn); - break; - } - } - if( si_fnames.size() == j) { - vector p(1,vn); - si_fnames.push_back(si_name); - si_fnames_vols.push_back(p); - } - si_bit.AssignBitRange(vol.OIDStart(), vol.OIDEnd(), false); - } - } +bool s_CompareSeqId(const string & id1, const string & id2) +{ + if (id1 == id2){ + return false; + } + CSeq_id seq_id1(id1, (CSeq_id::fParse_AnyRaw | CSeq_id::fParse_ValidLocal)); + CSeq_id seq_id2(id2, (CSeq_id::fParse_AnyRaw | CSeq_id::fParse_ValidLocal)); + if (seq_id1.Match(seq_id2)) { + return false; } + return (id1 < id2); +} - for(unsigned int k=0; k < si_fnames.size(); k++) { +void s_ProcessSeqIdFilters(const vector & fnames, + vector > & fnames_vols, + CRef user_list, + CRef neg_user_list, + const CSeqDBLMDBSet & lmdb_set, + const CSeqDBVolSet & volset, + CSeqDB_BitSet & filter_bit) +{ + if (fnames.size() == 0) { + return; + } + vector user_accs; + if ((!user_list.Empty()) && (user_list->GetNumSis() > 0)) { + user_list->GetSiList(user_accs); + sort(user_accs.begin(), user_accs.end(), s_CompareSeqId); + } + vector neg_user_accs; + if ((!neg_user_list.Empty()) && (neg_user_list->GetNumSis() > 0)) { + neg_user_accs = neg_user_list->GetSiList(); + sort(neg_user_accs.begin(), neg_user_accs.end()); + } + + for(unsigned int k=0; k < fnames.size(); k++) { vector excluded_vols; - CRef si_list(new CSeqDBFileGiList(si_fnames[k], CSeqDBFileGiList::eSiList)); - s_GetFilteredOidRange(volset, si_fnames_vols[k], excluded_vols, si_list); - vector accs; vector oids; - si_list->GetSiList(accs); + CRef list(new CSeqDBFileGiList(fnames[k], CSeqDBFileGiList::eSiList)); + s_GetFilteredOidRange(volset, fnames_vols[k], excluded_vols, 
list); + vector accs; + list->GetSiList(accs); + if(accs.size() == 0){ + continue; + } + if((user_accs.size() > 0) || (neg_user_accs.size() > 0)){ + sort(accs.begin(), accs.end(), s_CompareSeqId); + if (user_accs.size() > 0) { + vector common; + common.resize(accs.size()); + vector::iterator itr = set_intersection(accs.begin(), accs.end(), + user_accs.begin(), user_accs.end(), common.begin(), s_CompareSeqId); + common.resize(itr-common.begin()); + if(common.size() == 0){ + continue; + } + swap(accs, common); + } + if(neg_user_accs.size() > 0) { + vector difference; + difference.resize(accs.size()); + vector::iterator itr = set_difference(accs.begin(), accs.end(), + neg_user_accs.begin(), neg_user_accs.end(), difference.begin(), s_CompareSeqId); + difference.resize(itr-difference.begin()); + if(difference.size() == 0){ + continue; + } + swap(accs, difference); + } + } + lmdb_set.AccessionsToOids(accs, oids); for(unsigned int i=0; i < accs.size(); i++) { if(oids[i] == kSeqDBEntryNotFound) { @@ -562,11 +624,121 @@ CSeqDBOIDList::x_ComputeFilters(const CSeqDBVolSet & volset, continue; } } - si_list->SetSiTranslation(i, oids[i]); - si_bit.SetBit(oids[i]); + filter_bit.SetBit(oids[i]); } } - return; +} + +void s_ProcessTaxIdFilters(const vector & fnames, + vector > & fnames_vols, + CRef user_list, + CRef neg_user_list, + const CSeqDBLMDBSet & lmdb_set, + const CSeqDBVolSet & volset, + CSeqDB_BitSet & filter_bit) +{ + if (fnames.size() == 0) { + return; + } + + set user_taxids; + if(!user_list.Empty() && (user_list->GetNumTaxIds() > 0)) { + user_taxids = user_list->GetTaxIdsList(); + } + set neg_user_taxids; + if(!neg_user_list.Empty() && (neg_user_list->GetNumTaxIds() > 0)) { + neg_user_taxids = neg_user_list->GetTaxIdsList(); + } + + for(unsigned int k=0; k < fnames.size(); k++) { + vector excluded_vols; + vector oids; + CRef list(new CSeqDBFileGiList(fnames[k], CSeqDBFileGiList::eTaxIdList)); + s_GetFilteredOidRange(volset, fnames_vols[k], excluded_vols, list); + set 
taxids; + taxids = list->GetTaxIdsList(); + if(taxids.size() == 0){ + continue; + } + if(user_taxids.size() > 0){ + vector common; + common.resize(taxids.size()); + vector::iterator itr = set_intersection(taxids.begin(), taxids.end(), + user_taxids.begin(), user_taxids.end(), common.begin()); + common.resize(itr-common.begin()); + if( common.size() == 0) { + continue; + } + taxids.clear(); + taxids.insert(common.begin(), common.end()); + } + if(neg_user_taxids.size() > 0) { + vector difference; + difference.resize(taxids.size()); + vector::iterator itr = set_difference(taxids.begin(), taxids.end(), + neg_user_taxids.begin(), neg_user_taxids.end(), difference.begin()); + difference.resize(itr-difference.begin()); + if(difference.size() == 0){ + continue; + } + taxids.clear(); + taxids.insert(difference.begin(), difference.end()); + } + + lmdb_set.TaxIdsToOids(taxids, oids); + for(unsigned int i=0; i < oids.size(); i++) { + if(excluded_vols.size() != 0) { + if (s_IsOidInFilteredVol(oids[i], excluded_vols)) { + continue; + } + } + filter_bit.SetBit(oids[i]); + } + } +} + +bool +CSeqDBOIDList::x_ComputeFilters(const CSeqDBVolSet & volset, + const CSeqDB_FilterTree & filters, + const CSeqDBLMDBSet & lmdb_set, + CSeqDB_BitSet & filter_bit, + CRef user_list, + CRef neg_user_list) +{ + vector seqid_fnames; + vector taxid_fnames; + vector< vector > seqid_fnames_vols; + vector< vector > taxid_fnames_vols; + + for(int i = 0; i < volset.GetNumVols(); i++) { + const CSeqDBVolEntry & vol = *(volset.GetVolEntry(i)); + const string & vn = vol.Vol()->GetVolName(); + CRef ft = filters.Specialize(vn); + ITERATE(CSeqDB_FilterTree::TFilters, itr, ft->GetFilters()){ + if(((*itr)->GetType() == CSeqDB_AliasMask::eSiList) || + ((*itr)->GetType() == CSeqDB_AliasMask::eTaxIdList)) { + string name = (*itr)->GetPath().GetPathS(); + if((*itr)->GetType() == CSeqDB_AliasMask::eSiList) { + s_AddFilterFile(name, vn, seqid_fnames, seqid_fnames_vols); + } + else { + s_AddFilterFile(name, vn, 
taxid_fnames, taxid_fnames_vols); + } + filter_bit.AssignBitRange(vol.OIDStart(), vol.OIDEnd(), false); + } + } + } + + if (seqid_fnames.size() > 0) { + s_ProcessSeqIdFilters(seqid_fnames, seqid_fnames_vols, user_list, neg_user_list, + lmdb_set, volset, filter_bit); + } + if (taxid_fnames.size() > 0) { + s_ProcessTaxIdFilters(taxid_fnames, taxid_fnames_vols, user_list, neg_user_list, + lmdb_set, volset, filter_bit); + } + + return ((seqid_fnames.size() + taxid_fnames.size()) > 0 ? true:false); } diff --git a/c++/src/objtools/blast/seqdb_reader/seqdboidlist.hpp b/c++/src/objtools/blast/seqdb_reader/seqdboidlist.hpp index 7630a8d8..ca2bd6d8 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdboidlist.hpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdboidlist.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS_SEQDB__SEQDBOIDLIST_HPP #define OBJTOOLS_READERS_SEQDB__SEQDBOIDLIST_HPP -/* $Id: seqdboidlist.hpp 553487 2017-12-18 14:23:38Z fongah2 $ +/* $Id: seqdboidlist.hpp 581731 2019-03-05 16:42:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -274,10 +274,12 @@ private: /// True if db is v5 void x_ApplyNegativeList(CSeqDBNegativeList & neg, bool is_v5); - void x_ComputeFilters(const CSeqDBVolSet & volset, + bool x_ComputeFilters(const CSeqDBVolSet & volset, const CSeqDB_FilterTree & filters, const CSeqDBLMDBSet & lmdb_set, - CSeqDB_BitSet & si_bit); + CSeqDB_BitSet & filter_bit, + CRef user_list, + CRef neg_user_list); /// The memory management layer object. 
CSeqDBAtlas & m_Atlas; diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp index f786263d..a7a0a307 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbvol.cpp 559232 2018-03-08 14:41:08Z fongah2 $ +/* $Id: seqdbvol.cpp 581731 2019-03-05 16:42:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1881,27 +1881,35 @@ CSeqDBVol::x_GetFilteredHeader(int oid, // Here we must pass both the user-gi and volume-gi test, // for each defline, but not necessarily for each Seq-id. - - if (have_memb && id_filter && defline.CanGetSeqid()) { - have_memb = false; - - bool have_user = false, have_volume = false; - - ITERATE(list< CRef >, seqid, defline.GetSeqid()) { - x_FilterHasId(**seqid, have_user, have_volume); - - if (have_user && have_volume) - break; - } - - have_memb = have_user && have_volume; - } - - if(have_memb && (!m_UserGiList.Empty()) && (m_UserGiList->GetNumTaxIds() > 0)) { - have_memb = s_IncludeDefline_Taxid(defline, m_UserGiList->GetTaxIdsList()); - } - if(have_memb && (!m_NegativeList.Empty()) && (m_NegativeList->GetNumTaxIds() > 0)) { - have_memb = s_IncludeDefline_NegativeTaxid(defline, m_NegativeList->GetTaxIdsList()); + if (have_memb) { + if (id_filter && defline.CanGetSeqid()) { + have_memb = false; + bool have_user = false, have_volume = false; + ITERATE(list< CRef >, seqid, defline.GetSeqid()) { + x_FilterHasId(**seqid, have_user, have_volume); + if (have_user && have_volume) break; + } + have_memb = have_user && have_volume; + } + + if(have_memb && (!m_UserGiList.Empty()) && (m_UserGiList->GetNumTaxIds() > 0)) { + have_memb = s_IncludeDefline_Taxid(defline, m_UserGiList->GetTaxIdsList()); + } + + if (!have_memb && !m_VolumeGiLists.empty()) { + NON_CONST_ITERATE(TGiLists, vtaxid, m_VolumeGiLists) { + if( (*vtaxid)->GetNumTaxIds() > 0) { 
+ have_memb = s_IncludeDefline_Taxid(defline, (*vtaxid)->GetTaxIdsList()); + if(have_memb){ + break; + } + } + } + } + + if(have_memb && (!m_NegativeList.Empty()) && (m_NegativeList->GetNumTaxIds() > 0)) { + have_memb = s_IncludeDefline_NegativeTaxid(defline, m_NegativeList->GetTaxIdsList()); + } } if (! have_memb) { @@ -2194,6 +2202,17 @@ void CSeqDBVol::IdsToOids(CSeqDBGiList & ids, } } + if (ids.GetNumPigs()) { + if (!m_PigFileOpened) x_OpenPigFile(); + if (m_IsamPig.NotEmpty()) { + m_IsamPig->IdsToOids(m_VolStart, m_VolEnd, ids); + } else { + NCBI_THROW(CSeqDBException, + eArgErr, + "IPG list specified but no ISAM file found for IPG in " + m_VolName); + } + } + if (ids.GetNumSis() && (GetLMDBFileName() == kEmptyStr)) { if (!m_StrFileOpened) x_OpenStrFile(); if (m_IsamStr.NotEmpty()) { diff --git a/c++/src/objtools/blast/seqdb_writer/build-alias-index b/c++/src/objtools/blast/seqdb_writer/build-alias-index index 6c642849..e2536c46 100755 --- a/c++/src/objtools/blast/seqdb_writer/build-alias-index +++ b/c++/src/objtools/blast/seqdb_writer/build-alias-index @@ -3,7 +3,7 @@ # subdirectory # Author: Kevin Bealer # Original date: 10/21/2005 -# $URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.8.1/c++/src/objtools/blast/seqdb_writer/build-alias-index $ +# $URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.9.0/c++/src/objtools/blast/seqdb_writer/build-alias-index $ INDEX_NAME=index.alx OUTNAME=index.alx.new diff --git a/c++/src/objtools/blast/seqdb_writer/multisource_util.cpp b/c++/src/objtools/blast/seqdb_writer/multisource_util.cpp index eb983d40..d3045bed 100644 --- a/c++/src/objtools/blast/seqdb_writer/multisource_util.cpp +++ b/c++/src/objtools/blast/seqdb_writer/multisource_util.cpp @@ -1,4 +1,4 @@ -/* $Id: multisource_util.cpp 568561 2018-08-07 18:29:39Z grichenk $ +/* $Id: multisource_util.cpp 580237 2019-02-12 13:07:41Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE 
@@ -95,6 +95,11 @@ string AccessionToKey(const string & acc) } else { GetSeqIdKey(*seqid, str); } + } else { + string tempID = "lcl|" + acc; + if(CheckAccession(tempID, gi, seqid, specific)) { + GetSeqIdKey(*seqid, str); + } } return str; diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/data/WP_009685663.fasta b/c++/src/objtools/blast/seqdb_writer/unit_test/data/WP_009685663.fasta new file mode 100644 index 00000000..86dc20be --- /dev/null +++ b/c++/src/objtools/blast/seqdb_writer/unit_test/data/WP_009685663.fasta @@ -0,0 +1,3 @@ +>gi|497371450|ref|WP_009685663.1| MULTISPECIES: XRE family transcriptional regulator [Pseudomonas]gi|955937162|gb|KSD99966.1| transcriptional regulator [Pseudomonas aeruginosa]gi|956677830|gb|KSL27839.1|pdb|6ES9|A transcriptional regulator [Pseudomonas aeruginosa] +MSKAKKPPLSHNIAFGLVVAELRRGRRMSQDKLSEASNFDRTSISLLERGLRSPTLDTVICLCRALEIPYSQLAVLIEAK +LEQLHAQEKPQPR diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/data/a4WZJ.ASN1 b/c++/src/objtools/blast/seqdb_writer/unit_test/data/a4WZJ.ASN1 new file mode 100755 index 00000000..c2814571 --- /dev/null +++ b/c++/src/objtools/blast/seqdb_writer/unit_test/data/a4WZJ.ASN1 @@ -0,0 +1,28684 @@ +Seq-entry ::= set { + class pdb-entry, + descr { + pdb { + deposition std { + year 2014, + month 11, + day 19 + }, + class "SPLICING", + compound { + "Spliceosomal U4 snRNP core domain" + }, + source { + "Mmdb_id: 161719", + "Pdb_id 1: 4WZJ" + }, + exp-method "X-ray Diffraction" + }, + pub { + pub { + sub { + authors { + names std { + { + name name { + last "Leung", + full "A.K.W.Leung", + initials "A.K.W." + } + }, + { + name name { + last "Nagai", + full "K.Nagai", + initials "K." + } + }, + { + name name { + last "Li", + full "J.Li", + initials "J." + } + } + } + }, + date std { + year 2014, + month 11, + day 19 + } + } + } + }, + pub { + pub { + article { + title { + name "Structure of the spliceosomal U4 snRNP core domain and its + implication for snRNP biogenesis." 
+ }, + authors { + names std { + { + name name { + last "Leung", + full "A.K.Leung", + initials "A.K." + } + }, + { + name name { + last "Nagai", + full "K.Nagai", + initials "K." + } + }, + { + name name { + last "Li", + full "J.Li", + initials "J." + } + } + } + }, + from journal { + title { + iso-jta "Nature", + ml-jta "Nature", + issn "1476-4687", + name "Nature" + }, + imp { + date std { + year 2011, + month 5, + day 26 + }, + volume "473", + issue "7348", + pages "536-539", + language "eng", + pubstatus ppublish, + history { + { + pubstatus received, + date std { + year 2010, + month 9, + day 2 + } + }, + { + pubstatus accepted, + date std { + year 2011, + month 2, + day 17 + } + }, + { + pubstatus other, + date std { + year 2011, + month 4, + day 26, + hour 6, + minute 0 + } + }, + { + pubstatus pubmed, + date std { + year 2011, + month 4, + day 26, + hour 6, + minute 0 + } + }, + { + pubstatus medline, + date std { + year 2011, + month 7, + day 8, + hour 6, + minute 0 + } + } + } + } + }, + ids { + pubmed 21516107, + pii "nature09956", + doi "10.1038/nature09956", + other { + db "pmc", + tag str "PMC3103711" + }, + other { + db "mid", + tag str "UKMS34436" + }, + other { + db "ELocationID doi", + tag str "10.1038/nature09956" + }, + other { + db "PDB Citation Status", + tag str "primary" + } + } + }, + medline { + em std { + year 2011, + month 4, + day 26 + }, + cit { + from journal { + title { + name "Structure of the spliceosomal U4 snRNP core domain and + its implication for snRNP biogenesis" + }, + imp { + date std { + year 2011, + month 5, + day 26 + } + } + } + }, + abstract "The spliceosome is a dynamic macromolecular machine that + assembles on pre-messenger RNA substrates and catalyses the excision of + non-coding intervening sequences (introns). Four of the five major components + of the spliceosome, U1, U2, U4 and U5 small nuclear ribonucleoproteins + (snRNPs), contain seven Sm proteins (SmB/B', SmD1, SmD2, SmD3, SmE, SmF and + SmG) in common. 
Following export of the U1, U2, U4 and U5 snRNAs to the + cytoplasm, the seven Sm proteins, chaperoned by the survival of motor neurons + (SMN) complex, assemble around a single-stranded, U-rich sequence called the + Sm site in each small nuclear RNA (snRNA), to form the core domain of the + respective snRNP particle. Core domain formation is a prerequisite for + re-import into the nucleus, where these snRNPs mature via addition of their + particle-specific proteins. Here we present a crystal structure of the U4 + snRNP core domain at 3.6 A resolution, detailing how the Sm site heptad + (AUUUUUG) binds inside the central hole of the heptameric ring of Sm + proteins, interacting one-to-one with SmE-SmG-SmD3-SmB-SmD1-SmD2-SmF. An + irregular backbone conformation of the Sm site sequence combined with the + asymmetric structure of the heteromeric protein ring allows each base to + interact in a distinct manner with four key residues at equivalent positions + in the L3 and L5 loops of the Sm fold. A comparison of this structure with + the U1 snRNP at 5.5 A resolution reveals snRNA-dependent structural changes + outside the Sm fold, which may facilitate the binding of particle-specific + proteins that are crucial to biogenesis of spliceosomal snRNPs." + }, + pmid 21516107 + } + }, + pub { + pub { + article { + title { + name "Crystal structures of two Sm protein complexes and their + implications for the assembly of the spliceosomal snRNPs." + }, + authors { + names std { + { + name name { + last "Kambach", + full "C.Kambach", + initials "C." + } + }, + { + name name { + last "Walke", + full "S.Walke", + initials "S." + } + }, + { + name name { + last "Young", + full "R.Young", + initials "R." + } + }, + { + name name { + last "Avis", + full "J.M.Avis", + initials "J.M." + } + }, + { + name name { + last "de la", + full "F.o.de la", + initials "F.o." + } + }, + { + name name { + last "Raker", + full "V.A.Raker", + initials "V.A." 
+ } + }, + { + name name { + last "Luhrmann", + full "R.Luhrmann", + initials "R." + } + }, + { + name name { + last "Li", + full "J.Li", + initials "J." + } + }, + { + name name { + last "Nagai", + full "K.Nagai", + initials "K." + } + } + } + }, + from journal { + title { + iso-jta "Cell", + ml-jta "Cell", + issn "0092-8674", + name "Cell" + }, + imp { + date std { + year 1999, + month 2, + day 5 + }, + volume "96", + issue "3", + pages "375-387", + language "eng", + pubstatus ppublish, + history { + { + pubstatus pubmed, + date std { + year 1999, + month 2, + day 20 + } + }, + { + pubstatus medline, + date std { + year 1999, + month 2, + day 20, + hour 0, + minute 1 + } + }, + { + pubstatus other, + date std { + year 1999, + month 2, + day 20, + hour 0, + minute 0 + } + } + } + } + }, + ids { + pubmed 10025403, + pii "S0092-8674(00)80550-4", + other { + db "PDB Citation Status", + tag str "secondary" + } + } + }, + medline { + em std { + year 1999, + month 2, + day 20 + }, + cit { + from journal { + title { + name "Crystal structures of two Sm protein complexes and their + implications for the assembly of the spliceosomal snRNPs" + }, + imp { + date std { + year 1999, + month 2, + day 5 + } + } + } + }, + abstract "The U1, U2, U4/U6, and U5 small nuclear ribonucleoprotein + particles (snRNPs) involved in pre-mRNA splicing contain seven Sm proteins + (B/B', D1, D2, D3, E, F, and G) in common, which assemble around the Sm site + present in four of the major spliceosomal small nuclear RNAs (snRNAs). These + proteins share a common sequence motif in two segments, Sm1 and Sm2, + separated by a short variable linker. Crystal structures of two Sm protein + complexes, D3B and D1D2, show that these proteins have a common fold + containing an N-terminal helix followed by a strongly bent five-stranded + antiparallel beta sheet, and the D1D2 and D3B dimers superpose closely in + their core regions, including the dimer interfaces. 
The crystal structures + suggest that the seven Sm proteins could form a closed ring and the snRNAs + may be bound in the positively charged central hole." + }, + pmid 10025403 + } + }, + pub { + pub { + article { + title { + name "RNA binding in an Sm core domain: X-ray structure and + functional analysis of an archaeal Sm protein complex." + }, + authors { + names std { + { + name name { + last "Toro", + full "I.Toro", + initials "I." + } + }, + { + name name { + last "Thore", + full "S.Thore", + initials "S." + } + }, + { + name name { + last "Mayer", + full "C.Mayer", + initials "C." + } + }, + { + name name { + last "Basquin", + full "J.Basquin", + initials "J." + } + }, + { + name name { + last "Seraphin", + full "B.Seraphin", + initials "B." + } + }, + { + name name { + last "Suck", + full "D.Suck", + initials "D." + } + } + } + }, + from journal { + title { + iso-jta "EMBO J.", + ml-jta "EMBO J", + issn "0261-4189", + name "The EMBO journal" + }, + imp { + date std { + year 2001, + month 5, + day 1 + }, + volume "20", + issue "9", + pages "2293-2303", + language "eng", + pubstatus ppublish, + history { + { + pubstatus pubmed, + date std { + year 2001, + month 5, + day 2, + hour 10, + minute 0 + } + }, + { + pubstatus medline, + date std { + year 2001, + month 6, + day 2, + hour 10, + minute 1 + } + }, + { + pubstatus other, + date std { + year 2001, + month 5, + day 2, + hour 10, + minute 0 + } + } + } + } + }, + ids { + pubmed 11331594, + doi "10.1093/emboj/20.9.2293", + other { + db "pmc", + tag str "PMC125243" + }, + other { + db "PDB Citation Status", + tag str "secondary" + } + } + }, + medline { + em std { + year 2001, + month 5, + day 2 + }, + cit { + from journal { + title { + name "RNA binding in an Sm core domain: X-ray structure and + functional analysis of an archaeal Sm protein complex" + }, + imp { + date std { + year 2001, + month 5, + day 1 + } + } + } + }, + abstract "Eukaryotic Sm and Sm-like proteins associate with RNA to + form the core 
domain of ribonucleoprotein particles involved in pre-mRNA + splicing and other processes. Recently, putative Sm proteins of unknown + function have been identified in Archaea. We show by immunoprecipitation + experiments that the two Sm proteins present in Archaeoglobus fulgidus + (AF-Sm1 and AF-Sm2) associate with RNase P RNA in vivo, suggesting a role in + tRNA processing. The AF-Sm1 protein also interacts specifically with + oligouridylate in vitro. We have solved the crystal structures of this + protein and a complex with RNA. AF-Sm1 forms a seven-membered ring, with the + RNA interacting inside the central cavity on one face of the doughnut-shaped + complex. The bases are bound via stacking and specific hydrogen bonding + contacts in pockets lined by residues highly conserved in archaeal and + eukaryotic Sm proteins, while the phosphates remain solvent accessible. A + comparison with the structures of human Sm protein dimers reveals closely + related monomer folds and intersubunit contacts, indicating that the + architecture of the Sm core domain and RNA binding have been conserved during + evolution." + }, + pmid 11331594 + } + }, + pub { + pub { + article { + title { + name "Crystal structure of human U1 snRNP, a small nuclear + ribonucleoprotein particle, reveals the mechanism of 5' splice site + recognition." + }, + authors { + names std { + { + name name { + last "Kondo", + full "Y.Kondo", + initials "Y." + } + }, + { + name name { + last "Oubridge", + full "C.Oubridge", + initials "C." + } + }, + { + name name { + last "van Roon", + full "A.M.van Roon", + initials "A.M." + } + }, + { + name name { + last "Nagai", + full "K.Nagai", + initials "K." 
+ } + } + } + }, + from journal { + title { + iso-jta "Elife", + ml-jta "Elife", + issn "2050-084X", + name "eLife" + }, + imp { + date std { + year 2015, + month 1, + day 2 + }, + volume "4", + language "eng", + pubstatus epublish, + history { + { + pubstatus received, + date std { + year 2014, + month 10, + day 1 + } + }, + { + pubstatus accepted, + date std { + year 2014, + month 12, + day 6 + } + }, + { + pubstatus other, + date std { + year 2015, + month 1, + day 3, + hour 6, + minute 0 + } + }, + { + pubstatus pubmed, + date std { + year 2015, + month 1, + day 3, + hour 6, + minute 0 + } + }, + { + pubstatus medline, + date std { + year 2015, + month 9, + day 15, + hour 6, + minute 0 + } + } + } + } + }, + ids { + pubmed 25555158, + doi "10.7554/eLife.04986", + other { + db "pmc", + tag str "PMC4383343" + }, + other { + db "ELocationID doi", + tag str "10.7554/eLife.04986" + }, + other { + db "PDB Citation Status", + tag str "secondary" + } + } + }, + medline { + em std { + year 2015, + month 1, + day 3 + }, + cit { + from journal { + title { + name "Crystal structure of human U1 snRNP, a small nuclear + ribonucleoprotein particle, reveals the mechanism of 5' splice site + recognition" + }, + imp { + date std { + year 2015, + month 1, + day 2 + } + } + } + }, + abstract "U1 snRNP binds to the 5' exon-intron junction of pre-mRNA + and thus plays a crucial role at an early stage of pre-mRNA splicing. We + present two crystal structures of engineered U1 sub-structures, which + together reveal at atomic resolution an almost complete network of + protein-protein and RNA-protein interactions within U1 snRNP, and show how + the 5' splice site of pre-mRNA is recognised by U1 snRNP. The zinc-finger of + U1-C interacts with the duplex between pre-mRNA and the 5'-end of U1 snRNA. 
+ The binding of the RNA duplex is stabilized by hydrogen bonds and + electrostatic interactions between U1-C and the RNA backbone around the + splice junction but U1-C makes no base-specific contacts with pre-mRNA. The + structure, together with RNA binding assays, shows that the selection of 5 +'-splice site nucleotides by U1 snRNP is achieved predominantly through + basepairing with U1 snRNA whilst U1-C fine-tunes relative affinities of + mismatched 5'-splice sites. " + }, + pmid 25555158 + } + }, + pub { + pub { + article { + title { + name "Re-refinement of the spliceosomal U4 snRNP core-domain + structure." + }, + authors { + names std { + { + name name { + last "Li", + full "J.Li", + initials "J." + } + }, + { + name name { + last "Leung", + full "A.K.Leung", + initials "A.K." + } + }, + { + name name { + last "Kondo", + full "Y.Kondo", + initials "Y." + } + }, + { + name name { + last "Oubridge", + full "C.Oubridge", + initials "C." + } + }, + { + name name { + last "Nagai", + full "K.Nagai", + initials "K." + } + } + } + }, + from journal { + title { + iso-jta "Acta Crystallogr D Struct Biol", + ml-jta "Acta Crystallogr D Struct Biol", + issn "2059-7983", + name "Acta crystallographica. 
Section D, Structural biology" + }, + imp { + date std { + year 2016, + month 1 + }, + volume "72", + issue "Pt 1", + pages "131-146", + language "eng", + pubstatus ppublish, + history { + { + pubstatus received, + date std { + year 2015, + month 7, + day 22 + } + }, + { + pubstatus accepted, + date std { + year 2015, + month 11, + day 19 + } + }, + { + pubstatus other, + date std { + year 2016, + month 2, + day 20, + hour 6, + minute 0 + } + }, + { + pubstatus pubmed, + date std { + year 2016, + month 2, + day 20, + hour 6, + minute 0 + } + }, + { + pubstatus medline, + date std { + year 2016, + month 11, + day 9, + hour 6, + minute 0 + } + } + } + } + }, + ids { + pubmed 26894541, + pii "S2059798315022111", + doi "10.1107/S2059798315022111", + other { + db "pmc", + tag str "PMC4756616" + }, + other { + db "ELocationID doi", + tag str "10.1107/S2059798315022111" + }, + other { + db "PDB Citation Status", + tag str "secondary" + } + } + }, + medline { + em std { + year 2016, + month 2, + day 20 + }, + cit { + from journal { + title { + name "Re-refinement of the spliceosomal U4 snRNP core-domain + structure" + }, + imp { + date std { + year 2016, + month 1 + } + } + } + }, + abstract "The core domain of small nuclear ribonucleoprotein + (snRNP), comprised of a ring of seven paralogous proteins bound around a + single-stranded RNA sequence, functions as the assembly nucleus in the + maturation of U1, U2, U4 and U5 spliceosomal snRNPs. The structure of the + human U4 snRNP core domain was initially solved at 3.6 A resolution by + experimental phasing using data with tetartohedral twinning. Molecular + replacement from this model followed by density modification using untwinned + data recently led to a structure of the minimal U1 snRNP at 3.3 A resolution. + With the latter structure providing a search model for molecular replacement, + the U4 core-domain structure has now been re-refined. 
The U4 Sm site-sequence + AAUUUUU has been shown to bind to the seven Sm proteins + SmF-SmE-SmG-SmD3-SmB-SmD1-SmD2 in an identical manner as the U1 Sm-site + sequence AAUUUGU, except in SmD1 where the bound U replaces G. The + progression from the initial to the re-refined structure exemplifies a + tortuous route to accuracy: where well diffracting crystals of complex + assemblies are initially unavailable, the early model errors are rectified by + exploiting preliminary interpretations in further experiments involving + homologous structures. New insights are obtained from the more accurate model." + }, + pmid 26894541 + } + } + }, + seq-set { + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "A" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D3", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 125, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 125, + seq-data ncbieaa "MSIGVPIKVLHEAEGHIVTCETNTGEVYRGKLIEAEDNMNCQMSNITVTYRD +GRVAQLEQVYIRGSKIRFLILPDMLKNAPMLKSMKNKNQGSGAGRGKAAILKAQVAARGRGRGMGRGNIFQKR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 1", + location int { + from 4, + to 12, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "A" + } + } + }, + { + data psec-str sheet, + comment "strand 1", + location int { + from 14, + to 22, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "A" + } + } + }, + { + data psec-str sheet, + comment "strand 2", + location int { + from 24, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "A" + } + } + }, + { + data psec-str sheet, + comment "strand 3", + location int { + from 29, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "A" + } + } + }, + { + data psec-str sheet, + comment "strand 4", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "A" + } + } + }, + { + data psec-str sheet, + comment "strand 5", + location int { + from 43, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "A" + } + } + }, + { + data psec-str sheet, + comment "strand 6", + location int { + from 52, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "A" + } + } + }, + { + data psec-str sheet, + comment "strand 7", + location int { + from 58, + to 64, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "A" + } + } + }, + { + data psec-str 
sheet, + comment "strand 8", + location int { + from 68, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "A" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "B" + } + }, + descr { + comment "Small nuclear ribonucleoprotein-associated proteins B and B'", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MTVGKSSKMLQHIDYRMRCILQDGRIFIGTFKAFDKHMNLILCDCDEFRKIK +PKNSKQAEREEKRVLGLVLLRGENLVSMTVEGPPPKDTGIARV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 9", + location int { + from 13, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "B" + } + 
} + }, + { + data psec-str sheet, + comment "strand 10", + location int { + from 23, + to 29, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "B" + } + } + }, + { + data psec-str sheet, + comment "strand 11", + location int { + from 30, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "B" + } + } + }, + { + data psec-str sheet, + comment "strand 12", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "B" + } + } + }, + { + data psec-str sheet, + comment "strand 13", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "B" + } + } + }, + { + data psec-str sheet, + comment "strand 14", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "B" + } + } + }, + { + data psec-str sheet, + comment "strand 15", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "B" + } + } + }, + { + data psec-str sheet, + comment "strand 16", + location int { + from 75, + to 81, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "B" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "C" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D1", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "1", + "2", + 
"3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MKLVRFLMKLSHETVTIELKNGTQVHGTITGVDVSMNTHLKAVKMTLKNREP +VQLETLSIRGNNIRYFILPDSLPLDTLLVDVEPKVKSKKREAVAGRGRGRGRGRGRGRGRGRGGPR" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 17", + location int { + from 12, + to 19, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "C" + } + } + }, + { + data psec-str sheet, + comment "strand 18", + location int { + from 21, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "C" + } + } + }, + { + data psec-str sheet, + comment "strand 19", + location int { + from 26, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "C" + } + } + }, + { + data psec-str sheet, + comment "strand 20", + location int { + from 35, + to 40, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "C" + } + } + }, + { + data psec-str sheet, + comment "strand 21", + location int { + from 41, + to 47, + id pdb { + mol "4WZJ", + rel std { 
+ year 2018, + month 4, + day 18 + }, + chain-id "C" + } + } + }, + { + data psec-str sheet, + comment "strand 22", + location int { + from 51, + to 55, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "C" + } + } + }, + { + data psec-str sheet, + comment "strand 23", + location int { + from 56, + to 61, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "C" + } + } + }, + { + data psec-str sheet, + comment "strand 24", + location int { + from 65, + to 69, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "C" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "D" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D2", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + 
"95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MSLLNKPKSEMTPEELQKREEEEFNTGPLSVLTQSVKNNTQVLINCRNNKKL +LGRVKAFDRHCNMVLENVKEMWTEVPKSGKGKKKSKPVNKDRYISKMFLRGDSVIVVLRNPLIAGK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 2", + location int { + from 29, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "D" + } + } + }, + { + data psec-str sheet, + comment "strand 25", + location int { + from 40, + to 46, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "D" + } + } + }, + { + data psec-str sheet, + comment "strand 26", + location int { + from 48, + to 52, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "D" + } + } + }, + { + data psec-str sheet, + comment "strand 27", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "D" + } + } + }, + { + data psec-str sheet, + comment "strand 28", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "D" + } + } + }, + { + data psec-str sheet, + comment "strand 29", + location int { + from 68, + to 76, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "D" + } + } + }, + { + data psec-str sheet, + comment "strand 30", + location int { + from 88, + to 94, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "D" + } + } + }, + { + data psec-str sheet, + comment "strand 31", + location int { + from 95, + to 102, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "D" + } + } + }, + { + 
data psec-str sheet, + comment "strand 32", + location int { + from 104, + to 110, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "D" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "F" + } + }, + descr { + comment "Small nuclear ribonucleoprotein F", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 86, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "MSLPLNPKPFLNGLTGKPVMVKLKWGMEYKGYLVSVDGYMNMQLANTEEYID +GALSGHLGEVLIRCNNVLYIRGVEEEEEDGEMRE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 3", + location int { + from 6, + to 13, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "F" + } + } + }, + { + data psec-str sheet, + comment "strand 33", + location int { + from 15, + to 23, + id 
pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "F" + } + } + }, + { + data psec-str sheet, + comment "strand 34", + location int { + from 25, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "F" + } + } + }, + { + data psec-str sheet, + comment "strand 35", + location int { + from 31, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "F" + } + } + }, + { + data psec-str sheet, + comment "strand 36", + location int { + from 39, + to 44, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "F" + } + } + }, + { + data psec-str sheet, + comment "strand 37", + location int { + from 45, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "F" + } + } + }, + { + data psec-str sheet, + comment "strand 38", + location int { + from 53, + to 59, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "F" + } + } + }, + { + data psec-str sheet, + comment "strand 39", + location int { + from 60, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "F" + } + } + }, + { + data psec-str sheet, + comment "strand 40", + location int { + from 69, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "F" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "E" + } + }, + descr { + comment "Small nuclear ribonucleoprotein E", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, 
+ mgcode 2, + div "PRI" + } + } + }, + num enum { + num 92, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 92, + seq-data ncbieaa "MAYRGQGQKVQKVMVQPINLIFRYLQNRSRIQVWLYEQVNMRIEGCIIGFDE +YMNLVLDDAEEIHSKTKSRKQLGRIMLKGDNITLLQSVSN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 4", + location int { + from 16, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "E" + } + } + }, + { + data psec-str sheet, + comment "strand 41", + location int { + from 29, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "E" + } + } + }, + { + data psec-str sheet, + comment "strand 42", + location int { + from 39, + to 45, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "E" + } + } + }, + { + data psec-str sheet, + comment "strand 43", + location int { + from 46, + to 51, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "E" + } + } + }, + { + data psec-str sheet, + comment "strand 44", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "E" + } + } + }, + { + data psec-str sheet, + 
comment "strand 45", + location int { + from 58, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "E" + } + } + }, + { + data psec-str sheet, + comment "strand 46", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "E" + } + } + }, + { + data psec-str sheet, + comment "strand 47", + location int { + from 74, + to 80, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "E" + } + } + }, + { + data psec-str sheet, + comment "strand 48", + location int { + from 84, + to 89, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "E" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "G" + } + }, + descr { + comment "Small nuclear ribonucleoprotein G", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 76, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76" + } + }, + molinfo { + biomol 
peptide + } + }, + inst { + repr raw, + mol aa, + length 76, + seq-data ncbieaa "MSKAHPPELKKFMDKKLSLKLNGGRHVQGILRGFDPFMNLVIDECVEMATSG +QQNNIGMVVIRGNSIIMLEALERV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 49", + location int { + from 12, + to 17, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "G" + } + } + }, + { + data psec-str sheet, + comment "strand 50", + location int { + from 18, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "G" + } + } + }, + { + data psec-str sheet, + comment "strand 51", + location int { + from 23, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "G" + } + } + }, + { + data psec-str sheet, + comment "strand 52", + location int { + from 29, + to 35, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "G" + } + } + }, + { + data psec-str sheet, + comment "strand 53", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "G" + } + } + }, + { + data psec-str sheet, + comment "strand 54", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "G" + } + } + }, + { + data psec-str sheet, + comment "strand 55", + location int { + from 51, + to 56, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "G" + } + } + }, + { + data psec-str sheet, + comment "strand 56", + location int { + from 57, + to 63, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "G" + } + } + }, + { + data psec-str sheet, + comment "strand 57", + location int { + from 65, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "G" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + 
day 18 + }, + chain-id "H" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D3", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 125, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 125, + seq-data ncbieaa "MSIGVPIKVLHEAEGHIVTCETNTGEVYRGKLIEAEDNMNCQMSNITVTYRD +GRVAQLEQVYIRGSKIRFLILPDMLKNAPMLKSMKNKNQGSGAGRGKAAILKAQVAARGRGRGMGRGNIFQKR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 5", + location int { + from 4, + to 12, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "H" + } + } + }, + { + data psec-str sheet, + comment "strand 58", + location int { + 
from 14, + to 22, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "H" + } + } + }, + { + data psec-str sheet, + comment "strand 59", + location int { + from 24, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "H" + } + } + }, + { + data psec-str sheet, + comment "strand 60", + location int { + from 29, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "H" + } + } + }, + { + data psec-str sheet, + comment "strand 61", + location int { + from 38, + to 43, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "H" + } + } + }, + { + data psec-str sheet, + comment "strand 62", + location int { + from 44, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "H" + } + } + }, + { + data psec-str sheet, + comment "strand 63", + location int { + from 52, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "H" + } + } + }, + { + data psec-str sheet, + comment "strand 64", + location int { + from 58, + to 64, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "H" + } + } + }, + { + data psec-str sheet, + comment "strand 65", + location int { + from 68, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "H" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "I" + } + }, + descr { + comment "Small nuclear ribonucleoprotein-associated proteins B and B'", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; 
Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MTVGKSSKMLQHIDYRMRCILQDGRIFIGTFKAFDKHMNLILCDCDEFRKIK +PKNSKQAEREEKRVLGLVLLRGENLVSMTVEGPPPKDTGIARV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 66", + location int { + from 13, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "I" + } + } + }, + { + data psec-str sheet, + comment "strand 67", + location int { + from 23, + to 29, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "I" + } + } + }, + { + data psec-str sheet, + comment "strand 68", + location int { + from 30, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "I" + } + } + }, + { + data psec-str sheet, + comment "strand 69", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "I" + } + } + }, + { + data psec-str sheet, + comment "strand 70", + location int { + from 43, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, 
+ chain-id "I" + } + } + }, + { + data psec-str sheet, + comment "strand 71", + location int { + from 64, + to 68, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "I" + } + } + }, + { + data psec-str sheet, + comment "strand 72", + location int { + from 69, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "I" + } + } + }, + { + data psec-str sheet, + comment "strand 73", + location int { + from 75, + to 81, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "I" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "J" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D1", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MKLVRFLMKLSHETVTIELKNGTQVHGTITGVDVSMNTHLKAVKMTLKNREP +VQLETLSIRGNNIRYFILPDSLPLDTLLVDVEPKVKSKKREAVAGRGRGRGRGRGRGRGRGRGGPR" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 74", + location int { + from 12, + to 19, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "J" + } + } + }, + { + data psec-str sheet, + comment "strand 75", + location int { + from 21, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "J" + } + } + }, + { + data psec-str sheet, + comment "strand 76", + location int { + from 26, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "J" + } + } + }, + { + data psec-str sheet, + comment "strand 77", + location int { + from 35, + to 40, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "J" + } + } + }, + { + data psec-str sheet, + comment "strand 78", + location int { + from 41, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "J" + } + } + }, + { + data psec-str sheet, + comment "strand 79", + location int { + from 51, + to 55, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "J" + } + } + }, + { + data psec-str sheet, + comment "strand 80", + location int { + from 56, + to 61, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "J" + } + } + }, + { + data psec-str sheet, + comment "strand 81", + location int { + from 65, + to 69, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "J" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, 
+ chain-id "K" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D2", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MSLLNKPKSEMTPEELQKREEEEFNTGPLSVLTQSVKNNTQVLINCRNNKKL +LGRVKAFDRHCNMVLENVKEMWTEVPKSGKGKKKSKPVNKDRYISKMFLRGDSVIVVLRNPLIAGK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 6", + location int { + from 18, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "K" + } + } + }, + { + data psec-str helix, + comment "helix 7", + location int { + from 
29, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "K" + } + } + }, + { + data psec-str sheet, + comment "strand 82", + location int { + from 40, + to 46, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "K" + } + } + }, + { + data psec-str sheet, + comment "strand 83", + location int { + from 48, + to 52, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "K" + } + } + }, + { + data psec-str sheet, + comment "strand 84", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "K" + } + } + }, + { + data psec-str sheet, + comment "strand 85", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "K" + } + } + }, + { + data psec-str sheet, + comment "strand 86", + location int { + from 68, + to 76, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "K" + } + } + }, + { + data psec-str sheet, + comment "strand 87", + location int { + from 88, + to 94, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "K" + } + } + }, + { + data psec-str sheet, + comment "strand 88", + location int { + from 95, + to 102, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "K" + } + } + }, + { + data psec-str sheet, + comment "strand 89", + location int { + from 104, + to 110, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "K" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "M" + } + }, + descr { + comment "Small nuclear ribonucleoprotein F", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" 
+ }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 86, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "MSLPLNPKPFLNGLTGKPVMVKLKWGMEYKGYLVSVDGYMNMQLANTEEYID +GALSGHLGEVLIRCNNVLYIRGVEEEEEDGEMRE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 8", + location int { + from 6, + to 13, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "M" + } + } + }, + { + data psec-str sheet, + comment "strand 90", + location int { + from 15, + to 23, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "M" + } + } + }, + { + data psec-str sheet, + comment "strand 91", + location int { + from 25, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "M" + } + } + }, + { + data psec-str sheet, + comment "strand 92", + location int { + from 31, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "M" + } + } + }, + { + data psec-str sheet, + comment "strand 93", + location 
int { + from 39, + to 44, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "M" + } + } + }, + { + data psec-str sheet, + comment "strand 94", + location int { + from 45, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "M" + } + } + }, + { + data psec-str sheet, + comment "strand 95", + location int { + from 53, + to 59, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "M" + } + } + }, + { + data psec-str sheet, + comment "strand 96", + location int { + from 60, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "M" + } + } + }, + { + data psec-str sheet, + comment "strand 97", + location int { + from 69, + to 74, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "M" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "L" + } + }, + descr { + comment "Small nuclear ribonucleoprotein E", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 92, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", 
+ "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 92, + seq-data ncbieaa "MAYRGQGQKVQKVMVQPINLIFRYLQNRSRIQVWLYEQVNMRIEGCIIGFDE +YMNLVLDDAEEIHSKTKSRKQLGRIMLKGDNITLLQSVSN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 9", + location int { + from 16, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "L" + } + } + }, + { + data psec-str sheet, + comment "strand 98", + location int { + from 29, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "L" + } + } + }, + { + data psec-str sheet, + comment "strand 99", + location int { + from 39, + to 45, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "L" + } + } + }, + { + data psec-str sheet, + comment "strand 100", + location int { + from 46, + to 51, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "L" + } + } + }, + { + data psec-str sheet, + comment "strand 101", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "L" + } + } + }, + { + data psec-str sheet, + comment "strand 102", + location int { + from 58, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "L" + } + } + }, + { + data psec-str sheet, + comment "strand 103", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "L" + } + } + }, + { + data psec-str sheet, + comment "strand 104", + location int { + from 74, + to 80, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "L" + } + } 
+ }, + { + data psec-str sheet, + comment "strand 105", + location int { + from 84, + to 89, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "L" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "N" + } + }, + descr { + comment "Small nuclear ribonucleoprotein G", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 76, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 76, + seq-data ncbieaa "MSKAHPPELKKFMDKKLSLKLNGGRHVQGILRGFDPFMNLVIDECVEMATSG +QQNNIGMVVIRGNSIIMLEALERV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 106", + location int { + from 14, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "N" + } + } + }, + { + data psec-str sheet, + comment "strand 107", + location int { + from 23, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, 
+ day 18 + }, + chain-id "N" + } + } + }, + { + data psec-str sheet, + comment "strand 108", + location int { + from 29, + to 35, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "N" + } + } + }, + { + data psec-str sheet, + comment "strand 109", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "N" + } + } + }, + { + data psec-str sheet, + comment "strand 110", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "N" + } + } + }, + { + data psec-str sheet, + comment "strand 111", + location int { + from 51, + to 56, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "N" + } + } + }, + { + data psec-str sheet, + comment "strand 112", + location int { + from 57, + to 63, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "N" + } + } + }, + { + data psec-str sheet, + comment "strand 113", + location int { + from 65, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "N" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "O" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D3", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 125, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", 
+ "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 125, + seq-data ncbieaa "MSIGVPIKVLHEAEGHIVTCETNTGEVYRGKLIEAEDNMNCQMSNITVTYRD +GRVAQLEQVYIRGSKIRFLILPDMLKNAPMLKSMKNKNQGSGAGRGKAAILKAQVAARGRGRGMGRGNIFQKR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 10", + location int { + from 4, + to 12, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "O" + } + } + }, + { + data psec-str sheet, + comment "strand 114", + location int { + from 14, + to 22, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "O" + } + } + }, + { + data psec-str sheet, + comment "strand 115", + location int { + from 24, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "O" + } + } + }, + { + data psec-str sheet, + comment "strand 116", + location int { + from 29, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "O" + } + } + }, + { + data psec-str sheet, + comment "strand 117", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "O" + } + } + }, + { + data psec-str sheet, + 
comment "strand 118", + location int { + from 43, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "O" + } + } + }, + { + data psec-str sheet, + comment "strand 119", + location int { + from 52, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "O" + } + } + }, + { + data psec-str sheet, + comment "strand 120", + location int { + from 58, + to 64, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "O" + } + } + }, + { + data psec-str sheet, + comment "strand 121", + location int { + from 68, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "O" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "P" + } + }, + descr { + comment "Small nuclear ribonucleoprotein-associated proteins B and B'", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + 
"80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MTVGKSSKMLQHIDYRMRCILQDGRIFIGTFKAFDKHMNLILCDCDEFRKIK +PKNSKQAEREEKRVLGLVLLRGENLVSMTVEGPPPKDTGIARV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 122", + location int { + from 13, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "P" + } + } + }, + { + data psec-str sheet, + comment "strand 123", + location int { + from 23, + to 29, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "P" + } + } + }, + { + data psec-str sheet, + comment "strand 124", + location int { + from 30, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "P" + } + } + }, + { + data psec-str sheet, + comment "strand 125", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "P" + } + } + }, + { + data psec-str sheet, + comment "strand 126", + location int { + from 43, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "P" + } + } + }, + { + data psec-str sheet, + comment "strand 127", + location int { + from 64, + to 68, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "P" + } + } + }, + { + data psec-str sheet, + comment "strand 128", + location int { + from 69, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "P" + } + } + }, + { + data psec-str sheet, + comment "strand 129", + location int { + from 75, + to 81, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "P" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "Q" 
+ } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D1", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MKLVRFLMKLSHETVTIELKNGTQVHGTITGVDVSMNTHLKAVKMTLKNREP +VQLETLSIRGNNIRYFILPDSLPLDTLLVDVEPKVKSKKREAVAGRGRGRGRGRGRGRGRGRGGPR" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 130", + location int { + from 12, + to 19, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "Q" + } + } + }, + { + data psec-str sheet, + comment "strand 131", + location int { + from 21, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, 
+ day 18 + }, + chain-id "Q" + } + } + }, + { + data psec-str sheet, + comment "strand 132", + location int { + from 26, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "Q" + } + } + }, + { + data psec-str sheet, + comment "strand 133", + location int { + from 35, + to 40, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "Q" + } + } + }, + { + data psec-str sheet, + comment "strand 134", + location int { + from 41, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "Q" + } + } + }, + { + data psec-str sheet, + comment "strand 135", + location int { + from 51, + to 55, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "Q" + } + } + }, + { + data psec-str sheet, + comment "strand 136", + location int { + from 56, + to 61, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "Q" + } + } + }, + { + data psec-str sheet, + comment "strand 137", + location int { + from 65, + to 69, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "Q" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "R" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D2", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + 
"26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MSLLNKPKSEMTPEELQKREEEEFNTGPLSVLTQSVKNNTQVLINCRNNKKL +LGRVKAFDRHCNMVLENVKEMWTEVPKSGKGKKKSKPVNKDRYISKMFLRGDSVIVVLRNPLIAGK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 11", + location int { + from 18, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "R" + } + } + }, + { + data psec-str helix, + comment "helix 12", + location int { + from 29, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "R" + } + } + }, + { + data psec-str sheet, + comment "strand 138", + location int { + from 40, + to 46, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "R" + } + } + }, + { + data psec-str sheet, + comment "strand 139", + location int { + from 48, + to 52, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "R" + } + } + }, + { + data psec-str sheet, + comment "strand 140", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "R" + } + } + }, + { + data psec-str 
sheet, + comment "strand 141", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "R" + } + } + }, + { + data psec-str sheet, + comment "strand 142", + location int { + from 68, + to 76, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "R" + } + } + }, + { + data psec-str sheet, + comment "strand 143", + location int { + from 88, + to 94, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "R" + } + } + }, + { + data psec-str sheet, + comment "strand 144", + location int { + from 95, + to 102, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "R" + } + } + }, + { + data psec-str sheet, + comment "strand 145", + location int { + from 104, + to 110, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "R" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "T" + } + }, + descr { + comment "Small nuclear ribonucleoprotein F", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 86, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + 
"54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "MSLPLNPKPFLNGLTGKPVMVKLKWGMEYKGYLVSVDGYMNMQLANTEEYID +GALSGHLGEVLIRCNNVLYIRGVEEEEEDGEMRE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 13", + location int { + from 6, + to 13, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "T" + } + } + }, + { + data psec-str sheet, + comment "strand 146", + location int { + from 15, + to 23, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "T" + } + } + }, + { + data psec-str sheet, + comment "strand 147", + location int { + from 25, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "T" + } + } + }, + { + data psec-str sheet, + comment "strand 148", + location int { + from 31, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "T" + } + } + }, + { + data psec-str sheet, + comment "strand 149", + location int { + from 39, + to 44, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "T" + } + } + }, + { + data psec-str sheet, + comment "strand 150", + location int { + from 45, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "T" + } + } + }, + { + data psec-str sheet, + comment "strand 151", + location int { + from 53, + to 59, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "T" + } + } + }, + { + data psec-str sheet, + comment "strand 152", + location int { + from 60, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "T" + } + } 
+ }, + { + data psec-str sheet, + comment "strand 153", + location int { + from 69, + to 74, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "T" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "S" + } + }, + descr { + comment "Small nuclear ribonucleoprotein E", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 92, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 92, + seq-data ncbieaa "MAYRGQGQKVQKVMVQPINLIFRYLQNRSRIQVWLYEQVNMRIEGCIIGFDE +YMNLVLDDAEEIHSKTKSRKQLGRIMLKGDNITLLQSVSN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 14", + location int { + from 16, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "S" + } + } + }, + { + data psec-str 
sheet, + comment "strand 154", + location int { + from 29, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "S" + } + } + }, + { + data psec-str sheet, + comment "strand 155", + location int { + from 39, + to 45, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "S" + } + } + }, + { + data psec-str sheet, + comment "strand 156", + location int { + from 46, + to 51, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "S" + } + } + }, + { + data psec-str sheet, + comment "strand 157", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "S" + } + } + }, + { + data psec-str sheet, + comment "strand 158", + location int { + from 58, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "S" + } + } + }, + { + data psec-str sheet, + comment "strand 159", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "S" + } + } + }, + { + data psec-str sheet, + comment "strand 160", + location int { + from 74, + to 80, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "S" + } + } + }, + { + data psec-str sheet, + comment "strand 161", + location int { + from 84, + to 89, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "S" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "U" + } + }, + descr { + comment "Small nuclear ribonucleoprotein G", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; 
Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 76, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 76, + seq-data ncbieaa "MSKAHPPELKKFMDKKLSLKLNGGRHVQGILRGFDPFMNLVIDECVEMATSG +QQNNIGMVVIRGNSIIMLEALERV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 162", + location int { + from 14, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "U" + } + } + }, + { + data psec-str sheet, + comment "strand 163", + location int { + from 23, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "U" + } + } + }, + { + data psec-str sheet, + comment "strand 164", + location int { + from 29, + to 35, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "U" + } + } + }, + { + data psec-str sheet, + comment "strand 165", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "U" + } + } + }, + { + data psec-str sheet, + comment "strand 166", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "U" + } + } + }, + { + data psec-str sheet, + comment "strand 167", + location int { 
+ from 51, + to 56, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "U" + } + } + }, + { + data psec-str sheet, + comment "strand 168", + location int { + from 57, + to 63, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "U" + } + } + }, + { + data psec-str sheet, + comment "strand 169", + location int { + from 65, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "U" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AA" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D3", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 125, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 125, + seq-data ncbieaa "MSIGVPIKVLHEAEGHIVTCETNTGEVYRGKLIEAEDNMNCQMSNITVTYRD +GRVAQLEQVYIRGSKIRFLILPDMLKNAPMLKSMKNKNQGSGAGRGKAAILKAQVAARGRGRGMGRGNIFQKR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 15", + location int { + from 4, + to 12, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AA" + } + } + }, + { + data psec-str sheet, + comment "strand 170", + location int { + from 14, + to 22, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AA" + } + } + }, + { + data psec-str sheet, + comment "strand 171", + location int { + from 24, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AA" + } + } + }, + { + data psec-str sheet, + comment "strand 172", + location int { + from 29, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AA" + } + } + }, + { + data psec-str sheet, + comment "strand 173", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AA" + } + } + }, + { + data psec-str sheet, + comment "strand 174", + location int { + from 43, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AA" + } + } + }, + { + data psec-str sheet, + comment "strand 175", + location int { + from 52, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AA" + } + } + }, + { + data psec-str sheet, + comment "strand 176", + location int { + from 58, + to 64, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AA" + } + } + }, + { + data psec-str sheet, + comment "strand 177", + location int { + from 68, + to 72, + id pdb { + mol "4WZJ", + rel std { + 
year 2018, + month 4, + day 18 + }, + chain-id "AA" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BB" + } + }, + descr { + comment "Small nuclear ribonucleoprotein-associated proteins B and B'", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MTVGKSSKMLQHIDYRMRCILQDGRIFIGTFKAFDKHMNLILCDCDEFRKIK +PKNSKQAEREEKRVLGLVLLRGENLVSMTVEGPPPKDTGIARV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 178", + location int { + from 13, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BB" + } + } + }, + { + data psec-str sheet, + comment "strand 179", + location int { + from 23, + to 29, + id pdb { + mol "4WZJ", + rel std 
{ + year 2018, + month 4, + day 18 + }, + chain-id "BB" + } + } + }, + { + data psec-str sheet, + comment "strand 180", + location int { + from 30, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BB" + } + } + }, + { + data psec-str sheet, + comment "strand 181", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BB" + } + } + }, + { + data psec-str sheet, + comment "strand 182", + location int { + from 43, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BB" + } + } + }, + { + data psec-str sheet, + comment "strand 183", + location int { + from 64, + to 68, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BB" + } + } + }, + { + data psec-str sheet, + comment "strand 184", + location int { + from 69, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BB" + } + } + }, + { + data psec-str sheet, + comment "strand 185", + location int { + from 75, + to 81, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BB" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CC" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D1", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + 
"18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MKLVRFLMKLSHETVTIELKNGTQVHGTITGVDVSMNTHLKAVKMTLKNREP +VQLETLSIRGNNIRYFILPDSLPLDTLLVDVEPKVKSKKREAVAGRGRGRGRGRGRGRGRGRGGPR" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 186", + location int { + from 12, + to 19, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CC" + } + } + }, + { + data psec-str sheet, + comment "strand 187", + location int { + from 21, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CC" + } + } + }, + { + data psec-str sheet, + comment "strand 188", + location int { + from 26, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CC" + } + } + }, + { + data psec-str sheet, + comment "strand 189", + location int { + from 35, + to 40, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CC" + } + } + }, + { + data psec-str sheet, + comment "strand 190", + location int { + from 41, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CC" + } + } + }, + { + data psec-str sheet, + comment 
"strand 191", + location int { + from 51, + to 55, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CC" + } + } + }, + { + data psec-str sheet, + comment "strand 192", + location int { + from 56, + to 61, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CC" + } + } + }, + { + data psec-str sheet, + comment "strand 193", + location int { + from 65, + to 69, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CC" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DD" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D2", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + 
"106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MSLLNKPKSEMTPEELQKREEEEFNTGPLSVLTQSVKNNTQVLINCRNNKKL +LGRVKAFDRHCNMVLENVKEMWTEVPKSGKGKKKSKPVNKDRYISKMFLRGDSVIVVLRNPLIAGK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 16", + location int { + from 18, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DD" + } + } + }, + { + data psec-str helix, + comment "helix 17", + location int { + from 29, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DD" + } + } + }, + { + data psec-str sheet, + comment "strand 194", + location int { + from 40, + to 46, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DD" + } + } + }, + { + data psec-str sheet, + comment "strand 195", + location int { + from 48, + to 52, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DD" + } + } + }, + { + data psec-str sheet, + comment "strand 196", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DD" + } + } + }, + { + data psec-str sheet, + comment "strand 197", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DD" + } + } + }, + { + data psec-str sheet, + comment "strand 198", + location int { + from 68, + to 76, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DD" + } + } + }, + { + data psec-str sheet, + comment "strand 199", + location int { + from 88, + to 94, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DD" + } + } + }, + { + data psec-str sheet, + comment "strand 200", + location int { + from 95, + to 102, + 
id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DD" + } + } + }, + { + data psec-str sheet, + comment "strand 201", + location int { + from 104, + to 110, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DD" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FF" + } + }, + descr { + comment "Small nuclear ribonucleoprotein F", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 86, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "MSLPLNPKPFLNGLTGKPVMVKLKWGMEYKGYLVSVDGYMNMQLANTEEYID +GALSGHLGEVLIRCNNVLYIRGVEEEEEDGEMRE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 18", + location int { + from 6, + to 13, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + 
chain-id "FF" + } + } + }, + { + data psec-str sheet, + comment "strand 202", + location int { + from 15, + to 23, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FF" + } + } + }, + { + data psec-str sheet, + comment "strand 203", + location int { + from 25, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FF" + } + } + }, + { + data psec-str sheet, + comment "strand 204", + location int { + from 31, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FF" + } + } + }, + { + data psec-str sheet, + comment "strand 205", + location int { + from 39, + to 44, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FF" + } + } + }, + { + data psec-str sheet, + comment "strand 206", + location int { + from 45, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FF" + } + } + }, + { + data psec-str sheet, + comment "strand 207", + location int { + from 53, + to 59, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FF" + } + } + }, + { + data psec-str sheet, + comment "strand 208", + location int { + from 60, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FF" + } + } + }, + { + data psec-str sheet, + comment "strand 209", + location int { + from 69, + to 74, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FF" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EE" + } + }, + descr { + comment "Small nuclear ribonucleoprotein E", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; 
Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 92, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 92, + seq-data ncbieaa "MAYRGQGQKVQKVMVQPINLIFRYLQNRSRIQVWLYEQVNMRIEGCIIGFDE +YMNLVLDDAEEIHSKTKSRKQLGRIMLKGDNITLLQSVSN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 19", + location int { + from 16, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EE" + } + } + }, + { + data psec-str sheet, + comment "strand 210", + location int { + from 29, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EE" + } + } + }, + { + data psec-str sheet, + comment "strand 211", + location int { + from 39, + to 45, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EE" + } + } + }, + { + data psec-str sheet, + comment "strand 212", + location int { + from 46, + to 51, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EE" + } + } + }, + { + data psec-str sheet, + comment "strand 213", + location int { + 
from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EE" + } + } + }, + { + data psec-str sheet, + comment "strand 214", + location int { + from 58, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EE" + } + } + }, + { + data psec-str sheet, + comment "strand 215", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EE" + } + } + }, + { + data psec-str sheet, + comment "strand 216", + location int { + from 74, + to 80, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EE" + } + } + }, + { + data psec-str sheet, + comment "strand 217", + location int { + from 84, + to 89, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EE" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GG" + } + }, + descr { + comment "Small nuclear ribonucleoprotein G", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 76, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + 
"60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 76, + seq-data ncbieaa "MSKAHPPELKKFMDKKLSLKLNGGRHVQGILRGFDPFMNLVIDECVEMATSG +QQNNIGMVVIRGNSIIMLEALERV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 218", + location int { + from 14, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GG" + } + } + }, + { + data psec-str sheet, + comment "strand 219", + location int { + from 23, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GG" + } + } + }, + { + data psec-str sheet, + comment "strand 220", + location int { + from 29, + to 35, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GG" + } + } + }, + { + data psec-str sheet, + comment "strand 221", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GG" + } + } + }, + { + data psec-str sheet, + comment "strand 222", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GG" + } + } + }, + { + data psec-str sheet, + comment "strand 223", + location int { + from 51, + to 56, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GG" + } + } + }, + { + data psec-str sheet, + comment "strand 224", + location int { + from 57, + to 63, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GG" + } + } + }, + { + data psec-str sheet, + comment "strand 225", + location int { + from 65, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GG" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + 
chain-id "HH" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D3", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 125, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 125, + seq-data ncbieaa "MSIGVPIKVLHEAEGHIVTCETNTGEVYRGKLIEAEDNMNCQMSNITVTYRD +GRVAQLEQVYIRGSKIRFLILPDMLKNAPMLKSMKNKNQGSGAGRGKAAILKAQVAARGRGRGMGRGNIFQKR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 20", + location int { + from 4, + to 12, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HH" + } + } + }, + { + data psec-str sheet, + comment "strand 226", + location int { + from 14, + to 
22, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HH" + } + } + }, + { + data psec-str sheet, + comment "strand 227", + location int { + from 24, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HH" + } + } + }, + { + data psec-str sheet, + comment "strand 228", + location int { + from 29, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HH" + } + } + }, + { + data psec-str sheet, + comment "strand 229", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HH" + } + } + }, + { + data psec-str sheet, + comment "strand 230", + location int { + from 43, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HH" + } + } + }, + { + data psec-str sheet, + comment "strand 231", + location int { + from 52, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HH" + } + } + }, + { + data psec-str sheet, + comment "strand 232", + location int { + from 58, + to 64, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HH" + } + } + }, + { + data psec-str sheet, + comment "strand 233", + location int { + from 68, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HH" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "II" + } + }, + descr { + comment "Small nuclear ribonucleoprotein-associated proteins B and B'", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; 
Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MTVGKSSKMLQHIDYRMRCILQDGRIFIGTFKAFDKHMNLILCDCDEFRKIK +PKNSKQAEREEKRVLGLVLLRGENLVSMTVEGPPPKDTGIARV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 234", + location int { + from 13, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "II" + } + } + }, + { + data psec-str sheet, + comment "strand 235", + location int { + from 23, + to 29, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "II" + } + } + }, + { + data psec-str sheet, + comment "strand 236", + location int { + from 30, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "II" + } + } + }, + { + data psec-str sheet, + comment "strand 237", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "II" + } + } + }, + { + data psec-str sheet, + comment "strand 238", + location int { + from 43, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 
4, + day 18 + }, + chain-id "II" + } + } + }, + { + data psec-str sheet, + comment "strand 239", + location int { + from 61, + to 66, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "II" + } + } + }, + { + data psec-str sheet, + comment "strand 240", + location int { + from 67, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "II" + } + } + }, + { + data psec-str sheet, + comment "strand 241", + location int { + from 75, + to 81, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "II" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJ" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D1", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MKLVRFLMKLSHETVTIELKNGTQVHGTITGVDVSMNTHLKAVKMTLKNREP +VQLETLSIRGNNIRYFILPDSLPLDTLLVDVEPKVKSKKREAVAGRGRGRGRGRGRGRGRGRGGPR" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 242", + location int { + from 12, + to 19, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJ" + } + } + }, + { + data psec-str sheet, + comment "strand 243", + location int { + from 21, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJ" + } + } + }, + { + data psec-str sheet, + comment "strand 244", + location int { + from 26, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJ" + } + } + }, + { + data psec-str sheet, + comment "strand 245", + location int { + from 35, + to 40, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJ" + } + } + }, + { + data psec-str sheet, + comment "strand 246", + location int { + from 41, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJ" + } + } + }, + { + data psec-str sheet, + comment "strand 247", + location int { + from 51, + to 55, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJ" + } + } + }, + { + data psec-str sheet, + comment "strand 248", + location int { + from 56, + to 60, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJ" + } + } + }, + { + data psec-str sheet, + comment "strand 249", + location int { + from 65, + to 69, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJ" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std 
{ + year 2018, + month 4, + day 18 + }, + chain-id "KK" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D2", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MSLLNKPKSEMTPEELQKREEEEFNTGPLSVLTQSVKNNTQVLINCRNNKKL +LGRVKAFDRHCNMVLENVKEMWTEVPKSGKGKKKSKPVNKDRYISKMFLRGDSVIVVLRNPLIAGK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 21", + location int { + from 14, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KK" + } + } + }, + { + data psec-str 
helix, + comment "helix 22", + location int { + from 29, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KK" + } + } + }, + { + data psec-str sheet, + comment "strand 250", + location int { + from 40, + to 46, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KK" + } + } + }, + { + data psec-str sheet, + comment "strand 251", + location int { + from 48, + to 52, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KK" + } + } + }, + { + data psec-str sheet, + comment "strand 252", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KK" + } + } + }, + { + data psec-str sheet, + comment "strand 253", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KK" + } + } + }, + { + data psec-str sheet, + comment "strand 254", + location int { + from 68, + to 76, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KK" + } + } + }, + { + data psec-str sheet, + comment "strand 255", + location int { + from 88, + to 94, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KK" + } + } + }, + { + data psec-str sheet, + comment "strand 256", + location int { + from 95, + to 102, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KK" + } + } + }, + { + data psec-str sheet, + comment "strand 257", + location int { + from 104, + to 110, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KK" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MM" + } + }, + descr { + comment "Small nuclear ribonucleoprotein F", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } 
+ }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 86, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "MSLPLNPKPFLNGLTGKPVMVKLKWGMEYKGYLVSVDGYMNMQLANTEEYID +GALSGHLGEVLIRCNNVLYIRGVEEEEEDGEMRE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 23", + location int { + from 6, + to 13, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MM" + } + } + }, + { + data psec-str sheet, + comment "strand 258", + location int { + from 15, + to 23, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MM" + } + } + }, + { + data psec-str sheet, + comment "strand 259", + location int { + from 25, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MM" + } + } + }, + { + data psec-str sheet, + comment "strand 260", + location int { + from 31, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id 
"MM" + } + } + }, + { + data psec-str sheet, + comment "strand 261", + location int { + from 39, + to 44, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MM" + } + } + }, + { + data psec-str sheet, + comment "strand 262", + location int { + from 45, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MM" + } + } + }, + { + data psec-str sheet, + comment "strand 263", + location int { + from 53, + to 59, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MM" + } + } + }, + { + data psec-str sheet, + comment "strand 264", + location int { + from 60, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MM" + } + } + }, + { + data psec-str sheet, + comment "strand 265", + location int { + from 69, + to 74, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MM" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LL" + } + }, + descr { + comment "Small nuclear ribonucleoprotein E", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 92, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + 
"51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 92, + seq-data ncbieaa "MAYRGQGQKVQKVMVQPINLIFRYLQNRSRIQVWLYEQVNMRIEGCIIGFDE +YMNLVLDDAEEIHSKTKSRKQLGRIMLKGDNITLLQSVSN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 24", + location int { + from 16, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LL" + } + } + }, + { + data psec-str sheet, + comment "strand 266", + location int { + from 29, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LL" + } + } + }, + { + data psec-str sheet, + comment "strand 267", + location int { + from 39, + to 45, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LL" + } + } + }, + { + data psec-str sheet, + comment "strand 268", + location int { + from 46, + to 51, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LL" + } + } + }, + { + data psec-str sheet, + comment "strand 269", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LL" + } + } + }, + { + data psec-str sheet, + comment "strand 270", + location int { + from 58, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LL" + } + } + }, + { + data psec-str sheet, + comment "strand 271", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LL" + } + } + }, + { + data psec-str sheet, + comment "strand 272", + location int { + from 74, + 
to 80, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LL" + } + } + }, + { + data psec-str sheet, + comment "strand 273", + location int { + from 84, + to 89, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LL" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NN" + } + }, + descr { + comment "Small nuclear ribonucleoprotein G", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 76, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 76, + seq-data ncbieaa "MSKAHPPELKKFMDKKLSLKLNGGRHVQGILRGFDPFMNLVIDECVEMATSG +QQNNIGMVVIRGNSIIMLEALERV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 274", + location int { + from 14, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NN" + } + } + }, + { + data psec-str sheet, + comment 
"strand 275", + location int { + from 23, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NN" + } + } + }, + { + data psec-str sheet, + comment "strand 276", + location int { + from 29, + to 35, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NN" + } + } + }, + { + data psec-str sheet, + comment "strand 277", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NN" + } + } + }, + { + data psec-str sheet, + comment "strand 278", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NN" + } + } + }, + { + data psec-str sheet, + comment "strand 279", + location int { + from 51, + to 56, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NN" + } + } + }, + { + data psec-str sheet, + comment "strand 280", + location int { + from 57, + to 63, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NN" + } + } + }, + { + data psec-str sheet, + comment "strand 281", + location int { + from 65, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NN" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OO" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D3", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 125, + names { + "", + "", + "3", + "4", + "5", + "6", + 
"7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 125, + seq-data ncbieaa "MSIGVPIKVLHEAEGHIVTCETNTGEVYRGKLIEAEDNMNCQMSNITVTYRD +GRVAQLEQVYIRGSKIRFLILPDMLKNAPMLKSMKNKNQGSGAGRGKAAILKAQVAARGRGRGMGRGNIFQKR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 25", + location int { + from 4, + to 12, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OO" + } + } + }, + { + data psec-str sheet, + comment "strand 282", + location int { + from 14, + to 22, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OO" + } + } + }, + { + data psec-str sheet, + comment "strand 283", + location int { + from 24, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OO" + } + } + }, + { + data psec-str sheet, + comment "strand 284", + location int { + from 29, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OO" + } + } + }, + { + data psec-str sheet, + comment "strand 285", + location int { + from 37, + to 42, + id pdb 
{ + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OO" + } + } + }, + { + data psec-str sheet, + comment "strand 286", + location int { + from 43, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OO" + } + } + }, + { + data psec-str sheet, + comment "strand 287", + location int { + from 52, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OO" + } + } + }, + { + data psec-str sheet, + comment "strand 288", + location int { + from 58, + to 64, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OO" + } + } + }, + { + data psec-str sheet, + comment "strand 289", + location int { + from 68, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OO" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PP" + } + }, + descr { + comment "Small nuclear ribonucleoprotein-associated proteins B and B'", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "63", + 
"64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MTVGKSSKMLQHIDYRMRCILQDGRIFIGTFKAFDKHMNLILCDCDEFRKIK +PKNSKQAEREEKRVLGLVLLRGENLVSMTVEGPPPKDTGIARV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 290", + location int { + from 13, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PP" + } + } + }, + { + data psec-str sheet, + comment "strand 291", + location int { + from 23, + to 29, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PP" + } + } + }, + { + data psec-str sheet, + comment "strand 292", + location int { + from 30, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PP" + } + } + }, + { + data psec-str sheet, + comment "strand 293", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PP" + } + } + }, + { + data psec-str sheet, + comment "strand 294", + location int { + from 43, + to 48, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PP" + } + } + }, + { + data psec-str sheet, + comment "strand 295", + location int { + from 62, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PP" + } + } + }, + { + data psec-str sheet, + comment "strand 296", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PP" + } + } + }, + { + data psec-str sheet, + comment "strand 297", + location int { + from 75, + to 81, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id 
"PP" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQ" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D1", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MKLVRFLMKLSHETVTIELKNGTQVHGTITGVDVSMNTHLKAVKMTLKNREP +VQLETLSIRGNNIRYFILPDSLPLDTLLVDVEPKVKSKKREAVAGRGRGRGRGRGRGRGRGRGGPR" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 298", + location int { + from 12, + to 19, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQ" + } + } + }, + { + data 
psec-str sheet, + comment "strand 299", + location int { + from 21, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQ" + } + } + }, + { + data psec-str sheet, + comment "strand 300", + location int { + from 26, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQ" + } + } + }, + { + data psec-str sheet, + comment "strand 301", + location int { + from 35, + to 40, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQ" + } + } + }, + { + data psec-str sheet, + comment "strand 302", + location int { + from 41, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQ" + } + } + }, + { + data psec-str sheet, + comment "strand 303", + location int { + from 51, + to 55, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQ" + } + } + }, + { + data psec-str sheet, + comment "strand 304", + location int { + from 56, + to 60, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQ" + } + } + }, + { + data psec-str sheet, + comment "strand 305", + location int { + from 65, + to 69, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQ" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RR" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D2", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MSLLNKPKSEMTPEELQKREEEEFNTGPLSVLTQSVKNNTQVLINCRNNKKL +LGRVKAFDRHCNMVLENVKEMWTEVPKSGKGKKKSKPVNKDRYISKMFLRGDSVIVVLRNPLIAGK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 26", + location int { + from 18, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RR" + } + } + }, + { + data psec-str helix, + comment "helix 27", + location int { + from 29, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RR" + } + } + }, + { + data psec-str sheet, + comment "strand 306", + location int { + from 40, + to 46, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RR" + } + } + }, + { + data psec-str sheet, + comment "strand 307", + location int { + from 48, + to 52, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RR" + } + } + }, + { + data psec-str sheet, + comment "strand 308", + location int 
{ + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RR" + } + } + }, + { + data psec-str sheet, + comment "strand 309", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RR" + } + } + }, + { + data psec-str sheet, + comment "strand 310", + location int { + from 68, + to 76, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RR" + } + } + }, + { + data psec-str sheet, + comment "strand 311", + location int { + from 88, + to 94, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RR" + } + } + }, + { + data psec-str sheet, + comment "strand 312", + location int { + from 95, + to 102, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RR" + } + } + }, + { + data psec-str sheet, + comment "strand 313", + location int { + from 104, + to 110, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RR" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TT" + } + }, + descr { + comment "Small nuclear ribonucleoprotein F", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 86, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + 
"35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "MSLPLNPKPFLNGLTGKPVMVKLKWGMEYKGYLVSVDGYMNMQLANTEEYID +GALSGHLGEVLIRCNNVLYIRGVEEEEEDGEMRE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 28", + location int { + from 6, + to 13, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TT" + } + } + }, + { + data psec-str sheet, + comment "strand 314", + location int { + from 15, + to 23, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TT" + } + } + }, + { + data psec-str sheet, + comment "strand 315", + location int { + from 25, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TT" + } + } + }, + { + data psec-str sheet, + comment "strand 316", + location int { + from 31, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TT" + } + } + }, + { + data psec-str sheet, + comment "strand 317", + location int { + from 39, + to 44, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TT" + } + } + }, + { + data psec-str sheet, + comment "strand 318", + location int { + from 45, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TT" + } + } + }, + { + data psec-str sheet, + comment "strand 319", + location int { + from 53, + to 59, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TT" + } + } + }, + { + data psec-str 
sheet, + comment "strand 320", + location int { + from 60, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TT" + } + } + }, + { + data psec-str sheet, + comment "strand 321", + location int { + from 69, + to 74, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TT" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SS" + } + }, + descr { + comment "Small nuclear ribonucleoprotein E", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 92, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 92, + seq-data ncbieaa "MAYRGQGQKVQKVMVQPINLIFRYLQNRSRIQVWLYEQVNMRIEGCIIGFDE +YMNLVLDDAEEIHSKTKSRKQLGRIMLKGDNITLLQSVSN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment 
"helix 29", + location int { + from 16, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SS" + } + } + }, + { + data psec-str sheet, + comment "strand 322", + location int { + from 29, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SS" + } + } + }, + { + data psec-str sheet, + comment "strand 323", + location int { + from 39, + to 45, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SS" + } + } + }, + { + data psec-str sheet, + comment "strand 324", + location int { + from 46, + to 51, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SS" + } + } + }, + { + data psec-str sheet, + comment "strand 325", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SS" + } + } + }, + { + data psec-str sheet, + comment "strand 326", + location int { + from 58, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SS" + } + } + }, + { + data psec-str sheet, + comment "strand 327", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SS" + } + } + }, + { + data psec-str sheet, + comment "strand 328", + location int { + from 74, + to 80, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SS" + } + } + }, + { + data psec-str sheet, + comment "strand 329", + location int { + from 84, + to 89, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SS" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UU" + } + }, + descr { + comment "Small nuclear ribonucleoprotein G", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + 
name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 76, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 76, + seq-data ncbieaa "MSKAHPPELKKFMDKKLSLKLNGGRHVQGILRGFDPFMNLVIDECVEMATSG +QQNNIGMVVIRGNSIIMLEALERV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 330", + location int { + from 14, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UU" + } + } + }, + { + data psec-str sheet, + comment "strand 331", + location int { + from 23, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UU" + } + } + }, + { + data psec-str sheet, + comment "strand 332", + location int { + from 29, + to 35, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UU" + } + } + }, + { + data psec-str sheet, + comment "strand 333", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UU" + } + } + }, + { + data psec-str sheet, + comment "strand 334", + location int { + from 
43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UU" + } + } + }, + { + data psec-str sheet, + comment "strand 335", + location int { + from 51, + to 56, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UU" + } + } + }, + { + data psec-str sheet, + comment "strand 336", + location int { + from 57, + to 63, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UU" + } + } + }, + { + data psec-str sheet, + comment "strand 337", + location int { + from 65, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UU" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAA" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D3", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 125, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + 
"84", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 125, + seq-data ncbieaa "MSIGVPIKVLHEAEGHIVTCETNTGEVYRGKLIEAEDNMNCQMSNITVTYRD +GRVAQLEQVYIRGSKIRFLILPDMLKNAPMLKSMKNKNQGSGAGRGKAAILKAQVAARGRGRGMGRGNIFQKR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 30", + location int { + from 4, + to 12, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAA" + } + } + }, + { + data psec-str sheet, + comment "strand 338", + location int { + from 14, + to 22, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAA" + } + } + }, + { + data psec-str sheet, + comment "strand 339", + location int { + from 24, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAA" + } + } + }, + { + data psec-str sheet, + comment "strand 340", + location int { + from 29, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAA" + } + } + }, + { + data psec-str sheet, + comment "strand 341", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAA" + } + } + }, + { + data psec-str sheet, + comment "strand 342", + location int { + from 43, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAA" + } + } + }, + { + data psec-str sheet, + comment "strand 343", + location int { + from 52, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAA" + } + } + }, + { + data psec-str sheet, + comment "strand 344", + location int { + from 58, + to 64, + id pdb { + mol "4WZJ", + rel std { 
+ year 2018, + month 4, + day 18 + }, + chain-id "AAA" + } + } + }, + { + data psec-str sheet, + comment "strand 345", + location int { + from 68, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAA" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBB" + } + }, + descr { + comment "Small nuclear ribonucleoprotein-associated proteins B and B'", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MTVGKSSKMLQHIDYRMRCILQDGRIFIGTFKAFDKHMNLILCDCDEFRKIK +PKNSKQAEREEKRVLGLVLLRGENLVSMTVEGPPPKDTGIARV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 346", + location int { + from 13, + to 21, + id pdb { + mol "4WZJ", + rel 
std { + year 2018, + month 4, + day 18 + }, + chain-id "BBB" + } + } + }, + { + data psec-str sheet, + comment "strand 347", + location int { + from 23, + to 29, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBB" + } + } + }, + { + data psec-str sheet, + comment "strand 348", + location int { + from 30, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBB" + } + } + }, + { + data psec-str sheet, + comment "strand 349", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBB" + } + } + }, + { + data psec-str sheet, + comment "strand 350", + location int { + from 43, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBB" + } + } + }, + { + data psec-str sheet, + comment "strand 351", + location int { + from 64, + to 68, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBB" + } + } + }, + { + data psec-str sheet, + comment "strand 352", + location int { + from 69, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBB" + } + } + }, + { + data psec-str sheet, + comment "strand 353", + location int { + from 75, + to 81, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBB" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCC" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D1", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 
1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MKLVRFLMKLSHETVTIELKNGTQVHGTITGVDVSMNTHLKAVKMTLKNREP +VQLETLSIRGNNIRYFILPDSLPLDTLLVDVEPKVKSKKREAVAGRGRGRGRGRGRGRGRGRGGPR" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 354", + location int { + from 12, + to 19, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCC" + } + } + }, + { + data psec-str sheet, + comment "strand 355", + location int { + from 21, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCC" + } + } + }, + { + data psec-str sheet, + comment "strand 356", + location int { + from 26, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCC" + } + } + }, + { + data psec-str sheet, + comment "strand 357", + location int { + from 35, + to 40, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCC" + } + } + }, + { + data psec-str 
sheet, + comment "strand 358", + location int { + from 41, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCC" + } + } + }, + { + data psec-str sheet, + comment "strand 359", + location int { + from 51, + to 55, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCC" + } + } + }, + { + data psec-str sheet, + comment "strand 360", + location int { + from 56, + to 61, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCC" + } + } + }, + { + data psec-str sheet, + comment "strand 361", + location int { + from 65, + to 69, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCC" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDD" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D2", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + 
"80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MSLLNKPKSEMTPEELQKREEEEFNTGPLSVLTQSVKNNTQVLINCRNNKKL +LGRVKAFDRHCNMVLENVKEMWTEVPKSGKGKKKSKPVNKDRYISKMFLRGDSVIVVLRNPLIAGK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 31", + location int { + from 18, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDD" + } + } + }, + { + data psec-str helix, + comment "helix 32", + location int { + from 29, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDD" + } + } + }, + { + data psec-str sheet, + comment "strand 362", + location int { + from 40, + to 46, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDD" + } + } + }, + { + data psec-str sheet, + comment "strand 363", + location int { + from 48, + to 52, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDD" + } + } + }, + { + data psec-str sheet, + comment "strand 364", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDD" + } + } + }, + { + data psec-str sheet, + comment "strand 365", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDD" + } + } + }, + { + data psec-str sheet, + comment "strand 366", + location int { + from 68, + to 76, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDD" + } + } + }, + { + data psec-str sheet, + comment "strand 367", + location 
int { + from 88, + to 94, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDD" + } + } + }, + { + data psec-str sheet, + comment "strand 368", + location int { + from 95, + to 102, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDD" + } + } + }, + { + data psec-str sheet, + comment "strand 369", + location int { + from 104, + to 110, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDD" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFF" + } + }, + descr { + comment "Small nuclear ribonucleoprotein F", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 86, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "MSLPLNPKPFLNGLTGKPVMVKLKWGMEYKGYLVSVDGYMNMQLANTEEYID 
+GALSGHLGEVLIRCNNVLYIRGVEEEEEDGEMRE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 33", + location int { + from 6, + to 13, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFF" + } + } + }, + { + data psec-str sheet, + comment "strand 370", + location int { + from 15, + to 23, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFF" + } + } + }, + { + data psec-str sheet, + comment "strand 371", + location int { + from 25, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFF" + } + } + }, + { + data psec-str sheet, + comment "strand 372", + location int { + from 31, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFF" + } + } + }, + { + data psec-str sheet, + comment "strand 373", + location int { + from 39, + to 44, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFF" + } + } + }, + { + data psec-str sheet, + comment "strand 374", + location int { + from 45, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFF" + } + } + }, + { + data psec-str sheet, + comment "strand 375", + location int { + from 53, + to 59, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFF" + } + } + }, + { + data psec-str sheet, + comment "strand 376", + location int { + from 60, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFF" + } + } + }, + { + data psec-str sheet, + comment "strand 377", + location int { + from 69, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFF" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEE" + } + }, + descr { + comment "Small nuclear ribonucleoprotein E", + 
source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 92, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 92, + seq-data ncbieaa "MAYRGQGQKVQKVMVQPINLIFRYLQNRSRIQVWLYEQVNMRIEGCIIGFDE +YMNLVLDDAEEIHSKTKSRKQLGRIMLKGDNITLLQSVSN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 34", + location int { + from 16, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEE" + } + } + }, + { + data psec-str sheet, + comment "strand 378", + location int { + from 29, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEE" + } + } + }, + { + data psec-str sheet, + comment "strand 379", + location int { + from 39, + to 45, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEE" + } + } + }, + { + data 
psec-str sheet, + comment "strand 380", + location int { + from 46, + to 51, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEE" + } + } + }, + { + data psec-str sheet, + comment "strand 381", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEE" + } + } + }, + { + data psec-str sheet, + comment "strand 382", + location int { + from 58, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEE" + } + } + }, + { + data psec-str sheet, + comment "strand 383", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEE" + } + } + }, + { + data psec-str sheet, + comment "strand 384", + location int { + from 74, + to 80, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEE" + } + } + }, + { + data psec-str sheet, + comment "strand 385", + location int { + from 84, + to 89, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEE" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGG" + } + }, + descr { + comment "Small nuclear ribonucleoprotein G", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 76, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + 
"28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 76, + seq-data ncbieaa "MSKAHPPELKKFMDKKLSLKLNGGRHVQGILRGFDPFMNLVIDECVEMATSG +QQNNIGMVVIRGNSIIMLEALERV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 386", + location int { + from 12, + to 17, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGG" + } + } + }, + { + data psec-str sheet, + comment "strand 387", + location int { + from 18, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGG" + } + } + }, + { + data psec-str sheet, + comment "strand 388", + location int { + from 23, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGG" + } + } + }, + { + data psec-str sheet, + comment "strand 389", + location int { + from 29, + to 35, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGG" + } + } + }, + { + data psec-str sheet, + comment "strand 390", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGG" + } + } + }, + { + data psec-str sheet, + comment "strand 391", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGG" + } + } + }, + { + data psec-str sheet, + comment "strand 392", + location int { + from 51, + to 56, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGG" + } + } + }, + { + data psec-str sheet, + 
comment "strand 393", + location int { + from 57, + to 63, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGG" + } + } + }, + { + data psec-str sheet, + comment "strand 394", + location int { + from 65, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGG" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHH" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D3", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 125, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + 
length 125, + seq-data ncbieaa "MSIGVPIKVLHEAEGHIVTCETNTGEVYRGKLIEAEDNMNCQMSNITVTYRD +GRVAQLEQVYIRGSKIRFLILPDMLKNAPMLKSMKNKNQGSGAGRGKAAILKAQVAARGRGRGMGRGNIFQKR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 35", + location int { + from 4, + to 12, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHH" + } + } + }, + { + data psec-str sheet, + comment "strand 395", + location int { + from 14, + to 22, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHH" + } + } + }, + { + data psec-str sheet, + comment "strand 396", + location int { + from 24, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHH" + } + } + }, + { + data psec-str sheet, + comment "strand 397", + location int { + from 29, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHH" + } + } + }, + { + data psec-str sheet, + comment "strand 398", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHH" + } + } + }, + { + data psec-str sheet, + comment "strand 399", + location int { + from 43, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHH" + } + } + }, + { + data psec-str sheet, + comment "strand 400", + location int { + from 52, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHH" + } + } + }, + { + data psec-str sheet, + comment "strand 401", + location int { + from 58, + to 64, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHH" + } + } + }, + { + data psec-str sheet, + comment "strand 402", + location int { + from 68, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHH" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + 
year 2018, + month 4, + day 18 + }, + chain-id "III" + } + }, + descr { + comment "Small nuclear ribonucleoprotein-associated proteins B and B'", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MTVGKSSKMLQHIDYRMRCILQDGRIFIGTFKAFDKHMNLILCDCDEFRKIK +PKNSKQAEREEKRVLGLVLLRGENLVSMTVEGPPPKDTGIARV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 403", + location int { + from 13, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "III" + } + } + }, + { + data psec-str sheet, + comment "strand 404", + location int { + from 23, + to 29, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "III" + } + } + }, + { + data psec-str sheet, + comment "strand 405", + 
location int { + from 30, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "III" + } + } + }, + { + data psec-str sheet, + comment "strand 406", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "III" + } + } + }, + { + data psec-str sheet, + comment "strand 407", + location int { + from 43, + to 48, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "III" + } + } + }, + { + data psec-str sheet, + comment "strand 408", + location int { + from 62, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "III" + } + } + }, + { + data psec-str sheet, + comment "strand 409", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "III" + } + } + }, + { + data psec-str sheet, + comment "strand 410", + location int { + from 75, + to 81, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "III" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJ" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D1", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", 
+ "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MKLVRFLMKLSHETVTIELKNGTQVHGTITGVDVSMNTHLKAVKMTLKNREP +VQLETLSIRGNNIRYFILPDSLPLDTLLVDVEPKVKSKKREAVAGRGRGRGRGRGRGRGRGRGGPR" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 411", + location int { + from 12, + to 19, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 412", + location int { + from 21, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 413", + location int { + from 27, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 414", + location int { + from 35, + to 40, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 415", + location int { + from 41, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 416", + location int { + from 51, + to 55, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + 
day 18 + }, + chain-id "JJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 417", + location int { + from 56, + to 60, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 418", + location int { + from 65, + to 69, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJ" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKK" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D2", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118" 
+ } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MSLLNKPKSEMTPEELQKREEEEFNTGPLSVLTQSVKNNTQVLINCRNNKKL +LGRVKAFDRHCNMVLENVKEMWTEVPKSGKGKKKSKPVNKDRYISKMFLRGDSVIVVLRNPLIAGK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 36", + location int { + from 19, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKK" + } + } + }, + { + data psec-str helix, + comment "helix 37", + location int { + from 29, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKK" + } + } + }, + { + data psec-str sheet, + comment "strand 419", + location int { + from 40, + to 46, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKK" + } + } + }, + { + data psec-str sheet, + comment "strand 420", + location int { + from 48, + to 52, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKK" + } + } + }, + { + data psec-str sheet, + comment "strand 421", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKK" + } + } + }, + { + data psec-str sheet, + comment "strand 422", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKK" + } + } + }, + { + data psec-str sheet, + comment "strand 423", + location int { + from 68, + to 76, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKK" + } + } + }, + { + data psec-str sheet, + comment "strand 424", + location int { + from 88, + to 94, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKK" + } + } + }, + { + data psec-str sheet, + comment "strand 425", + location int { + from 95, + to 102, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKK" + } + } + 
}, + { + data psec-str sheet, + comment "strand 426", + location int { + from 104, + to 110, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKK" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMM" + } + }, + descr { + comment "Small nuclear ribonucleoprotein F", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 86, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "MSLPLNPKPFLNGLTGKPVMVKLKWGMEYKGYLVSVDGYMNMQLANTEEYID +GALSGHLGEVLIRCNNVLYIRGVEEEEEDGEMRE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 38", + location int { + from 6, + to 13, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMM" + } + } + }, + { + data psec-str sheet, + comment "strand 427", + location int { + 
from 15, + to 23, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMM" + } + } + }, + { + data psec-str sheet, + comment "strand 428", + location int { + from 25, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMM" + } + } + }, + { + data psec-str sheet, + comment "strand 429", + location int { + from 31, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMM" + } + } + }, + { + data psec-str sheet, + comment "strand 430", + location int { + from 39, + to 44, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMM" + } + } + }, + { + data psec-str sheet, + comment "strand 431", + location int { + from 45, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMM" + } + } + }, + { + data psec-str sheet, + comment "strand 432", + location int { + from 53, + to 59, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMM" + } + } + }, + { + data psec-str sheet, + comment "strand 433", + location int { + from 60, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMM" + } + } + }, + { + data psec-str sheet, + comment "strand 434", + location int { + from 69, + to 74, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMM" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLL" + } + }, + descr { + comment "Small nuclear ribonucleoprotein E", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; 
Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 92, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 92, + seq-data ncbieaa "MAYRGQGQKVQKVMVQPINLIFRYLQNRSRIQVWLYEQVNMRIEGCIIGFDE +YMNLVLDDAEEIHSKTKSRKQLGRIMLKGDNITLLQSVSN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 39", + location int { + from 16, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLL" + } + } + }, + { + data psec-str sheet, + comment "strand 435", + location int { + from 29, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLL" + } + } + }, + { + data psec-str sheet, + comment "strand 436", + location int { + from 39, + to 45, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLL" + } + } + }, + { + data psec-str sheet, + comment "strand 437", + location int { + from 46, + to 51, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLL" + } + } + }, + { + data psec-str sheet, + comment "strand 438", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + 
day 18 + }, + chain-id "LLL" + } + } + }, + { + data psec-str sheet, + comment "strand 439", + location int { + from 58, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLL" + } + } + }, + { + data psec-str sheet, + comment "strand 440", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLL" + } + } + }, + { + data psec-str sheet, + comment "strand 441", + location int { + from 74, + to 80, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLL" + } + } + }, + { + data psec-str sheet, + comment "strand 442", + location int { + from 84, + to 89, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLL" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNN" + } + }, + descr { + comment "Small nuclear ribonucleoprotein G", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 76, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + 
"70", + "71", + "72", + "73", + "74", + "75", + "76" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 76, + seq-data ncbieaa "MSKAHPPELKKFMDKKLSLKLNGGRHVQGILRGFDPFMNLVIDECVEMATSG +QQNNIGMVVIRGNSIIMLEALERV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 443", + location int { + from 14, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNN" + } + } + }, + { + data psec-str sheet, + comment "strand 444", + location int { + from 23, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNN" + } + } + }, + { + data psec-str sheet, + comment "strand 445", + location int { + from 29, + to 35, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNN" + } + } + }, + { + data psec-str sheet, + comment "strand 446", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNN" + } + } + }, + { + data psec-str sheet, + comment "strand 447", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNN" + } + } + }, + { + data psec-str sheet, + comment "strand 448", + location int { + from 51, + to 56, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNN" + } + } + }, + { + data psec-str sheet, + comment "strand 449", + location int { + from 57, + to 63, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNN" + } + } + }, + { + data psec-str sheet, + comment "strand 450", + location int { + from 65, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNN" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOO" + } + }, + descr { + comment "Small nuclear 
ribonucleoprotein Sm D3", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 125, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 125, + seq-data ncbieaa "MSIGVPIKVLHEAEGHIVTCETNTGEVYRGKLIEAEDNMNCQMSNITVTYRD +GRVAQLEQVYIRGSKIRFLILPDMLKNAPMLKSMKNKNQGSGAGRGKAAILKAQVAARGRGRGMGRGNIFQKR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 40", + location int { + from 4, + to 12, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOO" + } + } + }, + { + data psec-str sheet, + comment "strand 451", + location int { + from 14, + to 22, + id pdb { + mol "4WZJ", + rel std { + year 2018, + 
month 4, + day 18 + }, + chain-id "OOO" + } + } + }, + { + data psec-str sheet, + comment "strand 452", + location int { + from 24, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOO" + } + } + }, + { + data psec-str sheet, + comment "strand 453", + location int { + from 29, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOO" + } + } + }, + { + data psec-str sheet, + comment "strand 454", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOO" + } + } + }, + { + data psec-str sheet, + comment "strand 455", + location int { + from 43, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOO" + } + } + }, + { + data psec-str sheet, + comment "strand 456", + location int { + from 52, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOO" + } + } + }, + { + data psec-str sheet, + comment "strand 457", + location int { + from 58, + to 64, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOO" + } + } + }, + { + data psec-str sheet, + comment "strand 458", + location int { + from 68, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOO" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPP" + } + }, + descr { + comment "Small nuclear ribonucleoprotein-associated proteins B and B'", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + 
gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MTVGKSSKMLQHIDYRMRCILQDGRIFIGTFKAFDKHMNLILCDCDEFRKIK +PKNSKQAEREEKRVLGLVLLRGENLVSMTVEGPPPKDTGIARV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 459", + location int { + from 13, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPP" + } + } + }, + { + data psec-str sheet, + comment "strand 460", + location int { + from 23, + to 29, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPP" + } + } + }, + { + data psec-str sheet, + comment "strand 461", + location int { + from 30, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPP" + } + } + }, + { + data psec-str sheet, + comment "strand 462", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPP" + } + } + }, + { + data psec-str sheet, + comment "strand 463", + location int { + from 43, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPP" + } + } + }, + { + 
data psec-str sheet, + comment "strand 464", + location int { + from 64, + to 68, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPP" + } + } + }, + { + data psec-str sheet, + comment "strand 465", + location int { + from 69, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPP" + } + } + }, + { + data psec-str sheet, + comment "strand 466", + location int { + from 75, + to 81, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPP" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQ" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D1", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MKLVRFLMKLSHETVTIELKNGTQVHGTITGVDVSMNTHLKAVKMTLKNREP +VQLETLSIRGNNIRYFILPDSLPLDTLLVDVEPKVKSKKREAVAGRGRGRGRGRGRGRGRGRGGPR" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 467", + location int { + from 12, + to 19, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 468", + location int { + from 21, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 469", + location int { + from 27, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 470", + location int { + from 35, + to 40, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 471", + location int { + from 41, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 472", + location int { + from 51, + to 55, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 473", + location int { + from 56, + to 61, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 474", + location int { + from 65, + to 69, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQ" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, 
+ chain-id "RRR" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D2", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MSLLNKPKSEMTPEELQKREEEEFNTGPLSVLTQSVKNNTQVLINCRNNKKL +LGRVKAFDRHCNMVLENVKEMWTEVPKSGKGKKKSKPVNKDRYISKMFLRGDSVIVVLRNPLIAGK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 41", + location int { + from 18, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRR" + } + } + }, + { + data psec-str helix, + comment "helix 42", + location int { + from 
29, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRR" + } + } + }, + { + data psec-str sheet, + comment "strand 475", + location int { + from 40, + to 46, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRR" + } + } + }, + { + data psec-str sheet, + comment "strand 476", + location int { + from 48, + to 52, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRR" + } + } + }, + { + data psec-str sheet, + comment "strand 477", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRR" + } + } + }, + { + data psec-str sheet, + comment "strand 478", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRR" + } + } + }, + { + data psec-str sheet, + comment "strand 479", + location int { + from 68, + to 76, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRR" + } + } + }, + { + data psec-str sheet, + comment "strand 480", + location int { + from 88, + to 94, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRR" + } + } + }, + { + data psec-str sheet, + comment "strand 481", + location int { + from 95, + to 102, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRR" + } + } + }, + { + data psec-str sheet, + comment "strand 482", + location int { + from 104, + to 110, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRR" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTT" + } + }, + descr { + comment "Small nuclear ribonucleoprotein F", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus 
"Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 86, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "MSLPLNPKPFLNGLTGKPVMVKLKWGMEYKGYLVSVDGYMNMQLANTEEYID +GALSGHLGEVLIRCNNVLYIRGVEEEEEDGEMRE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 43", + location int { + from 6, + to 13, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTT" + } + } + }, + { + data psec-str sheet, + comment "strand 483", + location int { + from 15, + to 23, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTT" + } + } + }, + { + data psec-str sheet, + comment "strand 484", + location int { + from 25, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTT" + } + } + }, + { + data psec-str sheet, + comment "strand 485", + location int { + from 31, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTT" + } + } + }, + { + data psec-str sheet, 
+ comment "strand 486", + location int { + from 39, + to 44, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTT" + } + } + }, + { + data psec-str sheet, + comment "strand 487", + location int { + from 45, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTT" + } + } + }, + { + data psec-str sheet, + comment "strand 488", + location int { + from 53, + to 59, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTT" + } + } + }, + { + data psec-str sheet, + comment "strand 489", + location int { + from 60, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTT" + } + } + }, + { + data psec-str sheet, + comment "strand 490", + location int { + from 69, + to 74, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTT" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSS" + } + }, + descr { + comment "Small nuclear ribonucleoprotein E", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 92, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + 
"56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 92, + seq-data ncbieaa "MAYRGQGQKVQKVMVQPINLIFRYLQNRSRIQVWLYEQVNMRIEGCIIGFDE +YMNLVLDDAEEIHSKTKSRKQLGRIMLKGDNITLLQSVSN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 44", + location int { + from 16, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSS" + } + } + }, + { + data psec-str sheet, + comment "strand 491", + location int { + from 29, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSS" + } + } + }, + { + data psec-str sheet, + comment "strand 492", + location int { + from 39, + to 45, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSS" + } + } + }, + { + data psec-str sheet, + comment "strand 493", + location int { + from 46, + to 51, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSS" + } + } + }, + { + data psec-str sheet, + comment "strand 494", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSS" + } + } + }, + { + data psec-str sheet, + comment "strand 495", + location int { + from 58, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSS" + } + } + }, + { + data psec-str sheet, + comment "strand 496", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSS" + } + } + }, + { + data psec-str sheet, + comment "strand 497", + location int { + from 74, + to 80, + id pdb { + mol "4WZJ", + 
rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSS" + } + } + }, + { + data psec-str sheet, + comment "strand 498", + location int { + from 84, + to 89, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSS" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUU" + } + }, + descr { + comment "Small nuclear ribonucleoprotein G", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 76, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 76, + seq-data ncbieaa "MSKAHPPELKKFMDKKLSLKLNGGRHVQGILRGFDPFMNLVIDECVEMATSG +QQNNIGMVVIRGNSIIMLEALERV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 499", + location int { + from 14, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUU" + } + } + }, + { + data psec-str sheet, + comment "strand 500", + location int { + 
from 23, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUU" + } + } + }, + { + data psec-str sheet, + comment "strand 501", + location int { + from 29, + to 35, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUU" + } + } + }, + { + data psec-str sheet, + comment "strand 502", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUU" + } + } + }, + { + data psec-str sheet, + comment "strand 503", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUU" + } + } + }, + { + data psec-str sheet, + comment "strand 504", + location int { + from 51, + to 56, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUU" + } + } + }, + { + data psec-str sheet, + comment "strand 505", + location int { + from 57, + to 63, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUU" + } + } + }, + { + data psec-str sheet, + comment "strand 506", + location int { + from 65, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUU" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAAA" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D3", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 125, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", 
+ "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 125, + seq-data ncbieaa "MSIGVPIKVLHEAEGHIVTCETNTGEVYRGKLIEAEDNMNCQMSNITVTYRD +GRVAQLEQVYIRGSKIRFLILPDMLKNAPMLKSMKNKNQGSGAGRGKAAILKAQVAARGRGRGMGRGNIFQKR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 45", + location int { + from 4, + to 12, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAAA" + } + } + }, + { + data psec-str sheet, + comment "strand 507", + location int { + from 14, + to 22, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAAA" + } + } + }, + { + data psec-str sheet, + comment "strand 508", + location int { + from 24, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAAA" + } + } + }, + { + data psec-str sheet, + comment "strand 509", + location int { + from 29, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAAA" + } + } + }, + { + data psec-str sheet, + comment "strand 510", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", 
+ rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAAA" + } + } + }, + { + data psec-str sheet, + comment "strand 511", + location int { + from 43, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAAA" + } + } + }, + { + data psec-str sheet, + comment "strand 512", + location int { + from 52, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAAA" + } + } + }, + { + data psec-str sheet, + comment "strand 513", + location int { + from 58, + to 64, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAAA" + } + } + }, + { + data psec-str sheet, + comment "strand 514", + location int { + from 68, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "AAAA" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBBB" + } + }, + descr { + comment "Small nuclear ribonucleoprotein-associated proteins B and B'", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "62", + "63", + 
"64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MTVGKSSKMLQHIDYRMRCILQDGRIFIGTFKAFDKHMNLILCDCDEFRKIK +PKNSKQAEREEKRVLGLVLLRGENLVSMTVEGPPPKDTGIARV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 515", + location int { + from 13, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBBB" + } + } + }, + { + data psec-str sheet, + comment "strand 516", + location int { + from 23, + to 29, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBBB" + } + } + }, + { + data psec-str sheet, + comment "strand 517", + location int { + from 30, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBBB" + } + } + }, + { + data psec-str sheet, + comment "strand 518", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBBB" + } + } + }, + { + data psec-str sheet, + comment "strand 519", + location int { + from 43, + to 48, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBBB" + } + } + }, + { + data psec-str sheet, + comment "strand 520", + location int { + from 63, + to 68, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBBB" + } + } + }, + { + data psec-str sheet, + comment "strand 521", + location int { + from 69, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "BBBB" + } + } + }, + { + data psec-str sheet, + comment "strand 522", + location int { + from 75, + to 81, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 
+ }, + chain-id "BBBB" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCCC" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D1", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MKLVRFLMKLSHETVTIELKNGTQVHGTITGVDVSMNTHLKAVKMTLKNREP +VQLETLSIRGNNIRYFILPDSLPLDTLLVDVEPKVKSKKREAVAGRGRGRGRGRGRGRGRGRGGPR" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 523", + location int { + from 12, + to 19, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCCC" + 
} + } + }, + { + data psec-str sheet, + comment "strand 524", + location int { + from 21, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCCC" + } + } + }, + { + data psec-str sheet, + comment "strand 525", + location int { + from 26, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCCC" + } + } + }, + { + data psec-str sheet, + comment "strand 526", + location int { + from 35, + to 40, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCCC" + } + } + }, + { + data psec-str sheet, + comment "strand 527", + location int { + from 41, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCCC" + } + } + }, + { + data psec-str sheet, + comment "strand 528", + location int { + from 51, + to 55, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCCC" + } + } + }, + { + data psec-str sheet, + comment "strand 529", + location int { + from 56, + to 61, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCCC" + } + } + }, + { + data psec-str sheet, + comment "strand 530", + location int { + from 65, + to 69, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "CCCC" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDDD" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D2", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + 
num 118, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MSLLNKPKSEMTPEELQKREEEEFNTGPLSVLTQSVKNNTQVLINCRNNKKL +LGRVKAFDRHCNMVLENVKEMWTEVPKSGKGKKKSKPVNKDRYISKMFLRGDSVIVVLRNPLIAGK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 46", + location int { + from 15, + to 24, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDDD" + } + } + }, + { + data psec-str helix, + comment "helix 47", + location int { + from 29, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDDD" + } + } + }, + { + data psec-str sheet, + comment "strand 531", + location int { + from 40, + to 46, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDDD" + } + } + }, + { + data psec-str sheet, + comment "strand 532", + location int { + from 48, + to 52, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDDD" + } + } + }, + { + 
data psec-str sheet, + comment "strand 533", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDDD" + } + } + }, + { + data psec-str sheet, + comment "strand 534", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDDD" + } + } + }, + { + data psec-str sheet, + comment "strand 535", + location int { + from 68, + to 76, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDDD" + } + } + }, + { + data psec-str sheet, + comment "strand 536", + location int { + from 88, + to 94, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDDD" + } + } + }, + { + data psec-str sheet, + comment "strand 537", + location int { + from 95, + to 102, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDDD" + } + } + }, + { + data psec-str sheet, + comment "strand 538", + location int { + from 104, + to 110, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "DDDD" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFFF" + } + }, + descr { + comment "Small nuclear ribonucleoprotein F", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 86, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + 
"26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "MSLPLNPKPFLNGLTGKPVMVKLKWGMEYKGYLVSVDGYMNMQLANTEEYID +GALSGHLGEVLIRCNNVLYIRGVEEEEEDGEMRE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 48", + location int { + from 6, + to 13, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFFF" + } + } + }, + { + data psec-str sheet, + comment "strand 539", + location int { + from 15, + to 23, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFFF" + } + } + }, + { + data psec-str sheet, + comment "strand 540", + location int { + from 25, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFFF" + } + } + }, + { + data psec-str sheet, + comment "strand 541", + location int { + from 31, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFFF" + } + } + }, + { + data psec-str sheet, + comment "strand 542", + location int { + from 39, + to 44, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFFF" + } + } + }, + { + data psec-str sheet, + comment "strand 543", + location int { + from 45, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFFF" + } + } + }, + { + data psec-str sheet, + comment "strand 544", + location int { + from 53, + to 59, + id pdb { + mol "4WZJ", + rel std { + year 2018, + 
month 4, + day 18 + }, + chain-id "FFFF" + } + } + }, + { + data psec-str sheet, + comment "strand 545", + location int { + from 60, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFFF" + } + } + }, + { + data psec-str sheet, + comment "strand 546", + location int { + from 69, + to 74, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "FFFF" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEEE" + } + }, + descr { + comment "Small nuclear ribonucleoprotein E", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 92, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 92, + seq-data ncbieaa "MAYRGQGQKVQKVMVQPINLIFRYLQNRSRIQVWLYEQVNMRIEGCIIGFDE 
+YMNLVLDDAEEIHSKTKSRKQLGRIMLKGDNITLLQSVSN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 49", + location int { + from 16, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEEE" + } + } + }, + { + data psec-str sheet, + comment "strand 547", + location int { + from 29, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEEE" + } + } + }, + { + data psec-str sheet, + comment "strand 548", + location int { + from 39, + to 45, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEEE" + } + } + }, + { + data psec-str sheet, + comment "strand 549", + location int { + from 46, + to 51, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEEE" + } + } + }, + { + data psec-str sheet, + comment "strand 550", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEEE" + } + } + }, + { + data psec-str sheet, + comment "strand 551", + location int { + from 58, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEEE" + } + } + }, + { + data psec-str sheet, + comment "strand 552", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEEE" + } + } + }, + { + data psec-str sheet, + comment "strand 553", + location int { + from 74, + to 80, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEEE" + } + } + }, + { + data psec-str sheet, + comment "strand 554", + location int { + from 84, + to 89, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "EEEE" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGGG" + } + }, + descr { + comment "Small nuclear 
ribonucleoprotein G", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 76, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 76, + seq-data ncbieaa "MSKAHPPELKKFMDKKLSLKLNGGRHVQGILRGFDPFMNLVIDECVEMATSG +QQNNIGMVVIRGNSIIMLEALERV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 555", + location int { + from 14, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGGG" + } + } + }, + { + data psec-str sheet, + comment "strand 556", + location int { + from 23, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGGG" + } + } + }, + { + data psec-str sheet, + comment "strand 557", + location int { + from 29, + to 35, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGGG" + } + } + }, + { + data psec-str sheet, + comment "strand 558", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + 
rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGGG" + } + } + }, + { + data psec-str sheet, + comment "strand 559", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGGG" + } + } + }, + { + data psec-str sheet, + comment "strand 560", + location int { + from 51, + to 56, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGGG" + } + } + }, + { + data psec-str sheet, + comment "strand 561", + location int { + from 57, + to 63, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGGG" + } + } + }, + { + data psec-str sheet, + comment "strand 562", + location int { + from 65, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "GGGG" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHHH" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D3", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 125, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + 
"64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 125, + seq-data ncbieaa "MSIGVPIKVLHEAEGHIVTCETNTGEVYRGKLIEAEDNMNCQMSNITVTYRD +GRVAQLEQVYIRGSKIRFLILPDMLKNAPMLKSMKNKNQGSGAGRGKAAILKAQVAARGRGRGMGRGNIFQKR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 50", + location int { + from 4, + to 12, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHHH" + } + } + }, + { + data psec-str sheet, + comment "strand 563", + location int { + from 14, + to 22, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHHH" + } + } + }, + { + data psec-str sheet, + comment "strand 564", + location int { + from 24, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHHH" + } + } + }, + { + data psec-str sheet, + comment "strand 565", + location int { + from 29, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHHH" + } + } + }, + { + data psec-str sheet, + comment "strand 566", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHHH" + } + } + }, + { + data psec-str sheet, + comment "strand 567", + location int { + from 43, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHHH" + } + } + }, + { + data psec-str sheet, + comment "strand 568", + location int { + from 52, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + 
day 18 + }, + chain-id "HHHH" + } + } + }, + { + data psec-str sheet, + comment "strand 569", + location int { + from 58, + to 64, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHHH" + } + } + }, + { + data psec-str sheet, + comment "strand 570", + location int { + from 68, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "HHHH" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "IIII" + } + }, + descr { + comment "Small nuclear ribonucleoprotein-associated proteins B and B'", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MTVGKSSKMLQHIDYRMRCILQDGRIFIGTFKAFDKHMNLILCDCDEFRKIK 
+PKNSKQAEREEKRVLGLVLLRGENLVSMTVEGPPPKDTGIARV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 571", + location int { + from 13, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "IIII" + } + } + }, + { + data psec-str sheet, + comment "strand 572", + location int { + from 23, + to 29, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "IIII" + } + } + }, + { + data psec-str sheet, + comment "strand 573", + location int { + from 30, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "IIII" + } + } + }, + { + data psec-str sheet, + comment "strand 574", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "IIII" + } + } + }, + { + data psec-str sheet, + comment "strand 575", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "IIII" + } + } + }, + { + data psec-str sheet, + comment "strand 576", + location int { + from 61, + to 66, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "IIII" + } + } + }, + { + data psec-str sheet, + comment "strand 577", + location int { + from 67, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "IIII" + } + } + }, + { + data psec-str sheet, + comment "strand 578", + location int { + from 75, + to 81, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "IIII" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJJ" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D1", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species 
"sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MKLVRFLMKLSHETVTIELKNGTQVHGTITGVDVSMNTHLKAVKMTLKNREP +VQLETLSIRGNNIRYFILPDSLPLDTLLVDVEPKVKSKKREAVAGRGRGRGRGRGRGRGRGRGGPR" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 579", + location int { + from 12, + to 19, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 580", + location int { + from 21, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 581", + location int { + from 27, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJJ" + } + } 
+ }, + { + data psec-str sheet, + comment "strand 582", + location int { + from 35, + to 40, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 583", + location int { + from 41, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 584", + location int { + from 51, + to 55, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 585", + location int { + from 56, + to 61, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJJ" + } + } + }, + { + data psec-str sheet, + comment "strand 586", + location int { + from 65, + to 69, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "JJJJ" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKKK" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D2", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", 
+ "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MSLLNKPKSEMTPEELQKREEEEFNTGPLSVLTQSVKNNTQVLINCRNNKKL +LGRVKAFDRHCNMVLENVKEMWTEVPKSGKGKKKSKPVNKDRYISKMFLRGDSVIVVLRNPLIAGK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 51", + location int { + from 14, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKKK" + } + } + }, + { + data psec-str helix, + comment "helix 52", + location int { + from 29, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKKK" + } + } + }, + { + data psec-str sheet, + comment "strand 587", + location int { + from 40, + to 46, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKKK" + } + } + }, + { + data psec-str sheet, + comment "strand 588", + location int { + from 48, + to 52, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKKK" + } + } + }, + { + data psec-str sheet, + comment "strand 589", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKKK" + } + } + }, + { + data psec-str sheet, + comment "strand 590", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKKK" + } + } + }, + { + data 
psec-str sheet, + comment "strand 591", + location int { + from 68, + to 76, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKKK" + } + } + }, + { + data psec-str sheet, + comment "strand 592", + location int { + from 88, + to 94, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKKK" + } + } + }, + { + data psec-str sheet, + comment "strand 593", + location int { + from 95, + to 102, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKKK" + } + } + }, + { + data psec-str sheet, + comment "strand 594", + location int { + from 104, + to 110, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "KKKK" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMMM" + } + }, + descr { + comment "Small nuclear ribonucleoprotein F", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 86, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + 
"75", + "76", + "77", + "78", + "79", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "MSLPLNPKPFLNGLTGKPVMVKLKWGMEYKGYLVSVDGYMNMQLANTEEYID +GALSGHLGEVLIRCNNVLYIRGVEEEEEDGEMRE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 53", + location int { + from 6, + to 13, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMMM" + } + } + }, + { + data psec-str sheet, + comment "strand 595", + location int { + from 15, + to 23, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMMM" + } + } + }, + { + data psec-str sheet, + comment "strand 596", + location int { + from 25, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMMM" + } + } + }, + { + data psec-str sheet, + comment "strand 597", + location int { + from 31, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMMM" + } + } + }, + { + data psec-str sheet, + comment "strand 598", + location int { + from 39, + to 44, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMMM" + } + } + }, + { + data psec-str sheet, + comment "strand 599", + location int { + from 45, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMMM" + } + } + }, + { + data psec-str sheet, + comment "strand 600", + location int { + from 53, + to 59, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMMM" + } + } + }, + { + data psec-str sheet, + comment "strand 601", + location int { + from 60, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "MMMM" + } + } + }, + { + data psec-str sheet, + comment "strand 602", + location int { + from 69, + to 74, + id pdb { + mol "4WZJ", + rel std { + year 
2018, + month 4, + day 18 + }, + chain-id "MMMM" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLLL" + } + }, + descr { + comment "Small nuclear ribonucleoprotein E", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 92, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 92, + seq-data ncbieaa "MAYRGQGQKVQKVMVQPINLIFRYLQNRSRIQVWLYEQVNMRIEGCIIGFDE +YMNLVLDDAEEIHSKTKSRKQLGRIMLKGDNITLLQSVSN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 54", + location int { + from 16, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLLL" + } + } + }, + { + data psec-str sheet, + comment "strand 603", + location int { + from 29, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 
18 + }, + chain-id "LLLL" + } + } + }, + { + data psec-str sheet, + comment "strand 604", + location int { + from 39, + to 45, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLLL" + } + } + }, + { + data psec-str sheet, + comment "strand 605", + location int { + from 46, + to 51, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLLL" + } + } + }, + { + data psec-str sheet, + comment "strand 606", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLLL" + } + } + }, + { + data psec-str sheet, + comment "strand 607", + location int { + from 58, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLLL" + } + } + }, + { + data psec-str sheet, + comment "strand 608", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLLL" + } + } + }, + { + data psec-str sheet, + comment "strand 609", + location int { + from 74, + to 80, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLLL" + } + } + }, + { + data psec-str sheet, + comment "strand 610", + location int { + from 84, + to 88, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "LLLL" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNNN" + } + }, + descr { + comment "Small nuclear ribonucleoprotein G", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } 
+ } + }, + num enum { + num 76, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 76, + seq-data ncbieaa "MSKAHPPELKKFMDKKLSLKLNGGRHVQGILRGFDPFMNLVIDECVEMATSG +QQNNIGMVVIRGNSIIMLEALERV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 611", + location int { + from 12, + to 17, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNNN" + } + } + }, + { + data psec-str sheet, + comment "strand 612", + location int { + from 18, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNNN" + } + } + }, + { + data psec-str sheet, + comment "strand 613", + location int { + from 23, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNNN" + } + } + }, + { + data psec-str sheet, + comment "strand 614", + location int { + from 29, + to 35, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNNN" + } + } + }, + { + data psec-str sheet, + comment "strand 615", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNNN" + } + } + }, + { + data psec-str sheet, + comment "strand 616", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + 
chain-id "NNNN" + } + } + }, + { + data psec-str sheet, + comment "strand 617", + location int { + from 51, + to 56, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNNN" + } + } + }, + { + data psec-str sheet, + comment "strand 618", + location int { + from 57, + to 63, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNNN" + } + } + }, + { + data psec-str sheet, + comment "strand 619", + location int { + from 65, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "NNNN" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOOO" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D3", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 125, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 125, + seq-data ncbieaa "MSIGVPIKVLHEAEGHIVTCETNTGEVYRGKLIEAEDNMNCQMSNITVTYRD +GRVAQLEQVYIRGSKIRFLILPDMLKNAPMLKSMKNKNQGSGAGRGKAAILKAQVAARGRGRGMGRGNIFQKR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 55", + location int { + from 4, + to 12, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOOO" + } + } + }, + { + data psec-str sheet, + comment "strand 620", + location int { + from 14, + to 22, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOOO" + } + } + }, + { + data psec-str sheet, + comment "strand 621", + location int { + from 24, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOOO" + } + } + }, + { + data psec-str sheet, + comment "strand 622", + location int { + from 29, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOOO" + } + } + }, + { + data psec-str sheet, + comment "strand 623", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOOO" + } + } + }, + { + data psec-str sheet, + comment "strand 624", + location int { + from 43, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOOO" + } + } + }, + { + data psec-str sheet, + comment "strand 625", + location int { + from 52, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOOO" + } + } + }, + { + data psec-str sheet, + comment "strand 626", + location int { + from 58, + to 64, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOOO" + } + } + }, + { + 
data psec-str sheet, + comment "strand 627", + location int { + from 68, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "OOOO" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPPP" + } + }, + descr { + comment "Small nuclear ribonucleoprotein-associated proteins B and B'", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MTVGKSSKMLQHIDYRMRCILQDGRIFIGTFKAFDKHMNLILCDCDEFRKIK +PKNSKQAEREEKRVLGLVLLRGENLVSMTVEGPPPKDTGIARV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 628", + location int { + from 13, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPPP" + 
} + } + }, + { + data psec-str sheet, + comment "strand 629", + location int { + from 23, + to 29, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPPP" + } + } + }, + { + data psec-str sheet, + comment "strand 630", + location int { + from 30, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPPP" + } + } + }, + { + data psec-str sheet, + comment "strand 631", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPPP" + } + } + }, + { + data psec-str sheet, + comment "strand 632", + location int { + from 43, + to 48, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPPP" + } + } + }, + { + data psec-str sheet, + comment "strand 633", + location int { + from 64, + to 68, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPPP" + } + } + }, + { + data psec-str sheet, + comment "strand 634", + location int { + from 69, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPPP" + } + } + }, + { + data psec-str sheet, + comment "strand 635", + location int { + from 75, + to 81, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "PPPP" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQQ" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D1", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + 
num 118, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MKLVRFLMKLSHETVTIELKNGTQVHGTITGVDVSMNTHLKAVKMTLKNREP +VQLETLSIRGNNIRYFILPDSLPLDTLLVDVEPKVKSKKREAVAGRGRGRGRGRGRGRGRGRGGPR" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 636", + location int { + from 12, + to 19, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 637", + location int { + from 21, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 638", + location int { + from 27, + to 33, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 639", + location int { + from 35, + to 40, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 640", + location int { + 
from 41, + to 47, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 641", + location int { + from 51, + to 55, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 642", + location int { + from 56, + to 61, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQQ" + } + } + }, + { + data psec-str sheet, + comment "strand 643", + location int { + from 65, + to 69, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "QQQQ" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRRR" + } + }, + descr { + comment "Small nuclear ribonucleoprotein Sm D2", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 118, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", 
+ "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 118, + seq-data ncbieaa "MSLLNKPKSEMTPEELQKREEEEFNTGPLSVLTQSVKNNTQVLINCRNNKKL +LGRVKAFDRHCNMVLENVKEMWTEVPKSGKGKKKSKPVNKDRYISKMFLRGDSVIVVLRNPLIAGK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 56", + location int { + from 18, + to 25, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRRR" + } + } + }, + { + data psec-str helix, + comment "helix 57", + location int { + from 29, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRRR" + } + } + }, + { + data psec-str sheet, + comment "strand 644", + location int { + from 40, + to 46, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRRR" + } + } + }, + { + data psec-str sheet, + comment "strand 645", + location int { + from 48, + to 52, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRRR" + } + } + }, + { + data psec-str sheet, + comment "strand 646", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRRR" + } + } + }, + { + data psec-str sheet, + comment "strand 647", + location int { + from 61, + to 67, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRRR" + } + } + }, + { + data psec-str sheet, + comment "strand 648", + location int { + from 68, + to 76, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRRR" + } + } + }, + { + data psec-str sheet, + comment "strand 649", + location int { + from 88, + to 94, + id pdb { + 
mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRRR" + } + } + }, + { + data psec-str sheet, + comment "strand 650", + location int { + from 95, + to 102, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRRR" + } + } + }, + { + data psec-str sheet, + comment "strand 651", + location int { + from 104, + to 110, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "RRRR" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTTT" + } + }, + descr { + comment "Small nuclear ribonucleoprotein F", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 86, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "MSLPLNPKPFLNGLTGKPVMVKLKWGMEYKGYLVSVDGYMNMQLANTEEYID +GALSGHLGEVLIRCNNVLYIRGVEEEEEDGEMRE" + }, + 
annot { + { + data ftable { + { + data psec-str helix, + comment "helix 58", + location int { + from 6, + to 13, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTTT" + } + } + }, + { + data psec-str sheet, + comment "strand 652", + location int { + from 15, + to 23, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTTT" + } + } + }, + { + data psec-str sheet, + comment "strand 653", + location int { + from 25, + to 30, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTTT" + } + } + }, + { + data psec-str sheet, + comment "strand 654", + location int { + from 31, + to 37, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTTT" + } + } + }, + { + data psec-str sheet, + comment "strand 655", + location int { + from 39, + to 44, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTTT" + } + } + }, + { + data psec-str sheet, + comment "strand 656", + location int { + from 45, + to 50, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTTT" + } + } + }, + { + data psec-str sheet, + comment "strand 657", + location int { + from 53, + to 59, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTTT" + } + } + }, + { + data psec-str sheet, + comment "strand 658", + location int { + from 60, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTTT" + } + } + }, + { + data psec-str sheet, + comment "strand 659", + location int { + from 69, + to 74, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "TTTT" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSSS" + } + }, + descr { + comment "Small nuclear ribonucleoprotein E", + source { + org { + taxname "Homo 
sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 92, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 92, + seq-data ncbieaa "MAYRGQGQKVQKVMVQPINLIFRYLQNRSRIQVWLYEQVNMRIEGCIIGFDE +YMNLVLDDAEEIHSKTKSRKQLGRIMLKGDNITLLQSVSN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 59", + location int { + from 16, + to 26, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSSS" + } + } + }, + { + data psec-str sheet, + comment "strand 660", + location int { + from 29, + to 34, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSSS" + } + } + }, + { + data psec-str sheet, + comment "strand 661", + location int { + from 39, + to 45, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSSS" + } + } + }, + { + data psec-str sheet, + comment "strand 
662", + location int { + from 46, + to 51, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSSS" + } + } + }, + { + data psec-str sheet, + comment "strand 663", + location int { + from 53, + to 57, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSSS" + } + } + }, + { + data psec-str sheet, + comment "strand 664", + location int { + from 58, + to 65, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSSS" + } + } + }, + { + data psec-str sheet, + comment "strand 665", + location int { + from 68, + to 73, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSSS" + } + } + }, + { + data psec-str sheet, + comment "strand 666", + location int { + from 74, + to 80, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSSS" + } + } + }, + { + data psec-str sheet, + comment "strand 667", + location int { + from 84, + to 89, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "SSSS" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUUU" + } + }, + descr { + comment "Small nuclear ribonucleoprotein G", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 76, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", 
+ "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 76, + seq-data ncbieaa "MSKAHPPELKKFMDKKLSLKLNGGRHVQGILRGFDPFMNLVIDECVEMATSG +QQNNIGMVVIRGNSIIMLEALERV" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 668", + location int { + from 14, + to 21, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUUU" + } + } + }, + { + data psec-str sheet, + comment "strand 669", + location int { + from 23, + to 28, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUUU" + } + } + }, + { + data psec-str sheet, + comment "strand 670", + location int { + from 29, + to 35, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUUU" + } + } + }, + { + data psec-str sheet, + comment "strand 671", + location int { + from 37, + to 42, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUUU" + } + } + }, + { + data psec-str sheet, + comment "strand 672", + location int { + from 43, + to 49, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUUU" + } + } + }, + { + data psec-str sheet, + comment "strand 673", + location int { + from 51, + to 56, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUUU" + } + } + }, + { + data psec-str sheet, + comment "strand 674", + location int { + from 57, + to 63, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUUU" + } + } + }, + { + data psec-str sheet, + comment "strand 675", + 
location int { + from 65, + to 72, + id pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "UUUU" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "V" + } + }, + descr { + comment "U4 small nuclear RNA variant: Native sequence 85-145, of + which nucleotides 97-104 are replaced with GAAA tetraloop and nucleotides + 134-137 are replaced with GAAA tetraloop receptor.", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 68, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 68, + seq-data iupacna "GCCGTGACGACTGAAAAGTCGGCATTGGCAATTTTTGACAGTCTCTATGGGT +AACCTAAGGAGACTGG" + }, + annot { + { + data ftable { + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "X" + } + }, + descr { + comment "U4 small nuclear RNA variant: Native sequence 85-145, of + which nucleotides 97-104 are replaced with GAAA tetraloop and nucleotides + 134-137 are replaced with GAAA 
tetraloop receptor.", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 68, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 68, + seq-data iupacna "GCCGTGACGACTGAAAAGTCGGCATTGGCAATTTTTGACAGTCTCTATGGGT +AACCTAAGGAGACTGG" + }, + annot { + { + data ftable { + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "Y" + } + }, + descr { + comment "U4 small nuclear RNA variant: Native sequence 85-145, of + which nucleotides 97-104 are replaced with GAAA tetraloop and nucleotides + 134-137 are replaced with GAAA tetraloop receptor.", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num 
enum { + num 68, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 68, + seq-data iupacna "GCCGTGACGACTGAAAAGTCGGCATTGGCAATTTTTGACAGTCTCTATGGGT +AACCTAAGGAGACTGG" + }, + annot { + { + data ftable { + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "VV" + } + }, + descr { + comment "U4 small nuclear RNA variant: Native sequence 85-145, of + which nucleotides 97-104 are replaced with GAAA tetraloop and nucleotides + 134-137 are replaced with GAAA tetraloop receptor.", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 68, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + 
"55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 68, + seq-data iupacna "GCCGTGACGACTGAAAAGTCGGCATTGGCAATTTTTGACAGTCTCTATGGGT +AACCTAAGGAGACTGG" + }, + annot { + { + data ftable { + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "XX" + } + }, + descr { + comment "U4 small nuclear RNA variant: Native sequence 85-145, of + which nucleotides 97-104 are replaced with GAAA tetraloop and nucleotides + 134-137 are replaced with GAAA tetraloop receptor.", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 68, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 68, + seq-data iupacna "GCCGTGACGACTGAAAAGTCGGCATTGGCAATTTTTGACAGTCTCTATGGGT +AACCTAAGGAGACTGG" + }, + annot { + { + data ftable { + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "YY" + } + 
}, + descr { + comment "U4 small nuclear RNA variant: Native sequence 85-145, of + which nucleotides 97-104 are replaced with GAAA tetraloop and nucleotides + 134-137 are replaced with GAAA tetraloop receptor.", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 68, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 68, + seq-data iupacna "GCCGTGACGACTGAAAAGTCGGCATTGGCAATTTTTGACAGTCTCTATGGGT +AACCTAAGGAGACTGG" + }, + annot { + { + data ftable { + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "VVV" + } + }, + descr { + comment "U4 small nuclear RNA variant: Native sequence 85-145, of + which nucleotides 97-104 are replaced with GAAA tetraloop and nucleotides + 134-137 are replaced with GAAA tetraloop receptor.", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; 
Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 68, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 68, + seq-data iupacna "GCCGTGACGACTGAAAAGTCGGCATTGGCAATTTTTGACAGTCTCTATGGGT +AACCTAAGGAGACTGG" + }, + annot { + { + data ftable { + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "XXX" + } + }, + descr { + comment "U4 small nuclear RNA variant: Native sequence 85-145, of + which nucleotides 97-104 are replaced with GAAA tetraloop and nucleotides + 134-137 are replaced with GAAA tetraloop receptor.", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 68, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", 
+ "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 68, + seq-data iupacna "GCCGTGACGACTGAAAAGTCGGCATTGGCAATTTTTGACAGTCTCTATGGGT +AACCTAAGGAGACTGG" + }, + annot { + { + data ftable { + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "YYY" + } + }, + descr { + comment "U4 small nuclear RNA variant: Native sequence 85-145, of + which nucleotides 97-104 are replaced with GAAA tetraloop and nucleotides + 134-137 are replaced with GAAA tetraloop receptor.", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 68, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 68, + seq-data iupacna 
"GCCGTGACGACTGAAAAGTCGGCATTGGCAATTTTTGACAGTCTCTATGGGT +AACCTAAGGAGACTGG" + }, + annot { + { + data ftable { + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "VVVV" + } + }, + descr { + comment "U4 small nuclear RNA variant: Native sequence 85-145, of + which nucleotides 97-104 are replaced with GAAA tetraloop and nucleotides + 134-137 are replaced with GAAA tetraloop receptor.", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 68, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 68, + seq-data iupacna "GCCGTGACGACTGAAAAGTCGGCATTGGCAATTTTTGACAGTCTCTATGGGT +AACCTAAGGAGACTGG" + }, + annot { + { + data ftable { + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "XXXX" + } + }, + descr { + comment "U4 small nuclear RNA variant: Native sequence 85-145, of + which nucleotides 97-104 are replaced with GAAA tetraloop and nucleotides + 134-137 are replaced with GAAA tetraloop receptor.", + 
source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 68, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 68, + seq-data iupacna "GCCGTGACGACTGAAAAGTCGGCATTGGCAATTTTTGACAGTCTCTATGGGT +AACCTAAGGAGACTGG" + }, + annot { + { + data ftable { + } + } + } + }, + seq { + id { + pdb { + mol "4WZJ", + rel std { + year 2018, + month 4, + day 18 + }, + chain-id "YYYY" + } + }, + descr { + comment "U4 small nuclear RNA variant: Native sequence 85-145, of + which nucleotides 97-104 are replaced with GAAA tetraloop and nucleotides + 134-137 are replaced with GAAA tetraloop receptor.", + source { + org { + taxname "Homo sapiens", + common "human", + db { + { + db "taxon", + tag id 9606 + } + }, + orgname { + name binomial { + genus "Homo", + species "sapiens" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; + Catarrhini; Hominidae; Homo", + gcode 1, + mgcode 2, + div "PRI" + } + } + }, + num enum { + num 68, + 
names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 68, + seq-data iupacna "GCCGTGACGACTGAAAAGTCGGCATTGGCAATTTTTGACAGTCTCTATGGGT +AACCTAAGGAGACTGG" + }, + annot { + { + data ftable { + } + } + } + } + } +} diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/data/a4WZJ.ids b/c++/src/objtools/blast/seqdb_writer/unit_test/data/a4WZJ.ids new file mode 100644 index 00000000..345c10df --- /dev/null +++ b/c++/src/objtools/blast/seqdb_writer/unit_test/data/a4WZJ.ids @@ -0,0 +1,84 @@ +4WZJ_A +4WZJ_B +4WZJ_C +4WZJ_D +4WZJ_F +4WZJ_E +4WZJ_G +4WZJ_H +4WZJ_I +4WZJ_J +4WZJ_K +4WZJ_M +4WZJ_L +4WZJ_N +4WZJ_O +4WZJ_P +4WZJ_Q +4WZJ_R +4WZJ_T +4WZJ_S +4WZJ_U +4WZJ_AA +4WZJ_BB +4WZJ_CC +4WZJ_DD +4WZJ_FF +4WZJ_EE +4WZJ_GG +4WZJ_HH +4WZJ_II +4WZJ_JJ +4WZJ_KK +4WZJ_MM +4WZJ_LL +4WZJ_NN +4WZJ_OO +4WZJ_PP +4WZJ_QQ +4WZJ_RR +4WZJ_TT +4WZJ_SS +4WZJ_UU +4WZJ_AAA +4WZJ_BBB +4WZJ_CCC +4WZJ_DDD +4WZJ_FFF +4WZJ_EEE +4WZJ_GGG +4WZJ_HHH +4WZJ_III +4WZJ_JJJ +4WZJ_KKK +4WZJ_MMM +4WZJ_LLL +4WZJ_NNN +4WZJ_OOO +4WZJ_PPP +4WZJ_QQQ +4WZJ_RRR +4WZJ_TTT +4WZJ_SSS +4WZJ_UUU +4WZJ_AAAA +4WZJ_BBBB +4WZJ_CCCC +4WZJ_DDDD +4WZJ_FFFF +4WZJ_EEEE +4WZJ_GGGG +4WZJ_HHHH +4WZJ_IIII +4WZJ_JJJJ +4WZJ_KKKK +4WZJ_MMMM +4WZJ_LLLL +4WZJ_NNNN +4WZJ_OOOO +4WZJ_PPPP +4WZJ_QQQQ +4WZJ_RRRR +4WZJ_TTTT +4WZJ_SSSS +4WZJ_UUUU diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/data/a5AJ4.ASN1 b/c++/src/objtools/blast/seqdb_writer/unit_test/data/a5AJ4.ASN1 
new file mode 100755 index 00000000..0663b452 --- /dev/null +++ b/c++/src/objtools/blast/seqdb_writer/unit_test/data/a5AJ4.ASN1 @@ -0,0 +1,61074 @@ +Seq-entry ::= set { + class pdb-entry, + descr { + pdb { + deposition std { + year 2015, + month 2, + day 20 + }, + class "RIBOSOME", + compound { + "Structure of the 55S mammalian mitoribosome." + }, + source { + "Mmdb_id: 161217", + "Pdb_id 1: 5AJ4" + }, + exp-method "Electron Microscopy" + }, + pub { + pub { + sub { + authors { + names std { + { + name name { + last "Greber", + full "B.J.Greber", + initials "B.J." + } + }, + { + name name { + last "Bieri", + full "P.Bieri", + initials "P." + } + }, + { + name name { + last "Leibundgut", + full "M.Leibundgut", + initials "M." + } + }, + { + name name { + last "Leitner", + full "A.Leitner", + initials "A." + } + }, + { + name name { + last "Aebersold", + full "R.Aebersold", + initials "R." + } + }, + { + name name { + last "Boehringer", + full "D.Boehringer", + initials "D." + } + }, + { + name name { + last "Ban", + full "N.Ban", + initials "N." + } + } + } + }, + date std { + year 2015, + month 2, + day 20 + } + } + } + }, + pub { + pub { + article { + title { + name "Ribosome. The complete structure of the 55S mammalian + mitochondrial ribosome." + }, + authors { + names std { + { + name name { + last "Greber", + full "B.J.Greber", + initials "B.J." + } + }, + { + name name { + last "Bieri", + full "P.Bieri", + initials "P." + } + }, + { + name name { + last "Leibundgut", + full "M.Leibundgut", + initials "M." + } + }, + { + name name { + last "Leitner", + full "A.Leitner", + initials "A." + } + }, + { + name name { + last "Aebersold", + full "R.Aebersold", + initials "R." + } + }, + { + name name { + last "Boehringer", + full "D.Boehringer", + initials "D." + } + }, + { + name name { + last "Ban", + full "N.Ban", + initials "N." 
+ } + } + } + }, + from journal { + title { + iso-jta "Science", + ml-jta "Science", + issn "1095-9203", + name "Science (New York, N.Y.)" + }, + imp { + date std { + year 2015, + month 4, + day 17 + }, + volume "348", + issue "6232", + pages "303-308", + language "eng", + pubstatus ppublish, + history { + { + pubstatus received, + date std { + year 2014, + month 11, + day 27 + } + }, + { + pubstatus accepted, + date std { + year 2015, + month 3, + day 6 + } + }, + { + pubstatus other, + date std { + year 2015, + month 4, + day 4, + hour 6, + minute 0 + } + }, + { + pubstatus pubmed, + date std { + year 2015, + month 4, + day 4, + hour 6, + minute 0 + } + }, + { + pubstatus medline, + date std { + year 2015, + month 5, + day 20, + hour 6, + minute 0 + } + } + } + } + }, + ids { + pubmed 25837512, + pii "science.aaa3872", + doi "10.1126/science.aaa3872", + other { + db "ELocationID doi", + tag str "10.1126/science.aaa3872" + }, + other { + db "PDB Citation Status", + tag str "primary" + } + } + }, + medline { + em std { + year 2015, + month 4, + day 4 + }, + cit { + from journal { + title { + name "Ribosome. The complete structure of the 55S mammalian + mitochondrial ribosome" + }, + imp { + date std { + year 2015, + month 4, + day 17 + } + } + } + }, + abstract "Mammalian mitochondrial ribosomes (mitoribosomes) + synthesize mitochondrially encoded membrane proteins that are critical for + mitochondrial function. Here we present the complete atomic structure of the + porcine 55S mitoribosome at 3.8 angstrom resolution by cryo-electron + microscopy and chemical cross-linking/mass spectrometry. The structure of the + 28S subunit in the complex was resolved at 3.6 angstrom resolution by focused + alignment, which allowed building of a detailed atomic structure including + all of its 15 mitoribosomal-specific proteins. 
The structure reveals the + intersubunit contacts in the 55S mitoribosome, the molecular architecture of + the mitoribosomal messenger RNA (mRNA) binding channel and its interaction + with transfer RNAs, and provides insight into the highly specialized + mechanism of mRNA recruitment to the 28S subunit. Furthermore, the structure + contributes to a mechanistic understanding of aminoglycoside ototoxicity. " + }, + pmid 25837512 + } + }, + het "(MG,167193)", + het "(MG,167215)", + het "(MG,167217)", + het "(MG,167222)", + het "(MG,167224)", + het "(MG,167225)", + het "(MG,167227)", + het "(MG,167231)", + het "(MG,167234)", + het "(MG,167235)", + het "(MG,167238)", + het "(MG,167239)", + het "(MG,167241)", + het "(MG,167242)", + het "(MG,167243)", + het "(MG,167244)", + het "(MG,167245)", + het "(MG,167246)", + het "(MG,167247)", + het "(MG,167248)", + het "(MG,167249)", + het "(MG,167250)", + het "(MG,167251)", + het "(MG,167253)", + het "(MG,167255)", + het "(MG,167257)", + het "(MG,167258)", + het "(MG,167261)", + het "(MG,167262)", + het "(MG,167263)", + het "(MG,167264)", + het "(MG,167265)", + het "(MG,167266)", + het "(MG,167267)", + het "(MG,167270)", + het "(MG,167272)", + het "(MG,167274)", + het "(MG,167275)", + het "(MG,167276)", + het "(MG,167281)", + het "(MG,167282)", + het "(MG,167283)", + het "(MG,167284)", + het "(MG,167285)", + het "(MG,167287)", + het "(MG,167289)", + het "(MG,167292)", + het "(MG,167295)", + het "(MG,167296)", + het "(MG,167298)", + het "(MG,167299)", + het "(MG,167301)", + het "(MG,167303)", + het "(MG,167306)", + het "(MG,167308)", + het "(MG,167309)", + het "(MG,167310)", + het "(MG,167311)", + het "(MG,167312)", + het "(MG,167314)", + het "(MG,167315)", + het "(MG,167317)", + het "(MG,167318)", + het "(MG,167319)", + het "(MG,167321)", + het "(MG,167322)", + het "(MG,167323)", + het "(MG,167324)", + het "(MG,167325)", + het "(MG,167326)", + het "(MG,167327)", + het "(MG,167328)", + het "(MG,167329)", + het "(MG,167330)", + het 
"(MG,167331)", + het "(MG,167332)", + het "(MG,167333)", + het "(MG,167335)", + het "(MG,167336)", + het "(GDP,167339)", + het "(MG,167369)", + het "(MG,167402)", + het "(MG,167411)", + het "(MG,167441)", + het "(MG,167451)", + het "(MG,167453)", + het "(MG,167455)", + het "(MG,167464)", + het "(MG,167470)", + het "(MG,167471)", + het "(MG,167472)", + het "(MG,167477)", + het "(MG,167479)", + het "(MG,167487)", + het "(MG,167489)", + het "(MG,167490)", + het "(MG,167494)", + het "(MG,167497)", + het "(MG,167499)", + het "(MG,167500)", + het "(MG,167502)", + het "(MG,167503)", + het "(MG,167504)", + het "(MG,167505)", + het "(MG,167506)", + het "(MG,167507)", + het "(MG,167508)", + het "(MG,167509)", + het "(MG,167510)", + het "(MG,167511)", + het "(MG,167512)", + het "(MG,167513)", + het "(MG,167514)", + het "(MG,167515)", + het "(MG,167516)", + het "(MG,167517)", + het "(MG,167518)", + het "(MG,167519)", + het "(MG,167520)", + het "(MG,167521)", + het "(MG,167522)", + het "(MG,167523)", + het "(MG,167524)", + het "(MG,167525)", + het "(MG,167526)", + het "(MG,167527)", + het "(MG,167528)", + het "(MG,167529)", + het "(MG,167530)", + het "(MG,167531)", + het "(MG,167532)", + het "(MG,167537)", + het "(MG,167538)", + het "(MG,167539)", + het "(MG,167540)", + het "(MG,167542)", + het "(MG,167544)", + het "(MG,167545)", + het "(MG,167547)", + het "(MG,167549)", + het "(MG,167551)", + het "(MG,167552)", + het "(MG,167554)", + het "(MG,167555)", + het "(MG,167556)", + het "(MG,167557)", + het "(MG,167559)", + het "(MG,167560)", + het "(MG,167562)", + het "(MG,167563)", + het "(MG,167565)", + het "(MG,167566)", + het "(MG,167567)", + het "(MG,167569)", + het "(MG,167572)", + het "(MG,167573)" + }, + seq-set { + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + }, + descr { + comment "MITORIBOSOMAL 12S Ribosomal RNA", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + 
} + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 962, + names { + "", + "", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + 
"198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325", + "326", + "327", + "328", + "329", + "330", + "331", + "332", + "333", + "334", + "335", + "336", + "337", + "338", + "339", + "340", + "341", + "342", + "343", + "344", + "345", + "346", + "347", + "348", + "349", + "350", + "351", + "352", + "353", + "354", + "355", + "356", + "357", + "358", + "359", + "360", + "361", + "362", + "363", + "364", + "365", + "366", + "367", + "368", + "369", + "370", + "371", + "372", + "373", + "374", + "375", + "376", + "377", + "378", + "379", + "380", + "381", + "382", + "383", + "384", + "385", + "386", + "387", + "388", + "389", + "390", + "391", + "392", + "393", + "394", + "395", + "396", + "397", + "398", + "399", + "400", + "401", + "402", + "403", + "404", + "405", + "406", + "407", + "408", + "409", + "410", + "411", + "412", + "413", + "414", + "415", + "416", + "417", + "418", + "419", + 
"420", + "421", + "422", + "423", + "424", + "425", + "426", + "427", + "428", + "429", + "430", + "431", + "432", + "433", + "434", + "435", + "436", + "437", + "438", + "439", + "440", + "441", + "442", + "443", + "444", + "445", + "446", + "447", + "448", + "449", + "450", + "451", + "452", + "453", + "454", + "455", + "456", + "457", + "458", + "459", + "460", + "461", + "462", + "463", + "464", + "465", + "466", + "467", + "468", + "469", + "470", + "471", + "472", + "473", + "474", + "475", + "476", + "477", + "478", + "479", + "480", + "481", + "482", + "483", + "484", + "485", + "486", + "487", + "488", + "489", + "490", + "491", + "492", + "493", + "494", + "495", + "496", + "497", + "498", + "499", + "500", + "501", + "502", + "503", + "504", + "505", + "506", + "507", + "508", + "509", + "510", + "511", + "512", + "513", + "514", + "515", + "516", + "517", + "518", + "519", + "520", + "521", + "522", + "523", + "524", + "525", + "526", + "527", + "528", + "529", + "530", + "531", + "532", + "533", + "534", + "535", + "536", + "537", + "538", + "539", + "540", + "541", + "542", + "543", + "544", + "545", + "546", + "547", + "548", + "549", + "550", + "551", + "552", + "553", + "554", + "555", + "556", + "557", + "558", + "559", + "560", + "561", + "562", + "563", + "564", + "565", + "566", + "567", + "568", + "569", + "570", + "571", + "572", + "573", + "574", + "575", + "576", + "577", + "578", + "579", + "580", + "581", + "582", + "583", + "584", + "585", + "586", + "587", + "588", + "589", + "590", + "591", + "592", + "593", + "594", + "595", + "596", + "597", + "598", + "599", + "600", + "601", + "602", + "603", + "604", + "605", + "606", + "607", + "608", + "609", + "610", + "611", + "612", + "613", + "614", + "615", + "616", + "617", + "618", + "619", + "620", + "621", + "622", + "623", + "624", + "625", + "626", + "627", + "628", + "629", + "630", + "631", + "632", + "633", + "634", + "635", + "636", + "637", + "638", + "639", + "640", + "641", + 
"642", + "643", + "644", + "645", + "646", + "647", + "648", + "649", + "650", + "651", + "652", + "653", + "654", + "655", + "656", + "657", + "658", + "659", + "660", + "661", + "662", + "663", + "664", + "665", + "666", + "667", + "668", + "669", + "670", + "671", + "672", + "673", + "674", + "675", + "676", + "677", + "678", + "679", + "680", + "681", + "682", + "683", + "684", + "685", + "686", + "687", + "688", + "689", + "690", + "691", + "692", + "693", + "694", + "695", + "696", + "697", + "698", + "699", + "700", + "701", + "702", + "703", + "704", + "705", + "706", + "707", + "708", + "709", + "710", + "711", + "712", + "713", + "714", + "715", + "716", + "717", + "718", + "719", + "720", + "721", + "722", + "723", + "724", + "725", + "726", + "727", + "728", + "729", + "730", + "731", + "732", + "733", + "734", + "735", + "736", + "737", + "738", + "739", + "740", + "741", + "742", + "743", + "744", + "745", + "746", + "747", + "748", + "749", + "750", + "751", + "752", + "753", + "754", + "755", + "756", + "757", + "758", + "759", + "760", + "761", + "762", + "763", + "764", + "765", + "766", + "767", + "768", + "769", + "770", + "771", + "772", + "773", + "774", + "775", + "776", + "777", + "778", + "779", + "780", + "781", + "782", + "783", + "784", + "785", + "786", + "787", + "788", + "789", + "790", + "791", + "792", + "793", + "794", + "795", + "796", + "797", + "798", + "799", + "800", + "801", + "802", + "803", + "804", + "805", + "806", + "807", + "808", + "809", + "810", + "811", + "812", + "813", + "814", + "815", + "816", + "817", + "818", + "819", + "820", + "821", + "822", + "823", + "824", + "825", + "826", + "827", + "828", + "829", + "830", + "831", + "832", + "833", + "834", + "835", + "836", + "837", + "838", + "839", + "840", + "841", + "842", + "843", + "844", + "845", + "846", + "847", + "848", + "849", + "850", + "851", + "852", + "853", + "854", + "855", + "856", + "857", + "858", + "859", + "860", + "861", + "862", + "863", + 
"864", + "865", + "866", + "867", + "868", + "869", + "870", + "871", + "872", + "873", + "874", + "875", + "876", + "877", + "878", + "879", + "880", + "881", + "882", + "883", + "884", + "885", + "886", + "887", + "888", + "889", + "890", + "891", + "892", + "893", + "894", + "895", + "896", + "897", + "898", + "899", + "900", + "901", + "902", + "903", + "904", + "905", + "906", + "907", + "908", + "909", + "910", + "911", + "912", + "913", + "914", + "915", + "916", + "917", + "918", + "919", + "920", + "921", + "922", + "923", + "924", + "925", + "926", + "927", + "928", + "929", + "930", + "931", + "932", + "933", + "934", + "935", + "936", + "937", + "938", + "939", + "940", + "941", + "942", + "943", + "944", + "945", + "946", + "947", + "948", + "949", + "950", + "951", + "952", + "953", + "954", + "955", + "956", + "957", + "958", + "959", + "960", + "961", + "962" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 962, + seq-data iupacna "ACACAGGTTTGGTCCTGGCCTTTCTATTAATTCTTAATAAAATTACACATGC +AAGTATCCGCGCCCCGGTGAGAATGCCCTCCAGATCTTAAAGATCAAAAGGAGCAGGTATCAAGCACACCTATAACGG +TAGCTCATAACGCCTTGCTCAACCACACCCCCACGGGAAACAGCAGTGATAAAAATTAAGCCATGAACGAAAGTTTGA +CTAAGTTATATTAATTAGAGTTGGTAAATCTCGTGCCAGCCACCGCGGTCATACGATTAACCCAAATTAATAGATCCA +CGGCGTAAAGAGTGTTTAAGAAAAAAAATCACAATAGAGTTAAATTATAACTAAGCTGTAAAAAGCCCTAGTTAAAAT +AAAATAACCCACGAAAGTGACTCTAATAATCCTGACACACGATAGCTAGGACCCAAACTGGGATTAGATACCCCACTA +TGCCTAGCCCTAAACCCAAATAGTTACATAACAAAACTATTCGCCAGAGTACTACTCGCAACTGCCTAAAACTCAAAG +GACTTGGCGGTGCTTCACATCCACCTAGAGGAGCCTGTTCTATAATCGATAAACCCCGATAGACCTTACCAACCCTTG +CCAATTCAGCCTATATACCGCCATCTTCAGCAAACCCTAAAAAGGAACAATAGTAAGCACAATCATAGCACATAAAAA +CGTTAGGTCAAGGTGTAGCTTATGGGTTGGAAAGAAATGGGCTACATTTTCTACATAAGAATATCCACCACACGAAAG +TTTTTATGAAACTAAAAACCAAAGGAGGATTTAGCAGTAAATCAAGAATAGAGTGCTTGATTGAATAAGGCCATGAAG +CACGCACACACCGCCCGTCACCCTCCTCAAGCATGTAGTAATAAAAATAACCTATATTCAATTACACAACCATGCAAG +AAGAGACAAGTCGTAACAAGGTAAGCATACTGGAAAGTGTGCTTGGATTACC" + }, + annot 
{ + { + data ftable { + { + data het "(MG,167191)", + location mix { + bond { + a { + point 494, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167192)", + location mix { + bond { + a { + point 937, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167194)", + location mix { + bond { + a { + point 405, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167195)", + location mix { + bond { + a { + point 441, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 442, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167196)", + location mix { + bond { + a { + point 111, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 496, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167197)", + location mix { + bond { + a { + point 493, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167198)", + location mix { + bond { + a { + point 585, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 584, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 583, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167199)", + location mix { + bond { + a { + point 658, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + 
chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167200)", + location mix { + bond { + a { + point 28, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167201)", + location mix { + bond { + a { + point 925, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 930, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 924, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 924, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167202)", + location mix { + bond { + a { + point 27, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 183, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167203)", + location mix { + bond { + a { + point 234, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 234, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 235, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167204)", + location mix { + bond { + a { + point 568, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 569, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167205)", + location mix { + bond { + a { + point 397, + id pdb { + mol "5AJ4", + rel std { + year 
2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167206)", + location mix { + bond { + a { + point 433, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 433, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 421, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 421, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167207)", + location mix { + bond { + a { + point 845, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 844, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167208)", + location mix { + bond { + a { + point 291, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167209)", + location mix { + bond { + a { + point 933, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 933, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 925, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 925, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 930, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 930, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + 
data het "(MG,167211)", + location mix { + bond { + a { + point 549, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 549, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 550, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167212)", + location mix { + bond { + a { + point 146, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 71, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 72, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167213)", + location mix { + bond { + a { + point 683, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 683, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167214)", + location mix { + bond { + a { + point 806, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167216)", + location mix { + bond { + a { + point 793, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167218)", + location mix { + bond { + a { + point 613, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 681, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167219)", + location mix { + bond { + a { + point 613, + id pdb { + mol "5AJ4", + rel 
std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 681, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 682, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 682, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167220)", + location mix { + bond { + a { + point 954, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 954, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167221)", + location mix { + bond { + a { + point 249, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167223)", + location mix { + bond { + a { + point 549, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 549, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167226)", + location mix { + bond { + a { + point 453, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 453, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167228)", + location mix { + bond { + a { + point 170, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 171, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167229)", + location mix { + bond { + a { + point 
456, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 953, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 952, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 953, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167230)", + location mix { + bond { + a { + point 852, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167232)", + location mix { + bond { + a { + point 229, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167233)", + location mix { + bond { + a { + point 584, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167236)", + location mix { + bond { + a { + point 690, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167237)", + location mix { + bond { + a { + point 161, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167240)", + location mix { + bond { + a { + point 609, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 699, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 699, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167252)", + location mix { + bond { + a { + point 349, + id pdb { + mol "5AJ4", + rel std { 
+ year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167254)", + location mix { + bond { + a { + point 955, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 955, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 955, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167256)", + location mix { + bond { + a { + point 689, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167259)", + location mix { + bond { + a { + point 261, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167260)", + location mix { + bond { + a { + point 154, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167268)", + location mix { + bond { + a { + point 273, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167269)", + location mix { + bond { + a { + point 273, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 217, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167271)", + location mix { + bond { + a { + point 489, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167273)", + location mix { + bond { + a { + point 550, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167277)", + location mix 
{ + bond { + a { + point 561, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167278)", + location mix { + bond { + a { + point 576, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167279)", + location mix { + bond { + a { + point 931, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 931, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167280)", + location mix { + bond { + a { + point 345, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167286)", + location mix { + bond { + a { + point 909, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167288)", + location mix { + bond { + a { + point 391, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167290)", + location mix { + bond { + a { + point 444, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167291)", + location mix { + bond { + a { + point 389, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 389, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167293)", + location mix { + bond { + a { + point 457, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167294)", + location mix { + bond { + a { + point 778, + id pdb { + mol "5AJ4", + rel 
std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167297)", + location mix { + bond { + a { + point 440, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167300)", + location mix { + bond { + a { + point 746, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 746, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 749, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167302)", + location mix { + bond { + a { + point 620, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167304)", + location mix { + bond { + a { + point 446, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167305)", + location mix { + bond { + a { + point 666, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167307)", + location mix { + bond { + a { + point 539, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 540, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 540, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167313)", + location mix { + bond { + a { + point 206, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167316)", + location mix { + bond { + a { + point 627, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167320)", + location mix { + bond { + a { + point 430, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + }, + { + data het "(MG,167334)", + location mix { + bond { + a { + point 630, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + }, + bond { + a { + point 630, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AA" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN US2M, MRPS2", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 220, + names { + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", 
+ "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 220, + seq-data ncbieaa "NDLRDRILSEPLKHADFFNLKELFSVRSLFDARVHLGHKAGCRHRFMEPYLF +GSRLGQDIIDLEQTAAHLQLALNFTAHVAYREGIILFVSRHRQFAHLIETTARDCGEYAHTRYFKGGLLTNAPLLLGP +GVRLPDLIIFLHTLNNVFEPHVAVRDAAKMNIPTVGIVDTNCNPALITYPVPGNDDSPPAVRLFCRLFQVAISRAKEK +RRQVEALYRLQG" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 1", + location int { + from 1, + to 8, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data psec-str helix, + comment "helix 2", + location int { + from 24, + to 32, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data psec-str helix, + comment "helix 3", + location int { + from 62, + to 82, + id pdb { + 
mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data psec-str helix, + comment "helix 4", + location int { + from 96, + to 106, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data psec-str helix, + comment "helix 5", + location int { + from 152, + to 159, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data psec-str helix, + comment "helix 6", + location int { + from 187, + to 217, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data psec-str sheet, + comment "strand 1", + location int { + from 34, + to 37, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data psec-str sheet, + comment "strand 2", + location int { + from 51, + to 55, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data psec-str sheet, + comment "strand 3", + location int { + from 56, + to 60, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data psec-str sheet, + comment "strand 4", + location int { + from 85, + to 91, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data psec-str sheet, + comment "strand 5", + location int { + from 108, + to 113, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data psec-str sheet, + comment "strand 6", + location int { + from 135, + to 140, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data psec-str sheet, + comment "strand 7", + location int { + from 161, + to 168, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + }, + { + data 
psec-str sheet, + comment "strand 8", + location int { + from 177, + to 182, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AB" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AC" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN US3M, MRPS24", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 132, + names { + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol 
aa, + length 132, + seq-data ncbieaa "KNRAARVRVSKGDKPVTYEEAHAPHYIAHRKGWLSLHTGNLDGEDHAAERTV +EDVFLRKFMLGTFPGCLADQLVLKRRANQLEICALVLRQLPPHKFYFLVGYSETLLSHFYKCPVHLHLQTVPSKVVYK +YI" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 7", + location int { + from 46, + to 64, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AC" + } + } + }, + { + data psec-str helix, + comment "helix 8", + location int { + from 94, + to 111, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AC" + } + } + }, + { + data psec-str sheet, + comment "strand 9", + location int { + from 13, + to 16, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AC" + } + } + }, + { + data psec-str sheet, + comment "strand 10", + location int { + from 33, + to 36, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AC" + } + } + }, + { + data psec-str sheet, + comment "strand 11", + location int { + from 72, + to 78, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AC" + } + } + }, + { + data psec-str sheet, + comment "strand 12", + location int { + from 79, + to 89, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AC" + } + } + }, + { + data psec-str sheet, + comment "strand 13", + location int { + from 114, + to 123, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AC" + } + } + }, + { + data psec-str sheet, + comment "strand 14", + location int { + from 128, + to 131, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AC" + } + } + }, + { + data region "Domain 1", + comment "NCBI Domains", + location int { + from 25, + to 125, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AC" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 
2018, + month 4, + day 4 + }, + chain-id "AE" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN US5M, MRPS5", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 328, + names { + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + 
"265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325", + "326", + "327", + "328", + "329", + "330", + "331", + "332", + "333", + "334", + "335", + "336", + "337", + "338", + "339", + "340", + "341", + "342", + "343", + "344", + "345", + "346", + "347", + "348", + "349", + "350", + "351", + "352", + "353", + "354", + "355", + "356", + "357", + "358", + "359", + "360", + "361", + "362", + "363", + "364", + "365", + "366", + "367", + "368", + "369", + "370", + "371", + "372", + "373", + "374", + "375", + "376", + "377", + "378", + "379", + "380", + "381", + "382", + "383", + "384", + "385", + "386", + "387", + "388", + "389", + "390", + "391", + "392", + "393", + "394", + "395", + "396", + "397", + "398", + "399", + "400", + "401", + "402", + "403", + "404", + "405", + "406", + "407", + "408", + "409", + "410", + "411", + "412", + "413", + "414", + "415", + "416", + "417", + "418", + "419", + "420", + "421", + "422", + "423", + "424", + "425", + "426", + "427", + "428", + "429", + "430" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 328, + seq-data ncbieaa "SFFTKLTADELWKGALAESRRKDLNRGQIIGEGRHGFLWPGLNIPLMRNGAV +QTIAQRSKEDQEKVEADMVQQREEWDRRRKMKVKRERGWSGNTWGGVSLGPPDPGPNGETYDDFDTRILEVRNVFNMT +AKEGRKRSVRVLVAVGNGKGAAGFAIGKATERADAFRKAKNRAVHYLHYIERYEDHTIYHDISLKFKRTHIKMKKQPR +GYGLHCHRAIMTICRLIGIKDLYAKVSGSVNMLNLTRGLFLGLSRQETHQQLADKKSLHVVEFREECGPLPIVVASPQ +GALRKDPEPEDEVPDITLDWEDVKAAQGMKRSVWSGLKRAAT" + }, + annot { + { + data ftable { + { + data psec-str 
helix, + comment "helix 9", + location int { + from 7, + to 15, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str helix, + comment "helix 10", + location int { + from 59, + to 82, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str helix, + comment "helix 11", + location int { + from 161, + to 173, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str helix, + comment "helix 12", + location int { + from 215, + to 224, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str helix, + comment "helix 13", + location int { + from 240, + to 252, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str helix, + comment "helix 14", + location int { + from 256, + to 263, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str helix, + comment "helix 15", + location int { + from 305, + to 312, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 15", + location int { + from 35, + to 39, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 16", + location int { + from 45, + to 48, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 17", + location int { + from 49, + to 52, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 18", + location int { + from 53, + to 57, + id pdb { + mol "5AJ4", + rel 
std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 19", + location int { + from 96, + to 102, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 20", + location int { + from 115, + to 120, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 21", + location int { + from 121, + to 129, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 22", + location int { + from 133, + to 147, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 23", + location int { + from 149, + to 159, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 24", + location int { + from 176, + to 179, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 25", + location int { + from 189, + to 196, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 26", + location int { + from 197, + to 205, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 27", + location int { + from 210, + to 213, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 28", + location int { + from 227, + to 233, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + 
data psec-str sheet, + comment "strand 29", + location int { + from 265, + to 270, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data psec-str sheet, + comment "strand 30", + location int { + from 278, + to 284, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data region "Domain 2", + comment "NCBI Domains", + location int { + from 26, + to 58, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data region "Domain 3", + comment "NCBI Domains", + location int { + from 90, + to 254, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + }, + { + data region "Domain 4", + comment "NCBI Domains", + location int { + from 255, + to 294, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AE" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AF" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BS6M, MRPS6", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 124, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + 
"53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 124, + seq-data ncbieaa "MPRYELALILKAMQRPETAAALKRTLEALMDRGAVVRNLENLGERMLPYKIS +AHNQRHSRGGYFLVDFYAPATTVESMMEHLSRDIDVIRPNIVKHPLTQEVKECEGIVPVPLEEKLYSTKKRK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 16", + location int { + from 15, + to 31, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AF" + } + } + }, + { + data psec-str helix, + comment "helix 17", + location int { + from 74, + to 81, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AF" + } + } + }, + { + data psec-str sheet, + comment "strand 31", + location int { + from 2, + to 11, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AF" + } + } + }, + { + data psec-str sheet, + comment "strand 32", + location int { + from 33, + to 37, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AF" + } + } + }, + { + data psec-str sheet, + comment "strand 33", + location int { + from 38, + to 43, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AF" + } + } + }, + { + data psec-str sheet, + comment "strand 34", + location int { + from 49, + to 53, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AF" + 
} + } + }, + { + data psec-str sheet, + comment "strand 35", + location int { + from 54, + to 58, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AF" + } + } + }, + { + data psec-str sheet, + comment "strand 36", + location int { + from 59, + to 66, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AF" + } + } + }, + { + data psec-str sheet, + comment "strand 37", + location int { + from 86, + to 94, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AF" + } + } + }, + { + data het "(ZN,167337)", + location mix { + bond { + a { + point 104, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AF" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AG" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN US7M, MRPS7", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 208, + names { + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + 
"110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 208, + seq-data ncbieaa "SRYGPEYKDPQIDKEYYRKPLAEQTEEEKYERDFKKTQLIKAAPATKTSSVF +EDPVISKFTNMMMKGGNKVLARSLMTQTLEAVKRKQFAKYHAASAEEQATIERNPYTIFHQALKNCEPVIGLVPILKG +GHFYQVPVPLADRRRRFLAMKWMIAECREKKHRRVLMPEKLSQELLEAFHNQGPVIKRKHDMHKMAEANRALAHYRWW" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 18", + location int { + from 23, + to 34, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AG" + } + } + }, + { + data psec-str helix, + comment "helix 19", + location int { + from 54, + to 63, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AG" + } + } + }, + { + data psec-str helix, + comment "helix 20", + location int { 
+ from 69, + to 93, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AG" + } + } + }, + { + data psec-str helix, + comment "helix 21", + location int { + from 95, + to 102, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AG" + } + } + }, + { + data psec-str helix, + comment "helix 22", + location int { + from 106, + to 117, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AG" + } + } + }, + { + data psec-str helix, + comment "helix 23", + location int { + from 141, + to 158, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AG" + } + } + }, + { + data psec-str helix, + comment "helix 24", + location int { + from 166, + to 179, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AG" + } + } + }, + { + data psec-str helix, + comment "helix 25", + location int { + from 184, + to 198, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AG" + } + } + }, + { + data psec-str sheet, + comment "strand 38", + location int { + from 121, + to 129, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AG" + } + } + }, + { + data psec-str sheet, + comment "strand 39", + location int { + from 130, + to 138, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AG" + } + } + }, + { + data region "Domain 5", + comment "NCBI Domains", + location int { + from 45, + to 207, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AG" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN US9M, MRPS9", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + 
species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 311, + names { + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + 
"281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325", + "326", + "327", + "328", + "329", + "330", + "331", + "332", + "333", + "334", + "335", + "336", + "337", + "338", + "339", + "340", + "341", + "342", + "343", + "344", + "345", + "346", + "347", + "348", + "349", + "350", + "351", + "352", + "353", + "354", + "355", + "356", + "357", + "358", + "359", + "360", + "361", + "362", + "363", + "364", + "365", + "366", + "367", + "368", + "369", + "370", + "371", + "372", + "373", + "374", + "375", + "376", + "377", + "378", + "379", + "380", + "381", + "382", + "383", + "384", + "385", + "386", + "387", + "388", + "389", + "390", + "391", + "392", + "393", + "394", + "395", + "396", + "397" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 311, + seq-data ncbieaa "TVDFIKKQIEEFNIGKRHLANMMGEDPETFTQEDIDRAIAYLFPSGLFEKRA +RPIMKHPEEIFPKQRAIQWGEDGRPFHFLFYTGKQSYYSLMHDTYGKLLDVEKHHNPIGSRWLIKEELEEMLVEKLSD +QDYAQFIRLLERLSALPCGATEEDFVNRFRRSIPXXXXXXXXXXXXXXXXGMAFSRGEGKRKTAKAEVVVYGQGSGRI +DVNGVDYLLYFPVTQDREQLMFPLHFLDRLGKHDMTCAVSGGGRSAQAGAVRLAMARALCSFVTEDEVEWMRQAGLLT +ADPRVRERKKPGQEGARRKFTWKKR" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK", + location pnt { + point 164, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 165, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 166, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + 
chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 167, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 168, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 169, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 170, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 171, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 172, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 173, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 174, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 175, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 176, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 177, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 178, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + 
}, + chain-id "AI" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 179, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str helix, + comment "helix 26", + location int { + from 1, + to 22, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str helix, + comment "helix 27", + location int { + from 31, + to 41, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str helix, + comment "helix 28", + location int { + from 86, + to 105, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str helix, + comment "helix 29", + location int { + from 116, + to 123, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str helix, + comment "helix 30", + location int { + from 129, + to 144, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str helix, + comment "helix 31", + location int { + from 149, + to 156, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str helix, + comment "helix 32", + location int { + from 221, + to 234, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str helix, + comment "helix 33", + location int { + from 251, + to 266, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str helix, + comment "helix 34", + location int { + from 274, + to 281, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str sheet, + comment "strand 40", + location int { + from 110, + to 
114, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str sheet, + comment "strand 41", + location int { + from 158, + to 162, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str sheet, + comment "strand 42", + location int { + from 173, + to 177, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str sheet, + comment "strand 43", + location int { + from 180, + to 191, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str sheet, + comment "strand 44", + location int { + from 192, + to 202, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str sheet, + comment "strand 45", + location int { + from 205, + to 210, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str sheet, + comment "strand 46", + location int { + from 239, + to 245, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data psec-str sheet, + comment "strand 47", + location int { + from 246, + to 249, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data region "Domain 6", + comment "NCBI Domains", + location int { + from 64, + to 167, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + }, + { + data region "Domain 7", + comment "NCBI Domains", + location int { + from 168, + to 310, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AI" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AJ" + } + }, + descr { + comment 
"MITORIBOSOMAL PROTEIN US10M, MRPS10", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 201, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 201, + seq-data ncbieaa "MAARTAFGALARRLWQGSRNFSVSSSRSSTAKNGGFLLSTTMKWVQFSNLHV +DVPKDMTKPEITISDEPDTLYKRLSVLVKGHDKAVLDSYEYFAVLAAKELGISIKVHEPPRKIERFTLLKSVHIFKKH +RVQYEMRTLYRCLELEHLTGSTADVYLEYIQRNLPEGVAMEVTKTKLEQLPEHIRKPIWETMPEEKEESKS" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 35", + location int { + from 85, + to 101, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AJ" + } + } + }, + { + data psec-str helix, + comment "helix 36", + location int { + from 150, + to 162, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AJ" + } + } + }, + { + data psec-str sheet, + comment "strand 48", + location int { + from 73, + to 82, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AJ" + } + } + }, + { + data psec-str sheet, + comment "strand 49", + location int { + from 105, + to 110, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AJ" + } + } + }, + { + data psec-str sheet, + comment "strand 50", + location int { + from 112, + to 120, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AJ" + } + } + }, + { + data psec-str sheet, + comment "strand 51", + location int { + from 130, + to 145, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AJ" + } + } + }, + { + data psec-str sheet, + comment "strand 52", + location int { + from 146, + to 149, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AJ" + } + } + }, + { + data psec-str sheet, + comment "strand 53", + location int { + from 167, + to 176, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AJ" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 
4, + day 4 + }, + chain-id "AK" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN US11M, MRPS11", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 136, + names { + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 136, + seq-data ncbieaa "SFSIYPPIPGQENSLRWAGKKFEEIPIAHIKASYNNTQIQVVSAAHQPLAHA +SCGTEGFRNAKKGTGIAAQTAGIAAAAKATGKGVTHVRVVVKGLGPGRLSAIKGLTMGGLEVISITDNTPIPHNGCRP +RKARRL" + }, + annot { + { + data 
ftable { + { + data psec-str helix, + comment "helix 37", + location int { + from 66, + to 83, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AK" + } + } + }, + { + data psec-str helix, + comment "helix 38", + location int { + from 99, + to 108, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AK" + } + } + }, + { + data psec-str sheet, + comment "strand 54", + location int { + from 24, + to 33, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AK" + } + } + }, + { + data psec-str sheet, + comment "strand 55", + location int { + from 34, + to 43, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AK" + } + } + }, + { + data psec-str sheet, + comment "strand 56", + location int { + from 46, + to 53, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AK" + } + } + }, + { + data psec-str sheet, + comment "strand 57", + location int { + from 86, + to 94, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AK" + } + } + }, + { + data psec-str sheet, + comment "strand 58", + location int { + from 111, + to 115, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AK" + } + } + }, + { + data psec-str sheet, + comment "strand 59", + location int { + from 116, + to 120, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AK" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AL" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN US12M, MRPS12", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; 
Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 109, + names { + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 109, + seq-data ncbieaa "ATLNQMHRLGPPKHPPGKMGPTAGRPQLKGVVLRTFIRKPKKPNSANRKCCR +VRLSTGREAVCFIPGEGHSLQEHHVVLVQGGRTQDLPGVKLTVVRGKYDCGHVQKKK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 39", + location int { + from 1, + to 9, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AL" + } + } + }, + { + data psec-str sheet, + comment "strand 60", + location int { + from 26, + to 31, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AL" + } + } + }, + { + data psec-str sheet, + comment "strand 61", + location int { + from 32, + to 39, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AL" + } + } + }, + { + data psec-str sheet, + comment "strand 62", + location int { + from 45, + to 55, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AL" + } + } + }, + { + data psec-str sheet, + comment "strand 63", + location int { + from 57, + to 64, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AL" + } + } + }, + { + data psec-str sheet, + comment "strand 64", + location int { + from 75, + to 82, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AL" + } + } + }, + { + data psec-str sheet, + comment "strand 65", + location int { + from 91, + to 95, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AL" + } + } + }, + { + data psec-str sheet, + comment "strand 66", + location int { + from 96, + to 99, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AL" + } + } + }, + { + data psec-str sheet, + comment "strand 67", + location int { + from 100, + to 103, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AL" + } + } + }, + { + data region "Domain 8", + comment "NCBI Domains", + location int { + from 18, + to 108, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AL" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AN" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN US14M, MRPS14", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 128, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 128, + seq-data ncbieaa "MAASMLGFLLRTVRQMVPSSASGQVRSYYVDWKMLRDVKRRKMAYEYADERL +RINSLRKNTILPKDLQEVADEEIASLPRDSCPVRIRNRCVMTSRPRGVKRRWRLSRIVFRHLADHGQLSGVQRAMW" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 40", + location int { + from 31, + to 47, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AN" + } + } + }, + { + data psec-str helix, + comment "helix 41", + location int { + from 48, + to 58, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AN" + } + } + }, + { + data psec-str helix, + comment "helix 42", + location int { + from 64, + to 76, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AN" + } + } + }, + { + data psec-str helix, + comment "helix 43", + location int { + from 108, + to 116, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AN" + } + } + }, + { + data psec-str sheet, + comment "strand 68", + location int { + from 98, + to 101, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AN" + } + } + }, + { + data psec-str sheet, + comment "strand 69", + location int { + from 104, + to 107, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AN" + } + } + }, + { + data region "Domain 9", + comment "NCBI Domains", + location int { + from 0, + to 87, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AN" + } + } + }, + { + data region "Domain 10", + comment "NCBI Domains", + location int { + from 88, + to 127, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AN" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AO" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN US15M, MRPS15", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 239, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + 
"104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 239, + seq-data ncbieaa "MLRAAWRALSSIRTQAVTQAPVLGLPGGGCARFPSVQRALPSRPAGLILRAA +RGYAAQKPVQPNQDDPPPSMLLLDYQNVPGIHKVDDVVKRLLSLEMANQKEKLKIKKMQLMNKVLENPEDTSSLEARI +VALTVKIRNYEEHMQKHRKDKAHKRFLLMSIDQRKKMLKNLRETNYAVFEKICKELGIEYTFPPPYHRKAHRRWVTKK +ALCTQVFREVQKLKKQKRALRAAAVAAHKQG" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 44", + location int { + from 87, + to 94, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AO" + } + } + }, + { + data psec-str helix, + comment "helix 45", + location int { + from 101, + to 115, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AO" + } + } + }, + { 
+ data psec-str helix, + comment "helix 46", + location int { + from 125, + to 146, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AO" + } + } + }, + { + data psec-str helix, + comment "helix 47", + location int { + from 151, + to 172, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AO" + } + } + }, + { + data psec-str helix, + comment "helix 48", + location int { + from 176, + to 185, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AO" + } + } + }, + { + data psec-str helix, + comment "helix 49", + location int { + from 201, + to 237, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AO" + } + } + }, + { + data region "Domain 11", + comment "NCBI Domains", + location int { + from 98, + to 193, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AO" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BS16M, MRPS16", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 117, + names { + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + 
"62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 117, + seq-data ncbieaa "RAYRGGHLTIRLALGGCTNRPFYRIVAAHSKCPRDGRFVEQLGSYDPLPNSH +GEKLVALNFDRIRHWIGCGAHLSKPVEKLLGLSGFYPLHPMMITNAERLRRKRAREVLLASQKTD" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 50", + location int { + from 60, + to 69, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + } + }, + { + data psec-str helix, + comment "helix 51", + location int { + from 75, + to 84, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + } + }, + { + data psec-str helix, + comment "helix 52", + location int { + from 92, + to 115, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + } + }, + { + data psec-str sheet, + comment "strand 70", + location int { + from 6, + to 17, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + } + }, + { + data psec-str sheet, + comment "strand 71", + location int { + from 18, + to 24, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + } + }, + { + data psec-str sheet, + comment "strand 72", + location int { + from 25, + to 29, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + } + }, + { + data psec-str sheet, + comment "strand 73", 
+ location int { + from 38, + to 46, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + } + }, + { + data psec-str sheet, + comment "strand 74", + location int { + from 47, + to 50, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + } + }, + { + data psec-str sheet, + comment "strand 75", + location int { + from 51, + to 59, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + } + }, + { + data psec-str sheet, + comment "strand 76", + location int { + from 71, + to 74, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + } + }, + { + data region "Domain 12", + comment "NCBI Domains", + location int { + from 0, + to 88, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + } + }, + { + data het "(ZN,167338)", + location mix { + bond { + a { + point 16, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AP" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AQ" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN US17M, MRPS17", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 109, + names { + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", 
+ "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 109, + seq-data ncbieaa "SIARSSVHAKWVVGKVIGTAMQKTAKVRVTRLVLDPYLLKYFNKRKTYFAHD +ALQQCTVGDIVLLKALPVPRTKHVKHELAEIIFKVGQVIDPVTGKPCAGTTYLESPI" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 77", + location int { + from 9, + to 17, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AQ" + } + } + }, + { + data psec-str sheet, + comment "strand 78", + location int { + from 22, + to 36, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AQ" + } + } + }, + { + data psec-str sheet, + comment "strand 79", + location int { + from 37, + to 51, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AQ" + } + } + }, + { + data psec-str sheet, + comment "strand 80", + location int { + from 59, + to 67, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AQ" + } + } + }, + { + data psec-str sheet, + comment "strand 81", + location int { + from 76, + to 79, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AQ" + } + } + }, + { + data psec-str sheet, + comment "strand 82", + location int { + from 80, + to 84, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AQ" + } + } + }, + { + data 
psec-str sheet, + comment "strand 83", + location int { + from 89, + to 93, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AQ" + } + } + }, + { + data psec-str sheet, + comment "strand 84", + location int { + from 94, + to 100, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AQ" + } + } + }, + { + data psec-str sheet, + comment "strand 85", + location int { + from 101, + to 104, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AQ" + } + } + }, + { + data region "Domain 13", + comment "NCBI Domains", + location int { + from 0, + to 86, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AQ" + } + } + }, + { + data region "Domain 14", + comment "NCBI Domains", + location int { + from 87, + to 108, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AQ" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AR" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BS18M, MRPS18C", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 97, + names { + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + 
"99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 97, + seq-data ncbieaa "SNEDLPVPMENPYKEPLKKCILCEKHVDYKNVQLLSQFISPFTGCIYGRHIT +GLCGKKQKEITKAIKRAQILGFMPVTYKDPAYLKDPKVCNIKYRE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 53", + location int { + from 55, + to 71, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AR" + } + } + }, + { + data psec-str sheet, + comment "strand 86", + location int { + from 37, + to 40, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AR" + } + } + }, + { + data psec-str sheet, + comment "strand 87", + location int { + from 43, + to 46, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AR" + } + } + }, + { + data het "(ZN,167337)", + location mix { + bond { + a { + point 22, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AR" + } + } + }, + bond { + a { + point 54, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AR" + } + } + }, + bond { + a { + point 19, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AR" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AU" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BS21M, MRPS21", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", 
+ species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 86, + names { + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 86, + seq-data ncbieaa "ANHLKFIARTVMVQEGNVEGAYRTLNRILTMDGLIEDIKRRRYYEKPCRRRQ +RESYETCRRIYNMEMARKINFLMRKNRADPWQGC" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 54", + location int { + from 18, + to 31, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AU" + } + } + }, + { + data psec-str helix, + comment "helix 55", + location int { + from 32, + to 40, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AU" + } + } + }, + { + data psec-str helix, + comment "helix 56", + location int { + from 46, + to 73, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "AU" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN MS22, MRPS22", + source { + org { + taxname "Sus scrofa", + 
common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 356, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", 
+ "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325", + "326", + "327", + "328", + "329", + "330", + "331", + "332", + "333", + "334", + "335", + "336", + "337", + "338", + "339", + "340", + "341", + "342", + "343", + "344", + "345", + "346", + "347", + "348", + "349", + "350", + "351", + "352", + "353", + "354", + "355", + "356" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 356, + seq-data ncbieaa "MATLKASALLRSLQTNSCGTGRVCFPVRARPRPRALLQPLPGACGTGTLCRG +LGSESESGNSEIRKPTFMDEEVQNILIKMTGLDLQKIFKPALQELKPPTYKLMTQAQLEEATKQAVEAAKVRLKMPPV +LEERAPINDVLAEDKILEGTETAKYVFTDISYSIPHRERFIVVREPSGTLRKASWEERDRMIQVYFPREGRRILTPVI +FKEENLQTMYSQDQHVDVLNLCVAQFEPDSAEYIKIHHHTYEDIDKCGKYDLLRSTRHFGGMAWYFVNKKKIDGLLID +QIQRDLVSDATSLVHLYHILHPXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK", + location pnt { + point 308, + id pdb { + mol "5AJ4", + rel std { + year 2018, + 
month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 309, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 310, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 311, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 312, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 313, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 314, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 315, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 316, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 317, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 318, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 319, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 320, + id pdb { + mol "5AJ4", + rel std { + year 
2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 321, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 322, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 323, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 324, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 325, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 326, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 327, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 328, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 329, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 330, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 331, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 332, + id pdb { + mol "5AJ4", + rel std { + 
year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 333, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 334, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 335, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 336, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 337, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 338, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 339, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 340, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 341, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 342, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 343, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 344, + id pdb { + mol "5AJ4", + rel std 
{ + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 345, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 346, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 347, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 348, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 349, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 350, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 351, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 352, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 353, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 354, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data non-std-residue "UNK COOH", + location pnt { + point 355, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str helix, + comment "helix 57", + location int { + from 71, + to 81, + 
id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str helix, + comment "helix 58", + location int { + from 106, + to 124, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str helix, + comment "helix 59", + location int { + from 184, + to 194, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str helix, + comment "helix 60", + location int { + from 210, + to 219, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str helix, + comment "helix 61", + location int { + from 222, + to 232, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str helix, + comment "helix 62", + location int { + from 239, + to 254, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str helix, + comment "helix 63", + location int { + from 266, + to 276, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str helix, + comment "helix 64", + location int { + from 280, + to 289, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str helix, + comment "helix 65", + location int { + from 292, + to 306, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str helix, + comment "helix 66", + location int { + from 310, + to 318, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str helix, + comment "helix 67", + location int { + from 322, + to 332, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + 
} + } + }, + { + data psec-str helix, + comment "helix 68", + location int { + from 338, + to 354, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str sheet, + comment "strand 88", + location int { + from 138, + to 143, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str sheet, + comment "strand 89", + location int { + from 152, + to 158, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str sheet, + comment "strand 90", + location int { + from 169, + to 175, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data psec-str sheet, + comment "strand 91", + location int { + from 177, + to 182, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data region "Domain 15", + comment "NCBI Domains", + location mix { + int { + from 0, + to 93, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + }, + int { + from 203, + to 278, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + } + }, + { + data region "Domain 16", + comment "NCBI Domains", + location int { + from 132, + to 202, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + }, + { + data region "Domain 17", + comment "NCBI Domains", + location int { + from 279, + to 355, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aa" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ab" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN MS23, MRPS23", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + 
orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 190, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 190, + seq-data ncbieaa "MAGSRLETVGSIFSRTRDLIRAGVLKEKPLWLDIYNAFPPLREPVFRRPRLR 
+YGKAKAAVQDIFYHEDRIRAKFYSAYGSGPKAFDLFNPNFKSTCQRFVEKYIELQRLGETDEEKLFVEAGKALLAEGV +TLRRVGEARTQQEGSPISRKSEPTGIKPQTAVEENQPWKEVPQDQPLEAPEKESKGLPPP" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 69", + location int { + from 11, + to 21, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ab" + } + } + }, + { + data psec-str helix, + comment "helix 70", + location int { + from 30, + to 37, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ab" + } + } + }, + { + data psec-str helix, + comment "helix 71", + location int { + from 67, + to 76, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ab" + } + } + }, + { + data psec-str helix, + comment "helix 72", + location int { + from 94, + to 108, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ab" + } + } + }, + { + data psec-str helix, + comment "helix 73", + location int { + from 113, + to 127, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ab" + } + } + }, + { + data region "Domain 18", + comment "NCBI Domains", + location int { + from 53, + to 189, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ab" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ac" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN MS25, MRPS25", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 169, + names { + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", 
+ "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 169, + seq-data ncbieaa "PMKGRFPIRRTLQYLGQGDVVFKDSVKVMTVNYNTHGELGEGARKFVFFNIP +QIQYKNPWVQITMFKNMTPSPFLRFYLDSGEQVLVDVESKSNKEIVEHIRKILGKNEETLEKEEQEKKQLSHPAHFGP +RKYCLRECMCEVEGQVPCPGLVPLPKEMTGKYKAALKAT" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 74", + location int { + from 40, + to 57, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ac" + } + } + }, + { + data psec-str helix, + comment "helix 75", + location int { + from 93, + to 103, + id pdb { + mol "5AJ4", + rel std { + year 
2018, + month 4, + day 4 + }, + chain-id "Ac" + } + } + }, + { + data psec-str helix, + comment "helix 76", + location int { + from 108, + to 121, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ac" + } + } + }, + { + data psec-str helix, + comment "helix 77", + location int { + from 159, + to 167, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ac" + } + } + }, + { + data psec-str sheet, + comment "strand 92", + location int { + from 24, + to 33, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ac" + } + } + }, + { + data psec-str sheet, + comment "strand 93", + location int { + from 60, + to 67, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ac" + } + } + }, + { + data psec-str sheet, + comment "strand 94", + location int { + from 72, + to 79, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ac" + } + } + }, + { + data psec-str sheet, + comment "strand 95", + location int { + from 81, + to 88, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ac" + } + } + }, + { + data region "Domain 19", + comment "NCBI Domains", + location int { + from 0, + to 105, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ac" + } + } + }, + { + data het "(ZN,167338)", + location mix { + bond { + a { + point 137, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ac" + } + } + }, + bond { + a { + point 147, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ac" + } + } + }, + bond { + a { + point 139, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ac" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ad" + } + }, + descr { + comment 
"MITORIBOSOMAL PROTEIN MS26, MRPS26", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 177, + names { + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203" + } + 
}, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 177, + seq-data ncbieaa "RKTRHDPPAKSKAGRVATPPAVDPTEFFVLTERYRQYRQTVRALRLEFMSEV +RKKLHEARAGVQAERKAQEDAAEHRELMAWNQAENQRLHELRLARLRQEALEQERRQAEEAVLQAREAQAWAQLKEQE +VLQLQEEAKTFITRENLEARVEEALDSPKSYNWAITREGLVVRPQQK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 78", + location int { + from 23, + to 59, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ad" + } + } + }, + { + data psec-str helix, + comment "helix 79", + location int { + from 62, + to 111, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ad" + } + } + }, + { + data psec-str helix, + comment "helix 80", + location int { + from 112, + to 136, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ad" + } + } + }, + { + data psec-str helix, + comment "helix 81", + location int { + from 146, + to 155, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ad" + } + } + }, + { + data psec-str sheet, + comment "strand 96", + location int { + from 162, + to 166, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ad" + } + } + }, + { + data psec-str sheet, + comment "strand 97", + location int { + from 168, + to 172, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ad" + } + } + }, + { + data region "Domain 20", + comment "NCBI Domains", + location int { + from 0, + to 141, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ad" + } + } + }, + { + data region "Domain 21", + comment "NCBI Domains", + location int { + from 142, + to 176, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ad" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + }, + descr 
{ + comment "MITORIBOSOMAL PROTEIN MS27, MRPS27", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 336, + names { + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + 
"204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "354", + "355", + "356", + "357", + "358", + "359", + "360", + "361", + "362", + "363", + "364", + "365", + "366", + "367", + "368", + "369", + "370", + "371", + "372", + "373", + "374", + "375", + "376", + "377", + "378", + "379", + "380", + "381", + "382", + "383", + "384", + "385", + "386", + "387", + "388", + "389", + "390", + "391", + "392", + "393", + "394", + "395", + "396", + "397", + "398", + "399", + "400", + "401", + "402", + "403", + "404", + "405", + "406", + "407" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 336, + seq-data ncbieaa "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK NH3+", + location pnt { + point 0, + 
id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 1, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 2, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 3, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 4, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 5, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 6, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 7, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 8, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 9, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 10, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 11, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 12, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 13, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 14, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 15, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 16, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 17, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 18, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 19, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 20, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 21, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 22, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 23, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 24, + id pdb { + mol "5AJ4", 
+ rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 25, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 26, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 27, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 28, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 29, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 30, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 31, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 32, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 33, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 34, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 35, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 36, + id pdb { + mol "5AJ4", + rel 
std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 37, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 38, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 39, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 40, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 41, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 42, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 43, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 44, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 45, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 46, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 47, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 48, + id pdb { + mol "5AJ4", + rel std { + 
year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 49, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 50, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 51, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 52, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 53, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 54, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 55, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 56, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 57, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 58, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 59, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 60, + id pdb { + mol "5AJ4", + rel std { + year 
2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 61, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 62, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 63, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 64, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 65, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 66, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 67, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 68, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 69, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 70, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 71, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 72, + id pdb { + mol "5AJ4", + rel std { + year 2018, + 
month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 73, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 74, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 75, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 76, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 77, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 78, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 79, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 80, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 81, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 82, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 83, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 84, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 
4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 85, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 86, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 87, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 88, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 89, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 90, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 91, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 92, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 93, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 94, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 95, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 96, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + 
day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 97, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 98, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 99, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 100, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 101, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 102, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 103, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 104, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 105, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 106, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 107, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 108, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, 
+ day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 109, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 110, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 111, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 112, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 113, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 114, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 115, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 116, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 117, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 118, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 119, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 120, + id pdb { + mol "5AJ4", + rel std { + year 2018, + 
month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 121, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 122, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 123, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 124, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 125, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 126, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 127, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 128, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 129, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 130, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 131, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 132, + id pdb { + mol "5AJ4", + rel std { + year 
2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 133, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 134, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 135, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 136, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 137, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 138, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 139, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 140, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 141, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 142, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 143, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 144, + id pdb { + mol "5AJ4", + rel std { + 
year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 145, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 146, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 147, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 148, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 149, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 150, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 151, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 152, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 153, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 154, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 155, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 156, + id pdb { + mol "5AJ4", + rel std 
{ + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 157, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 158, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 159, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 160, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 161, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 162, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 163, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 164, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 165, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 166, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 167, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 168, + id pdb { + mol "5AJ4", + rel 
std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 169, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 170, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 171, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 172, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 173, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 174, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 175, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 176, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 177, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 178, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 179, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 180, + id pdb { + mol "5AJ4", + 
rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 181, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 182, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 183, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 184, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 185, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 186, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 187, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 188, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 189, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 190, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 191, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 192, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 193, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 194, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 195, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 196, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 197, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 198, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 199, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 200, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 201, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 202, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 203, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 204, + id pdb { + 
mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 205, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 206, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 207, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 208, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 209, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 210, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 211, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 212, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 213, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 214, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 215, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 216, + id pdb 
{ + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 217, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 218, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 219, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 220, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 221, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 222, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 223, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 224, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 225, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 226, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 227, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 228, + id 
pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 229, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 230, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 231, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 232, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 233, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 234, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 235, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 236, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 237, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 238, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 239, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 240, 
+ id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 241, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 242, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 243, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 244, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 245, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 246, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 247, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 248, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 249, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 250, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 251, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 
252, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 253, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 254, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 255, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 256, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 257, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 258, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 259, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 260, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 261, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 262, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 263, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + 
point 264, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 265, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 266, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 267, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 268, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 269, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 270, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 271, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 272, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 273, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 274, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 275, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt 
{ + point 276, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 277, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 278, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 279, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 280, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 281, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 282, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 283, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 284, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 285, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 286, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 287, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location 
pnt { + point 288, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 289, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 290, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 291, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 292, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 293, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 294, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 295, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 296, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 297, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 298, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 299, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + 
location pnt { + point 300, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 301, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 302, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 303, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 304, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 305, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 306, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 307, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 308, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 309, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 310, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 311, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue 
"UNK", + location pnt { + point 312, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 313, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 314, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 315, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 316, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 317, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 318, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 319, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 320, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 321, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 322, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 323, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data 
non-std-residue "UNK", + location pnt { + point 324, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 325, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 326, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 327, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 328, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 329, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 330, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 331, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 332, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 333, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 334, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data non-std-residue "UNK COOH", + location pnt { + point 335, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + 
{ + data psec-str helix, + comment "helix 82", + location int { + from 1, + to 14, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 83", + location int { + from 22, + to 34, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 84", + location int { + from 41, + to 51, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 85", + location int { + from 61, + to 69, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 86", + location int { + from 77, + to 90, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 87", + location int { + from 93, + to 106, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 88", + location int { + from 112, + to 125, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 89", + location int { + from 129, + to 141, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 90", + location int { + from 147, + to 163, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 91", + location int { + from 169, + to 180, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 92", + location int { + from 187, + to 201, + id pdb { + 
mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 93", + location int { + from 221, + to 232, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 94", + location int { + from 250, + to 263, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 95", + location int { + from 268, + to 280, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 96", + location int { + from 284, + to 291, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data psec-str helix, + comment "helix 97", + location int { + from 295, + to 334, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data region "Domain 22", + comment "NCBI Domains", + location int { + from 19, + to 91, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + }, + { + data region "Domain 23", + comment "NCBI Domains", + location mix { + int { + from 92, + to 166, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + }, + int { + from 283, + to 293, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + } + }, + { + data region "Domain 24", + comment "NCBI Domains", + location int { + from 167, + to 282, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ae" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Af" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN MS28, MRPS28", + source { + org { + taxname "Sus 
scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 188, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 188, + seq-data ncbieaa "MAALCRTRAVTAKSHFLRVFFFSRPCRGTGTESGSGSESSESTEPKQRPGGF 
+ASALERHSELQQKAEFGRMRGSPKNVESFASMLRHSPLTQMGPAKNKLVIGQIFHIVEDDLYIDFGGKFHCVCKRPEV +DGEKYQKGTRVRLRLLDLELTSRFLGATTDTTILEADAVLLGLQESKDSKSKEERHEK" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 98", + location int { + from 96, + to 101, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Af" + } + } + }, + { + data psec-str sheet, + comment "strand 99", + location int { + from 102, + to 106, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Af" + } + } + }, + { + data psec-str sheet, + comment "strand 100", + location int { + from 110, + to 116, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Af" + } + } + }, + { + data psec-str sheet, + comment "strand 101", + location int { + from 121, + to 126, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Af" + } + } + }, + { + data psec-str sheet, + comment "strand 102", + location int { + from 137, + to 141, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Af" + } + } + }, + { + data psec-str sheet, + comment "strand 103", + location int { + from 142, + to 146, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Af" + } + } + }, + { + data psec-str sheet, + comment "strand 104", + location int { + from 149, + to 152, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Af" + } + } + }, + { + data psec-str sheet, + comment "strand 105", + location int { + from 159, + to 162, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Af" + } + } + }, + { + data psec-str sheet, + comment "strand 106", + location int { + from 165, + to 171, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Af" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, 
+ day 4 + }, + chain-id "Ag" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN MS29, MRPS29", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 397, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + 
"186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325", + "326", + "327", + "328", + "329", + "330", + "331", + "332", + "333", + "334", + "335", + "336", + "337", + "338", + "339", + "340", + "341", + "342", + "343", + "344", + "345", + "346", + "347", + "348", + "349", + "350", + "351", + "352", + "353", + "354", + "355", + "356", + "357", + "358", + "359", + "360", + "361", + "362", + "363", + "364", + "365", + "366", + "367", + "368", + "369", + "370", + "371", + "372", + "373", + "374", + "375", + "376", + "377", + "378", + "379", + "380", + "381", + "382", + "383", + "384", + "385", + "386", + "387", + "388", + "389", + "390", + "391", + "392", + "393", + "394", + "395", + "396", + "397" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 397, + 
seq-data ncbieaa "MLKGVTRLISRVHKLDPGHVLYMGTQAPQSLAAHLDNQVPVKSPRAISRTSX +XXXXXXXXXXXXXXXXXSLQELKTVFPHGLPPRFAMQVKTFNEACLMVRKPALELLHYLKNTNFAHPAVRYVLYGEKG +TGKTLSLCHILHFCAKQNWLILHIPDAHIWVKNCRDLLQSNYNKQRFDQPLEASTWLKNFKTANEHFLSQIKVQEKYV +WNKRESTEKGRPLGEVVEQGIMRVRNATDAVGIVLKELKRQSSLGIFHLLVAVDGVNALWGRTTLKREDKSPIAPEEL +ALIHNLRKMVKNDWQGGAIVLTVSQTGSLFKPRNAYLPQELLGKEGFDALDPFIPILVSNYNPKEFESCIQYYLENNW +LQHEKAHTEEGKKELLFLSNRNPGQLERLCAYL" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK", + location pnt { + point 51, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 52, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 53, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 54, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 55, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 56, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 57, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 58, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 59, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + 
location pnt { + point 60, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 61, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 62, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 63, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 64, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 65, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 66, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 67, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 68, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str helix, + comment "helix 98", + location int { + from 70, + to 77, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str helix, + comment "helix 99", + location int { + from 83, + to 92, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str helix, + comment "helix 100", + location int { + from 101, + to 112, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, 
+ chain-id "Ag" + } + } + }, + { + data psec-str helix, + comment "helix 101", + location int { + from 132, + to 146, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str helix, + comment "helix 102", + location int { + from 179, + to 191, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str helix, + comment "helix 103", + location int { + from 221, + to 230, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str helix, + comment "helix 104", + location int { + from 234, + to 248, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str helix, + comment "helix 105", + location int { + from 286, + to 293, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str helix, + comment "helix 106", + location int { + from 328, + to 335, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str helix, + comment "helix 107", + location int { + from 348, + to 361, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str helix, + comment "helix 108", + location int { + from 373, + to 382, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str helix, + comment "helix 109", + location int { + from 386, + to 393, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 107", + location int { + from 64, + to 68, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 108", 
+ location int { + from 95, + to 99, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 109", + location int { + from 119, + to 126, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 110", + location int { + from 148, + to 154, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 111", + location int { + from 166, + to 171, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 112", + location int { + from 173, + to 178, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 113", + location int { + from 200, + to 203, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 114", + location int { + from 204, + to 208, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 115", + location int { + from 212, + to 219, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 116", + location int { + from 255, + to 262, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 117", + location int { + from 272, + to 275, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 118", + location int { + from 277, + to 280, + id pdb { + mol "5AJ4", + rel 
std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 119", + location int { + from 301, + to 308, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data psec-str sheet, + comment "strand 120", + location int { + from 338, + to 343, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data region "Domain 25", + comment "NCBI Domains", + location mix { + int { + from 0, + to 100, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + }, + int { + from 346, + to 367, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + } + }, + { + data region "Domain 26", + comment "NCBI Domains", + location mix { + int { + from 101, + to 160, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + }, + int { + from 252, + to 345, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + } + }, + { + data region "Domain 27", + comment "NCBI Domains", + location int { + from 179, + to 251, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + }, + { + data het "(MG,167367)", + location mix { + bond { + a { + point 133, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ag" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ah" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN MS31, MRPS31", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; 
Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 103, + names { + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325", + "326", + "327", + "328", + "329", + "330", + "331", + "332", + "333", + "334", + "335", + "336", + "337", + "338", + "339", + "340", + "341", + "342", + "343", + "344", + "345", + "346", + "347", + "348", + "349", + "350", + "351", + "352", + "353", + "354", + "355", + "356", + "357", + "358", + "359", + "360", + "361", + "362", + "363", + "364", + "365", + "366", + "367", + "368", + "369", + "370", + "371", + "372", + "373", + "374", + "375", + "376", + "377", + "378", + "379", + "380", + "381", + "382", + "383", + "384", + "385", + "386", + "387" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 103, + seq-data ncbieaa "PFQNGFEEMIQWTKEGKLWEFPINNEAGFDDDGSEFHEHIFLDKYLQDFPKQ +GPIRHFMELVTCGLSKNPYLSVKQKVEHIEWFRNYFNEKRVILKESGIQLN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 110", + location int { + from 4, + to 14, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ah" + } + } + }, + { + data psec-str helix, + comment "helix 111", + location int { + from 54, + to 67, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ah" + } + } + }, + { + data psec-str helix, + comment "helix 112", + location int { + from 74, + to 89, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ah" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, 
+ chain-id "Ai" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN MS33, MRPS33", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 99, + names { + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 99, + seq-data ncbieaa "LSEYAVRMSRLSARLFGEVARPTDSKSMKVVKLFSEQPLAKRKETYDWYPNH +NTYFALMGTLRFLGLYRDEHQDFRDEQLRLKKLRGKGKPRKGEGKRA" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 113", + location int { + from 2, + to 15, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ai" + } + } + }, + { + data psec-str helix, + comment "helix 114", + location int { + from 27, + to 35, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ai" + } + } + }, + { + data psec-str helix, + comment "helix 115", + location int { 
+ from 51, + to 64, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ai" + } + } + }, + { + data psec-str helix, + comment "helix 116", + location int { + from 70, + to 85, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ai" + } + } + }, + { + data region "Domain 28", + comment "NCBI Domains", + location int { + from 0, + to 67, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ai" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN MS34, MRPS34", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 218, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + 
"109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 218, + seq-data ncbieaa "MARKKVRPRLIAELARRVRALREQRERPRDSQRYALDYETMMRPHSGNRLPM +RAWADVRHESRLLQLLNRLPLFGLGRLVTRKSWLWQHDEPCYWRLTRVRPDYAAENLDHGKAWGILTFKGKTESEARE +IEQVMYHDWRLVPKHEEEAFTXXXXXXXDTPLSVPYPPLLRAMILAQRQKYGDTSTEEPMLNLERARIDPWDYPAKLE +AKTKTKGAAV" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK", + location pnt { + point 151, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 152, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 153, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 154, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id 
"Aj" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 155, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 156, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 157, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str helix, + comment "helix 117", + location int { + from 11, + to 25, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str helix, + comment "helix 118", + location int { + from 145, + to 152, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str helix, + comment "helix 119", + location int { + from 167, + to 180, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str sheet, + comment "strand 121", + location int { + from 40, + to 43, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str sheet, + comment "strand 122", + location int { + from 46, + to 49, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str sheet, + comment "strand 123", + location int { + from 78, + to 82, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str sheet, + comment "strand 124", + location int { + from 91, + to 97, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str sheet, + comment "strand 125", + location int { + from 98, + to 103, + id pdb { + mol "5AJ4", + rel std { + year 2018, + 
month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str sheet, + comment "strand 126", + location int { + from 110, + to 115, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str sheet, + comment "strand 127", + location int { + from 116, + to 120, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str sheet, + comment "strand 128", + location int { + from 121, + to 125, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str sheet, + comment "strand 129", + location int { + from 127, + to 130, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str sheet, + comment "strand 130", + location int { + from 137, + to 141, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str sheet, + comment "strand 131", + location int { + from 161, + to 165, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data psec-str sheet, + comment "strand 132", + location int { + from 188, + to 192, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data region "Domain 29", + comment "NCBI Domains", + location int { + from 64, + to 156, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + }, + { + data region "Domain 30", + comment "NCBI Domains", + location int { + from 157, + to 217, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Aj" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN MS35, MRPS35", + source { + 
org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 275, + names { + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + 
"231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 275, + seq-data ncbieaa "RRKALPPRTEKMAVDQDWPSVYPVAAPFKPSAVPLPVRMGYPVKRGVPMAKE +GNLELLKIPNFLHLTPVAIKRHCEALKDFCTEWPAALDSDEKCEKHFPIEIDTADYVSAGPSIRNPKARVVTLRVKLS +SLNLDDHAKKKLIKLVGDRYCKSTDVLTIKTDRCPLKRQNYDYAVYLLTVLYHESWKTEEWEKKKTEADMEEYIWENS +TSEKNILETLLQIKAAEKNLELSKEELLGTKEVEDYRKSVVSLKNEGDNENTLSQYKESVKRLLNLA" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 120", + location int { + from 67, + to 77, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data psec-str helix, + comment "helix 121", + location int { + from 90, + to 98, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data psec-str helix, + comment "helix 122", + location int { + from 135, + to 145, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data psec-str helix, + comment "helix 123", + location int { + from 166, + to 185, + id pdb { + mol "5AJ4", + rel std { + year 2018, + 
month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data psec-str helix, + comment "helix 124", + location int { + from 208, + to 224, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data psec-str helix, + comment "helix 125", + location int { + from 230, + to 237, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data psec-str helix, + comment "helix 126", + location int { + from 239, + to 252, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data psec-str helix, + comment "helix 127", + location int { + from 257, + to 271, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data psec-str sheet, + comment "strand 133", + location int { + from 100, + to 104, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data psec-str sheet, + comment "strand 134", + location int { + from 121, + to 128, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data psec-str sheet, + comment "strand 135", + location int { + from 148, + to 152, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data psec-str sheet, + comment "strand 136", + location int { + from 153, + to 161, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data region "Domain 31", + comment "NCBI Domains", + location int { + from 100, + to 196, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + }, + { + data region "Domain 32", + comment "NCBI Domains", + location int { + from 197, + to 274, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ak" + } + } + } + } + } + } 
+ }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Am" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN MS37, MRPS37", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 116, + names { + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 116, + seq-data ncbieaa "ATPSLRGRLARLGNPRRPILKPNKPLILANHVGERRREKGEATCITEMSVMM +ACWKQNEFRDEACRKEIQDFFDCASRAEAARKMRSIQENLGELGSLPPKKLNKLLHRFPNKPHV" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 128", + location int { + from 45, + to 57, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Am" + } + } + 
}, + { + data psec-str helix, + comment "helix 129", + location int { + from 66, + to 87, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Am" + } + } + }, + { + data psec-str helix, + comment "helix 130", + location int { + from 100, + to 109, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Am" + } + } + }, + { + data bond disulfide, + location bond { + a { + point 43, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Am" + } + }, + b { + point 74, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Am" + } + } + } + }, + { + data bond disulfide, + location bond { + a { + point 53, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Am" + } + }, + b { + point 64, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Am" + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "An" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN MS38, MRPS38", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 72, + names { + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + 
"174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 72, + seq-data ncbieaa "KNILKIRRRKMNHHKYRKLVKRTRFLRRKVREGRLRQKQVKFERDLKRIWQK +AGLKEAPAGWQTPKIYLKNQ" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 131", + location int { + from 2, + to 22, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "An" + } + } + }, + { + data psec-str helix, + comment "helix 132", + location int { + from 23, + to 52, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "An" + } + } + }, + { + data het "(MG,167210)", + location mix { + bond { + a { + point 11, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "An" + } + } + }, + bond { + a { + point 11, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "An" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN MS39, MRPS39", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 530, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + 
"302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325", + "326", + "327", + "328", + "329", + "330", + "335", + "336", + "337", + "338", + "339", + "340", + "341", + "342", + "343", + "344", + "345", + "346", + "347", + "348", + "349", + "355", + "356", + "357", + "358", + "359", + "360", + "361", + "362", + "363", + "364", + "365", + "366", + "367", + "368", + "371", + "372", + "373", + "374", + "375", + "376", + "377", + "378", + "379", + "380", + "381", + "382", + "383", + "384", + "385", + "390", + "391", + "392", + "393", + "394", + "395", + "396", + "397", + "398", + "399", + "400", + "401", + "402", + "403", + "404", + "405", + "410", + "411", + "412", + "413", + "414", + "415", + "416", + "417", + "418", + "419", + "420", + "421", + "422", + "423", + "424", + "425", + "426", + "427", + "428", + "429", + "430", + "433", + "434", + "435", + "436", + "437", + "438", + "439", + "440", + "441", + "442", + "443", + "444", + "445", + "446", + "447", + "456", + "457", + "458", + "459", + "460", + "461", + "462", + "463", + "464", + "465", + "466", + "467", + "468", + "469", + "470", + "475", + "476", + "477", + "478", + "479", + "480", + "481", + "482", + "483", + "484", + "485", + "486", + "487", + "488", + "489", + "490", + "492", + "493", + "494", + "495", + "496", + "497", + "498", + "499", + "500", + "501", + "502", + "503", + "504", + "505", + "506", + "507", + "510", + "511", + "512", + "513", + "514", + "515", + "516", + "517", + "518", + "519", + "520", + "521", + "522", + "523", + "524", + "525", + "531", + "532", + "533", + "534", + "535", + "536", + "537", + "538", + "539", + "540", + "541", + "542", + "546", + "547", + "548", + "549", + "550", + "551", + "552", + "553", + "554", + "555", + "556", + "557", + "558", + "559", + "560", + "575", + "576", + "577", + "578", + "579", + "580", + "581", + "582", + "583", + "584", + "585", + 
"593", + "594", + "595", + "596", + "597", + "598", + "599", + "600", + "601", + "602", + "603", + "604", + "605", + "606", + "607", + "651", + "652", + "653", + "654", + "655", + "656", + "657", + "658", + "659", + "660", + "661", + "666", + "667", + "668", + "669", + "670", + "671", + "672", + "673", + "674", + "675", + "676", + "677" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 530, + seq-data ncbieaa "MAAVASARWLGVRSGLCLPLTGRRVGPCGRTPRSRFYSGSAAHPEVEGANVT +GIEEVVIPKKKTWDKVAILQALASTVHRDSTAAPYVFQDDPYLIPTSSVESHSFLLAKKSGENAAKFIINSYPKYFQK +DIAEPHIPCLMPEXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXX" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK", + location pnt { + point 143, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 144, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 145, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 146, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 147, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 148, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 
+ }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 149, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 150, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 151, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 152, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 153, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 154, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 155, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 156, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 157, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 158, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 159, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 160, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + 
day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 161, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 162, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 163, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 164, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 165, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 166, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 167, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 168, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 169, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 170, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 171, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 172, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 
4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 173, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 174, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 175, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 176, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 177, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 178, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 179, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 180, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 181, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 182, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 183, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 184, + id pdb { + mol "5AJ4", + rel std { + year 2018, + 
month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 185, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 186, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 187, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 188, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 189, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 190, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 191, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 192, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 193, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 194, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 195, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 196, + id pdb { + mol "5AJ4", + rel std { + year 
2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 197, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 198, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 199, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 200, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 201, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 202, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 203, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 204, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 205, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 206, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 207, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 208, + id pdb { + mol "5AJ4", + rel std { + 
year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 209, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 210, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 211, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 212, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 213, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 214, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 215, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 216, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 217, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 218, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 219, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 220, + id pdb { + mol "5AJ4", + rel std 
{ + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 221, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 222, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 223, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 224, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 225, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 226, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 227, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 228, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 229, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 230, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 231, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 232, + id pdb { + mol "5AJ4", + rel 
std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 233, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 234, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 235, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 236, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 237, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 238, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 239, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 240, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 241, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 242, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 243, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 244, + id pdb { + mol "5AJ4", + 
rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 245, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 246, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 247, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 248, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 249, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 250, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 251, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 252, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 253, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 254, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 255, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 256, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 257, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 258, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 259, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 260, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 261, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 262, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 263, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 264, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 265, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 266, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 267, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 268, + id pdb { + 
mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 269, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 270, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 271, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 272, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 273, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 274, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 275, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 276, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 277, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 278, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 279, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 280, + id pdb 
{ + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 281, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 282, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 283, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 284, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 285, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 286, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 287, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 288, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 289, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 290, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 291, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 292, + id 
pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 293, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 294, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 295, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 296, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 297, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 298, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 299, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 300, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 301, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 302, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 303, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 304, 
+ id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 305, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 306, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 307, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 308, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 309, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 310, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 311, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 312, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 313, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 314, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 315, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 
316, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 317, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 318, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 319, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 320, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 321, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 322, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 323, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 324, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 325, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 326, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 327, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + 
point 328, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 329, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 330, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 331, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 332, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 333, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 334, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 335, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 336, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 337, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 338, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 339, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt 
{ + point 340, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 341, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 342, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 343, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 344, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 345, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 346, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 347, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 348, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 349, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 350, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 351, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location 
pnt { + point 352, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 353, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 354, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 355, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 356, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 357, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 358, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 359, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 360, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 361, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 362, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 363, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + 
location pnt { + point 364, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 365, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 366, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 367, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 368, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 369, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 370, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 371, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 372, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 373, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 374, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 375, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue 
"UNK", + location pnt { + point 376, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 377, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 378, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 379, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 380, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 381, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 382, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 383, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 384, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 385, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 386, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 387, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data 
non-std-residue "UNK", + location pnt { + point 388, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 389, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 390, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 391, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 392, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 393, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 394, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 395, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 396, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 397, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 398, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 399, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + 
data non-std-residue "UNK", + location pnt { + point 400, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 401, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 402, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 403, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 404, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 405, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 406, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 407, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 408, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 409, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 410, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 411, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + 
{ + data non-std-residue "UNK", + location pnt { + point 412, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 413, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 414, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 415, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 416, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 417, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 418, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 419, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 420, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 421, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 422, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 423, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + 
}, + { + data non-std-residue "UNK", + location pnt { + point 424, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 425, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 426, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 427, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 428, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 429, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 430, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 431, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 432, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 433, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 434, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 435, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + 
} + }, + { + data non-std-residue "UNK", + location pnt { + point 436, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 437, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 438, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 439, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 440, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 441, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 442, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 443, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 444, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 445, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 446, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 447, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + 
} + } + }, + { + data non-std-residue "UNK", + location pnt { + point 448, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 449, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 450, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 451, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 452, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 453, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 454, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 455, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 456, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 457, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 458, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 459, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id 
"Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 460, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 461, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 462, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 463, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 464, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 465, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 466, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 467, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 468, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 469, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 470, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 471, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + 
chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 472, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 473, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 474, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 475, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 476, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 477, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 478, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 479, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 480, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 481, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 482, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 483, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + 
}, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 484, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 485, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 486, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 487, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 488, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 489, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 490, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 491, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 492, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 493, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 494, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 495, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 
4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 496, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 497, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 498, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 499, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 500, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 501, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 502, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 503, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 504, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 505, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 506, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 507, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + 
day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 508, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 509, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 510, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 511, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 512, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 513, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 514, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 515, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 516, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 517, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 518, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 519, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 
4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 520, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 521, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 522, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 523, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 524, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 525, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 526, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 527, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 528, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data non-std-residue "UNK COOH", + location pnt { + point 529, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 133", + location int { + from 67, + to 75, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 134", + location int { + from 99, + to 123, + 
id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 135", + location int { + from 146, + to 154, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 136", + location int { + from 157, + to 168, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 137", + location int { + from 172, + to 182, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 138", + location int { + from 186, + to 198, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 139", + location int { + from 202, + to 215, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 140", + location int { + from 218, + to 228, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 141", + location int { + from 233, + to 245, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 142", + location int { + from 249, + to 260, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 143", + location int { + from 264, + to 277, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 144", + location int { + from 282, + to 293, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + 
chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 145", + location int { + from 297, + to 307, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 146", + location int { + from 311, + to 321, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 147", + location int { + from 326, + to 337, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 148", + location int { + from 341, + to 352, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 149", + location int { + from 357, + to 374, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 150", + location int { + from 377, + to 388, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 151", + location int { + from 392, + to 403, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 152", + location int { + from 408, + to 420, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 153", + location int { + from 423, + to 436, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 154", + location int { + from 439, + to 451, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 155", + 
location int { + from 455, + to 464, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 156", + location int { + from 467, + to 478, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 157", + location int { + from 483, + to 490, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 158", + location int { + from 495, + to 505, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 159", + location int { + from 508, + to 515, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data psec-str helix, + comment "helix 160", + location int { + from 521, + to 528, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data region "Domain 33", + comment "NCBI Domains", + location int { + from 171, + to 262, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data region "Domain 34", + comment "NCBI Domains", + location int { + from 263, + to 354, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data region "Domain 35", + comment "NCBI Domains", + location int { + from 355, + to 405, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data region "Domain 36", + comment "NCBI Domains", + location int { + from 406, + to 480, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + }, + { + data region "Domain 37", + comment "NCBI Domains", + location int { + from 481, + to 529, + id 
pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ao" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ap" + } + }, + descr { + comment "28S RIBOSOMAL PROTEIN S18B, MITOCHONDRIAL", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 188, + names { + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", 
+ "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 188, + seq-data ncbieaa "YKDEPWKYLDSEEYQNRYGSRPVWADYRRNHKGGIPPQRTRKMCIRGNKVAG +NPCPICRDQKLHVDFRNVKLLEQFVCAHTGIIFHAPYTGVCMKQHKKLTQAIQKARDHGLLRYHIPQVEPRDLDFSTT +HGAVSSTPPAPTLVSGDPWYPWYSWKQPPERELSRLRRLYQGRLREESGPPPELMPEV" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 161", + location int { + from 10, + to 17, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ap" + } + } + }, + { + data psec-str helix, + comment "helix 162", + location int { + from 93, + to 109, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ap" + } + } + }, + { + data psec-str helix, + comment "helix 163", + location int { + from 159, + to 169, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ap" + } + } + }, + { + data psec-str sheet, + comment "strand 137", + location int { + from 43, + to 46, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ap" + } + } + }, + { + data psec-str sheet, + comment "strand 138", + location int { + from 47, + to 50, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ap" + } + } + }, + { + data psec-str sheet, + comment "strand 139", + location int { + from 75, + to 79, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ap" + } + } + }, + { + data psec-str sheet, + comment "strand 140", + location int { + from 80, + to 84, + id pdb { + mol "5AJ4", + rel std { + year 2018, + 
month 4, + day 4 + }, + chain-id "Ap" + } + } + }, + { + data region "Domain 38", + comment "NCBI Domains", + location int { + from 63, + to 134, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ap" + } + } + }, + { + data het "(ZN,167368)", + location mix { + bond { + a { + point 54, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ap" + } + } + }, + bond { + a { + point 43, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ap" + } + } + }, + bond { + a { + point 57, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ap" + } + } + }, + bond { + a { + point 92, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ap" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + }, + descr { + comment "UNASSIGNED HELICES", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 16, + names { + "2360", + "2361", + "2362", + "2363", + "2364", + "2365", + "2366", + "2367", + "2368", + "2369", + "2370", + "2371", + "2372", + "2373", + "2374", + "2375" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 16, + seq-data ncbieaa "XXXXXXXXXXXXXXXX" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK NH3+", + location pnt { + point 0, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 1, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 2, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 3, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 4, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 5, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 6, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 7, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 8, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 9, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 10, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 11, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 12, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 13, + id pdb { + mol "5AJ4", + rel 
std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 14, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data non-std-residue "UNK COOH", + location pnt { + point 15, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + }, + { + data psec-str helix, + comment "helix 164", + location int { + from 1, + to 14, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "As" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + }, + descr { + comment "UNASSIGNED HELICES", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 17, + names { + "692", + "693", + "694", + "695", + "696", + "697", + "698", + "699", + "700", + "701", + "702", + "703", + "704", + "705", + "706", + "707", + "708" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 17, + seq-data ncbieaa "XXXXXXXXXXXXXXXXX" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK NH3+", + location pnt { + point 0, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 1, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 2, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id 
"Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 3, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 4, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 5, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 6, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 7, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 8, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 9, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 10, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 11, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 12, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 13, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 14, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + 
}, + { + data non-std-residue "UNK", + location pnt { + point 15, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data non-std-residue "UNK COOH", + location pnt { + point 16, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + }, + { + data psec-str helix, + comment "helix 165", + location int { + from 8, + to 15, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Az" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B0" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL27M, MRPL27", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 148, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", 
+ "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 148, + seq-data ncbieaa "MALAVLALRTRAAVTALLSPPQAAALAVRYASKKTGGSSKNLGGKSPGKRFG +IKKMEGHYVHAGNILATQRHFRWHPGAHVGLGKNKCLYALEEGVVRYTKEVYVPNPSNSEAVDLVTRLPQGAVLYKTF +VHVVPAKPEGTFKLVAML" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 166", + location int { + from 110, + to 117, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B0" + } + } + }, + { + data psec-str sheet, + comment "strand 141", + location int { + from 50, + to 54, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B0" + } + } + }, + { + data psec-str sheet, + comment "strand 142", + location int { + from 58, + to 63, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B0" + } + } + }, + { + data psec-str sheet, + comment "strand 143", + location int { + from 64, + to 69, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B0" + } + } + }, + { + data psec-str sheet, + comment "strand 144", + location int { + from 79, + to 84, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B0" + } + } + }, + { + data psec-str sheet, + comment "strand 145", + location int { + from 85, + to 91, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B0" + } + } + }, + { + data psec-str sheet, + comment "strand 146", + location int { + from 92, + to 96, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B0" + } + } + }, + { + data psec-str sheet, + comment "strand 
147", + location int { + from 97, + to 103, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B0" + } + } + }, + { + data psec-str sheet, + comment "strand 148", + location int { + from 125, + to 134, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B0" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL28M, MRPL28", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 256, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + 
"128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 256, + seq-data ncbieaa "MPLHKVPVGLWKQLRLWEGIYSRLPRHYLRSLEEARTPTPVHYRPHGAKFKI +NPKNWQRERVEDVPIPVHYPPESQLGLWGGEGWVLGHRYVNNDKLSKRVRKVWKPQLFQRELYSEILDKRFTVTVTMR +TLDLIDQACGFDFYILKTPKEDLCSKFGMDLKRGMLLRLARQDPQLHPDDPARRAAIYDRYKAFVIPEAEAEWVGLTL +DEAVEKQRLLEEKDPIPLFKIFVEELLGQLQQQALSEPAVVQTRASRK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 167", + location int { + from 9, + to 17, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data psec-str helix, + comment "helix 168", + location int { + from 24, + to 32, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data psec-str helix, + comment "helix 169", + location int { + from 128, + to 
137, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data psec-str helix, + comment "helix 170", + location int { + from 139, + to 147, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data psec-str helix, + comment "helix 171", + location int { + from 155, + to 170, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data psec-str helix, + comment "helix 172", + location int { + from 180, + to 188, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data psec-str helix, + comment "helix 173", + location int { + from 207, + to 218, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data psec-str helix, + comment "helix 174", + location int { + from 226, + to 242, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data psec-str sheet, + comment "strand 149", + location int { + from 48, + to 53, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data psec-str sheet, + comment "strand 150", + location int { + from 56, + to 61, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data psec-str sheet, + comment "strand 151", + location int { + from 83, + to 91, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data psec-str sheet, + comment "strand 152", + location int { + from 98, + to 106, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data psec-str sheet, + comment "strand 153", + location int { + from 108, + to 116, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + 
chain-id "B1" + } + } + }, + { + data psec-str sheet, + comment "strand 154", + location int { + from 118, + to 127, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + }, + { + data region "Domain 39", + comment "NCBI Domains", + location int { + from 108, + to 197, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B1" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B2" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL29M, MRPL47", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 252, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", 
+ "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 252, + seq-data ncbieaa "MAAAGLAVFCRRVSAALKACRLLIRPQAPPSTSCRFSPSLLPKNTPNVTSFH +QFRIFHTTFSRRGLEEFFDDPKNWGEEKVKSGASWTCQQLRNKSNEDLHKLWYVLLKERNMLLTLEQEAKRQRLPMPS +PERLEKVVDSMDALDKVVQEREDALRLLQTGQEKARPGAWRRDIFGRIIWHKFKQWPIPWYLNKKYNRKRFFAMPYVE +RFVRMRIEKQARIKARKRSLERKKEKFLQEKFPHLSETQKSSHV" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 175", + location int { + from 96, + to 123, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B2" + } + } + }, + { + data psec-str helix, + comment "helix 176", + location int { + from 132, + to 158, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B2" + } + } + }, + { + data psec-str helix, + comment "helix 177", + location int { + from 207, + to 239, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 
+ }, + chain-id "B2" + } + } + }, + { + data psec-str sheet, + comment "strand 155", + location int { + from 166, + to 173, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B2" + } + } + }, + { + data psec-str sheet, + comment "strand 156", + location int { + from 175, + to 182, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B2" + } + } + }, + { + data region "Domain 40", + comment "NCBI Domains", + location int { + from 163, + to 251, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B2" + } + } + }, + { + data het "(MG,167370)", + location mix { + bond { + a { + point 229, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B2" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B3" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL30M, MRPL30", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 161, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + 
"81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 161, + seq-data ncbieaa "MAGILRSVVQRPPGRLQTVTKGMESLICTDWIRHKFTRSRIPDKVFQPSPED +HEKYGGDPQYPHKLHIVTRIKSTKRRPYWEKDIIKMLGLEKAHTPQVHKNIPSVNAKLKVVKHLIRIKPLKLPQGLPT +EEDMANTCLKSNGELVVRWLLNPANQEARKS" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 178", + location int { + from 79, + to 88, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B3" + } + } + }, + { + data psec-str helix, + comment "helix 179", + location int { + from 105, + to 113, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B3" + } + } + }, + { + data psec-str sheet, + comment "strand 157", + location int { + from 64, + to 71, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B3" + } + } + }, + { + data psec-str sheet, + comment "strand 158", + location int { + from 96, + to 101, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B3" + } + } + }, + { + data psec-str sheet, + comment "strand 159", + location int { + from 115, + to 121, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B3" + } + } + }, + { + data psec-str sheet, + comment "strand 
160", + location int { + from 135, + to 140, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B3" + } + } + }, + { + data psec-str sheet, + comment "strand 161", + location int { + from 142, + to 147, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B3" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B4" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL31M, MRPL55", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 126, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 126, + seq-data ncbieaa "MAAKGSLLRLLWQRVVTGAAPESCRHLYTSSWRADCNRALLTRLHRQTYARL 
+YPVLLVKQDGSTIHIRYREPRRMLTMPVDLDSLSPEERRARFRKREAKFKEKKEEPELSDDFDVEQYKRFWTKK" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 162", + location int { + from 51, + to 59, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B4" + } + } + }, + { + data psec-str sheet, + comment "strand 163", + location int { + from 61, + to 69, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B4" + } + } + }, + { + data psec-str sheet, + comment "strand 164", + location int { + from 73, + to 77, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B4" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B5" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL32M, MRPL32", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 188, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + 
"106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 188, + seq-data ncbieaa "MASAMLVLVVPPWPAARGLLRNWWEQLQRKLQHNRLGLPLHPWGPALAVQGP +AICTEPANDTNGSKAISSLLDSVFWMAAPKNRRSIEVNRCRRRNPQKLIKVKNNIDVCPECGHLKQKHILCGYCYEKV +RKETAEIRRQMGKQEGGPFRAPTTETVVLYSGETPSEQDQGKRIIERERKRPSWFTQN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 180", + location int { + from 86, + to 94, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B5" + } + } + }, + { + data psec-str helix, + comment "helix 181", + location int { + from 123, + to 143, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B5" + } + } + }, + { + data psec-str sheet, + comment "strand 165", + location int { + from 105, + to 109, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B5" + } + } + }, + { + data psec-str sheet, + comment "strand 166", + location int { + from 114, + to 118, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B5" + } + } + }, + { + data psec-str sheet, + comment "strand 167", + location int { + from 154, + to 160, + id pdb { + mol "5AJ4", + rel std { + year 2018, + 
month 4, + day 4 + }, + chain-id "B5" + } + } + }, + { + data psec-str sheet, + comment "strand 168", + location int { + from 171, + to 177, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B5" + } + } + }, + { + data region "Domain 41", + comment "NCBI Domains", + location int { + from 121, + to 187, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B5" + } + } + }, + { + data het "(ZN,167371)", + location mix { + bond { + a { + point 109, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B5" + } + } + }, + bond { + a { + point 112, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B5" + } + } + }, + bond { + a { + point 122, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B5" + } + } + }, + bond { + a { + point 125, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B5" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B6" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL33M, MRPL33", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 65, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + 
"48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 65, + seq-data ncbieaa "MFLSAVTFAKSKSKTILVKMMSQAGTGFSFNTKRSRLREKLTLLHYDPVVKK +KVLFVEQKKIRSL" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 169", + location int { + from 13, + to 21, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B6" + } + } + }, + { + data psec-str sheet, + comment "strand 170", + location int { + from 26, + to 34, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B6" + } + } + }, + { + data psec-str sheet, + comment "strand 171", + location int { + from 40, + to 48, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B6" + } + } + }, + { + data psec-str sheet, + comment "strand 172", + location int { + from 49, + to 54, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B6" + } + } + }, + { + data psec-str sheet, + comment "strand 173", + location int { + from 55, + to 58, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B6" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B7" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL34M, MRPL34", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 95, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", 
+ "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 95, + seq-data ncbieaa "MAFLARSVGRLLDPVSRSAALVGGRWLQPQAWLGFPDTWGLPAMQQTRGKAR +GNEYQPSNIKRKHKHGWVRRLRTPTGVQVILRRMHKGRKSLSH" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 182", + location int { + from 58, + to 66, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B7" + } + } + }, + { + data psec-str helix, + comment "helix 183", + location int { + from 75, + to 87, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B7" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B8" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL35M, MRPL35", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 188, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 188, + seq-data ncbieaa "MAASAFAGAVRAATGILRPLHILASSAYQNCAKNACLSSVLSSRHFSHIQTS +ALLSAPRLITSVRNLMCGPTAPVLNRVAPLLPHVLKPPVRTVTYFSSRKGKRKTVKAVIYRFLRLHSGLWLRRKAGYK +KKLWKKTAARKRRLREFVFCNKTQSKLLDKMTTSFWKRRNWYADDPYQKYQDRTNLKV" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 184", + location int { + from 136, + to 144, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B8" + } + } + }, + { + data psec-str helix, + comment "helix 185", + location int { + from 151, + to 160, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B8" + } + } + }, + { + data psec-str sheet, + comment 
"strand 174", + location int { + from 113, + to 117, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B8" + } + } + }, + { + data psec-str sheet, + comment "strand 175", + location int { + from 119, + to 124, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B8" + } + } + }, + { + data psec-str sheet, + comment "strand 176", + location int { + from 146, + to 149, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B8" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B9" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL36M, MRPL36", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 100, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 100, + seq-data ncbieaa 
"MATAFLRTVLSAVGPLLHLGGRPLSTFAAGPPRAALAVGAQPSPAAALLSAR +PLLGPQPALGFKTKGVLKKRCRDCYLVKRRGRWFIYCKTNPKHKQRQM" + }, + annot { + { + data ftable { + { + data psec-str sheet, + comment "strand 177", + location int { + from 62, + to 66, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B9" + } + } + }, + { + data psec-str sheet, + comment "strand 178", + location int { + from 75, + to 81, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B9" + } + } + }, + { + data psec-str sheet, + comment "strand 179", + location int { + from 82, + to 88, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B9" + } + } + }, + { + data psec-str sheet, + comment "strand 180", + location int { + from 94, + to 99, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B9" + } + } + }, + { + data het "(ZN,167372)", + location mix { + bond { + a { + point 94, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B9" + } + } + }, + bond { + a { + point 72, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B9" + } + } + }, + bond { + a { + point 75, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B9" + } + } + }, + bond { + a { + point 88, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "B9" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + }, + descr { + comment "MITORIBOSOMAL 16S Ribosomal RNA", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + 
Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 1570, + names { + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "", + "", + "", + "", + "", + "", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "", + "", + "", + "", + "", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + 
"226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325", + "326", + "327", + "328", + "329", + "330", + "331", + "332", + "333", + "334", + "335", + "336", + "337", + "338", + "339", + "340", + "341", + "342", + "343", + "344", + "345", + "346", + "347", + "348", + "349", + "350", + "351", + "352", + "353", + "354", + "355", + "356", + "357", + "358", + "359", + "360", + "361", + "362", + "363", + "364", + "365", + "366", + "367", + "368", + "369", + "370", + "371", + "372", + "373", + "374", + "375", + "376", + "377", + "378", + "379", + "380", + "381", + "382", + "383", + "384", + "385", + "386", + "387", + "388", + "389", + "390", + "391", + "392", + "393", + "394", + "395", + "396", + "397", + "398", + "399", + "400", + "401", + "402", + "403", + "404", + "405", + "406", + "407", + "408", + "409", + "410", + "411", + "412", + "413", + "414", + "415", + "416", + "417", + "418", + "419", + "420", + "421", + "422", + "423", + "424", + "425", + "426", + "427", + "428", + "429", + "430", + "431", + "432", + "433", + "434", + "435", + "436", + "437", + "438", + "439", + "440", + "441", + "442", + "443", + "444", + "445", + "446", + "447", + 
"448", + "449", + "450", + "451", + "452", + "453", + "454", + "455", + "456", + "457", + "458", + "459", + "460", + "461", + "462", + "463", + "464", + "465", + "466", + "467", + "468", + "469", + "470", + "471", + "472", + "473", + "474", + "475", + "476", + "477", + "478", + "479", + "480", + "481", + "482", + "483", + "484", + "485", + "486", + "487", + "488", + "489", + "490", + "491", + "492", + "493", + "494", + "495", + "496", + "497", + "498", + "499", + "500", + "501", + "502", + "503", + "504", + "505", + "506", + "507", + "508", + "509", + "510", + "511", + "512", + "513", + "514", + "515", + "516", + "517", + "518", + "519", + "520", + "521", + "522", + "523", + "524", + "525", + "526", + "527", + "528", + "529", + "530", + "531", + "532", + "533", + "534", + "535", + "536", + "537", + "538", + "539", + "540", + "541", + "542", + "543", + "544", + "545", + "546", + "547", + "548", + "549", + "550", + "551", + "552", + "553", + "554", + "555", + "556", + "557", + "558", + "559", + "560", + "561", + "562", + "563", + "564", + "565", + "566", + "567", + "568", + "569", + "570", + "571", + "572", + "573", + "574", + "575", + "576", + "577", + "578", + "579", + "580", + "581", + "582", + "583", + "584", + "585", + "586", + "587", + "588", + "589", + "590", + "591", + "592", + "593", + "594", + "595", + "596", + "597", + "598", + "599", + "600", + "601", + "602", + "603", + "604", + "605", + "606", + "607", + "608", + "609", + "610", + "611", + "612", + "613", + "614", + "615", + "616", + "617", + "618", + "619", + "620", + "621", + "622", + "623", + "624", + "625", + "626", + "627", + "628", + "629", + "630", + "631", + "632", + "633", + "634", + "635", + "636", + "637", + "638", + "639", + "640", + "641", + "642", + "643", + "644", + "645", + "646", + "647", + "648", + "649", + "650", + "651", + "652", + "653", + "654", + "655", + "656", + "657", + "658", + "659", + "660", + "661", + "662", + "663", + "664", + "665", + "666", + "667", + "668", + "669", + 
"670", + "671", + "672", + "673", + "674", + "675", + "676", + "677", + "678", + "679", + "680", + "681", + "682", + "683", + "684", + "685", + "686", + "687", + "688", + "689", + "690", + "691", + "692", + "693", + "694", + "695", + "696", + "697", + "698", + "699", + "700", + "701", + "702", + "703", + "704", + "705", + "706", + "707", + "708", + "709", + "710", + "711", + "712", + "713", + "714", + "715", + "716", + "717", + "718", + "719", + "720", + "721", + "722", + "723", + "724", + "725", + "726", + "727", + "728", + "729", + "730", + "731", + "732", + "733", + "734", + "735", + "736", + "737", + "738", + "739", + "740", + "741", + "742", + "743", + "744", + "745", + "746", + "747", + "748", + "749", + "750", + "751", + "752", + "753", + "754", + "755", + "756", + "757", + "758", + "759", + "760", + "761", + "762", + "763", + "764", + "765", + "766", + "767", + "768", + "769", + "770", + "771", + "772", + "773", + "774", + "775", + "776", + "777", + "778", + "779", + "780", + "781", + "782", + "783", + "784", + "785", + "786", + "787", + "788", + "789", + "790", + "791", + "792", + "793", + "794", + "795", + "796", + "797", + "798", + "799", + "800", + "801", + "802", + "803", + "804", + "805", + "806", + "807", + "808", + "809", + "810", + "811", + "812", + "813", + "814", + "815", + "816", + "817", + "818", + "819", + "820", + "821", + "822", + "823", + "824", + "825", + "826", + "827", + "828", + "829", + "830", + "831", + "832", + "833", + "834", + "835", + "836", + "837", + "838", + "839", + "840", + "841", + "842", + "843", + "844", + "845", + "846", + "847", + "848", + "849", + "850", + "851", + "852", + "853", + "854", + "855", + "856", + "857", + "858", + "859", + "860", + "861", + "862", + "863", + "864", + "865", + "866", + "867", + "868", + "869", + "870", + "871", + "872", + "873", + "874", + "875", + "876", + "877", + "878", + "879", + "880", + "881", + "882", + "883", + "884", + "885", + "886", + "", + "", + "889", + "890", + "891", + "892", 
+ "893", + "894", + "895", + "896", + "897", + "898", + "899", + "900", + "901", + "902", + "903", + "904", + "905", + "906", + "", + "", + "909", + "910", + "911", + "912", + "913", + "914", + "915", + "916", + "917", + "918", + "919", + "920", + "921", + "922", + "923", + "924", + "925", + "926", + "927", + "928", + "929", + "930", + "931", + "932", + "933", + "934", + "935", + "936", + "937", + "938", + "939", + "940", + "941", + "942", + "943", + "944", + "945", + "946", + "947", + "948", + "949", + "950", + "951", + "952", + "953", + "954", + "955", + "956", + "957", + "958", + "959", + "960", + "961", + "962", + "963", + "964", + "965", + "966", + "967", + "968", + "969", + "970", + "971", + "972", + "973", + "974", + "975", + "976", + "977", + "978", + "979", + "980", + "981", + "982", + "983", + "984", + "985", + "986", + "987", + "988", + "989", + "990", + "991", + "992", + "993", + "994", + "995", + "996", + "997", + "998", + "999", + "1000", + "1001", + "1002", + "1003", + "1004", + "1005", + "1006", + "1007", + "1008", + "1009", + "1010", + "1011", + "1012", + "1013", + "1014", + "1015", + "1016", + "1017", + "1018", + "1019", + "1020", + "1021", + "1022", + "1023", + "1024", + "1025", + "1026", + "1027", + "1028", + "1029", + "1030", + "1031", + "1032", + "1033", + "1034", + "1035", + "1036", + "1037", + "1038", + "1039", + "1040", + "1041", + "1042", + "1043", + "1044", + "1045", + "1046", + "1047", + "1048", + "1049", + "1050", + "1051", + "1052", + "1053", + "1054", + "1055", + "1056", + "1057", + "1058", + "1059", + "1060", + "1061", + "1062", + "1063", + "1064", + "1065", + "1066", + "1067", + "1068", + "1069", + "1070", + "1071", + "1072", + "1073", + "1074", + "1075", + "1076", + "1077", + "1078", + "1079", + "1080", + "1081", + "1082", + "1083", + "1084", + "1085", + "1086", + "1087", + "1088", + "1089", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "1122", + "1123", + "1124", + "1125", + "1126", + "1127", + "1128", + "1129", + "1130", + "1131", + "1132", + "1133", + "1134", + "1135", + "1136", + "1137", + "1138", + "1139", + "1140", + "1141", + "1142", + "1143", + "1144", + "1145", + "1146", + "1147", + "1148", + "1149", + "1150", + "1151", + "1152", + "1153", + "1154", + "1155", + "1156", + "1157", + "1158", + "1159", + "1160", + "1161", + "1162", + "1163", + "1164", + "1165", + "1166", + "1167", + "1168", + "1169", + "1170", + "1171", + "1172", + "1173", + "1174", + "1175", + "1176", + "1177", + "1178", + "1179", + "1180", + "1181", + "1182", + "1183", + "1184", + "1185", + "1186", + "1187", + "1188", + "1189", + "1190", + "1191", + "1192", + "1193", + "1194", + "1195", + "1196", + "1197", + "1198", + "1199", + "1200", + "1201", + "1202", + "1203", + "1204", + "1205", + "1206", + "1207", + "1208", + "1209", + "1210", + "1211", + "", + "", + "", + "", + "", + "", + "", + "", + "1220", + "1221", + "1222", + "1223", + "1224", + "1225", + "1226", + "1227", + "1228", + "1229", + "1230", + "1231", + "1232", + "1233", + "1234", + "1235", + "1236", + "1237", + "1238", + "1239", + "1240", + "1241", + "1242", + "1243", + "1244", + "1245", + "1246", + "1247", + "1248", + "1249", + "1250", + "1251", + "1252", + "1253", + "1254", + "1255", + "1256", + "1257", + "1258", + "1259", + "1260", + "1261", + "1262", + "1263", + "1264", + "1265", + "1266", + "1267", + "1268", + "1269", + "1270", + "1271", + "1272", + "1273", + "1274", + "1275", + "1276", + "1277", + "1278", + "1279", + "1280", + "1281", + "1282", + "1283", + "1284", + "1285", + "1286", + "1287", + "1288", + "1289", + "1290", + "1291", + "1292", + "1293", + "1294", + "1295", + "1296", + "1297", + "1298", + "1299", + "1300", + "1301", + "1302", + "1303", + "1304", + "1305", + "1306", + "1307", + "1308", + "1309", + "1310", + "1311", + "1312", + "1313", + "1314", + "1315", + "1316", + "1317", + "1318", + "1319", 
+ "1320", + "1321", + "1322", + "1323", + "1324", + "1325", + "1326", + "1327", + "1328", + "1329", + "1330", + "1331", + "1332", + "1333", + "1334", + "1335", + "1336", + "1337", + "1338", + "1339", + "1340", + "1341", + "1342", + "1343", + "1344", + "1345", + "1346", + "1347", + "1348", + "1349", + "1350", + "1351", + "1352", + "1353", + "1354", + "1355", + "1356", + "1357", + "1358", + "1359", + "1360", + "1361", + "1362", + "1363", + "1364", + "1365", + "1366", + "1367", + "1368", + "1369", + "1370", + "1371", + "1372", + "1373", + "1374", + "1375", + "1376", + "1377", + "1378", + "1379", + "1380", + "1381", + "1382", + "1383", + "1384", + "1385", + "1386", + "1387", + "1388", + "1389", + "1390", + "1391", + "1392", + "1393", + "1394", + "1395", + "1396", + "1397", + "1398", + "1399", + "1400", + "1401", + "1402", + "1403", + "1404", + "1405", + "1406", + "1407", + "1408", + "1409", + "1410", + "1411", + "1412", + "1413", + "1414", + "1415", + "1416", + "1417", + "1418", + "1419", + "1420", + "1421", + "1422", + "1423", + "1424", + "1425", + "1426", + "1427", + "1428", + "1429", + "1430", + "1431", + "1432", + "1433", + "1434", + "1435", + "1436", + "1437", + "1438", + "1439", + "1440", + "1441", + "1442", + "1443", + "1444", + "1445", + "1446", + "1447", + "1448", + "1449", + "1450", + "1451", + "1452", + "1453", + "1454", + "1455", + "1456", + "1457", + "1458", + "1459", + "1460", + "1461", + "1462", + "1463", + "1464", + "1465", + "1466", + "1467", + "1468", + "1469", + "1470", + "1471", + "1472", + "1473", + "1474", + "1475", + "1476", + "1477", + "1478", + "1479", + "1480", + "1481", + "1482", + "1483", + "1484", + "1485", + "1486", + "1487", + "1488", + "1489", + "1490", + "1491", + "1492", + "1493", + "1494", + "1495", + "1496", + "1497", + "1498", + "1499", + "1500", + "1501", + "1502", + "1503", + "1504", + "1505", + "1506", + "1507", + "1508", + "1509", + "1510", + "1511", + "1512", + "1513", + "1514", + "1515", + "1516", + "1517", + "1518", + "1519", 
+ "1520", + "1521", + "1522", + "1523", + "1524", + "1525", + "1526", + "1527", + "1528", + "1529", + "1530", + "1531", + "1532", + "1533", + "1534", + "1535", + "1536", + "1537", + "1538", + "1539", + "1540", + "1541", + "1542", + "1543", + "1544", + "1545", + "1546", + "1547", + "1548", + "1549", + "1550", + "1551", + "1552", + "1553", + "1554", + "1555", + "1556", + "1557", + "1558", + "1559", + "1560", + "1561", + "1562", + "1563", + "1564", + "1565", + "1566", + "1567", + "1568", + "1569" + } + }, + molinfo { + biomol other + } + }, + inst { + repr raw, + mol rna, + length 1570, + seq-data iupacna "NCCAAAGCTAGCTCAACATACTAAACAAATACAAAAATACACCAAAATAAAA +TAAAACATTCACCTAACATTAAAGTATAGGAGATAGAAATTTTTATCCTGACGCTATAGAGATAGTACCGTAAGGGAA +AGATGAAAGAATAAAATAAAAGTAAAAAAAAGCAAAGATTACCCCTTCTACCTTTTGCATAATGGTTTAACCAGAAAA +AATCTAACAAAGAGAACTTTAGCTAGATACCCCGAAACCAGACGAGCTACCCATGAGCAGTTTAAAAGAACCAACTCA +TCTATGTGGCAAAATAGTGAGAAGACTTGTAGGTAGAGGTGAAAAGCCTAACGAGCCTGGTGATAGCTGGTTGTCCGA +GAAAGAATTTTAGTTCAACCTTAAAAATACCCCAAAAACCCTAAATTCCAATGTATTTTTAAGAGATAGTCTAAAAAG +GTACAGCTTTTTAGAAACGGATACAACCTTGACTAGAGAGTAAATCTTAATACTACCATAGTAGGCCTAAAAGCAGCC +ATCAATTGAGAAAGCGTTAAAGCTCAACAAATTCACCAACATAATCCCAAAAACTAATAACAAACTCCTAGCCCAATA +CCGGACTAATCTATTGAAACATAGAAGCAATAATGTTAATATGAGTAACAAGAAGCCTTTCTCCTCGCACACGCTTAC +ATCAGTAACTAATAATATACTGATAATTAACAACCAATAAACCAAAACAACACTAAAACGTTTATTAATTACATTGTT +AACCCAACACAGGAGTGCACCAAGGAAAGATTAAAAGAAGTAAAAGGAACTCGGCAAACACAAACCCCGCCTGTTTAC +CAAAAACATCACCTCTAGCATTACTAGTATTAGAGGCAATGCCTGCCCAGTGACACCAGTTTAACGGCCGCGGTATTC +TGACCGTGCAAAGGTAGCATAATCACTTGTTCTCCAAATAAGGACTTGTATGAATGGCCACACGAGGGTTTTACTGTC +TCTTACTTCCAATCAGTGAAATTAACCTTCCCGTGAAGAGGCGGGAATAAAAAAATAAGACGAGAAGACCCTATGGAG +CTTTAATTAACTATTCCAAAAGTTAAACAACTCAACCACAAAGGGATAAAACATAACTTAACATGGACTAGCAATTTC +GGTTGGGGTGACCTCGGAGTACAAAAAACCCTCCGAGTGATTTTAATCTAGACAAACCAGTCAAAATAACCATAACAT +CACTTATTGATCCAAAATTTTGATCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATCCTGTTCTAGAGTTCCTA 
+TCGACAATAGGGTTTACGACCTCGATGTTGGATCAGGACACCCAAATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTC +AACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGCAATCCAGGTCGGTTTCTATCTATTATAAATTTCTCCCA +GTACGAAAGGACAAGAGAAATGGGACCAACCTCACAAACGCGTCTCAGAGATAATTAATGATTTAATCTTAACCTAAT +TAACTCATAATAAATCCAGCCCTAGAACAGGGCACA" + }, + annot { + { + data ftable { + { + data non-std-residue "A RNA 5'OH", + location pnt { + point 0, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + { + data het "(MG,167373)", + location mix { + bond { + a { + point 1324, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 192, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167374)", + location mix { + bond { + a { + point 1026, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1021, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167375)", + location mix { + bond { + a { + point 752, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 704, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 704, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167376)", + location mix { + bond { + a { + point 287, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 287, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167377)", + location mix { + bond { + a { + point 449, + id pdb { + mol "5AJ4", + rel std { 
+ year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167378)", + location mix { + bond { + a { + point 1400, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1432, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1400, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1432, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167379)", + location mix { + bond { + a { + point 183, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 183, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167380)", + location mix { + bond { + a { + point 376, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 376, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167381)", + location mix { + bond { + a { + point 839, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167382)", + location mix { + bond { + a { + point 330, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1409, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167383)", + location mix { + bond { + a { + point 306, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het 
"(MG,167384)", + location mix { + bond { + a { + point 1055, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1055, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167385)", + location mix { + bond { + a { + point 329, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 330, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1410, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167386)", + location mix { + bond { + a { + point 54, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 56, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 57, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167387)", + location mix { + bond { + a { + point 1022, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167388)", + location mix { + bond { + a { + point 374, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167389)", + location mix { + bond { + a { + point 51, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1141, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 51, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + 
} + } + } + } + }, + { + data het "(MG,167390)", + location mix { + bond { + a { + point 173, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 173, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167391)", + location mix { + bond { + a { + point 1147, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1146, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167392)", + location mix { + bond { + a { + point 113, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 110, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167393)", + location mix { + bond { + a { + point 110, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167394)", + location mix { + bond { + a { + point 285, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167395)", + location mix { + bond { + a { + point 1249, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1249, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1251, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 371, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 371, + id pdb { + mol "5AJ4", + rel std { + year 2018, + 
month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1250, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167396)", + location mix { + bond { + a { + point 1251, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 372, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 372, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167397)", + location mix { + bond { + a { + point 1324, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1324, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 192, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167398)", + location mix { + bond { + a { + point 297, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 297, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 760, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 760, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167399)", + location mix { + bond { + a { + point 186, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1269, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + 
point 1269, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1320, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167400)", + location mix { + bond { + a { + point 625, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 626, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 626, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167401)", + location mix { + bond { + a { + point 1044, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1435, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167403)", + location mix { + bond { + a { + point 237, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 204, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 204, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 204, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167404)", + location mix { + bond { + a { + point 1328, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1394, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1395, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 
4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1396, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167405)", + location mix { + bond { + a { + point 257, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167406)", + location mix { + bond { + a { + point 824, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167407)", + location mix { + bond { + a { + point 582, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167408)", + location mix { + bond { + a { + point 1397, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1398, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167409)", + location mix { + bond { + a { + point 1322, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1325, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167410)", + location mix { + bond { + a { + point 1436, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 641, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 641, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 641, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167412)", + 
location mix { + bond { + a { + point 780, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 781, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 781, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167413)", + location mix { + bond { + a { + point 798, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 798, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167414)", + location mix { + bond { + a { + point 838, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167415)", + location mix { + bond { + a { + point 306, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167416)", + location mix { + bond { + a { + point 870, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167417)", + location mix { + bond { + a { + point 990, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167418)", + location mix { + bond { + a { + point 292, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167419)", + location mix { + bond { + a { + point 242, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 242, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het 
"(MG,167420)", + location mix { + bond { + a { + point 632, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 630, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167421)", + location mix { + bond { + a { + point 1062, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1063, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167422)", + location mix { + bond { + a { + point 1153, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1153, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167423)", + location mix { + bond { + a { + point 1316, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1316, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167424)", + location mix { + bond { + a { + point 1540, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167425)", + location mix { + bond { + a { + point 149, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 150, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167426)", + location mix { + bond { + a { + point 803, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 804, + id pdb { + mol "5AJ4", + rel 
std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167427)", + location mix { + bond { + a { + point 1327, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1327, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167428)", + location mix { + bond { + a { + point 1404, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167429)", + location mix { + bond { + a { + point 1054, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1323, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1323, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167430)", + location mix { + bond { + a { + point 276, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 277, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 307, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167431)", + location mix { + bond { + a { + point 585, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167432)", + location mix { + bond { + a { + point 347, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167433)", + location mix { + bond { + a { + point 253, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 
4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 253, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167434)", + location mix { + bond { + a { + point 931, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167435)", + location mix { + bond { + a { + point 1024, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 191, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167436)", + location mix { + bond { + a { + point 1015, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167437)", + location mix { + bond { + a { + point 809, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167438)", + location mix { + bond { + a { + point 963, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 963, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167439)", + location mix { + bond { + a { + point 933, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 933, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 934, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167440)", + location mix { + bond { + a { + point 525, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + 
}, + { + data het "(MG,167442)", + location mix { + bond { + a { + point 797, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167443)", + location mix { + bond { + a { + point 413, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167444)", + location mix { + bond { + a { + point 354, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 355, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 355, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167445)", + location mix { + bond { + a { + point 961, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1413, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167446)", + location mix { + bond { + a { + point 253, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167447)", + location mix { + bond { + a { + point 793, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167448)", + location mix { + bond { + a { + point 802, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1371, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1371, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 803, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 804, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167449)", + location mix { + bond { + a { + point 1335, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167450)", + location mix { + bond { + a { + point 830, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 830, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 832, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 833, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 833, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167454)", + location mix { + bond { + a { + point 896, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 896, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167456)", + location mix { + bond { + a { + point 1171, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167457)", + location mix { + bond { + a { + point 1409, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167458)", + location mix { + bond { + a { + point 208, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { 
+ a { + point 208, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 207, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167459)", + location mix { + bond { + a { + point 335, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167460)", + location mix { + bond { + a { + point 188, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167461)", + location mix { + bond { + a { + point 1269, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1319, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167462)", + location mix { + bond { + a { + point 540, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167463)", + location mix { + bond { + a { + point 597, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167465)", + location mix { + bond { + a { + point 753, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 754, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167466)", + location mix { + bond { + a { + point 1306, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167467)", + location mix { + bond { + a { + point 1063, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + 
} + } + } + }, + { + data het "(MG,167468)", + location mix { + bond { + a { + point 961, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 960, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167469)", + location mix { + bond { + a { + point 645, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 998, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167473)", + location mix { + bond { + a { + point 639, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167474)", + location mix { + bond { + a { + point 90, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 90, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 91, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167475)", + location mix { + bond { + a { + point 825, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 306, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167476)", + location mix { + bond { + a { + point 595, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167478)", + location mix { + bond { + a { + point 826, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167480)", 
+ location mix { + bond { + a { + point 1149, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1148, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1318, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167481)", + location mix { + bond { + a { + point 1339, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1339, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167482)", + location mix { + bond { + a { + point 286, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 833, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 833, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167483)", + location mix { + bond { + a { + point 1025, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167484)", + location mix { + bond { + a { + point 1252, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1251, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 372, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 372, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 372, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1252, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167485)", + location mix { + bond { + a { + point 1557, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1557, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1558, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1558, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167488)", + location mix { + bond { + a { + point 1144, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1145, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167491)", + location mix { + bond { + a { + point 1520, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1516, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1516, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167492)", + location mix { + bond { + a { + point 431, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 376, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 376, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + 
chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167493)", + location mix { + bond { + a { + point 1273, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167495)", + location mix { + bond { + a { + point 1041, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167496)", + location mix { + bond { + a { + point 1564, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167498)", + location mix { + bond { + a { + point 266, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167501)", + location mix { + bond { + a { + point 1208, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1208, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167533)", + location mix { + bond { + a { + point 448, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 434, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 434, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 448, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 449, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 449, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167534)", + location mix { + bond { + 
a { + point 1049, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1050, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167535)", + location mix { + bond { + a { + point 1404, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167536)", + location mix { + bond { + a { + point 928, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167541)", + location mix { + bond { + a { + point 1377, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167543)", + location mix { + bond { + a { + point 185, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167546)", + location mix { + bond { + a { + point 1343, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167548)", + location mix { + bond { + a { + point 244, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167550)", + location mix { + bond { + a { + point 1070, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 1071, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167553)", + location mix { + bond { + a { + point 1197, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167558)", + location mix { + bond { + a { + point 113, + id pdb { + mol "5AJ4", + rel std { + 
year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167561)", + location mix { + bond { + a { + point 974, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167564)", + location mix { + bond { + a { + point 1377, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167570)", + location mix { + bond { + a { + point 791, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + }, + { + data het "(MG,167574)", + location mix { + bond { + a { + point 749, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + }, + bond { + a { + point 749, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BA" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL2M, MRPL2", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 306, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", 
+ "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + 
"295", + "296", + "297", + "298", + "299", + "300", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 306, + seq-data ncbieaa "MALRVLTRALSSLSLTPRIAVAPGLNLLPAVQVTNNVLLTLPSGLMSLPCRP +ILTSVALSATSVSWKSRTKYTVMPVKMRKSGGRNHTGQIQVHGIGGGHKQRYRMIDFLRFRPEHESKPGPFEEKVIAV +RYDPCRSADIALVAGGNRKRWIIATENMKAGDTVLNSDHIGRMAVAAREGDAHPLGALPVGTLINNVESEPGRGAQYI +RAAGTCGVLLRKVNGTAIIQLPSKRQMQVLETCIATVGRVSNVDHNKRVIGKAGRNRWLGKRPNSGLWQRKGGWAGRK +IRPLPPMKSYVKLPSAAAQS" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 186", + location int { + from 260, + to 267, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data psec-str sheet, + comment "strand 181", + location int { + from 121, + to 127, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data psec-str sheet, + comment "strand 182", + location int { + from 128, + to 131, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data psec-str sheet, + comment "strand 183", + location int { + from 138, + to 145, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data psec-str sheet, + comment "strand 184", + location int { + from 146, + to 152, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data psec-str sheet, + comment "strand 185", + location int { + from 160, + to 167, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data psec-str sheet, + comment "strand 186", + location int { + from 179, + to 185, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data psec-str sheet, + comment "strand 187", + location int { + from 191, + to 195, + id pdb 
{ + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data psec-str sheet, + comment "strand 188", + location int { + from 213, + to 221, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data psec-str sheet, + comment "strand 189", + location int { + from 222, + to 229, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data psec-str sheet, + comment "strand 190", + location int { + from 231, + to 237, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data psec-str sheet, + comment "strand 191", + location int { + from 238, + to 245, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data psec-str sheet, + comment "strand 192", + location int { + from 292, + to 295, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + }, + { + data het "(MG,167568)", + location mix { + bond { + a { + point 271, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BD" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL3M, MRPL3", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 348, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + 
"258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325", + "326", + "327", + "328", + "329", + "330", + "331", + "332", + "333", + "334", + "335", + "336", + "337", + "338", + "339", + "340", + "341", + "342", + "343", + "344", + "345", + "346", + "347", + "348" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 348, + seq-data ncbieaa "MPGWGLLGRAGARVLGCGADGLGASRGLGNRTDICLLVRSLHGKSVTWWDEH +LSEENVPFVKQLVSDENKAQLASKLCPLKDEPWPIHPWEPGSSRVGLIALKLGMMPLWTKDGQKHVVTLLQVQDCHVL +KYTPKENHNGRMAALTVGGKTVSHFHKSASILEFYQELGLPPKQKVKIFNVTENAVIKPGTPLYAAHFRPGQYVDVTA +KTIGKGFQGVMRRWGFKGQPATHGQTKTHRRPGAISTGDVARVWPGTKMPGQLGNIDRTAFGLKVWRINTKHNIIYVN +GSVPGHKNCLVKIKDSKLPAYKDFCKNLPFPTYFPDGDEEALPEDLYDENVCQPGAPSITFT" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 187", + location int { + from 57, + to 75, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str helix, + comment "helix 188", + location int { + from 158, + to 167, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 193", + location int { + from 97, + to 102, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 194", + location int { + from 103, + to 111, + id pdb { + 
mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 195", + location int { + from 113, + to 123, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 196", + location int { + from 124, + to 133, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 197", + location int { + from 141, + to 149, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 198", + location int { + from 173, + to 181, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 199", + location int { + from 189, + to 192, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 200", + location int { + from 199, + to 210, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 201", + location int { + from 211, + to 217, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 202", + location int { + from 256, + to 261, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 203", + location int { + from 262, + to 269, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 204", + location int { + from 270, + to 274, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + 
chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 205", + location int { + from 275, + to 278, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 206", + location int { + from 279, + to 286, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 207", + location int { + from 294, + to 300, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data psec-str sheet, + comment "strand 208", + location int { + from 329, + to 333, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data region "Domain 42", + comment "NCBI Domains", + location mix { + int { + from 87, + to 123, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + }, + int { + from 196, + to 347, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + } + }, + { + data region "Domain 43", + comment "NCBI Domains", + location int { + from 124, + to 195, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + }, + { + data het "(MG,167485)", + location mix { + bond { + a { + point 211, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + } + } + }, + { + data het "(MG,167570)", + location mix { + bond { + a { + point 236, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BE" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL4M, MRPL4", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, 
+ orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 294, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + 
"207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 294, + seq-data ncbieaa "MLQLVRAGARTWFRPSGCRGLNTLAEEAVQQAEKPESVASLGLQPPVLRKCE +LPVPAHRRPVQAWIESLRGYEQERVGLTELHPDVFSTAPRLDILHQVAIWQKNFKRISYAKTKTRAEVRGGGRKPWVQ +KGSGRARHGSIRSPIWRGGGVAHGPRGPTSYYYMLPMKVRVQGLKVALTVKLAQDDLHIVDSLELPTADPQYLIELAR +YRRWGDSVLLVDLEHEDMPQNVVAATSGLKTFNLVPAVGLNVHSMLKHQTLVLTLPTVAFLEEKLLWHNSRYTPLYPF +RLPYCDFP" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 189", + location int { + from 93, + to 104, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str helix, + comment "helix 190", + location int { + from 166, + to 183, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str helix, + comment "helix 191", + location int { + from 200, + to 209, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str helix, + comment "helix 192", + location int { + from 227, + to 235, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + 
} + }, + { + data psec-str helix, + comment "helix 193", + location int { + from 248, + to 255, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str helix, + comment "helix 194", + location int { + from 263, + to 273, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str sheet, + comment "strand 209", + location int { + from 46, + to 49, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str sheet, + comment "strand 210", + location int { + from 60, + to 66, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str sheet, + comment "strand 211", + location int { + from 76, + to 82, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str sheet, + comment "strand 212", + location int { + from 107, + to 113, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str sheet, + comment "strand 213", + location int { + from 157, + to 160, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str sheet, + comment "strand 214", + location int { + from 185, + to 190, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str sheet, + comment "strand 215", + location int { + from 214, + to 220, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str sheet, + comment "strand 216", + location int { + from 238, + to 244, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + }, + { + data psec-str sheet, + comment "strand 217", + location int { + 
from 256, + to 262, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BF" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BI" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL9M, MRPL9", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 268, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 268, + seq-data ncbieaa "MAAAACTAAGGGLLRAGLERLLRGGVRALLRPRLEGVTPRSERAFSLSLSRS +TVIVERWWKVPLAGEGRKPRLHRRHRVYKLVEDTKHRPKDNLELVLTQSVEELGVRGDLVSVKKSVGRNRLLPQGLAV +YASPENKKLFEEEKLLRQEGKLEKLQTKAGEATVKFLRRCHLEVGMKNNVKWELNPEIVARHFLRNLGVVVAPHALKL +PEEPITQRGEYWCEVTVNGLDTVRVPMSVVNFERPKTKRYKYWLAQQAAKGDVPTSSQMI" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 195", + location int { + from 114, + to 121, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BI" + } + } + }, + { + data psec-str helix, + comment "helix 196", + location int { + from 133, + to 148, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BI" + } + } + }, + { + data psec-str sheet, + comment "strand 218", + location int { + from 52, + to 58, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BI" + } + } + }, + { + data psec-str sheet, + comment "strand 219", + location int { + from 78, + to 85, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BI" + } + } + }, + { + data psec-str sheet, + comment "strand 220", + location int { + from 92, + to 97, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BI" + } + } + }, + { + data psec-str 
sheet, + comment "strand 221", + location int { + from 98, + to 102, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BI" + } + } + }, + { + data psec-str sheet, + comment "strand 222", + location int { + from 104, + to 107, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BI" + } + } + }, + { + data psec-str sheet, + comment "strand 223", + location int { + from 108, + to 113, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BI" + } + } + }, + { + data psec-str sheet, + comment "strand 224", + location int { + from 127, + to 131, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BI" + } + } + }, + { + data region "Domain 44", + comment "NCBI Domains", + location int { + from 89, + to 267, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BI" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL10M, MRPL10", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 262, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + 
"61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 262, + seq-data ncbieaa "MAAVVAGMLRGGLLPQAGRLPTFQTVRYGSKAVTRHRRVMHFERQKLMAVTE +YIPPKPTINPRCLPPPPTPPQEETGLVRLLRREIAAVFRDNRMIAVCQHVALSAEDKLLLRHQLRKHKILMKIFPNQV +LKPFLEESKYQNLLPLFVGHNLLLVSEEPKVKEMVRILKGIPFLPLLGGCIDDTILSRQGFINYSKLPSLALVQGELV +GGLTFLTAQTYSMLQHQPRQLTALLDQYVKQQQEGHPAVPASGQPEPPDPVPDP" + }, + annot { + { + data ftable { + { 
+ data psec-str helix, + comment "helix 197", + location int { + from 41, + to 50, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + } + }, + { + data psec-str helix, + comment "helix 198", + location int { + from 76, + to 91, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + } + }, + { + data psec-str helix, + comment "helix 199", + location int { + from 105, + to 116, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + } + }, + { + data psec-str helix, + comment "helix 200", + location int { + from 161, + to 168, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + } + }, + { + data psec-str helix, + comment "helix 201", + location int { + from 187, + to 194, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + } + }, + { + data psec-str sheet, + comment "strand 225", + location int { + from 93, + to 99, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + } + }, + { + data psec-str sheet, + comment "strand 226", + location int { + from 120, + to 124, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + } + }, + { + data psec-str sheet, + comment "strand 227", + location int { + from 150, + to 156, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + } + }, + { + data psec-str sheet, + comment "strand 228", + location int { + from 174, + to 181, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + } + }, + { + data psec-str sheet, + comment "strand 229", + location int { + from 182, + to 186, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + } + }, + { + data region "Domain 45", + comment "NCBI Domains", + location int { + from 93, 
+ to 261, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + } + }, + { + data het "(ZN,167575)", + location mix { + bond { + a { + point 63, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BJ" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL11M, MRPL11", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 192, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + 
"135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 192, + seq-data ncbieaa "MSKLSRATRALKKPEASGMIRAIVRAGQARPGPPLGPILGQRGVSINQFCKE +FNEKTKDIKEGIPLPTKIFVKPDRTFEIKIGQPTVSYFLKAAAGIEKGARHTGKEVAGLVTLKHVYEIARVKAQDDAF +ALQDVPLSSVVRSIIGSARSLGIRVVKDLSSEELAAFQKERALFLAAQREADLAAQAEAAKK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 202", + location int { + from 31, + to 38, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + } + }, + { + data psec-str helix, + comment "helix 203", + location int { + from 45, + to 56, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + } + }, + { + data psec-str helix, + comment "helix 204", + location int { + from 86, + to 94, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + } + }, + { + data psec-str helix, + comment "helix 205", + location int { + from 114, + to 123, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + } + }, + { + data psec-str helix, + comment "helix 206", + location int { + from 136, + to 150, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + } + }, + { + data psec-str sheet, + comment "strand 230", + location int { + from 18, + to 25, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + } + }, + { + data psec-str sheet, + comment "strand 231", + location int { + from 63, + to 73, + id 
pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + } + }, + { + data psec-str sheet, + comment "strand 232", + location int { + from 75, + to 83, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + } + }, + { + data psec-str sheet, + comment "strand 233", + location int { + from 108, + to 113, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + } + }, + { + data psec-str sheet, + comment "strand 234", + location int { + from 151, + to 156, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + } + }, + { + data region "Domain 46", + comment "NCBI Domains", + location int { + from 0, + to 84, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + } + }, + { + data region "Domain 47", + comment "NCBI Domains", + location int { + from 85, + to 191, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BK" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL13M, MRPL13", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 178, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", 
+ "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 178, + seq-data ncbieaa "MSSFSKAPQQWATFARVWYLLDGKMQPPGKLAAMASVKLQGLHKPVYHQLSD +CGDHVVIMNTRHIAFSGNKWEQKVYSSHTGYPGGFRQVTAAQLHQKDPVAIVKLAIYGMLPKNLHRRTMMQRLHLFPD +EDIPEDILKNLVEELPQPRKVPRRLDEYTQEEIEAFPRVWSPPEDYRL" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 207", + location int { + from 5, + to 13, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data psec-str helix, + comment "helix 208", + location int { + from 28, + to 39, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data psec-str helix, + comment "helix 209", + location int { + from 91, + to 98, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data psec-str helix, + comment "helix 210", + location int { + from 102, + to 110, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data psec-str helix, + comment "helix 211", + location int { + from 133, + to 140, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data psec-str helix, + comment "helix 212", + location int { + from 158, + to 165, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data psec-str sheet, + comment "strand 235", + location int { + from 16, + to 22, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data psec-str sheet, + comment "strand 236", + location int { + from 23, + to 26, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data psec-str sheet, + comment "strand 237", + location int { + from 54, + to 60, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data psec-str sheet, + comment "strand 238", + location int { + from 63, + to 66, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data psec-str sheet, + comment "strand 239", + location int { + from 74, + to 80, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data psec-str sheet, + comment "strand 240", + location int { + from 85, + to 90, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data psec-str sheet, + comment "strand 241", + location int { + from 123, + to 128, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + 
}, + { + data psec-str sheet, + comment "strand 242", + location int { + from 141, + to 144, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + }, + { + data region "Domain 48", + comment "NCBI Domains", + location int { + from 15, + to 151, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BN" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BO" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL14M, MRPL14", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 145, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", 
+ "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 145, + seq-data ncbieaa "MAFCTGLRGPFAQVSRALSQRCFSTTGSLSAIQKMTRVRVVDNSALGTTPYH +RPPRCIHVYNKTGVGKVGDRILLAIRGQKKKALIVGHRMPGPRMTPRFDSNNVVLLEDNGNPVGTRIKTPIPSSLRQR +EGEFSKVLAIAQNFV" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 213", + location int { + from 133, + to 140, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BO" + } + } + }, + { + data psec-str sheet, + comment "strand 243", + location int { + from 34, + to 40, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BO" + } + } + }, + { + data psec-str sheet, + comment "strand 244", + location int { + from 54, + to 60, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BO" + } + } + }, + { + data psec-str sheet, + comment "strand 245", + location int { + from 70, + to 77, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BO" + } + } + }, + { + data psec-str sheet, + comment "strand 246", + location int { + from 78, + to 83, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BO" + } + } + }, + { + data psec-str sheet, + comment "strand 247", + location int { + from 84, + to 89, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BO" + } + } + }, + { + data psec-str sheet, + comment "strand 248", + location int { + from 100, + to 109, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BO" + } + } + }, + { + data psec-str sheet, + comment "strand 249", + location int { + from 111, + to 114, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BO" + } + } + } + } + } 
+ } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL15M, MRPL15", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 296, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + 
"167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 296, + seq-data ncbieaa "MGGPVQGGGPRALDLLRALPRVSLANLKPNPGSRKPERRPRGRRRGRKCGRG +HKGERQRGTRPRLGFEGGQTPFYLRIPKYGFNEGHSFRHQYQPLSLNRLQYLIDLGRVDPTQPIDLTQLVNGRGVTIQ +PSKRDYGVQLVEEGADTFKAKVNIEVQMASELAIAAIEKNGGVVTTAFYDPRSLEILCKPVPFFLRGQPIPKRMLPPE +ALVPYYTDAKNRGYLADPARFPEARLELARKYGYVLPDITKDELFKMLSTRKDPRQIFFGLAPGWVVNMADKKILKPT +DENLLKYYSS" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 214", + location int { + from 98, + to 106, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data psec-str helix, + comment "helix 215", + location int { + from 117, + to 124, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + 
}, + { + data psec-str helix, + comment "helix 216", + location int { + from 160, + to 169, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data psec-str helix, + comment "helix 217", + location int { + from 180, + to 188, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data psec-str helix, + comment "helix 218", + location int { + from 228, + to 237, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data psec-str helix, + comment "helix 219", + location int { + from 287, + to 294, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data psec-str sheet, + comment "strand 250", + location int { + from 90, + to 97, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data psec-str sheet, + comment "strand 251", + location int { + from 133, + to 141, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data psec-str sheet, + comment "strand 252", + location int { + from 150, + to 154, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data psec-str sheet, + comment "strand 253", + location int { + from 155, + to 159, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data psec-str sheet, + comment "strand 254", + location int { + from 170, + to 176, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data psec-str sheet, + comment "strand 255", + location int { + from 272, + to 276, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data psec-str sheet, + comment "strand 256", + location int { + 
from 279, + to 283, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data region "Domain 49", + comment "NCBI Domains", + location int { + from 0, + to 254, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data region "Domain 50", + comment "NCBI Domains", + location int { + from 255, + to 295, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + }, + { + data het "(MG,167379)", + location mix { + bond { + a { + point 47, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + } + } + }, + { + data het "(MG,167571)", + location mix { + bond { + a { + point 129, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BP" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL16M, MRPL16", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 251, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + 
"69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 251, + seq-data ncbieaa "MWRLLARACAPLLRAPLSDSWAAPSATAGFKTLLPVPTFEDVAIPEKPKLRF +VERVPLVPKVRRERKNLSDIRGPSTEATEFTEGRFAILALGGGYLHWGHFEMMRLTINRSMDPKNMFALWRVPAPFKP +ITRKGMGQRMGGGKGAIDHYVTPVKAGRLIVEMGGRCEFKEVQGFLDLVAHKLPFPAKAVSRETLEKMRKDQEERERN +NQNPWTFERIATANMLGIRKVLSPYDLTHKGRYWGKFYTPERV" + }, + annot { 
+ { + data ftable { + { + data psec-str helix, + comment "helix 220", + location int { + from 98, + to 111, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str helix, + comment "helix 221", + location int { + from 169, + to 181, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str helix, + comment "helix 222", + location int { + from 192, + to 207, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str helix, + comment "helix 223", + location int { + from 213, + to 221, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str sheet, + comment "strand 257", + location int { + from 85, + to 90, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str sheet, + comment "strand 258", + location int { + from 91, + to 97, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str sheet, + comment "strand 259", + location int { + from 116, + to 122, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str sheet, + comment "strand 260", + location int { + from 127, + to 133, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str sheet, + comment "strand 261", + location int { + from 144, + to 149, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str sheet, + comment "strand 262", + location int { + from 150, + to 154, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str sheet, + comment "strand 263", + 
location int { + from 155, + to 160, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str sheet, + comment "strand 264", + location int { + from 161, + to 165, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str sheet, + comment "strand 265", + location int { + from 184, + to 191, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str sheet, + comment "strand 266", + location int { + from 239, + to 242, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data psec-str sheet, + comment "strand 267", + location int { + from 243, + to 246, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + }, + { + data het "(MG,167466)", + location mix { + bond { + a { + point 182, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BQ" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BR" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL17M, MRPL17", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 169, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + 
"38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 169, + seq-data ncbieaa "MRLSVAAAISHGRVFRRLGLGPESRIHLLQNLLTGLVRHERIEASWARVDEL +RGYAEKLIDYGKLGDTNERAMRMADFWLTEKDLIPKLFQVLAPRFQGQNGGYTRMLQIPNRNEQDRAKMAVIEYKGNC +LPPLPLPRRDSNLTLLNQLLQGLRQDQEASSHTAQTPAV" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 224", + location int { + from 21, + to 37, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BR" + } + } + }, + { + data psec-str helix, + comment "helix 225", + location int { + from 45, + to 63, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BR" + } + } + }, + { + data psec-str helix, + comment "helix 226", + location int { + from 69, + to 78, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BR" + } + } + }, + { + 
data psec-str helix, + comment "helix 227", + location int { + from 144, + to 159, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BR" + } + } + }, + { + data psec-str sheet, + comment "strand 268", + location int { + from 39, + to 44, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BR" + } + } + }, + { + data psec-str sheet, + comment "strand 269", + location int { + from 103, + to 109, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BR" + } + } + }, + { + data psec-str sheet, + comment "strand 270", + location int { + from 120, + to 126, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BR" + } + } + }, + { + data region "Domain 51", + comment "NCBI Domains", + location int { + from 0, + to 135, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BR" + } + } + }, + { + data het "(MG,167485)", + location mix { + bond { + a { + point 10, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BR" + } + } + }, + bond { + a { + point 10, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BR" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BS" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL18M, MRPL18", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 180, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 180, + seq-data ncbieaa "MALRSRFWALLSICRNPGCRAAALSTSCKPATNPETDPVENEAVAPEFTNRN +PRNLELLAVARKERGWGTVWPSREFWHRLRVIRTQHHIEALVEHRNGQVVVSASTREWAIKKHLYSTRNVVACESVGR +VLAERCLEAGINFMVYHPTPWEAASDSIKRLQHAMTEGGVVLREPRRIYE" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 228", + location int { + from 121, + to 138, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BS" + } + } + }, + { + data psec-str helix, + comment "helix 229", + location int { + from 156, + to 167, + id pdb { + mol "5AJ4", + rel std { + year 
2018, + month 4, + day 4 + }, + chain-id "BS" + } + } + }, + { + data psec-str sheet, + comment "strand 271", + location int { + from 78, + to 86, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BS" + } + } + }, + { + data psec-str sheet, + comment "strand 272", + location int { + from 87, + to 96, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BS" + } + } + }, + { + data psec-str sheet, + comment "strand 273", + location int { + from 98, + to 106, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BS" + } + } + }, + { + data psec-str sheet, + comment "strand 274", + location int { + from 141, + to 144, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BS" + } + } + }, + { + data psec-str sheet, + comment "strand 275", + location int { + from 169, + to 172, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BS" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL19M, MRPL19", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 292, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "54", + "55", + "56", + "57", + "58", + 
"59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", 
+ "286", + "287", + "288", + "289", + "290", + "291", + "292" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 292, + seq-data ncbieaa "MAASITRGSWAAVGLGQSFPPTRALLPAPASVICRTLAGSGRQQVTGSSEPG +AXXXXXXXXXXXXXXXQRPETRFLSPEFIPPRGRTNPLKFQIERKDMLERRKILHIPEFYVGSILRVTTADPYASAKT +SQFLGICIQRSGSGLGATFILRNTIEGQGVEICFELYNPRIQEIQVVKLEKRLDNSLLYLRDALPEYSTFDVNMKPIT +QEFNQEVPVNQLKVKMKPKPWSKRWERPKFNIKGIRFDLCLTEEQMREAQKWSQPWLEFDMMREYDTSKIEAAIWDEI +EASKNS" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK", + location pnt { + point 53, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 54, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 55, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 56, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 57, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 58, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 59, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 60, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 61, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } 
+ }, + { + data non-std-residue "UNK", + location pnt { + point 62, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 63, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 64, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 65, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 66, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 67, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data psec-str helix, + comment "helix 230", + location int { + from 89, + to 102, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data psec-str helix, + comment "helix 231", + location int { + from 249, + to 257, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data psec-str helix, + comment "helix 232", + location int { + from 275, + to 290, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data psec-str sheet, + comment "strand 276", + location int { + from 110, + to 113, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data psec-str sheet, + comment "strand 277", + location int { + from 114, + to 122, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data psec-str sheet, + comment 
"strand 278", + location int { + from 128, + to 135, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data psec-str sheet, + comment "strand 279", + location int { + from 136, + to 143, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data psec-str sheet, + comment "strand 280", + location int { + from 144, + to 155, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data psec-str sheet, + comment "strand 281", + location int { + from 156, + to 165, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data psec-str sheet, + comment "strand 282", + location int { + from 169, + to 177, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data psec-str sheet, + comment "strand 283", + location int { + from 178, + to 182, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data psec-str sheet, + comment "strand 284", + location int { + from 215, + to 218, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + }, + { + data region "Domain 52", + comment "NCBI Domains", + location int { + from 0, + to 233, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BT" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BU" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL20M, MRPL20", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; 
Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 149, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 149, + seq-data ncbieaa "MVFLTVPLWLRSRVTDRYWRVQEVLKHARHFRGRKNRCYRLAVRAVTRAFVK +CTRARRLKKRSLRTLWINRITAASQEHGLKYPAFIINLIKCQVELNRKVLADLAIYEPKTFKSLAALAKRRREEGFAA +ALGDGKEPDGIFSRVVQHR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 233", + location int { + from 17, + to 25, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BU" + } + } + }, + { + data psec-str helix, + comment "helix 234", + location int { + from 39, + to 55, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 
4, + day 4 + }, + chain-id "BU" + } + } + }, + { + data psec-str helix, + comment "helix 235", + location int { + from 56, + to 78, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BU" + } + } + }, + { + data psec-str helix, + comment "helix 236", + location int { + from 83, + to 92, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BU" + } + } + }, + { + data psec-str helix, + comment "helix 237", + location int { + from 98, + to 106, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BU" + } + } + }, + { + data psec-str helix, + comment "helix 238", + location int { + from 109, + to 132, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BU" + } + } + }, + { + data region "Domain 53", + comment "NCBI Domains", + location int { + from 56, + to 148, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BU" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BV" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN BL21M, MRPL21", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 209, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "55", + "56", + "57", + "58", + "59", + "60", 
+ "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 209, + seq-data ncbieaa "MAAAVAASVLQGTFGRLVSAYSRSVLRTSRPGTAPPWSAPRRLSSQHTSSLR +GCVPKTSLGSPPWPDIVLPDPVEETRHHAEVVGQVNELIAAGRYGRLFAVVHFAGHQWKVTAEDLILIENELDVACGE +RIRLEKVLLVGADNFTLLGKPLLGKELVRVEATVIEKTESWPKINMKFQKRKNYQRKRITVNPQTVLRINSIEIAPCLC" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 239", + location int { + from 72, + to 92, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BV" + } + } + }, + { + data psec-str sheet, + comment "strand 285", + location int { + from 98, + to 105, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 
4, + day 4 + }, + chain-id "BV" + } + } + }, + { + data psec-str sheet, + comment "strand 286", + location int { + from 106, + to 112, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BV" + } + } + }, + { + data psec-str sheet, + comment "strand 287", + location int { + from 113, + to 121, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BV" + } + } + }, + { + data psec-str sheet, + comment "strand 288", + location int { + from 129, + to 133, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BV" + } + } + }, + { + data psec-str sheet, + comment "strand 289", + location int { + from 135, + to 142, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BV" + } + } + }, + { + data psec-str sheet, + comment "strand 290", + location int { + from 143, + to 148, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BV" + } + } + }, + { + data psec-str sheet, + comment "strand 291", + location int { + from 156, + to 170, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BV" + } + } + }, + { + data psec-str sheet, + comment "strand 292", + location int { + from 171, + to 178, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BV" + } + } + }, + { + data psec-str sheet, + comment "strand 293", + location int { + from 183, + to 194, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BV" + } + } + }, + { + data psec-str sheet, + comment "strand 294", + location int { + from 195, + to 204, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BV" + } + } + }, + { + data region "Domain 54", + comment "NCBI Domains", + location int { + from 96, + to 208, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BV" + } + } + } + } + } + } + }, + 
seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BW" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL22M, MRPL22", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 210, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", 
+ "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 210, + seq-data ncbieaa "MAAAVLERLGGLWMQNLRGKLALRLLSSHSILPQSHIHTSASVEISRKWEKK +NKIVYPPQLPGEPRRPAEIYHCRRQIKYSKDKMWYLAKLIRGMSIDQALAQLEFSDKKGAQIIKEVLLEAQDMAVRDH +NVEFRSNLYVAESTSGRGQYLKRIRYHGRGRFGIMEKVFCHYFVKLVEGPPPPREAPKTAVAHAREYIQELRNRTITH +AL" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 240", + location int { + from 82, + to 91, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BW" + } + } + }, + { + data psec-str helix, + comment "helix 241", + location int { + from 96, + to 103, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BW" + } + } + }, + { + data psec-str helix, + comment "helix 242", + location int { + from 111, + to 127, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BW" + } + } + }, + { + data psec-str helix, + comment "helix 243", + location int { + from 189, + to 202, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BW" + } + } + }, + { + data psec-str sheet, + comment "strand 295", + location int { + from 68, + to 76, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BW" + } + } + }, + { + data psec-str sheet, + comment "strand 296", + location int { + from 136, + to 141, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BW" + } + } + }, + { + data psec-str sheet, + comment "strand 297", + location int { + from 142, + to 149, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BW" 
+ } + } + }, + { + data psec-str sheet, + comment "strand 298", + location int { + from 150, + to 157, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BW" + } + } + }, + { + data psec-str sheet, + comment "strand 299", + location int { + from 159, + to 178, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BW" + } + } + }, + { + data region "Domain 55", + comment "NCBI Domains", + location int { + from 0, + to 183, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BW" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BX" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN UL23M, MRPL23", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 150, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", 
+ "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 150, + seq-data ncbieaa "MARNVLYPLYQLGNPQLRVFRTNFFIQLVRPGTAQPEDTVQFRIPMEMTRVD +LRNYLERIYNVPVAAVRTRVQYGSNRRRDHRNIRIKKPDYKVAYVQLALGQTFTFPDLFPERKGASVDVDVRDQVLED +QRQKHSPDPRRGGVPGWFGL" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 244", + location int { + from 49, + to 59, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BX" + } + } + }, + { + data psec-str sheet, + comment "strand 300", + location int { + from 25, + to 28, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BX" + } + } + }, + { + data psec-str sheet, + comment "strand 301", + location int { + from 37, + to 43, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BX" + } + } + }, + { + data psec-str sheet, + comment "strand 302", + location int { + from 65, + to 77, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BX" + } + } + }, + { + data psec-str sheet, + comment "strand 303", + location int { + from 85, + to 98, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BX" + } + } + }, + { + data het "(MG,167574)", + location mix { + bond { + a { + point 69, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BX" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + }, + descr { + 
comment "MITORIBOSOMAL PROTEIN UL24M, MRPL24", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 216, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + 
"183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 216, + seq-data ncbieaa "MRLSALLALASKVTLPPNYRYGMSRPGSLSDKRKNPPGTRRRRVPVEPISDE +DWHLFCGDKVEILEGKDAGKQGKVVQVIRQRNWVVVEGLNTHYRYVGKTVDYRGTMIPSEAPLLHNQVKLVDPTDRKP +TDVEWRFTEAGERVRVSTRSGRIIPKPEFPRADGIVPETWIDGPKDTSVEDALEKTYVPRLKTLEEEVMEAMGIQETR +RHKKVYWY" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 245", + location int { + from 27, + to 34, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + }, + { + data psec-str helix, + comment "helix 246", + location int { + from 193, + to 201, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + }, + { + data psec-str sheet, + comment "strand 304", + location int { + from 57, + to 64, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + }, + { + data psec-str sheet, + comment "strand 305", + location int { + from 70, + to 75, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + }, + { + data psec-str sheet, + comment "strand 306", + location int { + from 76, + to 81, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + }, + { + data psec-str sheet, + comment "strand 307", + location int { + from 82, + to 88, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + }, + { + data psec-str sheet, + comment "strand 308", + location int { + from 91, + to 101, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" 
+ } + } + }, + { + data psec-str sheet, + comment "strand 309", + location int { + from 102, + to 112, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + }, + { + data psec-str sheet, + comment "strand 310", + location int { + from 118, + to 125, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + }, + { + data psec-str sheet, + comment "strand 311", + location int { + from 126, + to 131, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + }, + { + data psec-str sheet, + comment "strand 312", + location int { + from 132, + to 138, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + }, + { + data psec-str sheet, + comment "strand 313", + location int { + from 140, + to 148, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + }, + { + data region "Domain 56", + comment "NCBI Domains", + location mix { + int { + from 46, + to 75, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + }, + int { + from 116, + to 170, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + } + }, + { + data region "Domain 57", + comment "NCBI Domains", + location int { + from 76, + to 115, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "BY" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML37, MRPL37", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; 
Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 423, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + 
"225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325", + "326", + "327", + "328", + "329", + "330", + "331", + "332", + "333", + "334", + "335", + "336", + "337", + "338", + "339", + "340", + "341", + "342", + "343", + "344", + "345", + "346", + "347", + "348", + "349", + "350", + "351", + "352", + "353", + "354", + "355", + "356", + "357", + "358", + "359", + "360", + "361", + "362", + "363", + "364", + "365", + "366", + "367", + "368", + "369", + "370", + "371", + "372", + "373", + "374", + "375", + "376", + "377", + "378", + "379", + "380", + "381", + "382", + "383", + "384", + "385", + "386", + "387", + "388", + "389", + "390", + "391", + "392", + "393", + "394", + "395", + "396", + "397", + "398", + "399", + "400", + "401", + "402", + "403", + "404", + "405", + "406", + "407", + "408", + "409", + "410", + "411", + "412", + "413", + "414", + "415", + "416", + "417", + "418", + "419", + "420", + "421", + "422", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 423, + seq-data ncbieaa "MALASGPARRALARPGRLGFGGCGAPRRGAYEWGVRSTRKPEPPPLDRVYEI 
+PGLEPITYAGKMHFMPGLARPVFPPWDPGWTHPKFRRLPPLHEHPLYKDQACYVFHQRCRLLEGVKQALWLTKTQLIE +GLPEKVLRLADDPRNHIENQDERVLNAISHARLWHSTEDIPKRETYCPVIVDSLIQLCKSQILKHPSLARRICAQNNT +LSATWNRESILLQVHGSSGARLNAKDPLPPVASQEEVEATKNHVLETFYPISPTMGLQECNVYDVNDDTGFQEGYPYP +CPHTLYFLESANLRPRRFQPDQLRAKMILFAFGSALAQARLLYGNDSKVLEQPVVVQSVGTDGRLFQFLVLQLNTTDL +ASDEGVKNLAWVDSDQLLYQHFWCLPVIKKKVVVEPVGPIGFQPETFRKFLALYLHGAV" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 247", + location int { + from 115, + to 123, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str helix, + comment "helix 248", + location int { + from 133, + to 140, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str helix, + comment "helix 249", + location int { + from 149, + to 161, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str helix, + comment "helix 250", + location int { + from 173, + to 188, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str helix, + comment "helix 251", + location int { + from 241, + to 248, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str helix, + comment "helix 252", + location int { + from 305, + to 327, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str helix, + comment "helix 253", + location int { + from 408, + to 418, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 314", + location int { + from 102, + to 108, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 
315", + location int { + from 124, + to 130, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 316", + location int { + from 199, + to 204, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 317", + location int { + from 205, + to 211, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 318", + location int { + from 212, + to 215, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 319", + location int { + from 216, + to 224, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 320", + location int { + from 225, + to 228, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 321", + location int { + from 229, + to 232, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 322", + location int { + from 264, + to 268, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 323", + location int { + from 274, + to 277, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 324", + location int { + from 285, + to 294, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 325", + location int { + from 332, + to 335, + id pdb { + mol "5AJ4", 
+ rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 326", + location int { + from 338, + to 347, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 327", + location int { + from 349, + to 358, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 328", + location int { + from 359, + to 362, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 329", + location int { + from 370, + to 384, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 330", + location int { + from 388, + to 393, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 331", + location int { + from 394, + to 400, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + }, + { + data psec-str sheet, + comment "strand 332", + location int { + from 401, + to 406, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Ba" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML38, MRPL38", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div 
"MAM" + } + } + }, + num enum { + num 380, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", 
+ "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325", + "326", + "327", + "328", + "329", + "330", + "331", + "332", + "333", + "334", + "335", + "336", + "337", + "338", + "339", + "340", + "341", + "342", + "343", + "344", + "345", + "346", + "347", + "348", + "349", + "350", + "351", + "352", + "353", + "354", + "355", + "356", + "357", + "358", + "359", + "360", + "361", + "362", + "363", + "364", + "365", + "366", + "367", + "368", + "369", + "370", + "371", + "372", + "373", + "374", + "375", + "376", + "377", + "378", + "379", + "380" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 380, + seq-data ncbieaa "MAAPWWRAALCASRRWRGFSTSAALSRRAAPLGPMPNEDIDVSDLERLKKYR +SFDRYRRRAEQEARKPHWWRTYREHFGEESGPKDRVDIGLPPPKVSRTQQLLERKQALRELRANVEEERAARLQTARI +PLEAVRAEWERTCGPYHKQRLAEYCGLYRDLFHGATFVPRVPLHVAYAVGEDDLMPVYHGNEVTPTEAAQAPEVTYEA +DEGSLWTLLLTNLDGHLLEPDAEYVHWLVTNIPGNRVTEGQETCPYLPPFPARGSGFHRFAFLLFKQDKRIDFSGDTR +PSPCYQLAQRTFHTFDFYKKHQDAMTPAGLAFFQCRWDDSVTRVFHQLLDMREPVFEFVRPPPYHPKQKRFPHRQPLR +YLDRYRDSHEPTYGIY" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 254", + location int { + from 53, + to 64, + id pdb { + mol "5AJ4", + rel std { + year 2018, + 
month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str helix, + comment "helix 255", + location int { + from 99, + to 113, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str helix, + comment "helix 256", + location int { + from 131, + to 140, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str helix, + comment "helix 257", + location int { + from 143, + to 154, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str helix, + comment "helix 258", + location int { + from 299, + to 306, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str sheet, + comment "strand 333", + location int { + from 172, + to 179, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str sheet, + comment "strand 334", + location int { + from 181, + to 186, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str sheet, + comment "strand 335", + location int { + from 188, + to 192, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str sheet, + comment "strand 336", + location int { + from 201, + to 205, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str sheet, + comment "strand 337", + location int { + from 211, + to 221, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str sheet, + comment "strand 338", + location int { + from 229, + to 232, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str 
sheet, + comment "strand 339", + location int { + from 233, + to 240, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str sheet, + comment "strand 340", + location int { + from 250, + to 253, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str sheet, + comment "strand 341", + location int { + from 263, + to 274, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str sheet, + comment "strand 342", + location int { + from 295, + to 298, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data psec-str sheet, + comment "strand 343", + location int { + from 311, + to 322, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + }, + { + data region "Domain 58", + comment "NCBI Domains", + location int { + from 123, + to 379, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bb" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML39, MRPL39", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 334, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + 
"37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + 
"266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 334, + seq-data ncbieaa "MAMGAWGLRLWRAVPRGEAGWRLIATSPASQLSPTELIEMQNDLFNKEKNRQ +LSLTPRTEKIEVKHVGKTDPGTVFVMNKNISTPYSCAMHLSEWYCRKSILALVDGQPWDMYKPLTKSCEIKFLTFKDD +DPGEVNKAYWRSCAMMMGCVIERAFKDEYVVSLVRAPEVPVIAGAFCYDVVLDKRLDEWMPTKENLHSFTKDARALIY +KDLPFETLEVEAKVALEIFQHNKYKLDFIEEKASQNPERIVKLHRFGDFIDVSEGPLIPRTSICFQYEVSAVHNLQTQ +SSLVRRFQGLSLPVHLRAHFTIWNKLLERSRKMVTEDKTKPTEESAST" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 259", + location int { + from 33, + to 53, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str helix, + comment "helix 260", + location int { + from 132, + to 152, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str helix, + comment "helix 261", + location int { + from 192, + to 207, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str helix, + comment "helix 262", + location int { + from 230, + to 242, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str helix, + comment "helix 263", + location int { + from 305, + to 315, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + 
day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 344", + location int { + from 59, + to 66, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 345", + location int { + from 73, + to 79, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 346", + location int { + from 100, + to 105, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 347", + location int { + from 106, + to 110, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 348", + location int { + from 118, + to 125, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 349", + location int { + from 159, + to 166, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 350", + location int { + from 174, + to 182, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 351", + location int { + from 209, + to 219, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 352", + location int { + from 246, + to 254, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 353", + location int { + from 255, + to 260, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + 
comment "strand 354", + location int { + from 266, + to 269, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 355", + location int { + from 272, + to 280, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 356", + location int { + from 281, + to 285, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data psec-str sheet, + comment "strand 357", + location int { + from 286, + to 298, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data region "Domain 59", + comment "NCBI Domains", + location mix { + int { + from 0, + to 56, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + }, + int { + from 209, + to 270, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + } + }, + { + data region "Domain 60", + comment "NCBI Domains", + location int { + from 57, + to 128, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + }, + { + data region "Domain 61", + comment "NCBI Domains", + location mix { + int { + from 129, + to 208, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + }, + int { + from 271, + to 333, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bc" + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bd" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML40, MRPL40", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib 
"specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 206, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 206, + seq-data ncbieaa "MAAAALGAASRTLRPASRLVGAWPTQTRDAHERGSLFSFWGLVPMRAEPLRK 
+KKKVDPKKDQAAKDRLKKRIRRLEKASQELIPIEDFITPVKFLNKERQRPPVELPFEESERRALLLKRWSLYKQREHE +MERSAIRSLLEAQEEALQELRLSSPELHAEATKRDPSLFPFERQGPDYTPPISDYQPPEGRYQDITKVYTQVEFKK" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 264", + location int { + from 107, + to 152, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bd" + } + } + }, + { + data psec-str helix, + comment "helix 265", + location int { + from 153, + to 160, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bd" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Be" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML41, MRPL41", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 135, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + 
"104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 135, + seq-data ncbieaa "MGLLSGAARALVRGADRMSKWTSKRGPRTFCKGRGAKGTGFHGRDGKFVQIK +EMIPELVVPELAGFKLKPYVNYRAPEGTDTPLTAKQLFLETAAPAIEKDFKAGTFDPEHLEKYGFEPTQEGKLFQLYP +KNFPR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 266", + location int { + from 85, + to 103, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Be" + } + } + }, + { + data psec-str sheet, + comment "strand 358", + location int { + from 39, + to 43, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Be" + } + } + }, + { + data psec-str sheet, + comment "strand 359", + location int { + from 45, + to 49, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Be" + } + } + }, + { + data psec-str sheet, + comment "strand 360", + location int { + from 125, + to 128, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Be" + } + } + }, + { + data psec-str sheet, + comment "strand 361", + location int { + from 129, + to 132, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Be" + } + } + }, + { + data region "Domain 62", + comment "NCBI Domains", + location int { + from 68, + to 134, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Be" + } + } + }, + { + data het "(MG,167486)", + location mix { + bond { + a { + point 26, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Be" + } + } + }, + bond { + a { + point 22, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + 
day 4 + }, + chain-id "Be" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML42, MRPL42", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 142, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 142, + seq-data ncbieaa "MALAAVKWVISSRTILKHLFPIQNSASYCVCHKSTYSSLPDDYNCKVELALT 
+SDGRTIVCYHPSVDIPYEHTKPIPXXXXXXXXXXXXXXXXXXXXXXXXEHLEQGPMIEQLSKMFFTTKHRWYPRGQYH +RRRRKPNPPKDR" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK", + location pnt { + point 76, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 77, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 78, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 79, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 80, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 81, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 82, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 83, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 84, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 85, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 86, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + 
location pnt { + point 87, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 88, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 89, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 90, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 91, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 92, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 93, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 94, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 95, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 96, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 97, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 98, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data non-std-residue "UNK", + 
location pnt { + point 99, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data psec-str helix, + comment "helix 267", + location int { + from 87, + to 94, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data psec-str helix, + comment "helix 268", + location int { + from 104, + to 115, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data psec-str sheet, + comment "strand 362", + location int { + from 44, + to 51, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data psec-str sheet, + comment "strand 363", + location int { + from 56, + to 62, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + }, + { + data region "Domain 63", + comment "NCBI Domains", + location int { + from 0, + to 99, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bf" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bg" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML43, MRPL43", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 159, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + 
"38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 159, + seq-data ncbieaa "MTARGTASRFLTSVLHNGLGRYVQQLQRLSFSLSRDAPSSRGAREFVEREVT +DFARRNPGVVIYVNPRPCCVPRVVAEYLNGAVREESIHCKSVEEIAALVQKLADQSGLDVIRIRKPFHTDSPSIQGQW +HPFTNKPTTLGGLRPREVQDPAPAQVQAQ" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 269", + location int { + from 40, + to 56, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bg" + } + } + }, + { + data psec-str helix, + comment "helix 270", + location int { + from 93, + to 105, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bg" + } + } + }, + { + data psec-str sheet, + comment "strand 364", + location int { + from 6, + to 9, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bg" + } + } + }, + { + data psec-str sheet, + comment "strand 365", + location int { + from 24, + to 33, + id pdb { + mol "5AJ4", + 
rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bg" + } + } + }, + { + data psec-str sheet, + comment "strand 366", + location int { + from 60, + to 67, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bg" + } + } + }, + { + data psec-str sheet, + comment "strand 367", + location int { + from 72, + to 79, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bg" + } + } + }, + { + data psec-str sheet, + comment "strand 368", + location int { + from 81, + to 88, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bg" + } + } + }, + { + data psec-str sheet, + comment "strand 369", + location int { + from 112, + to 115, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bg" + } + } + }, + { + data region "Domain 64", + comment "NCBI Domains", + location int { + from 17, + to 108, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bg" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML44, MRPL44", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 332, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", 
+ "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + 
"278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 332, + seq-data ncbieaa "MASGLVRLLQWGPRRLLAPAAPTLAPPVRGAKKGFRAAYRFQKELERWRLLR +CPPPPVRRSEKPNWDYHAEIQAFGHRLQETFSLDLLKTAFVNSCYIKSEEAKRQKLGIDKEAALLNLKDNQELSEQGI +SFSQTCLTQFFEDAFPDLPTEGVTSLVDFLTSEEVVCHVARNLAVEQLALSAEFPVPPPVLRQTFFAVIGALLQSSGP +ERTALFIRDFLITQMTGKELFEMWTITNPMGLLVEELKKRKISAPESRLTRQSGSTTALPVYFVGLYCDRKLIAEGPG +ETVLVAEEEAARVALRKLFGFTENRRPWDYSKPKEHVRAEKTITAS" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 271", + location int { + from 34, + to 52, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str helix, + comment "helix 272", + location int { + from 67, + to 78, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str helix, + comment "helix 273", + location int { + from 84, + to 91, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str helix, + comment "helix 274", + location int { + from 94, + to 107, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str helix, + comment "helix 275", + location int { + from 111, + to 118, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str helix, + comment "helix 276", + location int { + from 122, + to 144, + id pdb { + 
mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str helix, + comment "helix 277", + location int { + from 149, + to 160, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str helix, + comment "helix 278", + location int { + from 161, + to 172, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str helix, + comment "helix 279", + location int { + from 188, + to 204, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str helix, + comment "helix 280", + location int { + from 207, + to 217, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str helix, + comment "helix 281", + location int { + from 236, + to 247, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str helix, + comment "helix 282", + location int { + from 288, + to 304, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str sheet, + comment "strand 370", + location int { + from 252, + to 261, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str sheet, + comment "strand 371", + location int { + from 267, + to 275, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data psec-str sheet, + comment "strand 372", + location int { + from 278, + to 286, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + }, + { + data region "Domain 65", + comment "NCBI Domains", + location int { + from 60, + to 226, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + 
chain-id "Bh" + } + } + }, + { + data region "Domain 66", + comment "NCBI Domains", + location int { + from 227, + to 331, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bh" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML45, MRPL45", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 312, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + 
"152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 312, + seq-data ncbieaa "MAAPVTRGLSCLPRVLGWWSRQPVLVTQSTAVVPVRTKKRFTPPTYQPKYKS +EKEFVEHARKAGLVIPHERLERPIHLACTAGIFDAYVPPEGDARISSLSKEGLAQRAERLKKNVASQLSIRKIRESDP +NFKIKDFPEKAKDIFIEAHLCLNNSDHDRLHTLVTENCFPDMVWDIRYKTVRWSFVESLEPPQVVQVRCSSLMNQGNI +YGQVTVRMHTRQTLAIYDRFGRLMYGQEDVPRDVLEYVVFEKHLVDPYGSWRMHGKIIPPWAPPKQPILKTVMIPGPQ +LKPWEEFEEPQGEVHKPQPARRRNDS" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 283", + location int { + from 100, + to 114, + id pdb { + mol "5AJ4", + rel std { + 
year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + }, + { + data psec-str helix, + comment "helix 284", + location int { + from 115, + to 127, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + }, + { + data psec-str helix, + comment "helix 285", + location int { + from 137, + to 153, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + }, + { + data psec-str helix, + comment "helix 286", + location int { + from 156, + to 163, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + }, + { + data psec-str sheet, + comment "strand 373", + location int { + from 177, + to 188, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + }, + { + data psec-str sheet, + comment "strand 374", + location int { + from 191, + to 202, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + }, + { + data psec-str sheet, + comment "strand 375", + location int { + from 205, + to 214, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + }, + { + data psec-str sheet, + comment "strand 376", + location int { + from 215, + to 220, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + }, + { + data psec-str sheet, + comment "strand 377", + location int { + from 221, + to 226, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + }, + { + data psec-str sheet, + comment "strand 378", + location int { + from 228, + to 234, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + }, + { + data psec-str sheet, + comment "strand 379", + location int { + from 238, + to 242, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + }, + { + 
data psec-str sheet, + comment "strand 380", + location int { + from 243, + to 252, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + }, + { + data psec-str sheet, + comment "strand 381", + location int { + from 261, + to 264, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bi" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML46, MRPL46", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 279, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", 
+ "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + "200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 279, + seq-data ncbieaa "MAAPVRRTMLRVVRGWRRFEGPWAHSLGSRNLALAVAPSSSSSPWRLLGALC +LQRPPLVTKPLTPLQEEMADLLQQIEIERSLYSDHELRALDEAQQLAKKKSDLYEEEDEQNILLAQDLEDMWEQKFLH +FKLGARLTEADKKDDRTSLHRKLDRNLILLVREKLGDQDIWMLPQSDWQPGETLRQTAERTLATLSENNMEAKFLGNA +PCGHYKFKFPQAMRTETSLGAKVFFFKALLLTGDFSQAGKKGHHVWVSKEELGDYLKPKYLAQVRRFLLDL" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 287", + location int { + from 64, + to 79, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + }, + { + data psec-str helix, + comment "helix 288", + location int { + from 85, + to 102, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + }, + { + data psec-str helix, + comment "helix 289", + location int { + from 116, + to 129, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + }, + { + data psec-str helix, + comment "helix 290", + location int { + from 183, + to 193, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + }, + { + data psec-str helix, + comment "helix 291", + location int { + from 256, + to 263, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + }, + { + data psec-str sheet, + comment "strand 382", + location int { + from 43, + to 58, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + }, + { + data psec-str sheet, + comment "strand 383", + location int { + from 152, + to 165, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + }, + { + data psec-str sheet, + comment "strand 384", + location int { + from 166, + to 172, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + }, + { + data psec-str sheet, + comment "strand 385", + location int { + from 173, + to 177, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + }, + { + data psec-str sheet, + comment "strand 386", + location int { + from 196, + to 203, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + }, + { + data psec-str sheet, + comment "strand 387", + location int { + from 209, + to 215, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + }, + { + data psec-str sheet, + comment "strand 388", + location int { + from 228, + to 236, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + 
} + } + }, + { + data psec-str sheet, + comment "strand 389", + location int { + from 240, + to 243, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + }, + { + data psec-str sheet, + comment "strand 390", + location int { + from 250, + to 255, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bj" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bk" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML48, MRPL48", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 212, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + 
"133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 212, + seq-data ncbieaa "MNGALGKALCLRNDTVLKQALSLIRVRASGESPICSAGGILLSTSRHYRSKP +THGIGRYKHLVKAQEPKKKKGKVEVRPINLGTDYEYGVLNIHLIAYDMALAESYAQYVHNLCNHLAIKVEESYAMPTK +TMEVLQLQEQGSKMFLDAVLTTHERVVQISGLSATFAEIFLEIIHSNLPEGVKLSVREHTEEDFKGRFKARPELEELL +AKLN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 292", + location int { + from 99, + to 116, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bk" + } + } + }, + { + data psec-str helix, + comment "helix 293", + location int { + from 163, + to 176, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bk" + } + } + }, + { + data psec-str sheet, + comment "strand 391", + location int { + from 78, + to 82, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bk" + } + } + }, + { + data psec-str sheet, + comment "strand 392", + location int { + from 84, + to 89, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bk" + } + } + }, + { + data psec-str sheet, + comment "strand 393", + location int { + from 90, + to 97, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bk" + } + } + }, + { + data 
psec-str sheet, + comment "strand 394", + location int { + from 119, + to 126, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bk" + } + } + }, + { + data psec-str sheet, + comment "strand 395", + location int { + from 128, + to 137, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bk" + } + } + }, + { + data psec-str sheet, + comment "strand 396", + location int { + from 143, + to 150, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bk" + } + } + }, + { + data psec-str sheet, + comment "strand 397", + location int { + from 151, + to 158, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bk" + } + } + }, + { + data psec-str sheet, + comment "strand 398", + location int { + from 159, + to 162, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bk" + } + } + }, + { + data psec-str sheet, + comment "strand 399", + location int { + from 181, + to 188, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bk" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bl" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML49, MRPL49", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 166, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "34", + "35", + "36", + "37", + 
"38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 166, + seq-data ncbieaa "MAATVLCGVLRAWRTGVPLGCGLRRLSQTQGTPEYPSFVESVDEYHFVERLL +PPASIPRPPKHEHYPTPSGWQPPRDPAPSLPYFVRRSRMHNIPVYRDITHGNRQMTVIRKVEGDIWALQKDVEDFLSP +LLGKTPVTQVNEVTGTLRVKGYFDQQLKAWLLEKGF" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 294", + location int { + from 117, + to 127, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bl" + } + } + }, + { + data psec-str helix, + comment "helix 295", + location int { + from 153, + to 163, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bl" + } + } + }, + { + data psec-str sheet, + comment "strand 400", + location int { + from 82, + to 86, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bl" + } + } + }, + { + 
data psec-str sheet, + comment "strand 401", + location int { + from 94, + to 101, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bl" + } + } + }, + { + data psec-str sheet, + comment "strand 402", + location int { + from 103, + to 111, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bl" + } + } + }, + { + data psec-str sheet, + comment "strand 403", + location int { + from 112, + to 115, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bl" + } + } + }, + { + data psec-str sheet, + comment "strand 404", + location int { + from 136, + to 141, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bl" + } + } + }, + { + data psec-str sheet, + comment "strand 405", + location int { + from 144, + to 150, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bl" + } + } + }, + { + data het "(MG,167452)", + location mix { + bond { + a { + point 98, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bl" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bm" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML50, MRPL50", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 159, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + 
"", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 159, + seq-data ncbieaa "MAARWVSGLARRSLTCAVSGAPRREFWSPFRKEKQPVVAETVEEVKKEPILV +CPPIQSRTYIPPEDLQSRLESHVKEVFGSSVPSNWQDISLEDVHLKFSFLARLANDLRHAVPNSRLHQMCRVRDVLDF +YNVPVQDRSKFDELIASNLPHNLKITWGY" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 296", + location int { + from 66, + to 78, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bm" + } + } + }, + { + data psec-str helix, + comment "helix 297", + location int { + from 94, + to 108, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bm" + } + } + }, + { + data psec-str helix, + comment "helix 298", + location int { + from 124, + to 131, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bm" + } + } + }, + { + data psec-str helix, + comment "helix 299", + location int { + from 138, + to 146, + id pdb { + mol "5AJ4", + rel 
std { + year 2018, + month 4, + day 4 + }, + chain-id "Bm" + } + } + }, + { + data psec-str sheet, + comment "strand 406", + location int { + from 58, + to 61, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bm" + } + } + }, + { + data psec-str sheet, + comment "strand 407", + location int { + from 133, + to 136, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bm" + } + } + }, + { + data region "Domain 67", + comment "NCBI Domains", + location int { + from 0, + to 137, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bm" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bn" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML51, MRPL51", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 128, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + 
"99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 128, + seq-data ncbieaa "MAGSLSWVTGRGLWGQLPLTCRSFSLGIPRLFHVRVTLPPRKVVDRWNEKRA +MFGVYDNIGILGNFEKHPKELIKGPIWLRGWKGNELQRCIRKKRMVGNRMFIDDLHNLNKRISYLYKHFNRHGKYR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 300", + location int { + from 86, + to 96, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bn" + } + } + }, + { + data psec-str helix, + comment "helix 301", + location int { + from 103, + to 122, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bn" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bo" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML52, MRPL52", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 124, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", 
+ "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 124, + seq-data ncbieaa "MAAWGILLSTGVRRLHCGTAAQAGSQWRLQQGFAANPSGYGPLTELPDWSYA +DGRPAPPMKGQLRRKAQREKFARRVVLLSQEMDAGLQAWQLRQQEKLQEEEGKQKNALKSKGALLQNPQPSQ" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 302", + location int { + from 24, + to 31, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bo" + } + } + }, + { + data psec-str helix, + comment "helix 303", + location int { + from 60, + to 106, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bo" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bp" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML53, MRPL53", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 112, + names { + "", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + 
"30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 112, + seq-data ncbieaa "MAAALARLGLRAVKQVRVQFCPFEKNVESTRTFLQAVSSEKVRCTNLNCSVI +ADVRHDGSEPCVDVLFGDGHRLIMRGAHLTAQEMLTAFASHIQARGAAGSGDKPSASTGR" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 304", + location int { + from 27, + to 37, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bp" + } + } + }, + { + data psec-str helix, + comment "helix 305", + location int { + from 82, + to 95, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bp" + } + } + }, + { + data psec-str sheet, + comment "strand 408", + location int { + from 11, + to 20, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bp" + } + } + }, + { + data psec-str sheet, + comment "strand 409", + location int { + from 47, + to 55, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bp" + } + } + }, + { + data psec-str sheet, + comment "strand 410", + location int { + from 61, + to 68, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bp" + } + } + }, + { + data psec-str sheet, + comment "strand 411", + location int { + from 70, + to 77, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + 
day 4 + }, + chain-id "Bp" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bq" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML54, MRPL54", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 138, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 138, + seq-data ncbieaa "MAARRLFGAARSWAAWRAWELSDAAVSGRLHVRNYAKRPVIKGGKGGKGAVV +GEALKDPEVCTDPFRLTTHAMGVNIYKEGQDVVLKPDSEYPEWLFEMNVGPPKKLEELDPETREYWRLLRKHNIWRHN +RLSKNRKF" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 306", + location int { + from 118, + to 132, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bq" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bt" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML63, MRPL57", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 102, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 102, + seq-data ncbieaa "MFLTALLRRNRIPGRQWIGKHRRPRPVSAQAKQNMIRRLETEAENQYWLSRP +FLTAEQERGHAAVRRAAAFQALKAAQAARFPAHRRLEEQLGHLLVTRKWS" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 307", + location int { + from 28, + to 48, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bt" + } + } + }, + { + data psec-str helix, + comment "helix 308", 
+ location int { + from 63, + to 78, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bt" + } + } + }, + { + data het "(MG,167387)", + location mix { + bond { + a { + point 13, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bt" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML62, MRPL58", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 205, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "", + "", + "", + "", + "", + "", + "", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", 
+ "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 205, + seq-data ncbieaa "MAAARCLRWGLNRAGAWLLPSPTRYPRRALHKQVEGTEFQSIYSLDKLYPES +RGSDTAWRLPDDAKQANDIPVDRLTISYCRSSGPGGQNVNKVNSKAEVRFHLASADWIAEPVRLKLAVKHKNRINRSG +ELILTSECSRYQFRNLADCLQKLRDMIAEASQPXXXXXXXXXXLRRSRIENMNRERLRKKRISSAIKTSRRVDVD" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK", + location pnt { + point 163, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 164, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 165, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 166, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 167, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 168, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 169, + 
id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 170, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 171, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 172, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data psec-str helix, + comment "helix 309", + location int { + from 111, + to 118, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data psec-str helix, + comment "helix 310", + location int { + from 142, + to 160, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data psec-str helix, + comment "helix 311", + location int { + from 174, + to 187, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data psec-str sheet, + comment "strand 412", + location int { + from 75, + to 83, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data psec-str sheet, + comment "strand 413", + location int { + from 94, + to 103, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data psec-str sheet, + comment "strand 414", + location int { + from 124, + to 127, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data psec-str sheet, + comment "strand 415", + location int { + from 129, + to 136, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + }, + { + data region "Domain 68", + comment "NCBI 
Domains", + location int { + from 0, + to 167, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bu" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML64, MRPL59", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 222, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", 
+ "154", + "155", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 222, + seq-data ncbieaa "MAAPVQQARTLLRLVTTLGQGSRGYRARPPPRRSQEPRWPDPDDPLTPRWQL +SPRYAAKQFARHGAASGVAAGSLWPSQEQLRELEAEEREWYPSLAAMQESLRVQQLAEEQKRQAREQLIEECMAKMPQ +MIENWRQQQQERRXXXXXXXXXXXXLQAEAQERLGYHVDPRSARFQELLQDMEKQHRKRLKEEKQRKKKEARAAAMAA +AAAQDPADSETPSS" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK", + location pnt { + point 143, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 144, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 145, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 146, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 147, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 148, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 149, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data non-std-residue "UNK", 
+ location pnt { + point 150, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 151, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 152, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 153, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 154, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data psec-str helix, + comment "helix 312", + location int { + from 53, + to 62, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data psec-str helix, + comment "helix 313", + location int { + from 78, + to 91, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data psec-str helix, + comment "helix 314", + location int { + from 95, + to 128, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data psec-str helix, + comment "helix 315", + location int { + from 129, + to 153, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + }, + { + data region "Domain 69", + comment "NCBI Domains", + location int { + from 71, + to 221, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bv" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML65, MRPS30", + source { + org { + taxname "Sus scrofa", + common 
"pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 433, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196", + "197", + "198", + "199", + 
"200", + "201", + "202", + "203", + "204", + "205", + "206", + "207", + "208", + "209", + "210", + "211", + "212", + "213", + "214", + "215", + "216", + "217", + "218", + "219", + "220", + "221", + "222", + "223", + "224", + "225", + "226", + "227", + "228", + "229", + "230", + "231", + "232", + "233", + "234", + "235", + "236", + "237", + "238", + "239", + "240", + "241", + "242", + "243", + "244", + "245", + "246", + "247", + "248", + "249", + "250", + "251", + "252", + "253", + "254", + "255", + "256", + "257", + "258", + "259", + "260", + "261", + "262", + "263", + "264", + "265", + "266", + "267", + "268", + "269", + "270", + "271", + "272", + "273", + "274", + "275", + "276", + "277", + "278", + "279", + "280", + "281", + "282", + "283", + "284", + "285", + "286", + "287", + "288", + "289", + "290", + "291", + "292", + "293", + "294", + "295", + "296", + "297", + "298", + "299", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "316", + "317", + "318", + "319", + "320", + "321", + "322", + "323", + "324", + "325", + "326", + "327", + "328", + "329", + "330", + "331", + "332", + "333", + "334", + "335", + "336", + "337", + "338", + "339", + "340", + "341", + "342", + "343", + "344", + "345", + "346", + "347", + "348", + "349", + "350", + "351", + "352", + "353", + "354", + "355", + "356", + "357", + "358", + "359", + "360", + "361", + "362", + "363", + "364", + "365", + "366", + "367", + "368", + "369", + "370", + "371", + "372", + "373", + "374", + "375", + "376", + "377", + "378", + "379", + "380", + "381", + "382", + "383", + "384", + "385", + "386", + "387", + "388", + "389", + "390", + "391", + "392", + "393", + "394", + "395", + "396", + "397", + "398", + "399", + "400", + "401", + "402", + "403", + "404", + "405", + "406", + "407", + "408", + "409", + "410", + "411", + "412", + "413", + "414", + "415", + "416", + "417", + "418", + "419", + "420", + "421", + 
"422", + "423", + "424", + "425", + "426", + "", + "", + "", + "", + "", + "", + "" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 433, + seq-data ncbieaa "MAAARCRRFPLRGAGLSLHTAAKAAVTAPEVTGPDVPATPVARYPPIVASLT +AKSKAARQRRVEQWQATVHAAKSVDEKLRILTKMQFMKYVVYPQTFALNADNWYQSFTKTVFLSGLPPTPAKLEPEPT +LDITALREAVCDCLLQEHFFLRRKKRAPVIQDREAIASPFLDQLVASLTGLLSVHNPVLAAAALDCKRPVHFFWLRGE +EIIPRGHRKGRVDALRYQINDKPHNQIRISRQLPEFVPLDYSIPIEVPVMSCKPDKLPLFKRQYENTIFIGSKTADPL +CYGHTQFHLLPDKLKREKLLKQNCADQIEVVFRANAIASLFAWTGAQAMYQGFWSEADVTRPFVSQGVITDGKYFSFF +CYQLNTLALTAQADQNNPRKNICWGTQSKPLYETIEDNNVKGFNDDVLLQLVQFLLNRPKEDKSQLLEN" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 316", + location int { + from 55, + to 71, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str helix, + comment "helix 317", + location int { + from 75, + to 83, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str helix, + comment "helix 318", + location int { + from 102, + to 109, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str helix, + comment "helix 319", + location int { + from 132, + to 147, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str helix, + comment "helix 320", + location int { + from 167, + to 180, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str helix, + comment "helix 321", + location int { + from 300, + to 308, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str helix, + comment "helix 322", + location int { + from 313, + to 336, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id 
"Bw" + } + } + }, + { + data psec-str helix, + comment "helix 323", + location int { + from 409, + to 419, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 416", + location int { + from 88, + to 92, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 417", + location int { + from 110, + to 116, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 418", + location int { + from 191, + to 195, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 419", + location int { + from 196, + to 202, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 420", + location int { + from 203, + to 207, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 421", + location int { + from 208, + to 211, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 422", + location int { + from 216, + to 222, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 423", + location int { + from 223, + to 230, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 424", + location int { + from 231, + to 237, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 425", + 
location int { + from 267, + to 270, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 426", + location int { + from 273, + to 278, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 427", + location int { + from 286, + to 295, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 428", + location int { + from 347, + to 357, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 429", + location int { + from 358, + to 369, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 430", + location int { + from 382, + to 390, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 431", + location int { + from 391, + to 400, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + }, + { + data psec-str sheet, + comment "strand 432", + location int { + from 401, + to 407, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bw" + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bx" + } + }, + descr { + comment "MITORIBOSOMAL PROTEIN ML66, MRPS18A", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; 
Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 196, + names { + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124", + "125", + "126", + "127", + "128", + "129", + "130", + "131", + "132", + "133", + "134", + "135", + "136", + "137", + "138", + "139", + "140", + "141", + "142", + "143", + "144", + "145", + "146", + "147", + "148", + "149", + "150", + "151", + "152", + "153", + "154", + "155", + "156", + "157", + "158", + "159", + "160", + "161", + "162", + "163", + "164", + "165", + "166", + "167", + "168", + "169", + "170", + "171", + "172", + "173", + "174", + "175", + "176", + "177", + "178", + "179", + "180", + "181", + "182", + "183", + "184", + "185", + "186", + "187", + "188", + "189", + "190", + "191", + "192", + "193", + "194", + "195", + "196" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 196, + seq-data ncbieaa "MVGLNVLVSGCGRLLRGLLAGPAATSWARPPSRGFREVVEILDGKTTIIEGR +ITETPQESPNPPNPTGQCPICRWNLKHKYNYEDVLLLSQFIRPHGGMLPRRITGLCQEEHLKIEECVKMAHRAGLLPN 
+HRPKLPEGFVPKSKPRLNRYLTRWSPRSVKPIYNKGHRWNKVRMPVGSPLLKDNVSYSGRPLLLYH" + }, + annot { + { + data ftable { + { + data psec-str helix, + comment "helix 324", + location int { + from 108, + to 124, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bx" + } + } + }, + { + data psec-str sheet, + comment "strand 433", + location int { + from 36, + to 43, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bx" + } + } + }, + { + data psec-str sheet, + comment "strand 434", + location int { + from 44, + to 53, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bx" + } + } + }, + { + data psec-str sheet, + comment "strand 435", + location int { + from 91, + to 94, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bx" + } + } + }, + { + data psec-str sheet, + comment "strand 436", + location int { + from 96, + to 99, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bx" + } + } + }, + { + data psec-str sheet, + comment "strand 437", + location int { + from 164, + to 167, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bx" + } + } + }, + { + data psec-str sheet, + comment "strand 438", + location int { + from 168, + to 171, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bx" + } + } + }, + { + data region "Domain 70", + comment "NCBI Domains", + location int { + from 73, + to 144, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bx" + } + } + }, + { + data het "(ZN,167575)", + location mix { + bond { + a { + point 69, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bx" + } + } + }, + bond { + a { + point 72, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bx" + } + } + }, + bond { + a { + point 107, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bx" + } + } + } + } + } + } + } + } + }, + seq { + id { + pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + }, + descr { + comment "UNASSIGNED SECONDARY STRUCTURE ELEMENTS", + source { + org { + taxname "Sus scrofa", + common "pig", + db { + { + db "taxon", + tag id 9823 + } + }, + orgname { + name binomial { + genus "Sus", + species "scrofa" + }, + attrib "specified", + lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; + Euteleostomi; Mammalia; Eutheria; Laurasiatheria; Cetartiodactyla; Suina; + Suidae; Sus", + gcode 1, + mgcode 2, + div "MAM" + } + } + }, + num enum { + num 94, + names { + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "99", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "300", + "301", + "302", + "303", + "304", + "305", + "306", + "307", + "308", + "309", + "310", + "311", + "312", + "313", + "314", + "315", + "399", + "400", + "401", + "402", + "403", + "404", + "405", + "406", + "407", + "408", + "409", + "410", + "411", + "412", + "413", + "414", + "415", + "416", + "417", + "418", + "419", + "420", + "421", + "422", + "423", + "424", + "425", + "601", + "602", + "603", + "604", + "605", + "606", + "607", + "608", + "609", + "610", + "611", + "612", + "613", + "614", + "615" + } + }, + molinfo { + biomol peptide + } + }, + inst { + repr raw, + mol aa, + length 94, + seq-data ncbieaa "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + }, + annot { + { + data ftable { + { + data non-std-residue "UNK NH3+", + location pnt { + point 0, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + 
point 1, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 2, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 3, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 4, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 5, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 6, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 7, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 8, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 9, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 10, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 11, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 12, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 13, + id 
pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 14, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 15, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 16, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 17, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 18, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 19, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 20, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 21, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 22, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 23, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 24, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 25, + id pdb { + 
mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 26, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 27, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 28, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 29, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 30, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 31, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 32, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 33, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 34, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 35, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 36, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 37, + id pdb { + mol 
"5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 38, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 39, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 40, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 41, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 42, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 43, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 44, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 45, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 46, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 47, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 48, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 49, + id pdb { + mol "5AJ4", 
+ rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 50, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 51, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 52, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 53, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 54, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 55, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 56, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 57, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 58, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 59, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 60, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 61, + id pdb { + mol "5AJ4", + rel 
std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 62, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 63, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 64, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 65, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 66, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 67, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 68, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 69, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 70, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 71, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 72, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 73, + id pdb { + mol "5AJ4", + rel std { + 
year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 74, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 75, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 76, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 77, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 78, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 79, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 80, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 81, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 82, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 83, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 84, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 85, + id pdb { + mol "5AJ4", + rel std { + year 
2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 86, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 87, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 88, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 89, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 90, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 91, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK", + location pnt { + point 92, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data non-std-residue "UNK COOH", + location pnt { + point 93, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data psec-str helix, + comment "helix 325", + location int { + from 1, + to 25, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data psec-str helix, + comment "helix 326", + location int { + from 37, + to 49, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data psec-str helix, + comment "helix 327", + location int { + from 55, + to 77, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data psec-str helix, + comment 
"helix 328", + location int { + from 80, + to 92, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + }, + { + data region "Domain 71", + comment "NCBI Domains", + location int { + from 32, + to 93, + id pdb { + mol "5AJ4", + rel std { + year 2018, + month 4, + day 4 + }, + chain-id "Bz" + } + } + } + } + } + } + } + } +} diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/data/a5AJ4.ids b/c++/src/objtools/blast/seqdb_writer/unit_test/data/a5AJ4.ids new file mode 100644 index 00000000..767c57c7 --- /dev/null +++ b/c++/src/objtools/blast/seqdb_writer/unit_test/data/a5AJ4.ids @@ -0,0 +1,83 @@ +5AJ4_AB +5AJ4_AC +5AJ4_AE +5AJ4_AF +5AJ4_AG +5AJ4_AI +5AJ4_AJ +5AJ4_AK +5AJ4_AL +5AJ4_AN +5AJ4_AO +5AJ4_AP +5AJ4_AQ +5AJ4_AR +5AJ4_AU +5AJ4_Aa +5AJ4_Ab +5AJ4_Ac +5AJ4_Ad +5AJ4_Ae +5AJ4_Af +5AJ4_Ag +5AJ4_Ah +5AJ4_Ai +5AJ4_Aj +5AJ4_Ak +5AJ4_Am +5AJ4_An +5AJ4_Ao +5AJ4_Ap +5AJ4_As +5AJ4_Az +5AJ4_B0 +5AJ4_B1 +5AJ4_B2 +5AJ4_B3 +5AJ4_B4 +5AJ4_B5 +5AJ4_B6 +5AJ4_B7 +5AJ4_B8 +5AJ4_B9 +5AJ4_BD +5AJ4_BE +5AJ4_BF +5AJ4_BI +5AJ4_BJ +5AJ4_BK +5AJ4_BN +5AJ4_BO +5AJ4_BP +5AJ4_BQ +5AJ4_BR +5AJ4_BS +5AJ4_BT +5AJ4_BU +5AJ4_BV +5AJ4_BW +5AJ4_BX +5AJ4_BY +5AJ4_Ba +5AJ4_Bb +5AJ4_Bc +5AJ4_Bd +5AJ4_Be +5AJ4_Bf +5AJ4_Bg +5AJ4_Bh +5AJ4_Bi +5AJ4_Bj +5AJ4_Bk +5AJ4_Bl +5AJ4_Bm +5AJ4_Bn +5AJ4_Bo +5AJ4_Bp +5AJ4_Bq +5AJ4_Bt +5AJ4_Bu +5AJ4_Bv +5AJ4_Bw +5AJ4_Bx +5AJ4_Bz diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/data/lclseqs.fsa b/c++/src/objtools/blast/seqdb_writer/unit_test/data/lclseqs.fsa new file mode 100644 index 00000000..3d65a676 --- /dev/null +++ b/c++/src/objtools/blast/seqdb_writer/unit_test/data/lclseqs.fsa @@ -0,0 +1,8 @@ +>seq8 First 80 letters of RVM16940.1 +CLMLKGYRIVAMRYRTRLGEIDIIARRGDLVACVEVKARVSLEDAVFAVTDTAQRRIRAASDLWLSRQGDFHRLSVRYDI +>seq9 All of RVP96559.1 +MFLSDGGMETALIFQEGIELPHFASFVLLSTAEGRRRLLRYYTRYLEIARRHGTGFVLDTATWRANADWGEKLGYDAAAL +RKVNLDAVDLLTGLRTEYERPQAPVVLNGVIGPRGDGYQAGRITADEAEDYHSAQVAAFADSQADMITAVTMTNTEEAIG 
+VVRAAKAHDMPCAISFTVETDGRLVTGRSLQHAIETVDAETGGYPHYYMINCAHPTHFENVLERQSAWVRRIGGIRANAS +TKSHAELDESETLDAGDACDLAERYRSLTGRLPHLRVLGGCCGTDHRHMAAICEACLPRAALSA + diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/data/lclseqs_taxidmap.txt b/c++/src/objtools/blast/seqdb_writer/unit_test/data/lclseqs_taxidmap.txt new file mode 100644 index 00000000..8436a7ab --- /dev/null +++ b/c++/src/objtools/blast/seqdb_writer/unit_test/data/lclseqs_taxidmap.txt @@ -0,0 +1,2 @@ +lcl|seq8 382 +seq9 382 diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/data/pdb_ids.ref b/c++/src/objtools/blast/seqdb_writer/unit_test/data/pdb_ids.ref new file mode 100644 index 00000000..40794791 --- /dev/null +++ b/c++/src/objtools/blast/seqdb_writer/unit_test/data/pdb_ids.ref @@ -0,0 +1,66 @@ +5B64_B +4X62_M +4X64_M +4X65_m +4X66_M +5WNQ_M +5WNR_M +5WNS_M +5WNU_M +5WNV_M +5WNP_M +5WNT_M +6CAO_M +6CAP_MM +6CAQ_M +4M64_A +4M64_B +4M64_C +4M64_D +3S7Y_A +3S7Y_X +3ETQ_A +3ETQ_B +2KJ7_A +2KA9_B +3G7D_A +3GBF_A +2W80_A +2W80_B +2W80_eF +2V64_G +2W81_A +2W81_B +2W81_E +2K7W_B +2V64_B1 +2V64_b +2V64_i4 +2QYF_E +2QYF_F +1KLQ_B +2Z64_A +3MSI_A +1CLH_AA +1JJ2_Y +1KQS_Y +1K8A_1 +1K9M_1 +1KD1_1 +1M1K_1 +1M90_1 +1K73_1 +1KC8_12 +1N8R_1 +1NJI_1 +1Q7Y_1 +1Q81_1 +1Q82_1b +1Q86_1 +1QVF_Y +1QVG_Y +1W2B_YY +3CXC_Y +3OW2_Y +6ES9_A +6ES9_aB diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/data/pdbs.fasta b/c++/src/objtools/blast/seqdb_writer/unit_test/data/pdbs.fasta new file mode 100644 index 00000000..8b6b27fd --- /dev/null +++ b/c++/src/objtools/blast/seqdb_writer/unit_test/data/pdbs.fasta @@ -0,0 +1,68 @@ +>5B64_B Chain B, A Novel Binding Mode Of Maguk Gk Domain Revealed By Dlg Gk Domain In Complex With Kif13b Mbs Domain +GPGSATLNNSLMRLREQIVKANLLVREASYIAEELDKRTEYKVTLQIPTSSLDANRKRGSLLSEPAIQVRRKGKGKQIWS +LEKLENRLLDMRDLYQEWKECEEDSPVSRSYFKRADPFYDEQENHS +>4X62_M Chain M, Crystal Structure Of 30s Ribosomal Subunit From Thermus Thermophilus4X64_M Chain M, Crystal Structure Of 30s Ribosomal Subunit From 
Thermus Thermophilus4X65_m Chain m, Crystal Structure Of 30s Ribosomal Subunit From Thermus Thermophilus4X66_M Chain M, Crystal Structure Of 30s Ribosomal Subunit From Thermus Thermophilus5WNQ_M Chain M, 30S ribosomal protein S135WNR_M Chain M, Crystal Structure of 30S ribosomal subunit from Thermus thermophilus5WNS_M Chain M, 30S ribosomal protein S135WNU_M Chain M, Crystal Structure of 30S ribosomal subunit from Thermus thermophilus5WNV_M Chain M, Crystal Structure of 30S ribosomal subunit from Thermus thermophilus5WNP_M Chain M, Crystal Structure of 30S ribosomal subunit from Thermus thermophilus5WNT_M Chain M, Crystal Structure of 30S ribosomal subunit from Thermus thermophilus6CAO_M Chain M, 30S ribosomal protein S136CAP_MM Chain MM, Crystal Structure of 30S ribosomal subunit from Thermus thermophilus in complex with Sisomicin6CAQ_M Chain M, Crystal Structure of 30S ribosomal subunit from Thermus thermophilus +ARIAGVEIPRNKRVDVALTYIYGIGKARAKEALEKTGINPATRVKDLTEAEVVRLREYVENTWKLEGELRAEVAANIKRL +MDIGCYRGLRHRRGLPVRGQRTRTNARTRKGPRKTVAG +>4M64_A Chain A, 3d Crystal Structure Of Na+/melibiose Symporter Of Salmonella Typhimurium4M64_B Chain B, 3d Crystal Structure Of Na+/melibiose Symporter Of Salmonella Typhimurium4M64_C Chain C, 3d Crystal Structure Of Na+/melibiose Symporter Of Salmonella Typhimurium4M64_D Chain D, 3d Crystal Structure Of Na+/melibiose Symporter Of Salmonella Typhimurium +MSISMTTKLSYGFGAFGKDFAIGIVYMYLMYYYTDVVGLSVGLVGTLFLVARIWDAINDPIMGWIVNATRSRWGKFKPWI +LIGTLTNSLVLFLLFSAHLFEGTAQVVFVCVTYILWGMTYTIMDIPFWSLVPTITLDKREREQLVPFPRFFASLAGFVTA +GITLPFVSYVGGADRGFGFQMFTLVLIAFFIASTIVTLRNVHEVYSSDNGVTAGRPHLTLKTIVGLIYKNDQLSCLLGMA +LAYNIASNIINGFAIYYFTYVIGDADLFPYYLSYAGAANLLTLIVFPRLVKMLSRRILWAGASVMPVLSCAGLFAMALAD +IHNAALIVAAGIFLNIGTALFWVLQVIMVADTVDYGEFKLNIRCESIAYSVQTMVVKGGSAFAAFFIALVLGLIGYTPNV +AQSAQTLQGMQFIMIVLPVLFFMMTLVLYFRYYRLNGDMLRKIQIHLLDKYRKTPPFVEQPDSPAISVVATSDVKAHHHH +HHHHHH +>pdb|3S7Y|A Chain A, Crystal Structure Of Mmnags In Space Group P3121 At 4.3 A 
Resolution3S7Y_X Chain X, Crystal Structure Of Mmnags In Space Group P3121 At 4.3 A Resolution +MGSSHHHHHHSSGLVPRGSHMNPNAPGVRQTIVQLLSHMRDGKEIREYLHRFSGIDQERFAVIKVGGAVIQDDLPGLASA +LAFLQTVGLTPVVVHGGGPQLDAALEAADIPTERVDGLRVTRDEAIPIIRDTLTQANLALVDAIRDAGGRAAAVPRGVFE +ADIVDADKLGRVGEPRHIHLDLVGSAARAGQAAILACLGETPDGTLVNINADVAVRALVHALQPYKVVFLTGTGGLLDED +GDILSSINLATDFGDLMQADWVNGGMRLKLEEIKRLLDDLPLSSSVSITRPSELARELFTHAGSGTLIRRGERIVATDDK +SSLDLGRLDNLVKAAFGRPAVEGYWDRLRVDRAFVTESYRAAAITTRLDGWVYLDKFAVLDDARGEGLGRTVWNRLVDYA +PQLIWRSRTNNPVNGFYFEECDGAVRRDEWTVFWRGEMGPVEVADVVEKAFALPPTLEAPQ +>3ETQ_A Chain A, X-ray Structure Of Cysteine-free Fragment Of Mhcn2 C-terminal Region From Amino Acids 443-630 Including C508n, C584s, And C601s Mutations3ETQ_B Chain B, X-ray Structure Of Cysteine-free Fragment Of Mhcn2 C-terminal Region From Amino Acids 443-630 Including C508n, C584s, And C601s Mutations +LVPRGSDSSRRQYQEKYKQVEQYMSFHKLPADFRQKIHDYYEHRYQGKMFDEDSILGELNGPLREEIVNFNNRKLVASMP +LFANADPNFVTAMLTKLKFEVFQPGDYIIREGTIGKKMYFIQHGVVSVLTKGNKEMKLSDGSYFGEISLLTRGRRTASVR +ADTYSRLYSLSVDNFNEVLEEYPMMRRAFETVAIDRLDRIGKKN +>2KJ7_A Chain A, Three-Dimensional NMR Structure of Rat Islet Amyloid Polypeptide in DPC micelles +KCNTATCATQRLANFLVRSSNNLGPVLPPTNVGSNTYX +>2KA9_B Chain B, Solution structure of PSD-95 PDZ12 complexed with cypin peptide2KA9_C Chain C, Solution structure of PSD-95 PDZ12 complexed with cypin peptide +QVVPFSSSV +>3G7D_A Chain A, Native Phpd With Cadmium Atoms3GBF_A Chain A, Phpd With Cadmium Complexed With Hydroethylphosphonate (Hep) +XRIDPFKLAHWXNARKYTAAQTADLAGLPLDDLRRLLGDEANEPDPAAATALAEALSVEPSQLAADAHRNLTVVHKSAEE +XHASRRPIQRDGIHFYNYYTLAAPEGRVAPVVLDILCPSDRLPALNNGHLEPAITVNLGPGDINGRWGEEITPQTWRVLH +ANHGGDRWITGDSYVEPSYCPHSYSLAGDAPARIVSYTAQSNISPLXTEANNWSTGAFEEALKALSGKVSAGSVLDLFLA +RRAHTRTSAAEAAGVPPADLEAALRSPASETGLTVLRTLGRALGFDYRVLLPADDQHDGVGKTWTTIEDSRRSRRTFGTY +EAASXASAAHLPDLVGSFLRVDADGRGADLIDHAENHYVVTEGRLTLEWDGPDGPASVELEPDGSAWTGPFVRHRWHGTG +TVLKFGSGAHLGYQDWLELTNTFEPAATLRRGRRDLAGWGYDN +>2W80_A Chain A, 
Structure Of A Complex Between Neisseria Meningitidis Factor H Binding Protein And Ccps 6-7 Of Human Complement Factor H2W80_B Chain B, Structure Of A Complex Between Neisseria Meningitidis Factor H Binding Protein And Ccps 6-7 Of Human Complement Factor H2W80_eF Chain eF, Structure Of A Complex Between Neisseria Meningitidis Factor H Binding Protein And Ccps 6-7 Of Human Complement Factor H2V64_G Chain G, Structure Of A Complex Between Neisseria Meningitidis Factor H Binding Protein And Ccps 6-7 Of Human Complement Factor H2W81_A Chain A, Structure Of A Complex Between Neisseria Meningitidis Factor H Binding Protein And Ccps 6-7 Of Human Complement Factor H2W81_B Chain B, Structure Of A Complex Between Neisseria Meningitidis Factor H Binding Protein And Ccps 6-7 Of Human Complement Factor H2W81_E Chain E, Structure Of A Complex Between Neisseria Meningitidis Factor H Binding Protein And Ccps 6-7 Of Human Complement Factor H +TLKPCDYPDIKHGGLYHENMRRPYFPVAVGKYYSYYCDEHFETPSGSYWDHIHCTQDGWSPAVPCLRKCYFPYLENGYNQ +NHGRKFVQGKSIDVACHPGYALPKAQTTVTCMENGWSPTPRCI +>2K7W_B Chain B, BAX Activation is Initiated at a Novel Interaction Site +EIWIAQELRRIGDEFNAYYA +>2V64_B1 Chain B1, Crystallographic Structure Of The Conformational Dimer Of The Spindle Assembly Checkpoint Protein Mad2.2V64_b Chain b, Crystallographic Structure Of The Conformational Dimer Of The Spindle Assembly Checkpoint Protein Mad2.2V64_i4 Chain i4, Crystallographic Structure Of The Conformational Dimer Of The Spindle Assembly Checkpoint Protein Mad2.2QYF_E Chain E, Crystal Structure Of The Mad2/p31(comet)/mad2-binding Peptide Ternary Complex2QYF_F Chain F, Crystal Structure Of The Mad2/p31(comet)/mad2-binding Peptide Ternary Complex1KLQ_B Chain B, The Mad2 Spindle Checkpoint Protein Undergoes Similar Major Conformational Changes upon Binding to Either Mad1 or Cdc20 +SWYSYPPPQRAV +>2Z64_A Chain A, Crystal Structure Of Mouse Tlr4 And Mouse Md-2 Complex 
+PCIEVVPNITYQCMDQKLSKVPDDIPSSTKNIDLSFNPLKILKSYSFSNFSELQWLDLSRCEIETIEDKAWHGLHHLSNL +ILTGNPIQSFSPGSFSGLTSLENLVAVETKLASLESFPIGQLITLKKLNVAHNFIHSCKLPAYFSNLTNLVHVDLSYNYI +QTITVNDLQFLRENPQVNLSLDMSLNPIDFIQDQAFQGIKLHELTLRGNFNSSNIMKTCLQNLAGLHVHRLILGEFKDER +NLEIFEPSIMEGLCDVTIDEFRLTYTNDFSDDIVKFHCLANVSAMSLAGVSIKYLEDVPKHFKWQSLSIIRCQLKQFPTL +DLPFLKSLTLTMNKGSISFKKVALPSLSYLDLSRNALSFSGCCSYSDLGTNSLRHLDLSFNGAIIMSANFMGLEELQHLD +FQHSTLKRVTEFSAFLSLEKLLYLDISYTNTKIDFDGIFLGLTSLNTLKMAGNSFKDNTLSNVFANTTNLTFLDLSKCQL +EQISWGVFDTLHRLQLLNMSHNNLLFLDSSHYNQLYSLSTLDCSFNRIETSKGILQHFPKSLAFFNLTNNSVACICEHQK +FLQWVKEQKQFLVNVEQMTCATPVEMNTSLVLDFNNSTC +>3MSI_A Chain A, Type Iii Antifreeze Protein Isoform Hplc 12 +MAQASVVANQLIPINTHLTLVMMRSEVVTPVGIPAEDIPRLVSMQVNRAVPLGTTLMPDMVKGYAA +>1CLH_AA Chain A, THREE-DIMENSIONAL SOLUTION STRUCTURE OF ESCHERICHIA COLI PERIPLASMIC CYCLOPHILIN +AKGDPHVLLTTSAGNIELELDKQKAPVSVQNFVDYVNSGFYNNTTFHRVIPGFMIQGGGFTEQMQQKKPNPPIKNEADNG +LRNTRGTIAMARTADKDSATSQFFINVADNAFLDHGQRDFGYAVFGKVVKGMDVADKISQVPTHDVGPYQNVPSKPVVIL +SAKVLP +>1JJ2_Y Chain Y, Fully Refined Crystal Structure Of The Haloarcula Marismortui Large Ribosomal Subunit At 2.4 Angstrom Resolution1KQS_Y Chain Y, The Haloarcula Marismortui 50s Complexed With A Pretranslocational Intermediate In Protein Synthesis1K8A_1 Chain 1, Co-Crystal Structure Of Carbomycin A Bound To The 50s Ribosomal Subunit Of Haloarcula Marismortui1K9M_1 Chain 1, Co-Crystal Structure Of Tylosin Bound To The 50s Ribosomal Subunit Of Haloarcula Marismortui1KD1_1 Chain 1, Co-crystal Structure Of Spiramycin Bound To The 50s Ribosomal Subunit Of Haloarcula Marismortui1M1K_1 Chain 1, Co-Crystal Structure Of Azithromycin Bound To The 50s Ribosomal Subunit Of Haloarcula Marismortui1M90_1 Chain 1, Co-crystal structure of CCA-Phe-caproic acid-biotin and sparsomycin bound to the 50S ribosomal subunit1K73_1 Chain 1, Co-Crystal Structure Of Anisomycin Bound To The 50s Ribosomal Subunit1KC8_12 Chain 12, Co-Crystal Structure Of Blasticidin S Bound To The 50s Ribosomal 
Subunit1N8R_1 Chain 1, Structure Of Large Ribosomal Subunit In Complex With Virginiamycin M1NJI_1 Chain 1, Structure Of Chloramphenicol Bound To The 50s Ribosomal Subunit1Q7Y_1 Chain 1, Crystal Structure of CCdAP-Puromycin bound at the Peptidyl transferase center of the 50S ribosomal subunit1Q81_1 Chain 1, Crystal Structure of minihelix with 3' puromycin bound to A-site of the 50S ribosomal subunit.1Q82_1b Chain 1b, Crystal Structure of CC-Puromycin bound to the A-site of the 50S ribosomal subunit1Q86_1 Chain 1, Crystal structure of CCA-Phe-cap-biotin bound simultaneously at half occupancy to both the A-site and P-site of the the 50S ribosomal Subunit.1QVF_Y Chain Y, Structure of a deacylated tRNA minihelix bound to the E site of the large ribosomal subunit of Haloarcula marismortui1QVG_Y Chain Y, Structure Of Cca Oligonucleotide Bound To The Trna Binding Sites Of The Large Ribosomal Subunit Of Haloarcula Marismortui1W2B_YY Chain YY, Trigger Factor Ribosome Binding Domain In Complex With 50s3CXC_Y Chain Y, The Structure Of An Enhanced Oxazolidinone Inhibitor Bound To The 50s Ribosomal Subunit Of H. 
Marismortui3OW2_Y Chain Y, Crystal Structure Of Enhanced Macrolide Bound To 50s Ribosomal Subunit +RTGRFGPRYGLKIRVRVADVEIKHKKKHKCPVCGFKKLKRAGTGIWMCGHCGYKIAGGCYQPETVAGKAVMKA +>6ES9_A Chain A, Methylsuccinyl-CoA dehydrogenase of Paracoccus denitrificans with bound flavin adenine dinucleotidepdb|6ES9|aB Chain B, Methylsuccinyl-CoA dehydrogenase of Paracoccus denitrificans with bound flavin adenine dinucleotide +MKDMPAMPADTPSALLALAGEALPELESLQSRATEALRALVAPAGKPQPALLEQHQHAAHALSWLTTYVESIRQLSGWAG +RLAEAGNLGRIEALILQIGLGEYLGQIAGGIPMSQTEFARLSDLELDWQPGEAAAKLMRGNTAPARAELARLMQDNHGRA +TFGATGLDEDLEMIRDQFRRYAEERVIPNAHEWHLKDQLIPMEIIEELAELGVFGLTIPEEFGGLGLSKASMVVVTEELS +RGYIGVGSLGTRSEIAAELILCGGTEAQKAKWLPGLASGEILSTAVFTEPNTGSDLGSLRTRAVRDGEDWVVTGNKTWIT +HAQRTHVMTLLARTDPETTDWRGLSMFLAEKEPGTDDDPFPTPGMTGGEIEVLGYRGMKEYELGFDGFRIKGENLLGGEP +GRGFKQLMETFESARIQTAARAVGVAQSAAEIGMRYAVDRKQFGKSLIEFPRVADKLAMMAVEIMIARQLTYFSAWEKDH +GRRCDLEAGMAKLLGARVAWAAADNALQIHGGNGFALEYAISRVLCDARILNIFEGAAEIQAQVIARRLLD diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp b/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp index b1b2462a..9a06bbf4 100644 --- a/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp +++ b/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp @@ -1,4 +1,4 @@ -/* $Id: writedb_unit_test.cpp 560817 2018-03-27 17:24:45Z rackerst $ +/* $Id: writedb_unit_test.cpp 581748 2019-03-05 16:48:25Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1510,7 +1510,7 @@ BOOST_AUTO_TEST_CASE(PDBIdLowerCase) string title = "pdb-id"; string - I1("pdb|3E3Q|BB"), T1("Lower case chain b"); + I1("pdb|3E3Q|b"), T1("Lower case chain b"); { CRef wr(new CWriteDB(title, @@ -1537,15 +1537,9 @@ BOOST_AUTO_TEST_CASE(PDBIdLowerCase) BOOST_REQUIRE(rd.GetNumOIDs() == 1); vector oids; - rd.AccessionToOids("3e3q bb", oids); + rd.AccessionToOids("3e3q_b", oids); BOOST_REQUIRE(oids.size() == 1); - 
BOOST_REQUIRE(oids[0] == 0); - - oids.clear(); - rd.AccessionToOids("3e3q b", oids); - - BOOST_REQUIRE(oids.size() == 0); } s_WrapUpFiles(files); @@ -2615,6 +2609,42 @@ BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMap) DeleteBlastDb(kDbName, CSeqDB::eNucleotide); } +BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMapLclIds) +{ + const int kTaxId(382); + CRef tis(new CTaxIdSet()); + const string kDbName("foo"); + CWriteDB blastdb(kDbName, CWriteDB::eProtein, kDbName); + const CFastaReader::TFlags flags = + CFastaReader::fAssumeProt | CFastaReader::fAllSeqIds; + // This file contains TAB characters, which shouldn't create any warnings + CFastaReader reader("data/lclseqs.fsa", flags); + CNcbiIfstream taxidmap("data/lclseqs_taxidmap.txt"); + tis->SetMappingFromFile(taxidmap); + while (!reader.AtEOF()) { + CRef se = reader.ReadOneSeq(); + BOOST_REQUIRE(se.NotEmpty()); + BOOST_REQUIRE(se->IsSeq()); + CRef bs(&se->SetSeq()); + CRef bds(CWriteDB::ExtractBioseqDeflines(*bs)); + tis->FixTaxId(bds); + blastdb.AddSequence(*bs); + blastdb.SetDeflines(*bds); + } + blastdb.Close(); + + CSeqDB db(kDbName, CSeqDB::eProtein); + int total=db.GetNumSeqs(); + for (int oid=0; oid taxids; + db.GetTaxIDs(oid, taxids); + BOOST_REQUIRE(taxids.size() == 1); + BOOST_REQUIRE_EQUAL(kTaxId, taxids.front()); + } + DeleteBlastDb(kDbName, CSeqDB::eNucleotide); +} + BOOST_AUTO_TEST_CASE(CBuildDatabase_TestDirectoryCreation) { CTmpFile tmpfile; @@ -3097,7 +3127,7 @@ BOOST_AUTO_TEST_CASE(ReadBareIDNucleotide) } -BOOST_AUTO_TEST_CASE(ReadLongIDProtein) +BOOST_AUTO_TEST_CASE(ReadMixIDsProtein) { // create a FASTA file with bare and legacy IDs CTmpFile tmpfile; @@ -3105,19 +3135,19 @@ BOOST_AUTO_TEST_CASE(ReadLongIDProtein) string sequence = "MASTQNIVEEVQKMLDTYDTNKDGEITKAEAVEYFKGKKAFNPER"; std::unordered_map fasta_ids = { - {"XP_642131.1", CSeq_id::e_Local}, + {"XP_642131.1", CSeq_id::e_Other}, {"ref|XP_642837.1", CSeq_id::e_Other}, - {"BAA06266.1", CSeq_id::e_Local}, + {"BAA06266.1", CSeq_id::e_Ddbj}, 
{"dbj|GAE97797.1", CSeq_id::e_Ddbj}, {"320460102", CSeq_id::e_Local}, {"gi|716054866", CSeq_id::e_Gi}, - {"Q02VU1.1", CSeq_id::e_Local}, + {"Q02VU1.1", CSeq_id::e_Swissprot}, {"sp|Q6GIX1.1|CADA_STAAR", CSeq_id::e_Swissprot}, - {"EQR80552.1", CSeq_id::e_Local}, + {"EQR80552.1", CSeq_id::e_Genbank}, {"gb|EQS08124.1", CSeq_id::e_Genbank}, {"Somestring", CSeq_id::e_Local}, {"lcl|anotherstring", CSeq_id::e_Local}, - {"12AS_A", CSeq_id::e_Local}, + {"12AS_A", CSeq_id::e_Pdb}, {"pdb|1I4D|D", CSeq_id::e_Pdb}, {"2209341B", CSeq_id::e_Local}, {"prf||2209335A", CSeq_id::e_Prf}, @@ -3166,7 +3196,7 @@ BOOST_AUTO_TEST_CASE(ReadLongIDProtein) } -BOOST_AUTO_TEST_CASE(ReadMultipleLongIDs) +BOOST_AUTO_TEST_CASE(ReadMultipleMixLongIDs) { // create a FASTA file with bare and legacy IDs CTmpFile tmpfile; @@ -3174,9 +3204,9 @@ BOOST_AUTO_TEST_CASE(ReadMultipleLongIDs) string sequence = "MASTQNIVEEVQKMLDTYDTNKDGEITKAEAVEYFKGKKAFNPER"; std::unordered_map fasta_ids = { - {"XP_642131.1", CSeq_id::e_Local}, + {"XP_642131.1", CSeq_id::e_Other}, {"ref|XP_642837.1", CSeq_id::e_Other}, - {"BAA06266.1", CSeq_id::e_Local}, + {"BAA06266.1", CSeq_id::e_Ddbj}, {"dbj|GAE97797.1", CSeq_id::e_Ddbj}, {"320460102", CSeq_id::e_Local}, {"gi|716054866", CSeq_id::e_Gi}}; @@ -3228,7 +3258,7 @@ BOOST_AUTO_TEST_CASE(ReadMultipleLongIDs) } -BOOST_AUTO_TEST_CASE(ReadLongIDNucleotide) +BOOST_AUTO_TEST_CASE(ReadMixIDNucleotide) { // create a FASTA file with bare and legacy IDs CTmpFile tmpfile; @@ -3236,11 +3266,11 @@ BOOST_AUTO_TEST_CASE(ReadLongIDNucleotide) string sequence = "AACTAGTATTAGAGGCACTGCCTGCCCAGTGACAATCGTTAAACGGCCG"; std::unordered_map fasta_ids = { - {"U13103.1", CSeq_id::e_Local}, + {"U13103.1", CSeq_id::e_Genbank}, {"gb|U13080.1", CSeq_id::e_Genbank}, - {"Z18633.1", CSeq_id::e_Local}, + {"Z18633.1", CSeq_id::e_Embl}, {"emb|Z18632.1", CSeq_id::e_Embl}, - {"NM_176670.2", CSeq_id::e_Local}, + {"NM_176670.2", CSeq_id::e_Other}, {"ref|NM_175822.2", CSeq_id::e_Other}}; for (auto it: fasta_ids) { @@ -3354,6 
+3384,177 @@ BOOST_AUTO_TEST_CASE(CreateV5Seqidlist) } } +BOOST_AUTO_TEST_CASE(ReadMultiSeqIdsDefline) +{ + static const int num_ids = 7; + pair fasta_ids[num_ids] = { + make_pair("497371450", CSeq_id::e_Gi), + make_pair("WP_009685663.1", CSeq_id::e_Other), + make_pair("955937162", CSeq_id::e_Gi), + make_pair("KSD99966.1", CSeq_id::e_Genbank), + make_pair("956677830", CSeq_id::e_Gi), + make_pair("KSL27839.1", CSeq_id::e_Genbank), + make_pair("6ES9_A", CSeq_id::e_Pdb)}; + + CNcbiIfstream istr("data/WP_009685663.fasta"); + string dbname = "data/multiseqids"; + string title = "Temporary unit test db"; + ostringstream log; + CBuildDatabase db(dbname, title, true, false, true, false, &log, true); + + db.StartBuild(); + db.AddFasta(istr); + db.EndBuild(); + + CFileDeleteAtExit::Add(dbname + ".phr"); + CFileDeleteAtExit::Add(dbname + ".pin"); + CFileDeleteAtExit::Add(dbname + ".psq"); + CFileDeleteAtExit::Add(dbname + ".pog"); + CFileDeleteAtExit::Add(dbname + ".psd"); + CFileDeleteAtExit::Add(dbname + ".psi"); + CFileDeleteAtExit::Add(dbname + ".pni"); + CFileDeleteAtExit::Add(dbname + ".pnd"); + + CSeqDB seqdb(dbname, CSeqDB::eProtein); + + list< CRef > ids = seqdb.GetSeqIDs(0); + BOOST_REQUIRE_EQUAL(ids.size(), num_ids); + + auto seqdb_id = ids.begin(); + for (auto it: fasta_ids) { + BOOST_REQUIRE_EQUAL((*seqdb_id)->Which(),it.second); + BOOST_REQUIRE_EQUAL((*seqdb_id)->GetSeqIdString(true),it.first); + ++seqdb_id; + } +} + +BOOST_AUTO_TEST_CASE(ReadPDBFasta) +{ + CNcbiIfstream istr("data/pdbs.fasta"); + string dbname = "data/pdbs_v5"; + string title = "Temporary unit test db"; + ostringstream log; + { + CNcbiApplication::Instance()->SetEnvironment("BLASTDB_LMDB_MAP_SIZE", "100000"); + CBuildDatabase db(dbname, title, true, false, true, false, &log, true, eBDB_Version5); + db.StartBuild(); + db.AddFasta(istr); + db.EndBuild(); + } + + + vector db_ids; + vector db_oids; + CSeqDB seqdb(dbname, CSeqDB::eProtein); + int oid= 0; + while (seqdb.CheckOrFindOID(oid)) { + list 
> seq_ids = seqdb.GetSeqIDs(oid); + ITERATE(list >, itr, seq_ids) { + BOOST_REQUIRE_EQUAL((*itr)->Which(), CSeq_id::e_Pdb); + } + oid++; + } + CNcbiIfstream ref_ids_file("data/pdbs_ids.ref"); + vector ref_ids; + string line; + while (getline(ref_ids_file, line)) { + ref_ids.push_back(line); + } + vector oids; + seqdb.AccessionsToOids(ref_ids, oids); + + ITERATE(vector, itr, oids){ + BOOST_CHECK(*itr != kSeqDBEntryNotFound); + } + + CFileDeleteAtExit::Add(dbname + ".phr"); + CFileDeleteAtExit::Add(dbname + ".pin"); + CFileDeleteAtExit::Add(dbname + ".psq"); + CFileDeleteAtExit::Add(dbname + ".pog"); + CFileDeleteAtExit::Add(dbname + ".psd"); + CFileDeleteAtExit::Add(dbname + ".psi"); + CFileDeleteAtExit::Add(dbname + ".pos"); + CFileDeleteAtExit::Add(dbname + ".pot"); + CFileDeleteAtExit::Add(dbname + ".ptf"); + CFileDeleteAtExit::Add(dbname + ".pto"); + CFileDeleteAtExit::Add(dbname + ".pdb"); + +} + +void s_TestReadPDBAsn1(CNcbiIfstream & istr, CNcbiIfstream & ref_ids_file, int num_oids) +{ + string dbname = "data/asn1_v5"; + string title = "Temporary unit test db"; + ostringstream log; + { + CNcbiApplication::Instance()->SetEnvironment("BLASTDB_LMDB_MAP_SIZE", "100000"); + CRef seq_entry(new CSeq_entry); + istr >> MSerial_AsnText >> *seq_entry; + CSeqEntryGetSource seq_src(seq_entry); + CBuildDatabase db(dbname, title, true, false, true, false, &log, true, eBDB_Version5); + db.StartBuild(); + db.AddSequences(seq_src); + db.EndBuild(); + } + + + vector db_ids; + vector db_oids; + CSeqDB seqdb(dbname, CSeqDB::eProtein); + int oid= 0; + while (seqdb.CheckOrFindOID(oid)) { + list > seq_ids = seqdb.GetSeqIDs(oid); + ITERATE(list >, itr, seq_ids) { + BOOST_REQUIRE_EQUAL((*itr)->Which(), CSeq_id::e_Pdb); + } + oid++; + } + + BOOST_REQUIRE_EQUAL(oid, num_oids); + + vector ref_ids; + string line; + while (getline(ref_ids_file, line)) { + ref_ids.push_back(line); + } + vector oids; + seqdb.AccessionsToOids(ref_ids, oids); + + for(unsigned int i=0; i 0){ + 
m_Lmdbdb.Reset(new CWriteDB_LMDB(lmdb_fname_w_path, map_size)); + m_Taxdb.Reset(new CWriteDB_TaxID( + GetFileNameFromExistingLMDBFile(lmdb_fname_w_path, ELMDBFileType::eTaxId2Offsets), + map_size)); + } + else { + m_Lmdbdb.Reset(new CWriteDB_LMDB(lmdb_fname_w_path)); + m_Taxdb.Reset(new CWriteDB_TaxID( GetFileNameFromExistingLMDBFile(lmdb_fname_w_path, ELMDBFileType::eTaxId2Offsets))); + } } x_CookData(); @@ -1063,7 +1076,9 @@ void CWriteDB_Impl::x_Publish() m_Blobs, m_MaskDataColumn); if (done && (m_DbVersion == eBDB_Version5) && m_Lmdbdb) { - m_Lmdbdb->InsertEntries(m_Ids,m_LmdbOid); + if (m_ParseIDs) { + m_Lmdbdb->InsertEntries(m_Ids,m_LmdbOid); + } m_Taxdb->InsertEntries(m_TaxIds, m_LmdbOid); m_LmdbOid++; } @@ -1118,7 +1133,9 @@ void CWriteDB_Impl::x_Publish() m_MaskDataColumn); if (done && (m_DbVersion == eBDB_Version5) && m_Lmdbdb) { - m_Lmdbdb->InsertEntries(m_Ids,m_LmdbOid); + if (m_ParseIDs){ + m_Lmdbdb->InsertEntries(m_Ids,m_LmdbOid); + } m_Taxdb->InsertEntries(m_TaxIds, m_LmdbOid); m_LmdbOid++; } @@ -1603,74 +1620,7 @@ void CWriteDB_Impl::x_ComputeHash(const CBioseq & sequence) #define TAB_REPLACEMENT " " -static bool s_IsValidPdb(const string& s) -{ - size_t len = s.size(); - // If bare ID, only valid lengths are 4 (no chain ID), - // 6 (chain ID is in [A-Z0-9]), or 7 (chain ID is double [A-Z]). - // Char before chain ID is '_'. - // If long ID, it's prefixed with 'pdb|' and chain ID is preceded by '|'. - bool long_seqid = false; - if (len == 4 || len == 6 || len == 7) { - if (s.find('|') != NPOS) { - return false; - } - } else if (len == 8 || len == 10 || len == 11) { - string lc("pdb|"); - string uc("PDB|"); - if (s.compare(0, lc.size(), lc) != 0 - && s.compare(0, uc.size(), uc) != 0) { - return false; - } - long_seqid = true; - } else { - return false; - } - char sep = long_seqid ? '|' : '_'; - auto it = s.begin(); - if (long_seqid) { - it += 4; // skip "pdb|" - } - // Is first character a digit, excluding '0'? 
- if (!isdigit(*it) || *it == '0') { - return false; - } - ++it; - // Is each of the next 3 characters either upper-case alpha or digit? - for (int i = 0; i < 3; ++i) { - if (!isupper(*it) && !isdigit(*it)) { - return false; - } - ++it; - } - // We're done if length was 4 characters. - // Otherwise... - if (len > 4) { - // Is next character the correct separator? - if (*it != sep) { - return false; - } - ++it; - if (len == 6) { - // Is single-character chain ID either upper-case alpha or digit? - if (!isupper(*it) && !isdigit(*it)) { - return false; - } - } else /* len == 7 */ { - // Is first character of 2-character chain ID upper-case alpha? - if (!isupper(*it)) { - return false; - } - // If so, is second character the same? - // (Double upper-case alpha represents a single lower-case alpha.) - auto c = *it++; - if (c != *it) { - return false; - } - } - } - return true; -} + void CWriteDB_Impl:: x_GetFastaReaderDeflines(const CBioseq & bioseq, @@ -1805,28 +1755,25 @@ x_GetFastaReaderDeflines(const CBioseq & bioseq, // Parse ids. They may or may not be bar-separated. list< CRef > seqids; - if ((ids.find('|') == NPOS && long_seqids) - || (!isalpha(ids[0]) && !s_IsValidPdb(ids))) { - seqids.push_back(CRef (new CSeq_id(CSeq_id::e_Local, ids))); - } else { - CSeq_id::ParseFastaIds(seqids, ids); - - if (!long_seqids) { - // If accession's molecule type is different than - // expected, change sequence id to local. CFastaReader - // cannot distingush between bare pir protein ids genbank - // nucleotide ids. 
- for (auto& it: seqids) { - CSeq_id::EAccessionInfo info = it->IdentifyAccession(); - if (!it->IsLocal() && !it->IsGi() && - (info & (CSeq_id::fAcc_prot | CSeq_id::fAcc_nuc)) && - bioseq.IsAa() == !!(info & CSeq_id::fAcc_nuc)) { - - string label = it->GetSeqIdString(true); - it.Reset(new CSeq_id(CSeq_id::e_Local, label)); - } - } + if (ids.find('|') != NPOS){ + CSeq_id::ParseFastaIds(seqids, ids); + } + else { + CRef id(new CSeq_id(ids, CSeq_id::fParse_RawText | CSeq_id::fParse_ValidLocal)); + if ((id->Which() == CSeq_id::e_Prf) || + (id->Which() == CSeq_id::e_Pir)){ + string label = id->GetSeqIdString(true); + id.Reset(new CSeq_id(CSeq_id::e_Local, label)); } + + CSeq_id::EAccessionInfo info = id->IdentifyAccession(); + if ((info & (CSeq_id::fAcc_prot | CSeq_id::fAcc_nuc)) && + (bioseq.IsAa() == !!(info & CSeq_id::fAcc_nuc))) { + string label = id->GetSeqIdString(true); + id.Reset(new CSeq_id(CSeq_id::e_Local, label)); + } + + seqids.push_back(id); } // Build the actual defline. diff --git a/c++/src/objtools/blast/seqdb_writer/writedb_isam.cpp b/c++/src/objtools/blast/seqdb_writer/writedb_isam.cpp index 5c3d7a5e..a39348dd 100644 --- a/c++/src/objtools/blast/seqdb_writer/writedb_isam.cpp +++ b/c++/src/objtools/blast/seqdb_writer/writedb_isam.cpp @@ -1,4 +1,4 @@ -/* $Id: writedb_isam.cpp 556775 2018-02-05 15:13:23Z fongah2 $ +/* $Id: writedb_isam.cpp 581741 2019-03-05 16:45:40Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -632,6 +632,13 @@ void CWriteDB_IsamIndex::x_AddPdb(int oid, // "102l| " // "pdb|102l| " + if(pdb.CanGetChain_id() && pdb.IsSetChain_id()){ + string chain_id= pdb.GetChain_id(); + if(chain_id.size() > 1){ + NCBI_THROW(CWriteDBException, eArgErr, + "Multi-letters chain PDB id is not supported in v4 BLAST DB"); + } + } CTempString mol; if (pdb.CanGetMol()) { mol = pdb.GetMol().Get(); diff --git a/c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp 
b/c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp index b53c4738..12fe387c 100644 --- a/c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp +++ b/c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp @@ -137,6 +137,8 @@ void CWriteDB_LMDB::x_InsertEntry(const CRef &seqid, const blastdb::TOi kv_pdb_mol.id = seqid->GetPdb().GetMol().Get(); kv_pdb_mol.oid = oid; m_list.push_back(kv_pdb_mol); + // mol code should be case insensitive but c++ tooklit + // is not converting it all to uppercase now string id_upper = kv_pdb_mol.id; NStr::ToUpper(id_upper); if(kv_pdb_mol.id != id_upper) { @@ -145,6 +147,12 @@ void CWriteDB_LMDB::x_InsertEntry(const CRef &seqid, const blastdb::TOi kv_u.oid = oid; m_list.push_back(kv_u); } + SKeyValuePair kv; + kv.id = seqid->GetSeqIdString(true); + kv.oid = oid; + kv.saveToOidList = true; + m_list.push_back(kv); + return; } if(seqid->GetTextseq_Id() != NULL) { diff --git a/c++/src/objtools/blast/seqdb_writer/writedb_volume.cpp b/c++/src/objtools/blast/seqdb_writer/writedb_volume.cpp index 0278b6ce..5560386f 100644 --- a/c++/src/objtools/blast/seqdb_writer/writedb_volume.cpp +++ b/c++/src/objtools/blast/seqdb_writer/writedb_volume.cpp @@ -1,4 +1,4 @@ -/* $Id: writedb_volume.cpp 553487 2017-12-18 14:23:38Z fongah2 $ +/* $Id: writedb_volume.cpp 581729 2019-03-05 16:41:46Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -218,16 +218,17 @@ bool CWriteDB_Volume::WriteSequence(const string & seq, // check the uniqueness of id if (m_Indices != CWriteDB::eNoIndex) { set::size_type orig_size = m_IdSet.size(); - string id_u; + string id; pair::iterator, bool > rv; + CSeq_id::TLabelFlags label_flags = + CSeq_id::fLabel_Default | CSeq_id::fLabel_UpperCase; ITERATE(TIdList, iter, idlist) { - string id = kEmptyStr; - (*iter)->GetLabel(&id); - id_u = NStr::ToUpper(id); - rv = m_IdSet.insert(id_u); + id = kEmptyStr; + (*iter)->GetLabel(&id, CSeq_id::eDefault, label_flags); + rv = 
m_IdSet.insert(id); if((rv.second == false) && (!(*iter)->IsLocal())) { CNcbiOstrstream msg; - msg << "Error: Duplicate seq_ids are found: " << endl << id_u << endl; + msg << "Error: Duplicate seq_ids are found: " << endl << id << endl; NCBI_THROW(CWriteDBException, eArgErr, CNcbiOstrstreamToString(msg)); } } @@ -235,7 +236,7 @@ bool CWriteDB_Volume::WriteSequence(const string & seq, if(m_IdSet.size() == orig_size) { CNcbiOstrstream msg; msg << "Error: Duplicate seq_ids are found: " << endl - << id_u << endl; + << id << endl; NCBI_THROW(CWriteDBException, eArgErr, CNcbiOstrstreamToString(msg)); } } diff --git a/c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt b/c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt index abbeef88..c7be2b31 100644 --- a/c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt +++ b/c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt @@ -1,12 +1,12 @@ ############################################################################# -# $Id: CMakeLists.cleanup.lib.txt 574562 2018-11-15 12:31:02Z ivanov $ +# $Id: CMakeLists.cleanup.lib.txt 580121 2019-02-11 16:07:50Z ivanov $ ############################################################################# NCBI_begin_lib(xcleanup) NCBI_sources( autogenerated_cleanup autogenerated_extended_cleanup cleanup cleanup_utils gene_qual_normalization cleanup_user_object cleanup_author - newcleanupp capitalization_string + cleanup_pub newcleanupp capitalization_string fix_feature_id ) NCBI_uses_toolkit_libraries(xobjedit taxon3 valid xobjutil) NCBI_project_watchers(bollin kans) diff --git a/c++/src/objtools/cleanup/Makefile.cleanup.lib b/c++/src/objtools/cleanup/Makefile.cleanup.lib index abcd8024..32918143 100644 --- a/c++/src/objtools/cleanup/Makefile.cleanup.lib +++ b/c++/src/objtools/cleanup/Makefile.cleanup.lib @@ -1,4 +1,4 @@ -# $Id: Makefile.cleanup.lib 570486 2018-09-10 17:07:44Z ivanov $ +# $Id: Makefile.cleanup.lib 580121 2019-02-11 16:07:50Z ivanov $ # Build library "xcleanup" 
############################### @@ -8,7 +8,7 @@ WATCHERS = bollin kans ASN_DEP = submit valid SRC = autogenerated_cleanup autogenerated_extended_cleanup cleanup \ cleanup_utils gene_qual_normalization cleanup_user_object cleanup_author \ - newcleanupp capitalization_string + cleanup_pub newcleanupp capitalization_string fix_feature_id DLL_LIB = xregexp $(PCRE_LIB) LIB = xcleanup diff --git a/c++/src/objtools/cleanup/cleanup.cpp b/c++/src/objtools/cleanup/cleanup.cpp index 69641bf3..813a02d9 100644 --- a/c++/src/objtools/cleanup/cleanup.cpp +++ b/c++/src/objtools/cleanup/cleanup.cpp @@ -1,4 +1,4 @@ -/* $Id: cleanup.cpp 574562 2018-11-15 12:31:02Z ivanov $ +/* $Id: cleanup.cpp 578144 2019-01-14 15:59:00Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2524,6 +2524,7 @@ bool CCleanup::WGSCleanup(CSeq_entry_Handle entry, bool instantiate_missing_prot bool any_changes = false; int protein_id_counter = 1; + bool create_general_only = objects::edit::IsGeneralIdProtPresent(entry.GetTopLevelEntry()); SAnnotSelector sel(CSeqFeatData::e_Cdregion); for (CFeat_CI cds_it(entry, sel); cds_it; ++cds_it) { bool change_this_cds = false; @@ -2545,7 +2546,7 @@ bool CCleanup::WGSCleanup(CSeq_entry_Handle entry, bool instantiate_missing_prot // need to set product if not set if (!new_cds->IsSetProduct() && !sequence::IsPseudo(*new_cds, entry.GetScope())) { string id_label; - CRef new_id = objects::edit::GetNewProtId(entry.GetScope().GetBioseqHandle(new_cds->GetLocation()), protein_id_counter, id_label, false); + CRef new_id = objects::edit::GetNewProtId(entry.GetScope().GetBioseqHandle(new_cds->GetLocation()), protein_id_counter, id_label, create_general_only); if (new_id) { new_cds->SetProduct().SetWhole().Assign(*new_id); change_this_cds = true; diff --git a/c++/src/objtools/cleanup/cleanup_author.cpp b/c++/src/objtools/cleanup/cleanup_author.cpp index 90a63ef9..07bf6fcf 100644 --- 
a/c++/src/objtools/cleanup/cleanup_author.cpp +++ b/c++/src/objtools/cleanup/cleanup_author.cpp @@ -1,4 +1,4 @@ -/* $Id: cleanup_author.cpp 567984 2018-07-27 15:57:35Z bollin $ +/* $Id: cleanup_author.cpp 580146 2019-02-11 16:18:59Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -33,7 +33,9 @@ #include #include +#include #include +#include #include #include @@ -421,6 +423,237 @@ void CCleanup::s_FixEtAl(CName_std& name) } +bool CCleanup::IsEmpty(const CAuth_list::TAffil& affil) +{ + if (affil.IsStr()) { + return NStr::IsBlank(affil.GetStr()); + } + else if (affil.IsStd()) { + const CAuth_list::TAffil::TStd& std = affil.GetStd(); + return !(std.IsSetAffil() || std.IsSetDiv() || std.IsSetCity() || + std.IsSetSub() || std.IsSetCountry() || std.IsSetStreet() || + std.IsSetEmail() || std.IsSetFax() || std.IsSetPhone() || + std.IsSetPostal_code()); + } + return true; +} + + +// Helpers for cleaning authors +struct SAuthorClean +{ + bool m_Changed; + bool m_FixInitials; + void operator()(CRef author) + { + m_Changed |= CCleanup::CleanupAuthor(*author, m_FixInitials); + } +}; + + +struct SAuthorEmpty +{ + bool operator()(CRef& cauth) + { + if (!cauth) { + return true; + } + const CAuthor& auth = *cauth; + if (!auth.IsSetName()) { + return true; + } + + const CAuthor::TName& name = auth.GetName(); + + const string* str = NULL; + switch (name.Which()) { + case CAuthor::TName::e_not_set: + return true; + + case CAuthor::TName::e_Name: + { { + const CName_std& nstd = name.GetName(); + // last name is required + if (!nstd.IsSetLast() || NStr::IsBlank(nstd.GetLast())) { + return true; + } + break; + }} + + case CAuthor::TName::e_Ml: + str = &(name.GetMl()); + break; + case CAuthor::TName::e_Str: + str = &(name.GetStr()); + break; + case CAuthor::TName::e_Consortium: + str = &(name.GetConsortium()); + break; + + default: + break; + }; + if (str != NULL && NStr::IsBlank(*str)) { + return true; + } + return false; 
+ } +}; + + +bool CCleanup::CleanupAuthList(CAuth_list& al, bool fix_initials) +{ + bool rval = false; + + if (al.IsSetAffil()) { + rval |= CleanupAffil(al.SetAffil()); + if (IsEmpty(al.GetAffil())) { + al.ResetAffil(); + rval = true; + } + } + if (al.IsSetNames()) { + typedef CAuth_list::TNames TNames; + switch (al.GetNames().Which()) { + case TNames::e_Ml: + { { + if (ConvertAuthorContainerMlToStd(al)) { + rval = true; + } + }} + // !!!!!!!!!!!!!!!!!!!!!! + // !!!!!FALL-THROUGH!!!!! + // !!!!!!!!!!!!!!!!!!!!!! + // ( since we just converted the ml to an std, we need to do the + // std clean-up step ) + case TNames::e_Std: + { { + auto& alnames = al.SetNames(); + auto& std = alnames.SetStd(); + SAuthorClean cleaner{ rval, fix_initials }; + std::for_each(std.begin(), std.end(), cleaner); + rval |= cleaner.m_Changed; + size_t before = std.size(); + SAuthorEmpty em; + std.erase(std::remove_if(std.begin(), std.end(), em), std.end()); + if (std.size() != before) { + rval = true; + } + + if (std.empty()) { + ResetAuthorNames(alnames); + rval = true; + } + break; + }} + case TNames::e_Str: + { { + TNames& names = al.SetNames(); + for (auto& it : names.SetStr()) { + rval |= Asn2gnbkCompressSpaces(it); + } + + rval |= CleanVisStringContainer(names.SetStr()); + if (names.GetStr().empty()) { + ResetAuthorNames(names); + rval = true; + } + break; + }} + default: + break; + } + } + // if no remaining authors, put in default author for legal ASN.1 + if (!al.IsSetNames()) { + al.SetNames().SetStr().push_back("?"); + rval = true; + } + return rval; +} + + +// when we reset author names, we need to put in a place holder - otherwise the ASN.1 becomes invalid +void CCleanup::ResetAuthorNames(CAuth_list::TNames& names) +{ + names.Reset(); + list< string > &auth_list = names.SetStr(); + auth_list.clear(); + auth_list.push_back("?"); +} + + +static bool CleanAndCompressJunk(string& str) +{ + bool rval = false; + rval |= Asn2gnbkCompressSpaces(str); + rval |= CleanVisStringJunk(str); + 
return rval; +} + + +bool CCleanup::CleanupAffil(CAffil& af) +{ + bool rval = false; + switch (af.Which()) { + case CAffil::e_Str: + rval |= Asn2gnbkCompressSpaces(af.SetStr()); + rval |= CleanVisString(af.SetStr()); + break; + case CAffil::e_Std: + { { + CAffil::TStd& std = af.SetStd(); +#define CLEAN_AFFIL_MEMBER(x) \ + if (std.IsSet##x()) { \ + string& val = std.Set##x(); \ + rval |= CleanAndCompressJunk(val); \ + if (val.empty()) { \ + std.Reset##x(); \ + rval = true; \ + } \ + } + + CLEAN_AFFIL_MEMBER(Affil); + CLEAN_AFFIL_MEMBER(Div); + CLEAN_AFFIL_MEMBER(City); + CLEAN_AFFIL_MEMBER(Sub); + CLEAN_AFFIL_MEMBER(Country); + CLEAN_AFFIL_MEMBER(Street); + CLEAN_AFFIL_MEMBER(Email); + CLEAN_AFFIL_MEMBER(Fax); + CLEAN_AFFIL_MEMBER(Phone); + CLEAN_AFFIL_MEMBER(Postal_code); +#undef CLEAN_AFFIL_MEMBER + + if (std.IsSetCountry()) { + const string& country = std.GetCountry(); + if (NStr::EqualNocase(country, "U.S.A.")) { + std.SetCountry("USA"); + rval = true; + } else if (NStr::EqualNocase(country, "USA") && !NStr::EqualCase(country, "USA")) { + std.SetCountry("USA"); + rval = true; + } + } + + if (std.IsSetSub() && std.IsSetCountry()) { + if (NStr::EqualCase(std.GetCountry(), "USA")) { + string oldsub = std.GetSub(); + string newsub = NStr::Replace(oldsub, ".", ""); + if (!NStr::EqualNocase(oldsub, newsub)) { + std.SetSub(newsub); + rval = true; + } + } + } + break; + }} + default: + break; + } + return rval; +} + END_SCOPE(objects) diff --git a/c++/src/objtools/cleanup/cleanup_pub.cpp b/c++/src/objtools/cleanup/cleanup_pub.cpp new file mode 100644 index 00000000..55b9b3cd --- /dev/null +++ b/c++/src/objtools/cleanup/cleanup_pub.cpp @@ -0,0 +1,651 @@ +/* $Id: cleanup_pub.cpp 580152 2019-02-11 16:22:07Z ivanov $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United 
States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Author: Colleen Bollin + * + * File Description: + * Code for cleaning up publications + * + */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include "cleanup_utils.hpp" +#include + +BEGIN_NCBI_SCOPE +BEGIN_SCOPE(objects) + + +bool CCleanupPub::x_CleanPubdescComment(string& str) +{ + bool any_change = false; + if (CleanDoubleQuote(str)) { + any_change = true; + } + if (CleanVisString(str)) { + any_change = true; + } + return any_change; +} + +bool CCleanupPub::CleanPubdesc(CPubdesc& pubdesc, bool strip_serial) +{ + bool any_change = false; + if (pubdesc.IsSetComment()) { + string& comment = pubdesc.SetComment(); + any_change |= x_CleanPubdescComment(comment); + if (comment.empty()) { + pubdesc.ResetComment(); + any_change = true; + } + } + + if (pubdesc.IsSetPub()) { + CPubEquivCleaner cleaner(pubdesc.SetPub()); + bool fix_initials = CPubEquivCleaner::ShouldWeFixInitials(pubdesc.GetPub()); + if 
(cleaner.Clean(fix_initials, strip_serial)) { + any_change = true; + } + } + return any_change; +} + + +static size_t s_PubPriority(CPub::E_Choice val) +{ + size_t priority = 0; + switch (val) { + case CPub::e_not_set: + priority = 0; + break; + case CPub::e_Gen: + priority = 3; + break; + case CPub::e_Sub: + priority = 4; + break; + case CPub::e_Medline: + priority = 13; + break; + case CPub::e_Muid: + priority = 2; + break; + case CPub::e_Article: + priority = 5; + break; + case CPub::e_Journal: + priority = 6; + break; + case CPub::e_Book: + priority = 7; + break; + case CPub::e_Proc: + priority = 8; + break; + case CPub::e_Patent: + priority = 9; + break; + case CPub::e_Pat_id: + priority = 10; + break; + case CPub::e_Man: + priority = 11; + break; + case CPub::e_Equiv: + priority = 12; + break; + case CPub::e_Pmid: + priority = 1; + break; + } + return priority; +} + +inline +static +bool s_PubWhichCompare(CRef pub1, CRef pub2) { + size_t pr1 = s_PubPriority(pub1->Which()); + size_t pr2 = s_PubPriority(pub2->Which()); + return (pr1 < pr2); +} + + +struct SPMIDMatch { + const CPubMedId& m_ID; + + bool operator()(CRef< CArticleId > other_id) + { + return (other_id->IsPubmed() && other_id->GetPubmed() == m_ID); + } +}; + +void RemoveDuplicatePubMedArticleIds(CArticleIdSet::Tdata& id_set) +{ + auto it = id_set.begin(); + while (it != id_set.end()) { + while (it != id_set.end() && !(*it)->IsPubmed()) { + ++it; + } + if (it != id_set.end()) { + auto it2 = it; + ++it2; + SPMIDMatch matcher{ (*it)->GetPubmed() }; + id_set.erase(std::remove_if(it2, id_set.end(), matcher), id_set.end()); + ++it; + } + } + +} + +bool CPubEquivCleaner::Clean(bool fix_initials, bool strip_serial) +{ + bool change = false; + + if (!m_Equiv.IsSet()) { + return change; + } + + if (s_Flatten(m_Equiv)) { + change = true; + } + + // we keep the last of these because we might transfer one + // to the other as necessary to fill in gaps. 
+ int last_pmid = 0; + int last_article_pubmed_id = 0; // the last from a journal + CRef last_article; + + auto& pe_set = m_Equiv.Set(); + + pe_set.sort(s_PubWhichCompare); + + auto it = pe_set.begin(); + while (it != pe_set.end()) { + CPub &pub = **it; + + CRef cleaner = PubCleanerFactory(pub); + if (cleaner) { + if (cleaner->Clean(fix_initials, strip_serial)) { + change = true; + } + if (cleaner->IsEmpty()) { + it = pe_set.erase(it); + continue; + } + } + + // storing these so at the end we'll know the last values + if (pub.IsPmid()) { + last_pmid = pub.GetPmid().Get(); + } + if (pub.IsArticle()) { + last_article.Reset(&pub.SetArticle()); + if (last_article->IsSetIds()) { + auto& ids = last_article->SetIds().Set(); + size_t old_size = ids.size(); + RemoveDuplicatePubMedArticleIds(last_article->SetIds()); + change = (ids.size() != old_size); + // find last article pubmed_id + auto id_it = ids.rbegin(); + while (id_it != ids.rend()) { + if ((*id_it)->IsPubmed()) { + last_article_pubmed_id = (*id_it)->GetPubmed(); + break; + } + ++id_it; + } + } + } + ++it; + } + + // Now, we might have to transfer data to fill in missing information + if (last_pmid == 0 && last_article_pubmed_id > 0) { + CRef new_pub(new CPub); + new_pub->SetPmid().Set(last_article_pubmed_id); + m_Equiv.Set().insert(m_Equiv.Set().begin(), new_pub); + change = true; + } + else if (last_pmid > 0 && last_article_pubmed_id == 0 && last_article) { + CRef new_article_id(new CArticleId); + new_article_id->SetPubmed().Set(last_pmid); + last_article->SetIds().Set().push_back(new_article_id); + change = true; + } + return change; +} + + +bool CPubEquivCleaner::IsEmpty() +{ + return !m_Equiv.IsSet() || m_Equiv.Get().empty(); +} + +bool CPubEquivCleaner::ShouldWeFixInitials(const CPub_equiv& equiv) +{ + if (!equiv.IsSet()) { + return false; + } + bool has_id = false, + has_art = false; + + for (auto it : equiv.Get()) { + if ((it->IsPmid() && it->GetPmid() > 0) || + (it->IsMuid() && it->GetMuid() > 0)) { + 
has_id = true; + } + else if (it->IsArticle()) { + has_art = true; + } + } + return !(has_art && has_id); +} + + +bool CPubEquivCleaner::s_Flatten(CPub_equiv& pub_equiv) +{ + bool any_change = false; + CPub_equiv::Tdata& data = pub_equiv.Set(); + + auto it = data.begin(); + while (it != data.end()) { + if ((*it)->IsEquiv()) { + CPub_equiv& sub_equiv = (*it)->SetEquiv(); + s_Flatten(sub_equiv); + copy(sub_equiv.Set().begin(), sub_equiv.Set().end(), back_inserter(data)); + it = data.erase(it); + any_change = true; + } + else { + ++it; + } + } + return any_change; +} + + + + + + + +CRef PubCleanerFactory(CPub& pub) +{ + switch (pub.Which()) { + case CPub::e_Gen: + return CRef(new CCitGenCleaner(pub.SetGen())); + break; + case CPub::e_Equiv: + return CRef(new CPubEquivCleaner(pub.SetEquiv())); + break; + case CPub::e_Sub: + return CRef(new CCitSubCleaner(pub.SetSub())); + break; + case CPub::e_Article: + return CRef(new CCitArtCleaner(pub.SetArticle())); + break; + case CPub::e_Journal: + return CRef(new CCitJourCleaner(pub.SetJournal())); + break; + case CPub::e_Book: + return CRef(new CCitBookCleaner(pub.SetBook())); + break; + case CPub::e_Proc: + return CRef(new CCitProcCleaner(pub.SetProc())); + break; + case CPub::e_Patent: + return CRef(new CCitPatCleaner(pub.SetPatent())); + break; + case CPub::e_Man: + return CRef(new CCitLetCleaner(pub.SetMan())); + break; + case CPub::e_Medline: + return CRef(new CMedlineEntryCleaner(pub.SetMedline())); + break; + default: + return CRef(NULL); + } +} + + +bool CCitGenCleaner::Clean(bool fix_initials, bool strip_serial) +{ + bool rval = false; + if (m_Gen.IsSetAuthors()) { + if (CCleanup::CleanupAuthList(m_Gen.SetAuthors(), fix_initials)) { + rval = true; + } + } + if (m_Gen.IsSetCit()) { + CCit_gen::TCit& cit = m_Gen.SetCit(); + if (NStr::StartsWith(cit, "unpublished", NStr::eNocase) && cit[0] != 'U') { + cit[0] = 'U'; + rval = true; + } + if (!m_Gen.IsSetJournal() + && (m_Gen.IsSetVolume() || m_Gen.IsSetPages() || 
m_Gen.IsSetIssue())) + { + m_Gen.ResetVolume(); + m_Gen.ResetPages(); + m_Gen.ResetIssue(); + rval = true; + } + const size_t old_cit_size = cit.size(); + NStr::TruncateSpacesInPlace(cit); + if (old_cit_size != cit.size()) { + rval = true; + } + } + if (m_Gen.IsSetPages()) { + if (RemoveSpaces(m_Gen.SetPages())) { + rval = true; + } + } + + // title strstripspaces (see 8728 in sqnutil1.c, Mar 11, 2011) + if (m_Gen.IsSetTitle() && StripSpaces(m_Gen.SetTitle())) { + rval = true; + } + + if (strip_serial && m_Gen.IsSetSerial_number()) { + m_Gen.ResetSerial_number(); + rval = true; + } + + // erase if the Cit-gen is now entirely blank + return rval; +} + + +bool CCitGenCleaner::IsEmpty() +{ + return (!m_Gen.IsSetCit()) && + !m_Gen.IsSetAuthors() && + (!m_Gen.IsSetMuid() || m_Gen.GetMuid() <= 0) && + !m_Gen.IsSetJournal() && + (!m_Gen.IsSetVolume() || m_Gen.GetVolume().empty()) && + (!m_Gen.IsSetIssue() || m_Gen.GetIssue().empty()) && + (!m_Gen.IsSetPages() || m_Gen.GetPages().empty()) && + !m_Gen.IsSetDate() && + (!m_Gen.IsSetSerial_number() || m_Gen.GetSerial_number() <= 0) && + (!m_Gen.IsSetTitle() || m_Gen.GetTitle().empty()) && + (!m_Gen.IsSetPmid() || m_Gen.GetPmid() <= 0); +} + + +bool CCitSubCleaner::Clean(bool fix_initials, bool strip_serial) +{ + bool any_change = false; + + if (m_Sub.IsSetAuthors()) { + auto& authors = m_Sub.SetAuthors(); + if (CCleanup::CleanupAuthList(authors, fix_initials)) { + any_change = true; + } + if (!authors.IsSetAffil() && m_Sub.IsSetImp()) { + auto& imp = m_Sub.SetImp(); + if (imp.IsSetPub()) { + authors.SetAffil(imp.SetPub()); + imp.ResetPub(); + any_change = true; + } + } + if (authors.IsSetAffil()) { + auto& affil = authors.SetAffil(); + if (affil.IsStr()) { + string &str = affil.SetStr(); + static const string& kBadAffil1 = "to the DDBJ/EMBL/GenBank databases"; + static const string& kBadAffil2 = "to the INSDC databases"; + if (NStr::StartsWith(str, kBadAffil1)) { + str = str.substr(kBadAffil1.length()); + 
NStr::TrimPrefixInPlace(str, "."); + any_change = true; + } + if (NStr::StartsWith(str, kBadAffil2)) { + str = str.substr(kBadAffil2.length()); + NStr::TrimPrefixInPlace(str, "."); + any_change = true; + } + + if (CCleanup::CleanupAffil(affil)) { + any_change = true; + } + if (CCleanup::IsEmpty(affil)) { + authors.ResetAffil(); + any_change = true; + } + } + + } + } + if (m_Sub.IsSetImp() && !m_Sub.IsSetDate()) { + auto& imp = m_Sub.SetImp(); + if (imp.IsSetDate()) { + m_Sub.SetDate().Assign(imp.GetDate()); + m_Sub.ResetImp(); + } + any_change = true; + } + + return any_change; +} + + +bool CCitSubCleaner::IsEmpty() +{ + return false; +} + + +bool CCitArtCleaner::Clean(bool fix_initials, bool strip_serial) +{ + bool change = false; + if (m_Art.IsSetAuthors()) { + if (CCleanup::CleanupAuthList(m_Art.SetAuthors(), fix_initials)) { + change = true; + } + } + if (m_Art.IsSetFrom()) { + auto& from = m_Art.SetFrom(); + if (from.IsBook()) { + CCitBookCleaner cleaner(from.SetBook()); + change |= cleaner.Clean(fix_initials, strip_serial); + } else if (from.IsProc()) { + CCitProcCleaner cleaner(from.SetProc()); + change |= cleaner.Clean(fix_initials, strip_serial); + } else if (from.IsJournal()) { + CCitJourCleaner cleaner(from.SetJournal()); + change |= cleaner.Clean(fix_initials, strip_serial); + } + } + + return change; +} + + +bool CCitBookCleaner::Clean(bool fix_initials, bool strip_serial) +{ + bool change = false; + if (m_Book.IsSetAuthors() && CCleanup::CleanupAuthList(m_Book.SetAuthors(), fix_initials)) { + change = true; + } + if (m_Book.IsSetImp() && CleanImprint(m_Book.SetImp(), eImprintBC_ForbidStatusChange)) { + change = true; + } + + return change; +} + + +bool CCitJourCleaner::Clean(bool fix_initials, bool strip_serial) +{ + bool change = false; + if (m_Jour.IsSetImp()) { + change |= CleanImprint(m_Jour.SetImp(), eImprintBC_AllowStatusChange); + } + + return change; +} + + +bool CCitProcCleaner::Clean(bool fix_initials, bool strip_serial) +{ + bool change = 
false; + if (m_Proc.IsSetBook()) { + CCitBookCleaner cleaner(m_Proc.SetBook()); + change = cleaner.Clean(fix_initials, strip_serial); + } + return change; +} + + +bool CPubCleaner::CleanImprint(CImprint& imprint, EImprintBC is_status_change_allowed) +{ + bool any_change = false; + if (is_status_change_allowed == eImprintBC_AllowStatusChange) { + if (imprint.IsSetPubstatus()) { + auto pubstatus = imprint.GetPubstatus(); + switch (pubstatus) { + case ePubStatus_aheadofprint: + if (!imprint.IsSetPrepub() || imprint.GetPrepub() != CImprint::ePrepub_in_press) + { + if (!imprint.IsSetVolume() || NStr::IsBlank(imprint.GetVolume()) + || !imprint.IsSetPages() || NStr::IsBlank(imprint.GetPages())) { + imprint.SetPrepub(CImprint::ePrepub_in_press); + any_change = true; + } + } + else if (imprint.IsSetVolume() && !NStr::IsBlank(imprint.GetVolume()) + && imprint.IsSetPages() && !NStr::IsBlank(imprint.GetPages())) { + imprint.ResetPrepub(); + any_change = true; + } + break; + case ePubStatus_epublish: + if (imprint.IsSetPrepub() && imprint.GetPrepub() == CImprint::ePrepub_in_press) { + imprint.ResetPrepub(); + any_change = true; + } + break; + default: + break; + } + } + } +#define FIX_IMPRINT_FIELD(x) \ + if (imprint.IsSet##x()) { \ + string& str = imprint.Set##x(); \ + const size_t old_len = str.length(); \ + Asn2gnbkCompressSpaces(str); \ + CleanVisString(str); \ + if( old_len != str.length() ) { \ + any_change = true; \ + } \ + if (NStr::IsBlank(str)) { \ + imprint.Reset##x(); \ + any_change = true; \ + } \ + } + + FIX_IMPRINT_FIELD(Volume); + FIX_IMPRINT_FIELD(Issue); + FIX_IMPRINT_FIELD(Pages); + FIX_IMPRINT_FIELD(Section); + FIX_IMPRINT_FIELD(Part_sup); + FIX_IMPRINT_FIELD(Language); + FIX_IMPRINT_FIELD(Part_supi); +#undef FIX_IMPRINT_FIELD + return any_change; +} + + +bool CCitPatCleaner::Clean(bool fix_initials, bool strip_serial) +{ + bool change = false; + if (m_Pat.IsSetAuthors() && CCleanup::CleanupAuthList(m_Pat.SetAuthors(), fix_initials)) { + change = true; + } + 
if (m_Pat.IsSetApplicants() && CCleanup::CleanupAuthList(m_Pat.SetApplicants(), fix_initials)) { + change = true; + } + if (m_Pat.IsSetAssignees() && CCleanup::CleanupAuthList(m_Pat.SetAssignees(), fix_initials)) { + change = true; + } + + if (m_Pat.IsSetCountry()) { + if (NStr::Equal(m_Pat.GetCountry(), "USA")) { + m_Pat.SetCountry("US"); + change = true; + } + } + + return change; +} + + +bool CCitLetCleaner::Clean(bool fix_initials, bool strip_serial) +{ + bool change = false; + if (m_Let.IsSetCit() && m_Let.IsSetType() && m_Let.GetType() == CCit_let::eType_thesis) { + CCitBookCleaner cleaner(m_Let.SetCit()); + if (cleaner.Clean(fix_initials, strip_serial)) { + change = true; + } + } + + return change; +} + + +bool CMedlineEntryCleaner::Clean(bool fix_initials, bool strip_serial) +{ + bool change = false; + if (m_Men.IsSetCit() && m_Men.GetCit().IsSetAuthors()) { + change = CCleanup::CleanupAuthList(m_Men.SetCit().SetAuthors(), fix_initials); + } + + return change; +} + + +END_SCOPE(objects) +END_NCBI_SCOPE diff --git a/c++/src/objtools/cleanup/cleanup_utils.cpp b/c++/src/objtools/cleanup/cleanup_utils.cpp index 85694c41..7c4a016a 100644 --- a/c++/src/objtools/cleanup/cleanup_utils.cpp +++ b/c++/src/objtools/cleanup/cleanup_utils.cpp @@ -1,4 +1,4 @@ -/* $Id: cleanup_utils.cpp 574795 2018-11-19 16:47:08Z ivanov $ +/* $Id: cleanup_utils.cpp 580124 2019-02-11 16:09:06Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/src/objtools/cleanup/cleanup_utils.hpp b/c++/src/objtools/cleanup/cleanup_utils.hpp index a8b5d440..c2e1f195 100644 --- a/c++/src/objtools/cleanup/cleanup_utils.hpp +++ b/c++/src/objtools/cleanup/cleanup_utils.hpp @@ -1,7 +1,7 @@ #ifndef OBJECTS_GENERAL___CLEANUP_UTILS__HPP #define OBJECTS_GENERAL___CLEANUP_UTILS__HPP -/* $Id: cleanup_utils.hpp 574795 2018-11-19 16:47:08Z ivanov $ +/* $Id: cleanup_utils.hpp 580124 2019-02-11 16:09:06Z ivanov $ * 
=========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/src/objtools/cleanup/fix_feature_id.cpp b/c++/src/objtools/cleanup/fix_feature_id.cpp new file mode 100644 index 00000000..533aeaf9 --- /dev/null +++ b/c++/src/objtools/cleanup/fix_feature_id.cpp @@ -0,0 +1,190 @@ +/* $Id: fix_feature_id.cpp 578199 2019-01-15 16:38:57Z ivanov $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. 
+ * + * =========================================================================== + * + * Authors: Igor Filippov + */ + + +#include +#include +#include +#include +#include + +BEGIN_NCBI_SCOPE +USING_SCOPE(objects); + + +CObject_id::TId CFixFeatureId::s_FindHighestFeatureId(const CSeq_entry_Handle& entry) +{ + CObject_id::TId feat_id = 0; + for (CFeat_CI feat_it(entry); feat_it; ++feat_it) { + if (feat_it->IsSetId()) { + const CFeat_id &id = feat_it->GetId(); + if (id.IsLocal() && id.GetLocal().IsId() && id.GetLocal().GetId() > feat_id) { + feat_id = id.GetLocal().GetId(); + } + } + } + return feat_id; +} + +void CFixFeatureId::s_UpdateFeatureIds(const CSeq_entry_Handle& entry, map > &changed_feats, int offset) +{ + for ( CFeat_CI feat_it(entry); feat_it; ++feat_it ) + { + bool modified = false; + CRef edited; + CSeq_feat_Handle fh = feat_it->GetSeq_feat_Handle(); + if (changed_feats.find(fh) != changed_feats.end()) + { + edited = changed_feats[fh]; + } + else + { + edited.Reset(new CSeq_feat); + edited->Assign(feat_it->GetOriginalFeature()); + } + + if (edited->IsSetId() && edited->GetId().IsLocal() && edited->GetId().GetLocal().IsId()) + { + int id = edited->GetId().GetLocal().GetId(); + edited->SetId().SetLocal().SetId(id+offset); + modified = true; + } + if (edited->IsSetXref()) + { + CSeq_feat::TXref::iterator xref_it = edited->SetXref().begin(); + while ( xref_it != edited->SetXref().end() ) + { + if ((*xref_it)-> IsSetId() && (*xref_it)->GetId().IsLocal() && (*xref_it)->GetId().GetLocal().IsId()) + { + int id = (*xref_it)->GetId().GetLocal().GetId(); + (*xref_it)->SetId().SetLocal().SetId(id+offset); + modified = true; + } + ++xref_it; + } + } + if (modified) + { + changed_feats[fh] = edited; + } + } +} + + +void CFixFeatureId::s_ApplyToSeqInSet(CSeq_entry_Handle tse, map > &changed_feats) +{ + int offset = 0; + if (tse && tse.IsSet() && tse.GetSet().IsSetClass() && tse.GetSet().GetClass() == CBioseq_set::eClass_genbank) + { + for(CSeq_entry_CI 
direct_child_ci( tse.GetSet(), CSeq_entry_CI::eNonRecursive ); direct_child_ci; ++direct_child_ci ) + { + const CSeq_entry_Handle& entry = *direct_child_ci; + CObject_id::TId top_id = s_FindHighestFeatureId(entry); + s_UpdateFeatureIds(entry, changed_feats, offset); + offset += top_id; + } + } +} + +// This function maps existing feature ids to the sequential ints - 1,2,3,... +void CFixFeatureId::s_MakeIDPairs(const CSeq_entry_Handle& entry, map &id_pairs) +{ + int feat_id = 0; + for (CFeat_CI feat_it(entry); feat_it; ++feat_it) { + if (feat_it->IsSetId()) { + const CFeat_id &id = feat_it->GetId(); + if (id.IsLocal() && id.GetLocal().IsId() && id_pairs.find(id.GetLocal().GetId()) == id_pairs.end()) { + id_pairs[id.GetLocal().GetId()] = ++feat_id; + } + } + } +} + +// Create a map from the existing feature ids to the sequential ints 1,2,3... +// and prepare a map from feature handles to the modified features with the reassigned ids both in the feature id and in the xrefs +void CFixFeatureId::s_ReassignFeatureIds(const CSeq_entry_Handle& entry, map > &changed_feats) +{ + if (!entry) + return; + map id_pairs; + CFixFeatureId::s_MakeIDPairs(entry, id_pairs); + + for ( CFeat_CI feat_it(entry); feat_it; ++feat_it ) + { + bool modified = false; + CRef edited; + CSeq_feat_Handle fh = feat_it->GetSeq_feat_Handle(); + if (changed_feats.find(fh) != changed_feats.end()) + { + edited = changed_feats[fh]; + } + else + { + edited.Reset(new CSeq_feat); + edited->Assign(feat_it->GetOriginalFeature()); + } + + if (edited->IsSetId() && edited->GetId().IsLocal() && edited->GetId().GetLocal().IsId()) + { + int id = id_pairs[edited->GetId().GetLocal().GetId()]; + edited->SetId().SetLocal().SetId(id); + modified = true; + } + if (edited->IsSetXref()) + { + CSeq_feat::TXref::iterator xref_it = edited->SetXref().begin(); + while ( xref_it != edited->SetXref().end() ) + { + if ((*xref_it)-> IsSetId() && (*xref_it)->GetId().IsLocal() && (*xref_it)->GetId().GetLocal().IsId()) + { + modified = 
true; + if (id_pairs.find((*xref_it)->GetId().GetLocal().GetId()) != id_pairs.end()) + { + int id = id_pairs[(*xref_it)->GetId().GetLocal().GetId()]; + (*xref_it)->SetId().SetLocal().SetId(id); + } + else + { + (*xref_it)->ResetId(); + xref_it = edited->SetXref().erase(xref_it); + continue; + } + } + ++xref_it; + } + if (edited->SetXref().empty()) + edited->ResetXref(); + } + if (modified) + { + changed_feats[fh] = edited; + } + } +} + +END_NCBI_SCOPE diff --git a/c++/src/objtools/cleanup/newcleanupp.cpp b/c++/src/objtools/cleanup/newcleanupp.cpp index 2251f27f..176411d0 100644 --- a/c++/src/objtools/cleanup/newcleanupp.cpp +++ b/c++/src/objtools/cleanup/newcleanupp.cpp @@ -54,6 +54,9 @@ #include "cleanup_utils.hpp" +#include +#include + #include #include #include @@ -68,6 +71,8 @@ #include #include +#include + #include "autogenerated_cleanup.hpp" #include "autogenerated_extended_cleanup.hpp" @@ -607,33 +612,7 @@ void CNewCleanup_imp::LeavingEntry ( // with one space. Strips all spaces after '(' and before ( ')' or ',' ). void CNewCleanup_imp::x_StripSpacesMarkChanged(string& str) { - if (str.empty()) { - return; - } - - const string::size_type old_size = str.length(); - - string::iterator end = str.end(); - string::iterator it = str.begin(); - string::iterator new_str = it; - while (it != end) { - *new_str++ = *it; - if ( (*it == ' ') || (*it == '\t') || (*it == '(') ) { - for (++it; (it != end) && (*it == ' ' || *it == '\t'); ++it) continue; - if ((it != end) && (*it == ')' || *it == ',') ) { - // this "if" protects against the case "(...bunch of spaces and tabs...)". 
- // Otherwise, the first '(' is unintentionally erased - if( *(new_str - 1) != '(' ) { - --new_str; - } - } - } else { - ++it; - } - } - str.erase(new_str, str.end()); - - if( str.length() != old_size ) { + if (StripSpaces(str)) { ChangeMade(CCleanupChange::eTrimSpaces); } } @@ -762,9 +741,11 @@ static CMolInfo::TCompleteness GetCompletenessFromFlags(bool partial5, bool part void CNewCleanup_imp::ProtSeqBC (CBioseq& bs) { // Bail if not protein - if( ! FIELD_CHAIN_OF_2_IS_SET(bs, Inst, Mol) || - bs.GetInst().GetMol() != NCBI_SEQMOL(aa) ) - { + if (!bs.IsSetInst()) { + return; + } + CSeq_inst& inst = bs.SetInst(); + if (!inst.IsSetMol() || inst.GetMol() != CSeq_inst::eMol_aa) { return; } @@ -1116,17 +1097,9 @@ void CNewCleanup_imp::GBblockBC ( // split keywords at semicolons if (gbk.IsSetKeywords()) { - CGB_block::TKeywords::iterator it = gbk.SetKeywords().begin(); - while (it != gbk.SetKeywords().end()) { - vector tokens; - NStr::Split(*it, ";", tokens, NStr::fSplit_Tokenize); - if (tokens.size() > 1) { - it = gbk.SetKeywords().erase(it); - gbk.SetKeywords().insert(it, tokens.begin(), tokens.end()); - } else { - ++it; - } - } + string one_string = NStr::Join(gbk.GetKeywords(), ";"); + gbk.ResetKeywords(); + NStr::Split(one_string, ";", gbk.SetKeywords()); } CLEAN_STRING_LIST (gbk, Keywords); @@ -1360,23 +1333,29 @@ void CNewCleanup_imp::BiosourceFeatBC ( { // consolidate all orgmods of subtype "other" into one CRef pFirstOtherOrgMod; - EDIT_EACH_ORGMOD_ON_BIOSOURCE(orgmod_it, biosrc) { - COrgMod & orgmod = **orgmod_it; - - // we're only cleaning the ones of type "other" - if( ! FIELD_EQUALS(orgmod, Subtype, NCBI_ORGMOD(other)) || - ! 
FIELD_IS_SET(orgmod, Subname) ) - { - continue; - } + if (biosrc.IsSetOrg() && biosrc.GetOrg().IsSetOrgname() && biosrc.GetOrg().GetOrgname().IsSetMod()) { + auto& mod_set = biosrc.SetOrg().SetOrgname().SetMod(); + auto mod_it = mod_set.begin(); + while (mod_it != mod_set.end()) { + COrgMod & orgmod = **mod_it; + + // we're only cleaning the ones of type "other" + if (!FIELD_EQUALS(orgmod, Subtype, NCBI_ORGMOD(other)) || + !FIELD_IS_SET(orgmod, Subname)) + { + ++mod_it; + continue; + } - if( pFirstOtherOrgMod ) { - STRING_FIELD_APPEND(*pFirstOtherOrgMod, Subname, "; ", GET_STRING_FLD_OR_BLANK(orgmod, Subname) ); - ChangeMade(CCleanupChange::eChangeOrgmod); - ERASE_ORGMOD_ON_BIOSOURCE(orgmod_it, biosrc); - ChangeMade(CCleanupChange::eRemoveOrgmod); - } else { - pFirstOtherOrgMod.Reset( &orgmod ); + if (pFirstOtherOrgMod) { + STRING_FIELD_APPEND(*pFirstOtherOrgMod, Subname, "; ", GET_STRING_FLD_OR_BLANK(orgmod, Subname)); + ChangeMade(CCleanupChange::eChangeOrgmod); + mod_it = mod_set.erase(mod_it); + ChangeMade(CCleanupChange::eRemoveOrgmod); + } else { + pFirstOtherOrgMod.Reset(&orgmod); + ++mod_it; + } } } @@ -1707,18 +1686,24 @@ void CNewCleanup_imp::BiosourceBC ( } // correct specific cases of inconsistently applied tildes - EDIT_EACH_ORGMOD_ON_BIOSOURCE(orgmod_it, biosrc) { - COrgMod & orgmod = **orgmod_it; + if (biosrc.IsSetOrg() && biosrc.GetOrg().IsSetOrgname()) { + auto& orgname = biosrc.SetOrg().SetOrgname(); + if (orgname.IsSetMod()) { + auto& mod_set = orgname.SetMod(); + for (auto& orgmod_it : mod_set) { + COrgMod & orgmod = *orgmod_it; + + // we're only correcting tildes for the ones of type "other" + if (!FIELD_EQUALS(orgmod, Subtype, NCBI_ORGMOD(other)) || + !FIELD_IS_SET(orgmod, Subname)) + { + continue; + } - // we're only correcting tildes for the ones of type "other" - if( ! FIELD_EQUALS(orgmod, Subtype, NCBI_ORGMOD(other)) || - ! 
FIELD_IS_SET(orgmod, Subname) ) - { - continue; + string &subname = GET_MUTABLE(orgmod, Subname); + s_CorrectTildes(subname); + } } - - string &subname = GET_MUTABLE (orgmod, Subname); - s_CorrectTildes(subname); } EDIT_EACH_SUBSOURCE_ON_BIOSOURCE( subsrc_iter, biosrc ) { @@ -2295,620 +2280,19 @@ void CNewCleanup_imp::PubdescBC ( CPubdesc& pubdesc ) { - if ( FIELD_IS_SET(pubdesc, Comment)) { - x_ConvertDoubleQuotesMarkChanged( GET_MUTABLE(pubdesc, Comment) ); - } - - CLEAN_STRING_MEMBER(pubdesc, Comment); - - if ( FIELD_IS_SET(pubdesc, Pub) ) { - PubEquivBC( GET_MUTABLE(pubdesc, Pub) ); - } -} - -static bool s_ShouldWeFixInitials(const CPub_equiv& equiv) -{ - bool has_id = false, - has_art = false; - - FOR_EACH_PUB_ON_PUBEQUIV(pub_iter, equiv) { - if ( ( (*pub_iter)->IsPmid() && (*pub_iter)->GetPmid() > 0 ) || - ( (*pub_iter)->IsMuid() && (*pub_iter)->GetMuid() > 0 ) ) { - has_id = true; - } else if ((*pub_iter)->IsArticle()) { - has_art = true; - } - } - return !(has_art && has_id); -} - -static size_t s_PubPriority( CPub::E_Choice val) -{ - size_t priority = 0; - switch (val) { - case CPub::e_not_set: - priority = 0; - break; - case CPub::e_Gen: - priority = 3; - break; - case CPub::e_Sub: - priority = 4; - break; - case CPub::e_Medline: - priority = 13; - break; - case CPub::e_Muid: - priority = 2; - break; - case CPub::e_Article: - priority = 5; - break; - case CPub::e_Journal: - priority = 6; - break; - case CPub::e_Book: - priority = 7; - break; - case CPub::e_Proc: - priority = 8; - break; - case CPub::e_Patent: - priority = 9; - break; - case CPub::e_Pat_id: - priority = 10; - break; - case CPub::e_Man: - priority = 11; - break; - case CPub::e_Equiv: - priority = 12; - break; - case CPub::e_Pmid: - priority = 1; - break; - } - return priority; -} - -inline -static -bool s_PubWhichCompare( CRef pub1, CRef pub2 ) { - size_t pr1 = s_PubPriority(pub1->Which()); - size_t pr2 = s_PubPriority(pub2->Which()); - return (pr1 < pr2); -} - -void 
CNewCleanup_imp::PubEquivBC (CPub_equiv& pub_equiv) -{ - x_FlattenPubEquiv(pub_equiv); - - // we keep the last of these because we might transfer one - // to the other as necessary to fill in gaps. - int last_pmid = 0; - int last_article_pubmed_id = 0; // the last from a journal - CRef last_article; - - bool fix_initials = s_ShouldWeFixInitials(pub_equiv); - - pub_equiv.Set().sort(s_PubWhichCompare); - - EDIT_EACH_PUB_ON_PUBEQUIV(it, pub_equiv) { - CPub &pub = **it; - - if( PubBC(pub, fix_initials) == eAction_Erase ) { - ERASE_PUB_ON_PUBEQUIV(it, pub_equiv); - ChangeMade(CCleanupChange::eRemoveEmptyPub); - continue; - } - - // storing these so at the end we'll know the last values - if( pub.IsPmid() ) { - last_pmid = pub.GetPmid().Get(); - } - if( pub.IsArticle() ) { - last_article.Reset( &pub.SetArticle()); - if (last_article->IsSetIds()) { - CArticleIdSet::Tdata::iterator id_it = last_article->SetIds().Set().begin(); - while (id_it != last_article->SetIds().Set().end()) { - if ((*id_it)->IsPubmed() && last_article_pubmed_id != 0 && - last_article_pubmed_id == (*id_it)->GetPubmed()) { - // erase duplicate - id_it = last_article->SetIds().Set().erase(id_it); - ChangeMade(CCleanupChange::eChangePublication); - } else { - if ((*id_it)->IsPubmed()) { - last_article_pubmed_id = (*id_it)->GetPubmed(); - } - ++id_it; - } - } - } - } - } - - // Now, we might have to transfer data to fill in missing information - if (last_pmid == 0 && last_article_pubmed_id > 0) { - CRef new_pub( new CPub ); - new_pub->SetPmid().Set( last_article_pubmed_id ); - pub_equiv.Set().insert(pub_equiv.Set().begin(), new_pub); - ChangeMade(CCleanupChange::eChangePublication); - } else if (last_pmid > 0 && last_article_pubmed_id == 0 && last_article ) { - CRef new_article_id(new CArticleId); - new_article_id->SetPubmed().Set(last_pmid); - last_article->SetIds().Set().push_back(new_article_id); + if (CCleanupPub::CleanPubdesc(pubdesc, m_StripSerial)) { ChangeMade(CCleanupChange::eChangePublication); 
} - - -} - -CNewCleanup_imp::EAction CNewCleanup_imp::PubBC(CPub& pub, bool fix_initials) -{ - EAction action = eAction_Nothing; - -#define PUBBC_CASE(cit_type, func) \ - case NCBI_PUB(cit_type): \ - action = func( GET_MUTABLE(pub, cit_type), fix_initials); \ - break; - - switch (pub.Which()) { - PUBBC_CASE(Gen, CitGenBC) - PUBBC_CASE(Sub, CitSubBC) - PUBBC_CASE(Article, CitArtBC) - PUBBC_CASE(Book, CitBookBC) - PUBBC_CASE(Patent, CitPatBC) - PUBBC_CASE(Man, CitLetBC) - PUBBC_CASE(Medline, MedlineEntryBC) - default: - action = eAction_Nothing; - } -#undef PUBBC_CASE - - string new_label; - pub.GetLabel( &new_label, CPub::eContent, true); - m_PubToNewPubLabelMap[CRef(&pub)] = new_label; - - return action; -} - -static bool s_IsEmpty(const CAuth_list::TAffil& affil) -{ - if ( FIELD_IS(affil, Str) ) { - return NStr::IsBlank( GET_FIELD(affil, Str) ); - } else if ( FIELD_IS(affil, Std) ) { - const CAuth_list::TAffil::TStd& std = GET_FIELD(affil, Std); - return !(std.IsSetAffil() || std.IsSetDiv() || std.IsSetCity() || - std.IsSetSub() || std.IsSetCountry() || std.IsSetStreet() || - std.IsSetEmail() || std.IsSetFax() || std.IsSetPhone() || - std.IsSetPostal_code()); - } - return true; -} - -static -bool s_IsEmpty( const CCit_gen &cg ) -{ - return ( ! FIELD_IS_SET(cg, Cit) ) && - ! FIELD_IS_SET(cg, Authors) && - ( ! FIELD_IS_SET(cg, Muid) || GET_FIELD(cg, Muid) <= 0 ) && - ! FIELD_IS_SET(cg, Journal) && - ( ! FIELD_IS_SET(cg, Volume) || GET_FIELD(cg, Volume).empty() ) && - ( ! FIELD_IS_SET(cg, Issue) || GET_FIELD(cg, Issue).empty() ) && - ( ! FIELD_IS_SET(cg, Pages) || GET_FIELD(cg, Pages).empty() ) && - ! FIELD_IS_SET(cg, Date) && - ( ! FIELD_IS_SET(cg, Serial_number) || GET_FIELD(cg, Serial_number) <= 0 ) && - ( ! FIELD_IS_SET(cg, Title) || GET_FIELD(cg, Title).empty() ) && - ( ! 
FIELD_IS_SET(cg, Pmid) || GET_FIELD(cg, Pmid) <= 0 ); -} - -CNewCleanup_imp::EAction CNewCleanup_imp::CitGenBC(CCit_gen& cg, bool fix_initials) -{ - if( FIELD_IS_SET(cg, Authors) ) { - AuthListBC( GET_MUTABLE(cg, Authors), fix_initials ); - } - if ( FIELD_IS_SET(cg, Cit) ) { - CCit_gen::TCit& cit = GET_MUTABLE( cg, Cit ); - if (NStr::StartsWith(cit, "unpublished", NStr::eNocase) && cit[0] != 'U' ) { - cit[0] = 'U'; - ChangeMade(CCleanupChange::eChangePublication); - } - if (! FIELD_IS_SET(cg, Journal) - && ( FIELD_IS_SET(cg, Volume) || FIELD_IS_SET(cg, Pages) || FIELD_IS_SET(cg, Issue))) - { - RESET_FIELD(cg, Volume); - RESET_FIELD(cg, Pages); - RESET_FIELD(cg, Issue); - ChangeMade(CCleanupChange::eChangePublication); - } - const size_t old_cit_size = cit.size(); - NStr::TruncateSpacesInPlace(cit); - if (old_cit_size != cit.size()) { - ChangeMade(CCleanupChange::eChangePublication); - } - } - if ( FIELD_IS_SET(cg, Pages) ) { - if (RemoveSpaces( GET_MUTABLE(cg, Pages) ) ) { - ChangeMade(CCleanupChange::eChangePublication); + // need to construct m_PubToNewPubLabelMap separately + if (pubdesc.IsSetPub()) { + for (auto p : pubdesc.SetPub().Set()) { + string new_label; + p->GetLabel(&new_label, CPub::eContent, true); + m_PubToNewPubLabelMap[p] = new_label; } } - - // title strstripspaces (see 8728 in sqnutil1.c, Mar 11, 2011) - if( FIELD_IS_SET(cg, Title) ) { - x_StripSpacesMarkChanged( GET_MUTABLE(cg, Title) ); - } - - if( m_StripSerial && cg.IsSetSerial_number() ) { - RESET_FIELD( cg, Serial_number ); - ChangeMade(CCleanupChange::eStripSerial); - } - - // erase if the Cit-gen is now entirely blank - return ( s_IsEmpty(cg) ? 
eAction_Erase : eAction_Nothing ); } -CNewCleanup_imp::EAction CNewCleanup_imp::CitSubBC(CCit_sub& citsub, bool fix_initials) -{ - CRef authors; - if ( FIELD_IS_SET(citsub, Authors) ) { - authors.Reset(& GET_MUTABLE(citsub, Authors) ); - AuthListBC( *authors, fix_initials); - } - - if ( FIELD_IS_SET(citsub, Imp) ) { - CCit_sub::TImp& imp = GET_MUTABLE(citsub, Imp); - if (authors && ! FIELD_IS_SET(*authors, Affil) && FIELD_IS_SET(imp, Pub) ) { - SET_FIELD(*authors, Affil, GET_MUTABLE(imp, Pub) ); - RESET_FIELD(imp, Pub); - ChangeMade(CCleanupChange::eChangePublication); - } - if (! FIELD_IS_SET(citsub, Date) && FIELD_IS_SET(imp, Date) ) { - GET_MUTABLE(citsub, Date).Assign( GET_FIELD(imp, Date) ); - RESET_FIELD(citsub, Imp); - ChangeMade(CCleanupChange::eChangePublication); - } - } - if (authors && FIELD_IS_SET(*authors, Affil) ) { - CCit_sub::TAuthors::TAffil& affil = GET_MUTABLE(*authors, Affil); - if ( FIELD_IS(affil, Str) ) { - string str = GET_MUTABLE(affil, Str); - if (NStr::StartsWith(str, "to the ", NStr::eNocase) && - str.size() >= 34 && - NStr::StartsWith(str.substr(24), " databases", NStr::eNocase) ) { - if ( str.size() > 34 && str[34] == '.') { - str = str.substr(35); - } else { - str = str.substr(34); - } - SET_FIELD(affil, Str, str); - ChangeMade(CCleanupChange::eChangePublication); - AffilBC(affil); - if ( s_IsEmpty(affil) ) { - RESET_FIELD(*authors, Affil); - ChangeMade(CCleanupChange::eChangePublication); - } - } - } - } - - return eAction_Nothing; -} - -CNewCleanup_imp::EAction CNewCleanup_imp::CitArtBC(CCit_art& citart, bool fix_initials) -{ - if ( FIELD_IS_SET(citart, Authors) ) { - AuthListBC( GET_MUTABLE(citart, Authors), fix_initials); - } - if ( FIELD_IS_SET(citart, From) ) { - CCit_art::TFrom& from = GET_MUTABLE(citart, From); - if ( FIELD_IS(from, Book) ) { - CitBookBC(GET_MUTABLE(from, Book), fix_initials); - } else if ( FIELD_IS(from, Proc) ) { - CitProcBC( GET_MUTABLE(from, Proc), fix_initials); - } else if (FIELD_IS(from, Journal) ) { - 
CitJourBC(GET_MUTABLE(from, Journal), fix_initials); - } - } - - return eAction_Nothing; -} - -CNewCleanup_imp::EAction CNewCleanup_imp::CitBookBC(CCit_book& citbook, bool fix_initials) -{ - if ( FIELD_IS_SET(citbook, Authors) ) { - AuthListBC( GET_MUTABLE(citbook, Authors), fix_initials); - } - if ( FIELD_IS_SET(citbook, Imp) ) { - ImprintBC( GET_MUTABLE(citbook, Imp), eImprintBC_ForbidStatusChange ); - } - - return eAction_Nothing; -} - -CNewCleanup_imp::EAction CNewCleanup_imp::CitPatBC(CCit_pat& citpat, bool fix_initials) -{ - if ( FIELD_IS_SET(citpat, Authors) ) { - AuthListBC( GET_MUTABLE(citpat, Authors), fix_initials); - } - if ( FIELD_IS_SET(citpat, Applicants) ) { - AuthListBC( GET_MUTABLE(citpat, Applicants), fix_initials); - } - if ( FIELD_IS_SET(citpat, Assignees) ) { - AuthListBC( GET_MUTABLE(citpat, Assignees), fix_initials); - } - - if ( FIELD_IS_SET(citpat, Country) ) { - if (NStr::Equal(citpat.GetCountry(), "USA")) { - citpat.SetCountry("US"); - ChangeMade(CCleanupChange::eChangePublication); - } - } - - return eAction_Nothing; -} - -CNewCleanup_imp::EAction CNewCleanup_imp::CitLetBC(CCit_let& citlet, bool fix_initials) -{ - if ( FIELD_IS_SET(citlet, Cit) && FIELD_EQUALS( citlet, Type, CCit_let::eType_thesis ) ) { - CitBookBC( GET_MUTABLE(citlet, Cit), fix_initials); - } - - return eAction_Nothing; -} - -CNewCleanup_imp::EAction CNewCleanup_imp::CitProcBC(CCit_proc& citproc, bool fix_initials) -{ - if ( FIELD_IS_SET(citproc, Book) ) { - CitBookBC( GET_MUTABLE(citproc, Book), fix_initials); - } - - return eAction_Nothing; -} - -CNewCleanup_imp::EAction CNewCleanup_imp::CitJourBC(CCit_jour &citjour, bool fix_initials) -{ - if ( FIELD_IS_SET(citjour, Imp) ) { - ImprintBC( GET_MUTABLE(citjour, Imp), eImprintBC_AllowStatusChange ); - } - - return eAction_Nothing; -} - -CNewCleanup_imp::EAction CNewCleanup_imp::MedlineEntryBC(CMedline_entry& medline, bool fix_initials) -{ - if ( ! FIELD_IS_SET(medline, Cit) || ! 
FIELD_IS_SET(medline.GetCit(), Authors) ) { - return eAction_Nothing; - } - AuthListBC( GET_MUTABLE(medline.SetCit(), Authors), fix_initials ); - - return eAction_Nothing; -} - -static bool s_IsEmpty(const CAuthor& auth) -{ - if (! FIELD_IS_SET(auth, Name)) { - return true; - } - - const CAuthor::TName& name = GET_FIELD(auth, Name); - - const string* str = NULL; - switch (name.Which()) { - case CAuthor::TName::e_not_set: - return true; - - case CAuthor::TName::e_Name: - {{ - const CName_std& nstd = name.GetName(); - // last name is required - if( (!nstd.IsSetLast() || NStr::IsBlank(nstd.GetLast())) ) { - return true; - } - // also fails if all fields are blank - if ((!nstd.IsSetLast() || NStr::IsBlank(nstd.GetLast())) && - (!nstd.IsSetFirst() || NStr::IsBlank(nstd.GetFirst())) && - (!nstd.IsSetMiddle() || NStr::IsBlank(nstd.GetMiddle())) && - (!nstd.IsSetFull() || NStr::IsBlank(nstd.GetFull())) && - (!nstd.IsSetInitials() || NStr::IsBlank(nstd.GetInitials())) && - (!nstd.IsSetSuffix() || NStr::IsBlank(nstd.GetSuffix())) && - (!nstd.IsSetTitle() || NStr::IsBlank(nstd.GetTitle()))) { - return true; - } - break; - }} - - case CAuthor::TName::e_Ml: - str = &GET_FIELD(name, Ml); - break; - case CAuthor::TName::e_Str: - str = &GET_FIELD(name, Str); - break; - case CAuthor::TName::e_Consortium: - str = &GET_FIELD(name, Consortium); - break; - - default: - break; - }; - if (str != NULL && NStr::IsBlank(*str)) { - return true; - } - return false; -} - -// when we reset author names, we need to put in a place holder - otherwise the ASN.1 becomes invalid -static -void s_ResetAuthorNames (CAuth_list::TNames& names) -{ - names.Reset(); - list< string > &auth_list = names.SetStr(); - auth_list.clear(); - auth_list.push_back("?"); -} - -void CNewCleanup_imp::AuthListBC( CAuth_list& al, bool fix_initials ) -{ - if ( FIELD_IS_SET(al, Affil) ) { - AffilBC( GET_MUTABLE(al, Affil) ); - if (s_IsEmpty( GET_FIELD(al, Affil) )) { - RESET_FIELD(al, Affil); - 
ChangeMade(CCleanupChange::eChangePublication); - } - } - if ( FIELD_IS_SET(al, Names) ) { - typedef CAuth_list::TNames TNames; - switch ( GET_MUTABLE(al, Names).Which() ) { - case TNames::e_Ml: - {{ - if (ConvertAuthorContainerMlToStd(al)) { - ChangeMade(CCleanupChange::eChangePublication); - } - }} - // !!!!!!!!!!!!!!!!!!!!!! - // !!!!!FALL-THROUGH!!!!! - // !!!!!!!!!!!!!!!!!!!!!! - // ( since we just converted the ml to an std, we need to do the - // std clean-up step ) - case TNames::e_Std: - {{ - // The "names" variable is not above the switch() because - // the case fall-through means it may have been invalidated. - TNames& names = GET_MUTABLE(al, Names); - // call BasicCleanup for each CAuthor - EDIT_EACH_AUTHOR_ON_AUTHLIST( it, al ) { - if (CCleanup::CleanupAuthor(**it, fix_initials)) { - ChangeMade(CCleanupChange::eChangePublication); - } - if( s_IsEmpty(**it) ) { - ERASE_AUTHOR_ON_AUTHLIST( it, al ); - ChangeMade(CCleanupChange::eChangePublication); - } - } - if ( AUTHOR_ON_AUTHLIST_IS_EMPTY(al) ) { - s_ResetAuthorNames (names); - ChangeMade(CCleanupChange::eChangePublication); - } - break; - }} - case TNames::e_Str: - {{ - TNames& names = GET_MUTABLE(al, Names); - EDIT_EACH_STRING_IN_LIST( str_iter, GET_MUTABLE(names, Str) ) { - x_CompressStringSpacesMarkChanged(*str_iter); - } - if (CleanVisStringContainer( GET_MUTABLE(names, Str) )) { - ChangeMade(CCleanupChange::eChangePublication); - } - if (names.GetStr().empty()) { - s_ResetAuthorNames (names); - ChangeMade(CCleanupChange::eChangePublication); - } - break; - }} - default: - break; - } - } - // if no remaining authors, put in default author for legal ASN.1 - if (! 
FIELD_IS_SET(al, Names) ) { - al.SetNames().SetStr().push_back("?"); - ChangeMade(CCleanupChange::eChangePublication); - } -} - -void CNewCleanup_imp::AffilBC( CAffil& af ) -{ - switch (af.Which()) { - case CAffil::e_Str: - {{ - x_CompressStringSpacesMarkChanged( GET_MUTABLE(af, Str) ); - x_CleanupStringMarkChanged( GET_MUTABLE(af, Str) ); - break; - }} - case CAffil::e_Std: - {{ - CAffil::TStd& std = GET_MUTABLE(af, Std); - - CLEAN_AND_COMPRESS_STRING_MEMBER_JUNK(std, Affil); - CLEAN_AND_COMPRESS_STRING_MEMBER_JUNK(std, Div); - CLEAN_AND_COMPRESS_STRING_MEMBER_JUNK(std, City); - CLEAN_AND_COMPRESS_STRING_MEMBER_JUNK(std, Sub); - CLEAN_AND_COMPRESS_STRING_MEMBER_JUNK(std, Country); - CLEAN_AND_COMPRESS_STRING_MEMBER_JUNK(std, Street); - CLEAN_AND_COMPRESS_STRING_MEMBER_JUNK(std, Email); - CLEAN_AND_COMPRESS_STRING_MEMBER_JUNK(std, Fax); - CLEAN_AND_COMPRESS_STRING_MEMBER_JUNK(std, Phone); - CLEAN_AND_COMPRESS_STRING_MEMBER_JUNK(std, Postal_code); - - if (std.CanGetCountry() ) { - if ( NStr::EqualNocase(std.GetCountry(), "U.S.A.") ) { - SET_FIELD( std, Country, "USA"); - ChangeMade (CCleanupChange::eChangePublication); - } - } - - if (std.CanGetCountry() ) { - if ( NStr::EqualNocase(std.GetCountry(), "USA") && ! NStr::EqualCase(std.GetCountry(), "USA") ) { - SET_FIELD( std, Country, "USA"); - ChangeMade (CCleanupChange::eChangePublication); - } - } - - if (std.CanGetSub() && std.CanGetCountry() ) { - if ( NStr::EqualCase(std.GetCountry(), "USA") ) { - string oldsub = std.GetSub(); - string newsub = NStr::Replace(oldsub, ".", ""); - if ( ! NStr::EqualNocase(oldsub, newsub) ) { - SET_FIELD( std, Sub, newsub); - ChangeMade (CCleanupChange::eChangePublication); - } - } - } - break; - }} - default: - break; - } -} - -void CNewCleanup_imp::ImprintBC( CImprint& imprint, EImprintBC is_status_change_allowed ) -{ - if( is_status_change_allowed == eImprintBC_AllowStatusChange ) { - if ( FIELD_EQUALS(imprint, Pubstatus, ePubStatus_aheadofprint) && - (! 
FIELD_EQUALS(imprint, Prepub, CImprint::ePrepub_in_press) ) ) - { - if (!imprint.IsSetVolume() || NStr::IsBlank (imprint.GetVolume()) - || !imprint.IsSetPages() || NStr::IsBlank (imprint.GetPages())) { - SET_FIELD(imprint, Prepub, CImprint::ePrepub_in_press); - ChangeMade (CCleanupChange::eChangePublication); - } - } - if (FIELD_EQUALS(imprint, Pubstatus, ePubStatus_aheadofprint) && - FIELD_EQUALS(imprint, Prepub, CImprint::ePrepub_in_press) ) - { - if (imprint.IsSetVolume() && !NStr::IsBlank (imprint.GetVolume()) - && imprint.IsSetPages() && !NStr::IsBlank (imprint.GetPages())) { - RESET_FIELD(imprint, Prepub); - ChangeMade (CCleanupChange::eChangePublication); - } - } - - if (FIELD_EQUALS(imprint, Pubstatus, ePubStatus_epublish) && - FIELD_EQUALS(imprint, Prepub, CImprint::ePrepub_in_press) ) { - RESET_FIELD(imprint, Prepub); - ChangeMade (CCleanupChange::eChangePublication); - } - } - - CLEAN_AND_COMPRESS_STRING_MEMBER(imprint, Volume); - CLEAN_AND_COMPRESS_STRING_MEMBER(imprint, Issue); - CLEAN_AND_COMPRESS_STRING_MEMBER(imprint, Pages); - CLEAN_AND_COMPRESS_STRING_MEMBER(imprint, Section); - CLEAN_AND_COMPRESS_STRING_MEMBER(imprint, Part_sup); - CLEAN_AND_COMPRESS_STRING_MEMBER(imprint, Language); - CLEAN_AND_COMPRESS_STRING_MEMBER(imprint, Part_supi); -} typedef pair > TCit; struct TSortCit { @@ -2937,7 +2321,7 @@ struct TSortCit { const CCit_gen& g1 = p1.GetGen(); const CCit_gen& g2 = p2.GetGen(); if ( g1.IsSetTitle() != g2.IsSetTitle() ) { - return (g1.IsSetTitle() - g2.IsSetTitle()); + return (g1.IsSetTitle() || g2.IsSetTitle()); } else if( ! g1.IsSetTitle() && ! g2.IsSetTitle() ) { return false; } @@ -2953,7 +2337,7 @@ bool cmpSortedvsOld(const TCit& e1, const CRef& e2) { void CNewCleanup_imp::PubSetBC( CPub_set &pub_set ) { // The Pub-set should always be pub. Ignore if not. - if( ! FIELD_IS( pub_set, Pub ) ) { + if( ! 
pub_set.IsPub() ) { return; } @@ -2961,26 +2345,28 @@ void CNewCleanup_imp::PubSetBC( CPub_set &pub_set ) // indexed by a label generated for each CPub. typedef set TCitSet; TCitSet cit_set; - ITERATE (CPub_set::TPub, cit_it, pub_set.GetPub()) { + for (auto cit_it : pub_set.GetPub()) { string label; - (*cit_it)->GetLabel(&label, CPub::eContent, CPub::fLabel_Unique, CPub::eLabel_V1 ); + cit_it->GetLabel(&label, CPub::eContent, CPub::fLabel_Unique, CPub::eLabel_V1 ); // the following line may fail due to dups // (that's okay; it lets us automatically remove dups) - cit_set.insert( TCit(label, *cit_it) ); + cit_set.insert( TCit(label, cit_it) ); } + auto& publist = pub_set.SetPub(); // Has anything been deleted, or has the order changed? - if ( cit_set.size() != pub_set.SetPub().size() || - ! equal(cit_set.begin(), cit_set.end(), pub_set.SetPub().begin(), cmpSortedvsOld) ) + if ( cit_set.size() != publist.size() || + ! equal(cit_set.begin(), cit_set.end(), publist.begin(), cmpSortedvsOld) ) { // put everything left back into the feature's citation list. - pub_set.SetPub().clear(); + publist.clear(); ITERATE (TCitSet, citset_it, cit_set) { - pub_set.SetPub().push_back(citset_it->second); + publist.push_back(citset_it->second); } ChangeMade(CCleanupChange::eCleanCitonFeat); } } + void CNewCleanup_imp::ImpFeatBC( CSeq_feat& feat ) { if( ! 
FIELD_IS_SET_AND_IS(feat, Data, Imp) ) { @@ -3185,7 +2571,7 @@ static const TSiteElem sc_site_map[] = { typedef CStaticArrayMap TSiteMap; DEFINE_STATIC_ARRAY_MAP_WITH_COPY(TSiteMap, sc_SiteMap, sc_site_map); -void CNewCleanup_imp::SiteFeatBC( CSeqFeatData::ESite &site, CSeq_feat& feat ) +void CNewCleanup_imp::SiteFeatBC( const CSeqFeatData::ESite &site, CSeq_feat& feat ) { // If site set to "other", try to extract it from the comment if ( FIELD_IS_SET(feat, Comment) && @@ -3331,10 +2717,10 @@ void CNewCleanup_imp::ConvertSeqLocWholeToInt( CSeq_loc &loc ) } if (bsh) { TSeqPos bs_len = bsh.GetBioseqLength(); - - loc.SetInt().SetId(*id); - loc.SetInt().SetFrom(0); - loc.SetInt().SetTo(bs_len - 1); + auto& interval = loc.SetInt(); + interval.SetId(*id); + interval.SetFrom(0); + interval.SetTo(bs_len - 1); ChangeMade(CCleanupChange::eChangeWholeLocation); } } @@ -3572,8 +2958,9 @@ static const char *s_FindKeyFromFeatDefType( const CSeq_feat &feat ) { static const char *kFeatBad = "???"; + const CSeqFeatData& fdata = feat.GetData(); - SWITCH_ON_SEQFEAT_CHOICE(feat) { + switch (fdata.Which()) { case NCBI_SEQFEAT(Gene): return "Gene"; case NCBI_SEQFEAT(Org): @@ -3581,7 +2968,7 @@ const char *s_FindKeyFromFeatDefType( const CSeq_feat &feat ) case NCBI_SEQFEAT(Cdregion): return "CDS"; case NCBI_SEQFEAT(Prot): - if( feat.GetData().GetProt().CanGetProcessed() ) { + if(fdata.GetProt().IsSetProcessed() ) { switch( feat.GetData().GetProt().GetProcessed() ) { case NCBI_PROTREF(not_set): return "Protein"; @@ -3601,8 +2988,9 @@ const char *s_FindKeyFromFeatDefType( const CSeq_feat &feat ) } return "Protein"; case NCBI_SEQFEAT(Rna): - if( feat.GetData().GetRna().IsSetType() ) { - switch ( feat.GetData().GetRna().GetType() ) + if(fdata.GetRna().IsSetType() ) { + const auto& rna = fdata.GetRna(); + switch (rna.GetType() ) { case NCBI_RNAREF(unknown): return "misc_RNA"; // unknownrna mapped to otherrna @@ -3627,8 +3015,8 @@ const char *s_FindKeyFromFeatDefType( const CSeq_feat &feat ) 
case NCBI_RNAREF(miscRNA): return "misc_RNA"; case NCBI_RNAREF(other): - if ( FIELD_IS_SET_AND_IS(feat.GetData().GetRna(), Ext, Name) ) { - const string &name = feat.GetData().GetRna().GetExt().GetName(); + if ( FIELD_IS_SET_AND_IS(rna, Ext, Name) ) { + const string &name = rna.GetExt().GetName(); if ( NStr::EqualNocase(name, "misc_RNA")) return "misc_RNA"; if ( NStr::EqualNocase(name, "ncRNA") ) return "ncRNA"; if ( NStr::EqualNocase(name, "tmRNA") ) return "tmRNA"; @@ -3644,7 +3032,7 @@ const char *s_FindKeyFromFeatDefType( const CSeq_feat &feat ) case NCBI_SEQFEAT(Seq): return "Xref"; case NCBI_SEQFEAT(Imp): - return s_FindImpFeatType( feat.GetData().GetImp() ); + return s_FindImpFeatType( fdata.GetImp() ); case NCBI_SEQFEAT(Region): return "Region"; case NCBI_SEQFEAT(Comment): @@ -4101,7 +3489,7 @@ void s_ExpandThisQual( string qual_type = qual.GetQual(); string& val = qual.SetVal(); if (NStr::Equal(val, "()")) { - val = ""; + val.clear(); return; } if ( ! s_IsCompoundRptTypeValue( val ) ) { @@ -4477,9 +3865,10 @@ static CRef s_ParseTRnaFromAnticodonString (const string &str, const pos_str = pos_str.substr (0, pos_str.length() - 1); } } - CRef anticodon = ReadLocFromText (pos_str, feat.GetLocation().GetId(), scope); + const CSeq_loc& loc = feat.GetLocation(); + CRef anticodon = ReadLocFromText (pos_str, loc.GetId(), scope); if( anticodon ) { - CBioseq_Handle bsh = scope->GetBioseqHandle(*(feat.GetLocation().GetId())); + CBioseq_Handle bsh = scope->GetBioseqHandle(*(loc.GetId())); if (!bsh) { trna.Reset(NULL); return trna; @@ -4489,7 +3878,7 @@ static CRef s_ParseTRnaFromAnticodonString (const string &str, const return trna; } if (feat.GetLocation().IsSetStrand()) { - anticodon->SetStrand(feat.GetLocation().GetStrand()); + anticodon->SetStrand(loc.GetStrand()); } else { anticodon->SetStrand(eNa_strand_plus); // anticodon is always on plus strand } @@ -4780,60 +4169,7 @@ CNewCleanup_imp::x_HandleTrnaProductGBQual(CSeq_feat& feat, CRNA_ref& rna, const 
CNewCleanup_imp::EAction CNewCleanup_imp::x_HandleStandardNameRnaGBQual(CSeq_feat& feat, CRNA_ref& rna, const string& standard_name) { - if (!rna.IsSetType()) { - return eAction_Nothing; - } - EAction rval = eAction_Nothing; - - TRNAREF_TYPE rna_type = rna.GetType(); - string previous_product = rna.GetRnaProductName(); - - switch (rna_type) - { - case CRNA_ref::eType_rRNA: - case CRNA_ref::eType_premsg: - /* - if (NStr::IsBlank(previous_product)) { - string remainder; - rna.SetRnaProductName(standard_name, remainder); - if (!NStr::IsBlank(remainder)) { - x_AddToComment(feat, remainder); - } - } else { - x_AddToComment(feat, standard_name); - } - rval = eAction_Erase; - */ - break; - case CRNA_ref::eType_ncRNA: - /* - x_AddToComment(feat, standard_name); - rval = eAction_Erase; - */ - break; - case CRNA_ref::eType_mRNA: - /* - if (NStr::IsBlank(standard_name)) { - if (!m_SeqEntryInfoStack.top().m_IsEmblOrDdbj) { - rval = eAction_Erase; - } - } else { - if (NStr::IsBlank(previous_product)) { - rna.SetExt().SetName(standard_name); - rval = eAction_Erase; - } - } - */ - break; - case CRNA_ref::eType_tRNA: - /* - rval = x_HandleTrnaProductGBQual(feat, rna, standard_name); - */ - break; - default: - break; - } - return rval; + return eAction_Nothing; } @@ -5065,13 +4401,18 @@ void CNewCleanup_imp::BioSourceEC(CBioSource& biosrc) void CNewCleanup_imp::x_AddEnvSamplOrMetagenomic(CBioSource& biosrc) { + if (!biosrc.IsSetOrg()) { + return; + } + auto& org = biosrc.SetOrg(); // add environmental_sample or metagenomic based on lineage or div - if ( biosrc.IsSetOrg() && biosrc.GetOrg().IsSetOrgname()) { + if ( org.IsSetOrgname()) { + const auto& orgname = org.GetOrgname(); bool needs_env_sample = false; bool needs_metagenomic = false; - if (biosrc.GetOrg().GetOrgname().IsSetLineage()) { - string lineage = biosrc.GetOrg().GetOrgname().GetLineage(); + if (orgname.IsSetLineage()) { + string lineage = orgname.GetLineage(); if (NStr::FindNoCase(lineage, "environmental sample") != 
string::npos) { needs_env_sample = true; } @@ -5079,7 +4420,7 @@ void CNewCleanup_imp::x_AddEnvSamplOrMetagenomic(CBioSource& biosrc) needs_metagenomic = true; } } - if (biosrc.GetOrg().GetOrgname().IsSetDiv() + if (orgname.IsSetDiv() && NStr::Equal(biosrc.GetOrg().GetOrgname().GetDiv(), "ENV")) { needs_env_sample = true; } @@ -5114,30 +4455,39 @@ void CNewCleanup_imp::x_AddEnvSamplOrMetagenomic(CBioSource& biosrc) } +struct SRemovableOldname +{ + const string& m_Taxname; + bool operator()(CRef mod) + { + return (mod->IsSetSubtype() && + mod->GetSubtype() == COrgMod::eSubtype_old_name && + mod->IsSetSubname() && + NStr::Equal(mod->GetSubname(), m_Taxname) && + (!mod->IsSetAttrib() || NStr::IsBlank(mod->GetAttrib()))); + } +}; + + void CNewCleanup_imp::x_CleanupOldName(COrg_ref& org) { if (org.IsSetTaxname() && org.IsSetOrgname() && org.GetOrgname().IsSetMod()) { - COrgName::TMod::iterator it = org.SetOrgname().SetMod().begin(); - while (it != org.SetOrgname().SetMod().end()) { - if ((*it)->IsSetSubtype() && - (*it)->GetSubtype() == COrgMod::eSubtype_old_name && - (*it)->IsSetSubname() && - NStr::Equal((*it)->GetSubname(), org.GetTaxname()) && - (!(*it)->IsSetAttrib() || NStr::IsBlank((*it)->GetAttrib()))) { - it = org.SetOrgname().SetMod().erase(it); - ChangeMade(CCleanupChange::eRemoveOrgmod); - } else { - ++it; - } - } - if (org.GetOrgname().GetMod().empty()) { + SRemovableOldname matcher{ org.GetTaxname() }; + auto& modset = org.SetOrgname().SetMod(); + size_t before = modset.size(); + modset.erase(std::remove_if(modset.begin(), modset.end(), matcher), modset.end()); + if (before != modset.size()) { + ChangeMade(CCleanupChange::eRemoveOrgmod); + } + if (modset.empty()) { org.SetOrgname().ResetMod(); - + ChangeMade(CCleanupChange::eRemoveOrgmod); } } } + bool s_HasMatchingGBMod(const COrgName& org, const string& val) { if (!org.IsSetMod()) { @@ -5147,8 +4497,8 @@ bool s_HasMatchingGBMod(const COrgName& org, const string& val) if ((*it)->IsSetSubtype() && 
((*it)->GetSubtype() == COrgMod::eSubtype_gb_acronym || (*it)->GetSubtype() == COrgMod::eSubtype_gb_anamorph || - (*it)->GetSubtype() == COrgMod::eSubtype_gb_synonym) && - (*it)->IsSetSubname() && + (*it)->GetSubtype() == COrgMod::eSubtype_gb_synonym) && + (*it)->IsSetSubname() && NStr::Equal((*it)->GetSubname(), val)) { return true; } @@ -5157,25 +4507,31 @@ bool s_HasMatchingGBMod(const COrgName& org, const string& val) } +struct SRemovableOrgModNote { + const COrg_ref& org; + bool operator()(CRef mod) { + return (mod->IsSetSubtype() && + mod->GetSubtype() == COrgMod::eSubtype_other && + mod->IsSetSubname() && + (s_HasMatchingGBMod(org.GetOrgname(), mod->GetSubname()) || + (org.IsSetTaxname() && NStr::Equal(org.GetTaxname(), mod->GetSubname())))); + + } +}; + void CNewCleanup_imp::x_CleanupOrgModNoteEC(COrg_ref& org) { if (!org.IsSetOrgname() || !org.GetOrgname().IsSetMod()) { return; } - COrgName::TMod::iterator it = org.SetOrgname().SetMod().begin(); - while (it != org.SetOrgname().SetMod().end()) { - if ((*it)->IsSetSubtype() && - (*it)->GetSubtype() == COrgMod::eSubtype_other && - (*it)->IsSetSubname() && - (s_HasMatchingGBMod(org.GetOrgname(), (*it)->GetSubname()) || - (org.IsSetTaxname() && NStr::Equal(org.GetTaxname(), (*it)->GetSubname())))) { - ChangeMade(CCleanupChange::eRemoveOrgmod); - it = org.SetOrgname().SetMod().erase(it); - } else { - ++it; - } + auto& modset = org.SetOrgname().SetMod(); + SRemovableOrgModNote matcher{ org }; + size_t before = modset.size(); + modset.erase(std::remove_if(modset.begin(), modset.end(), matcher), modset.end()); + if (before != modset.size()) { + ChangeMade(CCleanupChange::eRemoveOrgmod); } - if (org.GetOrgname().GetMod().empty()) { + if (modset.empty()) { org.SetOrgname().ResetMod(); ChangeMade(CCleanupChange::eRemoveOrgmod); } @@ -5216,31 +4572,30 @@ void CNewCleanup_imp::x_DateStdBC( CDate_std& date ) ChangeMade(CCleanupChange::eCleanupDate); } - if ( ! 
FIELD_IS_SET(date, Minute) || FIELD_OUT_OF_RANGE(date, Minute, 0, 59) ) { - if( FIELD_IS_SET(date, Minute) ) { - RESET_FIELD(date, Minute); - ChangeMade(CCleanupChange::eCleanupDate); - } - if( FIELD_IS_SET(date, Second) ) { - RESET_FIELD(date, Second); + if (date.IsSetMinute()) { + if (date.GetMinute() < 0 || date.GetMinute() > 59) { + date.ResetMinute(); + date.ResetSecond(); ChangeMade(CCleanupChange::eCleanupDate); } + } else if (date.IsSetSecond()) { + date.ResetSecond(); + ChangeMade(CCleanupChange::eCleanupDate); } - - if ( ! FIELD_IS_SET(date, Hour) || FIELD_OUT_OF_RANGE(date, Hour, 0, 23) ) { - if( FIELD_IS_SET(date, Hour) ) { - RESET_FIELD(date, Hour); - ChangeMade(CCleanupChange::eCleanupDate); - } - if( FIELD_IS_SET(date, Minute) ) { - RESET_FIELD(date, Minute); - ChangeMade(CCleanupChange::eCleanupDate); - } - if( FIELD_IS_SET(date, Second) ) { - RESET_FIELD(date, Second); + + if (date.IsSetHour()) { + if (date.GetHour() < 0 || date.GetHour() > 23) { + date.ResetHour(); + date.ResetMinute(); + date.ResetSecond(); ChangeMade(CCleanupChange::eCleanupDate); } + } else if (date.IsSetMinute() || date.IsSetSecond()) { + date.ResetMinute(); + date.ResetSecond(); + ChangeMade(CCleanupChange::eCleanupDate); } + } @@ -5270,11 +4625,23 @@ void CNewCleanup_imp::x_SeqIntervalBC( CSeq_interval & seq_interval ) ChangeMade(CCleanupChange::eChangeSeqloc); } // change bad strand values. 
- if (seq_interval.CanGetStrand()) { - ENa_strand strand = seq_interval.GetStrand(); - if (strand == eNa_strand_unknown ) { - seq_interval.ResetStrand(); - ChangeMade(CCleanupChange::eChangeStrand); + if (m_Scope && seq_interval.IsSetId()) { + CBioseq_Handle bsh = m_Scope->GetBioseqHandle(seq_interval.GetId()); + if (bsh) { + if (bsh.IsProtein()) { + if (seq_interval.IsSetStrand()) { + seq_interval.ResetStrand(); + ChangeMade(CCleanupChange::eChangeStrand); + } + } else if (seq_interval.IsSetStrand()) { + if (seq_interval.GetStrand() == eNa_strand_unknown) { + seq_interval.SetStrand(eNa_strand_plus); + ChangeMade(CCleanupChange::eChangeStrand); + } + } else { + seq_interval.SetStrand(eNa_strand_plus); + ChangeMade(CCleanupChange::eChangeStrand); + } } } } @@ -5333,10 +4700,14 @@ void CNewCleanup_imp::x_BothStrandBC( CSeq_interval & seq_interval ) void CNewCleanup_imp::x_SplitDbtag( CDbtag &dbt, vector< CRef< CDbtag > > & out_new_dbtags ) { // check the common case of nothing to split - if( ! dbt.IsSetTag() || ! 
dbt.GetTag().IsStr() ) { + if (!dbt.IsSetTag()) { + return; + } + auto& tag = dbt.SetTag(); + if (!tag.IsStr()) { return; } - if( dbt.GetTag().GetStr().find(":") == string::npos ) { + if( tag.GetStr().find(":") == string::npos ) { return; } @@ -5354,11 +4725,11 @@ void CNewCleanup_imp::x_SplitDbtag( CDbtag &dbt, vector< CRef< CDbtag > > & out_ // split by colon and generate new tags vector tags; - NStr::Split(dbt.GetTag().GetStr(), ":", tags, NStr::fSplit_Tokenize); + NStr::Split(tag.GetStr(), ":", tags, NStr::fSplit_Tokenize); _ASSERT( tags.size() >= 2 ); // treat the CDbtag argument as the first of the new CDbtags - dbt.SetTag().SetStr( tags.front() ); + tag.SetStr( tags.front() ); vector::const_iterator str_iter = tags.begin() + 1; for( ; str_iter != tags.end(); ++str_iter ) { CRef new_tag( new CDbtag ); @@ -5722,8 +5093,6 @@ void s_SplitAtSingleTildes( list &piece_vec, const string &str ) return; } - vector pieces; - // piece_start is the beginning of the piece we're working on, // but search_start is where to start looking for tildes on this iteration // ( invariant: search_pos >= piece_start_pos ) @@ -5929,45 +5298,61 @@ void CNewCleanup_imp::x_CleanupOrgModAndSubSourceOther( COrgName &orgname, CBioS // * if strain begins with "subsp. " move remaining text to a subspecies // qualifier, unless a subspecies qualifier is already present, in which case // add to note +static const string kSubsp = "subsp. 
"; +static const string kSerovar = "serovar "; +struct SRemoveNamedStrain { + bool operator()(CRef m) { + bool rval = false; + if (m->IsSetSubtype() && m->IsSetSubname()) { + auto subtype = m->GetSubtype(); + auto subname = m->GetSubname(); + if (subtype == COrgMod::eSubtype_serovar) { + if (NStr::StartsWith(subname, kSubsp)) { + rval = true; + } + } else if (subtype == COrgMod::eSubtype_strain) { + if (NStr::StartsWith(subname, kSubsp) || NStr::StartsWith(subname, kSerovar)) { + rval = true; + } + } + + } + return rval; + } +}; + void CNewCleanup_imp::x_MovedNamedValuesInStrain(COrgName& orgname) { if (!orgname.IsSetMod()) { return; } - COrgName::TMod::iterator m = orgname.SetMod().begin(); - while (m != orgname.SetMod().end()) { - if ((*m)->IsSetSubtype() && (*m)->IsSetSubname()) { - bool do_erase = false; - switch ((*m)->GetSubtype()) { - case COrgMod::eSubtype_serovar: - if (NStr::StartsWith((*m)->GetSubname(), "subsp. ")) { - string val = (*m)->GetSubname().substr(7); - x_MovedNamedValuesInStrain(orgname, COrgMod::eSubtype_sub_species, val); - do_erase = true; - } - break; - case COrgMod::eSubtype_strain: - if (NStr::StartsWith((*m)->GetSubname(), "subsp. 
")) { - string val = (*m)->GetSubname().substr(7); - x_MovedNamedValuesInStrain(orgname, COrgMod::eSubtype_sub_species, val); - do_erase = true; - } else if (NStr::StartsWith((*m)->GetSubname(), "serovar ")) { - string val = (*m)->GetSubname().substr(8); - x_MovedNamedValuesInStrain(orgname, COrgMod::eSubtype_serovar, val); - do_erase = true; - } - break; - default: - break; - } - if (do_erase) { - m = orgname.SetMod().erase(m); - ChangeMade(CCleanupChange::eRemoveOrgmod); - } else { - ++m; + auto& mods = orgname.SetMod(); + for (auto m : mods) { + if (m->IsSetSubtype() && m->IsSetSubname()) { + switch (m->GetSubtype()) { + case COrgMod::eSubtype_serovar: + if (NStr::StartsWith(m->GetSubname(), kSubsp)) { + string val = m->GetSubname().substr(kSubsp.length()); + x_MovedNamedValuesInStrain(orgname, COrgMod::eSubtype_sub_species, val); + } + break; + case COrgMod::eSubtype_strain: + if (NStr::StartsWith(m->GetSubname(), kSubsp)) { + string val = m->GetSubname().substr(kSubsp.length()); + x_MovedNamedValuesInStrain(orgname, COrgMod::eSubtype_sub_species, val); + } + else if (NStr::StartsWith(m->GetSubname(), kSerovar)) { + string val = m->GetSubname().substr(kSerovar.length()); + x_MovedNamedValuesInStrain(orgname, COrgMod::eSubtype_serovar, val); + } + break; + default: + break; } } } + SRemoveNamedStrain matcher; + mods.erase(std::remove_if(mods.begin(), mods.end(), matcher), mods.end()); } @@ -6588,13 +5973,14 @@ bool SortGBQuals(CSeq_feat& sf) // first, extract product qualifier values, because order must be // preserved vector products; - CSeq_feat::TQual::iterator it = sf.SetQual().begin(); - while (it != sf.SetQual().end()) { + auto& qualset = sf.SetQual(); + CSeq_feat::TQual::iterator it = qualset.begin(); + while (it != qualset.end()) { if ((*it)->IsSetQual() && NStr::EqualNocase((*it)->GetQual(), "product")) { if ((*it)->IsSetVal() && !NStr::IsBlank((*it)->GetVal())) { products.push_back((*it)->GetVal()); } - it = sf.SetQual().erase(it); + it = 
qualset.erase(it); } else { ++it; } @@ -6605,8 +5991,8 @@ bool SortGBQuals(CSeq_feat& sf) } // insert product qualifiers back in list - it = sf.SetQual().begin(); - while (it != sf.SetQual().end()) { + it = qualset.begin(); + while (it != qualset.end()) { if (!(*it)->IsSetQual() || s_CompareNoCaseCStyle("product", (*it)->GetQual()) < 0 || s_IsIllegalQual((*it)->GetQual())) { @@ -6614,15 +6000,14 @@ bool SortGBQuals(CSeq_feat& sf) } ++it; } - if (it == sf.SetQual().end()) { + if (it == qualset.end()) { ITERATE(vector, s, products) { - CRef pq(new CGb_qual("product", *s)); - sf.SetQual().push_back(pq); + qualset.emplace_back(new CGb_qual("product", *s)); } } else { ITERATE(vector, s, products) { CRef pq(new CGb_qual("product", *s)); - it = sf.SetQual().insert(it, pq); + it = qualset.insert(it, pq); } } return !(orig->Equals(sf)); @@ -7038,6 +6423,23 @@ CConstRef s_FindUserObjectTypeRecursive( const CUser_object &user_ return CConstRef(); } + +bool s_CopyDbToFeat(CGene_ref& gene_ref, CSeq_feat& seq_feat) +{ + bool any_change = false; + if (gene_ref.IsSetDb()) { + for (auto db_itr : gene_ref.GetDb()) { + CRef dbc(new CDbtag()); + dbc->Assign(*db_itr); + seq_feat.SetDbxref().push_back(dbc); + } + gene_ref.ResetDb(); + any_change = true; + } + return any_change; +} + + void CNewCleanup_imp::GeneFeatBC ( CGene_ref& gene_ref, CSeq_feat& seq_feat @@ -7066,35 +6468,28 @@ void CNewCleanup_imp::GeneFeatBC ( } // move gene.db to feat.dbxref - if (GENEREF_HAS_DBXREF (gene_ref) ) { - FOR_EACH_DBXREF_ON_GENEREF (db_itr, gene_ref) { - CRef dbc (*db_itr); - ADD_DBXREF_TO_SEQFEAT (seq_feat, dbc); - } - RESET_FIELD (gene_ref, Db); + if (s_CopyDbToFeat(gene_ref, seq_feat)) { ChangeMade (CCleanupChange::eChangeDbxrefs); } // move feat.xref.gene.db to feat.dbxref - if (SEQFEAT_HAS_SEQFEATXREF (seq_feat)) { - EDIT_EACH_SEQFEATXREF_ON_SEQFEAT (xr_itr, seq_feat) { + if (seq_feat.IsSetXref()) { + auto xr_itr = seq_feat.SetXref().begin(); + while (xr_itr != seq_feat.SetXref().end()) { 
CSeqFeatXref& sfx = **xr_itr; - if (! FIELD_IS_SET (sfx, Data)) continue; - CSeqFeatData& sfd = GET_MUTABLE (sfx, Data); - if (! FIELD_IS (sfd, Gene)) continue; - CGene_ref& gene_ref = GET_MUTABLE (sfd, Gene); - if (GENEREF_HAS_DBXREF (gene_ref)) { - FOR_EACH_DBXREF_ON_GENEREF (db_itr, gene_ref) { - CRef dbc (*db_itr); - ADD_DBXREF_TO_SEQFEAT (seq_feat, dbc); + if (sfx.IsSetData() && sfx.GetData().IsGene()) { + CGene_ref& gene_ref = sfx.SetData().SetGene(); + if (s_CopyDbToFeat(gene_ref, seq_feat)) { + ChangeMade(CCleanupChange::eChangeDbxrefs); + } + + if (s_IsEmptyGeneRef(gene_ref)) { + xr_itr = seq_feat.SetXref().erase(xr_itr); + ChangeMade(CCleanupChange::eChangeDbxrefs); + continue; } - RESET_FIELD (gene_ref, Db); - ChangeMade (CCleanupChange::eChangeDbxrefs); - } - if (s_IsEmptyGeneRef (gene_ref)) { - ERASE_SEQFEATXREF_ON_SEQFEAT (xr_itr, seq_feat); - ChangeMade (CCleanupChange::eChangeDbxrefs); } + ++xr_itr; } } @@ -7214,11 +6609,9 @@ void CNewCleanup_imp::ProtrefBC ( } if (prot_ref.IsSetName()) { - for (CProt_ref::TName::iterator it = prot_ref.SetName().begin(); - it != prot_ref.SetName().end(); - it++) { - ProtNameBC(*it); - x_CompressStringSpacesMarkChanged(*it); + for (auto& it : prot_ref.SetName()) { + ProtNameBC(it); + x_CompressStringSpacesMarkChanged(it); } } @@ -7402,13 +6795,11 @@ void CNewCleanup_imp::ProtFeatfBC ( } // move prot.db to feat.dbxref - if (PROTREF_HAS_DBXREF (pr)) { - FOR_EACH_DBXREF_ON_PROTREF (db_itr, pr) { - CRef dbc (*db_itr); - ADD_DBXREF_TO_SEQFEAT (sf, dbc); - } - RESET_FIELD (pr, Db); - ChangeMade (CCleanupChange::eChangeDbxrefs); + if (pr.IsSetDb()) { + auto& sfxref = sf.SetDbxref(); + sfxref.insert(sfxref.end(), pr.SetDb().begin(), pr.SetDb().end()); + pr.ResetDb(); + ChangeMade(CCleanupChange::eChangeDbxrefs); } REMOVE_IF_EMPTY_NAME_ON_PROTREF(pr); @@ -7877,8 +7268,6 @@ void CNewCleanup_imp::x_GBQualToOrgRef( COrg_ref &org, CSeq_feat &seqfeat ) return; } - bool any_conversions = false; - EDIT_EACH_GBQUAL_ON_SEQFEAT( 
qual_iter, seqfeat ) { CGb_qual &gb_qual = **qual_iter; if( FIELD_IS_SET(gb_qual, Qual) && FIELD_IS_SET(gb_qual, Val) ) { @@ -7900,7 +7289,6 @@ void CNewCleanup_imp::x_GBQualToOrgRef( COrg_ref &org, CSeq_feat &seqfeat ) ERASE_GBQUAL_ON_SEQFEAT( qual_iter, seqfeat ); ChangeMade(CCleanupChange::eAddOrgMod); ChangeMade(CCleanupChange::eRemoveQualifier); - any_conversions = true; } } } @@ -7993,16 +7381,17 @@ void CNewCleanup_imp::x_AddNcbiCleanupObject( CSeq_entry &seq_entry ) { // remove from lower levels if (seq_entry.IsSet() && seq_entry.GetSet().IsSetSeq_set()) { - NON_CONST_ITERATE(CBioseq_set::TSeq_set, it, seq_entry.SetSet().SetSeq_set()) { - CCleanup::RemoveNcbiCleanupObject(**it); + for (auto it : seq_entry.GetSet().GetSeq_set()) { + CCleanup::RemoveNcbiCleanupObject(*it); } } // update existing if (seq_entry.IsSetDescr()) { - NON_CONST_ITERATE(CBioseq::TDescr::Tdata, it, seq_entry.SetDescr().Set()) { - if ((*it)->IsUser() && (*it)->GetUser().GetObjectType() == CUser_object::eObjectType_Cleanup) { - (*it)->SetUser().UpdateNcbiCleanup(NCBI_CLEANUP_VERSION); + auto& dset = seq_entry.SetDescr().Set(); + for (auto it : dset) { + if (it->IsUser() && it->GetUser().GetObjectType() == CUser_object::eObjectType_Cleanup) { + it->SetUser().UpdateNcbiCleanup(NCBI_CLEANUP_VERSION); ChangeMade(CCleanupChange::eAddNcbiCleanupObject); return; } @@ -8121,8 +7510,9 @@ bool s_FixncRNA(CSeq_feat& feat) if (feat.IsSetQual() && (rna_type == CRNA_ref::eType_ncRNA || rna_type == NCBI_RNAREF(other))) { - CSeq_feat::TQual::iterator qual_iter = feat.SetQual().begin(); - while (qual_iter != feat.SetQual().end()) { + auto& qual_list = feat.SetQual(); + CSeq_feat::TQual::iterator qual_iter = qual_list.begin(); + while (qual_iter != qual_list.end()) { string &qual = (*qual_iter)->SetQual(); string &val = (*qual_iter)->SetVal(); if (qual == "ncRNA_class") { @@ -8134,13 +7524,13 @@ bool s_FixncRNA(CSeq_feat& feat) rna.SetRnaProductName(product, remainder); } any_change = true; - qual_iter = 
feat.SetQual().erase(qual_iter); + qual_iter = qual_list.erase(qual_iter); } else { ++qual_iter; } } - if (feat.SetQual().empty()) { + if (qual_list.empty()) { feat.ResetQual(); } } @@ -8181,8 +7571,9 @@ bool s_FixtmRNA(CSeq_feat& feat) (rna_type == CRNA_ref::eType_other || rna_type == CRNA_ref::eType_tmRNA || rna_type == CRNA_ref::eType_ncRNA)) { - CSeq_feat::TQual::iterator qual_iter = feat.SetQual().begin(); - while (qual_iter != feat.SetQual().end()) { + auto& qual_list = feat.SetQual(); + CSeq_feat::TQual::iterator qual_iter = qual_list.begin(); + while (qual_iter != qual_list.end()) { string &qual = (*qual_iter)->SetQual(); string &val = (*qual_iter)->SetVal(); if (qual == "tag_peptide") { @@ -8194,16 +7585,16 @@ bool s_FixtmRNA(CSeq_feat& feat) rna_qual->SetVal(val); rna.SetExt().SetGen().SetQuals().Set().push_back(rna_qual); any_change = true; - qual_iter = feat.SetQual().erase(qual_iter); + qual_iter = qual_list.erase(qual_iter); } else if (qual == "ncRNA_class" && rna_type == CRNA_ref::eType_tmRNA) { rna.SetExt().SetGen().SetClass(val); any_change = true; - qual_iter = feat.SetQual().erase(qual_iter); + qual_iter = qual_list.erase(qual_iter); } else { ++qual_iter; } } - if (feat.SetQual().empty()) { + if (qual_list.empty()) { feat.ResetQual(); } } @@ -8256,8 +7647,9 @@ bool CNewCleanup_imp::x_FixMiscRNA(CSeq_feat& feat) string product_name = rna.GetRnaProductName(); if (NStr::IsBlank(product_name) && feat.IsSetQual()) { - CSeq_feat::TQual::iterator qual_iter = feat.SetQual().begin(); - while (qual_iter != feat.SetQual().end()) { + auto& qual_list = feat.SetQual(); + CSeq_feat::TQual::iterator qual_iter = qual_list.begin(); + while (qual_iter != qual_list.end()) { string &qual = (*qual_iter)->SetQual(); string &val = (*qual_iter)->SetVal(); if (qual == "product") { @@ -8265,7 +7657,7 @@ bool CNewCleanup_imp::x_FixMiscRNA(CSeq_feat& feat) CNewCleanup_imp::TranslateITSName(val); rna.SetExt().SetGen().SetProduct(val); any_change = true; - qual_iter = 
feat.SetQual().erase(qual_iter); + qual_iter = qual_list.erase(qual_iter); } else { ++qual_iter; @@ -8425,15 +7817,19 @@ void CNewCleanup_imp::RnaFeatBC ( seq_feat.ResetComment(); ChangeMade(CCleanupChange::eChangeITS); } - } else if (rna.GetExt().IsName()) { - if (IsInternalTranscribedSpacer(rna.GetExt().GetName()) || - TranslateITSName(rna.SetExt().SetName())) { - rna.SetExt().SetName(rna.GetExt().GetName()); - ChangeMade(CCleanupChange::eChangeITS); + } else { + auto& ext = rna.SetExt(); + if (ext.IsName()) { + if (IsInternalTranscribedSpacer(ext.GetName()) || + TranslateITSName(ext.SetName())) { + ext.SetName(ext.GetName()); + ChangeMade(CCleanupChange::eChangeITS); + } } - } else if (rna.GetExt().IsGen() && rna.GetExt().GetGen().IsSetProduct()) { - if (TranslateITSName(rna.SetExt().SetGen().SetProduct())) { - ChangeMade(CCleanupChange::eChangeITS); + else if (ext.IsGen() && ext.GetGen().IsSetProduct()) { + if (TranslateITSName(rna.SetExt().SetGen().SetProduct())) { + ChangeMade(CCleanupChange::eChangeITS); + } } } } @@ -8489,9 +7885,10 @@ void CNewCleanup_imp::RnaFeatBC ( { RESET_FIELD(seq_feat, Comment); ChangeMade(CCleanupChange::eChangeComment); - } else { - FOR_EACH_QUAL_ON_RNAGEN( qual_iter, gen ) { - const CRNA_qual &rna_qual = **qual_iter; + } else if (gen.IsSetQuals()) { + const auto& genquals = gen.GetQuals().Get(); + for (auto qual_iter : genquals) { + const CRNA_qual &rna_qual = *qual_iter; if( FIELD_EQUALS(rna_qual, Val, comment) ) { RESET_FIELD(seq_feat, Comment); ChangeMade(CCleanupChange::eChangeComment); @@ -8503,7 +7900,7 @@ void CNewCleanup_imp::RnaFeatBC ( // if not tRNA and ext is tRNA and tRNA is empty, remove ext.tRNA if (rna.IsSetType() && - (rna.GetType() == CRNA_ref::eType_mRNA || rna.GetType() == CRNA_ref::eType_rRNA || CRNA_ref::eType_tRNA) && + (rna.GetType() == CRNA_ref::eType_mRNA || rna.GetType() == CRNA_ref::eType_rRNA || rna.GetType() == CRNA_ref::eType_tRNA) && rna.IsSetExt() && rna.GetExt().IsTRNA() && 
s_IsEmpty(rna.GetExt().GetTRNA())) { rna.ResetExt(); @@ -8833,15 +8230,16 @@ void CNewCleanup_imp::x_MoveCdregionXrefsToProt (CCdregion& cds, CSeq_feat& seqf for (auto fit : ait->GetData().GetFtable()) { if (fit->IsSetData() && fit->GetData().GetSubtype() == CSeqFeatData::eSubtype_prot) { - auto xref = seqfeat.SetXref().begin(); - while (xref != seqfeat.SetXref().end()) { + auto& xref_list = seqfeat.SetXref(); + auto xref = xref_list.begin(); + while (xref != xref_list.end()) { if ((*xref)->IsSetData() && (*xref)->GetData().IsProt()) { CRef pfeat(const_cast(fit.GetPointer())); ProtrefBC(pfeat->SetData().SetProt()); ProtrefBC((*xref)->SetData().SetProt()); s_CopyProtXrefToProtFeat(pfeat->SetData().SetProt(), (*xref)->SetData().SetProt()); - xref = seqfeat.SetXref().erase(xref); + xref = xref_list.erase(xref); ChangeMade(CCleanupChange::eMoveToProtXref); } else { ++xref; @@ -8991,12 +8389,16 @@ void CNewCleanup_imp::x_PCRPrimerSetBC( CPCRPrimerSet &primer_set ) void CNewCleanup_imp::x_CopyGBBlockDivToOrgnameDiv( CSeq_entry &seq_entry) { + if (!seq_entry.IsSetDescr()) { + return; + } + auto& dset = seq_entry.SetDescr().Set(); // find the "org" and find the "genbank" COrgName *orgname = NULL; CGB_block *gb_block = NULL; - EDIT_EACH_SEQDESC_ON_SEQENTRY(desc_iter, seq_entry) { - CSeqdesc &desc = **desc_iter; + for (auto desc_iter : dset) { + CSeqdesc &desc = *desc_iter; if( FIELD_IS(desc, Genbank) ) { gb_block = &desc.SetGenbank(); @@ -9020,7 +8422,7 @@ void CNewCleanup_imp::x_CopyGBBlockDivToOrgnameDiv( CSeq_entry &seq_entry) void CNewCleanup_imp::x_AuthListBCWithFixInitials( CAuth_list& al ) { - AuthListBC( al, true ); + CCleanup::CleanupAuthList( al, true ); } void CNewCleanup_imp::x_PostProcessing(void) @@ -9138,8 +8540,9 @@ void CNewCleanup_imp::x_RemoveSingleStrand(CBioseq& bioseq) { // do not remove single-strandedness for patent sequences bool is_patent = false; - for (auto id = bioseq.GetId().begin(); id != bioseq.GetId().end(); id++) { - if ((*id)->IsPatent()) 
{ + const auto& idset = bioseq.GetId(); + for (auto id : idset) { + if (id->IsPatent()) { is_patent = true; break; } @@ -9248,80 +8651,53 @@ void CNewCleanup_imp::x_SortSeqDescs( CSeq_entry & seq_entry ) } } -namespace { - // T can be CBioseq or CBioseq_set. - // Logic is basically the same for bioseq and bioseq-set, but since they - // don't share the right functions in the class inheritance hierarchy, we - // have to use templates instead of polymorphism. - - // returns true if change made - template - bool x_RemoveDupBioSourceImpl( T & obj ) - { - if( ! obj.IsSetDescr() || ! obj.SetDescr().IsSet() || - obj.SetDescr().Set().empty() ) - { - // nothing to remove - return false; - } - CSeq_descr::Tdata & descr_vec = obj.SetDescr().Set(); - // erase BioSources that are equal to a BioSource in some ancestor - // Bioseq-set - typedef vector TBioSrcIterVec; - TBioSrcIterVec sourcesToErase; - NON_CONST_ITERATE( CSeq_descr::Tdata, descr_iter, descr_vec ) { - if( ! (*descr_iter)->IsSource() ) { - continue; - } +void CNewCleanup_imp::x_RemoveDupBioSource( CBioseq & bioseq ) +{ +} - // climb the hierarchy looking for identical BioSource - bool bShouldEraseDescr = false; - CConstRef< CBioseq_set > pParent = obj.GetParentSet(); - for( ; pParent; pParent = pParent->GetParentSet() ) { - if( ! pParent->IsSetDescr() || ! pParent->GetDescr().IsSet() ) { - continue; - } - ITERATE( CSeq_descr::Tdata, parent_descr_iter, pParent->GetDescr().Get() ) { - if( ! 
(*parent_descr_iter)->IsSource() ) { - continue; - } - if( (*parent_descr_iter)->Equals(**descr_iter) ) { - bShouldEraseDescr = true; - break; - } - } - if( bShouldEraseDescr ) { - break; - } - } - if( bShouldEraseDescr ) { - sourcesToErase.push_back(descr_iter); +void CNewCleanup_imp::x_RemoveDupBioSource( CBioseq_set & bioseq_set ) +{ + if (!bioseq_set.IsSetDescr() || !bioseq_set.IsSetSeq_set()) { + return; + } + for (auto it : bioseq_set.GetDescr().Get()) { + if (it->IsSource()) { + for (auto se : bioseq_set.SetSeq_set()) { + x_RemoveDupBioSource(*se, it->GetSource()); } } - // erase the BioSources we've decided to erase - NON_CONST_ITERATE(TBioSrcIterVec, iter_iter, sourcesToErase) { - descr_vec.erase(*iter_iter); - } - return ! sourcesToErase.empty(); } } -void CNewCleanup_imp::x_RemoveDupBioSource( CBioseq & bioseq ) -{ - if( x_RemoveDupBioSourceImpl( bioseq ) ) { - ChangeMade( CCleanupChange::eRemoveDupBioSource ); - } -} -void CNewCleanup_imp::x_RemoveDupBioSource( CBioseq_set & bioseq_set ) +struct SMatchSrc { + const CBioSource& m_Src; + bool operator()(CRef desc) { + return (desc->IsSource() && desc->GetSource().Equals(m_Src)); + } +}; +void CNewCleanup_imp::x_RemoveDupBioSource(CSeq_entry& se, const CBioSource& src) { - if( x_RemoveDupBioSourceImpl( bioseq_set ) ) { - ChangeMade( CCleanupChange::eRemoveDupBioSource ); + if (se.IsSetDescr()) { + SMatchSrc matcher{ src }; + auto dset = se.SetDescr().Set(); + auto before = dset.size(); + dset.erase(std::remove_if(dset.begin(), dset.end(), matcher), dset.end()); + if (dset.size() != before) { + ChangeMade(CCleanupChange::eRemoveDupBioSource); + } + } + if (se.IsSet() && se.GetSet().IsSetSeq_set()) { + for (auto sub : se.SetSet().SetSeq_set()) { + x_RemoveDupBioSource(*sub, src); + } } } + + void CNewCleanup_imp::x_RemoveDupPubs(CSeq_descr & descr) { if (CCleanup::RemoveDuplicatePubs(descr)) { @@ -9336,9 +8712,10 @@ void CNewCleanup_imp::x_FixStructuredCommentKeywords( CBioseq & bioseq ) vector 
controlled_keywords = CComment_rule::GetKeywordList(); vector original_keywords; - if (beh.IsSetDescr()) { - CBioseq::TDescr::Tdata::iterator it = beh.SetDescr().Set().begin(); - while (it != beh.SetDescr().Set().end()) { + auto& dset = beh.SetDescr().Set(); + if (!dset.empty()) { + CBioseq::TDescr::Tdata::iterator it = dset.begin(); + while (it != dset.end()) { CSeqdesc& desc = **it; if (desc.Which() != CSeqdesc::e_Genbank) { ++it; @@ -9358,7 +8735,7 @@ void CNewCleanup_imp::x_FixStructuredCommentKeywords( CBioseq & bioseq ) gb_block.ResetKeywords(); } if (gb_block.IsEmpty()) { - it = beh.SetDescr().Set().erase(it); + it = dset.erase(it); } else { ++it; } @@ -9390,8 +8767,8 @@ void CNewCleanup_imp::x_FixStructuredCommentKeywords( CBioseq & bioseq ) vector final_keywords; if (new_keywords.size() > 0) { CGB_block *gb_block = NULL; - EDIT_EACH_SEQDESC_ON_BIOSEQ ( itr, bioseq ) { - CSeqdesc& desc = **itr; + for (auto itr : dset) { + CSeqdesc& desc = *itr; if ( desc.Which() != CSeqdesc::e_Genbank ) continue; gb_block = &desc.SetGenbank(); } @@ -9417,8 +8794,8 @@ void CNewCleanup_imp::x_FixStructuredCommentKeywords( CBioseq & bioseq ) if (!NStr::Equal(*orig_k, *final_k)) { any_change = true; } - orig_k++; - final_k++; + ++orig_k; + ++final_k; } if (orig_k != original_keywords.end() || final_k != final_keywords.end()) { any_change = true; @@ -9426,6 +8803,9 @@ void CNewCleanup_imp::x_FixStructuredCommentKeywords( CBioseq & bioseq ) if (any_change) { ChangeMade(CCleanupChange::eChangeKeywords); } + if (dset.empty()) { + beh.ResetDescr(); + } } void CNewCleanup_imp::x_RemoveProtDescThatDupsProtName( CProt_ref & prot ) @@ -9515,17 +8895,17 @@ unsigned char s_GetAaAsChar(const CTrna_ext& trna) switch (trna.GetAa().Which()) { case CTrna_ext::C_Aa::e_Iupacaa: str = trna.GetAa().GetIupacaa(); - CSeqConvert::Convert(str, CSeqUtil::e_Iupacaa, 0, str.size(), seqData, CSeqUtil::e_Ncbieaa); + CSeqConvert::Convert(str, CSeqUtil::e_Iupacaa, 0, (TSeqPos)str.size(), seqData, 
CSeqUtil::e_Ncbieaa); aa = seqData[0]; break; case CTrna_ext::C_Aa::e_Ncbi8aa: str = trna.GetAa().GetNcbi8aa(); - CSeqConvert::Convert(str, CSeqUtil::e_Ncbi8aa, 0, str.size(), seqData, CSeqUtil::e_Ncbieaa); + CSeqConvert::Convert(str, CSeqUtil::e_Ncbi8aa, 0, (TSeqPos)str.size(), seqData, CSeqUtil::e_Ncbieaa); aa = seqData[0]; break; case CTrna_ext::C_Aa::e_Ncbistdaa: str = trna.GetAa().GetNcbi8aa(); - CSeqConvert::Convert(str, CSeqUtil::e_Ncbistdaa, 0, str.size(), seqData, CSeqUtil::e_Ncbieaa); + CSeqConvert::Convert(str, CSeqUtil::e_Ncbistdaa, 0, (TSeqPos)str.size(), seqData, CSeqUtil::e_Ncbieaa); aa = seqData[0]; break; case CTrna_ext::C_Aa::e_Ncbieaa: @@ -9595,12 +8975,21 @@ static string s_Reverse(const string& str) } +static bool s_IsRealTrna(const CSeq_feat& seq_feat) +{ + if (!seq_feat.IsSetData()) { + return false; + } + const auto& fdata = seq_feat.GetData(); + return (fdata.GetSubtype() == CSeqFeatData::eSubtype_tRNA && + fdata.GetRna().IsSetExt() && + fdata.GetRna().GetExt().IsTRNA()); +} + + void CNewCleanup_imp::x_tRNACodonEC(CSeq_feat& seq_feat) { - if (!seq_feat.IsSetData() || - seq_feat.GetData().GetSubtype() != CSeqFeatData::eSubtype_tRNA || - !seq_feat.GetData().GetRna().IsSetExt() || - !seq_feat.GetData().GetRna().GetExt().IsTRNA()) { + if (!s_IsRealTrna(seq_feat)) { return; } CTrna_ext& trna = seq_feat.SetData().SetRna().SetExt().SetTRNA(); @@ -9674,10 +9063,7 @@ void CNewCleanup_imp::x_tRNACodonEC(CSeq_feat& seq_feat) void CNewCleanup_imp::x_tRNAEC(CSeq_feat& seq_feat) { - if (!seq_feat.IsSetData() || - seq_feat.GetData().GetSubtype() != CSeqFeatData::eSubtype_tRNA || - !seq_feat.GetData().GetRna().IsSetExt() || - !seq_feat.GetData().GetRna().GetExt().IsTRNA()) { + if (!s_IsRealTrna(seq_feat)) { return; } @@ -9719,28 +9105,41 @@ void CNewCleanup_imp::x_RemoveEmptyUserObject( CSeq_descr & seq_descr ) } -bool CNewCleanup_imp::s_ShouldRemoveKeyword(const string& keyword, CMolInfo::TTech tech) +// Helper for removing GenBank Block Keywords +struct 
SKeywordChecker { - if (NStr::Equal(keyword, "HTG")) { - return true; - } else if (tech == CMolInfo::eTech_htgs_0 && NStr::Equal(keyword, "HTGS_PHASE0")) { - return true; - } else if (tech == CMolInfo::eTech_htgs_1 && NStr::Equal(keyword, "HTGS_PHASE1")) { - return true; - } else if (tech == CMolInfo::eTech_htgs_2 && NStr::Equal(keyword, "HTGS_PHASE2")) { - return true; - } else if (tech == CMolInfo::eTech_htgs_3 && NStr::Equal(keyword, "HTGS_PHASE3")) { - return true; - } else if (tech == CMolInfo::eTech_est && NStr::Equal(keyword, "EST")) { - return true; - } else if (tech == CMolInfo::eTech_sts && NStr::Equal(keyword, "STS")) { - return true; - } else if (tech == CMolInfo::eTech_survey && NStr::Equal(keyword, "GSS")) { - return true; - } else { - return false; + CMolInfo::TTech m_Tech; + bool operator()(const string& keyword) + { + if (NStr::Equal(keyword, "HTG")) { + return true; + } + else if (m_Tech == CMolInfo::eTech_htgs_0 && NStr::Equal(keyword, "HTGS_PHASE0")) { + return true; + } + else if (m_Tech == CMolInfo::eTech_htgs_1 && NStr::Equal(keyword, "HTGS_PHASE1")) { + return true; + } + else if (m_Tech == CMolInfo::eTech_htgs_2 && NStr::Equal(keyword, "HTGS_PHASE2")) { + return true; + } + else if (m_Tech == CMolInfo::eTech_htgs_3 && NStr::Equal(keyword, "HTGS_PHASE3")) { + return true; + } + else if (m_Tech == CMolInfo::eTech_est && NStr::Equal(keyword, "EST")) { + return true; + } + else if (m_Tech == CMolInfo::eTech_sts && NStr::Equal(keyword, "STS")) { + return true; + } + else if (m_Tech == CMolInfo::eTech_survey && NStr::Equal(keyword, "GSS")) { + return true; + } + else { + return false; + } } -} +}; bool CNewCleanup_imp::x_CleanGenbankKeywords(CGB_block& blk, CMolInfo::TTech tech) @@ -9749,18 +9148,16 @@ bool CNewCleanup_imp::x_CleanGenbankKeywords(CGB_block& blk, CMolInfo::TTech tec return false; } bool any_change = false; - CGB_block::TKeywords::iterator it = blk.SetKeywords().begin(); - while (it != blk.SetKeywords().end()) { - if 
(s_ShouldRemoveKeyword(*it, tech)) { - it = blk.SetKeywords().erase(it); - any_change = true; - } else { - ++it; - } - } - if (blk.GetKeywords().empty()) { + auto& keywords = blk.SetKeywords(); + size_t orig = keywords.size(); + SKeywordChecker matcher{ tech }; + keywords.erase(std::remove_if(keywords.begin(), keywords.end(), matcher), keywords.end()); + + if (keywords.empty()) { blk.ResetKeywords(); any_change = true; + } else if (keywords.size() != orig) { + any_change = true; } return any_change; } @@ -9830,21 +9227,22 @@ void CNewCleanup_imp::x_CleanupGenbankBlock(CBioseq_set& set) if (!set.IsSetDescr()) { return; } + auto& dset = set.SetDescr().Set(); CConstRef biosrc(NULL); CMolInfo::TTech tech = CMolInfo::eTech_unknown; - ITERATE(CBioseq_set::TDescr::Tdata, it, set.GetDescr().Get()) { - if ((*it)->IsSource()) { - biosrc.Reset(&((*it)->GetSource())); - } else if ((*it)->IsMolinfo() && - (*it)->GetMolinfo().IsSetTech()) { - tech = (*it)->GetMolinfo().GetTech(); + for (auto it : dset) { + if (it->IsSource()) { + biosrc.Reset(&(it->GetSource())); + } else if (it->IsMolinfo() && + it->GetMolinfo().IsSetTech()) { + tech = it->GetMolinfo().GetTech(); } } - EDIT_EACH_SEQDESC_ON_SEQDESCR(descr_iter, set.SetDescr()) { - CSeqdesc &desc = **descr_iter; + for (auto descr_iter : dset) { + CSeqdesc &desc = *descr_iter; if (!FIELD_IS(desc, Genbank)) { continue; } @@ -9858,8 +9256,15 @@ void CNewCleanup_imp::x_CleanupGenbankBlock(CBioseq_set& set) string s_GetDiv(const CBioSource& src) { - if (src.IsSetOrg() && src.GetOrg().IsSetOrgname() && - src.GetOrg().GetOrgname().IsSetDiv()) { + if (!src.IsSetOrg()) { + return kEmptyCStr; + } + const auto& org = src.GetOrg(); + if (!org.IsSetOrgname()) { + return kEmptyStr; + } + const auto& orgname = org.GetOrgname(); + if (orgname.IsSetDiv()) { return src.GetOrg().GetOrgname().GetDiv(); } else { return kEmptyCStr; @@ -9869,8 +9274,15 @@ string s_GetDiv(const CBioSource& src) void RemoveStrain(string& src, const CBioSource& biosrc) { 
- if (!biosrc.IsSetOrg() || !biosrc.GetOrg().IsSetOrgname() || - !biosrc.GetOrg().GetOrgname().IsSetMod()) { + if (!biosrc.IsSetOrg()) { + return; + } + const auto& org = biosrc.GetOrg(); + if (!org.IsSetOrgname()) { + return; + } + const auto& orgname = org.GetOrgname(); + if (!orgname.IsSetMod()) { return; } size_t pos = NStr::Find(src, "(strain "); @@ -9878,20 +9290,15 @@ void RemoveStrain(string& src, const CBioSource& biosrc) return; } - ITERATE(COrgName::TMod, it, biosrc.GetOrg().GetOrgname().GetMod()) { - if ((*it)->IsSetSubtype() && - (*it)->GetSubtype() == COrgMod::eSubtype_strain && - (*it)->IsSetSubname()) { - const string& strain = (*it)->GetSubname(); - size_t expected_len = 9 + strain.length(); - if (src.length() >= pos + expected_len) { - string compare = src.substr(pos, expected_len); - string expected = "(strain " + strain + ")"; - if (NStr::Equal(compare, expected)) { - src = src.substr(0, pos - 1) + src.substr(pos + expected_len); - NStr::ReplaceInPlace(src, " ", " "); - } - } + for (auto it : orgname.GetMod()) { + if (it->IsSetSubtype() && + it->GetSubtype() == COrgMod::eSubtype_strain && + it->IsSetSubname()) { + const string& strain = it->GetSubname(); + string expected = "(strain " + strain + ")"; + NStr::ReplaceInPlace(src, expected, ""); + NStr::ReplaceInPlace(src, " ", " "); + NStr::TruncateSpacesInPlace(src); } } } @@ -9912,22 +9319,25 @@ bool CNewCleanup_imp::x_CanRemoveGenbankBlockSource(const string& src, const CBi RemoveStrain(compare, biosrc); if (biosrc.IsSetOrg()) { - if (biosrc.GetOrg().IsSetTaxname() && - NStr::Equal(compare, biosrc.GetOrg().GetTaxname())) { + const auto& org = biosrc.GetOrg(); + if (org.IsSetTaxname() && + NStr::Equal(compare, org.GetTaxname())) { return true; } - if (biosrc.GetOrg().IsSetCommon() && - NStr::Equal(compare, biosrc.GetOrg().GetCommon())) { + if (org.IsSetCommon() && + NStr::Equal(compare, org.GetCommon())) { return true; } - if (biosrc.GetOrg().IsSetOrgname() && - 
biosrc.GetOrg().GetOrgname().IsSetMod()) { - ITERATE(COrgName::TMod, m, biosrc.GetOrg().GetOrgname().GetMod()) { - if ((*m)->IsSetSubtype() && - (*m)->GetSubtype() == COrgMod::eSubtype_old_name && - (*m)->IsSetSubname() && - NStr::Equal((*m)->GetSubname(), compare)) { - return true; + if (org.IsSetOrgname()) { + const auto& orgname = org.GetOrgname(); + if (orgname.IsSetMod()) { + for (auto m : orgname.GetMod()) { + if (m->IsSetSubtype() && + m->GetSubtype() == COrgMod::eSubtype_old_name && + m->IsSetSubname() && + NStr::Equal(m->GetSubname(), compare)) { + return true; + } } } } @@ -9953,12 +9363,12 @@ void CNewCleanup_imp::x_CleanupGenbankBlock(CGB_block& gb, bool is_patent, CCons gb.ResetDiv(); ChangeMade(CCleanupChange::eChangeOther); } - } else if (s_ShouldRemoveKeyword(gb.GetDiv(), tech)) { + } + else if (SKeywordChecker{ tech }(gb.GetDiv())) { gb.ResetDiv(); ChangeMade(CCleanupChange::eChangeOther); } - } - if (gb.IsSetSource() && biosrc && x_CanRemoveGenbankBlockSource(gb.GetSource(), *biosrc)) { + } if (gb.IsSetSource() && biosrc && x_CanRemoveGenbankBlockSource(gb.GetSource(), *biosrc)) { gb.ResetSource(); ChangeMade(CCleanupChange::eChangeOther); } @@ -10383,9 +9793,7 @@ bool CNewCleanup_imp::x_CleanEmptyProt(CProt_ref& prot) bool CNewCleanup_imp::x_ShouldRemoveEmptyProt(const CProt_ref& prot) { - if (prot.IsSetProcessed() && - (prot.GetProcessed() == CProt_ref::eProcessed_signal_peptide || - prot.GetProcessed() == CProt_ref::eProcessed_transit_peptide)) { + if (prot.IsSetProcessed() && prot.GetProcessed() != CProt_ref::eProcessed_not_set) { return false; } @@ -10404,9 +9812,16 @@ bool CNewCleanup_imp::x_ShouldRemoveEmptyProt(const CProt_ref& prot) // if bond is other and comment can be used to set bond type, do so. 
void CNewCleanup_imp::x_BondEC(CSeq_feat& feat) { - if (feat.GetData().IsImp() && - feat.GetData().GetImp().IsSetKey() && - NStr::Equal(feat.GetData().GetImp().GetKey(), "misc_feature") && + if (!feat.IsSetData()) { + return; + } + const auto& fdata = feat.GetData(); + if (!fdata.IsImp()) { + return; + } + const auto& imp = fdata.GetImp(); + if (imp.IsSetKey() && + NStr::Equal(imp.GetKey(), "misc_feature") && feat.IsSetComment() && NStr::EndsWith(feat.GetComment(), " bond")) { string bond_type = feat.GetComment().substr(0, feat.GetComment().length() - 5); @@ -10425,7 +9840,8 @@ bool HasAuthor(const CAuthor& author) return false; } if (author.GetName().IsName()) { - if (!author.GetName().GetName().IsSetLast() || NStr::IsBlank(author.GetName().GetName().GetLast())) { + const auto& aname = author.GetName().GetName(); + if (!aname.IsSetLast() || NStr::IsBlank(aname.GetLast())) { return false; } else { return true; @@ -10447,11 +9863,12 @@ bool HasAuthor(const CAuth_list& auth_list) return false; } if (auth_list.GetNames().IsStd()) { - ITERATE (CAuth_list::TNames::TStd, it, auth_list.GetNames().GetStd()) { - if (!(*it)->CanGetName()) { + const auto& stdauth = auth_list.GetNames().GetStd(); + for (auto it : stdauth) { + if (!it->CanGetName()) { continue; } - const CPerson_id& pid = (*it)->GetName(); + const CPerson_id& pid = it->GetName(); if (pid.IsName() || pid.IsMl() || pid.IsStr() || pid.IsConsortium()) { has_name = true; break; @@ -10479,16 +9896,17 @@ bool HasAuthor(const CPubdesc& pub, bool strict) } bool any_authors = false; - ITERATE(CPubdesc::TPub::Tdata, it, pub.GetPub().Get()) { - if ((*it)->IsPatent()) { + const auto& pubset = pub.GetPub().Get(); + for (auto it : pubset) { + if (it->IsPatent()) { if (!strict) { // if patent and not strict, just patent is ok return true; } } - if ((*it)->IsSetAuthors()) { + if (it->IsSetAuthors()) { any_authors = true; - if (HasAuthor((*it)->GetAuthors())) { + if (HasAuthor(it->GetAuthors())) { return true; } } @@ -10521,7 
+9939,7 @@ bool IsMinimal(const CCit_gen& gen) #define CHECK_TITLE(Title_type) \ case CTitle::C_E::e_##Title_type: \ - if (!NStr::IsBlank((*title)->Get##Title_type())) { \ + if (!NStr::IsBlank(title->Get##Title_type())) { \ has_title = true; \ } \ break; @@ -10537,9 +9955,11 @@ bool CNewCleanup_imp::x_IsPubContentBad(const CId_pat& pat) } if (!pat.IsSetId()) { return false; - } else if (pat.GetId().IsApp_number() && !NStr::IsBlank(pat.GetId().GetApp_number())) { + } + const auto& id = pat.GetId(); + if (id.IsApp_number() && !NStr::IsBlank(id.GetApp_number())) { return false; - } else if (pat.GetId().IsNumber() && !NStr::IsBlank(pat.GetId().GetNumber())) { + } else if (id.IsNumber() && !NStr::IsBlank(id.GetNumber())) { return false; } @@ -10566,7 +9986,11 @@ bool CNewCleanup_imp::x_IsPubContentBad(const CPub& pub) bool CNewCleanup_imp::x_IsPubContentBad(const CPubdesc& pub, bool strict) { // remove if no pubs at all - if (!pub.IsSetPub() || pub.GetPub().Get().empty()) { + if (!pub.IsSetPub()) { + return true; + } + const auto& pubset = pub.GetPub().Get(); + if (pubset.empty()) { return true; } @@ -10581,24 +10005,25 @@ bool CNewCleanup_imp::x_IsPubContentBad(const CPubdesc& pub, bool strict) } // remove if only one pub and that pub is bad - if (pub.IsSetPub() && pub.GetPub().Get().size() == 1 && + if (pubset.size() == 1 && x_IsPubContentBad(*(pub.GetPub().Get().front()))) { return true; } - ITERATE(CPubdesc::TPub::Tdata, it, pub.GetPub().Get()) { - if ((*it)->IsArticle()) { + for (auto it : pubset) { + if (it->IsArticle()) { // all CitArt from journal must have journal title and imprint - const CCit_art& art = (*it)->GetArticle(); + const CCit_art& art = it->GetArticle(); if (art.IsSetFrom() && art.GetFrom().IsJournal()) { const CCit_jour& jour = art.GetFrom().GetJournal(); if (!jour.IsSetImp()) { return true; } bool has_title = false; - if (jour.IsSetTitle() && !jour.GetTitle().Get().empty()) { - ITERATE(CCit_jour::TTitle::Tdata, title, jour.GetTitle().Get()) { - 
switch ((*title)->Which()) { + if (jour.IsSetTitle()) { + const auto& titles = jour.GetTitle().Get(); + for (auto title : titles) { + switch (title->Which()) { CHECK_TITLE(Name) CHECK_TITLE(Tsub) CHECK_TITLE(Trans) @@ -10698,8 +10123,9 @@ void CNewCleanup_imp::x_RemoveEmptyFeatures( CSeq_annot & seq_annot ) bool any_erasures = true; while (any_erasures) { any_erasures = false; - CSeq_annot::C_Data::TFtable::iterator it = seq_annot.SetData().SetFtable().begin(); - while (it != seq_annot.SetData().SetFtable().end()) { + auto& ftable = seq_annot.SetData().SetFtable(); + CSeq_annot::C_Data::TFtable::iterator it = ftable.begin(); + while (it != ftable.end()) { CRef editable(new CSeq_feat()); editable->Assign(**it); bool changed = x_CleanEmptyFeature(*editable); @@ -10746,8 +10172,9 @@ bool s_RetainEmptyAnnot(const CSeq_annot& annot) if (!annot.IsSetDesc()) { return false; } - ITERATE(CSeq_annot::TDesc::Tdata, it, annot.GetDesc().Get()) { - if ((*it)->IsUser() && s_IsGenomeAnnotationStart((*it)->GetUser())) { + const auto& adesc = annot.GetDesc().Get(); + for (auto it : adesc) { + if (it->IsUser() && s_IsGenomeAnnotationStart(it->GetUser())) { return true; } } @@ -10958,28 +10385,30 @@ void CNewCleanup_imp::x_RemoveOldFeatures(CBioseq & bioseq) any_erasures = false; CFeat_CI f(bh); while (f) { - if (f->IsSetData() && - (f->GetData().IsOrg() || - (f->GetData().IsImp() && f->GetData().GetImp().IsSetKey() && - NStr::Equal(f->GetData().GetImp().GetKey(), "source")))) { - if (src) { - // remove import source features if source descriptor already present - CSeq_feat_Handle fh(*f); - CSeq_feat_EditHandle eh(fh); - eh.Remove(); - any_erasures = true; - ChangeMade(CCleanupChange::eRemoveFeat); - break; - } else { - // convert imp-source feature to biosource - CRef bsrc = BioSourceFromImpFeat(*(f->GetSeq_feat())); - if (bsrc) { - BiosourceBC(*bsrc); - CRef d(new CSeqdesc()); - d->SetSource().Assign(*bsrc); - CBioseq_EditHandle eh(bh); - eh.SetDescr().Set().push_back(d); - 
ChangeMade(CCleanupChange::eAddDescriptor); + if (f->IsSetData()) { + const auto& fdata = f->GetData(); + if (fdata.IsOrg() || + (fdata.IsImp() && fdata.GetImp().IsSetKey() && + NStr::Equal(fdata.GetImp().GetKey(), "source"))) { + if (src) { + // remove import source features if source descriptor already present + CSeq_feat_Handle fh(*f); + CSeq_feat_EditHandle eh(fh); + eh.Remove(); + any_erasures = true; + ChangeMade(CCleanupChange::eRemoveFeat); + break; + } else { + // convert imp-source feature to biosource + CRef bsrc = BioSourceFromImpFeat(*(f->GetSeq_feat())); + if (bsrc) { + BiosourceBC(*bsrc); + CRef d(new CSeqdesc()); + d->SetSource().Assign(*bsrc); + CBioseq_EditHandle eh(bh); + eh.SetDescr().Set().push_back(d); + ChangeMade(CCleanupChange::eAddDescriptor); + } } } } @@ -11008,13 +10437,17 @@ void CNewCleanup_imp::x_ChangePopToPhy(CBioseq_set& bioseq_set) // or feature. CSeqdesc_CI d(bsh, CSeqdesc::e_Source); if (d) { - if (d->GetSource().IsSetOrg() && d->GetSource().GetOrg().IsSetTaxname()) { - taxname = d->GetSource().GetOrg().GetTaxname(); + const CBioSource& src = d->GetSource(); + if (src.IsSetOrg() && src.GetOrg().IsSetTaxname()) { + taxname = src.GetOrg().GetTaxname(); } } else { CFeat_CI f(bsh, CSeqFeatData::e_Biosrc); - if (f && f->GetData().GetBiosrc().IsSetOrg() && f->GetData().GetBiosrc().GetOrg().IsSetTaxname()) { - taxname = f->GetData().GetBiosrc().GetOrg().GetTaxname(); + if (f) { + const CBioSource& src = f->GetData().GetBiosrc(); + if (src.IsSetOrg() && src.GetOrg().IsSetTaxname()) { + taxname = src.GetOrg().GetTaxname(); + } } } @@ -11085,41 +10518,38 @@ bool IsPubInSet(const CSeq_descr& descr, const CPubdesc& pub) } -void CNewCleanup_imp::x_RemovePub(CSeq_entry& se, const CPubdesc& pub) -{ - if (se.IsSeq()) { - x_RemovePub(se.SetSeq(), pub); - } else if (se.IsSet()) { - x_RemovePub(se.SetSet(), pub); - } -} - - -void CNewCleanup_imp::x_RemovePub(CBioseq& seq, const CPubdesc& pub) -{ - if (seq.IsSetDescr()) { - 
x_RemovePub(seq.SetDescr(), pub); - } -} - - -void CNewCleanup_imp::x_RemovePub(CBioseq_set& set, const CPubdesc& pub) +struct SPubMatch { - if (set.IsSetDescr()) { - x_RemovePub(set.SetDescr(), pub); + const CPubdesc& m_Pub; + bool operator()(CRef dsc) + { + return (dsc && dsc->IsPub() && dsc->GetPub().Equals(m_Pub)); } -} +}; -void CNewCleanup_imp::x_RemovePub(CSeq_descr& descr, const CPubdesc& pub) +void CNewCleanup_imp::x_RemovePub(CSeq_entry& se, const CPubdesc& pub) { - CSeq_descr::Tdata::iterator it = descr.Set().begin(); - while (it != descr.Set().end()) { - if ((*it)->IsPub() && (*it)->GetPub().Equals(pub)) { - it = descr.Set().erase(it); - ChangeMade(CCleanupChange::eRemoveDescriptor); - } else { - ++it; + SPubMatch matcher{ pub }; + if (se.IsSeq()) { + CBioseq& seq = se.SetSeq(); + if (seq.IsSetDescr()) { + auto& dset = seq.SetDescr().Set(); + size_t before = dset.size(); + dset.erase(std::remove_if(dset.begin(), dset.end(), matcher), dset.end()); + if (dset.size() != before) { + ChangeMade(CCleanupChange::eRemoveDescriptor); + } + } + } else if (se.IsSet()) { + CBioseq_set& set = se.SetSet(); + if (set.IsSetDescr()) { + auto& dset = set.SetDescr().Set(); + size_t before = dset.size(); + dset.erase(std::remove_if(dset.begin(), dset.end(), matcher), dset.end()); + if (dset.size() != before) { + ChangeMade(CCleanupChange::eRemoveDescriptor); + } } } } @@ -11156,8 +10586,10 @@ void CNewCleanup_imp::x_MovePopPhyMutPub(CBioseq_set& bioseq_set) vector > pubs_to_remove; - ITERATE(CBioseq::TDescr::Tdata, d, (*first)->GetDescr().Get()) { - if ((*d)->IsPub()) { + const auto& dset = (*first)->GetDescr().Get(); + + for (auto d : dset) { + if (d->IsPub()) { bool found_every_match = true; CBioseq_set::TSeq_set::const_iterator other = first; ++other; @@ -11167,23 +10599,23 @@ void CNewCleanup_imp::x_MovePopPhyMutPub(CBioseq_set& bioseq_set) break; } - if (!IsPubInSet((*other)->GetDescr(), (*d)->GetPub())) { + if (!IsPubInSet((*other)->GetDescr(), d->GetPub())) { 
found_every_match = false; break; } ++other; } if (found_every_match) { - if (!bioseq_set.IsSetDescr() || !IsPubInSet(bioseq_set.GetDescr(), (*d)->GetPub())) { + if (!bioseq_set.IsSetDescr() || !IsPubInSet(bioseq_set.GetDescr(), d->GetPub())) { // copy pub to parent CRef new_pub(new CSeqdesc()); - new_pub->Assign(**d); + new_pub->Assign(*d); bioseq_set.SetDescr().Set().push_back(new_pub); ChangeMade(CCleanupChange::eAddDescriptor); } // remove from children CRef pub_cpy(new CPubdesc()); - pub_cpy->Assign((*d)->GetPub()); + pub_cpy->Assign(d->GetPub()); pubs_to_remove.push_back(pub_cpy); } } @@ -11212,36 +10644,45 @@ namespace { void CNewCleanup_imp::x_CollapseSet(CBioseq_set& bioseq_set) { - if (bioseq_set.IsSetSeq_set() && bioseq_set.GetSeq_set().size() == 1) { + if (!bioseq_set.IsSetSeq_set()) { + return; + } + const auto& seqset = bioseq_set.GetSeq_set(); + if (seqset.size() != 1) { + return; + } + + const auto& only = seqset.front(); + + if (only->IsSet()) { CBioseq_set_EditHandle p = m_Scope->GetBioseq_setEditHandle(bioseq_set); - CSeq_entry_Handle ch = m_Scope->GetSeq_entryHandle(*(bioseq_set.GetSeq_set().front())); - if (bioseq_set.GetSeq_set().front()->IsSet()) { - const CBioseq_set& child = bioseq_set.GetSeq_set().front()->GetSet(); - if (child.IsSetAnnot()) { - while (!child.GetAnnot().empty()) { - CSeq_annot_Handle ah = m_Scope->GetSeq_annotHandle(*(child.GetAnnot().front())); - CSeq_annot_EditHandle eh = ah.GetEditHandle(); - p.TakeAnnot(eh); - } - } - if (child.IsSetDescr()) { - ITERATE(CBioseq_set::TDescr::Tdata, it, child.GetDescr().Get()) { - CRef cpy(new CSeqdesc()); - cpy->Assign(**it); - p.AddSeqdesc(*cpy); - } + CSeq_entry_Handle ch = m_Scope->GetSeq_entryHandle(*only); + const CBioseq_set& child = bioseq_set.GetSeq_set().front()->GetSet(); + if (child.IsSetAnnot()) { + while (!child.GetAnnot().empty()) { + CSeq_annot_Handle ah = m_Scope->GetSeq_annotHandle(*(child.GetAnnot().front())); + CSeq_annot_EditHandle eh = ah.GetEditHandle(); + 
p.TakeAnnot(eh); } - if (child.IsSetSeq_set()) { - while (!child.GetSeq_set().empty()) { - CSeq_entry_Handle h = m_Scope->GetSeq_entryHandle(*(child.GetSeq_set().front())); - CSeq_entry_EditHandle eh = h.GetEditHandle(); - p.TakeEntry(eh); - } + } + if (child.IsSetDescr()) { + const auto& cdset = child.GetDescr().Get(); + for (auto it : cdset) { + CRef cpy(new CSeqdesc()); + cpy->Assign(*it); + p.AddSeqdesc(*cpy); + } + } + if (child.IsSetSeq_set()) { + while (!child.GetSeq_set().empty()) { + CSeq_entry_Handle h = m_Scope->GetSeq_entryHandle(*(child.GetSeq_set().front())); + CSeq_entry_EditHandle eh = h.GetEditHandle(); + p.TakeEntry(eh); } - CSeq_entry_EditHandle ech = ch.GetEditHandle(); - ech.Remove(); - ChangeMade(CCleanupChange::eCollapseSet); } + CSeq_entry_EditHandle ech = ch.GetEditHandle(); + ech.Remove(); + ChangeMade(CCleanupChange::eCollapseSet); } } @@ -11251,8 +10692,9 @@ void CNewCleanup_imp::x_RemovePopPhyBioSource(CBioseq_set& set) if (!set.IsSetDescr()) { return; } - CBioseq_set::TDescr::Tdata::iterator d = set.SetDescr().Set().begin(); - while (d != set.SetDescr().Set().end()) { + auto& dset = set.SetDescr().Set(); + CBioseq_set::TDescr::Tdata::iterator d = dset.begin(); + while (d != dset.end()) { if ((*d)->IsSource()) { //propagate down if ((*d)->GetSource().IsSetOrg() && @@ -11266,7 +10708,7 @@ void CNewCleanup_imp::x_RemovePopPhyBioSource(CBioseq_set& set) } } } - d = set.SetDescr().Set().erase(d); + d = dset.erase(d); ChangeMade(CCleanupChange::eRemoveDescriptor); } else { ++d; @@ -11276,16 +10718,44 @@ void CNewCleanup_imp::x_RemovePopPhyBioSource(CBioseq_set& set) } +static bool s_HasDescriptorOfType(const CSeq_descr::Tdata& dset, CSeqdesc::E_Choice dtype) +{ + for (auto d : dset) { + if (d->Which() == dtype) { + return true; + } + } + return false; +} + + +static bool s_HasDescriptorOfType(const CBioseq_set& set, CSeqdesc::E_Choice dtype) +{ + bool rval = false; + if (set.IsSetDescr()) { + rval = 
s_HasDescriptorOfType(set.GetDescr().Get(), dtype); + } + return rval; +} + + +static bool s_HasDescriptorOfType(const CBioseq& seq, CSeqdesc::E_Choice dtype) +{ + bool rval = false; + if (seq.IsSetDescr()) { + rval = s_HasDescriptorOfType(seq.GetDescr().Get(), dtype); + } + return rval; +} + + void CNewCleanup_imp::x_RemovePopPhyBioSource(CBioseq_set& set, const COrg_ref& org) { // bail if already have source descriptor - if (set.IsSetDescr()) { - ITERATE(CBioseq_set::TDescr::Tdata, d, set.GetDescr().Get()) { - if ((*d)->IsSource()) { - return; - } - } + if (s_HasDescriptorOfType(set, CSeqdesc::e_Source)) { + return; } + CRef src(new CSeqdesc()); if (org.IsSetTaxname()) { src->SetSource().SetOrg().SetTaxname(org.GetTaxname()); @@ -11301,13 +10771,10 @@ void CNewCleanup_imp::x_RemovePopPhyBioSource(CBioseq_set& set, const COrg_ref& void CNewCleanup_imp::x_RemovePopPhyBioSource(CBioseq& seq, const COrg_ref& org) { // bail if already have source descriptor - if (seq.IsSetDescr()) { - ITERATE(CBioseq_set::TDescr::Tdata, d, seq.GetDescr().Get()) { - if ((*d)->IsSource()) { - return; - } - } + if (s_HasDescriptorOfType(seq, CSeqdesc::e_Source)) { + return; } + CRef src(new CSeqdesc()); if (org.IsSetTaxname()) { src->SetSource().SetOrg().SetTaxname(org.GetTaxname()); @@ -11325,8 +10792,9 @@ void CNewCleanup_imp::x_RemovePopPhyMolInfo(CBioseq_set& set) if (!set.IsSetDescr()) { return; } - CBioseq_set::TDescr::Tdata::iterator d = set.SetDescr().Set().begin(); - while (d != set.SetDescr().Set().end()) { + auto& dset = set.SetDescr().Set(); + CBioseq_set::TDescr::Tdata::iterator d = dset.begin(); + while (d != dset.end()) { if ((*d)->IsMolinfo()) { //propagate down NON_CONST_ITERATE(CBioseq_set::TSeq_set, s, set.SetSeq_set()) { @@ -11336,7 +10804,7 @@ void CNewCleanup_imp::x_RemovePopPhyMolInfo(CBioseq_set& set) x_RemovePopPhyMolInfo((*s)->SetSeq(), (*d)->GetMolinfo()); } } - d = set.SetDescr().Set().erase(d); + d = dset.erase(d); 
ChangeMade(CCleanupChange::eRemoveDescriptor); } else { ++d; @@ -11348,14 +10816,11 @@ void CNewCleanup_imp::x_RemovePopPhyMolInfo(CBioseq_set& set) void CNewCleanup_imp::x_RemovePopPhyMolInfo(CBioseq_set& set, const CMolInfo& mol) { - // bail if already have source descriptor - if (set.IsSetDescr()) { - ITERATE(CBioseq_set::TDescr::Tdata, d, set.GetDescr().Get()) { - if ((*d)->IsMolinfo()) { - return; - } - } + // bail if already have molinfo descriptor + if (s_HasDescriptorOfType(set, CSeqdesc::e_Molinfo)) { + return; } + CRef mi(new CSeqdesc()); mi->SetMolinfo().Assign(mol); set.SetDescr().Set().push_back(mi); @@ -11366,13 +10831,10 @@ void CNewCleanup_imp::x_RemovePopPhyMolInfo(CBioseq_set& set, const CMolInfo& mo void CNewCleanup_imp::x_RemovePopPhyMolInfo(CBioseq& seq, const CMolInfo& mol) { // bail if already have MolInfo descriptor - if (seq.IsSetDescr()) { - ITERATE(CBioseq_set::TDescr::Tdata, d, seq.GetDescr().Get()) { - if ((*d)->IsMolinfo()) { - return; - } - } + if (s_HasDescriptorOfType(seq, CSeqdesc::e_Molinfo)) { + return; } + CRef mi(new CSeqdesc()); mi->SetMolinfo().Assign(mol); seq.SetDescr().Set().push_back(mi); @@ -11385,10 +10847,11 @@ void CNewCleanup_imp::x_MoveNPTitle(CBioseq_set& set) if (!set.IsSetDescr() || !set.IsSetSeq_set()) { return; } + const auto& dset = set.GetDescr().Get(); CConstRef set_title(NULL); - ITERATE(CSeq_descr::Tdata, d, set.GetDescr().Get()) { - if ((*d)->IsTitle()) { - set_title = *d; + for (auto d : dset) { + if (d->IsTitle()) { + set_title = d; } } if (!set_title) { @@ -11397,8 +10860,9 @@ void CNewCleanup_imp::x_MoveNPTitle(CBioseq_set& set) bool have_nuc_title = false; ITERATE(CBioseq_set::TSeq_set, it, set.GetSeq_set()) { if ((*it)->IsSeq() && (*it)->GetSeq().IsNa()) { - ITERATE(CSeq_descr::Tdata, d, (*it)->GetSeq().GetDescr().Get()) { - if ((*d)->IsTitle()) { + const auto& idset = (*it)->GetSeq().GetDescr().Get(); + for (auto d : idset) { + if (d->IsTitle()) { have_nuc_title = true; break; } @@ -11450,11 
+10914,13 @@ void CNewCleanup_imp::x_BioseqSetNucProtEC(CBioseq_set & bioseq_set) void CNewCleanup_imp::x_RemoveNestedGenBankSet(CBioseq_set & bioseq_set) { if (bioseq_set.IsSetSeq_set() && bioseq_set.GetSeq_set().size() == 1 && - bioseq_set.GetSeq_set().front()->IsSet() && - bioseq_set.GetSeq_set().front()->GetSet().IsSetClass() && - bioseq_set.GetSeq_set().front()->GetSet().GetClass() == CBioseq_set::eClass_genbank) { - if (bioseq_set.GetParentSet() != NULL || !m_KeepTopNestedSet) - x_CollapseSet(bioseq_set); + bioseq_set.GetSeq_set().front()->IsSet()) { + const auto& inner_set = bioseq_set.GetSeq_set().front()->GetSet(); + if (inner_set.IsSetClass() && + inner_set.GetClass() == CBioseq_set::eClass_genbank && + (bioseq_set.GetParentSet() != NULL || !m_KeepTopNestedSet)) { + x_CollapseSet(bioseq_set); + } } } @@ -11464,11 +10930,13 @@ void CNewCleanup_imp::x_RemoveNestedNucProtSet(CBioseq_set & bioseq_set) { if (bioseq_set.IsSetClass() && bioseq_set.GetClass() == CBioseq_set::eClass_nuc_prot && - bioseq_set.IsSetSeq_set() && bioseq_set.GetSeq_set().size() == 1 && - bioseq_set.GetSeq_set().front()->IsSet() && - bioseq_set.GetSeq_set().front()->GetSet().IsSetClass() && - bioseq_set.GetSeq_set().front()->GetSet().GetClass() == CBioseq_set::eClass_nuc_prot) { - x_CollapseSet(bioseq_set); + bioseq_set.IsSetSeq_set() && bioseq_set.GetSeq_set().size() == 1 && + bioseq_set.GetSeq_set().front()->IsSet()) { + const auto& inner_set = bioseq_set.GetSeq_set().front()->GetSet(); + if (inner_set.IsSetClass() && + inner_set.GetClass() == CBioseq_set::eClass_nuc_prot) { + x_CollapseSet(bioseq_set); + } } } @@ -11479,9 +10947,10 @@ void CNewCleanup_imp::x_BioseqSetGenBankEC(CBioseq_set & bioseq_set) // clean up nested GenBank sets x_RemoveNestedGenBankSet(bioseq_set); //propagate source descriptors to set components - if (bioseq_set.IsSetDescr() && bioseq_set.IsSetSeq_set() && !bioseq_set.GetSeq_set().empty()) { - CBioseq_set::TDescr::Tdata::iterator it = 
bioseq_set.SetDescr().Set().begin(); - while (it != bioseq_set.SetDescr().Set().end()) { + if (bioseq_set.IsSetDescr() && bioseq_set.IsSetSeq_set() && !bioseq_set.GetSeq_set().empty()) { + auto& dset = bioseq_set.SetDescr().Set(); + CBioseq_set::TDescr::Tdata::iterator it = dset.begin(); + while (it != dset.end()) { if ((*it)->IsSource()) { NON_CONST_ITERATE(CBioseq_set::TSeq_set, s, bioseq_set.SetSeq_set()) { CRef cpy(new CSeqdesc()); @@ -11492,14 +10961,14 @@ void CNewCleanup_imp::x_BioseqSetGenBankEC(CBioseq_set & bioseq_set) (*s)->SetSet().SetDescr().Set().push_back(cpy); } } - it = bioseq_set.SetDescr().Set().erase(it); + it = dset.erase(it); ChangeMade(CCleanupChange::eAddDescriptor); ChangeMade(CCleanupChange::eRemoveDescriptor); } else { ++it; } } - if (bioseq_set.SetDescr().Set().empty()) { + if (dset.empty()) { bioseq_set.ResetDescr(); } } @@ -11514,9 +10983,12 @@ void CNewCleanup_imp::x_MoveNpDBlinks(CBioseq_set& bioseq_set) // (identical DBLinks count as one) // bail if there is a DBLinkDesc on the bioseq_set itself - FOR_EACH_SEQDESC_ON_SEQSET(desc_it, bioseq_set) { - if( x_IsDBLinkUserObj(**desc_it) ) { - return; + if (bioseq_set.IsSetDescr()) { + const auto& dset = bioseq_set.GetDescr().Get(); + for (auto desc_it : dset) { + if (x_IsDBLinkUserObj(*desc_it)) { + return; + } } } @@ -11527,35 +10999,41 @@ void CNewCleanup_imp::x_MoveNpDBlinks(CBioseq_set& bioseq_set) VISIT_ALL_SEQENTRYS_WITHIN_SEQSET( entry_it, bioseq_set ) { CRef pEntry( & const_cast(*entry_it) ); - EDIT_EACH_SEQDESC_ON_SEQENTRY(desc_it, *pEntry ) - { - if( ! x_IsDBLinkUserObj(**desc_it) ) { - // ignore other types of user objects - continue; - } - - if( ! 
pEntry->IsSeq() ) { - // Found a DBLink on some descendent bioseq-set, - // so we bail out - return; - } + if (pEntry->IsSetDescr()) { + auto& dset = pEntry->SetDescr().Set(); + auto desc_it = dset.begin(); + while (desc_it != dset.end()) + { + if (!x_IsDBLinkUserObj(**desc_it)) { + // ignore other types of user objects + ++desc_it; + continue; + } - // there has already been a dblink. make sure it's - // identical - if( ! dblinksToDeleteVec.empty() ) { - const CSeqdesc & last_dblink = - **dblinksToDeleteVec.rbegin()->pDBLinkDesc_iter; - // bail out if there is more than one DBLink user object, - // and they are NOT identical - if( ! (*desc_it)->Equals(last_dblink) ) { + if (!pEntry->IsSeq()) { + // Found a DBLink on some descendent bioseq-set, + // so we bail out return; } - } - SDblinkDeleteInfo dblink_to_delete; - dblink_to_delete.pDBLinkDesc_iter = desc_it; - dblink_to_delete.pDBLinkDescBioseq = Ref(&pEntry->SetSeq()); - dblinksToDeleteVec.push_back( dblink_to_delete ); + // there has already been a dblink. 
make sure it's + // identical + if (!dblinksToDeleteVec.empty()) { + const CSeqdesc & last_dblink = + **dblinksToDeleteVec.rbegin()->pDBLinkDesc_iter; + // bail out if there is more than one DBLink user object, + // and they are NOT identical + if (!(*desc_it)->Equals(last_dblink)) { + return; + } + } + + SDblinkDeleteInfo dblink_to_delete; + dblink_to_delete.pDBLinkDesc_iter = desc_it; + dblink_to_delete.pDBLinkDescBioseq = Ref(&pEntry->SetSeq()); + dblinksToDeleteVec.push_back(dblink_to_delete); + ++desc_it; + } } } @@ -11581,8 +11059,9 @@ void CNewCleanup_imp::x_MoveNpDBlinks(CBioseq_set& bioseq_set) void CNewCleanup_imp::x_MoveNpSrc(CRef& srcdesc, CSeq_descr& descr) { - CBioseq::TDescr::Tdata::iterator d = descr.Set().begin(); - while (d != descr.Set().end()) { + auto& dset = descr.Set(); + CBioseq::TDescr::Tdata::iterator d = dset.begin(); + while (d != dset.end()) { bool do_remove = false; if ((*d)->IsSource()) { if (srcdesc && CCleanup::AreBioSourcesMergeable(srcdesc->GetSource(), (*d)->GetSource())) { @@ -11595,7 +11074,7 @@ void CNewCleanup_imp::x_MoveNpSrc(CRef& srcdesc, CSeq_descr& descr) } } if (do_remove) { - d = descr.Set().erase(d); + d = dset.erase(d); } else { ++d; } @@ -11614,21 +11093,21 @@ void CNewCleanup_imp::x_MoveNpSrc(CBioseq_set& set) bool add_desc = true; CRef srcdesc(NULL); if (set.IsSetDescr()) { - NON_CONST_ITERATE(CBioseq_set::TDescr::Tdata, it, set.SetDescr().Set()) { - if ((*it)->IsSource()) { - srcdesc = *it; + auto& dset = set.SetDescr().Set(); + for (auto& it : dset) { + if (it->IsSource()) { + srcdesc = it; add_desc = false; } } } - - NON_CONST_ITERATE(CBioseq_set::TSeq_set, it, set.SetSeq_set()) { - if ((*it)->IsSetDescr()) { - if ((*it)->IsSeq()) { - x_MoveNpSrc(srcdesc, (*it)->SetSeq().SetDescr()); - } else if ((*it)->IsSet()) { - x_MoveNpSrc(srcdesc, (*it)->SetSet().SetDescr()); + for (auto it : set.SetSeq_set()) { + if (it->IsSetDescr()) { + if (it->IsSeq()) { + x_MoveNpSrc(srcdesc, it->SetSeq().SetDescr()); + } else if 
(it->IsSet()) { + x_MoveNpSrc(srcdesc, it->SetSet().SetDescr()); } } } @@ -11640,10 +11119,11 @@ void CNewCleanup_imp::x_MoveNpSrc(CBioseq_set& set) void CNewCleanup_imp::x_MoveNpPub(CBioseq_set& np_set, CSeq_descr& descr) { - CSeq_descr::Tdata::iterator d = descr.Set().begin(); - while (d != descr.Set().end()) { + auto& dset = descr.Set(); + CSeq_descr::Tdata::iterator d = dset.begin(); + while (d != dset.end()) { if ((*d)->IsPub() && np_set.IsSetDescr() && CCleanup::PubAlreadyInSet((*d)->GetPub(), np_set.GetDescr())) { - d = descr.Set().erase(d); + d = dset.erase(d); ChangeMade(CCleanupChange::eRemoveDescriptor); } else if ((*d)->IsPub() && CCleanup::OkToPromoteNpPub((*d)->GetPub())) { CRef new_desc(new CSeqdesc()); @@ -11659,13 +11139,14 @@ void CNewCleanup_imp::x_MoveNpPub(CBioseq_set& np_set, CSeq_descr& descr) } -bool s_HasRefSeqPGAPStructuredComment(CSeq_entry_Handle seh) +bool s_HasRefSeqPGAPStructuredComment(const CSeq_entry_Handle& seh) { CSeqdesc_CI di(seh, CSeqdesc::e_User); while (di) { - if (di->GetUser().HasField("StructuredCommentPrefix") && di->GetUser().HasField("Annotation Provider")) { - const CUser_field& field = di->GetUser().GetField("StructuredCommentPrefix"); - const CUser_field& provider = di->GetUser().GetField("Annotation Provider"); + const auto& user = di->GetUser(); + if (user.HasField("StructuredCommentPrefix") && user.HasField("Annotation Provider")) { + const CUser_field& field = user.GetField("StructuredCommentPrefix"); + const CUser_field& provider = user.GetField("Annotation Provider"); if (field.IsSetData() && field.GetData().IsStr() && NStr::EqualNocase(field.GetData().GetStr(), "##Genome-Annotation-Data-START##") && provider.IsSetData() && provider.GetData().IsStr() && @@ -11693,18 +11174,22 @@ void CNewCleanup_imp::x_MoveNpPub(CBioseq_set& set) if (seh && s_HasRefSeqPGAPStructuredComment(seh)) { continue; } - x_MoveNpPub(set, (*it)->SetSeq().SetDescr()); - if ((*it)->SetSeq().SetDescr().Set().empty()) { - 
(*it)->SetSeq().ResetDescr(); + auto& seq = (*it)->SetSeq(); + auto& dset = seq.SetDescr(); + x_MoveNpPub(set, dset); + if (dset.Set().empty()) { + seq.ResetDescr(); } } else if ((*it)->IsSet() && (*it)->GetSet().IsSetDescr()) { CSeq_entry_Handle seh = m_Scope->GetSeq_entryHandle(**it); if (seh && s_HasRefSeqPGAPStructuredComment(seh)) { continue; } - x_MoveNpPub(set, (*it)->SetSet().SetDescr()); - if ((*it)->SetSet().SetDescr().Set().empty()) { - (*it)->SetSet().ResetDescr(); + auto& set = (*it)->SetSet(); + auto& dset = set.SetDescr(); + x_MoveNpPub(set, dset); + if (dset.Set().empty()) { + set.ResetDescr(); } } } @@ -11864,8 +11349,9 @@ void CNewCleanup_imp::AddProteinTitles(CBioseq& seq) if (!(m_Options & CCleanup::eClean_NoProteinTitles)) { // don't add if there is already a title directly on the sequence if (seq.IsSetDescr()) { - ITERATE(CBioseq::TDescr::Tdata, it, seq.GetDescr().Get()){ - if ((*it)->IsTitle()) { + const auto& dset = seq.GetDescr().Get(); + for (auto it : dset) { + if (it->IsTitle()) { return; } } @@ -11903,16 +11389,18 @@ bool CNewCleanup_imp::x_FixParentPartials(const CSeq_feat& sf, CSeq_feat& parent // note - this is pathological return any_changes; } - if (sf.GetLocation().IsPartialStart(eExtreme_Biological) && - !parent.GetLocation().IsPartialStart(eExtreme_Biological) && - sf.GetLocation().GetStart(eExtreme_Biological) == parent.GetLocation().GetStart(eExtreme_Biological)) { + const auto& floc = sf.GetLocation(); + const auto& ploc = parent.GetLocation(); + if (floc.IsPartialStart(eExtreme_Biological) && + !ploc.IsPartialStart(eExtreme_Biological) && + floc.GetStart(eExtreme_Biological) == ploc.GetStart(eExtreme_Biological)) { parent.SetLocation().SetPartialStart(true, eExtreme_Biological); parent.SetPartial(true); any_changes = true; } - if (sf.GetLocation().IsPartialStop(eExtreme_Biological) && - !parent.GetLocation().IsPartialStop(eExtreme_Biological) && - sf.GetLocation().GetStop(eExtreme_Biological) == 
parent.GetLocation().GetStop(eExtreme_Biological)) { + if (floc.IsPartialStop(eExtreme_Biological) && + !ploc.IsPartialStop(eExtreme_Biological) && + floc.GetStop(eExtreme_Biological) == ploc.GetStop(eExtreme_Biological)) { parent.SetLocation().SetPartialStop(true, eExtreme_Biological); parent.SetPartial(true); any_changes = true; @@ -12050,23 +11538,26 @@ bool CNewCleanup_imp::IsSyntheticConstruct(const CBioSource& src) void CNewCleanup_imp::x_ExtendFeatureToCoverSequence(CSeq_feat_Handle fh, const CBioseq& seq) { - if (fh.GetLocation().IsInt() && - fh.GetLocation().GetStart(eExtreme_Biological) == 0 && - fh.GetLocation().GetStop(eExtreme_Biological) == seq.GetLength() - 1) { + const auto& loc = fh.GetLocation(); + if (loc.IsInt() && + loc.GetStart(eExtreme_Biological) == 0 && + loc.GetStop(eExtreme_Biological) == seq.GetLength() - 1) { // already full length, no need to change return; } - bool partial_start = fh.GetLocation().IsPartialStart(eExtreme_Biological); - bool partial_stop = fh.GetLocation().IsPartialStop(eExtreme_Biological); + bool partial_start = loc.IsPartialStart(eExtreme_Biological); + bool partial_stop = loc.IsPartialStop(eExtreme_Biological); CRef new_feat(new CSeq_feat()); new_feat->Assign(*(fh.GetSeq_feat())); - new_feat->SetLocation().SetInt().SetId().Assign(*(fh.GetLocation().GetId())); - new_feat->SetLocation().SetInt().SetFrom(0); - new_feat->SetLocation().SetInt().SetTo(seq.GetLength() - 1); - new_feat->SetLocation().SetPartialStart(partial_start, eExtreme_Biological); - new_feat->SetLocation().SetPartialStop(partial_stop, eExtreme_Biological); + auto& new_loc = new_feat->SetLocation(); + auto& new_int = new_loc.SetInt(); + new_int.SetId().Assign(*(fh.GetLocation().GetId())); + new_int.SetFrom(0); + new_int.SetTo(seq.GetLength() - 1); + new_loc.SetPartialStart(partial_start, eExtreme_Biological); + new_loc.SetPartialStop(partial_stop, eExtreme_Biological); CSeq_feat_EditHandle eh(fh); eh.Replace(*new_feat); @@ -12079,11 +11570,14 @@ void 
CNewCleanup_imp::x_ExtendFeatureToCoverSequence(CSeq_feat_Handle fh, const void CNewCleanup_imp::x_ExtendProteinFeatureOnProteinSeq(CBioseq& seq) { // don't bother unless length greater than zero and protein - if (!seq.IsSetInst() || - !seq.GetInst().IsSetLength() || - seq.GetInst().GetLength() == 0 || - !seq.GetInst().IsSetMol() || - !seq.GetInst().IsAa()) { + if (!seq.IsSetInst()) { + return; + } + const auto& inst = seq.GetInst(); + if (!inst.IsSetLength() || + inst.GetLength() == 0 || + !inst.IsSetMol() || + !inst.IsAa()) { return; } CBioseq_Handle bsh = m_Scope->GetBioseqHandle(seq); @@ -12097,9 +11591,10 @@ void CNewCleanup_imp::x_ExtendProteinFeatureOnProteinSeq(CBioseq& seq) return; } - if (f->GetLocation().IsInt() && - f->GetLocation().GetStart(eExtreme_Biological) == 0 && - f->GetLocation().GetStop(eExtreme_Biological) == seq.GetLength() - 1) { + const auto& loc = f->GetLocation(); + if (loc.IsInt() && + loc.GetStart(eExtreme_Biological) == 0 && + loc.GetStop(eExtreme_Biological) == seq.GetLength() - 1) { // already full length, no need to change return; } @@ -12112,11 +11607,14 @@ void CNewCleanup_imp::x_ExtendProteinFeatureOnProteinSeq(CBioseq& seq) void CNewCleanup_imp::x_ExtendSingleGeneOnMrna(CBioseq& seq) { // don't bother unless length greater than zero and mRNA - if (!seq.IsSetInst() || - !seq.GetInst().IsSetLength() || - seq.GetInst().GetLength() == 0 || - !seq.GetInst().IsSetMol() || - !seq.GetInst().IsNa()) { + if (!seq.IsSetInst()) { + return; + } + const auto& inst = seq.GetInst(); + if (!inst.IsSetLength() || + inst.GetLength() == 0 || + !inst.IsSetMol() || + !inst.IsNa()) { return; } CBioseq_Handle bsh = m_Scope->GetBioseqHandle(seq); @@ -12142,20 +11640,21 @@ void CNewCleanup_imp::x_ExtendSingleGeneOnMrna(CBioseq& seq) CConstRef gene(NULL); while (f) { if (f->IsSetData()) { - if (f->GetData().IsGene()) { + const auto& fdata = f->GetData(); + if (fdata.IsGene()) { num_gene++; if (num_gene > 1) { // bail if more than one gene break; } 
gene.Reset(f->GetSeq_feat()); - } else if (f->GetData().IsCdregion()) { + } else if (fdata.IsCdregion()) { num_cds++; if (num_cds > 1) { // bail if more than one CDS break; } - } else if (f->GetData().GetSubtype() == CSeqFeatData::eSubtype_mRNA) { + } else if (fdata.GetSubtype() == CSeqFeatData::eSubtype_mRNA) { num_mrna++; if (num_mrna > 1) { // bail if more than one mRNA @@ -12189,8 +11688,9 @@ void CNewCleanup_imp::MoveDbxrefs(CSeq_feat& sf) if (!sf.IsSetQual()) { return; } - CSeq_feat::TQual::iterator it = sf.SetQual().begin(); - while (it != sf.SetQual().end()) { + auto& quals = sf.SetQual(); + CSeq_feat::TQual::iterator it = quals.begin(); + while (it != quals.end()) { if ((*it)->IsSetQual() && (*it)->IsSetVal() && NStr::Equal((*it)->GetQual(), "db_xref")) { string val = (*it)->GetVal(); string tag, db; @@ -12206,7 +11706,7 @@ void CNewCleanup_imp::MoveDbxrefs(CSeq_feat& sf) sf.SetDbxref().push_back(dbp); ChangeMade(CCleanupChange::eChangeDbxrefs); ChangeMade(CCleanupChange::eRemoveQualifier); - it = sf.SetQual().erase(it); + it = quals.erase(it); } else { ++it; } @@ -12227,17 +11727,22 @@ void CNewCleanup_imp::MoveDbxrefs(CSeq_feat& sf) void CNewCleanup_imp::MoveStandardName(CSeq_feat& sf) { + if (!sf.IsSetData()) { + return; + } // only for rRNAs - if (!sf.IsSetData() || !sf.GetData().IsRna()) { + const auto& fdata = sf.GetData(); + if (!fdata.IsRna()) { return; } - if (!sf.GetData().GetRna().IsSetType() || sf.GetData().GetRna().GetType() == CRNA_ref::eType_tmRNA) { + const auto& rna = fdata.GetRna(); + if (!rna.IsSetType() || rna.GetType() == CRNA_ref::eType_tmRNA) { return; } - if (sf.GetData().GetRna().GetType() == CRNA_ref::eType_tRNA && - sf.GetData().GetRna().IsSetExt() && - sf.GetData().GetRna().GetExt().IsTRNA() && - !s_IsEmpty(sf.GetData().GetRna().GetExt().GetTRNA())) { + if (rna.GetType() == CRNA_ref::eType_tRNA && + rna.IsSetExt() && + rna.GetExt().IsTRNA() && + !s_IsEmpty(rna.GetExt().GetTRNA())) { return; } @@ -12250,8 +11755,9 @@ void 
CNewCleanup_imp::MoveStandardName(CSeq_feat& sf) return; } - CSeq_feat::TQual::iterator it = sf.SetQual().begin(); - while (it != sf.SetQual().end()) { + auto& quals = sf.SetQual(); + CSeq_feat::TQual::iterator it = quals.begin(); + while (it != quals.end()) { if ((*it)->IsSetQual() && (*it)->IsSetVal() && NStr::Equal((*it)->GetQual(), "standard_name")) { string val = (*it)->GetVal(); const string product = sf.GetData().GetRna().GetRnaProductName(); @@ -12268,7 +11774,7 @@ void CNewCleanup_imp::MoveStandardName(CSeq_feat& sf) sf.SetComment(val); ChangeMade(CCleanupChange::eRemoveQualifier); } - it = sf.SetQual().erase(it); + it = quals.erase(it); } else { ++it; } @@ -12286,12 +11792,17 @@ void CNewCleanup_imp::CreatePubFromFeat(CSeq_feat& feat) void CNewCleanup_imp::ResynchProteinPartials ( CSeq_feat& feat ) { - if (!feat.IsSetData() || !feat.GetData().IsProt()) { + if (!feat.IsSetData()) { + return; + } + const auto& fdata = feat.GetData(); + if (!fdata.IsProt()) { return; } + const auto& pdata = fdata.GetProt(); - if (feat.GetData().GetProt().IsSetProcessed() && - feat.GetData().GetProt().GetProcessed() != CProt_ref::eProcessed_not_set) { + if (pdata.IsSetProcessed() && + pdata.GetProcessed() != CProt_ref::eProcessed_not_set) { // not a "real" protein feature, just set feature partial // to match location partial const unsigned int partial_loc = @@ -12343,16 +11854,17 @@ void CNewCleanup_imp::x_SetPartialsForProtein(CBioseq& seq, bool partial5, bool bool found = false; bool changed = false; if (seq.IsSetDescr()) { - NON_CONST_ITERATE(CBioseq::TDescr::Tdata, it, seq.SetDescr().Set()) { - if ((*it)->IsMolinfo()) { - if ((*it)->GetMolinfo().IsSetCompleteness()) { - if ((*it)->GetMolinfo().GetCompleteness() != desired) { - (*it)->SetMolinfo().SetCompleteness(desired); + auto& dset = seq.SetDescr().Set(); + for (auto it : dset) { + if (it->IsMolinfo()) { + if (it->GetMolinfo().IsSetCompleteness()) { + if (it->GetMolinfo().GetCompleteness() != desired) { + 
it->SetMolinfo().SetCompleteness(desired); ChangeMade(CCleanupChange::eChangeMolInfo); changed = true; } } else if (desired != CMolInfo::eCompleteness_unknown && desired != CMolInfo::eCompleteness_complete) { - (*it)->SetMolinfo().SetCompleteness(desired); + it->SetMolinfo().SetCompleteness(desired); ChangeMade(CCleanupChange::eChangeMolInfo); changed = true; } @@ -12409,6 +11921,16 @@ void CNewCleanup_imp::ResynchPeptidePartials ( } +// Helper for removing non-matching title descriptors +struct STitleMatchString +{ + const string& m_Val; + bool operator()(CRef desc) + { + return (desc->IsTitle() && !NStr::Equal(desc->GetTitle(), m_Val)); + } +}; + void CNewCleanup_imp::RemoveBadProteinTitle(CBioseq& seq) { if (!seq.IsSetInst() || !seq.GetInst().IsSetMol() || !seq.IsAa()) { @@ -12429,14 +11951,12 @@ void CNewCleanup_imp::RemoveBadProteinTitle(CBioseq& seq) } string new_defline = sequence::CDeflineGenerator().GenerateDefline(bsh, sequence::CDeflineGenerator::fIgnoreExisting); - CBioseq::TDescr::Tdata::iterator title_it = seq.SetDescr().Set().begin(); - while (title_it != seq.SetDescr().Set().end()) { - if ((*title_it)->IsTitle() && !NStr::Equal(new_defline, (*title_it)->GetTitle())) { - title_it = seq.SetDescr().Set().erase(title_it); - ChangeMade(CCleanupChange::eRemoveDescriptor); - } else { - ++title_it; - } + auto& dset = seq.SetDescr().Set(); + size_t orig = dset.size(); + STitleMatchString matcher{ new_defline }; + dset.erase(std::remove_if(dset.begin(), dset.end(), matcher), dset.end()); + if (dset.size() != orig) { + ChangeMade(CCleanupChange::eRemoveDescriptor); } } @@ -12461,8 +11981,9 @@ void CNewCleanup_imp::MoveCitationQuals(CBioseq& seq) if (has_citation) { CRef new_feat(new CSeq_feat()); new_feat->Assign(*(f->GetSeq_feat())); - CSeq_feat::TQual::iterator it = new_feat->SetQual().begin(); - while (it != new_feat->SetQual().end()) { + auto& newqual = new_feat->SetQual(); + CSeq_feat::TQual::iterator it = newqual.begin(); + while (it != newqual.end()) { 
bool do_remove = false; if ((*it)->IsSetQual() && NStr::Equal((*it)->GetQual(), "citation")) { if (!(*it)->IsSetVal() || !s_IsAllDigits((*it)->GetVal())) { @@ -12476,7 +11997,7 @@ void CNewCleanup_imp::MoveCitationQuals(CBioseq& seq) } // create appropriate Cit size_t num = NStr::StringToNonNegativeInt((*it)->GetVal()); - if (num <= pub_list.size()) { + if (num < pub_list.size()) { CRef cp(new CPub()); cp->Assign(*(pub_list[num])); new_feat->SetCit().SetPub().push_back(cp); @@ -12485,7 +12006,7 @@ void CNewCleanup_imp::MoveCitationQuals(CBioseq& seq) } } if (do_remove) { - it = new_feat->SetQual().erase(it); + it = newqual.erase(it); } else { ++it; } @@ -12524,44 +12045,83 @@ void CNewCleanup_imp::x_RemoveUnseenTitles(CBioseq_set& set) } -bool RemoveEarlierDates(CSeq_descr & seq_descr, CSeqdesc::E_Choice date_type) +struct SLaterDate { + const CDate& m_Date; + CSeqdesc::E_Choice date_type; + + bool operator()(CRef desc) { + if (desc->Which() != date_type) { + return false; + } + CDate::ECompare compare; + if (date_type == CSeqdesc::e_Create_date) { + compare = m_Date.Compare(desc->GetCreate_date()); + } else { + compare = m_Date.Compare(desc->GetUpdate_date()); + } + return (compare != CDate::eCompare_same); + } +}; + + +struct SIsDate{ + CSeqdesc::E_Choice date_type; + + bool operator()(CRef desc) { + return (desc->Which() == date_type); + } +}; + + +void RemoveDatesAfterFirst(CSeq_descr& seq_descr, CSeqdesc::E_Choice date_type) { - bool any_removed = false; - auto it = seq_descr.Set().begin(); - while (it != seq_descr.Set().end() && (*it)->Which() != date_type) { - it++; + auto& dset = seq_descr.Set(); + auto it = dset.begin(); + while (it != dset.end() && (*it)->Which() != date_type) { + ++it; } - if (it == seq_descr.Set().end()) { - return any_removed; + if (it == dset.end()) { + return; } - auto prev_date = it; - it++; - while (it != seq_descr.Set().end()) { - if ((*it)->Which() == date_type) { - CDate::ECompare compare; + ++it; + SIsDate matcher{ date_type }; 
+ dset.erase(std::remove_if(it, dset.end(), matcher), dset.end()); +} + + +bool RemoveEarlierDates(CSeq_descr & seq_descr, CSeqdesc::E_Choice date_type) +{ + auto& dset = seq_descr.Set(); + CConstRef latest_date; + size_t num_present = 0; + // find latest item + for (auto it : dset) { + if (it->Which() == date_type) { + CConstRef this_date; if (date_type == CSeqdesc::e_Create_date) { - compare = (*prev_date)->GetCreate_date().Compare((*it)->GetCreate_date()); - } else { - compare = (*prev_date)->GetUpdate_date().Compare((*it)->GetUpdate_date()); + this_date.Reset(&(it->GetCreate_date())); } - if (compare == CDate::eCompare_after) { - // previous date is later, get rid of this one - it = seq_descr.Set().erase(it); - } else { - seq_descr.Set().erase(prev_date); - prev_date = it; - it++; - while (it != seq_descr.Set().end() && (*it)->Which() != date_type) { - it++; - } + else { + this_date.Reset(&(it->GetUpdate_date())); } - any_removed = true; - } else { - it++; + + if (!latest_date || latest_date->Compare(*this_date) == CDate::eCompare_before) { + latest_date = this_date; + } + ++num_present; } } + if (num_present < 2) { + // nothing to do here + return false; + } + + SLaterDate matcher{ *latest_date, date_type }; + dset.erase(std::remove_if(dset.begin(), dset.end(), matcher), dset.end()); - return any_removed; + RemoveDatesAfterFirst(seq_descr, date_type); + + return true; } @@ -12635,6 +12195,19 @@ void CNewCleanup_imp::x_ExtendedCleanupExtra(CSeq_entry_Handle seh) if (CCleanup::RepackageProteins(seh)) { ChangeMade(CCleanupChange::eChangeOther); } +#if 0 + // holding back for separate commit + // as requested in RW-726, uniquify feature IDs + map > changed_feats; + CFixFeatureId::s_ApplyToSeqInSet(seh, changed_feats); + for (auto &fh_feat : changed_feats) + { + auto orig_feat = fh_feat.first; + auto new_feat = fh_feat.second; + CSeq_feat_EditHandle feh(orig_feat); + feh.Replace(*new_feat); + } +#endif } @@ -12668,16 +12241,18 @@ void 
CNewCleanup_imp::ExtendedCleanupSeqSubmit ( BasicCleanupSeqSubmit( ss ); if( ! (m_Options & CCleanup::eClean_NoNcbiUserObjects) && ss.IsEntrys() ) { - NON_CONST_ITERATE(CSeq_submit::TData::TEntrys, it, ss.SetData().SetEntrys()) { - x_AddNcbiCleanupObject(**it); + auto& entrys = ss.SetData().SetEntrys(); + for (auto it : entrys) { + x_AddNcbiCleanupObject(*it); } } CAutogeneratedExtendedCleanup auto_ext_cleanup( *m_Scope, *this ); auto_ext_cleanup.ExtendedCleanupSeqSubmit( ss ); if (ss.IsSetData() && ss.GetData().IsEntrys()) { - NON_CONST_ITERATE(CSeq_submit::TData::TEntrys, it, ss.SetData().SetEntrys()) { - CSeq_entry_Handle seh = m_Scope->GetSeq_entryHandle(**it); + auto& entrys = ss.SetData().SetEntrys(); + for (auto it : entrys) { + CSeq_entry_Handle seh = m_Scope->GetSeq_entryHandle(*it); x_ExtendedCleanupExtra(seh); } } @@ -12718,8 +12293,9 @@ void CNewCleanup_imp::SetGlobalFlags(const CSeq_submit& ss) { ResetGlobalFlags(); if (ss.IsEntrys()) { - ITERATE(CSeq_submit::TData::TEntrys, it, ss.GetData().GetEntrys()) { - SetGlobalFlags((**it), false); + const auto& entries = ss.GetData().GetEntrys(); + for (auto it : entries) { + SetGlobalFlags((*it), false); } } } diff --git a/c++/src/objtools/cleanup/newcleanupp.hpp b/c++/src/objtools/cleanup/newcleanupp.hpp index bde17175..441821b9 100644 --- a/c++/src/objtools/cleanup/newcleanupp.hpp +++ b/c++/src/objtools/cleanup/newcleanupp.hpp @@ -258,29 +258,11 @@ private: void DbtagBC (CDbtag& dbt); void PubdescBC (CPubdesc& pub); - void PubEquivBC (CPub_equiv& pub_equiv); - EAction PubBC(CPub& pub, bool fix_initials); - EAction CitGenBC(CCit_gen& cg, bool fix_initials); - EAction CitSubBC(CCit_sub& cs, bool fix_initials); - EAction CitArtBC(CCit_art& ca, bool fix_initials); - EAction CitBookBC(CCit_book& cb, bool fix_initials); - EAction CitPatBC(CCit_pat& cp, bool fix_initials); - EAction CitLetBC(CCit_let& cl, bool fix_initials); - EAction CitProcBC(CCit_proc& cb, bool fix_initials); - EAction CitJourBC(CCit_jour &j, 
bool fix_initials); - EAction MedlineEntryBC(CMedline_entry& ml, bool fix_initials); - void AuthListBC( CAuth_list& al, bool fix_initials ); - void AffilBC( CAffil& af ); - enum EImprintBC { - eImprintBC_AllowStatusChange = 2, - eImprintBC_ForbidStatusChange - }; - void ImprintBC( CImprint& imprint, EImprintBC is_status_change_allowed ); void PubSetBC( CPub_set &pub_set ); void ImpFeatBC( CSeq_feat& sf ); - void SiteFeatBC( CSeqFeatData::ESite &site, CSeq_feat& sf ); + void SiteFeatBC( const CSeqFeatData::ESite &site, CSeq_feat& sf ); void SeqLocBC( CSeq_loc &loc ); void ConvertSeqLocWholeToInt( CSeq_loc &loc ); @@ -520,14 +502,14 @@ private: void x_DecodeXMLMarkChanged( std::string & str ); - private: void x_SortSeqDescs( CSeq_entry & seq_entry ); - void x_RemoveDupBioSource( CBioseq & bioseq ); void x_FixStructuredCommentKeywords( CBioseq & bioseq ); void x_RemoveDupBioSource( CBioseq_set & bioseq_set ); + void x_RemoveDupBioSource(CBioseq & bioseq); + void x_RemoveDupBioSource(CSeq_entry& se, const CBioSource& src); void x_RemoveDupPubs(CSeq_descr & descr); @@ -542,7 +524,6 @@ private: void x_RemoveEmptyUserObject( CSeq_descr & seq_descr ); void x_SetMolInfoTechFromGenBankBlock(CSeq_descr& seq_descr, CGB_block& block); void x_SetMolInfoTechFromGenBankBlock(CSeq_descr& seq_descr); - static bool s_ShouldRemoveKeyword(const string& keyword, CMolInfo::TTech tech); static bool x_CleanGenbankKeywords(CGB_block& blk, CMolInfo::TTech tech); void x_CleanupGenbankBlock(CBioseq& seq); void x_CleanupGenbankBlock(CBioseq_set& set); @@ -594,9 +575,6 @@ private: void x_MoveNpPub(CBioseq_set& np_set, CSeq_descr& descr); void x_MovePopPhyMutPub(CBioseq_set& bioseq_set); void x_RemovePub(CSeq_entry& se, const CPubdesc& pub); - void x_RemovePub(CBioseq& seq, const CPubdesc& pub); - void x_RemovePub(CBioseq_set& set, const CPubdesc& pub); - void x_RemovePub(CSeq_descr& descr, const CPubdesc& pub); bool x_IsDBLinkUserObj( const CSeqdesc & desc ); diff --git 
a/c++/src/objtools/data_loaders/genbank/gbloader.cpp b/c++/src/objtools/data_loaders/genbank/gbloader.cpp index d74d05d5..777b23a0 100644 --- a/c++/src/objtools/data_loaders/genbank/gbloader.cpp +++ b/c++/src/objtools/data_loaders/genbank/gbloader.cpp @@ -1,4 +1,4 @@ -/* $Id: gbloader.cpp 572532 2018-10-16 12:00:58Z ivanov $ +/* $Id: gbloader.cpp 578552 2019-01-22 15:38:07Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -137,6 +137,8 @@ public: virtual bool GetAddWGSMasterDescr(void) const; + virtual EGBErrorAction GetPTISErrorAction(void) const; + friend class CGBDataLoader; private: @@ -641,6 +643,28 @@ void CGBDataLoader::x_CreateDriver(const CGBLoaderParams& params) } } } + m_PTISErrorAction = eGBErrorAction_report; + if ( gb_params ) { + string param = + GetParam(gb_params, NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION); + if ( !param.empty() ) { + if ( NStr::EqualNocase(param, NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION_IGNORE) ) { + m_PTISErrorAction = eGBErrorAction_ignore; + } + else if ( NStr::EqualNocase(param, NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION_REPORT) ) { + m_PTISErrorAction = eGBErrorAction_report; + } + else if ( NStr::EqualNocase(param, NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION_THROW) ) { + m_PTISErrorAction = eGBErrorAction_throw; + } + else { + NCBI_THROW_FMT(CLoaderException, eBadConfig, + "Bad value of parameter " + NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION + ": \""<GetPTISErrorAction(); +} + + /* bool CGBDataLoader::LessBlobId(const TBlobId& id1, const TBlobId& id2) const { diff --git a/c++/src/objtools/data_loaders/genbank/id1/reader_id1.cpp b/c++/src/objtools/data_loaders/genbank/id1/reader_id1.cpp index 7be93ad9..cf53433a 100644 --- a/c++/src/objtools/data_loaders/genbank/id1/reader_id1.cpp +++ b/c++/src/objtools/data_loaders/genbank/id1/reader_id1.cpp @@ -1,4 +1,4 @@ -/* $Id: reader_id1.cpp 568301 2018-08-02 14:52:05Z vasilche $ +/* $Id: reader_id1.cpp 579016 2019-01-29 16:46:05Z ivanov $ * 
=========================================================================== * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information @@ -188,9 +188,7 @@ void CId1Reader::x_DisconnectAtSlot(TConn conn, bool failed) CReaderServiceConnector::SConnInfo& conn_info = m_Connections[conn]; m_Connector.RememberIfBad(conn_info); if ( conn_info.m_Stream ) { - LOG_POST_X(2, Warning << "CId1Reader("<= eTraceOpen ) { CDebugPrinter s(conn, "CId2Reader"); s << "Closing ID2 connection"; diff --git a/c++/src/objtools/data_loaders/genbank/processors.cpp b/c++/src/objtools/data_loaders/genbank/processors.cpp index ca0f8816..1b42d405 100644 --- a/c++/src/objtools/data_loaders/genbank/processors.cpp +++ b/c++/src/objtools/data_loaders/genbank/processors.cpp @@ -1,4 +1,4 @@ -/* $Id: processors.cpp 572633 2018-10-17 16:54:14Z ivanov $ +/* $Id: processors.cpp 576668 2018-12-19 13:09:21Z ivanov $ * =========================================================================== * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information @@ -672,12 +672,25 @@ CSeq_id_Handle s_GetWGSMasterSeq_id(const CSeq_id_Handle& idh) bool have_nz = NStr::StartsWith(acc, "NZ_"); SIZE_TYPE letters_pos = have_nz? 
3: 0; - SIZE_TYPE digits_pos = letters_pos+4; + bool long_acc = false; + SIZE_TYPE digits_pos = letters_pos+4; // default WGS accession has 4 letters + if ( digits_pos < acc.size() && !isdigit(acc[digits_pos] & 0xff) ) { + long_acc = true; + digits_pos += 2; // new longer WGS accession has 6 letters + } SIZE_TYPE digits_count = acc.size() - digits_pos; - if ( digits_count < 8 || digits_count > 10 ) { - return master_idh; + if ( !long_acc ) { + if ( digits_count < 8 || digits_count > 10 ) { + return master_idh; + } + } + else { + // new longer WGS accession can have 9 to 11 digits + if ( digits_count < 9 || digits_count > 11 ) { + return master_idh; + } } - if ( !s_GoodLetters(acc.substr(letters_pos, 4)) ) { + if ( !s_GoodLetters(acc.substr(letters_pos, digits_pos-letters_pos)) ) { return master_idh; } if ( !s_GoodDigits(acc.substr(digits_pos)) ) { diff --git a/c++/src/objtools/data_loaders/genbank/reader.cpp b/c++/src/objtools/data_loaders/genbank/reader.cpp index d86523c7..fa5eb45b 100644 --- a/c++/src/objtools/data_loaders/genbank/reader.cpp +++ b/c++/src/objtools/data_loaders/genbank/reader.cpp @@ -1,4 +1,4 @@ -/* $Id: reader.cpp 568378 2018-08-03 14:54:41Z dondosha $ +/* $Id: reader.cpp 579016 2019-01-29 16:46:05Z ivanov $ * =========================================================================== * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information @@ -439,16 +439,23 @@ void CReader::x_AbortConnection(TConn conn, bool failed) } -void CReader::x_DisconnectAtSlot(TConn conn, bool failed) +void CReader::x_ReportDisconnect(const char* reader, const char* server, + TConn conn, bool failed) const { if ( failed ) { - LOG_POST_X(4, Warning << "CReader("<SetValue().push_back("vdb-snp"); } + if (NCBI_PARAM_TYPE(GENBANK, VDB_CDD)::GetDefault()) { + // enable VDB-based CDD sequences + param->SetValue().push_back("vdb-cdd"); + } request.SetParams().Set().push_back(param); } } @@ -2166,6 +2173,18 @@ CId2ReaderBase::x_GetError(CReaderRequestResult& 
result, switch ( error.GetSeverity() ) { case CID2_Error::eSeverity_warning: error_flags |= fError_warning; + if ( error.IsSetMessage() ) { + const string& msg = error.GetMessage(); + if ( msg.find("PTIS_FAILURE") != NPOS ) { + EGBErrorAction action = result.GetPTISErrorAction(); + if ( action == eGBErrorAction_throw ) { + NCBI_THROW_FMT(CLoaderException, eConnectionFailed, msg); + } + if ( action == eGBErrorAction_report ) { + ERR_POST_X(16, Warning<= CProcessor_ExtAnnot::eSat_VDB_WGS_MIN && + blob_id.GetSat() <= CProcessor_ExtAnnot::eSat_VDB_WGS_MAX) ) { mask |= fBlobHasAllLocal; } else { diff --git a/c++/src/objtools/data_loaders/genbank/request_result.cpp b/c++/src/objtools/data_loaders/genbank/request_result.cpp index 5b574ff6..e8f6d7dd 100644 --- a/c++/src/objtools/data_loaders/genbank/request_result.cpp +++ b/c++/src/objtools/data_loaders/genbank/request_result.cpp @@ -1,4 +1,4 @@ -/* $Id: request_result.cpp 493171 2016-02-24 19:31:13Z vasilche $ +/* $Id: request_result.cpp 578552 2019-01-22 15:38:07Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -792,6 +792,12 @@ bool CReaderRequestResult::GetAddWGSMasterDescr(void) const } +EGBErrorAction CReaderRequestResult::GetPTISErrorAction(void) const +{ + return eGBErrorAction_report; +} + + CReaderRequestResult::~CReaderRequestResult(void) { ReleaseLocks(); diff --git a/c++/src/objtools/edit/cds_fix.cpp b/c++/src/objtools/edit/cds_fix.cpp index 45b55bb2..2534126c 100644 --- a/c++/src/objtools/edit/cds_fix.cpp +++ b/c++/src/objtools/edit/cds_fix.cpp @@ -1,4 +1,4 @@ -/* $Id: cds_fix.cpp 574263 2018-11-08 17:55:12Z ivanov $ +/* $Id: cds_fix.cpp 577636 2019-01-07 19:27:41Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -597,86 +597,6 @@ string s_GetmRNAName (const CSeq_feat& mrna) } } -void s_AdjustForUTR_SingleSide(CSeq_loc& mrna_loc, const CSeq_loc &utr_loc, const CSeq_id &id) -{ - 
CRef new_loc(new CSeq_loc); - new_loc->Assign(utr_loc); - new_loc->SetId(id); - CSeq_loc_I loc_it(*new_loc); - while(loc_it) - { - ++loc_it; - } - size_t pos = loc_it.GetPos(); - if (pos > 0) - loc_it.SetPos(pos-1); - int utr_stop = loc_it.GetRange().GetTo(); - CSeq_loc_CI old_loc_it(mrna_loc, CSeq_loc_CI::eEmpty_Skip, CSeq_loc_CI::eOrder_Positional); - int cds_start = old_loc_it.GetRange().GetFrom(); - int cds_stop = old_loc_it.GetRange().GetTo(); - while (old_loc_it && cds_stop < utr_stop) - { - ++old_loc_it; - if (old_loc_it) - { - cds_start = old_loc_it.GetRange().GetFrom(); - cds_stop = old_loc_it.GetRange().GetTo(); - } - } - - if (cds_start - utr_stop <= 2 && cds_stop >= utr_stop) - { - loc_it.SetTo(cds_stop); - if (old_loc_it) - ++old_loc_it; - } - ++loc_it; - while (old_loc_it) - { - loc_it.InsertInterval(id, old_loc_it.GetRange(), old_loc_it.GetStrand()); - ++old_loc_it; - } - mrna_loc.Assign(*loc_it.MakeSeq_loc()); -} - -void s_AdjustForUTR(const CSeq_feat& utr, int cd_start, int cd_stop, CSeq_loc& mrna_loc, bool& found5, bool& found3, CScope& scope) -{ - if (utr.GetData().GetSubtype() == CSeqFeatData::eSubtype_5UTR && mrna_loc.GetStrand() == utr.GetLocation().GetStrand()) { - if (utr.GetLocation().GetStrand() == eNa_strand_minus) { - if (utr.GetLocation().GetStart(eExtreme_Positional) != cd_stop + 1) { - return; - } - } - else { - if (utr.GetLocation().GetStop(eExtreme_Positional) != cd_start - 1) { - return; - } - } - found5 = true; - s_AdjustForUTR_SingleSide(mrna_loc, utr.GetLocation(), *mrna_loc.GetId()); - mrna_loc.SetPartialStart( utr.GetLocation().IsPartialStart(eExtreme_Positional), eExtreme_Positional ); - } - else if (utr.GetData().GetSubtype() == CSeqFeatData::eSubtype_3UTR && mrna_loc.GetStrand() == utr.GetLocation().GetStrand()) { - if (utr.GetLocation().GetStrand() == eNa_strand_minus) { - if (utr.GetLocation().GetStop(eExtreme_Positional) != cd_start - 1) { - return; - } - } - else { - if (utr.GetLocation().GetStart(eExtreme_Positional) 
!= cd_stop + 1) { - return; - } - } - found3 = true; - CRef new_loc(new CSeq_loc); - new_loc->Assign(utr.GetLocation()); - s_AdjustForUTR_SingleSide(*new_loc, mrna_loc, *mrna_loc.GetId()); - mrna_loc.Assign(*new_loc); - mrna_loc.SetPartialStop( utr.GetLocation().IsPartialStop(eExtreme_Positional), eExtreme_Positional ); - } -} - - /// MakemRNAforCDS /// A function to create a CSeq_feat that represents the @@ -720,28 +640,66 @@ CRef MakemRNAforCDS(const CSeq_feat& cds, CScope& scope) if (!mrna || !NStr::Equal(prot_nm, s_GetmRNAName(*mrna))) { new_mrna.Reset (new CSeq_feat()); new_mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA); - new_mrna->SetLocation().Assign(cds.GetLocation()); + new_mrna->SetLocation().Assign(cd_loc); new_mrna->SetData().SetRna().SetExt().SetName(prot_nm); bool found3 = false; bool found5 = false; - int cd_start = cd_loc.GetStart(eExtreme_Positional); - int cd_stop = cd_loc.GetStop(eExtreme_Positional); - //cd_loc.GetStrand(); - + CRef loc(new CSeq_loc()); + loc->Assign(new_mrna->GetLocation()); + if (bsh) { - for (CFeat_CI utr(bsh, CSeqFeatData::e_Imp); utr; ++utr) + for (CFeat_CI utr(bsh, cd_loc.IsReverseStrand() ? CSeqFeatData::eSubtype_5UTR : CSeqFeatData::eSubtype_3UTR); utr; ++utr) + { + if (utr->GetLocation().GetStart(eExtreme_Positional) == cd_loc.GetStop(eExtreme_Positional) + 1) + { + loc = sequence::Seq_loc_Add(*loc, utr->GetLocation(), CSeq_loc::fMerge_All|CSeq_loc::fSort, &scope); + if (cd_loc.IsReverseStrand()) + found5 = true; + else + found3 = true; + break; + } + } + for (CFeat_CI utr(bsh, cd_loc.IsReverseStrand() ? 
CSeqFeatData::eSubtype_3UTR : CSeqFeatData::eSubtype_5UTR); utr; ++utr) { - s_AdjustForUTR(utr->GetOriginalFeature(), cd_start, cd_stop, - new_mrna->SetLocation(), found5, found3, scope); + if (utr->GetLocation().GetStop(eExtreme_Positional) + 1 == cd_loc.GetStart(eExtreme_Positional) ) + { + loc = sequence::Seq_loc_Add(*loc, utr->GetLocation(), CSeq_loc::fMerge_All|CSeq_loc::fSort, &scope); + if (cd_loc.IsReverseStrand()) + found3 = true; + else + found5 = true; + break; + } } } else if (sah) { - for (CFeat_CI utr(sah, CSeqFeatData::e_Imp); utr; ++utr) { - s_AdjustForUTR(utr->GetOriginalFeature(), cd_start, cd_stop, - new_mrna->SetLocation(), found5, found3, scope); + for (CFeat_CI utr(sah, cd_loc.IsReverseStrand() ? CSeqFeatData::eSubtype_5UTR : CSeqFeatData::eSubtype_3UTR); utr; ++utr) + { + if (utr->GetLocation().GetStart(eExtreme_Positional) == cd_loc.GetStop(eExtreme_Positional) + 1) + { + loc = sequence::Seq_loc_Add(*loc, utr->GetLocation(), CSeq_loc::fMerge_All|CSeq_loc::fSort, &scope); + if (cd_loc.IsReverseStrand()) + found5 = true; + else + found3 = true; + break; + } + } + for (CFeat_CI utr(sah, cd_loc.IsReverseStrand() ? 
CSeqFeatData::eSubtype_3UTR : CSeqFeatData::eSubtype_5UTR); utr; ++utr) + { + if (utr->GetLocation().GetStop(eExtreme_Positional) + 1 == cd_loc.GetStart(eExtreme_Positional) ) + { + loc = sequence::Seq_loc_Add(*loc, utr->GetLocation(), CSeq_loc::fMerge_All|CSeq_loc::fSort, &scope); + if (cd_loc.IsReverseStrand()) + found3 = true; + else + found5 = true; + break; + } } } - + new_mrna->SetLocation(*loc); if (!found5) new_mrna->SetLocation().SetPartialStart(true, eExtreme_Positional); @@ -1377,7 +1335,7 @@ CRef GetNewProtId(objects::CBioseq_Handle bsh, int &offset, st gen_id = it; } } - if (gen_id || general_only) + if (gen_id && general_only) { hid = gen_id; } @@ -1386,11 +1344,28 @@ CRef GetNewProtId(objects::CBioseq_Handle bsh, int &offset, st if (!hid) NCBI_THROW(CException, eUnknown, "Seq-id of the requested type not found"); - CRef new_id = GetGeneralOrLocal(hid, bsh.GetScope(), offset, !general_only); + CRef new_id = GetGeneralOrLocal(hid, bsh.GetScope(), offset, true); new_id->GetLabel(&id_label, objects::CSeq_id::eBoth); return new_id; } +bool IsGeneralIdProtPresent(objects::CSeq_entry_Handle tse) +{ + bool found = false; + for (CBioseq_CI b_iter(tse, CSeq_inst::eMol_aa); b_iter; ++b_iter) + { + for (auto it : b_iter->GetId()) + { + if (it.GetSeqId()->IsGeneral() && it.GetSeqId()->GetGeneral().IsSetDb() && + !it.GetSeqId()->GetGeneral().IsSkippable()) + { + found = true; + break; + } + } + } + return found; +} END_SCOPE(edit) END_SCOPE(objects) diff --git a/c++/src/objtools/edit/field_handler.cpp b/c++/src/objtools/edit/field_handler.cpp index b7e64fb6..27a4643f 100644 --- a/c++/src/objtools/edit/field_handler.cpp +++ b/c++/src/objtools/edit/field_handler.cpp @@ -1,4 +1,4 @@ -/* $Id: field_handler.cpp 488758 2016-01-05 17:44:28Z asztalos $ +/* $Id: field_handler.cpp 580280 2019-02-12 19:11:02Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -149,12 +149,7 @@ CFieldHandlerFactory::Create(const 
string& field_name) bool CFieldHandlerFactory::s_IsSequenceIDField(const string& field) { - if (CFieldHandler::QualifierNamesAreEquivalent(field, kFieldTypeSeqId) - || CFieldHandler::QualifierNamesAreEquivalent(field, kFieldTypeSeqId)) { - return true; - } else { - return false; - } + return CFieldHandler::QualifierNamesAreEquivalent(field, kFieldTypeSeqId); } @@ -278,7 +273,7 @@ vector > GetRelatedFeatures (const CSeq_feat& obj_feat, CSe sequence::GetOverlappingFeatures (obj_feat.GetLocation(), CSeqFeatData::GetTypeFromSubtype(constraint_type), constraint_type, - sequence::eOverlap_Contained, + sequence::eOverlap_Contains, scores, *scope); ITERATE (sequence::TFeatScores, it, scores) { feat_list.push_back(it->second); diff --git a/c++/src/objtools/edit/text_object_description.cpp b/c++/src/objtools/edit/text_object_description.cpp index bb7d2bbe..a7225d59 100644 --- a/c++/src/objtools/edit/text_object_description.cpp +++ b/c++/src/objtools/edit/text_object_description.cpp @@ -1,4 +1,4 @@ -/* $Id: text_object_description.cpp 575171 2018-11-26 13:17:39Z ivanov $ +/* $Id: text_object_description.cpp 575303 2018-11-27 15:25:23Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -344,7 +344,7 @@ static string GetTextObjectDescription(const CSeq_feat& seq_feat, CScope& scope, } -static void GetTextObjectDescription(const CSeq_feat& seq_feat, CScope& scope, string &label, string &context, string &location, string &locus_tag) +void GetTextObjectDescription(const CSeq_feat& seq_feat, CScope& scope, string &label, string &context, string &location, string &locus_tag) { if (seq_feat.GetData().IsProt()) { CConstRef bioseq = sequence::GetBioseqFromSeqLoc(seq_feat.GetLocation(), scope).GetCompleteBioseq(); diff --git a/c++/src/objtools/format/accession_item.cpp b/c++/src/objtools/format/accession_item.cpp index ed1b9a07..50abad2f 100644 --- a/c++/src/objtools/format/accession_item.cpp +++ 
b/c++/src/objtools/format/accession_item.cpp @@ -1,4 +1,4 @@ -/* $Id: accession_item.cpp 554019 2017-12-27 15:18:41Z dondosha $ +/* $Id: accession_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -58,6 +58,11 @@ CAccessionItem::CAccessionItem(CBioseqContext& ctx) : x_GatherInfo(ctx); } +IFlatItem::EItem CAccessionItem::GetItemType(void) const +{ + return eItem_Accession; +} + void CAccessionItem::Format (IFormatter& formatter, diff --git a/c++/src/objtools/format/alignment_item.cpp b/c++/src/objtools/format/alignment_item.cpp index 13e69784..c5df2963 100644 --- a/c++/src/objtools/format/alignment_item.cpp +++ b/c++/src/objtools/format/alignment_item.cpp @@ -1,4 +1,4 @@ -/* $Id: alignment_item.cpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: alignment_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -47,6 +47,10 @@ CAlignmentItem::CAlignmentItem(const CSeq_align& align, CBioseqContext& ctx) { } +IFlatItem::EItem CAlignmentItem::GetItemType(void) const +{ + return eItem_Alignment; +} void CAlignmentItem::Format (IFormatter& formatter, diff --git a/c++/src/objtools/format/basecount_item.cpp b/c++/src/objtools/format/basecount_item.cpp index a48bbfce..99eb7fbe 100644 --- a/c++/src/objtools/format/basecount_item.cpp +++ b/c++/src/objtools/format/basecount_item.cpp @@ -1,4 +1,4 @@ -/* $Id: basecount_item.cpp 400413 2013-05-21 12:32:09Z kornbluh $ +/* $Id: basecount_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -52,6 +52,10 @@ CBaseCountItem::CBaseCountItem(CBioseqContext& ctx) : x_GatherInfo(ctx); } +IFlatItem::EItem CBaseCountItem::GetItemType(void) const +{ + return eItem_BaseCount; +} void CBaseCountItem::Format (IFormatter& formatter, diff --git 
a/c++/src/objtools/format/comment_item.cpp b/c++/src/objtools/format/comment_item.cpp index 32be88c0..e2353e99 100644 --- a/c++/src/objtools/format/comment_item.cpp +++ b/c++/src/objtools/format/comment_item.cpp @@ -1,4 +1,4 @@ -/* $Id: comment_item.cpp 559026 2018-03-06 01:23:45Z kans $ +/* $Id: comment_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -89,6 +89,11 @@ CCommentItem::CCommentItem(CBioseqContext& ctx, bool need_period) : swap(m_First, sm_FirstComment); } +IFlatItem::EItem CCommentItem::GetItemType(void) const +{ + return eItem_Comment; +} + CCommentItem::CCommentItem (const string& comment, diff --git a/c++/src/objtools/format/contig_item.cpp b/c++/src/objtools/format/contig_item.cpp index c10a4b3a..e45cb127 100644 --- a/c++/src/objtools/format/contig_item.cpp +++ b/c++/src/objtools/format/contig_item.cpp @@ -1,4 +1,4 @@ -/* $Id: contig_item.cpp 195898 2010-06-28 17:32:16Z dicuccio $ +/* $Id: contig_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -56,6 +56,11 @@ CContigItem::CContigItem(CBioseqContext& ctx) : } +IFlatItem::EItem CContigItem::GetItemType(void) const +{ + return eItem_Contig; +} + void CContigItem::Format (IFormatter& formatter, IFlatTextOStream& text_os) const diff --git a/c++/src/objtools/format/ctrl_items.cpp b/c++/src/objtools/format/ctrl_items.cpp index 61f5e682..9cef65da 100644 --- a/c++/src/objtools/format/ctrl_items.cpp +++ b/c++/src/objtools/format/ctrl_items.cpp @@ -1,4 +1,4 @@ -/* $Id: ctrl_items.cpp 472056 2015-07-06 19:29:12Z gotvyans $ +/* $Id: ctrl_items.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -71,6 +71,27 @@ CStartItem::CStartItem( CSeq_entry_Handle seh ) x_SetDate( seh ); } +IFlatItem::EItem 
CStartItem::GetItemType(void) const +{ + return eItem_StartItem; +} + +IFlatItem::EItem CEndItem::GetItemType(void) const +{ + return eItem_EndItem; +} + +IFlatItem::EItem CStartSectionItem::GetItemType(void) const +{ + return eItem_StartSection; +} + +IFlatItem::EItem CEndSectionItem::GetItemType(void) const +{ + return eItem_EndSection; +} + + // ---------------------------------------------------------------------------- void CStartItem::x_SetDate( diff --git a/c++/src/objtools/format/date_item.cpp b/c++/src/objtools/format/date_item.cpp index 8882b8b8..753450b9 100644 --- a/c++/src/objtools/format/date_item.cpp +++ b/c++/src/objtools/format/date_item.cpp @@ -1,4 +1,4 @@ -/* $Id: date_item.cpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: date_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -53,6 +53,10 @@ CDateItem::CDateItem(CBioseqContext& ctx) : x_GatherInfo(ctx); } +IFlatItem::EItem CDateItem::GetItemType(void) const +{ + return eItem_Date; +} void CDateItem::Format (IFormatter& formatter, diff --git a/c++/src/objtools/format/dbsource_item.cpp b/c++/src/objtools/format/dbsource_item.cpp index 0b25d457..93ef2f68 100644 --- a/c++/src/objtools/format/dbsource_item.cpp +++ b/c++/src/objtools/format/dbsource_item.cpp @@ -1,4 +1,4 @@ -/* $Id: dbsource_item.cpp 568762 2018-08-09 21:34:53Z kans $ +/* $Id: dbsource_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -72,6 +72,10 @@ CDBSourceItem::CDBSourceItem(CBioseqContext& ctx) : x_GatherInfo(ctx); } +IFlatItem::EItem CDBSourceItem::GetItemType(void) const +{ + return eItem_DbSource; +} void CDBSourceItem::Format (IFormatter& formatter, diff --git a/c++/src/objtools/format/defline_item.cpp b/c++/src/objtools/format/defline_item.cpp index b93138a9..2bc3c82e 100644 --- a/c++/src/objtools/format/defline_item.cpp 
+++ b/c++/src/objtools/format/defline_item.cpp @@ -1,4 +1,4 @@ -/* $Id: defline_item.cpp 567707 2018-07-23 17:19:47Z kans $ +/* $Id: defline_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -53,6 +53,10 @@ CDeflineItem::CDeflineItem(CBioseqContext& ctx) : x_GatherInfo(ctx); } +IFlatItem::EItem CDeflineItem::GetItemType(void) const +{ + return eItem_Defline; +} void CDeflineItem::Format (IFormatter& formatter, diff --git a/c++/src/objtools/format/feature_item.cpp b/c++/src/objtools/format/feature_item.cpp index 035394dd..106b0445 100644 --- a/c++/src/objtools/format/feature_item.cpp +++ b/c++/src/objtools/format/feature_item.cpp @@ -1,4 +1,4 @@ -/* $Id: feature_item.cpp 574586 2018-11-15 15:11:36Z ivanov $ +/* $Id: feature_item.cpp 580652 2019-02-19 12:39:22Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -525,7 +525,22 @@ static bool s_SkipFeature(const CMappedFeat& feat, if ( cfg.HideImpFeatures() && type == CSeqFeatData::e_Imp ) { return true; } - + + if ( cfg.HideMiscFeatures() ) { + if ( type == CSeqFeatData::e_Site || + type == CSeqFeatData::e_Bond || + type == CSeqFeatData::e_Region || + type == CSeqFeatData::e_Comment || + subtype == CSeqFeatData::eSubtype_misc_feature || + subtype == CSeqFeatData::eSubtype_preprotein ) { + return true; + } + } + + if ( cfg.HideExonFeatures() && subtype == CSeqFeatData::eSubtype_exon ) { + return true; + } + if ( cfg.HideIntronFeatures() && subtype == CSeqFeatData::eSubtype_intron ) { return true; } @@ -539,6 +554,14 @@ static bool s_SkipFeature(const CMappedFeat& feat, } } + if ( cfg.GeneRNACDSFeatures() ) { + if ( type != CSeqFeatData::e_Gene && + type != CSeqFeatData::e_Rna && + type != CSeqFeatData::e_Cdregion ) { + return true; + } + } + // skip genes in DDBJ format if ( cfg.IsFormatDDBJ() && type == CSeqFeatData::e_Gene ) { return true; @@ -748,6 
+771,10 @@ CFeatHeaderItem::CFeatHeaderItem(CBioseqContext& ctx) : CFlatItem(&ctx) x_GatherInfo(ctx); } +IFlatItem::EItem CFeatHeaderItem::GetItemType(void) const +{ + return eItem_FeatHeader; +} void CFeatHeaderItem::x_GatherInfo(CBioseqContext& ctx) { @@ -1082,6 +1109,10 @@ CFeatureItem::CFeatureItem x_GatherInfoWithParent(ctx, parentFeatureItem); } +IFlatItem::EItem CFeatureItem::GetItemType(void) const +{ + return eItem_Feature; +} void CFeatureItem::x_GatherInfoWithParent(CBioseqContext& ctx, CConstRef parentFeatureItem ) { @@ -1641,10 +1672,7 @@ void CFeatureItem::x_AddQualsIdx( } } - if (feat_gene_xref && ! suppressed && - ! CGeneFinder::ResolveGeneXref(feat_gene_xref, ctx.GetTopLevelEntry())) { - gene_ref = feat_gene_xref; - } else if ((! feat_gene_xref || ! suppressed) && + if ((! feat_gene_xref || ! suppressed) && subtype != CSeqFeatData::eSubtype_primer_bind) { CRef ft; bool is_mapped = false; @@ -1657,15 +1685,24 @@ void CFeatureItem::x_AddQualsIdx( subtype == CSeqFeatData::eSubtype_transit_peptide_aa || subtype == CSeqFeatData::eSubtype_propeptide_aa) { try { - CRef fsx = ft->GetBestGene(); - if (fsx) { - const CMappedFeat mf = fsx->GetMappedFeat(); - if (mf) { - gene_feat = &(mf.GetMappedFeature()); - gene_ref = &(mf.GetData().GetGene()); + if ( m_Feat.IsSetXref() ) { + feat_gene_xref = m_Feat.GetGeneXref(); + if ( feat_gene_xref ) { + gene_ref = feat_gene_xref; is_mapped = true; } } + if (! is_mapped) { + CRef fsx = ft->GetBestGene(); + if (fsx) { + const CMappedFeat mf = fsx->GetMappedFeat(); + if (mf) { + gene_feat = &(mf.GetMappedFeature()); + gene_ref = &(mf.GetData().GetGene()); + is_mapped = true; + } + } + } if (! 
is_mapped) { // e.g., check sig_peptide for gene overlapping parent CDS CSeq_feat_Handle parent_feat_handle; @@ -2162,6 +2199,7 @@ void CFeatureItem::x_AddQualsRna( break; } case CRNA_ref::eType_mRNA: + case CRNA_ref::eType_rRNA: { if ( !pseudo && ( cfg.ShowTranscript() || cfg.IsFormatGBSeq() || cfg.IsFormatINSDSeq() ) ) { CSeqVector vec(feat.GetLocation(), scope); @@ -5700,6 +5738,11 @@ CSourceFeatureItem::CSourceFeatureItem } +IFlatItem::EItem CSourceFeatureItem::GetItemType(void) const +{ + return eItem_SourceFeat; +} + void CSourceFeatureItem::x_GatherInfo(CBioseqContext& ctx) { const CBioSource& bsrc = GetSource(); @@ -6115,6 +6158,7 @@ void CSourceFeatureItem::x_FormatQuals(CFlatFeature& ff) const DO_QUAL(collected_by); DO_QUAL(identified_by); DO_QUAL(PCR_primers); + DO_QUAL(metagenome_source); if ( !GetContext()->Config().SrcQualsToNote() ) { // some note qualifiers appear as regular quals in GBench or Dump mode @@ -6166,8 +6210,7 @@ void CSourceFeatureItem::x_FormatGBNoteQuals(CFlatFeature& ff) const DO_QUAL(frequency); } - DO_QUAL(metagenome_source), - +// DO_QUAL(metagenome_source), // DO_QUAL(collection_date); // DO_QUAL(collected_by); // DO_QUAL(identified_by); @@ -6253,11 +6296,13 @@ void CSourceFeatureItem::x_FormatNoteQuals(CFlatFeature& ff) const DO_NOTE(frequency); } + /* if (s_IsExactAndNonExactMatchOnNoteQuals(qvec, "metagenomic")) { x_FormatNoteQual(eSQ_metagenome_source, "metagenomic; derived from metagenome", qvec); } else { x_FormatNoteQual(eSQ_metagenome_source, "derived from metagenome", qvec); } + */ DO_NOTE(genotype); x_FormatNoteQual(eSQ_plastid_name, "plastid", qvec); diff --git a/c++/src/objtools/format/flat_file_config.cpp b/c++/src/objtools/format/flat_file_config.cpp index c66e1e83..fde509f0 100644 --- a/c++/src/objtools/format/flat_file_config.cpp +++ b/c++/src/objtools/format/flat_file_config.cpp @@ -1,4 +1,4 @@ -/* $Id: flat_file_config.cpp 574582 2018-11-15 15:10:09Z ivanov $ +/* $Id: flat_file_config.cpp 578289 2019-01-16 
16:33:46Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -561,9 +561,10 @@ void CFlatFileConfig::AddArgumentDescriptions(CArgDescriptions& args) "Custom flat file output bits. The value is the bitwise OR (logical addition) of:\n" " 1 - hide protein_id and transcript_id\n" " 2 - hide GI number\n" - " 4 - allow long locus lines" - " 1024 - disable annot references" - " 2048 - use SeqEntryIndexer", + " 4 - allow long locus lines\n" + " 1024 - disable annot references\n" + " 2048 - use SeqEntryIndexer\n" + " 16384 - only Gene, RNA, and CDS features", CArgDescriptions::eInteger, "0"); diff --git a/c++/src/objtools/format/flat_file_generator.cpp b/c++/src/objtools/format/flat_file_generator.cpp index 8c084f03..ccab98fa 100644 --- a/c++/src/objtools/format/flat_file_generator.cpp +++ b/c++/src/objtools/format/flat_file_generator.cpp @@ -1,4 +1,4 @@ -/* $Id: flat_file_generator.cpp 573607 2018-10-30 11:57:34Z ivanov $ +/* $Id: flat_file_generator.cpp 578289 2019-01-16 16:33:46Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -298,6 +298,18 @@ void CFlatFileGenerator::Generate if ( m_Ctx->GetConfig().ShowCDDFeatures() ) { flags |= CSeqEntryIndex::fShowCDDFeats; } + if ( m_Ctx->GetConfig().HideExonFeatures() ) { + flags |= CSeqEntryIndex::fHideExonFeats; + } + if ( m_Ctx->GetConfig().HideIntronFeatures() ) { + flags |= CSeqEntryIndex::fHideIntronFeats; + } + if ( m_Ctx->GetConfig().HideMiscFeatures() ) { + flags |= CSeqEntryIndex::fHideMiscFeats; + } + if ( m_Ctx->GetConfig().GeneRNACDSFeatures() ) { + flags |= CSeqEntryIndex::fGeneRNACDSOnly; + } CRef idx(new CSeqEntryIndex( topseh, policy, flags )); m_Ctx->SetSeqEntryIndex(idx); if (idx->IsIndexFailure()) { diff --git a/c++/src/objtools/format/gap_item.cpp b/c++/src/objtools/format/gap_item.cpp index e5f3d7f0..ad5bce94 100644 --- a/c++/src/objtools/format/gap_item.cpp +++ 
b/c++/src/objtools/format/gap_item.cpp @@ -1,4 +1,4 @@ -/* $Id: gap_item.cpp 341078 2011-10-17 13:24:43Z kornbluh $ +/* $Id: gap_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -55,6 +55,11 @@ CGapItem::CGapItem { } +IFlatItem::EItem CGapItem::GetItemType(void) const +{ + return eItem_Gap; +} + void CGapItem::Format(IFormatter& formatter, IFlatTextOStream& text_os) const { formatter.FormatGap(*this, text_os); diff --git a/c++/src/objtools/format/gather_items.cpp b/c++/src/objtools/format/gather_items.cpp index cfaa502a..a95bc7b9 100644 --- a/c++/src/objtools/format/gather_items.cpp +++ b/c++/src/objtools/format/gather_items.cpp @@ -1,4 +1,4 @@ -/* $Id: gather_items.cpp 573606 2018-10-30 11:57:09Z ivanov $ +/* $Id: gather_items.cpp 580650 2019-02-19 12:38:27Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2355,12 +2355,18 @@ void s_SetSelection(SAnnotSelector& sel, CBioseqContext& ctx) } if ( cfg.HideExonFeatures() ) { sel.ExcludeNamedAnnots("Exon"); + sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon); } if ( cfg.HideIntronFeatures() ) { sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron); } if ( cfg.HideMiscFeatures() ) { + sel.ExcludeFeatType(CSeqFeatData::e_Site); + sel.ExcludeFeatType(CSeqFeatData::e_Bond); + sel.ExcludeFeatType(CSeqFeatData::e_Region); + sel.ExcludeFeatType(CSeqFeatData::e_Comment); sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature); + sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_preprotein); } if ( cfg.HideGapFeatures() ) { sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_gap); @@ -3440,6 +3446,7 @@ void CFlatGatherer::x_GatherFeaturesOnRangeIdx return; } + feat_loc = Seq_loc_Merge(*feat_loc, CSeq_loc::fMerge_Abutting, &scope); // HANDLE GAPS SECTION GOES HERE diff --git a/c++/src/objtools/format/genbank_gather.cpp b/c++/src/objtools/format/genbank_gather.cpp 
index fcbe0f85..f0b3e012 100644 --- a/c++/src/objtools/format/genbank_gather.cpp +++ b/c++/src/objtools/format/genbank_gather.cpp @@ -1,4 +1,4 @@ -/* $Id: genbank_gather.cpp 573606 2018-10-30 11:57:09Z ivanov $ +/* $Id: genbank_gather.cpp 576582 2018-12-18 14:13:31Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -352,6 +352,9 @@ void CGenbankGatherer::x_GatherTLS(void) const for (CSeqdesc_CI desc(ctx.GetHandle(), CSeqdesc::e_User); desc; ++desc) { const CUser_object& uo = desc->GetUser(); + if ( !uo.GetType().IsStr() ) { + continue; + } const string& type = uo.GetType().GetStr(); if ( ! NStr::EqualNocase(type, "TLSProjects") ) { diff --git a/c++/src/objtools/format/genome_project_item.cpp b/c++/src/objtools/format/genome_project_item.cpp index 4d57b16c..76e26407 100644 --- a/c++/src/objtools/format/genome_project_item.cpp +++ b/c++/src/objtools/format/genome_project_item.cpp @@ -1,4 +1,4 @@ -/* $Id: genome_project_item.cpp 556353 2018-01-30 12:00:12Z bollin $ +/* $Id: genome_project_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -54,6 +54,10 @@ CGenomeProjectItem::CGenomeProjectItem(CBioseqContext& ctx) : x_GatherInfo(ctx); } +IFlatItem::EItem CGenomeProjectItem::GetItemType(void) const +{ + return eItem_GenomeProject; +} void CGenomeProjectItem::Format (IFormatter& formatter, diff --git a/c++/src/objtools/format/html_anchor_item.cpp b/c++/src/objtools/format/html_anchor_item.cpp index 7e6cf3c0..1749c03b 100644 --- a/c++/src/objtools/format/html_anchor_item.cpp +++ b/c++/src/objtools/format/html_anchor_item.cpp @@ -1,4 +1,4 @@ -/* $Id: html_anchor_item.cpp 294826 2011-05-27 11:19:20Z kornbluh $ +/* $Id: html_anchor_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -45,6 +45,11 @@ 
CHtmlAnchorItem::CHtmlAnchorItem( CBioseqContext& ctx, const string &label_core x_GatherInfo(ctx); } +IFlatItem::EItem CHtmlAnchorItem::GetItemType(void) const +{ + return eItem_HtmlAnchor; +} + void CHtmlAnchorItem::Format(IFormatter& formatter, IFlatTextOStream& text_os) const { formatter.FormatHtmlAnchor(*this, text_os); diff --git a/c++/src/objtools/format/keywords_item.cpp b/c++/src/objtools/format/keywords_item.cpp index 39de6185..d6ea9d28 100644 --- a/c++/src/objtools/format/keywords_item.cpp +++ b/c++/src/objtools/format/keywords_item.cpp @@ -1,4 +1,4 @@ -/* $Id: keywords_item.cpp 560480 2018-03-22 21:45:28Z kans $ +/* $Id: keywords_item.cpp 580654 2019-02-19 12:40:00Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -64,6 +64,10 @@ CKeywordsItem::CKeywordsItem(CBioseqContext& ctx) : x_GatherInfo(ctx); } +IFlatItem::EItem CKeywordsItem::GetItemType(void) const +{ + return eItem_Keywords; +} void CKeywordsItem::Format (IFormatter& formatter, @@ -278,6 +282,26 @@ void CKeywordsItem::x_GatherInfo(CBioseqContext& ctx) for (CSeqdesc_CI di(bsh, CSeqdesc::e_User); di; ++di) { const CUser_object& usr = di->GetUser(); if ( ! 
CComment_rule::IsStructuredComment (usr) ) continue; + string pfx = CComment_rule::GetStructuredCommentPrefix ( usr, true ); + if ( NStr::EqualNocase (pfx, "MIGS:5.0-Data" )) { + x_AddKeyword("GSC:MIxS"); + x_AddKeyword("MIGS:5.0."); + } else if ( NStr::EqualNocase (pfx, "MIMS:5.0-Data" )) { + x_AddKeyword("GSC:MIxS"); + x_AddKeyword("MIMS:5.0."); + } else if ( NStr::EqualNocase (pfx, "MIMARKS:5.0-Data" )) { + x_AddKeyword("GSC:MIxS"); + x_AddKeyword("MIMARKS:5.0."); + } else if ( NStr::EqualNocase (pfx, "MISAG:5.0-Data" )) { + x_AddKeyword("GSC:MIxS"); + x_AddKeyword("MISAG:5.0."); + } else if ( NStr::EqualNocase (pfx, "MIMAG:5.0-Data" )) { + x_AddKeyword("GSC:MIxS"); + x_AddKeyword("MIMAG:5.0."); + } else if ( NStr::EqualNocase (pfx, "MIUVIG:5.0-Data" )) { + x_AddKeyword("GSC:MIxS"); + x_AddKeyword("MIUVIG:5.0."); + } try { list keywords = CComment_set::GetKeywords(usr); FOR_EACH_STRING_IN_LIST ( s_itr, keywords ) { @@ -292,12 +316,15 @@ void CKeywordsItem::x_GatherInfo(CBioseqContext& ctx) x_AddKeyword("UNVERIFIED"); } if ((unv & CBioseqContext::fUnverified_Organism) != 0) { + x_AddKeyword("UNVERIFIED"); x_AddKeyword("UNVERIFIED_ORGANISM"); } if ((unv & CBioseqContext::fUnverified_Misassembled) != 0) { + x_AddKeyword("UNVERIFIED"); x_AddKeyword("UNVERIFIED_MISASSEMBLY"); } if ((unv & CBioseqContext::fUnverified_Contaminant) != 0) { + x_AddKeyword("UNVERIFIED"); x_AddKeyword("UNVERIFIED_CONTAMINANT"); } diff --git a/c++/src/objtools/format/locus_item.cpp b/c++/src/objtools/format/locus_item.cpp index 6afcab50..894abc57 100644 --- a/c++/src/objtools/format/locus_item.cpp +++ b/c++/src/objtools/format/locus_item.cpp @@ -1,4 +1,4 @@ -/* $Id: locus_item.cpp 574584 2018-11-15 15:11:08Z ivanov $ +/* $Id: locus_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -78,6 +78,10 @@ CLocusItem::CLocusItem(CBioseqContext& ctx) : x_GatherInfo(ctx); } +IFlatItem::EItem 
CLocusItem::GetItemType(void) const +{ + return eItem_Locus; +} void CLocusItem::Format (IFormatter& formatter, diff --git a/c++/src/objtools/format/origin_item.cpp b/c++/src/objtools/format/origin_item.cpp index edf54bb4..5069e5d7 100644 --- a/c++/src/objtools/format/origin_item.cpp +++ b/c++/src/objtools/format/origin_item.cpp @@ -1,4 +1,4 @@ -/* $Id: origin_item.cpp 213605 2010-11-24 15:12:46Z kornbluh $ +/* $Id: origin_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -53,6 +53,11 @@ COriginItem::COriginItem(CBioseqContext& ctx) : } +IFlatItem::EItem COriginItem::GetItemType(void) const +{ + return eItem_Origin; +} + void COriginItem::Format (IFormatter& formatter, IFlatTextOStream& text_os) const diff --git a/c++/src/objtools/format/primary_item.cpp b/c++/src/objtools/format/primary_item.cpp index 59db15ba..c552fdf1 100644 --- a/c++/src/objtools/format/primary_item.cpp +++ b/c++/src/objtools/format/primary_item.cpp @@ -1,4 +1,4 @@ -/* $Id: primary_item.cpp 550815 2017-11-08 13:01:06Z bollin $ +/* $Id: primary_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -63,6 +63,10 @@ CPrimaryItem::CPrimaryItem(CBioseqContext& ctx) : } } +IFlatItem::EItem CPrimaryItem::GetItemType(void) const +{ + return eItem_Primary; +} void CPrimaryItem::Format (IFormatter& formatter, diff --git a/c++/src/objtools/format/qualifiers.cpp b/c++/src/objtools/format/qualifiers.cpp index 0097a03f..82052ed4 100644 --- a/c++/src/objtools/format/qualifiers.cpp +++ b/c++/src/objtools/format/qualifiers.cpp @@ -1,4 +1,4 @@ -/* $Id: qualifiers.cpp 564513 2018-05-29 17:40:10Z kans $ +/* $Id: qualifiers.cpp 578996 2019-01-29 13:12:39Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -865,43 +865,54 @@ void 
CFlatMolTypeQVal::Format(TFlatQuals& q, const CTempString& name, void CFlatSubmitterSeqidQVal::Format(TFlatQuals& q, const CTempString& name, CBioseqContext& ctx, IFlatQVal::TFlags flags) const { - if ( ctx.Config().IsModeGBench() || ctx.Config().IsModeDump() ) { - switch ( m_Tech ) { - case CMolInfo::eTech_wgs: - case CMolInfo::eTech_tsa: - case CMolInfo::eTech_targeted: - ITERATE (CBioseq::TId, itr, ctx.GetBioseqIds()) { - const CSeq_id& id = **itr; - if ( id.Which() != CSeq_id::e_General ) continue; - const CDbtag& dbtag = id.GetGeneral(); - if ( ! dbtag.IsSetDb() ) continue; - string dbname = dbtag.GetDb(); - if ( dbname.length() != 10 ) continue; - if ( ! NStr::StartsWith(dbname, "WGS:" ) && ! NStr::StartsWith(dbname, "TSA:" ) && ! NStr::StartsWith(dbname, "TLS:" ) ) continue; - bool bail = false; - for ( int i = 4; i < 8; i++ ) { - char ch = dbname[i]; - if ( ! isupper(ch) && ! islower(ch) ) { - bail = true; - } - } - if ( bail ) continue; - for ( int i = 8; i < 10; i++ ) { - char ch = dbname[i]; - if ( ! isdigit(ch) ) { + switch ( m_Tech ) { + case CMolInfo::eTech_wgs: + case CMolInfo::eTech_tsa: + case CMolInfo::eTech_targeted: + ITERATE (CBioseq::TId, itr, ctx.GetBioseqIds()) { + const CSeq_id& id = **itr; + if ( id.Which() != CSeq_id::e_General ) continue; + const CDbtag& dbtag = id.GetGeneral(); + if ( ! dbtag.IsSetDb() ) continue; + string dbname = dbtag.GetDb(); + if ( ! NStr::StartsWith(dbname, "WGS:" ) && ! NStr::StartsWith(dbname, "TSA:" ) && ! 
NStr::StartsWith(dbname, "TLS:" ) ) continue; + dbname.erase(0, 4); + if (NStr::StartsWith(dbname, "NZ_" )) { + dbname.erase(0, 3); + } + int num_letters = 0; + int num_digits = 0; + int len = dbname.length(); + if ( len != 6 && len != 8 ) continue; + bool bail = false; + for ( int i = 0; i < len; i++ ) { + char ch = dbname[i]; + if ( isupper(ch) || islower(ch) ) { + num_letters++; + if ( num_digits > 0 ) { bail = true; } - } - if ( bail ) continue; - if ( dbtag.IsSetTag() && dbtag.GetTag().IsStr() ) { - string tag = dbtag.GetTag().GetStr(); - x_AddFQ(q, name, tag); + } else if ( isdigit(ch) ) { + num_digits++; + } else { + bail = true; } } - break; - default: - break; + if ( num_letters != 4 && num_letters != 6 ) { + bail = true; + } + if ( num_digits != 2 ) { + bail = true; + } + if ( bail ) continue; + if ( dbtag.IsSetTag() && dbtag.GetTag().IsStr() ) { + string tag = dbtag.GetTag().GetStr(); + x_AddFQ(q, name, tag); + } } + break; + default: + break; } } diff --git a/c++/src/objtools/format/reference_item.cpp b/c++/src/objtools/format/reference_item.cpp index f35d1180..57d8ed1a 100644 --- a/c++/src/objtools/format/reference_item.cpp +++ b/c++/src/objtools/format/reference_item.cpp @@ -1,4 +1,4 @@ -/* $Id: reference_item.cpp 557236 2018-02-12 22:36:27Z kans $ +/* $Id: reference_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -239,6 +239,10 @@ CReferenceItem::CReferenceItem(const CSubmit_block& sub, CBioseqContext& ctx) : CReferenceItem::~CReferenceItem() { } +IFlatItem::EItem CReferenceItem::GetItemType(void) const +{ + return eItem_Reference; +} void CReferenceItem::SetLoc(const CConstRef& loc) { diff --git a/c++/src/objtools/format/segment_item.cpp b/c++/src/objtools/format/segment_item.cpp index 07fdeed0..c4817685 100644 --- a/c++/src/objtools/format/segment_item.cpp +++ b/c++/src/objtools/format/segment_item.cpp @@ -1,4 +1,4 @@ -/* $Id: segment_item.cpp 
103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: segment_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -49,6 +49,10 @@ CSegmentItem::CSegmentItem(CBioseqContext& ctx) : x_GatherInfo(ctx); } +IFlatItem::EItem CSegmentItem::GetItemType(void) const +{ + return eItem_Segment; +} void CSegmentItem::Format (IFormatter& formatter, diff --git a/c++/src/objtools/format/sequence_item.cpp b/c++/src/objtools/format/sequence_item.cpp index d8fc8ab7..e86fe9f5 100644 --- a/c++/src/objtools/format/sequence_item.cpp +++ b/c++/src/objtools/format/sequence_item.cpp @@ -1,4 +1,4 @@ -/* $Id: sequence_item.cpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: sequence_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -59,6 +59,11 @@ CSequenceItem::CSequenceItem } +IFlatItem::EItem CSequenceItem::GetItemType(void) const +{ + return eItem_Sequence; +} + void CSequenceItem::Format (IFormatter& formatter, IFlatTextOStream& text_os) const diff --git a/c++/src/objtools/format/source_item.cpp b/c++/src/objtools/format/source_item.cpp index 7f9ea01f..4f8f5313 100644 --- a/c++/src/objtools/format/source_item.cpp +++ b/c++/src/objtools/format/source_item.cpp @@ -1,4 +1,4 @@ -/* $Id: source_item.cpp 563713 2018-05-14 16:53:23Z kans $ +/* $Id: source_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -70,6 +70,10 @@ CSourceItem::CSourceItem(CBioseqContext& ctx) : x_GatherInfo(ctx); } +IFlatItem::EItem CSourceItem::GetItemType(void) const +{ + return eItem_Source; +} CSourceItem::CSourceItem(CBioseqContext& ctx, const CBioSource& bsrc, const CSerialObject& obj) : CFlatItem(&ctx), diff --git a/c++/src/objtools/format/tsa_item.cpp b/c++/src/objtools/format/tsa_item.cpp index fc01d827..aef8269c 100644 
--- a/c++/src/objtools/format/tsa_item.cpp +++ b/c++/src/objtools/format/tsa_item.cpp @@ -1,4 +1,4 @@ -/* $Id: tsa_item.cpp 360035 2012-04-19 13:43:48Z kornbluh $ +/* $Id: tsa_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -56,6 +56,10 @@ CTSAItem::CTSAItem x_SetObject(uo); } +IFlatItem::EItem CTSAItem::GetItemType(void) const +{ + return eItem_Tsa; +} void CTSAItem::Format (IFormatter& formatter, diff --git a/c++/src/objtools/format/version_item.cpp b/c++/src/objtools/format/version_item.cpp index 8dba8883..a453f010 100644 --- a/c++/src/objtools/format/version_item.cpp +++ b/c++/src/objtools/format/version_item.cpp @@ -1,4 +1,4 @@ -/* $Id: version_item.cpp 399305 2013-05-13 19:13:43Z grichenk $ +/* $Id: version_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -51,6 +51,10 @@ CVersionItem::CVersionItem(CBioseqContext& ctx) : x_GatherInfo(ctx); } +IFlatItem::EItem CVersionItem::GetItemType(void) const +{ + return eItem_Version; +} void CVersionItem::Format (IFormatter& formatter, diff --git a/c++/src/objtools/format/wgs_item.cpp b/c++/src/objtools/format/wgs_item.cpp index e88adb18..c3c7cebf 100644 --- a/c++/src/objtools/format/wgs_item.cpp +++ b/c++/src/objtools/format/wgs_item.cpp @@ -1,4 +1,4 @@ -/* $Id: wgs_item.cpp 103491 2007-05-04 17:18:18Z kazimird $ +/* $Id: wgs_item.cpp 578290 2019-01-16 16:34:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -56,6 +56,10 @@ CWGSItem::CWGSItem x_SetObject(uo); } +IFlatItem::EItem CWGSItem::GetItemType(void) const +{ + return eItem_Wgs; +} void CWGSItem::Format (IFormatter& formatter, diff --git a/c++/src/objtools/readers/aln_reader.cpp b/c++/src/objtools/readers/aln_reader.cpp index f710dcd2..7e7c9dcc 100644 --- 
a/c++/src/objtools/readers/aln_reader.cpp +++ b/c++/src/objtools/readers/aln_reader.cpp @@ -1,4 +1,4 @@ -/* $Id: aln_reader.cpp 540293 2017-07-05 18:11:48Z foleyjp $ +/* $Id: aln_reader.cpp 575409 2018-11-28 17:53:34Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -104,8 +104,7 @@ static char * ALIGNMENT_CALLBACK s_ReadLine(void *user_data) return 0; } string s; - NcbiGetline(*is, s, "\n"); - NStr::ReplaceInPlace (s, "\r", ""); + NcbiGetline(*is, s, "\r\n"); return strdup(s.c_str()); } diff --git a/c++/src/objtools/readers/bed_reader.cpp b/c++/src/objtools/readers/bed_reader.cpp index d1c3d83e..ed36a176 100644 --- a/c++/src/objtools/readers/bed_reader.cpp +++ b/c++/src/objtools/readers/bed_reader.cpp @@ -1,4 +1,4 @@ -/* $Id: bed_reader.cpp 572342 2018-10-11 17:14:40Z ivanov $ +/* $Id: bed_reader.cpp 575513 2018-11-29 19:39:23Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -116,10 +116,10 @@ public: bool FillBuffer( size_t numLines) { - CTempString line; + string line; while (numLines && !mLineReader.AtEOF()) { line = *++mLineReader; - NStr::TruncateSpacesInPlace(line); + CLinePreBuffer::StripSpaceCharsInPlace(line); mBuffer.push_back(line); if (!IsCommentLine(line)) { --numLines; @@ -151,7 +151,7 @@ public: } else { temp = *++mLineReader; - NStr::TruncateSpacesInPlace(temp); + CLinePreBuffer::StripSpaceCharsInPlace(temp); } if (!IsCommentLine(temp)) { line = temp; @@ -195,6 +195,21 @@ public: } }; + static void + StripSpaceCharsInPlace( + string& str) + { + auto newFirst = 0; + while (str[newFirst] == ' ') { + ++newFirst; + } + auto newLast = str.length() - 1; + while (str[newLast] == ' ') { + --newLast; + } + str = str.substr(newFirst, newLast - newFirst + 1); + }; + protected: ILineReader& mLineReader; deque mBuffer; @@ -345,19 +360,33 @@ bool CBedReader::xSplitColumns( vector& columns) // 
---------------------------------------------------------------------------- { + bool splitSuccessful = false; if (mColumnSeparator.empty()) { columns.clear(); mColumnSeparator = "\t"; NStr::Split(line, mColumnSeparator, columns, mColumnSplitFlags); if (columns.size() > 2) { - return true; + splitSuccessful = true; + } + else { + mColumnSeparator = " \t"; + mColumnSplitFlags = NStr::fSplit_MergeDelimiters; } - mColumnSeparator = " \t"; - mColumnSplitFlags = NStr::fSplit_MergeDelimiters; } - columns.clear(); - NStr::Split(line, mColumnSeparator, columns, mColumnSplitFlags); - return (columns.size() > 2); + if (!splitSuccessful) { + columns.clear(); + NStr::Split(line, mColumnSeparator, columns, mColumnSplitFlags); + if (columns.size() > 2) { + splitSuccessful = true; + } + } + if (!splitSuccessful) { + return false; + } + for (auto& column: columns) { + NStr::TruncateSpacesInPlace(column); + } + return true; } // ---------------------------------------------------------------------------- @@ -397,7 +426,6 @@ bool CBedReader::xDetermineLikelyColumnCount( continue; } - vector columns; if (!xSplitColumns(line, columns)) { pErrColumnCount->Throw(); @@ -595,8 +623,6 @@ CBedReader::xParseFeature( ILineErrorListener* pEC) // ---------------------------------------------------------------------------- { - CTempString record_copy = NStr::TruncateSpaces_Unsafe(line); - // parse vector fields; xSplitColumns(line, fields); diff --git a/c++/src/objtools/readers/gff2_data.cpp b/c++/src/objtools/readers/gff2_data.cpp index 85a537b6..68e30530 100644 --- a/c++/src/objtools/readers/gff2_data.cpp +++ b/c++/src/objtools/readers/gff2_data.cpp @@ -1,4 +1,4 @@ -/* $Id: gff2_data.cpp 561166 2018-04-03 13:59:57Z ludwigf $ +/* $Id: gff2_data.cpp 575507 2018-11-29 19:36:24Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -343,7 +343,7 @@ bool CGff2Record::AssignFromGff( columns[8].Copy(m_strAttributes, 0, CTempString::npos); - 
return x_AssignAttributesFromGff(m_strType, columns[8]); + return xAssignAttributesFromGff(m_strType, columns[8]); } // ---------------------------------------------------------------------------- @@ -522,7 +522,7 @@ bool x_GetNextAttribute(CTempString& input, CTempString& key, CTempString& value return !key.empty(); } -bool CGff2Record::x_AssignAttributesFromGff( +bool CGff2Record::xAssignAttributesFromGff( const string& strType, const string& strRawAttributes ) // ---------------------------------------------------------------------------- @@ -541,7 +541,7 @@ bool CGff2Record::x_AssignAttributesFromGff( } // ---------------------------------------------------------------------------- -bool CGff2Record::x_SplitGffAttributes( +bool CGff2Record::xSplitGffAttributes( const string& strRawAttributes, vector< string >& attributes) const // ---------------------------------------------------------------------------- @@ -585,14 +585,14 @@ bool CGff2Record::InitializeFeature( // ---------------------------------------------------------------------------- { return ( - x_InitFeatureLocation(flags, pFeature, seqidresolve) && + xInitFeatureLocation(flags, pFeature, seqidresolve) && xInitFeatureData(flags, pFeature) && - x_MigrateId(pFeature) && - x_MigrateStartStopStrand(pFeature) && - x_MigrateType(pFeature) && - x_MigrateScore(pFeature) && - x_MigratePhase(pFeature) && - x_MigrateAttributes(flags, pFeature) ); + xMigrateId(pFeature) && + xMigrateStartStopStrand(pFeature) && + xMigrateType(pFeature) && + xMigrateScore(pFeature) && + xMigratePhase(pFeature) && + xMigrateAttributes(flags, pFeature) ); } // ---------------------------------------------------------------------------- @@ -600,22 +600,17 @@ bool CGff2Record::UpdateFeature( int flags, CRef pFeature, SeqIdResolver seqidresolve ) const -// ---------------------------------------------------------------------------- + // ---------------------------------------------------------------------------- { - // mss-582: - // 
if the parent feature is a gene then don't mess with the gene's location - // - CSeqFeatData::ESubtype subtype = pFeature->GetData().GetSubtype(); - if (subtype == CSeqFeatData::eSubtype_gene) { - return true; - } + auto subtype = pFeature->GetData().GetSubtype(); auto recType = Type(); NStr::ToLower(recType); + const CSeq_loc& target = pFeature->GetLocation(); CRef pAddLoc = GetSeqLoc(flags, seqidresolve); if (target.IsInt() && target.GetInt().GetFrom() <= SeqStart() && - target.GetInt().GetTo() >= SeqStop() ) { + target.GetInt().GetTo() >= SeqStop() ) { if (recType == "start_codon" || recType == "stop_codon") { return true; } @@ -691,7 +686,7 @@ bool CGff2Record::xUpdateFeatureData( // ---------------------------------------------------------------------------- -bool CGff2Record::x_MigrateId( +bool CGff2Record::xMigrateId( CRef pFeature ) const // ---------------------------------------------------------------------------- { @@ -703,7 +698,7 @@ bool CGff2Record::x_MigrateId( } // ---------------------------------------------------------------------------- -bool CGff2Record::x_MigrateStartStopStrand( +bool CGff2Record::xMigrateStartStopStrand( CRef pFeature ) const // ---------------------------------------------------------------------------- { @@ -711,7 +706,7 @@ bool CGff2Record::x_MigrateStartStopStrand( } // ---------------------------------------------------------------------------- -bool CGff2Record::x_MigrateType( +bool CGff2Record::xMigrateType( CRef pFeature ) const // ---------------------------------------------------------------------------- { @@ -720,7 +715,7 @@ bool CGff2Record::x_MigrateType( // ---------------------------------------------------------------------------- -bool CGff2Record::x_MigrateScore( +bool CGff2Record::xMigrateScore( CRef pFeature ) const // ---------------------------------------------------------------------------- { @@ -728,7 +723,7 @@ bool CGff2Record::x_MigrateScore( } // 
---------------------------------------------------------------------------- -bool CGff2Record::x_MigratePhase( +bool CGff2Record::xMigratePhase( CRef pFeature ) const // ---------------------------------------------------------------------------- { @@ -736,7 +731,7 @@ bool CGff2Record::x_MigratePhase( } // ---------------------------------------------------------------------------- -bool CGff2Record::x_MigrateAttributes( +bool CGff2Record::xMigrateAttributes( int flags, CRef pFeature ) const // ---------------------------------------------------------------------------- @@ -994,10 +989,10 @@ bool CGff2Record::x_MigrateAttributes( } if (pFeature->GetData().IsBiosrc()) { - if (!x_MigrateAttributesSubSource(flags, pFeature, attrs_left)) { + if (!xMigrateAttributesSubSource(flags, pFeature, attrs_left)) { return false; } - if (!x_MigrateAttributesOrgName(flags, pFeature, attrs_left)) { + if (!xMigrateAttributesOrgName(flags, pFeature, attrs_left)) { return false; } } @@ -1064,7 +1059,7 @@ bool CGff2Record::xMigrateAttributeDefault( } // ---------------------------------------------------------------------------- -bool CGff2Record::x_MigrateAttributesOrgName( +bool CGff2Record::xMigrateAttributesOrgName( int flags, CRef pFeature, TAttributes& attrs_left) const @@ -1131,7 +1126,7 @@ bool CGff2Record::x_MigrateAttributesOrgName( } // ---------------------------------------------------------------------------- -bool CGff2Record::x_MigrateAttributesSubSource( +bool CGff2Record::xMigrateAttributesSubSource( int flags, CRef pFeature, TAttributes& attrs_left) const @@ -1205,7 +1200,7 @@ bool CGff2Record::x_MigrateAttributesSubSource( } // ---------------------------------------------------------------------------- -bool CGff2Record::x_InitFeatureLocation( +bool CGff2Record::xInitFeatureLocation( int flags, CRef pFeature, SeqIdResolver seqidresolve ) const diff --git a/c++/src/objtools/readers/gff2_reader.cpp b/c++/src/objtools/readers/gff2_reader.cpp index 5389cd41..272cf9c1 
100644 --- a/c++/src/objtools/readers/gff2_reader.cpp +++ b/c++/src/objtools/readers/gff2_reader.cpp @@ -1,4 +1,4 @@ -/* $Id: gff2_reader.cpp 560241 2018-03-20 17:57:35Z foleyjp $ +/* $Id: gff2_reader.cpp 575511 2018-11-29 19:38:28Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -232,12 +232,10 @@ CGff2Reader::ReadSeqAnnot( } } - if (!mCurrentFeatureCount) { return CRef(); } - if (!alignments.empty()) { x_ProcessAlignmentsGff(id_list, alignments, pAnnot); } @@ -274,66 +272,6 @@ CGff2Reader::ReadSeqAnnots( } } return; - - string line; - while (xGetLine(lr, line)) { - if (IsCanceled()) { - AutoPtr pErr( - CObjReaderLineException::Create( - eDiag_Info, - 0, - "Reader stopped by user.", - ILineError::eProblem_ProgressInfo)); - ProcessError(*pErr, pEC); - annots.clear(); - return; - } - xReportProgress(pEC); - if ( xParseStructuredComment(line) ) { - continue; - } - - try { - if (xIsTrackTerminator(line)) { - continue; - } - if (x_ParseBrowserLineGff(line, m_CurrentBrowserInfo)) { - continue; - } - if (xIsTrackLine(line)) { - //completely ignore in Genbank mode - if (m_iFlags & CGff2Reader::fGenbankMode) { - continue; - } - if (!annots.empty()) { - xPostProcessAnnot(annots.back(), pEC); - } - xParseTrackLine(line, pEC); - CRef< CSeq_annot > pAnnot( new CSeq_annot ); - annots.push_back(pAnnot); - continue; - } - if (xNeedsNewSeqAnnot(line)) { - if (!annots.empty()) { - xPostProcessAnnot(annots.back(), pEC); - } - mCurrentFeatureCount = 0; - mParsingAlignment = false; - CRef< CSeq_annot > pAnnot( new CSeq_annot ); - annots.push_back(pAnnot); - continue; - } - if (x_ParseFeatureGff(line, annots, pEC)) { - continue; - } - } - catch(CObjReaderLineException& err) { - err.SetLineNumber(m_uLineNumber); - } - } - if (!annots.empty()) { - xPostProcessAnnot(annots.back(), pEC); - } } // ---------------------------------------------------------------------------- @@ -471,7 +409,7 @@ CGff2Reader::xParseFeature( } 
//parse record: - auto_ptr pRecord(x_CreateRecord()); + shared_ptr pRecord(x_CreateRecord()); try { if (!pRecord->AssignFromGff(line)) { return false; @@ -491,7 +429,7 @@ CGff2Reader::xParseFeature( } //append feature to annot: - if (!x_UpdateAnnotFeature(*pRecord, pAnnot, pEC)) { + if (!xUpdateAnnotFeature(*pRecord, pAnnot, pEC)) { return false; } @@ -844,39 +782,6 @@ CGff2Reader::xIsCurrentDataType( return (!mParsingAlignment || !mCurrentFeatureCount); } -// ---------------------------------------------------------------------------- -bool CGff2Reader::x_ParseFeatureGff( - const string& strLine, - TAnnots& annots, - ILineErrorListener* pEC) -// ---------------------------------------------------------------------------- -{ - auto_ptr pRecord(x_CreateRecord()); - try { - if (!pRecord->AssignFromGff(strLine)) { - return false; - } - } - catch(CObjReaderLineException& err) { - ProcessError(err, pEC); - return false; - } - string ftype = pRecord->Type(); - if (xIsIgnoredFeatureType(ftype)) { - return true; - } - - if (annots.empty()) { - CRef< CSeq_annot > pAnnot( new CSeq_annot ); - if ( ! x_InitAnnot( *pRecord, pAnnot, pEC ) ) { - return false; - } - annots.push_back(pAnnot); - return true; - } - return x_UpdateAnnotFeature(*pRecord, annots.back(), pEC); -}; - // ---------------------------------------------------------------------------- @@ -934,86 +839,7 @@ bool CGff2Reader::x_ParseAlignmentGff( return true; }; -// ---------------------------------------------------------------------------- -bool CGff2Reader::x_ParseBrowserLineGff( - const string& strRawInput, - CRef< CAnnotdesc >& pAnnotDesc ) -// ---------------------------------------------------------------------------- -{ - if ( ! 
NStr::StartsWith( strRawInput, "browser" ) ) { - return false; - } - vector< string > columns; - NStr::Split( strRawInput, " \t", columns, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate ); - - if ( columns.size() <= 1 || 1 != ( columns.size() % 2 ) ) { - // don't know how to unwrap this - pAnnotDesc.Reset(); - return true; - } - pAnnotDesc.Reset( new CAnnotdesc ); - CUser_object& user = pAnnotDesc->SetUser(); - user.SetType().SetStr( "browser" ); - - for ( size_t u = 1 /* skip "browser" */; u < columns.size(); u += 2 ) { - user.AddField( columns[ u ], columns[ u+1 ] ); - } - return true; -}; - -// ---------------------------------------------------------------------------- -bool CGff2Reader::x_ParseTrackLineGff( - const string& strRawInput, - CRef< CAnnotdesc >& pAnnotDesc ) -// ---------------------------------------------------------------------------- -{ - const char cBlankReplace( '+' ); - if ( ! NStr::StartsWith( strRawInput, "track" ) ) { - return false; - } - - string strCookedInput( strRawInput ); - bool bInString = false; - for ( size_t u=0; u < strCookedInput.length(); ++u ) { - if ( strCookedInput[u] == ' ' && bInString ) { - strCookedInput[u] = cBlankReplace; - } - if ( strCookedInput[u] == '\"' ) { - bInString = !bInString; - } - } - vector< string > columns; - NStr::Split( strCookedInput, " \t", columns, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate ); - - if ( columns.size() <= 1 ) { - pAnnotDesc.Reset(); - return true; - } - pAnnotDesc.Reset( new CAnnotdesc ); - CUser_object& user = pAnnotDesc->SetUser(); - user.SetType().SetStr( "track" ); - - for ( size_t u = 1 /* skip "track" */; u < columns.size(); ++u ) { - string strKey; - string strValue; - NStr::SplitInTwo( columns[u], "=", strKey, strValue ); - NStr::TruncateSpacesInPlace( strKey, NStr::eTrunc_End ); - if ( NStr::StartsWith( strValue, "\"" ) && NStr::EndsWith( strValue, "\"" ) ) { - strValue = strValue.substr( 1, strValue.length() - 2 ); - } - for ( unsigned u = 0; u < 
strValue.length(); ++u ) { - if ( strValue[u] == cBlankReplace ) { - strValue[u] = ' '; - } - } - NStr::TruncateSpacesInPlace( strValue, NStr::eTrunc_Begin ); - user.AddField( strKey, strValue ); - } - - return true; -}; - // ---------------------------------------------------------------------------- bool CGff2Reader::x_InitAnnot( const CGff2Record& gff, @@ -1049,12 +875,12 @@ bool CGff2Reader::x_InitAnnot( } else { pAnnot->SetData().SetFtable(); - return x_UpdateAnnotFeature( gff, pAnnot, pEC ); + return xUpdateAnnotFeature( gff, pAnnot, pEC ); } } // ---------------------------------------------------------------------------- -bool CGff2Reader::x_UpdateAnnotFeature( +bool CGff2Reader::xUpdateAnnotFeature( const CGff2Record& gff, CRef< CSeq_annot > pAnnot, ILineErrorListener* pEC) @@ -1159,8 +985,8 @@ bool CGff2Reader::xSetSplicedExon( } - pExon->SetGenomic_start(gff.SeqStart()-1); - pExon->SetGenomic_end(gff.SeqStop()-1); + pExon->SetGenomic_start(static_cast(gff.SeqStart()-1)); + pExon->SetGenomic_end(static_cast(gff.SeqStop()-1)); if (gff.IsSetStrand()) { pExon->SetGenomic_strand(gff.Strand()); } @@ -1329,7 +1155,8 @@ bool CGff2Reader::xSetDensegStarts(const vector& gapParts, if (identStrand == eNa_strand_minus) { - if ( !xGetStartsOnMinusStrand(gff.SeqStop(), + if ( !xGetStartsOnMinusStrand( + static_cast(gff.SeqStop()), gapParts, isIdent, identStarts)) { @@ -1337,7 +1164,8 @@ bool CGff2Reader::xSetDensegStarts(const vector& gapParts, } } else { - if ( !xGetStartsOnPlusStrand(gff.SeqStart(), + if ( !xGetStartsOnPlusStrand( + static_cast(gff.SeqStart()), gapParts, isIdent, identStarts)) { @@ -1416,10 +1244,8 @@ bool CGff2Reader::xAlignmentSetSpliced_seg( exon->SetProduct_start().SetNucpos(NStr::StringToInt(targetParts[1])-1); exon->SetProduct_end().SetNucpos(NStr::StringToInt(targetParts[2])-1); - const auto genomic_start = gff.SeqStart(); - const auto genomic_end = gff.SeqStop(); - exon->SetGenomic_start(genomic_start); - exon->SetGenomic_end(genomic_end); + 
exon->SetGenomic_start(static_cast(gff.SeqStart())); + exon->SetGenomic_end(static_cast(gff.SeqStop())); string gapInfo; vector gapParts; @@ -1493,7 +1319,7 @@ bool CGff2Reader::xAlignmentSetDenseg( gapParts.push_back(string("M") + NStr::NumericToString(gff.SeqStop()-gff.SeqStart()+1)); } - int gapCount = gapParts.size(); + int gapCount = static_cast(gapParts.size()); //meta CSeq_align::TSegs& segs = pAlign->SetSegs(); @@ -1614,8 +1440,8 @@ bool CGff2Reader::x_FeatureSetLocation( CRef< CSeq_id > pId = mSeqIdResolve(record.Id(), m_iFlags, true); CRef< CSeq_loc > pLocation( new CSeq_loc ); pLocation->SetInt().SetId( *pId ); - pLocation->SetInt().SetFrom( record.SeqStart() ); - pLocation->SetInt().SetTo( record.SeqStop() ); + pLocation->SetInt().SetFrom(static_cast(record.SeqStart())); + pLocation->SetInt().SetTo(static_cast(record.SeqStop())); if ( record.IsSetStrand() ) { pLocation->SetInt().SetStrand( record.Strand() ); } @@ -1633,58 +1459,6 @@ bool CGff2Reader::x_ProcessQualifierSpecialCase( return false; } -// ---------------------------------------------------------------------------- -bool CGff2Reader::x_FeatureTrimQualifiers( - const CGff2Record& record, - CRef< CSeq_feat > pFeature ) -// ---------------------------------------------------------------------------- -{ - typedef CSeq_feat::TQual TQual; - //task: - // for each attribute of the new piece check if we already got a feature - // qualifier - // if so, and with the same value, then the qualifier is allowed to live - // otherwise it is subfeature specific and hence removed from the feature - TQual& quals = pFeature->SetQual(); - for (TQual::iterator it = quals.begin(); it != quals.end(); /**/) { - const string& qualKey = (*it)->GetQual(); - if (NStr::StartsWith(qualKey, "gff_")) { - it++; - continue; - } - if (qualKey == "locus_tag") { - it++; - continue; - } - if (qualKey == "old_locus_tag") { - it++; - continue; - } - if (qualKey == "product") { - it++; - continue; - } - if (qualKey == "protein_id") { 
- it++; - continue; - } - const string& qualVal = (*it)->GetVal(); - string attrVal; - if (!record.GetAttribute(qualKey, attrVal)) { - //superfluous qualifier- squish - it = quals.erase(it); - continue; - } - if (qualVal != attrVal) { - //ambiguous qualifier- squish - it = quals.erase(it); - continue; - } - it++; - } - return true; -} - // ---------------------------------------------------------------------------- bool CGff2Reader::x_FeatureSetQualifiers( const CGff2Record& record, diff --git a/c++/src/objtools/readers/gff3_reader.cpp b/c++/src/objtools/readers/gff3_reader.cpp index 6bff3a7d..b92e66f3 100644 --- a/c++/src/objtools/readers/gff3_reader.cpp +++ b/c++/src/objtools/readers/gff3_reader.cpp @@ -1,4 +1,4 @@ -/* $Id: gff3_reader.cpp 559236 2018-03-08 14:54:50Z ludwigf $ +/* $Id: gff3_reader.cpp 575509 2018-11-29 19:37:34Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -220,21 +220,7 @@ bool CGff3Reader::IsInGenbankMode() const } // ---------------------------------------------------------------------------- -bool CGff3Reader::x_UpdateFeatureCds( - const CGff2Record& gff, - CRef pFeature) -// ---------------------------------------------------------------------------- -{ - CRef pAdd = CRef(new CSeq_feat); - if (!x_FeatureSetLocation(gff, pAdd)) { - return false; - } - pFeature->SetLocation().Add(pAdd->GetLocation()); - return true; -} - -// ---------------------------------------------------------------------------- -bool CGff3Reader::x_UpdateAnnotFeature( +bool CGff3Reader::xUpdateAnnotFeature( const CGff2Record& record, CRef< CSeq_annot > pAnnot, ILineErrorListener* pEC) @@ -293,41 +279,34 @@ bool CGff3Reader::xUpdateAnnotExon( { list parents; if (record.GetAttribute("Parent", parents)) { - if (!parents.empty() && parents.front() == "ENST00000367927") { - cerr << ""; - } for (list::const_iterator it = parents.begin(); it != parents.end(); ++it) { const string& parentId = *it; - if 
(!xVerifyExonLocation(parentId, record, pEC)) { - //create a free agent "exon" feature - if (!record.InitializeFeature(m_iFlags, pFeature)) { - return false; - } - CRef pParent; - if (!xGetParentFeature(*pFeature, pParent)) { - //Note: The below does not quite cut it as there are types of RNA that come - // as Imps. - //if (!xGetParentFeature(*pFeature, pParent) || - // (!pParent->GetData().IsGene() && !pParent->GetData().IsRna())) { - AutoPtr pErr( - CObjReaderLineException::Create( + CRef pParent; + if (!x_GetFeatureById(parentId, pParent)) { + xAddPendingExon(parentId, record); + return true; + } + if (pParent->GetData().IsRna() && !xVerifyExonLocation(parentId, record, pEC)) { + AutoPtr pErr( + CObjReaderLineException::Create( eDiag_Error, 0, "Bad data line: Exon record referring to non-existing mRNA or gene parent.", ILineError::eProblem_FeatureBadStartAndOrStop)); - ProcessError(*pErr, pEC); - return false; - } - if (! xAddFeatureToAnnot(pFeature, pAnnot)) { + ProcessError(*pErr, pEC); + return false; + } + if (pParent->GetData().IsGene()) { + if (!xInitializeFeature(record, pFeature)) { return false; } - return true; + return xAddFeatureToAnnot(pFeature, pAnnot); } IdToFeatureMap::iterator fit = m_MapIdToFeature.find(parentId); if (fit != m_MapIdToFeature.end()) { CRef pParent = fit->second; - if (!record.UpdateFeature(m_iFlags, pParent)) { + if (!pParent->GetData().IsGene() && !record.UpdateFeature(m_iFlags, pParent)) { return false; } } @@ -389,10 +368,8 @@ bool CGff3Reader::xUpdateAnnotCds( IdToFeatureMap::iterator featIt = m_MapIdToFeature.find(parentId); if (featIt != m_MapIdToFeature.end()) { CRef pParent = featIt->second; - if (pParent->GetData().IsGene()) { - parentIsGene = true; - } - if (!record.UpdateFeature(m_iFlags, pParent)) { + parentIsGene = pParent->GetData().IsGene(); + if (!parentIsGene && !record.UpdateFeature(m_iFlags, pParent)) { return false; } //rw-143: @@ -445,7 +422,7 @@ bool CGff3Reader::xUpdateAnnotCds( else { //didn't find 
feature with that ID: create new one pFeature.Reset(new CSeq_feat); - record.InitializeFeature(m_iFlags, pFeature); + xInitializeFeature(record, pFeature); if (!parentId.empty()) { xFeatureSetQualifier("Parent", parentId, pFeature); xFeatureSetXrefParent(parentId, pFeature); @@ -528,6 +505,43 @@ bool CGff3Reader::xFeatureSetXrefParent( return true; } +// ---------------------------------------------------------------------------- +bool CGff3Reader::xFindFeatureUnderConstruction( + const CGff2Record& record, + CRef& underConstruction) +// ---------------------------------------------------------------------------- +{ + string id; + if (!record.GetAttribute("ID", id)) { + return false; + } + IdToFeatureMap::iterator it = m_MapIdToFeature.find(id); + if (it == m_MapIdToFeature.end()) { + return false; + } + + AutoPtr pErr(CObjReaderLineException::Create( + eDiag_Fatal, + 0, + string("Bad data line: Duplicate feature ID \"") + id + "\"", + ILineError::eProblem_DuplicateIDs) ); + if (record.Id() != mIdToSeqIdMap[id]) { + pErr->Throw(); + } + if (it->second->GetData().IsRna()) { + pErr->Throw(); + } + CSeq_feat tempFeat; + if (CSoMap::SoTypeToFeature(record.Type(), tempFeat)) { + if (it->second->GetData().GetSubtype() != tempFeat.GetData().GetSubtype()) { + pErr->Throw(); + } + } + + underConstruction = it->second; + return true; +} + // ---------------------------------------------------------------------------- bool CGff3Reader::xUpdateAnnotGeneric( const CGff2Record& record, @@ -536,12 +550,9 @@ bool CGff3Reader::xUpdateAnnotGeneric( ILineErrorListener* pEC) // ---------------------------------------------------------------------------- { - string id; - if (record.GetAttribute("ID", id)) { - IdToFeatureMap::iterator it = m_MapIdToFeature.find(id); - if (it != m_MapIdToFeature.end()) { - return record.UpdateFeature(m_iFlags, it->second); - } + CRef pUnderConstruction(new CSeq_feat); + if (xFindFeatureUnderConstruction(record, pUnderConstruction)) { + return 
record.UpdateFeature(m_iFlags, pUnderConstruction); } string featType = record.Type(); @@ -573,8 +584,8 @@ bool CGff3Reader::xUpdateAnnotGeneric( CSeq_interval& cbLoc = pCodeBreak->SetLoc().SetInt(); CRef< CSeq_id > pId = mSeqIdResolve(record.Id(), m_iFlags, true); cbLoc.SetId(*pId); - cbLoc.SetFrom(record.SeqStart()); - cbLoc.SetTo(record.SeqStop()); + cbLoc.SetFrom(static_cast(record.SeqStart())); + cbLoc.SetTo(static_cast(record.SeqStop())); if (record.IsSetStrand()) { cbLoc.SetStrand(record.Strand()); } @@ -586,7 +597,7 @@ bool CGff3Reader::xUpdateAnnotGeneric( codeBreaks.push_back(pCodeBreak); return true; } - if (!record.InitializeFeature(m_iFlags, pFeature)) { + if (!xInitializeFeature(record, pFeature)) { return false; } if (! xAddFeatureToAnnot(pFeature, pAnnot)) { @@ -596,7 +607,6 @@ bool CGff3Reader::xUpdateAnnotGeneric( if ( record.GetAttribute("ID", strId)) { m_MapIdToFeature[strId] = pFeature; } - auto st = pFeature->GetData().GetSubtype(); if (pFeature->GetData().IsRna() || pFeature->GetData().GetSubtype() == CSeqFeatData::eSubtype_misc_RNA) { CRef rnaLoc(new CSeq_interval); rnaLoc->Assign(pFeature->GetLocation().GetInt()); @@ -613,30 +623,14 @@ bool CGff3Reader::xUpdateAnnotMrna( ILineErrorListener* pEC) // ---------------------------------------------------------------------------- { - string id; - if (record.GetAttribute("ID", id)) { - IdToFeatureMap::iterator it = m_MapIdToFeature.find(id); - if (it != m_MapIdToFeature.end()) { - return record.UpdateFeature(m_iFlags, it->second); - } + CRef pUnderConstruction(new CSeq_feat); + if (xFindFeatureUnderConstruction(record, pUnderConstruction)) { + return record.UpdateFeature(m_iFlags, pUnderConstruction); } - if (!record.InitializeFeature(m_iFlags, pFeature)) { + if (!xInitializeFeature(record, pFeature)) { return false; } - CRef mrnaLoc(new CSeq_interval); - CSeq_loc::E_Choice choice = pFeature->GetLocation().Which(); - if (choice != CSeq_loc::e_Int) { - AutoPtr pErr( - 
CObjReaderLineException::Create( - eDiag_Error, - 0, - "Internal error: Unexpected location type.", - ILineError::eProblem_BadFeatureInterval)); - } - mrnaLoc->Assign(pFeature->GetLocation().GetInt()); - mMrnaLocs[id] = mrnaLoc; - string parentsStr; if ((m_iFlags & fGeneXrefs) && record.GetAttribute("Parent", parentsStr)) { list parents; @@ -656,13 +650,32 @@ bool CGff3Reader::xUpdateAnnotMrna( } } - if (! xAddFeatureToAnnot(pFeature, pAnnot)) { - return false; - } string strId; if ( record.GetAttribute("ID", strId)) { m_MapIdToFeature[strId] = pFeature; } + CRef mrnaLoc(new CSeq_interval); + CSeq_loc::E_Choice choice = pFeature->GetLocation().Which(); + if (choice != CSeq_loc::e_Int) { + AutoPtr pErr( + CObjReaderLineException::Create( + eDiag_Error, + 0, + "Internal error: Unexpected location type.", + ILineError::eProblem_BadFeatureInterval)); + } + mrnaLoc->Assign(pFeature->GetLocation().GetInt()); + mMrnaLocs[strId] = mrnaLoc; + + list pendingExons; + xGetPendingExons(strId, pendingExons); + for (auto exonRecord: pendingExons) { + CRef< CSeq_feat > pFeature(new CSeq_feat); + xUpdateAnnotExon(exonRecord, pFeature, pAnnot, pEC); + } + if (! 
xAddFeatureToAnnot(pFeature, pAnnot)) { + return false; + } return true; } @@ -811,5 +824,70 @@ bool CGff3Reader::xIsIgnoredFeatureType( return false; } +// ---------------------------------------------------------------------------- +bool +CGff3Reader::xInitializeFeature( + const CGff2Record& record, + CRef pFeature) +// ---------------------------------------------------------------------------- +{ + if (!record.InitializeFeature(m_iFlags, pFeature)) { + return false; + } + const auto& attrs = record.Attributes(); + const auto it = attrs.find("ID"); + if (it != attrs.end()) { + mIdToSeqIdMap[it->second] = record.Id(); + } + return true; +} + +// ---------------------------------------------------------------------------- +void +CGff3Reader::xAddPendingExon( + const string& rnaId, + const CGff2Record& exonRecord) +// ---------------------------------------------------------------------------- +{ + PENDING_EXONS::iterator it = mPendingExons.find(rnaId); + if (it == mPendingExons.end()) { + mPendingExons[rnaId] = list(); + } + mPendingExons[rnaId].push_back(exonRecord); +} + +// ---------------------------------------------------------------------------- +void +CGff3Reader::xGetPendingExons( + const string& rnaId, + list& pendingExons) +// ---------------------------------------------------------------------------- +{ + PENDING_EXONS::iterator it = mPendingExons.find(rnaId); + if (it == mPendingExons.end()) { + return; + } + pendingExons.swap(mPendingExons[rnaId]); + mPendingExons.erase(rnaId); +} + +// ---------------------------------------------------------------------------- +void CGff3Reader::xPostProcessAnnot( + CRef& pAnnot, + ILineErrorListener *pEC) + // ---------------------------------------------------------------------------- +{ + for (const auto& it: mPendingExons) { + AutoPtr pErr(CObjReaderLineException::Create( + eDiag_Warning, + 0, + "Bad data line: Record references non-existant Parent=" + it.first, + ILineError::eProblem_MissingContext) ); + 
ProcessError(*pErr, pEC); + } + return CGff2Reader::xPostProcessAnnot(pAnnot, pEC); +} + + END_objects_SCOPE END_NCBI_SCOPE diff --git a/c++/src/objtools/readers/gtf_reader.cpp b/c++/src/objtools/readers/gtf_reader.cpp index a14c2b08..4a4d9330 100644 --- a/c++/src/objtools/readers/gtf_reader.cpp +++ b/c++/src/objtools/readers/gtf_reader.cpp @@ -1,4 +1,4 @@ -/* $Id: gtf_reader.cpp 552058 2017-11-28 19:16:52Z ludwigf $ +/* $Id: gtf_reader.cpp 575512 2018-11-29 19:38:56Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -117,84 +117,55 @@ bool s_AnnotId( } // ---------------------------------------------------------------------------- -bool CGtfReadRecord::x_AssignAttributesFromGff( +bool CGtfReadRecord::xAssignAttributesFromGff( const string& strGtfType, const string& strRawAttributes ) // ---------------------------------------------------------------------------- { vector< string > attributes; - x_SplitGffAttributes(strRawAttributes, attributes); + xSplitGffAttributes(strRawAttributes, attributes); for ( size_t u=0; u < attributes.size(); ++u ) { - string strKey; - string strValue; - string strAttr(attributes[u]); - if (!NStr::SplitInTwo(strAttr, "=", strKey, strValue)) { - if (!NStr::SplitInTwo(strAttr, " ", strKey, strValue)) { + string key, value; + string attribute(attributes[u]); + if (!NStr::SplitInTwo(attribute, "=", key, value)) { + if (!NStr::SplitInTwo(attribute, " ", key, value)) { if (strGtfType == "gene") { - m_Attributes["gene_id"] = xNormalizedAttributeValue(strAttr); + mAttributes.AddValue( + "gene_id", xNormalizedAttributeValue(attribute)); continue; } if (strGtfType == "transcript") { - if (!NStr::SplitInTwo(strAttr, ".", strKey, strValue)) { + string gid, tid; + if (!NStr::SplitInTwo(attribute, ".", gid, tid)) { return false; } - m_Attributes["gene_id"] = xNormalizedAttributeValue(strKey); - m_Attributes["transcript_id"] = xNormalizedAttributeValue(strAttr); + mAttributes.AddValue( 
+ "gene_id", xNormalizedAttributeValue(gid)); + mAttributes.AddValue( + "transcript_id", xNormalizedAttributeValue(attribute)); continue; } } } - strKey = xNormalizedAttributeKey( strKey ); - strValue = xNormalizedAttributeValue( strValue ); - if ( strKey.empty() && strValue.empty() ) { + key = xNormalizedAttributeKey(key); + value = xNormalizedAttributeValue(value); + if ( key.empty() && value.empty() ) { // Probably due to trailing "; ". Sequence Ontology generates such // things. continue; } - if ( NStr::StartsWith( strValue, "\"" ) ) { - strValue = strValue.substr( 1, string::npos ); + if (NStr::StartsWith(value, "\"")) { + value = value.substr(1, string::npos); } - if ( NStr::EndsWith( strValue, "\"" ) ) { - strValue = strValue.substr( 0, strValue.length() - 1 ); + if (NStr::EndsWith(value, "\"")) { + value = value.substr(0, value.length() - 1); } - m_Attributes[ strKey ] = strValue; + mAttributes.AddValue(key, value); } return true; } -// ---------------------------------------------------------------------------- -string s_GeneKey( - const CGff2Record& gff ) -// ---------------------------------------------------------------------------- -{ - string strGeneId; - if ( ! gff.GetAttribute( "gene_id", strGeneId ) ) { - cerr << "Unexpected: GTF feature without a gene_id." << endl; - return "gene_id"; - } - return strGeneId; -} - -// ---------------------------------------------------------------------------- -string s_FeatureKey( - const CGff2Record& gff ) -// ---------------------------------------------------------------------------- -{ - static unsigned int tidCounter(1); - string strGeneId = s_GeneKey( gff ); - if ( gff.Type() == "gene" ) { - return strGeneId; - } - - string strTranscriptId; - if ( ! 
gff.GetAttribute( "transcript_id", strTranscriptId ) ) { - strTranscriptId = "t"+NStr::IntToString(tidCounter++); - } - - return strGeneId + "_" + strTranscriptId; -} - // ---------------------------------------------------------------------------- CGtfReader::CGtfReader( unsigned int uFlags, @@ -213,15 +184,16 @@ CGtfReader::~CGtfReader() } // ---------------------------------------------------------------------------- -bool CGtfReader::x_UpdateAnnotFeature( - const CGff2Record& gff, +bool CGtfReader::xUpdateAnnotFeature( + const CGff2Record& record, CRef< CSeq_annot > pAnnot, ILineErrorListener* pEC) // ---------------------------------------------------------------------------- { + const CGtfReadRecord& gff = dynamic_cast(record); string strType = gff.Type(); - using TYPEHANDLER = bool (CGtfReader::*)(const CGff2Record&, CRef< CSeq_annot >); + using TYPEHANDLER = bool (CGtfReader::*)(const CGtfReadRecord&, CRef< CSeq_annot >); using HANDLERMAP = map; HANDLERMAP typeHandlers = { @@ -262,7 +234,7 @@ bool CGtfReader::x_UpdateAnnotFeature( // ---------------------------------------------------------------------------- bool CGtfReader::x_UpdateAnnotCds( - const CGff2Record& gff, + const CGtfReadRecord& gff, CRef< CSeq_annot > pAnnot ) // ---------------------------------------------------------------------------- { @@ -335,7 +307,7 @@ bool CGtfReader::x_UpdateAnnotCds( // ---------------------------------------------------------------------------- bool CGtfReader::x_UpdateAnnotTranscript( - const CGff2Record& gff, + const CGtfReadRecord& gff, CRef< CSeq_annot > pAnnot ) // ---------------------------------------------------------------------------- { @@ -388,7 +360,7 @@ bool CGtfReader::x_UpdateAnnotTranscript( // ---------------------------------------------------------------------------- bool CGtfReader::x_CreateFeatureId( - const CGff2Record& record, + const CGtfReadRecord& record, const string& prefix, CRef< CSeq_feat > pFeature ) // 
---------------------------------------------------------------------------- @@ -407,7 +379,7 @@ bool CGtfReader::x_CreateFeatureId( // ---------------------------------------------------------------------------- bool CGtfReader::x_CreateFeatureLocation( - const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { @@ -435,7 +407,7 @@ bool CGtfReader::x_CreateFeatureLocation( // ---------------------------------------------------------------------------- bool CGtfReader::x_CreateGeneXrefs( - const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { @@ -459,7 +431,7 @@ bool CGtfReader::x_CreateGeneXrefs( // ---------------------------------------------------------------------------- bool CGtfReader::x_CreateMrnaXrefs( - const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { @@ -481,7 +453,7 @@ bool CGtfReader::x_CreateMrnaXrefs( // ---------------------------------------------------------------------------- bool CGtfReader::x_CreateCdsXrefs( - const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { @@ -505,7 +477,7 @@ bool CGtfReader::x_CreateCdsXrefs( // ---------------------------------------------------------------------------- bool CGtfReader::x_MergeFeatureLocationSingleInterval( - const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { @@ -524,7 +496,7 @@ bool CGtfReader::x_MergeFeatureLocationSingleInterval( // ---------------------------------------------------------------------------- bool 
CGtfReader::x_MergeFeatureLocationMultiInterval( - const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { @@ -546,7 +518,7 @@ bool CGtfReader::x_MergeFeatureLocationMultiInterval( // ----------------------------------------------------------------------------- bool CGtfReader::x_CreateParentGene( - const CGff2Record& gff, + const CGtfReadRecord& gff, CRef< CSeq_annot > pAnnot ) // ----------------------------------------------------------------------------- { @@ -567,7 +539,7 @@ bool CGtfReader::x_CreateParentGene( if ( ! xFeatureSetQualifiersGene( gff, pFeature ) ) { return false; } - m_GeneMap[ s_GeneKey( gff ) ] = pFeature; + m_GeneMap[gff.GeneKey()] = pFeature; xAddFeatureToAnnot( pFeature, pAnnot ); return true; @@ -575,7 +547,7 @@ bool CGtfReader::x_CreateParentGene( // ---------------------------------------------------------------------------- bool CGtfReader::x_MergeParentGene( - const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { @@ -587,7 +559,7 @@ bool CGtfReader::x_MergeParentGene( // ---------------------------------------------------------------------------- bool CGtfReader::xFeatureSetQualifiersGene( - const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { @@ -597,31 +569,28 @@ bool CGtfReader::xFeatureSetQualifiersGene( // // Create GB qualifiers for the record attributes: // - CRef< CGb_qual > pQual(0); - const CGff2Record::TAttributes& attrs = record.Attributes(); - CGff2Record::TAttrCit it = attrs.begin(); + + const auto& attrs = record.GtfAttributes().Get(); + auto it = attrs.begin(); for (/*NOOP*/; it != attrs.end(); ++it) { auto cit = std::find(ignoredAttrs.begin(), ignoredAttrs.end(), it->first); if 
(cit != ignoredAttrs.end()) { continue; } // special case some well-known attributes - if (x_ProcessQualifierSpecialCase(it, pFeature)) { + if (xProcessQualifierSpecialCase(it->first, it->second, pFeature)) { continue; } // turn everything else into a qualifier - pQual.Reset(new CGb_qual); - pQual->SetQual(it->first); - pQual->SetVal(it->second); - pFeature->SetQual().push_back(pQual); + xFeatureAddQualifiers(it->first, it->second, pFeature); } return true; } // ---------------------------------------------------------------------------- bool CGtfReader::xFeatureSetQualifiersRna( - const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { @@ -629,27 +598,27 @@ bool CGtfReader::xFeatureSetQualifiersRna( "locus_tag" }; - const CGff2Record::TAttributes& attrs = record.Attributes(); - CGff2Record::TAttrCit it = attrs.begin(); + const auto& attrs = record.GtfAttributes().Get(); + auto it = attrs.begin(); for (/*NOOP*/; it != attrs.end(); ++it) { auto cit = std::find(ignoredAttrs.begin(), ignoredAttrs.end(), it->first); if (cit != ignoredAttrs.end()) { continue; } // special case some well-known attributes - if (x_ProcessQualifierSpecialCase(it, pFeature)) { + if (xProcessQualifierSpecialCase(it->first, it->second, pFeature)) { continue; } // turn everything else into a qualifier - pFeature->AddQualifier(it->first, it->second); + xFeatureAddQualifiers(it->first, it->second, pFeature); } return true; } // ---------------------------------------------------------------------------- bool CGtfReader::xFeatureSetQualifiersCds( - const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { @@ -657,31 +626,27 @@ bool CGtfReader::xFeatureSetQualifiersCds( "locus_tag" }; - CRef< CGb_qual > pQual(0); - const CGff2Record::TAttributes& attrs = record.Attributes(); - 
CGff2Record::TAttrCit it = attrs.begin(); + const auto& attrs = record.GtfAttributes().Get(); + auto it = attrs.begin(); for (/*NOOP*/; it != attrs.end(); ++it) { auto cit = std::find(ignoredAttrs.begin(), ignoredAttrs.end(), it->first); if (cit != ignoredAttrs.end()) { continue; } // special case some well-known attributes - if (x_ProcessQualifierSpecialCase(it, pFeature)) { + if (xProcessQualifierSpecialCase(it->first, it->second, pFeature)) { continue; } // turn everything else into a qualifier - pQual.Reset(new CGb_qual); - pQual->SetQual(it->first); - pQual->SetVal(it->second); - pFeature->SetQual().push_back(pQual); + xFeatureAddQualifiers(it->first, it->second, pFeature); } return true; } // ----------------------------------------------------------------------------- bool CGtfReader::x_CreateParentCds( - const CGff2Record& gff, + const CGtfReadRecord& gff, CRef< CSeq_annot > pAnnot ) // ----------------------------------------------------------------------------- { @@ -700,7 +665,7 @@ bool CGtfReader::x_CreateParentCds( return false; } - m_CdsMap[ s_FeatureKey( gff ) ] = pFeature; + m_CdsMap[gff.FeatureKey()] = pFeature; if ( ! 
x_FeatureSetDataCDS( gff, pFeature ) ) { return false; @@ -726,7 +691,7 @@ bool CGtfReader::x_CreateParentCds( // ----------------------------------------------------------------------------- bool CGtfReader::x_CreateParentMrna( - const CGff2Record& gff, + const CGtfReadRecord& gff, CRef< CSeq_annot > pAnnot ) // ----------------------------------------------------------------------------- { @@ -754,18 +719,18 @@ bool CGtfReader::x_CreateParentMrna( return false; } - m_MrnaMap[ s_FeatureKey( gff ) ] = pFeature; + m_MrnaMap[gff.FeatureKey()] = pFeature; return xAddFeatureToAnnot( pFeature, pAnnot ); } // ---------------------------------------------------------------------------- bool CGtfReader::x_FindParentGene( - const CGff2Record& gff, + const CGtfReadRecord& gff, CRef< CSeq_feat >& pFeature ) // ---------------------------------------------------------------------------- { - TIdToFeature::iterator gene_it = m_GeneMap.find( s_GeneKey( gff ) ); + TIdToFeature::iterator gene_it = m_GeneMap.find(gff.GeneKey()); if ( gene_it == m_GeneMap.end() ) { return false; } @@ -775,11 +740,11 @@ bool CGtfReader::x_FindParentGene( // ---------------------------------------------------------------------------- bool CGtfReader::x_FindParentCds( - const CGff2Record& gff, + const CGtfReadRecord& gff, CRef< CSeq_feat >& pFeature ) // ---------------------------------------------------------------------------- { - TIdToFeature::iterator cds_it = m_CdsMap.find( s_FeatureKey( gff ) ); + TIdToFeature::iterator cds_it = m_CdsMap.find(gff.FeatureKey()); if ( cds_it == m_CdsMap.end() ) { return false; } @@ -789,11 +754,11 @@ bool CGtfReader::x_FindParentCds( // ---------------------------------------------------------------------------- bool CGtfReader::x_FindParentMrna( - const CGff2Record& gff, + const CGtfReadRecord& gff, CRef< CSeq_feat >& pFeature ) // ---------------------------------------------------------------------------- { - TIdToFeature::iterator rna_it = m_MrnaMap.find( 
s_FeatureKey( gff ) ); + TIdToFeature::iterator rna_it = m_MrnaMap.find(gff.FeatureKey()); if ( rna_it == m_MrnaMap.end() ) { return false; } @@ -803,49 +768,45 @@ bool CGtfReader::x_FindParentMrna( // ---------------------------------------------------------------------------- bool CGtfReader::x_FeatureSetDataGene( - const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { CGene_ref& gene = pFeature->SetData().SetGene(); - string strValue; - if (record.GetAttribute( "gene_synonym", strValue)) { - gene.SetSyn().push_back( strValue ); + const auto& attributes = record.GtfAttributes(); + string geneSynonym = attributes.ValueOf("gene_synonym"); + if (!geneSynonym.empty()) { + gene.SetSyn().push_back(geneSynonym); } - if (record.GetAttribute("locus_tag", strValue)) { - gene.SetLocus_tag(strValue); + string locusTag = attributes.ValueOf("locus_tag"); + if (!locusTag.empty()) { + gene.SetLocus_tag(locusTag); } - // mss-399: do -not- use gene_id for /gene_syn or /gene: - //if ( record.GetAttribute( "gene_id", strValue ) ) { - // gene.SetSyn().push_front( strValue ); - //} return true; } // ---------------------------------------------------------------------------- bool CGtfReader::x_FeatureSetDataMRNA( - const CGff2Record& record, + const CGtfReadRecord& record, CRef pFeature) // ---------------------------------------------------------------------------- { if ( !x_FeatureSetDataRna( record, pFeature, CSeqFeatData::eSubtype_mRNA)) { return false; - } - + } CRNA_ref& rna = pFeature->SetData().SetRna(); - string strValue; - if (record.GetAttribute("product", strValue)) { - rna.SetExt().SetName(strValue); + string product = record.GtfAttributes().ValueOf("product"); + if (!product.empty()) { + rna.SetExt().SetName(product); } - return true; } // ---------------------------------------------------------------------------- bool CGtfReader::x_FeatureSetDataRna( - 
const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature, CSeqFeatData::ESubtype subType) // ---------------------------------------------------------------------------- @@ -867,93 +828,135 @@ bool CGtfReader::x_FeatureSetDataRna( // ---------------------------------------------------------------------------- bool CGtfReader::x_FeatureSetDataCDS( - const CGff2Record& record, + const CGtfReadRecord& record, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { CCdregion& cdr = pFeature->SetData().SetCdregion(); - string strValue; - if ( record.GetAttribute( "protein_id", strValue ) ) { - CRef pId = mSeqIdResolve(strValue,m_iFlags, true); + const auto& attributes = record.GtfAttributes(); + + string proteinId = attributes.ValueOf("protein_id"); + if (!proteinId.empty()) { + CRef pId = mSeqIdResolve(proteinId, m_iFlags, true); if (pId->IsGenbank()) { pFeature->SetProduct().SetWhole(*pId); } } - if ( record.GetAttribute( "ribosomal_slippage", strValue ) ) { + string ribosomalSlippage = attributes.ValueOf("ribosomal_slippage"); + if (!ribosomalSlippage.empty()) { pFeature->SetExcept( true ); - pFeature->SetExcept_text( "ribosomal slippage" ); + pFeature->SetExcept_text("ribosomal slippage"); } - if ( record.GetAttribute( "transl_table", strValue ) ) { + string transTable = attributes.ValueOf("transl_table"); + if (!transTable.empty()) { CRef< CGenetic_code::C_E > pGc( new CGenetic_code::C_E ); - pGc->SetId( NStr::StringToUInt( strValue ) ); - cdr.SetCode().Set().push_back( pGc ); + pGc->SetId(NStr::StringToUInt(transTable)); + cdr.SetCode().Set().push_back(pGc); + } + return true; +} + +// ---------------------------------------------------------------------------- +bool CGtfReader::x_FeatureTrimQualifiers( + const CGtfReadRecord& record, + CRef< CSeq_feat > pFeature ) + // ---------------------------------------------------------------------------- +{ + typedef CSeq_feat::TQual TQual; 
+ //task: + // for each attribute of the new piece check if we already got a feature + // qualifier + // if so, and with the same value, then the qualifier is allowed to live + // otherwise it is subfeature specific and hence removed from the feature + TQual& quals = pFeature->SetQual(); + for (TQual::iterator it = quals.begin(); it != quals.end(); /**/) { + const string& qualKey = (*it)->GetQual(); + if (NStr::StartsWith(qualKey, "gff_")) { + it++; + continue; + } + if (qualKey == "locus_tag") { + it++; + continue; + } + if (qualKey == "old_locus_tag") { + it++; + continue; + } + if (qualKey == "product") { + it++; + continue; + } + if (qualKey == "protein_id") { + it++; + continue; + } + const string& qualVal = (*it)->GetVal(); + if (!record.GtfAttributes().HasValue(qualKey, qualVal)) { + //superfluous qualifier- squish + it = quals.erase(it); + continue; + } + it++; } return true; } // ---------------------------------------------------------------------------- bool CGtfReader::x_CdsIsPartial( - const CGff2Record& record ) + const CGtfReadRecord& record ) // ---------------------------------------------------------------------------- { - string strPartial; -// if ( record.Type() != "CDS" ) { -// return false; -// } - if ( record.GetAttribute( "partial", strPartial ) ) { + if (record.GtfAttributes().HasValue("partial")) { return true; } CRef< CSeq_feat > mRna; - if ( ! 
x_FindParentMrna( record, mRna ) ) { + if (!x_FindParentMrna(record, mRna)) { return false; } - return ( mRna->IsSetPartial() && mRna->GetPartial() ); + return (mRna->IsSetPartial() && mRna->GetPartial()); } // ---------------------------------------------------------------------------- -bool CGtfReader::x_ProcessQualifierSpecialCase( - CGff2Record::TAttrCit it, +bool CGtfReader::xProcessQualifierSpecialCase( + const string& key, + const vector& values, CRef< CSeq_feat > pFeature ) // ---------------------------------------------------------------------------- { CRef pQual(0); - if (0 == NStr::CompareNocase(it->first, "exon_id")) { + if (0 == NStr::CompareNocase(key, "exon_id")) { return true; } - if (0 == NStr::CompareNocase(it->first, "exon_number")) { + if (0 == NStr::CompareNocase(key, "exon_number")) { return true; } - if ( 0 == NStr::CompareNocase( it->first, "note" ) ) { - pFeature->SetComment( it->second ); + if ( 0 == NStr::CompareNocase(key, "note") ) { + pFeature->SetComment(NStr::Join(values, ";")); return true; } - if ( 0 == NStr::CompareNocase( it->first, "dbxref" ) || - 0 == NStr::CompareNocase( it->first, "db_xref" ) ) + if ( 0 == NStr::CompareNocase(key, "dbxref") || + 0 == NStr::CompareNocase(key, "db_xref")) { - vector< string > tags; - NStr::Split( it->second, ";", tags ); - for ( vector::iterator it = tags.begin(); - it != tags.end(); ++it ) { - pFeature->SetDbxref().push_back( x_ParseDbtag( *it ) ); + for (auto value: values) { + vector< string > tags; + NStr::Split(value, ";", tags ); + for (auto it = tags.begin(); it != tags.end(); ++it ) { + pFeature->SetDbxref().push_back(x_ParseDbtag(*it)); + } } return true; } - if ( 0 == NStr::CompareNocase( it->first, "pseudo" ) ) { + if ( 0 == NStr::CompareNocase(key, "pseudo")) { pFeature->SetPseudo( true ); return true; } - if ( 0 == NStr::CompareNocase( it->first, "partial" ) ) { + if ( 0 == NStr::CompareNocase(key, "partial")) { pFeature->SetPartial( true ); return true; } - //if (0 == 
NStr::CompareNocase(it->first, "protein_id")) { - // if (pFeature->IsSetProduct()) { - // return true; - // } - //} - return false; } diff --git a/c++/src/objtools/readers/gvf_reader.cpp b/c++/src/objtools/readers/gvf_reader.cpp index f2722311..192881bc 100644 --- a/c++/src/objtools/readers/gvf_reader.cpp +++ b/c++/src/objtools/readers/gvf_reader.cpp @@ -1,4 +1,4 @@ - /* $Id: gvf_reader.cpp 539953 2017-06-29 13:16:45Z ludwigf $ + /* $Id: gvf_reader.cpp 575504 2018-11-29 19:35:15Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -149,13 +149,13 @@ bool CGvfReadRecord::AssignFromGff( } // ---------------------------------------------------------------------------- -bool CGvfReadRecord::x_AssignAttributesFromGff( +bool CGvfReadRecord::xAssignAttributesFromGff( const string& strGffType, const string& strRawAttributes ) // ---------------------------------------------------------------------------- { vector< string > attributes; - x_SplitGffAttributes(strRawAttributes, attributes); + xSplitGffAttributes(strRawAttributes, attributes); for ( size_t u=0; u < attributes.size(); ++u ) { string strKey; string strValue; diff --git a/c++/src/serial/datatool/generate.cpp b/c++/src/serial/datatool/generate.cpp index 652f7a6c..9f5da99c 100644 --- a/c++/src/serial/datatool/generate.cpp +++ b/c++/src/serial/datatool/generate.cpp @@ -1,4 +1,4 @@ -/* $Id: generate.cpp 554977 2018-01-11 14:18:53Z gouriano $ +/* $Id: generate.cpp 581625 2019-03-04 16:47:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -972,8 +972,9 @@ void CCodeGenerator::GenerateClientCode(const string& name, bool mandatory) return; // not configured } CFileCode code(this,Path(m_FileNamePrefix, name)); + CClientPseudoDataType type(*this, name, class_name); code.UseQuotedForm(m_UseQuotedForm); - code.AddType(new CClientPseudoDataType(*this, name, class_name)); + code.AddType(&type); 
code.GenerateCode(); string filename; code.GenerateHPP(m_HPPDir, filename); diff --git a/c++/src/serial/datatool/jsdparser.cpp b/c++/src/serial/datatool/jsdparser.cpp index fc7bc379..2f33c696 100644 --- a/c++/src/serial/datatool/jsdparser.cpp +++ b/c++/src/serial/datatool/jsdparser.cpp @@ -1,4 +1,4 @@ -/* $Id: jsdparser.cpp 569883 2018-08-30 16:59:22Z ivanov $ +/* $Id: jsdparser.cpp 580649 2019-02-19 12:35:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -547,6 +547,8 @@ void JSDParser::SkipUnknown(TToken tokend) SkipUnknown(K_END_ARRAY); } else if (tok == K_BEGIN_OBJECT) { SkipUnknown(K_END_OBJECT); + } else if (tok == T_EOF) { + ParseError("Unexpected end-of-file", tokend == K_END_ARRAY ? "end-of-array" : "end-of-object"); } } }