From: Aaron M. Ucko Date: Wed, 27 Jan 2021 01:57:37 +0000 (-0500) Subject: New upstream version 2.11.0+ds X-Git-Tag: archive/raspbian/2.16.0+ds-7+rpi1~1^2~45^2~3 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=58169a3aff60f35bc1e7b04f127b57b0b2f39ab2;p=ncbi-blast%2B.git New upstream version 2.11.0+ds --- diff --git a/c++/include/algo/blast/api/blast_aux.hpp b/c++/include/algo/blast/api/blast_aux.hpp index 778a47fb..ad815ff7 100644 --- a/c++/include/algo/blast/api/blast_aux.hpp +++ b/c++/include/algo/blast/api/blast_aux.hpp @@ -1,4 +1,4 @@ -/* $Id: blast_aux.hpp 507721 2016-07-21 14:07:53Z fongah2 $ +/* $Id: blast_aux.hpp 615182 2020-08-28 04:28:48Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -43,6 +43,7 @@ #include #include // For TSeqRange #include +#include #include // BLAST includes @@ -58,6 +59,7 @@ #include #include + BEGIN_NCBI_SCOPE BEGIN_SCOPE(objects) @@ -240,6 +242,28 @@ private: static Uint4 m_RefCounter; }; +/// Class to capture message from diag handler +class NCBI_XBLAST_EXPORT CBlastAppDiagHandler : public CDiagHandler +{ +public: + /// Constructor + CBlastAppDiagHandler():m_handler(GetDiagHandler(true)), m_save (true) {} + /// Destructor + ~CBlastAppDiagHandler(); + /// Save and post diag message + virtual void Post (const SDiagMessage & mess); + /// Reset messgae buffer, erase all saved message + void ResetMessages(void); + /// Call to turn off saving diag message, discard all saved message + void DoNotSaveMessages(void); + /// Return list of saved diag messages + list > & GetMessages(void) { return m_messages;} +private : + CDiagHandler * m_handler; + list > m_messages; + bool m_save; +}; + /** Declares class to handle deallocating of the structure using the appropriate * function diff --git a/c++/include/algo/blast/api/blast_node.hpp b/c++/include/algo/blast/api/blast_node.hpp new file mode 100644 index 00000000..91ae6f34 --- /dev/null +++ 
b/c++/include/algo/blast/api/blast_node.hpp @@ -0,0 +1,195 @@ +/* $Id: blast_node.hpp 615348 2020-08-31 15:38:28Z fukanchi $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. 
+ * + * =========================================================================== + * + * Authors: Amelia Fong + * + */ + +/** @file blast_node.hpp + * BLAST node api + */ + +#ifndef ALGO_BLAST_API___BLAST_NODE__HPP +#define ALGO_BLAST_API___BLAST_NODE__HPP + +#include +#include + +BEGIN_NCBI_SCOPE +BEGIN_SCOPE(blast) + +class NCBI_XBLAST_EXPORT CBlastNodeMsg : public CObject +{ +public: + enum EMsgType { + eRunRequest, + ePostResult, + eErrorExit, + ePostLog + }; + CBlastNodeMsg(EMsgType type, void * obj_ptr): m_MsgType(type), m_Obj(obj_ptr) {} + EMsgType GetMsgType() { return m_MsgType; } + void * GetMsgBody() { return m_Obj; } +private: + EMsgType m_MsgType; + void * m_Obj; +}; + +class NCBI_XBLAST_EXPORT CBlastNodeMailbox : public CObject +{ +public: + CBlastNodeMailbox(int node_num, CConditionVariable & notify): m_NodeNum(node_num), m_Notify(notify){} + void SendMsg(CRef msg); + CRef ReadMsg() + { + CFastMutexGuard guard(m_Mutex); + CRef rv; + if (! m_MsgQueue.empty()){ + rv.Reset(m_MsgQueue.front()); + m_MsgQueue.pop_front(); + } + return rv; + } + void UnreadMsg(CRef msg) { CFastMutexGuard guard(m_Mutex); m_MsgQueue.push_front(msg);} + int GetNumMsgs () { CFastMutexGuard guard(m_Mutex); return m_MsgQueue.size(); } + int GetNodeNum() { return m_NodeNum; } + ~CBlastNodeMailbox() { m_MsgQueue.resize(0); } +private: + int m_NodeNum; + CConditionVariable & m_Notify; + list > m_MsgQueue; + CFastMutex m_Mutex; +}; + +class NCBI_XBLAST_EXPORT CBlastNode : public CThread +{ +public : + enum EState { + eInitialized, + eRunning, + eError, + eDone, + }; + CBlastNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args, + CBlastAppDiagHandler & bah, EProgram blast_program, + int query_index, int num_queries, CBlastNodeMailbox * mailbox); + + virtual int GetBlastResults(string & results) = 0; + int GetNodeNum() { return m_NodeNum;} + EState GetState() { return m_State; } + int GetStatus() { return m_Status; } + const CArgs & GetArgs() { return m_Args; } + 
CBlastAppDiagHandler & GetDiagHandler() { return m_Bah; } + const CNcbiArguments & GetArguments() { return m_NcbiArgs; } + void SendMsg(CBlastNodeMsg::EMsgType msg_type, void* ptr = NULL); + string & GetNodeIdStr() { return m_NodeIdStr;} + int GetNumOfQueries() {return m_NumOfQueries;} + int GetQueriesLength() {return m_QueriesLength;} +protected: + virtual ~CBlastNode(void); + virtual void* Main(void) = 0; + void SetState(EState state) { m_State = state; } + void SetStatus(int status) { m_Status = status; } + void SetQueriesLength(int l) { m_QueriesLength = l;} + int m_NodeNum; +private: + const CNcbiArguments & m_NcbiArgs; + const CArgs & m_Args; + CBlastAppDiagHandler & m_Bah; + EProgram m_BlastProgram; + int m_QueryIndex; + int m_NumOfQueries; + string m_NodeIdStr; + CRef m_Mailbox; + EState m_State; + int m_Status; + int m_QueriesLength; +}; + + +class NCBI_XBLAST_EXPORT CBlastMasterNode +{ +public: + CBlastMasterNode(CNcbiOstream & out_stream, int num_threads); + typedef map > TPostOffice; + typedef map > TRegisteredNodes; + typedef map TActiveNodes; + typedef map > TFormatQueue; + void RegisterNode(CBlastNode * node, CBlastNodeMailbox * mailbox); + int GetNumNodes() { return m_RegisteredNodes.size();} + int IsFull(); + void Shutdown() { m_MaxNumNodes = -1; } + bool Processing(); + int IsActive() + { + if ((m_MaxNumNodes < 0) && (m_RegisteredNodes.size() == 0)){ + return false; + } + return true; + } + void FormatResults(); + CConditionVariable & GetBuzzer() {return m_NewEvent;} + ~CBlastMasterNode() {} + int GetNumOfQueries() { return m_NumQueries; } + Int8 GetQueriesLength() { return m_QueriesLength; } + int GetNumErrStatus() { return m_NumErrStatus; } +private: + void x_WaitForNewEvent(); + + CNcbiOstream & m_OutputStream; + int m_MaxNumThreads; + int m_MaxNumNodes; + CFastMutex m_Mutex; + CStopWatch m_StopWatch; + TPostOffice m_PostOffice; + TRegisteredNodes m_RegisteredNodes; + TActiveNodes m_ActiveNodes; + TFormatQueue m_FormatQueue; + 
CConditionVariable m_NewEvent; + int m_NumErrStatus; + int m_NumQueries; + Int8 m_QueriesLength; +}; + + +class NCBI_XBLAST_EXPORT CBlastNodeInputReader : public CStreamLineReader +{ +public: + + CBlastNodeInputReader(CNcbiIstream& is, int batch_size, int est_avg_len) : + CStreamLineReader(is), m_QueryBatchSize(batch_size), m_EstAvgQueryLength(est_avg_len), m_QueryCount(0) {} + + int GetQueryBatch(string & queries, int & query_no); + +private: + const int m_QueryBatchSize; + const int m_EstAvgQueryLength; + int m_QueryCount; +}; + +END_SCOPE(blast) +END_NCBI_SCOPE + +#endif /* ALGO_BLAST_API___BLAST_NODE__HPP */ diff --git a/c++/include/algo/blast/api/blast_usage_report.hpp b/c++/include/algo/blast/api/blast_usage_report.hpp new file mode 100644 index 00000000..777605b3 --- /dev/null +++ b/c++/include/algo/blast/api/blast_usage_report.hpp @@ -0,0 +1,120 @@ +/* $Id: blast_usage_report.hpp 617231 2020-09-28 18:27:17Z ivanov $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. 
+ * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Authors: Amelia Fong + * + */ + +/** @file blast_usage_report.hpp + * BLAST usage report api + */ + +#ifndef ALGO_BLAST_API___BLAST_USAGE_REPORT__HPP +#define ALGO_BLAST_API___BLAST_USAGE_REPORT__HPP + +#include +#include + +BEGIN_NCBI_SCOPE +BEGIN_SCOPE(blast) + +class NCBI_XBLAST_EXPORT CBlastUsageReport : public CUsageReport +{ + +public: + enum EUsageParams { + eApp, + eVersion, + eProgram, + eTask, + eExitStatus, + eRunTime, + eDBName, + eDBLength, + eDBNumSeqs, + eDBDate, + eBl2seq, + eNumSubjects, + eSubjectsLength, + eNumQueries, + eTotalQueryLength, + eEvalueThreshold, + eNumThreads, + eHitListSize, + eOutputFmt, + eTaxIdList, + eNegTaxIdList, + eGIList, + eNegGIList, + eSeqIdList, + eNegSeqIdList, + eIPGList, + eNegIPGList, + eMaskAlgo, + eCompBasedStats, + eRange, + eMTMode, + eNumQueryBatches, + eNumErrStatus, + ePSSMInput, + eConverged, + eArchiveInput, + eRIDInput, + eDBInfo, + eDBTaxInfo, + eDBEntry, + eDBDumpAll, + eDBType, + eInputType, + eParseSeqIDs, + eSeqType, + eDBTest, + eDBAliasMode, + eDocker, + eGCP, + eAWS, + eELBJobId, + eELBBatchNum + }; + + CBlastUsageReport(); + ~CBlastUsageReport(); + void AddParam(EUsageParams p, int val); + void AddParam(EUsageParams p, const string & val); + void AddParam(EUsageParams p, const double & val); + void AddParam(EUsageParams p, Int8 val); + void AddParam(EUsageParams p, bool val); + +private: + void x_CheckBlastUsageEnv(); + string x_EUsageParmsToString(EUsageParams p); + void x_CheckRunEnv(); + CUsageReportParameters m_Params; +}; + +END_SCOPE(blast) +END_NCBI_SCOPE + +#endif /* ALGO_BLAST_API___BLAST_USAGE_REPORT__HPP */ diff --git a/c++/include/algo/blast/blastinput/blast_input.hpp b/c++/include/algo/blast/blastinput/blast_input.hpp index 68fad00f..e8146bfc 100644 --- a/c++/include/algo/blast/blastinput/blast_input.hpp +++ 
b/c++/include/algo/blast/blastinput/blast_input.hpp @@ -1,4 +1,4 @@ -/* $Id: blast_input.hpp 575325 2018-11-27 18:22:00Z ucko $ +/* $Id: blast_input.hpp 615335 2020-08-31 15:36:38Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -310,11 +310,11 @@ public: /// be in a batch of converted sequences /// CBlastInput(CBlastInputSource* source, int batch_size = kMax_Int) - : m_Source(source), m_BatchSize(batch_size) {} + : m_Source(source), m_BatchSize(batch_size), m_NumSeqs(0), m_TotalLength(0) {} /// Destructor /// - ~CBlastInput() {} + ~CBlastInput(){} /// Read and convert all the sequences from the source /// @param scope CScope object to use in return value [in] @@ -357,6 +357,8 @@ public: /// Determine if we have reached the end of the BLAST input bool End() { return m_Source->End(); } + int GetNumSeqsProcessed() const { return m_NumSeqs; } + int GetTotalLengthProcessed() const { return m_TotalLength; } private: CRef m_Source; ///< pointer to source of sequences TSeqPos m_BatchSize; ///< total size of one block of sequences @@ -369,6 +371,12 @@ private: /// Perform the actual copy for assignment operator and copy constructor void do_copy(const CBlastInput& input); + + // # of seqs processed + int m_NumSeqs; + + // Total length processed + int m_TotalLength; }; /// Auxiliary class for creating Bioseqs given SeqIds diff --git a/c++/include/algo/blast/blastinput/cmdline_flags.hpp b/c++/include/algo/blast/blastinput/cmdline_flags.hpp index 7fd7e1b1..f8df6d4f 100644 --- a/c++/include/algo/blast/blastinput/cmdline_flags.hpp +++ b/c++/include/algo/blast/blastinput/cmdline_flags.hpp @@ -1,4 +1,4 @@ -/* $Id: cmdline_flags.hpp 605536 2020-04-13 11:07:50Z ivanov $ +/* $Id: cmdline_flags.hpp 615184 2020-08-28 04:29:55Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -469,6 +469,8 @@ NCBI_BLASTINPUT_EXPORT extern const string kArgPrintMdTag; 
NCBI_BLASTINPUT_EXPORT extern const string kArgUnalignedOutput; /// Argument to specify format for reporting unaligned reads NCBI_BLASTINPUT_EXPORT extern const string kArgUnalignedFormat; +/// Argument to specify mt mode (split by db or split by queries) +NCBI_BLASTINPUT_EXPORT extern const string kArgMTMode; END_SCOPE(blast) END_NCBI_SCOPE diff --git a/c++/include/algo/blast/blastinput/rpsblast_args.hpp b/c++/include/algo/blast/blastinput/rpsblast_args.hpp index aec42f16..32b528d4 100644 --- a/c++/include/algo/blast/blastinput/rpsblast_args.hpp +++ b/c++/include/algo/blast/blastinput/rpsblast_args.hpp @@ -1,4 +1,4 @@ -/* $Id: rpsblast_args.hpp 544441 2017-08-23 11:55:51Z camacho $ +/* $Id: rpsblast_args.hpp 615185 2020-08-28 04:30:03Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -56,6 +56,14 @@ public: /// @inheritDoc virtual int GetQueryBatchSize() const; + /// Get the input stream + virtual CNcbiIstream& GetInputStream(); + + /// Get the output stream + virtual CNcbiOstream& GetOutputStream(); + + virtual ~CRPSBlastAppArgs() {} + protected: /// @inheritDoc virtual CRef @@ -63,6 +71,35 @@ protected: const CArgs& args); }; +class NCBI_BLASTINPUT_EXPORT CRPSBlastNodeArgs : public CRPSBlastAppArgs +{ +public: + /// Constructor + CRPSBlastNodeArgs(const string & input); + + /// @inheritDoc + virtual int GetQueryBatchSize() const; + + /// Get the input stream + virtual CNcbiIstream& GetInputStream(); + + /// Get the output stream + virtual CNcbiOstream& GetOutputStream(); + + CNcbiOstrstream & GetOutputStrStream() { return m_OutputStream; } + + virtual ~CRPSBlastNodeArgs(); + +protected: + /// @inheritDoc + virtual CRef + x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, const CArgs& args); + +private : + CNcbiOstrstream m_OutputStream; + CNcbiIstrstream * m_InputStream; +}; + END_SCOPE(blast) END_NCBI_SCOPE diff --git a/c++/include/algo/blast/blastinput/rpstblastn_args.hpp 
b/c++/include/algo/blast/blastinput/rpstblastn_args.hpp index 7887a0ce..bb199ebf 100644 --- a/c++/include/algo/blast/blastinput/rpstblastn_args.hpp +++ b/c++/include/algo/blast/blastinput/rpstblastn_args.hpp @@ -1,4 +1,4 @@ -/* $Id: rpstblastn_args.hpp 161402 2009-05-27 17:35:47Z camacho $ +/* $Id: rpstblastn_args.hpp 615188 2020-08-28 04:30:31Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -49,6 +49,14 @@ public: /// @inheritDoc virtual int GetQueryBatchSize() const; + /// Get the input stream + virtual CNcbiIstream& GetInputStream(); + + /// Get the output stream + virtual CNcbiOstream& GetOutputStream(); + + virtual ~CRPSTBlastnAppArgs() {} + protected: /// @inheritDoc virtual CRef @@ -56,6 +64,36 @@ protected: const CArgs& args); }; +class NCBI_BLASTINPUT_EXPORT CRPSTBlastnNodeArgs : public CRPSTBlastnAppArgs +{ +public: + /// Constructor + CRPSTBlastnNodeArgs(const string & input); + + /// @inheritDoc + virtual int GetQueryBatchSize() const; + + /// Get the input stream + virtual CNcbiIstream& GetInputStream(); + + /// Get the output stream + virtual CNcbiOstream& GetOutputStream(); + + CNcbiOstrstream & GetOutputStrStream() { return m_OutputStream; } + + virtual ~CRPSTBlastnNodeArgs(); + +protected: + /// @inheritDoc + virtual CRef + x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, const CArgs& args); + +private : + CNcbiOstrstream m_OutputStream; + CNcbiIstrstream * m_InputStream; +}; + + END_SCOPE(blast) END_NCBI_SCOPE diff --git a/c++/include/algo/blast/format/blast_format.hpp b/c++/include/algo/blast/format/blast_format.hpp index 9278e4e2..be85afe1 100644 --- a/c++/include/algo/blast/format/blast_format.hpp +++ b/c++/include/algo/blast/format/blast_format.hpp @@ -1,4 +1,4 @@ -/* $Id: blast_format.hpp 591152 2019-08-12 11:18:21Z fongah2 $ +/* $Id: blast_format.hpp 615337 2020-08-31 15:36:55Z fukanchi $ * 
=========================================================================== * * PUBLIC DOMAIN NOTICE @@ -55,6 +55,7 @@ Author: Jason Papadopoulos #include #include #include +#include BEGIN_NCBI_SCOPE @@ -323,6 +324,10 @@ public: static void PrintArchive(CRef archive, CNcbiOstream& out); + + // Extract search info in CBlastFormat and add to blast report usage + void LogBlastSearchInfo(blast::CBlastUsageReport & report); + private: /// Format type blast::CFormattingArgs::EOutputFormat m_FormatType; diff --git a/c++/include/common/config/ncbiconf_msvc.h b/c++/include/common/config/ncbiconf_msvc.h index 2fc3df75..98dcae4e 100644 --- a/c++/include/common/config/ncbiconf_msvc.h +++ b/c++/include/common/config/ncbiconf_msvc.h @@ -1,4 +1,4 @@ -/* $Id: ncbiconf_msvc.h 602172 2020-02-18 15:13:29Z ucko $ +/* $Id: ncbiconf_msvc.h 608266 2020-05-13 18:56:44Z ivanov $ * By Denis Vakatov, NCBI (vakatov@ncbi.nlm.nih.gov) * * MS-Win 32/64, MSVC++ 6.0/.NET @@ -135,7 +135,10 @@ typedef int ssize_t; #define NETDB_REENTRANT 1 #if _MSC_VER >= 1400 - +// need to include some standard header to get all debugging macros +# ifdef __cplusplus +# include +# endif /* Suppress 'deprecated' warning for STD functions */ #if !defined(_CRT_NONSTDC_DEPRECATE) #define _CRT_NONSTDC_DEPRECATE(x) diff --git a/c++/include/common/ncbi_export.h b/c++/include/common/ncbi_export.h index 84d9da09..9f94380b 100644 --- a/c++/include/common/ncbi_export.h +++ b/c++/include/common/ncbi_export.h @@ -1,7 +1,7 @@ #ifndef COMMON___NCBI_EXPORT__H #define COMMON___NCBI_EXPORT__H -/* $Id: ncbi_export.h 605871 2020-04-16 11:23:58Z ivanov $ +/* $Id: ncbi_export.h 617033 2020-09-24 18:56:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/include/common/ncbi_package_ver.h b/c++/include/common/ncbi_package_ver.h index 0ed06d2d..1bdf1427 100644 --- a/c++/include/common/ncbi_package_ver.h +++ b/c++/include/common/ncbi_package_ver.h @@ -7,8 +7,8 @@ 
#define NCBI_PACKAGE 1 #define NCBI_PACKAGE_NAME "blast" #define NCBI_PACKAGE_VERSION_MAJOR 2 -#define NCBI_PACKAGE_VERSION_MINOR 10 -#define NCBI_PACKAGE_VERSION_PATCH 1 +#define NCBI_PACKAGE_VERSION_MINOR 11 +#define NCBI_PACKAGE_VERSION_PATCH 0 #define NCBI_PACKAGE_CONFIG "" #define NCBI_PACKAGE_VERSION_STRINGIFY(x) #x diff --git a/c++/include/common/ncbiconf_impl.h b/c++/include/common/ncbiconf_impl.h index 65daf62d..58a3cd8b 100644 --- a/c++/include/common/ncbiconf_impl.h +++ b/c++/include/common/ncbiconf_impl.h @@ -1,7 +1,7 @@ #ifndef COMMON___NCBICONF_IMPL__H #define COMMON___NCBICONF_IMPL__H -/* $Id: ncbiconf_impl.h 606329 2020-04-20 16:28:09Z ivanov $ +/* $Id: ncbiconf_impl.h 609547 2020-06-03 17:21:47Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -41,8 +41,6 @@ # error "The header can be used from only." #endif /*!FORWARDING_NCBICONF_H*/ -#include - /** @addtogroup Portability * diff --git a/c++/include/connect/ncbi_usage_report.hpp b/c++/include/connect/ncbi_usage_report.hpp index 38d9574f..2579a032 100644 --- a/c++/include/connect/ncbi_usage_report.hpp +++ b/c++/include/connect/ncbi_usage_report.hpp @@ -1,7 +1,7 @@ #ifndef CONNECT___NCBI_USAGE_REPORT__HPP #define CONNECT___NCBI_USAGE_REPORT__HPP -/* $Id: ncbi_usage_report.hpp 602851 2020-03-03 18:47:23Z ivanov $ +/* $Id: ncbi_usage_report.hpp 617219 2020-09-28 17:23:04Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -29,7 +29,7 @@ * Authors: Vladislav Evgeniev, Vladimir Ivanov * * File Description: - * Log usage information to NCBI “pinger”. + * Log usage information to NCBI "pinger". 
* */ @@ -75,7 +75,7 @@ public: fOS = 1 << 3, ///< OS name ("os") fHost = 1 << 4, ///< Host name ("host") // - fDefault = fAppName | fAppVersion | fOS | fHost + fDefault = fAppName | fAppVersion | fOS }; typedef int TWhat; ///< Binary OR of "EWhat" }; @@ -325,10 +325,10 @@ public: /// to allow checking reporting progress or failures, see EState for a list of states. /// @sa /// EState, CUsageReport::Send() - virtual void OnStateChange(EState state) {}; + virtual void OnStateChange(EState /*state*/) {}; /// Copy constructor. - CUsageReportJob(const CUsageReportJob& other) { x_CopyFrom(other); }; + CUsageReportJob(const CUsageReportJob& other) : CUsageReportParameters(other) { m_State = other.m_State; }; /// Copy assignment operator. CUsageReportJob& operator=(const CUsageReportJob& other) { x_CopyFrom(other); return *this; }; diff --git a/c++/include/corelib/impl/ncbi_dbsvcmapper.hpp b/c++/include/corelib/impl/ncbi_dbsvcmapper.hpp index f5dbc2ba..f7ec6837 100644 --- a/c++/include/corelib/impl/ncbi_dbsvcmapper.hpp +++ b/c++/include/corelib/impl/ncbi_dbsvcmapper.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___DB_SERVICE_MAPPER__HPP #define CORELIB___DB_SERVICE_MAPPER__HPP -/* $Id: ncbi_dbsvcmapper.hpp 586267 2019-05-13 18:15:06Z ucko $ +/* $Id: ncbi_dbsvcmapper.hpp 610944 2020-06-25 18:30:27Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -34,8 +34,7 @@ */ -#include -#include +#include #ifdef NCBI_OS_MSWIN # include diff --git a/c++/include/corelib/mswin_no_popup.h b/c++/include/corelib/mswin_no_popup.h index 2aa87a21..7285587a 100644 --- a/c++/include/corelib/mswin_no_popup.h +++ b/c++/include/corelib/mswin_no_popup.h @@ -1,7 +1,7 @@ #ifndef CORELIB___MSWIN_NO_POPUP__H #define CORELIB___MSWIN_NO_POPUP__H -/* $Id: mswin_no_popup.h 171076 2009-09-21 16:22:34Z ivanov $ +/* $Id: mswin_no_popup.h 617213 2020-09-28 17:22:30Z ivanov $ * =========================================================================== * * 
PUBLIC DOMAIN NOTICE @@ -45,7 +45,7 @@ #define NCBI_MSWIN_NO_POPUP /* In case anyone needs to always disable the popup messages (regardless of DIAG_SILENT_ABDORT) - another pre-processor macro can be defined before #include’ing either + another pre-processor macro can be defined before #include'ing either (or ). */ /* #define NCBI_MSWIN_NO_POPUP_EVER */ diff --git a/c++/include/corelib/ncbi_system.hpp b/c++/include/corelib/ncbi_system.hpp index 01e1acd2..99552ee1 100644 --- a/c++/include/corelib/ncbi_system.hpp +++ b/c++/include/corelib/ncbi_system.hpp @@ -1,7 +1,7 @@ #ifndef NCBI_SYSTEM__HPP #define NCBI_SYSTEM__HPP -/* $Id: ncbi_system.hpp 603334 2020-03-10 17:10:33Z ivanov $ +/* $Id: ncbi_system.hpp 613789 2020-08-12 18:02:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -63,7 +63,7 @@ BEGIN_NCBI_SCOPE enum ELimitsExitCode { eLEC_None, ///< Normal exit. eLEC_Memory, ///< Memory limit. - eLEC_Cpu ///< CPU usage limit. + eLEC_Cpu ///< CPU time usage limit. }; /// Type of parameter for print handler. @@ -166,7 +166,12 @@ extern bool SetMemoryLimit(size_t max_size, TLimitsPrintParameter parameter = NULL); /// [UNIX only] Set soft memory limit. -/// @sa SetMemoryLimit +/// @note +/// The soft limit is the value that the kernel enforces for the corresponding resource. +/// An unprivileged process may only set its soft limit to a value in the range +/// from 0 up to the hard limit, and (irreversibly) lower its hard limit. +/// A privileged process may make arbitrary changes to either limit value. +/// @sa SetMemoryLimit, SetMemoryLimitHard NCBI_XNCBI_EXPORT extern bool SetMemoryLimitSoft(size_t max_size, TLimitsPrintHandler handler = NULL, @@ -174,18 +179,58 @@ extern bool SetMemoryLimitSoft(size_t max_size, /// [UNIX only] Set hard memory limit. 
/// @note +/// The hard limit acts as a ceiling for the soft limit: /// Current soft memory limit will be automatically decreased, /// if it exceed new value for the hard memory limit. /// @note /// Only privileged process can increase current hard level limit. -/// @sa SetMemoryLimit +/// @sa SetMemoryLimit, SetMemoryLimitSoft NCBI_XNCBI_EXPORT extern bool SetMemoryLimitHard(size_t max_size, TLimitsPrintHandler handler = NULL, TLimitsPrintParameter parameter = NULL); -/// [UNIX only] Set CPU usage limit. +/// [UNIX only] Get "soft" memory limit of the virtual memory (address space) in bytes for a current process. +/// @return +/// Returns "soft" value set by setrlimit(), SetMemoryLimit() or ulimit command +/// line utility for virtual memory address space. +/// 0 - if an error occurs and CNcbiError is set, or the memory limit is set to "unlimited". +/// @note +/// The implementation of malloc() can be different on many flavors of UNIX, and we +/// usually don't know how exactly it is implemented on the current system. +/// Some systems use sbrk()-based implementation (heap), other use mmap() system call +/// and virtual memory (address space) to allocate memory, some other use hybrid approach +/// and may allocate memory in two different ways depending on requested memory size +/// and certain parameters. +/// Almost all modern Unix versions uses mmap()-based approach for all memory allocations +/// or at least for big chunks of memory, so probably virtual memory limits is more +/// important nowadays. +/// @sa SetMemoryLimit, GetVirtualMemoryLimitHard +NCBI_XNCBI_EXPORT +extern size_t GetVirtualMemoryLimitSoft(void); + +/// [UNIX only] Get "hard" memory limit of the virtual memory (address space) in bytes for a current process. +/// @return +/// Returns "hard" value set by setrlimit(), SetMemoryLimit() or ulimit command +/// line utility for virtual memory address space. +/// 0 - if an error occurs and CNcbiError is set, or the memory limit is set to "unlimited". 
+/// @note +/// The implementation of malloc() can be different on many flavors of UNIX, and we +/// usually don't know how exactly it is implemented on the current system. +/// Some systems use sbrk()-based implementation (heap), other use mmap() system call +/// and virtual memory (address space) to allocate memory, some other use hybrid approach +/// and may allocate memory in two different ways depending on requested memory size +/// and certain parameters. +/// Almost all modern Unix versions uses mmap()-based approach for all memory allocations +/// or at least for big chunks of memory, so probably virtual memory limits is more +/// important nowadays. +/// @sa SetMemoryLimit, GetVirtualMemoryLimitSoft +NCBI_XNCBI_EXPORT +extern size_t GetVirtualMemoryLimitHard(void); + + +/// [UNIX only] Set CPU time usage limit. /// /// Set the limit for the CPU time that can be consumed by current process. /// @@ -371,9 +416,13 @@ public: /// Process owner user name, or empty string if it cannot be determined. static string GetUserName(void); - /// Return number of active CPUs (never less than 1). + /// Return number of active CPUs/cores (never less than 1). static unsigned int GetCpuCount(void); + /// Return number of allowed to use CPUs/cores for the current thread. + /// Returns 0 if unable to get this information on the current OS, or error occurs. + static unsigned int GetCpuCountAllowed(void); + /// Get system uptime in seconds. 
/// @return /// Seconds since last boot, or negative number if cannot determine it diff --git a/c++/include/corelib/ncbiapp_api.hpp b/c++/include/corelib/ncbiapp_api.hpp index 81b5a622..fec38c61 100644 --- a/c++/include/corelib/ncbiapp_api.hpp +++ b/c++/include/corelib/ncbiapp_api.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___NCBIAPP_API__HPP #define CORELIB___NCBIAPP_API__HPP -/* $Id: ncbiapp_api.hpp 593047 2019-09-11 15:29:02Z grichenk $ +/* $Id: ncbiapp_api.hpp 610397 2020-06-16 18:45:55Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -395,7 +395,10 @@ public: NCBI_DEPRECATED virtual bool SetupDiag_AppSpecific(void); /// Add callback to be executed from CNcbiApplicationAPI destructor. - /// @sa CNcbiActionGuard + /// @note It is executed earlier, at CNcbiApplication destructor; and, it + /// may be executed even earlier from destructors of other + /// CNcbiApplicationAPI-derived classes. + /// @sa CNcbiActionGuard, ExecuteOnExitActions() template void AddOnExitAction(TFunc func) { m_OnExitActions.AddAction(func); @@ -616,6 +619,12 @@ protected: typedef int TAppFlags; void SetAppFlags(TAppFlags flags) { m_AppFlags = flags; } + /// Should only be called from the destructors of classes derived from + /// CNcbiApplicationAPI - if it is necessary to access their data members + /// and virtual methods; or to dynamic_cast<> from the base app class. + /// @sa AddOnExitAction() + void ExecuteOnExitActions(); + private: /// Read standard NCBI application configuration settings. 
/// diff --git a/c++/include/corelib/ncbidiag.hpp b/c++/include/corelib/ncbidiag.hpp index 6cc92b0b..f6d83373 100644 --- a/c++/include/corelib/ncbidiag.hpp +++ b/c++/include/corelib/ncbidiag.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___NCBIDIAG__HPP #define CORELIB___NCBIDIAG__HPP -/* $Id: ncbidiag.hpp 606470 2020-04-22 14:14:58Z ivanov $ +/* $Id: ncbidiag.hpp 611708 2020-07-09 17:56:10Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2565,6 +2565,25 @@ enum EDiagFilter { NCBI_XNCBI_EXPORT extern void SetDiagFilter(EDiagFilter what, const char* filter_str); +/// Get current diagnostic filter +/// +/// @param what +/// Filter is set for, only eDiagFilter_Trace and eDiagFilter_Post values are allowed, +/// otherwise the function returns empty string. +/// @sa SetDiagFilter +NCBI_XNCBI_EXPORT +extern string GetDiagFilter(EDiagFilter what); + +/// Append diagnostic filter +/// +/// @param what +/// Filter is set for +/// @param filter_str +/// Filter string +/// @sa SetDiagFilter +NCBI_XNCBI_EXPORT +extern void AppendDiagFilter(EDiagFilter what, const char* filter_str); + ///////////////////////////////////////////////////////////////////////////// /// diff --git a/c++/include/corelib/ncbimisc.hpp b/c++/include/corelib/ncbimisc.hpp index e424fe88..72297bb4 100644 --- a/c++/include/corelib/ncbimisc.hpp +++ b/c++/include/corelib/ncbimisc.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___NCBIMISC__HPP #define CORELIB___NCBIMISC__HPP -/* $Id: ncbimisc.hpp 607883 2020-05-08 15:09:10Z grichenk $ +/* $Id: ncbimisc.hpp 609927 2020-06-08 16:52:43Z grichenk $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -874,8 +874,13 @@ const TSeqPos kInvalidSeqPos = ((TSeqPos) (-1)); /// Use this typedef rather than its expansion, which may change. 
typedef int TSignedSeqPos; -/// Type for Taxon1-name.taxid +/// Taxon id type typedef int TTaxId; +# define TAX_ID_TO(T, tax_id) (static_cast(tax_id)) +# define TAX_ID_FROM(T, value) (static_cast(value)) +# define TAX_ID_CONST(id) id +#define ZERO_TAX_ID TAX_ID_CONST(0) +#define INVALID_TAX_ID TAX_ID_CONST(-1) /// Type for sequence GI. /// @@ -1029,25 +1034,25 @@ class CConstGIChecker { public: static const TIntId value = gi; }; -#define GI_CONST(gi) (TGi(CConstGIChecker::value)) +#define GI_CONST(gi) (ncbi::TGi(CConstGIChecker::value)) #define ZERO_GI GI_CONST(0) #define INVALID_GI GI_CONST(-1) /// Temporary macros to convert TGi to other types (int, unsigned etc.). -#define GI_TO(T, gi) (static_cast(TIntId(gi))) -#define GI_FROM(T, value) (TGi(static_cast(value))) +#define GI_TO(T, gi) (static_cast(ncbi::TIntId(gi))) +#define GI_FROM(T, value) (ncbi::TGi(static_cast(value))) -#define ENTREZ_ID_CONST(id) (TEntrezId(CConstGIChecker::value)) +#define ENTREZ_ID_CONST(id) (ncbi::TEntrezId(CConstGIChecker::value)) #define ZERO_ENTREZ_ID ENTREZ_ID_CONST(0) #define INVALID_ENTREZ_ID ENTREZ_ID_CONST(-1) /// Temporary macros to convert TEntrezId to other types (int, unsigned etc.). -#define ENTREZ_ID_TO(T, entrez_id) (static_cast(TIntId(entrez_id))) -#define ENTREZ_ID_FROM(T, value) (TEntrezId(static_cast(value))) +#define ENTREZ_ID_TO(T, entrez_id) (static_cast(ncbi::TIntId(entrez_id))) +#define ENTREZ_ID_FROM(T, value) (ncbi::TEntrezId(static_cast(value))) /// Convert gi-compatible int to/from other types. 
#define INT_ID_TO(T, id) (static_cast(id)) -#define INT_ID_FROM(T, value) (static_cast(value)) +#define INT_ID_FROM(T, value) (static_cast(value)) /// Helper address class diff --git a/c++/include/corelib/ncbistre.hpp b/c++/include/corelib/ncbistre.hpp index 14829879..e303ebc5 100644 --- a/c++/include/corelib/ncbistre.hpp +++ b/c++/include/corelib/ncbistre.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___NCBISTRE__HPP #define CORELIB___NCBISTRE__HPP -/* $Id: ncbistre.hpp 606328 2020-04-20 16:27:53Z ivanov $ +/* $Id: ncbistre.hpp 617212 2020-09-28 17:22:22Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -227,41 +227,6 @@ public: CNcbiIfstream::open(_Filename.c_str(), _Mode, _Prot); } }; -#elif defined(NCBI_COMPILER_MSVC) -# if _MSC_VER >= 1200 && _MSC_VER < 1300 -class CNcbiIfstream : public IO_PREFIX::ifstream -{ -public: - CNcbiIfstream() : m_Fp(0) - { - } - - explicit CNcbiIfstream(const char* s, - IOS_BASE::openmode mode = IOS_BASE::in) - { - fastopen(s, mode); - } - - void fastopen(const char* s, IOS_BASE::openmode mode = IOS_BASE::in) - { - if (is_open() || !(m_Fp = __Fiopen(s, mode | in))) - setstate(failbit); - else - (void) new (rdbuf()) basic_filebuf >(m_Fp); - } - - virtual ~CNcbiIfstream(void) - { - if (m_Fp) - fclose(m_Fp); - } -private: - FILE* m_Fp; -}; -# else -/// Portable alias for ifstream. -typedef IO_PREFIX::ifstream CNcbiIfstream; -# endif #else /// Portable alias for ifstream. 
typedef IO_PREFIX::ifstream CNcbiIfstream; @@ -323,41 +288,6 @@ public: CNcbiOfstream::open(_Filename.c_str(), _Mode, _Prot); } }; -#elif defined(NCBI_COMPILER_MSVC) -# if _MSC_VER >= 1200 && _MSC_VER < 1300 -class CNcbiOfstream : public IO_PREFIX::ofstream -{ -public: - CNcbiOfstream() : m_Fp(0) - { - } - - explicit CNcbiOfstream(const char* s, - IOS_BASE::openmode mode = IOS_BASE::out) - { - fastopen(s, mode); - } - - void fastopen(const char* s, IOS_BASE::openmode mode = IOS_BASE::out) - { - if (is_open() || !(m_Fp = __Fiopen(s, mode | out))) - setstate(failbit); - else - (void) new (rdbuf()) basic_filebuf >(m_Fp); - } - - virtual ~CNcbiOfstream(void) - { - if (m_Fp) - fclose(m_Fp); - } -private: - FILE* m_Fp; -}; -# else -/// Portable alias for ofstream. -typedef IO_PREFIX::ofstream CNcbiOfstream; -# endif #else /// Portable alias for ofstream. typedef IO_PREFIX::ofstream CNcbiOfstream; @@ -396,43 +326,6 @@ public: IO_PREFIX::fstream::open(_Filename,_Mode,_Prot); } }; -#elif defined(NCBI_COMPILER_MSVC) -# if _MSC_VER >= 1200 && _MSC_VER < 1300 -class CNcbiFstream : public IO_PREFIX::fstream -{ -public: - CNcbiFstream() : m_Fp(0) - { - } - - explicit CNcbiFstream(const char* s, - IOS_BASE::openmode - mode = IOS_BASE::in | IOS_BASE::out) - { - fastopen(s, mode); - } - - void fastopen(const char* s, IOS_BASE::openmode - mode = IOS_BASE::in | IOS_BASE::out) - { - if (is_open() || !(m_Fp = __Fiopen(s, mode))) - setstate(failbit); - else - (void) new (rdbuf()) basic_filebuf >(m_Fp); - } - - virtual ~CNcbiFstream(void) - { - if (m_Fp) - fclose(m_Fp); - } -private: - FILE* m_Fp; -}; -# else -/// Portable alias for fstream. -typedef IO_PREFIX::fstream CNcbiFstream; -# endif #else /// Portable alias for fstream. 
typedef IO_PREFIX::fstream CNcbiFstream; @@ -698,8 +591,7 @@ NCBI_XNCBI_EXPORT CNcbiOstream& operator<<(CNcbiOstream& out, const CNcbiOstrstreamToString& s); inline -Int8 -GetOssSize(CNcbiOstrstream& oss) +Int8 GetOssSize(CNcbiOstrstream& oss) { #ifdef NCBI_SHUN_OSTRSTREAM return NcbiStreamposToInt8(oss.tellp()); @@ -709,8 +601,7 @@ GetOssSize(CNcbiOstrstream& oss) } inline -bool -IsOssEmpty(CNcbiOstrstream& oss) +bool IsOssEmpty(CNcbiOstrstream& oss) { return GetOssSize(oss) == 0; } @@ -861,13 +752,6 @@ CNcbiOstream& operator<<(CNcbiOstream& out, CPrintableStringConverter s); NCBI_XNCBI_EXPORT CNcbiOstream& operator<<(CNcbiOstream& out, CPrintableCharPtrConverter s); -#ifdef NCBI_COMPILER_MSVC -# if _MSC_VER >= 1200 && _MSC_VER < 1300 -NCBI_XNCBI_EXPORT -CNcbiOstream& operator<<(CNcbiOstream& out, __int64 val); -# endif -#endif - ///////////////////////////////////////////////////////////////////////////// /// @@ -959,8 +843,8 @@ enum EBOMDiscard { /// CStreamUtils::Pushback(). /// @sa CStreamUtils::Pushback() NCBI_XNCBI_EXPORT -EEncodingForm GetTextEncodingForm(CNcbiIstream& input, - EBOMDiscard discard_bom); +EEncodingForm GetTextEncodingForm(CNcbiIstream& input, EBOMDiscard discard_bom); + /// Byte Order Mark helper class to use in serialization /// @@ -986,9 +870,11 @@ private: EEncodingForm m_EncodingForm; }; + /// Write Byte Order Mark into output stream NCBI_XNCBI_EXPORT CNcbiOstream& operator<< (CNcbiOstream& str, const CByteOrderMark& bom); + /// Read Byte Order Mark, if present, from input stream /// /// @note @@ -1012,10 +898,10 @@ END_NCBI_SCOPE // NOTE: these must have been inside the _NCBI_SCOPE and without the // "ncbi::" and "std::" prefixes, but there is some bug in SunPro 5.0... 
#if defined(NCBI_USE_OLD_IOSTREAM) -extern NCBI_NS_NCBI::CNcbiOstream& operator<<(NCBI_NS_NCBI::CNcbiOstream& os, - const NCBI_NS_STD::string& str); -extern NCBI_NS_NCBI::CNcbiIstream& operator>>(NCBI_NS_NCBI::CNcbiIstream& is, - NCBI_NS_STD::string& str); +extern NCBI_NS_NCBI::CNcbiOstream& + operator<<(NCBI_NS_NCBI::CNcbiOstream& os, const NCBI_NS_STD::string& str); +extern NCBI_NS_NCBI::CNcbiIstream& + operator>>(NCBI_NS_NCBI::CNcbiIstream& is, NCBI_NS_STD::string& str); #endif // NCBI_USE_OLD_IOSTREAM diff --git a/c++/include/corelib/request_ctx.hpp b/c++/include/corelib/request_ctx.hpp index f93d84a3..712f5c5d 100644 --- a/c++/include/corelib/request_ctx.hpp +++ b/c++/include/corelib/request_ctx.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___REQUEST_CTX__HPP #define CORELIB___REQUEST_CTX__HPP -/* $Id: request_ctx.hpp 574926 2018-11-20 20:23:54Z ucko $ +/* $Id: request_ctx.hpp 617468 2020-10-01 17:54:00Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -64,7 +64,8 @@ class CRequestContext_PassThrough; class NCBI_XNCBI_EXPORT CSharedHitId { public: - explicit CSharedHitId(const string& hit) : m_HitId(hit), m_SubHitId(0) {} + explicit CSharedHitId(const string& hit) + : m_HitId(hit), m_SubHitId(0), m_AppState(GetDiagContext().GetAppState()) {} CSharedHitId(void) : m_SubHitId(0) {} ~CSharedHitId(void) {} @@ -90,6 +91,7 @@ public: m_SharedSubHitId.Reset(); m_SubHitId = 0; m_HitId = hit_id; + m_AppState = GetDiagContext().GetAppState(); } typedef unsigned int TSubHitId; @@ -106,12 +108,21 @@ public: return IsShared() ? (TSubHitId)m_SharedSubHitId->GetData().Add(1) : ++m_SubHitId; } + /// Check if this hit ID was set at request level. 
+ bool IsRequestLevel(void) const + { + return m_AppState == eDiagAppState_RequestBegin || + m_AppState == eDiagAppState_Request || + m_AppState == eDiagAppState_RequestEnd; + } + private: typedef CObjectFor TSharedCounter; string m_HitId; TSubHitId m_SubHitId; mutable CRef m_SharedSubHitId; + EDiagAppState m_AppState; }; @@ -678,7 +689,7 @@ bool CRequestContext::IsSetHitID(EHitIDSource src) const return true; } if ((src & eHitID_Request) && x_IsSetProp(eProp_HitID)) { - return true; + return m_HitID.IsRequestLevel(); } if ((src & eHitID_Default) && GetDiagContext().x_IsSetDefaultHitID()) { return true; diff --git a/c++/include/corelib/test_boost.hpp b/c++/include/corelib/test_boost.hpp index 2476a363..563cc8a9 100644 --- a/c++/include/corelib/test_boost.hpp +++ b/c++/include/corelib/test_boost.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___TEST_BOOST__HPP #define CORELIB___TEST_BOOST__HPP -/* $Id: test_boost.hpp 604629 2020-03-31 13:43:18Z ivanov $ +/* $Id: test_boost.hpp 617213 2020-09-28 17:22:30Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -64,7 +64,11 @@ #endif #include -#include +#if BOOST_VERSION >= 107000 +# include +#else +# include +#endif #include #include #include diff --git a/c++/include/corelib/version.hpp b/c++/include/corelib/version.hpp index 92eb3d90..1c1f8845 100644 --- a/c++/include/corelib/version.hpp +++ b/c++/include/corelib/version.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___VERSION__HPP #define CORELIB___VERSION__HPP -/* $Id: version.hpp 593438 2019-09-18 18:13:51Z lavr $ +/* $Id: version.hpp 609546 2020-06-03 17:21:38Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -36,6 +36,7 @@ #include +#include @@ -51,6 +52,72 @@ BEGIN_NCBI_SCOPE # define NCBI_BUILD_TIME __DATE__ " " __TIME__ #endif +#ifdef NCBI_BUILD_TAG +# define NCBI_BUILD_TAG_PROXY NCBI_AS_STRING(NCBI_BUILD_TAG) +#else +# define NCBI_BUILD_TAG_PROXY "" +#endif 
+ +// Cope with potentially having an older ncbi_build_info.h +#ifndef NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO +# define NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO \ + .Extra(SBuildInfo::eTeamCityProjectName, NCBI_TEAMCITY_PROJECT_NAME_PROXY) +# define NCBI_TEAMCITY_BUILDCONF_NAME_SBUILDINFO \ + .Extra(SBuildInfo::eTeamCityBuildConf, NCBI_TEAMCITY_BUILDCONF_NAME_PROXY) +# define NCBI_TEAMCITY_BUILD_NUMBER_SBUILDINFO \ + .Extra(SBuildInfo::eTeamCityBuildNumber, NCBI_TEAMCITY_BUILD_NUMBER_PROXY) +# define NCBI_SUBVERSION_REVISION_SBUILDINFO \ + .Extra(SBuildInfo::eSubversionRevision, NCBI_SUBVERSION_REVISION_PROXY) +# define NCBI_SC_VERSION_SBUILDINFO \ + .Extra(SBuildInfo::eStableComponentsVersion, NCBI_SC_VERSION_PROXY) +#endif + +// Cope with potentially having an older ncbi_source_ver.h +#ifndef NCBI_SRCTREE_VER_SBUILDINFO +# ifdef NCBI_SRCTREE_NAME_PROXY +# define NCBI_SRCTREE_VER_SBUILDINFO \ + .Extra(NCBI_SRCTREE_NAME_PROXY, NCBI_SRCTREE_VER_PROXY) +# else +# define NCBI_SRCTREE_VER_SBUILDINFO /* empty */ +# endif +#endif + +#ifdef NCBI_APP_BUILT_AS +# define NCBI_BUILT_AS_SBUILDINFO \ + .Extra(SBuildInfo::eBuiltAs, NCBI_AS_STRING(NCBI_APP_BUILT_AS)) +#else +# define NCBI_BUILT_AS_SBUILDINFO /* empty */ +#endif + +#ifdef NCBI_TEAMCITY_BUILD_ID +# define NCBI_BUILD_ID NCBI_TEAMCITY_BUILD_ID +#elif defined(NCBI_BUILD_SESSION_ID) +# define NCBI_BUILD_ID NCBI_AS_STRING(NCBI_BUILD_SESSION_ID) +#endif +#ifdef NCBI_BUILD_ID +# define NCBI_BUILD_ID_SBUILDINFO .Extra(SBuildInfo::eBuildID, NCBI_BUILD_ID) +#else +# define NCBI_BUILD_ID_SBUILDINFO /* empty */ +#endif + +#define NCBI_SBUILDINFO_DEFAULT_IMPL() \ + NCBI_SBUILDINFO_DEFAULT_INSTANCE() \ + NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO \ + NCBI_TEAMCITY_BUILDCONF_NAME_SBUILDINFO \ + NCBI_TEAMCITY_BUILD_NUMBER_SBUILDINFO \ + NCBI_BUILD_ID_SBUILDINFO \ + NCBI_SUBVERSION_REVISION_SBUILDINFO \ + NCBI_SC_VERSION_SBUILDINFO \ + NCBI_SRCTREE_VER_SBUILDINFO \ + NCBI_BUILT_AS_SBUILDINFO + +#if defined(NCBI_USE_PCH) && 
!defined(NCBI_TEAMCITY_BUILD_NUMBER) +#define NCBI_SBUILDINFO_DEFAULT() SBuildInfo() +#else +#define NCBI_SBUILDINFO_DEFAULT() NCBI_SBUILDINFO_DEFAULT_IMPL() +#endif +#define NCBI_APP_SBUILDINFO_DEFAULT() NCBI_SBUILDINFO_DEFAULT_IMPL() + #ifdef NCBI_SBUILDINFO_DEFAULT_INSTANCE # undef NCBI_SBUILDINFO_DEFAULT_INSTANCE #endif diff --git a/c++/include/corelib/version_api.hpp b/c++/include/corelib/version_api.hpp index 1d2202ec..676433d2 100644 --- a/c++/include/corelib/version_api.hpp +++ b/c++/include/corelib/version_api.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___VERSION_API__HPP #define CORELIB___VERSION_API__HPP -/* $Id: version_api.hpp 591729 2019-08-19 20:52:06Z vasilche $ +/* $Id: version_api.hpp 609546 2020-06-03 17:21:38Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -96,73 +96,8 @@ struct NCBI_XNCBI_EXPORT SBuildInfo string PrintJson(void) const; }; -#ifdef NCBI_BUILD_TAG -# define NCBI_BUILD_TAG_PROXY NCBI_AS_STRING(NCBI_BUILD_TAG) -#else -# define NCBI_BUILD_TAG_PROXY "" -#endif - -// Cope with potentially having an older ncbi_build_info.h -#ifndef NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO -# define NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO \ - .Extra(SBuildInfo::eTeamCityProjectName, NCBI_TEAMCITY_PROJECT_NAME_PROXY) -# define NCBI_TEAMCITY_BUILDCONF_NAME_SBUILDINFO \ - .Extra(SBuildInfo::eTeamCityBuildConf, NCBI_TEAMCITY_BUILDCONF_NAME_PROXY) -# define NCBI_TEAMCITY_BUILD_NUMBER_SBUILDINFO \ - .Extra(SBuildInfo::eTeamCityBuildNumber, NCBI_TEAMCITY_BUILD_NUMBER_PROXY) -# define NCBI_SUBVERSION_REVISION_SBUILDINFO \ - .Extra(SBuildInfo::eSubversionRevision, NCBI_SUBVERSION_REVISION_PROXY) -# define NCBI_SC_VERSION_SBUILDINFO \ - .Extra(SBuildInfo::eStableComponentsVersion, NCBI_SC_VERSION_PROXY) -#endif - -// Cope with potentially having an older ncbi_source_ver.h -#ifndef NCBI_SRCTREE_VER_SBUILDINFO -# ifdef NCBI_SRCTREE_NAME_PROXY -# define NCBI_SRCTREE_VER_SBUILDINFO \ - 
.Extra(NCBI_SRCTREE_NAME_PROXY, NCBI_SRCTREE_VER_PROXY) -# else -# define NCBI_SRCTREE_VER_SBUILDINFO /* empty */ -# endif -#endif - -#ifdef NCBI_APP_BUILT_AS -# define NCBI_BUILT_AS_SBUILDINFO \ - .Extra(SBuildInfo::eBuiltAs, NCBI_AS_STRING(NCBI_APP_BUILT_AS)) -#else -# define NCBI_BUILT_AS_SBUILDINFO /* empty */ -#endif - -#ifdef NCBI_TEAMCITY_BUILD_ID -# define NCBI_BUILD_ID NCBI_TEAMCITY_BUILD_ID -#elif defined(NCBI_BUILD_SESSION_ID) -# define NCBI_BUILD_ID NCBI_AS_STRING(NCBI_BUILD_SESSION_ID) -#endif -#ifdef NCBI_BUILD_ID -# define NCBI_BUILD_ID_SBUILDINFO .Extra(SBuildInfo::eBuildID, NCBI_BUILD_ID) -#else -# define NCBI_BUILD_ID_SBUILDINFO /* empty */ -#endif - #define NCBI_SBUILDINFO_DEFAULT_INSTANCE() SBuildInfo() -#define NCBI_SBUILDINFO_DEFAULT_IMPL() \ - NCBI_SBUILDINFO_DEFAULT_INSTANCE() \ - NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO \ - NCBI_TEAMCITY_BUILDCONF_NAME_SBUILDINFO \ - NCBI_TEAMCITY_BUILD_NUMBER_SBUILDINFO \ - NCBI_BUILD_ID_SBUILDINFO \ - NCBI_SUBVERSION_REVISION_SBUILDINFO \ - NCBI_SC_VERSION_SBUILDINFO \ - NCBI_SRCTREE_VER_SBUILDINFO \ - NCBI_BUILT_AS_SBUILDINFO - -#if defined(NCBI_USE_PCH) && !defined(NCBI_TEAMCITY_BUILD_NUMBER) -#define NCBI_SBUILDINFO_DEFAULT() SBuildInfo() -#else -#define NCBI_SBUILDINFO_DEFAULT() NCBI_SBUILDINFO_DEFAULT_IMPL() -#endif -#define NCBI_APP_SBUILDINFO_DEFAULT() NCBI_SBUILDINFO_DEFAULT_IMPL() ///////////////////////////////////////////////////////////////////////////// /// diff --git a/c++/include/dbapi/driver/impl/dbapi_pool_balancer.hpp b/c++/include/dbapi/driver/impl/dbapi_pool_balancer.hpp new file mode 100644 index 00000000..56411a67 --- /dev/null +++ b/c++/include/dbapi/driver/impl/dbapi_pool_balancer.hpp @@ -0,0 +1,82 @@ +#ifndef DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP +#define DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP + +/* $Id: dbapi_pool_balancer.hpp 610945 2020-06-25 18:31:37Z ivanov $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * 
National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Author: Aaron Ucko + * + */ + +/// @file dbapi_pool_balancer.hpp +/// Help distribute connections within a pool across servers. 
+ +#include + +/** @addtogroup DBAPI + * + * @{ + */ + +BEGIN_NCBI_SCOPE + +class CDBPoolBalancer : public CObject +{ +public: + CDBPoolBalancer(const string& service_name, + const string& pool_name, + const IDBServiceMapper::TOptions& options, + I_DriverContext* driver_ctx = nullptr); + + TSvrRef GetServer(CDB_Connection** conn, const CDBConnParams* params); + +private: + struct SEndpointInfo { + SEndpointInfo() + : effective_ranking(0.0), ideal_count(0.0), actual_count(0U), + penalty_level(0U) + { } + + CRef ref; + double effective_ranking; + double ideal_count; + unsigned int actual_count; + unsigned int penalty_level; + }; + typedef map TEndpoints; + + impl::TEndpointKey x_NameToKey(CTempString& name) const; + + TEndpoints m_Endpoints; + multiset m_Rankings; + I_DriverContext* m_DriverCtx; + unsigned int m_TotalCount; +}; + +END_NCBI_SCOPE + +/* @} */ + +#endif /* DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP */ diff --git a/c++/include/ncbi_pch.hpp b/c++/include/ncbi_pch.hpp index b283fde2..4d5d335f 100644 --- a/c++/include/ncbi_pch.hpp +++ b/c++/include/ncbi_pch.hpp @@ -1,5 +1,5 @@ #if defined(NCBI_USE_PCH) && !defined(NCBI_PCH__HPP) -/* $Id: ncbi_pch.hpp 608162 2020-05-12 16:01:31Z blastadm $ +/* $Id: ncbi_pch.hpp 617723 2020-10-06 07:10:56Z blastadm $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/include/ncbi_source_ver.h b/c++/include/ncbi_source_ver.h index ef07d61c..c445b140 100644 --- a/c++/include/ncbi_source_ver.h +++ b/c++/include/ncbi_source_ver.h @@ -1,4 +1,4 @@ -/* $Id: ncbi_source_ver.h 608162 2020-05-12 16:01:31Z blastadm $ +/* $Id: ncbi_source_ver.h 617723 2020-10-06 07:10:56Z blastadm $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/include/ncbiconf.h b/c++/include/ncbiconf.h index 6d7f4474..0971a38c 100644 --- a/c++/include/ncbiconf.h +++ b/c++/include/ncbiconf.h @@ -1,7 +1,7 @@ #ifndef 
FORWARDING_NCBICONF_H #define FORWARDING_NCBICONF_H -/* $Id: ncbiconf.h 608162 2020-05-12 16:01:31Z blastadm $ +/* $Id: ncbiconf.h 617723 2020-10-06 07:10:56Z blastadm $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/include/objects/dbsnp/primary_track/impl/snpptis_impl.hpp b/c++/include/objects/dbsnp/primary_track/impl/snpptis_impl.hpp index 126d1671..2daad030 100644 --- a/c++/include/objects/dbsnp/primary_track/impl/snpptis_impl.hpp +++ b/c++/include/objects/dbsnp/primary_track/impl/snpptis_impl.hpp @@ -1,6 +1,6 @@ #ifndef SRA__READER__SRA__IMPL__SNPPTIS__HPP #define SRA__READER__SRA__IMPL__SNPPTIS__HPP -/* $Id: snpptis_impl.hpp 597185 2019-11-18 19:46:30Z vasilche $ +/* $Id: snpptis_impl.hpp 615550 2020-09-01 13:13:11Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -58,7 +58,16 @@ private: typedef ncbi::grpcapi::dbsnp::primary_track::SeqIdRequestStringAccverUnion TRequest; string x_GetPrimarySnpTrack(const TRequest& request); - + + int max_retries; + float timeout; + float timeout_mul; + float timeout_inc; + float timeout_max; + float wait_time; + float wait_time_mul; + float wait_time_inc; + float wait_time_max; shared_ptr channel; unique_ptr stub; }; diff --git a/c++/include/objects/general/Dbtag.hpp b/c++/include/objects/general/Dbtag.hpp index 9003c632..c3ebf663 100644 --- a/c++/include/objects/general/Dbtag.hpp +++ b/c++/include/objects/general/Dbtag.hpp @@ -1,4 +1,4 @@ -/* $Id: Dbtag.hpp 591286 2019-08-13 18:04:06Z kans $ +/* $Id: Dbtag.hpp 615789 2020-09-03 18:19:18Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -220,7 +220,8 @@ public: eDbtagType_EPDnew, eDbtagType_Ensembl, eDbtagType_PseudoCAP, - eDbtagType_MarpolBase + eDbtagType_MarpolBase, + eDbtagType_dbVar }; enum EDbtagGroup { diff --git a/c++/include/objects/seqfeat/SeqFeatData.hpp 
b/c++/include/objects/seqfeat/SeqFeatData.hpp index cac2d2af..ba58d745 100644 --- a/c++/include/objects/seqfeat/SeqFeatData.hpp +++ b/c++/include/objects/seqfeat/SeqFeatData.hpp @@ -1,4 +1,4 @@ -/* $Id: SeqFeatData.hpp 597755 2019-11-26 19:03:13Z gotvyans $ +/* $Id: SeqFeatData.hpp 613780 2020-08-12 16:42:40Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -398,6 +398,7 @@ public: eQual_mol_type, eQual_name, eQual_nomenclature, + eQual_non_std_residue, eQual_ncRNA_class, eQual_note, eQual_number, diff --git a/c++/include/objects/taxon1/local_taxon.hpp b/c++/include/objects/taxon1/local_taxon.hpp index bed177ae..e75f91d2 100644 --- a/c++/include/objects/taxon1/local_taxon.hpp +++ b/c++/include/objects/taxon1/local_taxon.hpp @@ -1,4 +1,4 @@ -/* $Id: local_taxon.hpp 598592 2019-12-11 15:20:21Z badrazat $ +/* $Id: local_taxon.hpp 615586 2020-09-01 17:59:29Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -71,6 +71,8 @@ public: { return GetAncestorByRank(taxid, "species"); } TTaxid GetGenus(TTaxid taxid) { return GetAncestorByRank(taxid, "genus"); } + TTaxid GetOrder(TTaxid taxid) + { return GetAncestorByRank(taxid, "order"); } TLineage GetLineage(TTaxid taxid); TTaxid Join(TTaxid taxid1, TTaxid taxid2); diff --git a/c++/include/objects/valerr/ValidErrItem.hpp b/c++/include/objects/valerr/ValidErrItem.hpp index 4cb21b24..aef2a9cc 100644 --- a/c++/include/objects/valerr/ValidErrItem.hpp +++ b/c++/include/objects/valerr/ValidErrItem.hpp @@ -1,4 +1,4 @@ -/* $Id: ValidErrItem.hpp 597158 2019-11-18 17:58:02Z kans $ +/* $Id: ValidErrItem.hpp 611904 2020-07-13 15:51:08Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -717,6 +717,7 @@ enum EErrType { eErr_SEQ_FEAT_TRNAinsideTMRNA, eErr_SEQ_FEAT_IncorrectQualifierCapitalization, eErr_SEQ_FEAT_CDSdoesNotMatchVDJC, + 
eErr_SEQ_FEAT_GeneOnNucPositionOfPeptide, ERR_CODE_END(SEQ_FEAT), ERR_CODE_BEGIN(SEQ_ALIGN) = 5000, diff --git a/c++/include/objmgr/impl/scope_impl.hpp b/c++/include/objmgr/impl/scope_impl.hpp index d9199337..2dd0498f 100644 --- a/c++/include/objmgr/impl/scope_impl.hpp +++ b/c++/include/objmgr/impl/scope_impl.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_IMPL_SCOPE_IMPL__HPP #define OBJMGR_IMPL_SCOPE_IMPL__HPP -/* $Id: scope_impl.hpp 606922 2020-04-28 18:58:25Z ivanov $ +/* $Id: scope_impl.hpp 610058 2020-06-10 16:19:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -301,6 +301,7 @@ public: void ResetHistory(int action); // CScope::EActionIfLocked void ResetDataAndHistory(void); void RemoveFromHistory(const CTSE_Handle& tse, int action); + void RemoveFromHistory(const CSeq_id_Handle& seq_id); // Revoke data sources from the scope. Throw exception if the // operation fails (e.g. data source is in use or not found). diff --git a/c++/include/objmgr/scope.hpp b/c++/include/objmgr/scope.hpp index fa3b68cd..4aacf94c 100644 --- a/c++/include/objmgr/scope.hpp +++ b/c++/include/objmgr/scope.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_SCOPE__HPP #define OBJMGR_SCOPE__HPP -/* $Id: scope.hpp 575832 2018-12-04 21:08:18Z vasilche $ +/* $Id: scope.hpp 610058 2020-06-10 16:19:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -365,6 +365,14 @@ public: /// Bioseq, which TSE is to be removed from the cache. void RemoveFromHistory(const CBioseq_Handle& bioseq, EActionIfLocked action = eKeepIfLocked); + /// Remove the Seq-id failed resolution from the scope's history. + /// @param seq_id + /// Seq-id that failed resolution + void RemoveFromHistory(const CSeq_id_Handle& seq_id); + /// Remove the Seq-id failed resolution from the scope's history. 
+ /// @param seq_id + /// Seq-id that failed resolution + void RemoveFromHistory(const CSeq_id& seq_id); /// Revoke data loader from the scope. Throw exception if the /// operation fails (e.g. data source is in use or not found). diff --git a/c++/include/objmgr/util/autodef_options.hpp b/c++/include/objmgr/util/autodef_options.hpp index fca8434e..dfb612b6 100644 --- a/c++/include/objmgr/util/autodef_options.hpp +++ b/c++/include/objmgr/util/autodef_options.hpp @@ -1,7 +1,7 @@ #ifndef OBJMGR_UTIL___AUTODEF_OPTIONS__HPP #define OBJMGR_UTIL___AUTODEF_OPTIONS__HPP -/* $Id: autodef_options.hpp 530276 2017-03-13 18:20:08Z bollin $ +/* $Id: autodef_options.hpp 611612 2020-07-08 17:43:23Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -104,7 +104,8 @@ public: eCompleteGenome, ePartialSequence, ePartialGenome, - eSequence + eSequence, + eWholeGenomeShotgunSequence }; typedef unsigned int TFeatureListType; diff --git a/c++/include/objmgr/util/indexer.hpp b/c++/include/objmgr/util/indexer.hpp index 2f45c496..bcef906e 100644 --- a/c++/include/objmgr/util/indexer.hpp +++ b/c++/include/objmgr/util/indexer.hpp @@ -58,6 +58,7 @@ class CGapIndex; class CDescriptorIndex; class CFeatureIndex; +typedef void (*FAddSnpFunc)(CBioseq_Handle bsh, string& na_acc); // CSeqEntryIndex // @@ -93,7 +94,8 @@ public: eInternal = 1, eExternal = 2, eExhaustive = 3, - eIncremental = 4 + eFtp = 4, + eWeb = 5 }; enum EFlags { @@ -116,21 +118,21 @@ public: // Constructors take the top-level sequence object // The primary constructor uses an existing CScope created by the application - CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1); - CSeqEntryIndex (CBioseq_Handle& bsh, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1); + CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy = eAdaptive, TFlags flags = fDefault); + CSeqEntryIndex (CBioseq_Handle& 
bsh, EPolicy policy = eAdaptive, TFlags flags = fDefault); // Alternative constructors take an object and create a new local default CScope - CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1); - CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1); - CSeqEntryIndex (CBioseq& bioseq, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1); - CSeqEntryIndex (CSeq_submit& submit, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1); + CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy = eAdaptive, TFlags flags = fDefault); + CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy = eAdaptive, TFlags flags = fDefault); + CSeqEntryIndex (CBioseq& bioseq, EPolicy policy = eAdaptive, TFlags flags = fDefault); + CSeqEntryIndex (CSeq_submit& submit, EPolicy policy = eAdaptive, TFlags flags = fDefault); // Specialized constructors are for streaming through release files, one component at a time // Submit-block obtained from top of Seq-submit release file - CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1); + CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy = eAdaptive, TFlags flags = fDefault); // Seq-descr chain obtained from top of Bioseq-set release file - CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1); + CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy = eAdaptive, TFlags flags = fDefault); private: // Prohibit copy constructor & assignment operator @@ -153,14 +155,8 @@ public: CRef GetBioseqIndex (CBioseq_Handle bsh); // Get Bioseq index by mapped feature CRef GetBioseqIndex (const CMappedFeat& mf); - - // Subrange processing creates a new CBioseqIndex around a temporary delta Bioseq - // Get Bioseq index by sublocation CRef GetBioseqIndex 
(const CSeq_loc& loc); - // Get Bioseq index by subrange - CRef GetBioseqIndex (const string& accn, int from, int to, bool rev_comp); - CRef GetBioseqIndex (int from, int to, bool rev_comp); // Seqset exploration iterator template size_t IterateSeqsets (Fnc m); @@ -171,6 +167,18 @@ public: bool DistributedReferences(void); + void SetSnpFunc(FAddSnpFunc* snp); + + FAddSnpFunc* GetSnpFunc(void); + + void SetFeatDepth(int featDepth); + + int GetFeatDepth(void); + + void SetGapDepth(int gapDepth); + + int GetGapDepth(void); + // Check all Bioseqs for failure to fetch remote sequence components or feature annotation bool IsFetchFailure(void); @@ -197,16 +205,16 @@ public: public: // Initializers take the top-level sequence object - void x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth); - void x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth); + void x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags); + void x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags); - void x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth); - void x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth); - void x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth); - void x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth); + void x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags); + void x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags); + void x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags); + void x_Initialize (CSeq_submit& submit, 
CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags); - void x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth); - void x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth); + void x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags); + void x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags); private: // Prohibit copy constructor & assignment operator @@ -229,13 +237,8 @@ public: CRef GetBioseqIndex (string& str); // Get Bioseq index by feature CRef GetBioseqIndex (const CMappedFeat& mf); - - // Subrange processing creates a new CBioseqIndex around a temporary delta Bioseq // Get Bioseq index by sublocation CRef GetBioseqIndex (const CSeq_loc& loc); - // Get Bioseq index by subrange - CRef GetBioseqIndex (const string& accn, int from, int to, bool rev_comp); - CRef GetBioseqIndex (int from, int to, bool rev_comp); // Seqset exploration iterator template size_t IterateSeqsets (Fnc m); @@ -260,6 +263,18 @@ public: bool DistributedReferences (void) const { return m_DistributedReferences; } + void SetSnpFunc(FAddSnpFunc* snp); + + FAddSnpFunc* GetSnpFunc(void); + + void SetFeatDepth(int featDepth); + + int GetFeatDepth(void); + + void SetGapDepth(int gapDepth); + + int GetGapDepth(void); + // Check all Bioseqs for failure to fetch remote sequence components or remote feature annotation bool IsFetchFailure(void); @@ -274,14 +289,6 @@ private: // Recursive exploration to populate vector of index objects for Bioseqs in Seq-entry void x_InitSeqs (const CSeq_entry& sep, CRef prnt, int level = 0); - CRef x_MakeUniqueId(void); - - // Create delta sequence referring to location, using temporary local ID - CRef x_DeltaIndex(const CSeq_loc& loc); - - // Create location from range, to use in 
x_DeltaIndex - CConstRef x_SubRangeLoc(const string& accn, int from, int to, bool rev_comp); - private: CRef m_Objmgr; CRef m_Scope; @@ -294,7 +301,6 @@ private: CSeqEntryIndex::EPolicy m_Policy; CSeqEntryIndex::TFlags m_Flags; - int m_Depth; vector> m_BsxList; @@ -313,6 +319,11 @@ private: bool m_DistributedReferences; + FAddSnpFunc* m_SnpFunc; + + int m_FeatDepth; + int m_GapDepth; + mutable CAtomicCounter m_Counter; bool m_IndexFailure; @@ -391,9 +402,7 @@ public: CRef scope, CSeqMasterIndex& idx, CSeqEntryIndex::EPolicy policy, - CSeqEntryIndex::TFlags flags, - int depth, - bool surrogate); + CSeqEntryIndex::TFlags flags); // Destructor ~CBioseqIndex (void); @@ -412,8 +421,7 @@ public: // Feature exploration iterator template size_t IterateFeatures (Fnc m); - - template size_t IterateFeaturesByLoc (const CSeq_loc& slp, Fnc m); + template size_t IterateFeatures (CSeq_loc& slp, Fnc m); // Getters CBioseq_Handle GetBioseqHandle (void) const { return m_Bsh; } @@ -464,12 +472,14 @@ public: CSeq_inst::TLength GetLength (void) const { return m_Length; } bool IsDelta (void) const { return m_IsDelta; } + bool IsDeltaLitOnly (void) const { return m_IsDeltaLitOnly; } bool IsVirtual (void) const { return m_IsVirtual; } bool IsMap (void) const { return m_IsMap; } // Seq-id fields const string& GetAccession (void) const { return m_Accession; } + bool IsRefSeq (void) const { return m_IsRefSeq; } bool IsNC (void) const { return m_IsNC; } bool IsNM (void) const { return m_IsNM; } bool IsNR (void) const { return m_IsNR; } @@ -515,7 +525,7 @@ public: const string& GetCommon (void); const string& GetLineage (void); - int GetTaxid (void); + TTaxId GetTaxid (void); bool IsUsingAnamorph (void); CTempString GetGenus (void); @@ -584,8 +594,12 @@ private: // Common feature collection, delayed until actually needed void x_InitFeats (void); - // Collect features by location - void x_InitFeatsByLoc (const CSeq_loc& slp); + void x_InitFeats (CSeq_loc& slp); + + void 
x_DefaultSelector(SAnnotSelector& sel, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, bool onlyNear, CScope& scope); + + // common implementation method + void x_InitFeats (CSeq_loc* slpp); // Set BioSource flags void x_InitSource (void); @@ -625,7 +639,6 @@ private: CSeqEntryIndex::EPolicy m_Policy; CSeqEntryIndex::TFlags m_Flags; - int m_Depth; bool m_FetchFailure; @@ -637,12 +650,14 @@ private: CSeq_inst::TLength m_Length; bool m_IsDelta; + bool m_IsDeltaLitOnly; bool m_IsVirtual; bool m_IsMap; // Seq-id fields string m_Accession; + bool m_IsRefSeq; bool m_IsNC; bool m_IsNM; bool m_IsNR; @@ -690,7 +705,7 @@ private: string m_Common; string m_Lineage; - int m_Taxid; + TTaxId m_Taxid; bool m_UsingAnamorph; CTempString m_Genus; @@ -757,9 +772,6 @@ private: // Map fields string m_rEnzyme; - - // true if this index is for a temporary subrange delta Bioseq - bool m_Surrogate; }; @@ -855,6 +867,7 @@ public: // Constructor CFeatureIndex (CSeq_feat_Handle sfh, const CMappedFeat mf, + CConstRef feat_loc, CBioseqIndex& bsx); private: @@ -1071,16 +1084,15 @@ size_t CBioseqIndex::IterateFeatures (Fnc m) return count; } -// Visit CFeatureIndex objects for range of features template inline -size_t CBioseqIndex::IterateFeaturesByLoc (const CSeq_loc& slp, Fnc m) +size_t CBioseqIndex::IterateFeatures (CSeq_loc& slp, Fnc m) { int count = 0; try { // Delay feature collection until first request, but do not bail on m_FeatsInitialized flag - x_InitFeatsByLoc(slp); + x_InitFeats(slp); for (auto& sfx : m_SfxList) { count++; @@ -1088,7 +1100,7 @@ size_t CBioseqIndex::IterateFeaturesByLoc (const CSeq_loc& slp, Fnc m) } } catch (CException& e) { - LOG_POST(Error << "Error in CBioseqIndex::IterateFeaturesByLoc: " << e.what()); + LOG_POST(Error << "Error in CBioseqIndex::IterateFeatures: " << e.what()); } return count; } diff --git a/c++/include/objtools/blast/blastdb_format/blastdb_dataextract.hpp b/c++/include/objtools/blast/blastdb_format/blastdb_dataextract.hpp index 
4cf05b5e..e2991b7e 100644 --- a/c++/include/objtools/blast/blastdb_format/blastdb_dataextract.hpp +++ b/c++/include/objtools/blast/blastdb_format/blastdb_dataextract.hpp @@ -1,4 +1,4 @@ -/* $Id: blastdb_dataextract.hpp 591546 2019-08-16 16:59:06Z vasilche $ +/* $Id: blastdb_dataextract.hpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -141,9 +141,9 @@ protected: /// Cache the defline (for membership bits) CRef m_Defline; /// Pair with a gi2taxid map for one Oid - pair > m_Gi2TaxidMap; + pair > m_Gi2TaxidMap; /// Pair with a gi2taxid-set map for one Oid - pair > > m_Gi2TaxidSetMap; + pair > > m_Gi2TaxidSetMap; /// Pair with a gi2accesion map for one Oid pair > m_Gi2AccMap; /// Pair with a gi2title map for one Oid @@ -156,8 +156,8 @@ protected: bool m_UseLongSeqIds; private: void x_ExtractMaskingData(CSeqDB::TSequenceRanges &ranges, int algo_id); - int x_ExtractTaxId(); - void x_ExtractLeafTaxIds(set& taxids); + TTaxId x_ExtractTaxId(); + void x_ExtractLeafTaxIds(set& taxids); /// Sets the map void x_SetGi2AccMap(); /// Sets the map diff --git a/c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp b/c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp index 18793bde..722319ef 100644 --- a/c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp +++ b/c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS_BLAST__SEQDB__SEQDB_LMDB_HPP #define OBJTOOLS_READERS_BLAST__SEQDB__SEQDB_LMDB_HPP -/* $Id: seqdb_lmdb.hpp 585739 2019-05-03 11:01:28Z fongah2 $ +/* $Id: seqdb_lmdb.hpp 616351 2020-09-15 12:19:15Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -44,6 +44,7 @@ class NCBI_XOBJREAD_EXPORT CSeqDBLMDB : public CObject { public: CSeqDBLMDB(const string & fname); + virtual ~CSeqDBLMDB(); CSeqDBLMDB& operator=(const CSeqDBLMDB&) = delete; 
CSeqDBLMDB(const CSeqDBLMDB&) = delete; @@ -82,21 +83,21 @@ public: /// Get Oids for Tax Ids list, idenitcal Oids are merged. /// @param tax_ids Input tax ids /Output tax ids found /// @param oids Oids found for input tax ids - void GetOidsForTaxIds(const set & tax_ids, vector& oids, vector & tax_ids_found) const; + void GetOidsForTaxIds(const set & tax_ids, vector& oids, vector & tax_ids_found) const; /// Get Oids to exclude for Tax ids /// @parm ids Input tax ids to exclude /Output tax ids found /// @param rv Oids to exclude based on input tax id list - void NegativeTaxIdsToOids(const set& ids, vector& rv, vector & tax_ids_found) const; + void NegativeTaxIdsToOids(const set& ids, vector& rv, vector & tax_ids_found) const; /// Get All Unique Tax Ids for db /// @parma tax_ids Return all unique tax ids found in db - void GetDBTaxIds(vector & tax_ids) const; + void GetDBTaxIds(vector & tax_ids) const; /// Get Tax Ids for oid list /// @param oids Input oid list /// @param tax_ids Output tax id list - void GetTaxIdsForOids(const vector & oids, set & tax_ids) const; + void GetTaxIdsForOids(const vector & oids, set & tax_ids) const; private: string m_LMDBFile; @@ -104,6 +105,7 @@ private: string m_Oid2TaxIdsFile; string m_TaxId2OidsFile; string m_TaxId2OffsetsFile; + mutable bool m_LMDBFileOpened; }; /// Build the canonical LMDB file name for BLAST databases @@ -137,8 +139,8 @@ class NCBI_XOBJREAD_EXPORT CBlastLMDBManager public: static CBlastLMDBManager & GetInstance(); lmdb::env & GetReadEnvVol(const string & fname, MDB_dbi & db_volname, MDB_dbi & db_volinfo); - lmdb::env & GetReadEnvAcc(const string & fname, MDB_dbi & db_acc); - lmdb::env & GetReadEnvTax(const string & fname, MDB_dbi & db_tax); + lmdb::env & GetReadEnvAcc(const string & fname, MDB_dbi & db_acc, bool* opened = 0); + lmdb::env & GetReadEnvTax(const string & fname, MDB_dbi & db_tax, bool* opened = 0); lmdb::env & GetWriteEnv(const string & fname, Uint8 map_size); void CloseEnv(const string & fname); @@ 
-149,7 +151,7 @@ private: public: CBlastEnv(const string & fname, ELMDBFileType file_type, bool read_only = true, Uint8 map_size =0); lmdb::env & GetEnv() { return m_Env; } - const string & GetFilename () { return m_Filename; } + const string & GetFilename () const { return m_Filename; } ~CBlastEnv(); unsigned int AddReference(){ m_Count++; return m_Count;} unsigned int RemoveReference(){ m_Count--; return m_Count;} @@ -162,17 +164,19 @@ private: }; MDB_dbi GetDbi(EDbiType dbi_type); void InitDbi(lmdb::env & env, ELMDBFileType file_type); + void SetMapSize(Uint8 map_size); + bool IsReadOnly() { return m_ReadOnly; } + private: string m_Filename; ELMDBFileType m_FileType; lmdb::env m_Env; unsigned int m_Count; bool m_ReadOnly; - Uint8 m_MapSize; vector m_dbis; }; - CBlastEnv* GetBlastEnv(const string & fname, ELMDBFileType file_type); + CBlastEnv* GetBlastEnv(const string & fname, ELMDBFileType file_type, bool* opened = 0); CBlastLMDBManager(){} ~CBlastLMDBManager(); friend class CSafeStatic_Allocator; diff --git a/c++/include/objtools/blast/seqdb_reader/impl/seqdbtax.hpp b/c++/include/objtools/blast/seqdb_reader/impl/seqdbtax.hpp index 72ffb794..5b4f61b0 100644 --- a/c++/include/objtools/blast/seqdb_reader/impl/seqdbtax.hpp +++ b/c++/include/objtools/blast/seqdb_reader/impl/seqdbtax.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS_SEQDB__SEQDBTAX_HPP #define OBJTOOLS_READERS_SEQDB__SEQDBTAX_HPP -/* $Id: seqdbtax.hpp 553714 2017-12-20 18:36:44Z vakatov $ +/* $Id: seqdbtax.hpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -69,7 +69,7 @@ public: /// @param locked /// The lock holder object for this thread. 
/// @return true if the taxonomic id was found - static bool GetTaxNames(Int4 tax_id, SSeqDBTaxInfo & info); + static bool GetTaxNames(TTaxId tax_id, SSeqDBTaxInfo & info); }; diff --git a/c++/include/objtools/blast/seqdb_reader/seqdb.hpp b/c++/include/objtools/blast/seqdb_reader/seqdb.hpp index 640ec543..1aeed62a 100644 --- a/c++/include/objtools/blast/seqdb_reader/seqdb.hpp +++ b/c++/include/objtools/blast/seqdb_reader/seqdb.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_BLAST_SEQDB_READER___SEQDB__HPP #define OBJTOOLS_BLAST_SEQDB_READER___SEQDB__HPP -/* $Id: seqdb.hpp 605340 2020-04-09 16:06:43Z ivanov $ +/* $Id: seqdb.hpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -532,7 +532,7 @@ public: /// /// This finds the leaf-node TAXIDS associated with a given OID and /// computes a mapping from GI to taxid. This mapping is added to the - /// map> provided by the user. If the "persist" flag is + /// map> provided by the user. If the "persist" flag is /// set to true, the new associations will simply be added to the /// map. If it is false (the default), the map will be cleared /// first. @@ -545,7 +545,7 @@ public: /// If false, the map will be cleared before adding new entries. void GetLeafTaxIDs( int oid, - map >& gi_to_taxid_set, + map >& gi_to_taxid_set, bool persist = false ) const; @@ -567,7 +567,7 @@ public: /// If false, the map will be cleared before adding new entries. void GetLeafTaxIDs( int oid, - vector & taxids, + vector & taxids, bool persist = false ) const; @@ -587,7 +587,7 @@ public: /// @param persist /// If false, the map will be cleared before adding new entries. void GetTaxIDs(int oid, - map & gi_to_taxid, + map & gi_to_taxid, bool persist = false) const; /// Get taxids for an OID. @@ -607,7 +607,7 @@ public: /// @param persist /// If false, the map will be cleared before adding new entries. 
void GetTaxIDs(int oid, - vector & taxids, + vector & taxids, bool persist = false) const; /// Get all tax ids for an oid @@ -618,7 +618,7 @@ public: /// @param taxids /// A returned a set of taxids. void GetAllTaxIDs(int oid, - set & taxids) const; + set & taxids) const; /// Get a CBioseq for a sequence. /// @@ -1213,7 +1213,7 @@ public: /// An integer identifying the taxid to fetch. /// @param info /// A structure containing taxonomic description strings. - static void GetTaxInfo(int taxid, SSeqDBTaxInfo & info); + static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo & info); /// Fetch data as a CSeq_data object. /// @@ -1500,11 +1500,11 @@ public: /// Get Oid list for input tax ids /// @param tax_ids taxonomy ids, return only tax ids found in db // @param rv oids corrpond to tax ids - void TaxIdsToOids(set& tax_ids, vector& rv) const; + void TaxIdsToOids(set& tax_ids, vector& rv) const; /// Get all unique tax ids from db /// @param tax_ids return taxonomy ids in db - void GetDBTaxIds(set & tax_ids) const; + void GetDBTaxIds(set & tax_ids) const; protected: /// Implementation details are hidden. (See seqdbimpl.hpp). 
diff --git a/c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp b/c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp index aabb60c2..1b4ffe33 100644 --- a/c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp +++ b/c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_BLAST_SEQDB_READER___SEQDBCOMMON__HPP #define OBJTOOLS_BLAST_SEQDB_READER___SEQDBCOMMON__HPP -/* $Id: seqdbcommon.hpp 605335 2020-04-09 16:04:38Z ivanov $ +/* $Id: seqdbcommon.hpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -214,7 +214,7 @@ public: }; struct STaxIdsOids { - set tax_ids; + set tax_ids; vector oids; }; @@ -473,7 +473,7 @@ public: void GetPigList(vector& pigs) const; - set & GetTaxIdsList() + set & GetTaxIdsList() { return m_TaxIdsOids.tax_ids; } @@ -507,9 +507,9 @@ public: m_SisOids.push_back(si); } - void AddTaxIds(const set & tax_ids) + void AddTaxIds(const set & tax_ids) { - set & tids = m_TaxIdsOids.tax_ids; + set & tids = m_TaxIdsOids.tax_ids; tids.insert(tax_ids.begin(), tax_ids.end()); } @@ -1078,12 +1078,12 @@ public: return m_ListInfo; } - void AddTaxIds(const set & tax_ids) + void AddTaxIds(const set & tax_ids) { m_TaxIds.insert(tax_ids.begin(), tax_ids.end()); } - set & GetTaxIdsList() + set & GetTaxIdsList() { return m_TaxIds; } @@ -1104,7 +1104,7 @@ protected: /// SeqIds to exclude from the SeqDB instance. vector m_Sis; - set m_TaxIds; + set m_TaxIds; private: /// Prevent copy constructor. @@ -1821,13 +1821,13 @@ typedef map< string, TSeqDBAliasFileVersions > TSeqDBAliasFileValues; struct SSeqDBTaxInfo { /// Default constructor /// @param t the taxonomy ID to set for this structure - SSeqDBTaxInfo(int t = 0) + SSeqDBTaxInfo(TTaxId t = ZERO_TAX_ID) : taxid(t) { } /// An identifier for this species or taxonomic group. - int taxid; + TTaxId taxid; /// Scientific name, such as "Aotus vociferans". 
string scientific_name; diff --git a/c++/include/objtools/blast/seqdb_writer/taxid_set.hpp b/c++/include/objtools/blast/seqdb_writer/taxid_set.hpp index 2e21d63e..5e4337c7 100644 --- a/c++/include/objtools/blast/seqdb_writer/taxid_set.hpp +++ b/c++/include/objtools/blast/seqdb_writer/taxid_set.hpp @@ -1,4 +1,4 @@ -/* $Id: taxid_set.hpp 208050 2010-10-13 15:48:11Z maning $ +/* $Id: taxid_set.hpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -44,9 +44,9 @@ BEGIN_NCBI_SCOPE class NCBI_XOBJWRITE_EXPORT CTaxIdSet : public CObject { public: - static const int kTaxIdNotSet = 0; + static const TTaxId kTaxIdNotSet; - CTaxIdSet(int global_taxid = kTaxIdNotSet) + CTaxIdSet(TTaxId global_taxid = kTaxIdNotSet) : m_GlobalTaxId(global_taxid), m_Matched(true) {} @@ -60,14 +60,14 @@ public: bool HasEverFixedId() const { return m_Matched; }; private: - int m_GlobalTaxId; - map< string, int > m_TaxIdMap; - bool m_Matched; + TTaxId m_GlobalTaxId; + map< string, TTaxId > m_TaxIdMap; + bool m_Matched; /// Selects the most suitable tax id for the input passed in, checking the /// global taxid first, then the mapping provided by an input file, and /// finally what's found in the defline argument - int x_SelectBestTaxid(const objects::CBlast_def_line & defline); + TTaxId x_SelectBestTaxid(const objects::CBlast_def_line & defline); }; diff --git a/c++/include/objtools/blast/seqdb_writer/writedb_lmdb.hpp b/c++/include/objtools/blast/seqdb_writer/writedb_lmdb.hpp index 79913967..785c1ee1 100644 --- a/c++/include/objtools/blast/seqdb_writer/writedb_lmdb.hpp +++ b/c++/include/objtools/blast/seqdb_writer/writedb_lmdb.hpp @@ -48,6 +48,13 @@ USING_SCOPE(objects); BEGIN_NCBI_SCOPE +#ifdef NCBI_OS_MSWIN +#define DEFAULT_LMDB_MAP_SIZE 500000 +#define DEFAULT_TAXID_MAP_SIZE 500000 +#else +#define DEFAULT_LMDB_MAP_SIZE 300000000000 +#define DEFAULT_TAXID_MAP_SIZE 100000000000 +#endif /// This class supports 
creation of a string accession to integer OID /// lmdb database @@ -59,7 +66,7 @@ public: /// Constructor for LMDB write access /// @param dbname Database name - CWriteDB_LMDB(const string& dbname, Uint8 map_size = 300000000000, Uint8 capacity = 500000); + CWriteDB_LMDB(const string& dbname, Uint8 map_size = DEFAULT_LMDB_MAP_SIZE, Uint8 capacity = 500000); // Destructor ~CWriteDB_LMDB(); @@ -93,11 +100,14 @@ private: void x_InsertEntry(const CRef &seqid, const blastdb::TOid oid); void x_CreateOidToSeqidsLookupFile(); void x_Resize(); + void x_IncreaseEnvMapSize(); + void x_IncreaseEnvMapSize(const vector & vol_names, const vector & vol_num_oids); string m_Db; lmdb::env &m_Env; Uint8 m_ListCapacity; unsigned int m_MaxEntryPerTxn; + size_t m_TotalIdsLength; struct SKeyValuePair { string id; blastdb::TOid oid; @@ -130,7 +140,7 @@ public: /// Constructor for LMDB write access /// @param dbname Database name - CWriteDB_TaxID(const string& dbname, Uint8 map_size = 300000000000, Uint8 capacity = 500000); + CWriteDB_TaxID(const string& dbname, Uint8 map_size = DEFAULT_TAXID_MAP_SIZE, Uint8 capacity = 500000); // Destructor ~CWriteDB_TaxID(); @@ -143,13 +153,15 @@ public: /// @param tax_ids list for oid /// @return number of rows added to database /// @see InsertEntry - int InsertEntries(const set & tax_ids, const blastdb::TOid oid); + int InsertEntries(const set & tax_ids, const blastdb::TOid oid); private: void x_CommitTransaction(); void x_CreateOidToTaxIdsLookupFile(); void x_CreateTaxIdToOidsLookupFile(); void x_Resize(); + void x_IncreaseEnvMapSize(); + string m_Db; lmdb::env &m_Env; @@ -157,9 +169,9 @@ private: unsigned int m_MaxEntryPerTxn; template struct SKeyValuePair { - Int4 tax_id; + TTaxId tax_id; valueType value; - SKeyValuePair(int t, valueType v) : tax_id(t), value(v) {} + SKeyValuePair(TTaxId t, valueType v) : tax_id(t), value(v) {} static bool cmp_key(const SKeyValuePair & v, const SKeyValuePair & k) { if(v.tax_id == k.tax_id) { return v.value < k.value; 
diff --git a/c++/include/objtools/cleanup/cleanup.hpp b/c++/include/objtools/cleanup/cleanup.hpp index 404a3d86..6d8bb694 100644 --- a/c++/include/objtools/cleanup/cleanup.hpp +++ b/c++/include/objtools/cleanup/cleanup.hpp @@ -1,7 +1,7 @@ #ifndef CLEANUP___CLEANUP__HPP #define CLEANUP___CLEANUP__HPP -/* $Id: cleanup.hpp 605251 2020-04-08 14:24:56Z ivanov $ +/* $Id: cleanup.hpp 614966 2020-08-25 16:46:33Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -63,6 +63,7 @@ class CSeq_annot_Handle; class CSeq_feat_Handle; class CCleanupChange; +class IObjtoolsListener; class NCBI_CLEANUP_EXPORT CCleanup : public CObject { @@ -414,7 +415,7 @@ public: /// Get labels for a pubdesc. To be used in citations. static void GetPubdescLabels (const CPubdesc& pd, - vector& pmids, vector& muids, vector& serials, + vector& pmids, vector& muids, vector& serials, vector& published_labels, vector& unpublished_labels); /// Get list of pubs that can be used for citations for Seq-feat on a Bioseq-handle @@ -504,7 +505,11 @@ public: /// @param str string from which to parse code break /// @param scope scope in which to find sequences referenced (used for location comparisons) /// @return bool indicates string was successfully parsed and code break was added - static bool ParseCodeBreak(const CSeq_feat& feat, CCdregion& cds, const string& str, CScope& scope); + static bool ParseCodeBreak(const CSeq_feat& feat, + CCdregion& cds, + const CTempString& str, + CScope& scope, + IObjtoolsListener* pMessageListener=nullptr); /// Parses all valid transl_except Gb-quals into code-breaks for cdregion, /// then removes the transl_except Gb-quals that were successfully parsed diff --git a/c++/include/objtools/cleanup/cleanup_message.hpp b/c++/include/objtools/cleanup/cleanup_message.hpp new file mode 100644 index 00000000..fd49384c --- /dev/null +++ b/c++/include/objtools/cleanup/cleanup_message.hpp @@ -0,0 +1,71 @@ +#ifndef 
_CLEANUP_MESSAGE_HPP_ +#define _CLEANUP_MESSAGE_HPP_ + +/* $Id: cleanup_message.hpp 608332 2020-05-14 16:04:14Z ivanov $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Author: Justin Foley + * + * File Description: + * ....... 
+ * + */ + +#include + +BEGIN_NCBI_SCOPE +BEGIN_SCOPE(objects) + +class NCBI_CLEANUP_EXPORT CCleanupMessage : public CObjtoolsMessage +{ +public: + enum class ECode { + eCodeBreak + }; + + enum class ESubcode { + eBadLocation, + eParseError + }; + + CCleanupMessage(string text, EDiagSev sev, ECode code, ESubcode subcode); + + CCleanupMessage *Clone(void) const override; + + int GetCode(void) const override { + return static_cast(m_Code); + } + int GetSubCode(void) const override { + return static_cast(m_Subcode); + } +private: + ECode m_Code; + ESubcode m_Subcode; +}; + +END_SCOPE(objects) +END_NCBI_SCOPE + +#endif // _CLEANUP_MESSAGE_HPP_ diff --git a/c++/include/objtools/data_loaders/blastdb/bdbloader.hpp b/c++/include/objtools/data_loaders/blastdb/bdbloader.hpp index 5b7b34e7..546149e1 100644 --- a/c++/include/objtools/data_loaders/blastdb/bdbloader.hpp +++ b/c++/include/objtools/data_loaders/blastdb/bdbloader.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP #define OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP -/* $Id: bdbloader.hpp 368048 2012-07-02 13:25:25Z camacho $ +/* $Id: bdbloader.hpp 612733 2020-07-27 11:38:27Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -39,6 +39,7 @@ #include #include #include +#include BEGIN_NCBI_SCOPE BEGIN_SCOPE(objects) @@ -151,7 +152,7 @@ public: virtual TTSE_Lock GetBlobById(const TBlobId& blob_id); /// A mapping from sequence identifier to blob ids. 
- typedef map< CSeq_id_Handle, int > TIdMap; + typedef limited_size_map TIdMap; /// @note this is added to temporarily comply with the toolkit's stable /// components rule of having backwards compatible APIs diff --git a/c++/include/objtools/edit/feattable_edit.hpp b/c++/include/objtools/edit/feattable_edit.hpp index d770c4d2..ddce16cf 100644 --- a/c++/include/objtools/edit/feattable_edit.hpp +++ b/c++/include/objtools/edit/feattable_edit.hpp @@ -1,4 +1,4 @@ -/* $Id: feattable_edit.hpp 593415 2019-09-18 14:52:52Z ludwigf $ +/* $Id: feattable_edit.hpp 612521 2020-07-23 11:23:16Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -76,6 +76,7 @@ public: bool forEukaryote); void GenerateMissingParentFeaturesForEukaryote(); void GenerateMissingParentFeaturesForProkaryote(); + void ProcessCodonRecognized(); unsigned int PendingLocusTagNumber() const { return mLocusTagNumber; } @@ -113,6 +114,8 @@ protected: const CMappedFeat& mrna); void xPutErrorDifferingProteinIds( const CMappedFeat& mrna); + void xPutErrorBadCodonRecognized( + const string codonRecognized); void xFeatureAddQualifier( diff --git a/c++/include/objtools/edit/remote_updater.hpp b/c++/include/objtools/edit/remote_updater.hpp index ffffb7e8..83df4ed7 100755 --- a/c++/include/objtools/edit/remote_updater.hpp +++ b/c++/include/objtools/edit/remote_updater.hpp @@ -1,6 +1,41 @@ +/* $Id: remote_updater.hpp 614636 2020-08-20 13:02:57Z fukanchi $ +* =========================================================================== +* +* PUBLIC DOMAIN NOTICE +* National Center for Biotechnology Information +* +* This software/database is a "United States Government Work" under the +* terms of the United States Copyright Act. It was written as part of +* the author's official duties as a United States Government employee and +* thus cannot be copyrighted. This software/database is freely available +* to the public for use. 
The National Library of Medicine and the U.S. +* Government have not placed any restriction on its use or reproduction. +* +* Although all reasonable efforts have been taken to ensure the accuracy +* and reliability of the software and data, the NLM and the U.S. +* Government do not and cannot warrant the performance or results that +* may be obtained by using this software or data. The NLM and the U.S. +* Government disclaim all warranties, express or implied, including +* warranties of performance, merchantability or fitness for any particular +* purpose. +* +* Please cite the author in any work or product based on this material. +* +* =========================================================================== +* +* Authors: Sergiy Gotvyanskyy, NCBI +* Colleen Bolin, NCBI +* +* File Description: +* Front-end class for making remote request to MLA and taxon +* +* =========================================================================== +*/ + #ifndef __REMOTE_UPDATER_HPP_INCLUDED__ #define __REMOTE_UPDATER_HPP_INCLUDED__ +#include #include BEGIN_NCBI_SCOPE @@ -16,6 +51,8 @@ class CSeq_descr; class COrg_ref; class CMLAClient; class CAuth_list; +class IObjtoolsListener; +class CPub; BEGIN_SCOPE(edit) @@ -27,35 +64,51 @@ public: using FLogger = function; + // With this constructor, an exception is thrown + // if the updater cannot retrieve a publication for a PMID. CRemoteUpdater(bool enable_caching = true); + // With this constructor, failure to retrieve + // a publication for a PMID is logged with the supplied message listener. + // If no message listener is supplied, an exception is thrown. 
+ CRemoteUpdater(IObjtoolsListener* pMessageListener); ~CRemoteUpdater(); void UpdatePubReferences(CSerialObject& obj); void UpdatePubReferences(CSeq_entry_EditHandle& obj); + void SetMaxMlaAttempts(int max); - void UpdateOrgFromTaxon(FLogger f_logger, CSeq_entry& entry); + NCBI_DEPRECATED void UpdateOrgFromTaxon(FLogger /*f_logger*/, CSeq_entry& entry); void UpdateOrgFromTaxon(FLogger f_logger, CSeq_entry_EditHandle& obj); - void UpdateOrgFromTaxon(FLogger f_logger, CSeqdesc& obj); + NCBI_DEPRECATED void UpdateOrgFromTaxon(FLogger f_logger, CSeqdesc& obj); + + void UpdateOrgFromTaxon(CSeq_entry& entry); + void UpdateOrgFromTaxon(CSeqdesc& desc); + + void ClearCache(); static void ConvertToStandardAuthors(CAuth_list& auth_list); static void PostProcessPubs(CSeq_entry_EditHandle& obj); static void PostProcessPubs(CSeq_entry& obj); static void PostProcessPubs(CPubdesc& pubdesc); + void SetMLAClient(CMLAClient& mlaClient); // Use either shared singleton or individual instances static CRemoteUpdater& GetInstance(); private: void xUpdatePubReferences(CSeq_entry& entry); void xUpdatePubReferences(CSeq_descr& descr); - void xUpdateOrgTaxname(FLogger f_logger, COrg_ref& org); - + void xUpdateOrgTaxname(FLogger f_logger, COrg_ref& org); + void xUpdateOrgTaxname(COrg_ref& org); + bool xUpdatePubPMID(list>& pubs, TEntrezId id); + IObjtoolsListener* m_pMessageListener=nullptr; CRef m_mlaClient; auto_ptr m_taxClient; - bool m_enable_caching; + bool m_enable_caching=true; CMutex m_Mutex; DECLARE_CLASS_STATIC_MUTEX(m_static_mutex); + int m_MaxMlaAttempts=3; }; END_SCOPE(edit) diff --git a/c++/include/objtools/format/flat_file_config.hpp b/c++/include/objtools/format/flat_file_config.hpp index 0cf3bb21..712ce9bd 100644 --- a/c++/include/objtools/format/flat_file_config.hpp +++ b/c++/include/objtools/format/flat_file_config.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT___FLAT_FILE_CONFIG__HPP #define OBJTOOLS_FORMAT___FLAT_FILE_CONFIG__HPP -/* $Id: flat_file_config.hpp 607400 
2020-05-04 14:18:10Z ivanov $ +/* $Id: flat_file_config.hpp 614736 2020-08-21 13:43:48Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -89,7 +89,7 @@ public: virtual void FormatTranscriptId(string& str, const CSeq_id& seq_id, const string& nuc_id) const = 0; virtual void FormatNucSearch(CNcbiOstream& os, const string& id) const = 0; virtual void FormatNucId(string& str, const CSeq_id& seq_id, TIntId gi, const string& acc_id) const = 0; - virtual void FormatTaxid(string& str, const int taxid, const string& taxname) const = 0; + virtual void FormatTaxid(string& str, const TTaxId taxid, const string& taxname) const = 0; virtual void FormatLocation(string& str, const CSeq_loc& loc, TIntId gi, const string& visible_text) const = 0; virtual void FormatModelEvidence(string& str, const SModelEvidance& me) const = 0; virtual void FormatTranscript(string& str, const string& name) const = 0; @@ -107,7 +107,7 @@ public: void FormatTranscriptId(string& str, const CSeq_id& seq_id, const string& nuc_id) const; void FormatNucSearch(CNcbiOstream& os, const string& id) const; void FormatNucId(string& str, const CSeq_id& seq_id, TIntId gi, const string& acc_id) const; - void FormatTaxid(string& str, const int taxid, const string& taxname) const; + void FormatTaxid(string& str, const TTaxId taxid, const string& taxname) const; void FormatLocation(string& str, const CSeq_loc& loc, TIntId gi, const string& visible_text) const; void FormatModelEvidence(string& str, const SModelEvidance& me) const; void FormatTranscript(string& str, const string& name) const; @@ -200,7 +200,8 @@ public: fIgnoreExistingTitle = 1 << 13, fGeneRNACDSFeatures = 1 << 14, fShowFtablePeptides = 1 << 15, - fDisableReferenceCache = 1 << 16 + fDisableReferenceCache = 1 << 16, + fShowDeflineModifiers = 1 << 17 }; enum EView { @@ -216,7 +217,9 @@ public: ePolicy_Adaptive = 0, ePolicy_Internal, ePolicy_External, - ePolicy_Exhaustive + 
ePolicy_Exhaustive, + ePolicy_Ftp, + ePolicy_Web }; // These flags are used to select the GenBank sections to print or skip. @@ -390,7 +393,8 @@ public: TStyle style = eStyle_Normal, TFlags flags = 0, TView view = fViewNucleotides, - TPolicy policy = ePolicy_Adaptive); + TPolicy policy = ePolicy_Adaptive, + TCustom custom = 0); // destructor ~CFlatFileConfig(void); @@ -494,12 +498,16 @@ public: bool IsPolicyInternal(void) const { return m_Policy == ePolicy_Internal; } bool IsPolicyExternal (void) const { return m_Policy == ePolicy_External; } bool IsPolicyExhaustive (void) const { return m_Policy == ePolicy_Exhaustive; } + bool IsPolicyFtp (void) const { return m_Policy == ePolicy_Ftp; } + bool IsPolicyWeb (void) const { return m_Policy == ePolicy_Web; } // setters void SetPolicy(const TPolicy& Policy) { m_Policy = Policy; } void SetPolicyAdaptive (void) { m_Policy = ePolicy_Adaptive; } void SetPolicyInternal(void) { m_Policy = ePolicy_Internal; } void SetPolicyExternal (void) { m_Policy = ePolicy_External; } void SetPolicyExhaustive (void) { m_Policy = ePolicy_Exhaustive; } + void SetPolicyFtp (void) { m_Policy = ePolicy_Ftp; } + void SetPolicyWeb (void) { m_Policy = ePolicy_Web; } // -- Flags // getters @@ -617,6 +625,7 @@ public: bool GeneRNACDSFeatures (void) const; bool ShowFtablePeptides (void) const; bool DisableReferenceCache (void) const; + bool ShowDeflineModifiers (void) const; // setters void SetCustom(const TCustom& custom) { m_Custom = custom; } @@ -633,10 +642,17 @@ public: CFlatFileConfig& SetGeneRNACDSFeatures (bool val = true); CFlatFileConfig& SetShowFtablePeptides (bool val = true); CFlatFileConfig& SetDisableReferenceCache (bool val = true); + CFlatFileConfig& SetShowDeflineModifiers (bool val = true); // adjust mode dependant flags for RefSeq void SetRefSeqConventions(void); + int GetFeatDepth(void) const { return m_FeatDepth; } + void SetFeatDepth(const int featDepth) { m_FeatDepth = featDepth; } + + int GetGapDepth(void) const { return 
m_GapDepth; } + void SetGapDepth(const int gapDepth) { m_GapDepth = gapDepth; } + void SetGenbankBlocks(const TGenbankBlocks& genbank_blocks) { @@ -723,6 +739,8 @@ private: const ICanceled * m_pCanceledCallback; // instance does NOT own it bool m_BasicCleanup; TCustom m_Custom; + int m_FeatDepth; + int m_GapDepth; #ifdef NEW_HTML_FMT CRef m_html_formatter; #endif @@ -823,6 +841,7 @@ CUSTOM_ARG_IMP(IgnoreExistingTitle) CUSTOM_ARG_IMP(GeneRNACDSFeatures) CUSTOM_ARG_IMP(ShowFtablePeptides) CUSTOM_ARG_IMP(DisableReferenceCache) +CUSTOM_ARG_IMP(ShowDeflineModifiers) #undef FLAG_ARG_IMP #undef FLAG_ARG_GET diff --git a/c++/include/objtools/format/flat_file_generator.hpp b/c++/include/objtools/format/flat_file_generator.hpp index 722a6f36..ec1edb46 100644 --- a/c++/include/objtools/format/flat_file_generator.hpp +++ b/c++/include/objtools/format/flat_file_generator.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT___FLAT_FILE_GENERATOR__HPP #define OBJTOOLS_FORMAT___FLAT_FILE_GENERATOR__HPP -/* $Id: flat_file_generator.hpp 604097 2020-03-23 12:19:07Z ivanov $ +/* $Id: flat_file_generator.hpp 613149 2020-08-03 15:02:23Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -69,7 +69,8 @@ public: CFlatFileConfig::TStyle style = CFlatFileConfig::eStyle_Normal, CFlatFileConfig::TFlags flags = 0, CFlatFileConfig::TView view = CFlatFileConfig::fViewNucleotides, - CFlatFileConfig::TCustom custom = 0); + CFlatFileConfig::TCustom custom = 0, + CFlatFileConfig::TPolicy policy = CFlatFileConfig::ePolicy_Adaptive); // destructor ~CFlatFileGenerator(void); @@ -101,6 +102,7 @@ public: // Versions that loop through Bioseq components void Generate(const CSeq_entry_Handle& entry, CNcbiOstream& os, bool useSeqEntryIndexing); + void Generate(const CBioseq_Handle& bsh, CNcbiOstream& os, bool useSeqEntryIndexing); void Generate(const CSeq_entry_Handle& entry, CFlatItemOStream& item_os, bool useSeqEntryIndexing); void Generate(const 
CSeq_loc& loc, CScope& scope, CNcbiOstream& os, bool useSeqEntryIndexing); diff --git a/c++/include/objtools/format/gather_items.hpp b/c++/include/objtools/format/gather_items.hpp index 271782d0..425d2e93 100644 --- a/c++/include/objtools/format/gather_items.hpp +++ b/c++/include/objtools/format/gather_items.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT___GATHER_ITEMS__HPP #define OBJTOOLS_FORMAT___GATHER_ITEMS__HPP -/* $Id: gather_items.hpp 607397 2020-05-04 14:17:25Z ivanov $ +/* $Id: gather_items.hpp 610489 2020-06-18 14:52:27Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -123,11 +123,12 @@ protected: void x_GatherCDSReferences(TReferences& refs) const; // features - void x_GatherFeatures (void) const; + void x_GatherFeatures (void) const; + void x_GatherFeaturesIdx(void) const; void x_GetFeatsOnCdsProduct(const CSeq_feat& feat, CBioseqContext& ctx, CRef slice_mapper, CConstRef cdsFeatureItem = CConstRef() ) const; - void x_GetFeatsOnCdsProductIdx(CMappedFeat mf,const CSeq_feat& feat, CBioseqContext& ctx, + void x_GetFeatsOnCdsProductIdx(const CSeq_feat& feat, CBioseqContext& ctx, CRef slice_mapper, CConstRef cdsFeatureItem = CConstRef() ) const; static void x_GiveOneResidueIntervalsBogusFuzz(CSeq_loc & loc); @@ -142,8 +143,6 @@ protected: CBioseqContext& ctx) const; void x_GatherFeaturesOnRangeIdx(const CSeq_loc& loc, SAnnotSelector& sel, CBioseqContext& ctx) const; - size_t x_GatherFeaturesOnSegmentIdx(const CSeq_loc& loc, SAnnotSelector& sel, - CBioseqContext& ctx) const; void x_GatherFeaturesOnRange(const CSeq_loc& loc, SAnnotSelector& sel, CBioseqContext& ctx) const; diff --git a/c++/include/objtools/format/items/feature_item.hpp b/c++/include/objtools/format/items/feature_item.hpp index 5bf52e3e..e2f51a8b 100644 --- a/c++/include/objtools/format/items/feature_item.hpp +++ b/c++/include/objtools/format/items/feature_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___FLAT_FEATURE__HPP 
#define OBJTOOLS_FORMAT_ITEMS___FLAT_FEATURE__HPP -/* $Id: feature_item.hpp 604098 2020-03-23 12:19:42Z ivanov $ +/* $Id: feature_item.hpp 615031 2020-08-26 13:38:14Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -240,6 +240,8 @@ protected: void x_AddQualsRegion( CBioseqContext& ); void x_AddQualsProt( CBioseqContext&, bool ); void x_AddQualsPsecStr( CBioseqContext& ); + void x_AddQualsNonStd(CBioseqContext& ctx ); + void x_AddQualsHet( CBioseqContext& ctx ); void x_AddQualsVariation( CBioseqContext& ctx ); @@ -277,6 +279,7 @@ protected: void x_AddFTableSiteQuals(const CSeqFeatData::TSite& site); void x_AddFTablePsecStrQuals(const CSeqFeatData::TPsec_str& psec_str); void x_AddFTablePsecStrQuals(const CSeqFeatData::THet& het); + void x_AddFTableNonStdQuals(const CSeqFeatData::TNon_std_residue& res); void x_AddFTableBiosrcQuals(const CBioSource& src); void x_AddFTableDbxref(const CSeq_feat::TDbxref& dbxref); void x_AddFTableExtQuals(const CSeq_feat::TExt& ext); diff --git a/c++/include/objtools/format/items/flat_qual_slots.hpp b/c++/include/objtools/format/items/flat_qual_slots.hpp index f00aa58a..c0ad4348 100644 --- a/c++/include/objtools/format/items/flat_qual_slots.hpp +++ b/c++/include/objtools/format/items/flat_qual_slots.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FLAT___FLAT_QUAL_SLOTS__HPP #define OBJTOOLS_FLAT___FLAT_QUAL_SLOTS__HPP -/* $Id: flat_qual_slots.hpp 564513 2018-05-29 17:40:10Z kans $ +/* $Id: flat_qual_slots.hpp 613781 2020-08-12 16:42:43Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -107,6 +107,7 @@ enum EFeatureQualifier { eFQ_mol_wt, eFQ_ncRNA_class, eFQ_nomenclature, + eFQ_non_std_residue, eFQ_number, eFQ_old_locus_tag, eFQ_operon, diff --git a/c++/include/objtools/format/items/reference_item.hpp b/c++/include/objtools/format/items/reference_item.hpp index 1ed743b5..d0a42df3 100644 --- 
a/c++/include/objtools/format/items/reference_item.hpp +++ b/c++/include/objtools/format/items/reference_item.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_FORMAT_ITEMS___REFERENCE_ITEM__HPP #define OBJTOOLS_FORMAT_ITEMS___REFERENCE_ITEM__HPP -/* $Id: reference_item.hpp 602636 2020-02-27 20:27:11Z kans $ +/* $Id: reference_item.hpp 614619 2020-08-20 13:00:42Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -157,8 +157,8 @@ public: bool IsSetJournal (void) const { return m_Journal.NotEmpty(); } const CCit_jour& GetJournal (void) const { return *m_Journal; } - int GetPMID (void) const { return m_PMID; } - int GetMUID (void) const { return m_MUID; } + TEntrezId GetPMID (void) const { return m_PMID; } + TEntrezId GetMUID (void) const { return m_MUID; } const string& GetDOI (void) const { return m_DOI; } const string& GetPII (void) const { return m_ELocationPII; } const string& GetOldPII (void) const { return m_OldPII; } @@ -236,8 +236,8 @@ private: CConstRef m_Journal; CConstRef m_Loc; CConstRef m_Date; - int m_PMID; - int m_MUID; + TEntrezId m_PMID; + TEntrezId m_MUID; string m_DOI; string m_ELocationPII; string m_OldPII; @@ -259,7 +259,7 @@ inline const string& CReferenceItem::GetUniqueStr(void) const { // supress creation if other identifiers exist. 
- if (m_MUID == 0 && m_PMID == 0) { + if (m_MUID == ZERO_ENTREZ_ID && m_PMID == ZERO_ENTREZ_ID) { x_CreateUniqueStr(); } return m_UniqueStr; diff --git a/c++/include/objtools/logging/listener.hpp b/c++/include/objtools/logging/listener.hpp index ea45c1f5..ae50a4af 100644 --- a/c++/include/objtools/logging/listener.hpp +++ b/c++/include/objtools/logging/listener.hpp @@ -1,5 +1,5 @@ -/* $Id: listener.hpp 600659 2020-01-24 15:26:41Z foleyjp $ +/* $Id: listener.hpp 608330 2020-05-14 16:03:45Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -34,7 +34,7 @@ #ifndef _OBJTOOLS_LISTENER_HPP_ #define _OBJTOOLS_LISTENER_HPP_ -#include +#include #include BEGIN_NCBI_SCOPE diff --git a/c++/include/objtools/logging/message.hpp b/c++/include/objtools/logging/message.hpp index 9bcd8e62..f0fd15da 100644 --- a/c++/include/objtools/logging/message.hpp +++ b/c++/include/objtools/logging/message.hpp @@ -1,4 +1,4 @@ -/* $Id: message.hpp 599046 2019-12-18 18:34:26Z ludwigf $ +/* $Id: message.hpp 608330 2020-05-14 16:03:45Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -33,23 +33,32 @@ #ifndef _OBJTOOLS_MESSAGE_HPP_ #define _OBJTOOLS_MESSAGE_HPP_ -#include +#include BEGIN_NCBI_SCOPE BEGIN_SCOPE(objects) // ============================================================================ -class NCBI_XOBJUTIL_EXPORT IObjtoolsMessage : public IMessage +class NCBI_XOBJUTIL_EXPORT IObjtoolsMessage // ============================================================================ { public: virtual ~IObjtoolsMessage(void) = default; + virtual IObjtoolsMessage *Clone(void) const = 0; + + virtual void Write(CNcbiOstream& out) const = 0; + virtual void Dump(CNcbiOstream& out) const = 0; virtual void WriteAsXML(CNcbiOstream& out) const = 0; virtual void DumpAsXML(CNcbiOstream& out) const = 0; + + virtual string GetText(void) const = 0; + virtual EDiagSev GetSeverity(void) 
const = 0; + virtual int GetCode(void) const = 0; + virtual int GetSubCode(void) const = 0; }; @@ -63,7 +72,7 @@ public: virtual CObjtoolsMessage *Clone(void) const; - virtual string Compose(void) const; + NCBI_DEPRECATED virtual string Compose(void) const; virtual void Write(CNcbiOstream& out) const; diff --git a/c++/include/objtools/pubseq_gateway/client/psg_client.hpp b/c++/include/objtools/pubseq_gateway/client/psg_client.hpp index bed7e421..82dbd2ac 100644 --- a/c++/include/objtools/pubseq_gateway/client/psg_client.hpp +++ b/c++/include/objtools/pubseq_gateway/client/psg_client.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_HPP #define OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_HPP -/* $Id: psg_client.hpp 599706 2020-01-06 18:04:04Z sadyrovr $ +/* $Id: psg_client.hpp 612393 2020-07-21 13:51:24Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -87,6 +87,9 @@ public: // Get request ID string GetId() const { return x_GetId(); } + /// Set hops + void SetHops(unsigned hops) { m_Hops = hops; } + protected: CPSG_Request(shared_ptr user_context = {}, CRef request_context = {}) @@ -105,6 +108,7 @@ private: shared_ptr m_UserContext; CRef m_RequestContext; + unsigned m_Hops = 0; friend class CPSG_Queue; }; @@ -858,6 +862,20 @@ public: /// Check whether the queue was stopped/reset and is now empty. bool IsEmpty() const; + + /// Check whether the queue has been initialized. + bool IsInitialized() const { return static_cast(m_Impl); } + + + /// Get an API lock. + /// Holding this API lock is essential if numerous short-lived queue instances are used. + /// It prevents an internal I/O implementation (threads, TCP connections, HTTP sessions, etc) + /// from being destroyed (on destroying last remaining queue instance) + /// and then re-created (with new queue instance). 
+ using TApiLock = shared_ptr; + static TApiLock GetApiLock(); + + CPSG_Queue(CPSG_Queue&&); CPSG_Queue& operator=(CPSG_Queue&&); diff --git a/c++/include/objtools/readers/fasta.hpp b/c++/include/objtools/readers/fasta.hpp index 7dcc6fd1..0096f38c 100644 --- a/c++/include/objtools/readers/fasta.hpp +++ b/c++/include/objtools/readers/fasta.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS___FASTA__HPP #define OBJTOOLS_READERS___FASTA__HPP -/* $Id: fasta.hpp 598690 2019-12-12 14:55:16Z foleyjp $ +/* $Id: fasta.hpp 612524 2020-07-23 11:37:59Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -341,6 +341,11 @@ private: TSeqPos line_number, CBioseq& bioseq, ILineErrorListener* pMessageListener); + + void x_SetDeflineParseInfo(SDefLineParseInfo& info); + + bool m_bModifiedMaxIdLength=false; + protected: struct SGap : public CObject { enum EKnownSize { diff --git a/c++/include/objtools/readers/fasta_reader_utils.hpp b/c++/include/objtools/readers/fasta_reader_utils.hpp index 92f0431b..1c5fa79f 100644 --- a/c++/include/objtools/readers/fasta_reader_utils.hpp +++ b/c++/include/objtools/readers/fasta_reader_utils.hpp @@ -1,7 +1,7 @@ #ifndef FASTA_READER_UTILS_HPP #define FASTA_READER_UTILS_HPP -/* $Id: fasta_reader_utils.hpp 599727 2020-01-06 20:18:10Z foleyjp $ +/* $Id: fasta_reader_utils.hpp 612524 2020-07-23 11:37:59Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -76,7 +76,8 @@ public: struct SDeflineParseInfo { TBaseFlags fBaseFlags; TFastaFlags fFastaFlags; - TSeqPos maxIdLength; + TSeqPos maxIdLength=0; // If maxIdLength is zero, the code uses the + // default values specified in CSeq_id TSeqPos lineNumber; }; diff --git a/c++/include/objtools/readers/gff2_reader.hpp b/c++/include/objtools/readers/gff2_reader.hpp index e25d15dc..b13844b4 100644 --- a/c++/include/objtools/readers/gff2_reader.hpp +++ 
b/c++/include/objtools/readers/gff2_reader.hpp @@ -1,4 +1,4 @@ - /* $Id: gff2_reader.hpp 603569 2020-03-12 18:23:57Z ivanov $ + /* $Id: gff2_reader.hpp 610837 2020-06-24 15:29:29Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -117,6 +117,12 @@ public: // // new stuff: // + virtual void xGetData( + ILineReader&, + TReaderData&); + + bool IsInGenbankMode() const; + virtual bool xParseStructuredComment( const string&); @@ -277,6 +283,9 @@ protected: CSeq_feat&, CSeq_feat&); + bool xNeedsNewSeqAnnot( + const string&); + // data: // protected: diff --git a/c++/include/objtools/readers/gff3_reader.hpp b/c++/include/objtools/readers/gff3_reader.hpp index bad353d8..2219b9b3 100644 --- a/c++/include/objtools/readers/gff3_reader.hpp +++ b/c++/include/objtools/readers/gff3_reader.hpp @@ -1,4 +1,4 @@ - /* $Id: gff3_reader.hpp 607807 2020-05-07 18:58:43Z ivanov $ + /* $Id: gff3_reader.hpp 610837 2020-06-24 15:29:29Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -106,13 +106,7 @@ public: ILineReader& lr, ILineErrorListener* pErrors=nullptr); - bool IsInGenbankMode() const; - protected: - virtual void xGetData( - ILineReader&, - TReaderData&); - virtual void xProcessData( const TReaderData&, CSeq_annot&); @@ -216,9 +210,6 @@ protected: virtual bool xParseAlignment( const string& strLine); - bool xNeedsNewSeqAnnot( - const string&); - // Data: map mCdsParentMap; map > mMrnaLocs; diff --git a/c++/include/objtools/readers/gtf_reader.hpp b/c++/include/objtools/readers/gtf_reader.hpp index cab127b8..16c05938 100644 --- a/c++/include/objtools/readers/gtf_reader.hpp +++ b/c++/include/objtools/readers/gtf_reader.hpp @@ -1,4 +1,4 @@ - /* $Id: gtf_reader.hpp 598212 2019-12-05 12:32:15Z ludwigf $ + /* $Id: gtf_reader.hpp 610647 2020-06-22 11:31:17Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN 
NOTICE @@ -310,7 +310,7 @@ protected: const CGtfAttributes::MultiValue&, CSeq_feat&); - bool xCdsIsPartial( + NCBI_DEPRECATED bool xCdsIsPartial( const CGtfReadRecord& ); typedef map< string, CRef< CSeq_feat > > TIdToFeature; diff --git a/c++/include/objtools/readers/message_listener.hpp b/c++/include/objtools/readers/message_listener.hpp index 328434fc..fa22fc32 100644 --- a/c++/include/objtools/readers/message_listener.hpp +++ b/c++/include/objtools/readers/message_listener.hpp @@ -1,4 +1,4 @@ -/* $Id: message_listener.hpp 600664 2020-01-24 15:57:16Z foleyjp $ +/* $Id: message_listener.hpp 608381 2020-05-15 12:43:35Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -51,13 +51,19 @@ public: virtual ~ILineErrorListener() {} // IListener::Post() implementation - virtual void Post(const IMessage& message) + NCBI_STD_DEPRECATED("This method is no longer functional and will be removed in SC-25.") + virtual void Post(const IMessage& /*message*/) { - const ILineError* le = dynamic_cast(&message); - if (!le) return; - PutError(*le); + // Remove in SC-25 + return; } + NCBI_STD_DEPRECATED("This method is redundant and will be removed in SC-25.") + virtual void Push(const IObjtoolsMessage& message) + { + // Remove in SC-25 + PutMessage(message); + } /// Store error in the container, and /// return true if error was stored fine, and /// return false if the caller should terminate all further processing. @@ -74,13 +80,12 @@ public: } // IListener::Get() implementation - virtual const IMessage& Get(size_t index) const - { return const_cast(this)->GetError(index); } + virtual const ILineError& Get(size_t index) const + { return this->GetError(index); } /// 0-based error retrieval. 
virtual const ILineError& - GetError( - size_t ) =0; + GetError(size_t ) const =0; virtual size_t Count(void) const = 0; @@ -105,7 +110,7 @@ public: const Uint8 iNumDone = 0, const Uint8 iNumTotal = 0 ) = 0; - virtual const IMessage& GetMessage(size_t index) const + virtual const ILineError& GetMessage(size_t index) const { return Get(index); } virtual void Clear(void) @@ -150,7 +155,7 @@ public: const ILineError& GetError( - size_t uPos ) { + size_t uPos ) const { return *dynamic_cast(m_Errors[ uPos ].get()); } virtual void Dump() diff --git a/c++/include/serial/grpc_integration/grpc_integration.hpp b/c++/include/serial/grpc_integration/grpc_integration.hpp index 8dde2efb..8291b637 100644 --- a/c++/include/serial/grpc_integration/grpc_integration.hpp +++ b/c++/include/serial/grpc_integration/grpc_integration.hpp @@ -1,7 +1,7 @@ #ifndef SERIAL_GRPC_INTEGRATION___GRPC_INTEGRATION__HPP #define SERIAL_GRPC_INTEGRATION___GRPC_INTEGRATION__HPP -/* $Id: grpc_integration.hpp 606576 2020-04-23 17:12:06Z ivanov $ +/* $Id: grpc_integration.hpp 608310 2020-05-14 12:35:38Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -114,11 +114,13 @@ public: /// (in order of priority): /// - Config file entry "[section] variable" /// - Environment variables: env_var_name (if not empty/NULL); -/// then "NCBI_CONFIG__
__"; then "grpc_proxy" +/// then "NCBI_CONFIG__
__"; then "GRPC_PROXY" /// - The hard-coded NCBI default "linkerd:4142" +/// The value_source (if not null) will get CParamBase::EParamSource value string g_NCBI_GRPC_GetAddress(const char* section, const char* variable, - const char* env_var_name = nullptr); + const char* env_var_name = nullptr, + int* value_source = nullptr); END_NCBI_SCOPE diff --git a/c++/include/serial/grpc_integration/impl/grpc_support.hpp b/c++/include/serial/grpc_integration/impl/grpc_support.hpp index fc1a7e13..1bf0f396 100644 --- a/c++/include/serial/grpc_integration/impl/grpc_support.hpp +++ b/c++/include/serial/grpc_integration/impl/grpc_support.hpp @@ -1,7 +1,7 @@ #ifndef SERIAL_GRPC_INTEGRATION_IMPL___GRPC_SUPPORT__HPP #define SERIAL_GRPC_INTEGRATION_IMPL___GRPC_SUPPORT__HPP -/* $Id: grpc_support.hpp 607417 2020-05-04 15:40:44Z ivanov $ +/* $Id: grpc_support.hpp 608345 2020-05-14 18:36:54Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -38,7 +38,7 @@ #include #include #include -#ifdef HAVE_LIBPROTOBUF +#ifdef HAVE_LIBGRPC // HAVE_LIBPROTOBUF # include # if GOOGLE_PROTOBUF_VERSION >= 3002000 # define NCBI_GRPC_GET_BYTE_SIZE(msg) ((msg).ByteSizeLong()) @@ -103,7 +103,7 @@ public: private: CDiagContext& m_DiagContext; CRequestContext& m_RequestContext; -#ifdef HAVE_LIBPROTOBUF +#ifdef HAVE_LIBGRPC // HAVE_LIBPROTOBUF const TMessage& m_Reply; #endif bool m_ManagingRequest; @@ -169,7 +169,7 @@ CGRPCRequestLogger::CGRPCRequestLogger(TGRPCServerContext* sctx, const TMessage& reply) : m_DiagContext(GetDiagContext()), m_RequestContext(m_DiagContext.GetRequestContext()), -#ifdef HAVE_LIBPROTOBUF +#ifdef HAVE_LIBGRPC // HAVE_LIBPROTOBUF m_Reply(reply), #endif m_ManagingRequest(false) diff --git a/c++/include/serial/rpcbase.hpp b/c++/include/serial/rpcbase.hpp index 3d630b90..1be4fdaa 100644 --- a/c++/include/serial/rpcbase.hpp +++ b/c++/include/serial/rpcbase.hpp @@ -1,7 +1,7 @@ #ifndef SERIAL___RPCBASE__HPP #define 
SERIAL___RPCBASE__HPP -/* $Id: rpcbase.hpp 588592 2019-06-26 18:52:32Z ucko $ +/* $Id: rpcbase.hpp 615694 2020-09-02 18:14:03Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -62,12 +62,21 @@ class CRPCClient : public CObject, protected CConnIniter { public: - CRPCClient(const string& service = kEmptyStr, - ESerialDataFormat format = eSerial_AsnBinary, - unsigned int retry_limit = 3) - : CRPCClient_Base(service, format, retry_limit), + CRPCClient(const string& service = kEmptyStr) + : CRPCClient_Base(service, eSerial_AsnBinary), m_Timeout(kDefaultTimeout) {} + CRPCClient(const string& service, + ESerialDataFormat format) + : CRPCClient_Base(service, format), + m_Timeout(kDefaultTimeout) + {} + CRPCClient(const string& service, + ESerialDataFormat format, + unsigned int try_limit) + : CRPCClient_Base(service, format, try_limit), + m_Timeout(kDefaultTimeout) + {} virtual ~CRPCClient(void) { if ( !sx_IsSpecial(m_Timeout) ) { diff --git a/c++/include/serial/rpcbase_impl.hpp b/c++/include/serial/rpcbase_impl.hpp index c895fe07..b58c25cf 100644 --- a/c++/include/serial/rpcbase_impl.hpp +++ b/c++/include/serial/rpcbase_impl.hpp @@ -1,7 +1,7 @@ #ifndef SERIAL___RPCBASE_IMPL__HPP #define SERIAL___RPCBASE_IMPL__HPP -/* $Id: rpcbase_impl.hpp 588592 2019-06-26 18:52:32Z ucko $ +/* $Id: rpcbase_impl.hpp 615694 2020-09-02 18:14:03Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -55,8 +55,10 @@ class NCBI_XSERIAL_EXPORT CRPCClient_Base { public: CRPCClient_Base(const string& service, - ESerialDataFormat format, - unsigned int retry_limit); + ESerialDataFormat format); + CRPCClient_Base(const string& service, + ESerialDataFormat format, + unsigned int try_limit); virtual ~CRPCClient_Base(void); void Connect(void); @@ -75,9 +77,24 @@ public: ESerialDataFormat GetFormat(void) const { return m_Format; } void SetFormat(ESerialDataFormat fmt) { m_Format 
= fmt; } - unsigned int GetRetryLimit(void) const { return m_RetryLimit; } - void SetRetryLimit(unsigned int n) { m_RetryLimit = n; } - + /// Get number of request attempts. If not set explicitly through SetTryLimit + /// or constructor argument, the following values are used: + /// - __RPC_CLIENT__MAX_TRY environment varialbe + /// - [service_name.rpc_client] section, max_try value in the INI file + /// - 3 (global default) + unsigned int GetTryLimit(void) const { return m_TryLimit; } + void SetTryLimit(unsigned int n) { m_TryLimit = n > 0 ? n : 3; } + /// @deprecated Use GetTryLimit() + NCBI_DEPRECATED + unsigned int GetRetryLimit(void) const { return GetTryLimit(); } + /// @deprecated Use SetTryLimit() + NCBI_DEPRECATED + void SetRetryLimit(unsigned int n) { SetTryLimit(n); } + + /// Get retry delay. If not set explicitly through SetRetryDelay, the following values are used: + /// - __RPC_CLIENT__RETRY_DELAY environment varialbe + /// - [service_name.rpc_client] section, retry_delay value in the INI file + /// - 0 (global default) const CTimeSpan GetRetryDelay(void) const { return m_RetryDelay; } void SetRetryDelay(const CTimeSpan& ts) { m_RetryDelay = ts; } @@ -111,7 +128,7 @@ private: ESerialDataFormat m_Format; CMutex m_Mutex; ///< To allow sharing across threads. CTimeSpan m_RetryDelay; - unsigned int m_RetryCount; + unsigned int m_TryCount; int m_RecursionCount; protected: @@ -121,7 +138,7 @@ protected: unique_ptr m_In; unique_ptr m_Out; string m_Affinity; - unsigned int m_RetryLimit; + unsigned int m_TryLimit; CHttpRetryContext m_RetryCtx; CConstIRef m_Canceler; @@ -129,7 +146,7 @@ protected: // true. May reset the connection (or do anything else, really), // but note that Ask() will always automatically reconnect if the // stream is explicitly bad. (Ask() also takes care of enforcing - // m_RetryLimit.) + // m_TryLimit.) virtual bool x_ShouldRetry(unsigned int tries); // Calculate effective retry delay. 
Returns value from CRetryContext diff --git a/c++/include/serial/streamiter.hpp b/c++/include/serial/streamiter.hpp index ebe11e52..0c2a40ef 100644 --- a/c++/include/serial/streamiter.hpp +++ b/c++/include/serial/streamiter.hpp @@ -1,7 +1,7 @@ #ifndef STREAMITER__HPP #define STREAMITER__HPP -/* $Id: streamiter.hpp 583376 2019-03-27 18:06:15Z dicuccio $ +/* $Id: streamiter.hpp 609566 2020-06-03 19:29:58Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -924,7 +924,7 @@ CObjectIStreamIterator::CData::x_NextSeqWithFilter(const CObjectInfo& obj case eAllRandom: done.insert(mi_now); // no break - /* FALLTHROUGH */ + NCBI_FALLTHROUGH; case eAllSeq: { CObjectInfo oi = minfo.GetMember().GetTypeFamily() == eTypeFamilyPointer ? diff --git a/c++/include/util/bitset/bm.h b/c++/include/util/bitset/bm.h index 14822cf1..e2952658 100644 --- a/c++/include/util/bitset/bm.h +++ b/c++/include/util/bitset/bm.h @@ -145,19 +145,19 @@ public: class reference { public: - reference(bvector& bv, size_type position) + reference(bvector& bv, size_type position) BMNOEXCEPT : bv_(bv), position_(position) {} - reference(const reference& ref) + reference(const reference& ref) BMNOEXCEPT : bv_(ref.bv_), position_(ref.position_) { bv_.set(position_, ref.bv_.get_bit(position_)); } - operator bool() const + operator bool() const BMNOEXCEPT { return bv_.get_bit(position_); } @@ -168,13 +168,13 @@ public: return *this; } - const reference& operator=(bool value) const + const reference& operator=(bool value) const BMNOEXCEPT { bv_.set(position_, value); return *this; } - bool operator==(const reference& ref) const + bool operator==(const reference& ref) const BMNOEXCEPT { return bool(*this) == bool(ref); } @@ -204,13 +204,13 @@ public: } /*! Logical Not operator */ - bool operator!() const + bool operator!() const BMNOEXCEPT { return !bv_.get_bit(position_); } /*! 
Bit Not operator */ - bool operator~() const + bool operator~() const BMNOEXCEPT { return !bv_.get_bit(position_); } @@ -237,34 +237,37 @@ public: { friend class bvector; public: - iterator_base() : bv_(0), position_(bm::id_max), block_(0) {} + iterator_base() BMNOEXCEPT + : bv_(0), position_(bm::id_max), block_(0), block_type_(0), + block_idx_(0) + {} - bool operator==(const iterator_base& it) const + bool operator==(const iterator_base& it) const BMNOEXCEPT { return (position_ == it.position_) && (bv_ == it.bv_); } - bool operator!=(const iterator_base& it) const + bool operator!=(const iterator_base& it) const BMNOEXCEPT { return ! operator==(it); } - bool operator < (const iterator_base& it) const + bool operator < (const iterator_base& it) const BMNOEXCEPT { return position_ < it.position_; } - bool operator <= (const iterator_base& it) const + bool operator <= (const iterator_base& it) const BMNOEXCEPT { return position_ <= it.position_; } - bool operator > (const iterator_base& it) const + bool operator > (const iterator_base& it) const BMNOEXCEPT { return position_ > it.position_; } - bool operator >= (const iterator_base& it) const + bool operator >= (const iterator_base& it) const BMNOEXCEPT { return position_ >= it.position_; } @@ -274,18 +277,19 @@ public: \brief Checks if iterator is still valid. Analog of != 0 comparison for pointers. \returns true if iterator is valid. */ - bool valid() const { return position_ != bm::id_max; } + bool valid() const BMNOEXCEPT { return position_ != bm::id_max; } /** \fn bool bm::bvector::iterator_base::invalidate() \brief Turns iterator into an invalid state. 
*/ - void invalidate() { position_ = bm::id_max; } + void invalidate() BMNOEXCEPT + { position_ = bm::id_max; block_type_ = ~0u;} /** \brief Compare FSMs for testing purposes \internal */ - bool compare_state(const iterator_base& ib) const + bool compare_state(const iterator_base& ib) const BMNOEXCEPT { if (this->bv_ != ib.bv_) return false; if (this->position_ != ib.position_) return false; @@ -317,7 +321,9 @@ public: public: - /** Information about current bitblock. */ + /** Bit-block descriptor + @internal + */ struct bitblock_descr { const bm::word_t* ptr; //!< Word pointer. @@ -327,7 +333,9 @@ public: size_type pos; //!< Last bit position decode before }; - /** Information about current DGAP block. */ + /** Information about current DGAP block. + @internal + */ struct dgap_descr { const gap_word_t* ptr; //!< Word pointer. @@ -379,9 +387,9 @@ public: typedef void pointer; typedef void reference; - insert_iterator() : bvect_(0), max_bit_(0) {} + insert_iterator() BMNOEXCEPT : bvect_(0), max_bit_(0) {} - insert_iterator(bvector& bvect) + insert_iterator(bvector& bvect) BMNOEXCEPT : bvect_(&bvect), max_bit_(bvect.size()) { @@ -463,7 +471,7 @@ public: typedef void pointer; typedef void reference; - bulk_insert_iterator() + bulk_insert_iterator() BMNOEXCEPT : bvect_(0), buf_(0), buf_size_(0), sorted_(BM_UNKNOWN) {} ~bulk_insert_iterator() @@ -473,7 +481,8 @@ public: bvect_->blockman_.get_allocator().free_bit_block((bm::word_t*)buf_); } - bulk_insert_iterator(bvector& bvect, bm::sort_order so = BM_UNKNOWN) + bulk_insert_iterator(bvector& bvect, + bm::sort_order so = BM_UNKNOWN) BMNOEXCEPT : bvect_(&bvect), sorted_(so) { bvect_->init(); @@ -499,7 +508,7 @@ public: sorted_ = BM_UNKNOWN; } - bulk_insert_iterator(bulk_insert_iterator&& iit) BMNOEXEPT + bulk_insert_iterator(bulk_insert_iterator&& iit) BMNOEXCEPT : bvect_(iit.bvect_) { buf_ = iit.buf_; iit.buf_ = 0; @@ -518,7 +527,7 @@ public: return *this; } - bulk_insert_iterator& operator=(bulk_insert_iterator&& ii) 
BMNOEXEPT + bulk_insert_iterator& operator=(bulk_insert_iterator&& ii) BMNOEXCEPT { bvect_ = ii.bvect_; if (buf_) @@ -562,11 +571,11 @@ public: bvect_->sync_size(); } - bvector_type* get_bvector() const { return bvect_; } + bvector_type* get_bvector() const BMNOEXCEPT { return bvect_; } protected: static - size_type buf_size_max() + size_type buf_size_max() BMNOEXCEPT { #ifdef BM64ADDR return bm::set_block_size / 2; @@ -599,26 +608,40 @@ public: typedef unsigned& reference; public: - enumerator() : iterator_base() + enumerator() BMNOEXCEPT : iterator_base() {} /*! @brief Construct enumerator associated with a vector. This construction creates unpositioned iterator with status valid() == false. It can be re-positioned using go_first() or go_to() */ - enumerator(const bvector* bv) + enumerator(const bvector* bv) BMNOEXCEPT : iterator_base() { this->bv_ = const_cast*>(bv); } + /*! @brief Construct enumerator for bit vector + @param bv bit-vector reference + @param pos bit position in the vector + if position is 0, it finds the next 1 or becomes not valid + (en.valid() == false) + */ + enumerator(const bvector& bv, size_type pos = 0) BMNOEXCEPT + : iterator_base() + { + this->bv_ = const_cast*>(&bv); + go_to(pos); + } + + /*! @brief Construct enumerator for bit vector @param bv bit-vector pointer @param pos bit position in the vector if position is 0, it finds the next 1 or becomes not valid (en.valid() == false) */ - enumerator(const bvector* bv, size_type pos) + enumerator(const bvector* bv, size_type pos) BMNOEXCEPT : iterator_base() { this->bv_ = const_cast*>(bv); @@ -626,505 +649,72 @@ public: } /*! \brief Get current position (value) */ - size_type operator*() const { return this->position_; } + size_type operator*() const BMNOEXCEPT { return this->position_; } /*! \brief Get current position (value) */ - size_type value() const { return this->position_; } + size_type value() const BMNOEXCEPT { return this->position_; } /*! 
\brief Advance enumerator forward to the next available bit */ - enumerator& operator++() { return this->go_up(); } + enumerator& operator++() BMNOEXCEPT { this->go_up(); return *this; } /*! \brief Advance enumerator forward to the next available bit. Possibly do NOT use this operator it is slower than the pre-fix increment. */ - enumerator operator++(int) + enumerator operator++(int) BMNOEXCEPT { enumerator tmp = *this; this->go_up(); return tmp; } - /*! \brief Position enumerator to the first available bit */ - void go_first() - { - BM_ASSERT(this->bv_); - - blocks_manager_type* bman = &(this->bv_->blockman_); - if (!bman->is_init()) - { - this->invalidate(); - return; - } - - bm::word_t*** blk_root = bman->top_blocks_root(); - - this->block_idx_ = this->position_= 0; - unsigned i, j; - - for (i = 0; i < bman->top_block_size(); ++i) - { - bm::word_t** blk_blk = blk_root[i]; - - if (blk_blk == 0) // not allocated - { - this->block_idx_ += bm::set_sub_array_size; - this->position_ += bm::bits_in_array; - continue; - } - - if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) - blk_blk = FULL_SUB_BLOCK_REAL_ADDR; - - for (j = 0; j < bm::set_sub_array_size; ++j,++(this->block_idx_)) - { - this->block_ = blk_blk[j]; - - if (this->block_ == 0) - { - this->position_ += bits_in_block; - continue; - } - - if (BM_IS_GAP(this->block_)) - { - this->block_type_ = 1; - if (search_in_gapblock()) - { - return; - } - } - else - { - if (this->block_ == FULL_BLOCK_FAKE_ADDR) - this->block_ = FULL_BLOCK_REAL_ADDR; - - this->block_type_ = 0; - if (search_in_bitblock()) - { - return; - } - } - - } // for j - - } // for i - - this->invalidate(); - } - - /// advance iterator forward by one - void advance() { this->go_up(); } + void go_first() BMNOEXCEPT; + /*! advance iterator forward by one + @return true if advance was successfull and the enumerator is valid + */ + bool advance() BMNOEXCEPT { return this->go_up(); } /*! 
\brief Advance enumerator to the next available bit */ - enumerator& go_up() - { - BM_ASSERT(this->valid()); - BM_ASSERT_THROW(this->valid(), BM_ERR_RANGE); + bool go_up() BMNOEXCEPT; - // Current block search. - // - - block_descr_type* bdescr = &(this->bdescr_); - switch (this->block_type_) - { - case 0: // BitBlock - { - // check if we can get the value from the bits traversal cache - unsigned short idx = ++(bdescr->bit_.idx); - if (idx < bdescr->bit_.cnt) - { - this->position_ = bdescr->bit_.pos + bdescr->bit_.bits[idx]; - return *this; - } - this->position_ += - (bm::set_bitscan_wave_size * 32) - bdescr->bit_.bits[--idx]; - - bdescr->bit_.ptr += bm::set_bitscan_wave_size; - if (decode_bit_group(bdescr)) - { - return *this; - } - } - break; - case 1: // DGAP Block - { - ++this->position_; - if (--(bdescr->gap_.gap_len)) - { - return *this; - } - - // next gap is "OFF" by definition. - if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1) - { - break; - } - gap_word_t prev = *(bdescr->gap_.ptr); - unsigned int val = *(++(bdescr->gap_.ptr)); - - this->position_ += val - prev; - // next gap is now "ON" - if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1) - { - break; - } - prev = *(bdescr->gap_.ptr); - val = *(++(bdescr->gap_.ptr)); - bdescr->gap_.gap_len = (gap_word_t)(val - prev); - return *this; // next "ON" found; - } - default: - BM_ASSERT(0); - - } // switch - - if (search_in_blocks()) - return *this; - - this->invalidate(); - return *this; - } - /*! @brief Skip to specified relative rank - @param rank - number of ON bits to go for + @param rank - number of ON bits to go for (must be: > 0) + @return true if skip was successfull and enumerator is valid */ - enumerator& skip_to_rank(size_type rank) + bool skip_to_rank(size_type rank) BMNOEXCEPT { + BM_ASSERT(rank); --rank; if (!rank) - return *this; + return this->valid(); return skip(rank); } /*! 
@brief Skip specified number of bits from enumeration @param rank - number of ON bits to skip + @return true if skip was successfull and enumerator is valid */ - enumerator& skip(size_type rank) - { - if (!this->valid() || !rank) - return *this; - for (; rank; --rank) - { - block_descr_type* bdescr = &(this->bdescr_); - switch (this->block_type_) - { - case 0: // BitBlock - for (; rank; --rank) - { - unsigned short idx = ++(bdescr->bit_.idx); - if (idx < bdescr->bit_.cnt) - { - this->position_ = bdescr->bit_.pos + bdescr->bit_.bits[idx]; - continue; - } - this->position_ += - (bm::set_bitscan_wave_size * 32) - bdescr->bit_.bits[--idx]; - bdescr->bit_.ptr += bm::set_bitscan_wave_size; - - if (!decode_bit_group(bdescr, rank)) - break; - } // for rank - break; - case 1: // DGAP Block - for (; rank; --rank) // TODO: better skip logic - { - ++this->position_; - if (--(bdescr->gap_.gap_len)) - { - continue; - } - - // next gap is "OFF" by definition. - if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1) - { - break; - } - gap_word_t prev = *(bdescr->gap_.ptr); - unsigned int val = *(++(bdescr->gap_.ptr)); - - this->position_ += val - prev; - // next gap is now "ON" - if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1) - { - break; - } - prev = *(bdescr->gap_.ptr); - val = *(++(bdescr->gap_.ptr)); - bdescr->gap_.gap_len = (gap_word_t)(val - prev); - } // for rank - break; - default: - BM_ASSERT(0); - } // switch - - if (!rank) - return *this; + bool skip(size_type rank) BMNOEXCEPT; - if (!search_in_blocks()) - { - this->invalidate(); - return *this; - } - } // for rank - return *this; - } - /*! 
@brief go to a specific position in the bit-vector (or next) */ - enumerator& go_to(size_type pos) - { - if (pos == 0) - { - go_first(); - return *this; - } - - size_type new_pos = this->bv_->check_or_next(pos); // find the true pos - if (new_pos == 0) // no bits available - { - this->invalidate(); - return *this; - } - BM_ASSERT(new_pos >= pos); - pos = new_pos; - - - this->position_ = pos; - size_type nb = this->block_idx_ = (pos >> bm::set_block_shift); - bm::bvector::blocks_manager_type& bman = - this->bv_->get_blocks_manager(); - unsigned i0, j0; - bm::get_block_coord(nb, i0, j0); - this->block_ = bman.get_block(i0, j0); - - BM_ASSERT(this->block_); - - this->block_type_ = (bool)BM_IS_GAP(this->block_); - - block_descr_type* bdescr = &(this->bdescr_); - unsigned nbit = unsigned(pos & bm::set_block_mask); - - if (this->block_type_) // gap - { - this->position_ = nb * bm::set_block_size * 32; - search_in_gapblock(); - - if (this->position_ == pos) - return *this; - this->position_ = pos; - - gap_word_t* gptr = BMGAP_PTR(this->block_); - unsigned is_set; - unsigned gpos = bm::gap_bfind(gptr, nbit, &is_set); - BM_ASSERT(is_set); - - bdescr->gap_.ptr = gptr + gpos; - if (gpos == 1) - { - bdescr->gap_.gap_len = bm::gap_word_t(gptr[gpos] - (nbit - 1)); - } - else - { - bm::gap_word_t interval = bm::gap_word_t(gptr[gpos] - gptr[gpos - 1]); - bm::gap_word_t interval2 = bm::gap_word_t(nbit - gptr[gpos - 1]); - bdescr->gap_.gap_len = bm::gap_word_t(interval - interval2 + 1); - } - } - else // bit - { - if (nbit == 0) - { - search_in_bitblock(); - return *this; - } - - unsigned nword = unsigned(nbit >> bm::set_word_shift); - - // check if we need to step back to match the wave - unsigned parity = nword % bm::set_bitscan_wave_size; - bdescr->bit_.ptr = this->block_ + (nword - parity); - bdescr->bit_.cnt = bm::bitscan_wave(bdescr->bit_.ptr, bdescr->bit_.bits); - BM_ASSERT(bdescr->bit_.cnt); - bdescr->bit_.pos = (nb * bm::set_block_size * 32) + ((nword - parity) * 32); - 
bdescr->bit_.idx = 0; - nbit &= bm::set_word_mask; - nbit += 32 * parity; - for (unsigned i = 0; i < bdescr->bit_.cnt; ++i) - { - if (bdescr->bit_.bits[i] == nbit) - return *this; - bdescr->bit_.idx++; - } // for - BM_ASSERT(0); - } - return *this; - } - + bool go_to(size_type pos) BMNOEXCEPT; private: typedef typename iterator_base::block_descr block_descr_type; - bool decode_wave(block_descr_type* bdescr) - { - bdescr->bit_.cnt = bm::bitscan_wave(bdescr->bit_.ptr, bdescr->bit_.bits); - if (bdescr->bit_.cnt) // found - { - bdescr->bit_.idx ^= bdescr->bit_.idx; // = 0; - bdescr->bit_.pos = this->position_; - this->position_ += bdescr->bit_.bits[0]; - return true; - } - return false; - } - - bool decode_bit_group(block_descr_type* bdescr) - { - const word_t* block_end = this->block_ + bm::set_block_size; - for (; bdescr->bit_.ptr < block_end;) - { - if (decode_wave(bdescr)) - return true; - this->position_ += bm::set_bitscan_wave_size * 32; // wave size - bdescr->bit_.ptr += bm::set_bitscan_wave_size; - } // for - return false; - } - - bool decode_bit_group(block_descr_type* bdescr, size_type& rank) - { - const word_t* block_end = this->block_ + bm::set_block_size; - - for (; bdescr->bit_.ptr < block_end;) - { - const bm::id64_t* w64_p = (bm::id64_t*)bdescr->bit_.ptr; - bm::id64_t w64 = *w64_p; - unsigned cnt = bm::word_bitcount64(w64); - if (rank > cnt) - { - rank -= cnt; - } - else - { - if (decode_wave(bdescr)) - return true; - } - this->position_ += bm::set_bitscan_wave_size * 32; // wave size - bdescr->bit_.ptr += bm::set_bitscan_wave_size; - } // for - return false; - } - - bool search_in_bitblock() - { - BM_ASSERT(this->block_type_ == 0); - - block_descr_type* bdescr = &(this->bdescr_); - bdescr->bit_.ptr = this->block_; - - return decode_bit_group(bdescr); - } - - bool search_in_gapblock() - { - BM_ASSERT(this->block_type_ == 1); - - block_descr_type* bdescr = &(this->bdescr_); - bdescr->gap_.ptr = BMGAP_PTR(this->block_); - unsigned bitval = 
*(bdescr->gap_.ptr) & 1; - - ++(bdescr->gap_.ptr); - - for (;true;) - { - unsigned val = *(bdescr->gap_.ptr); - if (bitval) - { - gap_word_t* first = BMGAP_PTR(this->block_) + 1; - if (bdescr->gap_.ptr == first) - { - bdescr->gap_.gap_len = (gap_word_t)(val + 1); - } - else - { - bdescr->gap_.gap_len = - (gap_word_t)(val - *(bdescr->gap_.ptr-1)); - } - return true; - } - this->position_ += val + 1; - if (val == bm::gap_max_bits - 1) - break; - bitval ^= 1; - ++(bdescr->gap_.ptr); - } - return false; - } - - bool search_in_blocks() - { - ++(this->block_idx_); - const blocks_manager_type& bman = this->bv_->blockman_; - block_idx_type i = this->block_idx_ >> bm::set_array_shift; - block_idx_type top_block_size = bman.top_block_size(); - bm::word_t*** blk_root = bman.top_blocks_root(); - for (; i < top_block_size; ++i) - { - bm::word_t** blk_blk = blk_root[i]; - if (blk_blk == 0) - { - // fast scan fwd in top level - size_type bn = this->block_idx_ + bm::set_sub_array_size; - size_type pos = this->position_ + bm::bits_in_array; - for (++i; i < top_block_size; ++i) - { - if (blk_root[i]) - break; - bn += bm::set_sub_array_size; - pos += bm::bits_in_array; - } // for i - this->block_idx_ = bn; - this->position_ = pos; - if ((i < top_block_size) && blk_root[i]) - --i; - continue; - } - if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) - blk_blk = FULL_SUB_BLOCK_REAL_ADDR; - - block_idx_type j = this->block_idx_ & bm::set_array_mask; - - for(; j < bm::set_sub_array_size; ++j, ++(this->block_idx_)) - { - this->block_ = blk_blk[j]; - - if (this->block_ == 0) - { - this->position_ += bm::bits_in_block; - continue; - } + static bool decode_wave(block_descr_type* bdescr) BMNOEXCEPT; + bool decode_bit_group(block_descr_type* bdescr) BMNOEXCEPT; + bool decode_bit_group(block_descr_type* bdescr, + size_type& rank) BMNOEXCEPT; + bool search_in_bitblock() BMNOEXCEPT; + bool search_in_gapblock() BMNOEXCEPT; + bool search_in_blocks() BMNOEXCEPT; - this->block_type_ = 
BM_IS_GAP(this->block_); - if (this->block_type_) - { - if (search_in_gapblock()) - return true; - } - else - { - if (this->block_ == FULL_BLOCK_FAKE_ADDR) - this->block_ = FULL_BLOCK_REAL_ADDR; - if (search_in_bitblock()) - return true; - } - } // for j - } // for i - return false; - } }; /*! @@ -1142,15 +732,14 @@ public: #ifndef BM_NO_STL typedef std::input_iterator_tag iterator_category; #endif - counted_enumerator() : bit_count_(0){} + counted_enumerator() BMNOEXCEPT : bit_count_(0){} - counted_enumerator(const enumerator& en) : enumerator(en) + counted_enumerator(const enumerator& en) BMNOEXCEPT : enumerator(en) { - if (this->valid()) - bit_count_ = 1; + bit_count_ = this->valid(); // 0 || 1 } - counted_enumerator& operator=(const enumerator& en) + counted_enumerator& operator=(const enumerator& en) BMNOEXCEPT { enumerator* me = this; *me = en; @@ -1159,11 +748,10 @@ public: return *this; } - counted_enumerator& operator++() + counted_enumerator& operator++() BMNOEXCEPT { this->go_up(); - if (this->valid()) - ++(this->bit_count_); + this->bit_count_ += this->valid(); return *this; } @@ -1171,8 +759,7 @@ public: { counted_enumerator tmp(*this); this->go_up(); - if (this->valid()) - ++bit_count_; + this->bit_count_ += this->valid(); return tmp; } @@ -1181,7 +768,7 @@ public: Method returns number of ON bits fromn the bit 0 to the current bit For the first bit in bitvector it is 1, for the second 2 */ - size_type count() const { return bit_count_; } + size_type count() const BMNOEXCEPT { return bit_count_; } private: /*! 
Function closed for usage */ counted_enumerator& go_to(size_type pos); @@ -1198,10 +785,10 @@ public: class mem_pool_guard { public: - mem_pool_guard() : bv_(0) + mem_pool_guard() BMNOEXCEPT : bv_(0) {} - mem_pool_guard(allocator_pool_type& pool, bvector& bv) + mem_pool_guard(allocator_pool_type& pool, bvector& bv) BMNOEXCEPT : bv_(&bv) { bv.set_allocator_pool(&pool); @@ -1213,13 +800,14 @@ public: } /// check if vector has no assigned allocator and set one - void assign_if_not_set(allocator_pool_type& pool, bvector& bv) + void assign_if_not_set(allocator_pool_type& pool, + bvector& bv) BMNOEXCEPT { - if (bv.get_allocator_pool() == 0) // alloc pool not set yet + if (!bv.get_allocator_pool()) // alloc pool not set yet { BM_ASSERT(!bv_); bv_ = &bv; - bv.set_allocator_pool(&pool); + bv_->set_allocator_pool(&pool); } } @@ -1248,7 +836,7 @@ public: const gap_word_t* glevel_len; allocation_policy(bm::strategy s=BM_BIT, - const gap_word_t* glevels = bm::gap_len_table::_len) + const gap_word_t* glevels = bm::gap_len_table::_len) BMNOEXCEPT : strat(s), glevel_len(glevels) {} }; @@ -1329,7 +917,7 @@ public: } - ~bvector() BMNOEXEPT {} + ~bvector() BMNOEXCEPT {} /*! \brief Explicit post-construction initialization */ @@ -1353,7 +941,7 @@ public: /*! \brief Move constructor */ - bvector(bvector&& bvect) BMNOEXEPT + bvector(bvector&& bvect) BMNOEXCEPT { blockman_.move_from(bvect.blockman_); size_ = bvect.size_; @@ -1380,7 +968,7 @@ public: /*! \brief Move assignment operator */ - bvector& operator=(bvector&& bvect) BMNOEXEPT + bvector& operator=(bvector&& bvect) BMNOEXCEPT { this->move_from(bvect); return *this; @@ -1389,11 +977,11 @@ public: /*! \brief Move bvector content from another bvector */ - void move_from(bvector& bvect) BMNOEXEPT; + void move_from(bvector& bvect) BMNOEXCEPT; /*! \brief Exchanges content of bv and this bvector. */ - void swap(bvector& bvect) BMNOEXEPT; + void swap(bvector& bvect) BMNOEXCEPT; /*! 
\brief Merge/move content from another vector @@ -1419,7 +1007,7 @@ public: return reference(*this, n); } - bool operator[](size_type n) const + bool operator[](size_type n) const BMNOEXCEPT { BM_ASSERT(n < size_); return get_bit(n); @@ -1434,25 +1022,23 @@ public: bool operator <= (const bvector& bv) const { return compare(bv)<=0; } bool operator > (const bvector& bv) const { return compare(bv)>0; } bool operator >= (const bvector& bv) const { return compare(bv) >= 0; } - bool operator == (const bvector& bv) const { return equal(bv); } - bool operator != (const bvector& bv) const { return !equal(bv); } + bool operator == (const bvector& bv) const BMNOEXCEPT { return equal(bv); } + bool operator != (const bvector& bv) const BMNOEXCEPT { return !equal(bv); } bvector operator~() const { return bvector(*this).invert(); } Alloc get_allocator() const - { - return blockman_.get_allocator(); - } + { return blockman_.get_allocator(); } - /// Set allocator pool for local (non-threaded) + /// Set allocator pool for local (non-th readed) /// memory cyclic(lots of alloc-free ops) opertations /// - void set_allocator_pool(allocator_pool_type* pool_ptr) + void set_allocator_pool(allocator_pool_type* pool_ptr) BMNOEXCEPT { blockman_.get_allocator().set_pool(pool_ptr); } /// Get curent allocator pool (if set) /// @return pointer to the current pool or NULL - allocator_pool_type* get_allocator_pool() + allocator_pool_type* get_allocator_pool() BMNOEXCEPT { return blockman_.get_allocator().get_pool(); } // -------------------------------------------------------------------- @@ -1567,6 +1153,10 @@ public: */ void set_bit_no_check(size_type n); + /** + \brief Set specified bit without checking preconditions (size, etc) + */ + bool set_bit_no_check(size_type n, bool val); /*! 
\brief Sets all bits in the specified closed interval [left,right] @@ -1596,9 +1186,7 @@ public: @sa set_range */ void clear_range(size_type left, size_type right) - { - set_range(left, right, false); - } + { set_range(left, right, false); } /*! @@ -1642,20 +1230,13 @@ public: \param free_mem if "true" (default) bvector frees the memory, otherwise sets blocks to 0. */ - void clear(bool free_mem = false) - { - blockman_.set_all_zero(free_mem); - } + void clear(bool free_mem = false) { blockman_.set_all_zero(free_mem); } /*! \brief Clears every bit in the bitvector. \return *this; */ - bvector& reset() - { - clear(true); - return *this; - } + bvector& reset() { clear(true); return *this; } /*! \brief Flips bit n @@ -1688,7 +1269,7 @@ public: //size_type capacity() const { return blockman_.capacity(); } /*! \brief return current size of the vector (bits) */ - size_type size() const { return size_; } + size_type size() const BMNOEXCEPT { return size_; } /*! \brief Change size of the bvector @@ -1699,15 +1280,16 @@ public: //@} // -------------------------------------------------------------------- - /*! @name Population counting and ranking methods + /*! @name Population counting, ranks, ranges and intervals */ //@{ /*! \brief population cout (count of ON bits) - \return Total number of bits ON. + \sa count_range + \return Total number of bits ON */ - size_type count() const; + size_type count() const BMNOEXCEPT; /*! \brief Computes bitcount values for all bvector blocks \param arr - pointer on array of block bit counts @@ -1715,8 +1297,9 @@ public: This number +1 gives you number of arr elements initialized during the function call. */ - block_idx_type count_blocks(unsigned* arr) const; - + block_idx_type count_blocks(unsigned* arr) const BMNOEXCEPT; + + /*! 
\brief Returns count of 1 bits in the given range [left..right] Uses rank-select index to accelerate the search @@ -1729,7 +1312,7 @@ public: */ size_type count_range(size_type left, size_type right, - const rs_index_type& rs_idx) const; + const rs_index_type& rs_idx) const BMNOEXCEPT; /*! \brief Returns count of 1 bits in the given range [left..right] @@ -1739,10 +1322,32 @@ public: \return population count in the diapason */ - size_type count_range(size_type left, - size_type right) const; + size_type count_range(size_type left, size_type right) const BMNOEXCEPT; + + /*! + \brief Returns true if all bits in the range are 1s (saturated interval) + Function uses closed interval [left, right] + + \param left - index of first bit start checking + \param right - index of last bit + + \return true if all bits are 1, false otherwise + @sa any_range, count_range + */ + bool is_all_one_range(size_type left, size_type right) const BMNOEXCEPT; + + /*! + \brief Returns true if any bits in the range are 1s (non-empty interval) + Function uses closed interval [left, right] + + \param left - index of first bit start checking + \param right - index of last bit + + \return true if at least 1 bits is set + @sa is_all_one_range, count_range + */ + bool any_range(size_type left, size_type right) const BMNOEXCEPT; - /*! \brief compute running total of all blocks in bit vector (rank-select index) \param rs_idx - [out] pointer to index / count structure @@ -1762,23 +1367,41 @@ public: should be prepared using build_rs_index \return population count in the range [0..n] \sa build_rs_index - \sa count_to_test, select, rank + \sa count_to_test, select, rank, rank_corrected */ - size_type count_to(size_type n, const rs_index_type& rs_idx) const; + size_type count_to(size_type n, + const rs_index_type& rs_idx) const BMNOEXCEPT; /*! 
- \brief Returns rank of specified bit position + \brief Returns rank of specified bit position (same as count_to()) \param n - index of bit to rank \param rs_idx - rank-select index \return population count in the range [0..n] \sa build_rs_index - \sa count_to_test, select, rank + \sa count_to_test, select, rank, rank_corrected */ - size_type rank(size_type n, const rs_index_type& rs_idx) const - { return count_to(n, rs_idx); } + size_type rank(size_type n, + const rs_index_type& rs_idx) const BMNOEXCEPT + { return count_to(n, rs_idx); } + /*! + \brief Returns rank corrceted by the requested border value (as -1) + + This is rank function (bit-count) minus value of bit 'n' + if bit-n is true function returns rank()-1 if false returns rank() + faster than rank() + test(). + + + \param n - index of bit to rank + \param rs_idx - rank-select index + \return population count in the range [0..n] corrected as -1 by the value of n + \sa build_rs_index + \sa count_to_test, select, rank + */ + size_type rank_corrected(size_type n, + const rs_index_type& rs_idx) const BMNOEXCEPT; /*! \brief popcount in [0..right] range if test(right) == true @@ -1787,25 +1410,28 @@ public: plus count_to() \param n - index of bit to test and rank - \param blocks_cnt - block count structure to accelerate search - should be prepared using running_count_blocks + \param rs_idx - rank-select index + (block count structure to accelerate search) + should be prepared using build_rs_index() \return population count in the diapason or 0 if right bit test failed \sa build_rs_index \sa count_to */ - size_type count_to_test(size_type n, const rs_index_type& blocks_cnt) const; + size_type + count_to_test(size_type n, + const rs_index_type& rs_idx) const BMNOEXCEPT; /*! Recalculate bitcount (deprecated) */ - size_type recalc_count() { return count(); } + size_type recalc_count() BMNOEXCEPT { return count(); } /*! Disables count cache. (deprecated). 
*/ - void forget_count() {} + void forget_count() BMNOEXCEPT {} //@} @@ -1818,14 +1444,14 @@ public: \param n - Index of the bit to check. \return Bit value (1 or 0) */ - bool get_bit(size_type n) const; + bool get_bit(size_type n) const BMNOEXCEPT; /*! \brief returns true if bit n is set and false is bit n is 0. \param n - Index of the bit to check. \return Bit value (1 or 0) */ - bool test(size_type n) const { return get_bit(n); } + bool test(size_type n) const BMNOEXCEPT { return get_bit(n); } //@} @@ -1874,12 +1500,12 @@ public: \brief Returns true if any bits in this bitset are set, and otherwise returns false. \return true if any bit is set */ - bool any() const; + bool any() const BMNOEXCEPT; /*! \brief Returns true if no bits are set, otherwise returns false. */ - bool none() const { return !any(); } + bool none() const BMNOEXCEPT { return !any(); } //@} // -------------------------------------------------------------------- @@ -1890,21 +1516,22 @@ public: /*! \fn bool bvector::find(bm::id_t& pos) const \brief Finds index of first 1 bit - \param pos - index of the found 1 bit + \param pos - [out] index of the found 1 bit \return true if search returned result \sa get_first, get_next, extract_next, find_reverse, find_first_mismatch */ - bool find(size_type& pos) const; + bool find(size_type& pos) const BMNOEXCEPT; /*! \fn bool bvector::find(bm::id_t from, bm::id_t& pos) const - \brief Finds index of 1 bit starting from position + \brief Find index of 1 bit starting from position \param from - position to start search from - \param pos - index of the found 1 bit + \param pos - [out] index of the found 1 bit \return true if search returned result \sa get_first, get_next, extract_next, find_reverse, find_first_mismatch */ - bool find(size_type from, size_type& pos) const; + bool find(size_type from, size_type& pos) const BMNOEXCEPT; + /*! 
\fn bm::id_t bvector::get_first() const @@ -1915,7 +1542,7 @@ public: \return Index of the first 1 bit, may return 0 \sa get_next, find, extract_next, find_reverse */ - size_type get_first() const { return check_or_next(0); } + size_type get_first() const BMNOEXCEPT { return check_or_next(0); } /*! \fn bm::id_t bvector::get_next(bm::id_t prev) const @@ -1924,7 +1551,7 @@ public: \return Index of the next bit which is ON or 0 if not found. \sa get_first, find, extract_next, find_reverse */ - size_type get_next(size_type prev) const + size_type get_next(size_type prev) const BMNOEXCEPT { return (++prev == bm::id_max) ? 0 : check_or_next(prev); } /*! @@ -1945,7 +1572,7 @@ public: \return true if search returned result \sa get_first, get_next, extract_next, find, find_first_mismatch */ - bool find_reverse(size_type& pos) const; + bool find_reverse(size_type& pos) const BMNOEXCEPT; /*! \brief Finds dynamic range of bit-vector [first, last] @@ -1954,7 +1581,7 @@ public: \return true if search returned result \sa get_first, get_next, extract_next, find, find_reverse */ - bool find_range(size_type& first, size_type& last) const; + bool find_range(size_type& first, size_type& last) const BMNOEXCEPT; /*! \brief Find bit-vector position for the specified rank(bitcount) @@ -1969,7 +1596,8 @@ public: \return true if requested rank was found */ - bool find_rank(size_type rank, size_type from, size_type& pos) const; + bool find_rank(size_type rank, size_type from, + size_type& pos) const BMNOEXCEPT; /*! \brief Find bit-vector position for the specified rank(bitcount) @@ -1989,7 +1617,7 @@ public: \return true if requested rank was found */ bool find_rank(size_type rank, size_type from, size_type& pos, - const rs_index_type& rs_idx) const; + const rs_index_type& rs_idx) const BMNOEXCEPT; /*! 
\brief select bit-vector position for the specified rank(bitcount) @@ -2007,7 +1635,8 @@ public: \return true if requested rank was found */ - bool select(size_type rank, size_type& pos, const rs_index_type& rs_idx) const; + bool select(size_type rank, size_type& pos, + const rs_index_type& rs_idx) const BMNOEXCEPT; //@} @@ -2185,7 +1814,7 @@ public: @sa statistics */ - void calc_stat(struct bm::bvector::statistics* st) const; + void calc_stat(struct bm::bvector::statistics* st) const BMNOEXCEPT; /*! \brief Sets new blocks allocation strategy. @@ -2200,7 +1829,8 @@ public: 1 - Blocks mutation mode (adaptive algorithm) \sa set_new_blocks_strat */ - strategy get_new_blocks_strat() const { return new_blocks_strat_; } + strategy get_new_blocks_strat() const BMNOEXCEPT + { return new_blocks_strat_; } /*! \brief Optimize memory bitvector's memory allocation. @@ -2239,7 +1869,7 @@ public: Return true if bvector is initialized at all @internal */ - bool is_init() const { return blockman_.is_init(); } + bool is_init() const BMNOEXCEPT { return blockman_.is_init(); } //@} @@ -2258,13 +1888,13 @@ public: @return 0 if this == arg, -1 if this < arg, 1 if this > arg @sa find_first_mismatch */ - int compare(const bvector& bvect) const; + int compare(const bvector& bvect) const BMNOEXCEPT; /*! 
\brief Equal comparison with an agr bit-vector @return true if vectors are identical */ - bool equal(const bvector& bvect) const + bool equal(const bvector& bvect) const BMNOEXCEPT { size_type pos; bool found = find_first_mismatch(bvect, pos); @@ -2285,7 +1915,7 @@ public: bool find_first_mismatch(const bvector& bvect, size_type& pos, size_type search_to = bm::id_max - ) const; + ) const BMNOEXCEPT; //@} @@ -2305,14 +1935,16 @@ public: Use only if you are BitMagic library @internal */ - const blocks_manager_type& get_blocks_manager() const { return blockman_; } + const blocks_manager_type& get_blocks_manager() const BMNOEXCEPT + { return blockman_; } /** \brief get access to memory manager (internal) Use only if you are BitMagic library @internal */ - blocks_manager_type& get_blocks_manager() { return blockman_; } + blocks_manager_type& get_blocks_manager() BMNOEXCEPT + { return blockman_; } //@} @@ -2338,21 +1970,22 @@ protected: private: - size_type check_or_next(size_type prev) const; + size_type check_or_next(size_type prev) const BMNOEXCEPT; - /// set bit in GAP block withlength extension control + /// set bit in GAP block with GAP block length control bool gap_block_set(bm::gap_word_t* gap_blk, bool val, block_idx_type nblock, unsigned nbit); - + + /// set bit in GAP block with GAP block length control + void gap_block_set_no_ret(bm::gap_word_t* gap_blk, + bool val, block_idx_type nblock, + unsigned nbit); + /// check if specified bit is 1, and set it to 0 /// if specified bit is 0, scan for the next 1 and returns it /// if no 1 found returns 0 size_type check_or_next_extract(size_type prev); - /** - \brief Set specified bit without checking preconditions (size, etc) - */ - bool set_bit_no_check(size_type n, bool val); /** \brief AND specified bit without checking preconditions (size, etc) @@ -2440,11 +2073,11 @@ private: size_type block_count_to(const bm::word_t* block, block_idx_type nb, unsigned nbit_right, - const rs_index_type& blocks_cnt); + const 
rs_index_type& blocks_cnt) BMNOEXCEPT; /** Return value of first bit in the block */ - bool test_first_block_bit(block_idx_type nb) const; + bool test_first_block_bit(block_idx_type nb) const BMNOEXCEPT; private: blocks_manager_type blockman_; //!< bitblocks manager @@ -2510,7 +2143,7 @@ void bvector::init() // ----------------------------------------------------------------------- template -void bvector::move_from(bvector& bvect) BMNOEXEPT +void bvector::move_from(bvector& bvect) BMNOEXCEPT { if (this != &bvect) { @@ -2572,7 +2205,7 @@ bvector& bvector::set_range(size_type left, // ----------------------------------------------------------------------- template -typename bvector::size_type bvector::count() const +typename bvector::size_type bvector::count() const BMNOEXCEPT { if (!blockman_.is_init()) return 0; @@ -2592,6 +2225,9 @@ typename bvector::size_type bvector::count() const if (!found) break; blk_blk = blk_root[i]; + BM_ASSERT(blk_blk); + if (!blk_blk) + break; } if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) { @@ -2619,7 +2255,7 @@ typename bvector::size_type bvector::count() const // ----------------------------------------------------------------------- template -bool bvector::any() const +bool bvector::any() const BMNOEXCEPT { word_t*** blk_root = blockman_.top_blocks_root(); if (!blk_root) @@ -2780,7 +2416,7 @@ void bvector::build_rs_index(rs_index_type* rs_idx, template typename bvector::block_idx_type -bvector::count_blocks(unsigned* arr) const +bvector::count_blocks(unsigned* arr) const BMNOEXCEPT { bm::word_t*** blk_root = blockman_.top_blocks_root(); if (blk_root == 0) @@ -2797,7 +2433,7 @@ typename bvector::size_type bvector::block_count_to(const bm::word_t* block, block_idx_type nb, unsigned nbit_right, - const rs_index_type& rs_idx) + const rs_index_type& rs_idx) BMNOEXCEPT { size_type c; unsigned sub_range = rs_idx.find_sub_range(nbit_right); @@ -2909,7 +2545,7 @@ bvector::block_count_to(const bm::word_t* block, template typename 
bvector::size_type bvector::count_to(size_type right, - const rs_index_type& rs_idx) const + const rs_index_type& rs_idx) const BMNOEXCEPT { BM_ASSERT(right < bm::id_max); if (!blockman_.is_init()) @@ -2963,7 +2599,7 @@ bvector::count_to(size_type right, template typename bvector::size_type bvector::count_to_test(size_type right, - const rs_index_type& blocks_cnt) const + const rs_index_type& rs_idx) const BMNOEXCEPT { BM_ASSERT(right < bm::id_max); if (!blockman_.is_init()) @@ -2972,15 +2608,13 @@ bvector::count_to_test(size_type right, unsigned nblock_right = unsigned(right >> bm::set_block_shift); unsigned nbit_right = unsigned(right & bm::set_block_mask); - // running count of all blocks before target - // - size_type cnt = 0; unsigned i, j; bm::get_block_coord(nblock_right, i, j); const bm::word_t* block = blockman_.get_block_ptr(i, j); + size_type cnt = 0; if (!block) - return 0; + return cnt; bool gap = BM_IS_GAP(block); if (gap) @@ -2989,7 +2623,7 @@ bvector::count_to_test(size_type right, if (bm::gap_test_unr(gap_blk, (gap_word_t)nbit_right)) cnt = bm::gap_bit_count_to(gap_blk, (gap_word_t)nbit_right); else - return 0; + return cnt; } else { @@ -3004,14 +2638,16 @@ bvector::count_to_test(size_type right, w &= (1u << (nbit_right & bm::set_word_mask)); if (w) { - cnt = block_count_to(block, nblock_right, nbit_right, blocks_cnt); + cnt = block_count_to(block, nblock_right, nbit_right, rs_idx); BM_ASSERT(cnt == bm::bit_block_calc_count_to(block, nbit_right)); } else - return 0; + { + return cnt; + } } } - cnt += nblock_right ? blocks_cnt.rcount(nblock_right - 1) : 0; + cnt += nblock_right ? 
rs_idx.rcount(nblock_right - 1) : 0; return cnt; } @@ -3019,22 +2655,67 @@ bvector::count_to_test(size_type right, template typename bvector::size_type -bvector::count_range(size_type left, size_type right) const +bvector::rank_corrected(size_type right, + const rs_index_type& rs_idx) const BMNOEXCEPT +{ + BM_ASSERT(right < bm::id_max); + if (!blockman_.is_init()) + return 0; + + unsigned nblock_right = unsigned(right >> bm::set_block_shift); + unsigned nbit_right = unsigned(right & bm::set_block_mask); + + size_type cnt = nblock_right ? rs_idx.rcount(nblock_right - 1) : 0; + + unsigned i, j; + bm::get_block_coord(nblock_right, i, j); + const bm::word_t* block = blockman_.get_block_ptr(i, j); + + if (!block) + return cnt; + + bool gap = BM_IS_GAP(block); + if (gap) + { + cnt += bm::gap_bit_count_to(BMGAP_PTR(block), (gap_word_t)nbit_right, + true /* rank corrected */); + } + else + { + if (block == FULL_BLOCK_FAKE_ADDR) + cnt += nbit_right; + else + { + cnt += block_count_to(block, nblock_right, nbit_right, rs_idx); + unsigned w = block[nbit_right >> bm::set_word_shift] & + (1u << (nbit_right & bm::set_word_mask)); + cnt -= bool(w); // rank correction + } + } + return cnt; +} + + +// ----------------------------------------------------------------------- + +template +typename bvector::size_type +bvector::count_range(size_type left, size_type right) const BMNOEXCEPT { BM_ASSERT(left < bm::id_max && right < bm::id_max); - BM_ASSERT(left <= right); + if (left > right) + bm::xor_swap(left, right); + if (right == bm::id_max) + --right; - BM_ASSERT_THROW(right < bm::id_max, BM_ERR_RANGE); - BM_ASSERT_THROW(left <= right, BM_ERR_RANGE); - if (!blockman_.is_init()) return 0; size_type cnt = 0; // calculate logical number of start and destination blocks - unsigned nblock_left = unsigned(left >> bm::set_block_shift); - unsigned nblock_right = unsigned(right >> bm::set_block_shift); + block_idx_type nblock_left = (left >> bm::set_block_shift); + block_idx_type nblock_right = 
(right >> bm::set_block_shift); unsigned i0, j0; bm::get_block_coord(nblock_left, i0, j0); @@ -3076,13 +2757,15 @@ bvector::count_range(size_type left, size_type right) const { return cnt; } - + + // process all full mid-blocks { func.reset(); word_t*** blk_root = blockman_.top_blocks_root(); - unsigned top_blocks_size = blockman_.top_block_size(); + block_idx_type top_blocks_size = blockman_.top_block_size(); - bm::for_each_nzblock_range(blk_root, top_blocks_size, nblock_left+1, nblock_right-1, func); + bm::for_each_nzblock_range(blk_root, top_blocks_size, + nblock_left+1, nblock_right-1, func); cnt += func.count(); } @@ -3098,27 +2781,205 @@ bvector::count_range(size_type left, size_type right) const (gap_word_t)0, (gap_word_t)nbit_right); } - else + else + { + cnt += bm::bit_block_calc_count_range(block, 0, nbit_right); + } + } + return cnt; +} + +// ----------------------------------------------------------------------- + +template +bool bvector::is_all_one_range(size_type left, + size_type right) const BMNOEXCEPT +{ + if (!blockman_.is_init()) + return false; // nothing to do + + if (right < left) + bm::xor_swap(left, right); + if (right == bm::id_max) + --right; + if (left == right) + return test(left); + + BM_ASSERT(left < bm::id_max && right < bm::id_max); + + block_idx_type nblock_left = (left >> bm::set_block_shift); + block_idx_type nblock_right = (right >> bm::set_block_shift); + + unsigned i0, j0; + bm::get_block_coord(nblock_left, i0, j0); + const bm::word_t* block = blockman_.get_block(i0, j0); + + if (nblock_left == nblock_right) // hit in the same block + { + unsigned nbit_left = unsigned(left & bm::set_block_mask); + unsigned nbit_right = unsigned(right & bm::set_block_mask); + return bm::block_is_all_one_range(block, nbit_left, nbit_right); + } + + // process entry point block + { + unsigned nbit_left = unsigned(left & bm::set_block_mask); + bool all_one = bm::block_is_all_one_range(block, + nbit_left, (bm::gap_max_bits-1)); + if (!all_one) + 
return all_one; + ++nblock_left; + } + + // process tail block + { + bm::get_block_coord(nblock_right, i0, j0); + block = blockman_.get_block(i0, j0); + unsigned nbit_right = unsigned(right & bm::set_block_mask); + bool all_one = bm::block_is_all_one_range(block, 0, nbit_right); + if (!all_one) + return all_one; + --nblock_right; + } + + // check all blocks in the middle + // + if (nblock_left <= nblock_right) + { + unsigned i_from, j_from, i_to, j_to; + bm::get_block_coord(nblock_left, i_from, j_from); + bm::get_block_coord(nblock_right, i_to, j_to); + + bm::word_t*** blk_root = blockman_.top_blocks_root(); + + for (unsigned i = i_from; i <= i_to; ++i) + { + bm::word_t** blk_blk = blk_root[i]; + if (!blk_blk) + return false; + if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) + continue; + + unsigned j = (i == i_from) ? j_from : 0; + unsigned j_limit = (i == i_to) ? j_to+1 : bm::set_sub_array_size; + do + { + bool all_one = bm::check_block_one(blk_blk[j], true); + if (!all_one) + return all_one; + } while (++j < j_limit); + } // for i + } + return true; +} + +// ----------------------------------------------------------------------- + +template +bool bvector::any_range(size_type left, size_type right) const BMNOEXCEPT +{ + BM_ASSERT(left < bm::id_max && right < bm::id_max); + + if (!blockman_.is_init()) + return false; // nothing to do + + if (right < left) + bm::xor_swap(left, right); + if (right == bm::id_max) + --right; + if (left == right) + return test(left); + + block_idx_type nblock_left = (left >> bm::set_block_shift); + block_idx_type nblock_right = (right >> bm::set_block_shift); + + unsigned i0, j0; + bm::get_block_coord(nblock_left, i0, j0); + const bm::word_t* block = blockman_.get_block(i0, j0); + + if (nblock_left == nblock_right) // hit in the same block + { + unsigned nbit_left = unsigned(left & bm::set_block_mask); + unsigned nbit_right = unsigned(right & bm::set_block_mask); + return bm::block_any_range(block, nbit_left, nbit_right); + } + + // 
process entry point block + { + unsigned nbit_left = unsigned(left & bm::set_block_mask); + bool any_one = bm::block_any_range(block, + nbit_left, (bm::gap_max_bits-1)); + if (any_one) + return any_one; + ++nblock_left; + } + + // process tail block + { + bm::get_block_coord(nblock_right, i0, j0); + block = blockman_.get_block(i0, j0); + unsigned nbit_right = unsigned(right & bm::set_block_mask); + bool any_one = bm::block_any_range(block, 0, nbit_right); + if (any_one) + return any_one; + --nblock_right; + } + + // check all blocks in the middle + // + if (nblock_left <= nblock_right) + { + unsigned i_from, j_from, i_to, j_to; + bm::get_block_coord(nblock_left, i_from, j_from); + bm::get_block_coord(nblock_right, i_to, j_to); + + bm::word_t*** blk_root = blockman_.top_blocks_root(); + { + block_idx_type top_size = blockman_.top_block_size(); + if (i_from >= top_size) + return false; + if (i_to >= top_size) + { + i_to = unsigned(top_size-1); + j_to = bm::set_sub_array_size-1; + } + } + + for (unsigned i = i_from; i <= i_to; ++i) { - cnt += bm::bit_block_calc_count_range(block, 0, nbit_right); - } + bm::word_t** blk_blk = blk_root[i]; + if (!blk_blk) + continue; + if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) + return true; + + unsigned j = (i == i_from) ? j_from : 0; + unsigned j_limit = (i == i_to) ? 
j_to+1 : bm::set_sub_array_size; + do + { + bool any_one = bm::block_any(blk_blk[j]); + if (any_one) + return any_one; + } while (++j < j_limit); + } // for i } - return cnt; + return false; } - // ----------------------------------------------------------------------- template typename bvector::size_type bvector::count_range(size_type left, size_type right, - const rs_index_type& rs_idx) const + const rs_index_type& rs_idx) const BMNOEXCEPT { BM_ASSERT(left <= right); + if (left > right) + bm::xor_swap(left, right); + BM_ASSERT_THROW(right < bm::id_max, BM_ERR_RANGE); - BM_ASSERT_THROW(left <= right, BM_ERR_RANGE); if (left == right) return this->test(left); @@ -3187,7 +3048,7 @@ bvector& bvector::invert() // ----------------------------------------------------------------------- template -bool bvector::get_bit(size_type n) const +bool bvector::get_bit(size_type n) const BMNOEXCEPT { BM_ASSERT(n < size_); BM_ASSERT_THROW((n < size_), BM_ERR_RANGE); @@ -3309,7 +3170,7 @@ void bvector::set_gap_levels(const gap_word_t* glevel_len) // ----------------------------------------------------------------------- template -int bvector::compare(const bvector& bv) const +int bvector::compare(const bvector& bv) const BMNOEXCEPT { int res; unsigned top_blocks = blockman_.top_block_size(); @@ -3429,7 +3290,7 @@ int bvector::compare(const bvector& bv) const template bool bvector::find_first_mismatch( const bvector& bvect, size_type& pos, - size_type search_to) const + size_type search_to) const BMNOEXCEPT { unsigned top_blocks = blockman_.top_block_size(); bm::word_t*** top_root = blockman_.top_blocks_root(); @@ -3531,7 +3392,7 @@ bool bvector::find_first_mismatch( // ----------------------------------------------------------------------- template -void bvector::swap(bvector& bvect) BMNOEXEPT +void bvector::swap(bvector& bvect) BMNOEXCEPT { if (this != &bvect) { @@ -3543,7 +3404,8 @@ void bvector::swap(bvector& bvect) BMNOEXEPT // 
----------------------------------------------------------------------- template -void bvector::calc_stat(struct bvector::statistics* st) const +void bvector::calc_stat( + struct bvector::statistics* st) const BMNOEXCEPT { BM_ASSERT(st); @@ -3572,6 +3434,9 @@ void bvector::calc_stat(struct bvector::statistics* st) const if (!found) break; blk_blk = blk_root[i]; + BM_ASSERT(blk_blk); + if (!blk_blk) + break; } if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) continue; @@ -3636,8 +3501,7 @@ void bvector::set_bit_no_check(size_type n) if (block_type) // gap block { - bm::gap_word_t* gap_blk = BMGAP_PTR(blk); - gap_block_set(gap_blk, val, nblock, nbit); + this->gap_block_set_no_ret(BMGAP_PTR(blk), val, nblock, nbit); } else // bit block { @@ -3798,7 +3662,10 @@ void bvector::import(const size_type* ids, size_type size_in, block_idx_type nblock_end = (ids[size_in-1] >> bm::set_block_shift); if (nblock == nblock_end) // special case: one block import { - import_block(ids, nblock, 0, stop); + if (stop == 1) + set_bit_no_check(ids[0]); + else + import_block(ids, nblock, 0, stop); return; } } @@ -3817,7 +3684,11 @@ void bvector::import(const size_type* ids, size_type size_in, stop = bm::idx_arr_block_lookup_u32(ids, size_in, nblock, start); #endif BM_ASSERT(start < stop); - import_block(ids, nblock, start, stop); + + if (stop - start == 1 && n < bm::id_max) // just one bit to set + set_bit_no_check(n); + else + import_block(ids, nblock, start, stop); start = stop; } while (start < size_in); } @@ -3826,17 +3697,22 @@ void bvector::import(const size_type* ids, size_type size_in, template void bvector::import_block(const size_type* ids, - block_idx_type nblock, - size_type start, size_type stop) + block_idx_type nblock, + size_type start, + size_type stop) { + BM_ASSERT(stop > start); int block_type; bm::word_t* blk = - blockman_.check_allocate_block(nblock, 1, 0, &block_type, true/*allow NULL ret*/); + blockman_.check_allocate_block(nblock, 1, 0, &block_type, + true/*allow 
NULL ret*/); if (!IS_FULL_BLOCK(blk)) { + // TODO: add a special case when we import just a few bits per block if (BM_IS_GAP(blk)) + { blk = blockman_.deoptimize_block(nblock); // TODO: try to avoid - + } #ifdef BM64ADDR bm::set_block_bits_u64(blk, ids, start, stop); #else @@ -3867,62 +3743,71 @@ bool bvector::set_bit_no_check(size_type n, bool val) return false; // calculate word number in block and bit - unsigned nbit = unsigned(n & bm::set_block_mask); - + unsigned nbit = unsigned(n & bm::set_block_mask); if (block_type) // gap { - bm::gap_word_t* gap_blk = BMGAP_PTR(blk); - unsigned is_set = gap_block_set(gap_blk, val, nblock, nbit); - return is_set; + return gap_block_set(BMGAP_PTR(blk), val, nblock, nbit); } else // bit block { unsigned nword = unsigned(nbit >> bm::set_word_shift); nbit &= bm::set_word_mask; - bm::word_t* word = blk + nword; bm::word_t mask = (((bm::word_t)1) << nbit); if (val) { - if ( ((*word) & mask) == 0 ) - { - *word |= mask; // set bit - return true; - } + val = ~(*word & mask); + *word |= mask; // set bit + return val; } else { - if ((*word) & mask) - { - *word &= ~mask; // clear bit - return true; - } + val = ~(*word & mask); + *word &= ~mask; // clear bit + return val; } } - return false; + //return false; } // ----------------------------------------------------------------------- template bool bvector::gap_block_set(bm::gap_word_t* gap_blk, - bool val, block_idx_type nblock, unsigned nbit) + bool val, block_idx_type nblock, + unsigned nbit) { - unsigned is_set, new_block_len; - new_block_len = - bm::gap_set_value(val, gap_blk, nbit, &is_set); - if (is_set) + unsigned is_set, new_len, old_len; + old_len = bm::gap_length(gap_blk)-1; + new_len = bm::gap_set_value(val, gap_blk, nbit, &is_set); + if (old_len < new_len) { unsigned threshold = bm::gap_limit(gap_blk, blockman_.glen()); - if (new_block_len > threshold) - { + if (new_len > threshold) blockman_.extend_gap_block(nblock, gap_blk); - } } return is_set; } +// 
----------------------------------------------------------------------- + +template +void bvector::gap_block_set_no_ret(bm::gap_word_t* gap_blk, + bool val, block_idx_type nblock, unsigned nbit) +{ + unsigned new_len, old_len; + old_len = bm::gap_length(gap_blk)-1; + new_len = bm::gap_set_value(val, gap_blk, nbit); + if (old_len < new_len) + { + unsigned threshold = bm::gap_limit(gap_blk, blockman_.glen()); + if (new_len > threshold) + blockman_.extend_gap_block(nblock, gap_blk); + } +} + + // ----------------------------------------------------------------------- template @@ -4089,11 +3974,11 @@ bool bvector::and_bit_no_check(size_type n, bool val) //--------------------------------------------------------------------- template -bool bvector::find(size_type from, size_type& pos) const +bool bvector::find(size_type from, size_type& pos) const BMNOEXCEPT { - BM_ASSERT_THROW(from < bm::id_max, BM_ERR_RANGE); - - if (from == 0) + if (from == bm::id_max) + return false; + if (!from) { return find(pos); } @@ -4104,7 +3989,7 @@ bool bvector::find(size_type from, size_type& pos) const //--------------------------------------------------------------------- template -bool bvector::find_reverse(size_type& pos) const +bool bvector::find_reverse(size_type& pos) const BMNOEXCEPT { bool found; @@ -4138,7 +4023,9 @@ bool bvector::find_reverse(size_type& pos) const } if (found) { - block_idx_type base_idx = block_idx_type(i) * bm::set_sub_array_size * bm::gap_max_bits; + block_idx_type base_idx = + block_idx_type(i) * bm::set_sub_array_size * + bm::gap_max_bits; base_idx += j * bm::gap_max_bits; pos = base_idx + block_pos; return found; @@ -4159,7 +4046,7 @@ bool bvector::find_reverse(size_type& pos) const //--------------------------------------------------------------------- template -bool bvector::find(size_type& pos) const +bool bvector::find(size_type& pos) const BMNOEXCEPT { bool found; @@ -4205,7 +4092,8 @@ bool bvector::find(size_type& pos) const 
//--------------------------------------------------------------------- template -bool bvector::find_range(size_type& in_first, size_type& in_last) const +bool bvector::find_range(size_type& in_first, + size_type& in_last) const BMNOEXCEPT { bool found = find(in_first); if (found) @@ -4226,7 +4114,7 @@ bool bvector::find_range(size_type& in_first, size_type& in_last) const template bool bvector::find_rank(size_type rank_in, size_type from, - size_type& pos) const + size_type& pos) const BMNOEXCEPT { BM_ASSERT_THROW(from < bm::id_max, BM_ERR_RANGE); @@ -4280,7 +4168,7 @@ template bool bvector::find_rank(size_type rank_in, size_type from, size_type& pos, - const rs_index_type& rs_idx) const + const rs_index_type& rs_idx) const BMNOEXCEPT { BM_ASSERT_THROW(from < bm::id_max, BM_ERR_RANGE); @@ -4349,7 +4237,7 @@ bool bvector::find_rank(size_type rank_in, template bool bvector::select(size_type rank_in, size_type& pos, - const rs_index_type& rs_idx) const + const rs_index_type& rs_idx) const BMNOEXCEPT { bool ret = false; @@ -4385,7 +4273,7 @@ bool bvector::select(size_type rank_in, size_type& pos, template typename bvector::size_type -bvector::check_or_next(size_type prev) const +bvector::check_or_next(size_type prev) const BMNOEXCEPT { if (!blockman_.is_init()) return 0; @@ -4836,7 +4724,7 @@ void bvector::erase(size_type n) //--------------------------------------------------------------------- template -bool bvector::test_first_block_bit(block_idx_type nb) const +bool bvector::test_first_block_bit(block_idx_type nb) const BMNOEXCEPT { if (nb >= bm::set_total_blocks) // last possible block return false; @@ -6529,7 +6417,10 @@ bvector::combine_operation_with_block(block_idx_type nb, BM_ASSERT(gfunc); (*gfunc)(blk, BMGAP_PTR(arg_blk)); - blockman_.optimize_bit_block(nb); + // TODO: commented out optimization, because it can be very slow + // need to take into account previous operation not to make + // fruitless attempts here + //blockman_.optimize_bit_block(nb); 
return; } @@ -6860,7 +6751,468 @@ void bvector::throw_bad_alloc() } //--------------------------------------------------------------------- +// +//--------------------------------------------------------------------- + +template +bool bvector::enumerator::go_up() BMNOEXCEPT +{ + BM_ASSERT(this->valid()); + + block_descr_type* bdescr = &(this->bdescr_); + if (this->block_type_) // GAP + { + BM_ASSERT(this->block_type_ == 1); + ++this->position_; + if (--(bdescr->gap_.gap_len)) + return true; + // next gap is "OFF" by definition. + if (*(bdescr->gap_.ptr) != bm::gap_max_bits - 1) + { + gap_word_t prev = *(bdescr->gap_.ptr); + unsigned val = *(++(bdescr->gap_.ptr)); + this->position_ += val - prev; + // next gap is now "ON" + if (*(bdescr->gap_.ptr) != bm::gap_max_bits - 1) + { + prev = *(bdescr->gap_.ptr); + val = *(++(bdescr->gap_.ptr)); + bdescr->gap_.gap_len = (gap_word_t)(val - prev); + return true; // next "ON" found; + } + } + } + else // BIT + { + unsigned short idx = ++(bdescr->bit_.idx); + if (idx < bdescr->bit_.cnt) + { + this->position_ = bdescr->bit_.pos + bdescr->bit_.bits[idx]; + return true; + } + this->position_ += + (bm::set_bitscan_wave_size * 32) - bdescr->bit_.bits[--idx]; + bdescr->bit_.ptr += bm::set_bitscan_wave_size; + if (decode_bit_group(bdescr)) + return true; + } + + if (search_in_blocks()) + return true; + + this->invalidate(); + return false; +} + +//--------------------------------------------------------------------- + + +template +bool bvector::enumerator::skip(size_type rank) BMNOEXCEPT +{ + if (!this->valid()) + return false; + if (!rank) + return this->valid(); // nothing to do + + for (; rank; --rank) + { + block_descr_type* bdescr = &(this->bdescr_); + switch (this->block_type_) + { + case 0: // BitBlock + for (; rank; --rank) + { + unsigned short idx = ++(bdescr->bit_.idx); + if (idx < bdescr->bit_.cnt) + { + this->position_ = bdescr->bit_.pos + bdescr->bit_.bits[idx]; + continue; + } + this->position_ += + 
(bm::set_bitscan_wave_size * 32) - bdescr->bit_.bits[--idx]; + bdescr->bit_.ptr += bm::set_bitscan_wave_size; + + if (!decode_bit_group(bdescr, rank)) + break; + } // for rank + break; + case 1: // DGAP Block + for (; rank; --rank) // TODO: better skip logic + { + ++this->position_; + if (--(bdescr->gap_.gap_len)) + continue; + + // next gap is "OFF" by definition. + if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1) + break; + gap_word_t prev = *(bdescr->gap_.ptr); + unsigned int val = *(++(bdescr->gap_.ptr)); + + this->position_ += val - prev; + // next gap is now "ON" + if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1) + break; + prev = *(bdescr->gap_.ptr); + val = *(++(bdescr->gap_.ptr)); + bdescr->gap_.gap_len = (gap_word_t)(val - prev); + } // for rank + break; + default: + BM_ASSERT(0); + } // switch + + if (!rank) + return true; + + if (!search_in_blocks()) + { + this->invalidate(); + return false; + } + } // for rank + + return this->valid(); +} + + +//--------------------------------------------------------------------- + + +template +bool bvector::enumerator::go_to(size_type pos) BMNOEXCEPT +{ + if (pos == 0) + { + go_first(); + return this->valid(); + } + + size_type new_pos = this->bv_->check_or_next(pos); // find the true pos + if (!new_pos) // no bits available + { + this->invalidate(); + return false; + } + BM_ASSERT(new_pos >= pos); + pos = new_pos; + + + this->position_ = pos; + size_type nb = this->block_idx_ = (pos >> bm::set_block_shift); + bm::bvector::blocks_manager_type& bman = + this->bv_->get_blocks_manager(); + unsigned i0, j0; + bm::get_block_coord(nb, i0, j0); + this->block_ = bman.get_block(i0, j0); + + BM_ASSERT(this->block_); + + this->block_type_ = (bool)BM_IS_GAP(this->block_); + + block_descr_type* bdescr = &(this->bdescr_); + unsigned nbit = unsigned(pos & bm::set_block_mask); + + if (this->block_type_) // gap + { + this->position_ = nb * bm::set_block_size * 32; + search_in_gapblock(); + + if (this->position_ == pos) + return 
this->valid(); + this->position_ = pos; + + gap_word_t* gptr = BMGAP_PTR(this->block_); + unsigned is_set; + unsigned gpos = bm::gap_bfind(gptr, nbit, &is_set); + BM_ASSERT(is_set); + + bdescr->gap_.ptr = gptr + gpos; + if (gpos == 1) + { + bdescr->gap_.gap_len = bm::gap_word_t(gptr[gpos] - (nbit - 1)); + } + else + { + bm::gap_word_t interval = bm::gap_word_t(gptr[gpos] - gptr[gpos - 1]); + bm::gap_word_t interval2 = bm::gap_word_t(nbit - gptr[gpos - 1]); + bdescr->gap_.gap_len = bm::gap_word_t(interval - interval2 + 1); + } + } + else // bit + { + if (nbit == 0) + { + search_in_bitblock(); + return this->valid(); + } + + unsigned nword = unsigned(nbit >> bm::set_word_shift); + + // check if we need to step back to match the wave + unsigned parity = nword % bm::set_bitscan_wave_size; + bdescr->bit_.ptr = this->block_ + (nword - parity); + bdescr->bit_.cnt = bm::bitscan_wave(bdescr->bit_.ptr, bdescr->bit_.bits); + BM_ASSERT(bdescr->bit_.cnt); + bdescr->bit_.pos = (nb * bm::set_block_size * 32) + ((nword - parity) * 32); + bdescr->bit_.idx = 0; + nbit &= bm::set_word_mask; + nbit += 32 * parity; + for (unsigned i = 0; i < bdescr->bit_.cnt; ++i) + { + if (bdescr->bit_.bits[i] == nbit) + return this->valid(); + bdescr->bit_.idx++; + } // for + BM_ASSERT(0); + } + return this->valid(); +} + +//--------------------------------------------------------------------- + +template +void bvector::enumerator::go_first() BMNOEXCEPT +{ + BM_ASSERT(this->bv_); + + blocks_manager_type* bman = &(this->bv_->blockman_); + if (!bman->is_init()) + { + this->invalidate(); + return; + } + + bm::word_t*** blk_root = bman->top_blocks_root(); + this->block_idx_ = this->position_= 0; + unsigned i, j; + + for (i = 0; i < bman->top_block_size(); ++i) + { + bm::word_t** blk_blk = blk_root[i]; + if (blk_blk == 0) // not allocated + { + this->block_idx_ += bm::set_sub_array_size; + this->position_ += bm::bits_in_array; + continue; + } + + if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) + blk_blk 
= FULL_SUB_BLOCK_REAL_ADDR; + + for (j = 0; j < bm::set_sub_array_size; ++j,++(this->block_idx_)) + { + this->block_ = blk_blk[j]; + if (this->block_ == 0) + { + this->position_ += bits_in_block; + continue; + } + if (BM_IS_GAP(this->block_)) + { + this->block_type_ = 1; + if (search_in_gapblock()) + return; + } + else + { + if (this->block_ == FULL_BLOCK_FAKE_ADDR) + this->block_ = FULL_BLOCK_REAL_ADDR; + this->block_type_ = 0; + if (search_in_bitblock()) + return; + } + } // for j + } // for i + + this->invalidate(); +} + +//--------------------------------------------------------------------- + +template +bool +bvector::enumerator::decode_wave(block_descr_type* bdescr) BMNOEXCEPT +{ + bdescr->bit_.cnt = bm::bitscan_wave(bdescr->bit_.ptr, bdescr->bit_.bits); + if (bdescr->bit_.cnt) // found + { + bdescr->bit_.idx = 0; + return true; + } + return false; +} + +//--------------------------------------------------------------------- + +template +bool +bvector::enumerator::decode_bit_group(block_descr_type* bdescr) BMNOEXCEPT +{ + const word_t* block_end = this->block_ + bm::set_block_size; + for (; bdescr->bit_.ptr < block_end;) + { + if (decode_wave(bdescr)) + { + bdescr->bit_.pos = this->position_; + this->position_ += bdescr->bit_.bits[0]; + return true; + } + this->position_ += bm::set_bitscan_wave_size * 32; // wave size + bdescr->bit_.ptr += bm::set_bitscan_wave_size; + } // for + return false; +} + +//--------------------------------------------------------------------- + +template +bool +bvector::enumerator::decode_bit_group(block_descr_type* bdescr, + size_type& rank) BMNOEXCEPT +{ + const word_t* block_end = this->block_ + bm::set_block_size; + for (; bdescr->bit_.ptr < block_end;) + { + const bm::id64_t* w64_p = (bm::id64_t*)bdescr->bit_.ptr; + BM_ASSERT(bm::set_bitscan_wave_size == 4); // TODO: better handle this + + unsigned cnt = bm::word_bitcount64(w64_p[0]); + cnt += bm::word_bitcount64(w64_p[1]); + if (rank > cnt) + { + rank -= cnt; + } + else + { + 
if (decode_wave(bdescr)) + { + bdescr->bit_.pos = this->position_; + this->position_ += bdescr->bit_.bits[0]; + return true; + } + } + this->position_ += bm::set_bitscan_wave_size * 32; // wave size + bdescr->bit_.ptr += bm::set_bitscan_wave_size; + } // for + return false; +} + +//--------------------------------------------------------------------- + +template +bool bvector::enumerator::search_in_bitblock() BMNOEXCEPT +{ + BM_ASSERT(this->block_type_ == 0); + + block_descr_type* bdescr = &(this->bdescr_); + bdescr->bit_.ptr = this->block_; + return decode_bit_group(bdescr); +} + +//--------------------------------------------------------------------- + +template +bool bvector::enumerator::search_in_gapblock() BMNOEXCEPT +{ + BM_ASSERT(this->block_type_ == 1); + + block_descr_type* bdescr = &(this->bdescr_); + bdescr->gap_.ptr = BMGAP_PTR(this->block_); + unsigned bitval = *(bdescr->gap_.ptr) & 1; + + ++(bdescr->gap_.ptr); + + for (;true;) + { + unsigned val = *(bdescr->gap_.ptr); + if (bitval) + { + gap_word_t* first = BMGAP_PTR(this->block_) + 1; + if (bdescr->gap_.ptr == first) + { + bdescr->gap_.gap_len = (gap_word_t)(val + 1); + } + else + { + bdescr->gap_.gap_len = + (gap_word_t)(val - *(bdescr->gap_.ptr-1)); + } + return true; + } + this->position_ += val + 1; + if (val == bm::gap_max_bits - 1) + break; + bitval ^= 1; + ++(bdescr->gap_.ptr); + } + return false; +} + +//--------------------------------------------------------------------- + +template +bool bvector::enumerator::search_in_blocks() BMNOEXCEPT +{ + ++(this->block_idx_); + const blocks_manager_type& bman = this->bv_->blockman_; + block_idx_type i = this->block_idx_ >> bm::set_array_shift; + block_idx_type top_block_size = bman.top_block_size(); + bm::word_t*** blk_root = bman.top_blocks_root(); + for (; i < top_block_size; ++i) + { + bm::word_t** blk_blk = blk_root[i]; + if (blk_blk == 0) + { + // fast scan fwd in top level + size_type bn = this->block_idx_ + bm::set_sub_array_size; + size_type 
pos = this->position_ + bm::bits_in_array; + for (++i; i < top_block_size; ++i) + { + if (blk_root[i]) + break; + bn += bm::set_sub_array_size; + pos += bm::bits_in_array; + } // for i + this->block_idx_ = bn; + this->position_ = pos; + if ((i < top_block_size) && blk_root[i]) + --i; + continue; + } + if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) + blk_blk = FULL_SUB_BLOCK_REAL_ADDR; + + block_idx_type j = this->block_idx_ & bm::set_array_mask; + + for(; j < bm::set_sub_array_size; ++j, ++(this->block_idx_)) + { + this->block_ = blk_blk[j]; + if (this->block_ == 0) + { + this->position_ += bm::bits_in_block; + continue; + } + this->block_type_ = BM_IS_GAP(this->block_); + if (this->block_type_) + { + if (search_in_gapblock()) + return true; + } + else + { + if (this->block_ == FULL_BLOCK_FAKE_ADDR) + this->block_ = FULL_BLOCK_REAL_ADDR; + if (search_in_bitblock()) + return true; + } + } // for j + } // for i + return false; +} +//--------------------------------------------------------------------- } // namespace diff --git a/c++/include/util/bitset/bmaggregator.h b/c++/include/util/bitset/bmaggregator.h index d171f9ce..a996f9b8 100644 --- a/c++/include/util/bitset/bmaggregator.h +++ b/c++/include/util/bitset/bmaggregator.h @@ -89,6 +89,7 @@ public: public: + // ----------------------------------------------------------------------- /*! 
@name Construction and setup */ //@{ aggregator(); @@ -105,6 +106,12 @@ public: void set_optimization( typename bvector_type::optmode opt = bvector_type::opt_compress) { opt_mode_ = opt; } + + void set_compute_count(bool count_mode) + { + compute_count_ = count_mode; count_ = 0; + } + //@} @@ -122,12 +129,12 @@ public: @return current arg group size (0 if vector was not added (empty)) @sa reset */ - unsigned add(const bvector_type* bv, unsigned agr_group = 0); + unsigned add(const bvector_type* bv, unsigned agr_group = 0) BMNOEXCEPT; /** Reset aggregate groups, forget all attached vectors */ - void reset(); + void reset() BMNOEXCEPT; /** Aggregate added group of vectors using logical OR @@ -195,7 +202,9 @@ public: Set search hint for the range, where results needs to be searched (experimental for internal use). */ - void set_range_hint(size_type from, size_type to); + void set_range_hint(size_type from, size_type to) BMNOEXCEPT; + + size_type count() const { return count_; } //@} @@ -305,10 +314,10 @@ public: //@{ /** Get current operation code */ - int get_operation() const { return operation_; } + int get_operation() const BMNOEXCEPT { return operation_; } /** Set operation code for the aggregator */ - void set_operation(int op_code) { operation_ = op_code; } + void set_operation(int op_code) BMNOEXCEPT { operation_ = op_code; } /** Prepare operation, create internal resources, analyse dependencies. 
@@ -361,19 +370,20 @@ protected: bool init_clear = true); static - unsigned max_top_blocks(const bvector_type_const_ptr* bv_src, unsigned src_size); + unsigned max_top_blocks(const bvector_type_const_ptr* bv_src, + unsigned src_size) BMNOEXCEPT; bm::word_t* sort_input_blocks_or(const bvector_type_const_ptr* bv_src, unsigned src_size, unsigned i, unsigned j, unsigned* arg_blk_count, - unsigned* arg_blk_gap_count); + unsigned* arg_blk_gap_count) BMNOEXCEPT; bm::word_t* sort_input_blocks_and(const bvector_type_const_ptr* bv_src, unsigned src_size, unsigned i, unsigned j, unsigned* arg_blk_count, - unsigned* arg_blk_gap_count); + unsigned* arg_blk_gap_count) BMNOEXCEPT; bool process_bit_blocks_or(blocks_manager_type& bman_target, @@ -396,19 +406,24 @@ protected: unsigned find_effective_sub_block_size(unsigned i, const bvector_type_const_ptr* bv_src, unsigned src_size, - bool top_null_as_zero); - - bool any_carry_overs(unsigned co_size) const; + bool top_null_as_zero) BMNOEXCEPT; + + static + bool any_carry_overs(const unsigned char* carry_overs, + unsigned co_size) BMNOEXCEPT; /** @return carry over */ - bool process_shift_right_and(const bm::word_t* arg_blk, - digest_type& digest, - unsigned carry_over); - + static + unsigned process_shift_right_and(bm::word_t* BMRESTRICT blk, + const bm::word_t* BMRESTRICT arg_blk, + digest_type& BMRESTRICT digest, + unsigned carry_over) BMNOEXCEPT; + + static const bm::word_t* get_arg_block(const bvector_type_const_ptr* bv_src, - unsigned k, unsigned i, unsigned j); + unsigned k, unsigned i, unsigned j) BMNOEXCEPT; bvector_type* check_create_target(); @@ -418,8 +433,8 @@ private: /// @internal struct arena { - BM_DECLARE_TEMP_BLOCK(tb1); - BM_DECLARE_TEMP_BLOCK(tb_opt); ///< temp block for results optimization + BM_DECLARE_TEMP_BLOCK(tb1) + BM_DECLARE_TEMP_BLOCK(tb_opt) ///< temp block for results optimization const bm::word_t* v_arg_or_blk[max_aggregator_cap]; ///< source blocks list (OR) const bm::gap_word_t* 
v_arg_or_blk_gap[max_aggregator_cap]; ///< source GAP blocks list (OR) const bm::word_t* v_arg_and_blk[max_aggregator_cap]; ///< source blocks list (AND) @@ -450,8 +465,9 @@ private: size_type range_from_ = bm::id_max; ///< search from size_type range_to_ = bm::id_max; ///< search to - typename bvector_type::optmode opt_mode_; - + typename bvector_type::optmode opt_mode_; ///< perform search result optimization + bool compute_count_; ///< compute search result count + size_type count_; ///< search result count }; @@ -515,7 +531,9 @@ void aggregator_pipeline_execute(It first, It last) template aggregator::aggregator() -: opt_mode_(bvector_type::opt_none) +: opt_mode_(bvector_type::opt_none), + compute_count_(false), + count_(0) { ar_ = (arena*) bm::aligned_new_malloc(sizeof(arena)); } @@ -533,18 +551,19 @@ aggregator::~aggregator() // ------------------------------------------------------------------------ template -void aggregator::reset() +void aggregator::reset() BMNOEXCEPT { arg_group0_size = arg_group1_size = operation_ = top_block_size_ = 0; operation_status_ = op_undefined; range_set_ = false; range_from_ = range_to_ = bm::id_max; + count_ = 0; } // ------------------------------------------------------------------------ template -void aggregator::set_range_hint(size_type from, size_type to) +void aggregator::set_range_hint(size_type from, size_type to) BMNOEXCEPT { range_from_ = from; range_to_ = to; range_set_ = true; @@ -553,11 +572,12 @@ void aggregator::set_range_hint(size_type from, size_type to) // ------------------------------------------------------------------------ template -typename aggregator::bvector_type* aggregator::check_create_target() +typename aggregator::bvector_type* +aggregator::check_create_target() { if (!bv_target_) { - bv_target_ = new bvector_type(); + bv_target_ = new bvector_type(); //TODO: get rid of "new" bv_target_->init(); } return bv_target_; @@ -566,7 +586,8 @@ typename aggregator::bvector_type* 
aggregator::check_create_target() // ------------------------------------------------------------------------ template -unsigned aggregator::add(const bvector_type* bv, unsigned agr_group) +unsigned aggregator::add(const bvector_type* bv, + unsigned agr_group) BMNOEXCEPT { BM_ASSERT_THROW(agr_group <= 1, BM_ERR_RANGE); BM_ASSERT(agr_group <= 1); @@ -646,6 +667,7 @@ bool aggregator::find_first_and_sub(size_type& idx) template void aggregator::combine_shift_right_and(bvector_type& bv_target) { + count_ = 0; combine_shift_right_and(bv_target, ar_->arg_bv0, arg_group0_size, false); } @@ -890,10 +912,11 @@ bool aggregator::find_first_and_sub(size_type& idx, template unsigned -aggregator::find_effective_sub_block_size(unsigned i, - const bvector_type_const_ptr* bv_src, - unsigned src_size, - bool top_null_as_zero) +aggregator::find_effective_sub_block_size( + unsigned i, + const bvector_type_const_ptr* bv_src, + unsigned src_size, + bool top_null_as_zero) BMNOEXCEPT { // quick hack to avoid scanning large, arrays, where such scan can be quite // expensive by itself (this makes this function approximate) @@ -924,7 +947,7 @@ aggregator::find_effective_sub_block_size(unsigned i, max_size = j; break; } - } + } // for j if (max_size == bm::set_sub_array_size - 1) break; } // for k @@ -992,8 +1015,6 @@ void aggregator::combine_and(unsigned i, unsigned j, { BM_ASSERT(src_size); - typename bvector_type::blocks_manager_type& bman_target = bv_target.get_blocks_manager(); - unsigned arg_blk_count = 0; unsigned arg_blk_gap_count = 0; bm::word_t* blk = @@ -1012,12 +1033,11 @@ void aggregator::combine_and(unsigned i, unsigned j, if (ar_->v_arg_and_blk[0] == FULL_BLOCK_REAL_ADDR) { // another nothing to do: one FULL block + blocks_manager_type& bman_target = bv_target.get_blocks_manager(); bman_target.check_alloc_top_subblock(i); bman_target.set_block_ptr(i, j, blk); if (++j == bm::set_sub_array_size) - { bman_target.validate_top_full(i); - } return; } } @@ -1032,14 +1052,13 @@ void 
aggregator::combine_and(unsigned i, unsigned j, // if (arg_blk_gap_count) { - digest = - process_gap_blocks_and(arg_blk_gap_count, digest); + digest = process_gap_blocks_and(arg_blk_gap_count, digest); } - if (digest) // some results + if (digest) // we have results , allocate block and copy from temp { - // we have some results, allocate block and copy from temp + blocks_manager_type& bman_target = bv_target.get_blocks_manager(); bman_target.opt_copy_bit_block(i, j, ar_->tb1, - opt_mode_, ar_->tb_opt); + opt_mode_, ar_->tb_opt); } } } @@ -1154,7 +1173,7 @@ aggregator::process_gap_blocks_and(unsigned arg_blk_gap_count, bool b = bm::gap_test_unr(ar_->v_arg_and_blk_gap[k], single_bit_idx); if (!b) return 0; // AND 0 causes result to turn 0 - } + } // for k break; } } @@ -1471,7 +1490,8 @@ unsigned aggregator::resize_target(bvector_type& bv_target, template unsigned -aggregator::max_top_blocks(const bvector_type_const_ptr* bv_src, unsigned src_size) +aggregator::max_top_blocks(const bvector_type_const_ptr* bv_src, + unsigned src_size) BMNOEXCEPT { unsigned top_blocks = 1; @@ -1491,11 +1511,12 @@ aggregator::max_top_blocks(const bvector_type_const_ptr* bv_src, unsigned sr // ------------------------------------------------------------------------ template -bm::word_t* aggregator::sort_input_blocks_or(const bvector_type_const_ptr* bv_src, - unsigned src_size, - unsigned i, unsigned j, - unsigned* arg_blk_count, - unsigned* arg_blk_gap_count) +bm::word_t* aggregator::sort_input_blocks_or( + const bvector_type_const_ptr* bv_src, + unsigned src_size, + unsigned i, unsigned j, + unsigned* arg_blk_count, + unsigned* arg_blk_gap_count) BMNOEXCEPT { bm::word_t* blk = 0; for (unsigned k = 0; k < src_size; ++k) @@ -1529,11 +1550,12 @@ bm::word_t* aggregator::sort_input_blocks_or(const bvector_type_const_ptr* b // ------------------------------------------------------------------------ template -bm::word_t* aggregator::sort_input_blocks_and(const bvector_type_const_ptr* bv_src, - 
unsigned src_size, - unsigned i, unsigned j, - unsigned* arg_blk_count, - unsigned* arg_blk_gap_count) +bm::word_t* aggregator::sort_input_blocks_and( + const bvector_type_const_ptr* bv_src, + unsigned src_size, + unsigned i, unsigned j, + unsigned* arg_blk_count, + unsigned* arg_blk_gap_count) BMNOEXCEPT { unsigned full_blk_cnt = 0; bm::word_t* blk = FULL_BLOCK_FAKE_ADDR; @@ -1683,20 +1705,24 @@ bool aggregator::combine_shift_right_and( { if (i > top_block_size_) { - if (!this->any_carry_overs(src_and_size)) + if (!any_carry_overs(&ar_->carry_overs_[0], src_and_size)) break; // quit early if there is nothing to carry on } unsigned j = 0; do { - bool found = combine_shift_right_and(i, j, bv_target, bv_src_and, src_and_size); + bool found = + combine_shift_right_and(i, j, bv_target, bv_src_and, src_and_size); if (found && any) return found; } while (++j < bm::set_sub_array_size); } // for i + if (compute_count_) + return bool(count_); + return bv_target.any(); } @@ -1708,7 +1734,6 @@ bool aggregator::combine_shift_right_and(unsigned i, unsigned j, const bvector_type_const_ptr* bv_src, unsigned src_size) { - blocks_manager_type& bman_target = bv_target.get_blocks_manager(); bm::word_t* blk = temp_blk_ ? 
temp_blk_ : ar_->tb1; unsigned char* carry_overs = &(ar_->carry_overs_[0]); @@ -1748,18 +1773,33 @@ bool aggregator::combine_shift_right_and(unsigned i, unsigned j, if (blk_zero) // delayed temp block 0-init requested { bm::bit_block_set(blk, 0); - blk_zero = false; + blk_zero = !blk_zero; // = false } const bm::word_t* arg_blk = get_arg_block(bv_src, k, i, j); - carry_overs[k] = process_shift_right_and(arg_blk, digest, carry_over); + carry_overs[k] = (unsigned char) + process_shift_right_and(blk, arg_blk, digest, carry_over); + BM_ASSERT(carry_overs[k] == 0 || carry_overs[k] == 1); } // for k - + + if (blk_zero) // delayed temp block 0-init + { + bm::bit_block_set(blk, 0); + } // block now gets emitted into the target bit-vector if (digest) { BM_ASSERT(!bm::bit_is_all_zero(blk)); - bman_target.opt_copy_bit_block(i, j, blk, - opt_mode_, ar_->tb_opt); + + if (compute_count_) + { + unsigned cnt = bm::bit_block_count(blk, digest); + count_ += cnt; + } + else + { + blocks_manager_type& bman_target = bv_target.get_blocks_manager(); + bman_target.opt_copy_bit_block(i, j, blk, opt_mode_, ar_->tb_opt); + } return true; } return false; @@ -1768,11 +1808,13 @@ bool aggregator::combine_shift_right_and(unsigned i, unsigned j, // ------------------------------------------------------------------------ template -bool aggregator::process_shift_right_and(const bm::word_t* arg_blk, - digest_type& digest, - unsigned carry_over) +unsigned aggregator::process_shift_right_and( + bm::word_t* BMRESTRICT blk, + const bm::word_t* BMRESTRICT arg_blk, + digest_type& BMRESTRICT digest, + unsigned carry_over) BMNOEXCEPT { - bm::word_t* blk = temp_blk_ ? 
temp_blk_ : ar_->tb1; + BM_ASSERT(carry_over == 1 || carry_over == 0); if (BM_IS_GAP(arg_blk)) // GAP argument { @@ -1800,8 +1842,8 @@ bool aggregator::process_shift_right_and(const bm::word_t* arg_blk, if (digest) { carry_over = - bm::bit_block_shift_r1_and_unr(blk, carry_over, arg_blk, - &digest); + bm::bit_block_shift_r1_and_unr(blk, carry_over, arg_blk, + &digest); } else // digest == 0 { @@ -1813,13 +1855,12 @@ bool aggregator::process_shift_right_and(const bm::word_t* arg_blk, } else // arg is zero - target block => zero { - unsigned co = blk[bm::set_block_size-1] >> 31; // carry out + carry_over = blk[bm::set_block_size-1] >> 31; // carry out if (digest) { bm::bit_block_set(blk, 0); // TODO: digest based set - digest ^= digest; + digest = 0; } - carry_over = co; } } return carry_over; @@ -1829,22 +1870,26 @@ bool aggregator::process_shift_right_and(const bm::word_t* arg_blk, template const bm::word_t* aggregator::get_arg_block( - const bvector_type_const_ptr* bv_src, - unsigned k, unsigned i, unsigned j) + const bvector_type_const_ptr* bv_src, + unsigned k, unsigned i, unsigned j) BMNOEXCEPT { - const blocks_manager_type& bman_arg = bv_src[k]->get_blocks_manager(); - return bman_arg.get_block(i, j); + return bv_src[k]->get_blocks_manager().get_block(i, j); } // ------------------------------------------------------------------------ template -bool aggregator::any_carry_overs(unsigned co_size) const +bool aggregator::any_carry_overs(const unsigned char* carry_overs, + unsigned co_size) BMNOEXCEPT { - for (unsigned i = 0; i < co_size; ++i) - if (ar_->carry_overs_[i]) - return true; - return false; + // TODO: loop unroll? 
+ unsigned acc = carry_overs[0]; + for (unsigned i = 1; i < co_size; ++i) + acc |= carry_overs[i]; +// if (ar_->carry_overs_[i]) +// return true; +// return false; + return acc; } // ------------------------------------------------------------------------ @@ -1888,7 +1933,7 @@ aggregator::run_step(unsigned i, unsigned j) { if (i > top_block_size_) { - if (!this->any_carry_overs(arg_group0_size)) + if (!this->any_carry_overs(&ar_->carry_overs_[0], arg_group0_size)) { operation_status_ = op_done; return operation_status_; diff --git a/c++/include/util/bitset/bmalgo.h b/c++/include/util/bitset/bmalgo.h index a44c5fc4..669dab93 100644 --- a/c++/include/util/bitset/bmalgo.h +++ b/c++/include/util/bitset/bmalgo.h @@ -46,7 +46,7 @@ namespace bm \ingroup setalgo */ template -typename BV::size_type count_and(const BV& bv1, const BV& bv2) +typename BV::size_type count_and(const BV& bv1, const BV& bv2) BMNOEXCEPT { return bm::distance_and_operation(bv1, bv2); } @@ -59,7 +59,7 @@ typename BV::size_type count_and(const BV& bv1, const BV& bv2) \ingroup setalgo */ template -typename BV::size_type any_and(const BV& bv1, const BV& bv2) +typename BV::size_type any_and(const BV& bv1, const BV& bv2) BMNOEXCEPT { distance_metric_descriptor dmd(bm::COUNT_AND); @@ -78,7 +78,7 @@ typename BV::size_type any_and(const BV& bv1, const BV& bv2) */ template bm::distance_metric_descriptor::size_type -count_xor(const BV& bv1, const BV& bv2) +count_xor(const BV& bv1, const BV& bv2) BMNOEXCEPT { distance_metric_descriptor dmd(bm::COUNT_XOR); @@ -94,7 +94,7 @@ count_xor(const BV& bv1, const BV& bv2) \ingroup setalgo */ template -typename BV::size_type any_xor(const BV& bv1, const BV& bv2) +typename BV::size_type any_xor(const BV& bv1, const BV& bv2) BMNOEXCEPT { distance_metric_descriptor dmd(bm::COUNT_XOR); @@ -112,7 +112,7 @@ typename BV::size_type any_xor(const BV& bv1, const BV& bv2) \ingroup setalgo */ template -typename BV::size_type count_sub(const BV& bv1, const BV& bv2) +typename 
BV::size_type count_sub(const BV& bv1, const BV& bv2) BMNOEXCEPT { distance_metric_descriptor dmd(bm::COUNT_SUB_AB); @@ -129,7 +129,7 @@ typename BV::size_type count_sub(const BV& bv1, const BV& bv2) \ingroup setalgo */ template -typename BV::size_type any_sub(const BV& bv1, const BV& bv2) +typename BV::size_type any_sub(const BV& bv1, const BV& bv2) BMNOEXCEPT { distance_metric_descriptor dmd(bm::COUNT_SUB_AB); @@ -146,7 +146,7 @@ typename BV::size_type any_sub(const BV& bv1, const BV& bv2) \ingroup setalgo */ template -typename BV::size_type count_or(const BV& bv1, const BV& bv2) +typename BV::size_type count_or(const BV& bv1, const BV& bv2) BMNOEXCEPT { distance_metric_descriptor dmd(bm::COUNT_OR); @@ -162,7 +162,7 @@ typename BV::size_type count_or(const BV& bv1, const BV& bv2) \ingroup setalgo */ template -typename BV::size_type any_or(const BV& bv1, const BV& bv2) +typename BV::size_type any_or(const BV& bv1, const BV& bv2) BMNOEXCEPT { distance_metric_descriptor dmd(bm::COUNT_OR); @@ -173,27 +173,28 @@ typename BV::size_type any_or(const BV& bv1, const BV& bv2) #define BM_SCANNER_OP(x) \ - if (0 != (block = blk_blk[j+x])) \ +if (0 != (block = blk_blk[j+x])) \ +{ \ + if (BM_IS_GAP(block)) \ { \ - if (BM_IS_GAP(block)) \ - { \ - bm::for_each_gap_blk(BMGAP_PTR(block), (r+j+x)*bm::bits_in_block,\ - bit_functor); \ - } \ - else \ - { \ - bm::for_each_bit_blk(block, (r+j+x)*bm::bits_in_block,bit_functor); \ - } \ - } + bm::for_each_gap_blk(BMGAP_PTR(block), (r+j+x)*bm::bits_in_block,\ + bit_functor); \ + } \ + else \ + { \ + bm::for_each_bit_blk(block, (r+j+x)*bm::bits_in_block,bit_functor); \ + } \ +} /** @brief bit-vector visitor scanner to traverse each 1 bit using C++ visitor @param bv - bit vector to scan - @param bit_functor (should support add_bits() and add_range() methods + @param bit_functor - visitor: should support add_bits(), add_range() \ingroup setalgo + @sa for_each_bit_range visit_each_bit */ template void for_each_bit(const BV& bv, @@ -248,10 
+249,100 @@ void for_each_bit(const BV& bv, } // for i } +/** + @brief bit-vector range visitor to traverse each 1 bit + + @param bv - bit vector to scan + @param right - start of closed interval [from..to] + @param left - end of close interval [from..to] + @param bit_functor - visitor: should support add_bits(), add_range() + + \ingroup setalgo + @sa for_each_bit +*/ +template +void for_each_bit_range(const BV& bv, + typename BV::size_type left, + typename BV::size_type right, + Func& bit_functor) +{ + if (left > right) + bm::xor_swap(left, right); + if (right == bm::id_max) + --right; + BM_ASSERT(left < bm::id_max && right < bm::id_max); + + bm::for_each_bit_range_no_check(bv, left, right, bit_functor); +} + + #undef BM_SCANNER_OP + +/// private adaptor for C-style callbacks +/// +/// @internal +/// +template +struct bit_vitor_callback_adaptor +{ + typedef VCBT bit_visitor_callback_type; + + bit_vitor_callback_adaptor(void* h, bit_visitor_callback_type cb_func) + : handle_(h), func_(cb_func) + {} + + void add_bits(size_type offset, const unsigned char* bits, unsigned size) + { + for (unsigned i = 0; i < size; ++i) + func_(handle_, offset + bits[i]); + } + void add_range(size_type offset, size_type size) + { + for (size_type i = 0; i < size; ++i) + func_(handle_, offset + i); + } + + void* handle_; + bit_visitor_callback_type func_; +}; + + +/// Functor for bit-copy (for testing) +/// +/// @internal +/// +template +struct bit_vistor_copy_functor +{ + typedef typename BV::size_type size_type; + + bit_vistor_copy_functor(BV& bv) + : bv_(bv) + { + bv_.init(); + } + + void add_bits(size_type offset, const unsigned char* bits, unsigned size) + { + BM_ASSERT(size); + for (unsigned i = 0; i < size; ++i) + bv_.set_bit_no_check(offset + bits[i]); + } + void add_range(size_type offset, size_type size) + { + BM_ASSERT(size); + bv_.set_range(offset, offset + size - 1); + } + + BV& bv_; + bit_visitor_callback_type func_; +}; + + + /** - @brief bit-vector visitor scanner to 
traverse each 1 bit using C callback + @brief bvector visitor scanner to traverse each 1 bit using C callback @param bv - bit vector to scan @param handle_ptr - handle to private memory used by callback @@ -267,33 +358,101 @@ void visit_each_bit(const BV& bv, bit_visitor_callback_type callback_ptr) { typedef typename BV::size_type size_type; - // private adaptor for C-style callbacks - struct callback_adaptor + bm::bit_vitor_callback_adaptor + func(handle_ptr, callback_ptr); + bm::for_each_bit(bv, func); +} + +/** + @brief bvector visitor scanner to traverse each bits in range (C callback) + + @param bv - bit vector to scan + @param left - from [left..right] + @param right - to [left..right] + @param handle_ptr - handle to private memory used by callback + @param callback_ptr - callback function + + \ingroup setalgo + + @sa bit_visitor_callback_type for_each_bit +*/ +template +void visit_each_bit_range(const BV& bv, + typename BV::size_type left, + typename BV::size_type right, + void* handle_ptr, + bit_visitor_callback_type callback_ptr) +{ + typedef typename BV::size_type size_type; + bm::bit_vitor_callback_adaptor + func(handle_ptr, callback_ptr); + bm::for_each_bit_range(bv, left, right, func); +} + +/** + @brief Algorithm to identify bit-vector ranges (splits) for the rank + + Rank range split algorithm walks the bit-vector to create list of + non-overlapping ranges [s1..e1],[s2..e2]...[sN...eN] with requested + (rank) number of 1 bits. All ranges should be the same popcount weight, + except the last one, which may have less. + Scan is progressing from left to right so result ranges will be + naturally sorted. 
+ + @param bv - bit vector to perform the range split scan + @param rank - requested number of bits in each range + if 0 it will create single range [first..last] + to cover the whole bv + @param target_v - [out] STL(or STL-like) vector of pairs to keep pairs results + + \ingroup setalgo + */ +template +void rank_range_split(const BV& bv, + typename BV::size_type rank, + PairVect& target_v) +{ + target_v.resize(0); + typename BV::size_type first, last, pos; + bool found = bv.find_range(first, last); + if (!found) // empty bit-vector + return; + + if (!rank) // if rank is not defined, include the whole vector [first..last] { - callback_adaptor(void* h, bit_visitor_callback_type cb_func) - : handle_(h), func_(cb_func) - {} - - void add_bits(size_type offset, const unsigned char* bits, unsigned size) + typename PairVect::value_type pv; + pv.first = first; pv.second = last; + target_v.push_back(pv); + return; + } + + while (1) + { + typename PairVect::value_type pv; + found = bv.find_rank(rank, first, pos); + if (found) { - for (unsigned i = 0; i < size; ++i) - func_(handle_, offset + bits[i]); + pv.first = first; pv.second = pos; + target_v.push_back(pv); + if (pos >= last) + break; + first = pos + 1; + continue; } - void add_range(size_type offset, unsigned size) + // insufficient rank (last range) + found = bv.any_range(first, last); + if (found) { - for (unsigned i = 0; i < size; ++i) - func_(handle_, offset + i); + pv.first = first; pv.second = last; + target_v.push_back(pv); } - - void* handle_; - bit_visitor_callback_type func_; - }; - - callback_adaptor func(handle_ptr, callback_ptr); - bm::for_each_bit(bv, func); + break; + } // while + } + /** Algorithms for rank compression of bit-vector @@ -562,6 +721,7 @@ void rank_compressor::compress_by_source(BV& bv_target, + } // bm #include "bmundef.h" diff --git a/c++/include/util/bitset/bmalgo_impl.h b/c++/include/util/bitset/bmalgo_impl.h index b6a24574..61ef7c97 100644 --- a/c++/include/util/bitset/bmalgo_impl.h 
+++ b/c++/include/util/bitset/bmalgo_impl.h @@ -70,7 +70,7 @@ enum distance_metric \ingroup distance */ inline -distance_metric operation2metric(set_operation op) +distance_metric operation2metric(set_operation op) BMNOEXCEPT { BM_ASSERT(is_const_set_operation(op)); if (op == set_COUNT) op = set_COUNT_B; @@ -95,11 +95,11 @@ struct distance_metric_descriptor distance_metric metric; size_type result; - distance_metric_descriptor(distance_metric m) + distance_metric_descriptor(distance_metric m) BMNOEXCEPT : metric(m), result(0) {} - distance_metric_descriptor() + distance_metric_descriptor() BMNOEXCEPT : metric(bm::COUNT_XOR), result(0) {} @@ -107,7 +107,7 @@ struct distance_metric_descriptor /*! \brief Sets metric result to 0 */ - void reset() + void reset() BMNOEXCEPT { result = 0; } @@ -125,7 +125,7 @@ inline void combine_count_operation_with_block(const bm::word_t* blk, const bm::word_t* arg_blk, distance_metric_descriptor* dmit, - distance_metric_descriptor* dmit_end) + distance_metric_descriptor* dmit_end) BMNOEXCEPT { gap_word_t* g1 = BMGAP_PTR(blk); @@ -340,7 +340,7 @@ void combine_count_operation_with_block(const bm::word_t* blk, */ inline unsigned combine_count_and_operation_with_block(const bm::word_t* blk, - const bm::word_t* arg_blk) + const bm::word_t* arg_blk) BMNOEXCEPT { unsigned gap = BM_IS_GAP(blk); unsigned arg_gap = BM_IS_GAP(arg_blk); @@ -381,7 +381,7 @@ void combine_any_operation_with_block(const bm::word_t* blk, const bm::word_t* arg_blk, unsigned arg_gap, distance_metric_descriptor* dmit, - distance_metric_descriptor* dmit_end) + distance_metric_descriptor* dmit_end) BMNOEXCEPT { gap_word_t* res=0; @@ -628,7 +628,7 @@ inline unsigned combine_count_operation_with_block(const bm::word_t* blk, const bm::word_t* arg_blk, - distance_metric metric) + distance_metric metric) BMNOEXCEPT { distance_metric_descriptor dmd(metric); combine_count_operation_with_block(blk, //gap, @@ -649,7 +649,7 @@ combine_any_operation_with_block(const bm::word_t* blk, 
unsigned gap, const bm::word_t* arg_blk, unsigned arg_gap, - distance_metric metric) + distance_metric metric) BMNOEXCEPT { distance_metric_descriptor dmd(metric); combine_any_operation_with_block(blk, gap, @@ -668,7 +668,7 @@ combine_any_operation_with_block(const bm::word_t* blk, inline void distance_stage(const distance_metric_descriptor* dmit, const distance_metric_descriptor* dmit_end, - bool* is_all_and) + bool* is_all_and) BMNOEXCEPT { for (const distance_metric_descriptor* it = dmit; it < dmit_end; ++it) { @@ -702,7 +702,7 @@ template void distance_operation(const BV& bv1, const BV& bv2, distance_metric_descriptor* dmit, - distance_metric_descriptor* dmit_end) + distance_metric_descriptor* dmit_end) BMNOEXCEPT { const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager(); const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager(); @@ -787,7 +787,7 @@ void distance_operation(const BV& bv1, */ template typename BV::size_type distance_and_operation(const BV& bv1, - const BV& bv2) + const BV& bv2) BMNOEXCEPT { const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager(); const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager(); @@ -858,7 +858,7 @@ template void distance_operation_any(const BV& bv1, const BV& bv2, distance_metric_descriptor* dmit, - distance_metric_descriptor* dmit_end) + distance_metric_descriptor* dmit_end) BMNOEXCEPT { const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager(); const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager(); @@ -980,7 +980,8 @@ void distance_operation_any(const BV& bv1, \internal */ template -It block_range_scan(It first, It last, SIZE_TYPE nblock, SIZE_TYPE* max_id) +It block_range_scan(It first, It last, + SIZE_TYPE nblock, SIZE_TYPE* max_id) BMNOEXCEPT { SIZE_TYPE m = *max_id; It right; @@ -1333,7 +1334,11 @@ typename BV::size_type count_intervals(const BV& bv) typename BV::blocks_manager_type::block_idx_type st = 0; 
bm::for_each_block(blk_root, bman.top_block_size(), func, st); - return func.count(); + typename BV::size_type intervals = func.count(); + bool last_bit_set = bv.test(bm::id_max-1); + + intervals -= last_bit_set; // correct last (out of range) interval + return intervals; } /*! @@ -1514,7 +1519,7 @@ void export_array(BV& bv, It first, It last) /*! - \brief for-each visitor, calls a special visitor functor for each 1 bit group + \brief for-each visitor, calls a visitor functor for each 1 bit group \param block - bit block buffer pointer \param offset - global block offset (number of bits) @@ -1527,6 +1532,7 @@ template void for_each_bit_blk(const bm::word_t* block, SIZE_TYPE offset, Func& bit_functor) { + BM_ASSERT(block); if (IS_FULL_BLOCK(block)) { bit_functor.add_range(offset, bm::gap_max_bits); @@ -1547,6 +1553,110 @@ void for_each_bit_blk(const bm::word_t* block, SIZE_TYPE offset, } while (block < block_end); } +/*! + \brief for-each range visitor, calls a visitor functor for each 1 bit group + + \param block - bit block buffer pointer + \param offset - global block offset (number of bits) + \param left - bit addredd in block from [from..to] + \param right - bit addredd in block to [from..to] + \param bit_functor - functor must support .add_bits(offset, bits_ptr, size) + + @ingroup bitfunc + @internal +*/ +template +void for_each_bit_blk(const bm::word_t* block, SIZE_TYPE offset, + unsigned left, unsigned right, + Func& bit_functor) +{ + BM_ASSERT(block); + BM_ASSERT(left <= right); + BM_ASSERT(right < bm::bits_in_block); + + if (IS_FULL_BLOCK(block)) + { + unsigned sz = right - left + 1; + bit_functor.add_range(offset + left, sz); + return; + } + unsigned char bits[bm::set_bitscan_wave_size*32]; + + unsigned cnt, nword, nbit, bitcount, temp; + nbit = left & bm::set_word_mask; + const bm::word_t* word = + block + (nword = unsigned(left >> bm::set_word_shift)); + if (left == right) // special case (only 1 bit to check) + { + if ((*word >> nbit) & 1u) + { + 
bits[0] = (unsigned char)nbit; + bit_functor.add_bits(offset + (nword * 32), bits, 1); + } + return; + } + + bitcount = right - left + 1u; + if (nbit) // starting position is not aligned + { + unsigned right_margin = nbit + right - left; + if (right_margin < 32) + { + unsigned mask = + block_set_table::_right[nbit] & + block_set_table::_left[right_margin]; + temp = (*word & mask); + cnt = bm::bitscan_popcnt(temp, bits); + if (cnt) + bit_functor.add_bits(offset + (nword * 32), bits, cnt); + + return; + } + temp = *word & block_set_table::_right[nbit]; + cnt = bm::bitscan_popcnt(temp, bits); + if (cnt) + bit_functor.add_bits(offset + (nword * 32), bits, cnt); + bitcount -= 32 - nbit; + ++word; ++nword; + } + else + { + bitcount = right - left + 1u; + } + BM_ASSERT(bm::set_bitscan_wave_size == 4); + // now when we are word aligned, we can scan the bit-stream + // loop unrolled to evaluate 4 words at a time + for ( ;bitcount >= 128; + bitcount-=128, word+=bm::set_bitscan_wave_size, + nword += bm::set_bitscan_wave_size) + { + cnt = bm::bitscan_wave(word, bits); + if (cnt) + bit_functor.add_bits(offset + (nword * 32), bits, cnt); + } // for + + for ( ;bitcount >= 32; bitcount-=32, ++word) + { + temp = *word; + cnt = bm::bitscan_popcnt(temp, bits); + if (cnt) + bit_functor.add_bits(offset + (nword * 32), bits, cnt); + ++nword; + } // for + + BM_ASSERT(bitcount < 32); + + if (bitcount) // we have a tail to count + { + temp = *word & block_set_table::_left[bitcount-1]; + cnt = bm::bitscan_popcnt(temp, bits); + if (cnt) + bit_functor.add_bits(offset + (nword * 32), bits, cnt); + } + +} + + /*! \brief for-each visitor, calls a special visitor functor for each 1 bit range @@ -1577,6 +1687,223 @@ void for_each_gap_blk(const T* buf, SIZE_TYPE offset, } } +/*! 
+ \brief for-each visitor, calls a special visitor functor for each 1 bit range + + \param buf - bit block buffer pointer + \param offset - global block offset (number of bits) + \param left - interval start [left..right] + \param right - intreval end [left..right] + \param bit_functor - functor must support .add_range(offset, bits_ptr, size) + + @ingroup gapfunc + @internal +*/ +template +void for_each_gap_blk_range(const T* BMRESTRICT buf, + SIZE_TYPE offset, + unsigned left, unsigned right, + Func& bit_functor) +{ + BM_ASSERT(left <= right); + BM_ASSERT(right < bm::bits_in_block); + + unsigned is_set; + unsigned start_pos = bm::gap_bfind(buf, left, &is_set); + const T* BMRESTRICT pcurr = buf + start_pos; + + if (is_set) + { + if (right <= *pcurr) + { + bit_functor.add_range(offset + left, (right + 1)-left); + return; + } + bit_functor.add_range(offset + left, (*pcurr + 1)-left); + ++pcurr; + } + + const T* BMRESTRICT pend = buf + (*buf >> 3); + for (++pcurr; pcurr <= pend; pcurr += 2) + { + T prev = *(pcurr-1); + if (right <= *pcurr) + { + int sz = int(right) - int(prev); + if (sz > 0) + bit_functor.add_range(offset + prev + 1, unsigned(sz)); + return; + } + bit_functor.add_range(offset + prev + 1, *pcurr - prev); + } // for +} + + + +/*! 
For each non-zero block in [from, to] executes supplied functor + \internal +*/ +template +void for_each_bit_block_range(T*** root, + N top_size, N nb_from, N nb_to, F& f) +{ + BM_ASSERT(top_size); + if (nb_from > nb_to) + return; + unsigned i_from = unsigned(nb_from >> bm::set_array_shift); + unsigned j_from = unsigned(nb_from & bm::set_array_mask); + unsigned i_to = unsigned(nb_to >> bm::set_array_shift); + unsigned j_to = unsigned(nb_to & bm::set_array_mask); + + if (i_from >= top_size) + return; + if (i_to >= top_size) + { + i_to = unsigned(top_size-1); + j_to = bm::set_sub_array_size-1; + } + + for (unsigned i = i_from; i <= i_to; ++i) + { + T** blk_blk = root[i]; + if (!blk_blk) + continue; + if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) + { + unsigned j = (i == i_from) ? j_from : 0; + if (!j && (i != i_to)) // full sub-block + { + N base_idx = bm::get_super_block_start(i); + f.add_range(base_idx, bm::set_sub_total_bits); + } + else + { + do + { + N base_idx = bm::get_block_start(i, j); + f.add_range(base_idx, bm::gap_max_bits); + if ((i == i_to) && (j == j_to)) + return; + } while (++j < bm::set_sub_array_size); + } + } + else + { + unsigned j = (i == i_from) ? 
j_from : 0; + do + { + const T* block; + if (blk_blk[j]) + { + N base_idx = bm::get_block_start(i, j); + if (0 != (block = blk_blk[j])) + { + if (BM_IS_GAP(block)) + { + bm::for_each_gap_blk(BMGAP_PTR(block), base_idx, f); + } + else + { + bm::for_each_bit_blk(block, base_idx, f); + } + } + } + + if ((i == i_to) && (j == j_to)) + return; + } while (++j < bm::set_sub_array_size); + } + } // for i +} + + +/** + Implementation of for_each_bit_range without boilerplave checks + @internal +*/ +template +void for_each_bit_range_no_check(const BV& bv, + typename BV::size_type left, + typename BV::size_type right, + Func& bit_functor) +{ + typedef typename BV::size_type size_type; + typedef typename BV::block_idx_type block_idx_type; + + const typename BV::blocks_manager_type& bman = bv.get_blocks_manager(); + bm::word_t*** blk_root = bman.top_blocks_root(); + if (!blk_root) + return; + + block_idx_type nblock_left = (left >> bm::set_block_shift); + block_idx_type nblock_right = (right >> bm::set_block_shift); + + unsigned i0, j0; + bm::get_block_coord(nblock_left, i0, j0); + const bm::word_t* block = bman.get_block_ptr(i0, j0); + unsigned nbit_left = unsigned(left & bm::set_block_mask); + size_type offset = nblock_left * bm::bits_in_block; + + if (nblock_left == nblock_right) // hit in the same block + { + if (!block) + return; + unsigned nbit_right = unsigned(right & bm::set_block_mask); + if (BM_IS_GAP(block)) + { + bm::for_each_gap_blk_range(BMGAP_PTR(block), offset, + nbit_left, nbit_right, bit_functor); + } + else + { + bm::for_each_bit_blk(block, offset, nbit_left, nbit_right, + bit_functor); + } + return; + } + // process left block + if (nbit_left && block) + { + if (BM_IS_GAP(block)) + { + bm::for_each_gap_blk_range(BMGAP_PTR(block), offset, + nbit_left, bm::bits_in_block-1, bit_functor); + } + else + { + bm::for_each_bit_blk(block, offset, nbit_left, bm::bits_in_block-1, + bit_functor); + } + ++nblock_left; + } + + // process all complete blocks in the middle + 
{ + block_idx_type top_blocks_size = bman.top_block_size(); + bm::for_each_bit_block_range(blk_root, top_blocks_size, + nblock_left, nblock_right-1, bit_functor); + } + + unsigned nbit_right = unsigned(right & bm::set_block_mask); + bm::get_block_coord(nblock_right, i0, j0); + block = bman.get_block_ptr(i0, j0); + + if (block) + { + offset = nblock_right * bm::bits_in_block; + if (BM_IS_GAP(block)) + { + bm::for_each_gap_blk_range(BMGAP_PTR(block), offset, + 0, nbit_right, bit_functor); + } + else + { + bm::for_each_bit_blk(block, offset, 0, nbit_right, bit_functor); + } + } +} + + } // namespace bm diff --git a/c++/include/util/bitset/bmalloc.h b/c++/include/util/bitset/bmalloc.h index b9921f27..73a4aacc 100644 --- a/c++/include/util/bitset/bmalloc.h +++ b/c++/include/util/bitset/bmalloc.h @@ -73,13 +73,10 @@ public: ptr = (bm::word_t*) ::_mm_malloc(n * sizeof(bm::word_t), BM_ALLOC_ALIGN); #endif #else - ptr = (bm::word_t*) ::malloc(n * sizeof(bm::word_t)); + ptr = (bm::word_t*) ::malloc(n * sizeof(bm::word_t)); #endif - if (!ptr) - { throw std::bad_alloc(); - } return ptr; } @@ -87,7 +84,7 @@ public: The member function frees storage for an array of n bm::word_t elements, by calling free. */ - static void deallocate(bm::word_t* p, size_t) + static void deallocate(bm::word_t* p, size_t) BMNOEXCEPT { #ifdef BM_ALLOC_ALIGN # ifdef _MSC_VER @@ -120,9 +117,7 @@ public: { void* ptr = ::malloc(n * sizeof(void*)); if (!ptr) - { throw std::bad_alloc(); - } return ptr; } @@ -130,7 +125,7 @@ public: The member function frees storage for an array of n bm::word_t elements, by calling free. 
*/ - static void deallocate(void* p, size_t) + static void deallocate(void* p, size_t) BMNOEXCEPT { ::free(p); } @@ -147,7 +142,7 @@ public: n_pool_max_size = BM_DEFAULT_POOL_SIZE }; - pointer_pool_array() : size_(0) + pointer_pool_array() : pool_ptr_(0), size_(0) { allocate_pool(n_pool_max_size); } @@ -164,7 +159,7 @@ public: /// Push pointer to the pool (if it is not full) /// /// @return 0 if pointer is not accepted (pool is full) - unsigned push(void* ptr) + unsigned push(void* ptr) BMNOEXCEPT { if (size_ == n_pool_max_size - 1) return 0; @@ -174,21 +169,22 @@ public: /// Get a pointer if there are any vacant /// - void* pop() + void* pop() BMNOEXCEPT { - if (size_ == 0) + if (!size_) return 0; return pool_ptr_[--size_]; } private: void allocate_pool(size_t pool_size) { + BM_ASSERT(!pool_ptr_); pool_ptr_ = (void**)::malloc(sizeof(void*) * pool_size); if (!pool_ptr_) throw std::bad_alloc(); } - void free_pool() + void free_pool() BMNOEXCEPT { ::free(pool_ptr_); } @@ -218,21 +214,19 @@ public: bm::word_t* alloc_bit_block() { bm::word_t* ptr = (bm::word_t*)block_pool_.pop(); - if (ptr == 0) + if (!ptr) ptr = block_alloc_.allocate(bm::set_block_size, 0); return ptr; } - void free_bit_block(bm::word_t* block) + void free_bit_block(bm::word_t* block) BMNOEXCEPT { BM_ASSERT(IS_VALID_ADDR(block)); if (!block_pool_.push(block)) - { block_alloc_.deallocate(block, bm::set_block_size); - } } - void free_pools() + void free_pools() BMNOEXCEPT { bm::word_t* block; do @@ -267,19 +261,19 @@ public: public: - mem_alloc(const BA& block_alloc = BA(), const PA& ptr_alloc = PA()) + mem_alloc(const BA& block_alloc = BA(), const PA& ptr_alloc = PA()) BMNOEXCEPT : block_alloc_(block_alloc), ptr_alloc_(ptr_alloc), alloc_pool_p_(0) {} - mem_alloc(const mem_alloc& ma) + mem_alloc(const mem_alloc& ma) BMNOEXCEPT : block_alloc_(ma.block_alloc_), ptr_alloc_(ma.ptr_alloc_), alloc_pool_p_(0) // do not inherit pool (has to be explicitly defined) {} - mem_alloc& operator=(const mem_alloc& ma) + 
mem_alloc& operator=(const mem_alloc& ma) BMNOEXCEPT { block_alloc_ = ma.block_alloc_; ptr_alloc_ = ma.ptr_alloc_; @@ -289,26 +283,26 @@ public: /*! @brief Returns copy of the block allocator object */ - block_allocator_type get_block_allocator() const + block_allocator_type get_block_allocator() const BMNOEXCEPT { return BA(block_alloc_); } /*! @brief Returns copy of the ptr allocator object */ - ptr_allocator_type get_ptr_allocator() const + ptr_allocator_type get_ptr_allocator() const BMNOEXCEPT { return PA(block_alloc_); } /*! @brief set pointer to external pool */ - void set_pool(allocator_pool_type* pool) + void set_pool(allocator_pool_type* pool) BMNOEXCEPT { alloc_pool_p_ = pool; } /*! @brief get pointer to allocation pool (if set) */ - allocator_pool_type* get_pool() + allocator_pool_type* get_pool() BMNOEXCEPT { return alloc_pool_p_; } @@ -328,7 +322,7 @@ public: /*! @brief Frees bit block allocated by alloc_bit_block. */ - void free_bit_block(bm::word_t* block, unsigned alloc_factor = 1) + void free_bit_block(bm::word_t* block, unsigned alloc_factor = 1) BMNOEXCEPT { BM_ASSERT(IS_VALID_ADDR(block)); if (alloc_pool_p_ && alloc_factor == 1) @@ -377,7 +371,7 @@ public: /*! @brief Frees block of pointers. */ - void free_ptr(void* p, size_t size) + void free_ptr(void* p, size_t size) BMNOEXCEPT { if (p) ptr_alloc_.deallocate(p, size); @@ -427,7 +421,7 @@ void* aligned_new_malloc(size_t size) /// /// @internal inline -void aligned_free(void* ptr) +void aligned_free(void* ptr) BMNOEXCEPT { if (!ptr) return; diff --git a/c++/include/util/bitset/bmavx2.h b/c++/include/util/bitset/bmavx2.h index 04e66dd9..2e232589 100644 --- a/c++/include/util/bitset/bmavx2.h +++ b/c++/include/util/bitset/bmavx2.h @@ -222,6 +222,66 @@ bm::id_t avx2_bit_count(const __m256i* BMRESTRICT block, return (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]); } +/*! 
+ @brief Calculate population count based on digest + + @return popcnt + @ingroup AVX2 +*/ +inline +bm::id_t avx2_bit_block_count(const bm::word_t* const block, + bm::id64_t digest) +{ + bm::id_t count = 0; + bm::id64_t* cnt64; + BM_AVX2_POPCNT_PROLOG; + __m256i cnt = _mm256_setzero_si256(); + while (digest) + { + bm::id64_t t = bm::bmi_blsi_u64(digest); // d & -d; + + unsigned wave = _mm_popcnt_u64(t - 1); + unsigned off = wave * bm::set_block_digest_wave_size; + + const __m256i* BMRESTRICT wave_src = (__m256i*)&block[off]; + + __m256i m1A, m1B, m1C, m1D; + m1A = _mm256_load_si256(wave_src); + m1B = _mm256_load_si256(wave_src+1); + if (!_mm256_testz_si256(m1A, m1A)) + { + BM_AVX2_BIT_COUNT(bc, m1A) + cnt = _mm256_add_epi64(cnt, bc); + } + if (!_mm256_testz_si256(m1B, m1B)) + { + BM_AVX2_BIT_COUNT(bc, m1B) + cnt = _mm256_add_epi64(cnt, bc); + } + + m1C = _mm256_load_si256(wave_src+2); + m1D = _mm256_load_si256(wave_src+3); + if (!_mm256_testz_si256(m1C, m1C)) + { + BM_AVX2_BIT_COUNT(bc, m1C) + cnt = _mm256_add_epi64(cnt, bc); + } + if (!_mm256_testz_si256(m1D, m1D)) + { + BM_AVX2_BIT_COUNT(bc, m1D) + cnt = _mm256_add_epi64(cnt, bc); + } + + digest = bm::bmi_bslr_u64(digest); // d &= d - 1; + } // while + cnt64 = (bm::id64_t*)&cnt; + count = (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]); + return count; + +} + + + /*! @brief AND bit count for two aligned bit-blocks @ingroup AVX2 @@ -1367,6 +1427,20 @@ bool avx2_is_all_one(const __m256i* BMRESTRICT block) return true; } +/*! + @brief check if wave of pointers is all 0xFFF + @ingroup AVX2 +*/ +BMFORCEINLINE +bool avx2_test_all_one_wave(const void* ptr) +{ + __m256i maskF = _mm256_set1_epi32(~0u); // braodcast 0xFF + __m256i wcmpA = _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)ptr), maskF); // (w0 == maskF) + unsigned maskA = unsigned(_mm256_movemask_epi8(wcmpA)); + return (maskA == ~0u); +} + + /*! 
@brief check if wave of pointers is all NULL @ingroup AVX2 @@ -2474,16 +2548,24 @@ int avx2_cmpge_u16(__m256i vect16, unsigned short value) } /** - hybrid binary search, starts as binary, then switches to scan - + Hybrid binary search, starts as binary, then switches to scan + NOTE: AVX code uses _mm256_subs_epu16 - saturated substraction which gives 0 if A-B=0 if A < B (not negative a value). - + + \param buf - GAP buffer pointer. + \param pos - index of the element. + \param is_set - output. GAP value (0 or 1). + \return GAP index. + @ingroup AVX2 */ inline -unsigned avx2_gap_test(const unsigned short* buf, unsigned pos) +unsigned avx2_gap_bfind(const unsigned short* BMRESTRICT buf, + unsigned pos, unsigned* BMRESTRICT is_set) { + BM_ASSERT(is_set); + const unsigned linear_cutoff = 48; const unsigned unroll_factor = 16; @@ -2500,8 +2582,9 @@ unsigned avx2_gap_test(const unsigned short* buf, unsigned pos) { if (buf[start] >= pos) { - res = ((*buf) & 1) ^ ((--start) & 1); - return res; + res = ((*buf) & 1) ^ ((start-1) & 1); + *is_set = res; + return start; } } // for BM_ASSERT(0); @@ -2516,7 +2599,7 @@ unsigned avx2_gap_test(const unsigned short* buf, unsigned pos) // but stay within allocated block memory // dsize = arr_end - start; - + __m256i mZ = _mm256_setzero_si256(); __m256i mPos = _mm256_set1_epi16((unsigned short)pos); __m256i vect16, mSub, mge_mask; @@ -2532,8 +2615,9 @@ unsigned avx2_gap_test(const unsigned short* buf, unsigned pos) { int lz = _tzcnt_u32(mask) / 2; start += lz; - res = ((*buf) & 1) ^ ((--start) & 1); - return res; + res = ((*buf) & 1) ^ ((start-1) & 1); + *is_set = res; + return start; } } // for k unsigned tail = unroll_factor - (end - start); @@ -2544,22 +2628,19 @@ unsigned avx2_gap_test(const unsigned short* buf, unsigned pos) mSub = _mm256_subs_epu16(mPos, vect16); mge_mask = _mm256_cmpeq_epi16(mSub, mZ); int mask = _mm256_movemask_epi8(mge_mask); - BM_ASSERT(mask); - // TODO: if should be not needed, cleanup - if (mask) - { - int 
lz = _tzcnt_u32(mask) / 2; - start += lz; - res = ((*buf) & 1) ^ ((--start) & 1); - return res; - } - start += unroll_factor; // remove with if when sure + BM_ASSERT(mask); // the rersult MUST be here at this point + + int lz = _tzcnt_u32(mask) / 2; + start += lz; + res = ((*buf) & 1) ^ ((start-1) & 1); + *is_set = res; + return start; } for (; start < end; ++start) { if (buf[start] >= pos) break; - } + } // for break; } unsigned curr = (start + end) >> 1; @@ -2568,8 +2649,22 @@ unsigned avx2_gap_test(const unsigned short* buf, unsigned pos) else end = curr; } // while - res = ((*buf) & 1) ^ ((--start) & 1); - return res; + res = ((*buf) & 1) ^ ((start-1) & 1); + *is_set = res; + return start; +} + + +/** + Hybrid binary search, starts as binary, then switches to scan + @ingroup AVX2 +*/ +inline +unsigned avx2_gap_test(const unsigned short* BMRESTRICT buf, unsigned pos) +{ + unsigned is_set; + bm::avx2_gap_bfind(buf, pos, &is_set); + return is_set; } /** @@ -3024,6 +3119,13 @@ void avx2_bit_block_xor(bm::word_t* target_block, #define VECT_BIT_BLOCK_XOR(t, src, src_xor, d) \ avx2_bit_block_xor(t, src, src_xor, d) +#define VECT_GAP_BFIND(buf, pos, is_set) \ + avx2_gap_bfind(buf, pos, is_set) + +#define VECT_BIT_COUNT_DIGEST(blk, d) \ + avx2_bit_block_count(blk, d) + + } // namespace diff --git a/c++/include/util/bitset/bmblocks.h b/c++/include/util/bitset/bmblocks.h index 3e4c4c61..857d8e9b 100644 --- a/c++/include/util/bitset/bmblocks.h +++ b/c++/include/util/bitset/bmblocks.h @@ -59,10 +59,10 @@ public: public: typedef id_type size_type; - bm_func_base(blocks_manager& bman) : bm_(bman) {} + bm_func_base(blocks_manager& bman) BMNOEXCEPT : bm_(bman) {} - void on_empty_top(unsigned /* top_block_idx*/ ) {} - void on_empty_block(block_idx_type /* block_idx*/ ) {} + void on_empty_top(unsigned /* top_block_idx*/ ) BMNOEXCEPT {} + void on_empty_block(block_idx_type /* block_idx*/ )BMNOEXCEPT {} private: bm_func_base(const bm_func_base&); bm_func_base& operator=(const 
bm_func_base&); @@ -76,13 +76,13 @@ public: { public: typedef id_type size_type; - bm_func_base_const(const blocks_manager& bman) : bm_(bman) {} + bm_func_base_const(const blocks_manager& bman) BMNOEXCEPT : bm_(bman) {} - void on_empty_top(unsigned /* top_block_idx*/ ) {} - void on_empty_block(block_idx_type /* block_idx*/ ) {} + void on_empty_top(unsigned /* top_block_idx*/ ) BMNOEXCEPT {} + void on_empty_block(block_idx_type /* block_idx*/ ) BMNOEXCEPT {} private: - bm_func_base_const(const bm_func_base_const&); - bm_func_base_const& operator=(const bm_func_base_const&); + bm_func_base_const(const bm_func_base_const&) BMNOEXCEPT; + bm_func_base_const& operator=(const bm_func_base_const&) BMNOEXCEPT; protected: const blocks_manager& bm_; }; @@ -92,10 +92,10 @@ public: class block_count_base : public bm_func_base_const { protected: - block_count_base(const blocks_manager& bm) + block_count_base(const blocks_manager& bm) BMNOEXCEPT : bm_func_base_const(bm) {} - bm::id_t block_count(const bm::word_t* block) const + bm::id_t block_count(const bm::word_t* block) const BMNOEXCEPT { return this->bm_.block_bitcount(block); } @@ -108,17 +108,17 @@ public: public: typedef id_type size_type; - block_count_func(const blocks_manager& bm) + block_count_func(const blocks_manager& bm) BMNOEXCEPT : block_count_base(bm), count_(0) {} - id_type count() const { return count_; } + id_type count() const BMNOEXCEPT { return count_; } - void operator()(const bm::word_t* block) + void operator()(const bm::word_t* block) BMNOEXCEPT { count_ += this->block_count(block); } - void add_full(id_type c) { count_ += c; } - void reset() { count_ = 0; } + void add_full(id_type c) BMNOEXCEPT { count_ += c; } + void reset() BMNOEXCEPT { count_ = 0; } private: id_type count_; @@ -131,24 +131,22 @@ public: public: typedef id_type size_type; - block_count_arr_func(const blocks_manager& bm, unsigned* arr) + block_count_arr_func(const blocks_manager& bm, unsigned* arr) BMNOEXCEPT : block_count_base(bm), 
arr_(arr), last_idx_(0) { arr_[0] = 0; } - void operator()(const bm::word_t* block, id_type idx) + void operator()(const bm::word_t* block, id_type idx) BMNOEXCEPT { while (++last_idx_ < idx) - { arr_[last_idx_] = 0; - } arr_[idx] = this->block_count(block); last_idx_ = idx; } - id_type last_block() const { return last_idx_; } - void on_non_empty_top(unsigned) {} + id_type last_block() const BMNOEXCEPT { return last_idx_; } + void on_non_empty_top(unsigned) BMNOEXCEPT {} private: unsigned* arr_; @@ -161,13 +159,14 @@ public: public: typedef id_type size_type; - block_count_change_func(const blocks_manager& bm) + block_count_change_func(const blocks_manager& bm) BMNOEXCEPT : bm_func_base_const(bm), count_(0), prev_block_border_bit_(0) {} - block_idx_type block_count(const bm::word_t* block, block_idx_type idx) + block_idx_type block_count(const bm::word_t* block, + block_idx_type idx) BMNOEXCEPT { block_idx_type cnt = 0; id_type first_bit; @@ -187,7 +186,7 @@ public: if (BM_IS_GAP(block)) { gap_word_t* gap_block = BMGAP_PTR(block); - cnt = gap_length(gap_block) - 1; + cnt = bm::gap_length(gap_block) - 1; if (idx) { first_bit = bm::gap_test_unr(gap_block, 0); @@ -213,9 +212,9 @@ public: return cnt; } - id_type count() const { return count_; } + id_type count() const BMNOEXCEPT { return count_; } - void operator()(const bm::word_t* block, block_idx_type idx) + void operator()(const bm::word_t* block, block_idx_type idx) BMNOEXCEPT { count_ += block_count(block, idx); } @@ -232,11 +231,12 @@ public: public: typedef id_type size_type; - block_any_func(const blocks_manager& bm) + block_any_func(const blocks_manager& bm) BMNOEXCEPT : bm_func_base_const(bm) {} - bool operator()(const bm::word_t* block, block_idx_type /*idx*/) + bool operator() + (const bm::word_t* block, block_idx_type /*idx*/) BMNOEXCEPT { if (BM_IS_GAP(block)) // gap block return (!gap_is_all_zero(BMGAP_PTR(block))); @@ -250,9 +250,9 @@ public: class gap_level_func : public bm_func_base { public: - 
gap_level_func(blocks_manager& bm, const gap_word_t* glevel_len) - : bm_func_base(bm), - glevel_len_(glevel_len) + gap_level_func(blocks_manager& bm, + const gap_word_t* glevel_len) BMNOEXCEPT + : bm_func_base(bm), glevel_len_(glevel_len) { BM_ASSERT(glevel_len); } @@ -282,19 +282,18 @@ public: return; } - unsigned len = gap_length(gap_blk); - int level = gap_calc_level(len, glevel_len_); + unsigned len = bm::gap_length(gap_blk); + int level = bm::gap_calc_level(len, glevel_len_); if (level == -1) { - bm::word_t* blk = - bman.get_allocator().alloc_bit_block(); + bm::word_t* blk = bman.get_allocator().alloc_bit_block(); bman.set_block_ptr(idx, blk); bm::gap_convert_to_bitset(blk, gap_blk); } else { gap_word_t* gap_blk_new = - bman.allocate_gap_block(unsigned(level), gap_blk, glevel_len_); + bman.allocate_gap_block(unsigned(level), gap_blk, glevel_len_); bm::word_t* p = (bm::word_t*) gap_blk_new; BMSET_PTRGAP(p); @@ -312,7 +311,7 @@ public: class block_one_func : public bm_func_base { public: - block_one_func(blocks_manager& bm) : bm_func_base(bm) {} + block_one_func(blocks_manager& bm) BMNOEXCEPT : bm_func_base(bm) {} void operator()(bm::word_t* block, block_idx_type idx) { @@ -357,7 +356,7 @@ public: } #ifndef BM_NO_CXX11 - blocks_manager(blocks_manager&& blockman) BMNOEXEPT + blocks_manager(blocks_manager&& blockman) BMNOEXCEPT : max_bits_(blockman.max_bits_), top_blocks_(0), top_block_size_(blockman.top_block_size_), @@ -369,7 +368,7 @@ public: } #endif - ~blocks_manager() BMNOEXEPT + ~blocks_manager() BMNOEXCEPT { if (temp_block_) alloc_.free_bit_block(temp_block_); @@ -379,7 +378,7 @@ public: /*! \brief Swaps content \param bm another blocks manager */ - void swap(blocks_manager& bm) BMNOEXEPT + void swap(blocks_manager& bm) BMNOEXCEPT { BM_ASSERT(this != &bm); @@ -399,7 +398,7 @@ public: /*! 
\brief implementation of moving semantics */ - void move_from(blocks_manager& bm) BMNOEXEPT + void move_from(blocks_manager& bm) BMNOEXCEPT { deinit_tree(); swap(bm); @@ -412,9 +411,9 @@ public: } - void free_ptr(bm::word_t** ptr) + void free_ptr(bm::word_t** ptr) BMNOEXCEPT { - if (ptr) alloc_.free_ptr(ptr); + alloc_.free_ptr(ptr); } /** @@ -422,7 +421,7 @@ public: \param bits_to_store - supposed capacity (number of bits) \return size of the top level block */ - unsigned compute_top_block_size(id_type bits_to_store) + unsigned compute_top_block_size(id_type bits_to_store) const BMNOEXCEPT { if (bits_to_store >= bm::id_max) // working in full-range mode return bm::set_top_array_size; @@ -456,7 +455,8 @@ public: \param no_more_blocks - 1 if there are no more blocks at all \return block adress or NULL if not yet allocated */ - bm::word_t* get_block(block_idx_type nb, int* no_more_blocks) const + const bm::word_t* + get_block(block_idx_type nb, int* no_more_blocks) const BMNOEXCEPT { BM_ASSERT(top_blocks_); unsigned i = unsigned(nb >> bm::set_array_shift); @@ -489,7 +489,7 @@ public: @return bm::set_total_blocks - no more blocks */ block_idx_type - find_next_nz_block(block_idx_type nb, bool deep_scan = true) const + find_next_nz_block(block_idx_type nb, bool deep_scan=true) const BMNOEXCEPT { if (is_init()) { @@ -521,7 +521,7 @@ public: \param j - second level block index \return block adress or NULL if not yet allocated */ - const bm::word_t* get_block(unsigned i, unsigned j) const + const bm::word_t* get_block(unsigned i, unsigned j) const BMNOEXCEPT { if (!top_blocks_ || i >= top_block_size_) return 0; const bm::word_t* const* blk_blk = top_blocks_[i]; @@ -537,7 +537,7 @@ public: \param j - second level block index \return block adress or NULL if not yet allocated */ - const bm::word_t* get_block_ptr(unsigned i, unsigned j) const + const bm::word_t* get_block_ptr(unsigned i, unsigned j) const BMNOEXCEPT { if (!top_blocks_ || i >= top_block_size_) return 0; @@ 
-553,9 +553,10 @@ public: \param j - second level block index \return block adress or NULL if not yet allocated */ - bm::word_t* get_block_ptr(unsigned i, unsigned j) + bm::word_t* get_block_ptr(unsigned i, unsigned j) BMNOEXCEPT { - if (!top_blocks_ || i >= top_block_size_) return 0; + if (!top_blocks_ || i >= top_block_size_) + return 0; bm::word_t* const* blk_blk = top_blocks_[i]; if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) return FULL_BLOCK_FAKE_ADDR; @@ -569,7 +570,7 @@ public: \param i - top level block index \return block adress or NULL if not yet allocated */ - const bm::word_t* const * get_topblock(unsigned i) const + const bm::word_t* const * get_topblock(unsigned i) const BMNOEXCEPT { return (!top_blocks_ || i >= top_block_size_) ? 0 : top_blocks_[i]; } @@ -577,7 +578,7 @@ public: /** \brief Returns root block in the tree. */ - bm::word_t*** top_blocks_root() const + bm::word_t*** top_blocks_root() const BMNOEXCEPT { blocks_manager* bm = const_cast(this); @@ -837,7 +838,7 @@ public: { gap_res = true; new_block = (bm::word_t*) - get_allocator().alloc_gap_block(unsigned(new_level), glen()); + get_allocator().alloc_gap_block(unsigned(new_level), glen()); ::memcpy(new_block, gap_block, len * sizeof(bm::gap_word_t)); bm::set_gap_level(new_block, new_level); } @@ -1124,7 +1125,7 @@ public: /*! @brief Fills all blocks with 0. @param free_mem - if true function frees the resources (obsolete) */ - void set_all_zero(bool /*free_mem*/) + void set_all_zero(bool /*free_mem*/) BMNOEXCEPT { if (!is_init()) return; deinit_tree(); // TODO: optimization of top-level realloc @@ -1141,7 +1142,7 @@ public: bm::set_sub_array_size, func); } - void free_top_subblock(unsigned nblk_blk) + void free_top_subblock(unsigned nblk_blk) BMNOEXCEPT { BM_ASSERT(top_blocks_[nblk_blk]); if ((bm::word_t*)top_blocks_[nblk_blk] != FULL_BLOCK_FAKE_ADDR) @@ -1452,7 +1453,7 @@ public: Places new block into blocks table. 
*/ BMFORCEINLINE - void set_block_ptr(unsigned i, unsigned j, bm::word_t* block) + void set_block_ptr(unsigned i, unsigned j, bm::word_t* block) BMNOEXCEPT { BM_ASSERT(is_init()); BM_ASSERT(i < top_block_size_); @@ -1597,7 +1598,7 @@ public: /** Free block, make it zero pointer in the tree */ - void zero_gap_block_ptr(unsigned i, unsigned j) + void zero_gap_block_ptr(unsigned i, unsigned j) BMNOEXCEPT { BM_ASSERT(top_blocks_ && i < top_block_size_); @@ -1616,19 +1617,15 @@ public: Count number of bits ON in the block */ static - bm::id_t block_bitcount(const bm::word_t* block) + bm::id_t block_bitcount(const bm::word_t* block) BMNOEXCEPT { BM_ASSERT(block); id_t count; if (BM_IS_GAP(block)) - { count = bm::gap_bit_count_unr(BMGAP_PTR(block)); - } else // bitset - { count = (IS_FULL_BLOCK(block)) ? bm::bits_in_block : bm::bit_block_count(block); - } return count; } @@ -1678,7 +1675,7 @@ public: } /*! deallocate temp block */ - void free_temp_block() + void free_temp_block() BMNOEXCEPT { if (temp_block_) { @@ -1686,6 +1683,7 @@ public: temp_block_ = 0; } } + /*! Detach and return temp block. if temp block is NULL allocates a bit-block caller is responsible for returning @@ -1705,7 +1703,7 @@ public: /*! Return temp block if temp block already exists - block gets deallocated */ - void return_tempblock(bm::word_t* block) + void return_tempblock(bm::word_t* block) BMNOEXCEPT { BM_ASSERT(block != temp_block_); BM_ASSERT(IS_VALID_ADDR(block)); @@ -1717,7 +1715,7 @@ public: } /*! Assigns new GAP lengths vector */ - void set_glen(const gap_word_t* glevel_len) + void set_glen(const gap_word_t* glevel_len) BMNOEXCEPT { ::memcpy(glevel_len_, glevel_len, sizeof(glevel_len_)); } @@ -1745,7 +1743,7 @@ public: /** Returns true if second level block pointer is 0. 
*/ - bool is_subblock_null(unsigned nsub) const + bool is_subblock_null(unsigned nsub) const BMNOEXCEPT { BM_ASSERT(top_blocks_); if (nsub >= top_block_size_) @@ -1753,14 +1751,14 @@ public: return top_blocks_[nsub] == NULL; } - bm::word_t*** top_blocks_root() + bm::word_t*** top_blocks_root() BMNOEXCEPT { return top_blocks_; } /*! \brief Returns current GAP level vector */ - const gap_word_t* glen() const + const gap_word_t* glen() const BMNOEXCEPT { return glevel_len_; } @@ -1768,14 +1766,14 @@ public: /*! \brief Returns GAP level length for specified level \param level - level number */ - unsigned glen(unsigned level) const + unsigned glen(unsigned level) const BMNOEXCEPT { return glevel_len_[level]; } /*! \brief Returns size of the top block array in the tree */ - unsigned top_block_size() const + unsigned top_block_size() const BMNOEXCEPT { return top_block_size_; } @@ -1829,21 +1827,20 @@ public: /** \brief Returns reference on the allocator */ - allocator_type& get_allocator() { return alloc_; } + allocator_type& get_allocator() BMNOEXCEPT { return alloc_; } /** \brief Returns allocator */ - allocator_type get_allocator() const { return alloc_; } + allocator_type get_allocator() const BMNOEXCEPT { return alloc_; } /// if tree of blocks already up - bool is_init() const { return top_blocks_ != 0; } + bool is_init() const BMNOEXCEPT { return top_blocks_ != 0; } /// allocate first level of descr. of blocks void init_tree() { BM_ASSERT(top_blocks_ == 0); - if (top_block_size_) { top_blocks_ = (bm::word_t***) alloc_.alloc_ptr(top_block_size_); @@ -1865,7 +1862,7 @@ public: alloc_.free_bit_block(blk); \ } - void deallocate_top_subblock(unsigned nblk_blk) + void deallocate_top_subblock(unsigned nblk_blk) BMNOEXCEPT { if (!top_blocks_[nblk_blk]) return; @@ -1907,7 +1904,7 @@ public: /** destroy tree, free memory in all blocks and control structures Note: pointers are NOT assigned to zero(!) 
*/ - void destroy_tree() BMNOEXEPT + void destroy_tree() BMNOEXCEPT { if (!top_blocks_) return; @@ -1937,7 +1934,7 @@ public: } #undef BM_FREE_OP - void deinit_tree() BMNOEXEPT + void deinit_tree() BMNOEXCEPT { destroy_tree(); top_blocks_ = 0; top_block_size_ = 0; @@ -1946,7 +1943,7 @@ public: // ---------------------------------------------------------------- /// calculate top blocks which are not NULL and not FULL - unsigned find_real_top_blocks() const + unsigned find_real_top_blocks() const BMNOEXCEPT { unsigned cnt = 0; unsigned top_blocks = top_block_size(); @@ -1964,7 +1961,7 @@ public: // ---------------------------------------------------------------- /// calculate max top blocks size whithout NULL-tail - unsigned find_max_top_blocks() const + unsigned find_max_top_blocks() const BMNOEXCEPT { unsigned top_blocks = top_block_size(); if (!top_blocks) @@ -1981,11 +1978,11 @@ public: // ---------------------------------------------------------------- - void validate_top_zero(unsigned i) + void validate_top_zero(unsigned i) BMNOEXCEPT { BM_ASSERT(i < top_block_size()); bm::word_t** blk_blk = top_blocks_[i]; - // TODO: SIMD + // TODO: SIMD or unroll for (unsigned j = 0; j < bm::set_sub_array_size; ++j) { if (blk_blk[j]) @@ -1997,7 +1994,7 @@ public: // ---------------------------------------------------------------- - void validate_top_full(unsigned i) + void validate_top_full(unsigned i) BMNOEXCEPT { BM_ASSERT(i < top_block_size()); bm::word_t** blk_blk = top_blocks_[i]; @@ -2015,7 +2012,7 @@ public: Calculate approximate memory needed to serialize big runs of 0000s and 111s (as blocks) */ - size_t calc_serialization_null_full() const + size_t calc_serialization_null_full() const BMNOEXCEPT { size_t s_size = sizeof(unsigned); if (!top_blocks_) @@ -2041,6 +2038,9 @@ public: } nb_empty += (i - nb_prev) * bm::set_sub_array_size; blk_blk = top_blocks_[i]; + BM_ASSERT(blk_blk); + if (!blk_blk) + break; } if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) { @@ -2357,7 
+2357,7 @@ template class bit_block_guard { public: - bit_block_guard(BlocksManager& bman, bm::word_t* blk=0) + bit_block_guard(BlocksManager& bman, bm::word_t* blk=0) BMNOEXCEPT : bman_(bman), block_(blk) {} @@ -2366,18 +2366,20 @@ public: if (IS_VALID_ADDR(block_)) bman_.get_allocator().free_bit_block(block_, 3); } - void attach(bm::word_t* blk) + + void attach(bm::word_t* blk) BMNOEXCEPT { if (IS_VALID_ADDR(block_)) bman_.get_allocator().free_bit_block(block_); block_ = blk; } + bm::word_t* allocate() { attach(bman_.get_allocator().alloc_bit_block(3)); return block_; } - bm::word_t* get() { return block_; } + bm::word_t* get() BMNOEXCEPT { return block_; } private: bit_block_guard(const bit_block_guard&); diff --git a/c++/include/util/bitset/bmbmatrix.h b/c++/include/util/bitset/bmbmatrix.h index b8b0ed88..06908a80 100644 --- a/c++/include/util/bitset/bmbmatrix.h +++ b/c++/include/util/bitset/bmbmatrix.h @@ -72,7 +72,7 @@ public: allocation_policy_type ap = allocation_policy_type(), size_type bv_max_size = bm::id_max, const allocator_type& alloc = allocator_type()); - ~basic_bmatrix() BMNOEXEPT; + ~basic_bmatrix() BMNOEXCEPT; /*! copy-ctor */ basic_bmatrix(const basic_bmatrix& bbm); @@ -84,10 +84,10 @@ public: #ifndef BM_NO_CXX11 /*! move-ctor */ - basic_bmatrix(basic_bmatrix&& bbm) BMNOEXEPT; + basic_bmatrix(basic_bmatrix&& bbm) BMNOEXCEPT; /*! move assignmment operator */ - basic_bmatrix& operator = (basic_bmatrix&& bbm) BMNOEXEPT + basic_bmatrix& operator = (basic_bmatrix&& bbm) BMNOEXCEPT { if (this != &bbm) { @@ -98,7 +98,8 @@ public: } #endif - void set_allocator_pool(allocator_pool_type* pool_ptr) { pool_ = pool_ptr; } + void set_allocator_pool(allocator_pool_type* pool_ptr) BMNOEXCEPT + { pool_ = pool_ptr; } ///@} @@ -107,7 +108,7 @@ public: ///@{ /*! Swap content */ - void swap(basic_bmatrix& bbm) BMNOEXEPT; + void swap(basic_bmatrix& bbm) BMNOEXCEPT; /*! Copy content */ void copy_from(const basic_bmatrix& bbm); @@ -118,17 +119,17 @@ public: /*! 
@name row access */ ///@{ - /*! Get row bit-vector */ - const bvector_type* row(size_type i) const; + /*! Get row bit-vector. Can return NULL */ + const bvector_type* row(size_type i) const BMNOEXCEPT; - /*! Get row bit-vector */ - bvector_type_const_ptr get_row(size_type i) const; + /*! Get row bit-vector. Can return NULL */ + bvector_type_const_ptr get_row(size_type i) const BMNOEXCEPT; - /*! Get row bit-vector */ - bvector_type* get_row(size_type i); + /*! Get row bit-vector. Can return NULL */ + bvector_type* get_row(size_type i) BMNOEXCEPT; /*! get number of value rows */ - size_type rows() const { return rsize_; } + size_type rows() const BMNOEXCEPT { return rsize_; } /*! Make sure row is constructed, return bit-vector */ bvector_type_ptr construct_row(size_type row); @@ -168,7 +169,7 @@ public: @param pos - column position in the matrix @param octet_idx - octet based row position (1 octet - 8 rows) */ - unsigned char get_octet(size_type pos, size_type octet_idx) const; + unsigned char get_octet(size_type pos, size_type octet_idx) const BMNOEXCEPT; /*! Compare vector[pos] with octet @@ -183,7 +184,7 @@ public: @return 0 - equal, -1 - less(vect[pos] < octet), 1 - greater */ int compare_octet(size_type pos, - size_type octet_idx, char octet) const; + size_type octet_idx, char octet) const BMNOEXCEPT; ///@} @@ -194,12 +195,13 @@ public: ///@{ /// Test if 4 rows from i are not NULL - bool test_4rows(unsigned i) const; + bool test_4rows(unsigned i) const BMNOEXCEPT; /// Get low level internal access to - const bm::word_t* get_block(size_type p, unsigned i, unsigned j) const; + const bm::word_t* get_block(size_type p, + unsigned i, unsigned j) const BMNOEXCEPT; - unsigned get_half_octet(size_type pos, size_type row_idx) const; + unsigned get_half_octet(size_type pos, size_type row_idx) const BMNOEXCEPT; /*! 
\brief run memory optimization for all bit-vector rows @@ -222,7 +224,7 @@ public: protected: void allocate_rows(size_type rsize); - void free_rows() BMNOEXEPT; + void free_rows() BMNOEXCEPT; bvector_type* construct_bvector(const bvector_type* bv) const; void destruct_bvector(bvector_type* bv) const; @@ -286,7 +288,7 @@ public: #ifndef BM_NO_CXX11 /*! move-ctor */ - base_sparse_vector(base_sparse_vector&& bsv) BMNOEXEPT + base_sparse_vector(base_sparse_vector&& bsv) BMNOEXCEPT { bmatr_.swap(bsv.bmatr_); size_ = bsv.size_; @@ -295,19 +297,19 @@ public: } #endif - void swap(base_sparse_vector& bsv) BMNOEXEPT; + void swap(base_sparse_vector& bsv) BMNOEXCEPT; - size_type size() const { return size_; } + size_type size() const BMNOEXCEPT { return size_; } void resize(size_type new_size); void clear_range(size_type left, size_type right, bool set_null); /*! \brief resize to zero, free memory */ - void clear() BMNOEXEPT; + void clear() BMNOEXCEPT; /*! return true if empty */ - bool empty() const { return size() == 0; } + bool empty() const BMNOEXCEPT { return size() == 0; } public: @@ -317,13 +319,14 @@ public: /** \brief check if container supports NULL(unassigned) values */ - bool is_nullable() const { return bmatr_.get_row(this->null_plain()) != 0; } + bool is_nullable() const BMNOEXCEPT + { return bmatr_.get_row(this->null_plain()) != 0; } /** \brief Get bit-vector of assigned values or NULL (if not constructed that way) */ - const bvector_type* get_null_bvector() const + const bvector_type* get_null_bvector() const BMNOEXCEPT { return bmatr_.get_row(this->null_plain()); } /** \brief test if specified element is NULL @@ -331,7 +334,7 @@ public: \return true if it is NULL false if it was assigned or container is not configured to support assignment flags */ - bool is_null(size_type idx) const; + bool is_null(size_type idx) const BMNOEXCEPT; ///@} @@ -352,25 +355,27 @@ public: \return bit-vector for the bit plain or NULL */ bvector_type_const_ptr - get_plain(unsigned i) 
const { return bmatr_.row(i); } + get_plain(unsigned i) const BMNOEXCEPT { return bmatr_.row(i); } /*! \brief get total number of bit-plains in the vector */ - static unsigned plains() { return value_bits(); } + static unsigned plains() BMNOEXCEPT { return value_bits(); } /** Number of stored bit-plains (value plains + extra */ - static unsigned stored_plains() { return value_bits()+1; } + static unsigned stored_plains() BMNOEXCEPT { return value_bits()+1; } /** Number of effective bit-plains in the value type */ - unsigned effective_plains() const { return effective_plains_ + 1; } + unsigned effective_plains() const BMNOEXCEPT + { return effective_plains_ + 1; } /*! \brief get access to bit-plain as is (can return NULL) */ - bvector_type_ptr plain(unsigned i) { return bmatr_.get_row(i); } - const bvector_type_ptr plain(unsigned i) const { return bmatr_.get_row(i); } + bvector_type_ptr plain(unsigned i) BMNOEXCEPT { return bmatr_.get_row(i); } + bvector_type_const_ptr plain(unsigned i) const BMNOEXCEPT + { return bmatr_.get_row(i); } bvector_type* get_null_bvect() { return bmatr_.get_row(this->null_plain());} @@ -388,12 +393,12 @@ public: @return 64-bit mask @internal */ - bm::id64_t get_plains_mask(unsigned element_idx) const; + bm::id64_t get_plains_mask(unsigned element_idx) const BMNOEXCEPT; /*! get read-only access to inetrnal bit-matrix */ - const bmatrix_type& get_bmatrix() const { return bmatr_; } + const bmatrix_type& get_bmatrix() const BMNOEXCEPT { return bmatr_; } ///@} /*! @@ -417,7 +422,7 @@ public: @sa statistics */ - void calc_stat(typename bvector_type::statistics* st) const; + void calc_stat(typename bvector_type::statistics* st) const BMNOEXCEPT; /*! 
\brief check if another sparse vector has the same content and size @@ -429,7 +434,7 @@ public: \return true, if it is the same */ bool equal(const base_sparse_vector& sv, - bm::null_support null_able = bm::use_null) const; + bm::null_support null_able = bm::use_null) const BMNOEXCEPT; protected: void copy_from(const base_sparse_vector& bsv); @@ -463,13 +468,13 @@ protected: typedef typename bvector_type::block_idx_type block_idx_type; /** Number of total bit-plains in the value type*/ - static unsigned value_bits() + static unsigned value_bits() BMNOEXCEPT { return base_sparse_vector::sv_value_plains; } /** plain index for the "NOT NULL" flags plain */ - static unsigned null_plain() { return value_bits(); } + static unsigned null_plain() BMNOEXCEPT { return value_bits(); } /** optimize block in all matrix plains */ void optimize_block(block_idx_type nb) @@ -515,7 +520,7 @@ basic_bmatrix::basic_bmatrix(size_type rsize, //--------------------------------------------------------------------- template -basic_bmatrix::~basic_bmatrix() BMNOEXEPT +basic_bmatrix::~basic_bmatrix() BMNOEXCEPT { free_rows(); } @@ -537,7 +542,7 @@ basic_bmatrix::basic_bmatrix(const basic_bmatrix& bbm) //--------------------------------------------------------------------- template -basic_bmatrix::basic_bmatrix(basic_bmatrix&& bbm) BMNOEXEPT +basic_bmatrix::basic_bmatrix(basic_bmatrix&& bbm) BMNOEXCEPT : bv_size_(bbm.bv_size_), alloc_(bbm.alloc_), ap_(bbm.ap_), @@ -552,7 +557,7 @@ basic_bmatrix::basic_bmatrix(basic_bmatrix&& bbm) BMNOEXEPT template const typename basic_bmatrix::bvector_type* -basic_bmatrix::row(size_type i) const +basic_bmatrix::row(size_type i) const BMNOEXCEPT { BM_ASSERT(i < rsize_); return bv_rows_[i]; @@ -562,7 +567,7 @@ basic_bmatrix::row(size_type i) const template const typename basic_bmatrix::bvector_type* -basic_bmatrix::get_row(size_type i) const +basic_bmatrix::get_row(size_type i) const BMNOEXCEPT { BM_ASSERT(i < rsize_); return bv_rows_[i]; @@ -572,7 +577,7 @@ 
basic_bmatrix::get_row(size_type i) const template typename basic_bmatrix::bvector_type* -basic_bmatrix::get_row(size_type i) +basic_bmatrix::get_row(size_type i) BMNOEXCEPT { BM_ASSERT(i < rsize_); return bv_rows_[i]; @@ -581,7 +586,7 @@ basic_bmatrix::get_row(size_type i) //--------------------------------------------------------------------- template -bool basic_bmatrix::test_4rows(unsigned j) const +bool basic_bmatrix::test_4rows(unsigned j) const BMNOEXCEPT { BM_ASSERT((j + 4) <= rsize_); #if defined(BM64_SSE4) @@ -593,7 +598,8 @@ bool basic_bmatrix::test_4rows(unsigned j) const __m256i w0 = _mm256_loadu_si256((__m256i*)(bv_rows_ + j)); return !_mm256_testz_si256(w0, w0); #else - bool b = bv_rows_[j + 0] || bv_rows_[j + 1] || bv_rows_[j + 2] || bv_rows_[j + 3]; + bool b = bv_rows_[j + 0] || bv_rows_[j + 1] || + bv_rows_[j + 2] || bv_rows_[j + 3]; return b; #endif } @@ -655,7 +661,7 @@ void basic_bmatrix::allocate_rows(size_type rsize) //--------------------------------------------------------------------- template -void basic_bmatrix::free_rows() BMNOEXEPT +void basic_bmatrix::free_rows() BMNOEXCEPT { for (size_type i = 0; i < rsize_; ++i) { @@ -676,7 +682,7 @@ void basic_bmatrix::free_rows() BMNOEXEPT //--------------------------------------------------------------------- template -void basic_bmatrix::swap(basic_bmatrix& bbm) BMNOEXEPT +void basic_bmatrix::swap(basic_bmatrix& bbm) BMNOEXCEPT { if (this == &bbm) return; @@ -795,12 +801,14 @@ void basic_bmatrix::destruct_bvector(bvector_type* bv) const template const bm::word_t* -basic_bmatrix::get_block(size_type p, unsigned i, unsigned j) const +basic_bmatrix::get_block(size_type p, + unsigned i, unsigned j) const BMNOEXCEPT { bvector_type_const_ptr bv = this->row(p); if (bv) { - const typename bvector_type::blocks_manager_type& bman = bv->get_blocks_manager(); + const typename bvector_type::blocks_manager_type& bman = + bv->get_blocks_manager(); return bman.get_block_ptr(i, j); } return 0; @@ -902,7 +910,7 
@@ void basic_bmatrix::insert_octet(size_type pos, template unsigned char -basic_bmatrix::get_octet(size_type pos, size_type octet_idx) const +basic_bmatrix::get_octet(size_type pos, size_type octet_idx) const BMNOEXCEPT { unsigned v = 0; @@ -1003,7 +1011,7 @@ basic_bmatrix::get_octet(size_type pos, size_type octet_idx) const template int basic_bmatrix::compare_octet(size_type pos, size_type octet_idx, - char octet) const + char octet) const BMNOEXCEPT { char value = char(get_octet(pos, octet_idx)); return (value > octet) - (value < octet); @@ -1013,7 +1021,7 @@ int basic_bmatrix::compare_octet(size_type pos, template unsigned -basic_bmatrix::get_half_octet(size_type pos, size_type row_idx) const +basic_bmatrix::get_half_octet(size_type pos, size_type row_idx) const BMNOEXCEPT { unsigned v = 0; @@ -1198,7 +1206,7 @@ void base_sparse_vector::copy_from( template void base_sparse_vector::swap( - base_sparse_vector& bsv) BMNOEXEPT + base_sparse_vector& bsv) BMNOEXCEPT { if (this != &bsv) { @@ -1212,7 +1220,7 @@ void base_sparse_vector::swap( //--------------------------------------------------------------------- template -void base_sparse_vector::clear() BMNOEXEPT +void base_sparse_vector::clear() BMNOEXCEPT { unsigned plains = value_bits(); for (size_type i = 0; i < plains; ++i) @@ -1274,7 +1282,8 @@ void base_sparse_vector::resize(size_type sz) //--------------------------------------------------------------------- template -bool base_sparse_vector::is_null(size_type idx) const +bool base_sparse_vector::is_null( + size_type idx) const BMNOEXCEPT { const bvector_type* bv_null = get_null_bvector(); return (bv_null) ? 
(!bv_null->test(idx)) : false; @@ -1312,7 +1321,7 @@ typename base_sparse_vector::bvector_type_ptr template bm::id64_t base_sparse_vector::get_plains_mask( - unsigned element_idx) const + unsigned element_idx) const BMNOEXCEPT { BM_ASSERT(element_idx < MAX_SIZE); bm::id64_t mask = 0; @@ -1364,7 +1373,7 @@ void base_sparse_vector::optimize(bm::word_t* temp_block, template void base_sparse_vector::calc_stat( - typename bvector_type::statistics* st) const + typename bvector_type::statistics* st) const BMNOEXCEPT { BM_ASSERT(st); @@ -1409,7 +1418,7 @@ void base_sparse_vector::clear_value_plains_from( template void base_sparse_vector::insert_clear_value_plains_from( - unsigned plain_idx, size_type idx) + unsigned plain_idx, size_type idx) { for (unsigned i = plain_idx; i < sv_value_plains; ++i) { @@ -1437,7 +1446,7 @@ void base_sparse_vector::erase_column(size_type idx) template bool base_sparse_vector::equal( const base_sparse_vector& sv, - bm::null_support null_able) const + bm::null_support null_able) const BMNOEXCEPT { size_type arg_size = sv.size(); if (this->size_ != arg_size) @@ -1468,11 +1477,6 @@ bool base_sparse_vector::equal( bool eq = bv->equal(*arg_bv); if (!eq) return false; - /* - int cmp = bv->compare(*arg_bv); - if (cmp != 0) - return false; - */ } // for j if (null_able == bm::use_null) @@ -1490,11 +1494,6 @@ bool base_sparse_vector::equal( bool eq = bv_null->equal(*bv_null_arg); if (!eq) return false; - /* - int cmp = bv_null->compare(*bv_null); - if (cmp != 0) - return false; - */ } return true; } diff --git a/c++/include/util/bitset/bmbuffer.h b/c++/include/util/bitset/bmbuffer.h index eba6d7e8..8abdcc85 100644 --- a/c++/include/util/bitset/bmbuffer.h +++ b/c++/include/util/bitset/bmbuffer.h @@ -33,35 +33,35 @@ namespace bm class byte_buffer_ptr { public: - byte_buffer_ptr() + byte_buffer_ptr() BMNOEXCEPT : byte_buf_(0), size_(0) {} /// construct byte buffer pointer /// - byte_buffer_ptr(unsigned char* in_buf, size_t in_size) + 
byte_buffer_ptr(unsigned char* in_buf, size_t in_size) BMNOEXCEPT : byte_buf_(in_buf), size_(in_size) {} /// Set buffer pointer - void set_buf(unsigned char* in_buf, size_t in_size) + void set_buf(unsigned char* in_buf, size_t in_size) BMNOEXCEPT { byte_buf_ = in_buf; size_= in_size; } /// Get buffer size - size_t size() const { return size_; } + size_t size() const BMNOEXCEPT { return size_; } /// Get read access to buffer memory - const unsigned char* buf() const { return byte_buf_; } + const unsigned char* buf() const BMNOEXCEPT { return byte_buf_; } /// Get write access to buffer memory - unsigned char* data() { return byte_buf_; } + unsigned char* data() BMNOEXCEPT { return byte_buf_; } - bool operator==(const byte_buffer_ptr& lhs) const { return equal(lhs); } + bool operator==(const byte_buffer_ptr& lhs) const BMNOEXCEPT { return equal(lhs); } /// return true if content and size is the same - bool equal(const byte_buffer_ptr& lhs) const + bool equal(const byte_buffer_ptr& lhs) const BMNOEXCEPT { if (this == &lhs) return true; @@ -89,7 +89,7 @@ public: typedef size_t size_type; public: - byte_buffer() : capacity_(0), alloc_factor_(0) + byte_buffer() BMNOEXCEPT : capacity_(0), alloc_factor_(0) {} byte_buffer(size_t in_capacity) @@ -98,7 +98,7 @@ public: allocate(in_capacity); } - byte_buffer(const byte_buffer& lhs) + byte_buffer(const byte_buffer& lhs) BMNOEXCEPT { byte_buf_ = 0; size_ = capacity_ = alloc_factor_ = 0; @@ -110,7 +110,7 @@ public: #ifndef BM_NO_CXX11 /// Move constructor - byte_buffer(byte_buffer&& in_buf) BMNOEXEPT + byte_buffer(byte_buffer&& in_buf) BMNOEXCEPT { byte_buf_ = in_buf.byte_buf_; in_buf.byte_buf_ = 0; @@ -121,14 +121,14 @@ public: } /// Move assignment operator - byte_buffer& operator=(byte_buffer&& lhs) BMNOEXEPT + byte_buffer& operator=(byte_buffer&& lhs) BMNOEXCEPT { move_from(lhs); return *this; } #endif - byte_buffer& operator=(const byte_buffer& lhs) + byte_buffer& operator=(const byte_buffer& lhs) BMNOEXCEPT { if (this == 
&lhs) return *this; @@ -143,7 +143,7 @@ public: } /// swap content with another buffer - void swap(byte_buffer& other) BMNOEXEPT + void swap(byte_buffer& other) BMNOEXCEPT { if (this == &other) return; @@ -157,7 +157,7 @@ public: } /// take/move content from another buffer - void move_from(byte_buffer& other) BMNOEXEPT + void move_from(byte_buffer& other) BMNOEXCEPT { if (this == &other) return; @@ -190,7 +190,7 @@ public: /// Get buffer capacity - size_t capacity() const { return capacity_; } + size_t capacity() const BMNOEXCEPT { return capacity_; } /// adjust current size (buffer content preserved) void resize(size_t new_size, bool copy_content = true) @@ -213,6 +213,11 @@ public: { if (new_capacity <= capacity_) return; + if (!capacity_) + { + allocate(new_capacity); + return; + } byte_buffer tmp_buffer(new_capacity); tmp_buffer = *this; @@ -247,7 +252,7 @@ public: } /// return memory consumtion - size_t mem_usage() const + size_t mem_usage() const BMNOEXCEPT { return sizeof(capacity_) + sizeof(alloc_factor_) + capacity(); @@ -258,7 +263,7 @@ private: void set_buf(unsigned char* buf, size_t size); /// compute number of words for the desired capacity - static size_t compute_words(size_t capacity) + static size_t compute_words(size_t capacity) BMNOEXCEPT { size_t words = (capacity / sizeof(bm::word_t))+1; return words; @@ -307,10 +312,8 @@ public: typedef Val value_type; typedef typename buffer_type::size_type size_type; - heap_vector() - : buffer_() - { - } + heap_vector() BMNOEXCEPT : buffer_() + {} heap_vector(const heap_vector& hv) : buffer_() @@ -359,14 +362,14 @@ public: } } - value_type* data() { return (value_type*) buffer_.data(); } + value_type* data() BMNOEXCEPT { return (value_type*) buffer_.data(); } - void swap(heap_vector& other) BMNOEXEPT + void swap(heap_vector& other) BMNOEXCEPT { buffer_.swap(other.buffer_); } - const value_type& operator[](size_type pos) const + const value_type& operator[](size_type pos) const BMNOEXCEPT { BM_ASSERT(pos < 
size()); size_type v_size = value_size(); @@ -374,7 +377,7 @@ public: return *reinterpret_cast(p); } - value_type& operator[](size_type pos) + value_type& operator[](size_type pos) BMNOEXCEPT { BM_ASSERT(pos < size()); size_type v_size = value_size(); @@ -393,22 +396,22 @@ public: return *reinterpret_cast(p); } - const value_type* begin() const + const value_type* begin() const BMNOEXCEPT { return (const value_type*) buffer_.buf(); } - size_type size() const + size_type size() const BMNOEXCEPT { return buffer_.size() / value_size(); } - size_type capacity() const + size_type capacity() const BMNOEXCEPT { return buffer_.capacity() / value_size(); } - bool empty() const + bool empty() const BMNOEXCEPT { return (buffer_.size() == 0); } @@ -492,7 +495,7 @@ protected: buffer_.resize(new_size * v_size); } - static size_type value_size() + static size_type value_size() BMNOEXCEPT { size_type size_of = sizeof(value_type); return size_of; @@ -537,13 +540,13 @@ public: row_size_in_bytes = sizeof(value_type) * COLS }; - static size_t rows() { return ROWS; } - static size_t cols() { return COLS; } + static size_t rows() BMNOEXCEPT { return ROWS; } + static size_t cols() BMNOEXCEPT { return COLS; } /** By default object is constructed NOT allocated. 
*/ - heap_matrix() + heap_matrix() BMNOEXCEPT : buffer_() {} @@ -565,12 +568,12 @@ public: buffer_.resize(size_in_bytes); } - bool is_init() const + bool is_init() const BMNOEXCEPT { return buffer_.size(); } - value_type get(size_type row_idx, size_type col_idx) const + value_type get(size_type row_idx, size_type col_idx) const BMNOEXCEPT { BM_ASSERT(row_idx < ROWS); BM_ASSERT(col_idx < COLS); @@ -579,7 +582,7 @@ public: return ((const value_type*)buf)[col_idx]; } - const value_type* row(size_type row_idx) const + const value_type* row(size_type row_idx) const BMNOEXCEPT { BM_ASSERT(row_idx < ROWS); BM_ASSERT(buffer_.size()); @@ -587,7 +590,7 @@ public: return (const value_type*) buf; } - value_type* row(size_type row_idx) + value_type* row(size_type row_idx) BMNOEXCEPT { BM_ASSERT(row_idx < ROWS); BM_ASSERT(buffer_.size()); @@ -597,21 +600,21 @@ public: } /** memset all buffer to all zeroes */ - void set_zero() + void set_zero() BMNOEXCEPT { ::memset(buffer_.data(), 0, size_in_bytes); } /*! swap content */ - void swap(heap_matrix& other) BMNOEXEPT + void swap(heap_matrix& other) BMNOEXCEPT { buffer_.swap(other.buffer_); } /*! move content from another matrix */ - void move_from(heap_matrix& other) BMNOEXEPT + void move_from(heap_matrix& other) BMNOEXCEPT { buffer_.move_from(other.buffer_); } @@ -624,7 +627,7 @@ public: /*! remapping: vect[idx] = matrix[idx, vect[idx] ] */ template - void remap(VECT_TYPE* vect, size_type size) const + void remap(VECT_TYPE* vect, size_type size) const BMNOEXCEPT { BM_ASSERT(size <= ROWS); const unsigned char* buf = buffer_.buf(); @@ -641,7 +644,7 @@ public: /*! 
zero-terminated remap: vect[idx] = matrix[idx, vect[idx] ] */ template - void remapz(VECT_TYPE* vect) const + void remapz(VECT_TYPE* vect) const BMNOEXCEPT { const unsigned char* buf = buffer_.buf(); for (size_type i = 0; i < ROWS; ++i) @@ -704,12 +707,12 @@ public: buffer_.resize(size_in_bytes()); } - bool is_init() const + bool is_init() const BMNOEXCEPT { return buffer_.size(); } - const value_type* row(size_type row_idx) const + const value_type* row(size_type row_idx) const BMNOEXCEPT { BM_ASSERT(row_idx < rows_); BM_ASSERT(buffer_.size()); @@ -717,7 +720,7 @@ public: return (const value_type*) buf; } - value_type* row(size_type row_idx) + value_type* row(size_type row_idx) BMNOEXCEPT { BM_ASSERT(row_idx < rows_); BM_ASSERT(buffer_.size()); @@ -726,15 +729,31 @@ public: return (value_type*)buf; } + value_type get(size_type row_idx, size_type col_idx) BMNOEXCEPT + { + BM_ASSERT(row_idx < rows_); + BM_ASSERT(col_idx < cols_); + const value_type* r = row(row_idx); + return r[col_idx]; + } + + void set(size_type row_idx, size_type col_idx, value_type v) BMNOEXCEPT + { + BM_ASSERT(row_idx < rows_); + BM_ASSERT(col_idx < cols_); + value_type* r = row(row_idx); + r[col_idx] = v; + } + /** memset all buffer to all zeroes */ - void set_zero() + void set_zero() BMNOEXCEPT { ::memset(buffer_.data(), 0, size_in_bytes()); } /*! swap content */ - void swap(dynamic_heap_matrix& other) BMNOEXEPT + void swap(dynamic_heap_matrix& other) BMNOEXCEPT { bm::xor_swap(rows_, other.rows_); bm::xor_swap(cols_, other.cols_); @@ -743,7 +762,7 @@ public: /*! 
move content from another matrix */ - void move_from(dynamic_heap_matrix& other) BMNOEXEPT + void move_from(dynamic_heap_matrix& other) BMNOEXCEPT { rows_ = other.rows_; cols_ = other.cols_; @@ -751,16 +770,46 @@ public: } /** Get low-level buffer access */ - buffer_type& get_buffer() { return buffer_; } + buffer_type& get_buffer() BMNOEXCEPT { return buffer_; } /** Get low-level buffer access */ - const buffer_type& get_buffer() const { return buffer_; } + const buffer_type& get_buffer() const BMNOEXCEPT { return buffer_; } + + /** + copy values of the left triangle elements to the right triangle + (operation specific to matrices with symmetric distances) + */ + void replicate_triange() BMNOEXCEPT + { + BM_ASSERT(rows_ == cols_); + for (size_type i = 0; i < rows_; ++i) + { + for (size_type j = i+1; j < cols_; ++j) + { + set(i, j, get(j, i)); + } + } + } + /** + Sum of row elements + */ + template + void sum(ACC& acc, size_type row_idx) const BMNOEXCEPT + { + BM_ASSERT(row_idx < rows_); + ACC s = 0; + const value_type* r = row(row_idx); + for (size_type j = 0; j < cols_; ++j) + s += r[j]; + acc = s; + } protected: - size_type size_in_bytes() const + + size_type size_in_bytes() const BMNOEXCEPT { return sizeof(value_type) * cols_ * rows_; } - size_type row_size_in_bytes() const + size_type row_size_in_bytes() const BMNOEXCEPT { return sizeof(value_type) * cols_; } diff --git a/c++/include/util/bitset/bmconst.h b/c++/include/util/bitset/bmconst.h index e1497865..954b223b 100644 --- a/c++/include/util/bitset/bmconst.h +++ b/c++/include/util/bitset/bmconst.h @@ -96,14 +96,14 @@ const unsigned set_array_shift = 8u; const unsigned set_array_mask = 0xFFu; const unsigned set_total_blocks32 = (bm::set_array_size32 * bm::set_array_size32); +const unsigned set_sub_total_bits = bm::set_sub_array_size * bm::gap_max_bits; #ifdef BM64ADDR const unsigned set_total_blocks48 = bm::id_max48 / bm::gap_max_bits; const unsigned long long id_max = bm::id_max48; -const unsigned long long 
set_array_size48 = 1 + (bm::id_max48 / (bm::set_sub_array_size * bm::gap_max_bits)); +const unsigned long long set_array_size48 = 1 + (bm::id_max48 / set_sub_total_bits); const unsigned set_top_array_size = bm::set_array_size48; const id64_t set_total_blocks = id64_t(bm::set_top_array_size) * set_sub_array_size; -//bm::id_max / (bm::gap_max_bits * bm::set_sub_array_size); #else const unsigned id_max = bm::id_max32; const unsigned set_top_array_size = bm::set_array_size32; @@ -228,8 +228,8 @@ template struct _copyright }; template const char _copyright::_p[] = - "BitMagic C++ Library. v.6.0.0 (c) 2002-2020 Anatoliy Kuznetsov."; -template const unsigned _copyright::_v[3] = {6, 0, 0}; + "BitMagic C++ Library. v.6.4.0 (c) 2002-2020 Anatoliy Kuznetsov."; +template const unsigned _copyright::_v[3] = {6, 4, 0}; diff --git a/c++/include/util/bitset/bmdbg.h b/c++/include/util/bitset/bmdbg.h index 4c4e3bbd..f0796e26 100644 --- a/c++/include/util/bitset/bmdbg.h +++ b/c++/include/util/bitset/bmdbg.h @@ -510,15 +510,15 @@ void print_stat(const BV& bv, typename BV::block_idx_type blocks = 0) } template -unsigned compute_serialization_size(const BV& bv) +size_t compute_serialization_size(const BV& bv) { BM_DECLARE_TEMP_BLOCK(tb) unsigned char* buf = 0; - unsigned blob_size = 0; + typename BV::size_type blob_size = 0; try { bm::serializer bvs(typename BV::allocator_type(), tb); - bvs.set_compression_level(4); + //bvs.set_compression_level(4); typename BV::statistics st; bv.calc_stat(&st); @@ -677,12 +677,12 @@ void print_svector_stat(const SV& svect, bool print_sim = false) const typename SV::bvector_type* bv1 = sim_vec[k].get_first(); const typename SV::bvector_type* bv2 = sim_vec[k].get_second(); - unsigned bv_size2 = compute_serialization_size(*bv2); + auto bv_size2 = compute_serialization_size(*bv2); typename SV::bvector_type bvx(*bv2); bvx ^= *bv1; - unsigned bv_size_x = compute_serialization_size(bvx); + auto bv_size_x = compute_serialization_size(bvx); if (bv_size_x < 
bv_size2) // true savings { size_t diff = bv_size2 - bv_size_x; @@ -904,10 +904,15 @@ int file_save_svector(const SV& sv, const std::string& fname, size_t* sv_blob_si BM_ASSERT(!fname.empty()); bm::sparse_vector_serial_layout sv_lay; - + + bm::sparse_vector_serializer sv_serializer; + sv_serializer.set_xor_ref(true); + + sv_serializer.serialize(sv, sv_lay); +/* BM_DECLARE_TEMP_BLOCK(tb) bm::sparse_vector_serialize(sv, sv_lay, tb); - +*/ std::ofstream fout(fname.c_str(), std::ios::binary); if (!fout.good()) { @@ -956,7 +961,7 @@ int file_load_svector(SV& sv, const std::string& fname) } -// comapre-check if sparse vector is excatly coresponds to vector +// compare-check if sparse vector is excatly coresponds to vector // // returns 0 - if equal // 1 - no size match @@ -979,6 +984,20 @@ int svector_check(const SV& sv, const V& vect) } +template +void convert_bv2sv(SV& sv, const BV& bv) +{ + typename SV::back_insert_iterator bit = sv.get_back_inserter(); + typename BV::enumerator en = bv.first(); + for (; en.valid(); ++en) + { + auto v = en.value(); + bit = v; + } + bit.flush(); +} + + } // namespace #include "bmundef.h" diff --git a/c++/include/util/bitset/bmdef.h b/c++/include/util/bitset/bmdef.h index 4863e63a..83006e10 100644 --- a/c++/include/util/bitset/bmdef.h +++ b/c++/include/util/bitset/bmdef.h @@ -72,10 +72,15 @@ For more information please visit: http://bitmagic.io // cxx11 features // #if defined(BM_NO_CXX11) || (defined(_MSC_VER) && _MSC_VER < 1900) -# define BMNOEXEPT +# define BMNOEXCEPT +# define BMNOEXCEPT2 #else -# ifndef BMNOEXEPT -# define BMNOEXEPT noexcept +# ifndef BMNOEXCEPT +# define BMNOEXCEPT noexcept +#if defined(__EMSCRIPTEN__) +#else +# define BMNOEXCEPT2 +#endif # endif #endif @@ -84,16 +89,14 @@ For more information please visit: http://bitmagic.io // detects use of EMSCRIPTEN engine and tweaks settings // WebAssemply compiles into 32-bit ptr yet 64-bit wordsize use GCC extensions // +// BMNOEXCEPT2 is to declare "noexcept" for WebAsm 
only where needed +// and silence GCC warnings where not #if defined(__EMSCRIPTEN__) # define BM64OPT # define BM_USE_GCC_BUILD -//# define BM_FORBID_UNALIGNED_ACCESS -#endif - -// disable 'register' keyword, which is obsolete in C++11 -// -#ifndef BMREGISTER -# define BMREGISTER +# define BMNOEXCEPT2 noexcept +#else +# define BMNOEXCEPT2 #endif diff --git a/c++/include/util/bitset/bmfunc.h b/c++/include/util/bitset/bmfunc.h index 06d213a9..e874b28d 100644 --- a/c++/include/util/bitset/bmfunc.h +++ b/c++/include/util/bitset/bmfunc.h @@ -39,12 +39,12 @@ namespace bm inline bm::id_t bit_block_calc_count_range(const bm::word_t* block, bm::word_t left, - bm::word_t right); + bm::word_t right) BMNOEXCEPT; inline bm::id_t bit_block_any_range(const bm::word_t* block, bm::word_t left, - bm::word_t right); + bm::word_t right) BMNOEXCEPT; /*! @brief Structure with statistical information about memory @@ -64,7 +64,7 @@ struct bv_statistics unsigned long long gaps_by_level[bm::gap_levels]; ///< number of GAP blocks at each level /// cound bit block - void add_bit_block() + void add_bit_block() BMNOEXCEPT { ++bit_blocks; size_t mem_used = sizeof(bm::word_t) * bm::set_block_size; @@ -73,7 +73,7 @@ struct bv_statistics } /// count gap block - void add_gap_block(unsigned capacity, unsigned length) + void add_gap_block(unsigned capacity, unsigned length) BMNOEXCEPT { ++gap_blocks; size_t mem_used = (capacity * sizeof(gap_word_t)); @@ -93,7 +93,7 @@ struct bv_statistics } /// Reset statisctics - void reset() + void reset() BMNOEXCEPT { bit_blocks = gap_blocks = ptr_sub_blocks = bv_count = 0; max_serialize_mem = memory_used = gap_cap_overhead = 0; @@ -102,10 +102,11 @@ struct bv_statistics } /// Sum data from another sttructure - void add(const bv_statistics& st) + void add(const bv_statistics& st) BMNOEXCEPT { bit_blocks += st.bit_blocks; gap_blocks += st.gap_blocks; + ptr_sub_blocks += st.ptr_sub_blocks; bv_count += st.bv_count; max_serialize_mem += st.max_serialize_mem + 8; 
memory_used += st.memory_used; @@ -121,6 +122,8 @@ struct pair { First first; Second second; + + pair(First f, Second s) : first(f), second(s) {} }; /** @@ -141,24 +144,35 @@ struct bit_decode_cache \internal */ template -void get_block_coord(BI_TYPE nb, unsigned& i, unsigned& j) +BMFORCEINLINE +void get_block_coord(BI_TYPE nb, unsigned& i, unsigned& j) BMNOEXCEPT { i = unsigned(nb >> bm::set_array_shift); // top block address j = unsigned(nb & bm::set_array_mask); // address in sub-block } /** - \brief ad-hoc conditional expressions + Compute bit address of the first bit in a superblock \internal */ -template struct conditional +template +BMFORCEINLINE RTYPE get_super_block_start(unsigned i) BMNOEXCEPT { - static bool test() { return true; } -}; -template <> struct conditional + return RTYPE(i) * bm::set_sub_total_bits; +} + +/** + Compute bit address of the first bit in a block + \internal +*/ +template +BMFORCEINLINE RTYPE get_block_start(unsigned i, unsigned j) BMNOEXCEPT { - static bool test() { return false; } -}; + RTYPE idx = bm::get_super_block_start(i); + idx += (j) * bm::gap_max_bits; + return idx; +} + /*! @defgroup gapfunc GAP functions @@ -177,42 +191,12 @@ template <> struct conditional */ - - -/*! - Returns BSR value - @ingroup bitfunc -*/ -template -unsigned bit_scan_reverse(T value) -{ - BM_ASSERT(value); - - if (bm::conditional::test()) - { - #if defined(BM_USE_GCC_BUILD) - return (unsigned) (63 - __builtin_clzll(value)); - #else - bm::id64_t v8 = value; - v8 >>= 32; - unsigned v = (unsigned)v8; - if (v) - { - v = bm::bit_scan_reverse32(v); - return v + 32; - } - #endif - } - return bit_scan_reverse32((unsigned)value); -} - - /*! 
Returns bit count @ingroup bitfunc */ BMFORCEINLINE -bm::id_t word_bitcount(bm::id_t w) +bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT { #if defined(BMSSE42OPT) || defined(BMAVX2OPT) return bm::id_t(_mm_popcnt_u32(w)); @@ -230,7 +214,7 @@ bm::id_t word_bitcount(bm::id_t w) } inline -int parallel_popcnt_32(unsigned int n) +int parallel_popcnt_32(unsigned int n) BMNOEXCEPT { unsigned int tmp; @@ -245,7 +229,7 @@ int parallel_popcnt_32(unsigned int n) @ingroup bitfunc */ BMFORCEINLINE -unsigned word_bitcount64(bm::id64_t x) +unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT { #if defined(BMSSE42OPT) || defined(BMAVX2OPT) #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512) @@ -270,7 +254,7 @@ unsigned word_bitcount64(bm::id64_t x) inline unsigned bitcount64_4way(bm::id64_t x, bm::id64_t y, - bm::id64_t u, bm::id64_t v) + bm::id64_t u, bm::id64_t v) BMNOEXCEPT { const bm::id64_t m1 = 0x5555555555555555U; const bm::id64_t m2 = 0x3333333333333333U; @@ -400,7 +384,8 @@ void bit_for_each(T w, F& func) /*! 
@brief Adaptor to copy 1 bits to array @internal */ -template class copy_to_array_functor +template +class copy_to_array_functor { public: copy_to_array_functor(B* bits): bp_(bits) @@ -408,10 +393,10 @@ public: B* ptr() { return bp_; } - void operator()(unsigned bit_idx) { *bp_++ = (B)bit_idx; } + void operator()(unsigned bit_idx) BMNOEXCEPT { *bp_++ = (B)bit_idx; } void operator()(unsigned bit_idx0, - unsigned bit_idx1) + unsigned bit_idx1) BMNOEXCEPT { bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_+=2; @@ -419,7 +404,7 @@ public: void operator()(unsigned bit_idx0, unsigned bit_idx1, - unsigned bit_idx2) + unsigned bit_idx2) BMNOEXCEPT { bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2; bp_+=3; @@ -428,7 +413,7 @@ public: void operator()(unsigned bit_idx0, unsigned bit_idx1, unsigned bit_idx2, - unsigned bit_idx3) + unsigned bit_idx3) BMNOEXCEPT { bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2; bp_[3] = (B)bit_idx3; @@ -451,7 +436,8 @@ private: @ingroup bitfunc */ -template unsigned bit_list(T w, B* bits) +template +unsigned bit_list(T w, B* bits) BMNOEXCEPT { copy_to_array_functor func(bits); bit_for_each(w, func); @@ -468,7 +454,8 @@ template unsigned bit_list(T w, B* bits) @ingroup bitfunc */ -template unsigned bit_list_4(T w, B* bits) +template +unsigned bit_list_4(T w, B* bits) BMNOEXCEPT { copy_to_array_functor func(bits); bit_for_each_4(w, func); @@ -486,7 +473,8 @@ template unsigned bit_list_4(T w, B* bits) @internal */ template -unsigned short bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs) +unsigned short +bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs) BMNOEXCEPT { unsigned pos = 0; while (w) @@ -508,7 +496,7 @@ unsigned short bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs) @internal */ template -unsigned short bitscan_popcnt(bm::id_t w, B* bits) +unsigned short bitscan_popcnt(bm::id_t w, B* bits) BMNOEXCEPT { unsigned pos = 0; while (w) @@ -529,29 +517,48 @@ unsigned short 
bitscan_popcnt(bm::id_t w, B* bits) @ingroup bitfunc */ template -unsigned short bitscan_popcnt64(bm::id64_t w, B* bits) +unsigned short bitscan_popcnt64(bm::id64_t w, B* bits) BMNOEXCEPT { unsigned short pos = 0; while (w) { - bm::id64_t t = w & -w; + bm::id64_t t = bmi_blsi_u64(w); // w & -w; bits[pos++] = (B) bm::word_bitcount64(t - 1); - w &= w - 1; + w = bmi_bslr_u64(w); // w &= w - 1; + } + return pos; +} + +/*! + \brief Unpacks 64-bit word into list of ON bit indexes using popcnt method + \param w - value + \param bits - pointer on the result array + \param offs - value to add to bit position (programmed shift) + \return number of bits in the list + @ingroup bitfunc +*/ +template +unsigned short +bitscan_popcnt64(bm::id64_t w, B* bits, unsigned short offs) BMNOEXCEPT +{ + unsigned short pos = 0; + while (w) + { + bm::id64_t t = bmi_blsi_u64(w); // w & -w; + bits[pos++] = B(bm::word_bitcount64(t - 1) + offs); + w = bmi_bslr_u64(w); // w &= w - 1; } return pos; } + template -unsigned short bitscan(V w, B* bits) +unsigned short bitscan(V w, B* bits) BMNOEXCEPT { if (bm::conditional::test()) - { return bm::bitscan_popcnt64(w, bits); - } else - { return bm::bitscan_popcnt((bm::word_t)w, bits); - } } // -------------------------------------------------------------- @@ -566,7 +573,7 @@ unsigned short bitscan(V w, B* bits) \return selected value (inxed of bit set) */ inline -unsigned word_select64_linear(bm::id64_t w, unsigned rank) +unsigned word_select64_linear(bm::id64_t w, unsigned rank) BMNOEXCEPT { BM_ASSERT(w); BM_ASSERT(rank); @@ -589,7 +596,7 @@ unsigned word_select64_linear(bm::id64_t w, unsigned rank) \return selected value (inxed of bit set) */ inline -unsigned word_select64_bitscan(bm::id64_t w, unsigned rank) +unsigned word_select64_bitscan(bm::id64_t w, unsigned rank) BMNOEXCEPT { BM_ASSERT(w); BM_ASSERT(rank); @@ -616,7 +623,7 @@ unsigned word_select64_bitscan(bm::id64_t w, unsigned rank) \return selected value (inxed of bit set) */ inline -unsigned 
word_select64(bm::id64_t w, unsigned rank) +unsigned word_select64(bm::id64_t w, unsigned rank) BMNOEXCEPT { #if defined(BMI2_SELECT64) return BMI2_SELECT64(w, rank); @@ -642,7 +649,7 @@ unsigned word_select64(bm::id64_t w, unsigned rank) @internal */ BMFORCEINLINE -bm::id64_t widx_to_digest_mask(unsigned w_idx) +bm::id64_t widx_to_digest_mask(unsigned w_idx) BMNOEXCEPT { bm::id64_t mask(1ull); return mask << (w_idx / bm::set_block_digest_wave_size); @@ -657,7 +664,7 @@ bm::id64_t widx_to_digest_mask(unsigned w_idx) @internal */ BMFORCEINLINE -bm::id64_t digest_mask(unsigned from, unsigned to) +bm::id64_t digest_mask(unsigned from, unsigned to) BMNOEXCEPT { BM_ASSERT(from <= to); @@ -680,7 +687,8 @@ bm::id64_t digest_mask(unsigned from, unsigned to) @internal */ inline -bool check_zero_digest(bm::id64_t digest, unsigned bitpos_from, unsigned bitpos_to) +bool check_zero_digest(bm::id64_t digest, + unsigned bitpos_from, unsigned bitpos_to) BMNOEXCEPT { bm::id64_t mask = bm::digest_mask(bitpos_from, bitpos_to); return !(digest & mask); @@ -695,7 +703,7 @@ bool check_zero_digest(bm::id64_t digest, unsigned bitpos_from, unsigned bitpos_ @internal */ inline -void block_init_digest0(bm::word_t* const block, bm::id64_t digest) +void block_init_digest0(bm::word_t* const block, bm::id64_t digest) BMNOEXCEPT { unsigned off; for (unsigned i = 0; i < 64; ++i) @@ -725,7 +733,7 @@ void block_init_digest0(bm::word_t* const block, bm::id64_t digest) @internal */ inline -bm::id64_t calc_block_digest0(const bm::word_t* const block) +bm::id64_t calc_block_digest0(const bm::word_t* const block) BMNOEXCEPT { bm::id64_t digest0 = 0; unsigned off; @@ -766,7 +774,8 @@ bm::id64_t calc_block_digest0(const bm::word_t* const block) @internal */ inline -bm::id64_t update_block_digest0(const bm::word_t* const block, bm::id64_t digest) +bm::id64_t +update_block_digest0(const bm::word_t* const block, bm::id64_t digest) BMNOEXCEPT { const bm::id64_t mask(1ull); bm::id64_t d = digest; @@ -807,7 
+816,7 @@ bm::id64_t update_block_digest0(const bm::word_t* const block, bm::id64_t digest /// Returns true if set operation is constant (bitcount) inline -bool is_const_set_operation(set_operation op) +bool is_const_set_operation(set_operation op) BMNOEXCEPT { return (int(op) >= int(set_COUNT)); } @@ -816,7 +825,7 @@ bool is_const_set_operation(set_operation op) Convert set operation to operation */ inline -bm::operation setop2op(bm::set_operation op) +bm::operation setop2op(bm::set_operation op) BMNOEXCEPT { BM_ASSERT(op == set_AND || op == set_OR || @@ -863,7 +872,7 @@ template struct all_set // version with minimal branching, super-scalar friendly // inline - static bm::id64_t block_type(const bm::word_t* bp) + static bm::id64_t block_type(const bm::word_t* bp) BMNOEXCEPT { bm::id64_t type; if (bm::conditional::test()) @@ -884,11 +893,11 @@ template struct all_set } BMFORCEINLINE - static bool is_full_block(const bm::word_t* bp) + static bool is_full_block(const bm::word_t* bp) BMNOEXCEPT { return (bp == _block._p || bp == _block._p_fullp); } BMFORCEINLINE - static bool is_valid_block_addr(const bm::word_t* bp) + static bool is_valid_block_addr(const bm::word_t* bp) BMNOEXCEPT { return (bp && !(bp == _block._p || bp == _block._p_fullp)); } static all_set_block _block; @@ -899,7 +908,7 @@ template typename all_set::all_set_block all_set::_block; /// XOR swap two scalar variables template -void xor_swap(W& x, W& y) +void xor_swap(W& x, W& y) BMNOEXCEPT { BM_ASSERT(&x != &y); x ^= y; @@ -913,7 +922,7 @@ void xor_swap(W& x, W& y) @internal */ template -bool find_not_null_ptr(bm::word_t*** arr, N start, N size, N* pos) +bool find_not_null_ptr(bm::word_t*** arr, N start, N size, N* pos) BMNOEXCEPT { BM_ASSERT(pos); // BM_ASSERT(start < size); @@ -1035,7 +1044,7 @@ template int wordcmp(T a, T b) @ingroup bitfunc */ inline -bool bit_is_all_zero(const bm::word_t* BMRESTRICT start) +bool bit_is_all_zero(const bm::word_t* BMRESTRICT start) BMNOEXCEPT { #if 
defined(VECT_IS_ZERO_BLOCK) return VECT_IS_ZERO_BLOCK(start); @@ -1062,7 +1071,7 @@ bool bit_is_all_zero(const bm::word_t* BMRESTRICT start) @ingroup gapfunc */ BMFORCEINLINE -bool gap_is_all_zero(const bm::gap_word_t* buf) +bool gap_is_all_zero(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT { // (almost) branchless variant: return (!(*buf & 1u)) & (!(bm::gap_max_bits - 1 - buf[1])); @@ -1075,7 +1084,7 @@ bool gap_is_all_zero(const bm::gap_word_t* buf) @ingroup gapfunc */ BMFORCEINLINE -bool gap_is_all_one(const bm::gap_word_t* buf) +bool gap_is_all_one(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT { return ((*buf & 1u) && (buf[1] == bm::gap_max_bits - 1)); } @@ -1088,7 +1097,7 @@ bool gap_is_all_one(const bm::gap_word_t* buf) @ingroup gapfunc */ BMFORCEINLINE -bm::gap_word_t gap_length(const bm::gap_word_t* buf) +bm::gap_word_t gap_length(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT { return (bm::gap_word_t)((*buf >> 3) + 1); } @@ -1103,7 +1112,8 @@ bm::gap_word_t gap_length(const bm::gap_word_t* buf) @ingroup gapfunc */ template -unsigned gap_capacity(const T* buf, const T* glevel_len) +unsigned +gap_capacity(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT { return glevel_len[(*buf >> 1) & 3]; } @@ -1118,7 +1128,8 @@ unsigned gap_capacity(const T* buf, const T* glevel_len) @ingroup gapfunc */ template -unsigned gap_limit(const T* buf, const T* glevel_len) +unsigned +gap_limit(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT { return glevel_len[(*buf >> 1) & 3]-4; } @@ -1132,7 +1143,7 @@ unsigned gap_limit(const T* buf, const T* glevel_len) @ingroup gapfunc */ template -T gap_level(const T* buf) +T gap_level(const T* BMRESTRICT buf) BMNOEXCEPT { return T((*buf >> 1) & 3u); } @@ -1149,7 +1160,8 @@ T gap_level(const T* buf) @ingroup gapfunc */ template -unsigned gap_find_last(const T* buf, unsigned* last) +unsigned +gap_find_last(const T* BMRESTRICT buf, unsigned* BMRESTRICT last) BMNOEXCEPT { BM_ASSERT(last); @@ 
-1179,7 +1191,8 @@ unsigned gap_find_last(const T* buf, unsigned* last) @ingroup gapfunc */ template -unsigned gap_find_first(const T* buf, unsigned* first) +unsigned +gap_find_first(const T* BMRESTRICT buf, unsigned* BMRESTRICT first) BMNOEXCEPT { BM_ASSERT(first); @@ -1206,24 +1219,30 @@ unsigned gap_find_first(const T* buf, unsigned* first) @ingroup gapfunc */ template -unsigned gap_bfind(const T* buf, unsigned pos, unsigned* is_set) +unsigned gap_bfind(const T* BMRESTRICT buf, + unsigned pos, unsigned* BMRESTRICT is_set) BMNOEXCEPT { BM_ASSERT(pos < bm::gap_max_bits); - *is_set = (*buf) & 1; + #undef VECT_GAP_BFIND // TODO: VECTOR bfind causes performance degradation + #ifdef VECT_GAP_BFIND + return VECT_GAP_BFIND(buf, pos, is_set); + #else + *is_set = (*buf) & 1; - unsigned start = 1; - unsigned end = 1 + ((*buf) >> 3); + unsigned start = 1; + unsigned end = 1 + ((*buf) >> 3); - while ( start != end ) - { - unsigned curr = (start + end) >> 1; - if ( buf[curr] < pos ) - start = curr + 1; - else - end = curr; - } - *is_set ^= ((start-1) & 1); - return start; + while ( start != end ) + { + unsigned curr = (start + end) >> 1; + if ( buf[curr] < pos ) + start = curr + 1; + else + end = curr; + } + *is_set ^= ((start-1) & 1); + return start; + #endif } @@ -1234,7 +1253,8 @@ unsigned gap_bfind(const T* buf, unsigned pos, unsigned* is_set) \return true if position is in "1" gap @ingroup gapfunc */ -template unsigned gap_test(const T* buf, unsigned pos) +template +unsigned gap_test(const T* BMRESTRICT buf, unsigned pos) BMNOEXCEPT { BM_ASSERT(pos < bm::gap_max_bits); @@ -1277,7 +1297,7 @@ template unsigned gap_test(const T* buf, unsigned pos) @ingroup gapfunc */ template -unsigned gap_test_unr(const T* buf, const unsigned pos) +unsigned gap_test_unr(const T* BMRESTRICT buf, const unsigned pos) BMNOEXCEPT { BM_ASSERT(pos < bm::gap_max_bits); @@ -1286,84 +1306,10 @@ unsigned gap_test_unr(const T* buf, const unsigned pos) return (*buf) & 1; } #if defined(BMSSE2OPT) - 
unsigned start = 1; - unsigned end = 1 + ((*buf) >> 3); - unsigned dsize = end - start; - - if (dsize < 17) - { - start = bm::sse2_gap_find(buf + 1, (bm::gap_word_t)pos, dsize); - unsigned res = ((*buf) & 1) ^ ((start) & 1); - BM_ASSERT(buf[start + 1] >= pos); - BM_ASSERT(buf[start] < pos || (start == 0)); - BM_ASSERT(res == bm::gap_test(buf, pos)); - return res; - } - unsigned arr_end = end; - while (start != end) - { - unsigned curr = (start + end) >> 1; - if (buf[curr] < pos) - start = curr + 1; - else - end = curr; - - unsigned size = end - start; - if (size < 16) - { - size += (end != arr_end); - unsigned idx = bm::sse2_gap_find(buf + start, (bm::gap_word_t)pos, size); - start += idx; - - BM_ASSERT(buf[start] >= pos); - BM_ASSERT(buf[start - 1] < pos || (start == 1)); - break; - } - } - - unsigned res = ((*buf) & 1) ^ ((--start) & 1); - + unsigned res = bm::sse2_gap_test(buf, pos); BM_ASSERT(res == bm::gap_test(buf, pos)); - return res; -//#endif #elif defined(BMSSE42OPT) - unsigned start = 1; - unsigned end = 1 + ((*buf) >> 3); - unsigned dsize = end - start; - - if (dsize < 17) - { - start = bm::sse4_gap_find(buf+1, (bm::gap_word_t)pos, dsize); - unsigned res = ((*buf) & 1) ^ ((start) & 1); - BM_ASSERT(buf[start+1] >= pos); - BM_ASSERT(buf[start] < pos || (start==0)); - BM_ASSERT(res == bm::gap_test(buf, pos)); - return res; - } - unsigned arr_end = end; - while (start != end) - { - unsigned curr = (start + end) >> 1; - if (buf[curr] < pos) - start = curr + 1; - else - end = curr; - - unsigned size = end - start; - if (size < 16) - { - size += (end != arr_end); - unsigned idx = bm::sse4_gap_find(buf + start, (bm::gap_word_t)pos, size); - start += idx; - - BM_ASSERT(buf[start] >= pos); - BM_ASSERT(buf[start - 1] < pos || (start == 1)); - break; - } - } - - unsigned res = ((*buf) & 1) ^ ((--start) & 1); - + unsigned res = bm::sse42_gap_test(buf, pos); BM_ASSERT(res == bm::gap_test(buf, pos)); #elif defined(BMAVX2OPT) unsigned res = bm::avx2_gap_test(buf, pos); 
@@ -1378,21 +1324,22 @@ unsigned gap_test_unr(const T* buf, const unsigned pos) \internal */ template -void for_each_nzblock_range(T*** root, N top_size, N nb_from, N nb_to, F& f) +void for_each_nzblock_range(T*** root, + N top_size, N nb_from, N nb_to, F& f) BMNOEXCEPT { BM_ASSERT(top_size); if (nb_from > nb_to) return; - unsigned i_from = nb_from >> bm::set_array_shift; - unsigned j_from = nb_from & bm::set_array_mask; - unsigned i_to = nb_to >> bm::set_array_shift; - unsigned j_to = nb_to & bm::set_array_mask; + unsigned i_from = unsigned(nb_from >> bm::set_array_shift); + unsigned j_from = unsigned(nb_from & bm::set_array_mask); + unsigned i_to = unsigned(nb_to >> bm::set_array_shift); + unsigned j_to = unsigned(nb_to & bm::set_array_mask); if (i_from >= top_size) return; if (i_to >= top_size) { - i_to = top_size-1; + i_to = unsigned(top_size-1); j_to = bm::set_sub_array_size-1; } @@ -1400,16 +1347,12 @@ void for_each_nzblock_range(T*** root, N top_size, N nb_from, N nb_to, F& f) { T** blk_blk = root[i]; if (!blk_blk) - { continue; - } if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) { unsigned j = (i == i_from) ? j_from : 0; if (!j && (i != i_to)) // full sub-block - { - f.add_full(bm::set_sub_array_size * bm::gap_max_bits); - } + f.add_full(bm::set_sub_total_bits); else { do @@ -1426,13 +1369,10 @@ void for_each_nzblock_range(T*** root, N top_size, N nb_from, N nb_to, F& f) do { if (blk_blk[j]) - { f(blk_blk[j]); - } if ((i == i_to) && (j == j_to)) return; - ++j; - } while (j < bm::set_sub_array_size); + } while (++j < bm::set_sub_array_size); } } // for i } @@ -1672,7 +1612,7 @@ void for_each_nzblock2(T*** root, unsigned size1, F& f) Function returns if function-predicate returns true */ template -bool for_each_nzblock_if(T*** root, BI size1, F& f) +bool for_each_nzblock_if(T*** root, BI size1, F& f) BMNOEXCEPT { BI block_idx = 0; for (BI i = 0; i < size1; ++i) @@ -1754,14 +1694,11 @@ template F bmfor_each(T first, T last, F f) /*! 
Computes SUM of all elements of the sequence */ template -bm::id64_t sum_arr(T* first, T* last) +bm::id64_t sum_arr(const T* first, const T* last) BMNOEXCEPT { bm::id64_t sum = 0; - while (first < last) - { + for (;first < last; ++first) sum += *first; - ++first; - } return sum; } @@ -1775,7 +1712,8 @@ bm::id64_t sum_arr(T* first, T* last) @ingroup gapfunc */ template -void gap_split(const T* buf, T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt) +void gap_split(const T* buf, + T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt) BMNOEXCEPT { const T* pcurr = buf; unsigned len = (*pcurr >> 3); @@ -1834,7 +1772,7 @@ void gap_split(const T* buf, T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt) @ingroup gapfunc */ template -unsigned gap_bit_count(const T* buf, unsigned dsize=0) +unsigned gap_bit_count(const T* buf, unsigned dsize=0) BMNOEXCEPT { const T* pcurr = buf; if (dsize == 0) @@ -1850,14 +1788,8 @@ unsigned gap_bit_count(const T* buf, unsigned dsize=0) bits_counter += *pcurr + 1; ++pcurr; } - ++pcurr; // set GAP to 1 - - while (pcurr <= pend) - { + for (++pcurr; pcurr <= pend; pcurr += 2) bits_counter += *pcurr - *(pcurr-1); - pcurr += 2; // jump to the next positive GAP - } - return bits_counter; } @@ -1867,7 +1799,8 @@ unsigned gap_bit_count(const T* buf, unsigned dsize=0) \return Number of non-zero bits. 
@ingroup gapfunc */ -template unsigned gap_bit_count_unr(const T* buf) +template +unsigned gap_bit_count_unr(const T* buf) BMNOEXCEPT { const T* pcurr = buf; unsigned dsize = (*pcurr >> 3); @@ -1918,7 +1851,7 @@ template unsigned gap_bit_count_unr(const T* buf) { cnt += *pcurr - *(pcurr - 1); } - BM_ASSERT(cnt == gap_bit_count(buf)); + BM_ASSERT(cnt == bm::gap_bit_count(buf)); return cnt; } @@ -1933,9 +1866,11 @@ template unsigned gap_bit_count_unr(const T* buf) @ingroup gapfunc */ template -unsigned gap_bit_count_range(const T* const buf, unsigned left, unsigned right) +unsigned gap_bit_count_range(const T* const buf, + unsigned left, unsigned right) BMNOEXCEPT { BM_ASSERT(left <= right); + BM_ASSERT(right < bm::gap_max_bits); const T* pcurr = buf; const T* pend = pcurr + (*pcurr >> 3); @@ -1965,6 +1900,140 @@ unsigned gap_bit_count_range(const T* const buf, unsigned left, unsigned right) return bits_counter; } +/*! + \brief Test if all bits are 1 in GAP buffer in the [left, right] range. + \param buf - GAP buffer pointer. + \param left - leftmost bit index to start from + \param right- rightmost bit index + \return true if all bits are "11111" + @ingroup gapfunc +*/ +template +bool gap_is_all_one_range(const T* const BMRESTRICT buf, + unsigned left, unsigned right) BMNOEXCEPT +{ + BM_ASSERT(left <= right); + BM_ASSERT(right < bm::gap_max_bits); + + unsigned is_set; + unsigned start_pos = bm::gap_bfind(buf, left, &is_set); + if (!is_set) // GAP is 0 + return false; + const T* const pcurr = buf + start_pos; + return (right <= *pcurr); +} + +/*! + \brief Test if any bits are 1 in GAP buffer in the [left, right] range. + \param buf - GAP buffer pointer. 
+ \param left - leftmost bit index to start from + \param right- rightmost bit index + \return true if at least 1 "00010" + @ingroup gapfunc +*/ +template +bool gap_any_range(const T* const BMRESTRICT buf, + unsigned left, unsigned right) BMNOEXCEPT +{ + BM_ASSERT(left <= right); + BM_ASSERT(right < bm::gap_max_bits); + + unsigned is_set; + unsigned start_pos = bm::gap_bfind(buf, left, &is_set); + const T* const pcurr = buf + start_pos; + + if (!is_set) // start GAP is 0 ... + { + if (right <= *pcurr) // ...bit if the interval goes into at least 1 blk + return false; // .. nope + return true; + } + return true; +} + +/*! + \brief Test if any bits are 1 in GAP buffer in the [left, right] range + and flanked with 0s + \param buf - GAP buffer pointer. + \param left - leftmost bit index to start from + \param right- rightmost bit index + \return true if "011110" + @ingroup gapfunc +*/ +template +bool gap_is_interval(const T* const BMRESTRICT buf, + unsigned left, unsigned right) BMNOEXCEPT +{ + BM_ASSERT(left <= right); + BM_ASSERT(left > 0); // cannot check left-1 otherwise + BM_ASSERT(right < bm::gap_max_bits-1); // cannot check right+1 otherwise + + unsigned is_set; + unsigned start_pos = bm::gap_bfind(buf, left, &is_set); + + const T* pcurr = buf + start_pos; + if (!is_set || (right != *pcurr) || (start_pos <= 1)) + return false; + --pcurr; + if (*pcurr != left-1) + return false; + return true; +} + +/** + \brief Searches for the last 1 bit in the 111 interval of a GAP block + \param buf - BIT block buffer + \param nbit - bit index to start checking from + \param pos - [out] found value + + \return false if not found + @ingroup gapfunc +*/ +template +bool gap_find_interval_end(const T* const BMRESTRICT buf, + unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT +{ + BM_ASSERT(pos); + BM_ASSERT(nbit < bm::gap_max_bits); + + unsigned is_set; + unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set); + if (!is_set) + return false; + *pos = buf[start_pos]; + return true; 
+} + + +/** + \brief Searches for the first 1 bit in the 111 interval of a GAP block + \param buf - BIT block buffer + \param nbit - bit index to start checking from + \param pos - [out] found value + + \return false if not found + @ingroup gapfunc +*/ +template +bool gap_find_interval_start(const T* const BMRESTRICT buf, + unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT +{ + BM_ASSERT(pos); + BM_ASSERT(nbit < bm::gap_max_bits); + + unsigned is_set; + unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set); + if (!is_set) + return false; + --start_pos; + if (!start_pos) + *pos = 0; + else + *pos = buf[start_pos]+1; + return true; +} + + /*! \brief GAP block find position for the rank @@ -1982,7 +2051,7 @@ template SIZE_TYPE gap_find_rank(const T* const block, SIZE_TYPE rank, unsigned nbit_from, - unsigned& nbit_pos) + unsigned& nbit_pos) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(rank); @@ -2025,11 +2094,14 @@ SIZE_TYPE gap_find_rank(const T* const block, \brief Counts 1 bits in GAP buffer in the closed [0, right] range. \param buf - GAP buffer pointer. \param right- rightmost bit index - \return Number of non-zero bits. 
+ \param is_corrected - if true the result will be rank corrected + if right bit == true count=count-1 + \return Number of non-zero bits @ingroup gapfunc */ template -unsigned gap_bit_count_to(const T* const buf, T right) +unsigned gap_bit_count_to(const T* const buf, T right, + bool is_corrected=false) BMNOEXCEPT { const T* pcurr = buf; const T* pend = pcurr + (*pcurr >> 3); @@ -2042,6 +2114,7 @@ unsigned gap_bit_count_to(const T* const buf, T right) if (right <= *pcurr) // we are in the target block right now { bits_counter = (right + 1u) & is_set; // & is_set == if (is_set) + bits_counter -= (is_set & unsigned(is_corrected)); return bits_counter; } bits_counter += (*pcurr + 1u) & is_set; @@ -2051,10 +2124,14 @@ unsigned gap_bit_count_to(const T* const buf, T right) { bits_counter += (*pcurr - prev_gap) & is_set; if (pcurr == pend) + { + bits_counter -= (is_set & unsigned(is_corrected)); return bits_counter; + } prev_gap = *pcurr++; } bits_counter += (right - prev_gap) & is_set; + bits_counter -= (is_set & unsigned(is_corrected)); return bits_counter; } @@ -2110,7 +2187,8 @@ template struct d_copy_func @ingroup gapfunc */ template -T* gap_2_dgap(const T* gap_buf, T* dgap_buf, bool copy_head=true) +T* gap_2_dgap(const T* BMRESTRICT gap_buf, + T* BMRESTRICT dgap_buf, bool copy_head=true) BMNOEXCEPT { if (copy_head) // copy GAP header { @@ -2135,7 +2213,8 @@ T* gap_2_dgap(const T* gap_buf, T* dgap_buf, bool copy_head=true) @ingroup gapfunc */ template -void dgap_2_gap(const T* dgap_buf, T* gap_buf, T gap_header=0) +void dgap_2_gap(const T* BMRESTRICT dgap_buf, + T* BMRESTRICT gap_buf, T gap_header=0) BMNOEXCEPT { const T* pcurr = dgap_buf; unsigned len; @@ -2175,7 +2254,8 @@ void dgap_2_gap(const T* dgap_buf, T* gap_buf, T gap_header=0) @ingroup gapfunc */ -template int gapcmp(const T* buf1, const T* buf2) +template +int gapcmp(const T* buf1, const T* buf2) BMNOEXCEPT { const T* pcurr1 = buf1; const T* pend1 = pcurr1 + (*pcurr1 >> 3); @@ -2213,9 +2293,7 @@ template 
int gapcmp(const T* buf1, const T* buf2) return (bitval1) ? 1 : -1; } } - ++pcurr1; ++pcurr2; - bitval1 ^= 1; bitval2 ^= 1; } @@ -2235,7 +2313,7 @@ template int gapcmp(const T* buf1, const T* buf2) template bool gap_find_first_diff(const T* BMRESTRICT buf1, const T* BMRESTRICT buf2, - unsigned* BMRESTRICT pos) + unsigned* BMRESTRICT pos) BMNOEXCEPT { BM_ASSERT(buf1 && buf2 && pos); @@ -2253,7 +2331,8 @@ bool gap_find_first_diff(const T* BMRESTRICT buf1, return false; } - +// ------------------------------------------------------------------------- +// /*! \brief Abstract operation for GAP buffers. @@ -2264,7 +2343,6 @@ bool gap_find_first_diff(const T* BMRESTRICT buf1, can be 0 or 1 (1 inverts the vector) \param vect2 - operand 2 GAP encoded buffer. \param vect2_mask - same as vect1_mask - \param f - operation functor. \param dlen - destination length after the operation \note Internal function. @@ -2278,8 +2356,7 @@ void gap_buff_op(T* BMRESTRICT dest, unsigned vect1_mask, const T* BMRESTRICT vect2, unsigned vect2_mask, - F& f, - unsigned& dlen) + unsigned& dlen) BMNOEXCEPT2 { const T* cur1 = vect1; const T* cur2 = vect2; @@ -2287,7 +2364,7 @@ void gap_buff_op(T* BMRESTRICT dest, T bitval1 = (T)((*cur1++ & 1) ^ vect1_mask); T bitval2 = (T)((*cur2++ & 1) ^ vect2_mask); - T bitval = (T) f(bitval1, bitval2); + T bitval = (T) F::op(bitval1, bitval2); T bitval_prev = bitval; T* res = dest; @@ -2297,7 +2374,7 @@ void gap_buff_op(T* BMRESTRICT dest, T c1 = *cur1; T c2 = *cur2; while (1) { - bitval = (T) f(bitval1, bitval2); + bitval = (T) F::op(bitval1, bitval2); // Check if GAP value changes and we need to // start the next one @@ -2327,19 +2404,18 @@ void gap_buff_op(T* BMRESTRICT dest, } ++cur2; c2 = *cur2; } - } // while dlen = (unsigned)(res - dest); *dest = (T)((*dest & 7) + (dlen << 3)); } + /*! \brief Abstract operation for GAP buffers (predicts legth) Receives functor F as a template argument \param vect1 - operand 1 GAP encoded buffer. 
\param vect2 - operand 2 GAP encoded buffer. - \param f - operation functor. \param dlen - destination length after the operation \param limit - maximum target length limit, returns false if limit is reached @@ -2354,9 +2430,8 @@ void gap_buff_op(T* BMRESTRICT dest, template bool gap_buff_dry_op(const T* BMRESTRICT vect1, const T* BMRESTRICT vect2, - F& f, unsigned& dlen, - unsigned limit) + unsigned limit) BMNOEXCEPT2 { const T* cur1 = vect1; const T* cur2 = vect2; @@ -2364,7 +2439,7 @@ bool gap_buff_dry_op(const T* BMRESTRICT vect1, T bitval1 = (T)((*cur1++ & 1)); T bitval2 = (T)((*cur2++ & 1)); - T bitval = (T) f(bitval1, bitval2); + T bitval = (T) F::op(bitval1, bitval2); T bitval_prev = bitval; unsigned len = 1; @@ -2372,7 +2447,7 @@ bool gap_buff_dry_op(const T* BMRESTRICT vect1, T c1 = *cur1; T c2 = *cur2; while (1) { - bitval = (T) f(bitval1, bitval2); + bitval = (T) F::op(bitval1, bitval2); // Check if GAP value changes and we need to // start the next one @@ -2418,7 +2493,6 @@ bool gap_buff_dry_op(const T* BMRESTRICT vect1, can be 0 or 1 (1 inverts the vector) \param vect2 - operand 2 GAP encoded buffer. \param vect2_mask - same as vect1_mask - \param f - operation functor. \note Internal function. 
\return non zero value if operation result returns any 1 bit @@ -2428,8 +2502,7 @@ template unsigned gap_buff_any_op(const T* BMRESTRICT vect1, unsigned vect1_mask, const T* BMRESTRICT vect2, - unsigned vect2_mask, - F f) + unsigned vect2_mask) BMNOEXCEPT2 { const T* cur1 = vect1; const T* cur2 = vect2; @@ -2437,14 +2510,14 @@ unsigned gap_buff_any_op(const T* BMRESTRICT vect1, unsigned bitval1 = (*cur1++ & 1) ^ vect1_mask; unsigned bitval2 = (*cur2++ & 1) ^ vect2_mask; - unsigned bitval = f(bitval1, bitval2); + unsigned bitval = F::op(bitval1, bitval2); if (bitval) return bitval; unsigned bitval_prev = bitval; while (1) { - bitval = f(bitval1, bitval2); + bitval = F::op(bitval1, bitval2); if (bitval) return bitval; @@ -2468,10 +2541,8 @@ unsigned gap_buff_any_op(const T* BMRESTRICT vect1, { break; } - ++cur1; - bitval1 ^= 1; - bitval2 ^= 1; + bitval1 ^= 1; bitval2 ^= 1; } ++cur2; } @@ -2488,13 +2559,12 @@ unsigned gap_buff_any_op(const T* BMRESTRICT vect1, Receives functor F as a template argument \param vect1 - operand 1 GAP encoded buffer. \param vect2 - operand 2 GAP encoded buffer. - \param f - operation functor. \note Internal function. @ingroup gapfunc */ template -unsigned gap_buff_count_op(const T* vect1, const T* vect2, F f) +unsigned gap_buff_count_op(const T* vect1, const T* vect2) BMNOEXCEPT2 { unsigned count;// = 0; const T* cur1 = vect1; @@ -2502,18 +2572,15 @@ unsigned gap_buff_count_op(const T* vect1, const T* vect2, F f) unsigned bitval1 = (*cur1++ & 1); unsigned bitval2 = (*cur2++ & 1); - unsigned bitval = count = f(bitval1, bitval2); + unsigned bitval = count = F::op(bitval1, bitval2); unsigned bitval_prev = bitval; - //if (bitval) ++count; - T res, res_prev; res = res_prev = 0; while (1) { - bitval = f(bitval1, bitval2); - + bitval = F::op(bitval1, bitval2); // Check if GAP value changes and we need to // start the next one. 
if (bitval != bitval_prev) @@ -2530,8 +2597,7 @@ unsigned gap_buff_count_op(const T* vect1, const T* vect2, F f) count += res - res_prev; res_prev = res; } - ++cur1; - bitval1 ^= 1; + ++cur1; bitval1 ^= 1; } else // >= { @@ -2548,13 +2614,10 @@ unsigned gap_buff_count_op(const T* vect1, const T* vect2, F f) else // equal { if (*cur2 == (bm::gap_max_bits - 1)) - { break; - } ++cur1; - bitval1 ^= 1; - bitval2 ^= 1; + bitval1 ^= 1; bitval2 ^= 1; } ++cur2; } @@ -2565,6 +2628,10 @@ unsigned gap_buff_count_op(const T* vect1, const T* vect2, F f) } +#ifdef __GNUG__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif /*! \brief Sets or clears bit in the GAP buffer. @@ -2582,11 +2649,11 @@ template unsigned gap_set_value(unsigned val, T* BMRESTRICT buf, unsigned pos, - unsigned* BMRESTRICT is_set) + unsigned* BMRESTRICT is_set) BMNOEXCEPT { BM_ASSERT(pos < bm::gap_max_bits); - unsigned curr = gap_bfind(buf, pos, is_set); + unsigned curr = bm::gap_bfind(buf, pos, is_set); T end = (T)(*buf >> 3); if (*is_set == val) { @@ -2601,10 +2668,10 @@ unsigned gap_set_value(unsigned val, // Special case, first bit GAP operation. There is no platform beside it. // initial flag must be inverted. - if (pos == 0) + if (!pos) { *buf ^= 1; - if ( buf[1] ) // We need to insert a 1 bit platform here. + if (buf[1]) // We need to insert a 1 bit GAP here { ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t)); buf[1] = 0; @@ -2612,52 +2679,125 @@ unsigned gap_set_value(unsigned val, } else // Only 1 bit in the GAP. We need to delete the first GAP. { - pprev = buf + 1; - pcurr = pprev + 1; - do - { - *pprev++ = *pcurr++; - } while (pcurr < pend); - --end; + pprev = buf + 1; pcurr = pprev + 1; + goto copy_gaps; } } - else if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit + else + if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit { ++(*pprev); if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP. 
{ --end; - if (pcurr != pend) // GAP merge: 2 GAPS to be deleted + if (pcurr != pend) // GAP merge: 2 GAPS to be deleted { - --end; ++pcurr; - do - { - *pprev++ = *pcurr++; - } while (pcurr < pend); + copy_gaps: + --end; + do { *pprev++ = *pcurr++; } while (pcurr < pend); } } } - else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left. + else + if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left. { - --(*pcurr); - if (pcurr == pend) + --(*pcurr); + end += (pcurr == pend); + } + else // Worst case: split current GAP + { + if (*pcurr != bm::gap_max_bits-1) // last gap does not need memmove + ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T))); + end += 2; + pcurr[0] = (T)(pos-1); + pcurr[1] = (T)pos; + } + + // Set correct length word and last border word + *buf = (T)((*buf & 7) + (end << 3)); + buf[end] = bm::gap_max_bits-1; + return end; +} + +/*! + \brief Sets or clears bit in the GAP buffer. + + \param val - new bit value + \param buf - GAP buffer. + \param pos - Index of bit to set. + + \return New GAP buffer length. + + @ingroup gapfunc +*/ +template +unsigned gap_set_value(unsigned val, + T* BMRESTRICT buf, + unsigned pos) BMNOEXCEPT +{ + BM_ASSERT(pos < bm::gap_max_bits); + unsigned is_set; + unsigned curr = bm::gap_bfind(buf, pos, &is_set); + T end = (T)(*buf >> 3); + if (is_set == val) + return end; + + T* pcurr = buf + curr; + T* pprev = pcurr - 1; + T* pend = buf + end; + + // Special case, first bit GAP operation. There is no platform beside it. + // initial flag must be inverted. + if (!pos) + { + *buf ^= 1; + if (buf[1]) // We need to insert a 1 bit GAP here + { + ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t)); + buf[1] = 0; + ++end; + } + else // Only 1 bit in the GAP. We need to delete the first GAP. { - ++end; + pprev = buf + 1; pcurr = pprev + 1; + goto copy_gaps; } } - else // Worst case we need to split current block. 
+ else + if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit + { + ++(*pprev); + if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP. + { + --end; + if (pcurr != pend) // GAP merge: 2 GAPS to be deleted + { + ++pcurr; + copy_gaps: + --end; + do { *pprev++ = *pcurr++; } while (pcurr < pend); + } + } + } + else + if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left. { - ::memmove(pcurr+2, pcurr,(end - curr + 1)*sizeof(T)); - *pcurr++ = (T)(pos - 1); - *pcurr = (T)pos; - end = (T)(end + 2); + --(*pcurr); + end += (pcurr == pend); + } + else // Worst case: split current GAP + { + if (*pcurr != bm::gap_max_bits-1) // last gap does not need memmove + ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T))); + end += 2; + pcurr[0] = (T)(pos-1); + pcurr[1] = (T)pos; } - // Set correct length word. + // Set correct length word and last border word *buf = (T)((*buf & 7) + (end << 3)); - - buf[end] = bm::gap_max_bits - 1; + buf[end] = bm::gap_max_bits-1; return end; } @@ -2672,7 +2812,7 @@ unsigned gap_set_value(unsigned val, @ingroup gapfunc */ template -unsigned gap_add_value(T* buf, unsigned pos) +unsigned gap_add_value(T* buf, unsigned pos) BMNOEXCEPT { BM_ASSERT(pos < bm::gap_max_bits); @@ -2684,7 +2824,7 @@ unsigned gap_add_value(T* buf, unsigned pos) // Special case, first bit GAP operation. There is no platform beside it. // initial flag must be inverted. - if (pos == 0) + if (!pos) { *buf ^= 1; if ( buf[1] ) // We need to insert a 1 bit platform here. @@ -2695,13 +2835,9 @@ unsigned gap_add_value(T* buf, unsigned pos) } else // Only 1 bit in the GAP. We need to delete the first GAP. 
{ - pprev = buf + 1; - pcurr = pprev + 1; - do - { - *pprev++ = *pcurr++; - } while (pcurr < pend); + pprev = buf + 1; pcurr = pprev + 1; --end; + do { *pprev++ = *pcurr++; } while (pcurr < pend); } } else if (((unsigned)(*pprev))+1 == pos && (curr > 1) ) // Left border bit @@ -2710,40 +2846,32 @@ unsigned gap_add_value(T* buf, unsigned pos) if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP. { --end; - if (pcurr != pend) // GAP merge: 2 GAPS to be deleted - { - // TODO: should never get here... - --end; - ++pcurr; - do - { - *pprev++ = *pcurr++; - } while (pcurr < pend); - } - } + BM_ASSERT(pcurr == pend); + } } else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left. { --(*pcurr); - if (pcurr == pend) - { - ++end; - } + end += (pcurr == pend); } else // Worst case we need to split current block. { - *pcurr++ = (T)(pos - 1); - *pcurr = (T)pos; + pcurr[0] = (T)(pos-1); + pcurr[1] = (T)pos; end = (T)(end+2); } // Set correct length word. *buf = (T)((*buf & 7) + (end << 3)); - buf[end] = bm::gap_max_bits - 1; return end; } +#ifdef __GNUG__ +#pragma GCC diagnostic pop +#endif + + /*! 
@brief Right shift GAP block by 1 bit @param buf - block pointer @@ -2754,7 +2882,8 @@ unsigned gap_add_value(T* buf, unsigned pos) @ingroup gapfunc */ template -bool gap_shift_r1(T* buf, unsigned co_flag, unsigned* new_len) +bool gap_shift_r1(T* BMRESTRICT buf, + unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT { BM_ASSERT(new_len); bool co; @@ -2802,7 +2931,8 @@ bool gap_shift_r1(T* buf, unsigned co_flag, unsigned* new_len) @ingroup gapfunc */ template -bool gap_shift_l1(T* buf, unsigned co_flag, unsigned* new_len) +bool gap_shift_l1(T* BMRESTRICT buf, + unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT { BM_ASSERT(new_len); unsigned is_set; @@ -2860,7 +2990,7 @@ bool gap_shift_l1(T* buf, unsigned co_flag, unsigned* new_len) */ template -unsigned gap_set_array(T* buf, const T* arr, unsigned len) +unsigned gap_set_array(T* buf, const T* arr, unsigned len) BMNOEXCEPT { *buf = (T)((*buf & 6u) + (1u << 3)); // gap header setup @@ -2921,8 +3051,7 @@ unsigned gap_set_array(T* buf, const T* arr, unsigned len) @ingroup gapfunc */ template -unsigned bit_array_compute_gaps(const T* arr, - unsigned len) +unsigned bit_array_compute_gaps(const T* arr, unsigned len) BMNOEXCEPT { unsigned gap_count = 1; T prev = arr[0]; @@ -2954,9 +3083,9 @@ unsigned bit_array_compute_gaps(const T* arr, @ingroup gapfunc */ template -unsigned gap_block_find(const T* buf, +unsigned gap_block_find(const T* BMRESTRICT buf, unsigned nbit, - bm::id_t* prev) + bm::id_t* BMRESTRICT prev) BMNOEXCEPT { BM_ASSERT(nbit < bm::gap_max_bits); @@ -2968,19 +3097,20 @@ unsigned gap_block_find(const T* buf, *prev = nbit; return 1u; } - unsigned val = buf[gap_idx] + 1; *prev = val; - return (val != bm::gap_max_bits); // no bug here. } +//------------------------------------------------------------------------ + + /*! 
\brief Set 1 bit in a block @ingroup bitfunc */ BMFORCEINLINE -void set_bit(unsigned* dest, unsigned bitpos) +void set_bit(unsigned* dest, unsigned bitpos) BMNOEXCEPT { unsigned nbit = unsigned(bitpos & bm::set_block_mask); unsigned nword = unsigned(nbit >> bm::set_word_shift); @@ -2993,7 +3123,7 @@ void set_bit(unsigned* dest, unsigned bitpos) @ingroup bitfunc */ BMFORCEINLINE -void clear_bit(unsigned* dest, unsigned bitpos) +void clear_bit(unsigned* dest, unsigned bitpos) BMNOEXCEPT { unsigned nbit = unsigned(bitpos & bm::set_block_mask); unsigned nword = unsigned(nbit >> bm::set_word_shift); @@ -3007,7 +3137,7 @@ void clear_bit(unsigned* dest, unsigned bitpos) @ingroup bitfunc */ BMFORCEINLINE -unsigned test_bit(const unsigned* block, unsigned bitpos) +unsigned test_bit(const unsigned* block, unsigned bitpos) BMNOEXCEPT { unsigned nbit = unsigned(bitpos & bm::set_block_mask); unsigned nword = unsigned(nbit >> bm::set_word_shift); @@ -3025,7 +3155,7 @@ unsigned test_bit(const unsigned* block, unsigned bitpos) @ingroup bitfunc */ inline -void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) +void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT { const unsigned maskFF = ~0u; @@ -3072,7 +3202,7 @@ void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) @ingroup bitfunc */ inline -void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) +void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT { const unsigned maskFF = ~0u; @@ -3121,7 +3251,7 @@ void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) */ inline void xor_bit_block(unsigned* dest, unsigned bitpos, - unsigned bitcount) + unsigned bitcount) BMNOEXCEPT { unsigned nbit = unsigned(bitpos & bm::set_block_mask); unsigned nword = unsigned(nbit >> bm::set_word_shift); @@ -3175,7 +3305,8 @@ inline void xor_bit_block(unsigned* dest, @ingroup gapfunc */ template -void gap_sub_to_bitset(unsigned* dest, 
const T* pcurr) +void gap_sub_to_bitset(unsigned* BMRESTRICT dest, + const T* BMRESTRICT pcurr) BMNOEXCEPT { BM_ASSERT(dest && pcurr); @@ -3203,7 +3334,8 @@ void gap_sub_to_bitset(unsigned* dest, const T* pcurr) @ingroup gapfunc */ template -void gap_sub_to_bitset(unsigned* dest, const T* pcurr, bm::id64_t digest0) +void gap_sub_to_bitset(unsigned* BMRESTRICT dest, + const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT { BM_ASSERT(dest && pcurr); @@ -3261,7 +3393,8 @@ void gap_sub_to_bitset(unsigned* dest, const T* pcurr, bm::id64_t digest0) @ingroup gapfunc */ template -void gap_xor_to_bitset(unsigned* dest, const T* pcurr) +void gap_xor_to_bitset(unsigned* BMRESTRICT dest, + const T* BMRESTRICT pcurr) BMNOEXCEPT { BM_ASSERT(dest && pcurr); @@ -3288,7 +3421,8 @@ void gap_xor_to_bitset(unsigned* dest, const T* pcurr) @ingroup gapfunc */ template -void gap_add_to_bitset(unsigned* dest, const T* pcurr, unsigned len) +void gap_add_to_bitset(unsigned* BMRESTRICT dest, + const T* BMRESTRICT pcurr, unsigned len) BMNOEXCEPT { BM_ASSERT(dest && pcurr); @@ -3321,7 +3455,8 @@ void gap_add_to_bitset(unsigned* dest, const T* pcurr, unsigned len) @ingroup gapfunc */ template -void gap_add_to_bitset(unsigned* dest, const T* pcurr) +void gap_add_to_bitset(unsigned* BMRESTRICT dest, + const T* BMRESTRICT pcurr) BMNOEXCEPT { unsigned len = (*pcurr >> 3); gap_add_to_bitset(dest, pcurr, len); @@ -3336,7 +3471,8 @@ void gap_add_to_bitset(unsigned* dest, const T* pcurr) @ingroup gapfunc */ template -void gap_and_to_bitset(unsigned* dest, const T* pcurr) +void gap_and_to_bitset(unsigned* BMRESTRICT dest, + const T* BMRESTRICT pcurr) BMNOEXCEPT { BM_ASSERT(dest && pcurr); @@ -3370,7 +3506,8 @@ void gap_and_to_bitset(unsigned* dest, const T* pcurr) @ingroup gapfunc */ template -void gap_and_to_bitset(unsigned* dest, const T* pcurr, bm::id64_t digest0) +void gap_and_to_bitset(unsigned* BMRESTRICT dest, + const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT { BM_ASSERT(dest && 
pcurr); if (!digest0) @@ -3431,7 +3568,8 @@ void gap_and_to_bitset(unsigned* dest, const T* pcurr, bm::id64_t digest0) @ingroup gapfunc */ template -bm::id_t gap_bitset_and_count(const unsigned* block, const T* pcurr) +bm::id_t gap_bitset_and_count(const unsigned* BMRESTRICT block, + const T* BMRESTRICT pcurr) BMNOEXCEPT { BM_ASSERT(block); const T* pend = pcurr + (*pcurr >> 3); @@ -3458,7 +3596,8 @@ bm::id_t gap_bitset_and_count(const unsigned* block, const T* pcurr) @ingroup gapfunc */ template -bm::id_t gap_bitset_and_any(const unsigned* block, const T* pcurr) +bm::id_t gap_bitset_and_any(const unsigned* BMRESTRICT block, + const T* BMRESTRICT pcurr) BMNOEXCEPT { BM_ASSERT(block); @@ -3487,7 +3626,8 @@ bm::id_t gap_bitset_and_any(const unsigned* block, const T* pcurr) @ingroup gapfunc */ template -bm::id_t gap_bitset_sub_count(const unsigned* block, const T* buf) +bm::id_t gap_bitset_sub_count(const unsigned* BMRESTRICT block, + const T* BMRESTRICT buf) BMNOEXCEPT { BM_ASSERT(block); @@ -3521,7 +3661,8 @@ bm::id_t gap_bitset_sub_count(const unsigned* block, const T* buf) @ingroup gapfunc */ template -bm::id_t gap_bitset_sub_any(const unsigned* block, const T* buf) +bm::id_t gap_bitset_sub_any(const unsigned* BMRESTRICT block, + const T* BMRESTRICT buf) BMNOEXCEPT { BM_ASSERT(block); @@ -3558,7 +3699,8 @@ bm::id_t gap_bitset_sub_any(const unsigned* block, const T* buf) @ingroup gapfunc */ template -bm::id_t gap_bitset_xor_count(const unsigned* block, const T* buf) +bm::id_t gap_bitset_xor_count(const unsigned* BMRESTRICT block, + const T* BMRESTRICT buf) BMNOEXCEPT { BM_ASSERT(block); @@ -3595,7 +3737,8 @@ bm::id_t gap_bitset_xor_count(const unsigned* block, const T* buf) @ingroup gapfunc */ template -bm::id_t gap_bitset_xor_any(const unsigned* block, const T* buf) +bm::id_t gap_bitset_xor_any(const unsigned* BMRESTRICT block, + const T* BMRESTRICT buf) BMNOEXCEPT { BM_ASSERT(block); @@ -3632,10 +3775,10 @@ bm::id_t gap_bitset_xor_any(const unsigned* block, const 
T* buf) @ingroup gapfunc */ template -bm::id_t gap_bitset_or_count(const unsigned* block, const T* buf) +bm::id_t gap_bitset_or_count(const unsigned* BMRESTRICT block, + const T* BMRESTRICT buf) BMNOEXCEPT { BM_ASSERT(block); - const T* pcurr = buf; const T* pend = pcurr + (*pcurr >> 3); ++pcurr; @@ -3664,7 +3807,8 @@ bm::id_t gap_bitset_or_count(const unsigned* block, const T* buf) @ingroup gapfunc */ template -bm::id_t gap_bitset_or_any(const unsigned* block, const T* buf) +bm::id_t gap_bitset_or_any(const unsigned* BMRESTRICT block, + const T* BMRESTRICT buf) BMNOEXCEPT { bool b = !bm::gap_is_all_zero(buf) || !bm::bit_is_all_zero(block); @@ -3682,7 +3826,7 @@ bm::id_t gap_bitset_or_any(const unsigned* block, const T* buf) @ingroup bitfunc */ inline -void bit_block_set(bm::word_t* BMRESTRICT dst, bm::word_t value) +void bit_block_set(bm::word_t* BMRESTRICT dst, bm::word_t value) BMNOEXCEPT { #ifdef BMVECTOPT VECT_SET_BLOCK(dst, value); @@ -3700,7 +3844,8 @@ void bit_block_set(bm::word_t* BMRESTRICT dst, bm::word_t value) @ingroup gapfunc */ template -void gap_convert_to_bitset(unsigned* dest, const T* buf) +void gap_convert_to_bitset(unsigned* BMRESTRICT dest, + const T* BMRESTRICT buf) BMNOEXCEPT { bm::bit_block_set(dest, 0); bm::gap_add_to_bitset(dest, buf); @@ -3721,13 +3866,12 @@ void gap_convert_to_bitset(unsigned* dest, const T* buf) @ingroup gapfunc */ template -unsigned* gap_convert_to_bitset_smart(unsigned* dest, - const T* buf, - id_t set_max) +unsigned* gap_convert_to_bitset_smart(unsigned* BMRESTRICT dest, + const T* BMRESTRICT buf, + id_t set_max) BMNOEXCEPT { if (buf[1] == set_max - 1) return (buf[0] & 1) ? 
FULL_BLOCK_REAL_ADDR : 0; - bm::gap_convert_to_bitset(dest, buf); return dest; } @@ -3742,7 +3886,8 @@ unsigned* gap_convert_to_bitset_smart(unsigned* dest, @ingroup gapfunc @internal */ -template unsigned gap_control_sum(const T* buf) +template +unsigned gap_control_sum(const T* buf) BMNOEXCEPT { unsigned end = *buf >> 3; @@ -3755,7 +3900,6 @@ template unsigned gap_control_sum(const T* buf) ++pcurr; } ++pcurr; // now we are in GAP "1" again - while (pcurr <= pend) { BM_ASSERT(*pcurr > *(pcurr-1)); @@ -3773,9 +3917,8 @@ template unsigned gap_control_sum(const T* buf) @ingroup gapfunc */ -template void gap_set_all(T* buf, - unsigned set_max, - unsigned value) +template +void gap_set_all(T* buf, unsigned set_max, unsigned value) BMNOEXCEPT { BM_ASSERT(value == 0 || value == 1); *buf = (T)((*buf & 6u) + (1u << 3) + value); @@ -3796,8 +3939,7 @@ template void gap_init_range_block(T* buf, T from, T to, - T value) - //unsigned set_max) + T value) BMNOEXCEPT { BM_ASSERT(value == 0 || value == 1); const unsigned set_max = bm::bits_in_block; @@ -3844,7 +3986,7 @@ void gap_init_range_block(T* buf, @ingroup gapfunc */ -template void gap_invert(T* buf) +template void gap_invert(T* buf) BMNOEXCEPT { *buf ^= 1; } @@ -3863,7 +4005,7 @@ template void gap_invert(T* buf) @ingroup gapfunc */ template -void set_gap_level(T* buf, int level) +void set_gap_level(T* buf, int level) BMNOEXCEPT { BM_ASSERT(level >= 0); BM_ASSERT(unsigned(level) < bm::gap_levels); @@ -3885,7 +4027,7 @@ void set_gap_level(T* buf, int level) @ingroup gapfunc */ template -inline int gap_calc_level(unsigned len, const T* glevel_len) +int gap_calc_level(unsigned len, const T* glevel_len) BMNOEXCEPT { if (len <= unsigned(glevel_len[0]-4)) return 0; if (len <= unsigned(glevel_len[1]-4)) return 1; @@ -3906,10 +4048,11 @@ inline int gap_calc_level(unsigned len, const T* glevel_len) @ingroup gapfunc */ template -inline unsigned gap_free_elements(const T* buf, const T* glevel_len) +inline unsigned 
gap_free_elements(const T* BMRESTRICT buf, + const T* BMRESTRICT glevel_len) BMNOEXCEPT { - unsigned len = gap_length(buf); - unsigned capacity = gap_capacity(buf, glevel_len); + unsigned len = bm::gap_length(buf); + unsigned capacity = bm::gap_capacity(buf, glevel_len); return capacity - len; } @@ -3923,7 +4066,7 @@ inline unsigned gap_free_elements(const T* buf, const T* glevel_len) @ingroup bitfunc */ template -int bitcmp(const T* buf1, const T* buf2, unsigned len) +int bitcmp(const T* buf1, const T* buf2, unsigned len) BMNOEXCEPT { BM_ASSERT(len); const T* pend1 = buf1 + len; @@ -3948,8 +4091,9 @@ int bitcmp(const T* buf1, const T* buf2, unsigned len) @ingroup bitfunc */ inline -bool bit_find_first_diff(const bm::word_t* blk1, const bm::word_t* blk2, - unsigned* pos) +bool bit_find_first_diff(const bm::word_t* BMRESTRICT blk1, + const bm::word_t* BMRESTRICT blk2, + unsigned* BMRESTRICT pos) BMNOEXCEPT { BM_ASSERT(blk1 && blk2 && pos); #ifdef VECT_BIT_FIND_DIFF @@ -3969,7 +4113,7 @@ bool bit_find_first_diff(const bm::word_t* blk1, const bm::word_t* blk2, if (diff) { unsigned idx = bm::count_trailing_zeros_u64(diff); - *pos = unsigned(idx + (i * 8u * sizeof(bm::wordop_t))); + *pos = unsigned(idx + (i * 8u * unsigned(sizeof(bm::wordop_t)))); return true; } } // for @@ -3997,7 +4141,7 @@ bool bit_find_first_diff(const bm::word_t* blk1, const bm::word_t* blk2, \brief Converts bit block to GAP. \param dest - Destinatio GAP buffer. \param block - Source bitblock buffer. - \param dest_len - length of the dest. buffer. + \param dest_len - length of the destination buffer. \return New length of GAP block or 0 if conversion failed (insufficicent space). 
@@ -4006,7 +4150,7 @@ bool bit_find_first_diff(const bm::word_t* blk1, const bm::word_t* blk2, inline unsigned bit_block_to_gap(gap_word_t* BMRESTRICT dest, const unsigned* BMRESTRICT block, - unsigned dest_len) + unsigned dest_len) BMNOEXCEPT { const unsigned* BMRESTRICT block_end = block + bm::set_block_size; gap_word_t* BMRESTRICT pcurr = dest; @@ -4083,10 +4227,15 @@ complete: } #endif +/** + Convert bit block to GAP representation + @internal + @ingroup bitfunc +*/ inline unsigned bit_to_gap(gap_word_t* BMRESTRICT dest, const unsigned* BMRESTRICT block, - unsigned dest_len) + unsigned dest_len) BMNOEXCEPT { #if defined(VECT_BIT_TO_GAP) return VECT_BIT_TO_GAP(dest, block, dest_len); @@ -4159,10 +4308,10 @@ template D gap_convert_to_arr(D* BMRESTRICT dest, const T* BMRESTRICT buf, unsigned dest_len, - bool invert = false) + bool invert = false) BMNOEXCEPT { - BMREGISTER const T* BMRESTRICT pcurr = buf; - BMREGISTER const T* pend = pcurr + (*pcurr >> 3); + const T* BMRESTRICT pcurr = buf; + const T* pend = pcurr + (*pcurr >> 3); D* BMRESTRICT dest_curr = dest; ++pcurr; @@ -4215,7 +4364,7 @@ D gap_convert_to_arr(D* BMRESTRICT dest, @ingroup bitfunc */ inline -bm::id_t bit_block_count(const bm::word_t* block) +bm::id_t bit_block_count(const bm::word_t* block) BMNOEXCEPT { const bm::word_t* block_end = block + bm::set_block_size; bm::id_t count = 0; @@ -4278,8 +4427,12 @@ bm::id_t bit_block_count(const bm::word_t* block) @ingroup bitfunc */ inline -bm::id_t bit_block_count(const bm::word_t* const block, bm::id64_t digest) +bm::id_t bit_block_count(const bm::word_t* const block, + bm::id64_t digest) BMNOEXCEPT { +#ifdef VECT_BIT_COUNT_DIGEST + return VECT_BIT_COUNT_DIGEST(block, digest); +#else bm::id_t count = 0; bm::id64_t d = digest; while (d) @@ -4304,6 +4457,7 @@ bm::id_t bit_block_count(const bm::word_t* const block, bm::id64_t digest) d = bm::bmi_bslr_u64(d); // d &= d - 1; } // while return count; +#endif } @@ -4318,7 +4472,7 @@ bm::id_t bit_block_count(const 
bm::word_t* const block, bm::id64_t digest) */ inline bm::id_t bit_block_calc_count(const bm::word_t* block, - const bm::word_t* block_end) + const bm::word_t* block_end) BMNOEXCEPT { bm::id_t count = 0; bm::word_t acc = *block++; @@ -4352,7 +4506,7 @@ bm::id_t bit_block_calc_count(const bm::word_t* block, @ingroup bitfunc */ inline -bm::id_t bit_count_change(bm::word_t w) +bm::id_t bit_count_change(bm::word_t w) BMNOEXCEPT { unsigned count = 1; w ^= (w >> 1); @@ -4368,7 +4522,7 @@ bm::id_t bit_count_change(bm::word_t w) @internal */ inline -unsigned bit_block_change32(const bm::word_t* block, unsigned size) +unsigned bit_block_change32(const bm::word_t* block, unsigned size) BMNOEXCEPT { unsigned gap_count = 1; @@ -4416,7 +4570,8 @@ unsigned bit_block_change32(const bm::word_t* block, unsigned size) @internal */ inline -void bit_block_change_bc(const bm::word_t* block, unsigned* gc, unsigned* bc) +void bit_block_change_bc(const bm::word_t* BMRESTRICT block, + unsigned* BMRESTRICT gc, unsigned* BMRESTRICT bc) BMNOEXCEPT { BM_ASSERT(gc); BM_ASSERT(bc); @@ -4441,7 +4596,7 @@ void bit_block_change_bc(const bm::word_t* block, unsigned* gc, unsigned* bc) @ingroup bitfunc */ inline -unsigned bit_block_calc_change(const bm::word_t* block) +unsigned bit_block_calc_change(const bm::word_t* block) BMNOEXCEPT { #if defined(VECT_BLOCK_CHANGE) return VECT_BLOCK_CHANGE(block, bm::set_block_size); @@ -4450,6 +4605,78 @@ unsigned bit_block_calc_change(const bm::word_t* block) #endif } +/*! 
+ Check if all bits are 1 in [left, right] range + @ingroup bitfunc +*/ +inline +bool bit_block_is_all_one_range(const bm::word_t* const BMRESTRICT block, + bm::word_t left, + bm::word_t right) BMNOEXCEPT +{ + BM_ASSERT(left <= right); + BM_ASSERT(right <= bm::gap_max_bits-1); + + unsigned nword, nbit, bitcount, temp; + nbit = left & bm::set_word_mask; + const bm::word_t* word = + block + (nword = unsigned(left >> bm::set_word_shift)); + if (left == right) // special case (only 1 bit to check) + return (*word >> nbit) & 1u; + + if (nbit) // starting position is not aligned + { + unsigned right_margin = nbit + right - left; + if (right_margin < 32) + { + unsigned mask = + block_set_table::_right[nbit] & + block_set_table::_left[right_margin]; + return mask == (*word & mask); + } + temp = *word & block_set_table::_right[nbit]; + if (temp != block_set_table::_right[nbit]) + return false; + bitcount = (right - left + 1u) - (32 - nbit); + ++word; + } + else + { + bitcount = right - left + 1u; + } + + // now when we are word aligned, we can scan the bit-stream + const bm::id64_t maskFF64 = ~0ull; + const bm::word_t maskFF = ~0u; + // loop unrolled to evaluate 4 words at a time + // SIMD showed no advantage, unless evaluate sub-wave intervals + // + for ( ;bitcount >= 128; bitcount-=128, word+=4) + { + bm::id64_t w64_0 = bm::id64_t(word[0]) + (bm::id64_t(word[1]) << 32); + bm::id64_t w64_1 = bm::id64_t(word[2]) + (bm::id64_t(word[3]) << 32); + if ((w64_0 ^ maskFF64) | (w64_1 ^ maskFF64)) + return false; + } // for + + for ( ;bitcount >= 32; bitcount-=32, ++word) + { + if (*word != maskFF) + return false; + } // for + BM_ASSERT(bitcount < 32); + + if (bitcount) // we have a tail to count + { + temp = *word & block_set_table::_left[bitcount-1]; + if (temp != block_set_table::_left[bitcount-1]) + return false; + } + + return true; +} + + /*! 
@@ -4462,7 +4689,7 @@ unsigned bit_block_calc_change(const bm::word_t* block) inline bm::id_t bit_block_calc_count_range(const bm::word_t* block, bm::word_t left, - bm::word_t right) + bm::word_t right) BMNOEXCEPT { BM_ASSERT(left <= right); BM_ASSERT(right <= bm::gap_max_bits-1); @@ -4530,7 +4757,7 @@ bm::id_t bit_block_calc_count_range(const bm::word_t* block, */ inline bm::id_t bit_block_calc_count_to(const bm::word_t* block, - bm::word_t right) + bm::word_t right) BMNOEXCEPT { BM_ASSERT(block); if (!right) // special case, first bit check @@ -4586,7 +4813,7 @@ bm::id_t bit_block_calc_count_to(const bm::word_t* block, @ingroup bitfunc */ inline -void bit_block_rotate_left_1(bm::word_t* block) +void bit_block_rotate_left_1(bm::word_t* block) BMNOEXCEPT { bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit for (unsigned i = 0; i < bm::set_block_size-1; ++i) @@ -4602,7 +4829,7 @@ void bit_block_rotate_left_1(bm::word_t* block) @ingroup bitfunc */ inline -void bit_block_rotate_left_1_unr(bm::word_t* block) +void bit_block_rotate_left_1_unr(bm::word_t* block) BMNOEXCEPT { bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit const unsigned unroll_factor = 4; @@ -4638,7 +4865,8 @@ void bit_block_rotate_left_1_unr(bm::word_t* block) @ingroup bitfunc */ inline -bm::word_t bit_block_insert(bm::word_t* block, unsigned bitpos, bool value) +bm::word_t bit_block_insert(bm::word_t* BMRESTRICT block, + unsigned bitpos, bool value) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(bitpos < 65536); @@ -4686,8 +4914,9 @@ bm::word_t bit_block_insert(bm::word_t* block, unsigned bitpos, bool value) @ingroup bitfunc */ inline -bool bit_block_shift_r1(bm::word_t* block, - bm::word_t* empty_acc, bm::word_t co_flag) +bool bit_block_shift_r1(bm::word_t* BMRESTRICT block, + bm::word_t* BMRESTRICT empty_acc, + bm::word_t co_flag) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(empty_acc); @@ -4715,8 +4944,9 @@ bool bit_block_shift_r1(bm::word_t* block, @ingroup bitfunc */ inline -bool 
bit_block_shift_r1_unr(bm::word_t* block, - bm::word_t* empty_acc, bm::word_t co_flag) +bool bit_block_shift_r1_unr(bm::word_t* BMRESTRICT block, + bm::word_t* BMRESTRICT empty_acc, + bm::word_t co_flag) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(empty_acc); @@ -4740,7 +4970,7 @@ bool bit_block_shift_r1_unr(bm::word_t* block, */ inline bool bit_block_shift_l1(bm::word_t* block, - bm::word_t* empty_acc, bm::word_t co_flag) + bm::word_t* empty_acc, bm::word_t co_flag) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(empty_acc); @@ -4770,7 +5000,8 @@ bool bit_block_shift_l1(bm::word_t* block, */ inline bool bit_block_shift_l1_unr(bm::word_t* block, - bm::word_t* empty_acc, bm::word_t co_flag) + bm::word_t* empty_acc, + bm::word_t co_flag) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(empty_acc); @@ -4791,7 +5022,9 @@ bool bit_block_shift_l1_unr(bm::word_t* block, @ingroup bitfunc */ inline -void bit_block_erase(bm::word_t* block, unsigned bitpos, bool carry_over) +void bit_block_erase(bm::word_t* block, + unsigned bitpos, + bool carry_over) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(bitpos < 65536); @@ -4799,7 +5032,7 @@ void bit_block_erase(bm::word_t* block, unsigned bitpos, bool carry_over) if (!bitpos) { bm::word_t acc; - bit_block_shift_l1_unr(block, &acc, carry_over); + bm::bit_block_shift_l1_unr(block, &acc, carry_over); return; } @@ -4848,7 +5081,7 @@ inline bool bit_block_shift_r1_and(bm::word_t* BMRESTRICT block, bm::word_t co_flag, const bm::word_t* BMRESTRICT mask_block, - bm::id64_t* BMRESTRICT digest) + bm::id64_t* BMRESTRICT digest) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(mask_block); @@ -4896,7 +5129,7 @@ bool bit_block_shift_r1_and(bm::word_t* BMRESTRICT block, block[d_base] = co_flag & mask_block[d_base]; if (block[d_base]) - d |= dmask; // update d + d |= dmask; // update digest co_flag = 0; } } @@ -4920,7 +5153,7 @@ inline bool bit_block_shift_r1_and_unr(bm::word_t* BMRESTRICT block, bm::word_t co_flag, const bm::word_t* BMRESTRICT mask_block, - bm::id64_t* 
BMRESTRICT digest) + bm::id64_t* BMRESTRICT digest) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(mask_block); @@ -4942,9 +5175,9 @@ bool bit_block_shift_r1_and_unr(bm::word_t* BMRESTRICT block, @ingroup bitfunc */ inline -bm::id_t bit_block_any_range(const bm::word_t* block, +bm::id_t bit_block_any_range(const bm::word_t* const BMRESTRICT block, bm::word_t left, - bm::word_t right) + bm::word_t right) BMNOEXCEPT { BM_ASSERT(left <= right); @@ -4969,8 +5202,7 @@ bm::id_t bit_block_any_range(const bm::word_t* block, unsigned mask = block_set_table::_right[nbit] & block_set_table::_left[right_margin]; - acc = *word & mask; - return acc; + return *word & mask; } else { @@ -4982,22 +5214,26 @@ bm::id_t bit_block_any_range(const bm::word_t* block, ++word; } - // now when we are word aligned, we can check bits the usual way - for ( ;bitcount >= 32; bitcount -= 32) + // loop unrolled to evaluate 4 words at a time + // SIMD showed no advantage, unless evaluate sub-wave intervals + // + for ( ;bitcount >= 128; bitcount-=128, word+=4) { - acc = *word++; - if (acc) + acc = word[0] | word[1] | word[2] | word[3]; + if (acc) return acc; - } + } // for - if (bitcount) // we have a tail to count + acc = 0; + for ( ;bitcount >= 32; bitcount -= 32) { - acc = (*word) & block_set_table::_left[bitcount-1]; - if (acc) - return acc; - } + acc |= *word++; + } // for - return 0; + if (bitcount) // we have a tail to count + acc |= (*word) & block_set_table::_left[bitcount-1]; + + return acc; } // ---------------------------------------------------------------------- @@ -5005,7 +5241,8 @@ bm::id_t bit_block_any_range(const bm::word_t* block, /*! 
Function inverts block of bits @ingroup bitfunc */ -template void bit_invert(T* start) +template +void bit_invert(T* start) BMNOEXCEPT { BM_ASSERT(IS_VALID_ADDR((bm::word_t*)start)); #ifdef BMVECTOPT @@ -5029,7 +5266,7 @@ template void bit_invert(T* start) @ingroup bitfunc */ inline -bool is_bits_one(const bm::wordop_t* start) +bool is_bits_one(const bm::wordop_t* start) BMNOEXCEPT { #if defined(BMSSE42OPT) || defined(BMAVX2OPT) return VECT_IS_ONE_BLOCK(start); @@ -5050,35 +5287,311 @@ bool is_bits_one(const bm::wordop_t* start) // ---------------------------------------------------------------------- -// GAP blocks manipulation functions: +/*! @brief Returns "true" if all bits are 1 in the block [left, right] + Function check for block varieties + @internal +*/ +inline +bool block_is_all_one_range(const bm::word_t* const BMRESTRICT block, + unsigned left, unsigned right) BMNOEXCEPT +{ + BM_ASSERT(left <= right); + BM_ASSERT(right < bm::gap_max_bits); + if (block) + { + if (BM_IS_GAP(block)) + return bm::gap_is_all_one_range(BMGAP_PTR(block), left, right); + if (block == FULL_BLOCK_FAKE_ADDR) + return true; + return bm::bit_block_is_all_one_range(block, left, right); + } + return false; +} + +/*! 
@brief Returns "true" if all bits are 1 in the block [left, right] + and border bits are 0 + @internal +*/ +inline +bool block_is_interval(const bm::word_t* const BMRESTRICT block, + unsigned left, unsigned right) BMNOEXCEPT +{ + BM_ASSERT(left <= right); + BM_ASSERT(right < bm::gap_max_bits-1); + + if (block) + { + bool is_left, is_right, all_one; + if (BM_IS_GAP(block)) + { + const bm::gap_word_t* gap = BMGAP_PTR(block); + all_one = bm::gap_is_interval(gap, left, right); + return all_one; + } + else // bit-block + { + if (block == FULL_BLOCK_FAKE_ADDR) + return false; + unsigned nword = ((left-1) >> bm::set_word_shift); + is_left = block[nword] & (1u << ((left-1) & bm::set_word_mask)); + if (is_left == false) + { + nword = ((right + 1) >> bm::set_word_shift); + is_right = block[nword] & (1u << ((right + 1) & bm::set_word_mask)); + if (is_right == false) + { + all_one = bm::bit_block_is_all_one_range(block, left, right); + return all_one; + } + } + } + } + + return false; +} + +// ---------------------------------------------------------------------- + +/** + \brief Searches for the last 1 bit in the 111 interval of a BIT block + \param block - BIT buffer + \param nbit - bit index to start checking from + \param pos - [out] found value + + \return false if not found + @ingroup bitfunc +*/ +inline +bool bit_block_find_interval_end(const bm::word_t* BMRESTRICT block, + unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT +{ + BM_ASSERT(block); + BM_ASSERT(pos); + + unsigned nword = unsigned(nbit >> bm::set_word_shift); + unsigned bit_pos = (nbit & bm::set_word_mask); + bm::word_t w = block[nword]; + w &= (1u << bit_pos); + if (!w) + return false; + + if (nbit == bm::gap_max_bits-1) + { + *pos = bm::gap_max_bits-1; + return true; + } + *pos = nbit; + + ++nbit; + nword = unsigned(nbit >> bm::set_word_shift); + bit_pos = (nbit & bm::set_word_mask); + + w = (~block[nword]) >> bit_pos; + w <<= bit_pos; // clear the trailing bits + if (w) + { + bit_pos = 
bm::bit_scan_forward32(w); // trailing zeros + *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1); + return true; + } + + for (++nword; nword < bm::set_block_size; ++nword) + { + w = ~block[nword]; + if (w) + { + bit_pos = bm::bit_scan_forward32(w); // trailing zeros + *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1); + return true; + } + } // for nword + + // 0 not found, all block is 1s... + *pos = bm::gap_max_bits-1; + return true; +} + + +/*! @brief Find end of the current 111 interval + @return search result code 0 - not found, 1 found, 2 - found at the end + @internal +*/ +inline +unsigned block_find_interval_end(const bm::word_t* BMRESTRICT block, + unsigned nbit_from, + unsigned* BMRESTRICT found_nbit) BMNOEXCEPT +{ + BM_ASSERT(block && found_nbit); + BM_ASSERT(nbit_from < bm::gap_max_bits); + + bool b; + if (BM_IS_GAP(block)) + { + const bm::gap_word_t* gap = BMGAP_PTR(block); + b = bm::gap_find_interval_end(gap, nbit_from, found_nbit); + if (b && *found_nbit == bm::gap_max_bits-1) + return 2; // end of block, keep searching + } + else // bit-block + { + if (IS_FULL_BLOCK(block)) + { + *found_nbit = bm::gap_max_bits-1; + return 2; + } + b = bm::bit_block_find_interval_end(block, nbit_from, found_nbit); + if (b && *found_nbit == bm::gap_max_bits-1) + return 2; // end of block, keep searching + } + return b; +} -/*! 
\brief GAP and functor */ -BMFORCEINLINE unsigned and_op(unsigned v1, unsigned v2) +// ---------------------------------------------------------------------- + +/** + \brief Searches for the first 1 bit in the 111 interval of a BIT block + \param block - BIT buffer + \param nbit - bit index to start checking from + \param pos - [out] found value + + \return false if not found + @ingroup bitfunc +*/ +inline +bool bit_block_find_interval_start(const bm::word_t* BMRESTRICT block, + unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT { - return v1 & v2; + BM_ASSERT(block); + BM_ASSERT(pos); + + unsigned nword = unsigned(nbit >> bm::set_word_shift); + unsigned bit_pos = (nbit & bm::set_word_mask); + bm::word_t w = block[nword]; + w &= (1u << bit_pos); + if (!w) + return false; + + if (nbit == 0) + { + *pos = 0; + return true; + } + *pos = nbit; + + --nbit; + nword = unsigned(nbit >> bm::set_word_shift); + bit_pos = (nbit & bm::set_word_mask); + + w = (~block[nword]) & block_set_table::_left[bit_pos]; + if (w) + { + bit_pos = bm::bit_scan_reverse32(w); + *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1); + return true; + } + + if (nword) + { + for (--nword; true; --nword) + { + w = ~block[nword]; + if (w) + { + bit_pos = bm::bit_scan_reverse32(w); // trailing zeros + *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1); + return true; + } + if (!nword) + break; + } // for nword + } + + // 0 not found, all block is 1s... + *pos = 0; + return true; } -/*! \brief GAP xor functor */ -BMFORCEINLINE unsigned xor_op(unsigned v1, unsigned v2) +/*! 
@brief Find start of the current 111 interval + @return search result code 0 - not found, 1 found, 2 - found at the start + @internal +*/ +inline +unsigned block_find_interval_start(const bm::word_t* BMRESTRICT block, + unsigned nbit_from, + unsigned* BMRESTRICT found_nbit) BMNOEXCEPT { - return v1 ^ v2; + BM_ASSERT(block && found_nbit); + BM_ASSERT(nbit_from < bm::gap_max_bits); + bool b; + if (BM_IS_GAP(block)) + { + const bm::gap_word_t* gap = BMGAP_PTR(block); + b = bm::gap_find_interval_start(gap, nbit_from, found_nbit); + if (b && *found_nbit == 0) + return 2; // start of block, keep searching + } + else // bit-block + { + if (IS_FULL_BLOCK(block)) + { + *found_nbit = 0; + return 2; + } + b = bm::bit_block_find_interval_start(block, nbit_from, found_nbit); + if (b && *found_nbit == 0) + return 2; // start of block, keep searching + } + return b; } +// ---------------------------------------------------------------------- -/*! \brief GAP or functor */ -BMFORCEINLINE unsigned or_op(unsigned v1, unsigned v2) +/*! @brief Returns "true" if one bit is set in the block [left, right] + Function check for block varieties + @internal +*/ +inline +bool block_any_range(const bm::word_t* const BMRESTRICT block, + unsigned left, unsigned right) BMNOEXCEPT { - return v1 | v2; + BM_ASSERT(left <= right); + BM_ASSERT(right < bm::gap_max_bits); + if (!block) + return false; + if (BM_IS_GAP(block)) + return bm::gap_any_range(BMGAP_PTR(block), left, right); + if (IS_FULL_BLOCK(block)) + return true; + return bm::bit_block_any_range(block, left, right); } -/*! \brief GAP or functor */ -BMFORCEINLINE unsigned sub_op(unsigned v1, unsigned v2) +// ---------------------------------------------------------------------- + +/*! 
@brief Returns "true" if one bit is set in the block + Function check for block varieties + @internal +*/ +inline +bool block_any(const bm::word_t* const BMRESTRICT block) BMNOEXCEPT { - return v1 & ~v2; + if (!block) + return false; + if (IS_FULL_BLOCK(block)) + return true; + bool all_zero = (BM_IS_GAP(block)) ? + bm::gap_is_all_zero(BMGAP_PTR(block)) + : bm::bit_is_all_zero(block); + return !all_zero; } + +// ---------------------------------------------------------------------- + +// GAP blocks manipulation functions: + + /*! \brief GAP AND operation. @@ -5095,13 +5608,14 @@ BMFORCEINLINE unsigned sub_op(unsigned v1, unsigned v2) @ingroup gapfunc */ -BMFORCEINLINE +inline gap_word_t* gap_operation_and(const gap_word_t* BMRESTRICT vect1, const gap_word_t* BMRESTRICT vect2, gap_word_t* BMRESTRICT tmp_buf, - unsigned& dsize) + unsigned& dsize) BMNOEXCEPT { - bm::gap_buff_op(tmp_buf, vect1, 0, vect2, 0, bm::and_op, dsize); + bm::gap_buff_op( + tmp_buf, vect1, 0, vect2, 0, dsize); return tmp_buf; } @@ -5119,11 +5633,11 @@ gap_word_t* gap_operation_and(const gap_word_t* BMRESTRICT vect1, @ingroup gapfunc */ -BMFORCEINLINE +inline unsigned gap_operation_any_and(const gap_word_t* BMRESTRICT vect1, - const gap_word_t* BMRESTRICT vect2) + const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT { - return gap_buff_any_op(vect1, 0, vect2, 0, and_op); + return gap_buff_any_op(vect1, 0, vect2, 0); } @@ -5138,9 +5652,9 @@ unsigned gap_operation_any_and(const gap_word_t* BMRESTRICT vect1, */ inline unsigned gap_count_and(const gap_word_t* BMRESTRICT vect1, - const gap_word_t* BMRESTRICT vect2) + const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT { - return gap_buff_count_op(vect1, vect2, and_op); + return bm::gap_buff_count_op(vect1, vect2); } @@ -5165,9 +5679,10 @@ inline gap_word_t* gap_operation_xor(const gap_word_t* BMRESTRICT vect1, const gap_word_t* BMRESTRICT vect2, gap_word_t* BMRESTRICT tmp_buf, - unsigned& dsize) + unsigned& dsize) BMNOEXCEPT { - gap_buff_op(tmp_buf, vect1, 0, 
vect2, 0, bm::xor_op, dsize); + bm::gap_buff_op( + tmp_buf, vect1, 0, vect2, 0, dsize); return tmp_buf; } @@ -5178,9 +5693,10 @@ inline bool gap_operation_dry_xor(const gap_word_t* BMRESTRICT vect1, const gap_word_t* BMRESTRICT vect2, unsigned& dsize, - unsigned limit) + unsigned limit) BMNOEXCEPT { - return gap_buff_dry_op(vect1, vect2, bm::xor_op, dsize, limit); + return + bm::gap_buff_dry_op(vect1, vect2, dsize, limit); } @@ -5200,9 +5716,9 @@ bool gap_operation_dry_xor(const gap_word_t* BMRESTRICT vect1, */ BMFORCEINLINE unsigned gap_operation_any_xor(const gap_word_t* BMRESTRICT vect1, - const gap_word_t* BMRESTRICT vect2) + const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT { - return gap_buff_any_op(vect1, 0, vect2, 0, bm::xor_op); + return gap_buff_any_op(vect1, 0, vect2, 0); } /*! @@ -5214,11 +5730,11 @@ unsigned gap_operation_any_xor(const gap_word_t* BMRESTRICT vect1, @ingroup gapfunc */ -BMFORCEINLINE +BMFORCEINLINE unsigned gap_count_xor(const gap_word_t* BMRESTRICT vect1, - const gap_word_t* BMRESTRICT vect2) + const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT { - return gap_buff_count_op(vect1, vect2, bm::xor_op); + return bm::gap_buff_count_op(vect1, vect2); } @@ -5243,10 +5759,10 @@ inline gap_word_t* gap_operation_or(const gap_word_t* BMRESTRICT vect1, const gap_word_t* BMRESTRICT vect2, gap_word_t* BMRESTRICT tmp_buf, - unsigned& dsize) + unsigned& dsize) BMNOEXCEPT { - gap_buff_op(tmp_buf, vect1, 1, vect2, 1, bm::and_op, dsize); - gap_invert(tmp_buf); + bm::gap_buff_op(tmp_buf, vect1, 1, vect2, 1, dsize); + bm::gap_invert(tmp_buf); return tmp_buf; } @@ -5261,9 +5777,9 @@ gap_word_t* gap_operation_or(const gap_word_t* BMRESTRICT vect1, */ BMFORCEINLINE unsigned gap_count_or(const gap_word_t* BMRESTRICT vect1, - const gap_word_t* BMRESTRICT vect2) + const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT { - return gap_buff_count_op(vect1, vect2, bm::or_op); + return gap_buff_count_op(vect1, vect2); } @@ -5285,12 +5801,14 @@ unsigned gap_count_or(const 
gap_word_t* BMRESTRICT vect1, @ingroup gapfunc */ -inline gap_word_t* gap_operation_sub(const gap_word_t* BMRESTRICT vect1, - const gap_word_t* BMRESTRICT vect2, - gap_word_t* BMRESTRICT tmp_buf, - unsigned& dsize) +inline +gap_word_t* gap_operation_sub(const gap_word_t* BMRESTRICT vect1, + const gap_word_t* BMRESTRICT vect2, + gap_word_t* BMRESTRICT tmp_buf, + unsigned& dsize) BMNOEXCEPT { - gap_buff_op(tmp_buf, vect1, 0, vect2, 1, and_op, dsize); + bm::gap_buff_op( // no bug here + tmp_buf, vect1, 0, vect2, 1, dsize); return tmp_buf; } @@ -5309,11 +5827,13 @@ inline gap_word_t* gap_operation_sub(const gap_word_t* BMRESTRICT vect1, @ingroup gapfunc */ -BMFORCEINLINE +inline unsigned gap_operation_any_sub(const gap_word_t* BMRESTRICT vect1, - const gap_word_t* BMRESTRICT vect2) + const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT { - return gap_buff_any_op(vect1, 0, vect2, 1, bm::and_op); + return + bm::gap_buff_any_op( // no bug here + vect1, 0, vect2, 1); } @@ -5328,9 +5848,9 @@ unsigned gap_operation_any_sub(const gap_word_t* BMRESTRICT vect1, */ BMFORCEINLINE unsigned gap_count_sub(const gap_word_t* BMRESTRICT vect1, - const gap_word_t* BMRESTRICT vect2) + const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT { - return gap_buff_count_op(vect1, vect2, bm::sub_op); + return bm::gap_buff_count_op(vect1, vect2); } @@ -5348,7 +5868,8 @@ unsigned gap_count_sub(const gap_word_t* BMRESTRICT vect1, @ingroup bitfunc */ inline -void bit_block_copy(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src) +void bit_block_copy(bm::word_t* BMRESTRICT dst, + const bm::word_t* BMRESTRICT src) BMNOEXCEPT { #ifdef BMVECTOPT VECT_COPY_BLOCK(dst, src); @@ -5366,7 +5887,8 @@ void bit_block_copy(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src @ingroup bitfunc */ inline -void bit_block_stream(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src) +void bit_block_stream(bm::word_t* BMRESTRICT dst, + const bm::word_t* BMRESTRICT src) BMNOEXCEPT { #ifdef VECT_STREAM_BLOCK 
VECT_STREAM_BLOCK(dst, src); @@ -5388,7 +5910,8 @@ void bit_block_stream(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT s @ingroup bitfunc */ inline -bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src) +bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst, + const bm::word_t* BMRESTRICT src) BMNOEXCEPT { BM_ASSERT(dst); BM_ASSERT(src); @@ -5428,7 +5951,7 @@ bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRIC inline bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src, - bm::id64_t digest) + bm::id64_t digest) BMNOEXCEPT { BM_ASSERT(dst); BM_ASSERT(src); @@ -5486,7 +6009,7 @@ bm::id64_t bit_block_and_5way(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src1, const bm::word_t* BMRESTRICT src2, const bm::word_t* BMRESTRICT src3, - bm::id64_t digest) + bm::id64_t digest) BMNOEXCEPT { BM_ASSERT(dst); BM_ASSERT(src0 && src1 && src2 && src3); @@ -5551,7 +6074,7 @@ inline bm::id64_t bit_block_and_2way(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src1, const bm::word_t* BMRESTRICT src2, - bm::id64_t digest) + bm::id64_t digest) BMNOEXCEPT { BM_ASSERT(dst); BM_ASSERT(src1 && src2); @@ -5612,7 +6135,7 @@ bm::id64_t bit_block_and_2way(bm::word_t* BMRESTRICT dst, */ inline unsigned bit_block_and_count(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { unsigned count; const bm::word_t* src1_end = src1 + bm::set_block_size; @@ -5661,7 +6184,7 @@ unsigned bit_block_and_count(const bm::word_t* BMRESTRICT src1, */ inline unsigned bit_block_and_any(const bm::word_t* src1, - const bm::word_t* src2) + const bm::word_t* src2) BMNOEXCEPT { unsigned count = 0; const bm::word_t* src1_end = src1 + bm::set_block_size; @@ -5691,7 +6214,7 @@ unsigned bit_block_and_any(const bm::word_t* src1, */ inline unsigned bit_block_xor_count(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT 
src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { unsigned count; const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size; @@ -5740,7 +6263,7 @@ unsigned bit_block_xor_count(const bm::word_t* BMRESTRICT src1, */ inline unsigned bit_block_xor_any(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { unsigned count = 0; const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size; @@ -5767,7 +6290,7 @@ unsigned bit_block_xor_any(const bm::word_t* BMRESTRICT src1, */ inline unsigned bit_block_sub_count(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { unsigned count; const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size; @@ -5815,7 +6338,7 @@ unsigned bit_block_sub_count(const bm::word_t* BMRESTRICT src1, */ inline unsigned bit_block_sub_any(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { unsigned count = 0; const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size; @@ -5844,7 +6367,7 @@ unsigned bit_block_sub_any(const bm::word_t* BMRESTRICT src1, */ inline unsigned bit_block_or_count(const bm::word_t* src1, - const bm::word_t* src2) + const bm::word_t* src2) BMNOEXCEPT { unsigned count; const bm::word_t* src1_end = src1 + bm::set_block_size; @@ -5892,7 +6415,7 @@ unsigned bit_block_or_count(const bm::word_t* src1, */ inline unsigned bit_block_or_any(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { unsigned count = 0; const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size; @@ -5924,7 +6447,7 @@ unsigned bit_block_or_any(const bm::word_t* BMRESTRICT src1, @ingroup bitfunc */ inline bm::word_t* bit_operation_and(bm::word_t* BMRESTRICT dst, - const bm::word_t* BMRESTRICT src) + const bm::word_t* BMRESTRICT src) BMNOEXCEPT { BM_ASSERT(dst 
|| src); @@ -5988,7 +6511,7 @@ inline bm::word_t* bit_operation_and(bm::word_t* BMRESTRICT dst, */ inline bm::id_t bit_operation_and_count(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2)) return 0; @@ -6012,7 +6535,7 @@ bm::id_t bit_operation_and_count(const bm::word_t* BMRESTRICT src1, */ inline bm::id_t bit_operation_and_any(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2)) return 0; @@ -6037,7 +6560,7 @@ bm::id_t bit_operation_and_any(const bm::word_t* BMRESTRICT src1, */ inline bm::id_t bit_operation_sub_count(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { if (src1 == src2) return 0; @@ -6074,7 +6597,7 @@ bm::id_t bit_operation_sub_count(const bm::word_t* BMRESTRICT src1, */ inline bm::id_t bit_operation_sub_count_inv(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { return bit_operation_sub_count(src2, src1); } @@ -6092,7 +6615,7 @@ bm::id_t bit_operation_sub_count_inv(const bm::word_t* BMRESTRICT src1, */ inline bm::id_t bit_operation_sub_any(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { if (IS_EMPTY_BLOCK(src1)) return 0; @@ -6127,7 +6650,7 @@ bm::id_t bit_operation_sub_any(const bm::word_t* BMRESTRICT src1, */ inline bm::id_t bit_operation_or_count(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { if (IS_FULL_BLOCK(src1) || IS_FULL_BLOCK(src2)) return bm::gap_max_bits; @@ -6164,7 +6687,7 @@ bm::id_t bit_operation_or_count(const bm::word_t* BMRESTRICT src1, */ inline bm::id_t bit_operation_or_any(const bm::word_t* BMRESTRICT 
src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { if (IS_EMPTY_BLOCK(src1)) { @@ -6197,7 +6720,7 @@ bm::id_t bit_operation_or_any(const bm::word_t* BMRESTRICT src1, */ inline bool bit_block_or(bm::word_t* BMRESTRICT dst, - const bm::word_t* BMRESTRICT src) + const bm::word_t* BMRESTRICT src) BMNOEXCEPT { #ifdef BMVECTOPT return VECT_OR_BLOCK(dst, src); @@ -6235,7 +6758,7 @@ bool bit_block_or(bm::word_t* BMRESTRICT dst, inline bool bit_block_or_2way(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { #ifdef BMVECTOPT return VECT_OR_BLOCK_2WAY(dst, src1, src2); @@ -6274,7 +6797,7 @@ bool bit_block_or_2way(bm::word_t* BMRESTRICT dst, inline bm::id64_t bit_block_xor_2way(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { #ifdef BMVECTOPT return VECT_XOR_BLOCK_2WAY(dst, src1, src2); @@ -6315,7 +6838,7 @@ bm::id64_t bit_block_xor_2way(bm::word_t* BMRESTRICT dst, inline bool bit_block_or_3way(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { #ifdef BMVECTOPT return VECT_OR_BLOCK_3WAY(dst, src1, src2); @@ -6361,7 +6884,7 @@ bool bit_block_or_5way(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src1, const bm::word_t* BMRESTRICT src2, const bm::word_t* BMRESTRICT src3, - const bm::word_t* BMRESTRICT src4) + const bm::word_t* BMRESTRICT src4) BMNOEXCEPT { #ifdef BMVECTOPT return VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4); @@ -6407,7 +6930,7 @@ bool bit_block_or_5way(bm::word_t* BMRESTRICT dst, */ inline bm::word_t* bit_operation_or(bm::word_t* BMRESTRICT dst, - const bm::word_t* BMRESTRICT src) + const bm::word_t* BMRESTRICT src) BMNOEXCEPT { BM_ASSERT(dst || src); @@ -6467,7 +6990,7 @@ bm::word_t* bit_operation_or(bm::word_t* 
BMRESTRICT dst, */ inline bm::id64_t bit_block_sub(bm::word_t* BMRESTRICT dst, - const bm::word_t* BMRESTRICT src) + const bm::word_t* BMRESTRICT src) BMNOEXCEPT { #ifdef BMVECTOPT bm::id64_t acc = VECT_SUB_BLOCK(dst, src); @@ -6504,7 +7027,7 @@ bm::id64_t bit_block_sub(bm::word_t* BMRESTRICT dst, inline bm::id64_t bit_block_sub(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src, - bm::id64_t digest) + bm::id64_t digest) BMNOEXCEPT { BM_ASSERT(dst); BM_ASSERT(src); @@ -6565,7 +7088,7 @@ inline bm::id64_t bit_block_sub_2way(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src1, const bm::word_t* BMRESTRICT src2, - bm::id64_t digest) + bm::id64_t digest) BMNOEXCEPT { BM_ASSERT(dst); BM_ASSERT(src1 && src2); @@ -6630,7 +7153,7 @@ bm::id64_t bit_block_sub_2way(bm::word_t* BMRESTRICT dst, */ inline bm::word_t* bit_operation_sub(bm::word_t* BMRESTRICT dst, - const bm::word_t* BMRESTRICT src) + const bm::word_t* BMRESTRICT src) BMNOEXCEPT { BM_ASSERT(dst || src); @@ -6688,7 +7211,7 @@ bm::word_t* bit_operation_sub(bm::word_t* BMRESTRICT dst, */ inline bm::id64_t bit_block_xor(bm::word_t* BMRESTRICT dst, - const bm::word_t* BMRESTRICT src) + const bm::word_t* BMRESTRICT src) BMNOEXCEPT { BM_ASSERT(dst); BM_ASSERT(src); @@ -6724,7 +7247,7 @@ bm::id64_t bit_block_xor(bm::word_t* BMRESTRICT dst, */ inline void bit_andnot_arr_ffmask(bm::word_t* BMRESTRICT dst, - const bm::word_t* BMRESTRICT src) + const bm::word_t* BMRESTRICT src) BMNOEXCEPT { const bm::word_t* BMRESTRICT src_end = src + bm::set_block_size; #ifdef BMVECTOPT @@ -6759,7 +7282,7 @@ void bit_andnot_arr_ffmask(bm::word_t* BMRESTRICT dst, */ inline bm::word_t* bit_operation_xor(bm::word_t* BMRESTRICT dst, - const bm::word_t* BMRESTRICT src) + const bm::word_t* BMRESTRICT src) BMNOEXCEPT { BM_ASSERT(dst || src); if (src == dst) return 0; // XOR rule @@ -6797,7 +7320,7 @@ bm::word_t* bit_operation_xor(bm::word_t* BMRESTRICT dst, */ inline bm::id_t bit_operation_xor_count(const bm::word_t* BMRESTRICT 
src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { if (src1 == src2) return 0; @@ -6829,7 +7352,7 @@ bm::id_t bit_operation_xor_count(const bm::word_t* BMRESTRICT src1, */ inline bm::id_t bit_operation_xor_any(const bm::word_t* BMRESTRICT src1, - const bm::word_t* BMRESTRICT src2) + const bm::word_t* BMRESTRICT src2) BMNOEXCEPT { if (src1 == src2) return 0; @@ -6854,7 +7377,7 @@ bm::id_t bit_operation_xor_any(const bm::word_t* BMRESTRICT src1, @ingroup bitfunc */ template -unsigned bit_count_nonzero_size(const T* blk, unsigned data_size) +unsigned bit_count_nonzero_size(const T* blk, unsigned data_size) BMNOEXCEPT { BM_ASSERT(blk && data_size); unsigned count = 0; @@ -6914,7 +7437,8 @@ unsigned bit_count_nonzero_size(const T* blk, unsigned data_size) @ingroup bitfunc */ inline -unsigned bit_block_find(const bm::word_t* block, unsigned nbit, unsigned* pos) +unsigned bit_block_find(const bm::word_t* BMRESTRICT block, + unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(pos); @@ -6934,7 +7458,7 @@ unsigned bit_block_find(const bm::word_t* block, unsigned nbit, unsigned* pos) if (w) { bit_pos = bm::bit_scan_forward32(w); // trailing zeros - *pos = unsigned(bit_pos + (nword * 8u * sizeof(bm::word_t))); + *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))); return 1; } @@ -6944,7 +7468,7 @@ unsigned bit_block_find(const bm::word_t* block, unsigned nbit, unsigned* pos) if (w) { bit_pos = bm::bit_scan_forward32(w); // trailing zeros - *pos = unsigned(bit_pos + (i * 8u * sizeof(bm::word_t))); + *pos = unsigned(bit_pos + (i * 8u * unsigned(sizeof(bm::word_t)))); return w; } } // for i @@ -6952,6 +7476,8 @@ unsigned bit_block_find(const bm::word_t* block, unsigned nbit, unsigned* pos) } + + /*! 
\brief BIT block find the last set bit (backward search) @@ -6962,7 +7488,8 @@ unsigned bit_block_find(const bm::word_t* block, unsigned nbit, unsigned* pos) @ingroup bitfunc */ inline -unsigned bit_find_last(const bm::word_t* block, unsigned* last) +unsigned bit_find_last(const bm::word_t* BMRESTRICT block, + unsigned* BMRESTRICT last) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(last); @@ -6975,7 +7502,7 @@ unsigned bit_find_last(const bm::word_t* block, unsigned* last) if (w) { unsigned idx = bm::bit_scan_reverse(w); - *last = unsigned(idx + (i * 8u * sizeof(bm::word_t))); + *last = unsigned(idx + (i * 8u * unsigned(sizeof(bm::word_t)))); return w; } if (i == 0) @@ -6995,7 +7522,8 @@ unsigned bit_find_last(const bm::word_t* block, unsigned* last) @internal */ inline -bool bit_find_first(const bm::word_t* block, unsigned* pos) +bool bit_find_first(const bm::word_t* BMRESTRICT block, + unsigned* BMRESTRICT pos) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(pos); @@ -7009,7 +7537,7 @@ bool bit_find_first(const bm::word_t* block, unsigned* pos) if (w) { unsigned idx = bm::bit_scan_forward32(w); // trailing zeros - *pos = unsigned(idx + (i * 8u * sizeof(bm::word_t))); + *pos = unsigned(idx + (i * 8u * unsigned(sizeof(bm::word_t)))); return w; } } // for i @@ -7029,9 +7557,9 @@ bool bit_find_first(const bm::word_t* block, unsigned* pos) @ingroup bitfunc */ inline -unsigned bit_find_first(const bm::word_t* block, - unsigned* first, - bm::id64_t digest) +unsigned bit_find_first(const bm::word_t* BMRESTRICT block, + unsigned* BMRESTRICT first, + bm::id64_t digest) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(first); @@ -7047,7 +7575,7 @@ unsigned bit_find_first(const bm::word_t* block, if (w) { unsigned idx = bit_scan_forward32(w); // trailing zeros - *first = unsigned(idx + (i * 8u * sizeof(bm::word_t))); + *first = unsigned(idx + (i * 8u * unsigned(sizeof(bm::word_t)))); return w; } } // for i @@ -7067,9 +7595,9 @@ unsigned bit_find_first(const bm::word_t* block, @ingroup bitfunc 
*/ inline -bool bit_find_first_if_1(const bm::word_t* block, - unsigned* first, - bm::id64_t digest) +bool bit_find_first_if_1(const bm::word_t* BMRESTRICT block, + unsigned* BMRESTRICT first, + bm::id64_t digest) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(first); @@ -7127,7 +7655,7 @@ template SIZE_TYPE bit_find_rank(const bm::word_t* const block, SIZE_TYPE rank, unsigned nbit_from, - unsigned& nbit_pos) + unsigned& nbit_pos) BMNOEXCEPT { BM_ASSERT(block); BM_ASSERT(rank); @@ -7206,7 +7734,7 @@ template SIZE_TYPE block_find_rank(const bm::word_t* const block, SIZE_TYPE rank, unsigned nbit_from, - unsigned& nbit_pos) + unsigned& nbit_pos) BMNOEXCEPT { if (BM_IS_GAP(block)) { @@ -7230,7 +7758,7 @@ inline bm::set_representation best_representation(unsigned bit_count, unsigned total_possible_bitcount, unsigned gap_count, - unsigned block_size) + unsigned block_size) BMNOEXCEPT { unsigned arr_size = unsigned(sizeof(bm::gap_word_t) * bit_count + sizeof(bm::gap_word_t)); unsigned gap_size = unsigned(sizeof(bm::gap_word_t) * gap_count + sizeof(bm::gap_word_t)); @@ -7268,15 +7796,16 @@ T bit_convert_to_arr(T* BMRESTRICT dest, const unsigned* BMRESTRICT src, bm::id_t bits, unsigned dest_len, - unsigned mask = 0) + unsigned mask = 0) BMNOEXCEPT { T* BMRESTRICT pcurr = dest; - for (unsigned bit_idx=0; bit_idx < bits; ++src,bit_idx += unsigned(sizeof(*src) * 8)) + for (unsigned bit_idx=0; bit_idx < bits; + ++src,bit_idx += unsigned(sizeof(*src) * 8)) { unsigned val = *src ^ mask; // invert value by XOR 0xFF.. 
if (val == 0) continue; - if (pcurr + sizeof(val)*8 >= dest + dest_len) // insufficient space + if (pcurr + unsigned(sizeof(val)*8) >= dest + dest_len) // insufficient space return 0; // popscan loop to decode bits in a word while (val) @@ -7299,7 +7828,7 @@ T bit_convert_to_arr(T* BMRESTRICT dest, @internal */ inline -bool check_block_zero(const bm::word_t* blk, bool deep_scan) +bool check_block_zero(const bm::word_t* blk, bool deep_scan) BMNOEXCEPT { if (!blk) return true; if (IS_FULL_BLOCK(blk)) return false; @@ -7323,7 +7852,7 @@ bool check_block_zero(const bm::word_t* blk, bool deep_scan) @internal */ inline -bool check_block_one(const bm::word_t* blk, bool deep_scan) +bool check_block_one(const bm::word_t* blk, bool deep_scan) BMNOEXCEPT { if (blk == 0) return false; @@ -7348,7 +7877,7 @@ bool check_block_one(const bm::word_t* blk, bool deep_scan) template unsigned gap_overhead(const T* length, const T* length_end, - const T* glevel_len) + const T* glevel_len) BMNOEXCEPT { BM_ASSERT(length && length_end && glevel_len); @@ -7375,7 +7904,7 @@ unsigned gap_overhead(const T* length, template bool improve_gap_levels(const T* length, const T* length_end, - T* glevel_len) + T* glevel_len) BMNOEXCEPT { BM_ASSERT(length && length_end && glevel_len); @@ -7473,7 +8002,7 @@ bool improve_gap_levels(const T* length, inline bool block_find_first_diff(const bm::word_t* BMRESTRICT blk, const bm::word_t* BMRESTRICT arg_blk, - unsigned* BMRESTRICT pos) + unsigned* BMRESTRICT pos) BMNOEXCEPT { // If one block is zero we check if the other one has at least // one bit ON @@ -7568,7 +8097,7 @@ public: bitblock_get_adapter(const bm::word_t* bit_block) : b_(bit_block) {} BMFORCEINLINE - bm::word_t get_32() { return *b_++; } + bm::word_t get_32() BMNOEXCEPT { return *b_++; } private: const bm::word_t* b_; }; @@ -7597,9 +8126,9 @@ class bitblock_sum_adapter public: bitblock_sum_adapter() : sum_(0) {} BMFORCEINLINE - void push_back(bm::word_t w) { this->sum_+= w; } + void 
push_back(bm::word_t w) BMNOEXCEPT { this->sum_+= w; } /// Get accumulated sum - bm::word_t sum() const { return this->sum_; } + bm::word_t sum() const BMNOEXCEPT { return this->sum_; } private: bm::word_t sum_; }; @@ -7619,7 +8148,7 @@ public: cnt_(0) {} - bm::word_t get_32() + bm::word_t get_32() BMNOEXCEPT { if (cnt_ < from_ || cnt_ > to_) { @@ -7645,7 +8174,7 @@ template void bit_recomb(It1& it1, It2& it2, BinaryOp& op, Encoder& enc, - unsigned block_size = bm::set_block_size) + unsigned block_size = bm::set_block_size) BMNOEXCEPT { for (unsigned i = 0; i < block_size; ++i) { @@ -7659,37 +8188,37 @@ void bit_recomb(It1& it1, It2& it2, /// Bit AND functor template struct bit_AND { - W operator()(W w1, W w2) { return w1 & w2; } + W operator()(W w1, W w2) BMNOEXCEPT { return w1 & w2; } }; /// Bit OR functor template struct bit_OR { - W operator()(W w1, W w2) { return w1 | w2; } + W operator()(W w1, W w2) BMNOEXCEPT { return w1 | w2; } }; /// Bit SUB functor template struct bit_SUB { - W operator()(W w1, W w2) { return w1 & ~w2; } + W operator()(W w1, W w2) BMNOEXCEPT { return w1 & ~w2; } }; /// Bit XOR functor template struct bit_XOR { - W operator()(W w1, W w2) { return w1 ^ w2; } + W operator()(W w1, W w2) BMNOEXCEPT { return w1 ^ w2; } }; /// Bit ASSIGN functor template struct bit_ASSIGN { - W operator()(W, W w2) { return w2; } + W operator()(W, W w2) BMNOEXCEPT { return w2; } }; /// Bit COUNT functor template struct bit_COUNT { - W operator()(W w1, W w2) + W operator()(W w1, W w2) BMNOEXCEPT { w1 = 0; BM_INCWORD_BITCOUNT(w1, w2); @@ -7700,7 +8229,7 @@ template struct bit_COUNT /// Bit COUNT AND functor template struct bit_COUNT_AND { - W operator()(W w1, W w2) + W operator()(W w1, W w2) BMNOEXCEPT { W r = 0; BM_INCWORD_BITCOUNT(r, w1 & w2); @@ -7711,7 +8240,7 @@ template struct bit_COUNT_AND /// Bit COUNT XOR functor template struct bit_COUNT_XOR { - W operator()(W w1, W w2) + W operator()(W w1, W w2) BMNOEXCEPT { W r = 0; BM_INCWORD_BITCOUNT(r, w1 ^ w2); @@ 
-7722,7 +8251,7 @@ template struct bit_COUNT_XOR /// Bit COUNT OR functor template struct bit_COUNT_OR { - W operator()(W w1, W w2) + W operator()(W w1, W w2) BMNOEXCEPT { W r = 0; BM_INCWORD_BITCOUNT(r, w1 | w2); @@ -7734,7 +8263,7 @@ template struct bit_COUNT_OR /// Bit COUNT SUB AB functor template struct bit_COUNT_SUB_AB { - W operator()(W w1, W w2) + W operator()(W w1, W w2) BMNOEXCEPT { W r = 0; BM_INCWORD_BITCOUNT(r, w1 & (~w2)); @@ -7745,7 +8274,7 @@ template struct bit_COUNT_SUB_AB /// Bit SUB BA functor template struct bit_COUNT_SUB_BA { - W operator()(W w1, W w2) + W operator()(W w1, W w2) BMNOEXCEPT { W r = 0; BM_INCWORD_BITCOUNT(r, w2 & (~w1)); @@ -7756,7 +8285,7 @@ template struct bit_COUNT_SUB_BA /// Bit COUNT A functor template struct bit_COUNT_A { - W operator()(W w1, W ) + W operator()(W w1, W ) BMNOEXCEPT { W r = 0; BM_INCWORD_BITCOUNT(r, w1); @@ -7767,7 +8296,7 @@ template struct bit_COUNT_A /// Bit COUNT B functor template struct bit_COUNT_B { - W operator()(W, W w2) + W operator()(W, W w2) BMNOEXCEPT { W r = 0; BM_INCWORD_BITCOUNT(r, w2); @@ -7858,8 +8387,11 @@ operation_functions::bit_op_count_table_[bm::set_END] = { 0, // set_COUNT_B }; - -const unsigned short set_bitscan_wave_size = 2; +/** + Size of bit decode wave in words + @internal + */ +const unsigned short set_bitscan_wave_size = 4; /*! 
\brief Unpacks word wave (Nx 32-bit words) \param w_ptr - pointer on wave start @@ -7870,10 +8402,12 @@ const unsigned short set_bitscan_wave_size = 2; @internal */ inline -unsigned short bitscan_wave(const bm::word_t* w_ptr, unsigned char* bits) +unsigned short +bitscan_wave(const bm::word_t* BMRESTRICT w_ptr, + unsigned char* BMRESTRICT bits) BMNOEXCEPT { bm::word_t w0, w1; - unsigned short cnt0; + unsigned int cnt0; w0 = w_ptr[0]; w1 = w_ptr[1]; @@ -7881,15 +8415,23 @@ unsigned short bitscan_wave(const bm::word_t* w_ptr, unsigned char* bits) #if defined(BMAVX512OPT) || defined(BMAVX2OPT) || defined(BMSSE42OPT) // combine into 64-bit word and scan (when HW popcnt64 is available) bm::id64_t w = (bm::id64_t(w1) << 32) | w0; - cnt0 = (unsigned short) bm::bitscan_popcnt64(w, bits); + cnt0 = bm::bitscan_popcnt64(w, bits); + + w0 = w_ptr[2]; + w1 = w_ptr[3]; + w = (bm::id64_t(w1) << 32) | w0; + cnt0 += bm::bitscan_popcnt64(w, bits + cnt0, 64); #else - unsigned short cnt1; // decode wave as two 32-bit bitscan decodes - cnt0 = w0 ? bm::bitscan_popcnt(w0, bits) : 0; - cnt1 = w1 ? 
bm::bitscan_popcnt(w1, bits + cnt0, 32) : 0; - cnt0 = (unsigned short)(cnt0 + cnt1); + cnt0 = bm::bitscan_popcnt(w0, bits); + cnt0 += bm::bitscan_popcnt(w1, bits + cnt0, 32); + + w0 = w_ptr[2]; + w1 = w_ptr[3]; + cnt0 += bm::bitscan_popcnt(w0, bits + cnt0, 64); + cnt0 += bm::bitscan_popcnt(w1, bits + cnt0, 64+32); #endif - return cnt0; + return static_cast(cnt0); } #if defined (BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512) @@ -7899,9 +8441,11 @@ unsigned short bitscan_wave(const bm::word_t* w_ptr, unsigned char* bits) @internal */ inline -void bit_block_gather_scatter(unsigned* arr, const bm::word_t* blk, - const unsigned* idx, unsigned size, unsigned start, - unsigned bit_idx) +void bit_block_gather_scatter(unsigned* BMRESTRICT arr, + const bm::word_t* BMRESTRICT blk, + const unsigned* BMRESTRICT idx, + unsigned size, unsigned start, + unsigned bit_idx) BMNOEXCEPT { typedef unsigned TRGW; typedef unsigned IDX; @@ -7930,8 +8474,10 @@ typedef unsigned IDX; @internal */ template -void bit_block_gather_scatter(TRGW* arr, const bm::word_t* blk, - const IDX* idx, SZ size, SZ start, unsigned bit_idx) +void bit_block_gather_scatter(TRGW* BMRESTRICT arr, + const bm::word_t* BMRESTRICT blk, + const IDX* BMRESTRICT idx, + SZ size, SZ start, unsigned bit_idx) BMNOEXCEPT { // TODO: SIMD for 64-bit index sizes and 64-bit target value size // @@ -7943,15 +8489,17 @@ void bit_block_gather_scatter(TRGW* arr, const bm::word_t* blk, { const SZ base = start + k; const unsigned nbitA = unsigned(idx[base] & bm::set_block_mask); - arr[base] |= (TRGW(bool(blk[nbitA >> bm::set_word_shift] & (mask1 << (nbitA & bm::set_word_mask)))) << bit_idx); + arr[base] |= (TRGW(bool(blk[nbitA >> bm::set_word_shift] & + (mask1 << (nbitA & bm::set_word_mask)))) << bit_idx); const unsigned nbitB = unsigned(idx[base + 1] & bm::set_block_mask); - arr[base+1] |= (TRGW(bool(blk[nbitB >> bm::set_word_shift] & (mask1 << (nbitB & bm::set_word_mask)))) << bit_idx); + arr[base+1] |= (TRGW(bool(blk[nbitB 
>> bm::set_word_shift] & + (mask1 << (nbitB & bm::set_word_mask)))) << bit_idx); } // for k - for (; k < len; ++k) { unsigned nbit = unsigned(idx[start + k] & bm::set_block_mask); - arr[start + k] |= (TRGW(bool(blk[nbit >> bm::set_word_shift] & (mask1 << (nbit & bm::set_word_mask)))) << bit_idx); + arr[start + k] |= (TRGW(bool(blk[nbit >> bm::set_word_shift] & + (mask1 << (nbit & bm::set_word_mask)))) << bit_idx); } // for k } @@ -7968,7 +8516,8 @@ void bit_block_gather_scatter(TRGW* arr, const bm::word_t* blk, @internal */ inline -bm::id64_t idx_arr_block_lookup_u64(const bm::id64_t* idx, bm::id64_t size, bm::id64_t nb, bm::id64_t start) +bm::id64_t idx_arr_block_lookup_u64(const bm::id64_t* idx, + bm::id64_t size, bm::id64_t nb, bm::id64_t start) BMNOEXCEPT { BM_ASSERT(idx); BM_ASSERT(start < size); @@ -7993,7 +8542,8 @@ bm::id64_t idx_arr_block_lookup_u64(const bm::id64_t* idx, bm::id64_t size, bm:: @internal */ inline -unsigned idx_arr_block_lookup_u32(const unsigned* idx, unsigned size, unsigned nb, unsigned start) +unsigned idx_arr_block_lookup_u32(const unsigned* idx, + unsigned size, unsigned nb, unsigned start) BMNOEXCEPT { BM_ASSERT(idx); BM_ASSERT(start < size); @@ -8027,7 +8577,7 @@ unsigned idx_arr_block_lookup_u32(const unsigned* idx, unsigned size, unsigned n inline void set_block_bits_u64(bm::word_t* BMRESTRICT block, const bm::id64_t* BMRESTRICT idx, - bm::id64_t start, bm::id64_t stop) + bm::id64_t start, bm::id64_t stop) BMNOEXCEPT { // TODO: SIMD for 64-bit mode for (bm::id64_t i = start; i < stop; ++i) @@ -8036,8 +8586,7 @@ void set_block_bits_u64(bm::word_t* BMRESTRICT block, unsigned nbit = unsigned(n & bm::set_block_mask); unsigned nword = nbit >> bm::set_word_shift; nbit &= bm::set_word_mask; - bm::word_t mask = (1u << nbit); - block[nword] |= mask; + block[nword] |= (1u << nbit); } // for i } @@ -8058,7 +8607,7 @@ void set_block_bits_u64(bm::word_t* BMRESTRICT block, inline void set_block_bits_u32(bm::word_t* BMRESTRICT block, const 
unsigned* BMRESTRICT idx, - unsigned start, unsigned stop ) + unsigned start, unsigned stop ) BMNOEXCEPT { #if defined(VECT_SET_BLOCK_BITS) VECT_SET_BLOCK_BITS(block, idx, start, stop); @@ -8069,8 +8618,7 @@ void set_block_bits_u32(bm::word_t* BMRESTRICT block, unsigned nbit = unsigned(n & bm::set_block_mask); unsigned nword = nbit >> bm::set_word_shift; nbit &= bm::set_word_mask; - bm::word_t mask = (1u << nbit); - block[nword] |= mask; + block[nword] |= (1u << nbit); } // for i #endif } @@ -8084,7 +8632,8 @@ void set_block_bits_u32(bm::word_t* BMRESTRICT block, @internal */ inline -bool block_ptr_array_range(bm::word_t** arr, unsigned& left, unsigned& right) +bool block_ptr_array_range(bm::word_t** arr, + unsigned& left, unsigned& right) BMNOEXCEPT { BM_ASSERT(arr); @@ -8119,7 +8668,7 @@ bool block_ptr_array_range(bm::word_t** arr, unsigned& left, unsigned& right) */ inline unsigned lower_bound_linear_u32(const unsigned* arr, unsigned target, - unsigned from, unsigned to) + unsigned from, unsigned to) BMNOEXCEPT { BM_ASSERT(arr); BM_ASSERT(from <= to); @@ -8141,8 +8690,9 @@ unsigned lower_bound_linear_u32(const unsigned* arr, unsigned target, @internal */ inline -unsigned lower_bound_linear_u64(const unsigned long long* arr, unsigned long long target, - unsigned from, unsigned to) +unsigned lower_bound_linear_u64(const unsigned long long* arr, + unsigned long long target, + unsigned from, unsigned to) BMNOEXCEPT { BM_ASSERT(arr); BM_ASSERT(from <= to); @@ -8166,7 +8716,7 @@ unsigned lower_bound_linear_u64(const unsigned long long* arr, unsigned long lon */ inline unsigned lower_bound_u32(const unsigned* arr, unsigned target, - unsigned from, unsigned to) + unsigned from, unsigned to) BMNOEXCEPT { BM_ASSERT(arr); BM_ASSERT(from <= to); @@ -8200,8 +8750,9 @@ unsigned lower_bound_u32(const unsigned* arr, unsigned target, @internal */ inline -unsigned lower_bound_u64(const unsigned long long* arr, unsigned long long target, - unsigned from, unsigned to) +unsigned 
lower_bound_u64(const unsigned long long* arr, + unsigned long long target, + unsigned from, unsigned to) BMNOEXCEPT { BM_ASSERT(arr); BM_ASSERT(from <= to); @@ -8238,7 +8789,8 @@ unsigned lower_bound_u64(const unsigned long long* arr, unsigned long long targe */ #ifdef BM64ADDR inline -bm::id64_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx) +bm::id64_t block_to_global_index(unsigned i, unsigned j, + unsigned block_idx) BMNOEXCEPT { bm::id64_t base_idx = bm::id64_t(i) * bm::set_sub_array_size * bm::gap_max_bits; base_idx += j * bm::gap_max_bits; @@ -8246,7 +8798,8 @@ bm::id64_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx) } #else inline -bm::id_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx) +bm::id_t block_to_global_index(unsigned i, unsigned j, + unsigned block_idx) BMNOEXCEPT { unsigned base_idx = i * bm::set_sub_array_size * bm::gap_max_bits; base_idx += j * bm::gap_max_bits; @@ -8274,7 +8827,7 @@ union ptr_payload_t @internal */ inline -bm::id64_t ptrp_test(ptr_payload_t ptr, bm::gap_word_t v) +bm::id64_t ptrp_test(ptr_payload_t ptr, bm::gap_word_t v) BMNOEXCEPT { if (v == 0) { diff --git a/c++/include/util/bitset/bmgamma.h b/c++/include/util/bitset/bmgamma.h index 1b632e9e..641123c2 100644 --- a/c++/include/util/bitset/bmgamma.h +++ b/c++/include/util/bitset/bmgamma.h @@ -42,25 +42,25 @@ template class gamma_decoder { public: - gamma_decoder(TBitIO& bin) : bin_(bin) + gamma_decoder(TBitIO& bin) BMNOEXEPT : bin_(bin) {} /** Start encoding sequence */ - void start() + void start() BMNOEXEPT {} /** Stop decoding sequence */ - void stop() + void stop() BMNOEXEPT {} /** Decode word */ - T operator()(void) + T operator()(void) BMNOEXEPT { unsigned l = bin_.eat_zero_bits(); bin_.get_bit(); // get border bit diff --git a/c++/include/util/bitset/bmintervals.h b/c++/include/util/bitset/bmintervals.h new file mode 100644 index 00000000..7ef98047 --- /dev/null +++ b/c++/include/util/bitset/bmintervals.h @@ 
-0,0 +1,781 @@ +#ifndef BMINTERVALS__H__INCLUDED__ +#define BMINTERVALS__H__INCLUDED__ + +/* +Copyright(c) 2002-2020 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +For more information please visit: http://bitmagic.io +*/ +/*! \file bmintervals.h + \brief Algorithms for bit ranges and intervals +*/ + +#ifndef BM__H__INCLUDED__ +// BitMagic utility headers do not include main "bm.h" declaration +// #include "bm.h" or "bm64.h" explicitly +# error missing include (bm.h or bm64.h) +#endif + +#include "bmdef.h" + +/** \defgroup bvintervals Algorithms for bit intervals + Algorithms and iterators for bit ranges and intervals + @ingroup bvector + */ + + +namespace bm +{ + +/*! + \brief forward iterator class to traverse bit-vector as ranges + + Traverse enumerator for forward walking bit-vector as intervals: + series of consecutive 1111s flanked with zeroes. + Enumerator can traverse the whole bit-vector or jump(go_to) to position. + + \ingroup bvintervals +*/ +template +class interval_enumerator +{ +public: +#ifndef BM_NO_STL + typedef std::input_iterator_tag iterator_category; +#endif + typedef BV bvector_type; + typedef typename bvector_type::size_type size_type; + typedef typename bvector_type::allocator_type allocator_type; + typedef bm::byte_buffer buffer_type; + typedef bm::pair pair_type; + +public: + /*! 
@name Construction and assignment */ + //@{ + + interval_enumerator() + : bv_(0), interval_(bm::id_max, bm::id_max), gap_ptr_(0) + {} + + /** + Construct enumerator for the bit-vector + */ + interval_enumerator(const BV& bv) + : bv_(&bv), interval_(bm::id_max, bm::id_max), gap_ptr_(0) + { + go_to_impl(0, false); + } + + /** + Construct enumerator for the specified position + @param bv - source bit-vector + @param start_pos - position on bit-vector to search for interval + @param extend_start - flag to extend interval start to the start if + true start happens to be less than start_pos + @sa go_to + */ + interval_enumerator(const BV& bv, size_type start_pos, bool extend_start) + : bv_(&bv), interval_(bm::id_max, bm::id_max), gap_ptr_(0) + { + go_to_impl(start_pos, extend_start); + } + + /** + Copy constructor + */ + interval_enumerator(const interval_enumerator& ien) + : bv_(ien.bv_), interval_(bm::id_max, bm::id_max), gap_ptr_(0) + { + go_to_impl(ien.start(), false); + } + + /** + Assignment operator (re-positions via go_to_impl at ien.start(); returns *this) + */ + interval_enumerator& operator=(const interval_enumerator& ien) + { + bv_ = ien.bv_; gap_ptr_ = 0; go_to_impl(ien.start(), false); + return *this; + } + +#ifndef BM_NO_CXX11 + /** move-ctor */ + interval_enumerator(interval_enumerator&& ien) BMNOEXCEPT + : bv_(0), interval_(bm::id_max, bm::id_max), gap_ptr_(0) + { + this->swap(ien); + } + + /** move assignment operator */ + interval_enumerator& operator=(interval_enumerator&& ien) BMNOEXCEPT + { + if (this != &ien) + this->swap(ien); + return *this; + } +#endif + + //@} + + + // ----------------------------------------------------------------- + + /*! 
@name Comparison methods all use start position to compare (interval end and source bit-vector are not compared) */ + //@{ + + bool operator==(const interval_enumerator& ien) const BMNOEXCEPT + { return (start() == ien.start()); } + bool operator!=(const interval_enumerator& ien) const BMNOEXCEPT + { return (start() != ien.start()); } + bool operator < (const interval_enumerator& ien) const BMNOEXCEPT + { return (start() < ien.start()); } + bool operator <= (const interval_enumerator& ien) const BMNOEXCEPT + { return (start() <= ien.start()); } + bool operator > (const interval_enumerator& ien) const BMNOEXCEPT + { return (start() > ien.start()); } + bool operator >= (const interval_enumerator& ien) const BMNOEXCEPT + { return (start() >= ien.start()); } + //@} + + + /// Return interval start/left as bit-vector coordinate 011110 [left..right] + size_type start() const BMNOEXCEPT; + /// Return interval end/right as bit-vector coordinate 011110 [left..right] + size_type end() const BMNOEXCEPT; + + const pair_type& operator*() const BMNOEXCEPT { return interval_; } + + /// Get interval pair + const pair_type& get() const BMNOEXCEPT { return interval_; } + + /// Returns true if enumerator is valid (false if traversal is done) + bool valid() const BMNOEXCEPT; + + // ----------------------------------------------------------------- + + /*! @name enumerator positioning */ + //@{ + + /*! + @brief Go to interval at specified position + Jump to position with interval. If interval is not available at + the specified position (0 bit) enumerator will find the next interval. + If interval is present we have an option to find interval start [left..] + and set enumerator from the effective start coordinate + + @param pos - position on bit-vector + @param extend_start - find effective start if it is less than the + go to position + @return true if enumerator remains valid after the jump + */ + bool go_to(size_type pos, bool extend_start = true); + + /*! 
Advance to the next interval + @return true if interval is available + @sa valid + */ + bool advance(); + + /*! \brief Prefix increment: advance to the next interval (see advance()). NOTE(review): declared BMNOEXCEPT while advance() is not - confirm advance() cannot throw */ + interval_enumerator& operator++() BMNOEXCEPT + { advance(); return *this; } + + /*! \brief Postfix increment: advance to the next interval, returns a copy made before the advance */ + interval_enumerator operator++(int) BMNOEXCEPT + { + interval_enumerator tmp = *this; + advance(); + return tmp; + } + //@} + + /** + swap enumerator with another one + */ + void swap(interval_enumerator& ien) BMNOEXCEPT; + +protected: + typedef typename bvector_type::block_idx_type block_idx_type; + typedef typename bvector_type::allocator_type bv_allocator_type; + typedef bm::heap_vector + gap_vector_type; + + /// Positioning implementation used by the constructors, go_to() and advance() + bool go_to_impl(size_type pos, bool extend_start); + + /// Turn FSM into invalid state (out of range) + void invalidate() BMNOEXCEPT; + +private: + const BV* bv_; ///!< bit-vector for traversal + gap_vector_type gap_buf_; ///!< GAP buf.vector for bit-block + pair_type interval_; ///!< current interval [first..second] + const bm::gap_word_t* gap_ptr_; ///!< current pointer in GAP block +}; + +//---------------------------------------------------------------------------- + +/*! 
+ \brief Returns true if range is all 1s flanked with 0s + Function performs the test on a closed range [left, right] + true interval is all 1s AND test(left-1)==false AND test(right+1)==false + Examples: + 01110 [1,3] - true + 11110 [0,3] - true + 11110 [1,3] - false + \param bv - bit-vector for check + \param left - index of the first bit of the range (swapped with right if right < left) + \param right - index of the last bit of the range (bm::id_max is reduced to id_max-1) + \return true if [left..right] is an interval, false otherwise (also when bv is not initialized) + + \ingroup bvintervals + + @sa is_all_one_range +*/ +template +bool is_interval(const BV& bv, + typename BV::size_type left, + typename BV::size_type right) BMNOEXCEPT +{ + typedef typename BV::block_idx_type block_idx_type; + + const typename BV::blocks_manager_type& bman = bv.get_blocks_manager(); + + if (!bman.is_init()) + return false; // nothing to do + + if (right < left) + bm::xor_swap(left, right); + if (left == bm::id_max) // out of range + return false; + if (right == bm::id_max) + --right; + + block_idx_type nblock_left = (left >> bm::set_block_shift); + block_idx_type nblock_right = (right >> bm::set_block_shift); + + if (nblock_left == nblock_right) // same block (fast case) + { + unsigned nbit_left = unsigned(left & bm::set_block_mask); + unsigned nbit_right = unsigned(right & bm::set_block_mask); + if ((nbit_left > 0) && (nbit_right < bm::gap_max_bits-1)) // both flanking bits are in this block too + { + unsigned i0, j0; + bm::get_block_coord(nblock_left, i0, j0); + const bm::word_t* block = bman.get_block_ptr(i0, j0); + bool b = bm::block_is_interval(block, nbit_left, nbit_right); + return b; + } + } + bool is_left, is_right, is_all_one; // general case: test flanks and range via bvector calls + is_left = left > 0 ? bv.test(left-1) : false; + if (is_left == false) + { + is_right = (right < (bm::id_max - 1)) ? bv.test(right + 1) : false; + if (is_left == false && is_right == false) + { + is_all_one = bv.is_all_one_range(left, right); + return is_all_one; + } + } + return false; +} + + +//---------------------------------------------------------------------------- + +/*! 
+ + \brief Reverse find index of first 1 bit gap (01110) starting from position + Reverse scan for the first 1 in a block of continuous 1s. + Method employs closed interval semantics: 0[pos..from] + + \param bv - bit-vector for search + \param from - position to start reverse search from + \param pos - [out] index of the found first 1 bit in a gap of bits + \return true if search returned result, false if not found + (start point is zero) + + \sa is_interval, find_interval_end + \ingroup bvintervals +*/ +template +bool find_interval_start(const BV& bv, + typename BV::size_type from, + typename BV::size_type& pos) BMNOEXCEPT +{ + typedef typename BV::size_type size_type; + typedef typename BV::block_idx_type block_idx_type; + + const typename BV::blocks_manager_type& bman = bv.get_blocks_manager(); + + if (!bman.is_init()) + return false; // nothing to do + if (!from) // position 0: nothing to scan backwards + { + pos = from; + return bv.test(from); + } + + block_idx_type nb = (from >> bm::set_block_shift); + unsigned i0, j0; + bm::get_block_coord(nb, i0, j0); + + size_type base_idx; + unsigned found_nbit; + + const bm::word_t* block = bman.get_block_ptr(i0, j0); + if (!block) + return false; + unsigned nbit = unsigned(from & bm::set_block_mask); + unsigned res = bm::block_find_interval_start(block, nbit, &found_nbit); + + switch (res) + { + case 0: // not interval + return false; + case 1: // interval found + pos = found_nbit + (nb * bm::gap_max_bits); + return true; + case 2: // keep scanning + base_idx = bm::get_block_start(i0, j0); + pos = base_idx + found_nbit; + if (!nb) // block 0: no previous block to scan + return true; + break; + default: + BM_ASSERT(0); + } // switch + + --nb; // continue the reverse scan from the previous block + bm::get_block_coord(nb, i0, j0); + bm::word_t*** blk_root = bman.top_blocks_root(); + + for (unsigned i = i0; true; --i) + { + bm::word_t** blk_blk = blk_root[i]; + if (!blk_blk) + return true; + if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) + { + pos = bm::get_super_block_start(i); + if (!i) + break; + continue; + } + unsigned j = (i == i0) ? 
j0 : 255; + for (; true; --j) + { + if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) // NOTE(review): looks unreachable - a FULL super-block is handled before this loop + { + pos = bm::get_block_start(i, j); + goto loop_j_end; // continue + } + + block = blk_blk[j]; + if (!block) + return true; + + res = bm::block_find_interval_start(block, + bm::gap_max_bits-1, &found_nbit); + switch (res) + { + case 0: // not interval here (but the scan was inside one, so the last pos is the result) + return true; + case 1: // interval found + base_idx = bm::get_block_start(i, j); + pos = base_idx + found_nbit; + return true; + case 2: // keep scanning + pos = bm::get_block_start(i, j); + break; + default: + BM_ASSERT(0); + } // switch + + loop_j_end: // continue point + if (!j) + break; + } // for j + + if (!i) + break; + } // for i + + return true; +} + + +//---------------------------------------------------------------------------- + +/*! + \brief Forward find index of last 1 bit gap (01110) starting from position + Forward scan for the last 1 in a block of continuous 1s. + Method employs closed interval semantics: [from..pos]0 + + \param bv - bit-vector for search + \param from - position to start forward search from + \param pos - [out] index of the found last 1 bit in a gap of bits + \return true if search returned result, false if not found + (start point is zero) + + \sa is_interval, find_interval_start + \ingroup bvintervals +*/ +template +bool find_interval_end(const BV& bv, + typename BV::size_type from, + typename BV::size_type & pos) BMNOEXCEPT +{ + typedef typename BV::block_idx_type block_idx_type; + + if (from == bm::id_max) + return false; + const typename BV::blocks_manager_type& bman = bv.get_blocks_manager(); + + if (!bman.is_init()) + return false; // nothing to do + if (from == bm::id_max-1) // last position: nothing to scan forward + { + pos = from; + return bv.test(from); + } + + block_idx_type nb = (from >> bm::set_block_shift); + unsigned i0, j0; + bm::get_block_coord(nb, i0, j0); + + unsigned found_nbit; + + const bm::word_t* block = bman.get_block_ptr(i0, j0); + if (!block) + return false; + 
unsigned nbit = unsigned(from & bm::set_block_mask); + unsigned res = bm::block_find_interval_end(block, nbit, &found_nbit); + switch (res) + { + case 0: // not interval + return false; + case 1: // interval found + pos = found_nbit + (nb * bm::gap_max_bits); + return true; + case 2: // keep scanning + pos = found_nbit + (nb * bm::gap_max_bits); + break; + default: + BM_ASSERT(0); + } // switch + + block_idx_type nblock_right = (bm::id_max >> bm::set_block_shift); + unsigned i_from, j_from, i_to, j_to; + bm::get_block_coord(nblock_right, i_to, j_to); + block_idx_type top_size = bman.top_block_size(); + if (i_to >= top_size) // limit the scan to top_block_size() entries + i_to = unsigned(top_size-1); + + ++nb; // continue the forward scan from the next block + bm::word_t*** blk_root = bman.top_blocks_root(); + bm::get_block_coord(nb, i_from, j_from); + + for (unsigned i = i_from; i <= i_to; ++i) + { + bm::word_t** blk_blk = blk_root[i]; + if (!blk_blk) + return true; + if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) + { + if (i > i_from) // whole super-block is inside the interval: skip it at once + { + pos += bm::gap_max_bits * bm::set_sub_array_size; + continue; + } + else + { + // TODO: optimization to avoid scanning rest of the super block + } + } + + unsigned j = (i == i_from) ? 
j_from : 0; + do + { + if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) + { + pos += bm::gap_max_bits; + continue; + } + + block = blk_blk[j]; + if (!block) + return true; + + res = bm::block_find_interval_end(block, 0, &found_nbit); + switch (res) + { + case 0: // not interval here (but the scan was inside one, so the last pos is the result) + return true; + case 1: // interval found + pos += found_nbit+1; + return true; + case 2: // keep scanning + pos += bm::gap_max_bits; + break; + default: + BM_ASSERT(0); + } // switch + } while (++j < bm::set_sub_array_size); + } // for i + + return true; +} + + + +//---------------------------------------------------------------------------- +// interval_enumerator implementation +//---------------------------------------------------------------------------- + +template +typename interval_enumerator::size_type +interval_enumerator::start() const BMNOEXCEPT +{ + return interval_.first; +} + +//---------------------------------------------------------------------------- + +template +typename interval_enumerator::size_type +interval_enumerator::end() const BMNOEXCEPT +{ + return interval_.second; +} + +//---------------------------------------------------------------------------- + +template +bool interval_enumerator::valid() const BMNOEXCEPT +{ + return (interval_.first != bm::id_max); // id_max start marks the invalid state (see invalidate()) +} + +//---------------------------------------------------------------------------- + +template +void interval_enumerator::invalidate() BMNOEXCEPT +{ + interval_.first = interval_.second = bm::id_max; +} + +//---------------------------------------------------------------------------- + +template +bool interval_enumerator::go_to(size_type pos, bool extend_start) +{ + return go_to_impl(pos, extend_start); +} + +//---------------------------------------------------------------------------- + +template +bool interval_enumerator::go_to_impl(size_type pos, bool extend_start) +{ + if (!bv_ || !bv_->is_init() || (pos >= bm::id_max)) // no vector, not initialized or out of range + { + invalidate(); + return false; + } + + bool found; + size_type 
start_pos; + + // go to prolog: identify the true interval start position + // (falls back to a forward search from pos when no interval start is found) + if (extend_start) + { + found = bm::find_interval_start(*bv_, pos, start_pos); + if (!found) + { + found = bv_->find(pos, start_pos); + if (!found) + { + invalidate(); + return false; + } + } + } + else + { + found = bv_->find(pos, start_pos); + if (!found) + { + invalidate(); + return false; + } + } + + // start position established, start decoding from it + interval_.first = pos = start_pos; + + block_idx_type nb = (pos >> bm::set_block_shift); + const typename BV::blocks_manager_type& bman = bv_->get_blocks_manager(); + unsigned i0, j0; + bm::get_block_coord(nb, i0, j0); + const bm::word_t* block = bman.get_block_ptr(i0, j0); + BM_ASSERT(block); + + if (block == FULL_BLOCK_FAKE_ADDR) + { + // super-long interval, find the end of it + found = bm::find_interval_end(*bv_, pos, interval_.second); + BM_ASSERT(found); + gap_ptr_ = 0; // no in-block cursor: advance() will re-position via go_to_impl() + return true; + } + + if (BM_IS_GAP(block)) + { + const bm::gap_word_t* BMRESTRICT gap_block = BMGAP_PTR(block); + unsigned nbit = unsigned(pos & bm::set_block_mask); + + unsigned is_set; + unsigned gap_pos = bm::gap_bfind(gap_block, nbit, &is_set); + BM_ASSERT(is_set); + + interval_.second = (nb * bm::gap_max_bits) + gap_block[gap_pos]; + if (gap_block[gap_pos] == bm::gap_max_bits-1) + { + // it is the end of the GAP block - run search + // (the interval may continue in the following blocks) + if (interval_.second == bm::id_max-1) + { + gap_ptr_ = 0; + return true; + } + found = bm::find_interval_end(*bv_, interval_.second + 1, start_pos); + if (found) + interval_.second = start_pos; + gap_ptr_ = 0; + return true; + } + gap_ptr_ = gap_block + gap_pos; + return true; + } + + // bit-block: turn to GAP and position there + // (the conversion buffer is sized once, on first use) + if (gap_buf_.size() == 0) + { + gap_buf_.resize(bm::gap_max_bits+64); + } + bm::gap_word_t* gap_tmp = gap_buf_.data(); + unsigned len = bm::bit_to_gap(gap_tmp, block, bm::gap_max_bits+64); + BM_ASSERT(len); + + + size_type base_idx = (nb * bm::gap_max_bits); + for (unsigned i = 1; i <= 
len; ++i) + { + size_type gap_pos = base_idx + gap_tmp[i]; + if (gap_pos >= pos) + { + if (gap_tmp[i] == bm::gap_max_bits - 1) + { + found = bm::find_interval_end(*bv_, gap_pos, interval_.second); + BM_ASSERT(found); + gap_ptr_ = 0; + return true; + } + + gap_ptr_ = &gap_tmp[i]; + interval_.second = gap_pos; + return true; + } + if (gap_tmp[i] == bm::gap_max_bits - 1) + break; + } // for + + BM_ASSERT(0); + + return false; +} + +//---------------------------------------------------------------------------- + +template +bool interval_enumerator::advance() +{ + BM_ASSERT(valid()); + + if (interval_.second == bm::id_max-1) + { + invalidate(); + return false; + } + block_idx_type nb = (interval_.first >> bm::set_block_shift); + + bool found; + if (gap_ptr_) // in GAP block + { + ++gap_ptr_; // entry closing the run of 0s after the current interval + if (*gap_ptr_ == bm::gap_max_bits-1) // GAP block end + { + return go_to_impl(((nb+1) * bm::gap_max_bits), false); + } + unsigned prev = *gap_ptr_; + + ++gap_ptr_; // entry closing the next run of 1s + BM_ASSERT(*gap_ptr_ > prev); + interval_.first = (nb * bm::gap_max_bits) + prev + 1; + if (*gap_ptr_ == bm::gap_max_bits-1) // GAP block end + { + found = bm::find_interval_end(*bv_, interval_.first, interval_.second); + BM_ASSERT(found); (void)found; + gap_ptr_ = 0; + return true; + } + interval_.second = (nb * bm::gap_max_bits) + *gap_ptr_; + return true; + } + return go_to_impl(interval_.second + 1, false); +} + +//---------------------------------------------------------------------------- + +template +void interval_enumerator::swap(interval_enumerator& ien) BMNOEXCEPT +{ + const BV* bv_tmp = bv_; + bv_ = ien.bv_; + ien.bv_ = bv_tmp; + + gap_buf_.swap(ien.gap_buf_); + bm::xor_swap(interval_.first, ien.interval_.first); + bm::xor_swap(interval_.second, ien.interval_.second); + + const bm::gap_word_t* gap_tmp = gap_ptr_; + gap_ptr_ = ien.gap_ptr_; + ien.gap_ptr_ = gap_tmp; +} + +//---------------------------------------------------------------------------- +// 
+//---------------------------------------------------------------------------- + + +} // namespace bm + +#include "bmundef.h" + +#endif diff --git a/c++/include/util/bitset/bmrandom.h b/c++/include/util/bitset/bmrandom.h index aca0829d..3f0e23f1 100644 --- a/c++/include/util/bitset/bmrandom.h +++ b/c++/include/util/bitset/bmrandom.h @@ -97,7 +97,7 @@ private: unsigned process_word(bm::word_t* blk_out, const bm::word_t* blk_src, unsigned nword, - unsigned take_count); + unsigned take_count) BMNOEXCEPT; static void get_random_array(bm::word_t* blk_out, @@ -106,7 +106,7 @@ private: unsigned count); static unsigned compute_take_count(unsigned bc, - size_type in_count, size_type sample_count); + size_type in_count, size_type sample_count) BMNOEXCEPT; private: @@ -357,9 +357,10 @@ void random_subset::get_subset(BV& bv_out, } template -unsigned random_subset::compute_take_count(unsigned bc, - size_type in_count, - size_type sample_count) +unsigned random_subset::compute_take_count( + unsigned bc, + size_type in_count, + size_type sample_count) BMNOEXCEPT { float block_percent = float(bc) / float(in_count); float bits_to_take = float(sample_count) * block_percent; @@ -404,7 +405,7 @@ void random_subset::get_block_subset(bm::word_t* blk_out, } // now transform vacant bits to array, then pick random elements // - unsigned arr_len = bit_convert_to_arr(bit_list_, + unsigned arr_len = bm::bit_convert_to_arr(bit_list_, sub_block_, bm::gap_max_bits, bm::gap_max_bits, @@ -418,7 +419,7 @@ template unsigned random_subset::process_word(bm::word_t* blk_out, const bm::word_t* blk_src, unsigned nword, - unsigned take_count) + unsigned take_count) BMNOEXCEPT { unsigned new_bits, mask; do diff --git a/c++/include/util/bitset/bmrs.h b/c++/include/util/bitset/bmrs.h index 14cce61e..08659062 100644 --- a/c++/include/util/bitset/bmrs.h +++ b/c++/include/util/bitset/bmrs.h @@ -59,7 +59,7 @@ public: rs_index(const rs_index& rsi); /// init arrays to zeros - void init() BMNOEXEPT; + void init() 
BMNOEXCEPT; /// copy rs index void copy_from(const rs_index& rsi); @@ -165,7 +165,7 @@ rs_index::rs_index(const rs_index& rsi) template -void rs_index::init() BMNOEXEPT +void rs_index::init() BMNOEXCEPT { sblock_count_.resize(0); sblock_row_idx_.resize(0); diff --git a/c++/include/util/bitset/bmserial.h b/c++/include/util/bitset/bmserial.h index ee65f1b0..5ab72838 100644 --- a/c++/include/util/bitset/bmserial.h +++ b/c++/include/util/bitset/bmserial.h @@ -75,12 +75,12 @@ template class serializer { public: - typedef BV bvector_type; - typedef typename bvector_type::allocator_type allocator_type; - typedef typename bvector_type::blocks_manager_type blocks_manager_type; - typedef typename bvector_type::statistics statistics_type; - typedef typename bvector_type::block_idx_type block_idx_type; - typedef typename bvector_type::size_type size_type; + typedef BV bvector_type; + typedef typename bvector_type::allocator_type allocator_type; + typedef typename bvector_type::blocks_manager_type blocks_manager_type; + typedef typename bvector_type::statistics statistics_type; + typedef typename bvector_type::block_idx_type block_idx_type; + typedef typename bvector_type::size_type size_type; typedef byte_buffer buffer; typedef bm::bv_ref_vector bv_ref_vector_type; @@ -113,7 +113,7 @@ public: @param clevel - compression level (0-5) @sa get_compression_level */ - void set_compression_level(unsigned clevel); + void set_compression_level(unsigned clevel) BMNOEXCEPT; /** Get compression level (0-5), Default 5 (recommended) @@ -127,7 +127,8 @@ public: Recommended: use 3 or 5 */ - unsigned get_compression_level() const { return compression_level_; } + unsigned get_compression_level() const BMNOEXCEPT + { return compression_level_; } //@} @@ -189,20 +190,21 @@ public: Return serialization counter vector @internal */ - const size_type* get_compression_stat() const { return compression_stat_; } + const size_type* get_compression_stat() const BMNOEXCEPT + { return compression_stat_; } 
/** Set GAP length serialization (serializes GAP levels of the original vector) @param value - when TRUE serialized vector includes GAP levels parameters */ - void gap_length_serialization(bool value); + void gap_length_serialization(bool value) BMNOEXCEPT; /** Set byte-order serialization (for cross platform compatibility) @param value - TRUE serialization format includes byte-order marker */ - void byte_order_serialization(bool value); + void byte_order_serialization(bool value) BMNOEXCEPT; /** Add skip-markers to serialization BLOB for faster range decode @@ -214,7 +216,7 @@ public: smaller interval means more bookmarks added to the skip list thus more increasing the BLOB size */ - void set_bookmarks(bool enable, unsigned bm_interval = 256); + void set_bookmarks(bool enable, unsigned bm_interval = 256) BMNOEXCEPT; /** Attach collection of reference vectors for XOR serialization @@ -227,20 +229,21 @@ public: Set current index in rer.vector collection (not a row idx or plain idx) */ - void set_curr_ref_idx(size_type ref_idx); + void set_curr_ref_idx(size_type ref_idx) BMNOEXCEPT; protected: /** Encode serialization header information */ - void encode_header(const BV& bv, bm::encoder& enc); + void encode_header(const BV& bv, bm::encoder& enc) BMNOEXCEPT; /*! Encode GAP block */ void encode_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc); /*! 
Encode GAP block with Elias Gamma coder */ - void gamma_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc); + void gamma_gap_block(const bm::gap_word_t* gap_block, + bm::encoder& enc) BMNOEXCEPT; /** Encode GAP block as delta-array with Elias Gamma coder @@ -248,29 +251,30 @@ protected: void gamma_gap_array(const bm::gap_word_t* gap_block, unsigned arr_len, bm::encoder& enc, - bool inverted = false); + bool inverted = false) BMNOEXCEPT; /// Encode bit-block as an array of bits void encode_bit_array(const bm::word_t* block, - bm::encoder& enc, bool inverted); + bm::encoder& enc, bool inverted) BMNOEXCEPT; void gamma_gap_bit_block(const bm::word_t* block, - bm::encoder& enc); + bm::encoder& enc) BMNOEXCEPT; void gamma_arr_bit_block(const bm::word_t* block, - bm::encoder& enc, bool inverted); + bm::encoder& enc, bool inverted) BMNOEXCEPT; void bienc_arr_bit_block(const bm::word_t* block, - bm::encoder& enc, bool inverted); + bm::encoder& enc, bool inverted) BMNOEXCEPT; /// encode bit-block as interpolated bit block of gaps - void bienc_gap_bit_block(const bm::word_t* block, bm::encoder& enc); + void bienc_gap_bit_block(const bm::word_t* block, + bm::encoder& enc) BMNOEXCEPT; void interpolated_arr_bit_block(const bm::word_t* block, - bm::encoder& enc, bool inverted); + bm::encoder& enc, bool inverted) BMNOEXCEPT; /// encode bit-block as interpolated gap block void interpolated_gap_bit_block(const bm::word_t* block, - bm::encoder& enc); + bm::encoder& enc) BMNOEXCEPT; /** Encode GAP block as an array with binary interpolated coder @@ -278,29 +282,29 @@ protected: void interpolated_gap_array(const bm::gap_word_t* gap_block, unsigned arr_len, bm::encoder& enc, - bool inverted); + bool inverted) BMNOEXCEPT; void interpolated_gap_array_v0(const bm::gap_word_t* gap_block, unsigned arr_len, bm::encoder& enc, - bool inverted); + bool inverted) BMNOEXCEPT; /*! 
Encode GAP block with using binary interpolated encoder */ void interpolated_encode_gap_block( - const bm::gap_word_t* gap_block, bm::encoder& enc); + const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT; /** Encode BIT block with repeatable runs of zeroes */ void encode_bit_interval(const bm::word_t* blk, bm::encoder& enc, - unsigned size_control); + unsigned size_control) BMNOEXCEPT; /** Encode bit-block using digest (hierarchical compression) */ void encode_bit_digest(const bm::word_t* blk, - bm::encoder& enc, - bm::id64_t d0); + bm::encoder& enc, + bm::id64_t d0) BMNOEXCEPT; /** Determine best representation for GAP block based @@ -314,25 +318,26 @@ protected: @internal */ - unsigned char find_gap_best_encoding(const bm::gap_word_t* gap_block); + unsigned char + find_gap_best_encoding(const bm::gap_word_t* gap_block) BMNOEXCEPT; /// Determine best representation for a bit-block - unsigned char find_bit_best_encoding(const bm::word_t* block); + unsigned char find_bit_best_encoding(const bm::word_t* block) BMNOEXCEPT; /// Determine best representation for a bit-block (level 5) - unsigned char find_bit_best_encoding_l5(const bm::word_t* block); + unsigned char find_bit_best_encoding_l5(const bm::word_t* block) BMNOEXCEPT; /// Reset all accumulated compression statistics - void reset_compression_stats(); + void reset_compression_stats() BMNOEXCEPT; - void reset_models() { mod_size_ = 0; } - void add_model(unsigned char mod, unsigned score); + void reset_models() BMNOEXCEPT { mod_size_ = 0; } + void add_model(unsigned char mod, unsigned score) BMNOEXCEPT; protected: /// Bookmark state structure struct bookmark_state { - bookmark_state(block_idx_type nb_range) + bookmark_state(block_idx_type nb_range) BMNOEXCEPT : ptr_(0), nb_(0), nb_range_(nb_range), bm_type_(0) { @@ -364,7 +369,7 @@ protected: */ static void process_bookmark(block_idx_type nb, bookmark_state& bookm, - bm::encoder& enc); + bm::encoder& enc) BMNOEXCEPT; private: serializer(const 
serializer&); @@ -421,6 +426,8 @@ class deseriaizer_base protected: typedef DEC decoder_type; typedef BLOCK_IDX block_idx_type; + typedef bm::bit_in bit_in_type; + protected: deseriaizer_base() : id_array_(0), bookmark_idx_(0), skip_offset_(0), skip_pos_(0) @@ -440,29 +447,31 @@ protected: bm::gap_word_t* dst_arr); /// Read binary interpolated list into a bit-set - void read_bic_arr(decoder_type& decoder, bm::word_t* blk); + void read_bic_arr(decoder_type& decoder, bm::word_t* blk) BMNOEXCEPT; /// Read binary interpolated gap blocks into a bitset - void read_bic_gap(decoder_type& decoder, bm::word_t* blk); + void read_bic_gap(decoder_type& decoder, bm::word_t* blk) BMNOEXCEPT; /// Read inverted binary interpolated list into a bit-set - void read_bic_arr_inv(decoder_type& decoder, bm::word_t* blk); + void read_bic_arr_inv(decoder_type& decoder, bm::word_t* blk) BMNOEXCEPT; /// Read digest0-type bit-block - void read_digest0_block(decoder_type& decoder, bm::word_t* blk); + void read_digest0_block(decoder_type& decoder, bm::word_t* blk) BMNOEXCEPT; /// read bit-block encoded as runs static - void read_0runs_block(decoder_type& decoder, bm::word_t* blk); + void read_0runs_block(decoder_type& decoder, bm::word_t* blk) BMNOEXCEPT; static - const char* err_msg() { return "BM::Invalid serialization format"; } + const char* err_msg() BMNOEXCEPT { return "BM::Invalid serialization format"; } /// Try to skip if skip bookmark is available within reach /// @return new block idx if skip went well /// - block_idx_type try_skip(decoder_type& decoder, block_idx_type nb, block_idx_type expect_nb); + block_idx_type try_skip(decoder_type& decoder, + block_idx_type nb, + block_idx_type expect_nb) BMNOEXCEPT; protected: bm::gap_word_t* id_array_; ///< ptr to idx array for temp decode use @@ -519,7 +528,7 @@ public: is not guaranteed to be absent @sa unset_range() */ - void set_range(size_type from, size_type to) + void set_range(size_type from, size_type to) BMNOEXCEPT { is_range_set_ = 
1; idx_from_ = from; idx_to_ = to; } @@ -528,7 +537,7 @@ public: Disable range deserialization @sa set_range() */ - void unset_range() { is_range_set_ = 0; } + void unset_range() BMNOEXCEPT { is_range_set_ = 0; } protected: typedef typename BV::blocks_manager_type blocks_manager_type; @@ -608,7 +617,7 @@ public: void set_range(size_type from, size_type to); /// disable range filtration - void unset_range() { is_range_set_ = false; } + void unset_range() BMNOEXCEPT { is_range_set_ = false; } size_type deserialize(bvector_type& bv, serial_iterator_type& sit, @@ -639,7 +648,8 @@ private: serial_iterator_type& sit, set_operation op); static - const char* err_msg() { return "BM::de-serialization format error"; } + const char* err_msg() BMNOEXCEPT + { return "BM::de-serialization format error"; } private: bool is_range_set_ = false; size_type nb_range_from_ = 0; @@ -675,7 +685,7 @@ public: void next(); /// skip all zero or all-one blocks - block_idx_type skip_mono_blocks(); + block_idx_type skip_mono_blocks() BMNOEXCEPT; /// read bit block, using logical operation unsigned get_bit_block(bm::word_t* dst_block, @@ -708,17 +718,17 @@ public: }; /// Returns iterator internal state - iterator_state state() const { return this->state_; } + iterator_state state() const BMNOEXCEPT { return this->state_; } - iterator_state get_state() const { return this->state_; } + iterator_state get_state() const BMNOEXCEPT { return this->state_; } /// Number of ids in the inverted list (valid for e_list_ids) - unsigned get_id_count() const { return this->id_cnt_; } + unsigned get_id_count() const BMNOEXCEPT { return this->id_cnt_; } /// Get last id from the id list - bm::id_t get_id() const { return this->last_id_; } + bm::id_t get_id() const BMNOEXCEPT { return this->last_id_; } /// Get current block index - block_idx_type block_idx() const { return this->block_idx_; } + block_idx_type block_idx() const BMNOEXCEPT { return this->block_idx_; } public: /// member function pointer for 
bitset-bitset get operations @@ -761,19 +771,19 @@ public: /// (Converts inverted list into bits) /// Returns number of words (bits) being read unsigned get_arr_bit(bm::word_t* dst_block, - bool clear_target=true); + bool clear_target=true) BMNOEXCEPT; /// Get current block type - unsigned get_block_type() const { return block_type_; } + unsigned get_block_type() const BMNOEXCEPT { return block_type_; } - unsigned get_bit(); + unsigned get_bit() BMNOEXCEPT; - void get_inv_arr(bm::word_t* block); + void get_inv_arr(bm::word_t* block) BMNOEXCEPT; /// Try to skip if skip bookmark is available within reach /// @return true if skip went well /// - bool try_skip(block_idx_type nb, block_idx_type expect_nb) + bool try_skip(block_idx_type nb, block_idx_type expect_nb) BMNOEXCEPT { block_idx_type new_nb = parent_type::try_skip(decoder_, nb, expect_nb); if (new_nb) @@ -1064,6 +1074,7 @@ serializer::serializer(bm::word_t* temp_block) gap_serial_(false), byte_order_serial_(true), sb_bookmarks_(false), + sb_range_(0), compression_level_(bm::set_compression_default), ref_vect_(0), ref_idx_(0), @@ -1097,7 +1108,7 @@ serializer::~serializer() template -void serializer::reset_compression_stats() +void serializer::reset_compression_stats() BMNOEXCEPT { for (unsigned i = 0; i < 256; ++i) compression_stat_[i] = 0; @@ -1105,30 +1116,30 @@ void serializer::reset_compression_stats() template -void serializer::set_compression_level(unsigned clevel) +void serializer::set_compression_level(unsigned clevel) BMNOEXCEPT { if (clevel <= bm::set_compression_max) compression_level_ = clevel; } template -void serializer::gap_length_serialization(bool value) +void serializer::gap_length_serialization(bool value) BMNOEXCEPT { gap_serial_ = value; } template -void serializer::byte_order_serialization(bool value) +void serializer::byte_order_serialization(bool value) BMNOEXCEPT { byte_order_serial_ = value; } template -void serializer::set_bookmarks(bool enable, unsigned bm_interval) +void 
serializer::set_bookmarks(bool enable, unsigned bm_interval) BMNOEXCEPT { sb_bookmarks_ = enable; if (enable) - { + { if (bm_interval > 512) bm_interval = 512; else @@ -1148,13 +1159,13 @@ void serializer::set_ref_vectors(const bv_ref_vector_type* ref_vect) } template -void serializer::set_curr_ref_idx(size_type ref_idx) +void serializer::set_curr_ref_idx(size_type ref_idx) BMNOEXCEPT { ref_idx_ = ref_idx; } template -void serializer::encode_header(const BV& bv, bm::encoder& enc) +void serializer::encode_header(const BV& bv, bm::encoder& enc) BMNOEXCEPT { const blocks_manager_type& bman = bv.get_blocks_manager(); @@ -1207,7 +1218,7 @@ void serializer::encode_header(const BV& bv, bm::encoder& enc) template void serializer::interpolated_encode_gap_block( - const bm::gap_word_t* gap_block, bm::encoder& enc) + const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT { unsigned len = bm::gap_length(gap_block); if (len > 4) // BIC encoding @@ -1266,7 +1277,8 @@ void serializer::interpolated_encode_gap_block( template -void serializer::gamma_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc) +void serializer::gamma_gap_block(const bm::gap_word_t* gap_block, + bm::encoder& enc) BMNOEXCEPT { unsigned len = gap_length(gap_block); if (len > 3 && (compression_level_ > 3)) // Use Elias Gamma encoding @@ -1307,7 +1319,7 @@ template void serializer::gamma_gap_array(const bm::gap_word_t* gap_array, unsigned arr_len, bm::encoder& enc, - bool inverted) + bool inverted) BMNOEXCEPT { unsigned char scode = inverted ? 
bm::set_block_arrgap_egamma_inv : bm::set_block_arrgap_egamma; @@ -1349,10 +1361,11 @@ void serializer::gamma_gap_array(const bm::gap_word_t* gap_array, template -void serializer::interpolated_gap_array_v0(const bm::gap_word_t* gap_block, - unsigned arr_len, - bm::encoder& enc, - bool inverted) +void serializer::interpolated_gap_array_v0( + const bm::gap_word_t* gap_block, + unsigned arr_len, + bm::encoder& enc, + bool inverted) BMNOEXCEPT { BM_ASSERT(arr_len <= 65535); unsigned char scode = inverted ? bm::set_block_arrgap_bienc_inv @@ -1399,7 +1412,7 @@ template void serializer::interpolated_gap_array(const bm::gap_word_t* gap_block, unsigned arr_len, bm::encoder& enc, - bool inverted) + bool inverted) BMNOEXCEPT { BM_ASSERT(arr_len <= 65535); @@ -1471,7 +1484,7 @@ void serializer::interpolated_gap_array(const bm::gap_word_t* gap_block, template -void serializer::add_model(unsigned char mod, unsigned score) +void serializer::add_model(unsigned char mod, unsigned score) BMNOEXCEPT { BM_ASSERT(mod_size_ < 64); // too many models (memory corruption?) 
scores_[mod_size_] = score; models_[mod_size_] = mod; @@ -1479,7 +1492,8 @@ void serializer::add_model(unsigned char mod, unsigned score) } template -unsigned char serializer::find_bit_best_encoding_l5(const bm::word_t* block) +unsigned char +serializer::find_bit_best_encoding_l5(const bm::word_t* block) BMNOEXCEPT { unsigned bc, bit_gaps; @@ -1566,7 +1580,8 @@ unsigned char serializer::find_bit_best_encoding_l5(const bm::word_t* block) } template -unsigned char serializer::find_bit_best_encoding(const bm::word_t* block) +unsigned char +serializer::find_bit_best_encoding(const bm::word_t* block) BMNOEXCEPT { reset_models(); @@ -1672,7 +1687,7 @@ unsigned char serializer::find_bit_best_encoding(const bm::word_t* block) template unsigned char -serializer::find_gap_best_encoding(const bm::gap_word_t* gap_block) +serializer::find_gap_best_encoding(const bm::gap_word_t* gap_block)BMNOEXCEPT { // heuristics and hard-coded rules to determine // the best representation for d-GAP block @@ -1731,9 +1746,9 @@ void serializer::encode_gap_block(const bm::gap_word_t* gap_block, bm::encod break; case bm::set_block_bit_1bit: - arr_len = gap_convert_to_arr(gap_temp_block, - gap_block, - bm::gap_equiv_len-10); + arr_len = bm::gap_convert_to_arr(gap_temp_block, + gap_block, + bm::gap_equiv_len-10); BM_ASSERT(arr_len == 1); enc.put_8(bm::set_block_bit_1bit); enc.put_16(gap_temp_block[0]); @@ -1779,7 +1794,7 @@ template void serializer::encode_bit_interval(const bm::word_t* blk, bm::encoder& enc, unsigned //size_control - ) + ) BMNOEXCEPT { enc.put_8(bm::set_block_bit_0runs); enc.put_8((blk[0]==0) ? 
0 : 1); // encode start @@ -1830,7 +1845,7 @@ void serializer::encode_bit_interval(const bm::word_t* blk, template void serializer::encode_bit_digest(const bm::word_t* block, bm::encoder& enc, - bm::id64_t d0) + bm::id64_t d0) BMNOEXCEPT { // evaluate a few "sure" models here and pick the best // @@ -1927,16 +1942,16 @@ void serializer::optimize_serialize_destroy(BV& bv, template void serializer::encode_bit_array(const bm::word_t* block, bm::encoder& enc, - bool inverted) + bool inverted) BMNOEXCEPT { unsigned arr_len; unsigned mask = inverted ? ~0u : 0u; // TODO: get rid of max bits - arr_len = bit_convert_to_arr(bit_idx_arr_.data(), - block, - bm::gap_max_bits, - bm::gap_max_bits_cmrz, - mask); + arr_len = bm::bit_convert_to_arr(bit_idx_arr_.data(), + block, + bm::gap_max_bits, + bm::gap_max_bits_cmrz, + mask); if (arr_len) { unsigned char scode = @@ -1950,7 +1965,7 @@ void serializer::encode_bit_array(const bm::word_t* block, template void serializer::gamma_gap_bit_block(const bm::word_t* block, - bm::encoder& enc) + bm::encoder& enc) BMNOEXCEPT { unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_equiv_len); BM_ASSERT(len); (void)len; @@ -1959,7 +1974,8 @@ void serializer::gamma_gap_bit_block(const bm::word_t* block, template void serializer::gamma_arr_bit_block(const bm::word_t* block, - bm::encoder& enc, bool inverted) + bm::encoder& enc, + bool inverted) BMNOEXCEPT { unsigned mask = inverted ? ~0u : 0u; unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(), @@ -1978,7 +1994,8 @@ void serializer::gamma_arr_bit_block(const bm::word_t* block, template void serializer::bienc_arr_bit_block(const bm::word_t* block, - bm::encoder& enc, bool inverted) + bm::encoder& enc, + bool inverted) BMNOEXCEPT { unsigned mask = inverted ? 
~0u : 0u; unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(), @@ -1996,7 +2013,7 @@ void serializer::bienc_arr_bit_block(const bm::word_t* block, template void serializer::interpolated_gap_bit_block(const bm::word_t* block, - bm::encoder& enc) + bm::encoder& enc) BMNOEXCEPT { unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits); BM_ASSERT(len); (void)len; @@ -2006,7 +2023,7 @@ void serializer::interpolated_gap_bit_block(const bm::word_t* block, template void serializer::bienc_gap_bit_block(const bm::word_t* block, - bm::encoder& enc) + bm::encoder& enc) BMNOEXCEPT { unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits); BM_ASSERT(len); (void)len; @@ -2052,8 +2069,10 @@ void serializer::bienc_gap_bit_block(const bm::word_t* block, template -void serializer::interpolated_arr_bit_block(const bm::word_t* block, - bm::encoder& enc, bool inverted) +void +serializer::interpolated_arr_bit_block(const bm::word_t* block, + bm::encoder& enc, + bool inverted) BMNOEXCEPT { unsigned mask = inverted ? 
~0u : 0u; unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(), @@ -2134,7 +2153,7 @@ void serializer::interpolated_arr_bit_block(const bm::word_t* block, template void serializer::process_bookmark(block_idx_type nb, bookmark_state& bookm, - bm::encoder& enc) + bm::encoder& enc) BMNOEXCEPT { BM_ASSERT(bookm.nb_range_); @@ -2786,8 +2805,6 @@ unsigned deseriaizer_base::read_id_list( unsigned block_type, bm::gap_word_t* dst_arr) { - typedef bit_in bit_in_type; - bm::gap_word_t len = 0; switch (block_type) @@ -2867,12 +2884,12 @@ unsigned deseriaizer_base::read_id_list( } template -void deseriaizer_base::read_bic_arr(decoder_type& dec, - bm::word_t* blk) +void +deseriaizer_base::read_bic_arr(decoder_type& dec, + bm::word_t* blk) BMNOEXCEPT { BM_ASSERT(!BM_IS_GAP(blk)); - typedef bit_in bit_in_type; bm::gap_word_t min_v = dec.get_16(); bm::gap_word_t max_v = dec.get_16(); unsigned arr_len = dec.get_16(); @@ -2890,7 +2907,9 @@ void deseriaizer_base::read_bic_arr(decoder_type& dec, } template -void deseriaizer_base::read_bic_arr_inv(decoder_type& decoder, bm::word_t* blk) +void +deseriaizer_base::read_bic_arr_inv(decoder_type& decoder, + bm::word_t* blk) BMNOEXCEPT { // TODO: optimization bm::bit_block_set(blk, 0); @@ -2899,18 +2918,16 @@ void deseriaizer_base::read_bic_arr_inv(decoder_type& decoder, } template -void deseriaizer_base::read_bic_gap(decoder_type& dec, bm::word_t* blk) +void deseriaizer_base::read_bic_gap(decoder_type& dec, + bm::word_t* blk) BMNOEXCEPT { BM_ASSERT(!BM_IS_GAP(blk)); - typedef bit_in bit_in_type; - bm::gap_word_t head = dec.get_8(); unsigned arr_len = dec.get_16(); bm::gap_word_t min_v = dec.get_16(); BM_ASSERT(arr_len <= bie_cut_off); - id_array_[0] = head; id_array_[1] = min_v; @@ -2920,15 +2937,14 @@ void deseriaizer_base::read_bic_gap(decoder_type& dec, bm::word_ bin.bic_decode_u16(&id_array_[2], arr_len-2, min_v, 65535); if (!IS_VALID_ADDR(blk)) - { return; - } bm::gap_add_to_bitset(blk, id_array_, arr_len); } template -void 
deseriaizer_base::read_digest0_block(decoder_type& dec, - bm::word_t* block) +void deseriaizer_base::read_digest0_block( + decoder_type& dec, + bm::word_t* block) BMNOEXCEPT { bm::id64_t d0 = dec.get_64(); while (d0) @@ -2966,8 +2982,9 @@ void deseriaizer_base::read_digest0_block(decoder_type& dec, } template -void deseriaizer_base::read_0runs_block(decoder_type& dec, - bm::word_t* blk) +void deseriaizer_base::read_0runs_block( + decoder_type& dec, + bm::word_t* blk) BMNOEXCEPT { //TODO: optimization if block exists and it is OR-ed read bm::bit_block_set(blk, 0); @@ -2995,13 +3012,13 @@ void deseriaizer_base::read_0runs_block(decoder_type& dec, template -void deseriaizer_base::read_gap_block(decoder_type& decoder, +void +deseriaizer_base::read_gap_block(decoder_type& decoder, unsigned block_type, bm::gap_word_t* dst_block, bm::gap_word_t& gap_head) { - typedef bit_in bit_in_type; - +// typedef bit_in bit_in_type; switch (block_type) { case set_block_gap: @@ -3028,7 +3045,7 @@ void deseriaizer_base::read_gap_block(decoder_type& decoder, for (gap_word_t k = 0; k < len; ++k) { gap_word_t bit_idx = decoder.get_16(); - gap_add_value(dst_block, bit_idx); + bm::gap_add_value(dst_block, bit_idx); } // for } break; @@ -3126,7 +3143,7 @@ typename deseriaizer_base::block_idx_type deseriaizer_base::try_skip( decoder_type& decoder, block_idx_type nb, - block_idx_type expect_nb) + block_idx_type expect_nb) BMNOEXCEPT { if (skip_offset_) // skip bookmark is available { @@ -3156,26 +3173,22 @@ deseriaizer_base::try_skip( nb_sync = decoder.get_32(); break; case set_nb_sync_mark48: + nb_sync = block_idx_type(decoder.get_48()); #ifndef BM64ADDR BM_ASSERT(0); - #ifndef BM_NO_STL - throw std::logic_error(this->err_msg()); - #else - BM_THROW(BM_ERR_SERIALFORMAT); - #endif + decoder.set_pos(save_pos); + skip_offset_ = 0; + return 0; // invalid bookmark from 64-bit serialization #endif - nb_sync = block_idx_type(decoder.get_48()); break; case set_nb_sync_mark64: + nb_sync = 
block_idx_type(decoder.get_64()); #ifndef BM64ADDR BM_ASSERT(0); - #ifndef BM_NO_STL - throw std::logic_error(this->err_msg()); - #else - BM_THROW(BM_ERR_SERIALFORMAT); - #endif + decoder.set_pos(save_pos); + skip_offset_ = 0; + return 0; // invalid bookmark from 64-bit serialization #endif - nb_sync = block_idx_type(decoder.get_64()); break; default: BM_ASSERT(0); @@ -3187,8 +3200,6 @@ deseriaizer_base::try_skip( nb_sync += nb; if (nb_sync <= expect_nb) // within reach { - //block_idx_ = nb_sync; - //state_ = e_blocks; skip_offset_ = 0; return nb_sync; } @@ -3593,6 +3604,7 @@ size_t deserializer::deserialize(bvector_type& bv, { // 64-bit vector cannot be deserialized into 32-bit BM_ASSERT(sizeof(block_idx_type)==8); + bv_size = (block_idx_type)dec.get_64(); #ifndef BM64ADDR #ifndef BM_NO_STL throw std::logic_error(this->err_msg()); @@ -3600,7 +3612,6 @@ size_t deserializer::deserialize(bvector_type& bv, BM_THROW(BM_ERR_SERIALFORMAT); #endif #endif - bv_size = (block_idx_type)dec.get_64(); } else bv_size = dec.get_32(); @@ -3715,12 +3726,12 @@ size_t deserializer::deserialize(bvector_type& bv, goto process_full_blocks; #else BM_ASSERT(0); // 32-bit vector cannot read 64-bit + dec.get_64(); #ifndef BM_NO_STL throw std::logic_error(this->err_msg()); #else BM_THROW(BM_ERR_SERIALFORMAT); #endif - dec.get_64(); #endif process_full_blocks: { @@ -3957,7 +3968,7 @@ size_t deserializer::deserialize(bvector_type& bv, template void deserializer::xor_decode(size_type x_ref_idx, bm::id64_t x_ref_d64, blocks_manager_type& bman, - block_idx_type nb) + block_idx_type nb) { BM_ASSERT(ref_vect_); @@ -4098,7 +4109,15 @@ serial_stream_iterator::serial_stream_iterator(const unsigned ch } state_ = e_blocks; } - block_idx_arr_ = (gap_word_t*) ::malloc(sizeof(gap_word_t) * bm::gap_max_bits); + block_idx_arr_=(gap_word_t*)::malloc(sizeof(gap_word_t) * bm::gap_max_bits); + if (!block_idx_arr_) + { + #ifndef BM_NO_STL + throw std::bad_alloc(); + #else + BM_THROW(BM_ERR_BADALLOC); + #endif + 
} this->id_array_ = block_idx_arr_; } @@ -4335,7 +4354,7 @@ void serial_stream_iterator::next() template typename serial_stream_iterator::block_idx_type -serial_stream_iterator::skip_mono_blocks() +serial_stream_iterator::skip_mono_blocks() BMNOEXCEPT { BM_ASSERT(state_ == e_zero_blocks || state_ == e_one_blocks); if (!mono_block_cnt_) @@ -4350,7 +4369,8 @@ serial_stream_iterator::skip_mono_blocks() } template -void serial_stream_iterator::get_inv_arr(bm::word_t* block) +void +serial_stream_iterator::get_inv_arr(bm::word_t* block) BMNOEXCEPT { gap_word_t len = decoder_.get_16(); if (block) @@ -4358,7 +4378,7 @@ void serial_stream_iterator::get_inv_arr(bm::word_t* block) bm::bit_block_set(block, ~0u); for (unsigned k = 0; k < len; ++k) { - gap_word_t bit_idx = decoder_.get_16(); + bm::gap_word_t bit_idx = decoder_.get_16(); bm::clear_bit(block, bit_idx); } } @@ -5519,8 +5539,8 @@ serial_stream_iterator::get_bit_block_COUNT_SUB_BA( template unsigned serial_stream_iterator::get_arr_bit( - bm::word_t* dst_block, - bool clear_target) + bm::word_t* dst_block, + bool clear_target) BMNOEXCEPT { BM_ASSERT(this->block_type_ == set_block_arrbit || this->block_type_ == set_block_bit_1bit); @@ -5547,17 +5567,16 @@ unsigned serial_stream_iterator::get_arr_bit( else { if (this->block_type_ == set_block_bit_1bit) - { - return 1; // nothing to do: len var already consumed 16bits - } - // fwd the decocing stream + return 1; // nothing to do: len var already consumed 16 bits + + // fwd the decode stream decoder_.seek(len * 2); } return len; } template -unsigned serial_stream_iterator::get_bit() +unsigned serial_stream_iterator::get_bit() BMNOEXCEPT { BM_ASSERT(this->block_type_ == set_block_bit_1bit); ++(this->block_idx_); diff --git a/c++/include/util/bitset/bmsparsevec.h b/c++/include/util/bitset/bmsparsevec.h index fed6f242..536ae7b5 100644 --- a/c++/include/util/bitset/bmsparsevec.h +++ b/c++/include/util/bitset/bmsparsevec.h @@ -37,7 +37,7 @@ For more information please visit: 
http://bitmagic.io #include "bmtrans.h" -#include "bmalgo.h" +#include "bmalgo_impl.h" #include "bmbuffer.h" #include "bmbmatrix.h" #include "bmdef.h" @@ -113,10 +113,10 @@ public: class reference { public: - reference(sparse_vector& sv, size_type idx) BMNOEXEPT + reference(sparse_vector& sv, size_type idx) BMNOEXCEPT : sv_(sv), idx_(idx) {} - operator value_type() const { return sv_.get(idx_); } + operator value_type() const BMNOEXCEPT { return sv_.get(idx_); } reference& operator=(const reference& ref) { sv_.set(idx_, (value_type)ref); @@ -127,9 +127,9 @@ public: sv_.set(idx_, val); return *this; } - bool operator==(const reference& ref) const + bool operator==(const reference& ref) const BMNOEXCEPT { return bool(*this) == bool(ref); } - bool is_null() const { return sv_.is_null(idx_); } + bool is_null() const BMNOEXCEPT { return sv_.is_null(idx_); } private: sparse_vector& sv_; size_type idx_; @@ -169,30 +169,30 @@ public: typedef value_type& reference; public: - const_iterator(); - const_iterator(const sparse_vector_type* sv); - const_iterator(const sparse_vector_type* sv, size_type pos); - const_iterator(const const_iterator& it); + const_iterator() BMNOEXCEPT; + const_iterator(const sparse_vector_type* sv) BMNOEXCEPT; + const_iterator(const sparse_vector_type* sv, size_type pos) BMNOEXCEPT; + const_iterator(const const_iterator& it) BMNOEXCEPT; - bool operator==(const const_iterator& it) const + bool operator==(const const_iterator& it) const BMNOEXCEPT { return (pos_ == it.pos_) && (sv_ == it.sv_); } - bool operator!=(const const_iterator& it) const + bool operator!=(const const_iterator& it) const BMNOEXCEPT { return ! 
operator==(it); } - bool operator < (const const_iterator& it) const + bool operator < (const const_iterator& it) const BMNOEXCEPT { return pos_ < it.pos_; } - bool operator <= (const const_iterator& it) const + bool operator <= (const const_iterator& it) const BMNOEXCEPT { return pos_ <= it.pos_; } - bool operator > (const const_iterator& it) const + bool operator > (const const_iterator& it) const BMNOEXCEPT { return pos_ > it.pos_; } - bool operator >= (const const_iterator& it) const + bool operator >= (const const_iterator& it) const BMNOEXCEPT { return pos_ >= it.pos_; } /// \brief Get current position (value) - value_type operator*() const { return this->value(); } + value_type operator*() const { return this->value(); } /// \brief Advance to the next available value - const_iterator& operator++() { this->advance(); return *this; } + const_iterator& operator++() BMNOEXCEPT { this->advance(); return *this; } /// \brief Advance to the next available value const_iterator& operator++(int) @@ -203,24 +203,25 @@ public: value_type value() const; /// \brief Get NULL status - bool is_null() const; + bool is_null() const BMNOEXCEPT; /// Returns true if iterator is at a valid position - bool valid() const { return pos_ != bm::id_max; } + bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; } /// Invalidate current iterator - void invalidate() { pos_ = bm::id_max; } + void invalidate() BMNOEXCEPT { pos_ = bm::id_max; } /// Current position (index) in the vector - size_type pos() const { return pos_; } + size_type pos() const BMNOEXCEPT{ return pos_; } /// re-position to a specified position - void go_to(size_type pos); + void go_to(size_type pos) BMNOEXCEPT; /// advance iterator forward by one - void advance(); + /// @return true if it is still valid + bool advance() BMNOEXCEPT; - void skip_zero_values(); + void skip_zero_values() BMNOEXCEPT; private: enum buf_size_e { @@ -232,7 +233,6 @@ public: size_type pos_; ///!< Position mutable buffer_type buffer_; ///!< 
value buffer mutable value_type* buf_ptr_; ///!< position in the buffer - mutable allocator_pool_type pool_; }; /** @@ -313,7 +313,7 @@ public: Get access to not-null vector @internal */ - bvector_type* get_null_bvect() const { return bv_null_; } + bvector_type* get_null_bvect() const BMNOEXCEPT { return bv_null_; } /** add value to the buffer without changing the NULL vector @param v - value to push back @@ -323,9 +323,9 @@ public: size_type add_value_no_null(value_type v); /** - Reconf back inserter not to touch the NULL vector + Reconfigure back inserter not to touch the NULL vector */ - void disable_set_null() { set_not_null_ = false; } + void disable_set_null() BMNOEXCEPT { set_not_null_ = false; } // --------------------------------------------------------------- protected: @@ -387,11 +387,11 @@ public: #ifndef BM_NO_CXX11 /*! move-ctor */ - sparse_vector(sparse_vector&& sv) BMNOEXEPT; + sparse_vector(sparse_vector&& sv) BMNOEXCEPT; /*! move assignmment operator */ - sparse_vector& operator = (sparse_vector&& sv) BMNOEXEPT + sparse_vector& operator = (sparse_vector&& sv) BMNOEXCEPT { if (this != &sv) { @@ -402,7 +402,7 @@ public: } #endif - ~sparse_vector() BMNOEXEPT; + ~sparse_vector() BMNOEXCEPT; ///@} @@ -411,14 +411,16 @@ public: ///@{ /** \brief Operator to get write access to an element */ - reference operator[](size_type idx) { return reference(*this, idx); } + reference operator[](size_type idx) BMNOEXCEPT + { return reference(*this, idx); } /*! \brief get specified element without bounds checking \param idx - element index \return value of the element */ - value_type operator[](size_type idx) const { return this->get(idx); } + value_type operator[](size_type idx) const BMNOEXCEPT + { return this->get(idx); } /*! \brief access specified element with bounds checking @@ -431,7 +433,7 @@ public: \param idx - element index \return value of the element */ - value_type get(size_type idx) const; + value_type get(size_type idx) const BMNOEXCEPT; /*!
\brief set specified element with bounds checking and automatic resize @@ -485,21 +487,24 @@ public: //@{ /** Provide const iterator access to container content */ - const_iterator begin() const; + const_iterator begin() const BMNOEXCEPT; /** Provide const iterator access to the end */ - const_iterator end() const { return const_iterator(this, bm::id_max); } + const_iterator end() const BMNOEXCEPT + { return const_iterator(this, bm::id_max); } /** Get const_itertor re-positioned to specific element @param idx - position in the sparse vector */ - const_iterator get_const_iterator(size_type idx) const { return const_iterator(this, idx); } + const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT + { return const_iterator(this, idx); } /** Provide back insert iterator Back insert iterator implements buffered insertion, which is faster, than random access or push_back */ - back_insert_iterator get_back_inserter() { return back_insert_iterator(this); } + back_insert_iterator get_back_inserter() + { return back_insert_iterator(this); } ///@} @@ -515,7 +520,7 @@ public: /** \brief trait if sparse vector is "compressed" (false) */ static - bool is_compressed() { return false; } + bool is_compressed() BMNOEXCEPT { return false; } ///@} @@ -608,14 +613,14 @@ public: /*! \brief content exchange */ - void swap(sparse_vector& sv) BMNOEXEPT; + void swap(sparse_vector& sv) BMNOEXCEPT; // ------------------------------------------------------------ /*! @name Clear */ ///@{ /*! \brief resize to zero, free memory */ - void clear() BMNOEXEPT; + void clear() BMNOEXCEPT; /*! \brief clear range (assign bit 0 for all plains) @@ -636,12 +641,12 @@ public: /*! \brief return size of the vector \return size of sparse vector */ - size_type size() const { return this->size_; } + size_type size() const BMNOEXCEPT { return this->size_; } /*! 
\brief return true if vector is empty \return true if empty */ - bool empty() const { return (size() == 0); } + bool empty() const BMNOEXCEPT { return (size() == 0); } /*! \brief resize vector \param sz - new size @@ -663,7 +668,7 @@ public: \return true, if it is the same */ bool equal(const sparse_vector& sv, - bm::null_support null_able = bm::use_null) const; + bm::null_support null_able = bm::use_null) const BMNOEXCEPT; ///@} @@ -679,7 +684,7 @@ public: \return 0 - equal, < 0 - vect[i] < str, >0 otherwise */ - int compare(size_type idx, const value_type val) const; + int compare(size_type idx, const value_type val) const BMNOEXCEPT; ///@} @@ -694,8 +699,9 @@ public: \param stat - memory allocation statistics after optimization */ void optimize(bm::word_t* temp_block = 0, - typename bvector_type::optmode opt_mode = bvector_type::opt_compress, - typename sparse_vector::statistics* stat = 0); + typename bvector_type::optmode opt_mode = bvector_type::opt_compress, + typename sparse_vector::statistics* stat = 0); + /*! 
\brief Optimize sizes of GAP blocks @@ -715,7 +721,8 @@ public: @sa statistics */ - void calc_stat(struct sparse_vector::statistics* st) const; + void calc_stat( + struct sparse_vector::statistics* st) const BMNOEXCEPT; ///@} // ------------------------------------------------------------ @@ -789,7 +796,6 @@ public: \param offset - target index in the sparse vector to export from \param zero_mem - set to false if target array is pre-initialized with 0s to avoid performance penalty - \param pool_ptr - optional pointer to block allocation pool \return number of exported elements \sa decode @@ -799,8 +805,7 @@ public: size_type extract(value_type* arr, size_type size, size_type offset = 0, - bool zero_mem = true, - allocator_pool_type* pool_ptr = 0) const; + bool zero_mem = true) const BMNOEXCEPT2; /** \brief extract small window without use of masking vector \sa decode @@ -824,7 +829,7 @@ public: \internal */ static - size_type translate_address(size_type i) { return i; } + size_type translate_address(size_type i) BMNOEXCEPT { return i; } /** \brief throw range error @@ -845,24 +850,24 @@ public: \brief find position of compressed element by its rank */ static - bool find_rank(size_type rank, size_type& pos); + bool find_rank(size_type rank, size_type& pos) BMNOEXCEPT; /** \brief size of sparse vector (may be different for RSC) */ - size_type effective_size() const { return size(); } + size_type effective_size() const BMNOEXCEPT { return size(); } /** \brief Always 1 (non-matrix type) */ - size_type effective_vector_max() const { return 1; } + size_type effective_vector_max() const BMNOEXCEPT { return 1; } ///@} /// Set allocator pool for local (non-threaded) /// memory cyclic(lots of alloc-free ops) opertations /// - void set_allocator_pool(allocator_pool_type* pool_ptr); + void set_allocator_pool(allocator_pool_type* pool_ptr) BMNOEXCEPT; protected: enum octet_plains @@ -886,20 +891,26 @@ protected: void insert_value_no_null(size_type idx, value_type v); void 
resize_internal(size_type sz) { resize(sz); } - size_type size_internal() const { return size(); } + size_type size_internal() const BMNOEXCEPT { return size(); } - bool is_remap() const { return false; } - size_t remap_size() const { return 0; } - const unsigned char* get_remap_buffer() const { return 0; } - unsigned char* init_remap_buffer() { return 0; } - void set_remap() { } + bool is_remap() const BMNOEXCEPT { return false; } + size_t remap_size() const BMNOEXCEPT { return 0; } + const unsigned char* get_remap_buffer() const BMNOEXCEPT { return 0; } + unsigned char* init_remap_buffer() BMNOEXCEPT { return 0; } + void set_remap() BMNOEXCEPT { } bool resolve_range(size_type from, size_type to, - size_type* idx_from, size_type* idx_to) const + size_type* idx_from, size_type* idx_to) const BMNOEXCEPT { *idx_from = from; *idx_to = to; return true; } + /// Increment element by 1 without changing NULL vector or size + void inc_no_null(size_type idx); + + /// increment by v without changing NULL vector or size + void inc_no_null(size_type idx, value_type v); + protected: template friend class rsc_sparse_vector; template friend class sparse_vector_scanner; @@ -933,7 +944,7 @@ sparse_vector::sparse_vector(const sparse_vector& sv) #ifndef BM_NO_CXX11 template -sparse_vector::sparse_vector(sparse_vector&& sv) BMNOEXEPT +sparse_vector::sparse_vector(sparse_vector&& sv) BMNOEXCEPT { parent_type::swap(sv); } @@ -944,13 +955,13 @@ sparse_vector::sparse_vector(sparse_vector&& sv) BMNOEXEPT //--------------------------------------------------------------------- template -sparse_vector::~sparse_vector() BMNOEXEPT +sparse_vector::~sparse_vector() BMNOEXCEPT {} //--------------------------------------------------------------------- template -void sparse_vector::swap(sparse_vector& sv) BMNOEXEPT +void sparse_vector::swap(sparse_vector& sv) BMNOEXCEPT { parent_type::swap(sv); } @@ -1080,19 +1091,7 @@ sparse_vector::decode(value_type* arr, size_type dec_size, bool zero_mem) const {
- if (dec_size < 32) - { - return extract_range(arr, dec_size, idx_from, zero_mem); - } - return extract_plains(arr, dec_size, idx_from, zero_mem); - // TODO: write proper extract() based on for_each_range() and a visitor - /* - if (dec_size < 1024) - { - return extract_plains(arr, dec_size, idx_from, zero_mem); - } return extract(arr, dec_size, idx_from, zero_mem); - */ } //--------------------------------------------------------------------- @@ -1373,89 +1372,65 @@ sparse_vector::extract_plains(value_type* arr, template typename sparse_vector::size_type -sparse_vector::extract(value_type* arr, +sparse_vector::extract(value_type* BMRESTRICT arr, size_type size, size_type offset, - bool zero_mem, - allocator_pool_type* pool_ptr) const + bool zero_mem) const BMNOEXCEPT2 { /// Decoder functor /// @internal /// struct sv_decode_visitor_func { - sv_decode_visitor_func(value_type* varr, + sv_decode_visitor_func(value_type* BMRESTRICT varr, value_type mask, - size_type off) - : arr_(varr), mask_(mask), off_(off) + size_type off) BMNOEXCEPT2 + : arr_(varr), mask_(mask), sv_off_(off) {} - - void add_bits(size_type arr_offset, const unsigned char* bits, unsigned bits_size) + + void add_bits(size_type bv_offset, + const unsigned char* bits, unsigned bits_size) BMNOEXCEPT { - size_type idx_base = arr_offset - off_; - const value_type m = mask_; - unsigned i = 0; - for (; i < bits_size; ++i) - arr_[idx_base + bits[i]] |= m; + // can be negative (-1) when bv base offset = 0 and sv = 1,2.. 
+ size_type base = bv_offset - sv_off_; + value_type m = mask_; + for (unsigned i = 0; i < bits_size; ++i) + arr_[bits[i] + base] |= m; } - - void add_range(size_type arr_offset, unsigned sz) + void add_range(size_type bv_offset, size_type sz) BMNOEXCEPT { - size_type idx_base = arr_offset - off_; - const value_type m = mask_; - for (unsigned i = 0; i < sz; ++i) - arr_[i + idx_base] |= m; + auto base = bv_offset - sv_off_; + value_type m = mask_; + for (size_type i = 0; i < sz; ++i) + arr_[i + base] |= m; } - value_type* arr_; - value_type mask_; - size_type off_; - }; + value_type* BMRESTRICT arr_; ///< target array for reverse transpose + value_type mask_; ///< bit-plane mask + size_type sv_off_; ///< SV read offset + }; - if (size == 0) + if (!size) return 0; if (zero_mem) ::memset(arr, 0, sizeof(value_type)*size); - size_type start = offset; - size_type end = start + size; + size_type end = offset + size; if (end > this->size_) - { end = this->size_; - } - - bool masked_scan = !(offset == 0 && size == this->size()); - if (masked_scan) // use temp vector to decompress the area - { - bvector_type bv_mask; - bv_mask.set_allocator_pool(pool_ptr); - - for (size_type i = 0; i < parent_type::value_bits(); ++i) - { - const bvector_type* bv = this->bmatr_.get_row(i); - if (bv) - { - bv_mask.copy_range(*bv, offset, end - 1); - sv_decode_visitor_func func(arr, (value_type(1) << i), offset); - bm::for_each_bit(bv_mask, func); - } - } // for i - } - else - { - for (size_type i = 0; i < parent_type::value_bits(); ++i) - { - const bvector_type* bv = this->bmatr_.get_row(i); - if (bv) - { - sv_decode_visitor_func func(arr, (value_type(1) << i), 0); - bm::for_each_bit(*bv, func); - } - } // for i - } - return end - start; + sv_decode_visitor_func func(arr, 0, offset); + + for (size_type i = 0; i < parent_type::value_bits(); ++i) + { + const bvector_type* bv = this->bmatr_.get_row(i); + if (!bv) + continue; + func.mask_ = (value_type(1) << i); // set target plane OR mask + 
bm::for_each_bit_range_no_check(*bv, offset, end-1, func); + } // for i + return end - offset; } //--------------------------------------------------------------------- @@ -1473,7 +1448,8 @@ sparse_vector::at(typename sparse_vector::size_type idx) const template typename sparse_vector::value_type -sparse_vector::get(typename sparse_vector::size_type i) const +sparse_vector::get( + typename sparse_vector::size_type i) const BMNOEXCEPT { BM_ASSERT(i < bm::id_max); BM_ASSERT(i < size()); @@ -1485,7 +1461,7 @@ sparse_vector::get(typename sparse_vector::size_type i) const bool b = this->bmatr_.test_4rows(j); if (b) { - value_type vm = this->bmatr_.get_half_octet(i, j); + value_type vm = (value_type)this->bmatr_.get_half_octet(i, j); v |= vm << j; } } // for j @@ -1692,7 +1668,17 @@ void sparse_vector::inc(size_type idx) { if (idx >= this->size_) this->size_ = idx+1; + inc_no_null(idx); + bvector_type* bv_null = this->get_null_bvect(); + if (bv_null) + bv_null->set_bit_no_check(idx); +} + +//--------------------------------------------------------------------- +template +void sparse_vector::inc_no_null(size_type idx) +{ for (unsigned i = 0; i < parent_type::sv_value_plains; ++i) { bvector_type* bv = this->get_plain(i); @@ -1700,15 +1686,21 @@ void sparse_vector::inc(size_type idx) if (!carry_over) break; } - bvector_type* bv_null = this->get_null_bvect(); - if (bv_null) - bv_null->set_bit_no_check(idx); } //--------------------------------------------------------------------- template -void sparse_vector::clear() BMNOEXEPT +void sparse_vector::inc_no_null(size_type idx, value_type v) +{ + value_type v_prev = get(idx); + set_value_no_null(idx, v + v_prev); +} + +//--------------------------------------------------------------------- + +template +void sparse_vector::clear() BMNOEXCEPT { parent_type::clear(); } @@ -1716,7 +1708,7 @@ void sparse_vector::clear() BMNOEXEPT //--------------------------------------------------------------------- template -bool 
sparse_vector::find_rank(size_type rank, size_type& pos) +bool sparse_vector::find_rank(size_type rank, size_type& pos) BMNOEXCEPT { BM_ASSERT(rank); pos = rank - 1; @@ -1740,7 +1732,7 @@ sparse_vector::clear_range( template void sparse_vector::calc_stat( - struct sparse_vector::statistics* st) const + struct sparse_vector::statistics* st) const BMNOEXCEPT { BM_ASSERT(st); typename bvector_type::statistics stbv; @@ -1906,7 +1898,8 @@ void sparse_vector::filter( //--------------------------------------------------------------------- template -int sparse_vector::compare(size_type idx, const value_type val) const +int sparse_vector::compare(size_type idx, + const value_type val) const BMNOEXCEPT { // TODO: consider bit-by-bit comparison to minimize CPU hit miss in plans get() value_type sv_value = get(idx); @@ -1917,7 +1910,7 @@ int sparse_vector::compare(size_type idx, const value_type val) const template bool sparse_vector::equal(const sparse_vector& sv, - bm::null_support null_able) const + bm::null_support null_able) const BMNOEXCEPT { return parent_type::equal(sv, null_able); } @@ -1926,7 +1919,7 @@ bool sparse_vector::equal(const sparse_vector& sv, template typename sparse_vector::const_iterator -sparse_vector::begin() const +sparse_vector::begin() const BMNOEXCEPT { typedef typename sparse_vector::const_iterator it_type; return it_type(this); @@ -1936,7 +1929,7 @@ sparse_vector::begin() const template void sparse_vector::set_allocator_pool( - typename sparse_vector::allocator_pool_type* pool_ptr) + typename sparse_vector::allocator_pool_type* pool_ptr) BMNOEXCEPT { this->bmatr_.set_allocator_pool(pool_ptr); } @@ -1948,7 +1941,7 @@ void sparse_vector::set_allocator_pool( template -sparse_vector::const_iterator::const_iterator() +sparse_vector::const_iterator::const_iterator() BMNOEXCEPT : sv_(0), pos_(bm::id_max), buf_ptr_(0) {} @@ -1956,7 +1949,7 @@ sparse_vector::const_iterator::const_iterator() template sparse_vector::const_iterator::const_iterator( - const 
typename sparse_vector::const_iterator& it) + const typename sparse_vector::const_iterator& it) BMNOEXCEPT : sv_(it.sv_), pos_(it.pos_), buf_ptr_(0) {} @@ -1964,7 +1957,8 @@ sparse_vector::const_iterator::const_iterator( template sparse_vector::const_iterator::const_iterator( - const typename sparse_vector::const_iterator::sparse_vector_type* sv) + const typename sparse_vector::const_iterator::sparse_vector_type* sv + ) BMNOEXCEPT : sv_(sv), buf_ptr_(0) { BM_ASSERT(sv_); @@ -1976,7 +1970,7 @@ sparse_vector::const_iterator::const_iterator( template sparse_vector::const_iterator::const_iterator( const typename sparse_vector::const_iterator::sparse_vector_type* sv, - typename sparse_vector::size_type pos) + typename sparse_vector::size_type pos) BMNOEXCEPT : sv_(sv), buf_ptr_(0) { BM_ASSERT(sv_); @@ -1986,7 +1980,7 @@ sparse_vector::const_iterator::const_iterator( //--------------------------------------------------------------------- template -void sparse_vector::const_iterator::go_to(size_type pos) +void sparse_vector::const_iterator::go_to(size_type pos) BMNOEXCEPT { pos_ = (!sv_ || pos >= sv_->size()) ? 
bm::id_max : pos; buf_ptr_ = 0; @@ -1995,22 +1989,23 @@ void sparse_vector::const_iterator::go_to(size_type pos) //--------------------------------------------------------------------- template -void sparse_vector::const_iterator::advance() +bool sparse_vector::const_iterator::advance() BMNOEXCEPT { if (pos_ == bm::id_max) // nothing to do, we are at the end - return; + return false; ++pos_; if (pos_ >= sv_->size()) + { this->invalidate(); - else + return false; + } + if (buf_ptr_) { - if (buf_ptr_) - { - ++buf_ptr_; - if (buf_ptr_ - ((value_type*)buffer_.data()) >= n_buf_size) - buf_ptr_ = 0; - } + ++buf_ptr_; + if (buf_ptr_ - ((value_type*)buffer_.data()) >= n_buf_size) + buf_ptr_ = 0; } + return true; } //--------------------------------------------------------------------- @@ -2026,7 +2021,7 @@ sparse_vector::const_iterator::value() const { buffer_.reserve(n_buf_size * sizeof(value_type)); buf_ptr_ = (value_type*)(buffer_.data()); - sv_->extract(buf_ptr_, n_buf_size, pos_, true, &pool_); + sv_->extract(buf_ptr_, n_buf_size, pos_, true); } v = *buf_ptr_; return v; @@ -2035,7 +2030,7 @@ sparse_vector::const_iterator::value() const //--------------------------------------------------------------------- template -void sparse_vector::const_iterator::skip_zero_values() +void sparse_vector::const_iterator::skip_zero_values() BMNOEXCEPT { value_type v = value(); if (buf_ptr_) @@ -2063,7 +2058,7 @@ void sparse_vector::const_iterator::skip_zero_values() //--------------------------------------------------------------------- template -bool sparse_vector::const_iterator::is_null() const +bool sparse_vector::const_iterator::is_null() const BMNOEXCEPT { return sv_->is_null(pos_); } diff --git a/c++/include/util/bitset/bmsparsevec_algo.h b/c++/include/util/bitset/bmsparsevec_algo.h index 70269d15..2be5afec 100644 --- a/c++/include/util/bitset/bmsparsevec_algo.h +++ b/c++/include/util/bitset/bmsparsevec_algo.h @@ -18,7 +18,7 @@ limitations under the License. 
For more information please visit: http://bitmagic.io */ /*! \file bmsparsevec_algo.h - \brief Algorithms for sparse_vector<> + \brief Algorithms for bm::sparse_vector */ #ifndef BM__H__INCLUDED__ @@ -31,6 +31,7 @@ For more information please visit: http://bitmagic.io #include "bmsparsevec.h" #include "bmaggregator.h" #include "bmbuffer.h" +#include "bmalgo.h" #include "bmdef.h" #ifdef _MSC_VER @@ -231,7 +232,7 @@ bool sparse_vector_find_first_mismatch(const SV& sv1, } // null_proc } - for (unsigned i = 0; mismatch & (i < plains1); ++i) + for (unsigned i = 0; mismatch && (i < plains1); ++i) { typename SV::bvector_type_const_ptr bv1 = sv1.get_plain(i); typename SV::bvector_type_const_ptr bv2 = sv2.get_plain(i); @@ -503,7 +504,7 @@ public: /** \brief reset sparse vector binding */ - void reset_binding(); + void reset_binding() BMNOEXCEPT; /** \brief find all sparse vector elements EQ to search value @@ -709,7 +710,7 @@ protected: int compare_str(const SV& sv, size_type idx, const value_type* str); /// compare sv[idx] with input value - int compare(const SV& sv, size_type idx, const value_type val); + int compare(const SV& sv, size_type idx, const value_type val) BMNOEXCEPT; protected: sparse_vector_scanner(const sparse_vector_scanner&) = delete; @@ -1047,7 +1048,7 @@ void set2set_11_transform::remap(const bvector_type& bv_in, { sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM); bv_out.set(&gb_->buffer_[0], buf_cnt, BM_SORTED); - buf_cnt ^= buf_cnt; + buf_cnt = 0; } nb_old = nb; gb_->gather_idx_[buf_cnt++] = idx; @@ -1061,7 +1062,7 @@ void set2set_11_transform::remap(const bvector_type& bv_in, { sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM); bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED); - buf_cnt ^= buf_cnt; + buf_cnt = 0; } } // for en if (buf_cnt) @@ -1157,7 +1158,7 @@ void sparse_vector_scanner::bind(const SV& sv, bool sorted) 
//---------------------------------------------------------------------------- template -void sparse_vector_scanner::reset_binding() +void sparse_vector_scanner::reset_binding() BMNOEXCEPT { bound_sv_ = 0; effective_str_max_ = 0; @@ -2020,7 +2021,7 @@ int sparse_vector_scanner::compare_str(const SV& sv, template int sparse_vector_scanner::compare(const SV& sv, size_type idx, - const value_type val) + const value_type val) BMNOEXCEPT { // TODO: implement sentinel elements cache (similar to compare_str()) return sv.compare(idx, val); diff --git a/c++/include/util/bitset/bmsparsevec_compr.h b/c++/include/util/bitset/bmsparsevec_compr.h index e0930820..ecc791cc 100644 --- a/c++/include/util/bitset/bmsparsevec_compr.h +++ b/c++/include/util/bitset/bmsparsevec_compr.h @@ -98,18 +98,120 @@ public: class reference { public: - reference(rsc_sparse_vector& csv, size_type idx) BMNOEXEPT + reference(rsc_sparse_vector& csv, size_type idx) BMNOEXCEPT : csv_(csv), idx_(idx) {} - operator value_type() const { return csv_.get(idx_); } - bool operator==(const reference& ref) const + operator value_type() const BMNOEXCEPT { return csv_.get(idx_); } + bool operator==(const reference& ref) const BMNOEXCEPT { return bool(*this) == bool(ref); } - bool is_null() const { return csv_.is_null(idx_); } + bool is_null() const BMNOEXCEPT { return csv_.is_null(idx_); } private: rsc_sparse_vector& csv_; size_type idx_; }; + /** + Const iterator to traverse the rsc sparse vector. + + Implementation uses buffer for decoding so, competing changes + to the original vector may not match the iterator returned values. 
+ + This iterator keeps an operational buffer, memory footprint is not + negligable + + @ingroup sv + */ + class const_iterator + { + public: + friend class rsc_sparse_vector; + +#ifndef BM_NO_STL + typedef std::input_iterator_tag iterator_category; +#endif + typedef rsc_sparse_vector rsc_sparse_vector_type; + typedef rsc_sparse_vector_type* rsc_sparse_vector_type_ptr; + typedef typename rsc_sparse_vector_type::value_type value_type; + typedef typename rsc_sparse_vector_type::size_type size_type; + typedef typename rsc_sparse_vector_type::bvector_type bvector_type; + typedef typename bvector_type::allocator_type allocator_type; + typedef typename + bvector_type::allocator_type::allocator_pool_type allocator_pool_type; + typedef bm::byte_buffer buffer_type; + + typedef unsigned difference_type; + typedef unsigned* pointer; + typedef value_type& reference; + + public: + const_iterator() BMNOEXCEPT; + const_iterator(const rsc_sparse_vector_type* csv) BMNOEXCEPT; + const_iterator(const rsc_sparse_vector_type* csv, size_type pos) BMNOEXCEPT; + const_iterator(const const_iterator& it) BMNOEXCEPT; + + bool operator==(const const_iterator& it) const BMNOEXCEPT + { return (pos_ == it.pos_) && (csv_ == it.csv_); } + bool operator!=(const const_iterator& it) const BMNOEXCEPT + { return ! 
operator==(it); } + bool operator < (const const_iterator& it) const BMNOEXCEPT + { return pos_ < it.pos_; } + bool operator <= (const const_iterator& it) const BMNOEXCEPT + { return pos_ <= it.pos_; } + bool operator > (const const_iterator& it) const BMNOEXCEPT + { return pos_ > it.pos_; } + bool operator >= (const const_iterator& it) const BMNOEXCEPT + { return pos_ >= it.pos_; } + + /// \brief Get current position (value) + value_type operator*() const { return this->value(); } + + + /// \brief Advance to the next available value + const_iterator& operator++() BMNOEXCEPT { this->advance(); return *this; } + + /// \brief Advance to the next available value + const_iterator& operator++(int) + { const_iterator tmp(*this);this->advance(); return tmp; } + + + /// \brief Get current position (value) + value_type value() const; + + /// \brief Get NULL status + bool is_null() const BMNOEXCEPT; + + /// Returns true if iterator is at a valid position + bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; } + + /// Invalidate current iterator + void invalidate() BMNOEXCEPT { pos_ = bm::id_max; } + + /// Current position (index) in the vector + size_type pos() const BMNOEXCEPT{ return pos_; } + + /// re-position to a specified position + void go_to(size_type pos) BMNOEXCEPT; + + /// advance iterator forward by one + /// @return true if it is still valid + bool advance() BMNOEXCEPT; + + void skip_zero_values() BMNOEXCEPT; + private: + enum buf_size_e + { + n_buf_size = 1024 * 8 + }; + + private: + const rsc_sparse_vector_type* csv_; ///!< ptr to parent + size_type pos_; ///!< Position + mutable buffer_type vbuffer_; ///!< value buffer + mutable buffer_type tbuffer_; ///!< temp buffer + mutable value_type* buf_ptr_; ///!< position in the buffer + }; + + /** Back insert iterator implements buffered insert, faster than generic @@ -141,8 +243,8 @@ public: typedef void reference; public: - back_insert_iterator(); - back_insert_iterator(rsc_sparse_vector_type* csv); + 
back_insert_iterator() BMNOEXCEPT; + back_insert_iterator(rsc_sparse_vector_type* csv) BMNOEXCEPT; back_insert_iterator& operator=(const back_insert_iterator& bi) { @@ -154,7 +256,8 @@ public: ~back_insert_iterator(); /** push value to the vector */ - back_insert_iterator& operator=(value_type v) { this->add(v); return *this; } + back_insert_iterator& operator=(value_type v) + { this->add(v); return *this; } /** noop */ back_insert_iterator& operator*() { return *this; } /** noop */ @@ -166,10 +269,10 @@ public: void add(value_type v); /** add NULL (no-value) to the container */ - void add_null(); + void add_null() BMNOEXCEPT; /** add a series of consequitve NULLs (no-value) to the container */ - void add_null(size_type count); + void add_null(size_type count) BMNOEXCEPT; /** flush the accumulated buffer */ void flush(); @@ -183,7 +286,8 @@ public: ///size_type add_value(value_type v); typedef rsc_sparse_vector_type::sparse_vector_type sparse_vector_type; - typedef typename sparse_vector_type::back_insert_iterator sparse_vector_bi; + typedef + typename sparse_vector_type::back_insert_iterator sparse_vector_bi; private: rsc_sparse_vector_type* csv_; ///!< pointer on the parent vector sparse_vector_bi sv_bi_; @@ -192,12 +296,25 @@ public: public: // ------------------------------------------------------------ /*! @name Construction and assignment */ + //@{ rsc_sparse_vector(bm::null_support null_able = bm::use_null, allocation_policy_type ap = allocation_policy_type(), size_type bv_max_size = bm::id_max, const allocator_type& alloc = allocator_type()); + + /** + Contructor to pre-initialize the list of assigned (not NULL) elements. + + If the list of not NULL elements is known upfront it can help to + pre-declare it, enable rank-select index and then use set function. 
+ This scenario gives significant speed boost, comparing random assignment + + @param bv_null - not NULL vector for the container + */ + rsc_sparse_vector(const bvector_type& bv_null); + ~rsc_sparse_vector(); /*! copy-ctor */ @@ -205,7 +322,7 @@ public: /*! copy assignmment operator */ - rsc_sparse_vector& operator = (const rsc_sparse_vector& csv) + rsc_sparse_vector& operator=(const rsc_sparse_vector& csv) { if (this != &csv) { @@ -219,13 +336,13 @@ public: } return *this; } - + #ifndef BM_NO_CXX11 /*! move-ctor */ - rsc_sparse_vector(rsc_sparse_vector&& csv) BMNOEXEPT; + rsc_sparse_vector(rsc_sparse_vector&& csv) BMNOEXCEPT; /*! move assignmment operator */ - rsc_sparse_vector& operator=(rsc_sparse_vector&& csv) BMNOEXEPT + rsc_sparse_vector& operator=(rsc_sparse_vector&& csv) BMNOEXCEPT { if (this != &csv) { @@ -249,7 +366,7 @@ public: /*! \brief return size of the vector \return size of sparse vector */ - size_type size() const; + size_type size() const BMNOEXCEPT; /*! \brief return true if vector is empty \return true if empty @@ -281,7 +398,7 @@ public: \param idx - element index \return value of the element */ - value_type get(size_type idx) const; + value_type get(size_type idx) const BMNOEXCEPT; /*! \brief set specified element with bounds checking and automatic resize @@ -301,6 +418,29 @@ public: */ void set(size_type idx, value_type v); + + /*! + \brief increment specified element by one + \param idx - element index + */ + void inc(size_type idx); + + /*! + \brief increment specified element by one + \param idx - element index + \param v - increment value + */ + void inc(size_type idx, value_type v); + + /*! + \brief increment specified element by one, element MUST be NOT NULL + Faster than just inc() if element is NULL - behavior is undefined + \param idx - element index + \param v - increment value + @sa inc + */ + void inc_not_null(size_type idx, value_type v); + /*! 
\brief set specified element to NULL RSC vector actually erases element when it is set to NULL (expensive). @@ -309,37 +449,67 @@ public: void set_null(size_type idx); - /** \brief test if specified element is NULL \param idx - element index \return true if it is NULL false if it was assigned or container is not configured to support assignment flags */ - bool is_null(size_type idx) const; + bool is_null(size_type idx) const BMNOEXCEPT; /** \brief Get bit-vector of assigned values (or NULL) */ - const bvector_type* get_null_bvector() const; + const bvector_type* get_null_bvector() const BMNOEXCEPT; /** \brief find position of compressed element by its rank \param rank - rank (virtual index in sparse vector) \param idx - index (true position) */ - bool find_rank(size_type rank, size_type& idx) const; + bool find_rank(size_type rank, size_type& idx) const BMNOEXCEPT; //@} // ------------------------------------------------------------ /*! @name Export content to C-stype array */ ///@{ - + + /** + \brief C-style decode + \param arr - decode target array (must be properly sized) + \param idx_from - start address to decode + \param size - number of elements to decode + \param zero_mem - flag if array needs to beset to zeros first + + @return actual decoded size + @sa decode_buf + */ size_type decode(value_type* arr, size_type idx_from, size_type size, bool zero_mem = true) const; + + /** + \brief C-style decode (variant with external memory) + Analog of decode, but requires two arrays. + Faster than decode in many cases. 
+ + \param arr - decode target array (must be properly sized) + \param arr_buf_tmp - decode temp bufer (must be same size of arr) + \param idx_from - start address to decode + \param size - number of elements to decode + \param zero_mem - flag if array needs to beset to zeros first + + @return actual decoded size + @sa decode + */ + size_type decode_buf(value_type* arr, + value_type* arr_buf_tmp, + size_type idx_from, + size_type size, + bool zero_mem = true) const BMNOEXCEPT; + ///@} @@ -367,7 +537,7 @@ public: \brief check if another vector has the same content \return true, if it is the same */ - bool equal(const rsc_sparse_vector& csv) const; + bool equal(const rsc_sparse_vector& csv) const BMNOEXCEPT; //@} @@ -395,6 +565,20 @@ public: /*! @name Iterator access */ //@{ + /** Provide const iterator access to container content */ + const_iterator begin() const BMNOEXCEPT + { return const_iterator(this); } + + /** Provide const iterator access to the end */ + const_iterator end() const BMNOEXCEPT + { return const_iterator(this, bm::id_max); } + + /** Get const_itertor re-positioned to specific element + @param idx - position in the sparse vector + */ + const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT + { return const_iterator(this, idx); } + back_insert_iterator get_back_inserter() { return back_insert_iterator(this); } ///@} @@ -408,13 +592,14 @@ public: \param opt_mode - requested compression depth \param stat - memory allocation statistics after optimization */ - void optimize(bm::word_t* temp_block = 0, - typename bvector_type::optmode opt_mode = bvector_type::opt_compress, - statistics* stat = 0); + void optimize( + bm::word_t* temp_block = 0, + typename bvector_type::optmode opt_mode = bvector_type::opt_compress, + statistics* stat = 0); /*! \brief resize to zero, free memory */ - void clear() BMNOEXEPT; + void clear() BMNOEXCEPT; /*! @brief Calculates memory statistics. 
@@ -427,7 +612,8 @@ public: @sa statistics */ - void calc_stat(struct rsc_sparse_vector::statistics* st) const; + void calc_stat( + struct rsc_sparse_vector::statistics* st) const BMNOEXCEPT; ///@} @@ -448,6 +634,14 @@ public: void copy_range(const rsc_sparse_vector& csv, size_type left, size_type right); + /** + @brief merge two vectors (argument gets destroyed) + It is important that both vectors have the same NULL vectors + @param csv - [in,out] argumnet vector to merge + (works like move so arg should not be used after the merge) + */ + void merge_not_null(rsc_sparse_vector& csv); + ///@} // ------------------------------------------------------------ @@ -467,12 +661,12 @@ public: /*! \brief returns true if prefix sum table is in sync with the vector */ - bool in_sync() const { return in_sync_; } + bool in_sync() const BMNOEXCEPT { return in_sync_; } /*! \brief Unsync the prefix sum table */ - void unsync() { in_sync_ = false; } + void unsync() BMNOEXCEPT { in_sync_ = false; } ///@} // ------------------------------------------------------------ @@ -483,19 +677,23 @@ public: \brief get access to bit-plain, function checks and creates a plain \return bit-vector for the bit plain */ - bvector_type_const_ptr get_plain(unsigned i) const { return sv_.get_plain(i); } + bvector_type_const_ptr get_plain(unsigned i) const BMNOEXCEPT + { return sv_.get_plain(i); } - bvector_type_ptr get_plain(unsigned i) { return sv_.get_plain(i); } + bvector_type_ptr get_plain(unsigned i) BMNOEXCEPT + { return sv_.get_plain(i); } /*! Number of effective bit-plains in the value type */ - unsigned effective_plains() const { return sv_.effective_plains(); } + unsigned effective_plains() const BMNOEXCEPT + { return sv_.effective_plains(); } /*! 
\brief get total number of bit-plains in the vector */ - static unsigned plains() { return sparse_vector_type::plains(); } + static unsigned plains() BMNOEXCEPT + { return sparse_vector_type::plains(); } /** Number of stored bit-plains (value plains + extra */ static unsigned stored_plains() @@ -504,22 +702,23 @@ public: /*! \brief access dense vector */ - const sparse_vector_type& get_sv() const { return sv_; } + const sparse_vector_type& get_sv() const BMNOEXCEPT { return sv_; } /*! \brief size of internal dense vector */ - size_type effective_size() const { return sv_.size(); } + size_type effective_size() const BMNOEXCEPT { return sv_.size(); } /** \brief Always 1 (non-matrix type) */ - size_type effective_vector_max() const { return 1; } + size_type effective_vector_max() const BMNOEXCEPT { return 1; } /*! get read-only access to inetrnal bit-matrix */ - const bmatrix_type& get_bmatrix() const { return sv_.get_bmatrix(); } + const bmatrix_type& get_bmatrix() const BMNOEXCEPT + { return sv_.get_bmatrix(); } ///@} @@ -537,26 +736,29 @@ protected: \return true if id is known and resolved successfully */ - bool resolve(size_type idx, size_type* idx_to) const; + bool resolve(size_type idx, size_type* idx_to) const BMNOEXCEPT; bool resolve_range(size_type from, size_type to, - size_type* idx_from, size_type* idx_to) const; + size_type* idx_from, size_type* idx_to) const BMNOEXCEPT; void resize_internal(size_type sz) { sv_.resize_internal(sz); } - size_type size_internal() const { return sv_.size(); } + size_type size_internal() const BMNOEXCEPT { return sv_.size(); } - bool is_remap() const { return false; } - size_t remap_size() const { return 0; } - const unsigned char* get_remap_buffer() const { return 0; } - unsigned char* init_remap_buffer() { return 0; } - void set_remap() { } + bool is_remap() const BMNOEXCEPT { return false; } + size_t remap_size() const BMNOEXCEPT { return 0; } + const unsigned char* get_remap_buffer() const BMNOEXCEPT { return 0; } + 
unsigned char* init_remap_buffer() BMNOEXCEPT { return 0; } + void set_remap() BMNOEXCEPT { } void push_back_no_check(size_type idx, value_type v); private: - void construct_bv_blocks(); - void free_bv_blocks(); + + /// Allocate memory for RS index + void construct_rs_index(); + /// Free rs-index + void free_rs_index(); protected: template friend class sparse_vector_scanner; @@ -580,13 +782,37 @@ rsc_sparse_vector::rsc_sparse_vector(bm::null_support null_able, allocation_policy_type ap, size_type bv_max_size, const allocator_type& alloc) -: sv_(null_able, ap, bv_max_size, alloc), - in_sync_(false) +: sv_(null_able, ap, bv_max_size, alloc), in_sync_(false) { BM_ASSERT(null_able == bm::use_null); BM_ASSERT(int(sv_value_plains) == int(SV::sv_value_plains)); size_ = max_id_ = 0; - construct_bv_blocks(); + construct_rs_index(); +} + +//--------------------------------------------------------------------- + +template +rsc_sparse_vector::rsc_sparse_vector(const bvector_type& bv_null) +: sv_(bm::use_null), in_sync_(false) +{ + construct_rs_index(); + bvector_type* bv = sv_.get_null_bvect(); + BM_ASSERT(bv); + *bv = bv_null; + + bool found = bv->find_reverse(max_id_); + if (found) + { + size_ = max_id_ + 1; + size_type sz = bv->count(); + sv_.resize(sz); + } + else + { + BM_ASSERT(!bv->any()); + size_ = max_id_ = 0; + } } //--------------------------------------------------------------------- @@ -594,7 +820,7 @@ rsc_sparse_vector::rsc_sparse_vector(bm::null_support null_able, template rsc_sparse_vector::~rsc_sparse_vector() { - free_bv_blocks(); + free_rs_index(); } //--------------------------------------------------------------------- @@ -602,24 +828,20 @@ rsc_sparse_vector::~rsc_sparse_vector() template rsc_sparse_vector::rsc_sparse_vector( const rsc_sparse_vector& csv) -: sv_(csv.sv_), - size_(csv.size_), - max_id_(csv.max_id_), - in_sync_(csv.in_sync_) +: sv_(csv.sv_), size_(csv.size_), max_id_(csv.max_id_), in_sync_(csv.in_sync_) { BM_ASSERT(int(sv_value_plains) == 
int(SV::sv_value_plains)); - construct_bv_blocks(); + construct_rs_index(); if (in_sync_) - { bv_blocks_ptr_->copy_from(*(csv.bv_blocks_ptr_)); - } } //--------------------------------------------------------------------- template -rsc_sparse_vector::rsc_sparse_vector(rsc_sparse_vector&& csv) BMNOEXEPT +rsc_sparse_vector::rsc_sparse_vector( + rsc_sparse_vector&& csv) BMNOEXCEPT : sv_(bm::use_null), size_(0), max_id_(0), in_sync_(false) @@ -636,7 +858,7 @@ rsc_sparse_vector::rsc_sparse_vector(rsc_sparse_vector&& csv) B template typename rsc_sparse_vector::size_type -rsc_sparse_vector::size() const +rsc_sparse_vector::size() const BMNOEXCEPT { return size_; } @@ -686,9 +908,93 @@ void rsc_sparse_vector::set_null(size_type idx) size_type sv_idx = bv_null->count_range(0, idx); bv_null->clear_bit_no_check(idx); sv_.erase(--sv_idx); + in_sync_ = false; } } +//--------------------------------------------------------------------- + +template +void rsc_sparse_vector::inc(size_type idx) +{ + bvector_type* bv_null = sv_.get_null_bvect(); + BM_ASSERT(bv_null); + + size_type sv_idx; + bool found = bv_null->test(idx); + + sv_idx = in_sync_ ? bv_null->count_to(idx, *bv_blocks_ptr_) + : bv_null->count_range(0, idx); // TODO: make test'n'count + + if (found) + { + sv_.inc_no_null(--sv_idx); + } + else + { + sv_.insert_value_no_null(sv_idx, 1); + bv_null->set_bit_no_check(idx); + + if (idx > max_id_) + { + max_id_ = idx; + size_ = max_id_ + 1; + } + in_sync_ = false; + } +} + +//--------------------------------------------------------------------- + +template +void rsc_sparse_vector::inc(size_type idx, value_type v) +{ + bvector_type* bv_null = sv_.get_null_bvect(); + BM_ASSERT(bv_null); + + size_type sv_idx; + bool found = bv_null->test(idx); + + sv_idx = in_sync_ ? 
bv_null->count_to(idx, *bv_blocks_ptr_) + : bv_null->count_range(0, idx); // TODO: make test'n'count + + if (found) + { + sv_.inc_no_null(--sv_idx, v); + } + else + { + sv_.insert_value_no_null(sv_idx, v); + bv_null->set_bit_no_check(idx); + + if (idx > max_id_) + { + max_id_ = idx; + size_ = max_id_ + 1; + } + in_sync_ = false; + } +} + +//--------------------------------------------------------------------- + +template +void rsc_sparse_vector::inc_not_null(size_type idx, value_type v) +{ + bvector_type* bv_null = sv_.get_null_bvect(); + BM_ASSERT(bv_null->test(idx)); // idx must be NOT NULL + + size_type sv_idx; + sv_idx = in_sync_ ? bv_null->count_to(idx, *bv_blocks_ptr_) + : bv_null->count_range(0, idx); // TODO: make test'n'count + --sv_idx; + if (v == 1) + sv_.inc_no_null(sv_idx); + else + sv_.inc_no_null(sv_idx, v); +} + + //--------------------------------------------------------------------- template @@ -696,15 +1002,15 @@ void rsc_sparse_vector::set(size_type idx, value_type v) { bvector_type* bv_null = sv_.get_null_bvect(); BM_ASSERT(bv_null); - + + size_type sv_idx; bool found = bv_null->test(idx); - size_type sv_idx = bv_null->count_range(0, idx); // TODO: make test'n'count -// size_type sv_idx; -// bool found = resolve(idx, &sv_idx); + + sv_idx = in_sync_ ? 
bv_null->count_to(idx, *bv_blocks_ptr_) + : bv_null->count_range(0, idx); // TODO: make test'n'count if (found) { - //sv_.set(--sv_idx, v); sv_.set_value_no_null(--sv_idx, v); } else @@ -725,7 +1031,7 @@ void rsc_sparse_vector::set(size_type idx, value_type v) template bool rsc_sparse_vector::equal( - const rsc_sparse_vector& csv) const + const rsc_sparse_vector& csv) const BMNOEXCEPT { if (this == &csv) return true; @@ -739,7 +1045,7 @@ bool rsc_sparse_vector::equal( template void rsc_sparse_vector::load_from( - const sparse_vector_type& sv_src) + const sparse_vector_type& sv_src) { max_id_ = size_ = 0; @@ -837,10 +1143,10 @@ void rsc_sparse_vector::sync(bool force) //--------------------------------------------------------------------- template -bool rsc_sparse_vector::resolve(size_type idx, size_type* idx_to) const +bool rsc_sparse_vector::resolve(size_type idx, + size_type* idx_to) const BMNOEXCEPT { BM_ASSERT(idx_to); - const bvector_type* bv_null = sv_.get_null_bvector(); if (in_sync_) { @@ -849,23 +1155,17 @@ bool rsc_sparse_vector::resolve(size_type idx, size_type* idx_to) const else // slow access { bool found = bv_null->test(idx); - if (!found) - { - *idx_to = 0; - } - else - { - *idx_to = bv_null->count_range(0, idx); - } + *idx_to = found ? 
bv_null->count_range(0, idx) : 0; } return bool(*idx_to); } + //--------------------------------------------------------------------- template bool rsc_sparse_vector::resolve_range( size_type from, size_type to, - size_type* idx_from, size_type* idx_to) const + size_type* idx_from, size_type* idx_to) const BMNOEXCEPT { BM_ASSERT(idx_to && idx_from); const bvector_type* bv_null = sv_.get_null_bvector(); @@ -876,12 +1176,15 @@ bool rsc_sparse_vector::resolve_range( copy_sz = bv_null->count_range(from, to); if (!copy_sz) return false; + if (in_sync_) - sv_left = bv_null->count_range(0, from, *bv_blocks_ptr_); + sv_left = bv_null->rank_corrected(from, *bv_blocks_ptr_); else + { sv_left = bv_null->count_range(0, from); - bool tl = bv_null->test(from); // TODO: add count and test - sv_left -= tl; // rank correction + bool tl = bv_null->test(from); // TODO: add count and test + sv_left -= tl; // rank correction + } *idx_from = sv_left; *idx_to = sv_left + copy_sz - 1; return true; @@ -910,7 +1213,7 @@ rsc_sparse_vector::at(size_type idx) const template typename rsc_sparse_vector::value_type -rsc_sparse_vector::get(size_type idx) const +rsc_sparse_vector::get(size_type idx) const BMNOEXCEPT { size_type sv_idx; bool found = resolve(idx, &sv_idx); @@ -923,7 +1226,7 @@ rsc_sparse_vector::get(size_type idx) const //--------------------------------------------------------------------- template -bool rsc_sparse_vector::is_null(size_type idx) const +bool rsc_sparse_vector::is_null(size_type idx) const BMNOEXCEPT { const bvector_type* bv_null = sv_.get_null_bvector(); BM_ASSERT(bv_null); @@ -950,7 +1253,7 @@ void rsc_sparse_vector::optimize(bm::word_t* temp_block, //--------------------------------------------------------------------- template -void rsc_sparse_vector::clear() BMNOEXEPT +void rsc_sparse_vector::clear() BMNOEXCEPT { sv_.clear(); in_sync_ = false; max_id_ = size_ = 0; @@ -960,7 +1263,7 @@ void rsc_sparse_vector::clear() BMNOEXEPT template void 
rsc_sparse_vector::calc_stat( - struct rsc_sparse_vector::statistics* st) const + struct rsc_sparse_vector::statistics* st) const BMNOEXCEPT { BM_ASSERT(st); sv_.calc_stat((typename sparse_vector_type::statistics*)st); @@ -977,7 +1280,7 @@ void rsc_sparse_vector::calc_stat( template const typename rsc_sparse_vector::bvector_type* -rsc_sparse_vector::get_null_bvector() const +rsc_sparse_vector::get_null_bvector() const BMNOEXCEPT { return sv_.get_null_bvector(); } @@ -986,7 +1289,8 @@ rsc_sparse_vector::get_null_bvector() const template bool -rsc_sparse_vector::find_rank(size_type rank, size_type& idx) const +rsc_sparse_vector::find_rank(size_type rank, + size_type& idx) const BMNOEXCEPT { BM_ASSERT(rank); bool b; @@ -1006,7 +1310,7 @@ typename rsc_sparse_vector::size_type rsc_sparse_vector::decode(value_type* arr, size_type idx_from, size_type size, - bool /*zero_mem*/) const + bool zero_mem) const { if (size == 0) return 0; @@ -1020,51 +1324,104 @@ rsc_sparse_vector::decode(value_type* arr, if ((bm::id_max - size) <= idx_from) size = bm::id_max - idx_from; + if ((idx_from + size) > this->size()) + size = this->size() - idx_from; const bvector_type* bv_null = sv_.get_null_bvector(); + size_type rank = bv_null->rank_corrected(idx_from, *bv_blocks_ptr_); + + BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from)); - size_type rank = bv_null->count_to(idx_from, *bv_blocks_ptr_); - bool b = bv_null->test(idx_from); - bvector_enumerator_type en_i = bv_null->get_enumerator(idx_from); - size_type i = *en_i; - if (idx_from + size <= i) // empty space (all zeros) + BM_ASSERT(en_i.valid()); + + if (zero_mem) + ::memset(arr, 0, sizeof(value_type)*size); + + sparse_vector_const_iterator it = sv_.get_const_iterator(rank); + size_type i = 0; + if (it.valid()) { + do + { + size_type en_idx = *en_i; + size_type delta = en_idx - idx_from; + idx_from += delta; + i += delta; + if (i >= size) + return size; + arr[i++] = it.value(); + if (!en_i.advance()) + 
break; + if (!it.advance()) + break; + ++idx_from; + } while (i < size); + } + return i; +} + + +template +typename rsc_sparse_vector::size_type +rsc_sparse_vector::decode_buf(value_type* arr, + value_type* arr_buf_tmp, + size_type idx_from, + size_type size, + bool zero_mem) const BMNOEXCEPT +{ + if (!size || (idx_from >= this->size())) + return 0; + + BM_ASSERT(arr && arr_buf_tmp); + BM_ASSERT(arr != arr_buf_tmp); + BM_ASSERT(in_sync_); // call sync() before decoding + BM_ASSERT(bv_blocks_ptr_); + + if ((bm::id_max - size) <= idx_from) + size = bm::id_max - idx_from; + if ((idx_from + size) > this->size()) + size = this->size() - idx_from; + + if (zero_mem) ::memset(arr, 0, sizeof(value_type)*size); + + const bvector_type* bv_null = sv_.get_null_bvector(); + size_type rank = bv_null->rank_corrected(idx_from, *bv_blocks_ptr_); + + BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from)); + + bvector_enumerator_type en_i = bv_null->get_enumerator(idx_from); + if (!en_i.valid()) return size; - } - rank -= b; - sparse_vector_const_iterator it = sv_.get_const_iterator(rank); - i = 0; - while (it.valid()) + + size_type i = en_i.value(); + if (idx_from + size <= i) // empty space (all zeros) + return size; + + size_type extract_cnt = + bv_null->count_range(idx_from, idx_from + size - 1, *bv_blocks_ptr_); + + BM_ASSERT(extract_cnt <= this->size()); + auto ex_sz = sv_.decode(arr_buf_tmp, rank, extract_cnt, true); + BM_ASSERT(ex_sz == extract_cnt); (void) ex_sz; + + for (i = 0; i < extract_cnt; ++i) { - if (!en_i.valid()) - break; + BM_ASSERT(en_i.valid()); size_type en_idx = *en_i; - while (idx_from < en_idx) // zero the empty prefix - { - arr[i] ^= arr[i]; - ++i; ++idx_from; - if (i == size) - return i; - } - BM_ASSERT(idx_from == en_idx); - arr[i] = *it; - ++i; ++idx_from; - if (i == size) - return i; - + arr[en_idx-idx_from] = arr_buf_tmp[i]; en_i.advance(); - it.advance(); - } // while - - return i; + } // for i + + return size; } + 
//--------------------------------------------------------------------- template -void rsc_sparse_vector::construct_bv_blocks() +void rsc_sparse_vector::construct_rs_index() { if (bv_blocks_ptr_) return; @@ -1076,7 +1433,7 @@ void rsc_sparse_vector::construct_bv_blocks() //--------------------------------------------------------------------- template -void rsc_sparse_vector::free_bv_blocks() +void rsc_sparse_vector::free_rs_index() { if (bv_blocks_ptr_) { @@ -1085,13 +1442,57 @@ void rsc_sparse_vector::free_bv_blocks() } } +//--------------------------------------------------------------------- + +template +void rsc_sparse_vector::copy_range( + const rsc_sparse_vector& csv, + size_type left, size_type right) +{ + if (left > right) + bm::xor_swap(left, right); + + if (left >= csv.size()) + return; + + size_ = csv.size_; max_id_ = csv.max_id_; + in_sync_ = false; + + const bvector_type* arg_bv_null = csv.sv_.get_null_bvector(); + size_type sv_left, sv_right; + bool range_valid = csv.resolve_range(left, right, &sv_left, &sv_right); + if (!range_valid) + { + sv_.clear(); sv_.resize(size_); + bvector_type* bv_null = sv_.get_null_bvect(); + bv_null->copy_range(*arg_bv_null, 0, right); + return; + } + bvector_type* bv_null = sv_.get_null_bvect(); + bv_null->copy_range(*arg_bv_null, 0, right); // not NULL vector gets a full copy + sv_.copy_range(csv.sv_, sv_left, sv_right, bm::no_null); // don't copy NULL +} + + +//--------------------------------------------------------------------- + +template +void rsc_sparse_vector::merge_not_null(rsc_sparse_vector& csv) +{ + // MUST have the same NULL to work + BM_ASSERT(sv_.get_null_bvector()->equal(*csv.sv_.get_null_bvector())); + + sv_.merge(csv.sv_); +} + + //--------------------------------------------------------------------- // //--------------------------------------------------------------------- template -rsc_sparse_vector::back_insert_iterator::back_insert_iterator() 
+rsc_sparse_vector::back_insert_iterator::back_insert_iterator() BMNOEXCEPT : csv_(0) {} @@ -1100,7 +1501,7 @@ rsc_sparse_vector::back_insert_iterator::back_insert_iterator() template rsc_sparse_vector::back_insert_iterator::back_insert_iterator - (rsc_sparse_vector_type* csv) + (rsc_sparse_vector_type* csv) BMNOEXCEPT { csv_ = csv; sv_bi_ = csv->sv_.get_back_inserter(); @@ -1134,7 +1535,7 @@ void rsc_sparse_vector::back_insert_iterator::add( //--------------------------------------------------------------------- template -void rsc_sparse_vector::back_insert_iterator::add_null() +void rsc_sparse_vector::back_insert_iterator::add_null() BMNOEXCEPT { BM_ASSERT(csv_); csv_->max_id_++; @@ -1145,7 +1546,7 @@ void rsc_sparse_vector::back_insert_iterator::add_null() template void rsc_sparse_vector::back_insert_iterator::add_null( - rsc_sparse_vector::back_insert_iterator::size_type count) + rsc_sparse_vector::back_insert_iterator::size_type count) BMNOEXCEPT { BM_ASSERT(csv_); csv_->max_id_+=count; @@ -1161,39 +1562,140 @@ void rsc_sparse_vector::back_insert_iterator::flush() csv_->in_sync_ = false; } +//--------------------------------------------------------------------- +// +//--------------------------------------------------------------------- + +template +rsc_sparse_vector::const_iterator::const_iterator() BMNOEXCEPT +: csv_(0), pos_(bm::id_max), buf_ptr_(0) +{} + +//--------------------------------------------------------------------- + +template +rsc_sparse_vector::const_iterator::const_iterator( + const typename rsc_sparse_vector::const_iterator& it) BMNOEXCEPT +: csv_(it.csv_), pos_(it.pos_), buf_ptr_(0) +{} + +//--------------------------------------------------------------------- + +template +rsc_sparse_vector::const_iterator::const_iterator( + const typename rsc_sparse_vector::const_iterator::rsc_sparse_vector_type* csv + ) BMNOEXCEPT +: csv_(csv), buf_ptr_(0) +{ + BM_ASSERT(csv_); + pos_ = csv_->empty() ? 
bm::id_max : 0u; +} + //--------------------------------------------------------------------- template -void rsc_sparse_vector::copy_range( - const rsc_sparse_vector& csv, - size_type left, size_type right) +rsc_sparse_vector::const_iterator::const_iterator( + const typename rsc_sparse_vector::const_iterator::rsc_sparse_vector_type* csv, + typename rsc_sparse_vector::size_type pos) BMNOEXCEPT +: csv_(csv), buf_ptr_(0) { - if (left > right) - bm::xor_swap(left, right); + BM_ASSERT(csv_); + this->go_to(pos); +} - if (left >= csv.size()) - return; - - size_ = csv.size_; max_id_ = csv.max_id_; - in_sync_ = false; +//--------------------------------------------------------------------- - const bvector_type* arg_bv_null = csv.sv_.get_null_bvector(); - size_type sv_left, sv_right; - bool range_valid = csv.resolve_range(left, right, &sv_left, &sv_right); +template +void rsc_sparse_vector::const_iterator::go_to(size_type pos) BMNOEXCEPT +{ + pos_ = (!csv_ || pos >= csv_->size()) ? bm::id_max : pos; + buf_ptr_ = 0; +} - if (!range_valid) +//--------------------------------------------------------------------- + +template +bool rsc_sparse_vector::const_iterator::advance() BMNOEXCEPT +{ + if (pos_ == bm::id_max) // nothing to do, we are at the end + return false; + ++pos_; + if (pos_ >= csv_->size()) { - sv_.clear(); - sv_.resize(size_); - bvector_type* bv_null = sv_.get_null_bvect(); - bv_null->copy_range(*arg_bv_null, 0, right); - return; + this->invalidate(); + return false; + } + if (buf_ptr_) + { + ++buf_ptr_; + if (buf_ptr_ - ((value_type*)vbuffer_.data()) >= n_buf_size) + buf_ptr_ = 0; + } + return true; +} + +//--------------------------------------------------------------------- + +template +typename rsc_sparse_vector::const_iterator::value_type +rsc_sparse_vector::const_iterator::value() const +{ + BM_ASSERT(this->valid()); + value_type v; + + if (!buf_ptr_) + { + vbuffer_.reserve(n_buf_size * sizeof(value_type)); + tbuffer_.reserve(n_buf_size * sizeof(value_type)); 
+ buf_ptr_ = (value_type*)(vbuffer_.data()); + value_type* tmp_buf_ptr = (value_type*) (tbuffer_.data()); + + csv_->decode_buf(buf_ptr_, tmp_buf_ptr, pos_, n_buf_size, true); + } + v = *buf_ptr_; + return v; +} + +//--------------------------------------------------------------------- + +template +void rsc_sparse_vector::const_iterator::skip_zero_values() BMNOEXCEPT +{ + value_type v = value(); + if (buf_ptr_) + { + v = *buf_ptr_; + value_type* buf_end = ((value_type*)vbuffer_.data()) + n_buf_size; + while(!v) + { + ++pos_; + if (++buf_ptr_ < buf_end) + v = *buf_ptr_; + else + break; + } + if (pos_ >= csv_->size()) + { + pos_ = bm::id_max; + return; + } + if (buf_ptr_ >= buf_end) + buf_ptr_ = 0; } - bvector_type* bv_null = sv_.get_null_bvect(); - bv_null->copy_range(*arg_bv_null, 0, right); // not NULL vector gets a full copy - sv_.copy_range(csv.sv_, sv_left, sv_right, bm::no_null); // don't copy NULL } +//--------------------------------------------------------------------- + +template +bool rsc_sparse_vector::const_iterator::is_null() const BMNOEXCEPT +{ + return csv_->is_null(pos_); +} + + +//--------------------------------------------------------------------- + + } // namespace bm diff --git a/c++/include/util/bitset/bmsparsevec_serial.h b/c++/include/util/bitset/bmsparsevec_serial.h index 95fc3af2..958b44e9 100644 --- a/c++/include/util/bitset/bmsparsevec_serial.h +++ b/c++/include/util/bitset/bmsparsevec_serial.h @@ -927,7 +927,8 @@ unsigned sparse_vector_deserializer::load_header( BM_ASSERT(h1 == 'B' && (h2 == 'M' || h2 == 'C')); - if (h1 != 'B' && (h2 != 'M' || h2 != 'C')) // no magic header? + bool sig2_ok = (h2 == 'M' || h2 == 'C'); + if (h1 != 'B' || !sig2_ok) //&& (h2 != 'M' || h2 != 'C')) // no magic header? 
raise_invalid_header(); unsigned char bv_bo = dec.get_8(); (void) bv_bo; diff --git a/c++/include/util/bitset/bmsparsevec_util.h b/c++/include/util/bitset/bmsparsevec_util.h index 7cec6255..f61143a5 100644 --- a/c++/include/util/bitset/bmsparsevec_util.h +++ b/c++/include/util/bitset/bmsparsevec_util.h @@ -70,7 +70,7 @@ public: /*! \brief Move content from the argument address resolver */ - void move_from(bvps_addr_resolver& addr_res) BMNOEXEPT; + void move_from(bvps_addr_resolver& addr_res) BMNOEXCEPT; /*! \brief Resolve id to integer id (address) @@ -82,7 +82,7 @@ public: \return true if id is known and resolved successfully */ - bool resolve(size_type id_from, size_type* id_to) const; + bool resolve(size_type id_from, size_type* id_to) const BMNOEXCEPT; /*! \brief Resolve id to integer id (address) without sync check @@ -94,7 +94,7 @@ public: \return true if id is known and resolved successfully */ - bool get(size_type id_from, size_type* id_to) const; + bool get(size_type id_from, size_type* id_to) const BMNOEXCEPT; /*! \brief Set id (bit) to address resolver @@ -146,7 +146,7 @@ public: /*! 
\brief equality comparison */ - bool equal(const bvps_addr_resolver& addr_res) const; + bool equal(const bvps_addr_resolver& addr_res) const BMNOEXCEPT; protected: void construct_rs_index(); @@ -437,7 +437,7 @@ bvps_addr_resolver::bvps_addr_resolver(const bvps_addr_resolver& addr_res) template -void bvps_addr_resolver::move_from(bvps_addr_resolver& addr_res) BMNOEXEPT +void bvps_addr_resolver::move_from(bvps_addr_resolver& addr_res) BMNOEXCEPT { if (this != &addr_res) { @@ -459,7 +459,8 @@ void bvps_addr_resolver::move_from(bvps_addr_resolver& addr_res) BMNOEXEPT //--------------------------------------------------------------------- template -bool bvps_addr_resolver::resolve(size_type id_from, size_type* id_to) const +bool bvps_addr_resolver::resolve(size_type id_from, + size_type* id_to) const BMNOEXCEPT { BM_ASSERT(id_to); if (in_sync_) @@ -484,7 +485,8 @@ bool bvps_addr_resolver::resolve(size_type id_from, size_type* id_to) const //--------------------------------------------------------------------- template -bool bvps_addr_resolver::get(size_type id_from, size_type* id_to) const +bool bvps_addr_resolver::get(size_type id_from, + size_type* id_to) const BMNOEXCEPT { BM_ASSERT(id_to); BM_ASSERT(in_sync_); @@ -529,10 +531,10 @@ void bvps_addr_resolver::optimize(bm::word_t* temp_block) //--------------------------------------------------------------------- template -bool bvps_addr_resolver::equal(const bvps_addr_resolver& addr_res) const +bool bvps_addr_resolver::equal( + const bvps_addr_resolver& addr_res) const BMNOEXCEPT { - int cmp = addr_bv_.compare(addr_res.addr_bv_); - return (cmp == 0); + return addr_bv_.equal(addr_res.addr_bv_); } //--------------------------------------------------------------------- diff --git a/c++/include/util/bitset/bmsse2.h b/c++/include/util/bitset/bmsse2.h index 6748e05f..09998d73 100644 --- a/c++/include/util/bitset/bmsse2.h +++ b/c++/include/util/bitset/bmsse2.h @@ -398,6 +398,74 @@ unsigned sse2_gap_find(const bm::gap_word_t* 
BMRESTRICT pbuf, const bm::gap_word } return size; } + +/** + Hybrid binary search, starts as binary, then switches to linear scan + + \param buf - GAP buffer pointer. + \param pos - index of the element. + \param is_set - output. GAP value (0 or 1). + \return GAP index. + + @ingroup SSE2 +*/ +inline +unsigned sse2_gap_bfind(const unsigned short* BMRESTRICT buf, + unsigned pos, unsigned* BMRESTRICT is_set) +{ + unsigned start = 1; + unsigned end = 1 + ((*buf) >> 3); + unsigned dsize = end - start; + + if (dsize < 17) + { + start = bm::sse2_gap_find(buf+1, (bm::gap_word_t)pos, dsize); + *is_set = ((*buf) & 1) ^ (start & 1); + BM_ASSERT(buf[start+1] >= pos); + BM_ASSERT(buf[start] < pos || (start==0)); + + return start+1; + } + unsigned arr_end = end; + while (start != end) + { + unsigned curr = (start + end) >> 1; + if (buf[curr] < pos) + start = curr + 1; + else + end = curr; + + unsigned size = end - start; + if (size < 16) + { + size += (end != arr_end); + unsigned idx = + bm::sse2_gap_find(buf + start, (bm::gap_word_t)pos, size); + start += idx; + + BM_ASSERT(buf[start] >= pos); + BM_ASSERT(buf[start - 1] < pos || (start == 1)); + break; + } + } + + *is_set = ((*buf) & 1) ^ ((start-1) & 1); + return start; +} + +/** + Hybrid binary search, starts as binary, then switches to scan + @ingroup SSE2 +*/ +inline +unsigned sse2_gap_test(const unsigned short* BMRESTRICT buf, unsigned pos) +{ + unsigned is_set; + bm::sse2_gap_bfind(buf, pos, &is_set); + return is_set; +} + + #ifdef __GNUG__ #pragma GCC diagnostic pop #endif @@ -460,6 +528,8 @@ unsigned sse2_gap_find(const bm::gap_word_t* BMRESTRICT pbuf, const bm::gap_word #define VECT_SET_BLOCK(dst, value) \ sse2_set_block((__m128i*) dst, value) +#define VECT_GAP_BFIND(buf, pos, is_set) \ + sse2_gap_bfind(buf, pos, is_set) } // namespace diff --git a/c++/include/util/bitset/bmsse4.h b/c++/include/util/bitset/bmsse4.h index 0f2e03d9..d5362f15 100644 --- a/c++/include/util/bitset/bmsse4.h +++ 
b/c++/include/util/bitset/bmsse4.h @@ -576,6 +576,17 @@ bool sse4_is_all_one(const __m128i* BMRESTRICT block) return true; } +/*! + @brief check if SSE wave is all oxFFFF...FFF + @ingroup SSE4 +*/ +BMFORCEINLINE +bool sse42_test_all_one_wave(const void* ptr) +{ + return _mm_test_all_ones(_mm_loadu_si128((__m128i*)ptr)); +} + + /*! @brief check if wave of pointers is all NULL @ingroup SSE4 @@ -973,12 +984,14 @@ bool sse42_bit_find_first(const __m128i* BMRESTRICT block, #endif /*! - SSE4.2 check for one to two (variable len) 128 bit SSE lines for gap search results (8 elements) + SSE4.2 check for one to two (variable len) 128 bit SSE lines + for gap search results (8 elements) @ingroup SSE4 \internal */ inline -unsigned sse4_gap_find(const bm::gap_word_t* BMRESTRICT pbuf, const bm::gap_word_t pos, const unsigned size) +unsigned sse4_gap_find(const bm::gap_word_t* BMRESTRICT pbuf, + const bm::gap_word_t pos, const unsigned size) { BM_ASSERT(size <= 16); BM_ASSERT(size); @@ -1031,6 +1044,74 @@ unsigned sse4_gap_find(const bm::gap_word_t* BMRESTRICT pbuf, const bm::gap_word return size - bc; } +/** + Hybrid binary search, starts as binary, then switches to linear scan + + \param buf - GAP buffer pointer. + \param pos - index of the element. + \param is_set - output. GAP value (0 or 1). + \return GAP index. 
+ + @ingroup SSE4 +*/ +inline +unsigned sse42_gap_bfind(const unsigned short* BMRESTRICT buf, + unsigned pos, unsigned* BMRESTRICT is_set) +{ + unsigned start = 1; + unsigned end = 1 + ((*buf) >> 3); + unsigned dsize = end - start; + + if (dsize < 17) + { + start = bm::sse4_gap_find(buf+1, (bm::gap_word_t)pos, dsize); + *is_set = ((*buf) & 1) ^ (start & 1); + BM_ASSERT(buf[start+1] >= pos); + BM_ASSERT(buf[start] < pos || (start==0)); + + return start+1; + } + unsigned arr_end = end; + while (start != end) + { + unsigned curr = (start + end) >> 1; + if (buf[curr] < pos) + start = curr + 1; + else + end = curr; + + unsigned size = end - start; + if (size < 16) + { + size += (end != arr_end); + unsigned idx = + bm::sse4_gap_find(buf + start, (bm::gap_word_t)pos, size); + start += idx; + + BM_ASSERT(buf[start] >= pos); + BM_ASSERT(buf[start - 1] < pos || (start == 1)); + break; + } + } + + *is_set = ((*buf) & 1) ^ ((start-1) & 1); + return start; +} + +/** + Hybrid binary search, starts as binary, then switches to scan + @ingroup SSE4 +*/ +inline +unsigned sse42_gap_test(const unsigned short* BMRESTRICT buf, unsigned pos) +{ + unsigned is_set; + bm::sse42_gap_bfind(buf, pos, &is_set); + return is_set; +} + + + /** Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array @@ -1751,6 +1832,8 @@ void sse42_bit_block_xor(bm::word_t* target_block, #define VECT_BIT_BLOCK_XOR(t, src, src_xor, d) \ sse42_bit_block_xor(t, src, src_xor, d) +#define VECT_GAP_BFIND(buf, pos, is_set) \ + sse42_gap_bfind(buf, pos, is_set) #ifdef __GNUG__ #pragma GCC diagnostic pop diff --git a/c++/include/util/bitset/bmsse_util.h b/c++/include/util/bitset/bmsse_util.h index 92ad1300..e2504624 100644 --- a/c++/include/util/bitset/bmsse_util.h +++ b/c++/include/util/bitset/bmsse_util.h @@ -823,9 +823,6 @@ void sse2_stream_block(__m128i* BMRESTRICT dst, inline void sse2_invert_block(__m128i* dst) { - //__m128i mZ = _mm_setzero_si128(); - //__m128i maskF = 
_mm_cmpeq_epi8(mZ, mZ); // 0xFF.. - __m128i maskF = _mm_set1_epi32(~0u); __m128i* BMRESTRICT dst_end = (__m128i*)((bm::word_t*)(dst) + bm::set_block_size); diff --git a/c++/include/util/bitset/bmstrsparsevec.h b/c++/include/util/bitset/bmstrsparsevec.h index 82ef9e2a..a2a0da37 100644 --- a/c++/include/util/bitset/bmstrsparsevec.h +++ b/c++/include/util/bitset/bmstrsparsevec.h @@ -95,19 +95,19 @@ public: { public: const_reference(const str_sparse_vector& str_sv, - size_type idx) BMNOEXEPT + size_type idx) BMNOEXCEPT : str_sv_(str_sv), idx_(idx) {} - operator const value_type*() const + operator const value_type*() const BMNOEXCEPT { str_sv_.get(idx_, buf_, MAX_STR_SIZE); return &(buf_[0]); } - bool operator==(const const_reference& ref) const + bool operator==(const const_reference& ref) const BMNOEXCEPT { return bool(*this) == bool(ref); } - bool is_null() const { return str_sv_.is_null(idx_); } + bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); } private: const str_sparse_vector& str_sv_; size_type idx_; @@ -122,11 +122,11 @@ public: { public: reference(str_sparse_vector& str_sv, - size_type idx) BMNOEXEPT + size_type idx) BMNOEXCEPT : str_sv_(str_sv), idx_(idx) {} - operator const value_type*() const + operator const value_type*() const BMNOEXCEPT { str_sv_.get(idx_, buf_, MAX_STR_SIZE); return &(buf_[0]); @@ -144,9 +144,9 @@ public: str_sv_.set(idx_, str); return *this; } - bool operator==(const reference& ref) const + bool operator==(const reference& ref) const BMNOEXCEPT { return bool(*this) == bool(ref); } - bool is_null() const { return str_sv_.is_null(idx_); } + bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); } private: str_sparse_vector& str_sv_; size_type idx_; @@ -183,55 +183,56 @@ public: typedef CharType* pointer; typedef CharType*& reference; public: - const_iterator(); - const_iterator(const str_sparse_vector_type* sv); - const_iterator(const str_sparse_vector_type* sv, size_type pos); - const_iterator(const 
const_iterator& it); + const_iterator() BMNOEXCEPT; + const_iterator(const str_sparse_vector_type* sv) BMNOEXCEPT; + const_iterator(const str_sparse_vector_type* sv, size_type pos) BMNOEXCEPT; + const_iterator(const const_iterator& it) BMNOEXCEPT; - bool operator==(const const_iterator& it) const + bool operator==(const const_iterator& it) const BMNOEXCEPT { return (pos_ == it.pos_) && (sv_ == it.sv_); } - bool operator!=(const const_iterator& it) const + bool operator!=(const const_iterator& it) const BMNOEXCEPT { return ! operator==(it); } - bool operator < (const const_iterator& it) const + bool operator < (const const_iterator& it) const BMNOEXCEPT { return pos_ < it.pos_; } - bool operator <= (const const_iterator& it) const + bool operator <= (const const_iterator& it) const BMNOEXCEPT { return pos_ <= it.pos_; } - bool operator > (const const_iterator& it) const + bool operator > (const const_iterator& it) const BMNOEXCEPT { return pos_ > it.pos_; } - bool operator >= (const const_iterator& it) const + bool operator >= (const const_iterator& it) const BMNOEXCEPT { return pos_ >= it.pos_; } /// \brief Get current position (value) - const value_type* operator*() const { return this->value(); } + const value_type* operator*() const BMNOEXCEPT { return this->value(); } /// \brief Advance to the next available value - const_iterator& operator++() { this->advance(); return *this; } + const_iterator& operator++() BMNOEXCEPT + { this->advance(); return *this; } /// \brief Advance to the next available value - const_iterator& operator++(int) + const_iterator& operator++(int) BMNOEXCEPT { const_iterator tmp(*this);this->advance(); return tmp; } /// \brief Get current position (value) - const value_type* value() const; + const value_type* value() const BMNOEXCEPT; /// \brief Get NULL status - bool is_null() const { return sv_->is_null(this->pos_); } + bool is_null() const BMNOEXCEPT { return sv_->is_null(this->pos_); } /// Returns true if iterator is at a valid 
position - bool valid() const { return pos_ != bm::id_max; } + bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; } /// Invalidate current iterator - void invalidate() { pos_ = bm::id_max; } + void invalidate() BMNOEXCEPT { pos_ = bm::id_max; } /// Current position (index) in the vector - size_type pos() const { return pos_; } + size_type pos() const BMNOEXCEPT { return pos_; } /// re-position to a specified position - void go_to(size_type pos); + void go_to(size_type pos) BMNOEXCEPT; /// advance iterator forward by one - void advance(); + void advance() BMNOEXCEPT; protected: typedef bm::heap_matrix&& str_sv) BMNOEXEPT + str_sparse_vector(str_sparse_vector&& str_sv) BMNOEXCEPT { parent_type::swap(str_sv); remap_flags_ = str_sv.remap_flags_; @@ -403,7 +404,7 @@ public: /*! move assignmment operator */ str_sparse_vector& operator = - (str_sparse_vector&& str_sv) BMNOEXEPT + (str_sparse_vector&& str_sv) BMNOEXCEPT { if (this != &str_sv) { @@ -475,7 +476,8 @@ public: @return string length */ - size_type get(size_type idx, value_type* str, size_type buf_size) const; + size_type get(size_type idx, + value_type* str, size_type buf_size) const BMNOEXCEPT; /*! \brief set specified element with bounds checking and automatic resize @@ -568,7 +570,7 @@ public: } /*! Swap content */ - void swap(str_sparse_vector& str_sv) BMNOEXEPT; + void swap(str_sparse_vector& str_sv) BMNOEXCEPT; ///@} @@ -589,14 +591,14 @@ public: \return 0 - equal, < 0 - vect[i] < str, >0 otherwise */ - int compare(size_type idx, const value_type* str) const; + int compare(size_type idx, const value_type* str) const BMNOEXCEPT; /** \brief Find size of common prefix between two vector elements in octets \return size of common prefix */ - unsigned common_prefix_length(size_type idx1, size_type idx2) const; + unsigned common_prefix_length(size_type idx1, size_type idx2) const BMNOEXCEPT; ///@} @@ -606,7 +608,7 @@ public: ///@{ /*! 
\brief resize to zero, free memory */ - void clear() BMNOEXEPT; + void clear() BMNOEXCEPT; /*! \brief clear range (assign bit 0 for all plains) @@ -650,13 +652,11 @@ public: static size_type max_str() { return sv_octet_plains; } /*! \brief get effective string length used in vector - - Method returns efficiency, how close are we - to reserved maximum. - + Calculate and returns efficiency, how close are we + to the reserved maximum. \return current string length maximum */ - size_type effective_max_str() const; + size_type effective_max_str() const BMNOEXCEPT; /*! \brief get effective string length used in vector \return current string length maximum @@ -691,7 +691,9 @@ public: @sa statistics */ - void calc_stat(struct str_sparse_vector::statistics* st) const; + void calc_stat( + struct str_sparse_vector::statistics* st + ) const BMNOEXCEPT; ///@} @@ -701,15 +703,15 @@ public: //@{ /** Provide const iterator access to container content */ - const_iterator begin() const; + const_iterator begin() const BMNOEXCEPT; /** Provide const iterator access to the end */ - const_iterator end() const { return const_iterator(this, bm::id_max); } + const_iterator end() const BMNOEXCEPT { return const_iterator(this, bm::id_max); } /** Get const_itertor re-positioned to specific element @param idx - position in the sparse vector */ - const_iterator get_const_iterator(size_type idx) const + const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT { return const_iterator(this, idx); } /** Provide back insert iterator @@ -730,7 +732,7 @@ public: /** \brief trait if sparse vector is "compressed" (false) */ static - bool is_compressed() { return false; } + bool is_compressed() BMNOEXCEPT { return false; } ///@} @@ -745,7 +747,7 @@ public: /** Get remapping status (true|false) */ - bool is_remap() const { return remap_flags_ != 0; } + bool is_remap() const BMNOEXCEPT { return remap_flags_ != 0; } /** Build remapping profile and load content from another sparse vector @@ -757,7 
+759,7 @@ public: Calculate flags which octets are present on each byte-plain. @internal */ - void calc_octet_stat(plain_octet_matrix_type& octet_matrix) const; + void calc_octet_stat(plain_octet_matrix_type& octet_matrix) const BMNOEXCEPT; static void build_octet_remap( @@ -771,10 +773,11 @@ public: @internal */ static - bool remap_tosv(value_type* sv_str, - size_type buf_size, - const value_type* str, - const plain_octet_matrix_type& octet_remap_matrix2); + bool remap_tosv(value_type* BMRESTRICT sv_str, + size_type buf_size, + const value_type* BMRESTRICT str, + const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix2 + ) BMNOEXCEPT; /*! remap string from external (ASCII) system to matrix internal code @@ -782,7 +785,7 @@ public: */ bool remap_tosv(value_type* sv_str, size_type buf_size, - const value_type* str) const + const value_type* str) const BMNOEXCEPT { return remap_tosv(sv_str, buf_size, str, remap_matrix2_); } @@ -793,10 +796,12 @@ public: @internal */ static - bool remap_fromsv(value_type* str, - size_type buf_size, - const value_type* sv_str, - const plain_octet_matrix_type& octet_remap_matrix1); + bool remap_fromsv( + value_type* BMRESTRICT str, + size_type buf_size, + const value_type* BMRESTRICT sv_str, + const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix1 + ) BMNOEXCEPT; /*! 
re-calculate remap matrix2 based on matrix1 @internal @@ -949,18 +954,18 @@ public: \return true, if it is the same */ bool equal(const str_sparse_vector& sv, - bm::null_support null_able = bm::use_null) const; + bm::null_support null_able = bm::use_null) const BMNOEXCEPT; /** \brief find position of compressed element by its rank */ static - bool find_rank(size_type rank, size_type& pos); + bool find_rank(size_type rank, size_type& pos) BMNOEXCEPT; /** \brief size of sparse vector (may be different for RSC) */ - size_type effective_size() const { return size(); } + size_type effective_size() const BMNOEXCEPT { return size(); } protected: @@ -1138,7 +1143,8 @@ str_sparse_vector::str_sparse_vector( //--------------------------------------------------------------------- template -void str_sparse_vector::swap(str_sparse_vector& str_sv) BMNOEXEPT +void str_sparse_vector::swap( + str_sparse_vector& str_sv) BMNOEXCEPT { parent_type::swap(str_sv); bm::xor_swap(remap_flags_, str_sv.remap_flags_); @@ -1287,7 +1293,7 @@ void str_sparse_vector::insert_value_no_null( template typename str_sparse_vector::size_type str_sparse_vector::get( - size_type idx, value_type* str, size_type buf_size) const + size_type idx, value_type* str, size_type buf_size) const BMNOEXCEPT { size_type i = 0; for (; i < MAX_STR_SIZE; ++i) @@ -1330,7 +1336,8 @@ void str_sparse_vector::optimize( template void str_sparse_vector::calc_stat( - struct str_sparse_vector::statistics* st) const + struct str_sparse_vector::statistics* st + ) const BMNOEXCEPT { BM_ASSERT(st); typename bvector_type::statistics stbv; @@ -1362,7 +1369,7 @@ void str_sparse_vector::calc_stat( template int str_sparse_vector::compare( size_type idx, - const value_type* str) const + const value_type* str) const BMNOEXCEPT { BM_ASSERT(str); int res = 0; @@ -1390,7 +1397,7 @@ int str_sparse_vector::compare( template unsigned str_sparse_vector::common_prefix_length( - size_type idx1, size_type idx2) const + size_type idx1, size_type idx2) 
const BMNOEXCEPT { unsigned i = 0; for (; i < MAX_STR_SIZE; ++i) @@ -1416,8 +1423,9 @@ unsigned str_sparse_vector::common_prefix_length( template bool -str_sparse_vector::find_rank(size_type rank, - size_type& pos) +str_sparse_vector::find_rank( + size_type rank, + size_type& pos) BMNOEXCEPT { BM_ASSERT(rank); pos = rank - 1; @@ -1428,7 +1436,8 @@ str_sparse_vector::find_rank(size_type rank, template typename str_sparse_vector::size_type -str_sparse_vector::effective_max_str() const +str_sparse_vector::effective_max_str() + const BMNOEXCEPT { for (int i = MAX_STR_SIZE-1; i >= 0; --i) { @@ -1446,7 +1455,7 @@ str_sparse_vector::effective_max_str() const template void str_sparse_vector::calc_octet_stat( - plain_octet_matrix_type& octet_matrix) const + plain_octet_matrix_type& octet_matrix) const BMNOEXCEPT { octet_matrix.init(); octet_matrix.set_zero(); @@ -1531,10 +1540,10 @@ void str_sparse_vector::recalc_remap_matrix2() template bool str_sparse_vector::remap_tosv( - value_type* sv_str, - size_type buf_size, - const value_type* str, - const plain_octet_matrix_type& octet_remap_matrix2) + value_type* BMRESTRICT sv_str, + size_type buf_size, + const value_type* BMRESTRICT str, + const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix2) BMNOEXCEPT { for (unsigned i = 0; i < buf_size; ++i) { @@ -1559,10 +1568,11 @@ bool str_sparse_vector::remap_tosv( template bool str_sparse_vector::remap_fromsv( - value_type* str, - size_type buf_size, - const value_type* sv_str, - const plain_octet_matrix_type& octet_remap_matrix1) + value_type* BMRESTRICT str, + size_type buf_size, + const value_type* BMRESTRICT sv_str, + const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix1 + ) BMNOEXCEPT { for (unsigned i = 0; i < buf_size; ++i) { @@ -1586,7 +1596,8 @@ bool str_sparse_vector::remap_fromsv( //--------------------------------------------------------------------- template -void str_sparse_vector::remap_from(const str_sparse_vector& str_sv) +void 
+str_sparse_vector::remap_from(const str_sparse_vector& str_sv) { if (str_sv.is_remap()) { @@ -1639,7 +1650,7 @@ void str_sparse_vector::sync(bool /*force*/) template bool str_sparse_vector::equal( const str_sparse_vector& sv, - bm::null_support null_able) const + bm::null_support null_able) const BMNOEXCEPT { // at this point both vectors should have the same remap settings // to be considered "equal". @@ -1686,7 +1697,7 @@ void str_sparse_vector::copy_range( template typename str_sparse_vector::const_iterator -str_sparse_vector::begin() const +str_sparse_vector::begin() const BMNOEXCEPT { typedef typename str_sparse_vector::const_iterator it_type; @@ -1696,7 +1707,7 @@ str_sparse_vector::begin() const //--------------------------------------------------------------------- template -void str_sparse_vector::clear() BMNOEXEPT +void str_sparse_vector::clear() BMNOEXCEPT { parent_type::clear(); } @@ -1736,7 +1747,7 @@ void str_sparse_vector::throw_bad_value( template -str_sparse_vector::const_iterator::const_iterator() +str_sparse_vector::const_iterator::const_iterator() BMNOEXCEPT : sv_(0), pos_(bm::id_max), pos_in_buf_(~size_type(0)) {} @@ -1744,7 +1755,7 @@ str_sparse_vector::const_iterator::const_iterator() template str_sparse_vector::const_iterator::const_iterator( - const str_sparse_vector::const_iterator& it) + const str_sparse_vector::const_iterator& it) BMNOEXCEPT : sv_(it.sv_), pos_(it.pos_), pos_in_buf_(~size_type(0)) {} @@ -1752,7 +1763,7 @@ str_sparse_vector::const_iterator::const_iterator( template str_sparse_vector::const_iterator::const_iterator( - const str_sparse_vector* sv) + const str_sparse_vector* sv) BMNOEXCEPT : sv_(sv), pos_(sv->empty() ? 
bm::id_max : 0), pos_in_buf_(~size_type(0)) {} @@ -1761,7 +1772,7 @@ str_sparse_vector::const_iterator::const_iterator( template str_sparse_vector::const_iterator::const_iterator( const str_sparse_vector* sv, - typename str_sparse_vector::size_type pos) + typename str_sparse_vector::size_type pos) BMNOEXCEPT : sv_(sv), pos_(pos >= sv->size() ? bm::id_max : pos), pos_in_buf_(~size_type(0)) {} @@ -1769,7 +1780,7 @@ str_sparse_vector::const_iterator::const_iterator( template const typename str_sparse_vector::value_type* -str_sparse_vector::const_iterator::value() const +str_sparse_vector::const_iterator::value() const BMNOEXCEPT { BM_ASSERT(sv_); BM_ASSERT(this->valid()); @@ -1791,8 +1802,10 @@ str_sparse_vector::const_iterator::value() const //--------------------------------------------------------------------- template -void str_sparse_vector::const_iterator::go_to( - typename str_sparse_vector::size_type pos) +void +str_sparse_vector::const_iterator::go_to( + typename str_sparse_vector::size_type pos + ) BMNOEXCEPT { pos_ = (!sv_ || pos >= sv_->size()) ? 
bm::id_max : pos; pos_in_buf_ = ~size_type(0); @@ -1801,7 +1814,8 @@ void str_sparse_vector::const_iterator::go_to( //--------------------------------------------------------------------- template -void str_sparse_vector::const_iterator::advance() +void +str_sparse_vector::const_iterator::advance() BMNOEXCEPT { if (pos_ == bm::id_max) // nothing to do, we are at the end return; @@ -1825,7 +1839,7 @@ void str_sparse_vector::const_iterator::advance() //--------------------------------------------------------------------- template -str_sparse_vector::back_insert_iterator::back_insert_iterator() +str_sparse_vector::back_insert_iterator::back_insert_iterator() BMNOEXCEPT : sv_(0), bv_null_(0), pos_in_buf_(~size_type(0)), prev_nb_(0) {} @@ -1833,7 +1847,7 @@ str_sparse_vector::back_insert_iterator::back_insert template str_sparse_vector::back_insert_iterator::back_insert_iterator( - str_sparse_vector* sv) + str_sparse_vector* sv) BMNOEXCEPT : sv_(sv), pos_in_buf_(~size_type(0)) { if (sv) @@ -1851,7 +1865,7 @@ str_sparse_vector::back_insert_iterator::back_insert template str_sparse_vector::back_insert_iterator::back_insert_iterator( -const str_sparse_vector::back_insert_iterator& bi) +const str_sparse_vector::back_insert_iterator& bi) BMNOEXCEPT : sv_(bi.sv_), bv_null_(bi.bv_null_), pos_in_buf_(~size_type(0)), prev_nb_(bi.prev_nb_) { BM_ASSERT(bi.empty()); @@ -1868,7 +1882,9 @@ str_sparse_vector::back_insert_iterator::~back_inser //--------------------------------------------------------------------- template -bool str_sparse_vector::back_insert_iterator::empty() const +bool +str_sparse_vector::back_insert_iterator::empty() + const BMNOEXCEPT { return (pos_in_buf_ == ~size_type(0) || !sv_); } diff --git a/c++/include/util/bitset/bmtimer.h b/c++/include/util/bitset/bmtimer.h index 3c02262d..d8d34f77 100644 --- a/c++/include/util/bitset/bmtimer.h +++ b/c++/include/util/bitset/bmtimer.h @@ -46,7 +46,7 @@ public: std::chrono::duration duration; unsigned repeats; - 
statistics() : repeats(1) {} + statistics() : duration(0), repeats(1) {} statistics(std::chrono::duration d, unsigned r) : duration(d), repeats(r) @@ -147,7 +147,13 @@ public: if (ms > 1000) { double sec = ms / 1000; - std::cout << it->first << "; " << std::setprecision(4) << sec << " sec" << std::endl; + if (sec > 60) + { + double min = sec / 60; + std::cout << it->first << "; " << std::setprecision(4) << min << " min" << std::endl; + } + else + std::cout << it->first << "; " << std::setprecision(4) << sec << " sec" << std::endl; } else std::cout << it->first << "; " << it->second.duration.count() << " ms" << std::endl; diff --git a/c++/include/util/bitset/bmundef.h b/c++/include/util/bitset/bmundef.h index 324a7c23..7a1796a0 100644 --- a/c++/include/util/bitset/bmundef.h +++ b/c++/include/util/bitset/bmundef.h @@ -72,7 +72,10 @@ For more information please visit: http://bitmagic.io #undef VECT_ARR_BLOCK_LOOKUP #undef VECT_SET_BLOCK_BITS + #undef VECT_BLOCK_CHANGE +#undef VECT_BLOCK_CHANGE_BC + #undef VECT_BIT_TO_GAP #undef VECT_AND_DIGEST @@ -80,7 +83,12 @@ For more information please visit: http://bitmagic.io #undef VECT_AND_DIGEST_5WAY #undef VECT_BLOCK_SET_DIGEST +#undef VECT_BLOCK_XOR_CHANGE +#undef VECT_BIT_BLOCK_XOR + +#undef VECT_BIT_FIND_FIRST #undef VECT_BIT_FIND_DIFF +#undef VECT_GAP_BFIND #undef BMI1_SELECT64 #undef BMI2_SELECT64 diff --git a/c++/include/util/bitset/bmutil.h b/c++/include/util/bitset/bmutil.h index 7dba20db..60918615 100644 --- a/c++/include/util/bitset/bmutil.h +++ b/c++/include/util/bitset/bmutil.h @@ -94,22 +94,34 @@ namespace bm bm::word_t* end() { return (b_.w32 + bm::set_block_size); } }; - /** Get minimum of 2 values */ template -T min_value(T v1, T v2) +T min_value(T v1, T v2) BMNOEXCEPT { return v1 < v2 ? 
v1 : v2; } +/** + \brief ad-hoc conditional expressions + \internal +*/ +template struct conditional +{ + static bool test() { return true; } +}; +template <> struct conditional +{ + static bool test() { return false; } +}; + /** Fast loop-less function to find LOG2 */ template -T ilog2(T x) +T ilog2(T x) BMNOEXCEPT { unsigned int l = 0; @@ -122,7 +134,7 @@ T ilog2(T x) } template<> -inline bm::gap_word_t ilog2(gap_word_t x) +inline bm::gap_word_t ilog2(gap_word_t x) BMNOEXCEPT { unsigned int l = 0; if (x >= 1<<8) { x = (bm::gap_word_t)(x >> 8); l |= 8; } @@ -140,7 +152,7 @@ template class ptr_guard { public: - ptr_guard(T* p) : ptr_(p) {} + ptr_guard(T* p) BMNOEXCEPT : ptr_(p) {} ~ptr_guard() { delete ptr_; } private: ptr_guard(const ptr_guard& p); @@ -154,8 +166,7 @@ private: @ingroup bitfunc @internal */ -inline -unsigned count_leading_zeros(unsigned x) +inline unsigned count_leading_zeros(unsigned x) BMNOEXCEPT { unsigned n = (x >= (1U << 16)) ? @@ -171,7 +182,7 @@ unsigned count_leading_zeros(unsigned x) @internal */ inline -unsigned count_trailing_zeros(unsigned v) +unsigned count_trailing_zeros(unsigned v) BMNOEXCEPT { // (v & -v) isolates the last set bit return unsigned(bm::tzcnt_table::_lut[(-v & v) % 37]); @@ -181,7 +192,7 @@ unsigned count_trailing_zeros(unsigned v) Lookup table based integer LOG2 */ template -T ilog2_LUT(T x) +T ilog2_LUT(T x) BMNOEXCEPT { unsigned l = 0; if (x & 0xffff0000) @@ -200,7 +211,7 @@ T ilog2_LUT(T x) Lookup table based short integer LOG2 */ template<> -inline bm::gap_word_t ilog2_LUT(bm::gap_word_t x) +inline bm::gap_word_t ilog2_LUT(bm::gap_word_t x) BMNOEXCEPT { bm::gap_word_t l = 0; if (x & 0xff00) @@ -218,7 +229,7 @@ inline bm::gap_word_t ilog2_LUT(bm::gap_word_t x) #ifdef __GNUG__ BMFORCEINLINE -unsigned bsf_asm32(unsigned int v) +unsigned bsf_asm32(unsigned int v) BMNOEXCEPT { unsigned r; asm volatile(" bsfl %1, %0": "=r"(r): "rm"(v) ); @@ -226,7 +237,7 @@ unsigned bsf_asm32(unsigned int v) } BMFORCEINLINE -unsigned 
bsr_asm32(unsigned int v) +unsigned bsr_asm32(unsigned int v) BMNOEXCEPT { unsigned r; asm volatile(" bsrl %1, %0": "=r"(r): "rm"(v) ); @@ -240,7 +251,7 @@ unsigned bsr_asm32(unsigned int v) #if defined(_M_AMD64) || defined(_M_X64) // inline assembly not supported BMFORCEINLINE -unsigned int bsr_asm32(unsigned int value) +unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT { unsigned long r; _BitScanReverse(&r, value); @@ -248,7 +259,7 @@ unsigned int bsr_asm32(unsigned int value) } BMFORCEINLINE -unsigned int bsf_asm32(unsigned int value) +unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT { unsigned long r; _BitScanForward(&r, value); @@ -258,13 +269,13 @@ unsigned int bsf_asm32(unsigned int value) #else BMFORCEINLINE -unsigned int bsr_asm32(unsigned int value) +unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT { __asm bsr eax, value } BMFORCEINLINE -unsigned int bsf_asm32(unsigned int value) +unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT { __asm bsf eax, value } @@ -280,14 +291,14 @@ unsigned int bsf_asm32(unsigned int value) // http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.8562 // template -T bit_scan_fwd(T v) +T bit_scan_fwd(T v) BMNOEXCEPT { return DeBruijn_bit_position::_multiply[(((v & -v) * 0x077CB531U)) >> 27]; } inline -unsigned bit_scan_reverse32(unsigned value) +unsigned bit_scan_reverse32(unsigned value) BMNOEXCEPT { BM_ASSERT(value); #if defined(BM_USE_GCC_BUILD) @@ -302,7 +313,7 @@ unsigned bit_scan_reverse32(unsigned value) } inline -unsigned bit_scan_forward32(unsigned value) +unsigned bit_scan_forward32(unsigned value) BMNOEXCEPT { BM_ASSERT(value); #if defined(BM_USE_GCC_BUILD) @@ -318,7 +329,7 @@ unsigned bit_scan_forward32(unsigned value) BMFORCEINLINE -unsigned long long bmi_bslr_u64(unsigned long long w) +unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT { #if defined(BMAVX2OPT) || defined (BMAVX512OPT) return _blsr_u64(w); @@ -339,7 +350,7 @@ unsigned long long bmi_blsi_u64(unsigned long long w) 
/// 64-bit bit-scan reverse inline -unsigned count_leading_zeros_u64(bm::id64_t w) +unsigned count_leading_zeros_u64(bm::id64_t w) BMNOEXCEPT { BM_ASSERT(w); #if defined(BMAVX2OPT) || defined (BMAVX512OPT) @@ -367,7 +378,7 @@ unsigned count_leading_zeros_u64(bm::id64_t w) /// 64-bit bit-scan fwd inline -unsigned count_trailing_zeros_u64(bm::id64_t w) +unsigned count_trailing_zeros_u64(bm::id64_t w) BMNOEXCEPT { BM_ASSERT(w); @@ -396,6 +407,72 @@ unsigned count_trailing_zeros_u64(bm::id64_t w) +/*! + Returns BSR value + @ingroup bitfunc +*/ +template +unsigned bit_scan_reverse(T value) BMNOEXCEPT +{ + BM_ASSERT(value); + + if (bm::conditional::test()) + { + #if defined(BM_USE_GCC_BUILD) + return (unsigned) (63 - __builtin_clzll(value)); + #else + bm::id64_t v8 = value; + v8 >>= 32; + unsigned v = (unsigned)v8; + if (v) + { + v = bm::bit_scan_reverse32(v); + return v + 32; + } + #endif + } + return bm::bit_scan_reverse32((unsigned)value); +} + +/*! \brief and functor + \internal + */ +struct and_func +{ + static + BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2 + { return v1 & v2; } +}; +/*! \brief xor functor + \internal + */ +struct xor_func +{ + static + BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2 + { return v1 ^ v2; } +}; +/*! \brief or functor + \internal + */ +struct or_func +{ + static + BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2 + { return v1 | v2; } +}; +/*! 
\brief sub functor + \internal + */ +struct sub_func +{ + static + BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2 + { return v1 & ~v2; } +}; + + + #ifdef __GNUG__ #pragma GCC diagnostic pop #endif diff --git a/c++/include/util/bitset/bmxor.h b/c++/include/util/bitset/bmxor.h index b5bd1a4e..60173042 100644 --- a/c++/include/util/bitset/bmxor.h +++ b/c++/include/util/bitset/bmxor.h @@ -36,7 +36,7 @@ namespace bm inline unsigned bit_block_xor_change32(const bm::word_t* BMRESTRICT block, const bm::word_t* BMRESTRICT xor_block, - unsigned size) + unsigned size) BMNOEXCEPT { unsigned gap_count = 1; @@ -80,7 +80,7 @@ unsigned bit_block_xor_change32(const bm::word_t* BMRESTRICT block, inline unsigned bit_block_xor_change(const bm::word_t* BMRESTRICT block, const bm::word_t* BMRESTRICT xor_block, - unsigned size) + unsigned size) BMNOEXCEPT { #ifdef VECT_BLOCK_XOR_CHANGE return VECT_BLOCK_XOR_CHANGE(block, xor_block, size); @@ -112,7 +112,7 @@ struct block_waves_xor_descr inline void compute_complexity_descr( const bm::word_t* BMRESTRICT block, - block_waves_xor_descr& BMRESTRICT x_descr) + block_waves_xor_descr& BMRESTRICT x_descr) BMNOEXCEPT { for (unsigned i = 0; i < bm::block_waves; ++i) { @@ -146,7 +146,7 @@ bm::id64_t compute_xor_complexity_descr( const bm::word_t* BMRESTRICT block, const bm::word_t* BMRESTRICT xor_block, block_waves_xor_descr& BMRESTRICT x_descr, - unsigned& block_gain) + unsigned& BMRESTRICT block_gain) BMNOEXCEPT { block_gain = 0; // approximate block gain (sum of sub-waves) bm::id64_t digest = 0; @@ -191,7 +191,7 @@ bm::id64_t compute_xor_complexity_descr( inline void bit_block_xor(bm::word_t* target_block, const bm::word_t* block, const bm::word_t* xor_block, - bm::id64_t digest) + bm::id64_t digest) BMNOEXCEPT { BM_ASSERT(target_block); BM_ASSERT(block); @@ -267,21 +267,23 @@ public: } /// Get reference list size - size_type size() const { return (size_type)ref_bvects_.size(); } + size_type size() const BMNOEXCEPT { return 
(size_type)ref_bvects_.size(); } /// Get reference vector by the index in this ref-vector - const bvector_type* get_bv(size_type idx) const { return ref_bvects_[idx]; } + const bvector_type* get_bv(size_type idx) const BMNOEXCEPT + { return ref_bvects_[idx]; } /// Get reference row index by the index in this ref-vector - size_type get_row_idx(size_type idx) const { return (size_type)ref_bvects_rows_[idx]; } + size_type get_row_idx(size_type idx) const BMNOEXCEPT + { return (size_type)ref_bvects_rows_[idx]; } /// not-found value for find methods static - size_type not_found() { return ~(size_type(0)); } + size_type not_found() BMNOEXCEPT { return ~(size_type(0)); } /// Find vector index by the reference index /// @return ~0 if not found - size_type find(std::size_t ref_idx) const + size_type find(std::size_t ref_idx) const BMNOEXCEPT { size_type sz = size(); for (size_type i = 0; i < sz; ++i) @@ -333,13 +335,16 @@ public: typedef typename bvector_type::size_type size_type; public: - void set_ref_vector(const bv_ref_vector_type* ref_vect) { ref_vect_ = ref_vect; } - const bv_ref_vector_type& get_ref_vector() const { return *ref_vect_; } + void set_ref_vector(const bv_ref_vector_type* ref_vect) BMNOEXCEPT + { ref_vect_ = ref_vect; } + + const bv_ref_vector_type& get_ref_vector() const BMNOEXCEPT + { return *ref_vect_; } /** Compute statistics for the anchor search vector @param block - bit-block target */ - void compute_x_block_stats(const bm::word_t* block); + void compute_x_block_stats(const bm::word_t* block) BMNOEXCEPT; /** Scan for all candidate bit-blocks to find mask or match @return true if XOR complement or matching vector found @@ -360,23 +365,26 @@ public: /** Validate serialization target */ - bool validate_found(bm::word_t* xor_block, const bm::word_t* block) const; + bool validate_found(bm::word_t* xor_block, + const bm::word_t* block) const BMNOEXCEPT; - size_type found_ridx() const { return found_ridx_; } - const bm::word_t* get_found_block() const { 
return found_block_xor_; } - unsigned get_x_best_metric() const { return x_best_metric_; } - bm::id64_t get_xor_digest() const { return x_d64_; } + size_type found_ridx() const BMNOEXCEPT { return found_ridx_; } + const bm::word_t* get_found_block() const BMNOEXCEPT + { return found_block_xor_; } + unsigned get_x_best_metric() const BMNOEXCEPT { return x_best_metric_; } + bm::id64_t get_xor_digest() const BMNOEXCEPT { return x_d64_; } /// true if completely identical vector found - bool is_eq_found() const { return !x_best_metric_; } + bool is_eq_found() const BMNOEXCEPT { return !x_best_metric_; } - unsigned get_x_bc() const { return x_bc_; } - unsigned get_x_gc() const { return x_gc_; } - unsigned get_x_block_best() const { return x_block_best_metric_; } + unsigned get_x_bc() const BMNOEXCEPT { return x_bc_; } + unsigned get_x_gc() const BMNOEXCEPT { return x_gc_; } + unsigned get_x_block_best() const BMNOEXCEPT + { return x_block_best_metric_; } - bm::block_waves_xor_descr& get_descr() { return x_descr_; } + bm::block_waves_xor_descr& get_descr() BMNOEXCEPT { return x_descr_; } private: const bv_ref_vector_type* ref_vect_ = 0; ///< ref.vect for XOR filter @@ -400,7 +408,7 @@ private: // -------------------------------------------------------------------------- template -void xor_scanner::compute_x_block_stats(const bm::word_t* block) +void xor_scanner::compute_x_block_stats(const bm::word_t* block) BMNOEXCEPT { BM_ASSERT(IS_VALID_ADDR(block)); BM_ASSERT(!BM_IS_GAP(block)); @@ -439,7 +447,8 @@ bool xor_scanner::search_best_xor_mask(const bm::word_t* block, { const bvector_type* bv = ref_vect_->get_bv(ri); BM_ASSERT(bv); - const typename bvector_type::blocks_manager_type& bman = bv->get_blocks_manager(); + const typename bvector_type::blocks_manager_type& bman = + bv->get_blocks_manager(); const bm::word_t* block_xor = bman.get_block_ptr(i, j); if (!IS_VALID_ADDR(block_xor) || BM_IS_GAP(block_xor)) continue; @@ -487,7 +496,7 @@ bool 
xor_scanner::search_best_xor_mask(const bm::word_t* block, if (!xor_bc) // completely identical block? { unsigned pos; - bool f = bit_find_first_diff(block, block_xor, &pos); + bool f = bm::bit_find_first_diff(block, block_xor, &pos); x_best_metric_ += f; } } @@ -555,7 +564,7 @@ bool xor_scanner::search_best_xor_gap(const bm::word_t* block, template bool xor_scanner::validate_found(bm::word_t* xor_block, - const bm::word_t* block) const + const bm::word_t* block) const BMNOEXCEPT { bm::id64_t d64 = get_xor_digest(); BM_ASSERT(d64); diff --git a/c++/include/util/bitset/encoding.h b/c++/include/util/bitset/encoding.h index b30ac412..19c9112d 100644 --- a/c++/include/util/bitset/encoding.h +++ b/c++/include/util/bitset/encoding.h @@ -51,24 +51,24 @@ class encoder public: typedef unsigned char* position_type; public: - encoder(unsigned char* buf, size_t size); - void put_8(unsigned char c); - void put_16(bm::short_t s); - void put_16(const bm::short_t* s, unsigned count); - void put_24(bm::word_t w); - void put_32(bm::word_t w); - void put_32(const bm::word_t* w, unsigned count); - void put_48(bm::id64_t w); - void put_64(bm::id64_t w); + encoder(unsigned char* buf, size_t size) BMNOEXCEPT; + void put_8(unsigned char c) BMNOEXCEPT; + void put_16(bm::short_t s) BMNOEXCEPT; + void put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT; + void put_24(bm::word_t w) BMNOEXCEPT; + void put_32(bm::word_t w) BMNOEXCEPT; + void put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT; + void put_48(bm::id64_t w) BMNOEXCEPT; + void put_64(bm::id64_t w) BMNOEXCEPT; void put_prefixed_array_32(unsigned char c, - const bm::word_t* w, unsigned count); + const bm::word_t* w, unsigned count) BMNOEXCEPT; void put_prefixed_array_16(unsigned char c, const bm::short_t* s, unsigned count, - bool encode_count); - void memcpy(const unsigned char* src, size_t count); - size_t size() const; - unsigned char* get_pos() const; - void set_pos(unsigned char* buf_pos); + bool encode_count) BMNOEXCEPT; 
+ void memcpy(const unsigned char* src, size_t count) BMNOEXCEPT; + size_t size() const BMNOEXCEPT; + unsigned char* get_pos() const BMNOEXCEPT; + void set_pos(unsigned char* buf_pos) BMNOEXCEPT; private: unsigned char* buf_; unsigned char* start_; @@ -83,25 +83,25 @@ private: class decoder_base { public: - decoder_base(const unsigned char* buf) { buf_ = start_ = buf; } + decoder_base(const unsigned char* buf) BMNOEXCEPT { buf_ = start_ = buf; } /// Reads character from the decoding buffer. - unsigned char get_8() { return *buf_++; } + unsigned char get_8() BMNOEXCEPT { return *buf_++; } /// Returns size of the current decoding stream. - size_t size() const { return size_t(buf_ - start_); } + size_t size() const BMNOEXCEPT { return size_t(buf_ - start_); } /// change current position - void seek(int delta) { buf_ += delta; } + void seek(int delta) BMNOEXCEPT { buf_ += delta; } /// read bytes from the decode buffer - void memcpy(unsigned char* dst, size_t count); + void memcpy(unsigned char* dst, size_t count) BMNOEXCEPT; /// Return current buffer pointer - const unsigned char* get_pos() const { return buf_; } + const unsigned char* get_pos() const BMNOEXCEPT { return buf_; } /// Set current buffer pointer - void set_pos(const unsigned char* pos) { buf_ = pos; } + void set_pos(const unsigned char* pos) BMNOEXCEPT { buf_ = pos; } protected: const unsigned char* buf_; const unsigned char* start_; @@ -117,16 +117,16 @@ protected: class decoder : public decoder_base { public: - decoder(const unsigned char* buf); - bm::short_t get_16(); - bm::word_t get_24(); - bm::word_t get_32(); - bm::id64_t get_48(); - bm::id64_t get_64(); - void get_32(bm::word_t* w, unsigned count); - bool get_32_OR(bm::word_t* w, unsigned count); - void get_32_AND(bm::word_t* w, unsigned count); - void get_16(bm::short_t* s, unsigned count); + decoder(const unsigned char* buf) BMNOEXCEPT; + bm::short_t get_16() BMNOEXCEPT; + bm::word_t get_24() BMNOEXCEPT; + bm::word_t get_32() BMNOEXCEPT; + 
bm::id64_t get_48() BMNOEXCEPT; + bm::id64_t get_64() BMNOEXCEPT; + void get_32(bm::word_t* w, unsigned count) BMNOEXCEPT; + bool get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT; + void get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT; + void get_16(bm::short_t* s, unsigned count) BMNOEXCEPT; }; // ---------------------------------------------------------------- @@ -181,23 +181,23 @@ public: ~bit_out() { flush(); } /// issue single bit into encode bit-stream - void put_bit(unsigned value); + void put_bit(unsigned value) BMNOEXCEPT; /// issue count bits out of value - void put_bits(unsigned value, unsigned count); + void put_bits(unsigned value, unsigned count) BMNOEXCEPT; /// issue 0 into output stream - void put_zero_bit(); + void put_zero_bit() BMNOEXCEPT; /// issue specified number of 0s - void put_zero_bits(unsigned count); + void put_zero_bits(unsigned count) BMNOEXCEPT; /// Elias Gamma encode the specified value - void gamma(unsigned value); + void gamma(unsigned value) BMNOEXCEPT; /// Binary Interpolative array decode void bic_encode_u16(const bm::gap_word_t* arr, unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi) + bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT { bic_encode_u16_cm(arr, sz, lo, hi); } @@ -205,24 +205,24 @@ public: /// Binary Interpolative encoding (array of 16-bit ints) void bic_encode_u16_rg(const bm::gap_word_t* arr, unsigned sz, bm::gap_word_t lo, - bm::gap_word_t hi); + bm::gap_word_t hi) BMNOEXCEPT; /// Binary Interpolative encoding (array of 16-bit ints) /// cm - "center-minimal" void bic_encode_u16_cm(const bm::gap_word_t* arr, unsigned sz, bm::gap_word_t lo, - bm::gap_word_t hi); + bm::gap_word_t hi) BMNOEXCEPT; /// Binary Interpolative encoding (array of 32-bit ints) /// cm - "center-minimal" void bic_encode_u32_cm(const bm::word_t* arr, unsigned sz, - bm::word_t lo, bm::word_t hi); + bm::word_t lo, bm::word_t hi) BMNOEXCEPT; /// Flush the incomplete 32-bit accumulator word - void flush() { if (used_bits_) flush_accum(); 
} + void flush() BMNOEXCEPT { if (used_bits_) flush_accum(); } private: - void flush_accum() + void flush_accum() BMNOEXCEPT { dest_.put_32(accum_); used_bits_ = accum_ = 0; @@ -248,31 +248,32 @@ template class bit_in { public: - bit_in(TDecoder& decoder) + bit_in(TDecoder& decoder) BMNOEXCEPT : src_(decoder), used_bits_(unsigned(sizeof(accum_) * 8)), - accum_(0) + accum_(0) {} /// decode unsigned value using Elias Gamma coding - unsigned gamma(); + unsigned gamma() BMNOEXCEPT; /// read number of bits out of the stream - unsigned get_bits(unsigned count); + unsigned get_bits(unsigned count) BMNOEXCEPT; /// Binary Interpolative array decode void bic_decode_u16(bm::gap_word_t* arr, unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi) + bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT { bic_decode_u16_cm(arr, sz, lo, hi); } void bic_decode_u16_bitset(bm::word_t* block, unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi) + bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT { bic_decode_u16_cm_bitset(block, sz, lo, hi); } - void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) + void bic_decode_u16_dry(unsigned sz, + bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT { bic_decode_u16_cm_dry(sz, lo, hi); } @@ -280,29 +281,32 @@ public: /// Binary Interpolative array decode void bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi); + bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT; /// Binary Interpolative array decode void bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi); + bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT; /// Binary Interpolative array decode (32-bit) void bic_decode_u32_cm(bm::word_t* arr, unsigned sz, - bm::word_t lo, bm::word_t hi); + bm::word_t lo, bm::word_t hi) BMNOEXCEPT; /// Binary Interpolative array decode into bitset (32-bit based) void bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi); + 
bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT; /// Binary Interpolative array decode into /dev/null - void bic_decode_u16_rg_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi); + void bic_decode_u16_rg_dry(unsigned sz, + bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT; /// Binary Interpolative array decode into bitset (32-bit based) void bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi); + bm::gap_word_t lo, + bm::gap_word_t hi) BMNOEXCEPT; /// Binary Interpolative array decode into /dev/null - void bic_decode_u16_cm_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi); + void bic_decode_u16_cm_dry(unsigned sz, + bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT; private: bit_in(const bit_in&); @@ -377,7 +381,7 @@ private: \param buf - memory buffer pointer. \param size - size of the buffer */ -inline encoder::encoder(unsigned char* buf, size_t a_size) +inline encoder::encoder(unsigned char* buf, size_t a_size) BMNOEXCEPT : buf_(buf), start_(buf) { size_ = a_size; @@ -387,7 +391,7 @@ inline encoder::encoder(unsigned char* buf, size_t a_size) */ inline void encoder::put_prefixed_array_32(unsigned char c, const bm::word_t* w, - unsigned count) + unsigned count) BMNOEXCEPT { put_8(c); put_32(w, count); @@ -399,7 +403,7 @@ inline void encoder::put_prefixed_array_32(unsigned char c, inline void encoder::put_prefixed_array_16(unsigned char c, const bm::short_t* s, unsigned count, - bool encode_count) + bool encode_count) BMNOEXCEPT { put_8(c); if (encode_count) @@ -413,7 +417,7 @@ inline void encoder::put_prefixed_array_16(unsigned char c, \brief Puts one character into the encoding buffer. \param c - character to encode */ -BMFORCEINLINE void encoder::put_8(unsigned char c) +BMFORCEINLINE void encoder::put_8(unsigned char c) BMNOEXCEPT { *buf_++ = c; } @@ -423,7 +427,7 @@ BMFORCEINLINE void encoder::put_8(unsigned char c) \brief Puts short word (16 bits) into the encoding buffer. 
\param s - short word to encode */ -BMFORCEINLINE void encoder::put_16(bm::short_t s) +BMFORCEINLINE void encoder::put_16(bm::short_t s) BMNOEXCEPT { #if (BM_UNALIGNED_ACCESS_OK == 1) ::memcpy(buf_, &s, sizeof(bm::short_t)); // optimizer takes care of it @@ -438,7 +442,7 @@ BMFORCEINLINE void encoder::put_16(bm::short_t s) /*! \brief Method puts array of short words (16 bits) into the encoding buffer. */ -inline void encoder::put_16(const bm::short_t* s, unsigned count) +inline void encoder::put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT { #if (BM_UNALIGNED_ACCESS_OK == 1) ::memcpy(buf_, s, sizeof(bm::short_t)*count); @@ -465,7 +469,7 @@ inline void encoder::put_16(const bm::short_t* s, unsigned count) \brief copy bytes into target buffer or just rewind if src is NULL */ inline -void encoder::memcpy(const unsigned char* src, size_t count) +void encoder::memcpy(const unsigned char* src, size_t count) BMNOEXCEPT { BM_ASSERT((buf_ + count) < (start_ + size_)); if (src) @@ -478,7 +482,7 @@ void encoder::memcpy(const unsigned char* src, size_t count) \fn unsigned encoder::size() const \brief Returns size of the current encoding stream. */ -inline size_t encoder::size() const +inline size_t encoder::size() const BMNOEXCEPT { return size_t(buf_ - start_); } @@ -486,7 +490,7 @@ inline size_t encoder::size() const /** \brief Get current memory stream position */ -inline encoder::position_type encoder::get_pos() const +inline encoder::position_type encoder::get_pos() const BMNOEXCEPT { return buf_; } @@ -494,7 +498,7 @@ inline encoder::position_type encoder::get_pos() const /** \brief Set current memory stream position */ -inline void encoder::set_pos(encoder::position_type buf_pos) +inline void encoder::set_pos(encoder::position_type buf_pos) BMNOEXCEPT { buf_ = buf_pos; } @@ -504,7 +508,7 @@ inline void encoder::set_pos(encoder::position_type buf_pos) \brief Puts 24 bits word into encoding buffer. \param w - word to encode. 
*/ -inline void encoder::put_24(bm::word_t w) +inline void encoder::put_24(bm::word_t w) BMNOEXCEPT { BM_ASSERT((w & ~(0xFFFFFFU)) == 0); @@ -520,7 +524,7 @@ inline void encoder::put_24(bm::word_t w) \brief Puts 32 bits word into encoding buffer. \param w - word to encode. */ -inline void encoder::put_32(bm::word_t w) +inline void encoder::put_32(bm::word_t w) BMNOEXCEPT { #if (BM_UNALIGNED_ACCESS_OK == 1) ::memcpy(buf_, &w, sizeof(bm::word_t)); @@ -538,7 +542,7 @@ inline void encoder::put_32(bm::word_t w) \brief Puts 48 bits word into encoding buffer. \param w - word to encode. */ -inline void encoder::put_48(bm::id64_t w) +inline void encoder::put_48(bm::id64_t w) BMNOEXCEPT { BM_ASSERT((w & ~(0xFFFFFFFFFFFFUL)) == 0); *buf_++ = (unsigned char)w; @@ -555,7 +559,7 @@ inline void encoder::put_48(bm::id64_t w) \brief Puts 64 bits word into encoding buffer. \param w - word to encode. */ -inline void encoder::put_64(bm::id64_t w) +inline void encoder::put_64(bm::id64_t w) BMNOEXCEPT { #if (BM_UNALIGNED_ACCESS_OK == 1) ::memcpy(buf_, &w, sizeof(bm::id64_t)); @@ -576,10 +580,10 @@ inline void encoder::put_64(bm::id64_t w) /*! \brief Encodes array of 32-bit words */ -inline -void encoder::put_32(const bm::word_t* w, unsigned count) +inline void encoder::put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT { #if (BM_UNALIGNED_ACCESS_OK == 1) + // use memcpy() because compilers now understand it as an idiom and inline ::memcpy(buf_, w, sizeof(bm::word_t) * count); buf_ += sizeof(bm::word_t) * count; #else @@ -611,7 +615,7 @@ void encoder::put_32(const bm::word_t* w, unsigned count) Load bytes from the decode buffer */ inline -void decoder_base::memcpy(unsigned char* dst, size_t count) +void decoder_base::memcpy(unsigned char* dst, size_t count) BMNOEXCEPT { if (dst) ::memcpy(dst, buf_, count); @@ -623,7 +627,7 @@ void decoder_base::memcpy(unsigned char* dst, size_t count) \brief Construction \param buf - pointer to the decoding memory. 
*/ -inline decoder::decoder(const unsigned char* buf) +inline decoder::decoder(const unsigned char* buf) BMNOEXCEPT : decoder_base(buf) { } @@ -632,7 +636,7 @@ inline decoder::decoder(const unsigned char* buf) \fn bm::short_t decoder::get_16() \brief Reads 16-bit word from the decoding buffer. */ -BMFORCEINLINE bm::short_t decoder::get_16() +BMFORCEINLINE bm::short_t decoder::get_16() BMNOEXCEPT { #if (BM_UNALIGNED_ACCESS_OK == 1) bm::short_t a; @@ -648,7 +652,7 @@ BMFORCEINLINE bm::short_t decoder::get_16() \fn bm::word_t decoder::get_24() \brief Reads 32-bit word from the decoding buffer. */ -inline bm::word_t decoder::get_24() +inline bm::word_t decoder::get_24() BMNOEXCEPT { bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) + ((unsigned)buf_[2] << 16); @@ -661,7 +665,7 @@ inline bm::word_t decoder::get_24() \fn bm::word_t decoder::get_32() \brief Reads 32-bit word from the decoding buffer. */ -BMFORCEINLINE bm::word_t decoder::get_32() +BMFORCEINLINE bm::word_t decoder::get_32() BMNOEXCEPT { #if (BM_UNALIGNED_ACCESS_OK == 1) bm::word_t a; @@ -679,7 +683,7 @@ BMFORCEINLINE bm::word_t decoder::get_32() \brief Reads 64-bit word from the decoding buffer. */ inline -bm::id64_t decoder::get_48() +bm::id64_t decoder::get_48() BMNOEXCEPT { bm::id64_t a = buf_[0] + ((bm::id64_t)buf_[1] << 8) + @@ -696,7 +700,7 @@ bm::id64_t decoder::get_48() \brief Reads 64-bit word from the decoding buffer. */ inline -bm::id64_t decoder::get_64() +bm::id64_t decoder::get_64() BMNOEXCEPT { #if (BM_UNALIGNED_ACCESS_OK == 1) bm::id64_t a; @@ -722,7 +726,7 @@ bm::id64_t decoder::get_64() \param w - pointer on memory block to read into. \param count - size of memory block in words. 
*/ -inline void decoder::get_32(bm::word_t* w, unsigned count) +inline void decoder::get_32(bm::word_t* w, unsigned count) BMNOEXCEPT { if (!w) { @@ -754,7 +758,7 @@ inline void decoder::get_32(bm::word_t* w, unsigned count) \param count - should match bm::set_block_size */ inline -bool decoder::get_32_OR(bm::word_t* w, unsigned count) +bool decoder::get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT { if (!w) { @@ -795,7 +799,7 @@ bool decoder::get_32_OR(bm::word_t* w, unsigned count) \param count - should match bm::set_block_size */ inline -void decoder::get_32_AND(bm::word_t* w, unsigned count) +void decoder::get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT { if (!w) { @@ -833,7 +837,7 @@ void decoder::get_32_AND(bm::word_t* w, unsigned count) \param s - pointer on memory block to read into. \param count - size of memory block in words. */ -inline void decoder::get_16(bm::short_t* s, unsigned count) +inline void decoder::get_16(bm::short_t* s, unsigned count) BMNOEXCEPT { if (!s) { @@ -1004,7 +1008,7 @@ void decoder_little_endian::get_16(bm::short_t* s, unsigned count) // template -void bit_out::put_bit(unsigned value) +void bit_out::put_bit(unsigned value) BMNOEXCEPT { BM_ASSERT(value <= 1); accum_ |= (value << used_bits_); @@ -1015,7 +1019,7 @@ void bit_out::put_bit(unsigned value) // ---------------------------------------------------------------------- template -void bit_out::put_bits(unsigned value, unsigned count) +void bit_out::put_bits(unsigned value, unsigned count) BMNOEXCEPT { unsigned used = used_bits_; unsigned acc = accum_; @@ -1057,7 +1061,7 @@ void bit_out::put_bits(unsigned value, unsigned count) // ---------------------------------------------------------------------- template -void bit_out::put_zero_bit() +void bit_out::put_zero_bit() BMNOEXCEPT { if (++used_bits_ == (sizeof(accum_) * 8)) flush_accum(); @@ -1066,7 +1070,7 @@ void bit_out::put_zero_bit() // ---------------------------------------------------------------------- template 
-void bit_out::put_zero_bits(unsigned count) +void bit_out::put_zero_bits(unsigned count) BMNOEXCEPT { unsigned used = used_bits_; unsigned free_bits = (sizeof(accum_) * 8) - used; @@ -1096,7 +1100,7 @@ void bit_out::put_zero_bits(unsigned count) // ---------------------------------------------------------------------- template -void bit_out::gamma(unsigned value) +void bit_out::gamma(unsigned value) BMNOEXCEPT { BM_ASSERT(value); @@ -1168,9 +1172,10 @@ void bit_out::gamma(unsigned value) // ---------------------------------------------------------------------- template -void bit_out::bic_encode_u16_rg(const bm::gap_word_t* arr, - unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi) +void bit_out::bic_encode_u16_rg( + const bm::gap_word_t* arr, + unsigned sz, + bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT { for (;sz;) { @@ -1204,7 +1209,8 @@ void bit_out::bic_encode_u16_rg(const bm::gap_word_t* arr, template void bit_out::bic_encode_u32_cm(const bm::word_t* arr, unsigned sz, - bm::word_t lo, bm::word_t hi) + bm::word_t lo, + bm::word_t hi) BMNOEXCEPT { for (;sz;) { @@ -1292,7 +1298,7 @@ template void bit_out::bic_encode_u16_cm(const bm::gap_word_t* arr, unsigned sz_i, bm::gap_word_t lo_i, - bm::gap_word_t hi_i) + bm::gap_word_t hi_i) BMNOEXCEPT { BM_ASSERT(sz_i <= 65535); @@ -1329,7 +1335,8 @@ void bit_out::bic_encode_u16_cm(const bm::gap_word_t* arr, template void bit_out::bic_encode_u16_cm(const bm::gap_word_t* arr, unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi) + bm::gap_word_t lo, + bm::gap_word_t hi) BMNOEXCEPT { for (;sz;) { @@ -1379,7 +1386,8 @@ void bit_out::bic_encode_u16_cm(const bm::gap_word_t* arr, template void bit_in::bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi) + bm::gap_word_t lo, + bm::gap_word_t hi) BMNOEXCEPT { for (;sz;) { @@ -1421,7 +1429,8 @@ void bit_in::bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz, template void bit_in::bic_decode_u32_cm(bm::word_t* arr, unsigned sz, - bm::word_t 
lo, bm::word_t hi) + bm::word_t lo, + bm::word_t hi) BMNOEXCEPT { for (;sz;) { @@ -1472,7 +1481,8 @@ void bit_in::bic_decode_u32_cm(bm::word_t* arr, unsigned sz, template void bit_in::bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi) + bm::gap_word_t lo, + bm::gap_word_t hi) BMNOEXCEPT { for (;sz;) { @@ -1523,7 +1533,8 @@ void bit_in::bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz, template void bit_in::bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi) + bm::gap_word_t lo, + bm::gap_word_t hi) BMNOEXCEPT { for (;sz;) { @@ -1579,7 +1590,8 @@ void bit_in::bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz, template void bit_in::bic_decode_u16_cm_dry(unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi) + bm::gap_word_t lo, + bm::gap_word_t hi) BMNOEXCEPT { for (;sz;) { @@ -1630,7 +1642,8 @@ void bit_in::bic_decode_u16_cm_dry(unsigned sz, template void bit_in::bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi) + bm::gap_word_t lo, + bm::gap_word_t hi) BMNOEXCEPT { for (;sz;) { @@ -1665,7 +1678,7 @@ void bit_in::bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz, if (sz == 1) return; bic_decode_u16_rg_bitset(block, mid_idx, lo, bm::gap_word_t(val - 1)); - // tail recursion: + // tail recursion of: //bic_decode_u16_bitset(block, sz - mid_idx - 1, bm::gap_word_t(val + 1), hi); sz -= mid_idx + 1; lo = bm::gap_word_t(val + 1); @@ -1676,7 +1689,8 @@ void bit_in::bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz, template void bit_in::bic_decode_u16_rg_dry(unsigned sz, - bm::gap_word_t lo, bm::gap_word_t hi) + bm::gap_word_t lo, + bm::gap_word_t hi) BMNOEXCEPT { for (;sz;) { @@ -1705,7 +1719,6 @@ void bit_in::bic_decode_u16_rg_dry(unsigned sz, if (sz == 1) return; bic_decode_u16_rg_dry(mid_idx, lo, bm::gap_word_t(val - 1)); - //bic_decode_u16_dry(sz - mid_idx - 1, bm::gap_word_t(val + 1), hi); sz -= mid_idx + 1; lo = 
bm::gap_word_t(val + 1); } // for sz @@ -1716,7 +1729,7 @@ void bit_in::bic_decode_u16_rg_dry(unsigned sz, // ---------------------------------------------------------------------- template -unsigned bit_in::gamma() +unsigned bit_in::gamma() BMNOEXCEPT { unsigned acc = accum_; unsigned used = used_bits_; @@ -1801,7 +1814,7 @@ ret: // ---------------------------------------------------------------------- template -unsigned bit_in::get_bits(unsigned count) +unsigned bit_in::get_bits(unsigned count) BMNOEXCEPT { BM_ASSERT(count); const unsigned maskFF = ~0u; diff --git a/c++/include/util/format_guess.hpp b/c++/include/util/format_guess.hpp index d943a764..823af862 100644 --- a/c++/include/util/format_guess.hpp +++ b/c++/include/util/format_guess.hpp @@ -1,7 +1,7 @@ #ifndef FORMATGUESS__HPP #define FORMATGUESS__HPP -/* $Id: format_guess.hpp 596735 2019-11-12 16:36:21Z ludwigf $ +/* $Id: format_guess.hpp 612523 2020-07-23 11:23:30Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -92,6 +92,51 @@ public: eGffAugustus = 34, ///< GFFish output of Augustus Gene Prediction eJSON = 35, ///< JSON ePsl = 36, ///< PSL alignment format + // The following formats are not yet recognized by CFormatGuess - CXX-10039 + eAltGraphX = 37, + eBed5FloatScore = 38, + eBedGraph = 39, + eBedRnaElements = 40, + eBigBarChart = 41, + eBigBed = 42, + eBigPsl = 43, + eBigChain = 44, + eBigMaf = 45, + eBigWig = 46, + eBroadPeak = 47, + eChain = 48, + eClonePos = 49, + eColoredExon = 50, + eCtgPos = 51, + eDownloadsOnly = 52, + eEncodeFiveC = 53, + eExpRatio = 54, + eFactorSource = 55, + eGenePred = 56, + eLd2 = 57, + eNarrowPeak = 58, + eNetAlign = 59, + ePeptideMapping = 60, + eRmsk = 61, + eSnake = 62, + eVcfTabix = 63, + eWigMaf = 64, + + // The following formats *are* recognized by CFormatGuess: + eFlatFileGenbank = 65, + eFlatFileEna = 66, + eFlatFileUniProt = 67, + + // *** Adding new format codes? 
*** + // (1) A sanity check in the implementation depends on the format codes being + // consecutive. Hence no gaps allowed! + // (2) Heed the warning above about never changing an already existing + // format code! + // (3) You must provide a display name for the new format. Do that in + // sm_FormatNames. + // (4) You must add your new format to sm_CheckOrder (unless you don't want your + // format actually being checked and recognized. + /// Max value of EFormat eFormat_max }; @@ -118,6 +163,8 @@ public: eThrowOnBadSource, ///< Throw an exception if the data source (stream, file) can't be read }; + static bool IsSupportedFormat(EFormat format); + /// Hints for guessing formats. Two hint types can be used: preferred and /// disabled. Preferred are checked before any other formats. Disabled /// formats are not checked at all. @@ -195,6 +242,7 @@ public: ~CFormatGuess(); + NCBI_DEPRECATED EFormat GuessFormat(EMode); NCBI_DEPRECATED bool TestFormat(EFormat, EMode); @@ -259,6 +307,10 @@ protected: bool TestFormatJson(EMode); bool TestFormatPsl(EMode); + bool TestFormatFlatFileGenbank(EMode); + bool TestFormatFlatFileEna(EMode); + bool TestFormatFlatFileUniProt(EMode); + bool IsInputRepeatMaskerWithoutHeader(); bool IsInputRepeatMaskerWithHeader(); @@ -322,17 +374,19 @@ private: bool x_IsBlankOrNumbers(const string& testString) const; // data: - static const char* const sm_FormatNames[eFormat_max]; + using NAME_MAP = map; + static const NAME_MAP sm_FormatNames; bool x_TryProcessCLUSTALSeqData(const string& line, string& id, size_t& seg_length) const; bool x_LooksLikeCLUSTALConservedInfo(const string& line) const; protected: - static int s_CheckOrder[]; + static vector sm_CheckOrder; static const streamsize s_iTestBufferGranularity = 8096; + CNcbiIstream& m_Stream; bool m_bOwnsStream; char* m_pTestBuffer; diff --git a/c++/include/util/limited_size_map.hpp b/c++/include/util/limited_size_map.hpp index ad507486..ab5e6303 100644 --- a/c++/include/util/limited_size_map.hpp 
+++ b/c++/include/util/limited_size_map.hpp @@ -1,7 +1,7 @@ #ifndef UTIL__LIMITED_SIZE_MAP__HPP #define UTIL__LIMITED_SIZE_MAP__HPP -/* $Id: limited_size_map.hpp 402322 2013-06-06 17:13:46Z vasilche $ +/* $Id: limited_size_map.hpp 612734 2020-07-27 11:38:33Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -165,7 +165,8 @@ public: return !(*this == a); } }; - + + explicit limited_size_map(size_t size_limit = 0) : m_SizeLimit(size_limit) { diff --git a/c++/scripts/common/check/inspxe.sh b/c++/scripts/common/check/inspxe.sh index 97b66896..11ad6d09 100755 --- a/c++/scripts/common/check/inspxe.sh +++ b/c++/scripts/common/check/inspxe.sh @@ -37,7 +37,7 @@ exe=$1.exe shift # Run test -"$inspxe" -collect mi3 -knob detect-leaks-on-exit=false -knob enable-memory-growth-detection=false -knob enable-on-demand-leak-detection=false -knob still-allocated-memory=false -knob detect-resource-leaks=false -knob stack-depth=32 -result-dir $rd -return-app-exitcode -suppression-file "$suppress_dir" -- $exe "$@" +"$inspxe" -collect mi3 -knob detect-leaks-on-exit=false -knob enable-memory-growth-detection=false -knob enable-on-demand-leak-detection=false -knob still-allocated-memory=false -knob detect-resource-leaks=false -knob stack-depth=16 -result-dir $rd -return-app-exitcode -suppression-file "$suppress_dir" -- $exe "$@" app_result=$? sleep 5 if test ! -d $rd; then diff --git a/c++/scripts/common/check/tsan.supp b/c++/scripts/common/check/tsan.supp index 81534f31..654e475e 100644 --- a/c++/scripts/common/check/tsan.supp +++ b/c++/scripts/common/check/tsan.supp @@ -19,6 +19,9 @@ race:corelib/test/test_ncbidiag_mt.cpp race:^ncbi::CDiagContext::ApproveMessage(ncbi::SDiagMessage&, +# ncbi_url.cpp default encoder. Leave as is. 
CXX-10543 +race:^ncbi::CSafeStatic >::x_Init()$ + ################################################################ # ivanov diff --git a/c++/scripts/common/impl/install.sh b/c++/scripts/common/impl/install.sh index ccc41deb..4cbbbe38 100755 --- a/c++/scripts/common/impl/install.sh +++ b/c++/scripts/common/impl/install.sh @@ -16,7 +16,7 @@ echo "[`date`]" -svn_location=`echo '$URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.10.1/c++/scripts/common/impl/install.sh $' | sed "s%\\$[U]RL: *\\([^$][^$]*\\) \\$.*%\\1%"` +svn_location=`echo '$URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.11.0/c++/scripts/common/impl/install.sh $' | sed "s%\\$[U]RL: *\\([^$][^$]*\\) \\$.*%\\1%"` svn_revision=`echo '$Revision: 429376 $' | sed "s%\\$[R]evision: *\\([^$][^$]*\\) \\$.*%\\1%"` script_name=`basename $0` diff --git a/c++/scripts/projects/blast/Manifest b/c++/scripts/projects/blast/Manifest index d3976454..c520c215 100644 --- a/c++/scripts/projects/blast/Manifest +++ b/c++/scripts/projects/blast/Manifest @@ -1,7 +1,7 @@ # # Filename: Manifest # -# $Id: Manifest 598028 2019-12-03 15:46:49Z camacho $ +# $Id: Manifest 615066 2020-08-26 16:41:29Z fongah2 $ # # Author: Christiam Camacho # @@ -44,6 +44,7 @@ DEFAULT_CONFIGURE_FLAGS: --without-debug --with-strip --with-openmp --with-mt -- # that. 
The build-root is needed so that rpmbuild can find the proper directories # to copy the binaries from Linux64-Centos : icc : ICC.sh 1900 --with-static --without-dll --with-bin-release --with-strip --without-debug --without-pcre --with-mt --with-openmp --with-flat-makefile --with-experimental=Int8GI --without-vdb --without-gnutls --without-gcrypt OPENMP_FLAGS='-qopenmp -qopenmp-link=static';LDFLAGS=-Wl,--as-needed + #Linux64-Centos : gcc : GCC.sh --with-static --without-dll --with-bin-release --with-strip --without-debug --without-pcre --with-mt --with-openmp --with-flat-makefile --with-experimental=Int8GI --without-vdb --without-gnutls --without-gcrypt #Linux64-Centos : gcc-debug : GCC.sh --with-strip --with-debug --without-dll --without-pcre --with-mt --with-openmp --with-flat-makefile --with-experimental=Int8GI --without-vdb --without-gnutls --without-gcrypt diff --git a/c++/scripts/projects/blast/components.link b/c++/scripts/projects/blast/components.link index 70b29161..6d1d217f 100644 --- a/c++/scripts/projects/blast/components.link +++ b/c++/scripts/projects/blast/components.link @@ -4,6 +4,6 @@ core 24.0 dbase 24.0 web 24.0 objects 24.0 -objtools 24.0 -algo 24.0 -app 24.0 +objtools 24.1 +algo 24.1 +app 24.1 diff --git a/c++/scripts/projects/igblast/edit_imgt_file.pl b/c++/scripts/projects/igblast/edit_imgt_file.pl index bdfe4636..69ba2b81 100755 --- a/c++/scripts/projects/igblast/edit_imgt_file.pl +++ b/c++/scripts/projects/igblast/edit_imgt_file.pl @@ -1,7 +1,8 @@ -#!/usr/bin/perl -w +#!/usr/bin/env perl use strict; +use warnings; my $inputfile=shift (@ARGV); open(in_handle, $inputfile); diff --git a/c++/scripts/projects/ncbi_gbench.lst b/c++/scripts/projects/ncbi_gbench.lst index 91b553c6..9f7d2a83 100644 --- a/c++/scripts/projects/ncbi_gbench.lst +++ b/c++/scripts/projects/ncbi_gbench.lst @@ -15,3 +15,7 @@ misc/third_party -[^gi].*/app -[^g].*/unit_test -algo/ms +-internal/gbench/app/msaviewer +-internal/gbench/app/sviewer +-internal/gbench/app/treeview 
+-internal/gbench/app/uud diff --git a/c++/scripts/projects/netschedule/ChangeLog b/c++/scripts/projects/netschedule/ChangeLog index a74068b5..dc87b7df 100644 --- a/c++/scripts/projects/netschedule/ChangeLog +++ b/c++/scripts/projects/netschedule/ChangeLog @@ -1,3 +1,10 @@ +Release 4.42.1 cloned from 4.41.0 (2020-09-21) + + * NetSchedule: cannot start with string to unsigned int conversion + (CXX-11350) + * NetSchedule: update program name when a client changes its session + (CXX-11283) + Release 4.42.0 cloned from 4.41.0 (2020-02-11) * NetSchedule: extend DUMP command (CXX-10344) diff --git a/c++/scripts/projects/pubseq_gateway/ChangeLog b/c++/scripts/projects/pubseq_gateway/ChangeLog index 29ac7d6d..ec20ce24 100644 --- a/c++/scripts/projects/pubseq_gateway/ChangeLog +++ b/c++/scripts/projects/pubseq_gateway/ChangeLog @@ -1,3 +1,14 @@ +Release 1.10.0 (2020-07-10) + + * PSG: add anti recursion flag (CXX-11438) + * PSG: create and use a high level Reply object at the very beginning + (CXX-11425) + * PSG server MaxDebug configuration segfault (CXX-11402) + * PSG server get_na processor filter (CXX-11401) + * PSG: incorrect handling of the CHttpReply<> instance (CXX-11397) + * PSG - add API to retrieve biodata from other (non-Cassandra/LMDB) sources + (CXX-11312) + Release 1.9.0 (2020-04-09) * Updated libuv 1.35.0, datastax 2.15.1, lmdb 0.9.24 (CXX-11268) diff --git a/c++/scripts/projects/pubseq_gateway/project.lst b/c++/scripts/projects/pubseq_gateway/project.lst index ad5f1707..6524c3c6 100644 --- a/c++/scripts/projects/pubseq_gateway/project.lst +++ b/c++/scripts/projects/pubseq_gateway/project.lst @@ -18,6 +18,11 @@ misc/third_party_static app$ app/pubseq_gateway$ app/pubseq_gateway/server$ +app/pubseq_gateway/server/test$ +app/pubseq_gateway/server/test/input$ +app/pubseq_gateway/server/integrationsmoketest$ +app/pubseq_gateway/server/integrationsmoketest/input$ +app/pubseq_gateway/server/integrationsmoketest/baseline$ objtools$ objtools/pubseq_gateway$ diff 
--git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt index ae4ca1b9..09d447bd 100644 --- a/c++/src/CMakeLists.txt +++ b/c++/src/CMakeLists.txt @@ -1,6 +1,9 @@ ############################################################################# -# $Id: CMakeLists.txt 608162 2020-05-12 16:01:31Z blastadm $ +# $Id: CMakeLists.txt 617723 2020-10-06 07:10:56Z blastadm $ ############################################################################# -cmake_minimum_required(VERSION 3.3) -include(build-system/cmake/CMakeLists.top_builddir.txt) +cmake_minimum_required(VERSION 3.7) +if ("${PROJECT_NAME}" STREQUAL "") + project(ncbi_cpp) +endif() +include(${CMAKE_CURRENT_LIST_DIR}/build-system/cmake/CMakeLists.top_builddir.txt) diff --git a/c++/src/Makefile.in b/c++/src/Makefile.in index fd0a86f2..2f66472f 100644 --- a/c++/src/Makefile.in +++ b/c++/src/Makefile.in @@ -1,4 +1,4 @@ -# $Id: Makefile.in 608162 2020-05-12 16:01:31Z blastadm $ +# $Id: Makefile.in 617723 2020-10-06 07:10:56Z blastadm $ # Master (top-level) makefile for all NCBI C++ projects ################################################################## diff --git a/c++/src/algo/blast/api/CMakeLists.xblast.lib.txt b/c++/src/algo/blast/api/CMakeLists.xblast.lib.txt index 15432cdb..faf61fb7 100644 --- a/c++/src/algo/blast/api/CMakeLists.xblast.lib.txt +++ b/c++/src/algo/blast/api/CMakeLists.xblast.lib.txt @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeLists.xblast.lib.txt 594157 2019-09-30 18:28:48Z gouriano $ +# $Id: CMakeLists.xblast.lib.txt 615334 2020-08-31 15:35:33Z fukanchi $ ############################################################################# set(SRC_BLAST_CXX_CORE @@ -75,6 +75,8 @@ set(SRC_BLAST_CXX_CORE deltablast magicblast_options magicblast + blast_node + blast_usage_report ) diff --git a/c++/src/algo/blast/api/Makefile.xblast.lib b/c++/src/algo/blast/api/Makefile.xblast.lib index d8689276..ca8bc442 100644 --- 
a/c++/src/algo/blast/api/Makefile.xblast.lib +++ b/c++/src/algo/blast/api/Makefile.xblast.lib @@ -1,4 +1,4 @@ -# $Id: Makefile.xblast.lib 553565 2017-12-18 22:23:38Z fongah2 $ +# $Id: Makefile.xblast.lib 615334 2020-08-31 15:35:33Z fukanchi $ include $(srcdir)/../core/Makefile.blast.lib @@ -75,7 +75,9 @@ cdd_pssm_input \ deltablast_options \ deltablast \ magicblast_options \ -magicblast +magicblast \ +blast_node \ +blast_usage_report SRC = $(SRC_C:%=.core_%) $(SRC_CXX) diff --git a/c++/src/algo/blast/api/blast_aux.cpp b/c++/src/algo/blast/api/blast_aux.cpp index 9c928135..6bb9cac4 100644 --- a/c++/src/algo/blast/api/blast_aux.cpp +++ b/c++/src/algo/blast/api/blast_aux.cpp @@ -1,4 +1,4 @@ -/* $Id: blast_aux.cpp 519527 2016-11-16 14:19:45Z camacho $ +/* $Id: blast_aux.cpp 615182 2020-08-28 04:28:48Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1152,6 +1152,47 @@ LoadSequencesToScope(CScope::TIds& ids, vector& ranges, CRef top_bh.GetSeqMap().CanResolveRange(&*scope, sel); } +void CBlastAppDiagHandler::Post(const SDiagMessage & mess) +{ + if(m_handler != NULL) { + m_handler->Post(mess); + } + if(m_save) { + CRef d(new CBlast4_error); + string m; + mess.Write(m); + d->SetMessage(NStr::Sanitize(m)); + d->SetCode((int)mess.m_Severity); + { + DEFINE_STATIC_MUTEX(mx); + CMutexGuard guard(mx); + m_messages.push_back(d); + } + } +} + +void CBlastAppDiagHandler::ResetMessages() +{ + DEFINE_STATIC_MUTEX(mx); + CMutexGuard guard(mx); + m_messages.clear(); +} + +CBlastAppDiagHandler::~CBlastAppDiagHandler() +{ + if(m_handler) { + SetDiagHandler(m_handler); + m_handler = NULL; + } +} + +void CBlastAppDiagHandler::DoNotSaveMessages(void) +{ + m_save = false; + ResetMessages(); +} + + END_SCOPE(blast) END_NCBI_SCOPE diff --git a/c++/src/algo/blast/api/blast_node.cpp b/c++/src/algo/blast/api/blast_node.cpp new file mode 100644 index 00000000..b568c0ff --- /dev/null +++ b/c++/src/algo/blast/api/blast_node.cpp @@ 
-0,0 +1,293 @@ +/* $Id: + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. 
+ * + * =========================================================================== + * + * Authors: Amelia Fong + * + */ + +/** @file blast_node.cpp + * BLAST node api + */ + +#include +#include +#include +#include +#include + +#if defined(NCBI_OS_UNIX) +#include +#endif + +#ifndef SKIP_DOXYGEN_PROCESSING +USING_NCBI_SCOPE; +USING_SCOPE(blast); +USING_SCOPE(objects); +#endif + +void CBlastNodeMailbox::SendMsg(CRef msg) +{ + CFastMutexGuard guard(m_Mutex); + m_MsgQueue.push_back(msg); + m_Notify.SignalSome(); +} + +CBlastNode::CBlastNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args, + CBlastAppDiagHandler & bah, EProgram blast_program, + int query_index, int num_queries, CBlastNodeMailbox * mailbox): + m_NodeNum(node_num), m_NcbiArgs(ncbi_args), m_Args(args), + m_Bah(bah), m_BlastProgram(blast_program), + m_QueryIndex(query_index), m_NumOfQueries(num_queries), + m_QueriesLength(0) +{ + if(mailbox != NULL) { + m_Mailbox.Reset(mailbox); + } + string p("Query "); + p+=NStr::IntToString(query_index) + "-" + NStr::IntToString(query_index + num_queries -1); + m_NodeIdStr = p; +} + +CBlastNode::~CBlastNode () { + if(m_Mailbox.NotEmpty()) { + m_Mailbox.Reset(); + } +} + +void CBlastNode::SendMsg(CBlastNodeMsg::EMsgType msg_type, void* ptr) +{ + if (m_Mailbox.NotEmpty()) { + CRef m( new CBlastNodeMsg(msg_type, ptr)); + m_Mailbox->SendMsg(m); + } +} + +CBlastMasterNode::CBlastMasterNode(CNcbiOstream & out_stream, int num_threads): + m_OutputStream(out_stream), m_MaxNumThreads(num_threads), m_MaxNumNodes(num_threads + 2), + m_NumErrStatus(0), m_NumQueries(0), m_QueriesLength(0) +{ + m_StopWatch.Start(); +} + +void +CBlastMasterNode::x_WaitForNewEvent() +{ + CFastMutexGuard guard(m_Mutex); + m_NewEvent.WaitForSignal(m_Mutex); +} + +void +CBlastMasterNode::RegisterNode(CBlastNode * node, CBlastNodeMailbox * mailbox) +{ + if(node == NULL) { + NCBI_THROW(CBlastException, eInvalidArgument, "Empty Node" ); + } + if(mailbox == NULL) { + 
NCBI_THROW(CBlastException, eInvalidArgument, "Empty mailbox" ); + } + if(mailbox->GetNodeNum() != node->GetNodeNum()) { + NCBI_THROW(CBlastException, eCoreBlastError, "Invalid mailbox node number" ); + } + { + CFastMutexGuard guard(m_Mutex); + int node_num = node->GetNodeNum(); + if ((m_PostOffice.find(node_num) != m_PostOffice.end()) || + (m_RegisteredNodes.find(node_num) != m_RegisteredNodes.end())){ + NCBI_THROW(CBlastException, eInvalidArgument, "Duplicate chunk num" ); + } + m_PostOffice[node_num]= mailbox; + m_RegisteredNodes[node_num] = node; + } +} + +bool CBlastMasterNode::Processing() +{ + NON_CONST_ITERATE(TPostOffice, itr, m_PostOffice) { + if(itr->second->GetNumMsgs() > 0) { + CRef msg = itr->second->ReadMsg(); + int chunk_num = itr->first; + if (msg.NotEmpty()) { + switch (msg->GetMsgType()) { + case CBlastNodeMsg::eRunRequest: + { + if ((int) m_ActiveNodes.size() < m_MaxNumThreads) { + CBlastNode * n = (CBlastNode *) msg->GetMsgBody(); + if(n != NULL) { + double start_time = m_StopWatch.Elapsed(); + n->Run(); + pair< int, double > p(chunk_num, start_time); + m_ActiveNodes.insert(p); + CRef empty_msg; + pair > m(chunk_num, empty_msg); + m_FormatQueue.insert(m); + _TRACE("Starting Chunk # " << chunk_num) ; + } + else { + NCBI_THROW(CBlastException, eCoreBlastError, "Invalid mailbox node number" ); + } + } + else { + itr->second->UnreadMsg(msg); + FormatResults(); + if (IsFull()) { + x_WaitForNewEvent(); + } + return true; + } + break; + } + case CBlastNodeMsg::ePostResult: + case CBlastNodeMsg::eErrorExit: + { + m_FormatQueue[itr->first] = msg; + double diff = m_StopWatch.Elapsed() - m_ActiveNodes[itr->first]; + m_ActiveNodes.erase(chunk_num); + CTimeSpan s(diff); + _TRACE("Chunk #" << chunk_num << " completed in " << s.AsSmartString()); + break; + } + case CBlastNodeMsg::ePostLog: + { + break; + } + default: + { + NCBI_THROW(CBlastException, eCoreBlastError, "Invalid node message type"); + break; + } + } + } + } + } + FormatResults(); + return 
IsActive(); +} + +void CBlastMasterNode::FormatResults() +{ + TFormatQueue::iterator itr= m_FormatQueue.begin(); + + while (itr != m_FormatQueue.end()){ + CRef msg(itr->second); + if(msg.Empty()) { + break; + } + CBlastNode * n = (CBlastNode *) msg->GetMsgBody(); + if(n == NULL) { + string err_msg = "Empty formatting msg for chunk num # " + NStr::IntToString(itr->first); + NCBI_THROW(CBlastException, eCoreBlastError, err_msg); + } + int node_num = n->GetNodeNum(); + if (msg->GetMsgType() == CBlastNodeMsg::ePostResult) { + string results; + n->GetBlastResults(results); + if (results != kEmptyStr) { + m_OutputStream << results; + } + } + else if (msg->GetMsgType() == CBlastNodeMsg::eErrorExit) { + m_NumErrStatus++; + ERR_POST("Chunk # " << node_num << " exit with error (" << n->GetStatus() << ")"); + } + else { + NCBI_THROW(CBlastException, eCoreBlastError, "Invalid msg type"); + } + m_NumQueries += n->GetNumOfQueries(); + m_QueriesLength += n->GetQueriesLength(); + n->Detach(); + m_PostOffice.erase(node_num); + m_RegisteredNodes.erase(node_num); + + itr++; + } + + if (itr != m_FormatQueue.begin()) { + m_FormatQueue.erase(m_FormatQueue.begin(), itr); + } +} + +int CBlastMasterNode::IsFull() +{ + TRegisteredNodes::reverse_iterator rr = m_RegisteredNodes.rbegin(); + TActiveNodes::reverse_iterator ra = m_ActiveNodes.rbegin(); + unsigned int in_buffer = m_MaxNumThreads; + if ((!m_RegisteredNodes.empty()) && (!m_ActiveNodes.empty())) { + in_buffer = rr->first - ra->first; + } + return ((int) (m_ActiveNodes.size() + in_buffer) >= m_MaxNumNodes); +} + + +bool s_IsSeqID(string & line) +{ + static const int kMainAccSize = 32; + size_t digit_pos = line.find_last_of("0123456789|", kMainAccSize); + if (digit_pos != NPOS) { + return true; + } + + return false; +} + +int +CBlastNodeInputReader::GetQueryBatch(string & queries, int & query_no) +{ + CNcbiOstrstream ss; + int q_size = 0; + int q_count = 0; + queries.clear(); + query_no = -1; + + while ( !AtEOF()) { + string line = 
NStr::TruncateSpaces_Unsafe(*++(*this), NStr::eTrunc_Begin); + if (line.empty()) { + continue; + } + char c =line[0]; + if (c == '!' || c == '#' || c == ';') { + continue; + } + bool isId = s_IsSeqID(line); + if ( isId || ( c == '>' )) { + if (q_size >= m_QueryBatchSize) { + UngetLine(); + break; + } + q_count ++; + } + if (c != '>') { + q_size += isId? m_EstAvgQueryLength : line.size(); + } + ss << line << endl; + } + ss << std::ends; + ss.flush(); + if (q_count > 0){ + queries = ss.str(); + query_no = m_QueryCount +1; + m_QueryCount +=q_count; + } + return q_count; +} diff --git a/c++/src/algo/blast/api/blast_usage_report.cpp b/c++/src/algo/blast/api/blast_usage_report.cpp new file mode 100644 index 00000000..0c8d4312 --- /dev/null +++ b/c++/src/algo/blast/api/blast_usage_report.cpp @@ -0,0 +1,228 @@ +/* $Id: + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. 
+ * + * =========================================================================== + * + * Authors: Amelia Fong + * + */ + +/** @file blast_usage_report.cpp + * BLAST usage report api + */ + +#include +#include +#include + +#ifndef SKIP_DOXYGEN_PROCESSING +USING_NCBI_SCOPE; +USING_SCOPE(blast); +#endif + +static const string kNcbiAppName="standalone-blast"; +static const string kIdFile="/sys/class/dmi/id/sys_vendor"; + +void CBlastUsageReport::x_CheckRunEnv() +{ + char * blast_docker = getenv("BLAST_DOCKER"); + if(blast_docker != NULL){ + AddParam(eDocker, true); + } + + CFile id_file(kIdFile); + if(id_file.Exists()){ + CNcbiIfstream s(id_file.GetPath().c_str(), IOS_BASE::in); + string line; + NcbiGetlineEOL(s, line); + NStr::ToUpper(line); + if (line.find("GOOGLE") != NPOS) { + AddParam(eGCP, true); + } + else if (line.find("AMAZON")!= NPOS){ + AddParam(eAWS, true); + } + } + + char* elb_job_id = getenv("BLAST_ELB_JOB_ID"); + if(elb_job_id != NULL){ + string j_id(elb_job_id); + AddParam(eELBJobId, j_id); + } + char* elb_batch_num = getenv("BLAST_ELB_BATCH_NUM"); + if(elb_batch_num != NULL){ + int bn = NStr::StringToInt(CTempString(elb_batch_num), NStr::fConvErr_NoThrow); + AddParam(eELBBatchNum, bn); + } +} + +CBlastUsageReport::CBlastUsageReport() +{ + x_CheckBlastUsageEnv(); + AddParam(eApp, kNcbiAppName); + x_CheckRunEnv(); +} + +CBlastUsageReport::~CBlastUsageReport() +{ + if (IsEnabled()) { + Send(m_Params); + Wait(); + Finish(); + } +} + +string CBlastUsageReport::x_EUsageParmsToString(EUsageParams p) +{ + string retval; + switch (p) { + case eApp: retval.assign("ncbi_app"); break; + case eVersion: retval.assign("version"); break; + case eProgram: retval.assign("program"); break; + case eTask: retval.assign("task"); break; + case eExitStatus: retval.assign("exit_status"); break; + case eRunTime: retval.assign("run_time"); break; + case eDBName: retval.assign("db_name"); break; + case eDBLength: retval.assign("db_length"); break; + case eDBNumSeqs: 
retval.assign("db_num_seqs"); break; + case eDBDate: retval.assign("db_date"); break; + case eBl2seq: retval.assign("bl2seq"); break; + case eNumSubjects: retval.assign("num_subjects"); break; + case eSubjectsLength: retval.assign("subjects_length"); break; + case eNumQueries: retval.assign("num_queries"); break; + case eTotalQueryLength: retval.assign("queries_length"); break; + case eEvalueThreshold: retval.assign("evalue_threshold"); break; + case eNumThreads: retval.assign("num_threads"); break; + case eHitListSize: retval.assign("hitlist_size"); break; + case eOutputFmt: retval.assign("output_fmt"); break; + case eTaxIdList: retval.assign("taxidlist"); break; + case eNegTaxIdList: retval.assign("negative_taxidlist"); break; + case eGIList: retval.assign("gilist"); break; + case eNegGIList: retval.assign("negative_gilist"); break; + case eSeqIdList: retval.assign("seqidlist"); break; + case eNegSeqIdList: retval.assign("negative_seqidlist"); break; + case eIPGList: retval.assign("ipglist"); break; + case eNegIPGList: retval.assign("negative_ipglist"); break; + case eMaskAlgo: retval.assign("mask_algo"); break; + case eCompBasedStats: retval.assign("comp_based_stats"); break; + case eRange: retval.assign("range"); break; + case eMTMode: retval.assign("mt_mode"); break; + case eNumQueryBatches: retval.assign("num_query_batches"); break; + case eNumErrStatus: retval.assign("num_error_status"); break; + case ePSSMInput: retval.assign("pssm_input"); break; + case eConverged: retval.assign("converged"); break; + case eArchiveInput: retval.assign("archive"); break; + case eRIDInput: retval.assign("rid"); break; + case eDBInfo: retval.assign("db_info"); break; + case eDBTaxInfo: retval.assign("db_tax_info"); break; + case eDBEntry: retval.assign("db_entry"); break; + case eDBDumpAll: retval.assign("db_entry_all"); break; + case eDBType: retval.assign("db_type"); break; + case eInputType: retval.assign("input_type"); break; + case eParseSeqIDs: 
retval.assign("parse_seqids"); break; + case eSeqType: retval.assign("seq_type"); break; + case eDBTest: retval.assign("db_test"); break; + case eDBAliasMode: retval.assign("db_alias_mode"); break; + case eDocker: retval.assign("docker"); break; + case eGCP: retval.assign("gcp"); break; + case eAWS: retval.assign("aws"); break; + case eELBJobId: retval.assign("elb_job_id"); break; + case eELBBatchNum: retval.assign("elb_batch_num"); break; + default: + LOG_POST(Warning <<"Invalid usage params: " << (int)p); + abort(); + break; + } + return retval; +} + +void CBlastUsageReport::AddParam(EUsageParams p, int val) +{ + if (IsEnabled()){ + string t = x_EUsageParmsToString(p); + m_Params.Add(t, NStr::IntToString(val)); + } +} + +void CBlastUsageReport::AddParam(EUsageParams p, const string & val) +{ + if (IsEnabled()) { + string t = x_EUsageParmsToString(p); + m_Params.Add(t, val); + } +} + +void CBlastUsageReport::AddParam(EUsageParams p, const double & val) +{ + if (IsEnabled()) { + string t = x_EUsageParmsToString(p); + m_Params.Add(t, val); + } +} + +void CBlastUsageReport::x_CheckBlastUsageEnv() +{ + char * blast_usage_env = getenv("BLAST_USAGE_REPORT"); + if(blast_usage_env != NULL){ + bool enable = NStr::StringToBool(blast_usage_env); + if (!enable) { + SetEnabled(false); + CUsageReportAPI::SetEnabled(false); + LOG_POST(Info <<"Phone home disabled"); + return ; + } + } + + CNcbiIstrstream empty_stream(kEmptyCStr); + CRef registry(new CNcbiRegistry(empty_stream, IRegistry::fWithNcbirc)); + if (registry->HasEntry("BLAST", "BLAST_USAGE_REPORT")) { + bool enable = NStr::StringToBool(registry->Get("BLAST", "BLAST_USAGE_REPORT")); + if (!enable) { + SetEnabled(false); + CUsageReportAPI::SetEnabled(false); + LOG_POST(Info <<"Phone home disabled by config setting"); + return ; + } + } + CUsageReportAPI::SetEnabled(true); + SetEnabled(true); + LOG_POST(Info <<"Phone home enabled"); +} + +void CBlastUsageReport::AddParam(EUsageParams p, Int8 val) +{ + if (IsEnabled()) { + 
string t = x_EUsageParmsToString(p); + m_Params.Add(t, val); + } + +} + +void CBlastUsageReport::AddParam(EUsageParams p, bool val) +{ + if (IsEnabled()) { + string t = x_EUsageParmsToString(p); + m_Params.Add(t, val); + } + +} diff --git a/c++/src/algo/blast/blastinput/blast_input.cpp b/c++/src/algo/blast/blastinput/blast_input.cpp index ad81a464..a74337db 100644 --- a/c++/src/algo/blast/blastinput/blast_input.cpp +++ b/c++/src/algo/blast/blastinput/blast_input.cpp @@ -1,4 +1,4 @@ -/* $Id: blast_input.cpp 550028 2017-10-30 16:49:00Z rackerst $ +/* $Id: blast_input.cpp 615335 2020-08-31 15:36:38Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -165,6 +165,8 @@ CBlastInput::GetNextSeqBatch(CScope& scope) retval->AddQuery(q); } + m_NumSeqs +=retval->Size(); + m_TotalLength += size_read; _TRACE("Read " << retval->Size() << " queries"); return retval; } diff --git a/c++/src/algo/blast/blastinput/cmdline_flags.cpp b/c++/src/algo/blast/blastinput/cmdline_flags.cpp index a2575940..179fe960 100644 --- a/c++/src/algo/blast/blastinput/cmdline_flags.cpp +++ b/c++/src/algo/blast/blastinput/cmdline_flags.cpp @@ -1,4 +1,4 @@ -/* $Id: cmdline_flags.cpp 605536 2020-04-13 11:07:50Z ivanov $ +/* $Id: cmdline_flags.cpp 615184 2020-08-28 04:29:55Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -268,6 +268,8 @@ const string kArgPrintMdTag("md_tag"); const string kArgUnalignedOutput("out_unaligned"); const string kArgUnalignedFormat("unaligned_fmt"); +const string kArgMTMode("mt_mode"); + END_SCOPE(blast) END_NCBI_SCOPE diff --git a/c++/src/algo/blast/blastinput/rpsblast_args.cpp b/c++/src/algo/blast/blastinput/rpsblast_args.cpp index 68403aad..7caf41cc 100644 --- a/c++/src/algo/blast/blastinput/rpsblast_args.cpp +++ b/c++/src/algo/blast/blastinput/rpsblast_args.cpp @@ -1,4 +1,4 @@ -/* $Id: rpsblast_args.cpp 544441 2017-08-23 11:55:51Z camacho $ +/* 
$Id: rpsblast_args.cpp 615193 2020-08-28 04:31:11Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -55,6 +55,14 @@ CRPSBlastMTArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc) NStr::IntToString(kDfltRpsThreadingMode)); arg_desc.SetConstraint(kArgNumThreads, new CArgAllowValuesGreaterThanOrEqual(0)); + arg_desc.AddDefaultKey(kArgMTMode, "int_value", + "Multi-thread mode to use in RPS BLAST search:\n " + "0 (auto) split by database vols\n " + "1 split by queries", + CArgDescriptions::eInteger, + NStr::IntToString(0)); + arg_desc.SetConstraint(kArgMTMode, + new CArgAllowValuesBetween(0, 1, true)); #endif arg_desc.SetCurrentGroup(""); } @@ -140,6 +148,63 @@ CRPSBlastAppArgs::GetQueryBatchSize() const return blast::GetQueryBatchSize(eRPSBlast, m_IsUngapped, is_remote); } +/// Get the input stream +CNcbiIstream& +CRPSBlastAppArgs::GetInputStream() +{ + return CBlastAppArgs::GetInputStream(); +} +/// Get the output stream +CNcbiOstream& +CRPSBlastAppArgs::GetOutputStream() +{ + return CBlastAppArgs::GetOutputStream(); +} + +/// Get the input stream +CNcbiIstream& +CRPSBlastNodeArgs::GetInputStream() +{ + if ( !m_InputStream ) { + abort(); + } + return *m_InputStream; +} +/// Get the output stream +CNcbiOstream& +CRPSBlastNodeArgs::GetOutputStream() +{ + return m_OutputStream; +} + +CRPSBlastNodeArgs::CRPSBlastNodeArgs(const string & input) +{ + m_InputStream = new CNcbiIstrstream(input.c_str(), input.length()); +} + +CRPSBlastNodeArgs::~CRPSBlastNodeArgs() +{ + if (m_InputStream) { + delete m_InputStream; + m_InputStream = NULL; + } +} + +int +CRPSBlastNodeArgs::GetQueryBatchSize() const +{ + bool is_remote = (m_RemoteArgs.NotEmpty() && m_RemoteArgs->ExecuteRemotely()); + return blast::GetQueryBatchSize(eRPSBlast, m_IsUngapped, is_remote); +} + +CRef +CRPSBlastNodeArgs::x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, + const CArgs& /*args*/) +{ + CRef retval + (new 
CBlastRPSOptionsHandle(locality)); + return retval; +} END_SCOPE(blast) END_NCBI_SCOPE diff --git a/c++/src/algo/blast/blastinput/rpstblastn_args.cpp b/c++/src/algo/blast/blastinput/rpstblastn_args.cpp index 98ae1ab8..e37743a5 100644 --- a/c++/src/algo/blast/blastinput/rpstblastn_args.cpp +++ b/c++/src/algo/blast/blastinput/rpstblastn_args.cpp @@ -1,4 +1,4 @@ -/* $Id: rpstblastn_args.cpp 505234 2016-06-23 13:16:57Z fongah2 $ +/* $Id: rpstblastn_args.cpp 615193 2020-08-28 04:31:11Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -35,6 +35,7 @@ #include #include #include +#include #include BEGIN_NCBI_SCOPE @@ -92,7 +93,7 @@ CRPSTBlastnAppArgs::CRPSTBlastnAppArgs() arg.Reset(m_FormattingArgs); m_Args.push_back(arg); - m_MTArgs.Reset(new CMTArgs(true)); + m_MTArgs.Reset(new CRPSBlastMTArgs()); arg.Reset(m_MTArgs); m_Args.push_back(arg); @@ -123,6 +124,64 @@ CRPSTBlastnAppArgs::GetQueryBatchSize() const return blast::GetQueryBatchSize(eRPSTblastn, m_IsUngapped, is_remote); } +/// Get the input stream +CNcbiIstream& +CRPSTBlastnAppArgs::GetInputStream() +{ + return CBlastAppArgs::GetInputStream(); +} +/// Get the output stream +CNcbiOstream& +CRPSTBlastnAppArgs::GetOutputStream() +{ + return CBlastAppArgs::GetOutputStream(); +} + +/// Get the input stream +CNcbiIstream& +CRPSTBlastnNodeArgs::GetInputStream() +{ + if ( !m_InputStream ) { + abort(); + } + return *m_InputStream; +} +/// Get the output stream +CNcbiOstream& +CRPSTBlastnNodeArgs::GetOutputStream() +{ + return m_OutputStream; +} + +CRPSTBlastnNodeArgs::CRPSTBlastnNodeArgs(const string & input) +{ + m_InputStream = new CNcbiIstrstream(input.c_str(), input.length()); +} + +CRPSTBlastnNodeArgs::~CRPSTBlastnNodeArgs() +{ + if (m_InputStream) { + delete m_InputStream; + m_InputStream = NULL; + } +} + +int +CRPSTBlastnNodeArgs::GetQueryBatchSize() const +{ + bool is_remote = (m_RemoteArgs.NotEmpty() && m_RemoteArgs->ExecuteRemotely()); + return 
blast::GetQueryBatchSize(eRPSTblastn, m_IsUngapped, is_remote); +} + +CRef +CRPSTBlastnNodeArgs::x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, + const CArgs& /*args*/) +{ + CRef retval + (new CRPSTBlastnOptionsHandle(locality)); + return retval; +} + END_SCOPE(blast) END_NCBI_SCOPE diff --git a/c++/src/algo/blast/core/blast_engine.c b/c++/src/algo/blast/core/blast_engine.c index caeffd91..797d17a5 100644 --- a/c++/src/algo/blast/core/blast_engine.c +++ b/c++/src/algo/blast/core/blast_engine.c @@ -1,4 +1,4 @@ -/* $Id: blast_engine.c 604741 2020-04-01 15:15:25Z ivanov $ +/* $Id: blast_engine.c 617226 2020-09-28 18:25:19Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -79,9 +79,9 @@ #define CONV_NUCL2PROT_COORDINATES(length) (length) / CODON_LENGTH NCBI_XBLAST_EXPORT const int kBlastMajorVersion = 2; -NCBI_XBLAST_EXPORT const int kBlastMinorVersion = 10; -NCBI_XBLAST_EXPORT const int kBlastPatchVersion = 1; -NCBI_XBLAST_EXPORT const char* kBlastReleaseDate = "June-01-2020"; +NCBI_XBLAST_EXPORT const int kBlastMinorVersion = 11; +NCBI_XBLAST_EXPORT const int kBlastPatchVersion = 0; +NCBI_XBLAST_EXPORT const char* kBlastReleaseDate = "Oct-15-2020"; /** Structure to be passed to s_BlastSearchEngineCore, containing pointers to various preallocated structures and arrays. 
*/ @@ -992,6 +992,9 @@ s_BlastSetUpAuxStructures(const BlastSeqSrc* seq_src, Boolean jumper = (ext_options->ePrelimGapExt == eJumperWithTraceback); Int4 offset_array_size = GetOffsetArraySize(lookup_wrap); + if(phi_lookup) { + offset_array_size = PHI_MAX_HIT; + } ASSERT(seq_src); *aux_struct_ptr = aux_struct = (BlastCoreAuxStruct*) diff --git a/c++/src/algo/blast/core/blast_kappa.c b/c++/src/algo/blast/core/blast_kappa.c index 8cad3447..4a4447e6 100644 --- a/c++/src/algo/blast/core/blast_kappa.c +++ b/c++/src/algo/blast/core/blast_kappa.c @@ -1,4 +1,4 @@ -/* $Id: blast_kappa.c 605341 2020-04-09 16:06:51Z ivanov $ +/* $Id: blast_kappa.c 616357 2020-09-15 12:19:52Z ivanov $ * ========================================================================== * * PUBLIC DOMAIN NOTICE @@ -3468,7 +3468,9 @@ Blast_RedoAlignmentCore_MT(EBlastProgramType program_number, int tid = 0; #ifdef _OPENMP - tid = omp_get_thread_num(); + if(actual_num_threads > 1) { + tid = omp_get_thread_num(); + } #endif seqSrc = seqsrc_tld[tid]; scoringParams = score_params_tld[tid]; @@ -3492,10 +3494,12 @@ Blast_RedoAlignmentCore_MT(EBlastProgramType program_number, if (seqSrc) { continue; } + if(actual_num_threads > 1) { #pragma omp critical(intrpt) - interrupt = TRUE; + interrupt = TRUE; #pragma omp flush(interrupt) - continue; + continue; + } } if (BlastCompo_EarlyTermination( @@ -3507,10 +3511,12 @@ Blast_RedoAlignmentCore_MT(EBlastProgramType program_number, if (seqSrc) { continue; } + if(actual_num_threads > 1) { #pragma omp critical(intrpt) - interrupt = TRUE; + interrupt = TRUE; #pragma omp flush(interrupt) - continue; + continue; + } } query_index = localMatch->query_index; @@ -3728,7 +3734,8 @@ match_loop_cleanup: } s_MatchingSequenceRelease(&matchingSeq); BlastCompo_AlignmentsFree(&incoming_aligns, NULL); - if (*pStatusCode != 0 || !seqSrc) { + if ((actual_num_threads > 1) && + (*pStatusCode != 0 || !seqSrc)) { #pragma omp critical(intrpt) interrupt = TRUE; #pragma omp flush(interrupt) diff 
--git a/c++/src/algo/blast/format/blast_format.cpp b/c++/src/algo/blast/format/blast_format.cpp index aad39515..1b0289ec 100644 --- a/c++/src/algo/blast/format/blast_format.cpp +++ b/c++/src/algo/blast/format/blast_format.cpp @@ -2416,3 +2416,95 @@ void CBlastFormat::x_InitSAMFormatter() m_SamFormatter.reset(new CBlast_SAM_Formatter(m_Outfile, *m_Scope, m_CustomOutputFormatSpec, pg)); } + +bool s_SetCompBasedStats(EProgram program) +{ + if (program == eBlastp || program == eTblastn || + program == ePSIBlast || program == ePSITblastn || + program == eRPSBlast || program == eRPSTblastn || + program == eBlastx || program == eDeltaBlast) { + return true; + } + return false; +} + +void CBlastFormat::LogBlastSearchInfo(CBlastUsageReport & report) +{ + if (report.IsEnabled()) { + report.AddParam(CBlastUsageReport::eProgram, m_Program); + EProgram task = m_Options->GetProgram(); + string task_str = EProgramToTaskName(task); + report.AddParam(CBlastUsageReport::eTask, task_str); + report.AddParam(CBlastUsageReport::eEvalueThreshold, m_Options->GetEvalueThreshold()); + report.AddParam(CBlastUsageReport::eHitListSize, m_Options->GetHitlistSize()); + report.AddParam(CBlastUsageReport::eOutputFmt, m_FormatType); + + if (s_SetCompBasedStats(task)) { + report.AddParam(CBlastUsageReport::eCompBasedStats, m_Options->GetCompositionBasedStats()); + } + + int num_seqs = 0; + for (size_t i = 0; i < m_DbInfo.size(); i++) { + num_seqs += m_DbInfo[i].number_seqs; + } + if( m_IsBl2Seq) { + report.AddParam(CBlastUsageReport::eBl2seq, "true"); + if (m_IsDbScan) { + report.AddParam(CBlastUsageReport::eNumSubjects, num_seqs); + report.AddParam(CBlastUsageReport::eSubjectsLength, GetDbTotalLength()); + } + else if (m_SeqInfoSrc.NotEmpty()){ + report.AddParam(CBlastUsageReport::eNumSubjects, (int) m_SeqInfoSrc->Size()); + int total_subj_length = 0; + for (size_t i = 0; i < m_SeqInfoSrc->Size(); i++) { + total_subj_length += m_SeqInfoSrc->GetLength(i); + } + 
report.AddParam(CBlastUsageReport::eSubjectsLength, total_subj_length); + } + } + else { + string dir = kEmptyStr; + CFile::SplitPath(m_DbName, &dir); + string db_name = m_DbName; + if (dir != kEmptyStr) { + db_name = m_DbName.substr(dir.length()); + } + report.AddParam(CBlastUsageReport::eDBName, db_name); + report.AddParam(CBlastUsageReport::eDBLength, GetDbTotalLength()); + report.AddParam(CBlastUsageReport::eDBNumSeqs, num_seqs); + report.AddParam(CBlastUsageReport::eDBDate, m_DbInfo[0].date); + if(m_SearchDb.NotEmpty()){ + if(m_SearchDb->GetGiList().NotEmpty()) { + CRef l = m_SearchDb->GetGiList(); + if (l->GetNumGis()) { + report.AddParam(CBlastUsageReport::eGIList, true); + } + if (l->GetNumSis()){ + report.AddParam(CBlastUsageReport::eSeqIdList, true); + } + if (l->GetNumTaxIds()){ + report.AddParam(CBlastUsageReport::eTaxIdList, true); + } + if (l->GetNumPigs()) { + report.AddParam(CBlastUsageReport::eIPGList, true); + } + } + if(m_SearchDb->GetNegativeGiList().NotEmpty()) { + CRef l = m_SearchDb->GetNegativeGiList(); + if (l->GetNumGis()) { + report.AddParam(CBlastUsageReport::eNegGIList, true); + } + if (l->GetNumSis()){ + report.AddParam(CBlastUsageReport::eNegSeqIdList, true); + } + if (l->GetNumTaxIds()){ + report.AddParam(CBlastUsageReport::eNegTaxIdList, true); + } + if (l->GetNumPigs()) { + report.AddParam(CBlastUsageReport::eNegIPGList, true); + } + } + } + } + } +} diff --git a/c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp b/c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp index 23c25e73..3e8d753e 100644 --- a/c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp +++ b/c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp @@ -1,4 +1,4 @@ -/* $Id: version_reference_unit_test.cpp 604741 2020-04-01 15:15:25Z ivanov $ +/* $Id: version_reference_unit_test.cpp 617227 2020-09-28 18:26:44Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN 
NOTICE @@ -43,8 +43,8 @@ BOOST_AUTO_TEST_SUITE(version_reference) BOOST_AUTO_TEST_CASE(testVersion) { const int kMajor = 2; - const int kMinor = 10; - const int kPatch = 1; + const int kMinor = 11; + const int kPatch = 0; blast::CBlastVersion v; BOOST_REQUIRE_EQUAL(kMajor, v.GetMajor()); BOOST_REQUIRE_EQUAL(kMinor, v.GetMinor()); diff --git a/c++/src/app/CMakeLists.txt b/c++/src/app/CMakeLists.txt index f080ea2a..cd1b2387 100644 --- a/c++/src/app/CMakeLists.txt +++ b/c++/src/app/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeLists.txt 594373 2019-10-03 13:30:50Z gouriano $ +# $Id: CMakeLists.txt 612980 2020-07-30 19:13:50Z ivanov $ ############################################################################# # Include projects from this directory @@ -23,6 +23,7 @@ NCBI_add_subdirectory( convert_seq discrepancy_report dustmasker + flat2asn formatguess gap_stats gi2taxid diff --git a/c++/src/app/Makefile.in b/c++/src/app/Makefile.in index 56502141..feadce62 100644 --- a/c++/src/app/Makefile.in +++ b/c++/src/app/Makefile.in @@ -1,9 +1,9 @@ -# $Id: Makefile.in 591515 2019-08-16 14:37:05Z ludwigf $ +# $Id: Makefile.in 612980 2020-07-30 19:13:50Z ivanov $ # Miscellaneous applications ################################# -SUB_PROJ = asn2asn asn2fasta asn2flat asnval asn_cleanup \ +SUB_PROJ = asn2asn asn2fasta asn2flat flat2asn asnval asn_cleanup \ id1_fetch blast convert_seq \ nmer_repeats objmgr gi2taxid netschedule grid netstorage igblast \ winmasker dustmasker segmasker blastdb vecscreen \ @@ -15,7 +15,7 @@ SUB_PROJ = asn2asn asn2fasta asn2flat asnval asn_cleanup \ srcchk tableval ncbi_encrypt ssub_fork asn_cache magicblast \ multipattern prt2fsm \ pub_report gff_deconcat sub_fuse \ - feat_import + feat_import EXPENDABLE_SUB_PROJ = split_cache wig2table netcache rmblastn dblb tls idfetch pubseq_gateway diff --git a/c++/src/app/blast/CMakeLists.rpsblast.app.txt 
b/c++/src/app/blast/CMakeLists.rpsblast.app.txt index 8baf4a1a..9e28aa96 100644 --- a/c++/src/app/blast/CMakeLists.rpsblast.app.txt +++ b/c++/src/app/blast/CMakeLists.rpsblast.app.txt @@ -1,9 +1,9 @@ ############################################################################# -# $Id: CMakeLists.rpsblast.app.txt 593591 2019-09-20 14:53:34Z gouriano $ +# $Id: CMakeLists.rpsblast.app.txt 615197 2020-08-28 04:31:45Z fukanchi $ ############################################################################# NCBI_begin_app(rpsblast) - NCBI_sources(rpsblast_app) + NCBI_sources(rpsblast_node rpsblast_app) NCBI_add_definitions(NCBI_MODULE=BLAST) NCBI_uses_toolkit_libraries(blast_app_util) NCBI_requires(-Cygwin) diff --git a/c++/src/app/blast/CMakeLists.rpstblastn.app.txt b/c++/src/app/blast/CMakeLists.rpstblastn.app.txt index b87f1f73..de8d7b0c 100644 --- a/c++/src/app/blast/CMakeLists.rpstblastn.app.txt +++ b/c++/src/app/blast/CMakeLists.rpstblastn.app.txt @@ -1,9 +1,9 @@ ############################################################################# -# $Id: CMakeLists.rpstblastn.app.txt 593591 2019-09-20 14:53:34Z gouriano $ +# $Id: CMakeLists.rpstblastn.app.txt 615200 2020-08-28 04:32:09Z fukanchi $ ############################################################################# NCBI_begin_app(rpstblastn) - NCBI_sources(rpstblastn_app) + NCBI_sources(rpstblastn_node rpstblastn_app) NCBI_add_definitions(NCBI_MODULE=BLAST) NCBI_uses_toolkit_libraries(blast_app_util) NCBI_requires(-Cygwin) diff --git a/c++/src/app/blast/Makefile.rpsblast.app b/c++/src/app/blast/Makefile.rpsblast.app index 5cd6a080..3ffc95ee 100644 --- a/c++/src/app/blast/Makefile.rpsblast.app +++ b/c++/src/app/blast/Makefile.rpsblast.app @@ -1,7 +1,7 @@ WATCHERS = camacho madden fongah2 APP = rpsblast -SRC = rpsblast_app +SRC = rpsblast_node rpsblast_app LIB_ = $(BLAST_INPUT_LIBS) $(BLAST_LIBS) xregexp $(PCRE_LIB) $(OBJMGR_LIBS) LIB = blast_app_util $(LIB_:%=%$(STATIC)) diff --git 
a/c++/src/app/blast/Makefile.rpstblastn.app b/c++/src/app/blast/Makefile.rpstblastn.app index c5b6fa13..4c0df466 100644 --- a/c++/src/app/blast/Makefile.rpstblastn.app +++ b/c++/src/app/blast/Makefile.rpstblastn.app @@ -1,7 +1,7 @@ WATCHERS = camacho madden fongah2 APP = rpstblastn -SRC = rpstblastn_app +SRC = rpstblastn_node rpstblastn_app LIB_ = $(BLAST_INPUT_LIBS) $(BLAST_LIBS) xregexp $(PCRE_LIB) $(OBJMGR_LIBS) LIB = blast_app_util $(LIB_:%=%$(STATIC)) diff --git a/c++/src/app/blast/blast_app_util.cpp b/c++/src/app/blast/blast_app_util.cpp index 729f353f..b9eee79c 100644 --- a/c++/src/app/blast/blast_app_util.cpp +++ b/c++/src/app/blast/blast_app_util.cpp @@ -1,4 +1,4 @@ -/* $Id: blast_app_util.cpp 592833 2019-09-09 13:01:28Z fongah2 $ +/* $Id: blast_app_util.cpp 615351 2020-08-31 15:38:53Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -915,47 +915,6 @@ GetSubjectFile(const CArgs& args) return filename; } - -void CBlastAppDiagHandler::Post(const SDiagMessage & mess) -{ - if(m_handler != NULL) { - m_handler->Post(mess); - } - if(m_save) { - CRef d(new CBlast4_error); - string m; - mess.Write(m); - d->SetMessage(NStr::Sanitize(m)); - d->SetCode((int)mess.m_Severity); - { - DEFINE_STATIC_MUTEX(mx); - CMutexGuard guard(mx); - m_messages.push_back(d); - } - } -} - -void CBlastAppDiagHandler::ResetMessages() -{ - DEFINE_STATIC_MUTEX(mx); - CMutexGuard guard(mx); - m_messages.clear(); -} - -CBlastAppDiagHandler::~CBlastAppDiagHandler() -{ - if(m_handler) { - SetDiagHandler(m_handler); - m_handler = NULL; - } -} - -void CBlastAppDiagHandler::DoNotSaveMessages(void) -{ - m_save = false; - ResetMessages(); -} - void PrintErrorArchive(const CArgs & a, const list > & msg) { try { @@ -983,4 +942,41 @@ void QueryBatchCleanup() } +void LogQueryInfo(CBlastUsageReport & report, const CBlastInput & q_info) +{ + report.AddParam(CBlastUsageReport::eTotalQueryLength, q_info.GetTotalLengthProcessed()); + 
report.AddParam(CBlastUsageReport::eNumQueries, q_info.GetNumSeqsProcessed()); +} + + +void LogRPSBlastOptions(blast::CBlastUsageReport & report, const CBlastOptions & opt) +{ + report.AddParam(CBlastUsageReport::eProgram, Blast_ProgramNameFromType(opt.GetProgramType())); + report.AddParam(CBlastUsageReport::eEvalueThreshold, opt.GetEvalueThreshold()); + report.AddParam(CBlastUsageReport::eHitListSize, opt.GetHitlistSize()); + report.AddParam(CBlastUsageReport::eCompBasedStats, opt.GetCompositionBasedStats()); +} + +void LogRPSCmdOptions(blast::CBlastUsageReport & report, const CBlastAppArgs & args) +{ + if (args.GetBlastDatabaseArgs().NotEmpty() && + args.GetBlastDatabaseArgs()->GetSearchDatabase().NotEmpty() && + args.GetBlastDatabaseArgs()->GetSearchDatabase()->GetSeqDb().NotEmpty()) { + + CRef db = args.GetBlastDatabaseArgs()->GetSearchDatabase()->GetSeqDb(); + string db_name = db->GetDBNameList(); + int off = db_name.find_last_of(CFile::GetPathSeparator()); + if (off != -1) { + db_name.erase(0, off+1); + } + report.AddParam(CBlastUsageReport::eDBName, db_name); + report.AddParam(CBlastUsageReport::eDBLength, (Int8) db->GetTotalLength()); + report.AddParam(CBlastUsageReport::eDBNumSeqs, db->GetNumSeqs()); + report.AddParam(CBlastUsageReport::eDBDate, db->GetDate()); + } + + if(args.GetFormattingArgs().NotEmpty()){ + report.AddParam(CBlastUsageReport::eOutputFmt, args.GetFormattingArgs()->GetFormattedOutputChoice()); + } +} END_NCBI_SCOPE diff --git a/c++/src/app/blast/blast_app_util.hpp b/c++/src/app/blast/blast_app_util.hpp index ace25cbd..998b65af 100644 --- a/c++/src/app/blast/blast_app_util.hpp +++ b/c++/src/app/blast/blast_app_util.hpp @@ -1,4 +1,4 @@ -/* $Id: blast_app_util.hpp 570350 2018-09-07 12:47:53Z fongah2 $ +/* $Id: blast_app_util.hpp 615351 2020-08-31 15:38:53Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -38,6 +38,7 @@ #include #include #include +#include #include #include 
@@ -48,6 +49,7 @@ #include #include // for CBlastFormatUtil #include // for SDataLoaderConfig +#include BEGIN_NCBI_SCOPE @@ -302,28 +304,6 @@ UseXInclude(const blast::CFormattingArgs & f, const string & s); string GetSubjectFile(const CArgs& args); -/// Class to capture message from diag handler -class CBlastAppDiagHandler : public CDiagHandler -{ -public: - /// Constructor - CBlastAppDiagHandler():m_handler(GetDiagHandler(true)), m_save (true) {} - /// Destructor - ~CBlastAppDiagHandler(); - /// Save and post diag message - virtual void Post (const SDiagMessage & mess); - /// Reset messgae buffer, erase all saved message - void ResetMessages(void); - /// Call to turn off saving diag message, discard all saved message - void DoNotSaveMessages(void); - /// Return list of saved diag messages - list > & GetMessages(void) { return m_messages;} -private : - CDiagHandler * m_handler; - list > m_messages; - bool m_save; -}; - /// Function to print blast archive with only error messages (search failed) /// to output stream /// @param a cmdline args [in] @@ -333,6 +313,12 @@ void PrintErrorArchive(const CArgs & a, const list > & msg); /// Clean up formatter scope and release void QueryBatchCleanup(); +void LogQueryInfo(blast::CBlastUsageReport & report, const blast::CBlastInput & q_info); + +/// Log blast usage opts for rpsblast apps +void LogRPSBlastOptions(blast::CBlastUsageReport & report, const blast::CBlastOptions & opt); +void LogRPSCmdOptions(blast::CBlastUsageReport & report, const blast::CBlastAppArgs & args); + END_NCBI_SCOPE #endif /* APP__BLAST_APP_UTIL__HPP */ diff --git a/c++/src/app/blast/blast_formatter.cpp b/c++/src/app/blast/blast_formatter.cpp index e8474b45..85e43bc5 100644 --- a/c++/src/app/blast/blast_formatter.cpp +++ b/c++/src/app/blast/blast_formatter.cpp @@ -1,4 +1,4 @@ -/* $Id: blast_formatter.cpp 591152 2019-08-12 11:18:21Z fongah2 $ +/* $Id: blast_formatter.cpp 616875 2020-09-22 13:14:55Z ivanov $ * 
=========================================================================== * * PUBLIC DOMAIN NOTICE @@ -40,6 +40,9 @@ #include #include #include +#include +#include +#include #include "blast_app_util.hpp" @@ -57,8 +60,18 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); - m_LoadFromArchive = false; + m_LoadFromArchive = false; + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blast_formatter"); + } + } + + ~CBlastFormatterApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } + private: /** @inheritDoc */ virtual void Init(); @@ -82,6 +95,8 @@ private: /// @param scope Scope object to add the sequence data to [in|out] SSeqLoc x_QueryBioseqToSSeqLoc(const CBioseq& bioseq, CRef scope); + void x_AddCmdOptions(); + /// Our link to the NCBI BLAST service CRef m_RmtBlast; @@ -90,6 +105,8 @@ private: /// Tracks whether results come from an archive file. 
bool m_LoadFromArchive; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; void CBlastFormatterApp::Init() @@ -237,6 +254,78 @@ s_ConvertSubjects2TSeqLocVector(CRef remote_blast) return retval; } +bool +s_InitializeSubject(CRef db_args, + CRef opts_hndl, + CRef& db_adapter, + CRef& scope) +{ + bool isRemote = false; + db_adapter.Reset(); + + _ASSERT(db_args.NotEmpty()); + CRef search_db = db_args->GetSearchDatabase(); + + if (scope.Empty()) { + scope.Reset(new CScope(*CObjectManager::GetInstance())); + } + + CRef subjects; + if ( (subjects = db_args->GetSubjects(scope)) ) { + _ASSERT(search_db.Empty()); + char* bl2seq_legacy = getenv("BL2SEQ_LEGACY"); + if (bl2seq_legacy) { + db_adapter.Reset(new CLocalDbAdapter(subjects, opts_hndl, false)); + } + else { + db_adapter.Reset(new CLocalDbAdapter(subjects, opts_hndl, true)); + } + } else { + _ASSERT(search_db.NotEmpty()); + try { + // Try to open the BLAST database even for remote searches, as if + // it is available locally, it will be better to fetch the + // sequence data for formatting from this (local) source + CRef seqdb = search_db->GetSeqDb(); + db_adapter.Reset(new CLocalDbAdapter(*search_db)); + scope->AddDataLoader(RegisterOMDataLoader(seqdb), CBlastDatabaseArgs::kSubjectsDataLoaderPriority); + LOG_POST(Info <<"Add local loader " << search_db->GetDatabaseName()); + } catch (const CSeqDBException&) { + SetDiagPostLevel(eDiag_Critical); + string remote_loader = kEmptyStr; + try { + db_adapter.Reset(new CLocalDbAdapter(*search_db)); + remote_loader = CRemoteBlastDbDataLoader::RegisterInObjectManager + (*( CObjectManager::GetInstance()), + search_db->GetDatabaseName(), + search_db->IsProtein() ? 
CBlastDbDataLoader::eProtein : CBlastDbDataLoader::eNucleotide, + true, CObjectManager::eDefault, CBlastDatabaseArgs::kSubjectsDataLoaderPriority) + .GetLoader()->GetName(); + scope->AddDataLoader(remote_loader, CBlastDatabaseArgs::kSubjectsDataLoaderPriority); + SetDiagPostLevel(eDiag_Warning); + isRemote = true; + LOG_POST(Info <<"Remote " << search_db->GetDatabaseName()); + } + catch (CException & e) { + SetDiagPostLevel(eDiag_Warning); + NCBI_THROW(CException, eUnknown, "Fail to initialize local or remote DB" ); + } + } + } + try { + const int kGenbankLoaderPriority = 99; + CRef reader(new CId2Reader); + reader->SetPreopenConnection(false); + string genbank_loader = CGBDataLoader::RegisterInObjectManager + (*( CObjectManager::GetInstance()), reader,CObjectManager::eNonDefault).GetLoader()->GetName(); + scope->AddDataLoader(genbank_loader, kGenbankLoaderPriority); + } catch (const CException& e) { + LOG_POST(Info << "Failed to add genbank dataloader"); + // It's ok not to have genbank loader + } + return isRemote; +} + int CBlastFormatterApp::PrintFormattedOutput(void) { int retval = 0; @@ -306,7 +395,7 @@ int CBlastFormatterApp::PrintFormattedOutput(void) } CRef db_adapter; - InitializeSubject(db_args, opts_handle, true, db_adapter, scope); + bool isRemoteLoader = s_InitializeSubject(db_args, opts_handle, db_adapter, scope); const string kTask = m_RmtBlast->GetTask(); @@ -323,7 +412,7 @@ int CBlastFormatterApp::PrintFormattedOutput(void) opts.GetQueryGeneticCode(), opts.GetDbGeneticCode(), opts.GetSumStatisticsMode(), - !kRid.empty(), + (!kRid.empty() || isRemoteLoader), filtering_algorithm, fmt_args.GetCustomOutputFormatSpec(), kTask == "megablast", @@ -396,7 +485,7 @@ int CBlastFormatterApp::PrintFormattedOutput(void) else { scope->AddScope(*(queries->GetScope(0))); } - InitializeSubject(db_args, opts_handle, true, db_adapter, scope); + s_InitializeSubject(db_args, opts_handle, db_adapter, scope); } } formatter.PrintEpilog(opts); @@ -475,9 +564,26 @@ int 
CBlastFormatterApp::Run(void) } } CATCH_ALL(status) + x_AddCmdOptions(); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } +void CBlastFormatterApp::x_AddCmdOptions() +{ + const CArgs & args = GetArgs(); + if (args[kArgRid].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eRIDInput, args[kArgRid].AsString()); + } + else if (args[kArgArchive].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eArchiveInput, true); + } + + if(args["outfmt"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eOutputFmt, args["outfmt"].AsString()); + } +} + #ifndef SKIP_DOXYGEN_PROCESSING int main(int argc, const char* argv[] /*, const char* envp[]*/) diff --git a/c++/src/app/blast/blastn_app.cpp b/c++/src/app/blast/blastn_app.cpp index ef3e0d61..821d9ccf 100644 --- a/c++/src/app/blast/blastn_app.cpp +++ b/c++/src/app/blast/blastn_app.cpp @@ -1,4 +1,4 @@ -/* $Id: blastn_app.cpp 574693 2018-11-16 17:46:37Z zaretska $ +/* $Id: blastn_app.cpp 615344 2020-08-31 15:37:55Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -55,6 +55,14 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + } + } + + ~CBlastnApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: /** @inheritDoc */ @@ -64,6 +72,8 @@ private: /// This application's command line args CRef m_CmdLineArgs; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; void CBlastnApp::Init() @@ -111,6 +121,7 @@ int CBlastnApp::Run(void) /*** Get the query sequence(s) ***/ CRef query_opts = m_CmdLineArgs->GetQueryOptionsArgs(); + SDataLoaderConfig dlconfig = InitializeQueryDataLoaderConfiguration(query_opts->QueryIsProtein(), db_adapter); @@ -223,12 +234,17 @@ int 
CBlastnApp::Run(void) opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1); } + LogQueryInfo(m_UsageReport, input); + formatter.LogBlastSearchInfo(m_UsageReport); } CATCH_ALL(status) if(!bah.GetMessages().empty()) { const CArgs & a = GetArgs(); PrintErrorArchive(a, bah.GetMessages()); } + + m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads()); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } diff --git a/c++/src/app/blast/blastp_app.cpp b/c++/src/app/blast/blastp_app.cpp index 4be0f568..36ff687e 100644 --- a/c++/src/app/blast/blastp_app.cpp +++ b/c++/src/app/blast/blastp_app.cpp @@ -1,4 +1,4 @@ -/* $Id: blastp_app.cpp 574693 2018-11-16 17:46:37Z zaretska $ +/* $Id: blastp_app.cpp 616355 2020-09-15 12:19:36Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -55,6 +55,14 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + } + } + + ~CBlastpApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: /** @inheritDoc */ @@ -64,6 +72,8 @@ private: /// This application's command line args CRef m_CmdLineArgs; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; void CBlastpApp::Init() @@ -202,11 +212,16 @@ int CBlastpApp::Run(void) opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1); } + LogQueryInfo(m_UsageReport, input); + formatter.LogBlastSearchInfo(m_UsageReport); } CATCH_ALL(status) if(!bah.GetMessages().empty()) { const CArgs & a = GetArgs(); PrintErrorArchive(a, bah.GetMessages()); } + + m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads()); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return 
status; } diff --git a/c++/src/app/blast/blastx_app.cpp b/c++/src/app/blast/blastx_app.cpp index 3e4e9eeb..c6d82121 100644 --- a/c++/src/app/blast/blastx_app.cpp +++ b/c++/src/app/blast/blastx_app.cpp @@ -1,4 +1,4 @@ -/* $Id: blastx_app.cpp 574693 2018-11-16 17:46:37Z zaretska $ +/* $Id: blastx_app.cpp 615342 2020-08-31 15:37:39Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -55,7 +55,15 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + } } + + ~CBlastxApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); + } private: /** @inheritDoc */ virtual void Init(); @@ -64,6 +72,8 @@ private: /// This application's command line args CRef m_CmdLineArgs; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; void CBlastxApp::Init() @@ -202,11 +212,15 @@ int CBlastxApp::Run(void) opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1); } + LogQueryInfo(m_UsageReport, input); + formatter.LogBlastSearchInfo(m_UsageReport); } CATCH_ALL(status) if(!bah.GetMessages().empty()) { const CArgs & a = GetArgs(); PrintErrorArchive(a, bah.GetMessages()); } + m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads()); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } diff --git a/c++/src/app/blast/deltablast_app.cpp b/c++/src/app/blast/deltablast_app.cpp index e8a39e90..20adaaa5 100644 --- a/c++/src/app/blast/deltablast_app.cpp +++ b/c++/src/app/blast/deltablast_app.cpp @@ -1,4 +1,4 @@ -/* $Id: deltablast_app.cpp 570608 2018-09-12 12:17:57Z fongah2 $ +/* $Id: deltablast_app.cpp 615345 2020-08-31 15:38:03Z fukanchi $ * =========================================================================== * * PUBLIC 
DOMAIN NOTICE @@ -63,6 +63,13 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + } + } + ~CDeltaBlastApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: /** @inheritDoc */ @@ -100,6 +107,8 @@ private: CRef m_AncillaryData; CBlastAppDiagHandler m_bah; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; void CDeltaBlastApp::Init() @@ -440,11 +449,15 @@ int CDeltaBlastApp::Run(void) opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1); } + LogQueryInfo(m_UsageReport, input); + formatter.LogBlastSearchInfo(m_UsageReport); } CATCH_ALL(status) if(!m_bah.GetMessages().empty()) { const CArgs & a = GetArgs(); PrintErrorArchive(a, m_bah.GetMessages()); } + m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads()); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } diff --git a/c++/src/app/blast/get_species_taxids.sh b/c++/src/app/blast/get_species_taxids.sh index 10275b8f..60995cd7 100755 --- a/c++/src/app/blast/get_species_taxids.sh +++ b/c++/src/app/blast/get_species_taxids.sh @@ -1,5 +1,5 @@ #!/bin/bash -# $Id: get_species_taxids.sh 588462 2019-06-24 18:46:42Z camacho $ +# $Id: get_species_taxids.sh 617228 2020-09-28 18:26:52Z ivanov $ # =========================================================================== # # PUBLIC DOMAIN NOTICE @@ -141,8 +141,9 @@ if [ ! -z "${NAME}" ]; then error_exit "esummary error" $? 
fi + sed -i 's/,\|{/\n/g' $TMP grep 'uid\|rank\|division\|scientificname\|commonname' $TMP | \ - grep -v "uids\|genbankdivision" | tr -d '"\|,' | tr -s ' ' | \ + grep -v "uids\|genbankdivision" | tr '"\|,' " " | tr -s ' ' | \ sed 's/uid/\nTaxid/g;s/name/ name/g' > $OUTPUT echo -e "\n$NUM_RESULTS matche(s) found.\n" >> $OUTPUT diff --git a/c++/src/app/blast/legacy_blast.pl b/c++/src/app/blast/legacy_blast.pl index 6422cf7e..876a1ca5 100755 --- a/c++/src/app/blast/legacy_blast.pl +++ b/c++/src/app/blast/legacy_blast.pl @@ -1,5 +1,5 @@ -#! /usr/bin/perl -w -# $Id: legacy_blast.pl 195935 2010-06-28 20:32:08Z camacho $ +#! /usr/bin/env perl +# $Id: legacy_blast.pl 609147 2020-05-27 11:52:21Z ivanov $ # =========================================================================== # # PUBLIC DOMAIN NOTICE @@ -68,7 +68,7 @@ if ($application eq "blastall") { } elsif ($application eq "seedtop") { $cmd = &handle_seedtop(\$print_only); } elsif ($application =~ /version/) { - my $revision = '$Revision: 195935 $'; + my $revision = '$Revision: 609147 $'; $revision =~ s/\$Revision: | \$//g; print "$0 version $revision\n"; goto CLEAN_UP; diff --git a/c++/src/app/blast/psiblast_app.cpp b/c++/src/app/blast/psiblast_app.cpp index f80fd71a..96a6cabb 100644 --- a/c++/src/app/blast/psiblast_app.cpp +++ b/c++/src/app/blast/psiblast_app.cpp @@ -1,4 +1,4 @@ -/* $Id: psiblast_app.cpp 570608 2018-09-12 12:17:57Z fongah2 $ +/* $Id: psiblast_app.cpp 617621 2020-10-05 13:24:26Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -62,6 +62,13 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + } + } + ~CPsiBlastApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: /** @inheritDoc */ @@ -113,6 +120,8 
@@ private: CConstRef m_AncillaryData; CBlastAppDiagHandler m_bah; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; void CPsiBlastApp::Init() @@ -368,6 +377,7 @@ CPsiBlastApp::DoIterations(CRef opts_hndl, retval = x_RunLocalPsiBlastIterations(query, pssm, scope, db_adapter, opts_hndl, formatter, kNumIterations); } + m_UsageReport.AddParam(CBlastUsageReport::eConverged, retval); return retval; } @@ -459,6 +469,7 @@ int CPsiBlastApp::Run(void) _TRACE("PSI-BLAST running with FASTA input"); } else { _TRACE("PSI-BLAST running with PSSM input"); + m_UsageReport.AddParam(CBlastUsageReport::ePSSMInput, true); } /*** Get the formatting options ***/ @@ -552,12 +563,19 @@ int CPsiBlastApp::Run(void) if (m_CmdLineArgs->ProduceDebugOutput()) opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1); + if(input) { + LogQueryInfo(m_UsageReport, *input); + } + + formatter.LogBlastSearchInfo(m_UsageReport); } CATCH_ALL(status) if(!m_bah.GetMessages().empty()) { const CArgs & a = GetArgs(); PrintErrorArchive(a, m_bah.GetMessages()); } + m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads()); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } diff --git a/c++/src/app/blast/rpsblast_app.cpp b/c++/src/app/blast/rpsblast_app.cpp index 32dd5a32..7814eac3 100644 --- a/c++/src/app/blast/rpsblast_app.cpp +++ b/c++/src/app/blast/rpsblast_app.cpp @@ -1,4 +1,4 @@ -/* $Id: rpsblast_app.cpp 570608 2018-09-12 12:17:57Z fongah2 $ +/* $Id: rpsblast_app.cpp 615351 2020-08-31 15:38:53Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -40,6 +40,7 @@ #include #include #include "blast_app_util.hpp" +#include "rpsblast_node.hpp" #include #include @@ -57,6 +58,13 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + 
m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + } + } + ~CRPSBlastApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: /** @inheritDoc */ @@ -64,8 +72,13 @@ private: /** @inheritDoc */ virtual int Run(); + int x_RunMTBySplitDB(); + int x_RunMTBySplitQuery(); + /// This application's command line args CRef m_CmdLineArgs; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; void CRPSBlastApp::Init() @@ -81,6 +94,18 @@ void CRPSBlastApp::Init() } int CRPSBlastApp::Run(void) +{ + const CArgs& args = GetArgs(); + if ((args[kArgMTMode].AsInteger() == 0) || (args[kArgNumThreads].AsInteger() <= 1)){ + return x_RunMTBySplitDB(); + } + else { + m_UsageReport.AddParam(CBlastUsageReport::eMTMode, args[kArgMTMode].AsInteger()); + return x_RunMTBySplitQuery(); + } +} + +int CRPSBlastApp::x_RunMTBySplitDB(void) { int status = BLAST_EXIT_SUCCESS; CBlastAppDiagHandler bah; @@ -205,14 +230,91 @@ int CRPSBlastApp::Run(void) opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1); } + LogQueryInfo(m_UsageReport, input); + formatter.LogBlastSearchInfo(m_UsageReport); } CATCH_ALL(status) if(!bah.GetMessages().empty()) { const CArgs & a = GetArgs(); PrintErrorArchive(a, bah.GetMessages()); } + m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads()); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } + +int CRPSBlastApp::x_RunMTBySplitQuery(void) +{ + int status = BLAST_EXIT_SUCCESS; + CBlastAppDiagHandler bah; + int batch_size = 3600; + + char * mt_query_batch_env = getenv("BLAST_MT_QUERY_BATCH_SIZE"); + if (mt_query_batch_env) { + batch_size = NStr::StringToInt(mt_query_batch_env); + } + cerr << "Batch Size: " << batch_size << endl; + // Allow the fasta reader to complain on invalid sequence input + SetDiagPostLevel(eDiag_Warning); + SetDiagPostPrefix("rpsblast"); + SetDiagHandler(&bah, false); + + try { + const CArgs& args 
= GetArgs(); + const int kMaxNumOfThreads = args[kArgNumThreads].AsInteger(); + CRef opts_hndl; + if(RecoverSearchStrategy(args, m_CmdLineArgs)) { + opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args)); + } + else { + opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args)); + } + if(IsIStreamEmpty(m_CmdLineArgs->GetInputStream())){ + ERR_POST(Warning << "Query is Empty!"); + return BLAST_EXIT_SUCCESS; + } + CNcbiOstream & out_stream = m_CmdLineArgs->GetOutputStream(); + CBlastMasterNode master_node(out_stream, kMaxNumOfThreads); + int chunk_num = 0; + + LogRPSBlastOptions(m_UsageReport, opts_hndl->GetOptions()); + LogRPSCmdOptions(m_UsageReport, *m_CmdLineArgs); + CBlastNodeInputReader input(m_CmdLineArgs->GetInputStream(), batch_size, 360); + while (master_node.Processing()) { + if (!input.AtEOF()) { + if (!master_node.IsFull()) { + string qb; + int q_index = 0; + int num_q = input.GetQueryBatch(qb, q_index); + if (num_q > 0) { + CBlastNodeMailbox * mb(new CBlastNodeMailbox(chunk_num, master_node.GetBuzzer())); + CRPSBlastNode * t(new CRPSBlastNode(chunk_num, GetArguments(), args, bah, qb, q_index, num_q, mb)); + master_node.RegisterNode(t, mb); + chunk_num ++; + } + } + } + else { + master_node.Shutdown(); + m_UsageReport.AddParam(CBlastUsageReport::eNumQueries, master_node.GetNumOfQueries()); + m_UsageReport.AddParam(CBlastUsageReport::eTotalQueryLength, master_node.GetQueriesLength()); + m_UsageReport.AddParam(CBlastUsageReport::eNumErrStatus, master_node.GetNumErrStatus()); + m_UsageReport.AddParam(CBlastUsageReport::eNumQueryBatches, chunk_num); + } + } + + } CATCH_ALL (status) + + if(!bah.GetMessages().empty()) { + const CArgs & a = GetArgs(); + PrintErrorArchive(a, bah.GetMessages()); + } + m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads()); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); + return status; + +} + #ifndef SKIP_DOXYGEN_PROCESSING int main(int argc, const char* argv[] /*, 
const char* envp[]*/) { diff --git a/c++/src/app/blast/rpsblast_node.cpp b/c++/src/app/blast/rpsblast_node.cpp new file mode 100644 index 00000000..2a3c9a11 --- /dev/null +++ b/c++/src/app/blast/rpsblast_node.cpp @@ -0,0 +1,210 @@ +/* $Id: + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. 
+ * + * =========================================================================== + * + * Authors: Amelia Fong + * + */ + +/** @file rpsblast_node.cpp + * RPSBLAST node api + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "blast_app_util.hpp" +#include "rpsblast_node.hpp" +#include +#include + +#ifndef SKIP_DOXYGEN_PROCESSING +USING_NCBI_SCOPE; +USING_SCOPE(blast); +USING_SCOPE(objects); +#endif + +CRPSBlastNode::CRPSBlastNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args, + CBlastAppDiagHandler & bah, const string & input, + int query_index, int num_queries, CBlastNodeMailbox * mailbox): + CBlastNode(node_num, ncbi_args, args, bah, eRPSBlast, query_index, num_queries, mailbox), m_Input(input) +{ + m_CmdLineArgs.Reset(new CRPSBlastNodeArgs(m_Input)); + SetState(eInitialized); + SendMsg(CBlastNodeMsg::eRunRequest, (void*) this); +} + +int CRPSBlastNode::GetBlastResults(string & results) +{ + if(GetState() == eDone) { + results = CNcbiOstrstreamToString(m_CmdLineArgs->GetOutputStrStream()); + return GetStatus(); + } + return -1; +} + +CRPSBlastNode::~CRPSBlastNode() +{ + m_CmdLineArgs.Reset(); +} + +void * +CRPSBlastNode::Main() +{ + int status = BLAST_EXIT_SUCCESS; + CBlastAppDiagHandler & bah = GetDiagHandler(); + SetDiagPostPrefix(GetNodeIdStr().c_str()); + + SetState(eRunning); + try { + const CArgs& args = GetArgs(); + CRef opts_hndl; + if(RecoverSearchStrategy(args, m_CmdLineArgs)) { + opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args)); + } + else { + opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args)); + } + + CheckForFreqRatioFile(m_CmdLineArgs->GetBlastDatabaseArgs()->GetDatabaseName(), + opts_hndl, true); + const CBlastOptions& opt = opts_hndl->GetOptions(); + + /*** Initialize the database ***/ + CRef db_args(m_CmdLineArgs->GetBlastDatabaseArgs()); + CRef db_adapter; + CRef scope; + InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(), + db_adapter, 
scope); + _ASSERT(db_adapter && scope); + + /*** Get the query sequence(s) ***/ + CRef query_opts = + m_CmdLineArgs->GetQueryOptionsArgs(); + SDataLoaderConfig dlconfig = + InitializeQueryDataLoaderConfiguration(query_opts->QueryIsProtein(), + db_adapter); + CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(), + query_opts->UseLowercaseMasks(), + query_opts->GetParseDeflines(), + query_opts->GetRange()); + CBlastFastaInputSource fasta(m_CmdLineArgs->GetInputStream(), iconfig); + CBlastInput input(&fasta, m_CmdLineArgs->GetQueryBatchSize()); + + /*** Get the formatting options ***/ + CRef fmt_args(m_CmdLineArgs->GetFormattingArgs()); + bool isArchiveFormat = fmt_args->ArchiveFormatRequested(args); + if(!isArchiveFormat) { + bah.DoNotSaveMessages(); + } + CBlastFormat formatter(opt, *db_adapter, + fmt_args->GetFormattedOutputChoice(), + query_opts->GetParseDeflines(), + m_CmdLineArgs->GetOutputStream(), + fmt_args->GetNumDescriptions(), + fmt_args->GetNumAlignments(), + *scope, + opt.GetMatrixName(), + fmt_args->ShowGis(), + fmt_args->DisplayHtmlOutput(), + opt.GetQueryGeneticCode(), + opt.GetDbGeneticCode(), + opt.GetSumStatisticsMode(), + m_CmdLineArgs->ExecuteRemotely(), + db_adapter->GetFilteringAlgorithm(), + fmt_args->GetCustomOutputFormatSpec(), + false, false, NULL, NULL, + GetCmdlineArgs(GetArguments())); + + formatter.SetQueryRange(query_opts->GetRange()); + formatter.SetLineLength(fmt_args->GetLineLength()); + if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) { + formatter.SetBaseFile(args[kArgOutput].AsString()); + } + formatter.PrintProlog(); + + /*** Process the input ***/ + for (; !input.End(); formatter.ResetScopeHistory(), QueryBatchCleanup()) { + + CRef query_batch(input.GetNextSeqBatch(*scope)); + CRef queries(new CObjMgr_QueryFactory(*query_batch)); + + SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl); + + CRef results; + + if (m_CmdLineArgs->ExecuteRemotely()) + { + CRef rmt_blast = + InitializeRemoteBlast(queries, 
db_args, opts_hndl, + m_CmdLineArgs->ProduceDebugRemoteOutput(), + m_CmdLineArgs->GetClientId()); + results = rmt_blast->GetResultSet(); + } + else + { + CLocalRPSBlast local_search (query_batch, db_args->GetDatabaseName(), opts_hndl, 1); + results = local_search.Run(); + } + + if (fmt_args->ArchiveFormatRequested(args)) { + formatter.WriteArchive(*queries, *opts_hndl, *results, 0, bah.GetMessages()); + bah.ResetMessages(); + } else { + BlastFormatter_PreFetchSequenceData(*results, scope, + fmt_args->GetFormattedOutputChoice()); + ITERATE(CSearchResultSet, result, *results) { + formatter.PrintOneResultSet(**result, query_batch); + } + } + } + + formatter.PrintEpilog(opt); + + if (m_CmdLineArgs->ProduceDebugOutput()) { + opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1); + } + SetQueriesLength(input.GetTotalLengthProcessed()); + } CATCH_ALL(status) + + SetStatus(status); + if (status == BLAST_EXIT_SUCCESS) { + SetState(eDone); + SendMsg(CBlastNodeMsg::ePostResult, (void *) this); + + } + else { + SetState(eError); + SendMsg(CBlastNodeMsg::eErrorExit, (void *) this); + + } + + return NULL; +} + diff --git a/c++/src/app/blast/rpsblast_node.hpp b/c++/src/app/blast/rpsblast_node.hpp new file mode 100644 index 00000000..112a7a2e --- /dev/null +++ b/c++/src/app/blast/rpsblast_node.hpp @@ -0,0 +1,62 @@ +/* $Id: + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. 
+ * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Authors: Amelia Fong + * + */ + +/** @file rpsblast_node.hpp + * RPSBLAST node api + */ + +#ifndef APP__RPSBLAST_NODE__HPP +#define APP__RPSBLAST_NODE__HPP + +#include +#include + +BEGIN_NCBI_SCOPE +BEGIN_SCOPE(blast) + +class CRPSBlastNode : public CBlastNode +{ +public : + + CRPSBlastNode (int check_num, const CNcbiArguments & ncbi_args, const CArgs& args, + CBlastAppDiagHandler & bah, const string & input, + int query_index, int num_queries, CBlastNodeMailbox * mailbox = NULL); + virtual int GetBlastResults(string & results); +protected: + virtual ~CRPSBlastNode(void); + virtual void* Main(void); +private: + string m_Input; + CRef m_CmdLineArgs; +}; + +END_SCOPE(blast) +END_NCBI_SCOPE + +#endif /* APP__RPSBLAST_NODE__HPP */ diff --git a/c++/src/app/blast/rpstblastn_app.cpp b/c++/src/app/blast/rpstblastn_app.cpp index 8dafa55e..1f9ddd4e 100644 --- a/c++/src/app/blast/rpstblastn_app.cpp +++ b/c++/src/app/blast/rpstblastn_app.cpp @@ -1,4 +1,4 @@ -/* $Id: rpstblastn_app.cpp 570608 2018-09-12 12:17:57Z fongah2 $ +/* $Id: rpstblastn_app.cpp 615352 2020-08-31 15:39:03Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -40,6 +40,7 @@ #include #include #include "blast_app_util.hpp" +#include "rpstblastn_node.hpp" #include #include @@ -57,6 +58,13 @@ public: CRef version(new CVersion()); 
version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + } + } + ~CRPSTBlastnApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: /** @inheritDoc */ @@ -64,8 +72,13 @@ private: /** @inheritDoc */ virtual int Run(); + int x_RunMTBySplitDB(); + int x_RunMTBySplitQuery(); + /// This application's command line args CRef m_CmdLineArgs; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; void CRPSTBlastnApp::Init() @@ -80,7 +93,20 @@ void CRPSTBlastnApp::Init() SetupArgDescriptions(m_CmdLineArgs->SetCommandLine()); } + int CRPSTBlastnApp::Run(void) +{ + const CArgs& args = GetArgs(); + if ((args[kArgMTMode].AsInteger() == 0) || (args[kArgNumThreads].AsInteger() <= 1)){ + return x_RunMTBySplitDB(); + } + else { + m_UsageReport.AddParam(CBlastUsageReport::eMTMode, args[kArgMTMode].AsInteger()); + return x_RunMTBySplitQuery(); + } +} + +int CRPSTBlastnApp::x_RunMTBySplitDB(void) { int status = BLAST_EXIT_SUCCESS; CBlastAppDiagHandler bah; @@ -198,14 +224,91 @@ int CRPSTBlastnApp::Run(void) opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1); } + LogQueryInfo(m_UsageReport, input); + formatter.LogBlastSearchInfo(m_UsageReport); } CATCH_ALL(status) if(!bah.GetMessages().empty()) { const CArgs & a = GetArgs(); PrintErrorArchive(a, bah.GetMessages()); } + m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads()); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); + return status; +} + +int CRPSTBlastnApp::x_RunMTBySplitQuery(void) +{ + int status = BLAST_EXIT_SUCCESS; + CBlastAppDiagHandler bah; + int batch_size = 8000; + + char * mt_query_batch_env = getenv("BLAST_MT_QUERY_BATCH_SIZE"); + if (mt_query_batch_env) { + batch_size = NStr::StringToInt(mt_query_batch_env); + } + cerr << "Batch Size: " << batch_size 
<< endl; + // Allow the fasta reader to complain on invalid sequence input + SetDiagPostLevel(eDiag_Warning); + SetDiagPostPrefix("rpstblastn_mt"); + SetDiagHandler(&bah, false); + + try { + const CArgs& args = GetArgs(); + const int kMaxNumOfThreads = args[kArgNumThreads].AsInteger(); + CRef opts_hndl; + if(RecoverSearchStrategy(args, m_CmdLineArgs)) { + opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args)); + } + else { + opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args)); + } + if(IsIStreamEmpty(m_CmdLineArgs->GetInputStream())){ + ERR_POST(Warning << "Query is Empty!"); + return BLAST_EXIT_SUCCESS; + } + CNcbiOstream & out_stream = m_CmdLineArgs->GetOutputStream(); + CBlastMasterNode master_node(out_stream, kMaxNumOfThreads); + int chunk_num = 0; + + LogRPSBlastOptions(m_UsageReport, opts_hndl->GetOptions()); + LogRPSCmdOptions(m_UsageReport, *m_CmdLineArgs); + CBlastNodeInputReader input(m_CmdLineArgs->GetInputStream(), batch_size, 4500); + while (master_node.Processing()) { + if (!input.AtEOF()) { + if (!master_node.IsFull()) { + int q_index = 0; + string qb; + int num_q = input.GetQueryBatch(qb, q_index); + if (num_q > 0) { + CBlastNodeMailbox * mb(new CBlastNodeMailbox(chunk_num, master_node.GetBuzzer())); + CRPSTBlastnNode * t(new CRPSTBlastnNode(chunk_num, GetArguments(), args, bah, qb, q_index, num_q, mb)); + master_node.RegisterNode(t, mb); + chunk_num ++; + } + } + } + else { + master_node.Shutdown(); + m_UsageReport.AddParam(CBlastUsageReport::eNumQueries, master_node.GetNumOfQueries()); + m_UsageReport.AddParam(CBlastUsageReport::eTotalQueryLength, master_node.GetQueriesLength()); + m_UsageReport.AddParam(CBlastUsageReport::eNumErrStatus, master_node.GetNumErrStatus()); + m_UsageReport.AddParam(CBlastUsageReport::eNumQueryBatches, chunk_num); + } + + } + + } CATCH_ALL (status) + + if(!bah.GetMessages().empty()) { + const CArgs & a = GetArgs(); + PrintErrorArchive(a, bah.GetMessages()); + } + 
m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads()); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } + #ifndef SKIP_DOXYGEN_PROCESSING int main(int argc, const char* argv[] /*, const char* envp[]*/) { diff --git a/c++/src/app/blast/rpstblastn_node.cpp b/c++/src/app/blast/rpstblastn_node.cpp new file mode 100644 index 00000000..860f30ca --- /dev/null +++ b/c++/src/app/blast/rpstblastn_node.cpp @@ -0,0 +1,209 @@ +/* $Id: + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. 
+ * + * =========================================================================== + * + * Authors: Amelia Fong + * + */ + +/** @file rpstblastn_node.cpp + * RPSTBLASTN MT command line application + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "blast_app_util.hpp" +#include "rpstblastn_node.hpp" +#include + +#ifndef SKIP_DOXYGEN_PROCESSING +USING_NCBI_SCOPE; +USING_SCOPE(blast); +USING_SCOPE(objects); +#endif + +CRPSTBlastnNode::CRPSTBlastnNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args, + CBlastAppDiagHandler & bah, const string & input, + int query_index, int num_queries,CBlastNodeMailbox * mailbox): + CBlastNode(node_num, ncbi_args, args, bah, eRPSTblastn, query_index, num_queries, mailbox), m_Input(input) +{ + m_CmdLineArgs.Reset(new CRPSTBlastnNodeArgs(m_Input)); + SetState(eInitialized); + SendMsg(CBlastNodeMsg::eRunRequest, (void*) this); +} + +int CRPSTBlastnNode::GetBlastResults(string & results) +{ + if(GetState() == eDone) { + results = CNcbiOstrstreamToString(m_CmdLineArgs->GetOutputStrStream()); + return GetStatus(); + } + return -1; +} + +CRPSTBlastnNode::~CRPSTBlastnNode() +{ + m_CmdLineArgs.Reset(); +} + +void * +CRPSTBlastnNode::Main() +{ + int status = BLAST_EXIT_SUCCESS; + CBlastAppDiagHandler & bah = GetDiagHandler(); + SetDiagPostPrefix(GetNodeIdStr().c_str()); + + SetState(eRunning); + try { + const CArgs& args = GetArgs(); + CRef opts_hndl; + if(RecoverSearchStrategy(args, m_CmdLineArgs)) { + opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args)); + } + else { + opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args)); + } + + CheckForFreqRatioFile(m_CmdLineArgs->GetBlastDatabaseArgs()->GetDatabaseName(), + opts_hndl, true); + const CBlastOptions& opt = opts_hndl->GetOptions(); + + /*** Initialize the database ***/ + CRef db_args(m_CmdLineArgs->GetBlastDatabaseArgs()); + CRef db_adapter; + CRef scope; + InitializeSubject(db_args, opts_hndl, 
m_CmdLineArgs->ExecuteRemotely(), + db_adapter, scope); + _ASSERT(db_adapter && scope); + + /*** Get the query sequence(s) ***/ + CRef query_opts = + m_CmdLineArgs->GetQueryOptionsArgs(); + SDataLoaderConfig dlconfig = + InitializeQueryDataLoaderConfiguration(query_opts->QueryIsProtein(), + db_adapter); + CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(), + query_opts->UseLowercaseMasks(), + query_opts->GetParseDeflines(), + query_opts->GetRange()); + CBlastFastaInputSource fasta(m_CmdLineArgs->GetInputStream(), iconfig); + CBlastInput input(&fasta, m_CmdLineArgs->GetQueryBatchSize()); + + /*** Get the formatting options ***/ + CRef fmt_args(m_CmdLineArgs->GetFormattingArgs()); + bool isArchiveFormat = fmt_args->ArchiveFormatRequested(args); + if(!isArchiveFormat) { + bah.DoNotSaveMessages(); + } + CBlastFormat formatter(opt, *db_adapter, + fmt_args->GetFormattedOutputChoice(), + query_opts->GetParseDeflines(), + m_CmdLineArgs->GetOutputStream(), + fmt_args->GetNumDescriptions(), + fmt_args->GetNumAlignments(), + *scope, + opt.GetMatrixName(), + fmt_args->ShowGis(), + fmt_args->DisplayHtmlOutput(), + opt.GetQueryGeneticCode(), + opt.GetDbGeneticCode(), + opt.GetSumStatisticsMode(), + m_CmdLineArgs->ExecuteRemotely(), + db_adapter->GetFilteringAlgorithm(), + fmt_args->GetCustomOutputFormatSpec(), + false, false, NULL, NULL, + GetCmdlineArgs(GetArguments())); + + formatter.SetQueryRange(query_opts->GetRange()); + formatter.SetLineLength(fmt_args->GetLineLength()); + if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) { + formatter.SetBaseFile(args[kArgOutput].AsString()); + } + formatter.PrintProlog(); + + /*** Process the input ***/ + for (; !input.End(); formatter.ResetScopeHistory(), QueryBatchCleanup()) { + + CRef query_batch(input.GetNextSeqBatch(*scope)); + CRef queries(new CObjMgr_QueryFactory(*query_batch)); + + SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl); + + CRef results; + + if (m_CmdLineArgs->ExecuteRemotely()) + { + 
CRef rmt_blast = + InitializeRemoteBlast(queries, db_args, opts_hndl, + m_CmdLineArgs->ProduceDebugRemoteOutput(), + m_CmdLineArgs->GetClientId()); + results = rmt_blast->GetResultSet(); + } + else + { + CLocalRPSBlast local_search (query_batch, db_args->GetDatabaseName(), opts_hndl, 1); + results = local_search.Run(); + } + + if (fmt_args->ArchiveFormatRequested(args)) { + formatter.WriteArchive(*queries, *opts_hndl, *results, 0, bah.GetMessages()); + bah.ResetMessages(); + } else { + BlastFormatter_PreFetchSequenceData(*results, scope, + fmt_args->GetFormattedOutputChoice()); + ITERATE(CSearchResultSet, result, *results) { + formatter.PrintOneResultSet(**result, query_batch); + } + } + } + + formatter.PrintEpilog(opt); + + if (m_CmdLineArgs->ProduceDebugOutput()) { + opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1); + } + + SetQueriesLength(input.GetTotalLengthProcessed()); + } CATCH_ALL(status) + + SetStatus(status); + if (status == BLAST_EXIT_SUCCESS) { + SetState(eDone); + SendMsg(CBlastNodeMsg::ePostResult, (void *) this); + + } + else { + SetState(eError); + SendMsg(CBlastNodeMsg::eErrorExit, (void *) this); + + } + + return NULL; +} diff --git a/c++/src/app/blast/rpstblastn_node.hpp b/c++/src/app/blast/rpstblastn_node.hpp new file mode 100644 index 00000000..9ecfcaaf --- /dev/null +++ b/c++/src/app/blast/rpstblastn_node.hpp @@ -0,0 +1,62 @@ +/* $Id: + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. 
+ * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Authors: Amelia Fong + * + */ + +/** @file rpstblastn_node.hpp + * RPSTBLASTN node api + */ + +#ifndef APP__RPSTBLASTN_NODE__HPP +#define APP__RPSTBLASTN_NODE__HPP + +#include +#include + +BEGIN_NCBI_SCOPE +BEGIN_SCOPE(blast) + +class CRPSTBlastnNode : public CBlastNode +{ +public : + + CRPSTBlastnNode (int check_num, const CNcbiArguments & ncbi_args, const CArgs& args, + CBlastAppDiagHandler & bah, const string & input, + int query_index, int num_queries, CBlastNodeMailbox * mailbox = NULL); + virtual int GetBlastResults(string & results); +protected: + virtual ~CRPSTBlastnNode(void); + virtual void* Main(void); +private: + string m_Input; + CRef m_CmdLineArgs; +}; + +END_SCOPE(blast) +END_NCBI_SCOPE + +#endif /* APP__RPSTBLASTN_NODE__HPP */ diff --git a/c++/src/app/blast/tblastn_app.cpp b/c++/src/app/blast/tblastn_app.cpp index e3e000e1..4ffb4033 100644 --- a/c++/src/app/blast/tblastn_app.cpp +++ b/c++/src/app/blast/tblastn_app.cpp @@ -1,4 +1,4 @@ -/* $Id: tblastn_app.cpp 574693 2018-11-16 17:46:37Z zaretska $ +/* $Id: tblastn_app.cpp 616358 2020-09-15 12:19:53Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -55,6 +55,14 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); 
SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + } + } + + ~CTblastnApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: /** @inheritDoc */ @@ -63,6 +71,8 @@ private: virtual int Run(); /// This application's command line args CRef m_CmdLineArgs; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; void CTblastnApp::Init() @@ -258,11 +268,18 @@ int CTblastnApp::Run(void) if (m_CmdLineArgs->ProduceDebugOutput()) { opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1); } + if (input) { + LogQueryInfo(m_UsageReport, *input); + } + formatter.LogBlastSearchInfo(m_UsageReport); } CATCH_ALL(status) if(!bah.GetMessages().empty()) { const CArgs & a = GetArgs(); PrintErrorArchive(a, bah.GetMessages()); } + + m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads()); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } diff --git a/c++/src/app/blast/tblastx_app.cpp b/c++/src/app/blast/tblastx_app.cpp index 4f569022..fc1e67bd 100644 --- a/c++/src/app/blast/tblastx_app.cpp +++ b/c++/src/app/blast/tblastx_app.cpp @@ -1,4 +1,4 @@ -/* $Id: tblastx_app.cpp 574693 2018-11-16 17:46:37Z zaretska $ +/* $Id: tblastx_app.cpp 615343 2020-08-31 15:37:47Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -55,6 +55,14 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + } + } + + ~CTblastxApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: /** @inheritDoc */ @@ -64,6 +72,8 @@ private: /// This application's command line args CRef m_CmdLineArgs; + 
CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; void CTblastxApp::Init() @@ -202,11 +212,15 @@ int CTblastxApp::Run(void) opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1); } + LogQueryInfo(m_UsageReport, input); + formatter.LogBlastSearchInfo(m_UsageReport); } CATCH_ALL(status) if(!bah.GetMessages().empty()) { const CArgs & a = GetArgs(); PrintErrorArchive(a, bah.GetMessages()); } + m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads()); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } diff --git a/c++/src/app/blast/update_blastdb.pl b/c++/src/app/blast/update_blastdb.pl index 8bc065f9..212d2f08 100755 --- a/c++/src/app/blast/update_blastdb.pl +++ b/c++/src/app/blast/update_blastdb.pl @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# $Id: update_blastdb.pl 608134 2020-05-12 15:44:10Z ivanov $ +# $Id: update_blastdb.pl 608596 2020-05-19 10:56:17Z ivanov $ # =========================================================================== # # PUBLIC DOMAIN NOTICE @@ -114,7 +114,7 @@ my $exit_code = 0; $|++; if ($opt_show_version) { - my $revision = '$Revision: 608134 $'; + my $revision = '$Revision: 608596 $'; $revision =~ s/\$Revision: | \$//g; print "$0 version $revision\n"; exit($exit_code); @@ -135,20 +135,27 @@ if (defined($opt_source)) { # Try to auto-detect whether we're on the cloud if (defined($curl)) { my $tmpfile = File::Temp->new(); - my $gcp_cmd = "$curl --connect-timeout 1 -sfo $tmpfile -H 'Metadata-Flavor: Google' " . GCP_URL; - my $aws_cmd = "$curl --connect-timeout 1 -sfo /dev/null " . AMI_URL; + my $gcp_cmd = "$curl --connect-timeout 3 --retry 3 --retry-max-time 30 -sfo $tmpfile -H 'Metadata-Flavor: Google' " . GCP_URL; + my $aws_cmd = "$curl --connect-timeout 3 --retry 3 --retry-max-time 30 -sfo /dev/null " . AMI_URL; print "$gcp_cmd\n" if DEBUG; if (system($gcp_cmd) == 0) { - # status not always reliable. Chekc that return is all digits. 
- my $tmpfile_content = do { local $/; <$tmpfile>}; - print "tempfile: $tmpfile_content\n" if DEBUG; - if ($tmpfile_content =~ m/^(\d+)$/) { - $location = "GCP"; - } + # status not always reliable. Check that curl output is all digits. + my $tmpfile_content = do { local $/; <$tmpfile>}; + print "curl output $tmpfile_content\n" if DEBUG; + $location = "GCP" if ($tmpfile_content =~ m/^(\d+)$/); + } elsif (DEBUG) { + # Consult https://ec.haxx.se/usingcurl/usingcurl-returns + print "curl to GCP metadata server returned ", $?>>8, "\n"; } + print "$aws_cmd\n" if DEBUG; - $location = "AWS" if (system($aws_cmd) == 0); - print "Loation is $location\n" if DEBUG; + if (system($aws_cmd) == 0) { + $location = "AWS"; + } elsif (DEBUG) { + # Consult https://ec.haxx.se/usingcurl/usingcurl-returns + print "curl to AWS metadata server returned ", $?>>8, "\n"; + } + print "Location is $location\n" if DEBUG; } } if ($location =~ /aws|gcp/i and not defined $curl) { diff --git a/c++/src/app/blastdb/CMakeLists.convert2blastmask.app.txt b/c++/src/app/blastdb/CMakeLists.convert2blastmask.app.txt index 390775bf..3cdcfcac 100644 --- a/c++/src/app/blastdb/CMakeLists.convert2blastmask.app.txt +++ b/c++/src/app/blastdb/CMakeLists.convert2blastmask.app.txt @@ -1,10 +1,10 @@ ############################################################################# -# $Id: CMakeLists.convert2blastmask.app.txt 593591 2019-09-20 14:53:34Z gouriano $ +# $Id: CMakeLists.convert2blastmask.app.txt 615546 2020-09-01 12:05:24Z ivanov $ ############################################################################# NCBI_begin_app(convert2blastmask) NCBI_sources(convert2blastmask) - NCBI_uses_toolkit_libraries(blast seqmasks_io) + NCBI_uses_toolkit_libraries(blast seqmasks_io xblast) NCBI_add_definitions(NCBI_MODULE=BLASTDB) NCBI_project_watchers(camacho fongah2) NCBI_end_app() diff --git a/c++/src/app/blastdb/blastdb_aliastool.cpp b/c++/src/app/blastdb/blastdb_aliastool.cpp index 569c1e21..c27e9d9f 100644 --- 
a/c++/src/app/blastdb/blastdb_aliastool.cpp +++ b/c++/src/app/blastdb/blastdb_aliastool.cpp @@ -1,4 +1,4 @@ -/* $Id: blastdb_aliastool.cpp 593112 2019-09-12 12:56:14Z fongah2 $ +/* $Id: blastdb_aliastool.cpp 615362 2020-08-31 15:39:55Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -59,6 +59,14 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdb_aliastool"); + } + } + ~CBlastDBAliasApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: /** @inheritDoc */ @@ -81,6 +89,7 @@ private: int x_ConvertSeqIDFile() const; void x_SeqIDFileInfo() const; + void x_AddCmdOptions(); /// Documentation for this program static const char * const DOCUMENTATION; @@ -108,6 +117,9 @@ private: } vector x_GetDbsToAggregate(const string dbs, const string file) const; void x_AddVDBsToAliasFile( string filename, bool append, string title = kEmptyStr) const; + + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; const char * const CBlastDBAliasApp::DOCUMENTATION = "\n\n" @@ -619,9 +631,48 @@ int CBlastDBAliasApp::Run(void) } } CATCH_ALL(status) + x_AddCmdOptions(); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } +void CBlastDBAliasApp::x_AddCmdOptions() +{ + const CArgs & args = GetArgs(); + if (args["gi_file_in"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "gi_file_conversion"); + } + else if (args["seqid_file_in"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "seqid_file_conversion"); + } + else if (args["seqid_file_info"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) 
"get_seqid_file_info"); + } + + if (args["dblist"].HasValue() || args["dblist_file"].HasValue() || args["num_volumes"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "create_alias_db"); + } + else if (args[kArgDb].HasValue() && args[kArgGiList]){ + m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "create_gilist_alias_db"); + } + else if (args[kArgDb].HasValue() && args[kArgSeqIdList]){ + m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "create_seqidlist_alias_db"); + } + else if (args[kArgDb].HasValue() && args[kArgTaxIdListFile]) { + m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "create_taxidlist_alias_db"); + } + + if (args["vdblist"].HasValue() || args["vdblist_file"].HasValue()) { + if (args["dblist"].HasValue() || args["dblist_file"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "add_vdblist"); + } + else { + m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "create_vdb_alias_db"); + } + } +} + + #ifndef SKIP_DOXYGEN_PROCESSING int main(int argc, const char* argv[] /*, const char* envp[]*/) diff --git a/c++/src/app/blastdb/blastdb_convert.cpp b/c++/src/app/blastdb/blastdb_convert.cpp index da0cda88..03246dc3 100644 --- a/c++/src/app/blastdb/blastdb_convert.cpp +++ b/c++/src/app/blastdb/blastdb_convert.cpp @@ -1,4 +1,4 @@ -/* $Id: blastdb_convert.cpp 598221 2019-12-05 15:33:01Z fongah2 $ +/* $Id: blastdb_convert.cpp 615364 2020-08-31 15:40:14Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -91,6 +91,14 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdb_convert"); + } + } + ~CBlastdbConvertApp() { 
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: @@ -100,6 +108,8 @@ private: virtual int Run(); CNcbiOstream * m_LogFile; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; void CBlastdbConvertApp::Init() diff --git a/c++/src/app/blastdb/blastdbcheck.cpp b/c++/src/app/blastdb/blastdbcheck.cpp index 2273c561..34c2e668 100644 --- a/c++/src/app/blastdb/blastdbcheck.cpp +++ b/c++/src/app/blastdb/blastdbcheck.cpp @@ -1,4 +1,4 @@ -/* $Id: blastdbcheck.cpp 538739 2017-06-13 18:26:55Z rackerst $ +/* $Id: blastdbcheck.cpp 615362 2020-08-31 15:39:55Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -64,6 +64,14 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdbcheck"); + } + } + ~CBlastDbCheckApplication() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: @@ -73,6 +81,11 @@ private: virtual int Run(void); /** @inheritDoc */ virtual void Exit(void); + + void x_AddCmdOptions(); + + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; @@ -1491,10 +1504,33 @@ int CBlastDbCheckApplication::Run(void) status = okay ? 
0 : 1; } CATCH_ALL(status) + + x_AddCmdOptions(); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } +void CBlastDbCheckApplication::x_AddCmdOptions() +{ + const CArgs & args = GetArgs(); + if(args["random"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eDBTest, (string) "random"); + } + else if (args["full"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eDBTest, (string) "full"); + } + else if (args["stride"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eDBTest, (string) "stride"); + } + else if(args["ends"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eDBTest, (string) "end"); + } + else { + m_UsageReport.AddParam(CBlastUsageReport::eDBTest, (string) "default"); + } +} + ///////////////////////////////////////////////////////////////////////////// // Cleanup diff --git a/c++/src/app/blastdb/blastdbcmd.cpp b/c++/src/app/blastdb/blastdbcmd.cpp index dd6557ee..77789783 100644 --- a/c++/src/app/blastdb/blastdbcmd.cpp +++ b/c++/src/app/blastdb/blastdbcmd.cpp @@ -1,4 +1,4 @@ -/* $Id: blastdbcmd.cpp 598336 2019-12-06 18:17:01Z merezhuk $ +/* $Id: blastdbcmd.cpp 616873 2020-09-22 13:14:39Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -64,6 +64,14 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdbcmd"); + } + } + ~CBlastDBCmdApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: /** @inheritDoc */ @@ -88,6 +96,9 @@ private: set m_TaxIdList; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; + /// Initializes Blast DB void x_InitBlastDB(); void x_InitBlastDB_TaxIdList(); @@ -129,8 +140,11 @@ private: void 
x_PrintBlastDatabaseTaxInformation(); int x_ProcessBatchPig(CBlastDB_Formatter & fmt); + + void x_AddCmdOptions(); }; + string s_PreProcessAccessionsForDBv5(const string & id) { string rv = id; @@ -162,6 +176,7 @@ string s_PreProcessAccessionsForDBv5(const string & id) } + bool CBlastDBCmdApp::x_GetOids(const string & id, vector & oids) { @@ -362,7 +377,14 @@ CBlastDBCmdApp::x_ProcessBatchEntry_NoDup(CBlastDB_Formatter & fmt) ids[i] = s_PreProcessAccessionsForDBv5(ids[i]); } } + try { m_BlastDb->AccessionsToOids(ids, oids); + } + catch (CSeqDBException & e) { + if (e.GetMsg().find("DB contains no accession info") == NPOS){ + NCBI_RETHROW_SAME(e, e.GetMsg()); + } + } for(unsigned i=0; i < ids.size(); i++) { if(oids[i] == kSeqDBEntryNotFound) { Int8 num_id = NStr::StringToNumeric(ids[i], NStr::fConvErr_NoThrow); @@ -1127,12 +1149,55 @@ int CBlastDBCmdApp::Run(void) x_InitBlastDB(); status = x_ProcessSearchRequest(); } + x_AddCmdOptions(); } CATCH_ALL(status) + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } +void CBlastDBCmdApp::x_AddCmdOptions() +{ + const CArgs & args = GetArgs(); + if (args["info"]) { + m_UsageReport.AddParam(CBlastUsageReport::eDBInfo, true); + } + else if (args["tax_info"]) { + m_UsageReport.AddParam(CBlastUsageReport::eDBTaxInfo, true); + } + else if(args[kArgTaxIdList].HasValue() || args[kArgTaxIdListFile].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eTaxIdList, true); + } + else if(args["ipg"].HasValue() || args["ipg_batch"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eIPGList, true); + } + else if(args["entry"].HasValue() || args["entry_batch"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eDBEntry, true); + if (args["entry"].HasValue() && args["entry"].AsString() == "all") { + m_UsageReport.AddParam(CBlastUsageReport::eDBDumpAll, true); + } + else { + m_UsageReport.AddParam(CBlastUsageReport::eDBEntry, true); + } + } + if(args["outfmt"].HasValue()) { + 
m_UsageReport.AddParam(CBlastUsageReport::eOutputFmt, args["outfmt"].AsString()); + } + + + string db_name = m_BlastDb->GetDBNameList(); + int off = db_name.find_last_of(CFile::GetPathSeparator()); + if (off != -1) { + db_name.erase(0, off+1); + } + m_UsageReport.AddParam(CBlastUsageReport::eDBName, db_name); + m_UsageReport.AddParam(CBlastUsageReport::eDBLength, (Int8) m_BlastDb->GetTotalLength()); + m_UsageReport.AddParam(CBlastUsageReport::eDBNumSeqs, m_BlastDb->GetNumSeqs()); + m_UsageReport.AddParam(CBlastUsageReport::eDBDate, m_BlastDb->GetDate()); +} + + #ifndef SKIP_DOXYGEN_PROCESSING int main(int argc, const char* argv[] /*, const char* envp[]*/) diff --git a/c++/src/app/blastdb/blastdbcp.cpp b/c++/src/app/blastdb/blastdbcp.cpp index 7e5c4894..c52bfe4a 100644 --- a/c++/src/app/blastdb/blastdbcp.cpp +++ b/c++/src/app/blastdb/blastdbcp.cpp @@ -1,4 +1,4 @@ -/* $Id: blastdbcp.cpp 605535 2020-04-13 11:07:03Z ivanov $ +/* $Id: blastdbcp.cpp 615363 2020-08-31 15:40:04Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -34,6 +34,7 @@ #include #include #include +#include USING_NCBI_SCOPE; USING_SCOPE(blast); @@ -46,6 +47,9 @@ class BlastdbCopyApplication : public CNcbiApplication { public: BlastdbCopyApplication(); + ~BlastdbCopyApplication() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); + } private: /* Private Methods */ virtual void Init(void); @@ -69,6 +73,9 @@ private: /* Private Data */ const string kTargetOnly; const string kMembershipBits; const string kCopyOnly; + + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; ///////////////////////////////////////////////////////////////////////////// @@ -82,6 +89,11 @@ BlastdbCopyApplication::BlastdbCopyApplication() CRef version(new CVersion()); version->SetVersionInfo(1, 0); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + 
m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdbcp"); + } } diff --git a/c++/src/app/blastdb/convert2blastmask.cpp b/c++/src/app/blastdb/convert2blastmask.cpp index 954ff686..71b44b98 100644 --- a/c++/src/app/blastdb/convert2blastmask.cpp +++ b/c++/src/app/blastdb/convert2blastmask.cpp @@ -1,4 +1,4 @@ -/* # $Id: convert2blastmask.cpp 492284 2016-02-16 16:55:37Z camacho $ +/* # $Id: convert2blastmask.cpp 615362 2020-08-31 15:39:55Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -112,6 +112,14 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "convert2blastmask"); + } + } + ~CConvert2BlastMaskApplication() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: @@ -122,8 +130,12 @@ private: CMaskFromFasta* x_GetReader(); CMaskWriterBlastDbMaskInfo* x_GetWriter(); + void x_AddCmdOptions(); + /// Contains the description of this application static const char * const USAGE_LINE; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; const char * const CConvert2BlastMaskApplication::USAGE_LINE @@ -218,6 +230,8 @@ int CConvert2BlastMaskApplication::Run(void) { cerr << e.what() << endl; retval = 1; } + x_AddCmdOptions(); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, retval); return retval; } @@ -226,6 +240,21 @@ void CConvert2BlastMaskApplication::Exit(void) SetDiagStream(0); } +void CConvert2BlastMaskApplication::x_AddCmdOptions() +{ + const CArgs & args = GetArgs(); + if (args["masking_algorithm"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eMaskAlgo, 
args["masking_algorithm"].AsString()); + } + if (args["outfmt"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eOutputFmt, args["outfmt"].AsString()); + } + if (args["parse_seqids"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eParseSeqIDs, true); + } + +} + #ifndef SKIP_DOXYGEN_PROCESSING int main(int argc, const char* argv[]) { diff --git a/c++/src/app/blastdb/makeblastdb.cpp b/c++/src/app/blastdb/makeblastdb.cpp index 323e28b5..a9a82cfc 100644 --- a/c++/src/app/blastdb/makeblastdb.cpp +++ b/c++/src/app/blastdb/makeblastdb.cpp @@ -1,4 +1,4 @@ -/* $Id: makeblastdb.cpp 592321 2019-08-29 17:58:35Z fongah2 $ +/* $Id: makeblastdb.cpp 615359 2020-08-31 15:39:39Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -91,6 +91,14 @@ public: CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "makeblastdb"); + } + } + ~CMakeBlastDBApp() { + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } private: @@ -124,6 +132,8 @@ private: void x_VerifyInputFilesType(const vector& filenames, CMakeBlastDBApp::ESupportedInputFormats input_type); + void x_AddCmdOptions(); + // Data CNcbiOstream * m_LogFile; @@ -135,6 +145,8 @@ private: bool m_IsModifyMode; bool m_SkipUnver; + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; /// Reads an object defined in a NCBI ASN.1 spec from a stream in multiple @@ -1206,9 +1218,34 @@ int CMakeBlastDBApp::Run(void) int status = 0; try { x_BuildDatabase(); } CATCH_ALL(status) + x_AddCmdOptions(); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } +void CMakeBlastDBApp::x_AddCmdOptions() +{ + const CArgs & args = GetArgs(); + if 
(args["input_type"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eInputType, args["input_type"].AsString()); + } + if (args[kArgDbType].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eSeqType, args[kArgDbType].AsString()); + } + if(args["taxid"].HasValue() || args["taxid_map"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eTaxIdList, true); + } + if(args["parse_seqids"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eParseSeqIDs, args["parse_seqids"].AsBoolean()); + } + if (args["gi_mask"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eGIList, true); + } + else if(args["mask_data"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eMaskAlgo, true); + } +} + #ifndef SKIP_DOXYGEN_PROCESSING int main(int argc, const char* argv[] /*, const char* envp[]*/) diff --git a/c++/src/app/blastdb/makeprofiledb.cpp b/c++/src/app/blastdb/makeprofiledb.cpp index 4421e88a..aabf04fc 100644 --- a/c++/src/app/blastdb/makeprofiledb.cpp +++ b/c++/src/app/blastdb/makeprofiledb.cpp @@ -1,4 +1,4 @@ -/* $Id: makeprofiledb.cpp 596198 2019-11-04 15:01:48Z boratyng $ +/* $Id: makeprofiledb.cpp 615360 2020-08-31 15:39:46Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -244,6 +244,8 @@ private: int x_Run(void); + void x_AddCmdOptions(); + // Data CNcbiOstream * m_LogFile; CNcbiIstream * m_InPssmList; @@ -276,6 +278,9 @@ private: bool m_UpdateFreqRatios; bool m_UseModelThreshold; + + CBlastUsageReport m_UsageReport; + CStopWatch m_StopWatch; }; CMakeProfileDBApp::CMakeProfileDBApp(void) @@ -291,6 +296,11 @@ CMakeProfileDBApp::CMakeProfileDBApp(void) CRef version(new CVersion()); version->SetVersionInfo(new CBlastVersion()); SetFullVersion(version); + m_StopWatch.Start(); + if (m_UsageReport.IsEnabled()) { + m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print()); + m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "makeprofiledb"); + 
} } CMakeProfileDBApp::~CMakeProfileDBApp() @@ -348,6 +358,7 @@ CMakeProfileDBApp::~CMakeProfileDBApp() string pog_str = m_OutDbName + ".pog"; CFile(pog_str).Remove(); } + m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed()); } void CMakeProfileDBApp::x_SetupArgDescriptions(void) @@ -1732,9 +1743,24 @@ int CMakeProfileDBApp::Run(void) LOG_POST(Error << "Error: Unknown exception"); status = BLAST_UNKNOWN_ERROR; } + + x_AddCmdOptions(); + m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status); return status; } +void CMakeProfileDBApp::x_AddCmdOptions() +{ + const CArgs & args = GetArgs(); + if (args["dbtype"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eDBType, args["dbtype"].AsString()); + } + if(args["taxid"].HasValue() || args["taxid_map"].HasValue()) { + m_UsageReport.AddParam(CBlastUsageReport::eTaxIdList, true); + } +} + + #ifndef SKIP_DOXYGEN_PROCESSING int main(int argc, const char* argv[] /*, const char* envp[]*/) { diff --git a/c++/src/build-system/Makefile.mk.in b/c++/src/build-system/Makefile.mk.in index 2768fc6b..a780b435 100644 --- a/c++/src/build-system/Makefile.mk.in +++ b/c++/src/build-system/Makefile.mk.in @@ -1,5 +1,5 @@ ################################# -# $Id: Makefile.mk.in 606338 2020-04-20 16:30:59Z ivanov $ +# $Id: Makefile.mk.in 616396 2020-09-15 18:22:00Z ivanov $ # Author: Denis Vakatov (vakatov@ncbi.nlm.nih.gov) ################################# # @@ -378,6 +378,8 @@ BZ2_LIBS = @BZ2_LIBS@ BZ2_LIB = @BZ2_LIB@ LZO_INCLUDE = @LZO_INCLUDE@ LZO_LIBS = @LZO_LIBS@ +ZSTD_INCLUDE= @ZSTD_INCLUDE@ +ZSTD_LIBS = @ZSTD_LIBS@ CMPRS_INCLUDE = $(Z_INCLUDE) $(BZ2_INCLUDE) $(LZO_INCLUDE) CMPRS_LIBS = $(Z_LIBS) $(BZ2_LIBS) $(LZO_LIBS) @@ -784,6 +786,19 @@ HIREDIS_INCLUDE = @HIREDIS_INCLUDE@ HIREDIS_LIBS = @HIREDIS_LIBS@ HIREDIS_STATIC_LIBS = @HIREDIS_STATIC_LIBS@ +# Apache Arrow (specifically focusing on Parquet) +APACHE_ARROW_INCLUDE = @APACHE_ARROW_INCLUDE@ +APACHE_ARROW_LIBS = @APACHE_ARROW_LIBS@ 
+APACHE_ARROW_STATIC_LIBS = @APACHE_ARROW_STATIC_LIBS@ + +# Kafka +LIBRDKAFKA_INCLUDE = @LIBRDKAFKA_INCLUDE@ +LIBRDKAFKA_LIBS = @LIBRDKAFKA_LIBS@ +LIBRDKAFKA_STATIC_LIBS = @LIBRDKAFKA_STATIC_LIBS@ +CPPKAFKA_INCLUDE = @CPPKAFKA_INCLUDE@ +CPPKAFKA_LIBS = @CPPKAFKA_LIBS@ +CPPKAFKA_STATIC_LIBS = @CPPKAFKA_STATIC_LIBS@ + # Compress COMPRESS_LDEP = $(CMPRS_LIB) COMPRESS_LIBS = xcompress $(COMPRESS_LDEP) @@ -840,10 +855,10 @@ EUTILS_LIBS = eutils egquery elink epost esearch espell esummary \ OBJREAD_LIBS = xobjread variation submit xlogging # formatting code -XFORMAT_LIBS = xformat xcleanup gbseq mlacli mla medlars pubmed valid $(OBJEDIT_LIBS) +XFORMAT_LIBS = xformat xcleanup gbseq $(OBJEDIT_LIBS) # object editing library -OBJEDIT_LIBS = xobjedit $(OBJREAD_LIBS) taxon3 +OBJEDIT_LIBS = xobjedit $(OBJREAD_LIBS) taxon3 mlacli mla medlars pubmed valid # standard data loader configuration, plus supporting libraries DATA_LOADERS_UTIL_LIB = data_loaders_util \ diff --git a/c++/src/build-system/Makefile.xcode.tmpl b/c++/src/build-system/Makefile.xcode.tmpl index 6f799179..ea9dd58b 100644 --- a/c++/src/build-system/Makefile.xcode.tmpl +++ b/c++/src/build-system/Makefile.xcode.tmpl @@ -1,12 +1,12 @@ -# $Id: Makefile.xcode.tmpl 563416 2018-05-09 11:59:33Z ivanov $ +# $Id: Makefile.xcode.tmpl 608826 2020-05-21 18:14:06Z ivanov $ # Makefile template for Xcode ####################################################################### include ./Makefile.mk -DEVSDK = /Developer/SDKs -SDKDIR = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform -SDK = $(firstword $(wildcard $(DEVSDK)/*.sdk) $(wildcard $(SDKDIR)/*.sdk)) +# DEVSDK = /Developer/SDKs +# SDKDIR = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform +# SDK = $(firstword $(wildcard $(DEVSDK)/*.sdk) $(wildcard $(SDKDIR)/*.sdk)) ifneq "" "$(wildcard ${SDK})" SDKFLAG = -sdk ${SDK} else diff --git a/c++/src/build-system/cmake/CMake.NCBIComponents.cmake 
b/c++/src/build-system/cmake/CMake.NCBIComponents.cmake index c8717103..278ec912 100644 --- a/c++/src/build-system/cmake/CMake.NCBIComponents.cmake +++ b/c++/src/build-system/cmake/CMake.NCBIComponents.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBIComponents.cmake 607658 2020-05-06 12:48:50Z ivanov $ +# $Id: CMake.NCBIComponents.cmake 609371 2020-06-01 14:13:18Z ivanov $ ############################################################################# ## @@ -43,7 +43,7 @@ endif() if(WIN32) set(NCBI_COMPONENT_local_lbsm_FOUND NO) else() - if (EXISTS ${NCBI_SRC_ROOT}/connect/ncbi_lbsm.c) + if (EXISTS ${NCBITK_SRC_ROOT}/connect/ncbi_lbsm.c) # message("local_lbsm found at ${NCBI_SRC_ROOT}/connect") set(NCBI_COMPONENT_local_lbsm_FOUND YES) set(HAVE_LOCAL_LBSM 1) @@ -56,9 +56,9 @@ endif() ############################################################################# # LocalPCRE -if (EXISTS ${includedir}/util/regexp) +if (EXISTS ${NCBITK_INC_ROOT}/util/regexp) set(NCBI_COMPONENT_LocalPCRE_FOUND YES) - set(NCBI_COMPONENT_LocalPCRE_INCLUDE ${includedir}/util/regexp) + set(NCBI_COMPONENT_LocalPCRE_INCLUDE ${NCBITK_INC_ROOT}/util/regexp) set(NCBI_COMPONENT_LocalPCRE_NCBILIB regexp) else() set(NCBI_COMPONENT_LocalPCRE_FOUND NO) @@ -66,9 +66,9 @@ endif() ############################################################################# # LocalZ -if (EXISTS ${includedir}/util/compress/zlib) +if (EXISTS ${NCBITK_INC_ROOT}/util/compress/zlib) set(NCBI_COMPONENT_LocalZ_FOUND YES) - set(NCBI_COMPONENT_LocalZ_INCLUDE ${includedir}/util/compress/zlib) + set(NCBI_COMPONENT_LocalZ_INCLUDE ${NCBITK_INC_ROOT}/util/compress/zlib) set(NCBI_COMPONENT_LocalZ_NCBILIB z) else() set(NCBI_COMPONENT_LocalZ_FOUND NO) @@ -76,9 +76,9 @@ endif() ############################################################################# # LocalBZ2 -if (EXISTS ${includedir}/util/compress/bzip2) +if (EXISTS ${NCBITK_INC_ROOT}/util/compress/bzip2) 
set(NCBI_COMPONENT_LocalBZ2_FOUND YES) - set(NCBI_COMPONENT_LocalBZ2_INCLUDE ${includedir}/util/compress/bzip2) + set(NCBI_COMPONENT_LocalBZ2_INCLUDE ${NCBITK_INC_ROOT}/util/compress/bzip2) set(NCBI_COMPONENT_LocalBZ2_NCBILIB bz2) else() set(NCBI_COMPONENT_LocalBZ2_FOUND NO) @@ -86,9 +86,9 @@ endif() ############################################################################# #LocalLMDB -if (EXISTS ${includedir}/util/lmdb) +if (EXISTS ${NCBITK_INC_ROOT}/util/lmdb) set(NCBI_COMPONENT_LocalLMDB_FOUND YES) - set(NCBI_COMPONENT_LocalLMDB_INCLUDE ${includedir}/util/lmdb) + set(NCBI_COMPONENT_LocalLMDB_INCLUDE ${NCBITK_INC_ROOT}/util/lmdb) set(NCBI_COMPONENT_LocalLMDB_NCBILIB lmdb) else() set(NCBI_COMPONENT_LocalLMDB_FOUND NO) @@ -96,8 +96,8 @@ endif() ############################################################################# # FreeTDS -set(FTDS95_INCLUDE ${includedir}/dbapi/driver/ftds95 ${includedir}/dbapi/driver/ftds95/freetds) -set(FTDS100_INCLUDE ${includedir}/dbapi/driver/ftds100 ${includedir}/dbapi/driver/ftds100/freetds) +set(FTDS95_INCLUDE ${NCBITK_INC_ROOT}/dbapi/driver/ftds95 ${NCBITK_INC_ROOT}/dbapi/driver/ftds95/freetds) +set(FTDS100_INCLUDE ${NCBITK_INC_ROOT}/dbapi/driver/ftds100 ${NCBITK_INC_ROOT}/dbapi/driver/ftds100/freetds) set(NCBI_COMPONENT_FreeTDS_FOUND YES) set(NCBI_COMPONENT_FreeTDS_INCLUDE ${FTDS100_INCLUDE}) @@ -115,7 +115,7 @@ if (NCBI_EXPERIMENTAL_DISABLE_HUNTER) if (MSVC) include(${NCBI_TREE_CMAKECFG}/CMake.NCBIComponentsMSVC.cmake) -elseif (XCODE) +elseif (APPLE) include(${NCBI_TREE_CMAKECFG}/CMake.NCBIComponentsXCODE.cmake) else() if(NCBI_EXPERIMENTAL_CFG) @@ -132,8 +132,8 @@ endif() ############################################################################# # FreeTDS -set(FTDS95_INCLUDE ${includedir}/dbapi/driver/ftds95 ${includedir}/dbapi/driver/ftds95/freetds) -set(FTDS100_INCLUDE ${includedir}/dbapi/driver/ftds100 ${includedir}/dbapi/driver/ftds100/freetds) +set(FTDS95_INCLUDE ${NCBITK_INC_ROOT}/dbapi/driver/ftds95 
${NCBITK_INC_ROOT}/dbapi/driver/ftds95/freetds) +set(FTDS100_INCLUDE ${NCBITK_INC_ROOT}/dbapi/driver/ftds100 ${NCBITK_INC_ROOT}/dbapi/driver/ftds100/freetds) ############################################################################# list(SORT NCBI_ALL_COMPONENTS) diff --git a/c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake b/c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake index 509d065a..3e389500 100644 --- a/c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake +++ b/c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBIComponentsMSVC.cmake 607786 2020-05-07 15:35:50Z ivanov $ +# $Id: CMake.NCBIComponentsMSVC.cmake 609371 2020-06-01 14:13:18Z ivanov $ ############################################################################# ## @@ -15,7 +15,7 @@ ## HAVE_XXX -set(NCBI_COMPONENT_MSWin_FOUND YES) +set(NCBI_REQUIRE_MSWin_FOUND YES) #to debug #set(NCBI_TRACE_COMPONENT_GRPC ON) ############################################################################# diff --git a/c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake b/c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake index 4c08f9c2..41972faf 100644 --- a/c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake +++ b/c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBIComponentsUNIX.cmake 605517 2020-04-12 00:56:13Z ucko $ +# $Id: CMake.NCBIComponentsUNIX.cmake 611999 2020-07-14 15:30:59Z ivanov $ ############################################################################# ## @@ -610,8 +610,8 @@ if (WIN32) find_external_library(VDB INCLUDES sra/sradb.h LIBS ncbi-vdb - INCLUDE_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.10.5\\interfaces" - LIBS_HINTS 
"\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.10.5\\win\\release\\x86_64\\lib") + INCLUDE_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.10.8\\interfaces" + LIBS_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.10.8\\win\\release\\x86_64\\lib") else (WIN32) find_external_library(VDB INCLUDES sra/sradb.h diff --git a/c++/src/build-system/cmake/CMake.NCBIComponentsUNIXex.cmake b/c++/src/build-system/cmake/CMake.NCBIComponentsUNIXex.cmake index 66bb91d2..af4e6b49 100644 --- a/c++/src/build-system/cmake/CMake.NCBIComponentsUNIXex.cmake +++ b/c++/src/build-system/cmake/CMake.NCBIComponentsUNIXex.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBIComponentsUNIXex.cmake 607786 2020-05-07 15:35:50Z ivanov $ +# $Id: CMake.NCBIComponentsUNIXex.cmake 609371 2020-06-01 14:13:18Z ivanov $ ############################################################################# ## @@ -14,8 +14,10 @@ ## HAVE_LIBXXX ## HAVE_XXX -set(NCBI_COMPONENT_unix_FOUND YES) -set(NCBI_COMPONENT_Linux_FOUND YES) +set(NCBI_REQUIRE_unix_FOUND YES) +if(NOT APPLE) +set(NCBI_REQUIRE_Linux_FOUND YES) +endif() option(USE_LOCAL_BZLIB "Use a local copy of libbz2") option(USE_LOCAL_PCRE "Use a local copy of libpcre") #to debug @@ -292,21 +294,20 @@ if(NOT NCBI_COMPONENT_BACKWARD_DISABLED) if(EXISTS ${NCBI_ThirdParty_BACKWARD}/include) set(LIBBACKWARD_INCLUDE ${NCBI_ThirdParty_BACKWARD}/include) set(HAVE_LIBBACKWARD_CPP YES) + set(NCBI_COMPONENT_BACKWARD_FOUND YES) + set(NCBI_COMPONENT_BACKWARD_INCLUDE ${LIBBACKWARD_INCLUDE}) + list(APPEND NCBI_ALL_COMPONENTS BACKWARD) + else() + message("NOT FOUND BACKWARD") endif() find_library(LIBBACKWARD_LIBS NAMES backward HINTS ${NCBI_ThirdParty_BACKWARD}/lib) find_library(LIBDW_LIBS NAMES dw) if (LIBDW_LIBS) set(HAVE_LIBDW YES) endif() - if(HAVE_LIBBACKWARD_CPP AND HAVE_LIBDW) - set(NCBI_COMPONENT_BACKWARD_FOUND YES) - set(NCBI_COMPONENT_BACKWARD_INCLUDE ${LIBBACKWARD_INCLUDE}) 
set(NCBI_COMPONENT_BACKWARD_LIBS ${LIBDW_LIBS}) # set(NCBI_COMPONENT_BACKWARD_LIBS ${LIBBACKWARD_LIBS} ${LIBDW_LIBS}) - list(APPEND NCBI_ALL_COMPONENTS BACKWARD) - else() - message("NOT FOUND BACKWARD") endif() else(NOT NCBI_COMPONENT_BACKWARD_DISABLED) message("DISABLED BACKWARD") diff --git a/c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake b/c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake index cc8944b4..b496332f 100644 --- a/c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake +++ b/c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBIComponentsXCODE.cmake 607661 2020-05-06 12:49:33Z ivanov $ +# $Id: CMake.NCBIComponentsXCODE.cmake 611999 2020-07-14 15:30:59Z ivanov $ ############################################################################# ## @@ -15,8 +15,12 @@ ## HAVE_XXX -set(NCBI_COMPONENT_XCODE_FOUND YES) -set(NCBI_COMPONENT_unix_FOUND YES) +set(NCBI_REQUIRE_unix_FOUND YES) +if(XCODE) +set(NCBI_REQUIRE_XCODE_FOUND YES) +endif() +#to debug +#set(NCBI_TRACE_COMPONENT_JPEG ON) ############################################################################# # common settings set(NCBI_TOOLS_ROOT $ENV{NCBI}) @@ -55,6 +59,7 @@ set(KRB5_LIBS "-framework Kerberos" -liconv) ############################################################################ set(NCBI_ThirdPartyBasePath ${NCBI_TOOLS_ROOT}) +set(NCBI_ThirdParty_BACKWARD ${NCBI_ThirdPartyBasePath}/backward-cpp-1.3.20180206-44ae960) set(NCBI_ThirdParty_TLS ${NCBI_ThirdPartyBasePath}/gnutls-3.4.0) #set(NCBI_ThirdParty_FASTCGI set(NCBI_ThirdParty_Boost ${NCBI_ThirdPartyBasePath}/boost-1.62.0-ncbi1) @@ -71,19 +76,23 @@ set(NCBI_ThirdParty_TIFF ${NCBI_ThirdPartyBasePath}/safe-sw) set(NCBI_ThirdParty_XML ${NCBI_ThirdPartyBasePath}/libxml-2.7.8) set(NCBI_ThirdParty_XSLT ${NCBI_ThirdPartyBasePath}/libxml-2.7.8) set(NCBI_ThirdParty_EXSLT ${NCBI_ThirdParty_XSLT}) 
-set(NCBI_ThirdParty_SQLITE3 ${NCBI_ThirdPartyBasePath}/sqlite-3.8.10.1-ncbi1) +set(NCBI_ThirdParty_SQLITE3 ${NCBI_ThirdPartyBasePath}/sqlite-3.26.0-ncbi1) #set(NCBI_ThirdParty_Sybase -set(NCBI_ThirdParty_VDB "/net/snowman/vol/projects/trace_software/vdb/vdb-versions/2.10.5") +set(NCBI_ThirdParty_VDB "/net/snowman/vol/projects/trace_software/vdb/vdb-versions/2.10.8") set(NCBI_ThirdParty_VDB_ARCH x86_64) set(NCBI_ThirdParty_wxWidgets ${NCBI_ThirdPartyBasePath}/wxWidgets-3.1.3-ncbi1) set(NCBI_ThirdParty_GLEW ${NCBI_ThirdPartyBasePath}/glew-1.5.8) set(NCBI_ThirdParty_FTGL ${NCBI_ThirdPartyBasePath}/ftgl-2.1.3-rc5) set(NCBI_ThirdParty_FreeType ${NCBI_OPT_ROOT}) +set(NCBI_ThirdParty_NGHTTP2 ${NCBI_ThirdPartyBasePath}/nghttp2-1.40.0) +set(NCBI_ThirdParty_UV ${NCBI_ThirdPartyBasePath}/libuv-1.35.0) +set(NCBI_ThirdParty_GL2PS ${NCBI_ThirdPartyBasePath}/gl2ps-1.4.0) +set(NCBI_ThirdParty_Nettle ${NCBI_ThirdPartyBasePath}/nettle-3.1.1) +set(NCBI_ThirdParty_GMP ${NCBI_ThirdPartyBasePath}/gmp-6.0.0a) ############################################################################# ############################################################################# -set(_XCODE_EXTRA_LIBS) function(NCBI_define_component _name) if(NCBI_COMPONENT_${_name}_DISABLED) @@ -120,8 +129,8 @@ function(NCBI_define_component _name) set(_suffixes .a .dylib) endif() set(_roots ${_root}) -# set(_subdirs Release${NCBI_PlatformBits}/lib lib64 lib) - set(_subdirs Release${NCBI_PlatformBits}/lib lib64 ${_XCODE_EXTRA_LIBS}) + set(_subdirs Release${NCBI_PlatformBits}/lib lib64 lib) +# set(_subdirs Release${NCBI_PlatformBits}/lib lib64 ${_XCODE_EXTRA_LIBS}) if (BUILD_SHARED_LIBS AND DEFINED NCBI_ThirdParty_${_name}_SHLIB) set(_roots ${NCBI_ThirdParty_${_name}_SHLIB} ${_roots}) set(_subdirs shlib64 shlib lib64 lib) @@ -135,11 +144,18 @@ function(NCBI_define_component _name) set(_all_libs "") foreach(_lib IN LISTS _args) set(_this_found NO) + if(NCBI_TRACE_COMPONENT_${_name}) + message("${_name}: checking 
${_root}/${_libdir}/lib${_lib}") + endif() foreach(_sfx IN LISTS _suffixes) if(EXISTS ${_root}/${_libdir}/lib${_lib}${_sfx}) list(APPEND _all_libs ${_root}/${_libdir}/lib${_lib}${_sfx}) set(_this_found YES) break() + else() + if(NCBI_TRACE_COMPONENT_${_name}) + message("${_name}: ${_root}/${_libdir}/lib${_lib}${_sfx} not found") + endif() endif() endforeach() if(NOT _this_found) @@ -203,8 +219,20 @@ endmacro() set(NCBI_COMPONENT_NCBI_C_FOUND NO) ############################################################################# -# STACKTRACE -set(NCBI_COMPONENT_STACKTRACE_FOUND NO) +# BACKWARD, UNWIND +if(NOT NCBI_COMPONENT_BACKWARD_DISABLED) + if(EXISTS ${NCBI_ThirdParty_BACKWARD}/include) + set(LIBBACKWARD_INCLUDE ${NCBI_ThirdParty_BACKWARD}/include) + set(HAVE_LIBBACKWARD_CPP YES) + set(NCBI_COMPONENT_BACKWARD_FOUND YES) + set(NCBI_COMPONENT_BACKWARD_INCLUDE ${LIBBACKWARD_INCLUDE}) + list(APPEND NCBI_ALL_COMPONENTS BACKWARD) + else() + message("NOT FOUND BACKWARD") + endif() +else(NOT NCBI_COMPONENT_BACKWARD_DISABLED) + message("DISABLED BACKWARD") +endif(NOT NCBI_COMPONENT_BACKWARD_DISABLED) ############################################################################# #LMDB @@ -252,7 +280,6 @@ else() set(NCBI_COMPONENT_Boost.Test.Included_FOUND NO) endif() -set(_XCODE_EXTRA_LIBS lib) ############################################################################# # Boost.Test NCBI_define_component(Boost.Test boost_unit_test_framework) @@ -260,7 +287,6 @@ NCBI_define_component(Boost.Test boost_unit_test_framework) ############################################################################# # Boost.Spirit NCBI_define_component(Boost.Spirit boost_thread-mt) -set(_XCODE_EXTRA_LIBS "") ############################################################################# # JPEG @@ -432,10 +458,27 @@ NCBI_define_component(FTGL ftgl) ############################################################################# # FreeType -set(_XCODE_EXTRA_LIBS lib) NCBI_define_component(FreeType 
freetype) if(NCBI_COMPONENT_FreeType_FOUND) set(NCBI_COMPONENT_FreeType_INCLUDE ${NCBI_COMPONENT_FreeType_INCLUDE} ${NCBI_COMPONENT_FreeType_INCLUDE}/freetype2) endif() -set(_XCODE_EXTRA_LIBS "") +############################################################################# +# NGHTTP2 +NCBI_define_component(NGHTTP2 nghttp2) + +############################################################################# +# UV +NCBI_define_component(UV uv) + +############################################################################# +# GL2PS +NCBI_define_component(GL2PS gl2ps) + +############################################################################# +# Nettle +NCBI_define_component(Nettle nettle hogweed) + +############################################################################# +# GMP +#NCBI_define_component(GMP gmp) diff --git a/c++/src/build-system/cmake/CMake.NCBIptb.cmake b/c++/src/build-system/cmake/CMake.NCBIptb.cmake index 178c2298..8cd375b1 100644 --- a/c++/src/build-system/cmake/CMake.NCBIptb.cmake +++ b/c++/src/build-system/cmake/CMake.NCBIptb.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBIptb.cmake 607660 2020-05-06 12:49:19Z ivanov $ +# $Id: CMake.NCBIptb.cmake 609379 2020-06-01 14:15:14Z ivanov $ ############################################################################# ############################################################################# ## @@ -100,6 +100,7 @@ ############################################################################# # deprecated macro(NCBI_add_root_subdirectory) + message(WARNING "NCBI_add_root_subdirectory is deprecated, use NCBI_add_subdirectory instead") NCBI_add_subdirectory(${ARGV}) endmacro() @@ -108,10 +109,22 @@ function(NCBI_add_subdirectory) if(NCBI_PTBMODE_PARTS) return() endif() - if(NCBI_PTBCFG_ENABLE_COLLECTOR AND NOT NCBI_PTB_HAS_ROOT) + + if(NOT DEFINED NCBI_CURRENT_SOURCE_DIR) set(NCBI_CURRENT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + 
endif() + if("${NCBI_CURRENT_SOURCE_DIR}" STREQUAL "${NCBITK_SRC_ROOT}") + set(NCBI_TREE_ROOT ${NCBITK_TREE_ROOT}) + set(NCBI_SRC_ROOT ${NCBITK_SRC_ROOT}) + set(NCBI_INC_ROOT ${NCBITK_INC_ROOT}) + elseif("${NCBI_CURRENT_SOURCE_DIR}" STREQUAL "${NCBITK_TREE_ROOT}") + set(NCBI_TREE_ROOT ${NCBITK_TREE_ROOT}) + set(NCBI_SRC_ROOT ${NCBITK_TREE_ROOT}) + set(NCBI_INC_ROOT ${NCBITK_TREE_ROOT}) + endif() + + if(NOT NCBI_PTB_HAS_ROOT) NCBI_internal_analyze_tree() - variable_watch(CMAKE_CURRENT_LIST_DIR NCBI_internal_end_of_config) endif() if(NCBI_PTBMODE_COLLECT_DEPS) @@ -130,13 +143,25 @@ function(NCBI_add_subdirectory) if(DEFINED NCBI_PTB_ALLOWED_DIRS) set(_is_good FALSE) foreach(_dir IN LISTS NCBI_PTB_ALLOWED_DIRS) - NCBI_util_match_path(${_dir} ${NCBI_CURRENT_SOURCE_DIR} _is_good) - if(_is_good) + string(FIND ${_dir} ${NCBI_CURRENT_SOURCE_DIR} _pos) + if(${_pos} EQUAL 0) + set(_is_good TRUE) break() endif() endforeach() else() - set(_is_good TRUE) + NCBI_internal_process_project_filters( _is_good) + if(NOT _is_good) + if(NOT "${NCBI_PTBCFG_PROJECT_LIST}" STREQUAL "") + foreach(_dir IN LISTS NCBI_PTBCFG_PROJECT_LIST) + string(FIND "${NCBI_SRC_ROOT}/${_dir}" "${NCBI_CURRENT_SOURCE_DIR}" _pos) + if(${_pos} EQUAL 0) + set(_is_good TRUE) + break() + endif() + endforeach() + endif() + endif() endif() if (_is_good AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_sub}/CMakeLists.txt") add_subdirectory(${_sub}) @@ -302,6 +327,10 @@ macro(NCBI_begin_app _name) if (NCBI_PTBMODE_COLLECT_DEPS OR TARGET ${_name}) set(_appname ${_appname}-app) endif() + elseif(NOT NCBI_PTBCFG_ENABLE_COLLECTOR) + if (TARGET ${_name}) + set(_appname ${_appname}-app) + endif() endif() endif() set(NCBI_PROJECT ${_appname}) @@ -616,6 +645,18 @@ function(NCBI_register_hook _event _callback) endif() endfunction() +############################################################################## +macro(NCBI_util_elapsed _value) + if(DEFINED NCBI_TIMESTAMP_START) + string(TIMESTAMP _curtime "%s") + math(EXPR _delta 
"${_curtime} - ${NCBI_TIMESTAMP_START}") + string(TIMESTAMP _curtime "%H:%M:%S") + set(${_value} "${_curtime} (${_delta}s)") + else() + string(TIMESTAMP ${_value} "%H:%M:%S") + endif() +endmacro() + ############################################################################## macro(NCBI_util_parse_sign _input _value _negative) string(SUBSTRING ${_input} 0 1 _sign) @@ -667,53 +708,63 @@ macro(NCBI_internal_analyze_tree) set_property(GLOBAL PROPERTY NCBI_PTBPROP_COUNT_${_type} 0) endforeach() - if(NOT DEFINED NCBI_PTBCFG_KNOWN_FOLDERS OR "${NCBI_PTBCFG_KNOWN_FOLDERS}" STREQUAL "") - file(GLOB _files LIST_DIRECTORIES TRUE "${NCBI_CURRENT_SOURCE_DIR}/*") - foreach(_file IN LISTS _files) - if(IS_DIRECTORY ${_file} AND EXISTS ${_file}/CMakeLists.txt) - get_filename_component(_basename ${_file} NAME) - list(APPEND NCBI_PTBCFG_KNOWN_FOLDERS ${_basename}) - endif() - endforeach() + if( "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "" AND + "${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "" AND + "${NCBI_PTBCFG_PROJECT_LIST}" STREQUAL "") + set(NCBI_PTB_NOFILTERS TRUE) + endif() + if (NCBI_PTBCFG_ENABLE_COLLECTOR AND NCBI_PTB_NOFILTERS AND NOT NCBI_PTBCFG_ALLOW_COMPOSITE) + set(NCBI_PTBCFG_ENABLE_COLLECTOR FALSE) + set(NCBI_PTBCFG_ENABLE_COLLECTOR FALSE PARENT_SCOPE) endif() - message("Analyzing source tree...") - set_property(GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS "") + if(NCBI_PTBCFG_ENABLE_COLLECTOR) + if(NOT DEFINED NCBI_PTBCFG_KNOWN_FOLDERS OR "${NCBI_PTBCFG_KNOWN_FOLDERS}" STREQUAL "") + file(GLOB _files LIST_DIRECTORIES TRUE "${NCBI_CURRENT_SOURCE_DIR}/*") + foreach(_file IN LISTS _files) + if(IS_DIRECTORY ${_file} AND EXISTS ${_file}/CMakeLists.txt) + get_filename_component(_basename ${_file} NAME) + list(APPEND NCBI_PTBCFG_KNOWN_FOLDERS ${_basename}) + endif() + endforeach() + endif() + list(LENGTH NCBI_PTBCFG_KNOWN_FOLDERS _count) + if(NOT ${_count} EQUAL 1) + set(NCBI_PTB_THIS_SRC_ROOT ${NCBI_SRC_ROOT} PARENT_SCOPE) + set(NCBI_PTB_THIS_SRC_ROOT ${NCBI_SRC_ROOT}) + endif() 
- set(NCBI_PTBMODE_COLLECT_DEPS ON) - NCBI_add_subdirectory(${NCBI_PTBCFG_KNOWN_FOLDERS}) - set(NCBI_PTB_CALLBACK_ALL_PARSED TRUE) - set(NCBI_PTBMODE_COLLECT_DEPS OFF) + NCBI_util_elapsed(_elapsed) + message("${_elapsed}: Analyzing source tree...") + set_property(GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS "") - get_property(_allprojects GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS) - get_property(_allowedprojects GLOBAL PROPERTY NCBI_PTBPROP_ALLOWED_PROJECTS) + set(NCBI_PTBMODE_COLLECT_DEPS ON) + set(_known ${NCBI_PTBCFG_KNOWN_FOLDERS}) + unset(NCBI_PTBCFG_KNOWN_FOLDERS) + NCBI_add_subdirectory(${_known}) + set(NCBI_PTB_CALLBACK_ALL_PARSED TRUE) + set(NCBI_PTBMODE_COLLECT_DEPS OFF) -if(OFF) -message("NCBI_PTBPROP_ALL_PROJECTS: ${_allprojects}") -foreach(_prj IN LISTS _allprojects) - get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj}) - message("NCBI_PTBPROP_DEPS_${_prj}: ${_prjdeps}") -endforeach() -message("NCBI_PTBPROP_ALLOWED_PROJECTS: ${_allowedprojects}") -endif() + get_property(_allprojects GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS) + get_property(_allowedprojects GLOBAL PROPERTY NCBI_PTBPROP_ALLOWED_PROJECTS) - if("${_allowedprojects}" STREQUAL "") - message(FATAL_ERROR "List of projects is empty") - return() - endif() + if(OFF) + message("NCBI_PTBPROP_ALL_PROJECTS: ${_allprojects}") + foreach(_prj IN LISTS _allprojects) + get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj}) + message("NCBI_PTBPROP_DEPS_${_prj}: ${_prjdeps}") + endforeach() + message("NCBI_PTBPROP_ALLOWED_PROJECTS: ${_allowedprojects}") + endif() - message("Collecting projects...") - list(REMOVE_DUPLICATES _allowedprojects) - foreach(_prj IN LISTS _allowedprojects) - NCBI_internal_collect_dependencies(${_prj}) - get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj}) - get_property(_host GLOBAL PROPERTY NCBI_PTBPROP_HOST_${_prj}) - set(NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PTB_ALLOWED_PROJECTS} ${_host} ${_prj} ${_prjdeps}) - endforeach() - list(SORT 
NCBI_PTB_ALLOWED_PROJECTS) - list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_PROJECTS) - if(NCBI_PTBCFG_ALLOW_COMPOSITE) - set(_allowedprojects ${NCBI_PTB_ALLOWED_PROJECTS}) + if("${_allowedprojects}" STREQUAL "") + message(FATAL_ERROR "List of projects is empty") + return() + endif() + + NCBI_util_elapsed(_elapsed) + message("${_elapsed}: Collecting projects...") + list(REMOVE_DUPLICATES _allowedprojects) foreach(_prj IN LISTS _allowedprojects) NCBI_internal_collect_dependencies(${_prj}) get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj}) @@ -722,35 +773,53 @@ endif() endforeach() list(SORT NCBI_PTB_ALLOWED_PROJECTS) list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_PROJECTS) - endif() - foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS) - get_property(_dir GLOBAL PROPERTY NCBI_PTBPROP_DIR_${_prj}) - list(APPEND NCBI_PTB_ALLOWED_DIRS ${_dir}) - endforeach() - list(SORT NCBI_PTB_ALLOWED_DIRS) - list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_DIRS) -if(OFF) -message("NCBI_PTB_ALLOWED_PROJECTS: ${NCBI_PTB_ALLOWED_PROJECTS}") -foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS) - get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj}) - message("NCBI_PTBPROP_DEPS_${_prj}: ${_prjdeps}") -endforeach() -endif() + if(NCBI_PTBCFG_ALLOW_COMPOSITE) + set(_allowedprojects ${NCBI_PTB_ALLOWED_PROJECTS}) + foreach(_prj IN LISTS _allowedprojects) + NCBI_internal_collect_dependencies(${_prj}) + get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj}) + get_property(_host GLOBAL PROPERTY NCBI_PTBPROP_HOST_${_prj}) + set(NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PTB_ALLOWED_PROJECTS} ${_host} ${_prj} ${_prjdeps}) + endforeach() + list(SORT NCBI_PTB_ALLOWED_PROJECTS) + list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_PROJECTS) + endif() + if(NOT NCBI_PTB_NOFILTERS) + foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS) + get_property(_dir GLOBAL PROPERTY NCBI_PTBPROP_DIR_${_prj}) + list(APPEND NCBI_PTB_ALLOWED_DIRS ${_dir}) + endforeach() + list(SORT NCBI_PTB_ALLOWED_DIRS) + list(REMOVE_DUPLICATES 
NCBI_PTB_ALLOWED_DIRS) + set(NCBI_PTB_ALLOWED_DIRS ${NCBI_PTB_ALLOWED_DIRS} PARENT_SCOPE) + endif() - foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS) - NCBI_internal_collect_requires(${_prj}) - endforeach() - set(NCBI_PTB_CALLBACK_COLLECTED TRUE) - foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS) - if (NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${_prj}) - NCBI_internal_print_project_info(${_prj}) + if(OFF) + message("NCBI_PTB_ALLOWED_PROJECTS: ${NCBI_PTB_ALLOWED_PROJECTS}") + foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS) + get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj}) + message("NCBI_PTBPROP_DEPS_${_prj}: ${_prjdeps}") + endforeach() endif() - endforeach() - set(NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PTB_ALLOWED_PROJECTS} PARENT_SCOPE) - set(NCBI_PTB_ALLOWED_DIRS ${NCBI_PTB_ALLOWED_DIRS} PARENT_SCOPE) - message("Configuring projects...") + foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS) + NCBI_internal_collect_requires(${_prj}) + endforeach() + set(NCBI_PTB_CALLBACK_COLLECTED TRUE) + foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS) + if (NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${_prj}) + NCBI_internal_print_project_info(${_prj}) + endif() + endforeach() + + set(NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PTB_ALLOWED_PROJECTS} PARENT_SCOPE) + else() + message("Source tree analysis skipped") + endif() + NCBI_util_elapsed(_elapsed) + message("${_elapsed}: Configuring projects...") + variable_watch(CMAKE_CURRENT_LIST_DIR NCBI_internal_end_of_config) endmacro() ############################################################################# @@ -761,6 +830,8 @@ function(NCBI_internal_end_of_config _variable _access _value) set(NCBI_PTB_CALLBACK_ALL_ADDED TRUE) NCBI_internal_print_report("Processed" TOTAL) NCBI_internal_print_report("Added" COUNT) + NCBI_util_elapsed(_elapsed) + message("${_elapsed}: Done") endfunction() ############################################################################# @@ -1351,7 +1422,7 @@ macro(NCBI_internal_process_parts 
_result) NCBI_internal_collect_parts(_result) if(_result) - if (NCBI_PTBMODE_COLLECT_DEPS) + if (NCBI_PTBMODE_COLLECT_DEPS OR NOT NCBI_PTBCFG_ENABLE_COLLECTOR) #set_property(GLOBAL PROPERTY NCBI_PTBPROP_PARTS_${NCBI_PROJECT_ID} ${NCBITMP_PROJECT_PART_IDS}) foreach(_part IN LISTS NCBITMP_PROJECT_PART_IDS) set_property(GLOBAL PROPERTY NCBI_PTBPROP_HOSTID_${_part} ${NCBI_PROJECT_ID}) @@ -1406,7 +1477,10 @@ endfunction() ############################################################################## function(NCBI_internal_verify_libs) set(_optimize NO) - if (WIN32 AND NOT NCBI_PTBMODE_COLLECT_DEPS AND NOT DEFINED NCBI_EXTERNAL_TREE_ROOT AND NOT DEFINED NCBI_PTBCFG_DOINSTALL) + if (WIN32 AND NCBI_PTBCFG_ENABLE_COLLECTOR + AND NOT NCBI_PTBMODE_COLLECT_DEPS + AND NOT DEFINED NCBI_EXTERNAL_TREE_ROOT + AND NOT DEFINED NCBI_PTBCFG_DOINSTALL) if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "STATIC") # set(_ncbilib ${NCBITMP_NCBILIB}) get_property(_ncbilib GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${NCBI_PROJECT}) @@ -1499,7 +1573,7 @@ endfunction() ############################################################################## function(NCBI_internal_process_project_filters _result) - if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "") + if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "" AND NOT "${NCBI_PROJECT}" STREQUAL "") foreach(_prj IN LISTS NCBI_PTBCFG_PROJECT_TARGETS) if("${_prj}" STREQUAL "") continue() @@ -1516,7 +1590,7 @@ function(NCBI_internal_process_project_filters _result) endforeach() endif() - if(NOT "${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "") + if(NOT "${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "" AND NOT "${NCBI_PROJECT}" STREQUAL "") set(_alltags ${NCBI__PROJTAG} ${NCBI_${NCBI_PROJECT}_PROJTAG}) if("${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "-") if(NOT "${_alltags}" STREQUAL "") @@ -1556,6 +1630,11 @@ function(NCBI_internal_process_project_filters _result) if(NOT "${NCBI_PTBCFG_PROJECT_LIST}" STREQUAL "") set(_is_good FALSE) set(_hasp FALSE) + if(DEFINED NCBI_PTB_THIS_SRC_ROOT) + 
set(_src_root ${NCBI_PTB_THIS_SRC_ROOT}) + else() + set(_src_root ${NCBI_SRC_ROOT}) + endif() foreach(_dir IN LISTS NCBI_PTBCFG_PROJECT_LIST) if("${_dir}" STREQUAL "") continue() @@ -1566,14 +1645,14 @@ function(NCBI_internal_process_project_filters _result) endif() NCBI_util_parse_sign( ${_dir} _value _negate) if(_negate) - NCBI_util_match_path(${NCBI_CURRENT_SOURCE_DIR} ${NCBI_SRC_ROOT}/${_value} _match) + NCBI_util_match_path(${NCBI_CURRENT_SOURCE_DIR} ${_src_root}/${_value} _match) if(_match) set(${_result} FALSE PARENT_SCOPE) return() endif() else() set(_hasp TRUE) - NCBI_util_match_path(${NCBI_CURRENT_SOURCE_DIR} ${NCBI_SRC_ROOT}/${_value} _match) + NCBI_util_match_path(${NCBI_CURRENT_SOURCE_DIR} ${_src_root}/${_value} _match) if(_match) set(_is_good TRUE) endif() @@ -1585,7 +1664,7 @@ function(NCBI_internal_process_project_filters _result) endif() endif() - if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "") + if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "" AND NOT "${NCBI_PROJECT}" STREQUAL "") set(_is_good FALSE) set(_hasp FALSE) foreach(_prj IN LISTS NCBI_PTBCFG_PROJECT_TARGETS) @@ -1713,7 +1792,7 @@ function(NCBI_internal_print_report _caption _counter) set(_report "") foreach( _type IN ITEMS CONSOLEAPP GUIAPP STATIC SHARED CUSTOM) get_property(_cnt GLOBAL PROPERTY NCBI_PTBPROP_${_counter}_${_type}) - if( ${_cnt} GREATER 0) + if( NOT "${_cnt}" STREQUAL "" AND "${_cnt}" GREATER 0) if( NOT "${_report}" STREQUAL "") string(APPEND _report ",") endif() @@ -1780,7 +1859,7 @@ function(NCBI_internal_add_project) get_property(_hosted GLOBAL PROPERTY NCBI_PTBPROP_HOST_${NCBI_PROJECT}) endif() - if (NOT NCBI_PTBMODE_PARTS AND NOT NCBI_PTBMODE_COLLECT_DEPS AND NCBI_PTBCFG_ENABLE_COLLECTOR) + if (NCBI_PTBCFG_ENABLE_COLLECTOR AND NOT NCBI_PTBMODE_PARTS AND NOT NCBI_PTBMODE_COLLECT_DEPS) if(DEFINED NCBI_PTB_ALLOWED_PROJECTS) if(NOT ${NCBI_PROJECT} IN_LIST NCBI_PTB_ALLOWED_PROJECTS) if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT}) @@ -1804,6 
+1883,19 @@ function(NCBI_internal_add_project) endif() endif() + if(NOT NCBI_PTBCFG_ENABLE_COLLECTOR AND NOT NCBI_PTBMODE_PARTS) + get_property(_count GLOBAL PROPERTY NCBI_PTBPROP_TOTAL_${NCBI_${NCBI_PROJECT}_TYPE}) + math(EXPR _count "${_count} + 1") + set_property(GLOBAL PROPERTY NCBI_PTBPROP_TOTAL_${NCBI_${NCBI_PROJECT}_TYPE} ${_count}) + NCBI_internal_process_project_filters(_allowed) + if (NOT _allowed) + if ("${ARGC}" GREATER "0") + set(${ARGV0} FALSE PARENT_SCOPE) + endif() + return() + endif() + endif() + if (NCBI_PTBMODE_COLLECT_DEPS) get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DIRECT_DEPS_${NCBI_PROJECT} SET) if (_prjdeps AND NOT DEFINED NCBI_${NCBI_PROJECT}_PARTS) @@ -1918,6 +2010,9 @@ endif() endif() return() endif() + elseif(NOT NCBI_PTBCFG_ENABLE_COLLECTOR AND NCBI_PTBMODE_PARTS) + set(NCBITMP_PROJECT_PART_IDS ${NCBITMP_PROJECT_PART_IDS} ${NCBI_PROJECT_ID} PARENT_SCOPE ) + set(NCBITMP_PROJECT_PARTS ${NCBITMP_PROJECT_PARTS} ${NCBI_PROJECT_PARTNAME} PARENT_SCOPE ) endif() #message("processing ${NCBI_PROJECT_ID}") @@ -2008,6 +2103,10 @@ endif() message("WARNING: App target ${NCBI_${NCBI_PROJECT}_OUTPUT} (${NCBI_CURRENT_SOURCE_DIR}) cannot be created") message(" because there is already a target with the same name in ${_dir}") message(" App target ${NCBI_${NCBI_PROJECT}_OUTPUT} will be renamed into ${NCBI_PROJECT}") + elseif(NOT NCBI_PTBCFG_ENABLE_COLLECTOR) + message("WARNING: App target ${NCBI_${NCBI_PROJECT}_OUTPUT} (${NCBI_CURRENT_SOURCE_DIR}) cannot be created") + message(" because there is already a target with the same name elsewhere") + message(" App target ${NCBI_${NCBI_PROJECT}_OUTPUT} will be renamed into ${NCBI_PROJECT}") endif() endif() set_target_properties(${NCBI_PROJECT} PROPERTIES OUTPUT_NAME ${NCBI_${NCBI_PROJECT}_OUTPUT}) @@ -2022,6 +2121,8 @@ message(" ADDED: ${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT_ID}") message(" NCBITMP_PROJECT_SOURCES ${NCBITMP_PROJECT_SOURCES}") message(" NCBITMP_PROJECT_HEADERS 
${NCBITMP_PROJECT_HEADERS}") message(" NCBITMP_PROJECT_RESOURCES ${NCBITMP_PROJECT_RESOURCES}") +#message(" NCBI_SRC_ROOT ${NCBI_SRC_ROOT}") +#message(" NCBI_INC_ROOT ${NCBI_INC_ROOT}") endif() if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "CUSTOM") diff --git a/c++/src/build-system/cmake/CMake.NCBIptb.ntest.cmake b/c++/src/build-system/cmake/CMake.NCBIptb.ntest.cmake index 7de1a464..5ee24bab 100644 --- a/c++/src/build-system/cmake/CMake.NCBIptb.ntest.cmake +++ b/c++/src/build-system/cmake/CMake.NCBIptb.ntest.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBIptb.ntest.cmake 607666 2020-05-06 12:51:46Z ivanov $ +# $Id: CMake.NCBIptb.ntest.cmake 609363 2020-06-01 14:11:57Z ivanov $ ############################################################################# ############################################################################# ## @@ -98,11 +98,7 @@ endfunction() ############################################################################## function(NCBI_internal_add_ncbi_checktarget) - if(DEFINED NCBI_EXTERNAL_TREE_ROOT) - set(SCRIPT_NAME "${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_COMMON_SCRIPTS}/check/check_make_unix_cmake.sh") - else() - set(SCRIPT_NAME "${NCBI_TREE_ROOT}/${NCBI_DIRNAME_COMMON_SCRIPTS}/check/check_make_unix_cmake.sh") - endif() + set(SCRIPT_NAME "${NCBITK_TREE_ROOT}/${NCBI_DIRNAME_COMMON_SCRIPTS}/check/check_make_unix_cmake.sh") set(WORKDIR ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}) set(_checkdir ../check) set(_checkroot ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/${_checkdir}) diff --git a/c++/src/build-system/cmake/CMake.NCBItoolkit.cmake b/c++/src/build-system/cmake/CMake.NCBItoolkit.cmake index 691382a1..a4d03da4 100644 --- a/c++/src/build-system/cmake/CMake.NCBItoolkit.cmake +++ b/c++/src/build-system/cmake/CMake.NCBItoolkit.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMake.NCBItoolkit.cmake 603345 2020-03-10 
17:24:45Z ivanov $ +# $Id: CMake.NCBItoolkit.cmake 609379 2020-06-01 14:15:14Z ivanov $ ############################################################################# if(NOT DEFINED NCBI_TOOLKIT_NCBIPTB_BUILD_SYSTEM_INCLUDED) @@ -38,7 +38,11 @@ if(NCBI_EXPERIMENTAL) set(NCBI_EXPERIMENTAL_SUBDIRS ON) set(NCBI_EXPERIMENTAL_DISABLE_HUNTER ON) set(NCBI_VERBOSE_ALLPROJECTS OFF) - set(NCBI_PTBCFG_ENABLE_COLLECTOR ON) + if(NCBI_PTBCFG_SKIP_ANALYSIS) + set(NCBI_PTBCFG_ENABLE_COLLECTOR OFF) + else() + set(NCBI_PTBCFG_ENABLE_COLLECTOR ON) + endif() if(BUILD_SHARED_LIBS) if(WIN32 OR XCODE) @@ -80,32 +84,25 @@ if (WIN32) endif() endif() -if (DEFINED NCBI_EXTERNAL_TREE_ROOT) - set(_prefix "${NCBI_EXTERNAL_TREE_ROOT}/src/") -else() - if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/build-system/cmake/CMake.NCBIptb.cmake") - set(_prefix "") - elseif (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/build-system/cmake/CMake.NCBIptb.cmake") - set(_prefix "src/") - else() - message(FATAL_ERROR "Cannot find NCBIptb build system in ${CMAKE_SOURCE_DIR}") - endif() +set(_listdir "${CMAKE_CURRENT_LIST_DIR}") +if (NOT EXISTS "${_listdir}/CMake.NCBIptb.cmake") + message(FATAL_ERROR "Cannot find NCBIptb build system in ${_listdir}") endif() -include(${_prefix}build-system/cmake/CMakeMacros.cmake) -include(${_prefix}build-system/cmake/CMakeChecks.cmake) -include(${_prefix}build-system/cmake/CMake.NCBIptb.cmake) -include(${_prefix}build-system/cmake/CMake.NCBIptb.ncbi.cmake) -include(${_prefix}build-system/cmake/CMake.NCBIptb.datatool.cmake) -include(${_prefix}build-system/cmake/CMake.NCBIptb.grpc.cmake) -include(${_prefix}build-system/cmake/CMake.NCBIptb.ctest.cmake) +include(${_listdir}/CMakeMacros.cmake) +include(${_listdir}/CMakeChecks.cmake) +include(${_listdir}/CMake.NCBIptb.cmake) +include(${_listdir}/CMake.NCBIptb.ncbi.cmake) +include(${_listdir}/CMake.NCBIptb.datatool.cmake) +include(${_listdir}/CMake.NCBIptb.grpc.cmake) +include(${_listdir}/CMake.NCBIptb.ctest.cmake) if(NCBI_PTBCFG_ADDCHECK) - 
include(${_prefix}build-system/cmake/CMake.NCBIptb.ntest.cmake) + include(${_listdir}/CMake.NCBIptb.ntest.cmake) endif() if(NCBI_PTBCFG_DOINSTALL) - include(${_prefix}build-system/cmake/CMake.NCBIptb.install.cmake) + include(${_listdir}/CMake.NCBIptb.install.cmake) endif() -include(${_prefix}build-system/cmake/CMake.NCBIptb.legacy.cmake) +include(${_listdir}/CMake.NCBIptb.legacy.cmake) if (DEFINED NCBI_EXTERNAL_TREE_ROOT) if (EXISTS ${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}/${NCBI_PTBCFG_INSTALL_EXPORT}.cmake) @@ -116,5 +113,5 @@ if (DEFINED NCBI_EXTERNAL_TREE_ROOT) NCBI_import_hostinfo(${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}/${NCBI_PTBCFG_INSTALL_EXPORT}.hostinfo) endif() -include(${_prefix}build-system/cmake/CMakeChecks.final-message.cmake) +include(${_listdir}/CMakeChecks.final-message.cmake) endif(NOT DEFINED NCBI_TOOLKIT_NCBIPTB_BUILD_SYSTEM_INCLUDED) diff --git a/c++/src/build-system/cmake/CMakeChecks.boost.cmake b/c++/src/build-system/cmake/CMakeChecks.boost.cmake index 9ad928dc..ddf73f2b 100644 --- a/c++/src/build-system/cmake/CMakeChecks.boost.cmake +++ b/c++/src/build-system/cmake/CMakeChecks.boost.cmake @@ -35,7 +35,7 @@ endif() #set(Boost_DEBUG ON) find_package(Boost - COMPONENTS filesystem iostreams date_time regex system serialization + COMPONENTS filesystem iostreams date_time regex system serialization thread REQUIRED) set(CMAKE_PREFIX_PATH ${_foo_CMAKE_PREFIX_PATH}) diff --git a/c++/src/build-system/cmake/CMakeChecks.cmake b/c++/src/build-system/cmake/CMakeChecks.cmake index 5661b7b1..82a779b9 100644 --- a/c++/src/build-system/cmake/CMakeChecks.cmake +++ b/c++/src/build-system/cmake/CMakeChecks.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeChecks.cmake 607666 2020-05-06 12:51:46Z ivanov $ +# $Id: CMakeChecks.cmake 609374 2020-06-01 14:13:44Z ivanov $ ############################################################################# # # Note: @@ -13,6 +13,10 @@ 
if("${CMAKE_GENERATOR}" STREQUAL "Xcode") endif() endif() +string(TIMESTAMP NCBI_TIMESTAMP_START "%s") +string(TIMESTAMP _start) +message("Started: ${_start}") + ############################################################################# # Source tree description # @@ -37,18 +41,19 @@ set(NCBI_DIRNAME_CMAKECFG ${NCBI_DIRNAME_SRC}/build-system/cmake) if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/build-system/cmake/CMake.NCBIptb.cmake") - set(top_src_dir ${CMAKE_CURRENT_SOURCE_DIR}/..) - set(abs_top_src_dir ${CMAKE_CURRENT_SOURCE_DIR}/..) + set(_this_root ${CMAKE_CURRENT_SOURCE_DIR}/..) else() - set(top_src_dir ${CMAKE_SOURCE_DIR}) - set(abs_top_src_dir ${CMAKE_SOURCE_DIR}) + set(_this_root ${CMAKE_SOURCE_DIR}) endif() -get_filename_component(top_src_dir "${top_src_dir}" ABSOLUTE) -get_filename_component(abs_top_src_dir "${abs_top_src_dir}" ABSOLUTE) - -set(NCBI_TREE_ROOT ${top_src_dir}) -set(NCBI_SRC_ROOT ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_SRC}) -set(NCBI_INC_ROOT ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}) +get_filename_component(_this_root "${_this_root}" ABSOLUTE) +get_filename_component(top_src_dir "${CMAKE_CURRENT_LIST_DIR}/../../.." 
ABSOLUTE) + +set(NCBI_TREE_ROOT ${_this_root}) +set(NCBI_SRC_ROOT ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_SRC}) +set(NCBI_INC_ROOT ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}) +set(NCBITK_TREE_ROOT ${top_src_dir}) +set(NCBITK_SRC_ROOT ${NCBITK_TREE_ROOT}/${NCBI_DIRNAME_SRC}) +set(NCBITK_INC_ROOT ${NCBITK_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}) if (NOT EXISTS "${NCBI_SRC_ROOT}") set(NCBI_SRC_ROOT ${NCBI_TREE_ROOT}) endif() @@ -63,9 +68,9 @@ set(includedir ${NCBI_INC_ROOT}) set(incdir ${CMAKE_BINARY_DIR}/${NCBI_DIRNAME_CFGINC}) set(incinternal ${NCBI_INC_ROOT}/${NCBI_DIRNAME_INTERNAL}) - set(NCBI_DIRNAME_BUILD build) -if (DEFINED NCBI_EXTERNAL_TREE_ROOT) +#if (DEFINED NCBI_EXTERNAL_TREE_ROOT) +if (OFF) string(FIND ${CMAKE_BINARY_DIR} ${NCBI_TREE_ROOT} _pos_root) string(FIND ${CMAKE_BINARY_DIR} ${NCBI_SRC_ROOT} _pos_src) if(NOT "${_pos_root}" LESS "0" AND "${_pos_src}" LESS "0" AND NOT "${CMAKE_BINARY_DIR}" STREQUAL "${NCBI_TREE_ROOT}") @@ -95,9 +100,6 @@ else() endif() endif() endif() -if (NOT IS_DIRECTORY ${incinternal}) - set(incinternal "") -endif() if (NCBI_EXPERIMENTAL_CFG) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_RUNTIME}") @@ -111,26 +113,9 @@ else() set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${LIBRARY_OUTPUT_PATH}") endif() -if (DEFINED NCBI_EXTERNAL_TREE_ROOT) - set(NCBI_TREE_BUILDCFG "${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_BUILDCFG}") - set(NCBI_TREE_CMAKECFG "${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_CMAKECFG}") - set(NCBI_TREE_COMMON_INCLUDE ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}/common) -else() - set(NCBI_TREE_BUILDCFG "${NCBI_TREE_ROOT}/${NCBI_DIRNAME_BUILDCFG}") - set(NCBI_TREE_CMAKECFG "${NCBI_TREE_ROOT}/${NCBI_DIRNAME_CMAKECFG}") - set(NCBI_TREE_COMMON_INCLUDE ${NCBI_INC_ROOT}/common) -endif() -if(OFF) -message("CMAKE_SOURCE_DIR = ${CMAKE_SOURCE_DIR}") -message("NCBI_TREE_ROOT = ${NCBI_TREE_ROOT}") -message("NCBI_SRC_ROOT = ${NCBI_SRC_ROOT}") -message("NCBI_INC_ROOT = ${NCBI_INC_ROOT}") -message("NCBI_BUILD_ROOT = 
${NCBI_BUILD_ROOT}") -message("NCBI_CFGINC_ROOT = ${NCBI_CFGINC_ROOT}") -message("NCBI_TREE_BUILDCFG = ${NCBI_TREE_BUILDCFG}") -message("NCBI_TREE_CMAKECFG = ${NCBI_TREE_CMAKECFG}") -message("NCBI_TREE_COMMON_INCLUDE = ${NCBI_TREE_COMMON_INCLUDE}") -endif() +set(NCBI_TREE_CMAKECFG "${CMAKE_CURRENT_LIST_DIR}") +get_filename_component(NCBI_TREE_BUILDCFG "${CMAKE_CURRENT_LIST_DIR}/.." ABSOLUTE) + if(EXISTS ${NCBI_TREE_ROOT}/CMake.CustomConfig.txt) include(${NCBI_TREE_ROOT}/CMake.CustomConfig.txt) endif() @@ -174,23 +159,38 @@ else() endif() set(NCBI_DIRNAME_PREBUILT ${_prebuilt_loc}) +set(_tk_includedir ${NCBITK_INC_ROOT}) +set(_tk_incinternal ${NCBITK_INC_ROOT}/${NCBI_DIRNAME_INTERNAL}) +set(_inc_dirs) +foreach( _inc IN ITEMS ${includedir} ${incinternal} ${_tk_includedir} ${_tk_incinternal}) + if (IS_DIRECTORY ${_inc}) + list(APPEND _inc_dirs ${_inc}) + endif() +endforeach() +list(REMOVE_DUPLICATES _inc_dirs) +include_directories(${incdir} ${_inc_dirs}) +include_regular_expression("^.*[.](h|hpp|c|cpp|inl|inc)$") +if(OFF) +message("CMAKE_SOURCE_DIR = ${CMAKE_SOURCE_DIR}") +message("NCBI_TREE_ROOT = ${NCBI_TREE_ROOT}") +message("NCBI_SRC_ROOT = ${NCBI_SRC_ROOT}") +message("NCBI_INC_ROOT = ${NCBI_INC_ROOT}") +message("NCBITK_TREE_ROOT = ${NCBITK_TREE_ROOT}") +message("NCBITK_SRC_ROOT = ${NCBITK_SRC_ROOT}") +message("NCBITK_INC_ROOT = ${NCBITK_INC_ROOT}") +message("NCBI_BUILD_ROOT = ${NCBI_BUILD_ROOT}") +message("NCBI_CFGINC_ROOT = ${NCBI_CFGINC_ROOT}") +message("NCBI_TREE_BUILDCFG = ${NCBI_TREE_BUILDCFG}") +message("NCBI_TREE_CMAKECFG = ${NCBI_TREE_CMAKECFG}") +message("include_directories(${incdir} ${_inc_dirs})") +endif() + if (DEFINED NCBI_EXTERNAL_TREE_ROOT) set(NCBI_EXTERNAL_BUILD_ROOT ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_PREBUILT}) - - if (IS_DIRECTORY ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}) - set(_ext_includedir0 ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}) - if (IS_DIRECTORY 
${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}/${NCBI_DIRNAME_INTERNAL}) - set(_ext_incinternal ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}/${NCBI_DIRNAME_INTERNAL}) - endif() - endif() if (NOT EXISTS ${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}/${NCBI_PTBCFG_INSTALL_EXPORT}.cmake) message(FATAL_ERROR "${NCBI_PTBCFG_INSTALL_EXPORT} was not found in ${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}") endif() - include_directories(${incdir} ${NCBI_INC_ROOT} ${incinternal} ${_ext_includedir0} ${_ext_incinternal}) -else() - include_directories(${incdir} ${includedir0} ${incinternal}) endif() -include_regular_expression("^.*[.](h|hpp|c|cpp|inl|inc)$") #set(CMAKE_MODULE_PATH "${NCBI_SRC_ROOT}/build-system/cmake/" ${CMAKE_MODULE_PATH}) list(APPEND CMAKE_MODULE_PATH "${NCBI_TREE_CMAKECFG}") @@ -228,7 +228,7 @@ include(${NCBI_TREE_CMAKECFG}/CMake.NCBIComponents.cmake) # This sets a version to be used throughout our config process # NOTE: Adjust as needed # -set(NCBI_CPP_TOOLKIT_VERSION_MAJOR 23) +set(NCBI_CPP_TOOLKIT_VERSION_MAJOR 24) set(NCBI_CPP_TOOLKIT_VERSION_MINOR 0) set(NCBI_CPP_TOOLKIT_VERSION_PATCH 0) set(NCBI_CPP_TOOLKIT_VERSION_EXTRA "") @@ -318,6 +318,7 @@ string(REPLACE ";" " " FEATURES "${NCBI_ALL_COMPONENTS}") if (NCBI_EXPERIMENTAL_CFG) + set(_tk_common_include "${NCBITK_INC_ROOT}/common") if (WIN32 OR XCODE) foreach(_cfg ${NCBI_CONFIGURATION_TYPES}) @@ -346,11 +347,15 @@ else() configure_file(${NCBI_TREE_CMAKECFG}/config.cmake.h.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/config/ncbiconf_xcode.h) endif() endif() - if (EXISTS ${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in) - configure_file(${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/config/ncbicfg.cfg.c) + if (EXISTS ${NCBITK_SRC_ROOT}/corelib/ncbicfg.c.in) + configure_file(${NCBITK_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/config/ncbicfg.cfg.c) + endif() + configure_file(${_tk_common_include}/ncbi_build_ver.h.in 
${NCBI_CFGINC_ROOT}/${_cfg}/common/ncbi_build_ver.h) + if (DEFINED NCBI_EXTERNAL_TREE_ROOT) + configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h) + else() + configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBITK_INC_ROOT}/common/ncbi_revision.h) endif() - configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/ncbi_build_ver.h) - configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h) endforeach() if(NOT EXISTS ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c) file(WRITE ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c "#include \n") @@ -371,12 +376,17 @@ endif() set(NCBI_SIGNATURE "${NCBI_COMPILER}_${NCBI_COMPILER_VERSION}-${NCBI_BUILD_TYPE}--${HOST_CPU}-${HOST_OS_WITH_VERSION}-${_local_host_name}") configure_file(${NCBI_TREE_CMAKECFG}/config.cmake.h.in ${NCBI_CFGINC_ROOT}/ncbiconf_unix.h) - if (EXISTS ${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in) - configure_file(${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c) + if (EXISTS ${NCBITK_SRC_ROOT}/corelib/ncbicfg.c.in) + configure_file(${NCBITK_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c) endif() - configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/common/ncbi_build_ver.h) - configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h) + configure_file(${_tk_common_include}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/common/ncbi_build_ver.h) + configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h) + if (DEFINED NCBI_EXTERNAL_TREE_ROOT) + configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h) + else() + configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBITK_INC_ROOT}/common/ncbi_revision.h) + endif() endif() 
else (NCBI_EXPERIMENTAL_CFG) diff --git a/c++/src/build-system/cmake/CMakeChecks.compiler.cmake b/c++/src/build-system/cmake/CMakeChecks.compiler.cmake index bf8d5f05..9178e638 100644 --- a/c++/src/build-system/cmake/CMakeChecks.compiler.cmake +++ b/c++/src/build-system/cmake/CMakeChecks.compiler.cmake @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeChecks.compiler.cmake 608131 2020-05-12 15:15:17Z ivanov $ +# $Id: CMakeChecks.compiler.cmake 609369 2020-06-01 14:12:55Z ivanov $ ############################################################################# # # This config is designed to capture all compiler and linker definitions and search parameters @@ -262,6 +262,7 @@ if (NOT buildconf) set(buildconf0 ${CMAKE_BUILD_TYPE}) set(NCBI_BUILD_TYPE "${CMAKE_BUILD_TYPE}MT64") endif (NOT buildconf) +set(NCBI_CONFIGURATION_TYPES "${CMAKE_BUILD_TYPE}") if(MaxDebug IN_LIST NCBI_PTBCFG_PROJECT_FEATURES) add_definitions(-D_GLIBCXX_DEBUG) @@ -469,7 +470,7 @@ message(STATUS "NCBI_COMPILER_WRAPPER = ${NCBI_COMPILER_WRAPPER}") set(CMAKE_SHARED_LINKER_FLAGS_RDYNAMIC "${CMAKE_SHARED_LINKER_FLAGS}") # for smooth transition, please don't use set(CMAKE_SHARED_LINKER_FLAGS_ALLOW_UNDEFINED "${CMAKE_SHARED_LINKER_FLAGS}") -if ((NOT DEFINED ${APPLE}) OR (NOT ${APPLE})) +if (NOT APPLE) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") endif () @@ -484,7 +485,7 @@ SET(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) SET(CMAKE_INSTALL_RPATH "/$ORIGIN/../lib") #this add RUNPATH to binaries (RPATH is already there anyway), which makes it more like binaries built by C++ Toolkit -if (NOT WIN32) +if (NOT WIN32 AND NOT APPLE) SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--enable-new-dtags") endif() diff --git a/c++/src/build-system/cmake/CMakeLists.top_builddir.txt b/c++/src/build-system/cmake/CMakeLists.top_builddir.txt index be02b053..b44cf14f 100644 --- 
a/c++/src/build-system/cmake/CMakeLists.top_builddir.txt +++ b/c++/src/build-system/cmake/CMakeLists.top_builddir.txt @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeLists.top_builddir.txt 603341 2020-03-10 17:23:52Z ivanov $ +# $Id: CMakeLists.top_builddir.txt 609363 2020-06-01 14:11:57Z ivanov $ ############################################################################# ############################################################################## @@ -28,5 +28,5 @@ set(NCBI_PTBCFG_KNOWN_FOLDERS ${NCBI_PTBCFG_KNOWN_FOLDERS} ) -include(build-system/cmake/CMake.NCBItoolkit.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/CMake.NCBItoolkit.cmake) NCBI_add_subdirectory( ${NCBI_PTBCFG_KNOWN_FOLDERS}) diff --git a/c++/src/build-system/cmake/cmake-cfg-unix.sh b/c++/src/build-system/cmake/cmake-cfg-unix.sh index 55bc3291..38cd2482 100755 --- a/c++/src/build-system/cmake/cmake-cfg-unix.sh +++ b/c++/src/build-system/cmake/cmake-cfg-unix.sh @@ -1,6 +1,6 @@ #!/bin/sh ############################################################################# -# $Id: cmake-cfg-unix.sh 607664 2020-05-06 12:50:47Z ivanov $ +# $Id: cmake-cfg-unix.sh 609379 2020-06-01 14:15:14Z ivanov $ # Configure NCBI C++ toolkit using CMake build system. 
# Author: Andrei Gourianov, gouriano@ncbi ############################################################################# @@ -13,7 +13,7 @@ extension="cmake_configure_ext.sh" NCBI_EXPERIMENTAL="ON" host_os=`uname` -if test $host_os = "Darwin"; then +if test -z "${CMAKE_CMD}" -a $host_os = "Darwin"; then CMAKE_CMD=/Applications/CMake.app/Contents/bin/cmake fi if [ -z "${CMAKE_CMD}" ]; then @@ -30,6 +30,7 @@ BUILD_TYPE="Debug" BUILD_SHARED_LIBS="OFF" USE_CCACHE="ON" USE_DISTCC="ON" +SKIP_ANALYSIS="OFF" ############################################################################# Check_function_exists() { @@ -72,6 +73,7 @@ OPTIONS: --with-build-root=name -- specify a non-default build directory name --without-ccache -- do not use ccache --without-distcc -- do not use distcc + --without-analysis -- skip source tree analysis --with-generator="X" -- use generator X EOF @@ -149,21 +151,24 @@ while [ $# -ne 0 ]; do --without-distcc) USE_DISTCC="OFF" ;; + --without-analysis) + SKIP_ANALYSIS="ON" + ;; --with-projects=*) PROJECT_LIST=${1#*=} - if [ -e "${tree_root}/$PROJECT_LIST" ]; then + if [ -f "${tree_root}/$PROJECT_LIST" ]; then PROJECT_LIST="${tree_root}/$PROJECT_LIST" fi ;; --with-tags=*) PROJECT_TAGS=${1#*=} - if [ -e "${tree_root}/$PROJECT_TAGS" ]; then + if [ -f "${tree_root}/$PROJECT_TAGS" ]; then PROJECT_TAGS="${tree_root}/$PROJECT_TAGS" fi ;; --with-targets=*) PROJECT_TARGETS=${1#*=} - if [ -e "${tree_root}/$PROJECT_TARGETS" ]; then + if [ -f "${tree_root}/$PROJECT_TARGETS" ]; then PROJECT_TARGETS="${tree_root}/$PROJECT_TARGETS" fi ;; @@ -269,6 +274,9 @@ if [ -n "$CC" ]; then if test $host_os = "Darwin"; then CC_NAME=`$CC --version 2>/dev/null | awk 'NR==1{print $2}'` CC_VERSION=`$CC --version 2>/dev/null | awk 'NR==1{print $4}' | sed 's/[.]//g'` + if [ $CC_NAME = "clang" ]; then + CC_NAME="Clang" + fi else CC_NAME=`$CC --version | awk 'NR==1{print $1}' | tr '[:lower:]' '[:upper:]'` ver=`$CC -dumpfullversion 2>/dev/null || $CC -dumpversion 2>/dev/null` @@ -303,6 
+311,7 @@ CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_LIST=$(Quote "${PROJECT_LIST}")" CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_TAGS=$(Quote "${PROJECT_TAGS}")" CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_TARGETS=$(Quote "${PROJECT_TARGETS}")" CMAKE_ARGS="$CMAKE_ARGS -DNCBI_VERBOSE_PROJECTS=$(Quote "${PROJECT_DETAILS}")" +CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_SKIP_ANALYSIS=$(Quote "${SKIP_ANALYSIS}")" if [ -n "$INSTALL_PATH" ]; then CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_INSTALL_PATH=$(Quote "${INSTALL_PATH}")" fi diff --git a/c++/src/build-system/cmake/cmake-cfg-vs.bat b/c++/src/build-system/cmake/cmake-cfg-vs.bat index 5b12e673..0c89d50f 100644 --- a/c++/src/build-system/cmake/cmake-cfg-vs.bat +++ b/c++/src/build-system/cmake/cmake-cfg-vs.bat @@ -1,7 +1,7 @@ @echo off setlocal ENABLEDELAYEDEXPANSION REM ######################################################################### -REM $Id: cmake-cfg-vs.bat 607666 2020-05-06 12:51:46Z ivanov $ +REM $Id: cmake-cfg-vs.bat 609379 2020-06-01 14:15:14Z ivanov $ REM Configure NCBI C++ toolkit for Visual Studio using CMake build system. 
REM Author: Andrei Gourianov, gouriano@ncbi REM ######################################################################### @@ -30,6 +30,7 @@ REM ######################################################################### REM defaults set BUILD_SHARED_LIBS=OFF set VISUAL_STUDIO=2017 +set SKIP_ANALYSIS=OFF goto :RUN REM ######################################################################### @@ -63,6 +64,7 @@ echo examples: --with-components="-Z" echo --with-features="LIST" -- specify compilation features echo examples: --with-features="StrictGI" echo --with-build-root=name -- specify a non-default build directory name +echo --without-analysis -- skip source tree analysis echo --with-vs=N -- use Visual Studio N generator echo examples: --with-vs=2017 (default) echo --with-vs=2019 @@ -128,6 +130,7 @@ if "%1"=="--with-targets" (set PROJECT_TARGETS=%~2& shift& goto :CONTINUE if "%1"=="--with-details" (set PROJECT_DETAILS=%~2& shift& goto :CONTINUEPARSEARGS) if "%1"=="--with-vs" (set VISUAL_STUDIO=%~2& shift& goto :CONTINUEPARSEARGS) if "%1"=="--with-install" (set INSTALL_PATH=%~2& shift& goto :CONTINUEPARSEARGS) +if "%1"=="--without-analysis" (set SKIP_ANALYSIS=ON& goto :CONTINUEPARSEARGS) if "%1"=="--with-generator" (set CMAKE_GENERATOR=%~2& shift& goto :CONTINUEPARSEARGS) if "%1"=="--with-prebuilt" (set prebuilt_dir=%~dp2& set prebuilt_name=%~nx2& shift& goto :CONTINUEPARSEARGS) set unknown=%unknown% %1 @@ -195,17 +198,26 @@ if "%CMAKE_GENERATOR%"=="Visual Studio 14 2015 Win64" ( if not "%PROJECT_LIST%"=="" ( if exist "%tree_root%\%PROJECT_LIST%" ( - set PROJECT_LIST=%tree_root%\%PROJECT_LIST% + type "%tree_root%\%PROJECT_LIST%" >NUL 2>&1 + if not errorlevel 1 ( + set PROJECT_LIST=%tree_root%\%PROJECT_LIST% + ) ) ) if not "%PROJECT_TAGS%"=="" ( if exist "%tree_root%\%PROJECT_TAGS%" ( - set PROJECT_TAGS=%tree_root%\%PROJECT_TAGS% + type "%tree_root%\%PROJECT_TAGS%" >NUL 2>&1 + if not errorlevel 1 ( + set PROJECT_TAGS=%tree_root%\%PROJECT_TAGS% + ) ) ) if not 
"%PROJECT_TARGETS%"=="" ( if exist "%tree_root%\%PROJECT_TARGETS%" ( - set PROJECT_TARGETS=%tree_root%\%PROJECT_TARGETS% + type "%tree_root%\%PROJECT_TARGETS%" >NUL 2>&1 + if not errorlevel 1 ( + set PROJECT_TARGETS=%tree_root%\%PROJECT_TARGETS% + ) ) ) @@ -222,6 +234,7 @@ set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_PROJECT_LIST="%PROJECT_LIST%" set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_PROJECT_TAGS="%PROJECT_TAGS%" set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_PROJECT_TARGETS="%PROJECT_TARGETS%" set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_VERBOSE_PROJECTS="%PROJECT_DETAILS%" +set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_SKIP_ANALYSIS=%SKIP_ANALYSIS% if not "%INSTALL_PATH%"=="" ( set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_INSTALL_PATH="%INSTALL_PATH%" ) diff --git a/c++/src/build-system/cmake/cmake-cfg-xcode.sh b/c++/src/build-system/cmake/cmake-cfg-xcode.sh index 75cfd14b..c48c5ee5 100755 --- a/c++/src/build-system/cmake/cmake-cfg-xcode.sh +++ b/c++/src/build-system/cmake/cmake-cfg-xcode.sh @@ -1,6 +1,6 @@ #!/bin/sh ############################################################################# -# $Id: cmake-cfg-xcode.sh 603557 2020-03-12 16:26:27Z ivanov $ +# $Id: cmake-cfg-xcode.sh 609379 2020-06-01 14:15:14Z ivanov $ # Configure NCBI C++ toolkit for XCode using CMake build system. 
# Author: Andrei Gourianov, gouriano@ncbi ############################################################################# @@ -26,6 +26,7 @@ fi ############################################################################# # defaults BUILD_SHARED_LIBS="OFF" +SKIP_ANALYSIS="OFF" ############################################################################# Check_function_exists() { @@ -62,6 +63,7 @@ OPTIONS: --with-features="LIST" -- specify compilation features examples: --with-features="StrictGI" --with-build-root=name -- specify a non-default build directory name + --without-analysis -- skip source tree analysis EOF Check_function_exists configure_ext_Usage && configure_ext_Usage @@ -115,19 +117,19 @@ while [ $# != 0 ]; do ;; --with-projects=*) PROJECT_LIST=${1#*=} - if [ -e "${tree_root}/$PROJECT_LIST" ]; then + if [ -f "${tree_root}/$PROJECT_LIST" ]; then PROJECT_LIST="${tree_root}/$PROJECT_LIST" fi ;; --with-tags=*) PROJECT_TAGS=${1#*=} - if [ -e "${tree_root}/$PROJECT_TAGS" ]; then + if [ -f "${tree_root}/$PROJECT_TAGS" ]; then PROJECT_TAGS="${tree_root}/$PROJECT_TAGS" fi ;; --with-targets=*) PROJECT_TARGETS=${1#*=} - if [ -e "${tree_root}/$PROJECT_TARGETS" ]; then + if [ -f "${tree_root}/$PROJECT_TARGETS" ]; then PROJECT_TARGETS="${tree_root}/$PROJECT_TARGETS" fi ;; @@ -151,6 +153,9 @@ while [ $# != 0 ]; do prebuilt_dir=`dirname $prebuilt_path` prebuilt_name=`basename $prebuilt_path` ;; + --without-analysis) + SKIP_ANALYSIS="ON" + ;; *) unknown="$unknown $1" ;; @@ -205,6 +210,7 @@ CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_LIST=$(Quote "${PROJECT_LIST}")" CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_TAGS=$(Quote "${PROJECT_TAGS}")" CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_TARGETS=$(Quote "${PROJECT_TARGETS}")" CMAKE_ARGS="$CMAKE_ARGS -DNCBI_VERBOSE_PROJECTS=$(Quote "${PROJECT_DETAILS}")" +CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_SKIP_ANALYSIS=$(Quote "${SKIP_ANALYSIS}")" if [ -n "$INSTALL_PATH" ]; then CMAKE_ARGS="$CMAKE_ARGS 
-DNCBI_PTBCFG_INSTALL_PATH=$(Quote "${INSTALL_PATH}")" fi diff --git a/c++/src/build-system/cmake/cmake_configure_ext_gpipe.sh b/c++/src/build-system/cmake/cmake_configure_ext_gpipe.sh index 1a335e5a..ffb66054 100755 --- a/c++/src/build-system/cmake/cmake_configure_ext_gpipe.sh +++ b/c++/src/build-system/cmake/cmake_configure_ext_gpipe.sh @@ -1,7 +1,7 @@ #!/bin/sh ############################################################################# -# $Id: cmake_configure_ext_gpipe.sh 600386 2020-01-16 17:00:37Z gouriano $ +# $Id: cmake_configure_ext_gpipe.sh 609574 2020-06-03 20:26:39Z whlavina $ ############################################################################# _ext_check=`type -t Check_function_exists` @@ -40,28 +40,28 @@ configure_ext_ParseArgs() BUILD_TYPE="Release" BUILD_SHARED_LIBS="ON" PROJECT_FEATURES="${PROJECT_FEATURES};Int8GI" - BUILD_ROOT="Release" + : "${BUILD_ROOT:=../Release}" add_gpipe_warnings ;; "--gpipe-dev") BUILD_TYPE="Debug" BUILD_SHARED_LIBS="ON" PROJECT_FEATURES="${PROJECT_FEATURES};StrictGI" - BUILD_ROOT="Debug" + : "${BUILD_ROOT:=../Debug}" add_gpipe_warnings ;; "--gpipe-cgi") BUILD_TYPE="Release" BUILD_SHARED_LIBS="OFF" PROJECT_FEATURES="${PROJECT_FEATURES};Int8GI" - BUILD_ROOT="Static" + : "${BUILD_ROOT:=../Static}" add_gpipe_warnings ;; "--gpipe-distrib") BUILD_TYPE="Release" BUILD_SHARED_LIBS="OFF" PROJECT_COMPONENTS="${PROJECT_COMPONENTS};-PCRE" - BUILD_ROOT="Distrib" + : "${BUILD_ROOT:=../Distrib}" add_gpipe_warnings ;; *) diff --git a/c++/src/build-system/config.h.in b/c++/src/build-system/config.h.in index c207c1c4..fd02d141 100644 --- a/c++/src/build-system/config.h.in +++ b/c++/src/build-system/config.h.in @@ -303,6 +303,9 @@ /* Define to 1 if you have the `lchown' function. */ #undef HAVE_LCHOWN +/* Define to 1 if libparquet is available. */ +#undef HAVE_LIBAPACHE_ARROW + /* Define to 1 if libavrocpp is available. */ #undef HAVE_LIBAVRO @@ -321,6 +324,9 @@ /* Define to 1 if non-public CONNECT extensions are available. 
*/ #undef HAVE_LIBCONNEXT +/* Define to 1 if libcppkafka is available. */ +#undef HAVE_LIBCPPKAFKA + /* Define to 1 if CRYPT is available, either in its own library or as part of the standard libraries. */ #undef HAVE_LIBCRYPT @@ -468,6 +474,9 @@ /* Define to 1 if libprotobuf$PROTOBUF_SFX is available. */ #undef HAVE_LIBPROTOBUF +/* Define to 1 if librdkafka is available. */ +#undef HAVE_LIBRDKAFKA + /* Define to 1 if RPCSVC is available, either in its own library or as part of the standard libraries. */ #undef HAVE_LIBRPCSVC @@ -533,6 +542,9 @@ /* Define to 1 if libz is available. */ #undef HAVE_LIBZ +/* Define to 1 if libzstd is available. */ +#undef HAVE_LIBZSTD + /* Define to 1 if you have the header file. */ #undef HAVE_LIMITS diff --git a/c++/src/build-system/configure b/c++/src/build-system/configure index 65d42e0d..7b0c8344 100755 --- a/c++/src/build-system/configure +++ b/c++/src/build-system/configure @@ -666,6 +666,9 @@ PERL_INCLUDE UNLESS_PUBSEQOS ncbi_xreader_pubseqos2 ncbi_xreader_pubseqos +CPPKAFKA_STATIC_LIBS +LIBRDKAFKA_STATIC_LIBS +APACHE_ARROW_STATIC_LIBS HIREDIS_STATIC_LIBS AWS_SDK_STATIC_LIBS MSGSL_INCLUDE @@ -913,6 +916,12 @@ srcdir top_srcdir build_root signature +CPPKAFKA_LIBS +CPPKAFKA_INCLUDE +LIBRDKAFKA_LIBS +LIBRDKAFKA_INCLUDE +APACHE_ARROW_LIBS +APACHE_ARROW_INCLUDE HIREDIS_LIBS HIREDIS_INCLUDE AWS_SDK_LIBS @@ -1061,6 +1070,8 @@ MBEDTLS_LIBS MBEDTLS_INCLUDE PCRE_LIBS PCRE_INCLUDE +ZSTD_LIBS +ZSTD_INCLUDE LZO_LIBS LZO_INCLUDE BZ2_LIBS @@ -1243,6 +1254,7 @@ with_backward_cpp_sig with_z with_bz2 with_lzo +with_zstd with_pcre with_mbedtls with_gmp @@ -1322,6 +1334,9 @@ with_grpc with_msgsl with_aws_sdk with_hiredis +with_apache_arrow +with_librdkafka +with_cppkafka with_3psw with_local_lbsm with_ncbi_crypt @@ -1889,7 +1904,8 @@ check ncbi-public strip pch caution ccache distcc \ ncbi-c wxwidgets wxwidgets-ucs fastcgi sss sssdb sssutils included-sss \ geo included-geo vdb downloaded-vdb static-vdb ngs libunwind libdw \ backward-cpp 
backward-cpp-sig \ -z bz2 lzo pcre mbedtls gmp gcrypt nettle gnutls static-gnutls openssl krb5 \ +z bz2 lzo zstd pcre mbedtls \ +gmp gcrypt nettle gnutls static-gnutls openssl krb5 \ sybase sybase-local sybase-new ftds mysql \ orbacus freetype ftgl opengl mesa glut glew glew-mx gl2ps \ bdb python perl jni sqlite3 icu boost boost-tag \ @@ -1900,6 +1916,7 @@ magic curl mimetic gsoap avro cereal sasl2 \ mongodb mongodb3 leveldb gmock lapack lmdb \ libuv libssh2 cassandra nghttp2 h2o influxdb \ libxlsxwriter protobuf grpc msgsl aws-sdk hiredis \ +apache-arrow librdkafka cppkafka \ 3psw local-lbsm ncbi-crypt connext \ serial objects dbapi app ctools gui algo internal gbench" @@ -1952,7 +1969,7 @@ for x_arg in "$@" ; do --srcdir=* | --x-includes=* | --x-libraries=* | --with-tcheck=* \ | --with-ncbi-c=* | --with-sss=* | --with-vdb=* | --with-ngs=* \ | --with-libunwind=* | --with-libdw=* | --with-backward-cpp=* \ - | --with-z=* | --with-bz2=* | --with-lzo=* \ + | --with-z=* | --with-bz2=* | --with-lzo=* | --with-zstd=* \ | --with-pcre=* | --with-mbedtls=* \ | --with-gmp=* | --with-gcrypt=* | --with-nettle=* \ | --with-gnutls=* | --with-openssl=* | --with-krb5=* \ @@ -2182,6 +2199,8 @@ Optional Packages: --without-bz2 use internal copy of bzlib --with-lzo=DIR use LZO installation in DIR (requires 2.x or up) --without-lzo do not use LZO + --with-zstd=DIR use Zstandard installation in DIR + --without-zstd do not use Zstandard --with-pcre=DIR use PCRE installation in DIR --without-pcre use internal copy of PCRE --with-mbedtls(=DIR) use external mbedTLS installation (in DIR) @@ -2331,6 +2350,12 @@ Optional Packages: --without-aws-sdk do not use the Amazon Web Services SDK --with-hiredis=DIR use Hiredis installation in DIR --without-hiredis do not use Hiredis + --with-apache-arrow=DIR use Apache Arrow installation in DIR + --without-apache-arrow do not use Apache Arrow + --with-librdkafka=DIR use librdkafka installation in DIR + --without-librdkafka do not use librdkafka + 
--with-cppkafka=DIR use cppkafka installation in DIR + --without-cppkafka do not use cppkafka --with-3psw=std:netopt favor standard (system) builds of the above pkgs. --without-3psw do not use any of the above packages --without-local-lbsm turn off support for IPC with locally running LBSMD @@ -3590,6 +3615,11 @@ case "$with_3psw" in else with_lzo=no fi + if test "${with_zstd-no}" != "no"; then + as_fn_error $? "incompatible options: --with-zstd but --without-3psw" + else + with_zstd=no + fi if test "${with_pcre-no}" != "no"; then as_fn_error $? "incompatible options: --with-pcre but --without-3psw" else @@ -3950,6 +3980,21 @@ case "$with_3psw" in else with_hiredis=no fi + if test "${with_apache-arrow-no}" != "no"; then + as_fn_error $? "incompatible options: --with-apache-arrow but --without-3psw" + else + with_apache-arrow=no + fi + if test "${with_librdkafka-no}" != "no"; then + as_fn_error $? "incompatible options: --with-librdkafka but --without-3psw" + else + with_librdkafka=no + fi + if test "${with_cppkafka-no}" != "no"; then + as_fn_error $? "incompatible options: --with-cppkafka but --without-3psw" + else + with_cppkafka=no + fi { NCBI=; unset NCBI;} ;; @@ -4801,6 +4846,18 @@ if test "${with_lzo+set}" = set; then : fi +# Check whether --with-zstd was given. +if test "${with_zstd+set}" = set; then : + withval=$with_zstd; +fi + + +# Check whether --with-zstd was given. +if test "${with_zstd+set}" = set; then : + withval=$with_zstd; +fi + + # Check whether --with-pcre was given. if test "${with_pcre+set}" = set; then : withval=$with_pcre; @@ -5689,9 +5746,45 @@ if test "${with_hiredis+set}" = set; then : fi -# Check whether --with-grpc was given. -if test "${with_grpc+set}" = set; then : - withval=$with_grpc; +# Check whether --with-hiredis was given. +if test "${with_hiredis+set}" = set; then : + withval=$with_hiredis; +fi + + +# Check whether --with-apache-arrow was given. 
+if test "${with_apache_arrow+set}" = set; then : + withval=$with_apache_arrow; +fi + + +# Check whether --with-apache-arrow was given. +if test "${with_apache_arrow+set}" = set; then : + withval=$with_apache_arrow; +fi + + +# Check whether --with-librdkafka was given. +if test "${with_librdkafka+set}" = set; then : + withval=$with_librdkafka; +fi + + +# Check whether --with-librdkafka was given. +if test "${with_librdkafka+set}" = set; then : + withval=$with_librdkafka; +fi + + +# Check whether --with-cppkafka was given. +if test "${with_cppkafka+set}" = set; then : + withval=$with_cppkafka; +fi + + +# Check whether --with-cppkafka was given. +if test "${with_cppkafka+set}" = set; then : + withval=$with_cppkafka; fi @@ -17543,6 +17636,152 @@ if test -n "$LZO_LIBS" -a "x$with_bin_release" = xyes \ LZO_LIBS="$LZO_LIBPATH -llzo2-static" fi +if test -d "$ZSTD_PATH"; then + ncbi_fix_dir_tmp=`if cd $ZSTD_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` + case "$ncbi_fix_dir_tmp" in + /.*) ncbi_fix_dir_tmp2=`cd $ZSTD_PATH && $smart_pwd 2>/dev/null` + if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then + ZSTD_PATH=$ncbi_fix_dir_tmp2 + else + case "$ZSTD_PATH" in + /*) ;; + * ) ZSTD_PATH=$ncbi_fix_dir_tmp ;; + esac + fi + ;; + /*) ZSTD_PATH=$ncbi_fix_dir_tmp ;; + esac +fi +if test "$with_zstd" != "no"; then + case "$ZSTD_PATH:$with_zstd" in + *:yes | *: | $with_zstd* ) ;; + * ) ZSTD_PATH=$with_zstd ;; + esac + if test "$ZSTD_PATH" != /usr -a -d "$ZSTD_PATH"; then + in_path=" in $ZSTD_PATH" + if test -z "$ZSTD_INCLUDE" -a -d "$ZSTD_PATH/include"; then + ZSTD_INCLUDE="-I$ZSTD_PATH/include" + fi + if test -n "$ZSTD_LIBPATH"; then + : + elif test -d "$ZSTD_PATH/lib${bit64_sfx}"; then + ncbi_rp_L_flags= + ncbi_rp_L_sep=$CONF_f_libpath + if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then + for x in $ZSTD_PATH/lib${bit64_sfx}; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + 
ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + done + ZSTD_LIBPATH="${ncbi_rp_L_flags}" + else + ncbi_rp_R_flags= + ncbi_rp_R_sep=" $CONF_f_runpath" + for x in $ZSTD_PATH/lib${bit64_sfx}; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + x=`echo $x | sed -e "$ncbi_rpath_sed"` + ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x" + ncbi_rp_R_sep=: + done + ZSTD_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" + fi + elif test -d "$ZSTD_PATH/lib"; then + ncbi_rp_L_flags= + ncbi_rp_L_sep=$CONF_f_libpath + if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then + for x in $ZSTD_PATH/lib; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + done + ZSTD_LIBPATH="${ncbi_rp_L_flags}" + else + ncbi_rp_R_flags= + ncbi_rp_R_sep=" $CONF_f_runpath" + for x in $ZSTD_PATH/lib; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + x=`echo $x | sed -e "$ncbi_rpath_sed"` + ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x" + ncbi_rp_R_sep=: + done + ZSTD_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" + fi + fi + ZSTD_LIBS="$ZSTD_LIBPATH -lzstd " + else + ZSTD_INCLUDE="" + ZSTD_LIBS="-lzstd " + in_path= + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libzstd$in_path" >&5 +$as_echo_n "checking for libzstd$in_path... " >&6; } +if ${ncbi_cv_lib_zstd+:} false; then : + $as_echo_n "(cached) " >&6 +else + CPPFLAGS=" $ZSTD_INCLUDE $orig_CPPFLAGS" + LIBS="$ZSTD_LIBS $orig_LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <zstd.h> +int +main () +{ +ZSTD_CCtx* cctx = ZSTD_createCCtx(); + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + ncbi_cv_lib_zstd=yes +else + ncbi_cv_lib_zstd=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ncbi_cv_lib_zstd" >&5 +$as_echo "$ncbi_cv_lib_zstd" >&6; } + if test "$ncbi_cv_lib_zstd" = "no"; then + if test "${with_zstd:=no}" != no; then + as_fn_error $? "--with-zstd explicitly specified, but no usable version found." "$LINENO" 5 + fi + fi + fi + if test "$with_zstd" = "no"; then + ZSTD_PATH="No_ZSTD" + ZSTD_INCLUDE= + ZSTD_LIBS= + else + WithPackages="$WithPackages${WithPackagesSep}ZSTD"; WithPackagesSep=" " + ZSTD_INCLUDE=" $ZSTD_INCLUDE" + +$as_echo "#define HAVE_LIBZSTD 1" >>confdefs.h + + fi + + + + if test -z "$PCRE_PATH" && pcre-config --version >/dev/null 2>&1; then p=`pcre-config --prefix` test "x$p" = "x/usr" || PCRE_PATH=$p @@ -28462,8 +28701,13 @@ fi ## FreeType and FTGL if test "$with_freetype" != "no" ; then - : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin} - # Extract the first word of "freetype-config", so it can be a program name with args. + ft2pc="env PKG_CONFIG_PATH=$FREETYPE_PATH/lib/pkgconfig pkg-config freetype2" + if $ft2pc --exists >/dev/null 2>&1; then + freetype_config=$ft2pc + FREETYPE_PATH=`$ft2pc --variable=exec_prefix` + else + : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin} + # Extract the first word of "freetype-config", so it can be a program name with args. set dummy freetype-config; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word...
" >&6; } @@ -28504,8 +28748,8 @@ $as_echo "no" >&6; } fi + fi if test -n "$freetype_config" ; then - : ${FREETYPE_BINPATH=`dirname $freetype_config`} : ${FREETYPE_INCLUDE=`$freetype_config --cflags`} if test -z "${FREETYPE_LIBS+set}"; then if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then @@ -32463,12 +32707,15 @@ if test -n "$GRPC_PATH"; then done fi if $grpc_pc grpc++ --exists 2>/dev/null; then + GRPC_SED=sed if test -f "$GRPC_PATH/lib/libboringssl.a"; then GRPC_SED="sed -e s/-lssl/-lboringssl/g -e s/-lcrypto/-lboringcrypto/g" - elif test -f /usr/lib/libssl.dylib -a \ + fi + if test -f /usr/lib/libssl.dylib -a \ x"`$grpc_pc grpc++ --variable=prefix`" != x/sw; then - GRPC_SED="sed -e s,-L/sw/lib,," - else + GRPC_SED="$GRPC_SED -e s,-L/sw/lib,," + fi + if test "$GRPC_SED" = sed; then GRPC_SED=cat fi GRPC_CONFIG_LIBS="`$grpc_pc grpc++ grpc --libs | $GRPC_SED`" @@ -32651,10 +32898,12 @@ if test -n "$GRPC_LIBS"; then if test -n "$GRPC_CONFIG_LIBS"; then GRPC_LIBS="$GRPC_CONFIG_LIBS $PROTOBUF_LIBS $GRPC_LDEP" GRPC_UNSECURE_LIBS="`$grpc_pc grpc++_unsecure grpc_unsecure --libs`" - case " $GRPC_LIBS ::: $GRPC_UNSECURE_LIBS " in - *:::*" -lupb "* ) ;; - *" -lupb "* ) GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS -lupb" ;; - esac + for x in address_sorting upb cares; do + case " $GRPC_LIBS ::: $GRPC_UNSECURE_LIBS " in + *:::*" -l$x "* ) ;; + *" -l$x "* ) GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS -l$x" ;; + esac + done GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS $PROTOBUF_LIBS $GRPC_LDEP" else LDFLAGS="$orig_LDFLAGS $GRPC_LIBPATH" @@ -32827,6 +33076,20 @@ if test -d "$AWS_SDK_PATH"; then fi done fi +AWS_SDK_LDEP= +AWS_SDK_STATIC_LDEP= +for d in "$AWS_SDK_PATH/lib$bit64_sfx" "$AWS_SDK_PATH/lib" \ + /usr/lib/$multiarch /usr/lib$bit64_sfx /usr/lib \ + /usr/local/lib$bit64_sfx /usr/local/lib; do + if test -f "$d/libaws-cpp-sdk-s3.a"; then + AWS_SDK_LIBDIR=$d + if test -f "$AWS_SDK_LIBDIR/libaws-c-event-stream.a"; then + AWS_SDK_LDEP="-laws-c-event-stream -laws-checksums 
-laws-c-common" + AWS_SDK_STATIC_LDEP="-laws-c-event-stream-static -laws-checksums-static -laws-c-common-static" + fi + break + fi +done if test "$with_aws_sdk" != "no"; then case "$AWS_SDK_PATH:$with_aws_sdk" in *:yes | *: | $with_aws_sdk* ) ;; @@ -32902,10 +33165,10 @@ if test "$with_aws_sdk" != "no"; then AWS_SDK_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" fi fi - AWS_SDK_LIBS="$AWS_SDK_LIBPATH -laws-cpp-sdk-s3 -laws-cpp-sdk-ec2 -laws-cpp-sdk-core" + AWS_SDK_LIBS="$AWS_SDK_LIBPATH -laws-cpp-sdk-s3 -laws-cpp-sdk-ec2 -laws-cpp-sdk-core $AWS_SDK_LDEP" else AWS_SDK_INCLUDE="" - AWS_SDK_LIBS="-laws-cpp-sdk-s3 -laws-cpp-sdk-ec2 -laws-cpp-sdk-core" + AWS_SDK_LIBS="-laws-cpp-sdk-s3 -laws-cpp-sdk-ec2 -laws-cpp-sdk-core $AWS_SDK_LDEP" in_path= fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libaws-cpp-sdk-s3$in_path" >&5 @@ -32959,8 +33222,8 @@ $as_echo "#define HAVE_LIBAWS_SDK 1" >>confdefs.h if test "$with_aws_sdk" != no -a \ - -f "$AWS_SDK_PATH/lib$bit64_sfx/libaws-cpp-sdk-s3-static.a"; then - AWS_SDK_STATIC_LIBS="-L$AWS_SDK_PATH/lib -laws-cpp-sdk-s3-static -laws-cpp-sdk-ec2-static -laws-cpp-sdk-core-static" + -f "$AWS_SDK_LIBDIR/libaws-cpp-sdk-s3-static.a"; then + AWS_SDK_STATIC_LIBS="-L$AWS_SDK_LIBDIR -laws-cpp-sdk-s3-static -laws-cpp-sdk-ec2-static -laws-cpp-sdk-core-static $AWS_SDK_STATIC_LDEP" else AWS_SDK_STATIC_LIBS=$AWS_SDK_LIBS fi @@ -33101,6 +33364,550 @@ else HIREDIS_STATIC_LIBS=$HIREDIS_LIBS fi +case "$with_apache_arrow" in + yes | no | '' ) ;; + * ) APACHE_ARROW_PATH=$with_apache_arrow ;; +esac +if test -d "$APACHE_ARROW_PATH"; then + ncbi_fix_dir_tmp=`if cd $APACHE_ARROW_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` + case "$ncbi_fix_dir_tmp" in + /.*) ncbi_fix_dir_tmp2=`cd $APACHE_ARROW_PATH && $smart_pwd 2>/dev/null` + if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then + APACHE_ARROW_PATH=$ncbi_fix_dir_tmp2 + else + case "$APACHE_ARROW_PATH" in + /*) ;; + * ) APACHE_ARROW_PATH=$ncbi_fix_dir_tmp ;; + esac + fi + ;; + /*) 
APACHE_ARROW_PATH=$ncbi_fix_dir_tmp ;; + esac + for d in "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx$asan_sfx" \ + "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx$asan_sfx" \ + "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$asan_sfx" \ + "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$asan_sfx" \ + "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ + "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ + "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \ + "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do + if test -d "$d"; then + APACHE_ARROW_PATH=$d + ncbi_fix_dir_tmp=`if cd $APACHE_ARROW_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` + case "$ncbi_fix_dir_tmp" in + /.*) ncbi_fix_dir_tmp2=`cd $APACHE_ARROW_PATH && $smart_pwd 2>/dev/null` + if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then + APACHE_ARROW_PATH=$ncbi_fix_dir_tmp2 + else + case "$APACHE_ARROW_PATH" in + /*) ;; + * ) APACHE_ARROW_PATH=$ncbi_fix_dir_tmp ;; + esac + fi + ;; + /*) APACHE_ARROW_PATH=$ncbi_fix_dir_tmp ;; + esac + break + fi + done +fi + +if test "$with_apache_arrow" != "no"; then + case "$APACHE_ARROW_PATH:$with_apache_arrow" in + *:yes | *: | $with_apache_arrow* ) ;; + * ) APACHE_ARROW_PATH=$with_apache_arrow ;; + esac + if test "$APACHE_ARROW_PATH" != /usr -a -d "$APACHE_ARROW_PATH"; then + in_path=" in $APACHE_ARROW_PATH" + if test -z "$APACHE_ARROW_INCLUDE" -a -d "$APACHE_ARROW_PATH/include"; then + APACHE_ARROW_INCLUDE="-I$APACHE_ARROW_PATH/include" + fi + if test -n "$APACHE_ARROW_LIBPATH"; then + : + elif test -d "$APACHE_ARROW_PATH/lib${bit64_sfx}"; then + ncbi_rp_L_flags= + ncbi_rp_L_sep=$CONF_f_libpath + if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then + for x in $APACHE_ARROW_PATH/lib${bit64_sfx}; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" 
$CONF_f_libpath" + done + APACHE_ARROW_LIBPATH="${ncbi_rp_L_flags}" + else + ncbi_rp_R_flags= + ncbi_rp_R_sep=" $CONF_f_runpath" + for x in $APACHE_ARROW_PATH/lib${bit64_sfx}; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + x=`echo $x | sed -e "$ncbi_rpath_sed"` + ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x" + ncbi_rp_R_sep=: + done + APACHE_ARROW_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" + fi + elif test -d "$APACHE_ARROW_PATH/lib"; then + ncbi_rp_L_flags= + ncbi_rp_L_sep=$CONF_f_libpath + if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then + for x in $APACHE_ARROW_PATH/lib; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + done + APACHE_ARROW_LIBPATH="${ncbi_rp_L_flags}" + else + ncbi_rp_R_flags= + ncbi_rp_R_sep=" $CONF_f_runpath" + for x in $APACHE_ARROW_PATH/lib; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + x=`echo $x | sed -e "$ncbi_rpath_sed"` + ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x" + ncbi_rp_R_sep=: + done + APACHE_ARROW_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" + fi + fi + APACHE_ARROW_LIBS="$APACHE_ARROW_LIBPATH -lparquet -larrow" + else + APACHE_ARROW_INCLUDE="" + APACHE_ARROW_LIBS="-lparquet -larrow" + in_path= + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libparquet$in_path" >&5 +$as_echo_n "checking for libparquet$in_path... 
" >&6; } +if ${ncbi_cv_lib_apache_arrow+:} false; then : + $as_echo_n "(cached) " >&6 +else + CPPFLAGS=" $APACHE_ARROW_INCLUDE $orig_CPPFLAGS" + LIBS="$APACHE_ARROW_LIBS $orig_LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +parquet::ParquetFileReader pfr; + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + ncbi_cv_lib_apache_arrow=yes +else + ncbi_cv_lib_apache_arrow=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ncbi_cv_lib_apache_arrow" >&5 +$as_echo "$ncbi_cv_lib_apache_arrow" >&6; } + if test "$ncbi_cv_lib_apache_arrow" = "no"; then + if test "${with_apache_arrow:=no}" != no; then + as_fn_error $? "--with-apache_arrow explicitly specified, but no usable version found." "$LINENO" 5 + fi + fi + fi + if test "$with_apache_arrow" = "no"; then + APACHE_ARROW_PATH="No_APACHE_ARROW" + APACHE_ARROW_INCLUDE= + APACHE_ARROW_LIBS= + else + WithPackages="$WithPackages${WithPackagesSep}APACHE_ARROW"; WithPackagesSep=" " + APACHE_ARROW_INCLUDE=" $APACHE_ARROW_INCLUDE" + +$as_echo "#define HAVE_LIBAPACHE_ARROW 1" >>confdefs.h + + fi + + + +if test "$with_apache_arrow" != no -a \ + -f "$APACHE_ARROW_LIBDIR/libparquet-static.a"; then + APACHE_ARROW_STATIC_LIBS="-L$APACHE_ARROW_LIBDIR -lparquet-static -larrow-static -larrow_bundled_dependencies-static $BZ2_LIBS $Z_LIBS -lzstd" +else + APACHE_ARROW_STATIC_LIBS=$APACHE_ARROW_LIBS +fi + +case "$with_librdkafka" in + yes | no | '' ) ;; + * ) LIBRDKAFKA_PATH=$with_librdkafka ;; +esac +if test -d "$LIBRDKAFKA_PATH"; then + ncbi_fix_dir_tmp=`if cd $LIBRDKAFKA_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` + case "$ncbi_fix_dir_tmp" in + /.*) ncbi_fix_dir_tmp2=`cd $LIBRDKAFKA_PATH && $smart_pwd 2>/dev/null` + if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then + LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp2 + else + case "$LIBRDKAFKA_PATH" in + /*) ;; + * ) 
LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp ;; + esac + fi + ;; + /*) LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp ;; + esac + for d in "$LIBRDKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ + "$LIBRDKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ + "$LIBRDKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \ + "$LIBRDKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do + if test -d "$d"; then + LIBRDKAFKA_PATH=$d + ncbi_fix_dir_tmp=`if cd $LIBRDKAFKA_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` + case "$ncbi_fix_dir_tmp" in + /.*) ncbi_fix_dir_tmp2=`cd $LIBRDKAFKA_PATH && $smart_pwd 2>/dev/null` + if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then + LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp2 + else + case "$LIBRDKAFKA_PATH" in + /*) ;; + * ) LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp ;; + esac + fi + ;; + /*) LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp ;; + esac + break + fi + done +fi + +if test "$with_librdkafka" != "no"; then + case "$LIBRDKAFKA_PATH:$with_librdkafka" in + *:yes | *: | $with_librdkafka* ) ;; + * ) LIBRDKAFKA_PATH=$with_librdkafka ;; + esac + if test "$LIBRDKAFKA_PATH" != /usr -a -d "$LIBRDKAFKA_PATH"; then + in_path=" in $LIBRDKAFKA_PATH" + if test -z "$LIBRDKAFKA_INCLUDE" -a -d "$LIBRDKAFKA_PATH/include"; then + LIBRDKAFKA_INCLUDE="-I$LIBRDKAFKA_PATH/include" + fi + if test -n "$LIBRDKAFKA_LIBPATH"; then + : + elif test -d "$LIBRDKAFKA_PATH/lib${bit64_sfx}"; then + ncbi_rp_L_flags= + ncbi_rp_L_sep=$CONF_f_libpath + if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then + for x in $LIBRDKAFKA_PATH/lib${bit64_sfx}; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + done + LIBRDKAFKA_LIBPATH="${ncbi_rp_L_flags}" + else + ncbi_rp_R_flags= + ncbi_rp_R_sep=" $CONF_f_runpath" + for x in $LIBRDKAFKA_PATH/lib${bit64_sfx}; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + 
ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + x=`echo $x | sed -e "$ncbi_rpath_sed"` + ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x" + ncbi_rp_R_sep=: + done + LIBRDKAFKA_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" + fi + elif test -d "$LIBRDKAFKA_PATH/lib"; then + ncbi_rp_L_flags= + ncbi_rp_L_sep=$CONF_f_libpath + if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then + for x in $LIBRDKAFKA_PATH/lib; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + done + LIBRDKAFKA_LIBPATH="${ncbi_rp_L_flags}" + else + ncbi_rp_R_flags= + ncbi_rp_R_sep=" $CONF_f_runpath" + for x in $LIBRDKAFKA_PATH/lib; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + x=`echo $x | sed -e "$ncbi_rpath_sed"` + ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x" + ncbi_rp_R_sep=: + done + LIBRDKAFKA_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" + fi + fi + LIBRDKAFKA_LIBS="$LIBRDKAFKA_LIBPATH -lrdkafka " + else + LIBRDKAFKA_INCLUDE="" + LIBRDKAFKA_LIBS="-lrdkafka " + in_path= + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for librdkafka$in_path" >&5 +$as_echo_n "checking for librdkafka$in_path... " >&6; } +if ${ncbi_cv_lib_librdkafka+:} false; then : + $as_echo_n "(cached) " >&6 +else + CPPFLAGS=" $LIBRDKAFKA_INCLUDE $orig_CPPFLAGS" + LIBS="$LIBRDKAFKA_LIBS $orig_LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ +rd_kafka_conf_t *conf = rd_kafka_conf_new(); + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + ncbi_cv_lib_librdkafka=yes +else + ncbi_cv_lib_librdkafka=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ncbi_cv_lib_librdkafka" >&5 +$as_echo "$ncbi_cv_lib_librdkafka" >&6; } + if test "$ncbi_cv_lib_librdkafka" = "no"; then + if test "${with_librdkafka:=no}" != no; then + as_fn_error $? "--with-librdkafka explicitly specified, but no usable version found." "$LINENO" 5 + fi + fi + fi + if test "$with_librdkafka" = "no"; then + LIBRDKAFKA_PATH="No_LIBRDKAFKA" + LIBRDKAFKA_INCLUDE= + LIBRDKAFKA_LIBS= + else + WithPackages="$WithPackages${WithPackagesSep}LIBRDKAFKA"; WithPackagesSep=" " + LIBRDKAFKA_INCLUDE=" $LIBRDKAFKA_INCLUDE" + +$as_echo "#define HAVE_LIBRDKAFKA 1" >>confdefs.h + + fi + + + +if test "$with_librdkafka" != no -a \ + -f "$LIBRDKAFKA_PATH/lib$bit64_sfx/librdkafka-static.a"; then + LIBRDKAFKA_STATIC_LIBS="-L$LIBRDKAFKA_PATH/lib$bit64_sfx -lrdkafka-static" +else + LIBRDKAFKA_STATIC_LIBS=$LIBRDKAFKA_LIBS +fi + +case "$with_cppkafka" in + yes | no | '' ) ;; + * ) CPPKAFKA_PATH=$with_cppkafka ;; +esac +if test -d "$CPPKAFKA_PATH"; then + ncbi_fix_dir_tmp=`if cd $CPPKAFKA_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` + case "$ncbi_fix_dir_tmp" in + /.*) ncbi_fix_dir_tmp2=`cd $CPPKAFKA_PATH && $smart_pwd 2>/dev/null` + if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then + CPPKAFKA_PATH=$ncbi_fix_dir_tmp2 + else + case "$CPPKAFKA_PATH" in + /*) ;; + * ) CPPKAFKA_PATH=$ncbi_fix_dir_tmp ;; + esac + fi + ;; + /*) CPPKAFKA_PATH=$ncbi_fix_dir_tmp ;; + esac + for d in "$CPPKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ + "$CPPKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ + "$CPPKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \ + "$CPPKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do + if test -d 
"$d"; then + CPPKAFKA_PATH=$d + ncbi_fix_dir_tmp=`if cd $CPPKAFKA_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` + case "$ncbi_fix_dir_tmp" in + /.*) ncbi_fix_dir_tmp2=`cd $CPPKAFKA_PATH && $smart_pwd 2>/dev/null` + if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then + CPPKAFKA_PATH=$ncbi_fix_dir_tmp2 + else + case "$CPPKAFKA_PATH" in + /*) ;; + * ) CPPKAFKA_PATH=$ncbi_fix_dir_tmp ;; + esac + fi + ;; + /*) CPPKAFKA_PATH=$ncbi_fix_dir_tmp ;; + esac + break + fi + done +fi + +if test "$with_cppkafka" != "no"; then + case "$CPPKAFKA_PATH:$with_cppkafka" in + *:yes | *: | $with_cppkafka* ) ;; + * ) CPPKAFKA_PATH=$with_cppkafka ;; + esac + if test "$CPPKAFKA_PATH" != /usr -a -d "$CPPKAFKA_PATH"; then + in_path=" in $CPPKAFKA_PATH" + if test -z "$CPPKAFKA_INCLUDE" -a -d "$CPPKAFKA_PATH/include"; then + CPPKAFKA_INCLUDE="-I$CPPKAFKA_PATH/include" + fi + if test -n "$CPPKAFKA_LIBPATH"; then + : + elif test -d "$CPPKAFKA_PATH/lib${bit64_sfx}"; then + ncbi_rp_L_flags= + ncbi_rp_L_sep=$CONF_f_libpath + if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then + for x in $CPPKAFKA_PATH/lib${bit64_sfx}; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + done + CPPKAFKA_LIBPATH="${ncbi_rp_L_flags}" + else + ncbi_rp_R_flags= + ncbi_rp_R_sep=" $CONF_f_runpath" + for x in $CPPKAFKA_PATH/lib${bit64_sfx}; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + x=`echo $x | sed -e "$ncbi_rpath_sed"` + ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x" + ncbi_rp_R_sep=: + done + CPPKAFKA_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" + fi + elif test -d "$CPPKAFKA_PATH/lib"; then + ncbi_rp_L_flags= + ncbi_rp_L_sep=$CONF_f_libpath + if test "x${CONF_f_runpath}" = 
"x${CONF_f_libpath}"; then + for x in $CPPKAFKA_PATH/lib; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + done + CPPKAFKA_LIBPATH="${ncbi_rp_L_flags}" + else + ncbi_rp_R_flags= + ncbi_rp_R_sep=" $CONF_f_runpath" + for x in $CPPKAFKA_PATH/lib; do + case "$x" in + /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch ) + continue + ;; + esac + ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x" + ncbi_rp_L_sep=" $CONF_f_libpath" + x=`echo $x | sed -e "$ncbi_rpath_sed"` + ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x" + ncbi_rp_R_sep=: + done + CPPKAFKA_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}" + fi + fi + CPPKAFKA_LIBS="$CPPKAFKA_LIBPATH -lcppkafka $LIBRDKAFKA_LIBS" + else + CPPKAFKA_INCLUDE="" + CPPKAFKA_LIBS="-lcppkafka $LIBRDKAFKA_LIBS" + in_path= + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libcppkafka$in_path" >&5 +$as_echo_n "checking for libcppkafka$in_path... " >&6; } +if ${ncbi_cv_lib_cppkafka+:} false; then : + $as_echo_n "(cached) " >&6 +else + CPPFLAGS="$LIBRDKAFKA_INCLUDE $CPPKAFKA_INCLUDE $orig_CPPFLAGS" + LIBS="$CPPKAFKA_LIBS $orig_LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +cppkafka::Configuration cfg; cfg.set("foo", "bar"); + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + ncbi_cv_lib_cppkafka=yes +else + ncbi_cv_lib_cppkafka=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ncbi_cv_lib_cppkafka" >&5 +$as_echo "$ncbi_cv_lib_cppkafka" >&6; } + if test "$ncbi_cv_lib_cppkafka" = "no"; then + if test "${with_cppkafka:=no}" != no; then + as_fn_error $? "--with-cppkafka explicitly specified, but no usable version found." 
"$LINENO" 5 + fi + fi + fi + if test "$with_cppkafka" = "no"; then + CPPKAFKA_PATH="No_CPPKAFKA" + CPPKAFKA_INCLUDE= + CPPKAFKA_LIBS= + else + WithPackages="$WithPackages${WithPackagesSep}CPPKAFKA"; WithPackagesSep=" " + CPPKAFKA_INCLUDE="$LIBRDKAFKA_INCLUDE $CPPKAFKA_INCLUDE" + +$as_echo "#define HAVE_LIBCPPKAFKA 1" >>confdefs.h + + fi + + + +if test "$with_cppkafka" != no -a \ + -f "$CPPKAFKA_PATH/lib$bit64_sfx/libcppkafka-static.a"; then + CPPKAFKA_STATIC_LIBS="-L$CPPKAFKA_PATH/lib$bit64_sfx -lcppkafka-static $LIBRDKAFKA_STATIC_LIBS" +else + CPPKAFKA_STATIC_LIBS=$CPPKAFKA_LIBS +fi + ### Restore original compiler/linker flags LIBS="$orig_LIBS" CPPFLAGS="$orig_CPPFLAGS" @@ -33526,7 +34333,7 @@ for x in ChaosMonkey Int8GI StrictGI PSGLoader GCC KCC ICC AppleClang LLVMClang ;; esac done - for x in UUID FUSE Iconv LIBUNWIND LIBDW BACKWARD_CPP Z LocalZ BZ2 LocalBZ2 LZO PCRE LocalPCRE MBEDTLS GMP GCRYPT NETTLE GNUTLS OPENSSL KRB5 CURL Sybase DBLib FreeTDS MySQL BerkeleyDB BerkeleyDB++ ODBC PYTHON PYTHON25 PYTHON26 PYTHON27 PYTHON3 PERL Boost.Chrono Boost.Filesystem Boost.Iostreams Boost.Program-Options Boost.Regex Boost.Serialization Boost.Spirit Boost.System Boost.Test Boost.Test.Included Boost.Thread C-Toolkit OpenGL MESA GLUT GLEW wxWidgets wx2.8 Fast-CGI LocalSSS LocalMSGMAIL2 SSSUTILS LocalNCBILS NCBILS2 SSSDB SP ORBacus ICU EXPAT SABLOT LIBXML LIBXSLT LIBEXSLT Xerces Xalan Zorba SQLITE3 SQLITE3ASYNC VDB NGS OECHEM SGE MUPARSER HDF5 JPEG PNG TIFF GIF UNGIF XPM GL2PS FreeType FTGL MAGIC MIMETIC GSOAP AVRO Cereal SASL2 MONGODB MONGODB3 LEVELDB GMOCK LAPACK LMDB LocalLMDB LIBUV LIBSSH2 CASSANDRA NGHTTP2 H2O INFLUXDB LIBXLSXWRITER PROTOBUF GRPC MSGSL AWS_SDK HIREDIS; do + for x in UUID FUSE Iconv LIBUNWIND LIBDW BACKWARD_CPP Z LocalZ BZ2 LocalBZ2 LZO ZSTD PCRE LocalPCRE MBEDTLS GMP GCRYPT NETTLE GNUTLS OPENSSL KRB5 CURL Sybase DBLib FreeTDS MySQL BerkeleyDB BerkeleyDB++ ODBC PYTHON PYTHON25 PYTHON26 PYTHON27 PYTHON3 PERL Boost.Chrono Boost.Filesystem Boost.Iostreams 
Boost.Program-Options Boost.Regex Boost.Serialization Boost.Spirit Boost.System Boost.Test Boost.Test.Included Boost.Thread C-Toolkit OpenGL MESA GLUT GLEW wxWidgets wx2.8 Fast-CGI LocalSSS LocalMSGMAIL2 SSSUTILS LocalNCBILS NCBILS2 SSSDB SP ORBacus ICU EXPAT SABLOT LIBXML LIBXSLT LIBEXSLT Xerces Xalan Zorba SQLITE3 SQLITE3ASYNC VDB NGS OECHEM SGE MUPARSER HDF5 JPEG PNG TIFF GIF UNGIF XPM GL2PS FreeType FTGL MAGIC MIMETIC GSOAP AVRO Cereal SASL2 MONGODB MONGODB3 LEVELDB GMOCK LAPACK LMDB LocalLMDB LIBUV LIBSSH2 CASSANDRA NGHTTP2 H2O INFLUXDB LIBXLSXWRITER PROTOBUF GRPC MSGSL AWS_SDK HIREDIS APACHE_ARROW LIBRDKAFKA CPPKAFKA; do case " $WithPackages " in *" $x "*) ;; *) WithoutPackages="$WithoutPackages$WithoutPackagesSep$x" @@ -33935,6 +34742,9 @@ c_ncbi_runpath=`echo "$ncbi_runpath" | sed -e 's:\\$\\$:\\$:g'` + + + diff --git a/c++/src/build-system/configure.ac b/c++/src/build-system/configure.ac index cb3fe35e..9c72b8ea 100644 --- a/c++/src/build-system/configure.ac +++ b/c++/src/build-system/configure.ac @@ -1,5 +1,5 @@ ############################################################################# -# $Id: configure.ac 608058 2020-05-11 16:30:05Z ivanov $ +# $Id: configure.ac 616396 2020-09-15 18:22:00Z ivanov $ # Derived from configure.in version 1.173. 
# ========================================================================== # @@ -65,7 +65,7 @@ case "$with_3psw" in with_ncbi_c=no fi m4_foreach(X, [sss, sssutils, sssdb, vdb, ngs, libunwind, - z, bz2, lzo, pcre, mbedtls, + z, bz2, lzo, zstd, pcre, mbedtls, gmp, gcrypt, nettle, gnutls, openssl, krb5, boost, lmdb, sybase, ftds, mysql, opengl, mesa, glut, glew, gl2ps, wxwidgets, freetype, ftgl, fastcgi, bdb, orbacus, odbc, @@ -75,7 +75,8 @@ case "$with_3psw" in curl, gsoap, avro, cereal, sasl2, mongodb, mongodb3, leveldb, gmock, lapack, libuv, libssh2, cassandra, nghttp2, h2o, influxdb, - libxlsxwriter, protobuf, grpc, msgsl, aws-sdk, hiredis], + libxlsxwriter, protobuf, grpc, msgsl, aws-sdk, hiredis, + apache-arrow, librdkafka, cppkafka], [if test "${[with_]X-no}" != "no"; then AC_MSG_ERROR([incompatible options: --with-]X[ but --without-3psw]) else @@ -283,6 +284,10 @@ AC_ARG_WITH(lzo, [ --with-lzo=DIR use LZO installation in DIR (requires 2.x or up)]) AC_ARG_WITH(lzo, [ --without-lzo do not use LZO]) +AC_ARG_WITH(zstd, + [ --with-zstd=DIR use Zstandard installation in DIR]) +AC_ARG_WITH(zstd, + [ --without-zstd do not use Zstandard]) AC_ARG_WITH(pcre, [ --with-pcre=DIR use PCRE installation in DIR]) AC_ARG_WITH(pcre, @@ -579,8 +584,20 @@ AC_ARG_WITH(aws-sdk, [ --without-aws-sdk do not use the Amazon Web Services SDK]) AC_ARG_WITH(hiredis, [ --with-hiredis=DIR use Hiredis installation in DIR]) -AC_ARG_WITH(grpc, +AC_ARG_WITH(hiredis, [ --without-hiredis do not use Hiredis]) +AC_ARG_WITH(apache-arrow, + [ --with-apache-arrow=DIR use Apache Arrow installation in DIR]) +AC_ARG_WITH(apache-arrow, + [ --without-apache-arrow do not use Apache Arrow]) +AC_ARG_WITH(librdkafka, + [ --with-librdkafka=DIR use librdkafka installation in DIR]) +AC_ARG_WITH(librdkafka, + [ --without-librdkafka do not use librdkafka]) +AC_ARG_WITH(cppkafka, + [ --with-cppkafka=DIR use cppkafka installation in DIR]) +AC_ARG_WITH(cppkafka, + [ --without-cppkafka do not use cppkafka]) 
AC_ARG_WITH(3psw, [ --with-3psw=std:netopt favor standard (system) builds of the above pkgs.]) AC_ARG_WITH(3psw, @@ -638,7 +655,8 @@ check ncbi-public strip pch caution ccache distcc \ ncbi-c wxwidgets wxwidgets-ucs fastcgi sss sssdb sssutils included-sss \ geo included-geo vdb downloaded-vdb static-vdb ngs libunwind libdw \ backward-cpp backward-cpp-sig \ -z bz2 lzo pcre mbedtls gmp gcrypt nettle gnutls static-gnutls openssl krb5 \ +z bz2 lzo zstd pcre mbedtls \ +gmp gcrypt nettle gnutls static-gnutls openssl krb5 \ sybase sybase-local sybase-new ftds mysql \ orbacus freetype ftgl opengl mesa glut glew glew-mx gl2ps \ bdb python perl jni sqlite3 icu boost boost-tag \ @@ -649,6 +667,7 @@ magic curl mimetic gsoap avro cereal sasl2 \ mongodb mongodb3 leveldb gmock lapack lmdb \ libuv libssh2 cassandra nghttp2 h2o influxdb \ libxlsxwriter protobuf grpc msgsl aws-sdk hiredis \ +apache-arrow librdkafka cppkafka \ 3psw local-lbsm ncbi-crypt connext \ serial objects dbapi app ctools gui algo internal gbench" @@ -703,7 +722,7 @@ for x_arg in "$@" ; do --srcdir=* | --x-includes=* | --x-libraries=* | --with-tcheck=* \ | --with-ncbi-c=* | --with-sss=* | --with-vdb=* | --with-ngs=* \ | --with-libunwind=* | --with-libdw=* | --with-backward-cpp=* \ - | --with-z=* | --with-bz2=* | --with-lzo=* \ + | --with-z=* | --with-bz2=* | --with-lzo=* | --with-zstd=* \ | --with-pcre=* | --with-mbedtls=* \ | --with-gmp=* | --with-gcrypt=* | --with-nettle=* \ | --with-gnutls=* | --with-openssl=* | --with-krb5=* \ @@ -4517,6 +4536,13 @@ if test -n "$LZO_LIBS" -a "x$with_bin_release" = xyes \ LZO_LIBS="$LZO_LIBPATH -llzo2-static" fi +if test -d "$ZSTD_PATH"; then + NCBI_FIX_DIR(ZSTD_PATH) +fi +NCBI_CHECK_THIRD_PARTY_LIB(zstd, + [AC_LANG_PROGRAM([@%:@include ], + [[ZSTD_CCtx* cctx = ZSTD_createCCtx();]])]) + if test -z "$PCRE_PATH" && pcre-config --version >/dev/null 2>&1; then p=`pcre-config --prefix` test "x$p" = "x/usr" || PCRE_PATH=$p @@ -7638,11 +7664,16 @@ fi ## FreeType and FTGL if test 
"$with_freetype" != "no" ; then - : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin} - AC_PATH_PROG(freetype_config, freetype-config, [], - [$FREETYPE_BINPATH:$PATH]) + ft2pc="env PKG_CONFIG_PATH=$FREETYPE_PATH/lib/pkgconfig pkg-config freetype2" + if $ft2pc --exists >/dev/null 2>&1; then + freetype_config=$ft2pc + FREETYPE_PATH=`$ft2pc --variable=exec_prefix` + else + : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin} + AC_PATH_PROG(freetype_config, freetype-config, [], + [$FREETYPE_BINPATH:$PATH]) + fi if test -n "$freetype_config" ; then - : ${FREETYPE_BINPATH=`dirname $freetype_config`} : ${FREETYPE_INCLUDE=`$freetype_config --cflags`} NCBI_RPATHIFY_OUTPUT_COND(FREETYPE_LIBS, $freetype_config --libs, [$no_usr_lib]) @@ -8321,12 +8352,15 @@ if test -n "$GRPC_PATH"; then done fi if $grpc_pc grpc++ --exists 2>/dev/null; then + GRPC_SED=sed if test -f "$GRPC_PATH/lib/libboringssl.a"; then GRPC_SED="sed -e s/-lssl/-lboringssl/g -e s/-lcrypto/-lboringcrypto/g" - elif test -f /usr/lib/libssl.dylib -a \ + fi + if test -f /usr/lib/libssl.dylib -a \ x"`$grpc_pc grpc++ --variable=prefix`" != x/sw; then - GRPC_SED="sed -e s,-L/sw/lib,," - else + GRPC_SED="$GRPC_SED -e s,-L/sw/lib,," + fi + if test "$GRPC_SED" = sed; then GRPC_SED=cat fi GRPC_CONFIG_LIBS="`$grpc_pc grpc++ grpc --libs | $GRPC_SED`" @@ -8368,10 +8402,12 @@ if test -n "$GRPC_LIBS"; then if test -n "$GRPC_CONFIG_LIBS"; then GRPC_LIBS="$GRPC_CONFIG_LIBS $PROTOBUF_LIBS $GRPC_LDEP" GRPC_UNSECURE_LIBS="`$grpc_pc grpc++_unsecure grpc_unsecure --libs`" - case " $GRPC_LIBS ::: $GRPC_UNSECURE_LIBS " in - *:::*" -lupb "* ) ;; - *" -lupb "* ) GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS -lupb" ;; - esac + for x in address_sorting upb cares; do + case " $GRPC_LIBS ::: $GRPC_UNSECURE_LIBS " in + *:::*" -l$x "* ) ;; + *" -l$x "* ) GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS -l$x" ;; + esac + done GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS $PROTOBUF_LIBS $GRPC_LDEP" else LDFLAGS="$orig_LDFLAGS $GRPC_LIBPATH" @@ -8425,15 +8461,30 @@ if test -d 
"$AWS_SDK_PATH"; then fi done fi +AWS_SDK_LDEP= +AWS_SDK_STATIC_LDEP= +for d in "$AWS_SDK_PATH/lib$bit64_sfx" "$AWS_SDK_PATH/lib" \ + /usr/lib/$multiarch /usr/lib$bit64_sfx /usr/lib \ + /usr/local/lib$bit64_sfx /usr/local/lib; do + if test -f "$d/libaws-cpp-sdk-s3.a"; then + AWS_SDK_LIBDIR=$d + if test -f "$AWS_SDK_LIBDIR/libaws-c-event-stream.a"; then + AWS_SDK_LDEP="-laws-c-event-stream -laws-checksums -laws-c-common" + AWS_SDK_STATIC_LDEP="-laws-c-event-stream-static -laws-checksums-static -laws-c-common-static" + fi + break + fi +done NCBI_CHECK_THIRD_PARTY_LIB_EX(aws_sdk, AWS_SDK, aws-cpp-sdk-s3, [AC_LANG_PROGRAM([[@%:@include @%:@include ]], [[Aws::S3::S3Client s3cli; Aws::EC2::EC2Client ec2cli;]])], - [-laws-cpp-sdk-ec2 -laws-cpp-sdk-core], [$CURL_LIBS $OPENSSL_LIBS $Z_LIBS]) + [-laws-cpp-sdk-ec2 -laws-cpp-sdk-core $AWS_SDK_LDEP], + [$CURL_LIBS $OPENSSL_LIBS $Z_LIBS]) if test "$with_aws_sdk" != no -a \ - -f "$AWS_SDK_PATH/lib$bit64_sfx/libaws-cpp-sdk-s3-static.a"; then - AWS_SDK_STATIC_LIBS="-L$AWS_SDK_PATH/lib -laws-cpp-sdk-s3-static -laws-cpp-sdk-ec2-static -laws-cpp-sdk-core-static" + -f "$AWS_SDK_LIBDIR/libaws-cpp-sdk-s3-static.a"; then + AWS_SDK_STATIC_LIBS="-L$AWS_SDK_LIBDIR -laws-cpp-sdk-s3-static -laws-cpp-sdk-ec2-static -laws-cpp-sdk-core-static $AWS_SDK_STATIC_LDEP" else AWS_SDK_STATIC_LIBS=$AWS_SDK_LIBS fi @@ -8448,6 +8499,96 @@ else HIREDIS_STATIC_LIBS=$HIREDIS_LIBS fi +case "$with_apache_arrow" in + yes | no | '' ) ;; + * ) APACHE_ARROW_PATH=$with_apache_arrow ;; +esac +if test -d "$APACHE_ARROW_PATH"; then + NCBI_FIX_DIR(APACHE_ARROW_PATH) + for d in "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx$asan_sfx" \ + "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx$asan_sfx" \ + "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$asan_sfx" \ + "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$asan_sfx" \ + "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ + 
"$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ + "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \ + "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do + if test -d "$d"; then + APACHE_ARROW_PATH=$d + NCBI_FIX_DIR(APACHE_ARROW_PATH) + break + fi + done +fi + +NCBI_CHECK_THIRD_PARTY_LIB_EX(apache_arrow, APACHE_ARROW, parquet, + [AC_LANG_PROGRAM([[@%:@include ]], + [[parquet::ParquetFileReader pfr;]])], + [-larrow]) +if test "$with_apache_arrow" != no -a \ + -f "$APACHE_ARROW_LIBDIR/libparquet-static.a"; then + APACHE_ARROW_STATIC_LIBS="-L$APACHE_ARROW_LIBDIR -lparquet-static -larrow-static -larrow_bundled_dependencies-static $BZ2_LIBS $Z_LIBS -lzstd" +else + APACHE_ARROW_STATIC_LIBS=$APACHE_ARROW_LIBS +fi + +case "$with_librdkafka" in + yes | no | '' ) ;; + * ) LIBRDKAFKA_PATH=$with_librdkafka ;; +esac +if test -d "$LIBRDKAFKA_PATH"; then + NCBI_FIX_DIR(LIBRDKAFKA_PATH) + for d in "$LIBRDKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ + "$LIBRDKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ + "$LIBRDKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \ + "$LIBRDKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do + if test -d "$d"; then + LIBRDKAFKA_PATH=$d + NCBI_FIX_DIR(LIBRDKAFKA_PATH) + break + fi + done +fi + +NCBI_CHECK_THIRD_PARTY_LIB_EX(librdkafka, LIBRDKAFKA, rdkafka, + [AC_LANG_PROGRAM([[@%:@include ]], + [[rd_kafka_conf_t *conf = rd_kafka_conf_new();]])]) +if test "$with_librdkafka" != no -a \ + -f "$LIBRDKAFKA_PATH/lib$bit64_sfx/librdkafka-static.a"; then + LIBRDKAFKA_STATIC_LIBS="-L$LIBRDKAFKA_PATH/lib$bit64_sfx -lrdkafka-static" +else + LIBRDKAFKA_STATIC_LIBS=$LIBRDKAFKA_LIBS +fi + +case "$with_cppkafka" in + yes | no | '' ) ;; + * ) CPPKAFKA_PATH=$with_cppkafka ;; +esac +if test -d "$CPPKAFKA_PATH"; then + NCBI_FIX_DIR(CPPKAFKA_PATH) + for d in "$CPPKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ + "$CPPKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \ + "$CPPKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \ + 
"$CPPKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do + if test -d "$d"; then + CPPKAFKA_PATH=$d + NCBI_FIX_DIR(CPPKAFKA_PATH) + break + fi + done +fi + +NCBI_CHECK_THIRD_PARTY_LIB(cppkafka, + [AC_LANG_PROGRAM([[@%:@include ]], + [[cppkafka::Configuration cfg; cfg.set("foo", "bar");]])], + [$LIBRDKAFKA_LIBS], [], [$LIBRDKAFKA_INCLUDE]) +if test "$with_cppkafka" != no -a \ + -f "$CPPKAFKA_PATH/lib$bit64_sfx/libcppkafka-static.a"; then + CPPKAFKA_STATIC_LIBS="-L$CPPKAFKA_PATH/lib$bit64_sfx -lcppkafka-static $LIBRDKAFKA_STATIC_LIBS" +else + CPPKAFKA_STATIC_LIBS=$CPPKAFKA_LIBS +fi + ### Restore original compiler/linker flags LIBS="$orig_LIBS" CPPFLAGS="$orig_CPPFLAGS" @@ -9208,6 +9349,9 @@ AC_SUBST(GRPC_BIN) AC_SUBST(MSGSL_INCLUDE) AC_SUBST(AWS_SDK_STATIC_LIBS) AC_SUBST(HIREDIS_STATIC_LIBS) +AC_SUBST(APACHE_ARROW_STATIC_LIBS) +AC_SUBST(LIBRDKAFKA_STATIC_LIBS) +AC_SUBST(CPPKAFKA_STATIC_LIBS) AC_SUBST(ncbi_xreader_pubseqos) AC_SUBST(ncbi_xreader_pubseqos2) AC_SUBST(UNLESS_PUBSEQOS) diff --git a/c++/src/build-system/install.sh.in b/c++/src/build-system/install.sh.in index a256f94d..bd2b07bf 100644 --- a/c++/src/build-system/install.sh.in +++ b/c++/src/build-system/install.sh.in @@ -17,7 +17,7 @@ echo "[`date`]" -svn_location=`echo '$HeadURL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.10.1/c++/src/build-system/install.sh.in $' | sed "s%\\$[H]eadURL: *\\([^$][^$]*\\) \\$.*%\\1%"` +svn_location=`echo '$HeadURL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.11.0/c++/src/build-system/install.sh.in $' | sed "s%\\$[H]eadURL: *\\([^$][^$]*\\) \\$.*%\\1%"` svn_revision=`echo '$Revision: 541872 $' | sed "s%\\$[R]evision: *\\([^$][^$]*\\) \\$.*%\\1%"` script_name=`basename $0` diff --git a/c++/src/build-system/ncbi_package_version b/c++/src/build-system/ncbi_package_version index 8bbb6e40..46b81d81 100644 --- a/c++/src/build-system/ncbi_package_version +++ b/c++/src/build-system/ncbi_package_version @@ -1 +1 @@ -2.10.1 +2.11.0 diff --git 
a/c++/src/build-system/project_tree_builder.ini b/c++/src/build-system/project_tree_builder.ini index c103c0f7..839d9042 100644 --- a/c++/src/build-system/project_tree_builder.ini +++ b/c++/src/build-system/project_tree_builder.ini @@ -1,4 +1,4 @@ -# $Id: project_tree_builder.ini 607715 2020-05-06 17:37:02Z ivanov $ +# $Id: project_tree_builder.ini 617210 2020-09-28 17:22:08Z ivanov $ ############################################################################### @@ -27,7 +27,7 @@ ThirdParty_C_ncbi = \\\\snowman\\win-coremake\\Lib\\Ncbi\\C\\$(msvc_3rd)\\c.sc-2 #---------------------------------------------------------------------------- # Location of custom code generators -CustomCodeGenerator.proto = \\\\snowman\\win-coremake\\Lib\\ThirdParty\\grpc\\$(msvc_3rd)\\1.21.1-ncbi1\\bin\\ReleaseDLL +CustomCodeGenerator.proto = \\\\snowman\\win-coremake\\Lib\\ThirdParty\\grpc\\$(msvc_3rd)\\1.28.1\\bin\\ReleaseDLL XCode_CustomCodeGenerator.proto = /netopt/ncbi_tools/grpc-1.28.1-ncbi1/Release/bin #---------------------------------------------------------------------------- @@ -217,7 +217,7 @@ ThirdParty_GIF = $(ThirdPartyBasePath)\\gif\\$(msvc_3rd)\\4.1.3 ThirdParty_GLEW = $(ThirdPartyBasePath)\\glew\\$(msvc_3rd)\\1.5.8 ThirdParty_GL2PS = $(ThirdPartyBasePath)\\gl2ps\\$(msvc_3rd)\\1.4.0 ThirdParty_GNUTLS = $(ThirdPartyBasePath)\\gnutls\\$(msvc_3rd)\\3.4.9 -ThirdParty_GRPC = $(ThirdPartyBasePath)\\grpc\\$(msvc_3rd)\\1.21.1-ncbi1 +ThirdParty_GRPC = $(ThirdPartyBasePath)\\grpc\\$(msvc_3rd)\\1.28.1 ThirdParty_INFLUXDB = $(ThirdPartyBasePath)\\influxdb\\$(msvc_3rd)\\20190426 ###ThirdParty_ICU = $(ThirdPartyBasePath)\\icu\\$(msvc_3rd)\\3.2 ThirdParty_JDK = $(ThirdPartyBasePath)\\jdk\\1.6.0_25 @@ -243,7 +243,7 @@ ThirdParty_Xerces = $(ThirdPartyBasePath)\\xerces\\$(msvc_3rd)\\2.8.0 ThirdParty_XML = $(ThirdPartyBasePath)\\xml\\$(msvc_3rd)\\2.7.8 ThirdParty_XSLT = $(ThirdPartyBasePath)\\xslt\\$(msvc_3rd)\\1.1.26 ThirdParty_Z = $(ThirdPartyBasePath)\\z\\$(msvc_3rd)\\1.2.11 
-ThirdParty_VDB = $(ThirdPartyVDBBasePath)\\vdb\\vdb-versions\\2.10.5 +ThirdParty_VDB = $(ThirdPartyVDBBasePath)\\vdb\\vdb-versions\\2.10.8 PYTHON_PATH = $(ThirdPartyAppsBasePath)\\Python252\\$(msvc_3rd) @@ -475,7 +475,7 @@ ThirdParty_GL2PS = $(XCode_ThirdPartyBasePath)/gl2ps-1.4.0 ThirdParty_wxWidgets = $(XCode_ThirdPartyBasePath)/wxWidgets-3.1.3-ncbi1 ThirdParty_FreeType = /opt/X11 ThirdParty_FTGL = $(XCode_ThirdPartyBasePath)/ftgl-2.1.3-rc5 -ThirdParty_VDB = $(XCode_ThirdPartyVDBBasePath)/vdb/vdb-versions/2.10.5 +ThirdParty_VDB = $(XCode_ThirdPartyVDBBasePath)/vdb/vdb-versions/2.10.8 ThirdParty_GMP = $(Xcode_ThirdPartyBasePath)/gmp-6.0.0a ThirdParty_Nettle = $(Xcode_ThirdPartyBasePath)/nettle-3.1.1 ThirdParty_GNUTLS = $(Xcode_ThirdPartyBasePath)/gnutls-3.4.0 @@ -2116,20 +2116,20 @@ INCLUDE = $(ThirdParty_GRPC)\\include DEFINES = _WIN32_WINNT=0x0600 [GRPC.debug] LIBPATH = $(ThirdParty_GRPC)\\lib\\DebugDLL -LIB = grpc++.lib grpc.lib gpr.lib address_sorting.lib cares.lib libprotobufd.lib boringssl.lib boringcrypto.lib +LIB = grpc++.lib grpc.lib gpr.lib address_sorting.lib cares.lib libprotobufd.lib upb.lib crypto.lib ssl.lib absl_throw_delegate.lib absl_strings.lib absl_bad_optional_access.lib absl_str_format_internal.lib absl_raw_logging_internal.lib absl_int128.lib [GRPC.release] LIBPATH = $(ThirdParty_GRPC)\\lib\\ReleaseDLL -LIB = grpc++.lib grpc.lib gpr.lib address_sorting.lib cares.lib libprotobuf.lib boringssl.lib boringcrypto.lib +LIB = grpc++.lib grpc.lib gpr.lib address_sorting.lib cares.lib libprotobuf.lib upb.lib crypto.lib ssl.lib absl_throw_delegate.lib absl_strings.lib absl_bad_optional_access.lib absl_str_format_internal.lib absl_raw_logging_internal.lib absl_int128.lib [GRPC.xcode] INCLUDE = $(ThirdParty_GRPC)/include [GRPC.xcode.debug] INCLUDE = $(ThirdParty_GRPC)/Debug/include LIBPATH = $(ThirdParty_GRPC)/Debug/lib -LIB = -lgrpc++ -lgrpc -lgpr -laddress_sorting -lupb -lcares -lprotobufd -labsl_bad_optional_access -labsl_str_format_internal 
-labsl_strings -labsl_strings_internal -labsl_base -labsl_spinlock_wait -labsl_dynamic_annotations -labsl_int128 -labsl_throw_delegate -labsl_raw_logging_internal -labsl_log_severity -lssl -lcrypto +LIB = -lgrpc++ -lgrpc -lgpr -laddress_sorting -lupb -lcares -lprotobufd -labsl_bad_optional_access -labsl_str_format_internal -labsl_strings -labsl_strings_internal -labsl_base -labsl_spinlock_wait -labsl_dynamic_annotations -labsl_int128 -labsl_throw_delegate -labsl_raw_logging_internal -labsl_log_severity -lboringssl -lboringcrypto [GRPC.xcode.release] INCLUDE = $(ThirdParty_GRPC)/Release/include LIBPATH = $(ThirdParty_GRPC)/Release/lib -LIB = -lgrpc++ -lgrpc -lgpr -laddress_sorting -lupb -lcares -lprotobuf -labsl_bad_optional_access -labsl_str_format_internal -labsl_strings -labsl_strings_internal -labsl_base -labsl_spinlock_wait -labsl_dynamic_annotations -labsl_int128 -labsl_throw_delegate -labsl_raw_logging_internal -labsl_log_severity -lssl -lcrypto +LIB = -lgrpc++ -lgrpc -lgpr -laddress_sorting -lupb -lcares -lprotobuf -labsl_bad_optional_access -labsl_str_format_internal -labsl_strings -labsl_strings_internal -labsl_base -labsl_spinlock_wait -labsl_dynamic_annotations -labsl_int128 -labsl_throw_delegate -labsl_raw_logging_internal -labsl_log_severity -lboringssl -lboringcrypto [HAVE_LIBGRPC] Component = PROTOBUF GRPC diff --git a/c++/src/build-system/relocate.sh.in b/c++/src/build-system/relocate.sh.in index f9d4b2f6..1ee97fb4 100644 --- a/c++/src/build-system/relocate.sh.in +++ b/c++/src/build-system/relocate.sh.in @@ -1,6 +1,6 @@ @script_shell@ -# $Id: relocate.sh.in 608163 2020-05-12 16:03:04Z blastadm $ +# $Id: relocate.sh.in 617724 2020-10-06 07:11:17Z blastadm $ # Author: Denis Vakatov, NCBI # # Adjust paths to this build tree and the relevant source tree diff --git a/c++/src/corelib/ncbi_param.cpp b/c++/src/corelib/ncbi_param.cpp index 3b2f6af8..6fde6d82 100644 --- a/c++/src/corelib/ncbi_param.cpp +++ b/c++/src/corelib/ncbi_param.cpp @@ -1,4 +1,4 @@ -/* 
$Id: ncbi_param.cpp 598497 2019-12-10 14:23:27Z grichenk $ +/* $Id: ncbi_param.cpp 608309 2020-05-14 12:35:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -446,6 +446,7 @@ string NCBI_XNCBI_EXPORT g_GetConfigString(const char* section, } } const char* dvalue = default_value? default_value: ""; + if ( src ) *src = default_value? CParamBase::eSource_Default: CParamBase::eSource_NotSet; #ifdef NCBI_PARAM_ENABLE_CONFIG_DUMP if ( s_CanDumpConfig() ) { if ( section && *section ) { diff --git a/c++/src/corelib/ncbi_stack.cpp b/c++/src/corelib/ncbi_stack.cpp index 5b9361e4..510ffd50 100644 --- a/c++/src/corelib/ncbi_stack.cpp +++ b/c++/src/corelib/ncbi_stack.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbi_stack.cpp 569055 2018-08-15 17:40:18Z vasilche $ +/* $Id: ncbi_stack.cpp 613683 2020-08-11 17:27:52Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -119,6 +119,11 @@ void CStackTrace::x_ExpandStackTrace(void) const } +static const vector s_StackFilters { + "ncbi::CStackTrace::", "ncbi::CStackTraceImpl::", "ncbi::CException::", + "backward::" +}; + void CStackTrace::Write(CNcbiOstream& os) const { x_ExpandStackTrace(); @@ -129,7 +134,16 @@ void CStackTrace::Write(CNcbiOstream& os) const } ITERATE(TStack, it, m_Stack) { - os << m_Prefix << it->AsString() << endl; + string s = it->AsString(); + bool skip = false; + for (auto filter : s_StackFilters) { + if (s.find(filter) != NPOS) { + skip = true; + break; + } + } + if (skip) continue; + os << m_Prefix << s << endl; } } diff --git a/c++/src/corelib/ncbi_system.cpp b/c++/src/corelib/ncbi_system.cpp index 4bb589c1..b7beb86f 100644 --- a/c++/src/corelib/ncbi_system.cpp +++ b/c++/src/corelib/ncbi_system.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbi_system.cpp 601275 2020-02-04 21:52:35Z vakatov $ +/* $Id: ncbi_system.cpp 613789 2020-08-12 18:02:48Z ivanov $ * 
=========================================================================== * * PUBLIC DOMAIN NOTICE @@ -71,6 +71,10 @@ extern "C" { # define HAVE_MADVISE 1 #endif //NCBI_OS_UNIX +#if defined(NCBI_OS_LINUX) +# include +#endif + #ifdef NCBI_OS_DARWIN extern "C" { # include @@ -253,7 +257,7 @@ static bool s_SetExitHandler(TLimitsPrintHandler handler, ///////////////////////////////////////////////////////////////////////////// // -// SetHeapLimit +// Memory limits // #ifdef USE_SETMEMLIMIT @@ -287,10 +291,12 @@ bool SetMemoryLimit(size_t max_size, rl.rlim_cur = rl.rlim_max = RLIM_INFINITY; } if (setrlimit(RLIMIT_DATA, &rl) != 0) { + CNcbiError::SetFromErrno(); return false; } # if !defined(NCBI_OS_SOLARIS) if (setrlimit(RLIMIT_AS, &rl) != 0) { + CNcbiError::SetFromErrno(); return false; } # endif //NCBI_OS_SOLARIS @@ -320,6 +326,7 @@ bool SetMemoryLimitSoft(size_t max_size, rlimit rl; if (getrlimit(RLIMIT_DATA, &rl) != 0) { + CNcbiError::SetFromErrno(); return false; } if ( max_size ) { @@ -328,15 +335,18 @@ bool SetMemoryLimitSoft(size_t max_size, rl.rlim_cur = RLIM_INFINITY; } if (setrlimit(RLIMIT_DATA, &rl) != 0) { + CNcbiError::SetFromErrno(); return false; } # if !defined(NCBI_OS_SOLARIS) rlimit rlas; if (getrlimit(RLIMIT_AS, &rlas) != 0) { + CNcbiError::SetFromErrno(); return false; } rl.rlim_max = rlas.rlim_max; if (setrlimit(RLIMIT_AS, &rl) != 0) { + CNcbiError::SetFromErrno(); return false; } # endif //NCBI_OS_SOLARIS @@ -366,6 +376,7 @@ bool SetMemoryLimitHard(size_t max_size, size_t cur_soft_limit = 0; rlimit rl; if (getrlimit(RLIMIT_DATA, &rl) != 0) { + CNcbiError::SetFromErrno(); return false; } if ( max_size ) { @@ -378,11 +389,13 @@ bool SetMemoryLimitHard(size_t max_size, rl.rlim_max = RLIM_INFINITY; } if (setrlimit(RLIMIT_DATA, &rl) != 0) { + CNcbiError::SetFromErrno(); return false; } # if !defined(NCBI_OS_SOLARIS) rlimit rlas; if (getrlimit(RLIMIT_AS, &rlas) != 0) { + CNcbiError::SetFromErrno(); return false; } if ( max_size ) { @@ -399,6 +412,7 @@ 
bool SetMemoryLimitHard(size_t max_size, rlas.rlim_max = RLIM_INFINITY; } if (setrlimit(RLIMIT_AS, &rlas) != 0) { + CNcbiError::SetFromErrno(); return false; } # endif //NCBI_OS_SOLARIS @@ -434,6 +448,7 @@ bool SetHeapLimit(size_t max_size, rl.rlim_cur = rl.rlim_max = RLIM_INFINITY; } if (setrlimit(RLIMIT_DATA, &rl) != 0) { + CNcbiError::SetFromErrno(); return false; } s_MemoryLimitSoft = max_size; @@ -446,34 +461,88 @@ bool SetHeapLimit(size_t max_size, } +size_t GetVirtualMemoryLimitSoft(void) +{ + // Query limits from kernel, s_MemoryLimit* values can not reflect real limits. + rlimit rl = {0,0}; +# if !defined(NCBI_OS_SOLARIS) + if (getrlimit(RLIMIT_AS, &rl) != 0) { + CNcbiError::SetFromErrno(); + return 0; + } + if (rl.rlim_cur == RLIM_INFINITY) { + return 0; + } +#else + CNcbiError::Set(CNcbiError::eNotSupported); +#endif + return rl.rlim_cur; +} + + +size_t GetVirtualMemoryLimitHard(void) +{ + // Query limits from kernel, s_MemoryLimit* values can not reflect real limits. + rlimit rl = {0,0}; +# if !defined(NCBI_OS_SOLARIS) + if (getrlimit(RLIMIT_AS, &rl) != 0) { + CNcbiError::SetFromErrno(); + return 0; + } + if (rl.rlim_max == RLIM_INFINITY) { + return 0; + } +#else + CNcbiError::Set(CNcbiError::eNotSupported); +#endif + return rl.rlim_max; +} + + #else bool SetMemoryLimit(size_t max_size, TLimitsPrintHandler handler, TLimitsPrintParameter parameter) { - return false; + CNcbiError::Set(CNcbiError::eNotSupported); + return false; } bool SetMemoryLimitSoft(size_t max_size, TLimitsPrintHandler handler, TLimitsPrintParameter parameter) { - return false; + CNcbiError::Set(CNcbiError::eNotSupported); + return false; } bool SetMemoryLimitHard(size_t max_size, TLimitsPrintHandler handler, TLimitsPrintParameter parameter) { - return false; + CNcbiError::Set(CNcbiError::eNotSupported); + return false; } bool SetHeapLimit(size_t max_size, TLimitsPrintHandler handler, TLimitsPrintParameter parameter) { - return false; + CNcbiError::Set(CNcbiError::eNotSupported); + 
return false; +} + +size_t GetVirtualMemoryLimitSoft(void) +{ + CNcbiError::Set(CNcbiError::eNotSupported); + return 0; +} + +size_t GetVirtualMemoryLimitHard(void) +{ + CNcbiError::Set(CNcbiError::eNotSupported); + return 0; } #endif //USE_SETMEMLIMIT @@ -621,6 +690,53 @@ unsigned int CSystemInfo::GetCpuCount(void) } +unsigned int CSystemInfo::GetCpuCountAllowed(void) +{ + +#if defined(NCBI_OS_MSWIN) + + DWORD_PTR proc_mask = 0, sys_mask = 0; + if (!::GetProcessAffinityMask(::GetCurrentProcess(), &proc_mask, &sys_mask)) { + return 0; + } + unsigned int n = 0; // number of bits set in proc_mask + for (; proc_mask; proc_mask >>= 1) { + n += proc_mask & 1; + } + return n; + +#elif defined(NCBI_OS_LINUX) + + unsigned int total_cpus = CSystemInfo::GetCpuCount(); + if (total_cpus == 1) { + // GetCpuCount() returns 1 if unable to get real number + return 1; + } + // Standard type cpu_set_t can be limited if used directly, + // so use dynamic allocation approach + cpu_set_t* cpuset_ptr = CPU_ALLOC(total_cpus); + if (cpuset_ptr == NULL) { + return 0; + } + size_t cpuset_size = CPU_ALLOC_SIZE(total_cpus); + CPU_ZERO_S(cpuset_size, cpuset_ptr); + + if (sched_getaffinity(getpid(), cpuset_size, cpuset_ptr) != 0) { + CPU_FREE(cpuset_ptr); + return 0; + } + int n = CPU_COUNT_S(cpuset_size, cpuset_ptr); + CPU_FREE(cpuset_ptr); + return (n < 0) ? 0 : static_cast(n); + +#endif //NCBI_OS_... 
+ + // TODO: add support for other UNIX versions where possible + + return 0; +} + + double CSystemInfo::GetUptime(void) { #if defined(NCBI_OS_MSWIN) diff --git a/c++/src/corelib/ncbiapp.cpp b/c++/src/corelib/ncbiapp.cpp index e6fc3aa8..cde3db09 100644 --- a/c++/src/corelib/ncbiapp.cpp +++ b/c++/src/corelib/ncbiapp.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbiapp.cpp 604618 2020-03-31 13:29:46Z ivanov $ +/* $Id: ncbiapp.cpp 610397 2020-06-16 18:45:55Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -69,11 +69,13 @@ BEGIN_NCBI_SCOPE // Constants // -static const char* s_ArgLogFile = "-logfile"; -static const char* s_ArgCfgFile = "-conffile"; -static const char* s_ArgVersion = "-version"; -static const char* s_ArgFullVersion = "-version-full"; -static const char* s_ArgDryRun = "-dryrun"; +static const char* s_ArgLogFile = "-logfile"; +static const char* s_ArgCfgFile = "-conffile"; +static const char* s_ArgVersion = "-version"; +static const char* s_ArgFullVersion = "-version-full"; +static const char* s_ArgFullVersionXml = "-version-full-xml"; +static const char* s_ArgFullVersionJson = "-version-full-json"; +static const char* s_ArgDryRun = "-dryrun"; ///////////////////////////////////////////////////////////////////////////// @@ -192,12 +194,21 @@ CNcbiApplicationAPI::CNcbiApplicationAPI(const SBuildInfo& build_info) m_DryRun = false; } +void CNcbiApplicationAPI::ExecuteOnExitActions() +{ + m_OnExitActions.ExecuteActions(); +} + CNcbiApplicationAPI::~CNcbiApplicationAPI(void) { CThread::sm_IsExiting = true; + // Execute exit actions before waiting for all threads to stop. - m_OnExitActions.ExecuteActions(); + // NOTE: The exit actions may already be executed by higher-level + // destructors. This is a final fail-safe place for this. 
+ ExecuteOnExitActions(); + #if defined(NCBI_THREADS) CThread::WaitForAllThreads(); #endif @@ -240,6 +251,11 @@ CNcbiApplication::CNcbiApplication(const SBuildInfo& build_info) CNcbiApplication::~CNcbiApplication() { + // This earlier execution of the actions allows a safe use of + // CNcbiApplication::Instance() from the exit action functions. Instance() + // can return NULL pointer if called as part of CNcbiApplicationAPI dtor + // when the CNcbiApplication dtor already finished. + ExecuteOnExitActions(); } @@ -886,8 +902,7 @@ int CNcbiApplicationAPI::AppMain } else if ( NStr::strcmp(argv[i], s_ArgVersion) == 0 ) { delete[] v; // Print VERSION - cout << GetFullVersion().Print( appname, - CVersionAPI::fVersionInfo | CVersionAPI::fPackageShort ); + cout << GetFullVersion().Print( appname, CVersionAPI::fVersionInfo | CVersionAPI::fPackageShort ); diag_context.DiscardMessages(); return 0; @@ -898,6 +913,18 @@ int CNcbiApplicationAPI::AppMain cout << GetFullVersion().Print( appname ); diag_context.DiscardMessages(); return 0; + } else if ( NStr::strcmp(argv[i], s_ArgFullVersionXml) == 0 ) { + delete[] v; + // Print full VERSION in XML format + cout << GetFullVersion().PrintXml( appname ); + diag_context.DiscardMessages(); + return 0; + } else if ( NStr::strcmp(argv[i], s_ArgFullVersionJson) == 0 ) { + delete[] v; + // Print full VERSION in JSON format + cout << GetFullVersion().PrintJson( appname ); + diag_context.DiscardMessages(); + return 0; // Dry run } else if ( NStr::strcmp(argv[i], s_ArgDryRun) == 0 ) { @@ -1283,82 +1310,99 @@ void CNcbiApplicationAPI::x_SetupStdio(void) void CNcbiApplicationAPI::x_AddDefaultArgs(void) { if ( !m_DisableArgDesc ) { - for(CArgDescriptions* desc : m_ArgDesc->GetAllDescriptions()) { - if (desc->IsAutoHelpEnabled()) { - if ((m_HideArgs & fHideHelp) != 0) { - if (desc->Exist("h")) { - desc->Delete("h"); + for(CArgDescriptions* desc : m_ArgDesc->GetAllDescriptions()) + { + if (desc->IsAutoHelpEnabled()) { + if ((m_HideArgs & 
fHideHelp) != 0) { + if (desc->Exist("h")) { + desc->Delete("h"); + } } } - } - if ((m_HideArgs & fHideFullHelp) != 0) { - if (desc->Exist("help")) { - desc->Delete("help"); - } - } - if ((m_HideArgs & fHideXmlHelp) != 0) { - if (desc->Exist("xmlhelp")) { - desc->Delete("xmlhelp"); - } - } - if ((m_HideArgs & fHideLogfile) != 0) { - if (desc->Exist(s_ArgLogFile + 1)) { - desc->Delete(s_ArgLogFile + 1); - } - } else { - if (!desc->Exist(s_ArgLogFile + 1)) { - desc->AddOptionalKey - (s_ArgLogFile+1, "File_Name", - "File to which the program log should be redirected", - CArgDescriptions::eOutputFile); - } - } - if ((m_HideArgs & fHideConffile) != 0) { - if (desc->Exist(s_ArgCfgFile + 1)) { - desc->Delete(s_ArgCfgFile + 1); - } - } else { - if (!desc->Exist(s_ArgCfgFile + 1)) { - desc->AddOptionalKey - (s_ArgCfgFile + 1, "File_Name", - "Program's configuration (registry) data file", - CArgDescriptions::eInputFile); + if ((m_HideArgs & fHideFullHelp) != 0) { + if (desc->Exist("help")) { + desc->Delete("help"); + } } - } - if ((m_HideArgs & fHideVersion) != 0) { - if (desc->Exist(s_ArgVersion + 1)) { - desc->Delete(s_ArgVersion + 1); + if ((m_HideArgs & fHideXmlHelp) != 0) { + if (desc->Exist("xmlhelp")) { + desc->Delete("xmlhelp"); + } } - } else { - if (!desc->Exist(s_ArgVersion + 1)) { - desc->AddFlag - (s_ArgVersion + 1, - "Print version number; ignore other arguments"); + if ((m_HideArgs & fHideLogfile) != 0) { + if (desc->Exist(s_ArgLogFile + 1)) { + desc->Delete(s_ArgLogFile + 1); + } + } else { + if (!desc->Exist(s_ArgLogFile + 1)) { + desc->AddOptionalKey + (s_ArgLogFile+1, "File_Name", + "File to which the program log should be redirected", + CArgDescriptions::eOutputFile); + } } - } - if ((m_HideArgs & fHideFullVersion) != 0) { - if (desc->Exist(s_ArgFullVersion + 1)) { - desc->Delete(s_ArgFullVersion + 1); + if ((m_HideArgs & fHideConffile) != 0) { + if (desc->Exist(s_ArgCfgFile + 1)) { + desc->Delete(s_ArgCfgFile + 1); + } + } else { + if 
(!desc->Exist(s_ArgCfgFile + 1)) { + desc->AddOptionalKey + (s_ArgCfgFile + 1, "File_Name", + "Program's configuration (registry) data file", + CArgDescriptions::eInputFile); + } } - } else { - if (!desc->Exist(s_ArgFullVersion + 1)) { - desc->AddFlag - (s_ArgFullVersion + 1, - "Print extended version data; ignore other arguments"); + if ((m_HideArgs & fHideVersion) != 0) { + if (desc->Exist(s_ArgVersion + 1)) { + desc->Delete(s_ArgVersion + 1); + } + } else { + if (!desc->Exist(s_ArgVersion + 1)) { + desc->AddFlag + (s_ArgVersion + 1, + "Print version number; ignore other arguments"); + } } - } - if ((m_HideArgs & fHideDryRun) != 0) { - if (desc->Exist(s_ArgDryRun + 1)) { - desc->Delete(s_ArgDryRun + 1); + if ((m_HideArgs & fHideFullVersion) != 0) { + if (desc->Exist(s_ArgFullVersion + 1)) { + desc->Delete(s_ArgFullVersion + 1); + } + if (desc->Exist(s_ArgFullVersionXml+ 1)) { + desc->Delete(s_ArgFullVersionXml + 1); + } + if (desc->Exist(s_ArgFullVersionJson + 1)) { + desc->Delete(s_ArgFullVersionJson + 1); + } + } else { + if (!desc->Exist(s_ArgFullVersion + 1)) { + desc->AddFlag + (s_ArgFullVersion + 1, + "Print extended version data; ignore other arguments"); + } + if (!desc->Exist(s_ArgFullVersionXml + 1)) { + desc->AddFlag + (s_ArgFullVersionXml + 1, + "Print extended version data in XML format; ignore other arguments"); + } + if (!desc->Exist(s_ArgFullVersionJson + 1)) { + desc->AddFlag + (s_ArgFullVersionJson + 1, + "Print extended version data in JSON format; ignore other arguments"); + } } - } else { - if (!desc->Exist(s_ArgDryRun + 1)) { - desc->AddFlag - (s_ArgDryRun + 1, - "Dry run the application: do nothing, only test all preconditions"); + if ((m_HideArgs & fHideDryRun) != 0) { + if (desc->Exist(s_ArgDryRun + 1)) { + desc->Delete(s_ArgDryRun + 1); + } + } else { + if (!desc->Exist(s_ArgDryRun + 1)) { + desc->AddFlag + (s_ArgDryRun + 1, + "Dry run the application: do nothing, only test all preconditions"); + } } } - } } } diff --git 
a/c++/src/corelib/ncbiargs.cpp b/c++/src/corelib/ncbiargs.cpp index e408147b..09b019f8 100644 --- a/c++/src/corelib/ncbiargs.cpp +++ b/c++/src/corelib/ncbiargs.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbiargs.cpp 604618 2020-03-31 13:29:46Z ivanov $ +/* $Id: ncbiargs.cpp 609368 2020-06-01 14:12:44Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2159,10 +2159,10 @@ CArgValue* CArgErrorHandler::HandleError(const CArgDesc& arg_desc, { if ((arg_desc.GetFlags() & CArgDescriptions::fIgnoreInvalidValue) == 0) { // Re-process invalid value to throw the same exception - arg_desc.ProcessArgument(value); + return arg_desc.ProcessArgument(value); // Should never get past ProcessArgument() } - if ((arg_desc.GetFlags() & CArgDescriptions::fWarnOnInvalidValue) == 0) { + if ((arg_desc.GetFlags() & CArgDescriptions::fWarnOnInvalidValue) != 0) { ERR_POST_X(22, Warning << "Invalid value " << value << " for argument " << arg_desc.GetName() << " - argument will be ignored."); diff --git a/c++/src/corelib/ncbidiag.cpp b/c++/src/corelib/ncbidiag.cpp index c9cca4e4..de6fd6fc 100644 --- a/c++/src/corelib/ncbidiag.cpp +++ b/c++/src/corelib/ncbidiag.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbidiag.cpp 606469 2020-04-22 14:13:58Z ivanov $ +/* $Id: ncbidiag.cpp 615738 2020-09-03 11:26:10Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -255,7 +256,8 @@ extern "C" { NCBI_PARAM_DECL(bool, Diag, Old_Post_Format); NCBI_PARAM_DEF_EX(bool, Diag, Old_Post_Format, true, eParam_NoThread, DIAG_OLD_POST_FORMAT); -static CSafeStatic s_OldPostFormat; +static CSafeStatic s_OldPostFormat( + CSafeStaticLifeSpan(CSafeStaticLifeSpan::eLifeSpan_Long, 2)); // Auto-print context properties on set/change. 
NCBI_PARAM_DECL(bool, Diag, AutoWrite_Context); @@ -2138,7 +2140,7 @@ CDiagContext_Extra& CDiagContext_Extra::PrintNcbiAppInfoOnStart(void) CNcbiApplication* ins = CNcbiApplication::Instance(); if (ins) { Print("ncbi_app_path", ins->GetProgramExecutablePath()); - const CVersion& ver = ins->GetFullVersion(); + const CVersionAPI& ver = ins->GetFullVersion(); if (!ver.GetBuildInfo().date.empty()) { Print("ncbi_app_build_date", ver.GetBuildInfo().date); } @@ -2186,7 +2188,7 @@ CDiagContext_Extra& CDiagContext_Extra::PrintNcbiAppInfoOnRequest(void) { CNcbiApplication* ins = CNcbiApplication::Instance(); if (ins) { - const CVersion& ver = ins->GetFullVersion(); + const CVersionAPI& ver = ins->GetFullVersion(); const CVersionInfo& vi = ver.GetVersionInfo(); //#if defined (NCBI_SC_VERSION) && NCBI_SC_VERSION <= 21 #if 1 @@ -7514,6 +7516,30 @@ extern void SetDiagFilter(EDiagFilter what, const char* filter_str) } +extern string GetDiagFilter(EDiagFilter what) +{ + CDiagLock lock(CDiagLock::eWrite); + if (what == eDiagFilter_Trace) + return s_TraceFilter->GetFilterStr(); + + if (what == eDiagFilter_Post) + return s_PostFilter->GetFilterStr(); + + return kEmptyStr; +} + + +extern void AppendDiagFilter(EDiagFilter what, const char* filter_str) +{ + CDiagLock lock(CDiagLock::eWrite); + if (what == eDiagFilter_Trace || what == eDiagFilter_All) + s_TraceFilter->Append(filter_str); + + if (what == eDiagFilter_Post || what == eDiagFilter_All) + s_PostFilter->Append(filter_str); +} + + /////////////////////////////////////////////////////// // CNcbiDiag:: diff --git a/c++/src/corelib/ncbidiag_p.cpp b/c++/src/corelib/ncbidiag_p.cpp index 33d1a580..1aa5c5d6 100644 --- a/c++/src/corelib/ncbidiag_p.cpp +++ b/c++/src/corelib/ncbidiag_p.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbidiag_p.cpp 486111 2015-12-01 17:17:39Z grichenk $ +/* $Id: ncbidiag_p.cpp 611708 2020-07-09 17:56:10Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -311,10 
+311,12 @@ void CDiagFilter::Clean(void) void CDiagFilter::Fill(const char* filter_string) { try { + m_Filter.clear(); CDiagSyntaxParser parser; CNcbiIstrstream in(filter_string); parser.Parse(in, *this); + m_Filter = filter_string; } catch (const CDiagSyntaxParser::TErrorInfo& err_info) { CNcbiOstrstream message; @@ -326,6 +328,13 @@ void CDiagFilter::Fill(const char* filter_string) } } +void CDiagFilter::Append(const char* filter_string) +{ + string new_filter = m_Filter + " " + filter_string; + Fill(new_filter.c_str()); +} + + EDiagFilterAction CDiagFilter::Check(const CNcbiDiag& msg, const CException* ex) const { @@ -642,7 +651,9 @@ CDiagLexParser::ESymbol CDiagLexParser::Parse(istream& in) if ( !isspace((unsigned char) symbol) ) { if ( symbol == '[' || symbol == '(' || - (symbol == '!' && CT_TO_CHAR_TYPE(in.peek()) == '(')) { + symbol == '/' || + (symbol == '!' && CT_TO_CHAR_TYPE(in.peek()) == '(') || + (symbol == '!' && CT_TO_CHAR_TYPE(in.peek()) == '/')) { in.putback( symbol ); --m_Pos; state = eStart; diff --git a/c++/src/corelib/ncbidiag_p.hpp b/c++/src/corelib/ncbidiag_p.hpp index 047e8f59..170069ac 100644 --- a/c++/src/corelib/ncbidiag_p.hpp +++ b/c++/src/corelib/ncbidiag_p.hpp @@ -1,7 +1,7 @@ #ifndef CORELIB___NCBIDIAG_P__HPP #define CORELIB___NCBIDIAG_P__HPP -/* $Id: ncbidiag_p.hpp 505891 2016-06-29 17:58:41Z gouriano $ +/* $Id: ncbidiag_p.hpp 611708 2020-07-09 17:56:10Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -260,6 +260,10 @@ public: /// Print state void Print(ostream& out) const; + const string& GetFilterStr(void) const { return m_Filter; } + + void Append(const char* filter_string); + private: /// Check if the filter accepts errcode EDiagFilterAction x_CheckErrCode(int code, int subcode, EDiagSev sev) const; @@ -296,6 +300,7 @@ private: private: typedef deque< AutoPtr > TMatchers; + string m_Filter; TMatchers m_Matchers; size_t m_NotMatchersNum; }; diff --git 
a/c++/src/corelib/ncbifile.cpp b/c++/src/corelib/ncbifile.cpp index f72409a9..d6b30d21 100644 --- a/c++/src/corelib/ncbifile.cpp +++ b/c++/src/corelib/ncbifile.cpp @@ -1,4 +1,4 @@ -/* $Id: ncbifile.cpp 604618 2020-03-31 13:29:46Z ivanov $ +/* $Id: ncbifile.cpp 610319 2020-06-15 17:06:08Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -4744,7 +4744,7 @@ static const SFileSystem s_FileSystem[] = { memset(&st, 0, sizeof(st)); \ if (statvfs(path.c_str(), &st) != 0) { \ CNcbiError::SetFromErrno(); \ - NCBI_THROW(CFileErrnoException, eFileSystemInfo, msg); \ + NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path); \ } \ info->total_space = (Uint8)st.f_bsize * st.f_blocks; \ if (st.f_frsize) { \ @@ -4762,7 +4762,7 @@ static const SFileSystem s_FileSystem[] = { memset(&st, 0, sizeof(st)); \ if (statfs(path.c_str(), &st) != 0) { \ CNcbiError::SetFromErrno(); \ - NCBI_THROW(CFileErrnoException, eFileSystemInfo, msg); \ + NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path); \ } \ info->total_space = (Uint8)st.f_bsize * st.f_blocks; \ info->free_space = (Uint8)st.f_bsize * st.f_bavail; \ @@ -5076,7 +5076,7 @@ void s_GetFileSystemInfo(const string& path, &fs_flags, fs_name, sizeof(fs_name)/sizeof(fs_name[0])) ) { - NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + xpath); + NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path); } info->filename_max = filename_max; ufs_name = _T_CSTRING(fs_name); @@ -5088,7 +5088,7 @@ void s_GetFileSystemInfo(const string& path, if ( !::GetDiskFreeSpaceEx(_T_XCSTRING(xpath), (PULARGE_INTEGER)&info->free_space, (PULARGE_INTEGER)&info->total_space, 0) ) { - NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + xpath); + NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path); } } @@ -5099,7 +5099,7 @@ void s_GetFileSystemInfo(const string& path, if ( !::GetDiskFreeSpace(_T_XCSTRING(xpath), 
&dwSectPerClust, &dwBytesPerSect, NULL, NULL) ) { - NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + xpath); + NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path); } info->block_size = dwBytesPerSect * dwSectPerClust; } @@ -5203,7 +5203,7 @@ void s_GetFileSystemInfo(const string& path, # elif defined(NCBI_OS_DARWIN) && defined(HAVE_STATFS) GET_STATFS_INFO; - // Seems statfs structure on Darwin dont have any information + // Seems statfs structure on Darwin doesn't have any information // about name length, so rely on pathconf() only. //if (need_name_max) { // info->filename_max = (unsigned long)st.f_namelen; diff --git a/c++/src/corelib/version.cpp b/c++/src/corelib/version.cpp index 3c924e35..fe77f089 100644 --- a/c++/src/corelib/version.cpp +++ b/c++/src/corelib/version.cpp @@ -1,4 +1,4 @@ -/* $Id: version.cpp 591546 2019-08-16 16:59:06Z vasilche $ +/* $Id: version.cpp 612086 2020-07-15 11:49:39Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -31,7 +31,7 @@ */ #include -#include +#include #include #include @@ -159,10 +159,10 @@ string CVersionInfo::PrintJson(void) const os << "{"; bool need_separator = false; if (m_Major >= 0) { - os << "\"major\": \"" << m_Major << - "\", \"minor\": \"" << (m_Minor >= 0 ? m_Minor : 0) << "\""; + os << "\"major\": " << m_Major << + ", \"minor\": " << (m_Minor >= 0 ? 
m_Minor : 0); if (m_PatchLevel >= 0) { - os << ", \"patch_level\": \"" << m_PatchLevel << "\""; + os << ", \"patch_level\": " << m_PatchLevel; } need_separator = true; } @@ -435,8 +435,9 @@ string CComponentVersionInfoAPI::PrintJson(void) const os << "{ \"name\": \"" << NStr::JsonEncode(GetComponentName()) << "\", \"version_info\": " << - CVersionInfo::PrintJson() << endl << - m_BuildInfo.PrintJson() << "}" << endl; + CVersionInfo::PrintJson() << ",\n" << + " \"build_info\": " << + m_BuildInfo.PrintJson() << "}"; return CNcbiOstrstreamToString(os); } @@ -573,22 +574,21 @@ string SBuildInfo::PrintJson(void) const { CNcbiOstrstream os; bool need_separator = false; - os << '{' << endl; + os << '{'; if ( !date.empty() ) { os << "\"" << ExtraNameJson(eBuildDate) << "\": \"" << NStr::JsonEncode(date) << '\"'; need_separator = true; } if ( !tag.empty() ) { - if ( need_separator ) os << ',' << endl; + if ( need_separator ) os << ", "; os << '\"' << ExtraNameJson(eBuildTag) << "\": \"" << NStr::JsonEncode(tag) << '\"'; need_separator = true; } for( const auto& e : m_extra) { - if ( need_separator ) os << "," << endl; + if ( need_separator ) os << ", "; os << '\"' << ExtraNameJson(e.first) << "\": \"" << NStr::JsonEncode(e.second) << '\"'; need_separator = true; } - if ( need_separator ) os << endl; os << '}'; return CNcbiOstrstreamToString(os); } @@ -810,7 +810,7 @@ string CVersionAPI::PrintJson(const string& appname, TPrintFlags flags) const if (flags & fComponents) { if ( need_separator ) os << ",\n"; - os << " \"components\": ["; + os << " \"component\": ["; need_separator = false; for (const auto& c : m_Components) { if ( need_separator ) os << ","; diff --git a/c++/src/dbapi/driver/dbapi_conn_factory.cpp b/c++/src/dbapi/driver/dbapi_conn_factory.cpp index 321ac0d5..eff961ec 100644 --- a/c++/src/dbapi/driver/dbapi_conn_factory.cpp +++ b/c++/src/dbapi/driver/dbapi_conn_factory.cpp @@ -1,4 +1,4 @@ -/* $Id: dbapi_conn_factory.cpp 600085 2020-01-11 15:56:54Z mcelhany $ +/* 
$Id: dbapi_conn_factory.cpp 610945 2020-06-25 18:31:37Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -35,9 +35,9 @@ #include #include #include +#include #include #include -#include "dbapi_pool_balancer.hpp" #include #include @@ -472,8 +472,8 @@ CDBConnectionFactory::DispatchServerName( && !service_name.empty() ) { balancer.Reset(new CDBPoolBalancer (service_name, params.GetParam("pool_name"), - ctx.driver_ctx, - rt_data.GetServerOptions(service_name))); + rt_data.GetServerOptions(service_name), + &ctx.driver_ctx)); } for ( ; !t_con && alternatives > 0; --alternatives ) { TSvrRef dsp_srv; @@ -488,7 +488,7 @@ CDBConnectionFactory::DispatchServerName( // In this case we even won't try to map it. else if (!service_name.empty()) { if (balancer.NotEmpty()) { - dsp_srv = balancer->GetServer(&t_con, params); + dsp_srv = balancer->GetServer(&t_con, ¶ms); } if (dsp_srv.Empty()) { dsp_srv = rt_data.GetDispatchedServer(service_name); @@ -537,8 +537,8 @@ CDBConnectionFactory::DispatchServerName( balancer.Reset (new CDBPoolBalancer (service_name, params.GetParam("pool_name"), - ctx.driver_ctx, - rt_data.GetServerOptions(service_name, true))); + rt_data.GetServerOptions(service_name, true), + &ctx.driver_ctx)); } full_retry_made = true; continue; diff --git a/c++/src/dbapi/driver/dbapi_impl_context.cpp b/c++/src/dbapi/driver/dbapi_impl_context.cpp index 06f35c31..67c0fb40 100644 --- a/c++/src/dbapi/driver/dbapi_impl_context.cpp +++ b/c++/src/dbapi/driver/dbapi_impl_context.cpp @@ -1,4 +1,4 @@ -/* $Id: dbapi_impl_context.cpp 600087 2020-01-11 19:46:51Z mcelhany $ +/* $Id: dbapi_impl_context.cpp 610920 2020-06-25 13:37:30Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -966,6 +966,7 @@ CDriverContext::SatisfyPoolMinimum(const CDBConnParams& params) ++total_cnt; } } + mg.Release(); vector< AutoPtr > conns(pool_min); for (int i = total_cnt; i < 
pool_min; ++i) { try { diff --git a/c++/src/dbapi/driver/dbapi_pool_balancer.cpp b/c++/src/dbapi/driver/dbapi_pool_balancer.cpp index ed656b3f..2a45ed76 100644 --- a/c++/src/dbapi/driver/dbapi_pool_balancer.cpp +++ b/c++/src/dbapi/driver/dbapi_pool_balancer.cpp @@ -1,4 +1,4 @@ -/* $Id: dbapi_pool_balancer.cpp 548289 2017-10-12 14:54:18Z ucko $ +/* $Id: dbapi_pool_balancer.cpp 610945 2020-06-25 18:31:37Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -33,7 +33,7 @@ #include -#include "dbapi_pool_balancer.hpp" +#include #include #include #include @@ -88,11 +88,12 @@ public: CDBPoolBalancer::CDBPoolBalancer(const string& service_name, const string& pool_name, - I_DriverContext& driver_ctx, - const IDBServiceMapper::TOptions& options) + const IDBServiceMapper::TOptions& options, + I_DriverContext* driver_ctx) : m_DriverCtx(driver_ctx), m_TotalCount(0U) { - bool is_ftds = NStr::StartsWith(driver_ctx.GetDriverName(), "ftds"); + bool is_ftds = (driver_ctx == nullptr + || NStr::StartsWith(driver_ctx->GetDriverName(), "ftds")); for (auto it : options) { CTempString name = it->GetName(); auto key = impl::MakeEndpointKey(it->GetHost(), it->GetPort()); @@ -127,10 +128,13 @@ CDBPoolBalancer::CDBPoolBalancer(const string& service_name, } const impl::CDriverContext* ctx_impl - = dynamic_cast(&driver_ctx); + = dynamic_cast(driver_ctx); impl::CDriverContext::TCounts counts; if (ctx_impl == NULL) { - ERR_POST_X(1, Warning << "Called with non-standard IDriverContext"); + if (driver_ctx != nullptr) { + ERR_POST_X(1, Warning << + "Called with non-standard IDriverContext"); + } } else if (pool_name.empty()) { ctx_impl->GetCountsForService(service_name, &counts); } else { @@ -177,7 +181,7 @@ CDBPoolBalancer::CDBPoolBalancer(const string& service_name, } TSvrRef CDBPoolBalancer::GetServer(CDB_Connection** conn, - const CDBConnParams& params) + const CDBConnParams* params) { TSvrRef result; impl::TEndpointKey conn_key = 0; 
@@ -196,10 +200,11 @@ TSvrRef CDBPoolBalancer::GetServer(CDB_Connection** conn, return result; } - if (/* m_TotalCount > 1 && */ conn != NULL) { - string pool_name = params.GetParam("pool_name"); - CDBConnParams_DNC dnc_params(params); - *conn = IDBConnectionFactory::CtxMakeConnection(m_DriverCtx, + if (/* m_TotalCount > 1 && */ conn != nullptr && params != nullptr + && m_DriverCtx != nullptr) { + string pool_name = params->GetParam("pool_name"); + CDBConnParams_DNC dnc_params(*params); + *conn = IDBConnectionFactory::CtxMakeConnection(*m_DriverCtx, dnc_params); if (*conn != NULL) { const string& server_name = (*conn)->ServerName(); @@ -214,7 +219,7 @@ TSvrRef CDBPoolBalancer::GetServer(CDB_Connection** conn, "Unrecognized endpoint for existing connection to " << impl::ConvertN2A(host) << ":" << port << " (" << server_name << ')'); - excess = m_DriverCtx.NofConnections(server_name, pool_name); + excess = m_DriverCtx->NofConnections(server_name, pool_name); result.Reset(&*it->second.ref); } else { double scale_factor = m_TotalCount / total_ranking; @@ -227,7 +232,7 @@ TSvrRef CDBPoolBalancer::GetServer(CDB_Connection** conn, << ":" << port << " (" << server_name << ") for turnover; projected excess count " << excess); if (excess > 0.0) { - string pool_max_str = params.GetParam("pool_maxsize"); + string pool_max_str = params->GetParam("pool_maxsize"); unsigned int pool_max = 0u; if ( !pool_max_str.empty() && pool_max_str != "default") { NStr::StringToNumeric(pool_max_str, &pool_max, @@ -306,8 +311,8 @@ TSvrRef CDBPoolBalancer::GetServer(CDB_Connection** conn, // This call might not close the exact connection we // considered, but closing any connection to the // relevant server is sufficient here. 
- m_DriverCtx.CloseUnusedConnections - (server_name, params.GetParam("pool_name"), 1u); + m_DriverCtx->CloseUnusedConnections + (server_name, params->GetParam("pool_name"), 1u); } } } diff --git a/c++/src/dbapi/driver/dbapi_pool_balancer.hpp b/c++/src/dbapi/driver/dbapi_pool_balancer.hpp deleted file mode 100644 index b26d6b28..00000000 --- a/c++/src/dbapi/driver/dbapi_pool_balancer.hpp +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP -#define DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP - -/* $Id: dbapi_pool_balancer.hpp 548289 2017-10-12 14:54:18Z ucko $ - * =========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - * Author: Aaron Ucko - * - */ - -/// @file dbapi_pool_balancer.hpp -/// Help distribute connections within a pool across servers. 
- -#include - -/** @addtogroup DBAPI - * - * @{ - */ - -BEGIN_NCBI_SCOPE - -class CDBPoolBalancer : public CObject -{ -public: - CDBPoolBalancer(const string& service_name, - const string& pool_name, - I_DriverContext& driver_ctx, - const IDBServiceMapper::TOptions& options); - - TSvrRef GetServer(CDB_Connection** conn, const CDBConnParams& params); - -private: - struct SEndpointInfo { - SEndpointInfo() - : effective_ranking(0.0), ideal_count(0.0), actual_count(0U), - penalty_level(0U) - { } - - CRef ref; - double effective_ranking; - double ideal_count; - unsigned int actual_count; - unsigned int penalty_level; - }; - typedef map TEndpoints; - - impl::TEndpointKey x_NameToKey(CTempString& name) const; - - TEndpoints m_Endpoints; - multiset m_Rankings; - I_DriverContext& m_DriverCtx; - unsigned int m_TotalCount; -}; - -END_NCBI_SCOPE - -/* @} */ - -#endif /* DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP */ diff --git a/c++/src/objects/dbsnp/primary_track/snpptis.cpp b/c++/src/objects/dbsnp/primary_track/snpptis.cpp index b0f1007e..85c30bd6 100644 --- a/c++/src/objects/dbsnp/primary_track/snpptis.cpp +++ b/c++/src/objects/dbsnp/primary_track/snpptis.cpp @@ -1,4 +1,5 @@ -/* =========================================================================== +/* $Id: snpptis.cpp 615550 2020-09-01 13:13:11Z fukanchi $ + * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information @@ -34,6 +35,7 @@ #ifdef HAVE_LIBGRPC # include # include +# include #endif BEGIN_NCBI_NAMESPACE; @@ -50,10 +52,46 @@ CSnpPtisClient::~CSnpPtisClient() } +#ifdef HAVE_LIBGRPC +const char* const kSection = "ID2SNP"; +const char* const kParam_PTISName = "PTIS_NAME"; +const char* const kParam_Retry = "RETRY"; +const char* const kParam_Timeout = "TIMEOUT"; +const char* const kParam_TimeoutMul = "TIMEOUT_MULTIPLIER"; +const char* const kParam_TimeoutInc = "TIMEOUT_INCREMENT"; +const char* const kParam_TimeoutMax = 
"TIMEOUT_MAX"; +const char* const kParam_WaitTime = "WAIT_TIME"; +const char* const kParam_WaitTimeMul = "WAIT_TIME_MULTIPLIER"; +const char* const kParam_WaitTimeInc = "WAIT_TIME_INCREMENT"; +const char* const kParam_WaitTimeMax = "WAIT_TIME_MAX"; +const int kDefault_Retry = 5; +const float kDefault_Timeout = 1; +const float kDefault_TimeoutMul = 1.5; +const float kDefault_TimeoutInc = 0; +const float kDefault_TimeoutMax = 10; +const float kDefault_WaitTime = 0.5; +const float kDefault_WaitTimeMul = 1.2; +const float kDefault_WaitTimeInc = 0.2; +const float kDefault_WaitTimeMax = 5; +#endif + + bool CSnpPtisClient::IsEnabled() { #ifdef HAVE_LIBGRPC - return CGRPCClientContext::IsImplemented(); + if ( !CGRPCClientContext::IsImplemented() ) { + return false; + } + // check if there's valid address + int source; + auto addr = g_NCBI_GRPC_GetAddress(kSection, kParam_PTISName, nullptr, &source); +#ifndef NCBI_OS_LINUX + if ( source == CParamBase::eSource_Default ) { + // default grpc link to linkerd daemon works on Linux only + return false; + } +#endif + return !addr.empty(); #else return false; #endif @@ -96,9 +134,20 @@ string CSnpPtisClient::GetPrimarySnpTrackForId(const CSeq_id& id) #ifdef HAVE_LIBGRPC CSnpPtisClient_Impl::CSnpPtisClient_Impl() { - channel = grpc::CreateChannel(g_NCBI_GRPC_GetAddress("ID2SNP", "PTIS_NAME"), - grpc::InsecureChannelCredentials()); - + grpc::ChannelArguments args; + string address = g_NCBI_GRPC_GetAddress(kSection, kParam_PTISName); + //LOG_POST(Trace<<"CSnpPtisClient: connecting to "<ForSeqId(&context, request, &reply); + int cur_retry = 0; + float cur_timeout = timeout; + float cur_wait_time = wait_time; + for ( ;; ) { + CGRPCClientContext context; + std::chrono::system_clock::time_point deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(Int8(cur_timeout*1e6)); + context.set_deadline(deadline); - if ( !status.ok() ) { + ncbi::grpcapi::dbsnp::primary_track::PrimaryTrackReply reply; + + auto status = 
stub->ForSeqId(&context, request, &reply); + + if ( status.ok() ) { + return reply.na_track_acc_with_filter(); + } + if ( status.error_code() == grpc::StatusCode::NOT_FOUND ) { return string(); } - NCBI_THROW(CException, eUnknown, status.error_message()); + if ( ++cur_retry >= max_retries ) { + NCBI_THROW(CException, eUnknown, status.error_message()); + } + LOG_POST(Trace<< + "CSnpPtisClient: failed : "< TUrlPrefixMap; diff --git a/c++/src/objects/genomecoll/genomic_collections_cli.cpp b/c++/src/objects/genomecoll/genomic_collections_cli.cpp index 2a47c5ee..8a015efa 100644 --- a/c++/src/objects/genomecoll/genomic_collections_cli.cpp +++ b/c++/src/objects/genomecoll/genomic_collections_cli.cpp @@ -1,4 +1,4 @@ -/* $Id: genomic_collections_cli.cpp 603970 2020-03-19 15:32:22Z ivanov $ +/* $Id: genomic_collections_cli.cpp 617470 2020-10-01 17:56:09Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -94,7 +94,7 @@ CGenomicCollectionsService::CGenomicCollectionsService(const CArgs& args) void CGenomicCollectionsService::x_ConfigureConnection() { SetTimeout(&kTimeout); - SetRetryLimit(20); + SetRetryLimit(40); // it's a backward-compatibility fix for old versions of server (no much harm to leave it - only little data overhead is expected) // always send request and get response in ASN text format so that server can properly parse request @@ -222,21 +222,34 @@ string CGenomicCollectionsService::ValidateChrType(const string& chrType, const { CGCClient_ValidateChrTypeLocRequest req; CGCClientResponse reply; - req.SetType(chrType); req.SetLocation(chrLoc); LogRequest(req); - - try { - return AskGet_chrtype_valid(req, &reply); - } catch (CException& ) { - if (reply.IsSrvr_error()) - throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error()); - throw; - } + + int retry_counter=0; + const int RETRY_MAX = 3; + for(retry_counter=1; retry_counter <= RETRY_MAX; retry_counter++) { + try { + return 
AskGet_chrtype_valid(req, &reply); + } catch (const CException& e) { + if( retry_counter == RETRY_MAX) { + if (reply.IsSrvr_error()) + throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error()); + throw e; + } else { + ERR_POST(Warning <<"Try "< CGenomicCollectionsService::FindOneAssemblyBySequences(const string& sequence_acc, int filter, CGCClient_GetAssemblyBySequenceRequest::ESort sort) { CRef asmseq_info(FindOneAssemblyBySequences(list(1, sequence_acc), filter, sort)); @@ -283,13 +296,26 @@ CRef CGenomicCollectionsService::x_FindAssembl LogRequest(req); - try { - return AskGet_assembly_by_sequence(req, &reply); - } catch (const CException& ) { - if (reply.IsSrvr_error()) - throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error()); - throw; - } + int retry_counter=0; + const int RETRY_MAX = 3; + for(retry_counter=1; retry_counter <= RETRY_MAX; retry_counter++) { + try { + return AskGet_assembly_by_sequence(req, &reply); + } catch (const CException& e) { + if( retry_counter == RETRY_MAX) { + if (reply.IsSrvr_error()) + throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error()); + throw e; + } else { + ERR_POST(Warning <<"Try "< CGenomicCollectionsService::GetEquivalentAs req.SetEquivalency(equivalency); LogRequest(req); - - try { - return AskGet_equivalent_assemblies(req, &reply); - } catch (const CException& ) { - if (reply.IsSrvr_error()) - throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error()); - throw; - } + + int retry_counter=0; + const int RETRY_MAX = 3; + for(retry_counter=1; retry_counter <= RETRY_MAX; retry_counter++) { + try { + return AskGet_equivalent_assemblies(req, &reply); + } catch (const CException& e) { + if( retry_counter == RETRY_MAX) { + if (reply.IsSrvr_error()) + throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error()); + throw e; + } else { + ERR_POST(Warning <<"Try "<CanGetQual() || !qual->CanGetVal()) { + continue; + } + if (qual->GetQual() != qualName) { + continue; + } + if 
(namedQual.empty()) { + namedQual = qual->GetVal(); + continue; + } + if (namedQual != qual->GetVal()) { + return ""; + } + } + return namedQual; +} + // ---------------------------------------------------------------------------- CSoMap::TYPEMAP CSoMap::mMapSoTypeToId; // ---------------------------------------------------------------------------- @@ -440,7 +466,7 @@ bool CSoMap::xFeatureMakeNcRna( CSeq_feat& feature) // ---------------------------------------------------------------------------- { - static const map mTypeToClass = { + static const TYPEMAP mTypeToClass = { {"ncRNA", "other"}, }; feature.SetData().SetRna().SetType(CRNA_ref::eType_ncRNA); @@ -491,7 +517,7 @@ bool CSoMap::xFeatureMakeMiscFeature( CSeq_feat& feature) // ---------------------------------------------------------------------------- { - static const map mapTypeToQual = { + static const TYPEMAP mapTypeToQual = { {"TSS", "transcription_start_site"}, }; feature.SetData().SetImp().SetKey("misc_feature"); @@ -517,7 +543,7 @@ bool CSoMap::xFeatureMakeMiscRecomb( CSeq_feat& feature) // ---------------------------------------------------------------------------- { - static const map mapTypeToQual = { + static const TYPEMAP mapTypeToQual = { {"meiotic_recombination_region", "meiotic"}, {"mitotic_recombination_region", "mitotic"}, {"non_allelic_homologous_recombination", "non_allelic_homologous"}, @@ -556,7 +582,7 @@ bool CSoMap::xFeatureMakeImp( CSeq_feat& feature) // ---------------------------------------------------------------------------- { - static const map mapTypeToKey = { + static const TYPEMAP mapTypeToKey = { {"C_gene_segment", "C_region"}, {"D_gene_segment", "D_segment"}, {"D_loop", "D-loop"}, @@ -612,7 +638,7 @@ bool CSoMap::xFeatureMakeRegulatory( CSeq_feat& feature) // ---------------------------------------------------------------------------- { - static const map mapTypeToQual = { + static const TYPEMAP mapTypeToQual = { {"DNAsel_hypersensitive_site", 
"DNase_I_hypersensitive_site"}, {"GC_rich_promoter_region", "GC_signal"}, {"boundary_element", "insulator"}, @@ -639,12 +665,12 @@ bool CSoMap::xFeatureMakeRepeatRegion( CSeq_feat& feature) // ---------------------------------------------------------------------------- { - static const map mapTypeToSatellite = { + static const TYPEMAP mapTypeToSatellite = { {"microsatellite", "microsatellite"}, {"minisatellite", "minisatellite"}, {"satellite_DNA", "satellite"}, }; - static const map mapTypeToRptType = { + static const TYPEMAP mapTypeToRptType = { {"tandem_repeat", "tandem"}, {"inverted_repeat", "inverted"}, {"direct_repeat", "direct"}, @@ -814,7 +840,7 @@ bool CSoMap::xMapGeneric( {CSeqFeatData::eSubtype_primer_bind, "primer_binding_site"}, {CSeqFeatData::eSubtype_promoter, "promoter"}, {CSeqFeatData::eSubtype_propeptide, "propeptide"}, - {CSeqFeatData::eSubtype_prot, "protein"}, + {CSeqFeatData::eSubtype_prot, "polypeptide"}, {CSeqFeatData::eSubtype_protein_bind, "protein_binding_site"}, {CSeqFeatData::eSubtype_rep_origin, "origin_of_replication"}, {CSeqFeatData::eSubtype_S_region, "S_region"}, @@ -849,7 +875,7 @@ bool CSoMap::xMapRegion( string& so_type) // ---------------------------------------------------------------------------- { - so_type = "region"; + so_type = "biological_region"; return true; } @@ -952,11 +978,11 @@ bool CSoMap::xMapMiscFeature( string& so_type) // ---------------------------------------------------------------------------- { - map mapFeatClassToSoType = { + static const TYPEMAP mapFeatClassToSoType = { {"transcription_start_site", "TSS"}, {"other", "sequence_feature"}, }; - string feat_class = feature.GetNamedQual("feat_class"); + string feat_class = GetUnambiguousNamedQual(feature, "feat_class"); if (feat_class.empty()) { so_type = "sequence_feature"; return true; @@ -976,7 +1002,7 @@ bool CSoMap::xMapMiscRecomb( string& so_type) // ---------------------------------------------------------------------------- { - map 
mapRecombClassToSoType = { + static const TYPEMAP mapRecombClassToSoType = { {"meiotic", "meiotic_recombination_region"}, {"mitotic", "mitotic_recombination_region"}, {"non_allelic_homologous", "non_allelic_homologous_recombination_region"}, @@ -985,7 +1011,7 @@ bool CSoMap::xMapMiscRecomb( {"non_allelic_homologous_recombination", "non_allelic_homologous_recombination_region"}, {"other", "recombination_feature"}, }; - string recomb_class = feature.GetNamedQual("recombination_class"); + string recomb_class = GetUnambiguousNamedQual(feature, "recombination_class"); if (recomb_class.empty()) { so_type = "recombination_feature"; return true; @@ -1022,7 +1048,7 @@ bool CSoMap::xMapNcRna( string& so_type) // ---------------------------------------------------------------------------- { - map mapNcRnaClassToSoType = { + static const TYPEMAP mapNcRnaClassToSoType = { {"antisense_RNA", "antisense_RNA"}, {"autocatalytically_spliced_intron", "autocatalytically_spliced_intron"}, {"guide_RNA", "guide_RNA"}, @@ -1044,7 +1070,7 @@ bool CSoMap::xMapNcRna( {"vault_RNA", "vault_RNA"}, {"Y_RNA", "Y_RNA"}, }; - string ncrna_class = feature.GetNamedQual("ncRNA_class"); + string ncrna_class = GetUnambiguousNamedQual(feature, "ncRNA_class"); if (ncrna_class.empty()) { if (feature.IsSetData() && feature.GetData().IsRna() && @@ -1084,8 +1110,8 @@ bool CSoMap::xMapRegulatory( string& so_type) // ---------------------------------------------------------------------------- { - map mapRegulatoryClassToSoType = { - {"DNase_I_hypersensitive_site", "DNAseI_hypersensitive_site"}, + static const TYPEMAP mapRegulatoryClassToSoType = { + {"DNase_I_hypersensitive_site", "DNaseI_hypersensitive_site"}, {"GC_signal", "GC_rich_promoter_region"}, {"enhancer_blocking_element", "enhancer_blocking_element"}, {"epigenetically_modified_region", "epigenetically_modified_region"}, @@ -1096,7 +1122,7 @@ bool CSoMap::xMapRegulatory( {"ribosome_binding_site", "ribosome_entry_site"}, }; - string regulatory_class = 
feature.GetNamedQual("regulatory_class"); + string regulatory_class = GetUnambiguousNamedQual(feature, "regulatory_class"); if (regulatory_class.empty()) { so_type = "regulatory_region"; return true; @@ -1124,11 +1150,11 @@ bool CSoMap::xMapBond( string& so_type) // ---------------------------------------------------------------------------- { - map mapBondTypeToSoType = { + static const TYPEMAP mapBondTypeToSoType = { {"disulfide", "disulfide_bond"}, {"xlink", "cross_link"}, }; - string bond_type = feature.GetNamedQual("bond_type"); + string bond_type = GetUnambiguousNamedQual(feature, "bond_type"); if (bond_type.empty()) { return false; } @@ -1141,18 +1167,19 @@ bool CSoMap::xMapBond( return true; } + // ---------------------------------------------------------------------------- bool CSoMap::xMapRepeatRegion( const CSeq_feat& feature, string& so_type) // ---------------------------------------------------------------------------- { - map mapSatelliteToSoType = { + static const TYPEMAP mapSatelliteToSoType = { {"satellite", "satellite_DNA"}, {"microsatellite", "microsatellite"}, {"minisatellite", "minisatellite"}, }; - string satellite = feature.GetNamedQual("satellite"); + string satellite = GetUnambiguousNamedQual(feature, "satellite"); if (!satellite.empty()) { auto cit = mapSatelliteToSoType.find(satellite); if (cit == mapSatelliteToSoType.end()) { @@ -1162,7 +1189,7 @@ bool CSoMap::xMapRepeatRegion( return true; } - map mapRptTypeToSoType = { + static const TYPEMAP mapRptTypeToSoType = { {"tandem", "tandem_repeat"}, {"inverted", "inverted_repeat"}, {"flanking", "repeat_region"}, @@ -1175,7 +1202,7 @@ bool CSoMap::xMapRepeatRegion( {"y_prime_element", "Y_prime_element"}, {"other", "repeat_region"}, }; - string rpt_type = feature.GetNamedQual("rpt_type"); + string rpt_type = GetUnambiguousNamedQual(feature, "rpt_type"); if (rpt_type.empty()) { so_type = "repeat_region"; return true; diff --git a/c++/src/objects/seqfeat/OrgMod.cpp 
b/c++/src/objects/seqfeat/OrgMod.cpp index 993fc0c4..fd204e14 100644 --- a/c++/src/objects/seqfeat/OrgMod.cpp +++ b/c++/src/objects/seqfeat/OrgMod.cpp @@ -1,4 +1,4 @@ -/* $Id: OrgMod.cpp 602802 2020-03-02 23:09:16Z kans $ +/* $Id: OrgMod.cpp 613887 2020-08-13 18:36:41Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -67,7 +67,8 @@ COrgMod::TSubtype COrgMod::GetSubtypeValue(const string& str, replace(name.begin(), name.end(), ' ', '-'); if (name == "note" || - NStr::EqualNocase(name, "orgmod-note")) { + NStr::EqualNocase(name, "orgmod-note") || + NStr::EqualNocase(name, "note-orgmod")) { return eSubtype_other; } else if (vocabulary == eVocabulary_insdc) { if (name == "host" || name == "specific-host") { @@ -90,7 +91,8 @@ bool COrgMod::IsValidSubtypeName(const string& str, replace(name.begin(), name.end(), ' ', '-'); if (name == "note" || - name == "orgmod-note") { + name == "orgmod-note" || + name == "note-orgmod") { return true; } else if (vocabulary == eVocabulary_insdc) { if (name == "host" || name == "sub-strain") { diff --git a/c++/src/objects/seqfeat/SeqFeatData.cpp b/c++/src/objects/seqfeat/SeqFeatData.cpp index 90251bef..3f55821b 100644 --- a/c++/src/objects/seqfeat/SeqFeatData.cpp +++ b/c++/src/objects/seqfeat/SeqFeatData.cpp @@ -1,4 +1,4 @@ -/* $Id: SeqFeatData.cpp 599381 2019-12-26 23:31:18Z kans $ +/* $Id: SeqFeatData.cpp 613780 2020-08-12 16:42:40Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -169,7 +169,7 @@ static const TInfoPair kInfoPairs[] = { FEAT_INFO_PAIR(Txinit, txinit, "TxInit", "promoter"), FEAT_INFO_PAIR(Num, num, "Num", "misc_feature"), FEAT_INFO_PAIR(Psec_str, psec_str, "SecStr", "SecStr"), - FEAT_INFO_PAIR(Non_std_residue, non_std_residue, "NonStdRes", "misc_feature"), + FEAT_INFO_PAIR(Non_std_residue, non_std_residue, "NonStdRes", "NonStdRes"), FEAT_INFO_PAIR(Het, het, "Het", "Het"), 
FEAT_INFO_PAIR(Biosrc, biosrc, "Src", "source"), FEAT_INFO_PAIR(Clone, clone, "CloneRef", "misc_feature"), @@ -2646,8 +2646,30 @@ const CSeqFeatData::TSubTypeQualifiersMap& CSeqFeatData::s_GetLegalQualMap() noe eQual_usedin, } }, -//{ eSubtype_non_std_residue, { -//}, +{ eSubtype_non_std_residue, { + eQual_allele, + eQual_citation, + eQual_db_xref, + eQual_exception, + eQual_experiment, + eQual_function, + eQual_gene, + eQual_gene_synonym, + eQual_inference, + eQual_label, + eQual_locus_tag, + eQual_map, + eQual_non_std_residue, + eQual_note, + eQual_number, + eQual_old_locus_tag, + eQual_phenotype, + eQual_product, + eQual_pseudo, + eQual_pseudogene, + eQual_standard_name, + eQual_usedin, +} }, //sameasmisc_feature { eSubtype_het, { @@ -3063,6 +3085,7 @@ MAKE_TWOWAY_CONST_MAP(sc_QualPairs, CSeqFeatData::EQualifier, ct::tagStrNocase, { CSeqFeatData::eQual_mol_type, "mol_type" }, { CSeqFeatData::eQual_name, "name" }, { CSeqFeatData::eQual_nomenclature, "nomenclature" }, + { CSeqFeatData::eQual_non_std_residue, "non_std_residue" }, { CSeqFeatData::eQual_ncRNA_class, "ncRNA_class" }, { CSeqFeatData::eQual_note, "note" }, { CSeqFeatData::eQual_number, "number" }, @@ -4428,6 +4451,7 @@ CSeqFeatData::EFeatureLocationAllowed CSeqFeatData::AllowedFeatureLocation(ESubt case eSubtype_propeptide_aa: case eSubtype_bond: case eSubtype_psec_str: + case eSubtype_non_std_residue: rval = eFeatureLocationAllowed_ProtOnly; break; case eSubtype_region: diff --git a/c++/src/objects/seqfeat/SubSource.cpp b/c++/src/objects/seqfeat/SubSource.cpp index a0a811b9..8aa5ed46 100644 --- a/c++/src/objects/seqfeat/SubSource.cpp +++ b/c++/src/objects/seqfeat/SubSource.cpp @@ -1,4 +1,4 @@ -/* $Id: SubSource.cpp 605788 2020-04-15 14:55:53Z ivanov $ +/* $Id: SubSource.cpp 615787 2020-09-03 18:18:36Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -97,7 +97,8 @@ CSubSource::TSubtype CSubSource::GetSubtypeValue(const string& str, if ( 
NStr::EqualNocase(name, "note") || NStr::EqualNocase(name, "subsource-note") || - NStr::EqualNocase(name, "subsrc-note")) { + NStr::EqualNocase(name, "subsrc-note") || + NStr::EqualNocase(name, "note-subsource")) { return eSubtype_other; } else if (vocabulary == eVocabulary_insdc) { // consider a table if more special cases arise. @@ -126,7 +127,8 @@ bool CSubSource::IsValidSubtypeName(const string& str, if ( NStr::EqualNocase(name, "note") || NStr::EqualNocase(name, "subsource-note") || - NStr::EqualNocase(name, "subsrc-note")) { + NStr::EqualNocase(name, "subsrc-note") || + NStr::EqualNocase(name, "note-subsource")) { return true; } if (vocabulary == eVocabulary_insdc) { @@ -2084,15 +2086,6 @@ string CSubSource::ValidateLatLonCountry (const string& input_countryname, strin return kEmptyStr; } - - if (NStr::EqualNocase (country, "China") && NStr::EqualNocase (cguess, "Hong Kong")) { - delete id; - return kEmptyStr; - } - if (NStr::EqualNocase (country, "USA") && NStr::EqualNocase (cguess, "Puerto Rico")) { - delete id; - return kEmptyStr; - } if (NStr::EqualNocase (country, "State of Palestine") && (NStr::EqualNocase (cguess, "Gaza Strip") || NStr::EqualNocase (cguess, "West Bank"))) { @@ -2645,11 +2638,12 @@ bool CSubSource::IsEndogenousVirusNameValid(const string& value) // 7. Spaces and other printable characters are permitted // 8. Must not contain the word "plasmid" (ignoring case) // 9. Must not contain the word "chromosome" (ignoring case) -// 10. Must not contain the phrase "linkage group" (ignoring case) -// 11. Must not contain the series of letters "chr" (ignoring case) -// 12. Must not contain the taxname (ignoring case) -// 14. Must not contain the genus (ignoring case) +// 10. Must not contain the phrase "linkage group" (ignoring case) +// 11. Must not contain the series of letters "chr" (ignoring case) +// 12. Must not contain the taxname (ignoring case) +// 14. Must not contain the genus (ignoring case) // 15. 
Must not contain the species (ignoring case) +// except allow the species to match the value after an initial 'p' (e.g., JX416328) // 16. Must not contain the series of letters "chrm" (ignoring case) // 17. Must not contain the series of letters "chrom" (ignoring case) // 18. Must not contain the phrase "linkage-group" (ignoring case) @@ -2676,13 +2670,18 @@ bool CSubSource::x_MeetsCommonChromosomeLinkageGroupPlasmidNameRules(const strin } size_t pos = NStr::Find(taxname, " "); if (pos != NPOS) { - if (NStr::FindNoCase(value, taxname.substr(0, pos)) != NPOS) { + string genus = taxname.substr(0, pos); + if (NStr::FindNoCase(value, genus) != NPOS) { // B.14 return false; } - if (NStr::FindNoCase(value, taxname.substr(pos + 1)) != NPOS) { - // B.15 - return false; + string species = taxname.substr(pos + 1); + pos = NStr::FindNoCase(value, species); + if (pos != NPOS) { + if (pos != 1 || value[0] != 'p') { + // B.15 + return false; + } } } } diff --git a/c++/src/objects/seqfeat/ecnum_ambiguous.inc b/c++/src/objects/seqfeat/ecnum_ambiguous.inc index e8b03951..5ce04fbf 100644 --- a/c++/src/objects/seqfeat/ecnum_ambiguous.inc +++ b/c++/src/objects/seqfeat/ecnum_ambiguous.inc @@ -1,4 +1,4 @@ -/* $Id: ecnum_ambiguous.inc 578243 2019-01-15 21:20:22Z kans $ +/* $Id: ecnum_ambiguous.inc 615790 2020-09-03 18:19:26Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -45,6 +45,8 @@ static const char* const kECNum_ambiguous[] = { "1.1.4.n\tWith a disulfide as acceptor", "1.1.5.-\tWith a quinone or similar compound as acceptor", "1.1.5.n\tWith a quinone or similar compound as acceptor", + "1.1.7.-\tWith an iron-sulfur protein as acceptor", + "1.1.7.n\tWith an iron-sulfur protein as acceptor", "1.1.9.-\tWith a copper protein as acceptor", "1.1.9.n\tWith a copper protein as acceptor", "1.1.98.-\tWith other, known, acceptors", @@ -763,6 +765,8 @@ static const char* const kECNum_ambiguous[] = { "6.2.n.n\tForming 
carbon-sulfur bonds", "6.2.1.-\tAcid--thiol ligases", "6.2.1.n\tAcid--thiol ligases", + "6.2.2.-\tAmide--thiol ligases", + "6.2.2.n\tAmide--thiol ligases", "6.3.-.-\tForming carbon-nitrogen bonds", "6.3.n.n\tForming carbon-nitrogen bonds", "6.3.1.-\tAcid--ammonia (or amine) ligases (amide synthases)", diff --git a/c++/src/objects/seqfeat/ecnum_ambiguous.txt b/c++/src/objects/seqfeat/ecnum_ambiguous.txt index 08af4f2c..f5731d28 100644 --- a/c++/src/objects/seqfeat/ecnum_ambiguous.txt +++ b/c++/src/objects/seqfeat/ecnum_ambiguous.txt @@ -12,6 +12,8 @@ 1.1.4.n With a disulfide as acceptor 1.1.5.- With a quinone or similar compound as acceptor 1.1.5.n With a quinone or similar compound as acceptor +1.1.7.- With an iron-sulfur protein as acceptor +1.1.7.n With an iron-sulfur protein as acceptor 1.1.9.- With a copper protein as acceptor 1.1.9.n With a copper protein as acceptor 1.1.98.- With other, known, acceptors @@ -730,6 +732,8 @@ 6.2.n.n Forming carbon-sulfur bonds 6.2.1.- Acid--thiol ligases 6.2.1.n Acid--thiol ligases +6.2.2.- Amide--thiol ligases +6.2.2.n Amide--thiol ligases 6.3.-.- Forming carbon-nitrogen bonds 6.3.n.n Forming carbon-nitrogen bonds 6.3.1.- Acid--ammonia (or amine) ligases (amide synthases) diff --git a/c++/src/objects/seqfeat/ecnum_replaced.inc b/c++/src/objects/seqfeat/ecnum_replaced.inc index 6247a763..385a290f 100644 --- a/c++/src/objects/seqfeat/ecnum_replaced.inc +++ b/c++/src/objects/seqfeat/ecnum_replaced.inc @@ -1,4 +1,4 @@ -/* $Id: ecnum_replaced.inc 604099 2020-03-23 12:20:07Z ivanov $ +/* $Id: ecnum_replaced.inc 612554 2020-07-23 15:34:08Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -119,6 +119,7 @@ static const char* const kECNum_replaced[] = { "1.3.1.52\t1.3.8.5", "1.3.1.63\t1.21.1.2", "1.3.1.80\t1.3.7.12", + "1.3.1.99\t1.3.1.122", "1.3.1.n1\t1.3.1.87", "1.3.1.n2\t1.14.19.52", "1.3.2.1\t1.3.8.1", @@ -230,6 +231,7 @@ static const char* const kECNum_replaced[] = { 
"1.8.6.1\t2.5.1.18", "1.8.99.3\t1.8.99.5", "1.8.99.4\t1.8.4.8", + "1.9.3.1\t7.1.1.9", "1.9.3.2\t1.7.2.1", "1.9.99.1\t1.9.98.1", "1.10.2.2\t7.1.1.8", @@ -244,6 +246,7 @@ static const char* const kECNum_replaced[] = { "1.10.99.2\t1.10.5.1", "1.10.99.3\t1.23.5.1", "1.11.1.4\t1.13.11.11", + "1.11.1.15\t1.11.1.24", "1.12.1.1\t1.12.7.2", "1.12.7.1\t1.12.7.2", "1.12.99.1\t1.12.98.1", @@ -696,7 +699,7 @@ static const char* const kECNum_replaced[] = { "2.7.7.17\t4.6.1.19", "2.7.7.21\t2.7.7.72", "2.7.7.25\t2.7.7.72", - "2.7.7.26\t3.1.27.3", + "2.7.7.26\t4.6.1.24", "2.7.7.29\t2.7.7.28", "2.7.7.54\t6.3.2.40", "2.7.7.55\t6.3.2.40", @@ -742,7 +745,7 @@ static const char* const kECNum_replaced[] = { "3.1.4.5\t3.1.21.1", "3.1.4.6\t3.1.22.1", "3.1.4.7\t3.1.31.1", - "3.1.4.8\t3.1.27.3", + "3.1.4.8\t4.6.1.24", "3.1.4.9\t3.1.30.2", "3.1.4.10\t4.6.1.13", "3.1.4.15\t2.7.7.89", @@ -762,6 +765,9 @@ static const char* const kECNum_replaced[] = { "3.1.4.n1\t3.1.4.53", "3.1.7.4\t4.2.1.133\t4.2.3.141", "3.1.7.7\t4.2.3.194", + "3.1.11.7\t3.6.1.71", + "3.1.11.8\t3.6.1.70", + "3.1.12.2\t3.6.1.72", "3.1.22.3\t3.1.21.7", "3.1.23.1\t3.1.21.4", "3.1.23.2\t3.1.21.4", @@ -830,6 +836,7 @@ static const char* const kECNum_replaced[] = { "3.1.26.n1\t3.1.26.12", "3.1.27.1\t4.6.1.19", "3.1.27.2\t4.6.1.22", + "3.1.27.3\t4.6.1.24", "3.1.27.4\t4.6.1.20", "3.1.27.5\t4.6.1.18", "3.1.27.6\t4.6.1.21", @@ -840,6 +847,7 @@ static const char* const kECNum_replaced[] = { "3.2.1.29\t3.2.1.52", "3.2.1.30\t3.2.1.52", "3.2.1.34\t3.2.1.35", + "3.2.1.44\t3.2.1.211", "3.2.1.69\t3.2.1.41", "3.2.1.79\t3.2.1.55", "3.2.1.110\t3.2.1.97", @@ -1098,6 +1106,7 @@ static const char* const kECNum_replaced[] = { "4.1.2.31\t4.1.3.16", "4.1.2.37\t4.1.2.46\t4.1.2.47", "4.1.2.39\t4.1.2.46\t4.1.2.47", + "4.1.2.41\t4.1.2.61", "4.1.2.n1\t4.1.2.44", "4.1.2.n3\t4.1.2.53", "4.1.2.n4\t4.1.2.52", diff --git a/c++/src/objects/seqfeat/ecnum_replaced.txt b/c++/src/objects/seqfeat/ecnum_replaced.txt index 4381f940..25673919 100644 --- 
a/c++/src/objects/seqfeat/ecnum_replaced.txt +++ b/c++/src/objects/seqfeat/ecnum_replaced.txt @@ -86,6 +86,7 @@ 1.3.1.52 1.3.8.5 1.3.1.63 1.21.1.2 1.3.1.80 1.3.7.12 +1.3.1.99 1.3.1.122 1.3.1.n1 1.3.1.87 1.3.1.n2 1.14.19.52 1.3.2.1 1.3.8.1 @@ -197,6 +198,7 @@ 1.8.6.1 2.5.1.18 1.8.99.3 1.8.99.5 1.8.99.4 1.8.4.8 +1.9.3.1 7.1.1.9 1.9.3.2 1.7.2.1 1.9.99.1 1.9.98.1 1.10.2.2 7.1.1.8 @@ -211,6 +213,7 @@ 1.10.99.2 1.10.5.1 1.10.99.3 1.23.5.1 1.11.1.4 1.13.11.11 +1.11.1.15 1.11.1.24 1.12.1.1 1.12.7.2 1.12.7.1 1.12.7.2 1.12.99.1 1.12.98.1 @@ -663,7 +666,7 @@ 2.7.7.17 4.6.1.19 2.7.7.21 2.7.7.72 2.7.7.25 2.7.7.72 -2.7.7.26 3.1.27.3 +2.7.7.26 4.6.1.24 2.7.7.29 2.7.7.28 2.7.7.54 6.3.2.40 2.7.7.55 6.3.2.40 @@ -709,7 +712,7 @@ 3.1.4.5 3.1.21.1 3.1.4.6 3.1.22.1 3.1.4.7 3.1.31.1 -3.1.4.8 3.1.27.3 +3.1.4.8 4.6.1.24 3.1.4.9 3.1.30.2 3.1.4.10 4.6.1.13 3.1.4.15 2.7.7.89 @@ -729,6 +732,9 @@ 3.1.4.n1 3.1.4.53 3.1.7.4 4.2.1.133 4.2.3.141 3.1.7.7 4.2.3.194 +3.1.11.7 3.6.1.71 +3.1.11.8 3.6.1.70 +3.1.12.2 3.6.1.72 3.1.22.3 3.1.21.7 3.1.23.1 3.1.21.4 3.1.23.2 3.1.21.4 @@ -797,6 +803,7 @@ 3.1.26.n1 3.1.26.12 3.1.27.1 4.6.1.19 3.1.27.2 4.6.1.22 +3.1.27.3 4.6.1.24 3.1.27.4 4.6.1.20 3.1.27.5 4.6.1.18 3.1.27.6 4.6.1.21 @@ -807,6 +814,7 @@ 3.2.1.29 3.2.1.52 3.2.1.30 3.2.1.52 3.2.1.34 3.2.1.35 +3.2.1.44 3.2.1.211 3.2.1.69 3.2.1.41 3.2.1.79 3.2.1.55 3.2.1.110 3.2.1.97 @@ -1065,6 +1073,7 @@ 4.1.2.31 4.1.3.16 4.1.2.37 4.1.2.46 4.1.2.47 4.1.2.39 4.1.2.46 4.1.2.47 +4.1.2.41 4.1.2.61 4.1.2.n1 4.1.2.44 4.1.2.n3 4.1.2.53 4.1.2.n4 4.1.2.52 diff --git a/c++/src/objects/seqfeat/ecnum_specific.inc b/c++/src/objects/seqfeat/ecnum_specific.inc index 31b7c274..f5a79412 100644 --- a/c++/src/objects/seqfeat/ecnum_specific.inc +++ b/c++/src/objects/seqfeat/ecnum_specific.inc @@ -1,4 +1,4 @@ -/* $Id: ecnum_specific.inc 604099 2020-03-23 12:20:07Z ivanov $ +/* $Id: ecnum_specific.inc 615790 2020-09-03 18:19:26Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE 
@@ -429,6 +429,10 @@ static const char* const kECNum_specific[] = { "1.1.1.417\t3-beta-hydroxysteroid-4-beta-carboxylate 3-dehydrogenase (decarboxylating)", "1.1.1.418\tPlant 3-beta-hydroxysteroid-4-alpha-carboxylate 3-dehydrogenase (decarboxylating)", "1.1.1.419\tNepetalactol dehydrogenase", + "1.1.1.420\tD-apiose dehydrogenase", + "1.1.1.421\tD-apionate oxidoisomerase", + "1.1.1.422\tPseudoephedrine dehydrogenase", + "1.1.1.423\tEphedrine dehydrogenase", "1.1.1.n4\t(-)-trans-carveol dehydrogenase", "1.1.1.n5\t3-methylmalate dehydrogenase", "1.1.1.n11\tSuccinic semialdehyde reductase", @@ -441,6 +445,7 @@ static const char* const kECNum_specific[] = { "1.1.2.7\tMethanol dehydrogenase (cytochrome c)", "1.1.2.8\tAlcohol dehydrogenase (cytochrome c)", "1.1.2.9\t1-butanol dehydrogenase (cytochrome c)", + "1.1.2.10\tLanthanide-dependent methanol dehydrogenase", "1.1.3.4\tGlucose oxidase", "1.1.3.5\tHexose oxidase", "1.1.3.6\tCholesterol oxidase", @@ -495,6 +500,7 @@ static const char* const kECNum_specific[] = { "1.1.98.4\tF420H(2):quinone oxidoreductase", "1.1.98.5\tSecondary-alcohol dehydrogenase (coenzyme-F420)", "1.1.98.6\tRibonucleoside-triphosphate reductase (formate)", + "1.1.98.7\tSerine-type anaerobic sulfatase-maturating enzyme", "1.1.99.1\tCholine dehydrogenase", "1.1.99.2\tL-2-hydroxyglutarate dehydrogenase", "1.1.99.3\tGluconate 2-dehydrogenase (acceptor)", @@ -614,6 +620,7 @@ static const char* const kECNum_specific[] = { "1.2.1.100\t5-formyl-3-hydroxy-2-methylpyridine 4-carboxylate 5-dehydrogenase", "1.2.1.101\tL-tyrosine reductase", "1.2.1.102\tIsopyridoxal dehydrogenase (5-pyridoxate-forming)", + "1.2.1.103\t[Amino-group carrier protein]-6-phospho-L-2-aminoadipate reductase", "1.2.1.n2\tFatty acyl-CoA reductase", "1.2.2.1\tFormate dehydrogenase (cytochrome)", "1.2.2.4\tCarbon-monoxide dehydrogenase (cytochrome b-561)", @@ -678,7 +685,7 @@ static const char* const kECNum_specific[] = { "1.3.1.31\t2-enoate reductase", "1.3.1.32\tMaleylacetate reductase", 
"1.3.1.33\tProtochlorophyllide reductase", - "1.3.1.34\t2,4-dienoyl-CoA reductase (NADPH)", + "1.3.1.34\t2,4-dienoyl-CoA reductase ((2E)-enoyl-CoA-producing)", "1.3.1.36\tGeissoschizine dehydrogenase", "1.3.1.37\tCis-2-enoyl-CoA reductase (NADPH)", "1.3.1.38\tTrans-2-enoyl-CoA reductase (NADPH)", @@ -735,7 +742,6 @@ static const char* const kECNum_specific[] = { "1.3.1.96\tBotryococcus squalene synthase", "1.3.1.97\tBotryococcene synthase", "1.3.1.98\tUDP-N-acetylmuramate dehydrogenase", - "1.3.1.99\tIridoid synthase", "1.3.1.100\tChanoclavine-I aldehyde reductase", "1.3.1.101\t2,3-bis-O-geranylgeranyl-sn-glycerol 1-phosphate reductase (NAD(P)H)", "1.3.1.102\t2-alkenal reductase (NADP(+))", @@ -757,6 +763,10 @@ static const char* const kECNum_specific[] = { "1.3.1.118\tMeromycolic acid enoyl-[acyl-carrier-protein] reductase", "1.3.1.119\tChlorobenzene dihydrodiol dehydrogenase", "1.3.1.120\tCyclohexane-1-carbonyl-CoA reductase (NADP(+))", + "1.3.1.121\t4-amino-4-deoxyprephenate dehydrogenase", + "1.3.1.122\t(S)-8-oxocitronellyl enol synthase", + "1.3.1.123\t7-epi-iridoid synthase", + "1.3.1.124\t2,4-dienoyl-CoA reductase ((3E)-enoyl-CoA-producing)", "1.3.1.n3\tCurcumin reductase", "1.3.2.3\tL-galactonolactone dehydrogenase", "1.3.3.3\tCoproporphyrinogen oxidase", @@ -806,6 +816,7 @@ static const char* const kECNum_specific[] = { "1.3.8.12\t(2S)-methylsuccinyl-CoA dehydrogenase", "1.3.8.13\tCrotonobetainyl-CoA reductase", "1.3.8.14\tL-prolyl-[peptidyl-carrier protein] dehydrogenase", + "1.3.8.15\t3-(aryl)acrylate reductase", "1.3.98.1\tDihydroorotate oxidase (fumarate)", "1.3.98.3\tCoproporphyrinogen dehydrogenase", "1.3.98.4\t5a,11a-dehydrotetracycline reductase", @@ -1084,6 +1095,8 @@ static const char* const kECNum_specific[] = { "1.8.4.12\tPeptide-methionine (R)-S-oxide reductase", "1.8.4.13\tL-methionine (S)-S-oxide reductase", "1.8.4.14\tL-methionine (R)-S-oxide reductase", + "1.8.4.15\tProtein dithiol oxidoreductase (disulfide-forming)", + 
"1.8.4.16\tThioredoxin:protein disulfide reductase", "1.8.5.1\tGlutathione dehydrogenase (ascorbate)", "1.8.5.2\tThiosulfate dehydrogenase (quinone)", "1.8.5.3\tRespiratory dimethylsulfoxide reductase", @@ -1092,6 +1105,7 @@ static const char* const kECNum_specific[] = { "1.8.5.6\tSulfite dehydrogenase (quinone)", "1.8.5.7\tGlutathionyl-hydroquinone reductase", "1.8.5.8\tEukaryotic sulfide quinone oxidoreductase", + "1.8.5.9\tProtein dithiol:quinone oxidoreductase DsbB", "1.8.7.1\tAssimilatory sulfite reductase (ferredoxin)", "1.8.7.2\tFerredoxin:thioredoxin reductase", "1.8.7.3\tFerredoxin:CoB-CoM heterodisulfide reductase", @@ -1101,9 +1115,9 @@ static const char* const kECNum_specific[] = { "1.8.98.4\tCoenzyme F420:CoB-CoM heterodisulfide,ferredoxin reductase", "1.8.98.5\tH(2):CoB-CoM heterodisulfide,ferredoxin reductase", "1.8.98.6\tFormate:CoB-CoM heterodisulfide,ferredoxin reductase", + "1.8.98.7\tCysteine-type anaerobic sulfatase-maturating enzyme", "1.8.99.2\tAdenylyl-sulfate reductase", "1.8.99.5\tDissimilatory sulfite reductase", - "1.9.3.1\tCytochrome-c oxidase", "1.9.6.1\tNitrate reductase (cytochrome)", "1.9.98.1\tIron--cytochrome-c reductase", "1.10.1.1\tTrans-acenaphthene-1,2-diol dehydrogenase", @@ -1118,6 +1132,7 @@ static const char* const kECNum_specific[] = { "1.10.3.11\tUbiquinol oxidase (non-electrogenic)", "1.10.3.15\tGrixazone synthase", "1.10.3.16\tDihydrophenazinedicarboxylate synthase", + "1.10.3.17\tSuperoxide oxidase", "1.10.5.1\tRibosyldihydronicotinamide dehydrogenase (quinone)", "1.11.1.1\tNADH peroxidase", "1.11.1.2\tNADPH peroxidase", @@ -1132,7 +1147,6 @@ static const char* const kECNum_specific[] = { "1.11.1.12\tPhospholipid-hydroperoxide glutathione peroxidase", "1.11.1.13\tManganese peroxidase", "1.11.1.14\tLignin peroxidase", - "1.11.1.15\tPeroxiredoxin", "1.11.1.16\tVersatile peroxidase", "1.11.1.17\tGlutathione amide-dependent peroxidase", "1.11.1.18\tBromide peroxidase", @@ -1141,6 +1155,12 @@ static const char* const 
kECNum_specific[] = { "1.11.1.21\tCatalase peroxidase", "1.11.1.22\tHydroperoxy fatty acid reductase", "1.11.1.23\t(S)-2-hydroxypropylphosphonic acid epoxidase", + "1.11.1.24\tThioredoxin-dependent peroxiredoxin", + "1.11.1.25\tGlutaredoxin-dependent peroxiredoxin", + "1.11.1.26\tNADH-dependent peroxiredoxin", + "1.11.1.27\tGlutathione-dependent peroxiredoxin", + "1.11.1.28\tLipoyl-dependent peroxiredoxin", + "1.11.1.29\tMycoredoxin-dependent peroxiredoxin", "1.11.2.1\tUnspecific peroxygenase", "1.11.2.2\tMyeloperoxidase", "1.11.2.3\tPlant seed peroxygenase", @@ -1325,6 +1345,8 @@ static const char* const kECNum_specific[] = { "1.14.11.67\t[Histone H3]-trimethyl-L-lysine(4) demethylase", "1.14.11.68\t[Histone H3]-trimethyl-L-lysine(27) demethylase", "1.14.11.69\t[Histone H3]-trimethyl-L-lysine(36) demethylase", + "1.14.11.70\t7-deoxycylindrospermopsin hydroxylase", + "1.14.11.71\tMethylphosphonate hydroxylase", "1.14.11.n2\tMethylcytosine dioxygenase", "1.14.11.n4\tAnkyrin-repeat-histidine dioxagenase", "1.14.12.1\tAnthranilate 1,2-dioxygenase (deaminating, decarboxylating)", @@ -1471,6 +1493,7 @@ static const char* const kECNum_specific[] = { "1.14.13.244\tPhenol 2-monooxygenase (NADH)", "1.14.13.245\tAssimilatory dimethylsulfide S-monooxygenase", "1.14.13.246\t4-beta-methylsterol monooxygenase", + "1.14.13.247\tStachydrine N-demethylase", "1.14.13.n6\tHexahomomethionine N-hydroxylase", "1.14.13.n7\t4-nitrophenol 2-hydroxylase", "1.14.14.1\tUnspecific monooxygenase", @@ -1696,6 +1719,7 @@ static const char* const kECNum_specific[] = { "1.14.18.9\t4-alpha-methylsterol monooxygenase", "1.14.18.10\tPlant 4,4-dimethylsterol C-4-alpha-methyl-monooxygenase", "1.14.18.11\tPlant 4-alpha-monomethylsterol monooxygenase", + "1.14.18.12\t2-hydroxy fatty acid dioxygenase", "1.14.19.1\tStearoyl-CoA 9-desaturase", "1.14.19.2\tStearoyl-[acyl-carrier-protein] 9-desaturase", "1.14.19.3\tAcyl-CoA 6-desaturase", @@ -1881,6 +1905,7 @@ static const char* const kECNum_specific[] = { 
"1.17.99.4\tUracil/thymine dehydrogenase", "1.17.99.6\tEpoxyqueuosine reductase", "1.17.99.7\tFormate dehydrogenase (acceptor)", + "1.17.99.8\tLimonene dehydrogenase", "1.18.1.1\tRubredoxin--NAD(+) reductase", "1.18.1.2\tFerredoxin--NADP(+) reductase", "1.18.1.3\tFerredoxin--NAD(+) reductase", @@ -1894,7 +1919,7 @@ static const char* const kECNum_specific[] = { "1.19.6.1\tNitrogenase (flavodoxin)", "1.20.1.1\tPhosphonate dehydrogenase", "1.20.2.1\tArsenate reductase (cytochrome c)", - "1.20.4.1\tArsenate reductase (glutaredoxin)", + "1.20.4.1\tArsenate reductase (glutathione/glutaredoxin)", "1.20.4.2\tMethylarsonate reductase", "1.20.4.3\tMycoredoxin", "1.20.4.4\tArsenate reductase (thioredoxin)", @@ -2270,13 +2295,14 @@ static const char* const kECNum_specific[] = { "2.1.1.360\t[Histone H3]-lysine(79) N-trimethyltransferase", "2.1.1.361\t[Histone H4]-lysine(20) N-methyltransferase", "2.1.1.362\t[Histone H4]-N-methyl-L-lysine(20) N-methyltransferase", + "2.1.1.363\tPre-sodorifen synthase", "2.1.1.n1\tResorcinol O-methyltransferase", "2.1.1.n4\tThiocyanate methyltransferase", "2.1.1.n7\t5-pentadecatrienyl resorcinol O-methyltransferase", "2.1.1.n8\tSmall RNA 2'-O-methyltransferase", "2.1.1.n11\tMethylphosphotriester-DNA--[protein]-cysteine S-methyltransferase", "2.1.2.1\tGlycine hydroxymethyltransferase", - "2.1.2.2\tPhosphoribosylglycinamide formyltransferase", + "2.1.2.2\tPhosphoribosylglycinamide formyltransferase 1", "2.1.2.3\tPhosphoribosylaminoimidazolecarboxamide formyltransferase", "2.1.2.4\tGlycine formimidoyltransferase", "2.1.2.5\tGlutamate formimidoyltransferase", @@ -2548,7 +2574,7 @@ static const char* const kECNum_specific[] = { "2.3.1.242\tKdo(2)-lipid IV(A) palmitoleoyltransferase", "2.3.1.243\tLauroyl-Kdo(2)-lipid IV(A) myristoyltransferase", "2.3.1.244\t2-methylbutanoate polyketide synthase", - "2.3.1.245\t3-hydroxy-5-phosphonooxypentane-2,4-dione thiolase", + "2.3.1.245\t3-hydroxy-5-phosphooxypentane-2,4-dione thiolase", 
"2.3.1.246\t3,5-dihydroxyphenylacetyl-CoA synthase", "2.3.1.247\t3-keto-5-aminohexanoate cleavage enzyme", "2.3.1.248\tSpermidine disinapoyl transferase", @@ -2597,6 +2623,8 @@ static const char* const kECNum_specific[] = { "2.3.1.291\tSphingoid base N-palmitoyltransferase", "2.3.1.292\t(Phenol)carboxyphthiodiolenone synthase", "2.3.1.293\tMeromycolic acid 3-oxoacyl-(acyl carrier protein) synthase I", + "2.3.1.294\tMeromycolic acid 3-oxoacyl-(acyl carrier protein) synthase II", + "2.3.1.295\tMycoketide-CoA synthase", "2.3.1.296\tOmega-hydroxyceramide transacylase", "2.3.1.297\tVery-long-chain ceramide synthase", "2.3.1.298\tUltra-long-chain ceramide synthase", @@ -2637,6 +2665,7 @@ static const char* const kECNum_specific[] = { "2.3.2.30\tL-ornithine N(alpha)-acyltransferase", "2.3.2.31\tRBR-type E3 ubiquitin transferase", "2.3.2.32\tCullin-RING-type E3 NEDD8 transferase", + "2.3.2.33\tRCR-type E3 ubiquitin transferase", "2.3.3.1\tCitrate (Si)-synthase", "2.3.3.2\tDecylcitrate synthase", "2.3.3.3\tCitrate (Re)-synthase", @@ -2993,6 +3022,10 @@ static const char* const kECNum_specific[] = { "2.4.1.368\tOleanolate 3-O-glucosyltransferase", "2.4.1.369\tEnterobactin C-glucosyltransferase", "2.4.1.370\tInositol phosphorylceramide mannosyltransferase", + "2.4.1.371\tPolymannosyl GlcNAc-diphospho-ditrans,octacis-undecaprenol 2,3-alpha-mannosylpolymerase", + "2.4.1.372\tMutansucrase", + "2.4.1.373\tAlpha-(1->2) branching sucrase", + "2.4.1.374\tBeta-1,2-mannooligosaccharide synthase", "2.4.1.n2\tLoliose synthase", "2.4.2.1\tPurine-nucleoside phosphorylase", "2.4.2.2\tPyrimidine-nucleoside phosphorylase", @@ -3053,7 +3086,7 @@ static const char* const kECNum_specific[] = { "2.4.2.60\tCysteine-dependent adenosine diphosphate thiazole synthase", "2.4.2.61\tAlpha-dystroglycan beta-1,4-xylosyltransferase", "2.4.2.n2\tGlucoside xylosyltransferase", - "2.4.2.n3\tXyloside xylosyltransferase", + "2.4.2.n3\tXylosyl alpha-1,3-xylosyltransferase", "2.4.99.1\tBeta-galactoside 
alpha-(2,6)-sialyltransferase", "2.4.99.2\tBeta-D-galactosyl-(1->3)-N-acetyl-beta-D-galactosaminide alpha-2,3-sialyltransferase", "2.4.99.3\tAlpha-N-acetylgalactosaminide alpha-2,6-sialyltransferase", @@ -3325,6 +3358,7 @@ static const char* const kECNum_specific[] = { "2.6.1.115\t5-hydroxydodecatetraenal 1-aminotransferase", "2.6.1.116\t6-aminohexanoate aminotransferase", "2.6.1.117\tL-glutamine--4-(methylsulfanyl)-2-oxobutanoate aminotransferase", + "2.6.1.118\t[Amino group carrier protein]-gamma-(L-lysyl)-L-glutamate aminotransferase", "2.6.3.1\tOximinotransferase", "2.6.99.1\tdATP(dGTP)--DNA purinetransferase", "2.6.99.2\tPyridoxine 5'-phosphate synthase", @@ -3373,7 +3407,7 @@ static const char* const kECNum_specific[] = { "2.7.1.45\t2-dehydro-3-deoxygluconokinase", "2.7.1.46\tL-arabinokinase", "2.7.1.47\tD-ribulokinase", - "2.7.1.48\tUridine kinase", + "2.7.1.48\tUridine/cytidine kinase", "2.7.1.49\tHydroxymethylpyrimidine kinase", "2.7.1.50\tHydroxyethylthiazole kinase", "2.7.1.51\tL-fuculokinase", @@ -3443,7 +3477,7 @@ static const char* const kECNum_specific[] = { "2.7.1.144\tTagatose-6-phosphate kinase", "2.7.1.145\tDeoxynucleoside kinase", "2.7.1.146\tADP-specific phosphofructokinase", - "2.7.1.147\tADP-specific glucokinase", + "2.7.1.147\tADP-specific glucose/glucosamine kinase", "2.7.1.148\t4-(cytidine 5'-diphospho)-2-C-methyl-D-erythritol kinase", "2.7.1.149\t1-phosphatidylinositol-5-phosphate 4-kinase", "2.7.1.150\t1-phosphatidylinositol-3-phosphate 5-kinase", @@ -3524,6 +3558,7 @@ static const char* const kECNum_specific[] = { "2.7.1.227\tInositol phosphorylceramide synthase", "2.7.1.228\tMannosyl-inositol-phosphoceramide inositolphosphotransferase", "2.7.1.229\tDeoxyribokinase", + "2.7.1.230\tAmicoumacin kinase", "2.7.2.1\tAcetate kinase", "2.7.2.2\tCarbamate kinase", "2.7.2.3\tPhosphoglycerate kinase", @@ -3537,6 +3572,8 @@ static const char* const kECNum_specific[] = { "2.7.2.13\tGlutamate 1-kinase", "2.7.2.14\tBranched-chain-fatty-acid kinase", 
"2.7.2.15\tPropionate kinase", + "2.7.2.16\t2-phosphoglycerate kinase", + "2.7.2.17\t[Amino-group carrier protein]-L-2-aminoadipate 6-kinase", "2.7.3.1\tGuanidinoacetate kinase", "2.7.3.2\tCreatine kinase", "2.7.3.3\tArginine kinase", @@ -3846,6 +3883,7 @@ static const char* const kECNum_specific[] = { "2.8.3.23\tCaffeate CoA-transferase", "2.8.3.24\t(R)-2-hydroxy-4-methylpentanoate CoA-transferase", "2.8.3.25\tBile acid CoA-transferase", + "2.8.3.26\tSuccinyl-CoA:mesaconate CoA transferase", "2.8.4.1\tCoenzyme-B sulfoethylthiotransferase", "2.8.4.2\tArsenate-mycothiol transferase", "2.8.4.3\ttRNA-2-methylthio-N(6)-dimethylallyladenosine synthase", @@ -3855,6 +3893,7 @@ static const char* const kECNum_specific[] = { "2.8.5.2\tL-cysteine S-thiosulfotransferase", "2.9.1.1\tL-seryl-tRNA(Sec) selenium transferase", "2.9.1.2\tO-phospho-L-seryl-tRNA(Sec):L-selenocysteinyl-tRNA synthase", + "2.9.1.3\ttRNA 2-selenouridine synthase", "2.10.1.1\tMolybdopterin molybdotransferase", "3.1.1.1\tCarboxylesterase", "3.1.1.2\tArylesterase", @@ -4092,6 +4131,8 @@ static const char* const kECNum_specific[] = { "3.1.3.104\t5-amino-6-(5-phospho-D-ribitylamino)uracil phosphatase", "3.1.3.105\tN-acetyl-D-muramate 6-phosphate phosphatase", "3.1.3.106\t2-lysophosphatidate phosphatase", + "3.1.3.107\tAmicoumacin phosphatase", + "3.1.3.108\tNocturnin", "3.1.4.1\tPhosphodiesterase I", "3.1.4.2\tGlycerophosphocholine phosphodiesterase", "3.1.4.3\tPhospholipase C", @@ -4164,10 +4205,7 @@ static const char* const kECNum_specific[] = { "3.1.11.4\tExodeoxyribonuclease (phage SP3-induced)", "3.1.11.5\tExodeoxyribonuclease V", "3.1.11.6\tExodeoxyribonuclease VII", - "3.1.11.7\tAdenosine-5'-diphospho-5'-(DNA) diphosphatase", - "3.1.11.8\tGuaosine-5'-diphospho-5'-(DNA) diphosphatase", "3.1.12.1\t5' to 3' exodeoxyribonuclease (nucleoside 3'-phosphate-forming)", - "3.1.12.2\tDNA-3'-diphospho-5'-guanosine diphosphatase", "3.1.13.1\tExoribonuclease II", "3.1.13.2\tExoribonuclease H", 
"3.1.13.3\tOligonucleotidase", @@ -4204,7 +4242,6 @@ static const char* const kECNum_specific[] = { "3.1.26.12\tRibonuclease E", "3.1.26.13\tRetroviral ribonuclease H", "3.1.26.n2\tArgonaute-2", - "3.1.27.3\tRibonuclease T(1)", "3.1.27.7\tRibonuclease F", "3.1.27.8\tRibonuclease V", "3.1.30.1\tAspergillus nuclease S(1)", @@ -4243,7 +4280,6 @@ static const char* const kECNum_specific[] = { "3.2.1.41\tPullulanase", "3.2.1.42\tGDP-glucosidase", "3.2.1.43\tBeta-L-rhamnosidase", - "3.2.1.44\tFucoidanase", "3.2.1.45\tGlucosylceramidase", "3.2.1.46\tGalactosylceramidase", "3.2.1.47\tGalactosylgalactosylglucosylceramidase", @@ -4348,7 +4384,7 @@ static const char* const kECNum_specific[] = { "3.2.1.152\tMannosylglycoprotein endo-beta-mannosidase", "3.2.1.153\tFructan beta-(2,1)-fructosidase", "3.2.1.154\tFructan beta-(2,6)-fructosidase", - "3.2.1.155\tXyloglucan-specific exo-beta-1,4-glucanase", + "3.2.1.155\tXyloglucan-specific endo-processive beta-1,4-glucanase", "3.2.1.156\tOligosaccharide reducing-end xylanase", "3.2.1.157\tIota-carrageenase", "3.2.1.158\tAlpha-agarase", @@ -4403,6 +4439,9 @@ static const char* const kECNum_specific[] = { "3.2.1.208\tGlucosylglycerate hydrolase", "3.2.1.209\tEndoplasmic reticulum Man(9)GlcNAc(2) 1,2-alpha-mannosidase", "3.2.1.210\tEndoplasmic reticulum Man(8)GlcNAc(2) 1,2-alpha-mannosidase", + "3.2.1.211\tEndo-(1->3)-fucoidanase", + "3.2.1.212\tEndo-(1->4)-fucoidanase", + "3.2.1.213\tGalactan exo-1,6-beta-galactobiohydrolase (non-reducing end)", "3.2.1.n1\tBlood group B branched chain alpha-1,3-galactosidase", "3.2.1.n2\tBlood group B linear chain alpha-1,3-galactosidase", "3.2.1.n3\tDictyostelium lysozyme A", @@ -4525,6 +4564,7 @@ static const char* const kECNum_specific[] = { "3.4.17.21\tGlutamate carboxypeptidase II", "3.4.17.22\tMetallocarboxypeptidase D", "3.4.17.23\tAngiotensin-converting enzyme 2", + "3.4.17.24\tTubulin-glutamate carboxypeptidase", "3.4.18.1\tCathepsin X", "3.4.19.1\tAcylaminoacyl-peptidase", 
"3.4.19.2\tPeptidyl-glycinamidase", @@ -4925,7 +4965,7 @@ static const char* const kECNum_specific[] = { "3.5.1.107\tMaleamate amidohydrolase", "3.5.1.108\tUDP-3-O-acyl-N-acetylglucosamine deacetylase", "3.5.1.109\tSphingomyelin deacylase", - "3.5.1.110\tPeroxyureidoacrylate/ureidoacrylate amidohydrolase", + "3.5.1.110\tUreidoacrylate amidohydrolase", "3.5.1.111\t2-oxoglutaramate amidase", "3.5.1.112\t2'-N-acetylparomamine deacetylase", "3.5.1.113\t2'''-acetyl-6'''-hydroxyneomycin C deacetylase", @@ -4944,8 +4984,9 @@ static const char* const kECNum_specific[] = { "3.5.1.127\tJasmonoyl-L-amino acid hydrolase", "3.5.1.128\tDeaminated glutathione amidase", "3.5.1.129\tN(5)-(cytidine 5'-diphosphoramidyl)-L-glutamine hydrolase", - "3.5.1.130\t[Lysine-biosynthesis-protein LysW]-lysine/ornithine hydrolase", + "3.5.1.130\t[Amino group carrier protein]-lysine hydrolase", "3.5.1.131\t1-carboxybiuret hydrolase", + "3.5.1.132\t[Amino group carrier protein]-ornithine hydrolase", "3.5.1.133\tN(alpha)-acyl-L-glutamine aminoacylase", "3.5.1.134\t(Indol-3-yl)acetyl-L-aspartate hydrolase", "3.5.1.n3\t4-deoxy-4-formamido-L-arabinose-phosphoundecaprenol deformylase", @@ -5106,6 +5147,11 @@ static const char* const kECNum_specific[] = { "3.6.1.66\tXTP/dITP diphosphatase", "3.6.1.67\tDihydroneopterin triphosphate diphosphatase", "3.6.1.68\tGeranyl diphosphate phosphohydrolase", + "3.6.1.69\t8-oxo-(d)GTP phosphatase", + "3.6.1.70\tGuaosine-5'-diphospho-5'-(DNA) diphosphatase", + "3.6.1.71\tAdenosine-5'-diphospho-5'-(DNA) diphosphatase", + "3.6.1.72\tDNA-3'-diphospho-5'-guanosine diphosphatase", + "3.6.1.73\tInosine/xanthosine triphosphatase", "3.6.1.n1\tD-tyrosyl-tRNA(Tyr) hydrolase", "3.6.1.n2\tL-cysteinyl-tRNA(Pro)", "3.6.1.n3\tL-cysteinyl-tRNA(Cys) hydrolase", @@ -5147,6 +5193,7 @@ static const char* const kECNum_specific[] = { "3.7.1.23\tMaleylpyruvate hydrolase", "3.7.1.24\t2,4-diacetylphloroglucinol hydrolase", "3.7.1.25\t2-hydroxy-6-oxohepta-2,4-dienoate hydrolase", + 
"3.7.1.26\t2,4-didehydro-3-deoxy-L-rhamnonate hydrolase", "3.8.1.1\tAlkylhalidase", "3.8.1.2\t(S)-2-haloacid dehalogenase", "3.8.1.3\tHaloacetate dehalogenase", @@ -5173,6 +5220,7 @@ static const char* const kECNum_specific[] = { "3.13.1.6\t[CysO sulfur-carrier protein]-S-L-cysteine hydrolase", "3.13.1.7\tCarbonyl sulfide hydrolase", "3.13.1.8\tS-adenosyl-L-methionine hydrolase (adenosine-forming)", + "3.13.1.9\tS-inosyl-L-homocysteine hydrolase", "4.1.1.1\tPyruvate decarboxylase", "4.1.1.2\tOxalate decarboxylase", "4.1.1.4\tAcetoacetate decarboxylase", @@ -5283,6 +5331,7 @@ static const char* const kECNum_specific[] = { "4.1.1.116\tD-ornithine/D-lysine decarboxylase", "4.1.1.117\t2-((L-alanin-3-ylcarbamoyl)methyl)-2-hydroxybutanedioate decarboxylase", "4.1.1.118\tIsophthalyl-CoA decarboxylase", + "4.1.1.119\tPhenylacetate decarboxylase", "4.1.2.2\tKetotetrose-phosphate aldolase", "4.1.2.4\tDeoxyribose-phosphate aldolase", "4.1.2.5\tL-threonine aldolase", @@ -5313,7 +5362,6 @@ static const char* const kECNum_specific[] = { "4.1.2.36\tLactate aldolase", "4.1.2.38\tBenzoin aldolase", "4.1.2.40\tTagatose-bisphosphate aldolase", - "4.1.2.41\tVanillin synthase", "4.1.2.42\tD-threonine aldolase", "4.1.2.43\t3-hexulose-6-phosphate synthase", "4.1.2.44\t2,3-epoxybenzoyl-CoA dihydrolase", @@ -5333,6 +5381,7 @@ static const char* const kECNum_specific[] = { "4.1.2.58\t2-dehydro-3,6-dideoxy-6-sulfogluconate aldolase", "4.1.2.59\tDihydroneopterin phosphate aldolase", "4.1.2.60\tDihydroneopterin triphosphate aldolase", + "4.1.2.61\tFeruloyl-CoA hydratase/lyase", "4.1.2.n2\t2-hydroxyphytanoyl-CoA lyase", "4.1.3.1\tIsocitrate lyase", "4.1.3.3\tN-acetylneuraminate lyase", @@ -5444,7 +5493,7 @@ static const char* const kECNum_specific[] = { "4.2.1.84\tNitrile hydratase", "4.2.1.85\tDimethylmaleate hydratase", "4.2.1.87\tOctopamine dehydratase", - "4.2.1.88\t(R)-synephrine", + "4.2.1.88\tSynephrine dehydratase", "4.2.1.90\tL-rhamnonate dehydratase", "4.2.1.91\tArogenate 
dehydratase", "4.2.1.92\tHydroperoxide dehydratase", @@ -5555,6 +5604,7 @@ static const char* const kECNum_specific[] = { "4.2.2.24\tRhamnogalacturonan exolyase", "4.2.2.25\tGellan lyase", "4.2.2.26\tOligo-alginate lyase", + "4.2.2.27\tPectin monosaccharide-lyase", "4.2.2.n1\tPeptidoglycan lytic exotransglycosylase", "4.2.2.n2\tPeptidoglycan lytic endotransglycosylase", "4.2.3.1\tThreonine synthase", @@ -5689,7 +5739,7 @@ static const char* const kECNum_specific[] = { "4.2.3.131\tMiltiradiene synthase", "4.2.3.132\tNeoabietadiene synthase", "4.2.3.133\tAlpha-copaene synthase", - "4.2.3.134\t5-phosphonooxy-L-lysine phospho-lyase", + "4.2.3.134\t5-phosphooxy-L-lysine phospho-lyase", "4.2.3.135\tDelta(6)-protoilludene synthase", "4.2.3.136\tAlpha-isocomene synthase", "4.2.3.137\t(E)-2-epi-beta-caryophyllene synthase", @@ -5868,6 +5918,8 @@ static const char* const kECNum_specific[] = { "4.6.1.21\tEnterobacter ribonuclease", "4.6.1.22\tBacillus subtilis ribonuclease", "4.6.1.23\tRibotoxin", + "4.6.1.24\tRibonuclease T(1)", + "4.6.1.25\tBacteriophage T(4) restriction endoribonuclease RegB", "4.7.1.1\tAlpha-D-ribose 1-methylphosphonate 5-phosphate C-P-lyase", "4.99.1.1\tProtoporphyrin ferrochelatase", "4.99.1.2\tAlkylmercury lyase", @@ -5999,7 +6051,7 @@ static const char* const kECNum_specific[] = { "5.3.1.29\tRibose 1,5-bisphosphate isomerase", "5.3.1.30\t5-deoxy-glucuronate isomerase", "5.3.1.31\tSulfoquinovose isomerase", - "5.3.1.32\t(4S)-4-hydroxy-5-phosphonooxypentane-2,3-dione isomerase", + "5.3.1.32\t(4S)-4-hydroxy-5-phosphooxypentane-2,3-dione isomerase", "5.3.1.33\tL-erythrulose 1-phosphate isomerase", "5.3.1.34\tD-erythrulose 4-phosphate isomerase", "5.3.1.35\t2-dehydrotetronate isomerase", @@ -6171,6 +6223,8 @@ static const char* const kECNum_specific[] = { "5.5.1.31\tHapalindole H synthase", "5.5.1.32\t12-epi-hapalindole U synthase", "5.5.1.33\t12-epi-fischerindole U synthase", + "5.5.1.34\t(+)-cis,trans-nepetalactol synthase", + 
"5.5.1.35\t(+)-cis,cis-nepetalactol synthase", "5.6.1.1\tMicrotubule-severing ATPase", "5.6.1.2\tDynein ATPase", "5.6.1.3\tPlus-end-directed kinesin ATPase", @@ -6270,6 +6324,9 @@ static const char* const kECNum_specific[] = { "6.2.1.58\tIsophthalate--CoA ligase", "6.2.1.59\tLong-chain fatty acid adenylase/transferase FadD26", "6.2.1.60\tMarinolic acid--CoA ligase", + "6.2.1.61\tSalicylate--[aryl-carrier protein] ligase", + "6.2.1.62\t3,4-dihydroxybenzoate--[aryl-carrier protein] ligase", + "6.2.1.63\tL-arginine--[L-arginyl-carrier protein] ligase", "6.2.1.n2\tAmino acid--[acyl-carrier-protein] ligase", "6.2.1.n3\tMalonate--CoA ligase", "6.3.1.1\tAspartate--ammonia ligase", @@ -6326,7 +6383,7 @@ static const char* const kECNum_specific[] = { "6.3.2.40\tCyclopeptine synthase", "6.3.2.41\tN-acetylaspartylglutamate synthase", "6.3.2.42\tN-acetylaspartylglutamylglutamate synthase", - "6.3.2.43\t[Amino group carrier protein]--L-2-aminoadipate ligase", + "6.3.2.43\t[Amino-group carrier protein]--L-2-aminoadipate ligase", "6.3.2.44\tPantoate--beta-alanine ligase (ADP-forming)", "6.3.2.45\tUDP-N-acetylmuramate L-alanyl-gamma-D-glutamyl-meso-2,6-diaminoheptanedioate ligase", "6.3.2.46\tFumarate--(S)-2,3-diaminopropanoate ligase", @@ -6335,7 +6392,7 @@ static const char* const kECNum_specific[] = { "6.3.2.49\tL-alanine--L-anticapsin ligase", "6.3.2.50\tTenuazonic acid synthetase", "6.3.2.51\tPhosphopantothenate--cysteine ligase (ATP)", - "6.3.2.52\tJasmonoyl--L-amino acid synthetase", + "6.3.2.52\tJasmonoyl--L-amino acid ligase", "6.3.2.53\tUDP-N-acetylmuramoyl-L-alanine--L-glutamate ligase", "6.3.2.54\tL-2,3-diaminopropanoate--citrate ligase", "6.3.2.55\t2-((L-alanin-3-ylcarbamoyl)methyl)-3-(2-aminoethylcarbamoyl)-2-hydroxypropanoate synthase", @@ -6412,6 +6469,7 @@ static const char* const kECNum_specific[] = { "7.1.1.6\tPlastoquinol--plastocyanin reductase", "7.1.1.7\tUbiquinol oxidase (electrogenic, proton-motive force generating)", "7.1.1.8\tQuinol--cytochrome-c 
reductase", + "7.1.1.9\tCytochrome-c oxidase", "7.1.2.1\tP-type H(+)-exporting transporter", "7.1.2.2\tH(+)-transporting two-sector ATPase", "7.1.3.1\tH(+)-exporting diphosphatase", @@ -6463,6 +6521,7 @@ static const char* const kECNum_specific[] = { "7.4.2.10\tABC-type glutathione transporter", "7.4.2.11\tABC-type methionine transporter", "7.4.2.12\tABC-type cystine transporter", + "7.4.2.13\tABC-type tyrosine transporter", "7.5.2.1\tABC-type maltose transporter", "7.5.2.2\tABC-type oligosaccharide transporter", "7.5.2.3\tABC-type beta-glucan transporter", diff --git a/c++/src/objects/seqfeat/ecnum_specific.txt b/c++/src/objects/seqfeat/ecnum_specific.txt index cc45423f..088ef8ec 100644 --- a/c++/src/objects/seqfeat/ecnum_specific.txt +++ b/c++/src/objects/seqfeat/ecnum_specific.txt @@ -396,6 +396,10 @@ 1.1.1.417 3-beta-hydroxysteroid-4-beta-carboxylate 3-dehydrogenase (decarboxylating) 1.1.1.418 Plant 3-beta-hydroxysteroid-4-alpha-carboxylate 3-dehydrogenase (decarboxylating) 1.1.1.419 Nepetalactol dehydrogenase +1.1.1.420 D-apiose dehydrogenase +1.1.1.421 D-apionate oxidoisomerase +1.1.1.422 Pseudoephedrine dehydrogenase +1.1.1.423 Ephedrine dehydrogenase 1.1.1.n4 (-)-trans-carveol dehydrogenase 1.1.1.n5 3-methylmalate dehydrogenase 1.1.1.n11 Succinic semialdehyde reductase @@ -408,6 +412,7 @@ 1.1.2.7 Methanol dehydrogenase (cytochrome c) 1.1.2.8 Alcohol dehydrogenase (cytochrome c) 1.1.2.9 1-butanol dehydrogenase (cytochrome c) +1.1.2.10 Lanthanide-dependent methanol dehydrogenase 1.1.3.4 Glucose oxidase 1.1.3.5 Hexose oxidase 1.1.3.6 Cholesterol oxidase @@ -462,6 +467,7 @@ 1.1.98.4 F420H(2):quinone oxidoreductase 1.1.98.5 Secondary-alcohol dehydrogenase (coenzyme-F420) 1.1.98.6 Ribonucleoside-triphosphate reductase (formate) +1.1.98.7 Serine-type anaerobic sulfatase-maturating enzyme 1.1.99.1 Choline dehydrogenase 1.1.99.2 L-2-hydroxyglutarate dehydrogenase 1.1.99.3 Gluconate 2-dehydrogenase (acceptor) @@ -581,6 +587,7 @@ 1.2.1.100 
5-formyl-3-hydroxy-2-methylpyridine 4-carboxylate 5-dehydrogenase 1.2.1.101 L-tyrosine reductase 1.2.1.102 Isopyridoxal dehydrogenase (5-pyridoxate-forming) +1.2.1.103 [Amino-group carrier protein]-6-phospho-L-2-aminoadipate reductase 1.2.1.n2 Fatty acyl-CoA reductase 1.2.2.1 Formate dehydrogenase (cytochrome) 1.2.2.4 Carbon-monoxide dehydrogenase (cytochrome b-561) @@ -645,7 +652,7 @@ 1.3.1.31 2-enoate reductase 1.3.1.32 Maleylacetate reductase 1.3.1.33 Protochlorophyllide reductase -1.3.1.34 2,4-dienoyl-CoA reductase (NADPH) +1.3.1.34 2,4-dienoyl-CoA reductase ((2E)-enoyl-CoA-producing) 1.3.1.36 Geissoschizine dehydrogenase 1.3.1.37 Cis-2-enoyl-CoA reductase (NADPH) 1.3.1.38 Trans-2-enoyl-CoA reductase (NADPH) @@ -702,7 +709,6 @@ 1.3.1.96 Botryococcus squalene synthase 1.3.1.97 Botryococcene synthase 1.3.1.98 UDP-N-acetylmuramate dehydrogenase -1.3.1.99 Iridoid synthase 1.3.1.100 Chanoclavine-I aldehyde reductase 1.3.1.101 2,3-bis-O-geranylgeranyl-sn-glycerol 1-phosphate reductase (NAD(P)H) 1.3.1.102 2-alkenal reductase (NADP(+)) @@ -724,6 +730,10 @@ 1.3.1.118 Meromycolic acid enoyl-[acyl-carrier-protein] reductase 1.3.1.119 Chlorobenzene dihydrodiol dehydrogenase 1.3.1.120 Cyclohexane-1-carbonyl-CoA reductase (NADP(+)) +1.3.1.121 4-amino-4-deoxyprephenate dehydrogenase +1.3.1.122 (S)-8-oxocitronellyl enol synthase +1.3.1.123 7-epi-iridoid synthase +1.3.1.124 2,4-dienoyl-CoA reductase ((3E)-enoyl-CoA-producing) 1.3.1.n3 Curcumin reductase 1.3.2.3 L-galactonolactone dehydrogenase 1.3.3.3 Coproporphyrinogen oxidase @@ -773,6 +783,7 @@ 1.3.8.12 (2S)-methylsuccinyl-CoA dehydrogenase 1.3.8.13 Crotonobetainyl-CoA reductase 1.3.8.14 L-prolyl-[peptidyl-carrier protein] dehydrogenase +1.3.8.15 3-(aryl)acrylate reductase 1.3.98.1 Dihydroorotate oxidase (fumarate) 1.3.98.3 Coproporphyrinogen dehydrogenase 1.3.98.4 5a,11a-dehydrotetracycline reductase @@ -1051,6 +1062,8 @@ 1.8.4.12 Peptide-methionine (R)-S-oxide reductase 1.8.4.13 L-methionine (S)-S-oxide reductase 1.8.4.14 
L-methionine (R)-S-oxide reductase +1.8.4.15 Protein dithiol oxidoreductase (disulfide-forming) +1.8.4.16 Thioredoxin:protein disulfide reductase 1.8.5.1 Glutathione dehydrogenase (ascorbate) 1.8.5.2 Thiosulfate dehydrogenase (quinone) 1.8.5.3 Respiratory dimethylsulfoxide reductase @@ -1059,6 +1072,7 @@ 1.8.5.6 Sulfite dehydrogenase (quinone) 1.8.5.7 Glutathionyl-hydroquinone reductase 1.8.5.8 Eukaryotic sulfide quinone oxidoreductase +1.8.5.9 Protein dithiol:quinone oxidoreductase DsbB 1.8.7.1 Assimilatory sulfite reductase (ferredoxin) 1.8.7.2 Ferredoxin:thioredoxin reductase 1.8.7.3 Ferredoxin:CoB-CoM heterodisulfide reductase @@ -1068,9 +1082,9 @@ 1.8.98.4 Coenzyme F420:CoB-CoM heterodisulfide,ferredoxin reductase 1.8.98.5 H(2):CoB-CoM heterodisulfide,ferredoxin reductase 1.8.98.6 Formate:CoB-CoM heterodisulfide,ferredoxin reductase +1.8.98.7 Cysteine-type anaerobic sulfatase-maturating enzyme 1.8.99.2 Adenylyl-sulfate reductase 1.8.99.5 Dissimilatory sulfite reductase -1.9.3.1 Cytochrome-c oxidase 1.9.6.1 Nitrate reductase (cytochrome) 1.9.98.1 Iron--cytochrome-c reductase 1.10.1.1 Trans-acenaphthene-1,2-diol dehydrogenase @@ -1085,6 +1099,7 @@ 1.10.3.11 Ubiquinol oxidase (non-electrogenic) 1.10.3.15 Grixazone synthase 1.10.3.16 Dihydrophenazinedicarboxylate synthase +1.10.3.17 Superoxide oxidase 1.10.5.1 Ribosyldihydronicotinamide dehydrogenase (quinone) 1.11.1.1 NADH peroxidase 1.11.1.2 NADPH peroxidase @@ -1099,7 +1114,6 @@ 1.11.1.12 Phospholipid-hydroperoxide glutathione peroxidase 1.11.1.13 Manganese peroxidase 1.11.1.14 Lignin peroxidase -1.11.1.15 Peroxiredoxin 1.11.1.16 Versatile peroxidase 1.11.1.17 Glutathione amide-dependent peroxidase 1.11.1.18 Bromide peroxidase @@ -1108,6 +1122,12 @@ 1.11.1.21 Catalase peroxidase 1.11.1.22 Hydroperoxy fatty acid reductase 1.11.1.23 (S)-2-hydroxypropylphosphonic acid epoxidase +1.11.1.24 Thioredoxin-dependent peroxiredoxin +1.11.1.25 Glutaredoxin-dependent peroxiredoxin +1.11.1.26 NADH-dependent peroxiredoxin 
+1.11.1.27 Glutathione-dependent peroxiredoxin +1.11.1.28 Lipoyl-dependent peroxiredoxin +1.11.1.29 Mycoredoxin-dependent peroxiredoxin 1.11.2.1 Unspecific peroxygenase 1.11.2.2 Myeloperoxidase 1.11.2.3 Plant seed peroxygenase @@ -1292,6 +1312,8 @@ 1.14.11.67 [Histone H3]-trimethyl-L-lysine(4) demethylase 1.14.11.68 [Histone H3]-trimethyl-L-lysine(27) demethylase 1.14.11.69 [Histone H3]-trimethyl-L-lysine(36) demethylase +1.14.11.70 7-deoxycylindrospermopsin hydroxylase +1.14.11.71 Methylphosphonate hydroxylase 1.14.11.n2 Methylcytosine dioxygenase 1.14.11.n4 Ankyrin-repeat-histidine dioxagenase 1.14.12.1 Anthranilate 1,2-dioxygenase (deaminating, decarboxylating) @@ -1438,6 +1460,7 @@ 1.14.13.244 Phenol 2-monooxygenase (NADH) 1.14.13.245 Assimilatory dimethylsulfide S-monooxygenase 1.14.13.246 4-beta-methylsterol monooxygenase +1.14.13.247 Stachydrine N-demethylase 1.14.13.n6 Hexahomomethionine N-hydroxylase 1.14.13.n7 4-nitrophenol 2-hydroxylase 1.14.14.1 Unspecific monooxygenase @@ -1663,6 +1686,7 @@ 1.14.18.9 4-alpha-methylsterol monooxygenase 1.14.18.10 Plant 4,4-dimethylsterol C-4-alpha-methyl-monooxygenase 1.14.18.11 Plant 4-alpha-monomethylsterol monooxygenase +1.14.18.12 2-hydroxy fatty acid dioxygenase 1.14.19.1 Stearoyl-CoA 9-desaturase 1.14.19.2 Stearoyl-[acyl-carrier-protein] 9-desaturase 1.14.19.3 Acyl-CoA 6-desaturase @@ -1848,6 +1872,7 @@ 1.17.99.4 Uracil/thymine dehydrogenase 1.17.99.6 Epoxyqueuosine reductase 1.17.99.7 Formate dehydrogenase (acceptor) +1.17.99.8 Limonene dehydrogenase 1.18.1.1 Rubredoxin--NAD(+) reductase 1.18.1.2 Ferredoxin--NADP(+) reductase 1.18.1.3 Ferredoxin--NAD(+) reductase @@ -1861,7 +1886,7 @@ 1.19.6.1 Nitrogenase (flavodoxin) 1.20.1.1 Phosphonate dehydrogenase 1.20.2.1 Arsenate reductase (cytochrome c) -1.20.4.1 Arsenate reductase (glutaredoxin) +1.20.4.1 Arsenate reductase (glutathione/glutaredoxin) 1.20.4.2 Methylarsonate reductase 1.20.4.3 Mycoredoxin 1.20.4.4 Arsenate reductase (thioredoxin) @@ -2237,13 +2262,14 @@ 
2.1.1.360 [Histone H3]-lysine(79) N-trimethyltransferase 2.1.1.361 [Histone H4]-lysine(20) N-methyltransferase 2.1.1.362 [Histone H4]-N-methyl-L-lysine(20) N-methyltransferase +2.1.1.363 Pre-sodorifen synthase 2.1.1.n1 Resorcinol O-methyltransferase 2.1.1.n4 Thiocyanate methyltransferase 2.1.1.n7 5-pentadecatrienyl resorcinol O-methyltransferase 2.1.1.n8 Small RNA 2'-O-methyltransferase 2.1.1.n11 Methylphosphotriester-DNA--[protein]-cysteine S-methyltransferase 2.1.2.1 Glycine hydroxymethyltransferase -2.1.2.2 Phosphoribosylglycinamide formyltransferase +2.1.2.2 Phosphoribosylglycinamide formyltransferase 1 2.1.2.3 Phosphoribosylaminoimidazolecarboxamide formyltransferase 2.1.2.4 Glycine formimidoyltransferase 2.1.2.5 Glutamate formimidoyltransferase @@ -2515,7 +2541,7 @@ 2.3.1.242 Kdo(2)-lipid IV(A) palmitoleoyltransferase 2.3.1.243 Lauroyl-Kdo(2)-lipid IV(A) myristoyltransferase 2.3.1.244 2-methylbutanoate polyketide synthase -2.3.1.245 3-hydroxy-5-phosphonooxypentane-2,4-dione thiolase +2.3.1.245 3-hydroxy-5-phosphooxypentane-2,4-dione thiolase 2.3.1.246 3,5-dihydroxyphenylacetyl-CoA synthase 2.3.1.247 3-keto-5-aminohexanoate cleavage enzyme 2.3.1.248 Spermidine disinapoyl transferase @@ -2564,6 +2590,8 @@ 2.3.1.291 Sphingoid base N-palmitoyltransferase 2.3.1.292 (Phenol)carboxyphthiodiolenone synthase 2.3.1.293 Meromycolic acid 3-oxoacyl-(acyl carrier protein) synthase I +2.3.1.294 Meromycolic acid 3-oxoacyl-(acyl carrier protein) synthase II +2.3.1.295 Mycoketide-CoA synthase 2.3.1.296 Omega-hydroxyceramide transacylase 2.3.1.297 Very-long-chain ceramide synthase 2.3.1.298 Ultra-long-chain ceramide synthase @@ -2604,6 +2632,7 @@ 2.3.2.30 L-ornithine N(alpha)-acyltransferase 2.3.2.31 RBR-type E3 ubiquitin transferase 2.3.2.32 Cullin-RING-type E3 NEDD8 transferase +2.3.2.33 RCR-type E3 ubiquitin transferase 2.3.3.1 Citrate (Si)-synthase 2.3.3.2 Decylcitrate synthase 2.3.3.3 Citrate (Re)-synthase @@ -2960,6 +2989,10 @@ 2.4.1.368 Oleanolate 3-O-glucosyltransferase 
2.4.1.369 Enterobactin C-glucosyltransferase 2.4.1.370 Inositol phosphorylceramide mannosyltransferase +2.4.1.371 Polymannosyl GlcNAc-diphospho-ditrans,octacis-undecaprenol 2,3-alpha-mannosylpolymerase +2.4.1.372 Mutansucrase +2.4.1.373 Alpha-(1->2) branching sucrase +2.4.1.374 Beta-1,2-mannooligosaccharide synthase 2.4.1.n2 Loliose synthase 2.4.2.1 Purine-nucleoside phosphorylase 2.4.2.2 Pyrimidine-nucleoside phosphorylase @@ -3020,7 +3053,7 @@ 2.4.2.60 Cysteine-dependent adenosine diphosphate thiazole synthase 2.4.2.61 Alpha-dystroglycan beta-1,4-xylosyltransferase 2.4.2.n2 Glucoside xylosyltransferase -2.4.2.n3 Xyloside xylosyltransferase +2.4.2.n3 Xylosyl alpha-1,3-xylosyltransferase 2.4.99.1 Beta-galactoside alpha-(2,6)-sialyltransferase 2.4.99.2 Beta-D-galactosyl-(1->3)-N-acetyl-beta-D-galactosaminide alpha-2,3-sialyltransferase 2.4.99.3 Alpha-N-acetylgalactosaminide alpha-2,6-sialyltransferase @@ -3292,6 +3325,7 @@ 2.6.1.115 5-hydroxydodecatetraenal 1-aminotransferase 2.6.1.116 6-aminohexanoate aminotransferase 2.6.1.117 L-glutamine--4-(methylsulfanyl)-2-oxobutanoate aminotransferase +2.6.1.118 [Amino group carrier protein]-gamma-(L-lysyl)-L-glutamate aminotransferase 2.6.3.1 Oximinotransferase 2.6.99.1 dATP(dGTP)--DNA purinetransferase 2.6.99.2 Pyridoxine 5'-phosphate synthase @@ -3340,7 +3374,7 @@ 2.7.1.45 2-dehydro-3-deoxygluconokinase 2.7.1.46 L-arabinokinase 2.7.1.47 D-ribulokinase -2.7.1.48 Uridine kinase +2.7.1.48 Uridine/cytidine kinase 2.7.1.49 Hydroxymethylpyrimidine kinase 2.7.1.50 Hydroxyethylthiazole kinase 2.7.1.51 L-fuculokinase @@ -3410,7 +3444,7 @@ 2.7.1.144 Tagatose-6-phosphate kinase 2.7.1.145 Deoxynucleoside kinase 2.7.1.146 ADP-specific phosphofructokinase -2.7.1.147 ADP-specific glucokinase +2.7.1.147 ADP-specific glucose/glucosamine kinase 2.7.1.148 4-(cytidine 5'-diphospho)-2-C-methyl-D-erythritol kinase 2.7.1.149 1-phosphatidylinositol-5-phosphate 4-kinase 2.7.1.150 1-phosphatidylinositol-3-phosphate 5-kinase @@ -3491,6 +3525,7 @@ 
2.7.1.227 Inositol phosphorylceramide synthase 2.7.1.228 Mannosyl-inositol-phosphoceramide inositolphosphotransferase 2.7.1.229 Deoxyribokinase +2.7.1.230 Amicoumacin kinase 2.7.2.1 Acetate kinase 2.7.2.2 Carbamate kinase 2.7.2.3 Phosphoglycerate kinase @@ -3504,6 +3539,8 @@ 2.7.2.13 Glutamate 1-kinase 2.7.2.14 Branched-chain-fatty-acid kinase 2.7.2.15 Propionate kinase +2.7.2.16 2-phosphoglycerate kinase +2.7.2.17 [Amino-group carrier protein]-L-2-aminoadipate 6-kinase 2.7.3.1 Guanidinoacetate kinase 2.7.3.2 Creatine kinase 2.7.3.3 Arginine kinase @@ -3813,6 +3850,7 @@ 2.8.3.23 Caffeate CoA-transferase 2.8.3.24 (R)-2-hydroxy-4-methylpentanoate CoA-transferase 2.8.3.25 Bile acid CoA-transferase +2.8.3.26 Succinyl-CoA:mesaconate CoA transferase 2.8.4.1 Coenzyme-B sulfoethylthiotransferase 2.8.4.2 Arsenate-mycothiol transferase 2.8.4.3 tRNA-2-methylthio-N(6)-dimethylallyladenosine synthase @@ -3822,6 +3860,7 @@ 2.8.5.2 L-cysteine S-thiosulfotransferase 2.9.1.1 L-seryl-tRNA(Sec) selenium transferase 2.9.1.2 O-phospho-L-seryl-tRNA(Sec):L-selenocysteinyl-tRNA synthase +2.9.1.3 tRNA 2-selenouridine synthase 2.10.1.1 Molybdopterin molybdotransferase 3.1.1.1 Carboxylesterase 3.1.1.2 Arylesterase @@ -4059,6 +4098,8 @@ 3.1.3.104 5-amino-6-(5-phospho-D-ribitylamino)uracil phosphatase 3.1.3.105 N-acetyl-D-muramate 6-phosphate phosphatase 3.1.3.106 2-lysophosphatidate phosphatase +3.1.3.107 Amicoumacin phosphatase +3.1.3.108 Nocturnin 3.1.4.1 Phosphodiesterase I 3.1.4.2 Glycerophosphocholine phosphodiesterase 3.1.4.3 Phospholipase C @@ -4131,10 +4172,7 @@ 3.1.11.4 Exodeoxyribonuclease (phage SP3-induced) 3.1.11.5 Exodeoxyribonuclease V 3.1.11.6 Exodeoxyribonuclease VII -3.1.11.7 Adenosine-5'-diphospho-5'-(DNA) diphosphatase -3.1.11.8 Guaosine-5'-diphospho-5'-(DNA) diphosphatase 3.1.12.1 5' to 3' exodeoxyribonuclease (nucleoside 3'-phosphate-forming) -3.1.12.2 DNA-3'-diphospho-5'-guanosine diphosphatase 3.1.13.1 Exoribonuclease II 3.1.13.2 Exoribonuclease H 3.1.13.3 
Oligonucleotidase @@ -4171,7 +4209,6 @@ 3.1.26.12 Ribonuclease E 3.1.26.13 Retroviral ribonuclease H 3.1.26.n2 Argonaute-2 -3.1.27.3 Ribonuclease T(1) 3.1.27.7 Ribonuclease F 3.1.27.8 Ribonuclease V 3.1.30.1 Aspergillus nuclease S(1) @@ -4210,7 +4247,6 @@ 3.2.1.41 Pullulanase 3.2.1.42 GDP-glucosidase 3.2.1.43 Beta-L-rhamnosidase -3.2.1.44 Fucoidanase 3.2.1.45 Glucosylceramidase 3.2.1.46 Galactosylceramidase 3.2.1.47 Galactosylgalactosylglucosylceramidase @@ -4315,7 +4351,7 @@ 3.2.1.152 Mannosylglycoprotein endo-beta-mannosidase 3.2.1.153 Fructan beta-(2,1)-fructosidase 3.2.1.154 Fructan beta-(2,6)-fructosidase -3.2.1.155 Xyloglucan-specific exo-beta-1,4-glucanase +3.2.1.155 Xyloglucan-specific endo-processive beta-1,4-glucanase 3.2.1.156 Oligosaccharide reducing-end xylanase 3.2.1.157 Iota-carrageenase 3.2.1.158 Alpha-agarase @@ -4370,6 +4406,9 @@ 3.2.1.208 Glucosylglycerate hydrolase 3.2.1.209 Endoplasmic reticulum Man(9)GlcNAc(2) 1,2-alpha-mannosidase 3.2.1.210 Endoplasmic reticulum Man(8)GlcNAc(2) 1,2-alpha-mannosidase +3.2.1.211 Endo-(1->3)-fucoidanase +3.2.1.212 Endo-(1->4)-fucoidanase +3.2.1.213 Galactan exo-1,6-beta-galactobiohydrolase (non-reducing end) 3.2.1.n1 Blood group B branched chain alpha-1,3-galactosidase 3.2.1.n2 Blood group B linear chain alpha-1,3-galactosidase 3.2.1.n3 Dictyostelium lysozyme A @@ -4492,6 +4531,7 @@ 3.4.17.21 Glutamate carboxypeptidase II 3.4.17.22 Metallocarboxypeptidase D 3.4.17.23 Angiotensin-converting enzyme 2 +3.4.17.24 Tubulin-glutamate carboxypeptidase 3.4.18.1 Cathepsin X 3.4.19.1 Acylaminoacyl-peptidase 3.4.19.2 Peptidyl-glycinamidase @@ -4892,7 +4932,7 @@ 3.5.1.107 Maleamate amidohydrolase 3.5.1.108 UDP-3-O-acyl-N-acetylglucosamine deacetylase 3.5.1.109 Sphingomyelin deacylase -3.5.1.110 Peroxyureidoacrylate/ureidoacrylate amidohydrolase +3.5.1.110 Ureidoacrylate amidohydrolase 3.5.1.111 2-oxoglutaramate amidase 3.5.1.112 2'-N-acetylparomamine deacetylase 3.5.1.113 2'''-acetyl-6'''-hydroxyneomycin C deacetylase @@ 
-4911,8 +4951,9 @@ 3.5.1.127 Jasmonoyl-L-amino acid hydrolase 3.5.1.128 Deaminated glutathione amidase 3.5.1.129 N(5)-(cytidine 5'-diphosphoramidyl)-L-glutamine hydrolase -3.5.1.130 [Lysine-biosynthesis-protein LysW]-lysine/ornithine hydrolase +3.5.1.130 [Amino group carrier protein]-lysine hydrolase 3.5.1.131 1-carboxybiuret hydrolase +3.5.1.132 [Amino group carrier protein]-ornithine hydrolase 3.5.1.133 N(alpha)-acyl-L-glutamine aminoacylase 3.5.1.134 (Indol-3-yl)acetyl-L-aspartate hydrolase 3.5.1.n3 4-deoxy-4-formamido-L-arabinose-phosphoundecaprenol deformylase @@ -5073,6 +5114,11 @@ 3.6.1.66 XTP/dITP diphosphatase 3.6.1.67 Dihydroneopterin triphosphate diphosphatase 3.6.1.68 Geranyl diphosphate phosphohydrolase +3.6.1.69 8-oxo-(d)GTP phosphatase +3.6.1.70 Guaosine-5'-diphospho-5'-(DNA) diphosphatase +3.6.1.71 Adenosine-5'-diphospho-5'-(DNA) diphosphatase +3.6.1.72 DNA-3'-diphospho-5'-guanosine diphosphatase +3.6.1.73 Inosine/xanthosine triphosphatase 3.6.1.n1 D-tyrosyl-tRNA(Tyr) hydrolase 3.6.1.n2 L-cysteinyl-tRNA(Pro) 3.6.1.n3 L-cysteinyl-tRNA(Cys) hydrolase @@ -5114,6 +5160,7 @@ 3.7.1.23 Maleylpyruvate hydrolase 3.7.1.24 2,4-diacetylphloroglucinol hydrolase 3.7.1.25 2-hydroxy-6-oxohepta-2,4-dienoate hydrolase +3.7.1.26 2,4-didehydro-3-deoxy-L-rhamnonate hydrolase 3.8.1.1 Alkylhalidase 3.8.1.2 (S)-2-haloacid dehalogenase 3.8.1.3 Haloacetate dehalogenase @@ -5140,6 +5187,7 @@ 3.13.1.6 [CysO sulfur-carrier protein]-S-L-cysteine hydrolase 3.13.1.7 Carbonyl sulfide hydrolase 3.13.1.8 S-adenosyl-L-methionine hydrolase (adenosine-forming) +3.13.1.9 S-inosyl-L-homocysteine hydrolase 4.1.1.1 Pyruvate decarboxylase 4.1.1.2 Oxalate decarboxylase 4.1.1.4 Acetoacetate decarboxylase @@ -5250,6 +5298,7 @@ 4.1.1.116 D-ornithine/D-lysine decarboxylase 4.1.1.117 2-((L-alanin-3-ylcarbamoyl)methyl)-2-hydroxybutanedioate decarboxylase 4.1.1.118 Isophthalyl-CoA decarboxylase +4.1.1.119 Phenylacetate decarboxylase 4.1.2.2 Ketotetrose-phosphate aldolase 4.1.2.4 
Deoxyribose-phosphate aldolase 4.1.2.5 L-threonine aldolase @@ -5280,7 +5329,6 @@ 4.1.2.36 Lactate aldolase 4.1.2.38 Benzoin aldolase 4.1.2.40 Tagatose-bisphosphate aldolase -4.1.2.41 Vanillin synthase 4.1.2.42 D-threonine aldolase 4.1.2.43 3-hexulose-6-phosphate synthase 4.1.2.44 2,3-epoxybenzoyl-CoA dihydrolase @@ -5300,6 +5348,7 @@ 4.1.2.58 2-dehydro-3,6-dideoxy-6-sulfogluconate aldolase 4.1.2.59 Dihydroneopterin phosphate aldolase 4.1.2.60 Dihydroneopterin triphosphate aldolase +4.1.2.61 Feruloyl-CoA hydratase/lyase 4.1.2.n2 2-hydroxyphytanoyl-CoA lyase 4.1.3.1 Isocitrate lyase 4.1.3.3 N-acetylneuraminate lyase @@ -5411,7 +5460,7 @@ 4.2.1.84 Nitrile hydratase 4.2.1.85 Dimethylmaleate hydratase 4.2.1.87 Octopamine dehydratase -4.2.1.88 (R)-synephrine +4.2.1.88 Synephrine dehydratase 4.2.1.90 L-rhamnonate dehydratase 4.2.1.91 Arogenate dehydratase 4.2.1.92 Hydroperoxide dehydratase @@ -5522,6 +5571,7 @@ 4.2.2.24 Rhamnogalacturonan exolyase 4.2.2.25 Gellan lyase 4.2.2.26 Oligo-alginate lyase +4.2.2.27 Pectin monosaccharide-lyase 4.2.2.n1 Peptidoglycan lytic exotransglycosylase 4.2.2.n2 Peptidoglycan lytic endotransglycosylase 4.2.3.1 Threonine synthase @@ -5656,7 +5706,7 @@ 4.2.3.131 Miltiradiene synthase 4.2.3.132 Neoabietadiene synthase 4.2.3.133 Alpha-copaene synthase -4.2.3.134 5-phosphonooxy-L-lysine phospho-lyase +4.2.3.134 5-phosphooxy-L-lysine phospho-lyase 4.2.3.135 Delta(6)-protoilludene synthase 4.2.3.136 Alpha-isocomene synthase 4.2.3.137 (E)-2-epi-beta-caryophyllene synthase @@ -5835,6 +5885,8 @@ 4.6.1.21 Enterobacter ribonuclease 4.6.1.22 Bacillus subtilis ribonuclease 4.6.1.23 Ribotoxin +4.6.1.24 Ribonuclease T(1) +4.6.1.25 Bacteriophage T(4) restriction endoribonuclease RegB 4.7.1.1 Alpha-D-ribose 1-methylphosphonate 5-phosphate C-P-lyase 4.99.1.1 Protoporphyrin ferrochelatase 4.99.1.2 Alkylmercury lyase @@ -5966,7 +6018,7 @@ 5.3.1.29 Ribose 1,5-bisphosphate isomerase 5.3.1.30 5-deoxy-glucuronate isomerase 5.3.1.31 Sulfoquinovose isomerase 
-5.3.1.32 (4S)-4-hydroxy-5-phosphonooxypentane-2,3-dione isomerase +5.3.1.32 (4S)-4-hydroxy-5-phosphooxypentane-2,3-dione isomerase 5.3.1.33 L-erythrulose 1-phosphate isomerase 5.3.1.34 D-erythrulose 4-phosphate isomerase 5.3.1.35 2-dehydrotetronate isomerase @@ -6138,6 +6190,8 @@ 5.5.1.31 Hapalindole H synthase 5.5.1.32 12-epi-hapalindole U synthase 5.5.1.33 12-epi-fischerindole U synthase +5.5.1.34 (+)-cis,trans-nepetalactol synthase +5.5.1.35 (+)-cis,cis-nepetalactol synthase 5.6.1.1 Microtubule-severing ATPase 5.6.1.2 Dynein ATPase 5.6.1.3 Plus-end-directed kinesin ATPase @@ -6237,6 +6291,9 @@ 6.2.1.58 Isophthalate--CoA ligase 6.2.1.59 Long-chain fatty acid adenylase/transferase FadD26 6.2.1.60 Marinolic acid--CoA ligase +6.2.1.61 Salicylate--[aryl-carrier protein] ligase +6.2.1.62 3,4-dihydroxybenzoate--[aryl-carrier protein] ligase +6.2.1.63 L-arginine--[L-arginyl-carrier protein] ligase 6.2.1.n2 Amino acid--[acyl-carrier-protein] ligase 6.2.1.n3 Malonate--CoA ligase 6.3.1.1 Aspartate--ammonia ligase @@ -6293,7 +6350,7 @@ 6.3.2.40 Cyclopeptine synthase 6.3.2.41 N-acetylaspartylglutamate synthase 6.3.2.42 N-acetylaspartylglutamylglutamate synthase -6.3.2.43 [Amino group carrier protein]--L-2-aminoadipate ligase +6.3.2.43 [Amino-group carrier protein]--L-2-aminoadipate ligase 6.3.2.44 Pantoate--beta-alanine ligase (ADP-forming) 6.3.2.45 UDP-N-acetylmuramate L-alanyl-gamma-D-glutamyl-meso-2,6-diaminoheptanedioate ligase 6.3.2.46 Fumarate--(S)-2,3-diaminopropanoate ligase @@ -6302,7 +6359,7 @@ 6.3.2.49 L-alanine--L-anticapsin ligase 6.3.2.50 Tenuazonic acid synthetase 6.3.2.51 Phosphopantothenate--cysteine ligase (ATP) -6.3.2.52 Jasmonoyl--L-amino acid synthetase +6.3.2.52 Jasmonoyl--L-amino acid ligase 6.3.2.53 UDP-N-acetylmuramoyl-L-alanine--L-glutamate ligase 6.3.2.54 L-2,3-diaminopropanoate--citrate ligase 6.3.2.55 2-((L-alanin-3-ylcarbamoyl)methyl)-3-(2-aminoethylcarbamoyl)-2-hydroxypropanoate synthase @@ -6379,6 +6436,7 @@ 7.1.1.6 Plastoquinol--plastocyanin 
reductase 7.1.1.7 Ubiquinol oxidase (electrogenic, proton-motive force generating) 7.1.1.8 Quinol--cytochrome-c reductase +7.1.1.9 Cytochrome-c oxidase 7.1.2.1 P-type H(+)-exporting transporter 7.1.2.2 H(+)-transporting two-sector ATPase 7.1.3.1 H(+)-exporting diphosphatase @@ -6430,6 +6488,7 @@ 7.4.2.10 ABC-type glutathione transporter 7.4.2.11 ABC-type methionine transporter 7.4.2.12 ABC-type cystine transporter +7.4.2.13 ABC-type tyrosine transporter 7.5.2.1 ABC-type maltose transporter 7.5.2.2 ABC-type oligosaccharide transporter 7.5.2.3 ABC-type beta-glucan transporter diff --git a/c++/src/objects/seqfeat/gc.inc b/c++/src/objects/seqfeat/gc.inc index 5acdcaa6..2ceaed2d 100644 --- a/c++/src/objects/seqfeat/gc.inc +++ b/c++/src/objects/seqfeat/gc.inc @@ -1,4 +1,4 @@ -/* $Id: gc.inc 585639 2019-05-01 19:41:04Z fukanchi $ +/* $Id: gc.inc 610069 2020-06-10 17:10:47Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -145,7 +145,7 @@ static const char* const s_GenCodeTblMemStr[] = { " sncbieaa \"--*-------**--*-----------------M--M---------------M------------\"", " } ,", " {", - " name \"Pterobranchia Mitochondrial\" ,", + " name \"Rhabdopleuridae Mitochondrial\" ,", " id 24 ,", " ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG\",", " sncbieaa \"---M------**-------M---------------M---------------M------------\"", diff --git a/c++/src/objects/seqfeat/gc.prt b/c++/src/objects/seqfeat/gc.prt index 6738f01d..ffb786f6 100644 --- a/c++/src/objects/seqfeat/gc.prt +++ b/c++/src/objects/seqfeat/gc.prt @@ -6,6 +6,9 @@ -- readability at the suggestion of Peter Rice, EMBL -- Later additions by Taxonomy Group staff at NCBI -- +-- Version 4.6 +-- Renamed genetic code 24 to Rhabdopleuridae Mitochondrial +-- -- Version 4.5 -- Added Cephalodiscidae mitochondrial genetic code 33 -- @@ -263,7 +266,7 @@ Genetic-code-table ::= { -- Base3 
TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG } , { - name "Pterobranchia Mitochondrial" , + name "Rhabdopleuridae Mitochondrial" , id 24 , ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", sncbieaa "---M------**-------M---------------M---------------M------------" diff --git a/c++/src/objects/seqfeat/institution_codes.inc b/c++/src/objects/seqfeat/institution_codes.inc index 9a2d161b..b67fe886 100644 --- a/c++/src/objects/seqfeat/institution_codes.inc +++ b/c++/src/objects/seqfeat/institution_codes.inc @@ -1,4 +1,4 @@ -/* $Id: institution_codes.inc 607542 2020-05-05 14:51:12Z ivanov $ +/* $Id: institution_codes.inc 616908 2020-09-22 18:24:46Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -31,7 +31,7 @@ */ static const char* const kInstitutionCollectionCodeList[] = { -"# $Id: institution_codes.inc 607542 2020-05-05 14:51:12Z ivanov $", +"# $Id: institution_codes.inc 616908 2020-09-22 18:24:46Z ivanov $", "A\ts\tArnold Arboretum, Harvard University\t\t\t", "AA\ts\tMinistry of Science, Academy of Sciences\t\t\t", "AAC\tc\tArignar Anna College\t\t\t", @@ -74,11 +74,11 @@ static const char* const kInstitutionCollectionCodeList[] = { "ABTRI\tc\tApex Biotechnology Training and Research Institute\t\t\t", "ABU\ts\tAhmadu Bello University Herbarium\t\t\t", "AC\ts\tAmherst College\t\t\t", -"ACA\ts\tAgricultural University of Athens\t\t\t", "ACA-DC\tc\tGreek Coordinated Collections of Microorganisms\t\t\t", "ACAD\ts\tAcadia University, K. C. 
Irving Environmental Science Centre & Harriet Irving Botanical Gardens\t\t\t", "ACAD\tsb\tAustralian Centre for Ancient DNA\t\t\t", "ACAM\tc\tThe Australian Collection of Antarctic Microorganisms, Cooperative Research Center for the Antarctic and Southern Ocean Environment\t\t\t", +"ACAM\ts\tAgricultural University of Athens\tACA\t\t", "ACAP\ts\tAquaculture Center of Aomori Prefecture\t\t\t", "ACBC\ts\tAgriculture Canada Research Station\t\t\t", "ACBR\tc\tAustrian Center of Biological Resources and Applied Mycology\t\t\t", @@ -465,7 +465,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "BCCM/ITM\tc\tBelgian Coordinated Collections of Microorganisms / ITM Mycobacteria Collection\tITM\t\t", "BCCM/LMG\tc\tBelgian Coordinated Collections of Microorganisms/ LMG Bacteria Collection\tLMG\thttp://bccm.belspo.be/catalogues/lmg-strain-details?NUM=\t", "BCCM/MUCL\tc\tBelgian Coordinated Collections of Microorganisms / MUCL Agro-food & Environmental Fungal Collection\tMUCL\t\t", -"BCCM/ULC\tc\tBelgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection\tULC\thttp://bccm.belspo.be/catalogues/ulc-strain-details?ACCESSION_NUMBER=\t", +"BCCM/ULC\tc\tBelgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection\tULC\thttps://bccm.belspo.be/catalogues/bm-details?accession_number=\t", "BCCN\tc\tBrucella Culture Collection\t\t\t", "BCCUSP\tc\tBrazilian Cyanobacteria Collection - University of Sao Paulo\t\t\t", "BCF\ts\tUniversitat de Barcelona, Laboratori de Botanica\t\t\t", @@ -682,7 +682,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "BPBM\ts\tBernice P. Bishop Museum\tBISHOP\t\t", "BPBM:Fish\ts\tBernice P. Bishop Museum, Fish Collection\t\t", "BPBM:IZ\ts\tBernice P. Bishop Museum, Invertebrate Zoology\t\t", -"BPI\tsc\tU.S. 
National Fungus Collections, Systematic Botany and Mycology Laboratory\t\thttp://nt.ars-grin.gov/fungaldatabases/specimens/new_rptSpecimenOneRec.cfm?thisrec=BPI+&spec;\t", +"BPI\tsc\tU.S. National Fungus Collections, Systematic Botany and Mycology Laboratory\t\t\t", "BPI\ts\tBernard Price Institute for Palaeontological Research\t\t\t", "BPIC\tc\tBenaki Phytopathological Institute Collection\t\t\t", "BPL\ts\tMuseum of Barnstaple & North Devon\t\t\t", @@ -972,6 +972,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "CCM\tc\tCzech Collection of Microorganisms\tCCM\t\t", "CCM-A\tc\tColeccion de Cultivos Microbianos\t\t\t", "CCM-CIBE\tc\tEscuela Superior Politecnica del Litoral\t\t\t", +"CCM-UFV\tc\tCollection of Cyanobacteria and Microalgae at the Universidade Federal de Vicosa\t\t\t", "CCM\ts\tChangchun College of Traditional Chinese Medicine, Department of Chinese Materia Medica\t\t\t", "CCM\ts\tCarter County Museum\t\t\t", "CCMA-UFSCar\tc\tCulture Collection of Freshwater Microalgae\t\t\t", @@ -1076,7 +1077,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "CGEC\ts\tChina Entomological Research Institute\t\t\t", "CGG\ts\tCambridge University Botanic Garden\t\t\t", "CGH\ts\tNational Museum of Prague\t\t\t", -"CGMCC\tc\tChina General Microbiological Culture Collection Center\tAS\t\t", +"CGMCC\tc\tChina General Microbiological Culture Collection Center\tAS\thttp://www.cgmcc.net/english/cata.php?stn=CGMCC%20\t", "CGMS\ts\tUniversidade Federal de Mato Grosso do Sul, Departamento de Biologia\t\t\t", "CGN\ts\tCentre for Genetic Resources, The Netherlands\t\t\t", "CGRIS\tb\tChinese Crop Germplasm Resources Information Network\t\t\t", @@ -1098,6 +1099,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "CHELB\ts\tCheltenham College for Boys\t\t\t", "CHEP\ts\tEscuela Superior Politecnica del Chimborazo\t\t\t", "CHER\ts\tYu. 
Fedcovich Chernivtsi State University, Botany Department\t\t\t", +"CHFC-EA\tsc\tChilean Fungal Collection\tCHFC,ChFC\t\t", "CHFD\ts\tChelmsford and Essex Museum\t\t\t", "CHI\ts\tUniversity of Illinois, Biological Sciences Department\t\t\t", "CHIA\ts\tNational Chiayi Agricultural College, Forestry Department\t\t\t", @@ -1139,7 +1141,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "CIBM\ts\tCentro Invest. Biol. Noroeste\t\t\t", "CIC\ts\tAlbertson College of Idaho, Biology Department\t\t\t", "CICC\tc\tChina Center for Industrial Culture Collection\t\t\t", -"CICCM\tc\tCawthron Institute Culture Collection of Micro-algae\t\t\t", +"CICCM\tc\tCawthron Institute Culture Collection of Micro-algae\tCAWD\t\t", "CICESE\ts\tCentro de Investigacion Cientifica y de Educacion Superior de Ensenada\t\t\t", "CICIM\tc\tCulture and Information Centre of Industrial Microorganisms of China's Univeristies\t\t\t", "CICIMAR\ts\tCentro Interdisciplinario de Ciencias Marinas\t\t\t", @@ -1422,6 +1424,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "CSAT\ts\tColegio de Postgraduados, Campus Tabasco\t\t\t", "CSAU\ts\tNational Agrarian University, Southern Branch \"Crimean Agrotechnological University\", Department of Botany, Plant Physiology and Genetics\t\t\t", "CSB\ts\tSt. John's University/College of Saint Benedict, Biology Department\t\t\t", +"CSBD\ts\tCentre for Study of Biological Diversity\t\t\t", "CSC\ts\tColegio del Sagrado Corazon\t\t\t", "CSC-CLCH\tc\tCentro Substrati Cellulari, Cell Lines Collection and Hybridomas\t\t\t", "CSCA\ts\tCalifornia State Collection of Arthropods\t\t\t", @@ -1474,6 +1477,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "CTNRC\ts\tThai National Reference Collections\t\t\t", "CTR\ts\tCharles T. 
Ramsden historical collection\t\t\t", "CTS\ts\tChongqing Teachers College\t\t\t", +"CTUA\ts\tColección Teriológica de la Universidad de Antioquia\t\t\t", "CTY\ts\tCanterbury Literary and Philosophical Institution\t\t\t", "CU\tsb\tCornell University\t\t\t", "CUAC\ts\tClemson University\t\t\t", @@ -1658,6 +1662,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "DKG\ts\tJuniper Hall Field Centre\t\t\t", "DLF\ts\tStetson University, Biology Department\t\t\t", "DLU\ts\tDa Lat University\t\t\t", +"DLUCC\tc\tDali University Culture Collection\t\t\t", "DLY\ts\tDudley and Midland Geological and Scientific Society and Field Club\t\t\t", "DM\ts\tDominion Museum\t\t\t", "DM\ts\tThe Dinosaur Museum\t\t\t", @@ -1821,6 +1826,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "ECK\ts\tBuffalo State College\t\t\t", "ECM\ts\tHubei College of Traditional Chinese Medicine, Department of Chinese Materia Medica\t\t\t", "ECNB\ts\tEscuela Nacional Ciencias\t\t\t", +"ECNU\ts\tMuseum of Biology, East China Normal University, School of Life Sciences\t\t\t", "ECOCHM\ts\tColeccion de Mamiferos del Museo de Zoologia-ECOSUR\t\t\t", "ECOL\ts\tCollection du Laborataire d'Ecologie\t\t\t", "ECOMAR\ts\tECOMAR lab University of Reunion\t\t\t", @@ -1844,7 +1850,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "EELM\ts\tEstacion Experimental Agricola de la Molina\t\t\t", "EERU\ts\tEconomic Entomology Research Unit\t\t\t", "EFC\ts\tEscola de Florestas\t\t\t", -"EFCC\ts\tEpping Forest Conservation Centre\t\t\t", +"EFCC\tc\tEntomopathogenic Fungal Collection\t\t\t", "EFH\ts\tForestry Commission\t\t\t", "EFM\ts\tEpping Forest Museum, Corporation of London\t\t\t", "EFWM\ts\tDepartment of Entomology\t\t\t", @@ -1971,7 +1977,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "F\ts\tField Museum of Natural History, Botany Department\tFMNH:F\t\t", "FAA\ts\tUniversidad Nacional del Centro de la Provincia de Buenos Aires\t\t\t", "FABR\ts\tHarmas de 
J. H. Fabre\t\t\t", -"FACHB\tc\tFreshwater Algae Culture Collection\t\t\t", +"FACHB\tc\tFreshwater Algae Culture Collection\tCHAB \t\t", "FACS\ts\tFujian Agricultural College\t\t\t", "FAK\ts\tDepartment of Fisheries, Faculty of Agriculture\t\t\t", "FAKOU\ts\tFaculty of Agriculture, Kochi Univerisity\t\t\t", @@ -3710,6 +3716,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "LECB\ts\tSaint Petersburg State University, Botany Department\t\t\t", "LEDLIE\ts\tPatricia Ledlie Herbarium\t\t\t", "LEF\ts\tEconomic Forestry Institute of Liaoning Province\t\t\t", +"LEGEcc\tc\tBlue Biotechnology and Ecotoxicology Culture Collection\t\t\t", "LEH\ts\tLehigh University\t\t\t", "LEI\ts\tLeicester Literary and Philosophical Society\t\t\t", "LEISHCRYOBANK\tc\tInternational Cryobank of Leishmania\t\t\t", @@ -3961,7 +3968,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "MADM\ts\tMuseu Municipal do Funchal\t\t\t", "MADS\ts\tMuseu de Historia Natural do Seminario do Funchal\t\t\t", "MAF\ts\tUniversidad Complutense, Departamento de Biologia Vegetal II\t\t\t", -"MAFF\tc\tMAFF Genebank, Ministry of Agriculture Forestry and Fisheries\t\t\t", +"MAFF\tc\tMAFF Genebank, Ministry of Agriculture Forestry and Fisheries\t\thttps://www.gene.affrc.go.jp/databases-micro_search_detail_en.php?maff=\t", "MAFF\ts\tColo-i-Suva Silvicultural Station\t\t\t", "MAFI\ts\tMagyar Allami Foeldtani Intezet, Budapest - Hungarian Geological Survey\t\t\t", "MAFST\ts\tInstituto Forestal de la Moncloa\t\t\t", @@ -4152,6 +4159,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "MDTN\ts\tMiddleton Botanical Society\t\t\t", "MDUG\ts\tUniversidad Guanajuato, Museo Alfredo Duges\t\t\t", "MDZAU\ts\tMuseum Deptartment of Zoology\t\t\t", +"MEAN\tc\tMicoteca da Estacao Agronomica Nacional\t\t\t", "MECB\ts\tUniversidade Federal de Pelotas, Museu Entomologico Ceslau Biezanko\t\t\t", "MECG\ts\tMedical Entomology Collection Gallery\t\t\t", "MECN\ts\tMuseo Ecuadoriano de Ciencias 
Naturales\tDHMECN\t\t", @@ -4616,6 +4624,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "MU\tc\tMugla Sitki Kocman University\t\t\t", "MU\ts\tMiami University, Botany Department, Willard Sherman Turrell Herbarium\t\thttp://herbarium.muohio.edu/herbariummu/\t", "MU\ts\tMidwestern University\t\t\t", +"MUA-AVP\ts\tMuseo Universitario de la Universidad de Antioquia\t\t\t", "MUACC\tc\tMurdoch University Algal Culture Collection\t\t\t", "MUAF\tc\tCulture collection of Mendel University of Agriculture and Forestry in Brno\t\t\t", "MUAP\ts\tMuseo del Mar Universidad Arturo Prat\t\t\t", @@ -4710,6 +4719,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "MZCR\ts\tMuseo de Zoologia\t\t\t", "MZFC\ts\tMuseo de Zoologia \"Alfonso L. Herrera\"\t\t\t", "MZFN\ts\tMuseo Zoologico dell'Universita \"Federico II\"\t\t\t", +"MZFS-DAR\ts\tMuseu de Zoologia da Universidade Estadual de Feira de Santana\t\t\t", "MZGZ\ts\tMuseum Zoologia del Giardino Zoologico\t\t\t", "MZH\ts\tZoolgical Museum, Finnish Museum of Natural History\t\t\t", "MZKI\tc\tMicrobial Culture Collection of National Institute of Chemistry\t\t\t", @@ -5000,6 +5010,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "NLU\ts\tUniversity of Louisiana at Monroe, Museum of Natural History\t\t\t", "NLUH\ts\tUniversity of the Philippines College Baguio\t\t\t", "NM\ts\tNorthern Michigan University, Biology Department\t\t\t", +"NMA\ts\tNational Museum Australia -\t\t\t", "NMAC\ts\tInner Mongolia Agricultural University, Department of Pratacultural Science\t\t\t", "NMAG\ts\tNaturhistorisches Museum, Augsburg\t\t\t", "NMB\tc\tNingbo Marine Biotechnology\t\t\t", @@ -5167,10 +5178,11 @@ static const char* const kInstitutionCollectionCodeList[] = { "NRC\ts\tNational Research Centre\t\t\t", "NRCC\ts\tNational Research Council of Canada\t\t\t", "NRCS\tc\tNational Reference Center for Streptococci in Aachen\t\t\t", +"NRI\ts\tTexas A&M Natural Resources Institute\t\t\t", 
"NRIBAS\ts\tNational Research Institute of Biology, Academia Sinica\t\t\t", "NRIC\tc\tNODAI Research Institute Culture Collection\t\t\t", "NRL\tc\tNeisseria Reference Laboratory\t\t\t", -"NRM\ts\tSwedish Museum of Natural History\t\t\t", +"NRM\ts\tSwedish Museum of Natural History\tSMNH\t\t", "NRN\ts\tNairn Literary Society Library, Public Library\t\t\t", "NRNZ\ts\tNorthland Regional Museum\t\t\t", "NRPSU\tc\tDepartment of Agro-industry, Faculty of Natural Resources\t\t\t", @@ -5220,9 +5232,10 @@ static const char* const kInstitutionCollectionCodeList[] = { "NTOU\tsc\tInstitute of Marine Biology, National Taiwan Ocean University\t\t\t", "NTS\ts\tNevada Operations Office, U.S. Department of Energy\t\t\t", "NTSC\ts\tUniversity of North Texas, Biological Sciences Department\t\t\t", +"NTUCC\tc\tPlant Pathology and Microbiology, National Taiwan University Culture Collection\t\t\t", "NTUF\ts\tNational Taiwan University, Forestry Department\t\t\t", -"NTUM\ts\tNational Taiwan University\t\t\t", -"NTUMA\ts\tNational Taiwan University\t\t\t", +"NTUH\ts\tHerbarium of the Department of Plant Pathology and Microbiology, National Taiwan University\t\t\t", +"NTUM\ts\tNational Taiwan University Museum\t\t\t", "NU\tc\tDepartment of Microbiology, Faculty of Science\t\t\t", "NU\ts\tUniversity of Natal, School of Botany and Zoology\t\t\t", "NUA\tc\tDepartment of Microbiology, National University of Athens\t\t\t", @@ -7165,7 +7178,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "UHCC\tc\tUniversity of Helsinki Cyanobacteria Culture Collection\t\t\t", "UHI\ts\tUssishkin House, Botany Department\t\t\t", "UHM\ts\tManoa, College of Tropical Agriculture, Department of Entomology\t\t\t", -"UI\ts\tUniversity of Ibadan\t\t\t", +"UI\tsc\tUniversity of Ibadan\t\t\t", "UI\ts\tBureau of Land Management (Uinta Herbarium)\t\t\t", "UICC\tc\tUniversity of Indonesia Culture Collection\t\t\t", "UIDA\ts\tUniversity of Idaho, Bird and Mammal Museum\t\t\t", @@ -7358,7 +7371,7 @@ static 
const char* const kInstitutionCollectionCodeList[] = { "UPEI\ts\tUniversity of Prince Edward Island, Biology Department\t\t\t", "UPF\ts\tUniversite de Polynesie Francaise Herbarium\t\t\t", "UPIE\tb\tUnidad de Patologia Infecciosa y Epidemiologia\t\t\t", -"UPLB\ts\tMuseum of Natural History, University of the Philippines\t\t\t", +"UPLB\ts\tUniversity of Philippines Los Banos\t\t\t", "UPM\ts\tDepartement des Siences de la Terre\t\t\t", "UPM\ts\tUniversiti Pertanian Malaysia, Biology Department\t\t\t", "UPM\ts\tUdory Paleontological Museum\t\t\t", @@ -7652,7 +7665,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "VPCI\tc\tFungal Culture Collection\t\t\t", "VPH\ts\tVan Pharmaceutical Herbarium, Yuzuncu Yil University\t\t\t", "VPI\tsc\tVirginia Polytechnic Institute and State University\tVTMH\t\t", -"VPI:F\ts\tVirginia Polytechnic Institute and State University, Fungal Collection\t", +"VPI:F\ts\tVirginia Polytechnic Institute and State University, Fungal Collection\t\t", "VPIC\ts\tVirginia Polytechnic Institute and State University\t\t\t", "VPIMM\ts\tVirginia Polytechnic University, Mammal Museum\t\t\t", "VPM\ts\tVolgograd Provincial Museum\t\t\t", @@ -7747,6 +7760,7 @@ static const char* const kInstitutionCollectionCodeList[] = { "WFPL\tc\tWestern Forest Products Laboratory\t\t\t", "WFU\ts\tWake Forest University, Biology Department\t\t\t", "WFUVC\ts\tWake Forest University, Vertebrate Collection\t\t\t", +"WFVZ\ts\tWestern Foundation of Vertebrate Zoology\t\t\t", "WGC\ts\tState University of West Georgia, Biology Department\t\t\t", "WGCH\ts\tWilton Garden Club\t\t\t", "WGD\ts\tWashington Game Department\t\t\t", diff --git a/c++/src/objects/seqfeat/institution_codes.txt b/c++/src/objects/seqfeat/institution_codes.txt index 03d296c1..e63a4495 100644 --- a/c++/src/objects/seqfeat/institution_codes.txt +++ b/c++/src/objects/seqfeat/institution_codes.txt @@ -1,4 +1,4 @@ -# $Id: institution_codes.txt 607542 2020-05-05 14:51:12Z ivanov $ +# $Id: 
institution_codes.txt 616908 2020-09-22 18:24:46Z ivanov $ A s Arnold Arboretum, Harvard University AA s Ministry of Science, Academy of Sciences AAC c Arignar Anna College @@ -41,11 +41,11 @@ ABTC s Australian Biological Tissue Collection, South Australian Museum SAMA:ABT ABTRI c Apex Biotechnology Training and Research Institute ABU s Ahmadu Bello University Herbarium AC s Amherst College -ACA s Agricultural University of Athens ACA-DC c Greek Coordinated Collections of Microorganisms ACAD s Acadia University, K. C. Irving Environmental Science Centre & Harriet Irving Botanical Gardens ACAD sb Australian Centre for Ancient DNA ACAM c The Australian Collection of Antarctic Microorganisms, Cooperative Research Center for the Antarctic and Southern Ocean Environment +ACAM s Agricultural University of Athens ACA ACAP s Aquaculture Center of Aomori Prefecture ACBC s Agriculture Canada Research Station ACBR c Austrian Center of Biological Resources and Applied Mycology @@ -432,7 +432,7 @@ BCCM/IHEM c Belgian Coordinated Collections of Microorganisms / IHEM Fungi colle BCCM/ITM c Belgian Coordinated Collections of Microorganisms / ITM Mycobacteria Collection ITM BCCM/LMG c Belgian Coordinated Collections of Microorganisms/ LMG Bacteria Collection LMG http://bccm.belspo.be/catalogues/lmg-strain-details?NUM= BCCM/MUCL c Belgian Coordinated Collections of Microorganisms / MUCL Agro-food & Environmental Fungal Collection MUCL -BCCM/ULC c Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection ULC http://bccm.belspo.be/catalogues/ulc-strain-details?ACCESSION_NUMBER= +BCCM/ULC c Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection ULC https://bccm.belspo.be/catalogues/bm-details?accession_number= BCCN c Brucella Culture Collection BCCUSP c Brazilian Cyanobacteria Collection - University of Sao Paulo BCF s Universitat de Barcelona, Laboratori de Botanica @@ -649,7 +649,7 @@ BP s Hungarian Natural History Museum, Botanical 
Department BPBM s Bernice P. Bishop Museum BISHOP BPBM:Fish s Bernice P. Bishop Museum, Fish Collection BPBM:IZ s Bernice P. Bishop Museum, Invertebrate Zoology -BPI sc U.S. National Fungus Collections, Systematic Botany and Mycology Laboratory http://nt.ars-grin.gov/fungaldatabases/specimens/new_rptSpecimenOneRec.cfm?thisrec=BPI+&spec; +BPI sc U.S. National Fungus Collections, Systematic Botany and Mycology Laboratory BPI s Bernard Price Institute for Palaeontological Research BPIC c Benaki Phytopathological Institute Collection BPL s Museum of Barnstaple & North Devon @@ -939,6 +939,7 @@ CCIM c Culture Collection of Industrial Microorganisms CCM c Czech Collection of Microorganisms CCM CCM-A c Coleccion de Cultivos Microbianos CCM-CIBE c Escuela Superior Politecnica del Litoral +CCM-UFV c Collection of Cyanobacteria and Microalgae at the Universidade Federal de Vicosa CCM s Changchun College of Traditional Chinese Medicine, Department of Chinese Materia Medica CCM s Carter County Museum CCMA-UFSCar c Culture Collection of Freshwater Microalgae @@ -1043,7 +1044,7 @@ CGE s Cambridge University, Department of Plant Sciences CGEC s China Entomological Research Institute CGG s Cambridge University Botanic Garden CGH s National Museum of Prague -CGMCC c China General Microbiological Culture Collection Center AS +CGMCC c China General Microbiological Culture Collection Center AS http://www.cgmcc.net/english/cata.php?stn=CGMCC%20 CGMS s Universidade Federal de Mato Grosso do Sul, Departamento de Biologia CGN s Centre for Genetic Resources, The Netherlands CGRIS b Chinese Crop Germplasm Resources Information Network @@ -1065,6 +1066,7 @@ CHEL s Chelsea Physic Garden CHELB s Cheltenham College for Boys CHEP s Escuela Superior Politecnica del Chimborazo CHER s Yu. 
Fedcovich Chernivtsi State University, Botany Department +CHFC-EA sc Chilean Fungal Collection CHFC,ChFC CHFD s Chelmsford and Essex Museum CHI s University of Illinois, Biological Sciences Department CHIA s National Chiayi Agricultural College, Forestry Department @@ -1106,7 +1108,7 @@ CIBIO s Centro de Investigacao em Biodiversidade e Recursos Geneticos CIBM s Centro Invest. Biol. Noroeste CIC s Albertson College of Idaho, Biology Department CICC c China Center for Industrial Culture Collection -CICCM c Cawthron Institute Culture Collection of Micro-algae +CICCM c Cawthron Institute Culture Collection of Micro-algae CAWD CICESE s Centro de Investigacion Cientifica y de Educacion Superior de Ensenada CICIM c Culture and Information Centre of Industrial Microorganisms of China's Univeristies CICIMAR s Centro Interdisciplinario de Ciencias Marinas @@ -1389,6 +1391,7 @@ CSAEG c Culture Collection of Phytopathogenic Fungi at the Colegio Superior Agr CSAT s Colegio de Postgraduados, Campus Tabasco CSAU s National Agrarian University, Southern Branch "Crimean Agrotechnological University", Department of Botany, Plant Physiology and Genetics CSB s St. John's University/College of Saint Benedict, Biology Department +CSBD s Centre for Study of Biological Diversity CSC s Colegio del Sagrado Corazon CSC-CLCH c Centro Substrati Cellulari, Cell Lines Collection and Hybridomas CSCA s California State Collection of Arthropods @@ -1441,6 +1444,7 @@ CTN s Free Library and Museum CTNRC s Thai National Reference Collections CTR s Charles T. 
Ramsden historical collection CTS s Chongqing Teachers College +CTUA s Colección Teriológica de la Universidad de Antioquia CTY s Canterbury Literary and Philosophical Institution CU sb Cornell University CUAC s Clemson University @@ -1625,6 +1629,7 @@ DIX s Dixie College, Natural History Museum DKG s Juniper Hall Field Centre DLF s Stetson University, Biology Department DLU s Da Lat University +DLUCC c Dali University Culture Collection DLY s Dudley and Midland Geological and Scientific Society and Field Club DM s Dominion Museum DM s The Dinosaur Museum @@ -1788,6 +1793,7 @@ ECH s Elmira College ECK s Buffalo State College ECM s Hubei College of Traditional Chinese Medicine, Department of Chinese Materia Medica ECNB s Escuela Nacional Ciencias +ECNU s Museum of Biology, East China Normal University, School of Life Sciences ECOCHM s Coleccion de Mamiferos del Museo de Zoologia-ECOSUR ECOL s Collection du Laborataire d'Ecologie ECOMAR s ECOMAR lab University of Reunion @@ -1811,7 +1817,7 @@ EEBP s Estacao Experimental de Biologia e Piscicultura de Pirassununga EELM s Estacion Experimental Agricola de la Molina EERU s Economic Entomology Research Unit EFC s Escola de Florestas -EFCC s Epping Forest Conservation Centre +EFCC c Entomopathogenic Fungal Collection EFH s Forestry Commission EFM s Epping Forest Museum, Corporation of London EFWM s Department of Entomology @@ -1938,7 +1944,7 @@ EXR s University of Exeter, Biological Sciences Department F s Field Museum of Natural History, Botany Department FMNH:F FAA s Universidad Nacional del Centro de la Provincia de Buenos Aires FABR s Harmas de J. H. 
Fabre -FACHB c Freshwater Algae Culture Collection +FACHB c Freshwater Algae Culture Collection CHAB FACS s Fujian Agricultural College FAK s Department of Fisheries, Faculty of Agriculture FAKOU s Faculty of Agriculture, Kochi Univerisity @@ -3677,6 +3683,7 @@ LEC s Universita degli Studi di Lecce, Dipartimento di Biologia LECB s Saint Petersburg State University, Botany Department LEDLIE s Patricia Ledlie Herbarium LEF s Economic Forestry Institute of Liaoning Province +LEGEcc c Blue Biotechnology and Ecotoxicology Culture Collection LEH s Lehigh University LEI s Leicester Literary and Philosophical Society LEISHCRYOBANK c International Cryobank of Leishmania @@ -3928,7 +3935,7 @@ MADJ s Jardim Botanico da Madeira MADM s Museu Municipal do Funchal MADS s Museu de Historia Natural do Seminario do Funchal MAF s Universidad Complutense, Departamento de Biologia Vegetal II -MAFF c MAFF Genebank, Ministry of Agriculture Forestry and Fisheries +MAFF c MAFF Genebank, Ministry of Agriculture Forestry and Fisheries https://www.gene.affrc.go.jp/databases-micro_search_detail_en.php?maff= MAFF s Colo-i-Suva Silvicultural Station MAFI s Magyar Allami Foeldtani Intezet, Budapest - Hungarian Geological Survey MAFST s Instituto Forestal de la Moncloa @@ -4119,6 +4126,7 @@ MDRG s Museum voor Dierkunde, Rijksuniversiteit MDTN s Middleton Botanical Society MDUG s Universidad Guanajuato, Museo Alfredo Duges MDZAU s Museum Deptartment of Zoology +MEAN c Micoteca da Estacao Agronomica Nacional MECB s Universidade Federal de Pelotas, Museu Entomologico Ceslau Biezanko MECG s Medical Entomology Collection Gallery MECN s Museo Ecuadoriano de Ciencias Naturales DHMECN @@ -4583,6 +4591,7 @@ MTUF s University Museum, Tokyo University of Fisheries MU c Mugla Sitki Kocman University MU s Miami University, Botany Department, Willard Sherman Turrell Herbarium http://herbarium.muohio.edu/herbariummu/ MU s Midwestern University +MUA-AVP s Museo Universitario de la Universidad de Antioquia MUACC c 
Murdoch University Algal Culture Collection MUAF c Culture collection of Mendel University of Agriculture and Forestry in Brno MUAP s Museo del Mar Universidad Arturo Prat @@ -4677,6 +4686,7 @@ MZCP s Universidade de Coimbra MZCR s Museo de Zoologia MZFC s Museo de Zoologia "Alfonso L. Herrera" MZFN s Museo Zoologico dell'Universita "Federico II" +MZFS-DAR s Museu de Zoologia da Universidade Estadual de Feira de Santana MZGZ s Museum Zoologia del Giardino Zoologico MZH s Zoolgical Museum, Finnish Museum of Natural History MZKI c Microbial Culture Collection of National Institute of Chemistry @@ -4967,6 +4977,7 @@ NLSN s Notre Dame University, Biological Sciences Department NLU s University of Louisiana at Monroe, Museum of Natural History NLUH s University of the Philippines College Baguio NM s Northern Michigan University, Biology Department +NMA s National Museum Australia - NMAC s Inner Mongolia Agricultural University, Department of Pratacultural Science NMAG s Naturhistorisches Museum, Augsburg NMB c Ningbo Marine Biotechnology @@ -5134,10 +5145,11 @@ NRC c Division of Biological Sciences, National Research Council of Canada NRC s National Research Centre NRCC s National Research Council of Canada NRCS c National Reference Center for Streptococci in Aachen +NRI s Texas A&M Natural Resources Institute NRIBAS s National Research Institute of Biology, Academia Sinica NRIC c NODAI Research Institute Culture Collection NRL c Neisseria Reference Laboratory -NRM s Swedish Museum of Natural History +NRM s Swedish Museum of Natural History SMNH NRN s Nairn Literary Society Library, Public Library NRNZ s Northland Regional Museum NRPSU c Department of Agro-industry, Faculty of Natural Resources @@ -5187,9 +5199,10 @@ NTNU-VM s Norwegian University of Science and Technology, Museum of Natural Hist NTOU sc Institute of Marine Biology, National Taiwan Ocean University NTS s Nevada Operations Office, U.S. 
Department of Energy NTSC s University of North Texas, Biological Sciences Department +NTUCC c Plant Pathology and Microbiology, National Taiwan University Culture Collection NTUF s National Taiwan University, Forestry Department -NTUM s National Taiwan University -NTUMA s National Taiwan University +NTUH s Herbarium of the Department of Plant Pathology and Microbiology, National Taiwan University +NTUM s National Taiwan University Museum NU c Department of Microbiology, Faculty of Science NU s University of Natal, School of Botany and Zoology NUA c Department of Microbiology, National University of Athens @@ -7132,7 +7145,7 @@ UH s University of Hawaii UHCC c University of Helsinki Cyanobacteria Culture Collection UHI s Ussishkin House, Botany Department UHM s Manoa, College of Tropical Agriculture, Department of Entomology -UI s University of Ibadan +UI sc University of Ibadan UI s Bureau of Land Management (Uinta Herbarium) UICC c University of Indonesia Culture Collection UIDA s University of Idaho, Bird and Mammal Museum @@ -7325,7 +7338,7 @@ UPCT s Universidad Politecnica De Cartagena UPEI s University of Prince Edward Island, Biology Department UPF s Universite de Polynesie Francaise Herbarium UPIE b Unidad de Patologia Infecciosa y Epidemiologia -UPLB s Museum of Natural History, University of the Philippines +UPLB s University of Philippines Los Banos UPM s Departement des Siences de la Terre UPM s Universiti Pertanian Malaysia, Biology Department UPM s Udory Paleontological Museum @@ -7619,7 +7632,7 @@ VPB c Veterinary Pathology and Bacteriology Collection VPCI c Fungal Culture Collection VPH s Van Pharmaceutical Herbarium, Yuzuncu Yil University VPI sc Virginia Polytechnic Institute and State University VTMH -VPI:F s Virginia Polytechnic Institute and State University, Fungal Collection +VPI:F s Virginia Polytechnic Institute and State University, Fungal Collection VPIC s Virginia Polytechnic Institute and State University VPIMM s Virginia Polytechnic 
University, Mammal Museum VPM s Volgograd Provincial Museum @@ -7714,6 +7727,7 @@ WFIS s Wagner Free Institute of Science WFPL c Western Forest Products Laboratory WFU s Wake Forest University, Biology Department WFUVC s Wake Forest University, Vertebrate Collection +WFVZ s Western Foundation of Vertebrate Zoology WGC s State University of West Georgia, Biology Department WGCH s Wilton Garden Club WGD s Washington Game Department diff --git a/c++/src/objects/seqfeat/lat_lon_country.inc b/c++/src/objects/seqfeat/lat_lon_country.inc index 4c7f464c..551edebd 100644 --- a/c++/src/objects/seqfeat/lat_lon_country.inc +++ b/c++/src/objects/seqfeat/lat_lon_country.inc @@ -1,4 +1,4 @@ -/* $Id: lat_lon_country.inc 599818 2020-01-07 20:09:07Z kans $ +/* $Id: lat_lon_country.inc 612552 2020-07-23 15:34:00Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE diff --git a/c++/src/objects/seqloc/Seq_id.cpp b/c++/src/objects/seqloc/Seq_id.cpp index 529e6315..861f3c0e 100644 --- a/c++/src/objects/seqloc/Seq_id.cpp +++ b/c++/src/objects/seqloc/Seq_id.cpp @@ -1,4 +1,4 @@ -/* $Id: Seq_id.cpp 603822 2020-03-17 17:37:01Z ivanov $ +/* $Id: Seq_id.cpp 617367 2020-09-30 12:57:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2276,8 +2276,10 @@ SIZE_TYPE CSeq_id::ParseIDs(CBioseq::TId& ids, const CTempString& s, E_Choice type = WhichInverseSeqId(fasta_pieces.front()); ETypeVariant tv; if (type == e_not_set) { - // unknown database are reported as 'general' - type = e_General; + if (fasta_pieces.size() == 2) { + // unknown database are reported as 'general' + type = e_General; + } tv = eTV_plain; } else { tv = x_IdentifyTypeVariant(type, fasta_pieces.front()); @@ -2306,8 +2308,19 @@ SIZE_TYPE CSeq_id::ParseIDs(CBioseq::TId& ids, const CTempString& s, ids.push_back(id); ++count; } catch (std::exception& e) { + if (fasta_pieces.empty()) { + throw; + } if ((flags & 
fParse_PartialOK) != 0) { ERR_POST_X(7, Warning << e.what()); + do { + auto l = fasta_pieces.front().size(); + if (l != 2 && l != 3) { + fasta_pieces.pop_front(); + } else { + break; + } + } while ( !fasta_pieces.empty() ); } else { throw; } diff --git a/c++/src/objects/seqloc/accguide.inc b/c++/src/objects/seqloc/accguide.inc index 2038a529..84b0dece 100644 --- a/c++/src/objects/seqloc/accguide.inc +++ b/c++/src/objects/seqloc/accguide.inc @@ -1,4 +1,4 @@ -/* $Id: accguide.inc 603797 2020-03-17 13:51:04Z ucko $ +/* $Id: accguide.inc 615212 2020-08-28 13:43:44Z ucko $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -31,7 +31,7 @@ */ static const char* const kBuiltInGuide[] = { - "# $Id: accguide.inc 603797 2020-03-17 13:51:04Z ucko $", + "# $Id: accguide.inc 615212 2020-08-28 13:43:44Z ucko $", "version 1 # of file format", "", "# three-letter-prefix protein accessions (traditionally with five digits)", @@ -1181,7 +1181,7 @@ static const char* const kBuiltInGuide[] = { "2+6 LQ embl_patent", "2+8 LQ embl_other_nuc", "2+10 LQ embl_other_nuc", - "2+6 LR embl_dirsub # embl_patent?", + "2+6 LR embl_dirsub * # embl_patent?", "2+8 LR embl_other_nuc", "2+10 LR embl_other_nuc", "2+6 LS embl_dirsub # embl_patent?", @@ -1220,7 +1220,7 @@ static const char* const kBuiltInGuide[] = { "2+6 MD ddbj_patent", "2+8 MD ddbj_other_nuc", "2+10 MD ddbj_other_nuc", - "2+6 ME ddbj_other_nuc", + "2+6 ME ddbj_patent", "2+8 ME ddbj_other_nuc", "2+10 ME ddbj_other_nuc", "2+6 MF gb_dirsub", @@ -1271,6 +1271,12 @@ static const char* const kBuiltInGuide[] = { "2+6 MU gb_con", "2+8 MU gb_other_nuc", "2+10 MU gb_other_nuc", + "2+6 MV gb_patent", + "2+8 MV gb_other_nuc", + "2+10 MV gb_other_nuc", + "2+6 MW gb_dirsub", + "2+8 MW gb_other_nuc", + "2+10 MW gb_other_nuc", "2+6 M? gb_other_nuc", "2+8 M? gb_other_nuc", "2+10 M? 
gb_other_nuc", @@ -3508,6 +3514,9 @@ static const char* const kBuiltInGuide[] = { "special LN901386-LN901412 embl_est", "", "# Nominally embl_dirsub.", + "special LR594708-LR594709 embl_tpa_nuc", + "", + "# Nominally embl_dirsub.", "special LT159851-LT159865 embl_est", "special LT548096-LT548244 embl_tpa_nuc", "special LT556286-LT558089 embl_est", @@ -3522,7 +3531,15 @@ static const char* const kBuiltInGuide[] = { "special OB000001-OB660024 embl_con", "", "# Some \"EMBL\" WGS nucleotide accessions are really third-party annotations.", - "special CAADVW000000000-CAADVX999999999 embl_tpa_wgs_nuc # 6+9", + "special CAADSF000000000-CAADSF999999999 embl_tpa_wgs_nuc # 6+9", + "special CAADSM000000000-CAAGJX999999999 embl_tpa_wgs_nuc # 6+9", + "special CAAGKD000000000-CAAGKQ999999999 embl_tpa_wgs_nuc # 6+9", + "special CAAGKS000000000-CAAGRI999999999 embl_tpa_wgs_nuc # 6+9", + "special CAAGRK000000000-CAAGSH999999999 embl_tpa_wgs_nuc # 6+9", + "special CAAHDL000000000-CAAHDL999999999 embl_tpa_wgs_nuc # 6+9", + "special CAAHDO000000000-CAAHFA999999999 embl_tpa_wgs_nuc # 6+9", + "special CADEPO000000000-CADEVH999999999 embl_tpa_wgs_nuc # 6+9", + "special CADEVJ000000000-CADFGZ999999999 embl_tpa_wgs_nuc # 6+9", "", "# Some \"EMBL\" 8-character protein accessions are really third-party", "# annotations.", diff --git a/c++/src/objects/seqloc/accguide.txt b/c++/src/objects/seqloc/accguide.txt index cc4b7633..7b6107ea 100644 --- a/c++/src/objects/seqloc/accguide.txt +++ b/c++/src/objects/seqloc/accguide.txt @@ -1,4 +1,4 @@ -# $Id: accguide.txt 603797 2020-03-17 13:51:04Z ucko $ +# $Id: accguide.txt 615212 2020-08-28 13:43:44Z ucko $ version 1 # of file format # three-letter-prefix protein accessions (traditionally with five digits) @@ -1148,7 +1148,7 @@ version 1 # of file format 2+6 LQ embl_patent 2+8 LQ embl_other_nuc 2+10 LQ embl_other_nuc -2+6 LR embl_dirsub # embl_patent? +2+6 LR embl_dirsub * # embl_patent? 
2+8 LR embl_other_nuc 2+10 LR embl_other_nuc 2+6 LS embl_dirsub # embl_patent? @@ -1187,7 +1187,7 @@ version 1 # of file format 2+6 MD ddbj_patent 2+8 MD ddbj_other_nuc 2+10 MD ddbj_other_nuc -2+6 ME ddbj_other_nuc +2+6 ME ddbj_patent 2+8 ME ddbj_other_nuc 2+10 ME ddbj_other_nuc 2+6 MF gb_dirsub @@ -1238,6 +1238,12 @@ version 1 # of file format 2+6 MU gb_con 2+8 MU gb_other_nuc 2+10 MU gb_other_nuc +2+6 MV gb_patent +2+8 MV gb_other_nuc +2+10 MV gb_other_nuc +2+6 MW gb_dirsub +2+8 MW gb_other_nuc +2+10 MW gb_other_nuc 2+6 M? gb_other_nuc 2+8 M? gb_other_nuc 2+10 M? gb_other_nuc @@ -3474,6 +3480,9 @@ special LN898187-LN898198 embl_tpa_nuc special LN901194-LN901210 embl_tpa_nuc special LN901386-LN901412 embl_est +# Nominally embl_dirsub. +special LR594708-LR594709 embl_tpa_nuc + # Nominally embl_dirsub. special LT159851-LT159865 embl_est special LT548096-LT548244 embl_tpa_nuc @@ -3489,7 +3498,15 @@ special LT990249-LT990597 embl_tpa_nuc special OB000001-OB660024 embl_con # Some "EMBL" WGS nucleotide accessions are really third-party annotations. -special CAADVW000000000-CAADVX999999999 embl_tpa_wgs_nuc # 6+9 +special CAADSF000000000-CAADSF999999999 embl_tpa_wgs_nuc # 6+9 +special CAADSM000000000-CAAGJX999999999 embl_tpa_wgs_nuc # 6+9 +special CAAGKD000000000-CAAGKQ999999999 embl_tpa_wgs_nuc # 6+9 +special CAAGKS000000000-CAAGRI999999999 embl_tpa_wgs_nuc # 6+9 +special CAAGRK000000000-CAAGSH999999999 embl_tpa_wgs_nuc # 6+9 +special CAAHDL000000000-CAAHDL999999999 embl_tpa_wgs_nuc # 6+9 +special CAAHDO000000000-CAAHFA999999999 embl_tpa_wgs_nuc # 6+9 +special CADEPO000000000-CADEVH999999999 embl_tpa_wgs_nuc # 6+9 +special CADEVJ000000000-CADFGZ999999999 embl_tpa_wgs_nuc # 6+9 # Some "EMBL" 8-character protein accessions are really third-party # annotations. 
diff --git a/c++/src/objects/valerr/ValidErrItem.cpp b/c++/src/objects/valerr/ValidErrItem.cpp index 077ef847..d59d4551 100644 --- a/c++/src/objects/valerr/ValidErrItem.cpp +++ b/c++/src/objects/valerr/ValidErrItem.cpp @@ -1,4 +1,4 @@ -/* $Id: ValidErrItem.cpp 597158 2019-11-18 17:58:02Z kans $ +/* $Id: ValidErrItem.cpp 611904 2020-07-13 15:51:08Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2205,6 +2205,9 @@ same id type" } }, { eErr_SEQ_FEAT_CDSdoesNotMatchVDJC, { "CDSdoesNotMatchVDJC", "The CDS does not have a parent VDJ or C segment" } }, + { eErr_SEQ_FEAT_GeneOnNucPositionOfPeptide, + { "GeneOnNucPositionOfPeptide", + "Peptide under CDS matches small Gene" } }, /* SEQ_ALIGN */ diff --git a/c++/src/objmgr/scope.cpp b/c++/src/objmgr/scope.cpp index 27d92796..3e1a9d95 100644 --- a/c++/src/objmgr/scope.cpp +++ b/c++/src/objmgr/scope.cpp @@ -1,4 +1,4 @@ -/* $Id: scope.cpp 603742 2020-03-16 17:25:41Z ivanov $ +/* $Id: scope.cpp 610058 2020-06-10 16:19:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -328,6 +328,18 @@ void CScope::ResetDataAndHistory(ERemoveDataLoaders) } +void CScope::RemoveFromHistory(const CSeq_id_Handle& seq_id) +{ + m_Impl->RemoveFromHistory(seq_id); +} + + +void CScope::RemoveFromHistory(const CSeq_id& seq_id) +{ + RemoveFromHistory(CSeq_id_Handle::GetHandle(seq_id)); +} + + void CScope::RemoveFromHistory(const CBioseq_Handle& bioseq, EActionIfLocked action) { diff --git a/c++/src/objmgr/scope_impl.cpp b/c++/src/objmgr/scope_impl.cpp index 4d4439e6..5d6b8642 100644 --- a/c++/src/objmgr/scope_impl.cpp +++ b/c++/src/objmgr/scope_impl.cpp @@ -1,4 +1,4 @@ -/* $Id: scope_impl.cpp 602775 2020-03-02 19:52:55Z grichenk $ +/* $Id: scope_impl.cpp 610058 2020-06-10 16:19:48Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2587,6 +2587,28 @@ void 
CScope_Impl::RemoveFromHistory(const CTSE_Handle& tse, int action) } +void CScope_Impl::RemoveFromHistory(const CSeq_id_Handle& seq_id) +{ + if ( !seq_id ) { + return; + } + TConfWriteLockGuard guard(m_ConfLock); + // Clear removed bioseq handles + TSeq_idMap::iterator it = m_Seq_idMap.find(seq_id); + if ( it != m_Seq_idMap.end() ) { + it->second.x_ResetAnnotRef_Info(); + if ( it->second.m_Bioseq_Info ) { + CBioseq_ScopeInfo& binfo = *it->second.m_Bioseq_Info; + binfo.x_ResetAnnotRef_Info(); + if ( binfo.IsDetached() ) { + binfo.m_SynCache.Reset(); + m_Seq_idMap.erase(it); + } + } + } +} + + void CScope_Impl::ResetHistory(int action) { TConfWriteLockGuard guard(m_ConfLock); diff --git a/c++/src/objmgr/tse_info.cpp b/c++/src/objmgr/tse_info.cpp index 230b8774..bd9b4ebe 100644 --- a/c++/src/objmgr/tse_info.cpp +++ b/c++/src/objmgr/tse_info.cpp @@ -1,4 +1,4 @@ -/* $Id: tse_info.cpp 606922 2020-04-28 18:58:25Z ivanov $ +/* $Id: tse_info.cpp 611227 2020-07-01 11:37:30Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -428,7 +428,7 @@ CBioObjectId CTSE_Info::x_RegisterBioObject(CTSE_Info_Object& info) } uniq_id = CBioObjectId(CBioObjectId::eUniqNumber, - m_InternalBioObjNumber++); + ++m_InternalBioObjNumber); m_BioObjects[uniq_id] = &info; return uniq_id; } diff --git a/c++/src/objmgr/util/autodef.cpp b/c++/src/objmgr/util/autodef.cpp index 7b2b6478..0ca537ea 100644 --- a/c++/src/objmgr/util/autodef.cpp +++ b/c++/src/objmgr/util/autodef.cpp @@ -1,4 +1,4 @@ -/* $Id: autodef.cpp 607821 2020-05-07 19:13:41Z ivanov $ +/* $Id: autodef.cpp 611612 2020-07-08 17:43:23Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -880,6 +880,9 @@ string CAutoDef::x_GetNonFeatureListEnding() case CAutoDefOptions::eListAllFeatures: end = " sequence."; break; + case CAutoDefOptions::eWholeGenomeShotgunSequence: + end = " whole genome shotgun sequence."; + break; 
default: break; } diff --git a/c++/src/objmgr/util/autodef_options.cpp b/c++/src/objmgr/util/autodef_options.cpp index 7020c8de..a13933a6 100644 --- a/c++/src/objmgr/util/autodef_options.cpp +++ b/c++/src/objmgr/util/autodef_options.cpp @@ -1,4 +1,4 @@ -/* $Id: autodef_options.cpp 530196 2017-03-13 12:59:43Z bollin $ +/* $Id: autodef_options.cpp 611612 2020-07-08 17:43:23Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -217,7 +217,8 @@ const TNameValPair sc_FeatureListTypeStr[] = { { "List All Features", CAutoDefOptions::eListAllFeatures }, { "Partial Genome", CAutoDefOptions::ePartialGenome }, { "Partial Sequence", CAutoDefOptions::ePartialSequence }, - { "Sequence", CAutoDefOptions::eSequence } + { "Sequence", CAutoDefOptions::eSequence }, + { "Whole Genome Shotgun Sequence", CAutoDefOptions::eWholeGenomeShotgunSequence } }; DEFINE_STATIC_ARRAY_MAP_WITH_COPY(TNameValPairMap, sc_FeatureListTypeStrsMap, sc_FeatureListTypeStr); diff --git a/c++/src/objmgr/util/create_defline.cpp b/c++/src/objmgr/util/create_defline.cpp index 7d532895..2d574511 100644 --- a/c++/src/objmgr/util/create_defline.cpp +++ b/c++/src/objmgr/util/create_defline.cpp @@ -1432,7 +1432,7 @@ static bool x_EndsWithStrain ( return false; } - pos = NStr::FindNoCase (taxname, strain, 0, taxname.size() - 1, NStr::eLast); + pos = NStr::Find (taxname, strain, NStr::eNocase, NStr::eReverseSearch); if (pos == taxname.size() - strain.size()) { // check for space to avoid fortuitous match to end of taxname char ch = taxname[pos - 1]; @@ -2047,7 +2047,7 @@ static string s_RemoveBracketedOrgFromEnd (string str, string taxname) int len = str.length(); if (len < 5) return str; if (str [len - 1] != ']') return str; - SIZE_TYPE cp = NStr::Find(str, "[", 0, NPOS, NStr::eLast); + SIZE_TYPE cp = NStr::Find(str, "[", NStr::eNocase, NStr::eReverseSearch); if (cp == NPOS) return str; string suffix = str.substr(cp+1); if (NStr::StartsWith(suffix, "NAD")) 
return str; @@ -2953,7 +2953,7 @@ static size_t s_TitleEndsInOrganism ( idx = len1 - len2 - 3; if (len1 > len2 + 4 && title [idx] == ' ' && title [idx + 1] == '[' && title [len1 - 1] == ']') { - pos = NStr::FindNoCase(title, taxname, 0, NPOS, NStr::eLast); + pos = NStr::Find(title, taxname, NStr::eNocase, NStr::eReverseSearch); if (pos == idx + 2) { return pos - 1; } @@ -3009,7 +3009,7 @@ void CDeflineGenerator::x_AdjustProteinTitleSuffixIdx ( tpos = s_TitleEndsInOrganism(m_MainTitle, binomial); if (tpos == NPOS) { if (m_IsCrossKingdom) { - pos = NStr::FindNoCase(m_MainTitle, "][", 0, NPOS, NStr::eLast); + pos = NStr::Find(m_MainTitle, "][", NStr::eNocase, NStr::eReverseSearch); if (pos != NPOS) { m_MainTitle.erase (pos + 1); s_TrimMainTitle (m_MainTitle); @@ -3162,7 +3162,7 @@ void CDeflineGenerator::x_AdjustProteinTitleSuffix ( tpos = s_TitleEndsInOrganism(m_MainTitle, binomial); if (tpos == NPOS) { if (m_IsCrossKingdom) { - pos = NStr::FindNoCase(m_MainTitle, "][", 0, NPOS, NStr::eLast); + pos = NStr::Find(m_MainTitle, "][", NStr::eNocase, NStr::eReverseSearch); if (pos != NPOS) { m_MainTitle.erase (pos + 1); s_TrimMainTitle (m_MainTitle); @@ -3412,6 +3412,51 @@ string CDeflineGenerator::x_GetModifiers(const CBioseq_Handle & bsh) } } } + if ( bios && bios->IsSetPcr_primers() ) { + const CBioSource_Base::TPcr_primers & primers = bios->GetPcr_primers(); + if ( primers.CanGet() ) { + ITERATE( CBioSource_Base::TPcr_primers::Tdata, it, primers.Get() ) { + + // bool has_fwd_seq = false; + // bool has_rev_seq = false; + + if( (*it)->IsSetForward() ) { + const CPCRReaction_Base::TForward &forward = (*it)->GetForward(); + if( forward.CanGet() ) { + ITERATE( CPCRReaction_Base::TForward::Tdata, it2, forward.Get() ) { + const string &fwd_name = ( (*it2)->CanGetName() ? (*it2)->GetName().Get() : kEmptyStr ); + if( ! fwd_name.empty() ) { + joiner.Add("fwd-primer-name", fwd_name); + } + const string &fwd_seq = ( (*it2)->CanGetSeq() ? 
(*it2)->GetSeq().Get() : kEmptyStr ); + // NStr::ToLower( fwd_seq ); + if( ! fwd_seq.empty() ) { + joiner.Add("fwd-primer-seq", fwd_seq); + // has_fwd_seq = true; + } + } + } + } + if( (*it)->IsSetReverse() ) { + const CPCRReaction_Base::TReverse &reverse = (*it)->GetReverse(); + if( reverse.CanGet() ) { + ITERATE( CPCRReaction_Base::TReverse::Tdata, it2, reverse.Get() ) { + const string &rev_name = ((*it2)->CanGetName() ? (*it2)->GetName().Get() : kEmptyStr ); + if( ! rev_name.empty() ) { + joiner.Add("rev-primer-name", rev_name); + } + const string &rev_seq = ( (*it2)->CanGetSeq() ? (*it2)->GetSeq().Get() : kEmptyStr ); + // NStr::ToLower( rev_seq ); // do we need this? + if( ! rev_seq.empty() ) { + joiner.Add("rev-primer-seq", rev_seq); + // has_rev_seq = true; + } + } + } + } + } + } + } } catch (CException &) { // ignore exception; it probably just means there's no org-ref diff --git a/c++/src/objmgr/util/feature_edit.cpp b/c++/src/objmgr/util/feature_edit.cpp index 1035d23e..e8541345 100644 --- a/c++/src/objmgr/util/feature_edit.cpp +++ b/c++/src/objmgr/util/feature_edit.cpp @@ -1,5 +1,5 @@ -/* $Id: feature_edit.cpp 599823 2020-01-07 21:35:24Z foleyjp $ +/* $Id: feature_edit.cpp 610146 2020-06-11 11:11:01Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -282,6 +282,59 @@ void CFeatTrim::x_TrimLocation(const TSeqPos from, const TSeqPos to, } +static TSeqPos s_GetTrimmedLength(const CSeq_loc& trimmed_loc) +{ + + if (trimmed_loc.IsEmpty() || trimmed_loc.IsNull()) { + return 0; + } + + if (trimmed_loc.IsPnt()) { + return 1; + } + + if (trimmed_loc.IsInt()) { + return trimmed_loc.GetInt().GetLength(); + } + + if (trimmed_loc.IsPacked_int()) { + TSeqPos length=0; + for (auto pSubInt : trimmed_loc.GetPacked_int().Get()) { + length += pSubInt->GetLength(); + } + return length; + } + + if (trimmed_loc.IsPacked_pnt()) { + return trimmed_loc.GetPacked_pnt().GetPoints().size(); + } + + if 
(trimmed_loc.IsMix()) { + TSeqPos length=0; + for (auto pSubLoc : trimmed_loc.GetMix().Get()) { + length += s_GetTrimmedLength(*pSubLoc); + } + return length; + } + + return 0; +} + +static TSeqPos s_GetTrimmedLength(const CSeq_loc& loc, TSeqPos from, TSeqPos to) +{ + auto pTrimmedInt = Ref(new CSeq_loc()); + CSeq_loc_CI loc_it(loc); + pTrimmedInt->SetInt().SetId().Assign(loc_it.GetSeq_id()); + pTrimmedInt->SetInt().SetFrom(from); + pTrimmedInt->SetInt().SetTo(to); + auto pTrimmedLoc = loc.Intersect(*pTrimmedInt, CSeq_loc::fStrand_Ignore, nullptr); + if (pTrimmedLoc) { + return s_GetTrimmedLength(*pTrimmedLoc); + } + return 0; +} + + TSeqPos CFeatTrim::x_GetStartOffset(const CSeq_feat& feat, TSeqPos from, TSeqPos to) { @@ -292,13 +345,19 @@ TSeqPos CFeatTrim::x_GetStartOffset(const CSeq_feat& feat, if (strand != eNa_strand_minus) { TSeqPos feat_from = feat_range.GetFrom(); if (feat_from < from) { - offset = from - feat_from; + if (feat.GetLocation().IsInt()) { + return (from - feat_from); + } + return s_GetTrimmedLength(feat.GetLocation(), feat_from, from-1); } } else { // eNa_strand_minus TSeqPos feat_to = feat_range.GetTo(); if (feat_to > to) { - offset = feat_to - to; + if (feat.GetLocation().IsInt()) { + return (feat_to - to); + } + return s_GetTrimmedLength(feat.GetLocation(), to+1, feat_to); } } return offset; @@ -326,7 +385,6 @@ TSeqPos CFeatTrim::x_GetFrame(const CCdregion& cds) CCdregion::EFrame CFeatTrim::GetCdsFrame(const CSeq_feat& cds_feature, const CRange& range) { const TSeqPos offset = x_GetStartOffset(cds_feature, range.GetFrom(), range.GetTo()); - return x_GetNewFrame(offset, cds_feature.GetData().GetCdregion()); } @@ -340,7 +398,12 @@ CCdregion::EFrame CFeatTrim::x_GetNewFrame(const TSeqPos offset, const CCdregion } const TSeqPos old_frame = x_GetFrame(cdregion); - const TSeqPos new_frame = (old_frame + frame_change)%3; + + // RW-1098 + const TSeqPos new_frame = 3 - ((3 + offset - old_frame)%3); + // Note new_frame, thus defined, takes values 
1,2,3, + // whereas old_frame takes values 0,1,2. + // However, 0 == 3 in modulo 3 arithmetic. if (new_frame == 1) { return CCdregion::eFrame_two; } diff --git a/c++/src/objmgr/util/indexer.cpp b/c++/src/objmgr/util/indexer.cpp index a493dbd3..f23f70a4 100644 --- a/c++/src/objmgr/util/indexer.cpp +++ b/c++/src/objmgr/util/indexer.cpp @@ -42,6 +42,7 @@ #include #include +#include #define NCBI_USE_ERRCODE_X ObjMgr_Indexer @@ -53,60 +54,60 @@ BEGIN_SCOPE(objects) // CSeqEntryIndex // Constructors take top-level sequence object, create a CRef, and call its initializer -CSeqEntryIndex::CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy, TFlags flags, int depth) +CSeqEntryIndex::CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy, TFlags flags) { m_Idx.Reset(new CSeqMasterIndex); - m_Idx->x_Initialize(topseh, policy, flags, depth); + m_Idx->x_Initialize(topseh, policy, flags); } -CSeqEntryIndex::CSeqEntryIndex (CBioseq_Handle& bsh, EPolicy policy, TFlags flags, int depth) +CSeqEntryIndex::CSeqEntryIndex (CBioseq_Handle& bsh, EPolicy policy, TFlags flags) { m_Idx.Reset(new CSeqMasterIndex); - m_Idx->x_Initialize(bsh, policy, flags, depth); + m_Idx->x_Initialize(bsh, policy, flags); } -CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy, TFlags flags, int depth) +CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy, TFlags flags) { m_Idx.Reset(new CSeqMasterIndex); - m_Idx->x_Initialize(topsep, policy, flags, depth); + m_Idx->x_Initialize(topsep, policy, flags); } -CSeqEntryIndex::CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy, TFlags flags, int depth) +CSeqEntryIndex::CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy, TFlags flags) { m_Idx.Reset(new CSeqMasterIndex); - m_Idx->x_Initialize(seqset, policy, flags, depth); + m_Idx->x_Initialize(seqset, policy, flags); } -CSeqEntryIndex::CSeqEntryIndex (CBioseq& bioseq, EPolicy policy, TFlags flags, int depth) +CSeqEntryIndex::CSeqEntryIndex (CBioseq& bioseq, EPolicy policy, 
TFlags flags) { m_Idx.Reset(new CSeqMasterIndex); - m_Idx->x_Initialize(bioseq, policy, flags, depth); + m_Idx->x_Initialize(bioseq, policy, flags); } -CSeqEntryIndex::CSeqEntryIndex (CSeq_submit& submit, EPolicy policy, TFlags flags, int depth) +CSeqEntryIndex::CSeqEntryIndex (CSeq_submit& submit, EPolicy policy, TFlags flags) { m_Idx.Reset(new CSeqMasterIndex); - m_Idx->x_Initialize(submit, policy, flags, depth); + m_Idx->x_Initialize(submit, policy, flags); } -CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy, TFlags flags, int depth) +CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy, TFlags flags) { m_Idx.Reset(new CSeqMasterIndex); - m_Idx->x_Initialize(topsep, sblock, policy, flags, depth); + m_Idx->x_Initialize(topsep, sblock, policy, flags); } -CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy, TFlags flags, int depth) +CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy, TFlags flags) { m_Idx.Reset(new CSeqMasterIndex); - m_Idx->x_Initialize(topsep, descr, policy, flags, depth); + m_Idx->x_Initialize(topsep, descr, policy, flags); } // Get first Bioseq index @@ -151,35 +152,58 @@ CRef CSeqEntryIndex::GetBioseqIndex (const CSeq_loc& loc) return m_Idx->GetBioseqIndex(loc); } -// Get Bioseq index by subrange -CRef CSeqEntryIndex::GetBioseqIndex (const string& accn, int from, int to, bool rev_comp) +const vector>& CSeqEntryIndex::GetBioseqIndices(void) { - return m_Idx->GetBioseqIndex(accn, from, to, rev_comp); + return m_Idx->GetBioseqIndices(); } -CRef CSeqEntryIndex::GetBioseqIndex (int from, int to, bool rev_comp) +const vector>& CSeqEntryIndex::GetSeqsetIndices(void) { - return m_Idx->GetBioseqIndex("", from, to, rev_comp); + return m_Idx->GetSeqsetIndices(); } -const vector>& CSeqEntryIndex::GetBioseqIndices(void) +bool CSeqEntryIndex::DistributedReferences(void) { - return m_Idx->GetBioseqIndices(); + 
return m_Idx->DistributedReferences(); } -const vector>& CSeqEntryIndex::GetSeqsetIndices(void) +void CSeqEntryIndex::SetSnpFunc(FAddSnpFunc* snp) { - return m_Idx->GetSeqsetIndices(); + m_Idx->SetSnpFunc (snp); } -bool CSeqEntryIndex::DistributedReferences(void) +FAddSnpFunc* CSeqEntryIndex::GetSnpFunc(void) { - return m_Idx->DistributedReferences(); + return m_Idx->GetSnpFunc(); +} + +void CSeqEntryIndex::SetFeatDepth(int featDepth) + +{ + m_Idx->SetFeatDepth (featDepth); +} + +int CSeqEntryIndex::GetFeatDepth(void) + +{ + return m_Idx->GetFeatDepth(); +} + +void CSeqEntryIndex::SetGapDepth(int featDepth) + +{ + m_Idx->SetGapDepth (featDepth); +} + +int CSeqEntryIndex::GetGapDepth(void) + +{ + return m_Idx->GetGapDepth(); } bool CSeqEntryIndex::IsFetchFailure(void) @@ -198,11 +222,10 @@ bool CSeqEntryIndex::IsIndexFailure(void) // CSeqMasterIndex // Initializers take top-level sequence object, create Seq-entry wrapper if necessary -void CSeqMasterIndex::x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth) +void CSeqMasterIndex::x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags) { m_Policy = policy; m_Flags = flags; - m_Depth = depth; m_Tseh = topseh.GetTopLevelEntry(); CConstRef tcsep = m_Tseh.GetCompleteSeq_entry(); @@ -215,6 +238,9 @@ void CSeqMasterIndex::x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::E m_HasOperon = false; m_IsSmallGenomeSet = false; m_DistributedReferences = false; + m_SnpFunc = 0; + m_FeatDepth = 0; + m_GapDepth = 0; m_IndexFailure = false; try { @@ -243,11 +269,10 @@ void CSeqMasterIndex::x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::E } } -void CSeqMasterIndex::x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth) +void CSeqMasterIndex::x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags) { m_Policy = policy; 
m_Flags = flags; - m_Depth = depth; m_Tseh = bsh.GetTopLevelEntry(); CConstRef tcsep = m_Tseh.GetCompleteSeq_entry(); @@ -260,6 +285,9 @@ void CSeqMasterIndex::x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy m_HasOperon = false; m_IsSmallGenomeSet = false; m_DistributedReferences = false; + m_SnpFunc = 0; + m_FeatDepth = 0; + m_GapDepth = 0; m_IndexFailure = false; try { @@ -288,11 +316,10 @@ void CSeqMasterIndex::x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy } } -void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth) +void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags) { m_Policy = policy; m_Flags = flags; - m_Depth = depth; topsep.Parentize(); m_Tsep.Reset(&topsep); @@ -300,11 +327,10 @@ void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy x_Init(); } -void CSeqMasterIndex::x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth) +void CSeqMasterIndex::x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags) { m_Policy = policy; m_Flags = flags; - m_Depth = depth; CSeq_entry* parent = seqset.GetParentEntry(); if (parent) { @@ -320,11 +346,10 @@ void CSeqMasterIndex::x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy x_Init(); } -void CSeqMasterIndex::x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth) +void CSeqMasterIndex::x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags) { m_Policy = policy; m_Flags = flags; - m_Depth = depth; CSeq_entry* parent = bioseq.GetParentEntry(); if (parent) { @@ -340,11 +365,10 @@ void CSeqMasterIndex::x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy pol x_Init(); } -void CSeqMasterIndex::x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy 
policy, CSeqEntryIndex::TFlags flags, int depth) +void CSeqMasterIndex::x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags) { m_Policy = policy; m_Flags = flags; - m_Depth = depth; _ASSERT(submit.CanGetData()); _ASSERT(submit.CanGetSub()); @@ -359,11 +383,10 @@ void CSeqMasterIndex::x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy x_Init(); } -void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth) +void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags) { m_Policy = policy; m_Flags = flags; - m_Depth = depth; topsep.Parentize(); m_Tsep.Reset(&topsep); @@ -372,11 +395,10 @@ void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, C x_Init(); } -void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth) +void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags) { m_Policy = policy; m_Flags = flags; - m_Depth = depth; topsep.Parentize(); m_Tsep.Reset(&topsep); @@ -385,6 +407,43 @@ void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqE x_Init(); } +void CSeqMasterIndex::SetSnpFunc (FAddSnpFunc* snp) + +{ + m_SnpFunc = snp; +} + +FAddSnpFunc* CSeqMasterIndex::GetSnpFunc (void) + +{ + return m_SnpFunc; +} + +void CSeqMasterIndex::SetFeatDepth (int featDepth) + +{ + m_FeatDepth = featDepth; +} + +int CSeqMasterIndex::GetFeatDepth (void) + +{ + return m_FeatDepth; +} + +void CSeqMasterIndex::SetGapDepth (int gapDepth) + +{ + m_GapDepth = gapDepth; +} + +int CSeqMasterIndex::GetGapDepth (void) + +{ + return m_GapDepth; +} + + // At end of program, poll all Bioseqs to check for far fetch failure flag bool 
CSeqMasterIndex::IsFetchFailure (void) @@ -459,7 +518,7 @@ void CSeqMasterIndex::x_InitSeqs (const CSeq_entry& sep, CRef prnt CBioseq_Handle bsh = m_Scope->GetBioseqHandle(bsp); if (bsh) { // create CBioseqIndex object for current Bioseq - CRef bsx(new CBioseqIndex(bsh, bsp, bsh, prnt, m_Tseh, m_Scope, *this, m_Policy, m_Flags, m_Depth, false)); + CRef bsx(new CBioseqIndex(bsh, bsp, bsh, prnt, m_Tseh, m_Scope, *this, m_Policy, m_Flags)); // record CBioseqIndex in vector for IterateBioseqs or GetBioseqIndex m_BsxList.push_back(bsx); @@ -573,6 +632,9 @@ void CSeqMasterIndex::x_Init (void) m_HasOperon = false; m_IsSmallGenomeSet = false; m_DistributedReferences = false; + m_SnpFunc = 0; + m_FeatDepth = 0; + m_GapDepth = 0; m_IndexFailure = false; try { @@ -604,92 +666,6 @@ void CSeqMasterIndex::x_Init (void) } } -// Support for temporary delta sequence referring to subrange of original sequence -CRef CSeqMasterIndex::x_MakeUniqueId(void) -{ - CRef id(new CSeq_id()); - bool good = false; - while (!good) { - id->SetLocal().SetStr("tmp_delta_subset_" + NStr::NumericToString(m_Counter.Add(1))); - CBioseq_Handle bsh = m_Scope->GetBioseqHandle(*id); - if (! 
bsh) { - good = true; - } - } - return id; -} - -CRef CSeqMasterIndex::x_DeltaIndex(const CSeq_loc& loc) - -{ - try { - // create delta sequence referring to location or range, using temporary local Seq-id - CBioseq_Handle bsh = m_Scope->GetBioseqHandle(loc); - CRef delta(new CBioseq()); - delta->SetId().push_back(x_MakeUniqueId()); - delta->SetInst().Assign(bsh.GetInst()); - delta->SetInst().ResetSeq_data(); - delta->SetInst().ResetExt(); - delta->SetInst().SetRepr(CSeq_inst::eRepr_delta); - CRef element(new CDelta_seq()); - element->SetLoc().Assign(loc); - delta->SetInst().SetExt().SetDelta().Set().push_back(element); - delta->SetInst().SetLength(sequence::GetLength(loc, m_Scope)); - - // add to scope - CBioseq_Handle deltaBsh = m_Scope->AddBioseq(*delta); - - if (deltaBsh) { - // create CBioseqIndex object for delta Bioseq - CRef noparent; - - CRef bsx(new CBioseqIndex(deltaBsh, *delta, bsh, noparent, m_Tseh, m_Scope, *this, m_Policy, m_Flags, m_Depth, true)); - - return bsx; - } - } - catch (CException& e) { - LOG_POST_X(2, Error << "Error in CSeqMasterIndex::x_DeltaIndex: " << e.what()); - } - return CRef (); -} - -CConstRef CSeqMasterIndex::x_SubRangeLoc(const string& accn, int from, int to, bool rev_comp) - -{ - TAccnIndexMap::iterator it = m_AccnIndexMap.find(accn); - if (it != m_AccnIndexMap.end()) { - CRef bsx = it->second; - for (const CRef& id : bsx->GetBioseq().GetId()) { - switch (id->Which()) { - case CSeq_id::e_Other: - case CSeq_id::e_Genbank: - case CSeq_id::e_Embl: - case CSeq_id::e_Ddbj: - case CSeq_id::e_Tpg: - case CSeq_id::e_Tpe: - case CSeq_id::e_Tpd: - { - CSeq_loc::TStrand strand = eNa_strand_unknown; - if (rev_comp) { - strand = eNa_strand_minus; - } - CSeq_id& nc_id = const_cast(*id); - // create location from range - CConstRef loc(new CSeq_loc(nc_id, from, to, strand)); - if (loc) { - return loc; - } - } - break; - default: - break; - } - } - } - return CConstRef (); -} - // Get first Bioseq index CRef CSeqMasterIndex::GetBioseqIndex 
(void) @@ -762,40 +738,8 @@ CRef CSeqMasterIndex::GetBioseqIndex (const CMappedFeat& mf) CRef CSeqMasterIndex::GetBioseqIndex (const CSeq_loc& loc) { - CRef bsx = x_DeltaIndex(loc); - - if (bsx) { - return bsx; - } - return CRef (); -} - -// Get Bioseq index by subrange -CRef CSeqMasterIndex::GetBioseqIndex (const string& accn, int from, int to, bool rev_comp) - -{ - string accession = accn; - if (accession.empty()) { - CRef bsx = GetBioseqIndex(); - if (bsx) { - accession = bsx->GetAccession(); - } - } - - if (! accession.empty()) { - CConstRef loc = x_SubRangeLoc(accession, from, to, rev_comp); - - if (loc) { - return GetBioseqIndex(*loc); - } - } - return CRef (); -} - -CRef CSeqMasterIndex::GetBioseqIndex (int from, int to, bool rev_comp) - -{ - return GetBioseqIndex("", from, to, rev_comp); + CBioseq_Handle bsh = m_Scope->GetBioseqHandle(loc); + return GetBioseqIndex(bsh); } // Allow access to internal vectors for application to use in iterators @@ -841,9 +785,7 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh, CRef scope, CSeqMasterIndex& idx, CSeqEntryIndex::EPolicy policy, - CSeqEntryIndex::TFlags flags, - int depth, - bool surrogate) + CSeqEntryIndex::TFlags flags) : m_Bsh(bsh), m_Bsp(bsp), m_OrigBsh(obsh), @@ -852,9 +794,7 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh, m_Scope(scope), m_Idx(&idx), m_Policy(policy), - m_Flags(flags), - m_Depth(depth), - m_Surrogate(surrogate) + m_Flags(flags) { m_FetchFailure = false; @@ -873,6 +813,7 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh, m_Topology = NCBI_SEQTOPOLOGY(not_set); m_IsDelta = false; + m_IsDeltaLitOnly = false; m_IsVirtual = false; m_IsMap = false; @@ -885,6 +826,7 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh, m_Accession.clear(); + m_IsRefSeq = false; m_IsNC = false; m_IsNM = false; m_IsNR = false; @@ -931,7 +873,7 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh, m_Taxname.clear(); m_Common.clear(); m_Lineage.clear(); - m_Taxid = 0; + m_Taxid = ZERO_TAX_ID; m_UsingAnamorph = false; 
m_Genus.clear(); m_Species.clear(); @@ -1000,16 +942,44 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh, m_IsVirtual = (repr == CSeq_inst::eRepr_virtual); m_IsMap = (repr == CSeq_inst::eRepr_map); } + if (m_IsDelta && m_Bsh.IsSetInst_Ext()) { + const CBioseq_Handle::TInst_Ext& ext = m_Bsh.GetInst_Ext(); + bool hasLoc = false; + if ( ext.IsDelta() ) { + ITERATE (CDelta_ext::Tdata, it, ext.GetDelta().Get()) { + if ( (*it)->IsLoc() ) { + const CSeq_loc& loc = (*it)->GetLoc(); + if (loc.IsNull()) continue; + hasLoc = true; + } + } + } + if (! hasLoc) { + m_IsDeltaLitOnly = true; + } + } } // process Seq-ids for (CSeq_id_Handle sid : obsh.GetId()) { + // first switch to set RefSeq and ThirdParty flags switch (sid.Which()) { + case NCBI_SEQID(Other): + m_IsRefSeq = true; + break; case NCBI_SEQID(Tpg): case NCBI_SEQID(Tpe): case NCBI_SEQID(Tpd): m_ThirdParty = true; - // fall through + break; + default: + break; + } + // second switch now avoids complicated flag setting logic + switch (sid.Which()) { + case NCBI_SEQID(Tpg): + case NCBI_SEQID(Tpe): + case NCBI_SEQID(Tpd): case NCBI_SEQID(Other): case NCBI_SEQID(Genbank): case NCBI_SEQID(Embl): @@ -1117,13 +1087,6 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh, CBioseqIndex::~CBioseqIndex (void) { - if (m_Surrogate) { - try { - m_Scope->RemoveBioseq(m_Bsh); - } catch (CException&) { - // presumably still in use; let it be - } - } } // Gap collection (delayed until needed) @@ -1144,11 +1107,12 @@ void CBioseqIndex::x_InitGaps (void) SSeqMapSelector sel; size_t resolveCount = 0; - /* - if (m_Policy == CSeqEntryIndex::eInternal) { - resolveCount = 0; + + CWeakRef idx = GetSeqMasterIndex(); + auto idxl = idx.Lock(); + if (idxl) { + resolveCount = idxl->GetGapDepth(); } - */ sel.SetFlags(CSeqMap::fFindGap) .SetResolveCount(resolveCount); @@ -1855,339 +1819,187 @@ void CBioseqIndex::x_InitDescs (void) } } -// Feature collection (delayed until needed) -void CBioseqIndex::x_InitFeats (void) +void 
CBioseqIndex::x_DefaultSelector(SAnnotSelector& sel, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, bool onlyNear, CScope& scope) { - try { - if (m_FeatsInitialized) { - return; - } + bool snpOK = false; + bool cddOK = false; - if (! m_DescsInitialized) { - // initialize descriptors first to get m_ForceOnlyNearFeats flag - x_InitDescs(); - } + if (policy == CSeqEntryIndex::eExhaustive) { - m_FeatsInitialized = true; + // experimental policy forces collection of features from all sequence levels + sel.SetResolveAll(); + sel.SetResolveDepth(kMax_Int); + // ignores RefSeq/INSD barrier, overrides far fetch policy user object + // for now, always excludes external annots, ignores custom enable bits - SAnnotSelector sel; + } else if (policy == CSeqEntryIndex::eInternal || onlyNear) { - if (m_Policy != CSeqEntryIndex::eExternal) { - // unless explicitly desired, exclude external annots - need explicit show flags - if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) != 0) { - sel.ExcludeNamedAnnots("SNP"); - } - if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) != 0) { - sel.ExcludeNamedAnnots("CDD"); - } - sel.ExcludeNamedAnnots("STS"); - } + // do not fetch features from underlying sequence component records + sel.SetResolveDepth(0); + sel.SetExcludeExternal(true); + // always excludes external annots, ignores custom enable bits - if (m_Policy == CSeqEntryIndex::eInternal || m_ForceOnlyNearFeats) { + } else if (policy == CSeqEntryIndex::eAdaptive) { - // do not fetch features from underlying sequence component records - if (m_Surrogate) { - // delta with sublocation needs to map features from original Bioseq - sel.SetResolveAll(); - sel.SetResolveDepth(1); - sel.SetExcludeExternal(); - } else { - // otherwise limit collection to local records in top-level Seq-entry - sel.SetResolveDepth(0); - sel.SetExcludeExternal(); - } + sel.SetResolveAll(); + // normal situation uses adaptive depth for feature collection, + // includes barrier between RefSeq and INSD accession 
types + sel.SetAdaptiveDepth(true); - } else if (m_Policy == CSeqEntryIndex::eExhaustive) { + // conditionally allows external annots, based on custom enable bits + if ((flags & CSeqEntryIndex::fShowSNPFeats) != 0) { + snpOK = true; + } + if ((flags & CSeqEntryIndex::fShowCDDFeats) != 0) { + cddOK = true; + } - sel.SetResolveAll(); - // experimental flag forces collection of features from all levels - sel.SetResolveDepth(kMax_Int); - // also ignores RefSeq/INSD barrier, far fetch policy user object + } else if (policy == CSeqEntryIndex::eExternal) { - } else if (m_Policy == CSeqEntryIndex::eExternal) { + // same as eAdaptive + sel.SetResolveAll(); + sel.SetAdaptiveDepth(true); - // same as eAdaptive, except also allows external annots - sel.SetResolveAll(); - sel.SetAdaptiveDepth(true); - // needs to be here - sel.AddUnnamedAnnots(); - // allow external SNPs - if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) { - sel.IncludeNamedAnnotAccession("SNP"); - sel.AddNamedAnnots("SNP"); - } - if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) { - sel.IncludeNamedAnnotAccession("CDD"); - sel.AddNamedAnnots("CDD"); - } - m_Scope->SetKeepExternalAnnotsForEdit(); - // obey flag to hide CDD features by default in the web display - if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) != 0) { - sel.ExcludeNamedAnnots("CDD"); - } + // but always allows external annots without need for custom enable bits + snpOK = true; + cddOK = true; - } else if (m_Depth > -1) { + } else if (policy == CSeqEntryIndex::eFtp) { + // for public ftp releases + if (m_IsRefSeq) { sel.SetResolveAll(); - // explicit depth setting overrides adaptive depth (probably only needed for debugging) - sel.SetResolveDepth(m_Depth); + sel.SetAdaptiveDepth(true); + } else if (m_IsDeltaLitOnly) { + sel.SetResolveDepth(0); + sel.SetExcludeExternal(true); + } else { + sel.SetResolveDepth(0); + sel.SetExcludeExternal(true); + 
} - } else if (m_Policy == CSeqEntryIndex::eAdaptive) { + } else if (policy == CSeqEntryIndex::eWeb) { + // for public web pages + if (m_IsRefSeq) { sel.SetResolveAll(); - // normal situation uses adaptive depth for feature collection, - // includes barrier between RefSeq and INSD accession types sel.SetAdaptiveDepth(true); - - // calling AddUnnamedAnnots once again suppresses tRNA features in a ("tRNAscan-SE") named annot - // but commenting it out allows external variations in NG_008330 to override internal gene, mRNA, CDS, and exon features - sel.AddUnnamedAnnots(); - - // allow external SNPs - testing for now, probably needs to be in external policy - if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) { - sel.IncludeNamedAnnotAccession("SNP"); - sel.AddNamedAnnots("SNP"); - } - if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) { - sel.IncludeNamedAnnotAccession("CDD"); - sel.AddNamedAnnots("CDD"); - } - m_Scope->SetKeepExternalAnnotsForEdit(); - - } else if (m_Policy == CSeqEntryIndex::eIncremental) { - - // do not fetch features from underlying sequence component records - if (m_Surrogate) { - // delta with sublocation needs to map features from original Bioseq - sel.SetResolveAll(); - sel.SetResolveDepth(1); - sel.SetExcludeExternal(); - } else { - // otherwise limit collection to local records in top-level Seq-entry - sel.SetResolveAll(); - sel.SetResolveDepth(0); - sel.SetExcludeExternal(); - } - - /* + } else if (m_IsDeltaLitOnly) { sel.SetResolveAll(); - // flatfile generator now needs to do its own exploration of far delta components - // and needs to implement barrier between RefSeq and INSD accession types - sel.SetResolveDepth(1); - - // calling AddUnnamedAnnots once again suppresses tRNA features in a ("tRNAscan-SE") named annot - // sel.AddUnnamedAnnots(); - - // allow external SNPs - testing for now, probably needs to be in external policy - if 
((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) { - sel.IncludeNamedAnnotAccession("SNP"); - sel.AddNamedAnnots("SNP"); - } - if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) { - sel.IncludeNamedAnnotAccession("CDD"); - sel.AddNamedAnnots("CDD"); - } - m_Scope->SetKeepExternalAnnotsForEdit(); - */ + sel.SetAdaptiveDepth(true); + } else { + sel.SetResolveAll(); + sel.SetAdaptiveDepth(true); } - // bit flags exclude specific features - if ((m_Flags & CSeqEntryIndex::fHideImpFeats) != 0) { - sel.ExcludeFeatType(CSeqFeatData::e_Imp); - } - if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) != 0) { - sel.ExcludeFeatType(CSeqFeatData::e_Variation); - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_variation); + // conditionally allows external annots, based on custom enable bits + if ((flags & CSeqEntryIndex::fShowSNPFeats) != 0) { + snpOK = true; } - if ((m_Flags & CSeqEntryIndex::fHideSTSFeats) != 0) { - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_STS); - } - if ((m_Flags & CSeqEntryIndex::fHideExonFeats) != 0) { - sel.ExcludeNamedAnnots("Exon"); - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon); - } - if ((m_Flags & CSeqEntryIndex::fHideIntronFeats) != 0) { - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron); - } - if ((m_Flags & CSeqEntryIndex::fHideMiscFeats) != 0) { - sel.ExcludeFeatType(CSeqFeatData::e_Site); - sel.ExcludeFeatType(CSeqFeatData::e_Bond); - sel.ExcludeFeatType(CSeqFeatData::e_Region); - sel.ExcludeFeatType(CSeqFeatData::e_Comment); - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature); - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_preprotein); - } - if ((m_Flags & CSeqEntryIndex::fHideGapFeats) != 0) { - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_gap); - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_assembly_gap); - } - - // additional common settings - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue) - 
.ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite) - .ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq); - - sel.SetFeatComparator(new feature::CFeatComparatorByLabel); - - // request exception to capture fetch failure - sel.SetFailUnresolved(); - - bool onlyGeneRNACDS = false; - if ((m_Flags & CSeqEntryIndex::fGeneRNACDSOnly) != 0) { - onlyGeneRNACDS = true; + if ((flags & CSeqEntryIndex::fShowCDDFeats) != 0) { + cddOK = true; } + } - // variables for setting m_BestProteinFeature - TSeqPos longest = 0; - CProt_ref::EProcessed bestprocessed = CProt_ref::eProcessed_not_set; - CProt_ref::EProcessed processed; + // fHideSNPFeats and fHideCDDFeats flags override any earlier settings + if ((flags & CSeqEntryIndex::fHideSNPFeats) != 0) { + snpOK = false; + } + if ((flags & CSeqEntryIndex::fHideCDDFeats) != 0) { + cddOK = false; + } - // next gap - CGapIndex* sgx = NULL; - if (m_GapList.size() > 0) { - sgx = m_GapList[0]; - } + // configure remote annot settings in selector + if ( snpOK ) { CWeakRef idx = GetSeqMasterIndex(); auto idxl = idx.Lock(); if (idxl) { - /* - if (! 
idxl->IsSmallGenomeSet()) { - // limit feature collection to immediate Bioseq-set parent - CRef prnt = GetParent(); - if (prnt) { - CBioseq_set_Handle bssh = prnt->GetSeqsetHandle(); - if (bssh) { - CSeq_entry_Handle pseh = bssh.GetParentEntry(); - if (pseh) { - sel.SetLimitSeqEntry(pseh); - } - } + FAddSnpFunc* func = idxl->GetSnpFunc(); + if (func) { + // under PubSeq Gateway, need to get exact accession for SNP retrieval + CBioseq_Handle bsh = GetBioseqHandle(); + string na_acc; + (*func) (bsh, na_acc); + if (na_acc.length() > 0) { + sel.IncludeNamedAnnotAccession(na_acc); } + } else { + // otherwise just give SNP name + sel.IncludeNamedAnnotAccession("SNP"); } - */ - - CRef ft = idxl->GetFeatTree(); - - // iterate features on Bioseq - for (CFeat_CI feat_it(m_Bsh, sel); feat_it; ++feat_it) { - const CMappedFeat mf = *feat_it; - - if (onlyGeneRNACDS) { - const CSeqFeatData& data = mf.GetData(); - CSeqFeatData::E_Choice type = data.Which(); - if (type != CSeqFeatData::e_Gene && - type != CSeqFeatData::e_Rna && - type != CSeqFeatData::e_Cdregion) { - continue; - } - } - - CSeq_feat_Handle hdl = mf.GetSeq_feat_Handle(); - - CRef sfx(new CFeatureIndex(hdl, mf, *this)); - m_SfxList.push_back(sfx); - - ft->AddFeature(mf); - - // CFeatureIndex from CMappedFeat for use with GetBestGene - m_FeatIndexMap[mf] = sfx; + } - // set specific flags for various feature types - CSeqFeatData::E_Choice type = sfx->GetType(); - CSeqFeatData::ESubtype subtype = sfx->GetSubtype(); + } else { + sel.ExcludeNamedAnnotAccession("SNP"); + } - if (type == CSeqFeatData::e_Biosrc) { - m_HasSource = true; - if (! m_BioSource) { - if (! 
mf.IsSetData ()) continue; - const CSeqFeatData& sfdata = mf.GetData(); - const CBioSource& biosrc = sfdata.GetBiosrc(); - m_BioSource.Reset (&biosrc); - } - continue; - } + if ( cddOK ) { + sel.IncludeNamedAnnotAccession("CDD"); + } else { + sel.ExcludeNamedAnnotAccession("CDD"); + } - if (type == CSeqFeatData::e_Gene) { - m_HasGene = true; - if (m_HasMultiIntervalGenes) { - continue; - } - const CSeq_loc& loc = mf.GetLocation (); - switch (loc.Which()) { - case CSeq_loc::e_Packed_int: - case CSeq_loc::e_Packed_pnt: - case CSeq_loc::e_Mix: - case CSeq_loc::e_Equiv: - m_HasMultiIntervalGenes = true; - break; - default: - break; - } - continue; - } + CWeakRef idx = GetSeqMasterIndex(); + auto idxl = idx.Lock(); + if (idxl) { + int featDepth = idxl->GetFeatDepth(); + if (featDepth > 0) { + sel.SetResolveDepth(featDepth); + } + } - if (subtype == CSeqFeatData::eSubtype_operon) { - idxl->SetHasOperon(true); - continue; - } + // bit flags exclude specific features + // source features are collected elsewhere + sel.ExcludeFeatType(CSeqFeatData::e_Biosrc); + // pub features are used in the REFERENCES section + sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_pub); + // some feature types are always excluded (deprecated?) 
+ // sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue) + sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite) + .ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq); + // exclude other types based on user flags + if ((flags & CSeqEntryIndex::fHideImpFeats) != 0) { + sel.ExcludeFeatType(CSeqFeatData::e_Imp); + } + if ((flags & CSeqEntryIndex::fHideSTSFeats) != 0) { + sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_STS); + } + if ((flags & CSeqEntryIndex::fHideExonFeats) != 0) { + sel.ExcludeNamedAnnots("Exon"); + sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon); + } + if ((flags & CSeqEntryIndex::fHideIntronFeats) != 0) { + sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron); + } + if ((flags & CSeqEntryIndex::fHideMiscFeats) != 0) { + sel.ExcludeFeatType(CSeqFeatData::e_Site); + sel.ExcludeFeatType(CSeqFeatData::e_Bond); + sel.ExcludeFeatType(CSeqFeatData::e_Region); + sel.ExcludeFeatType(CSeqFeatData::e_Comment); + sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature); + sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_preprotein); + } + if ((flags & CSeqEntryIndex::fHideGapFeats) != 0) { + sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_gap); + sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_assembly_gap); + } - if (type == CSeqFeatData::e_Prot && IsAA()) { - if (! 
mf.IsSetData ()) continue; - const CSeqFeatData& sfdata = mf.GetData(); - const CProt_ref& prp = sfdata.GetProt(); - processed = CProt_ref::eProcessed_not_set; - if (prp.IsSetProcessed()) { - processed = prp.GetProcessed(); - } - const CSeq_loc& loc = mf.GetLocation (); - TSeqPos prot_length = sequence::GetLength(loc, m_Scope); - if (prot_length > longest) { - m_BestProtFeatInitialized = true; - m_BestProteinFeature = sfx; - longest = prot_length; - bestprocessed = processed; - } else if (prot_length == longest) { - // unprocessed 0 > preprotein 1 > mat peptide 2 - if (processed < bestprocessed) { - m_BestProtFeatInitialized = true; - m_BestProteinFeature = sfx; - longest = prot_length; - bestprocessed = processed; - } - } - continue; - } + // additional common settings + sel.SetFeatComparator(new feature::CFeatComparatorByLabel); - if (type == CSeqFeatData::e_Cdregion && IsNA()) { - } else if (type == CSeqFeatData::e_Rna && IsNA()) { - } else if (type == CSeqFeatData::e_Prot && IsAA()) { - } else { - continue; - } + // limit exploration of far deltas with no features to avoid timeout + sel.SetMaxSearchSegments(500); + sel.SetMaxSearchSegmentsAction(SAnnotSelector::eMaxSearchSegmentsSilent); + sel.SetMaxSearchTime(25); - // index feature for (local) product Bioseq (CDS -> protein, mRNA -> cDNA, or Prot -> peptide) - CSeq_id_Handle idh = mf.GetProductId(); - if (idh) { - string str = idh.AsString(); - CRef bsxp = idxl->GetBioseqIndex(str); - if (bsxp) { - bsxp->m_FeatForProdInitialized = true; - bsxp->m_FeatureForProduct = sfx; - } - } - } - } - } - catch (CException& e) { - m_FetchFailure = true; - LOG_POST_X(6, Error << "Error in CBioseqIndex::x_InitFeats: " << e.what()); - } + // request exception to capture fetch failure + sel.SetFailUnresolved(); } -// Feature collection (delayed until needed) -void CBioseqIndex::x_InitFeatsByLoc (const CSeq_loc& slp) +// Feature collection common implementation method (delayed until needed) +void CBioseqIndex::x_InitFeats 
(CSeq_loc* slpp) { try { @@ -2202,165 +2014,7 @@ void CBioseqIndex::x_InitFeatsByLoc (const CSeq_loc& slp) SAnnotSelector sel; - if (m_Policy != CSeqEntryIndex::eExternal) { - // unless explicitly desired, exclude external annots - need explicit show flags - if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) != 0) { - sel.ExcludeNamedAnnots("SNP"); - } - if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) != 0) { - sel.ExcludeNamedAnnots("CDD"); - } - sel.ExcludeNamedAnnots("STS"); - } - - if (m_Policy == CSeqEntryIndex::eExhaustive) { - - sel.SetResolveAll(); - // experimental flag forces collection of features from all levels - sel.SetResolveDepth(kMax_Int); - // also ignores RefSeq/INSD barrier, far fetch policy user object - - } else if (m_Policy == CSeqEntryIndex::eExternal) { - - // same as eAdaptive, except also allows external annots - sel.SetResolveAll(); - sel.SetAdaptiveDepth(true); - // needs to be here - sel.AddUnnamedAnnots(); - // allow external SNPs - if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) { - sel.IncludeNamedAnnotAccession("SNP"); - sel.AddNamedAnnots("SNP"); - } - if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) { - sel.IncludeNamedAnnotAccession("CDD"); - sel.AddNamedAnnots("CDD"); - } - m_Scope->SetKeepExternalAnnotsForEdit(); - // obey flag to hide CDD features by default in the web display - if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) != 0) { - sel.ExcludeNamedAnnots("CDD"); - } - - } else if (m_Policy == CSeqEntryIndex::eInternal || m_ForceOnlyNearFeats) { - - // do not fetch features from underlying sequence component records - if (m_Surrogate) { - // delta with sublocation needs to map features from original Bioseq - sel.SetResolveAll(); - sel.SetResolveDepth(1); - sel.SetExcludeExternal(); - } else { - // otherwise limit collection to local records in top-level Seq-entry - sel.SetResolveDepth(0); - sel.SetExcludeExternal(); - } - - } 
else if (m_Depth > -1) { - - sel.SetResolveAll(); - // explicit depth setting overrides adaptive depth (probably only needed for debugging) - sel.SetResolveDepth(m_Depth); - - } else if (m_Policy == CSeqEntryIndex::eAdaptive) { - - sel.SetResolveAll(); - // normal situation uses adaptive depth for feature collection, - // includes barrier between RefSeq and INSD accession types - sel.SetAdaptiveDepth(true); - - // calling AddUnnamedAnnots once again suppresses tRNA features in a ("tRNAscan-SE") named annot - // but commenting it out allows external variations in NG_008330 to override internal gene, mRNA, CDS, and exon features - sel.AddUnnamedAnnots(); - - // allow external SNPs - testing for now, probably needs to be in external policy - if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) { - sel.IncludeNamedAnnotAccession("SNP"); - sel.AddNamedAnnots("SNP"); - } - if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) { - sel.IncludeNamedAnnotAccession("CDD"); - sel.AddNamedAnnots("CDD"); - } - m_Scope->SetKeepExternalAnnotsForEdit(); - - } else if (m_Policy == CSeqEntryIndex::eIncremental) { - - // do not fetch features from underlying sequence component records - if (m_Surrogate) { - // delta with sublocation needs to map features from original Bioseq - sel.SetResolveAll(); - sel.SetResolveDepth(1); - sel.SetExcludeExternal(); - } else { - // otherwise limit collection to local records in top-level Seq-entry - sel.SetResolveAll(); - sel.SetResolveDepth(0); - sel.SetExcludeExternal(); - } - - /* - sel.SetResolveAll(); - // flatfile generator now needs to do its own exploration of far delta components - // and needs to implement barrier between RefSeq and INSD accession types - sel.SetResolveDepth(1); - - // calling AddUnnamedAnnots once again suppresses tRNA features in a ("tRNAscan-SE") named annot - // sel.AddUnnamedAnnots(); - - // allow external SNPs - testing 
for now, probably needs to be in external policy - if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) { - sel.IncludeNamedAnnotAccession("SNP"); - sel.AddNamedAnnots("SNP"); - } - if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) { - sel.IncludeNamedAnnotAccession("CDD"); - sel.AddNamedAnnots("CDD"); - } - m_Scope->SetKeepExternalAnnotsForEdit(); - */ - } - - // bit flags exclude specific features - if ((m_Flags & CSeqEntryIndex::fHideImpFeats) != 0) { - sel.ExcludeFeatType(CSeqFeatData::e_Imp); - } - if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) != 0) { - sel.ExcludeFeatType(CSeqFeatData::e_Variation); - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_variation); - } - if ((m_Flags & CSeqEntryIndex::fHideSTSFeats) != 0) { - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_STS); - } - if ((m_Flags & CSeqEntryIndex::fHideExonFeats) != 0) { - sel.ExcludeNamedAnnots("Exon"); - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon); - } - if ((m_Flags & CSeqEntryIndex::fHideIntronFeats) != 0) { - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron); - } - if ((m_Flags & CSeqEntryIndex::fHideMiscFeats) != 0) { - sel.ExcludeFeatType(CSeqFeatData::e_Site); - sel.ExcludeFeatType(CSeqFeatData::e_Bond); - sel.ExcludeFeatType(CSeqFeatData::e_Region); - sel.ExcludeFeatType(CSeqFeatData::e_Comment); - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature); - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_preprotein); - } - if ((m_Flags & CSeqEntryIndex::fHideGapFeats) != 0) { - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_gap); - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_assembly_gap); - } - - // additional common settings - sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue) - .ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite) - .ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq); - - sel.SetFeatComparator(new feature::CFeatComparatorByLabel); - - // request exception 
to capture fetch failure - sel.SetFailUnresolved(); + x_DefaultSelector(sel, m_Policy, m_Flags, m_ForceOnlyNearFeats, *m_Scope); bool onlyGeneRNACDS = false; if ((m_Flags & CSeqEntryIndex::fGeneRNACDSOnly) != 0) { @@ -2372,12 +2026,6 @@ void CBioseqIndex::x_InitFeatsByLoc (const CSeq_loc& slp) CProt_ref::EProcessed bestprocessed = CProt_ref::eProcessed_not_set; CProt_ref::EProcessed processed; - // next gap - CGapIndex* sgx = NULL; - if (m_GapList.size() > 0) { - sgx = m_GapList[0]; - } - CWeakRef idx = GetSeqMasterIndex(); auto idxl = idx.Lock(); if (idxl) { @@ -2402,23 +2050,58 @@ void CBioseqIndex::x_InitFeatsByLoc (const CSeq_loc& slp) // start collection over on each segment m_SfxList.clear(); + // iterate features on Bioseq or sublocation + CFeat_CI feat_it; + CRef slice_mapper; + if (slpp == 0) { + feat_it = CFeat_CI(m_Bsh, sel); + } else { + SAnnotSelector sel_cpy = sel; + sel_cpy.SetIgnoreStrand(); + /* + if (selp->IsSetStrand() && selp->GetStrand() == eNa_strand_minus) { + sel_cpy.SetSortOrder(SAnnotSelector::eSortOrder_Reverse); + } + */ + CConstRef bsid = m_Bsh.GetSeqId(); + if (bsid) { + SetDiagFilter(eDiagFilter_All, "!(1305.28,31)"); + CSeq_id seq_id; + seq_id.Assign( *bsid ); + CSeq_loc old_loc; + old_loc.SetInt().SetId( seq_id ); + old_loc.SetInt().SetFrom( 0 ); + old_loc.SetInt().SetTo( m_Length - 1 ); + slice_mapper = new CSeq_loc_Mapper( *slpp, old_loc, m_Scope ); + slice_mapper->SetFuzzOption( CSeq_loc_Mapper::fFuzzOption_RemoveLimTlOrTr ); + slice_mapper->TruncateNonmappingRanges(); + SetDiagFilter(eDiagFilter_All, ""); + } + feat_it = CFeat_CI(*m_Scope, *slpp, sel_cpy); + } + // iterate features on Bioseq - for (CFeat_CI feat_it(*m_Scope, slp, sel); feat_it; ++feat_it) { + for (; feat_it; ++feat_it) { const CMappedFeat mf = *feat_it; + const CSeqFeatData& data = mf.GetData(); + CSeqFeatData::E_Choice typ = data.Which(); if (onlyGeneRNACDS) { - const CSeqFeatData& data = mf.GetData(); - CSeqFeatData::E_Choice type = data.Which(); - if (type != 
CSeqFeatData::e_Gene && - type != CSeqFeatData::e_Rna && - type != CSeqFeatData::e_Cdregion) { + if (typ != CSeqFeatData::e_Gene && + typ != CSeqFeatData::e_Rna && + typ != CSeqFeatData::e_Cdregion) { continue; } } CSeq_feat_Handle hdl = mf.GetSeq_feat_Handle(); - CRef sfx(new CFeatureIndex(hdl, mf, *this)); + CConstRef feat_loc(&mf.GetLocation()); + if (slpp) { + feat_loc.Reset( slice_mapper->Map( mf.GetLocation() ) ); + } + + CRef sfx(new CFeatureIndex(hdl, mf, feat_loc, *this)); m_SfxList.push_back(sfx); ft->AddFeature(mf); @@ -2514,10 +2197,23 @@ void CBioseqIndex::x_InitFeatsByLoc (const CSeq_loc& slp) } catch (CException& e) { m_FetchFailure = true; - LOG_POST_X(6, Error << "Error in CBioseqIndex::x_InitFeatsByLoc: " << e.what()); + LOG_POST_X(6, Error << "Error in CBioseqIndex::x_InitFeats: " << e.what()); } } +// Feature collection methods (delayed until needed) +void CBioseqIndex::x_InitFeats (void) + +{ + x_InitFeats(0); +} + +void CBioseqIndex::x_InitFeats (CSeq_loc& slp) + +{ + x_InitFeats(&slp); +} + // GetFeatureForProduct allows hypothetical protein defline generator to obtain gene locus tag CRef CBioseqIndex::GetFeatureForProduct (void) @@ -2756,7 +2452,7 @@ const string& CBioseqIndex::GetLineage (void) return m_Lineage; } -int CBioseqIndex::GetTaxid (void) +TTaxId CBioseqIndex::GetTaxid (void) { if (! 
m_SourcesInitialized) { @@ -3338,14 +3034,14 @@ CGapIndex::CGapIndex (TSeqPos start, bool isUnknownLength, bool isAssemblyGap, CBioseqIndex& bsx) - : m_Start(start), + : m_Bsx(&bsx), + m_Start(start), m_End(end), m_Length(length), m_GapType(type), m_GapEvidence(evidence), m_IsUnknownLength(isUnknownLength), - m_IsAssemblyGap(isAssemblyGap), - m_Bsx(&bsx) + m_IsAssemblyGap(isAssemblyGap) { } @@ -3367,6 +3063,7 @@ CDescriptorIndex::CDescriptorIndex (const CSeqdesc& sd, // Constructor CFeatureIndex::CFeatureIndex (CSeq_feat_Handle sfh, const CMappedFeat mf, + CConstRef feat_loc, CBioseqIndex& bsx) : m_Sfh(sfh), m_Mf(mf), @@ -3375,11 +3072,9 @@ CFeatureIndex::CFeatureIndex (CSeq_feat_Handle sfh, const CSeqFeatData& data = m_Mf.GetData(); m_Type = data.Which(); m_Subtype = data.GetSubtype(); - const CSeq_feat& mpd = m_Mf.GetMappedFeature(); - CConstRef fl(&mpd.GetLocation()); - m_Fl = fl; - m_Start = fl->GetStart(eExtreme_Positional); - m_End = fl->GetStop(eExtreme_Positional); + m_Fl = feat_loc; + m_Start = m_Fl->GetStart(eExtreme_Positional); + m_End = m_Fl->GetStop(eExtreme_Positional); } // Find CFeatureIndex object for best gene using internal CFeatTree diff --git a/c++/src/objtools/CMakeLists.txt b/c++/src/objtools/CMakeLists.txt index b7f92044..f70a294c 100644 --- a/c++/src/objtools/CMakeLists.txt +++ b/c++/src/objtools/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeLists.txt 593577 2019-09-20 12:22:42Z gouriano $ +# $Id: CMakeLists.txt 612973 2020-07-30 19:13:00Z ivanov $ ############################################################################# NCBI_add_subdirectory( @@ -7,5 +7,5 @@ NCBI_add_subdirectory( alnmgr cddalignview test manip cleanup format edit validator asniotest align seqmasks_io eutils align_format snputil uudutil variation writers pubseq_gateway - logging import + logging import flatfile ) diff --git a/c++/src/objtools/Makefile.in b/c++/src/objtools/Makefile.in index 
bdaf1aff..ce3274a5 100644 --- a/c++/src/objtools/Makefile.in +++ b/c++/src/objtools/Makefile.in @@ -1,4 +1,4 @@ -# $Id: Makefile.in 586035 2019-05-08 18:29:07Z vakatov $ +# $Id: Makefile.in 612973 2020-07-30 19:13:00Z ivanov $ # Meta-makefile("objtools" project) ################################# @@ -10,7 +10,7 @@ SUB_PROJ = logging unit_test_util readers blast lds2 pubseq_gateway \ alnmgr cddalignview test manip edit cleanup format validator \ asniotest align seqmasks_io eutils \ align_format snputil uudutil variation writers \ - import + import flatfile srcdir = @srcdir@ include @builddir@/Makefile.meta diff --git a/c++/src/objtools/alnmgr/alnvec.cpp b/c++/src/objtools/alnmgr/alnvec.cpp index 8d2355c3..2ef99953 100644 --- a/c++/src/objtools/alnmgr/alnvec.cpp +++ b/c++/src/objtools/alnmgr/alnvec.cpp @@ -1,4 +1,4 @@ -/* $Id: alnvec.cpp 577167 2018-12-31 20:16:49Z dicuccio $ +/* $Id: alnvec.cpp 608806 2020-05-21 14:51:55Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -233,8 +233,8 @@ string& CAlnVec::GetWholeAlnSeqString(TNumrow row, const bool record_coords = scrn_width && scrn_lefts && scrn_rights; // allocate space for the row - char* c_buff = new char[aln_len + 1]; - char* c_buff_ptr = c_buff; + buffer.clear(); + buffer.reserve(aln_len); string buff; const TNumseg& left_seg = x_GetSeqLeftSeg(row); @@ -282,12 +282,10 @@ string& CAlnVec::GetWholeAlnSeqString(TNumrow row, // add regular sequence to buffer GetSeqString(buff, row, start, stop); TSeqPos buf_len = min(buff.size(), seg_len); - memcpy(c_buff_ptr, buff.c_str(), buf_len); - c_buff_ptr += buf_len; + buffer += buff; if (buf_len < seg_len) { // Not enough chars in the sequence, add gap buf_len = seg_len - buf_len; - char* ch_buff = new char[buf_len + 1]; char fill_ch; if (seg < left_seg || seg > right_seg) { @@ -296,11 +294,9 @@ string& CAlnVec::GetWholeAlnSeqString(TNumrow row, fill_ch = GetGapChar(row); } - memset(ch_buff, fill_ch, 
buf_len); - ch_buff[buf_len] = 0; - memcpy(c_buff_ptr, ch_buff, buf_len); - c_buff_ptr += buf_len; - delete[] ch_buff; + for (size_t i = 0; i < buf_len; ++i) { + buffer += fill_ch; + } } // take care of coords if necessary @@ -364,7 +360,6 @@ string& CAlnVec::GetWholeAlnSeqString(TNumrow row, } else { // add appropriate number of gap/end chars - char* ch_buff = new char[seg_len + 1]; char fill_ch; if (seg < left_seg || seg > right_seg) { @@ -373,11 +368,9 @@ string& CAlnVec::GetWholeAlnSeqString(TNumrow row, fill_ch = GetGapChar(row); } - memset(ch_buff, fill_ch, seg_len); - ch_buff[seg_len] = 0; - memcpy(c_buff_ptr, ch_buff, seg_len); - c_buff_ptr += seg_len; - delete[] ch_buff; + for (size_t i = 0; i < seg_len; ++i) { + buffer += fill_ch; + } } aln_pos += len; } @@ -403,9 +396,6 @@ string& CAlnVec::GetWholeAlnSeqString(TNumrow row, } } } - c_buff[aln_len] = '\0'; - buffer = c_buff; - delete [] c_buff; return buffer; } diff --git a/c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp b/c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp index 8db09d82..5dd4eb3b 100644 --- a/c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp +++ b/c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp @@ -1,4 +1,4 @@ -/* $Id: blastdb_dataextract.cpp 591961 2019-08-23 13:08:25Z madden $ +/* $Id: blastdb_dataextract.cpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -184,13 +184,13 @@ string CBlastDBExtractor::ExtractLinksInteger() if (seqid->IsGi()) { if (seqid->GetGi() == m_Gi) { ITERATE(CBlast_def_line::TLinks, links_int, (*itr)->GetLinks()) { - retval += NStr::IntToString(*links_int) + SEPARATOR; + retval += NStr::NumericToString(*links_int) + SEPARATOR; } break; } } else { ITERATE(CBlast_def_line::TLinks, links_int, (*itr)->GetLinks()) { - retval += NStr::IntToString(*links_int) + SEPARATOR; + retval += NStr::NumericToString(*links_int) + 
SEPARATOR; } } } @@ -384,28 +384,28 @@ string CBlastDBExtractor::ExtractTitle() { } string CBlastDBExtractor::ExtractTaxId() { - return NStr::IntToString(x_ExtractTaxId()); + return NStr::NumericToString(x_ExtractTaxId()); } string CBlastDBExtractor::ExtractLeafTaxIds() { - set taxids; + set taxids; x_ExtractLeafTaxIds(taxids); if (taxids.empty()) { return ExtractTaxId(); } string retval; - ITERATE(set, taxids_iter, taxids) { + ITERATE(set, taxids_iter, taxids) { if (retval.empty()) { - retval = NStr::IntToString(*taxids_iter); + retval = NStr::NumericToString(*taxids_iter); } else { - retval += SEPARATOR + NStr::IntToString(*taxids_iter); + retval += SEPARATOR + NStr::NumericToString(*taxids_iter); } } return retval; } string CBlastDBExtractor::ExtractCommonTaxonomicName() { - const int kTaxID = x_ExtractTaxId(); + const TTaxId kTaxID = x_ExtractTaxId(); SSeqDBTaxInfo tax_info; string retval(NOT_AVAILABLE); try { @@ -417,12 +417,12 @@ string CBlastDBExtractor::ExtractCommonTaxonomicName() { } string CBlastDBExtractor::ExtractLeafCommonTaxonomicNames() { - set taxids; + set taxids; x_ExtractLeafTaxIds(taxids); SSeqDBTaxInfo tax_info; string retval; - ITERATE(set, taxid_iter, taxids) { - const int kTaxID = *taxid_iter; + ITERATE(set, taxid_iter, taxids) { + const TTaxId kTaxID = *taxid_iter; try { m_BlastDb.GetTaxInfo(kTaxID, tax_info); _ASSERT(kTaxID == tax_info.taxid); @@ -441,7 +441,7 @@ string CBlastDBExtractor::ExtractLeafCommonTaxonomicNames() { } string CBlastDBExtractor::ExtractScientificName() { - const int kTaxID = x_ExtractTaxId(); + const TTaxId kTaxID = x_ExtractTaxId(); SSeqDBTaxInfo tax_info; string retval(NOT_AVAILABLE); try { @@ -453,12 +453,12 @@ string CBlastDBExtractor::ExtractScientificName() { } string CBlastDBExtractor::ExtractLeafScientificNames() { - set taxids; + set taxids; x_ExtractLeafTaxIds(taxids); SSeqDBTaxInfo tax_info; string retval; - ITERATE(set, taxid_iter, taxids) { - const int kTaxID = *taxid_iter; + ITERATE(set, taxid_iter, 
taxids) { + const TTaxId kTaxID = *taxid_iter; try { m_BlastDb.GetTaxInfo(kTaxID, tax_info); _ASSERT(kTaxID == tax_info.taxid); @@ -477,7 +477,7 @@ string CBlastDBExtractor::ExtractLeafScientificNames() { } string CBlastDBExtractor::ExtractBlastName() { - const int kTaxID = x_ExtractTaxId(); + const TTaxId kTaxID = x_ExtractTaxId(); SSeqDBTaxInfo tax_info; string retval(NOT_AVAILABLE); try { @@ -513,7 +513,7 @@ string CBlastDBExtractor::ExtractBlastName() { //} string CBlastDBExtractor::ExtractSuperKingdom() { - const int kTaxID = x_ExtractTaxId(); + const TTaxId kTaxID = x_ExtractTaxId(); SSeqDBTaxInfo tax_info; string retval(NOT_AVAILABLE); try { @@ -739,7 +739,7 @@ string CBlastDBExtractor::ExtractFasta(const CBlastDBSeqId &id) { return out.str(); } -int CBlastDBExtractor::x_ExtractTaxId() +TTaxId CBlastDBExtractor::x_ExtractTaxId() { x_SetGi(); @@ -752,12 +752,12 @@ int CBlastDBExtractor::x_ExtractTaxId() return m_Gi2TaxidMap.second[m_Gi]; } // for database without Gi: - vector taxid; + vector taxid; m_BlastDb.GetTaxIDs(m_Oid, taxid); - return taxid.size() ? taxid[0] : 0; + return taxid.size() ? 
taxid[0] : ZERO_TAX_ID; } -void CBlastDBExtractor::x_ExtractLeafTaxIds(set& taxids) +void CBlastDBExtractor::x_ExtractLeafTaxIds(set& taxids) { x_SetGi(); @@ -768,12 +768,12 @@ void CBlastDBExtractor::x_ExtractLeafTaxIds(set& taxids) m_BlastDb.GetLeafTaxIDs(m_Oid, m_Gi2TaxidSetMap.second); } taxids.clear(); - const set& taxid_set = m_Gi2TaxidSetMap.second[m_Gi]; + const set& taxid_set = m_Gi2TaxidSetMap.second[m_Gi]; taxids.insert(taxid_set.begin(), taxid_set.end()); return; } // for database without Gi: - vector taxid; + vector taxid; m_BlastDb.GetLeafTaxIDs(m_Oid, taxid); taxids.clear(); taxids.insert(taxid.begin(), taxid.end()); @@ -861,7 +861,7 @@ void CBlastDeflineUtil::ExtractDataFromBlastDefline(const CBlast_def_line & dl, } } if ((fields.tax_id == 1) || (fields.tax_names == 1)) { - unsigned int tax_id = 0; + TTaxId tax_id = ZERO_TAX_ID; if (dl.IsSetTaxid()) { tax_id = dl.GetTaxid(); } @@ -888,18 +888,18 @@ void CBlastDeflineUtil::ExtractDataFromBlastDefline(const CBlast_def_line & dl, } if ((fields.leaf_node_tax_ids == 1) || (fields.leaf_node_tax_names == 1)) { - set tax_id_set = dl.GetLeafTaxIds(); + set tax_id_set = dl.GetLeafTaxIds(); if (tax_id_set.empty()) { if (dl.IsSetTaxid()) { tax_id_set.insert(dl.GetTaxid()); } else { - tax_id_set.insert(0); + tax_id_set.insert(ZERO_TAX_ID); } } string separator = kEmptyStr; - ITERATE(set, itr, tax_id_set) { + ITERATE(set, itr, tax_id_set) { if (fields.leaf_node_tax_names == 1) { try { SSeqDBTaxInfo taxinfo; @@ -941,7 +941,7 @@ void CBlastDeflineUtil::ExtractDataFromBlastDefline(const CBlast_def_line & dl, if(fields.links == 1) { if (dl.IsSetLinks()) { ITERATE(CBlast_def_line::TLinks, links_int, dl.GetLinks()) { - results[CBlastDeflineUtil::links] += NStr::IntToString(*links_int) + SEPARATOR; + results[CBlastDeflineUtil::links] += NStr::NumericToString(*links_int) + SEPARATOR; } } else { diff --git a/c++/src/objtools/blast/seqdb_reader/seqdb.cpp b/c++/src/objtools/blast/seqdb_reader/seqdb.cpp index 
dec19758..82a345c7 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdb.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdb.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdb.cpp 605340 2020-04-09 16:06:43Z ivanov $ +/* $Id: seqdb.cpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -254,12 +254,12 @@ void CSeqDB::AccessionsToOids(const vector& accs, vector& m_Impl->AccessionsToOids(accs, oids); } -void CSeqDB::TaxIdsToOids(set& tax_ids, vector& rv) const +void CSeqDB::TaxIdsToOids(set& tax_ids, vector& rv) const { m_Impl->TaxIdsToOids(tax_ids, rv); } -void CSeqDB::GetDBTaxIds(set & tax_ids) const +void CSeqDB::GetDBTaxIds(set & tax_ids) const { m_Impl->GetDBTaxIds(tax_ids); } @@ -434,11 +434,11 @@ CSeqDB::ESeqType CSeqDB::GetSequenceType() const } void CSeqDB::GetTaxIDs(int oid, - map & gi_to_taxid, + map & gi_to_taxid, bool persist) const { ////m_Impl->Verify(); - typedef map TmpMap; + typedef map TmpMap; TmpMap gi_to_taxid_tmp; m_Impl->GetTaxIDs(oid, gi_to_taxid_tmp, persist); if ( !persist ) { @@ -451,7 +451,7 @@ void CSeqDB::GetTaxIDs(int oid, } void CSeqDB::GetTaxIDs(int oid, - vector & taxids, + vector & taxids, bool persist) const { ////m_Impl->Verify(); @@ -460,19 +460,19 @@ void CSeqDB::GetTaxIDs(int oid, } void CSeqDB::GetAllTaxIDs(int oid, - set & taxids) const + set & taxids) const { m_Impl->GetAllTaxIDs(oid, taxids); } void CSeqDB::GetLeafTaxIDs( int oid, - map >& gi_to_taxid_set, + map >& gi_to_taxid_set, bool persist ) const { ////m_Impl->Verify(); - typedef map > TmpMap; + typedef map > TmpMap; TmpMap gi_to_taxid_set_tmp; m_Impl->GetLeafTaxIDs(oid, gi_to_taxid_set_tmp, persist); if ( !persist ) { @@ -486,7 +486,7 @@ void CSeqDB::GetLeafTaxIDs( void CSeqDB::GetLeafTaxIDs( int oid, - vector& taxids, + vector& taxids, bool persist ) const { @@ -1100,7 +1100,7 @@ void CSeqDB::GetAliasFileValues(TAliasFileValues & afv) ////m_Impl->Verify(); } -void CSeqDB::GetTaxInfo(int 
taxid, SSeqDBTaxInfo & info) +void CSeqDB::GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo & info) { CSeqDBImpl::GetTaxInfo(taxid, info); } diff --git a/c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp b/c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp index 0fc86119..a19f0dd7 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdb_lmdb.cpp 595902 2019-10-29 17:32:09Z fongah2 $ +/* $Id: seqdb_lmdb.cpp 616872 2020-09-22 13:14:27Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -75,22 +75,23 @@ void CBlastLMDBManager::CBlastEnv::InitDbi(lmdb::env & env, ELMDBFileType file_t } CBlastLMDBManager::CBlastEnv::CBlastEnv(const string & fname, ELMDBFileType file_type, bool read_only, Uint8 map_size) : - m_Filename(fname), m_FileType(file_type),m_Env(lmdb::env::create()), m_Count(1), m_ReadOnly(read_only), m_MapSize(map_size) + m_Filename(fname), m_FileType(file_type),m_Env(lmdb::env::create()), m_Count(1), m_ReadOnly(read_only) { const MDB_dbi num_db(3); m_Env.set_max_dbs(num_db); m_dbis.resize(eDbiMax, UINT_MAX); if(m_ReadOnly) { CFile tf(fname); - m_MapSize = (tf.GetLength()/10000 + 1) *10000; - m_Env.set_mapsize(m_MapSize); + Uint8 readMapSize = (tf.GetLength()/10000 + 1) *10000; + m_Env.set_mapsize(readMapSize); m_Env.open(m_Filename.c_str(), MDB_NOSUBDIR|MDB_NOLOCK|MDB_RDONLY, 0664); InitDbi(m_Env,file_type); } else { + LOG_POST(Info <<"Initial Map Size: " << map_size); /// map_size 0 means use lmdb default - if(m_MapSize != 0) { - m_Env.set_mapsize(m_MapSize); + if(map_size != 0) { + m_Env.set_mapsize(map_size); } m_Env.open(m_Filename.c_str(), MDB_NOSUBDIR , 0664); } @@ -130,6 +131,13 @@ MDB_dbi CBlastLMDBManager::CBlastEnv::GetDbi(EDbiType dbi_type) return m_dbis[dbi_type]; } +void CBlastLMDBManager::CBlastEnv::SetMapSize(Uint8 map_size) +{ + if(!m_ReadOnly) { + m_Env.set_mapsize(map_size); + } +} + 
CBlastLMDBManager & CBlastLMDBManager::GetInstance() { static CSafeStatic lmdb_manager; return lmdb_manager.Get(); @@ -142,31 +150,41 @@ lmdb::env & CBlastLMDBManager::GetReadEnvVol(const string & fname, MDB_dbi & db db_volname = p->GetDbi(CBlastEnv::eDbiVolname); return p->GetEnv(); } -lmdb::env & CBlastLMDBManager::GetReadEnvAcc(const string & fname, MDB_dbi & db_acc) +lmdb::env & CBlastLMDBManager::GetReadEnvAcc(const string & fname, MDB_dbi & db_acc, bool* opened) { - CBlastEnv* p = GetBlastEnv(fname, eLMDB); + CBlastEnv* p = GetBlastEnv(fname, eLMDB, opened); db_acc = p->GetDbi(CBlastEnv::eDbiAcc2oid); return p->GetEnv(); } -lmdb::env & CBlastLMDBManager::GetReadEnvTax(const string & fname, MDB_dbi & db_tax) +lmdb::env & CBlastLMDBManager::GetReadEnvTax(const string & fname, MDB_dbi & db_tax, bool* opened) { - CBlastEnv* p = GetBlastEnv(fname, eTaxId2Offsets); + CBlastEnv* p = GetBlastEnv(fname, eTaxId2Offsets, opened); db_tax = p->GetDbi(CBlastEnv::eDbiTaxid2offset); return p->GetEnv(); } -CBlastLMDBManager::CBlastEnv* CBlastLMDBManager::GetBlastEnv(const string & fname, ELMDBFileType file_type) +CBlastLMDBManager::CBlastEnv* CBlastLMDBManager::GetBlastEnv(const string & fname, + ELMDBFileType file_type, + bool* opened) { CFastMutexGuard guard(m_Mutex); NON_CONST_ITERATE(list , itr, m_EnvList) { if((*itr)->GetFilename() == fname) { (*itr)->AddReference(); + if ( opened && !*opened ) { + (*itr)->AddReference(); + *opened = true; + } return (*itr); } } CBlastEnv * p (new CBlastEnv(fname, file_type)); m_EnvList.push_back(p); + if ( opened && !*opened ) { + p->AddReference(); + *opened = true; + } return p; } @@ -212,9 +230,17 @@ CSeqDBLMDB::CSeqDBLMDB(const string & fname) m_Oid2SeqIdsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eOid2SeqIds)), m_Oid2TaxIdsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eOid2TaxIds)), m_TaxId2OidsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eTaxId2Oids)), - 
m_TaxId2OffsetsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eTaxId2Offsets)) + m_TaxId2OffsetsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eTaxId2Offsets)), + m_LMDBFileOpened(false) +{ +} +CSeqDBLMDB::~CSeqDBLMDB() { + if ( m_LMDBFileOpened ) { + CBlastLMDBManager::GetInstance().CloseEnv(m_LMDBFile); + m_LMDBFileOpened = false; + } } void @@ -224,7 +250,7 @@ CSeqDBLMDB::GetOid(const string & accession, vector & oids, const oids.clear(); { MDB_dbi dbi_handle; - lmdb::env & env = CBlastLMDBManager::GetInstance().GetReadEnvAcc(m_LMDBFile, dbi_handle); + lmdb::env & env = CBlastLMDBManager::GetInstance().GetReadEnvAcc(m_LMDBFile, dbi_handle, &m_LMDBFileOpened); lmdb::dbi dbi(dbi_handle); auto txn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); auto cursor = lmdb::cursor::open(txn, dbi); @@ -319,7 +345,7 @@ CSeqDBLMDB::GetOids(const vector& accessions, vector& oid oids.resize(accessions.size(), kSeqDBEntryNotFound); MDB_dbi dbi_handle; - lmdb::env & env = CBlastLMDBManager::GetInstance().GetReadEnvAcc(m_LMDBFile, dbi_handle); + lmdb::env & env = CBlastLMDBManager::GetInstance().GetReadEnvAcc(m_LMDBFile, dbi_handle, &m_LMDBFileOpened); { lmdb::dbi dbi(dbi_handle); auto txn = lmdb::txn::begin(env, nullptr, MDB_RDONLY); @@ -510,7 +536,7 @@ CSeqDBLMDB::NegativeSeqIdsToOids(const vector& ids, vector & tax_ids) const +void CSeqDBLMDB::GetDBTaxIds(vector & tax_ids) const { tax_ids.clear(); @@ -523,7 +549,7 @@ void CSeqDBLMDB::GetDBTaxIds(vector & tax_ids) const auto cursor = lmdb::cursor::open(txn, dbi); lmdb::val key; while (cursor.get(key, MDB_NEXT)) { - Int4 taxid = *((Int4 *)key.data()); + TTaxId taxid = TAX_ID_FROM(Int4, *((Int4 *)key.data())); tax_ids.push_back(taxid); } cursor.close(); @@ -540,9 +566,10 @@ void CSeqDBLMDB::GetDBTaxIds(vector & tax_ids) const NCBI_THROW( CSeqDBException, eArgErr, "Taxonomy Id to Oids lookup error in " + dbname); } } + CBlastLMDBManager::GetInstance().CloseEnv(m_TaxId2OffsetsFile); } -void 
CSeqDBLMDB::GetOidsForTaxIds(const set & tax_ids, vector& oids, vector & tax_ids_found) const +void CSeqDBLMDB::GetOidsForTaxIds(const set & tax_ids, vector& oids, vector & tax_ids_found) const { try { @@ -555,8 +582,8 @@ void CSeqDBLMDB::GetOidsForTaxIds(const set & tax_ids, vector, itr, tax_ids) { - Int4 tax_id = *itr; + ITERATE(set, itr, tax_ids) { + Int4 tax_id = TAX_ID_TO(Int4, *itr); lmdb::val data2find(tax_id); if (cursor.get(data2find, MDB_SET)) { @@ -626,14 +653,14 @@ public: m_DataStart += (2* (num_of_oids + 1)); } - inline void GetTaxIdListForOid(blastdb::TOid oid, vector & taxid_list); + inline void GetTaxIdListForOid(blastdb::TOid oid, vector & taxid_list); private: Uint8 * m_IndexStart; Int4 * m_DataStart; }; -void CLookupTaxIds::GetTaxIdListForOid(blastdb::TOid oid, vector & taxid_list) +void CLookupTaxIds::GetTaxIdListForOid(blastdb::TOid oid, vector & taxid_list) { taxid_list.clear(); Uint8 * index_ptr = m_IndexStart + oid; @@ -641,23 +668,23 @@ void CLookupTaxIds::GetTaxIdListForOid(blastdb::TOid oid, vector & taxid_l index_ptr--; Int4 * begin = (oid == 0) ? 
m_DataStart:m_DataStart + (*index_ptr); while (begin < end) { - taxid_list.push_back(*begin); + taxid_list.push_back(TAX_ID_FROM(Int4, *begin)); begin++; } } void -CSeqDBLMDB::NegativeTaxIdsToOids(const set& tax_ids, vector& rv, vector & tax_ids_found) const +CSeqDBLMDB::NegativeTaxIdsToOids(const set& tax_ids, vector& rv, vector & tax_ids_found) const { rv.clear(); vector oids; GetOidsForTaxIds(tax_ids, oids, tax_ids_found); CMemoryFile oid_file(m_Oid2TaxIdsFile); - set tax_id_list(tax_ids.begin(), tax_ids.end()); + set tax_id_list(tax_ids.begin(), tax_ids.end()); CLookupTaxIds lookup(oid_file); for(unsigned int i=0; i < oids.size(); i++) { - vector file_list; + vector file_list; lookup.GetTaxIdListForOid(oids[i], file_list); if(file_list.size() > tax_ids.size()) { continue; @@ -676,12 +703,12 @@ CSeqDBLMDB::NegativeTaxIdsToOids(const set& tax_ids, vector } } -void CSeqDBLMDB::GetTaxIdsForOids(const vector & oids, set & tax_ids) const +void CSeqDBLMDB::GetTaxIdsForOids(const vector & oids, set & tax_ids) const { CMemoryFile oid_file(m_Oid2TaxIdsFile); CLookupTaxIds lookup(oid_file); for(unsigned int i=0; i < oids.size(); i++) { - vector taxid_list; + vector taxid_list; lookup.GetTaxIdListForOid(oids[i], taxid_list); tax_ids.insert(taxid_list.begin(), taxid_list.end()); } diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp index eb34f9a8..072277fa 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbcommon.cpp 605336 2020-04-09 16:04:52Z ivanov $ +/* $Id: seqdbcommon.cpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1102,7 +1102,7 @@ void SeqDB_ReadMemoryTaxIdList(const char * fbeginp, } for(Int4 * elem = (bbeginp + 2); elem < bendp; ++elem) { - taxids.tax_ids.insert(SeqDB_GetStdOrd(elem)); + 
taxids.tax_ids.insert(TAX_ID_FROM(Int4, SeqDB_GetStdOrd(elem))); } } else { Int4 elem(0); @@ -1113,7 +1113,7 @@ void SeqDB_ReadMemoryTaxIdList(const char * fbeginp, if (dig == -1) { // Skip blank lines or comments by ignoring zero. if (elem != 0) { - taxids.tax_ids.insert(elem); + taxids.tax_ids.insert(TAX_ID_FROM(Int4, elem)); } elem = 0; continue; diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp index e0097aa2..38fcf76e 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbgilistset.cpp 597735 2019-11-26 17:53:47Z fongah2 $ +/* $Id: seqdbgilistset.cpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -254,7 +254,7 @@ CSeqDBGiListSet::x_ResolvePositiveList(CSeqDBAtlas & atlas, } if(user_list->GetNumTaxIds() > 0) { vector & oids = user_list->SetOidsForTaxIdsList(); - set & tax_ids = user_list->GetTaxIdsList(); + set & tax_ids = user_list->GetTaxIdsList(); lmdb_set.TaxIdsToOids(tax_ids, oids); } if((user_list->GetNumGis() == 0) && (user_list->GetNumTis() == 0) && @@ -332,7 +332,7 @@ CSeqDBGiListSet::x_ResolveNegativeList(CSeqDBAtlas & atlas, } if(m_NegativeList->GetNumTaxIds() > 0) { vector & oids = m_NegativeList->SetExcludedOids(); - set & tax_ids = m_NegativeList->GetTaxIdsList(); + set & tax_ids = m_NegativeList->GetTaxIdsList(); lmdb_set.NegativeTaxIdsToOids(tax_ids, oids); } diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp index 65741950..0c6b94dd 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbimpl.cpp 607218 2020-04-30 18:42:35Z ivanov $ +/* $Id: seqdbimpl.cpp 616350 2020-09-15 12:19:05Z ivanov $ * 
=========================================================================== * * PUBLIC DOMAIN NOTICE @@ -438,7 +438,7 @@ int CSeqDBImpl::GetSeqLengthApprox(int oid) const } void CSeqDBImpl::GetTaxIDs(int oid, - map & gi_to_taxid, + map & gi_to_taxid, bool persist) { CSeqDBLockHold locked(m_Atlas); @@ -474,7 +474,7 @@ void CSeqDBImpl::GetTaxIDs(int oid, } void CSeqDBImpl::GetTaxIDs(int oid, - vector & taxids, + vector & taxids, bool persist) { CSeqDBLockHold locked(m_Atlas); @@ -499,7 +499,7 @@ void CSeqDBImpl::GetTaxIDs(int oid, } void CSeqDBImpl::GetAllTaxIDs(int oid, - set & taxids) + set & taxids) { CSeqDBLockHold locked(m_Atlas); @@ -516,7 +516,7 @@ void CSeqDBImpl::GetAllTaxIDs(int oid, void CSeqDBImpl::GetLeafTaxIDs( int oid, - map >& gi_to_taxid_set, + map >& gi_to_taxid_set, bool persist ) { @@ -553,7 +553,7 @@ void CSeqDBImpl::GetLeafTaxIDs( void CSeqDBImpl::GetLeafTaxIDs( int oid, - vector& taxids, + vector& taxids, bool persist ) { @@ -1317,7 +1317,7 @@ void CSeqDBImpl::AccessionToOids(const string & acc, } -void CSeqDBImpl::TaxIdsToOids(set& tax_ids, vector& rv) +void CSeqDBImpl::TaxIdsToOids(set& tax_ids, vector& rv) { CHECK_MARKER(); rv.clear(); @@ -1339,7 +1339,7 @@ void CSeqDBImpl::TaxIdsToOids(set& tax_ids, vector& rv) return; } -void CSeqDBImpl::GetDBTaxIds(set & tax_ids) +void CSeqDBImpl::GetDBTaxIds(set & tax_ids) { CHECK_MARKER(); CSeqDBLockHold locked(m_Atlas); @@ -1629,7 +1629,7 @@ void CSeqDBImpl::x_ScanTotals(bool approx, } } -void CSeqDBImpl::GetTaxInfo(int taxid, SSeqDBTaxInfo & info) +void CSeqDBImpl::GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo & info) { if (! 
CSeqDBTaxInfo::GetTaxNames(taxid, info)) { CNcbiOstrstream oss; diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp b/c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp index 9f9400dc..285f412d 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_READERS_SEQDB__SEQDBIMPL_HPP #define OBJTOOLS_READERS_SEQDB__SEQDBIMPL_HPP -/* $Id: seqdbimpl.hpp 605340 2020-04-09 16:06:43Z ivanov $ +/* $Id: seqdbimpl.hpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -244,7 +244,7 @@ public: /// @param persist /// If false, the map will be cleared before adding new entries. void GetTaxIDs(int oid, - map & gi_to_taxid, + map & gi_to_taxid, bool persist); /// Get taxids for an OID. @@ -264,7 +264,7 @@ public: /// @param persist /// If false, the map will be cleared before adding new entries. void GetTaxIDs(int oid, - vector & taxids, + vector & taxids, bool persist); /// Get gi to taxid map for an OID. @@ -284,13 +284,13 @@ public: /// If false, the map will be cleared before adding new entries. void GetLeafTaxIDs( int oid, - map >& gi_to_taxid_set, + map >& gi_to_taxid_set, bool persist ); /// Get all tax ids (leaf and non-leaf for an oid void GetAllTaxIDs(int oid, - set & taxids); + set & taxids); /// Get gi to taxid map for an OID. /// @@ -310,7 +310,7 @@ public: /// If false, the map will be cleared before adding new entries. void GetLeafTaxIDs( int oid, - vector& gi_to_taxid_set, + vector& gi_to_taxid_set, bool persist ); @@ -727,7 +727,7 @@ public: /// An integer identifying the taxid to fetch. /// @param info /// A structure containing taxonomic description strings. - static void GetTaxInfo(int taxid, SSeqDBTaxInfo & info); + static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo & info); /// Returns the sum of the sequence lengths. 
/// @@ -1066,11 +1066,11 @@ public: /// Get Oid list for input tax ids /// @param tax_ids taxonomy ids /// @param rv oids corrpond to tax ids - void TaxIdsToOids(set& tax_ids, vector& rv); + void TaxIdsToOids(set& tax_ids, vector& rv); /// Get all unique tax ids from db /// @param tax_ids return taxonomy ids in db - void GetDBTaxIds(set & tax_ids); + void GetDBTaxIds(set & tax_ids); private: CLASS_MARKER_FIELD("IMPL") diff --git a/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp b/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp index aeb7c5e3..e2436ffb 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp @@ -161,24 +161,24 @@ void CSeqDBLMDBEntry::NegativeSeqIdsToOids(const vector& ids, vector& tax_ids, vector& rv, vector & tax_ids_found) const +void CSeqDBLMDBEntry::TaxIdsToOids(const set& tax_ids, vector& rv, vector & tax_ids_found) const { m_LMDB->GetOidsForTaxIds(tax_ids, rv, tax_ids_found); x_AdjustOidsOffset_TaxList(rv); } -void CSeqDBLMDBEntry::NegativeTaxIdsToOids(const set& tax_ids, vector& rv, vector & tax_ids_found) const +void CSeqDBLMDBEntry::NegativeTaxIdsToOids(const set& tax_ids, vector& rv, vector & tax_ids_found) const { m_LMDB->NegativeTaxIdsToOids(tax_ids, rv, tax_ids_found); x_AdjustOidsOffset_TaxList(rv); } -void CSeqDBLMDBEntry::GetDBTaxIds(vector & tax_ids) const +void CSeqDBLMDBEntry::GetDBTaxIds(vector & tax_ids) const { m_LMDB->GetDBTaxIds(tax_ids); } -void CSeqDBLMDBEntry::GetTaxIdsForOids(const vector & oids, set & tax_ids) const +void CSeqDBLMDBEntry::GetTaxIdsForOids(const vector & oids, set & tax_ids) const { if(m_isPartial) { vector tmp; @@ -311,10 +311,10 @@ void CSeqDBLMDBSet::NegativeSeqIdsToOids(const vector& ids, vector& tax_ids, vector& rv) const +void CSeqDBLMDBSet::TaxIdsToOids(set& tax_ids, vector& rv) const { - vector tax_ids_found; - set rv_tax_ids; + vector tax_ids_found; + set rv_tax_ids; m_LMDBEntrySet[0]->TaxIdsToOids(tax_ids, rv, 
tax_ids_found); rv_tax_ids.insert(tax_ids_found.begin(), tax_ids_found.end()); for(unsigned int i=1; i < m_LMDBEntrySet.size(); i++) { @@ -331,10 +331,10 @@ void CSeqDBLMDBSet::TaxIdsToOids(set& tax_ids, vector& rv) tax_ids.swap(rv_tax_ids); } -void CSeqDBLMDBSet::NegativeTaxIdsToOids(set& tax_ids, vector& rv) const +void CSeqDBLMDBSet::NegativeTaxIdsToOids(set& tax_ids, vector& rv) const { - vector tax_ids_found; - set rv_tax_ids; + vector tax_ids_found; + set rv_tax_ids; m_LMDBEntrySet[0]->NegativeTaxIdsToOids(tax_ids, rv, tax_ids_found); rv_tax_ids.insert(tax_ids_found.begin(), tax_ids_found.end()); for(unsigned int i=1; i < m_LMDBEntrySet.size(); i++) { @@ -352,9 +352,9 @@ void CSeqDBLMDBSet::NegativeTaxIdsToOids(set& tax_ids, vector & tax_ids) const +void CSeqDBLMDBSet::GetDBTaxIds(set & tax_ids) const { - vector t; + vector t; m_LMDBEntrySet[0]->GetDBTaxIds(t); tax_ids.insert(t.begin(), t.end()); for(unsigned int i=1; i < m_LMDBEntrySet.size(); i++) { @@ -365,7 +365,7 @@ void CSeqDBLMDBSet::GetDBTaxIds(set & tax_ids) const } -void CSeqDBLMDBSet::GetTaxIdsForOids(const vector & oids, set & tax_ids) const +void CSeqDBLMDBSet::GetTaxIdsForOids(const vector & oids, set & tax_ids) const { if (m_LMDBEntrySet.size() > 1) { vector t; @@ -373,7 +373,7 @@ void CSeqDBLMDBSet::GetTaxIdsForOids(const vector & oids, set= m_LMDBEntrySet[j]->GetOIDEnd()){ if (t.size() > 0){ - set t_set; + set t_set; m_LMDBEntrySet[j]->GetTaxIdsForOids(t, t_set); t.clear(); tax_ids.insert(t_set.begin(), t_set.end()); @@ -383,7 +383,7 @@ void CSeqDBLMDBSet::GetTaxIdsForOids(const vector & oids, setGetOIDStart()); } if (t.size() > 0){ - set t_set; + set t_set; m_LMDBEntrySet[j]->GetTaxIdsForOids(t, t_set); tax_ids.insert(t_set.begin(), t_set.end()); } diff --git a/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp b/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp index 0eb441bd..8477af83 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp +++ 
b/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp @@ -100,13 +100,13 @@ public: void NegativeSeqIdsToOids(const vector& ids, vector& rv) const; - void TaxIdsToOids(const set& tax_ids, vector& rv, vector & tax_ids_found) const; + void TaxIdsToOids(const set& tax_ids, vector& rv, vector & tax_ids_found) const; - void NegativeTaxIdsToOids(const set& tax_ids, vector& rv, vector & tax_ids_found) const; + void NegativeTaxIdsToOids(const set& tax_ids, vector& rv, vector & tax_ids_found) const; - void GetDBTaxIds(vector & tax_ids) const; + void GetDBTaxIds(vector & tax_ids) const; - void GetTaxIdsForOids(const vector & oids, set & tax_ids) const; + void GetTaxIdsForOids(const vector & oids, set & tax_ids) const; private: void x_AdjustOidsOffset(vector & oids) const; @@ -171,13 +171,13 @@ public: void NegativeSeqIdsToOids(const vector& ids, vector& rv) const; - void TaxIdsToOids(set& tax_ids, vector& rv) const; + void TaxIdsToOids(set& tax_ids, vector& rv) const; - void NegativeTaxIdsToOids(set& tax_ids, vector& rv) const; + void NegativeTaxIdsToOids(set& tax_ids, vector& rv) const; - void GetDBTaxIds(set & tax_ids) const; + void GetDBTaxIds(set & tax_ids) const; - void GetTaxIdsForOids(const vector & oids, set & tax_ids) const; + void GetTaxIdsForOids(const vector & oids, set & tax_ids) const; private: vector > m_LMDBEntrySet; diff --git a/c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp b/c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp index de3d0f57..fbfb13bc 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdboidlist.cpp 579001 2019-01-29 13:54:57Z fongah2 $ +/* $Id: seqdboidlist.cpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -641,11 +641,11 @@ void s_ProcessTaxIdFilters(const vector & fnames, return; } - set user_taxids; + set user_taxids; 
if(!user_list.Empty() && (user_list->GetNumTaxIds() > 0)) { user_taxids = user_list->GetTaxIdsList(); } - set neg_user_taxids; + set neg_user_taxids; if(!neg_user_list.Empty() && (neg_user_list->GetNumTaxIds() > 0)) { neg_user_taxids = neg_user_list->GetTaxIdsList(); } @@ -655,15 +655,15 @@ void s_ProcessTaxIdFilters(const vector & fnames, vector oids; CRef list(new CSeqDBFileGiList(fnames[k], CSeqDBFileGiList::eTaxIdList)); s_GetFilteredOidRange(volset, fnames_vols[k], excluded_vols, list); - set taxids; + set taxids; taxids = list->GetTaxIdsList(); if(taxids.size() == 0){ continue; } if(user_taxids.size() > 0){ - vector common; + vector common; common.resize(taxids.size()); - vector::iterator itr = set_intersection(taxids.begin(), taxids.end(), + vector::iterator itr = set_intersection(taxids.begin(), taxids.end(), user_taxids.begin(), user_taxids.end(), common.begin()); common.resize(itr-common.begin()); if( common.size() == 0) { @@ -673,9 +673,9 @@ void s_ProcessTaxIdFilters(const vector & fnames, taxids.insert(common.begin(), common.end()); } if(neg_user_taxids.size() > 0) { - vector difference; + vector difference; difference.resize(taxids.size()); - vector::iterator itr = set_difference(taxids.begin(), taxids.end(), + vector::iterator itr = set_difference(taxids.begin(), taxids.end(), neg_user_taxids.begin(), neg_user_taxids.end(), difference.begin()); difference.resize(itr-difference.begin()); if(difference.size() == 0){ diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbtax.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbtax.cpp index ada5e6c1..1ee99291 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbtax.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbtax.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbtax.cpp 530943 2017-03-20 12:53:37Z fongah2 $ +/* $Id: seqdbtax.cpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -61,9 +61,9 @@ public: } /// Return the taxonomic 
identifier field (in host order) - Int4 GetTaxId()const + TTaxId GetTaxId()const { - return SeqDB_GetStdOrd(& m_Taxid); + return TAX_ID_FROM(Int4, SeqDB_GetStdOrd(& m_Taxid)); } /// Return the offset field (in host order) @@ -216,7 +216,7 @@ CTaxDBFileInfo::~CTaxDBFileInfo() } -bool CSeqDBTaxInfo::GetTaxNames(Int4 tax_id, +bool CSeqDBTaxInfo::GetTaxNames(TTaxId tax_id, SSeqDBTaxInfo & info ) { static CTaxDBFileInfo t; @@ -227,8 +227,8 @@ bool CSeqDBTaxInfo::GetTaxNames(Int4 tax_id, const char * Data = t.GetDataPtr(); const CSeqDBTaxId* Index = t.GetIndexPtr(); - Int4 low_taxid = Index[low_index ].GetTaxId(); - Int4 high_taxid = Index[high_index].GetTaxId(); + TTaxId low_taxid = Index[low_index ].GetTaxId(); + TTaxId high_taxid = Index[high_index].GetTaxId(); if((tax_id < low_taxid) || (tax_id > high_taxid)) return false; @@ -237,7 +237,7 @@ bool CSeqDBTaxInfo::GetTaxNames(Int4 tax_id, Int4 old_index = new_index; while(1) { - Int4 curr_taxid = Index[new_index].GetTaxId(); + TTaxId curr_taxid = Index[new_index].GetTaxId(); if (tax_id < curr_taxid) { high_index = new_index; diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp index d30a4269..7d58c7d3 100644 --- a/c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp +++ b/c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp @@ -1,4 +1,4 @@ -/* $Id: seqdbvol.cpp 607218 2020-04-30 18:42:35Z ivanov $ +/* $Id: seqdbvol.cpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1082,18 +1082,18 @@ CSeqDBVol::x_GetTaxonomy(int oid, //m_Atlas.Lock(locked); for(TBDLLConstIter iter = dl.begin(); iter != dl.end(); iter ++) { - int taxid = 0; + TTaxId taxid = ZERO_TAX_ID; if ((*iter)->CanGetTaxid()) { taxid = (*iter)->GetTaxid(); } - if (taxid <= 0) { + if (taxid <= ZERO_TAX_ID) { continue; } bool have_org_desc = false; - if (use_taxinfo_cache && m_TaxCache.Lookup(taxid).NotEmpty()) { + if 
(use_taxinfo_cache && m_TaxCache.Lookup(TAX_ID_TO(int, taxid)).NotEmpty()) { have_org_desc = true; } @@ -1110,11 +1110,11 @@ CSeqDBVol::x_GetTaxonomy(int oid, if (provide_new_taxonomy_info) { if (have_org_desc) { - taxonomy.push_back(m_TaxCache.Lookup(taxid)); + taxonomy.push_back(m_TaxCache.Lookup(TAX_ID_TO(int, taxid))); } else { CRef org_tag(new CDbtag); org_tag->SetDb(TAX_ORGREF_DB_NAME); - org_tag->SetTag().SetId(taxid); + org_tag->SetTag().SetId(TAX_ID_TO(int, taxid)); CRef org(new COrg_ref); if (found_taxid_in_taxonomy_blastdb) { @@ -1133,7 +1133,7 @@ CSeqDBVol::x_GetTaxonomy(int oid, taxonomy.push_back(desc); if (use_taxinfo_cache) { - m_TaxCache.Lookup(taxid) = desc; + m_TaxCache.Lookup(TAX_ID_TO(int, taxid)) = desc; } } } @@ -1845,7 +1845,7 @@ CSeqDBVol::GetFilteredHeader(int oid, return x_GetFilteredHeader(oid, NULL); } -bool s_IncludeDefline_Taxid(const CBlast_def_line & def, const set & user_tax_ids) +bool s_IncludeDefline_Taxid(const CBlast_def_line & def, const set & user_tax_ids) { CBlast_def_line::TTaxIds tax_ids; if (def.IsSetTaxid()) { @@ -1853,8 +1853,12 @@ bool s_IncludeDefline_Taxid(const CBlast_def_line & def, const set & user_t } if(def.IsSetLinks()) { CBlast_def_line::TLinks leaf_ids = def.GetLinks(); - tax_ids.insert(leaf_ids.begin(), leaf_ids.end()); - } +#ifdef NCBI_STRICT_TAX_ID + ITERATE(CBlast_def_line::TLinks, it, leaf_ids) tax_ids.insert(TAX_ID_FROM(int, *it)); +#else + tax_ids.insert(leaf_ids.begin(), leaf_ids.end()); +#endif + } if(user_tax_ids.size() > tax_ids.size()) { ITERATE(CBlast_def_line::TTaxIds, itr, tax_ids) { @@ -1865,7 +1869,7 @@ bool s_IncludeDefline_Taxid(const CBlast_def_line & def, const set & user_t } else { - ITERATE(set, itr, user_tax_ids) { + ITERATE(set, itr, user_tax_ids) { if(tax_ids.find(*itr) != tax_ids.end()) { return true; } @@ -1874,7 +1878,7 @@ bool s_IncludeDefline_Taxid(const CBlast_def_line & def, const set & user_t return false; } -bool s_IncludeDefline_NegativeTaxid(const CBlast_def_line & def, 
const set & user_tax_ids) +bool s_IncludeDefline_NegativeTaxid(const CBlast_def_line & def, const set & user_tax_ids) { CBlast_def_line::TTaxIds taxid_set = def.GetTaxIds(); if(taxid_set.size() > user_tax_ids.size()) { diff --git a/c++/src/objtools/blast/seqdb_writer/build-alias-index b/c++/src/objtools/blast/seqdb_writer/build-alias-index index 0e93a356..ad308915 100755 --- a/c++/src/objtools/blast/seqdb_writer/build-alias-index +++ b/c++/src/objtools/blast/seqdb_writer/build-alias-index @@ -3,7 +3,7 @@ # subdirectory # Author: Kevin Bealer # Original date: 10/21/2005 -# $URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.10.1/c++/src/objtools/blast/seqdb_writer/build-alias-index $ +# $URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.11.0/c++/src/objtools/blast/seqdb_writer/build-alias-index $ INDEX_NAME=index.alx OUTNAME=index.alx.new diff --git a/c++/src/objtools/blast/seqdb_writer/taxid_set.cpp b/c++/src/objtools/blast/seqdb_writer/taxid_set.cpp index cab43334..c629bbde 100644 --- a/c++/src/objtools/blast/seqdb_writer/taxid_set.cpp +++ b/c++/src/objtools/blast/seqdb_writer/taxid_set.cpp @@ -1,4 +1,4 @@ -/* $Id: taxid_set.cpp 548810 2017-10-18 13:38:41Z ivanov $ +/* $Id: taxid_set.cpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -40,6 +40,8 @@ BEGIN_NCBI_SCOPE USING_SCOPE(objects); #endif +const TTaxId CTaxIdSet::kTaxIdNotSet = ZERO_TAX_ID; + void CTaxIdSet::SetMappingFromFile(CNcbiIstream & f) { while(f && (! 
f.eof())) { @@ -62,7 +64,7 @@ void CTaxIdSet::SetMappingFromFile(CNcbiIstream & f) } if (gi_str.size() && tx_str.size()) { - int taxid = NStr::StringToInt(tx_str, NStr::fAllowLeadingSpaces); + TTaxId taxid = NStr::StringToNumeric(tx_str, NStr::fAllowLeadingSpaces); string key = AccessionToKey(gi_str); m_TaxIdMap[key] = taxid; @@ -71,9 +73,9 @@ void CTaxIdSet::SetMappingFromFile(CNcbiIstream & f) m_Matched = (m_GlobalTaxId != kTaxIdNotSet) || m_TaxIdMap.empty(); } -int CTaxIdSet::x_SelectBestTaxid(const objects::CBlast_def_line & defline) +TTaxId CTaxIdSet::x_SelectBestTaxid(const objects::CBlast_def_line & defline) { - int retval = m_GlobalTaxId; + TTaxId retval = m_GlobalTaxId; if (retval != kTaxIdNotSet) { return retval; @@ -87,7 +89,7 @@ int CTaxIdSet::x_SelectBestTaxid(const objects::CBlast_def_line & defline) if (key->empty()) continue; - map::const_iterator item = m_TaxIdMap.find(*key); + map::const_iterator item = m_TaxIdMap.find(*key); if (item != m_TaxIdMap.end()) { retval = item->second; diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_lmdb_unit_test.cpp b/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_lmdb_unit_test.cpp index b2d166ba..fdc399a4 100644 --- a/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_lmdb_unit_test.cpp +++ b/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_lmdb_unit_test.cpp @@ -72,9 +72,9 @@ BOOST_AUTO_TEST_CASE(CreateLMDBFile) test_db.InsertVolumesInfo(vol_names, vol_num_oids); CWriteDB_TaxID taxdb(tax_lmdb,100000); - const int taxids[5] = { 9606, 562, 0, 2, 10239 }; + const TTaxId taxids[5] = { TAX_ID_CONST(9606), TAX_ID_CONST(562), TAX_ID_CONST(0), TAX_ID_CONST(2), TAX_ID_CONST(10239) }; for (int i=0; i < source_db.GetNumOIDs(); i++) { - set t; + set t; for(int j=0; j < (i % 5 + 1); j++) { t.insert(taxids[j]); } @@ -122,9 +122,9 @@ BOOST_AUTO_TEST_CASE(CreateLMDBFile) /* Test Tax Ids */ vector tax_oids; - set tax_ids; - tax_ids.insert(10239); - vector rv_tax_ids; + set tax_ids; + 
tax_ids.insert(TAX_ID_CONST(10239)); + vector rv_tax_ids; test_db.GetOidsForTaxIds(tax_ids, tax_oids, rv_tax_ids); for(unsigned int i=0; i < tax_ids.size(); i++) { BOOST_REQUIRE_EQUAL(tax_oids[i] % 5, 4); @@ -146,6 +146,103 @@ BOOST_AUTO_TEST_CASE(CreateLMDBFile) } +BOOST_AUTO_TEST_CASE(TestLMDBMapSize) +{ + const string base_name = "tmp_lmdb"; + DeleteLMDBFiles(true, base_name); + const string lmdb_name = BuildLMDBFileName(base_name, true); + const string tax_lmdb = GetFileNameFromExistingLMDBFile(lmdb_name, ELMDBFileType::eTaxId2Offsets); + const int kNumVols = 4; + CSeqDB source_db("data/writedb_prot",CSeqDB::eProtein); + vector vol_names; + vector vol_num_oids; + for(unsigned int k=0; k < kNumVols; k++) { + vol_names.push_back("tmp_lmdb" + NStr::IntToString(k)); + vol_num_oids.push_back(k*1234); + } + + { + CWriteDB_LMDB test_db(lmdb_name, 10); + for (int i=0; i < source_db.GetNumOIDs(); i++) { + list< CRef > ids = source_db.GetSeqIDs(i); + test_db.InsertEntries(ids, i); + } + test_db.InsertVolumesInfo(vol_names, vol_num_oids); + + CWriteDB_TaxID taxdb(tax_lmdb,10); + const TTaxId taxids[5] = { TAX_ID_CONST(9606), TAX_ID_CONST(562), TAX_ID_CONST(0), TAX_ID_CONST(2), TAX_ID_CONST(10239) }; + for (int i=0; i < source_db.GetNumOIDs(); i++) { + set t; + for(int j=0; j < (i % 5 + 1); j++) { + t.insert(taxids[j]); + } + taxdb.InsertEntries(t, i); + } + } + + { + vector test_neg_accs; + CSeqDBLMDB test_db(lmdb_name); + + /* Test GetOids from Seq IDs */ + for(int i=0; i < source_db.GetNumOIDs(); i++) { + vector test_accs; + vector test_oids; + list< CRef > ids = source_db.GetSeqIDs(i); + CRef n_id = FindBestChoice(ids, CSeq_id::WorstRank); + test_neg_accs.push_back(n_id->GetSeqIdString(false)); + ITERATE(list< CRef >, itr, ids) { + if((*itr)->IsGi()) { + continue; + } + test_accs.push_back((*itr)->GetSeqIdString(true)); + test_accs.push_back((*itr)->GetSeqIdString(false)); + } + test_db.GetOids(test_accs, test_oids); + for(unsigned int j=0; j < test_accs.size(); j++) { 
+ BOOST_REQUIRE_EQUAL(test_oids[j], i); + } + } + + /* Test Negative Seq IDs to OIDs */ + vector neg_oids; + test_db.NegativeSeqIdsToOids(test_neg_accs, neg_oids); + BOOST_REQUIRE_EQUAL(neg_oids.size(), 65); + + /* Test Vol Info */ + vector test_vol_names; + vector test_vol_num_oids; + test_db.GetVolumesInfo(test_vol_names, test_vol_num_oids); + for(unsigned int k=0; k < kNumVols; k++) { + BOOST_REQUIRE_EQUAL(test_vol_num_oids[k], vol_num_oids[k]); + BOOST_REQUIRE_EQUAL(test_vol_names[k], vol_names[k]); + } + + /* Test Tax Ids */ + vector tax_oids; + set tax_ids; + tax_ids.insert(TAX_ID_CONST(10239)); + vector rv_tax_ids; + test_db.GetOidsForTaxIds(tax_ids, tax_oids, rv_tax_ids); + for(unsigned int i=0; i < tax_ids.size(); i++) { + BOOST_REQUIRE_EQUAL(tax_oids[i] % 5, 4); + } + + test_db.NegativeTaxIdsToOids(tax_ids, tax_oids, rv_tax_ids); + BOOST_REQUIRE_EQUAL(tax_oids.size(), 0); + + tax_ids.clear(); + tax_ids.insert(9606); + tax_ids.insert(562); + test_db.NegativeTaxIdsToOids(tax_ids, tax_oids, rv_tax_ids); + for(unsigned int i=0; i < rv_tax_ids.size(); i++) { + BOOST_REQUIRE((tax_oids[i] % 5 < 2)); + } + + } + DeleteLMDBFiles(true, base_name); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp b/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp index 74e659bb..898a6a09 100644 --- a/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp +++ b/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp @@ -1,4 +1,4 @@ -/* $Id: writedb_unit_test.cpp 588813 2019-07-01 12:29:54Z fongah2 $ +/* $Id: writedb_unit_test.cpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2537,7 +2537,7 @@ BOOST_AUTO_TEST_CASE(CBuildDatabase_WriteToInvalidPathUnix) BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomy) { - const int kTaxId(9986); + const TTaxId kTaxId = TAX_ID_CONST(9986); CTaxIdSet 
tis(kTaxId); const string kDbName("foo"); CWriteDB blastdb(kDbName, CWriteDB::eNucleotide, kDbName); @@ -2563,7 +2563,7 @@ BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomy) int total=db.GetNumSeqs(); for (int oid=0; oid taxids; + vector taxids; db.GetTaxIDs(oid, taxids); BOOST_REQUIRE(taxids.size() == 1); BOOST_REQUIRE_EQUAL(kTaxId, taxids.front()); @@ -2573,7 +2573,7 @@ BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomy) BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMap) { - const int kTaxId(9986); + const TTaxId kTaxId = TAX_ID_CONST(9986); CRef tis(new CTaxIdSet()); const string kDbName("foo"); CWriteDB blastdb(kDbName, CWriteDB::eNucleotide, kDbName); @@ -2601,7 +2601,7 @@ BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMap) int total=db.GetNumSeqs(); for (int oid=0; oid taxids; + vector taxids; db.GetTaxIDs(oid, taxids); BOOST_REQUIRE(taxids.size() == 1); BOOST_REQUIRE_EQUAL(kTaxId, taxids.front()); @@ -2611,7 +2611,7 @@ BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMap) BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMapLclIds) { - const int kTaxId(382); + const TTaxId kTaxId = TAX_ID_CONST(382); CRef tis(new CTaxIdSet()); const string kDbName("foo"); CWriteDB blastdb(kDbName, CWriteDB::eProtein, kDbName); @@ -2637,7 +2637,7 @@ BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMapLclIds) int total=db.GetNumSeqs(); for (int oid=0; oid taxids; + vector taxids; db.GetTaxIDs(oid, taxids); BOOST_REQUIRE(taxids.size() == 1); BOOST_REQUIRE_EQUAL(kTaxId, taxids.front()); @@ -3577,7 +3577,7 @@ BOOST_AUTO_TEST_CASE(LimitProteinDeflines) CSeqDB readdb(dbname, CSeqDB::eProtein); for(unsigned int i=0; i < kNumOfDeflines; i++){ CRef new_set = readdb.GetHdr(i); - set t; + set t; readdb.GetAllTaxIDs(i, t); BOOST_REQUIRE_EQUAL(num_taxids[i], t.size()); BOOST_REQUIRE_EQUAL(num_deflines[i], new_set->Set().size()); diff --git a/c++/src/objtools/blast/seqdb_writer/writedb_impl.cpp b/c++/src/objtools/blast/seqdb_writer/writedb_impl.cpp index bb14e692..0c3abbe8 100644 --- 
a/c++/src/objtools/blast/seqdb_writer/writedb_impl.cpp +++ b/c++/src/objtools/blast/seqdb_writer/writedb_impl.cpp @@ -1,4 +1,4 @@ -/* $Id: writedb_impl.cpp 588812 2019-07-01 12:29:10Z fongah2 $ +/* $Id: writedb_impl.cpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -583,7 +583,7 @@ CWriteDB_Impl::x_BuildDeflinesFromBioseq(const CBioseq & bioseq return; } - vector taxids; + vector taxids; string titles; // Scan the CBioseq for taxids and the title string. @@ -616,7 +616,7 @@ CWriteDB_Impl::x_BuildDeflinesFromBioseq(const CBioseq & bioseq if (oi.IsId()) { //defline->SetTaxid(oi.GetId()); - taxids.push_back(oi.GetId()); + taxids.push_back(TAX_ID_FROM(CObject_id::TId, oi.GetId())); } } } @@ -752,7 +752,7 @@ CWriteDB_Impl::x_ExtractDeflines(CConstRef & bioseq, const vector< vector > & membbits, const vector< vector > & linkouts, int pig, - set & tax_ids, + set & tax_ids, int OID, bool parse_ids, bool long_ids, @@ -1553,7 +1553,7 @@ CWriteDB_Impl::ExtractBioseqDeflines(const CBioseq & bs, bool parse_ids, CConstRef deflines; string binary_header; vector< vector > v1, v2; - set t; + set t; CConstRef bsref(& bs); x_ExtractDeflines(bsref, deflines, binary_header, v2, v2, 0, t, -1, parse_ids, diff --git a/c++/src/objtools/blast/seqdb_writer/writedb_impl.hpp b/c++/src/objtools/blast/seqdb_writer/writedb_impl.hpp index f62085ac..4012c8c9 100644 --- a/c++/src/objtools/blast/seqdb_writer/writedb_impl.hpp +++ b/c++/src/objtools/blast/seqdb_writer/writedb_impl.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS_WRITERS_WRITEDB__WRITEDB_IMPL_HPP #define OBJTOOLS_WRITERS_WRITEDB__WRITEDB_IMPL_HPP -/* $Id: writedb_impl.hpp 588812 2019-07-01 12:29:10Z fongah2 $ +/* $Id: writedb_impl.hpp 616350 2020-09-15 12:19:05Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -530,7 +530,7 @@ private: const vector< vector > & membbits, const vector< 
vector > & linkouts, int pig, - set & tax_ids, + set & tax_ids, int OID=-1, bool parse_ids=true, bool long_seqid=false, @@ -610,7 +610,7 @@ private: /// Binary header in format that will be written to disk. string m_BinHdr; - set m_TaxIds; + set m_TaxIds; // Volumes diff --git a/c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp b/c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp index 12fe387c..98d2547b 100644 --- a/c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp +++ b/c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp @@ -51,7 +51,8 @@ BEGIN_NCBI_SCOPE CWriteDB_LMDB::CWriteDB_LMDB(const string& dbname, Uint8 map_size, Uint8 capacity): m_Db(dbname), m_Env(CBlastLMDBManager::GetInstance().GetWriteEnv(dbname, map_size)), m_ListCapacity(capacity), - m_MaxEntryPerTxn(DEFAULT_MAX_ENTRY_PER_TXN) + m_MaxEntryPerTxn(DEFAULT_MAX_ENTRY_PER_TXN), + m_TotalIdsLength(0) { m_list.reserve(m_ListCapacity); char* max_entry_str = getenv("MAX_LMDB_TXN_ENTRY"); @@ -72,6 +73,8 @@ CWriteDB_LMDB::~CWriteDB_LMDB() void CWriteDB_LMDB::InsertVolumesInfo(const vector & vol_names, const vector & vol_num_oids) { + x_IncreaseEnvMapSize(vol_names, vol_num_oids); + lmdb::txn txn = lmdb::txn::begin(m_Env); lmdb::dbi volinfo = lmdb::dbi::open(txn, blastdb::volinfo_str.c_str(), MDB_CREATE | MDB_INTEGERKEY); lmdb::dbi volname = lmdb::dbi::open(txn, blastdb::volname_str.c_str(), MDB_CREATE | MDB_INTEGERKEY); @@ -90,7 +93,6 @@ void CWriteDB_LMDB::InsertVolumesInfo(const vector & vol_names, const ve txn.commit(); } - int CWriteDB_LMDB::InsertEntries(const list> & seqids, const blastdb::TOid oid) { int count = 0; @@ -193,6 +195,60 @@ void CWriteDB_LMDB::x_InsertEntry(const CRef &seqid, const blastdb::TOi return; } +void CWriteDB_LMDB::x_IncreaseEnvMapSize(const vector & vol_names, const vector & vol_num_oids) +{ + // 2 meta pages + const size_t MIN_PAGES = 3; + const size_t BRANCH_PAGES = 2; + // Each entry has 8 byte overhead + size of (key + entry) + size_t vol_name_size = 
(vol_names.front().size() + 24)* vol_names.size(); + size_t vol_info_size = 24* vol_names.size(); + + MDB_env *env = m_Env.handle(); + MDB_stat stat; + MDB_envinfo info; + lmdb::env_stat(env, &stat); + lmdb::env_info(env, &info); + size_t page_size = stat.ms_psize; + // For each page 16 byte header + size_t page_max_size = page_size -16; + size_t last_page_num = info.me_last_pgno; + size_t max_num_pages = info.me_mapsize/page_size; + size_t leaf_pages_needed = vol_name_size/page_max_size + vol_info_size/page_max_size + 2; + size_t total_pages_needed = MIN_PAGES + BRANCH_PAGES + leaf_pages_needed; + if( (total_pages_needed + last_page_num) > max_num_pages ) { + size_t newMapSize = (total_pages_needed + last_page_num) * page_size; + m_Env.set_mapsize(newMapSize); + LOG_POST(Info << "Increased lmdb mapsize to " << newMapSize); + } + +} + +void CWriteDB_LMDB::x_IncreaseEnvMapSize() +{ + size_t size = m_TotalIdsLength + m_list.size() * 16; + size_t avg_id_length = m_TotalIdsLength/m_list.size(); + MDB_env *env = m_Env.handle(); + MDB_stat stat; + MDB_envinfo info; + lmdb::env_stat(env, &stat); + lmdb::env_info(env, &info); + size_t page_size = stat.ms_psize; + // 16 byte header for each page + size_t page_max_size = page_size -16; + size_t last_page_num = info.me_last_pgno; + size_t max_num_pages = info.me_mapsize/page_size; + size_t leaf_pages_needed = size/page_max_size + 1; + size_t dup_pages = (leaf_pages_needed > 200) ? 
14: 7; + size_t branch_pages_needed = (avg_id_length + 16)* leaf_pages_needed/page_max_size + 1; + size_t total_pages_needed = leaf_pages_needed + branch_pages_needed + dup_pages; + if( (total_pages_needed + last_page_num) > max_num_pages) { + size_t newMapSize = (total_pages_needed + last_page_num) * page_size; + m_Env.set_mapsize(newMapSize); + LOG_POST(Info << "Increased lmdb mapsize to " << newMapSize); + } +} + void CWriteDB_LMDB::x_Split(vector::iterator b, vector::iterator e, const unsigned int min_chunk_size) { #ifdef _OPENMP @@ -250,6 +306,9 @@ void CWriteDB_LMDB::x_CommitTransaction() #else std::sort (m_list.begin(), m_list.end(), SKeyValuePair::cmp_key); #endif + + x_IncreaseEnvMapSize(); + unsigned int j=0; while (j < m_list.size()){ lmdb::txn txn = lmdb::txn::begin(m_Env); @@ -334,6 +393,7 @@ void CWriteDB_LMDB::x_CreateOidToSeqidsLookupFile() count++; tmp_ids.clear(); } + m_TotalIdsLength +=m_list[i].id.size(); if(!m_list[i].saveToOidList) { continue; } @@ -383,17 +443,17 @@ CWriteDB_TaxID::~CWriteDB_TaxID() CFile(m_Db+"-lock").Remove(); } -int CWriteDB_TaxID::InsertEntries(const set & tax_ids, const blastdb::TOid oid) +int CWriteDB_TaxID::InsertEntries(const set & tax_ids, const blastdb::TOid oid) { int count = 0; if(tax_ids.size() == 0) { x_Resize(); - SKeyValuePair kv(0, oid); + SKeyValuePair kv(ZERO_TAX_ID, oid); m_TaxId2OidList.push_back(kv); return 1; } - ITERATE(set, itr, tax_ids) { + ITERATE(set, itr, tax_ids) { x_Resize(); SKeyValuePair kv(*itr, oid); m_TaxId2OidList.push_back(kv); @@ -403,11 +463,37 @@ int CWriteDB_TaxID::InsertEntries(const set & tax_ids, const blastdb::TOid return count; } +void CWriteDB_TaxID::x_IncreaseEnvMapSize() +{ + const size_t MIN_PAGES = 4; + MDB_env *env = m_Env.handle(); + MDB_stat stat; + MDB_envinfo info; + lmdb::env_stat(env, &stat); + lmdb::env_info(env, &info); + size_t size = m_TaxId2OffsetsList.size()*32; + size_t page_size = stat.ms_psize; + size_t page_max_size = stat.ms_psize - 16; + size_t 
last_page_num = info.me_last_pgno; + size_t max_num_pages = info.me_mapsize/page_size; + size_t leaf_pages_needed = size/page_max_size + 1; + size_t branch_pages_needed = 24 * leaf_pages_needed/page_max_size + 1; + size_t total_pages_needed = leaf_pages_needed + branch_pages_needed + MIN_PAGES; + if( (total_pages_needed + last_page_num) > max_num_pages) { + size_t newMapSize = (total_pages_needed + last_page_num) * page_size; + m_Env.set_mapsize(newMapSize); + LOG_POST(Info << "Increased lmdb mapsize to " << newMapSize); + } +} + + void CWriteDB_TaxID::x_CommitTransaction() { _ASSERT(m_TaxId2OffsetsList.size()); sort (m_TaxId2OffsetsList.begin(), m_TaxId2OffsetsList.end(), SKeyValuePair::cmp_key); + x_IncreaseEnvMapSize(); + unsigned int j=0; while (j < m_TaxId2OffsetsList.size()){ lmdb::txn txn = lmdb::txn::begin(m_Env); @@ -420,25 +506,25 @@ void CWriteDB_TaxID::x_CommitTransaction() } for(; i < j; i++){ Uint8 & offset = m_TaxId2OffsetsList[i].value; - Int4 & tax_id = m_TaxId2OffsetsList[i].tax_id; + TTaxId & tax_id = m_TaxId2OffsetsList[i].tax_id; //cerr << m_list[i].id << endl; lmdb::val value{&offset, sizeof(offset)}; lmdb::val key{&tax_id, sizeof(tax_id)}; bool rc = lmdb::dbi_put(txn, dbi.handle(), key, value, MDB_APPENDDUP); if (!rc) { - NCBI_THROW( CSeqDBException, eArgErr, "taxid2offset error for tax id " + tax_id); + NCBI_THROW( CSeqDBException, eArgErr, "taxid2offset error for tax id " + NStr::NumericToString(tax_id)); } } txn.commit(); } return; - } -Uint4 s_WirteTaxIds(CNcbiOfstream & os, vector & tax_ids) +Uint4 s_WirteTaxIds(CNcbiOfstream & os, vector & tax_ids) { for(unsigned int j =0; j < tax_ids.size(); j++) { - os.write((char *)&tax_ids[j], 4); + Int4 tid = TAX_ID_TO(Int4, tax_ids[j]); + os.write((char *)&tid, 4); } return tax_ids.size(); } @@ -462,7 +548,7 @@ void CWriteDB_TaxID::x_CreateOidToTaxIdsLookupFile() os.flush(); blastdb::TOid count = 0; - vector tmp_tax_ids; + vector tmp_tax_ids; for(unsigned int i = 0; i < m_TaxId2OidList.size(); 
i++) { if(i > 0 && m_TaxId2OidList[i].value != m_TaxId2OidList[i-1].value ) { if((m_TaxId2OidList[i].value - m_TaxId2OidList[i-1].value) != 1) { diff --git a/c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt b/c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt index 3cb87a86..8eec0484 100644 --- a/c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt +++ b/c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt @@ -1,5 +1,5 @@ ############################################################################# -# $Id: CMakeLists.cleanup.lib.txt 594157 2019-09-30 18:28:48Z gouriano $ +# $Id: CMakeLists.cleanup.lib.txt 608332 2020-05-14 16:04:14Z ivanov $ ############################################################################# NCBI_begin_lib(xcleanup) @@ -7,6 +7,7 @@ NCBI_begin_lib(xcleanup) autogenerated_cleanup autogenerated_extended_cleanup cleanup cleanup_utils gene_qual_normalization cleanup_user_object cleanup_author cleanup_pub newcleanupp capitalization_string fix_feature_id + cleanup_message ) NCBI_uses_toolkit_libraries(xobjedit) NCBI_project_watchers(bollin kans) diff --git a/c++/src/objtools/cleanup/Makefile.cleanup.lib b/c++/src/objtools/cleanup/Makefile.cleanup.lib index e6339062..0d8299a1 100644 --- a/c++/src/objtools/cleanup/Makefile.cleanup.lib +++ b/c++/src/objtools/cleanup/Makefile.cleanup.lib @@ -1,4 +1,4 @@ -# $Id: Makefile.cleanup.lib 581537 2019-03-01 21:27:51Z ucko $ +# $Id: Makefile.cleanup.lib 608332 2020-05-14 16:04:14Z ivanov $ # Build library "xcleanup" ############################### @@ -8,7 +8,8 @@ WATCHERS = bollin kans ASN_DEP = submit taxon3 valid SRC = autogenerated_cleanup autogenerated_extended_cleanup cleanup \ cleanup_utils gene_qual_normalization cleanup_user_object cleanup_author \ - cleanup_pub newcleanupp capitalization_string fix_feature_id + cleanup_pub newcleanupp capitalization_string fix_feature_id \ + cleanup_message DLL_LIB = $(OBJEDIT_LIBS) xregexp $(PCRE_LIB) LIB = xcleanup diff --git 
a/c++/src/objtools/cleanup/cleanup.cpp b/c++/src/objtools/cleanup/cleanup.cpp index fb02b687..7b129d82 100644 --- a/c++/src/objtools/cleanup/cleanup.cpp +++ b/c++/src/objtools/cleanup/cleanup.cpp @@ -1,4 +1,4 @@ -/* $Id: cleanup.cpp 608035 2020-05-11 13:51:46Z ivanov $ +/* $Id: cleanup.cpp 614966 2020-08-25 16:46:33Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -66,11 +66,14 @@ #include #include #include "cleanup_utils.hpp" +#include #include #include "newcleanupp.hpp" +#include + BEGIN_NCBI_SCOPE BEGIN_SCOPE(objects) @@ -2637,7 +2640,20 @@ static bool s_CleanupIsShortrRNA(const CSeq_feat& f, CScope* scope) // used in f } bool is_bad = false; size_t len = sequence::GetLength(f.GetLocation(), scope); - string rrna_name = f.GetData().GetRna().GetRnaProductName(); + const CRNA_ref& rrna = f.GetData().GetRna(); + string rrna_name = rrna.GetRnaProductName(); + if (rrna_name.empty()) { + // RNA name may still be in product GBQual + if (f.IsSetQual()) { + for (auto qit : f.GetQual()) { + const CGb_qual& gbq = *qit; + if ( gbq.IsSetQual() && gbq.GetQual() == "product" ) { + rrna_name = gbq.GetVal(); + break; + } + } + } + } ITERATE (TRNALengthMap, it, kTrnaLengthMap) { SIZE_TYPE pos = NStr::FindNoCase(rrna_name, it->first); if (pos != string::npos && len < it->second.first && !(it->second.second && f.IsSetPartial() && f.GetPartial()) ) { @@ -2764,26 +2780,12 @@ bool CCleanup::WGSCleanup(CSeq_entry_Handle entry, bool instantiate_missing_prot CTSE_Handle tse = entry.GetTSE_Handle(); - for (CFeat_CI gene_it(entry, SAnnotSelector(CSeqFeatData::e_Gene)); gene_it; ++gene_it) { - bool change_this_gene; - CRef new_gene(new CSeq_feat()); - new_gene->Assign(*(gene_it->GetSeq_feat())); - - change_this_gene = ExpandGeneToIncludeChildren(*new_gene, tse); - - change_this_gene |= SetGenePartialByLongestContainedFeature(*new_gene, entry.GetScope()); - - if (change_this_gene) { - CSeq_feat_EditHandle gene_h(*gene_it); 
- gene_h.Replace(*new_gene); - any_changes = true; - } - } - for (CFeat_CI rna_it(entry, SAnnotSelector(CSeqFeatData::e_Rna)); rna_it; ++rna_it) { const CSeq_feat& rna_feat = *(rna_it->GetSeq_feat()); - if (rna_feat.IsSetData() && rna_feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_rRNA && !rna_feat.IsSetPartial() && s_CleanupIsShortrRNA(rna_feat, &(entry.GetScope()))) { + if (rna_feat.IsSetData() && + rna_feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_rRNA && + s_CleanupIsShortrRNA(rna_feat, &(entry.GetScope()))) { bool change_this_rrna = false; CRef new_rrna(new CSeq_feat()); @@ -2818,6 +2820,22 @@ bool CCleanup::WGSCleanup(CSeq_entry_Handle entry, bool instantiate_missing_prot } } + for (CFeat_CI gene_it(entry, SAnnotSelector(CSeqFeatData::e_Gene)); gene_it; ++gene_it) { + bool change_this_gene; + CRef new_gene(new CSeq_feat()); + new_gene->Assign(*(gene_it->GetSeq_feat())); + + change_this_gene = ExpandGeneToIncludeChildren(*new_gene, tse); + + change_this_gene |= SetGenePartialByLongestContainedFeature(*new_gene, entry.GetScope()); + + if (change_this_gene) { + CSeq_feat_EditHandle gene_h(*gene_it); + gene_h.Replace(*new_gene); + any_changes = true; + } + } + NormalizeDescriptorOrder(entry); for (CBioseq_CI bi(entry, CSeq_inst::eMol_na); bi; ++bi) { @@ -3135,7 +3153,7 @@ void s_GetAuthorsString( void CCleanup::GetPubdescLabels (const CPubdesc& pd, -vector& pmids, vector& muids, vector& serials, +vector& pmids, vector& muids, vector& serials, vector& published_labels, vector& unpublished_labels) { @@ -3208,8 +3226,8 @@ vector > CCleanup::GetCitationList(CBioseq_Handle bsh) // first get descriptor pubs CSeqdesc_CI di(bsh, CSeqdesc::e_Pub); while (di) { - vector pmids; - vector muids; + vector pmids; + vector muids; vector serials; vector published_labels; vector unpublished_labels; @@ -3241,8 +3259,8 @@ vector > CCleanup::GetCitationList(CBioseq_Handle bsh) // now get pub features CFeat_CI fi(bsh, SAnnotSelector(CSeqFeatData::e_Pub)); while (fi) { - 
vector pmids; - vector muids; + vector pmids; + vector muids; vector serials; vector published_labels; vector unpublished_labels; @@ -4417,7 +4435,11 @@ bool CCleanup::ConvertDeltaSeqToRaw(CSeq_entry_Handle seh, CSeq_inst::EMol filte } -bool CCleanup::ParseCodeBreak(const CSeq_feat& feat, CCdregion& cds, const string& str, CScope& scope) +bool CCleanup::ParseCodeBreak(const CSeq_feat& feat, + CCdregion& cds, + const CTempString& str, + CScope& scope, + IObjtoolsListener* pMessageListener) { if (str.empty() || !feat.IsSetLocation()) { return false; @@ -4459,7 +4481,19 @@ bool CCleanup::ParseCodeBreak(const CSeq_feat& feat, CCdregion& cds, const strin } loc_pos = NStr::Find(str, "(pos:"); + + using TSubcode = CCleanupMessage::ESubcode; + auto postMessage = + [pMessageListener](string msg, TSubcode subcode) { + pMessageListener->PutMessage( + CCleanupMessage(msg, eDiag_Error, CCleanupMessage::ECode::eCodeBreak, subcode)); + }; + if (loc_pos == string::npos) { + if (pMessageListener) { + string msg = "Unable to identify code-break location in '" + str + "'"; + postMessage(msg, TSubcode::eParseError); + } return false; } loc_pos += 5; @@ -4485,11 +4519,26 @@ bool CCleanup::ParseCodeBreak(const CSeq_feat& feat, CCdregion& cds, const strin break_loc = ReadLocFromText(pos, feat_loc_seq_id, &scope); if (break_loc == NULL) { + if (pMessageListener) { + string msg = "Unable to extract code-break location from '" + str + "'"; + postMessage(msg, TSubcode::eParseError); + } return false; - } else if (break_loc->IsInt() && sequence::GetLength(*break_loc, &scope) > 3) { + } + + if (break_loc->IsInt() && sequence::GetLength(*break_loc, &scope) > 3) { + if (pMessageListener) { + string msg = "code-break location exceeds 3 bases"; + postMessage(msg, TSubcode::eBadLocation); + } return false; - } else if ((break_loc->IsInt() || break_loc->IsPnt()) && + } + if ((break_loc->IsInt() || break_loc->IsPnt()) && sequence::Compare(*break_loc, feat.GetLocation(), &scope, 
sequence::fCompareOverlapping) != sequence::eContained) { + if (pMessageListener) { + string msg = "code-break location lies outside of coding region"; + postMessage(msg, TSubcode::eBadLocation); + } return false; } diff --git a/c++/src/objtools/cleanup/cleanup_message.cpp b/c++/src/objtools/cleanup/cleanup_message.cpp new file mode 100644 index 00000000..297bced0 --- /dev/null +++ b/c++/src/objtools/cleanup/cleanup_message.cpp @@ -0,0 +1,48 @@ +/* $Id: cleanup_message.cpp 608332 2020-05-14 16:04:14Z ivanov $ + * =========================================================================== + * + * PUBLIC DOMAIN NOTICE + * National Center for Biotechnology Information + * + * This software/database is a "United States Government Work" under the + * terms of the United States Copyright Act. It was written as part of + * the author's official duties as a United States Government employee and + * thus cannot be copyrighted. This software/database is freely available + * to the public for use. The National Library of Medicine and the U.S. + * Government have not placed any restriction on its use or reproduction. + * + * Although all reasonable efforts have been taken to ensure the accuracy + * and reliability of the software and data, the NLM and the U.S. + * Government do not and cannot warrant the performance or results that + * may be obtained by using this software or data. The NLM and the U.S. + * Government disclaim all warranties, express or implied, including + * warranties of performance, merchantability or fitness for any particular + * purpose. + * + * Please cite the author in any work or product based on this material. + * + * =========================================================================== + * + * Author: Justin Foley + * + * File Description: + * ....... 
+ * + */ +#include +#include + +BEGIN_NCBI_SCOPE +BEGIN_SCOPE(objects) + +CCleanupMessage::CCleanupMessage(string text, EDiagSev sev, ECode code, ESubcode subcode) + : CObjtoolsMessage(text, sev), m_Code(code), m_Subcode(subcode) {} + +CCleanupMessage* CCleanupMessage::Clone(void) const +{ + return new CCleanupMessage(GetText(), GetSeverity(), m_Code, m_Subcode); +} + +END_SCOPE(objects) +END_NCBI_SCOPE + diff --git a/c++/src/objtools/cleanup/cleanup_pub.cpp b/c++/src/objtools/cleanup/cleanup_pub.cpp index 60fb375f..424d9c05 100644 --- a/c++/src/objtools/cleanup/cleanup_pub.cpp +++ b/c++/src/objtools/cleanup/cleanup_pub.cpp @@ -1,4 +1,4 @@ -/* $Id: cleanup_pub.cpp 591351 2019-08-14 14:26:28Z bollin $ +/* $Id: cleanup_pub.cpp 614966 2020-08-25 16:46:33Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -190,8 +190,8 @@ bool CPubEquivCleaner::Clean(bool fix_initials, bool strip_serial) // we keep the last of these because we might transfer one // to the other as necessary to fill in gaps. 
- int last_pmid = 0; - int last_article_pubmed_id = 0; // the last from a journal + TEntrezId last_pmid = ZERO_ENTREZ_ID; + TEntrezId last_article_pubmed_id = ZERO_ENTREZ_ID; // the last from a journal CRef last_article; auto& pe_set = m_Equiv.Set(); @@ -239,13 +239,13 @@ bool CPubEquivCleaner::Clean(bool fix_initials, bool strip_serial) } // Now, we might have to transfer data to fill in missing information - if (last_pmid == 0 && last_article_pubmed_id > 0) { + if (last_pmid == ZERO_ENTREZ_ID && last_article_pubmed_id > ZERO_ENTREZ_ID) { CRef new_pub(new CPub); new_pub->SetPmid().Set(last_article_pubmed_id); m_Equiv.Set().insert(m_Equiv.Set().begin(), new_pub); change = true; } - else if (last_pmid > 0 && last_article_pubmed_id == 0 && last_article) { + else if (last_pmid > ZERO_ENTREZ_ID && last_article_pubmed_id == ZERO_ENTREZ_ID && last_article) { CRef new_article_id(new CArticleId); new_article_id->SetPubmed().Set(last_pmid); last_article->SetIds().Set().push_back(new_article_id); @@ -403,7 +403,7 @@ bool CCitGenCleaner::IsEmpty() { return (!m_Gen.IsSetCit()) && !m_Gen.IsSetAuthors() && - (!m_Gen.IsSetMuid() || m_Gen.GetMuid() <= 0) && + (!m_Gen.IsSetMuid() || m_Gen.GetMuid() <= ZERO_ENTREZ_ID) && !m_Gen.IsSetJournal() && (!m_Gen.IsSetVolume() || m_Gen.GetVolume().empty()) && (!m_Gen.IsSetIssue() || m_Gen.GetIssue().empty()) && @@ -411,7 +411,7 @@ bool CCitGenCleaner::IsEmpty() !m_Gen.IsSetDate() && (!m_Gen.IsSetSerial_number() || m_Gen.GetSerial_number() <= 0) && (!m_Gen.IsSetTitle() || m_Gen.GetTitle().empty()) && - (!m_Gen.IsSetPmid() || m_Gen.GetPmid() <= 0); + (!m_Gen.IsSetPmid() || m_Gen.GetPmid().Get() <= ZERO_ENTREZ_ID); } diff --git a/c++/src/objtools/cleanup/cleanup_utils.hpp b/c++/src/objtools/cleanup/cleanup_utils.hpp index d5d36e30..af4b38e0 100644 --- a/c++/src/objtools/cleanup/cleanup_utils.hpp +++ b/c++/src/objtools/cleanup/cleanup_utils.hpp @@ -1,7 +1,7 @@ #ifndef OBJECTS_GENERAL___CLEANUP_UTILS__HPP #define 
OBJECTS_GENERAL___CLEANUP_UTILS__HPP -/* $Id: cleanup_utils.hpp 581496 2019-03-01 16:42:04Z bollin $ +/* $Id: cleanup_utils.hpp 613129 2020-08-03 12:12:19Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -379,7 +379,7 @@ if ((o).IsSet##x()) { \ CRef ReadLocFromText(const string& text, const CSeq_id *id, CScope *scope); // for finding the correct amino acid letter given an abbreviation -char ValidAminoAcid (const string& abbrev); +char NCBI_CLEANUP_EXPORT ValidAminoAcid (const string& abbrev); // for sorting and uniquing dbtags bool s_DbtagCompare (const CRef& dbt1, const CRef& dbt2); diff --git a/c++/src/objtools/cleanup/newcleanupp.cpp b/c++/src/objtools/cleanup/newcleanupp.cpp index 653e9683..1b1f427e 100644 --- a/c++/src/objtools/cleanup/newcleanupp.cpp +++ b/c++/src/objtools/cleanup/newcleanupp.cpp @@ -5972,6 +5972,10 @@ void CNewCleanup_imp::Except_textBC ( { if (NStr::Find (except_text, "ribosome slippage") == NPOS && + NStr::Find (except_text, "ribosome-slippage") == NPOS && + NStr::Find (except_text, "ribosome_slippage") == NPOS && + NStr::Find (except_text, "ribosomal-slippage") == NPOS && + NStr::Find (except_text, "ribosomal_slippage") == NPOS && NStr::Find (except_text, "trans splicing") == NPOS && NStr::Find (except_text, "trans_splicing") == NPOS && NStr::Find (except_text, "alternate processing") == NPOS && @@ -5991,7 +5995,8 @@ void CNewCleanup_imp::Except_textBC ( ChangeMade (CCleanupChange::eTrimSpaces); } if (! text.empty()) { - if (text == "ribosome slippage") { + if (text == "ribosome slippage" || text == "ribosome-slippage" || text == "ribosome_slippage" || + text == "ribosomal-slippage" || text == "ribosomal_slippage") { text = "ribosomal slippage"; ChangeMade (CCleanupChange::eChangeException); } else if (text == "trans splicing" || text == "trans_splicing") { @@ -8540,12 +8545,12 @@ void CNewCleanup_imp::x_PostProcessing(void) if( ! 
m_MuidPubContainer.empty() ) { NON_CONST_ITERATE( TMuidPubContainer, pub_iter, m_MuidPubContainer ) { CPub &pub = **pub_iter; - const int muid = pub.GetMuid(); + const TEntrezId muid = pub.GetMuid(); // attempt to find that muid in the muid-to-pmid mapping created earlier - TMuidToPmidMap::const_iterator map_iter = m_MuidToPmidMap.find(muid); + TMuidToPmidMap::const_iterator map_iter = m_MuidToPmidMap.find(ENTREZ_ID_TO(int, muid)); if( map_iter != m_MuidToPmidMap.end() ) { - const int pmid = map_iter->second; + const TEntrezId pmid = ENTREZ_ID_FROM(int, map_iter->second); pub.SetPmid().Set(pmid); ChangeMade(CCleanupChange::eChangePublication); } @@ -8686,16 +8691,16 @@ void CNewCleanup_imp::x_NotePubdescOrAnnotPubs( } void CNewCleanup_imp::x_NotePubdescOrAnnotPubs_RecursionHelper( - const CPub_equiv &pub_equiv, int &muid, int &pmid ) + const CPub_equiv &pub_equiv, int &muid, int &pmid ) { FOR_EACH_PUB_ON_PUBEQUIV(pub_iter, pub_equiv) { const CPub &pub = **pub_iter; switch( pub.Which() ) { case NCBI_PUB(Muid): - muid = pub.GetMuid(); + muid = ENTREZ_ID_TO(int, pub.GetMuid()); break; case NCBI_PUB(Pmid): - pmid = pub.GetPmid().Get(); + pmid = ENTREZ_ID_TO(int, pub.GetPmid().Get()); break; case NCBI_PUB(Gen): { @@ -10083,9 +10088,9 @@ bool CNewCleanup_imp::x_IsPubContentBad(const CPub& pub) { if (pub.IsGen() && IsMinimal(pub.GetGen())) { return true; - } else if (pub.IsMuid() && pub.GetMuid() == 0) { + } else if (pub.IsMuid() && pub.GetMuid() == ZERO_ENTREZ_ID) { return true; - } else if (pub.IsPmid() && pub.GetPmid() == 0) { + } else if (pub.IsPmid() && pub.GetPmid() == ZERO_ENTREZ_ID) { return true; } else if (pub.IsPat_id() && x_IsPubContentBad(pub.GetPat_id())) { return true; diff --git a/c++/src/objtools/data_loaders/blastdb/bdbloader.cpp b/c++/src/objtools/data_loaders/blastdb/bdbloader.cpp index 00e58d4e..c19f215d 100644 --- a/c++/src/objtools/data_loaders/blastdb/bdbloader.cpp +++ b/c++/src/objtools/data_loaders/blastdb/bdbloader.cpp @@ -1,4 +1,4 @@ -/* $Id: 
bdbloader.cpp 500404 2016-05-04 14:59:01Z camacho $ +/* $Id: bdbloader.cpp 612733 2020-07-27 11:38:27Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -167,6 +167,7 @@ CBlastDbDataLoader::CBlastDbDataLoader(const string & loader_name, m_DBName (param.m_DbName), m_DBType (param.m_DbType), m_BlastDb (0), + m_Ids (1000), m_UseFixedSizeSlices (param.m_UseFixedSizeSlices) { if (param.m_BlastDbHandle.NotEmpty()) { diff --git a/c++/src/objtools/data_loaders/genbank/reader.cpp b/c++/src/objtools/data_loaders/genbank/reader.cpp index 9398fa04..f399331c 100644 --- a/c++/src/objtools/data_loaders/genbank/reader.cpp +++ b/c++/src/objtools/data_loaders/genbank/reader.cpp @@ -1,4 +1,4 @@ -/* $Id: reader.cpp 578792 2019-01-25 16:39:00Z vasilche $ +/* $Id: reader.cpp 610682 2020-06-22 17:47:10Z ivanov $ * =========================================================================== * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information @@ -77,6 +77,7 @@ CReader::CDebugPrinter::~CDebugPrinter() } +#define DEFAULT_PREOPEN false #define DEFAULT_RETRY_COUNT 5 #define DEFAULT_WAIT_TIME_ERRORS 2 #define DEFAULT_WAIT_TIME 1 @@ -110,7 +111,7 @@ static CIncreasingTime::SAllParams s_WaitTimeParams = { CReader::CReader(void) : m_Dispatcher(0), m_MaxConnections(0), - m_PreopenConnection(true), + m_PreopenConnection(DEFAULT_PREOPEN), m_NextNewConnection(0), m_NumFreeConnections(0, 1000), m_MaximumRetryCount(3), @@ -140,7 +141,7 @@ void CReader::InitParams(CConfig& conf, conf.GetBool(driver_name, NCBI_GBLOADER_READER_PARAM_PREOPEN, CConfig::eErr_NoThrow, - true); + DEFAULT_PREOPEN); SetPreopenConnection(open_initial_connection); m_WaitTimeErrors = conf.GetInt(driver_name, diff --git a/c++/src/objtools/edit/CMakeLists.edit.lib.txt b/c++/src/objtools/edit/CMakeLists.edit.lib.txt index 30578d64..51165d0b 100644 --- a/c++/src/objtools/edit/CMakeLists.edit.lib.txt +++ b/c++/src/objtools/edit/CMakeLists.edit.lib.txt @@ 
-1,5 +1,5 @@ ############################################################################# -# $Id: CMakeLists.edit.lib.txt 594157 2019-09-30 18:28:48Z gouriano $ +# $Id: CMakeLists.edit.lib.txt 615131 2020-08-27 17:51:01Z fukanchi $ ############################################################################# NCBI_begin_lib(xobjedit) @@ -13,7 +13,7 @@ NCBI_begin_lib(xobjedit) external_annots feature_propagate text_object_description seq_edit ) - NCBI_uses_toolkit_libraries(mlacli taxon3 valid xobjread xobjutil) + NCBI_uses_toolkit_libraries(mlacli taxon3 valid xobjread xobjutil xlogging) NCBI_project_watchers(bollin gotvyans foleyjp) NCBI_end_lib() diff --git a/c++/src/objtools/edit/feattable_edit.cpp b/c++/src/objtools/edit/feattable_edit.cpp index 737655e3..561a0dc5 100644 --- a/c++/src/objtools/edit/feattable_edit.cpp +++ b/c++/src/objtools/edit/feattable_edit.cpp @@ -1,4 +1,4 @@ -/* $Id: feattable_edit.cpp 594944 2019-10-11 12:07:51Z ludwigf $ +/* $Id: feattable_edit.cpp 612522 2020-07-23 11:23:26Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -38,6 +38,8 @@ #include #include #include +#include +#include #include #include @@ -355,7 +357,75 @@ void CFeatTableEdit::EliminateBadQualifiers() } } +// ---------------------------------------------------------------------------- +void CFeatTableEdit::ProcessCodonRecognized() +// ---------------------------------------------------------------------------- +{ + static map> sIUPACmap { + {'A', list({'A'})}, + {'G', list({'G'})}, + {'C', list({'C'})}, + {'T', list({'T'})}, + {'U', list({'U'})}, + {'M', list({'A', 'C'})}, + {'R', list({'A', 'G'})}, + {'W', list({'A', 'T'})}, + {'S', list({'C', 'G'})}, + {'Y', list({'C', 'T'})}, + {'K', list({'G', 'T'})}, + {'V', list({'A', 'C', 'G'})}, + {'H', list({'A', 'C', 'T'})}, + {'D', list({'A', 'G', 'T'})}, + {'B', list({'C', 'G', 'T'})}, + {'N', list({'A', 'C', 'G', 'T'})} + }; + SAnnotSelector sel; + 
sel.IncludeFeatSubtype(CSeqFeatData::eSubtype_tRNA); + CFeat_CI it(mHandle, sel); + for (; it; ++it) { + CMappedFeat mf = *it; + auto codonRecognized = mf.GetNamedQual("codon_recognized"); + if (codonRecognized.empty()) { + continue; + } + if (codonRecognized.size() != 3) { + xPutErrorBadCodonRecognized(codonRecognized); + return; + } + NStr::ToUpper(codonRecognized); + + const CSeq_feat& origFeat = mf.GetOriginalFeature(); + + CRef pEditedFeat(new CSeq_feat); + pEditedFeat->Assign(origFeat); + CRNA_ref::C_Ext::TTRNA & extTrna = pEditedFeat->SetData().SetRna().SetExt().SetTRNA(); + + set codons; + try { + for (char char1 : sIUPACmap.at(codonRecognized[0])) { + for (char char2 : sIUPACmap.at(codonRecognized[1])) { + for (char char3 : sIUPACmap.at(codonRecognized[2])) { + const auto codonIndex = CGen_code_table::CodonToIndex(char1, char2, char3); + codons.insert(codonIndex); + } + } + } + } + catch(CException&) { + xPutErrorBadCodonRecognized(codonRecognized); + return; + } + if (!codons.empty()) { + for (const auto codonIndex : codons) { + extTrna.SetCodon().push_back(codonIndex); + } + CSeq_feat_EditHandle feh(mpScope->GetObjectHandle(origFeat)); + feh.Replace(*pEditedFeat); + feh.RemoveQualifier("codon_recognized"); + } + } +} // --------------------------------------------------------------------------- void CFeatTableEdit::GenerateProteinAndTranscriptIds() @@ -1580,6 +1650,20 @@ CFeatTableEdit::xPutErrorMissingTranscriptId( xPutError(message); } +// ---------------------------------------------------------------------------- +void +CFeatTableEdit::xPutErrorBadCodonRecognized( + const string codonRecognized) +// ---------------------------------------------------------------------------- +{ + if (!mpMessageListener) { + return; + } + string message = "tRNA with bad codon recognized attribute \"" + + codonRecognized + "\"."; + xPutError(message); +} + // ---------------------------------------------------------------------------- void 
CFeatTableEdit::xPutErrorMissingProteinId( @@ -1606,7 +1690,7 @@ CFeatTableEdit::xPutErrorMissingProteinId( void CFeatTableEdit::xPutErrorDifferingProteinIds( const CMappedFeat& mrna) - // ---------------------------------------------------------------------------- +// ---------------------------------------------------------------------------- { if (!mpMessageListener) { return; @@ -1624,7 +1708,7 @@ CFeatTableEdit::xPutErrorDifferingProteinIds( void CFeatTableEdit::xPutErrorDifferingTranscriptIds( const CMappedFeat& mrna) - // ---------------------------------------------------------------------------- +// ---------------------------------------------------------------------------- { if (!mpMessageListener) { return; diff --git a/c++/src/objtools/edit/loc_edit.cpp b/c++/src/objtools/edit/loc_edit.cpp index 7a9e26b6..a0ff2cdc 100644 --- a/c++/src/objtools/edit/loc_edit.cpp +++ b/c++/src/objtools/edit/loc_edit.cpp @@ -1,4 +1,4 @@ -/* $Id: loc_edit.cpp 601240 2020-02-04 16:06:49Z ludwigf $ +/* $Id: loc_edit.cpp 609624 2020-06-04 15:45:32Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -2608,8 +2608,10 @@ bool ExtendPartialFeatureEnds(CBioseq_Handle bsh) CRef new_cds(new CSeq_feat()); new_cds->Assign(*(f->GetOriginalSeq_feat())); - if (AdjustFeatureEnd5(*new_cds, related_features, bsh.GetScope()) || - AdjustFeatureEnd3(*new_cds, related_features, bsh.GetScope())) { + const bool adjusted_5prime = AdjustFeatureEnd5(*new_cds, related_features, bsh.GetScope()); + const bool adjusted_3prime = AdjustFeatureEnd3(*new_cds, related_features, bsh.GetScope()); + + if (adjusted_5prime || adjusted_3prime) { feature::RetranslateCDS(*new_cds, bsh.GetScope()); CSeq_feat_EditHandle feh(*f); feh.Replace(*new_cds); diff --git a/c++/src/objtools/edit/remote_updater.cpp b/c++/src/objtools/edit/remote_updater.cpp index 91b2e56c..7fc406f6 100644 --- a/c++/src/objtools/edit/remote_updater.cpp +++ 
b/c++/src/objtools/edit/remote_updater.cpp @@ -1,4 +1,4 @@ -/* $Id: remote_updater.cpp 605109 2020-04-07 11:01:53Z ivanov $ +/* $Id: remote_updater.cpp 614634 2020-08-20 13:02:41Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -57,6 +57,8 @@ #include #include +#include +#include #include /* This header must go last */ @@ -69,7 +71,7 @@ DEFINE_CLASS_STATIC_MUTEX(CRemoteUpdater::m_static_mutex); namespace { -int FindPMID(CMLAClient& mlaClient, const CPub_equiv::Tdata& arr) +TEntrezId FindPMID(const list>& arr) { for (auto pPub : arr) { if (pPub->IsPmid()) { @@ -77,36 +79,93 @@ int FindPMID(CMLAClient& mlaClient, const CPub_equiv::Tdata& arr) } } - return 0; + return ZERO_ENTREZ_ID; } -// the method is not used at the momment -void CreatePubPMID(CMLAClient& mlaClient, CPub_equiv::Tdata& arr, int id) + +static bool s_IsConnectionFailure(EError_val mlaErrorVal) { + switch(mlaErrorVal) { + case eError_val_cannot_connect_pmdb: + case eError_val_cannot_connect_searchbackend_pmdb: + return true; + default: + break; + } + return false; +} + + +CRef s_GetPubFrompmid(CMLAClient& mlaClient, TEntrezId id, int maxAttempts, IObjtoolsListener* pMessageListener) { - try { - CPubMedId req(id); - CRef new_pub = mlaClient.AskGetpubpmid(req); - if (new_pub.NotEmpty()) - { - // authors come back in a weird format that we need - // to convert to ISO - if (new_pub->IsSetAuthors()) - CRemoteUpdater::ConvertToStandardAuthors((CAuth_list&)new_pub->GetAuthors()); + CRef result; + CPubMedId request(id); + CMLAClient::TReply reply; + + int maxCount = max(1, maxAttempts); + for (int count=0; count new_pmid(new CPub); - new_pmid->SetPmid().Set(id); - arr.push_back(new_pmid); - arr.push_back(new_pub); + CNcbiOstrstream oss; + oss << "Failed to retrieve publication for PMID " + << id + << ". "; + if (isConnectionError) { + oss << count+1 << " attempts made. 
"; + } + oss << "CMLAClient : " + << errorVal; + string msg = CNcbiOstrstreamToString(oss); + if (pMessageListener) { + pMessageListener->PutMessage(CObjEditMessage(msg, eDiag_Error)); + break; + } + else { + NCBI_THROW(CException, eUnknown, msg); + } } - } catch(...) { - // don't worry if we can't look it up } - + return result; } }// end anonymous namespace +bool CRemoteUpdater::xUpdatePubPMID(list>& arr, TEntrezId id) +{ + CMLAClient::TReply reply; + auto new_pub = + s_GetPubFrompmid(*m_mlaClient, id, m_MaxMlaAttempts, m_pMessageListener); + if (!new_pub) { + return false; + } + + // authors come back in a weird format that we need + // to convert to ISO + if (new_pub->IsSetAuthors()) + CRemoteUpdater::ConvertToStandardAuthors((CAuth_list&)new_pub->GetAuthors()); + + arr.clear(); + CRef new_pmid(new CPub); + new_pmid->SetPmid().Set(id); + arr.push_back(new_pmid); + arr.push_back(new_pub); + return true; +} + + +void CRemoteUpdater::SetMaxMlaAttempts(int maxAttempts) +{ + m_MaxMlaAttempts = maxAttempts; +} + class CCachedTaxon3_impl { @@ -131,6 +190,30 @@ public: } } + CRef GetOrg(const COrg_ref& org, IObjtoolsListener* pMessageListener=nullptr) + { + CRef result; + CRef reply = GetOrgReply(org); + if (reply->IsError() && pMessageListener) + { + const string& error_message = + "Taxon update: " + + (org.IsSetTaxname() ? org.GetTaxname() : NStr::NumericToString(org.GetTaxId())) + ": " + + reply->GetError().GetMessage(); + + pMessageListener->PutMessage( + CObjEditMessage(error_message, eDiag_Error)); + + } + else + if (reply->IsData() && reply->SetData().IsSetOrg()) + { + result.Reset(&reply->SetData().SetOrg()); + } + return result; + } + + CRef GetOrg(const COrg_ref& org, CRemoteUpdater::FLogger f_logger) { CRef result; @@ -141,14 +224,8 @@ public: "Taxon update: " + (org.IsSetTaxname() ? 
org.GetTaxname() : NStr::IntToString(org.GetTaxId())) + ": " + reply->GetError().GetMessage(); - -/* - logger->PutError(*auto_ptr( - CLineError::Create(ILineError::eProblem_Unset, eDiag_Warning, "", 0, - string("Taxon update: ") + - (org.IsSetTaxname() ? org.GetTaxname() : NStr::IntToString(org.GetTaxId())) + ": " + - reply->GetError().GetMessage()))); - */ + + f_logger(error_message); } else if (reply->IsData() && reply->SetData().IsSetOrg()) @@ -218,7 +295,7 @@ protected: auto_ptr m_cache; }; -void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeqdesc& obj) +void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, CSeqdesc& obj) { if (obj.IsOrg()) { @@ -232,7 +309,8 @@ void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeqdesc& obj) } void CRemoteUpdater::xUpdateOrgTaxname(FLogger logger, COrg_ref& org) -{ +{ // remove after the deprecated UpdateOrgFromTaxon(FLogger, CSeqdes&) + // has been removed. CMutexGuard guard(m_Mutex); int taxid = org.GetTaxId(); @@ -252,6 +330,42 @@ void CRemoteUpdater::xUpdateOrgTaxname(FLogger logger, COrg_ref& org) } } +void CRemoteUpdater::UpdateOrgFromTaxon(CSeqdesc& desc) +{ + if (desc.IsOrg()) + { + xUpdateOrgTaxname(desc.SetOrg()); + } + else + if (desc.IsSource() && desc.GetSource().IsSetOrg()) + { + xUpdateOrgTaxname(desc.SetSource().SetOrg()); + } +} + + +void CRemoteUpdater::xUpdateOrgTaxname(COrg_ref& org) +{ + CMutexGuard guard(m_Mutex); + + TTaxId taxid = org.GetTaxId(); + if (taxid == ZERO_TAX_ID && !org.IsSetTaxname()) + return; + + if (m_taxClient.get() == 0) + { + m_taxClient.reset(new CCachedTaxon3_impl); + m_taxClient->Init(); + } + + CRef new_org = m_taxClient->GetOrg(org, m_pMessageListener); + if (new_org.NotEmpty()) + { + org.Assign(*new_org); + } +} + + CRemoteUpdater& CRemoteUpdater::GetInstance() { CMutexGuard guard(m_static_mutex); @@ -261,6 +375,12 @@ CRemoteUpdater& CRemoteUpdater::GetInstance() return instance; } +CRemoteUpdater::CRemoteUpdater(IObjtoolsListener* 
pMessageListener) : + m_pMessageListener(pMessageListener) +{ +} + + CRemoteUpdater::CRemoteUpdater(bool enable_caching) :m_enable_caching(enable_caching) { @@ -280,7 +400,7 @@ void CRemoteUpdater::ClearCache() } } -void CRemoteUpdater::UpdatePubReferences(objects::CSeq_entry_EditHandle& obj) +void CRemoteUpdater::UpdatePubReferences(CSeq_entry_EditHandle& obj) { for (CBioseq_CI it(obj); it; ++it) { @@ -336,61 +456,59 @@ void CRemoteUpdater::xUpdatePubReferences(CSeq_entry& entry) xUpdatePubReferences(entry.SetDescr()); } -void CRemoteUpdater::xUpdatePubReferences(objects::CSeq_descr& seq_descr) + + +void CRemoteUpdater::xUpdatePubReferences(CSeq_descr& seq_descr) { CMutexGuard guard(m_Mutex); - CSeq_descr::Tdata& descr = seq_descr.Set(); - size_t count = descr.size(); - CSeq_descr::Tdata::iterator it = descr.begin(); - - for (size_t i=0; iIsPub() || !pDesc->GetPub().IsSetPub()) { continue; + } - CPub_equiv::Tdata& arr = (**it).SetPub().SetPub().Set(); - if (m_mlaClient.Empty()) - m_mlaClient.Reset(new CMLAClient); + auto& arr = pDesc->SetPub().SetPub().Set(); + if (m_mlaClient.Empty()) + m_mlaClient.Reset(new CMLAClient()); - int id = FindPMID(*m_mlaClient, arr); - if (id>0) - { - CreatePubPMID(*m_mlaClient, arr, id); + auto id = FindPMID(arr); + if (id>ZERO_ENTREZ_ID) { + xUpdatePubPMID(arr, id); + continue; } - else - // nothing was found - NON_CONST_ITERATE(CPub_equiv::Tdata, item_it, arr) - { - if ((**item_it).IsArticle()) - try - { - id = m_mlaClient->AskCitmatchpmid(**item_it); - if (id>0) + + for (auto pPubEquiv : arr) { + if (pPubEquiv->IsArticle()) { + CMLAClient::TReply reply; + try { + id = ENTREZ_ID_FROM(int, m_mlaClient->AskCitmatchpmid(*pPubEquiv, &reply)); + } + catch(CException& e) { - CreatePubPMID(*m_mlaClient, arr, id); + continue; + } + if (id>ZERO_ENTREZ_ID && + xUpdatePubPMID(arr,id)) { break; } } - catch(CException& /*ex*/) - { - } } } } + namespace { typedef set* > TOwnerSet; typedef struct { TOwnerSet owner; CRef org_ref; } TOwner; typedef 
map TOrgMap; - void _UpdateOrgFromTaxon(CRemoteUpdater::FLogger logger, objects::CSeq_entry& entry, TOrgMap& m) + void _UpdateOrgFromTaxon(CSeq_entry& entry, TOrgMap& m) { if (entry.IsSet()) { NON_CONST_ITERATE(CSeq_entry::TSet::TSeq_set, it, entry.SetSet().SetSeq_set()) { - _UpdateOrgFromTaxon(logger, **it, m); + _UpdateOrgFromTaxon(**it, m); } } @@ -404,13 +522,11 @@ namespace CRef org_ref; if (desc.IsOrg()) { - //xUpdateOrgTaxname(logger, desc.SetOrg()); org_ref.Reset(&desc.SetOrg()); } else if (desc.IsSource() && desc.GetSource().IsSetOrg()) { - //xUpdateOrgTaxname(logger, desc.SetSource().SetOrg()); org_ref.Reset(&desc.SetSource().SetOrg()); } if (org_ref) @@ -442,11 +558,12 @@ namespace } } } -void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeq_entry& entry) + +void CRemoteUpdater::UpdateOrgFromTaxon(CSeq_entry& entry) { TOrgMap org_to_update; - _UpdateOrgFromTaxon(logger, entry, org_to_update); + _UpdateOrgFromTaxon(entry, org_to_update); if (org_to_update.empty()) return; @@ -475,7 +592,14 @@ void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeq_entry& ent } } -void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeq_entry_EditHandle& obj) +void CRemoteUpdater::UpdateOrgFromTaxon(FLogger /*logger*/, CSeq_entry& entry) +{ + // this method is deprecated. 
+ // until we remove it, it simply calls the non-deprecated method + UpdateOrgFromTaxon(entry); +} + +void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, CSeq_entry_EditHandle& obj) { for (CBioseq_CI bioseq_it(obj); bioseq_it; ++bioseq_it) { @@ -486,6 +610,8 @@ void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeq_entry_Edit } } + + namespace { bool s_IsAllCaps(const string& str) @@ -663,7 +789,10 @@ void CRemoteUpdater::PostProcessPubs(CSeq_entry_EditHandle& obj) PostProcessPubs((CPubdesc&)desc_it->GetPub()); } } - +} + +void CRemoteUpdater::SetMLAClient(CMLAClient& mlaClient) { + m_mlaClient.Reset(&mlaClient); } END_SCOPE(edit) diff --git a/c++/src/objtools/format/context.cpp b/c++/src/objtools/format/context.cpp index d1346af1..ac96ff35 100644 --- a/c++/src/objtools/format/context.cpp +++ b/c++/src/objtools/format/context.cpp @@ -1,4 +1,4 @@ -/* $Id: context.cpp 602293 2020-02-20 18:24:39Z kans $ +/* $Id: context.cpp 608545 2020-05-18 19:35:41Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -688,7 +688,8 @@ void CBioseqContext::x_CheckForShowComments() const } // JIRA SQD-4444 : copy annot selector from the one saved in this context structure - SAnnotSelector sel = m_FFCtx.SetAnnotSelector(); + // SAnnotSelector sel = m_FFCtx.SetAnnotSelector(); + SAnnotSelector sel; sel.SetAnnotType(CSeq_annot::TData::e_Ftable); CAnnot_CI annot_ci(m_Handle, sel); for( ; annot_ci; ++annot_ci ) { diff --git a/c++/src/objtools/format/dbsource_item.cpp b/c++/src/objtools/format/dbsource_item.cpp index 2fa2903d..59be61e4 100644 --- a/c++/src/objtools/format/dbsource_item.cpp +++ b/c++/src/objtools/format/dbsource_item.cpp @@ -1,4 +1,4 @@ -/* $Id: dbsource_item.cpp 577454 2019-01-03 22:58:25Z kans $ +/* $Id: dbsource_item.cpp 614611 2020-08-20 12:59:34Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -628,7 +628,7 @@ string 
CDBSourceItem::x_FormatDBSourceID(const CSeq_id_Handle& idh) GetContext()->Config().GetHTMLFormatter().FormatUniProtId(ht, acc); } else { GetContext()->Config().GetHTMLFormatter().FormatNucId(ht, *idh.GetSeqId(), - GetContext()->GetScope().GetGi(idh), acc); + GI_TO(TIntId, GetContext()->GetScope().GetGi(idh)), acc); } #endif s += comma + sep + "accession " + ht; diff --git a/c++/src/objtools/format/defline_item.cpp b/c++/src/objtools/format/defline_item.cpp index 2ed653f0..4504eab3 100644 --- a/c++/src/objtools/format/defline_item.cpp +++ b/c++/src/objtools/format/defline_item.cpp @@ -1,4 +1,4 @@ -/* $Id: defline_item.cpp 577454 2019-01-03 22:58:25Z kans $ +/* $Id: defline_item.cpp 613774 2020-08-12 16:32:22Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -91,6 +91,9 @@ void CDeflineItem::x_GatherInfo(CBioseqContext& ctx) if ( ctx.Config().IgnoreExistingTitle() ) { flags |= sequence::CDeflineGenerator::fIgnoreExisting; } + if ( ctx.Config().ShowDeflineModifiers() ) { + flags |= sequence::CDeflineGenerator::fShowModifiers; + } if ( ctx.UsingSeqEntryIndex() ) { CRef idx = ctx.GetSeqEntryIndex(); CBioseq_Handle bsh = scope.GetBioseqHandle(*bioseq); @@ -101,8 +104,11 @@ void CDeflineItem::x_GatherInfo(CBioseqContext& ctx) if (! 
Defliner.UsePDBCompoundForDefline()) { ctx.SetPDBCompoundForComment(true); } - // CompressSpaces( m_Defline ); - CleanAndCompress (m_Defline, m_Defline.c_str()); + if ( ctx.Config().ShowDeflineModifiers() ) { + CompressSpaces( m_Defline ); + } else { + CleanAndCompress (m_Defline, m_Defline.c_str()); + } ConvertQuotes(m_Defline); AddPeriod(m_Defline); CSeqdesc_CI di(ctx.GetHandle(), CSeqdesc::e_Title); diff --git a/c++/src/objtools/format/feature_item.cpp b/c++/src/objtools/format/feature_item.cpp index 8b989cbd..97a3e314 100644 --- a/c++/src/objtools/format/feature_item.cpp +++ b/c++/src/objtools/format/feature_item.cpp @@ -1,4 +1,4 @@ -/* $Id: feature_item.cpp 606747 2020-04-27 11:07:41Z ivanov $ +/* $Id: feature_item.cpp 615038 2020-08-26 13:39:07Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -489,7 +489,7 @@ static bool s_SkipFeature(const CMappedFeat& feat, CSeqFeatData::ESubtype subtype = feat.GetData().GetSubtype(); if ( subtype == CSeqFeatData::eSubtype_pub || - subtype == CSeqFeatData::eSubtype_non_std_residue || + /* subtype == CSeqFeatData::eSubtype_non_std_residue || */ subtype == CSeqFeatData::eSubtype_biosrc || subtype == CSeqFeatData::eSubtype_rsite || subtype == CSeqFeatData::eSubtype_seq ) { @@ -1698,16 +1698,16 @@ void CFeatureItem::x_AddQualsIdx( gf = &(mf.GetMappedFeature()); gr = &(mf.GetData().GetGene()); if (gr) { - if (feat_gene_xref->IsSetLocus() && gr->IsSetLocus()) { - if (feat_gene_xref->GetLocus() == gr->GetLocus()) { + if (feat_gene_xref->IsSetLocus_tag() && gr->IsSetLocus_tag()) { + if (feat_gene_xref->GetLocus_tag() == gr->GetLocus_tag()) { gene_feat = &(mf.GetMappedFeature()); gene_ref = &(mf.GetData().GetGene()); } else { // RW-985 gene_ref = feat_gene_xref; } - } else if (feat_gene_xref->IsSetLocus_tag() && gr->IsSetLocus_tag()) { - if (feat_gene_xref->GetLocus_tag() == gr->GetLocus_tag()) { + } else if (feat_gene_xref->IsSetLocus() && gr->IsSetLocus()) { + if 
(feat_gene_xref->GetLocus() == gr->GetLocus()) { gene_feat = &(mf.GetMappedFeature()); gene_ref = &(mf.GetData().GetGene()); } else { @@ -1817,6 +1817,9 @@ void CFeatureItem::x_AddQualsIdx( case CSeqFeatData::e_Psec_str: x_AddQualsPsecStr( ctx ); break; + case CSeqFeatData::e_Non_std_residue: + x_AddQualsNonStd( ctx ); + break; case CSeqFeatData::e_Het: x_AddQualsHet( ctx ); break; @@ -2017,6 +2020,9 @@ void CFeatureItem::x_AddQuals( case CSeqFeatData::e_Psec_str: x_AddQualsPsecStr( ctx ); break; + case CSeqFeatData::e_Non_std_residue: + x_AddQualsNonStd( ctx ); + break; case CSeqFeatData::e_Het: x_AddQualsHet( ctx ); break; @@ -2162,7 +2168,7 @@ void CFeatureItem::x_AddQualsRna( x_AddQual(slot, new CFlatSeqIdQVal(*acc_id)); } /* - if (! cfg.HideGI()) { + if (! (cfg.HideGI() || cfg.IsPolicyFtp())) { x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(*sip, true)); } */ @@ -2593,7 +2599,7 @@ void CFeatureItem::x_GetAssociatedProtInfoIdx( if ( protId ) { if ( !cfg.AlwaysTranslateCDS() ) { CScope::EGetBioseqFlag get_flag = CScope::eGetBioseq_Loaded; - if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() ) { + if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() || cfg.IsPolicyFtp() ) { get_flag = CScope::eGetBioseq_All; } protHandle = scope.GetBioseqHandle(*protId, get_flag); @@ -2642,7 +2648,7 @@ void CFeatureItem::x_GetAssociatedProtInfo( if ( protId ) { if ( !cfg.AlwaysTranslateCDS() ) { CScope::EGetBioseqFlag get_flag = CScope::eGetBioseq_Loaded; - if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() ) { + if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() || cfg.IsPolicyFtp() ) { get_flag = CScope::eGetBioseq_All; } protHandle = scope.GetBioseqHandle(*protId, get_flag); @@ -2753,7 +2759,7 @@ void CFeatureItem::x_AddQualProteinId( case CSeq_id::e_Gi: if( seqid.GetGi() > ZERO_GI ) { const CFlatFileConfig& cfg = GetContext()->Config(); - if (! cfg.HideGI()) { + if (! 
(cfg.HideGI() || cfg.IsPolicyFtp())) { if ( eLastRegularChoice == CSeq_id::e_not_set ) { // use as protein_id if it's the first usable one x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( seqid ) ); @@ -3134,7 +3140,7 @@ void CFeatureItem::x_AddProductIdQuals( const CFlatFileConfig& cfg = GetContext()->Config(); ITERATE( CBioseq_Handle::TId, id_iter, ids ) { if( id_iter->IsGi() ) { - if (! cfg.HideGI()) { + if (! (cfg.HideGI() || cfg.IsPolicyFtp())) { x_AddQual( eFQ_db_xref, new CFlatStringQVal("GI:" + NStr::NumericToString(id_iter->GetGi()) )); } @@ -3263,6 +3269,20 @@ void CFeatureItem::x_AddQualsPsecStr( x_AddQual( eFQ_sec_str_type, new CFlatStringQVal( sec_str_as_str ) ); } +// ---------------------------------------------------------------------------- +void CFeatureItem::x_AddQualsNonStd( + CBioseqContext& ctx ) +// ---------------------------------------------------------------------------- +{ + _ASSERT( m_Feat.GetData().IsNon_std_residue() ); + + const CSeqFeatData& data = m_Feat.GetData(); + + CSeqFeatData_Base::TNon_std_residue n_s_res = data.GetNon_std_residue(); + + x_AddQual( eFQ_non_std_residue, new CFlatStringQVal( n_s_res ) ); +} + // ---------------------------------------------------------------------------- void CFeatureItem::x_AddQualsHet( CBioseqContext& ctx ) @@ -4503,6 +4523,7 @@ void CFeatureItem::x_FormatQuals(CFlatFeature& ff) const DO_QUAL(site_type); DO_QUAL(sec_str_type); DO_QUAL(heterogen); + DO_QUAL(non_std_residue); DO_QUAL(tag_peptide); @@ -5186,6 +5207,7 @@ static const TQualPair sc_GbToFeatQualMap[] = { { eFQ_mol_wt, CSeqFeatData::eQual_calculated_mol_wt }, { eFQ_ncRNA_class, CSeqFeatData::eQual_ncRNA_class }, { eFQ_nomenclature, CSeqFeatData::eQual_nomenclature }, + { eFQ_non_std_residue, CSeqFeatData::eQual_non_std_residue }, { eFQ_number, CSeqFeatData::eQual_number }, { eFQ_old_locus_tag, CSeqFeatData::eQual_old_locus_tag }, { eFQ_operon, CSeqFeatData::eQual_operon }, @@ -5311,6 +5333,9 @@ void CFeatureItem::x_AddFTableQuals( 
case CSeqFeatData::e_Psec_str: x_AddFTablePsecStrQuals(data.GetPsec_str()); break; + case CSeqFeatData::e_Non_std_residue: + x_AddFTableNonStdQuals(data.GetNon_std_residue()); + break; case CSeqFeatData::e_Het: x_AddFTablePsecStrQuals(data.GetHet()); break; @@ -5521,6 +5546,8 @@ void CFeatureItem::x_AddFTableAnticodon( case CTrna_ext::C_Aa::e_Ncbistdaa: aa = GetAAName(trna_ext.GetAa().GetNcbistdaa(), false); break; + default: + break; } string seq("---"); @@ -5585,7 +5612,7 @@ void CFeatureItem::x_AddFTableRnaQuals( CBioseq_Handle prod = ctx.GetScope().GetBioseqHandle(m_Feat.GetProductId()); if ( prod ) { - string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !ctx.Config().HideGI()); + string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !(ctx.Config().HideGI() || ctx.Config().IsPolicyFtp())); if (!NStr::IsBlank(id_str)) { x_AddFTableQual("transcript_id", id_str); } @@ -5719,7 +5746,7 @@ void CFeatureItem::x_AddFTableCdregionQuals( } if (prod && !cfg.HideProteinID()) { - string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !ctx.Config().HideGI()); + string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !(ctx.Config().HideGI() || ctx.Config().IsPolicyFtp())); if (!NStr::IsBlank(id_str)) { x_AddFTableQual("protein_id", id_str); } @@ -5812,6 +5839,16 @@ void CFeatureItem::x_AddFTablePsecStrQuals( } } +// ---------------------------------------------------------------------------- +void CFeatureItem::x_AddFTableNonStdQuals( + const CSeqFeatData::TNon_std_residue& res ) +// ---------------------------------------------------------------------------- +{ + if ( !res.empty() ) { + x_AddFTableQual("non_std_residue", res); + } +} + static const string s_GetSubtypeString(const COrgMod::TSubtype& subtype) { @@ -6067,6 +6104,13 @@ static string s_GetSpecimenVoucherText( if( voucher_info_ref->m_Prefix != NULL ) { text 
<< *voucher_info_ref->m_Prefix; } + if( voucher_info_ref->m_Trim != NULL ) { + const string& trim = *voucher_info_ref->m_Trim; + if (NStr::StartsWith(id, trim)) { + NStr::TrimPrefixInPlace(id, trim); + NStr::TruncateSpacesInPlace(id); + } + } if( voucher_info_ref->m_PadTo > 0 && voucher_info_ref->m_PadWith != NULL) { int len_id = id.length(); int len_pad = voucher_info_ref->m_PadWith->length(); @@ -6448,6 +6492,7 @@ void CSourceFeatureItem::x_FormatGBNoteQuals(CFlatFeature& ff) const } +/* static bool s_IsExactAndNonExactMatchOnNoteQuals(CFlatFeature::TQuals& qvec, const string& str) { if (qvec.empty()) { @@ -6473,6 +6518,7 @@ static bool s_IsExactAndNonExactMatchOnNoteQuals(CFlatFeature::TQuals& qvec, con if (has_exact == 1 && non_exact > 0) return true; return false; } +*/ diff --git a/c++/src/objtools/format/flat_file_config.cpp b/c++/src/objtools/format/flat_file_config.cpp index 287b248b..a509b9fd 100644 --- a/c++/src/objtools/format/flat_file_config.cpp +++ b/c++/src/objtools/format/flat_file_config.cpp @@ -1,4 +1,4 @@ -/* $Id: flat_file_config.cpp 606754 2020-04-27 11:09:46Z ivanov $ +/* $Id: flat_file_config.cpp 614736 2020-08-21 13:43:48Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -314,15 +314,17 @@ CFlatFileConfig::CFlatFileConfig( TStyle style, TFlags flags, TView view, - TPolicy policy) : - m_Format(format), m_Mode(mode), m_Style(style), m_Flags(flags), m_View(view), m_Policy(policy) + TPolicy policy, + TCustom custom) : + m_Format(format), m_Mode(mode), m_Style(style), m_Flags(flags), m_View(view), m_Policy(policy), m_Custom(custom) { m_RefSeqConventions = false; + m_FeatDepth = 0; + m_GapDepth = 0; SetGenbankBlocks(fGenbankBlocks_All); SetGenbankBlockCallback(NULL); SetCanceledCallback(NULL); BasicCleanup(false); - SetCustom(0); // FTable always requires master style if (m_Format == eFormat_FTable) { @@ -535,7 +537,7 @@ void 
CFlatFileConfig::AddArgumentDescriptions(CArgDescriptions& args) "Far fetch policy", CArgDescriptions::eString, "adaptive"); arg_desc->SetConstraint("policy", - &(*new CArgAllow_Strings, "adaptive", "internal", "external", "exhaustive")); + &(*new CArgAllow_Strings, "adaptive", "internal", "external", "exhaustive", "ftp", "web")); // flags (default: 0) arg_desc->AddDefaultKey("flags", "Flags", @@ -615,6 +617,9 @@ void CFlatFileConfig::AddArgumentDescriptions(CArgDescriptions& args) arg_desc->AddOptionalKey("depth", "Depth", "Exploration depth", CArgDescriptions::eInteger); + arg_desc->AddOptionalKey("gap-depth", "GapDepth", + "Gap exploration depth", CArgDescriptions::eInteger); + arg_desc->AddOptionalKey("max_search_segments", "MaxSearchSegments", "Max number of empty segments to search", CArgDescriptions::eInteger); @@ -746,6 +751,10 @@ CFlatFileConfig::EPolicy x_GetPolicy(const CArgs& args) return CFlatFileConfig::ePolicy_External; } else if ( Policy == "exhaustive" ) { return CFlatFileConfig::ePolicy_Exhaustive; + } else if ( Policy == "ftp" ) { + return CFlatFileConfig::ePolicy_Ftp; + } else if ( Policy == "web" ) { + return CFlatFileConfig::ePolicy_Web; } // default @@ -847,11 +856,6 @@ CFlatFileConfig::ECustom x_GetCustom(const CArgs& args) { int custom = args["custom"].AsInteger(); - // ID-5865 : Set the "show SNP" and "show CDD" bits based on the value of the - // "enable-external" flag. - if (args["enable-external"] || args["policy"].AsString() == "external") - custom |= (CFlatFileConfig::fShowSNPFeatures | CFlatFileConfig::fShowCDDFeatures); - return (CFlatFileConfig::ECustom)custom; } @@ -916,6 +920,24 @@ void CFlatFileConfig::FromArguments(const CArgs& args) CFlatFileConfig::TGenbankBlocks genbank_blocks = x_GetGenbankBlocks(args); CFlatFileConfig::ECustom custom = x_GetCustom(args); + // ID-5865 : Set the "show SNP" and "show CDD" bits based on the value of the + // "enable-external" flag. 
+ if (args["no-external"]) { + int flg = (int) flags; + flg |= CFlatFileConfig::fHideCDDFeatures; + flg |= CFlatFileConfig::fHideSNPFeatures; + flags = (CFlatFileConfig::EFlags) flg; + } else if (args["enable-external"] || args["policy"].AsString() == "external") { + int cust = (int) custom; + if ((flags & CFlatFileConfig::fHideCDDFeatures) == 0) { + cust |= CFlatFileConfig::fShowCDDFeatures; + } + if ((flags & CFlatFileConfig::fHideSNPFeatures) == 0) { + cust |= CFlatFileConfig::fShowSNPFeatures; + } + custom = (CFlatFileConfig::ECustom) cust; + } + SetFormat(format); SetMode(mode); SetStyle(style); @@ -925,6 +947,15 @@ void CFlatFileConfig::FromArguments(const CArgs& args) m_fGenbankBlocks = genbank_blocks; m_BasicCleanup = args["cleanup"]; SetCustom(custom); + + if( args["depth"] ) { + int featDepth = args["depth"].AsInteger(); + SetFeatDepth(featDepth); + } + if( args["gap-depth"] ) { + int gapDepth = args["gap-depth"].AsInteger(); + SetGapDepth(gapDepth); + } } #ifdef NEW_HTML_FMT @@ -958,7 +989,7 @@ void CHTMLEmptyFormatter::FormatNucSearch(CNcbiOstream& os, const string& id) co os << id; } -void CHTMLEmptyFormatter::FormatTaxid(string& str, const int taxid, const string& taxname) const +void CHTMLEmptyFormatter::FormatTaxid(string& str, const TTaxId taxid, const string& taxname) const { str = taxname; } diff --git a/c++/src/objtools/format/flat_file_generator.cpp b/c++/src/objtools/format/flat_file_generator.cpp index 2337cf73..f7679515 100644 --- a/c++/src/objtools/format/flat_file_generator.cpp +++ b/c++/src/objtools/format/flat_file_generator.cpp @@ -1,4 +1,4 @@ -/* $Id: flat_file_generator.cpp 606748 2020-04-27 11:07:58Z ivanov $ +/* $Id: flat_file_generator.cpp 615047 2020-08-26 13:40:19Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -84,14 +84,14 @@ CFlatFileGenerator::CFlatFileGenerator CFlatFileConfig::TStyle style, CFlatFileConfig::TFlags flags, CFlatFileConfig::TView view, - 
CFlatFileConfig::TCustom custom) : - m_Ctx(new CFlatFileContext(CFlatFileConfig(format, mode, style, flags, view))) + CFlatFileConfig::TCustom custom, + CFlatFileConfig::TPolicy policy) : + m_Ctx(new CFlatFileContext(CFlatFileConfig(format, mode, style, flags, view, policy, custom))) { m_Failed = false; if ( !m_Ctx ) { NCBI_THROW(CFlatException, eInternal, "Unable to initialize context"); } - m_Ctx->SetConfig().SetCustom(custom); } @@ -319,6 +319,12 @@ void CFlatFileGenerator::Generate if ( m_Ctx->GetConfig().IsPolicyExhaustive() ) { policy = CSeqEntryIndex::eExhaustive; } + if ( m_Ctx->GetConfig().IsPolicyFtp() ) { + policy = CSeqEntryIndex::eFtp; + } + if ( m_Ctx->GetConfig().IsPolicyWeb() ) { + policy = CSeqEntryIndex::eWeb; + } CRef idx(new CSeqEntryIndex( topseh, policy, flags )); m_Ctx->SetSeqEntryIndex(idx); if (idx->IsIndexFailure()) { @@ -337,9 +343,11 @@ void CFlatFileGenerator::Generate // bool nearFeatsSuppress = false; bool isNc = false; + /* bool isNgNtNwNz = false; bool isGED = false; bool isTPA = false; + */ bool hasLocalFeat = false; bool forceOnlyNear = false; @@ -355,12 +363,12 @@ void CFlatFileGenerator::Generate case CSeq_id::e_Genbank: case CSeq_id::e_Embl: case CSeq_id::e_Ddbj: - isGED = true; + // isGED = true; break; case CSeq_id::e_Tpg: case CSeq_id::e_Tpe: case CSeq_id::e_Tpd: - isTPA = true; + // isTPA = true; break; case CSeq_id::e_Other: { @@ -370,7 +378,7 @@ void CFlatFileGenerator::Generate if (acc == "NC_") { isNc = true; } else if (acc == "NG_" || acc == "NT_" || acc == "NW_" || acc == "NZ_") { - isNgNtNwNz = true; + // isNgNtNwNz = true; } } } @@ -718,31 +726,39 @@ void CFlatFileGenerator::Generate } if ( cfg.HideSNPFeatures() ) { flags |= CSeqEntryIndex::fHideSNPFeats; + } else if ( cfg.ShowSNPFeatures() ) { + flags |= CSeqEntryIndex::fShowSNPFeats; } if ( cfg.HideCDDFeatures() ) { flags |= CSeqEntryIndex::fHideCDDFeats; - } - if ( cfg.ShowSNPFeatures() ) { - flags |= CSeqEntryIndex::fShowSNPFeats; - } - if ( 
cfg.ShowCDDFeatures() ) { + } else if ( cfg.ShowCDDFeatures() ) { flags |= CSeqEntryIndex::fShowCDDFeats; } - if ( m_Ctx->GetConfig().IsPolicyInternal() ) { + if ( cfg.IsPolicyInternal() ) { policy = CSeqEntryIndex::eInternal; } - if ( m_Ctx->GetConfig().IsPolicyExternal() ) { + if ( cfg.IsPolicyExternal() ) { policy = CSeqEntryIndex::eExternal; } - if ( m_Ctx->GetConfig().IsPolicyExhaustive() ) { + if ( cfg.IsPolicyExhaustive() ) { policy = CSeqEntryIndex::eExhaustive; } - CRef idx(new CSeqEntryIndex( topseh, policy, flags )); + if ( cfg.IsPolicyFtp() ) { + policy = CSeqEntryIndex::eFtp; + } + if ( cfg.IsPolicyWeb() ) { + policy = CSeqEntryIndex::eWeb; + } + CRef idx(new CSeqEntryIndex( topseh, policy, flags)); m_Ctx->SetSeqEntryIndex(idx); if (idx->IsIndexFailure()) { m_Failed = true; return; } + int featDepth = cfg.GetFeatDepth(); + idx->SetFeatDepth(featDepth); + int gapDepth = cfg.GetGapDepth(); + idx->SetGapDepth(gapDepth); } catch(CException &) { m_Failed = true; return; @@ -781,6 +797,20 @@ void CFlatFileGenerator::Generate } +void CFlatFileGenerator::Generate +(const CBioseq_Handle& bsh, + CNcbiOstream& os, + bool useSeqEntryIndexing) +{ + CRef + item_os(new CFormatItemOStream(new COStreamTextOStream(os))); + + const CSeq_entry_Handle entry = bsh.GetSeq_entry_Handle(); + Generate(entry, *item_os, useSeqEntryIndexing); + +} + + void CFlatFileGenerator::Generate (const CSeq_submit& submit, CScope& scope, diff --git a/c++/src/objtools/format/flat_qual_slots.cpp b/c++/src/objtools/format/flat_qual_slots.cpp index f26530e2..074303cf 100644 --- a/c++/src/objtools/format/flat_qual_slots.cpp +++ b/c++/src/objtools/format/flat_qual_slots.cpp @@ -1,4 +1,4 @@ -/* $Id: flat_qual_slots.cpp 564513 2018-05-29 17:40:10Z kans $ +/* $Id: flat_qual_slots.cpp 613781 2020-08-12 16:42:43Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -114,6 +114,7 @@ GetStringOfFeatQual(EFeatureQualifier eFeatureQualifier) 
TYPICAL_FQ(mol_wt), TYPICAL_FQ(ncRNA_class), TYPICAL_FQ(nomenclature), + TYPICAL_FQ(non_std_residue), TYPICAL_FQ(number), TYPICAL_FQ(old_locus_tag), TYPICAL_FQ(operon), diff --git a/c++/src/objtools/format/gather_items.cpp b/c++/src/objtools/format/gather_items.cpp index 9848b033..5f080943 100644 --- a/c++/src/objtools/format/gather_items.cpp +++ b/c++/src/objtools/format/gather_items.cpp @@ -1,4 +1,4 @@ -/* $Id: gather_items.cpp 607405 2020-05-04 14:19:32Z ivanov $ +/* $Id: gather_items.cpp 615788 2020-09-03 18:19:11Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -751,7 +751,7 @@ void CFlatGatherer::x_GatherReferencesIdx(const CSeq_loc& loc, TReferences& refs if (! bsx) return; // gather references from descriptors - bsx->IterateDescriptors([this, &ctx, &scope, &refs, &idx, bsx](CDescriptorIndex& sdx) { + bsx->IterateDescriptors([this, &refs, &idx, bsx](CDescriptorIndex& sdx) { try { CSeqdesc::E_Choice chs = sdx.GetType(); if (chs == CSeqdesc::e_Pub) { @@ -1247,8 +1247,7 @@ void CFlatGatherer::x_IdComments(CBioseqContext& ctx, string genome_build_number = CGenomeAnnotComment::GetGenomeBuildNumber(ctx.GetHandle()); bool has_ref_track_status = s_HasRefTrackStatus(ctx.GetHandle()); - CCommentItem::ECommentFormat format = ctx.Config().DoHTML() ? - CCommentItem::eFormat_Html : CCommentItem::eFormat_Text; + // CCommentItem::ECommentFormat format = ctx.Config().DoHTML() ? 
CCommentItem::eFormat_Html : CCommentItem::eFormat_Text; ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) { const CSeq_id& id = **id_iter; @@ -2006,7 +2005,7 @@ void CFlatGatherer::x_CollectBioSourcesOnBioseq // if protein, get sources applicable to DNA location of CDS if ( ctx.IsProt() ) { // collect biosources features on bioseq - if ( !ctx.DoContigStyle() || cfg.ShowContigSources() ) { + if ( !ctx.DoContigStyle() || cfg.ShowContigSources() || cfg.IsPolicyFtp() ) { CConstRef src_feat = x_GetSourceFeatFromCDS (bh); if (src_feat.NotEmpty()) { // CMappedFeat mapped_feat(bh.GetScope().GetSeq_featHandle(*src_feat)); @@ -2028,7 +2027,7 @@ void CFlatGatherer::x_CollectBioSourcesOnBioseq if ( ! ctx.IsProt() ) { // collect biosources features on bioseq - if ( !ctx.DoContigStyle() || cfg.ShowContigSources() ) { + if ( !ctx.DoContigStyle() || cfg.ShowContigSources() || cfg.IsPolicyFtp() ) { x_CollectSourceFeatures(bh, range, ctx, srcs); } } @@ -2038,7 +2037,7 @@ void CFlatGatherer::x_CollectBioSourcesOnBioseq void CFlatGatherer::x_CollectBioSources(TSourceFeatSet& srcs) const { CBioseqContext& ctx = *m_Current; - CScope* scope = &ctx.GetScope(); + // CScope* scope = &ctx.GetScope(); const CFlatFileConfig& cfg = ctx.Config(); x_CollectBioSourcesOnBioseq(ctx.GetHandle(), @@ -2462,6 +2461,7 @@ bool CFlatGatherer::x_BiosourcesEqualForMergingPurposes( return true; } +// for the non-indexed, non-faster, older version of the flatfile generator void s_SetSelection(SAnnotSelector& sel, CBioseqContext& ctx) { const CFlatFileConfig& cfg = ctx.Config(); @@ -2966,7 +2966,7 @@ static bool s_IsCDD(const CSeq_feat_Handle& feat) { if (feat.GetAnnot().IsNamed()) { const string& name = feat.GetAnnot().GetName(); - return (name == "Annot:CDD" || name == "CDDSearch"); + return (name == "Annot:CDD" || name == "CDDSearch" || name == "CDD"); } return false; } @@ -3006,7 +3006,7 @@ void CFlatGatherer::x_GatherFeaturesOnWholeLocationIdx SAnnotSelector& sel, CBioseqContext& ctx) const { - CScope& 
scope = ctx.GetScope(); + // CScope& scope = ctx.GetScope(); CFlatItemOStream& out = *m_ItemOS; CSeqMap_CI gap_it = s_CreateGapMapIter(loc, ctx); @@ -3045,7 +3045,7 @@ void CFlatGatherer::x_GatherFeaturesOnWholeLocationIdx s_SetGapIdxData (gap_data, gaps); } - bsx->IterateFeatures([this, &ctx, &scope, &prev_feat, &gap_it, &loc_len, &item, &out, &slice_mapper, + bsx->IterateFeatures([this, &ctx, &prev_feat, &loc_len, &item, &out, &slice_mapper, gaps, &gap_data, showGapsOfSizeZero, bsx](CFeatureIndex& sfx) { try { CMappedFeat mf = sfx.GetMappedFeat(); @@ -3104,7 +3104,7 @@ void CFlatGatherer::x_GatherFeaturesOnWholeLocationIdx // may need to map sig_peptide on a different segment if (feat.GetData().IsCdregion()) { if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) { - x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx, slice_mapper); + x_GetFeatsOnCdsProductIdx(original_feat, ctx, slice_mapper); } } return; // continue; @@ -3120,7 +3120,7 @@ void CFlatGatherer::x_GatherFeaturesOnWholeLocationIdx bool has_gap = gap_data.has_gap; int gap_start = gap_data.gap_start; int gap_end = gap_data.gap_end; - while (has_gap && gap_start < feat_start) { + while (has_gap && gap_start <= feat_start) { const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_start <= gap_end) ); const bool gapMatch = ( subtype == CSeqFeatData::eSubtype_gap && feat_start == gap_start && feat_end == gap_end - 1 ); if ( noGapSizeProblem && ! 
gapMatch ) { @@ -3170,7 +3170,7 @@ void CFlatGatherer::x_GatherFeaturesOnWholeLocationIdx {{ // map features from protein if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) { - x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx, + x_GetFeatsOnCdsProductIdx(original_feat, ctx, slice_mapper, CConstRef(static_cast(item.GetNonNullPointer())) ); } @@ -3206,7 +3206,7 @@ void CFlatGatherer::x_GatherFeaturesOnWholeLocationIdx // when all features are done, output remaining gaps while (gap_data.has_gap) { - const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_data.gap_start < gap_data.gap_end) ); + const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_data.gap_start <= gap_data.gap_end) ); if( noGapSizeProblem /* && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) { item.Reset( s_NewGapItem(gap_data.gap_start, gap_data.gap_end, gap_data.gap_length, gap_data.gap_type, gap_data.gap_evidence, gap_data.is_unknown_length, gap_data.is_assembly_gap, ctx) ); @@ -3491,17 +3491,18 @@ void CFlatGatherer::x_GatherFeaturesOnRangeIdx CRef slice_mapper = s_MakeSliceMapper(loc, ctx); // Gaps of length zero are only shown for SwissProt Genpept records - const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot ); + // const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot ); // cache to avoid repeated calculations - const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ; + // const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ; CSeq_feat_Handle prev_feat; CConstRef item; CRef idx = ctx.GetSeqEntryIndex(); if (! idx) return; - CRef bsx = idx->GetBioseqIndex (loc); + // CRef bsx = idx->GetBioseqIndex (loc); + CRef bsx = idx->GetBioseqIndex (); if (! 
bsx) return; const vector>& gaps = bsx->GetGapIndices(); @@ -3515,8 +3516,10 @@ void CFlatGatherer::x_GatherFeaturesOnRangeIdx s_SetGapIdxData (gap_data, gaps); } - bsx->IterateFeatures([this, &ctx, &scope, &prev_feat, &gap_it, &loc_len, &item, &out, &slice_mapper, - gaps, &gap_data, showGapsOfSizeZero, bsx](CFeatureIndex& sfx) { + CSeq_loc slp; + slp.Assign(loc); + bsx->IterateFeatures(slp, [this, &ctx, &scope, &prev_feat, &item, &out, &slice_mapper, + gaps, bsx](CFeatureIndex& sfx) { try { CMappedFeat mf = sfx.GetMappedFeat(); CSeq_feat_Handle feat = sfx.GetSeqFeatHandle(); // it->GetSeq_feat_Handle(); @@ -3582,7 +3585,7 @@ void CFlatGatherer::x_GatherFeaturesOnRangeIdx // may need to map sig_peptide on a different segment if (feat.GetData().IsCdregion()) { if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) { - x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx, slice_mapper); + x_GetFeatsOnCdsProductIdx(original_feat, ctx, slice_mapper); } } return; @@ -3597,135 +3600,11 @@ void CFlatGatherer::x_GatherFeaturesOnRangeIdx const CSeq_loc& loc = original_feat.GetLocation(); CRef loc2(new CSeq_loc); loc2->Assign(*feat_loc); - loc2->SetId(*loc.GetId()); - - item.Reset( x_NewFeatureItem(mf, ctx, loc2, m_Feat_Tree, CFeatureItem::eMapped_not_mapped, true) ); - out << item; - - // Add more features depending on user preferences - - switch (feat.GetFeatSubtype()) { - case CSeqFeatData::eSubtype_mRNA: - {{ - // optionally map CDS from cDNA onto genomic - if (s_CopyCDSFromCDNA(ctx) && feat.IsSetProduct()) { - x_CopyCDSFromCDNA(original_feat, ctx); - } - break; - }} - case CSeqFeatData::eSubtype_cdregion: - {{ - // map features from protein - if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) { - x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx, - slice_mapper, - CConstRef(static_cast(item.GetNonNullPointer())) ); - } - break; - }} - default: - break; + const CSeq_id* id2 = loc.GetId(); + // test needed for gene in X55766, to 
prevent seg fault, but still does not produce correct mixed location + if (id2) { + loc2->SetId(*id2); } - } catch (CException& e) { - // special case: Job cancellation exceptions make us stop - // generating features. - CMappedFeat mf = sfx.GetMappedFeat(); - if( NStr::EqualNocase(e.what(), "job cancelled") || - NStr::EqualNocase(e.what(), "job canceled") ) - { - LOG_POST_X(2, Error << "Job canceled while processing feature " - << s_GetFeatDesc(mf.GetSeq_feat_Handle()) - << " [" << e << "]; flatfile may be truncated"); - return; - } - - // for cases where a halt is requested, just rethrow the exception - if( e.GetErrCodeString() == string("eHaltRequested") ) { - throw e; - } - - // post to log, go on to next feature - LOG_POST_X(2, Error << "Error processing feature " - << s_GetFeatDesc(mf.GetSeq_feat_Handle()) - << " [" << e << "]"); - } - }); // end of for loop -} - -size_t CFlatGatherer::x_GatherFeaturesOnSegmentIdx -(const CSeq_loc& loc, - SAnnotSelector& sel, - CBioseqContext& ctx) const -{ - size_t count = 0; - - CScope& scope = ctx.GetScope(); - CFlatItemOStream& out = *m_ItemOS; - - // logic to handle offsets that occur when user sets - // the -from and -to command-line parameters - // build slice_mapper for mapping locations - CRef slice_mapper = s_MakeSliceMapper(loc, ctx); - - // Gaps of length zero are only shown for SwissProt Genpept records - const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot ); - - // cache to avoid repeated calculations - const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ; - - CSeq_feat_Handle prev_feat; - CConstRef item; - - CRef idx = ctx.GetSeqEntryIndex(); - if (! idx) return count; - CRef bsx = idx->GetBioseqIndex (); - if (! 
bsx) return count; - - count = bsx->IterateFeaturesByLoc(loc, [this, &ctx, &scope, &prev_feat, &loc_len, - &item, &out, &slice_mapper, showGapsOfSizeZero, bsx](CFeatureIndex& sfx) { - try { - CMappedFeat mf = sfx.GetMappedFeat(); - CSeq_feat_Handle feat = sfx.GetSeqFeatHandle(); // it->GetSeq_feat_Handle(); - const CSeq_feat& original_feat = sfx.GetMappedFeat().GetOriginalFeature(); // it->GetOriginalFeature(); - - /// we need to cleanse CDD features - - s_CleanCDDFeature(original_feat); - - const CFlatFileConfig& cfg = ctx.Config(); - CSeqFeatData::ESubtype subtype = feat.GetFeatSubtype(); - if (cfg.HideCDDFeatures() && - (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) && - s_IsCDD(feat)) { - return; - } - - if( (feat.GetFeatSubtype() == CSeqFeatData::eSubtype_gap) && ! feat.IsPlainFeat() ) { - // skip gaps when we take slices (i.e. "-from" and "-to" command-line args), - // unless they're a plain feature. - // (compare NW_001468136 (100 to 200000) and AC185591 (100 to 100000) ) - return; - } - - // supress duplicate features - if (prev_feat && s_IsDuplicateFeatures(prev_feat, feat)) { - return; - } - prev_feat = feat; - - CConstRef feat_loc( sfx.GetMappedLocation()); // &it->GetLocation()); - - feat_loc = s_NormalizeNullsBetween( feat_loc ); - - feat_loc = Seq_loc_Merge(*feat_loc, CSeq_loc::fMerge_Abutting, &scope); - - // HANDLE GAPS SECTION GOES HERE - - - const CSeq_loc& loc = original_feat.GetLocation(); - CRef loc2(new CSeq_loc); - loc2->Assign(*feat_loc); - loc2->SetId(*loc.GetId()); item.Reset( x_NewFeatureItem(mf, ctx, loc2, m_Feat_Tree, CFeatureItem::eMapped_not_mapped, true) ); out << item; @@ -3745,7 +3624,7 @@ size_t CFlatGatherer::x_GatherFeaturesOnSegmentIdx {{ // map features from protein if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) { - x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx, + x_GetFeatsOnCdsProductIdx(original_feat, ctx, slice_mapper, 
CConstRef(static_cast(item.GetNonNullPointer())) ); } @@ -3778,8 +3657,6 @@ size_t CFlatGatherer::x_GatherFeaturesOnSegmentIdx << " [" << e << "]"); } }); // end of for loop - - return count; } void CFlatGatherer::x_GatherFeaturesOnRange @@ -4020,10 +3897,35 @@ s_ContainsGaps( const CSeq_loc &loc ) return false; } -void CFlatGatherer::x_GatherFeatures(void) const +/* +static bool s_NotForceNearFeats(CBioseqContext& ctx) +{ + // asn2flat -id NW_003127872 -flags 2 -faster -custom 2048 + CRef idx = ctx.GetSeqEntryIndex(); + if (idx) { + CBioseq_Handle hdl = ctx.GetHandle(); + CRef bsx = idx->GetBioseqIndex (hdl); + if (bsx) { + if (bsx->IsForceOnlyNearFeats()) return false; + } + } + + return true; +} +*/ + +void CFlatGatherer::x_GatherFeaturesIdx(void) const { CBioseqContext& ctx = *m_Current; const CFlatFileConfig& cfg = ctx.Config(); + if ( ! cfg.UseSeqEntryIndexer()) return; + + CRef idx = ctx.GetSeqEntryIndex(); + if (! idx) return; + CBioseq_Handle hdl = ctx.GetHandle(); + CRef bsx = idx->GetBioseqIndex (hdl); + if (! bsx) return; + CFlatItemOStream& out = *m_ItemOS; CConstRef item; @@ -4059,96 +3961,144 @@ void CFlatGatherer::x_GatherFeatures(void) const } // collect features - // if ( ctx.IsSegmented() && cfg.IsStyleMaster() && cfg.OldFeaturesOrder() ) { - if ( cfg.UseSeqEntryIndexer() && ctx.IsDelta() && ! ctx.IsDeltaLitOnly() && cfg.IsStyleMaster() && ctx.GetLocation().IsWhole() ) { + if (ctx.GetLocation().IsWhole()) { + x_GatherFeaturesOnWholeLocationIdx(loc, sel, ctx); + } else { + x_GatherFeaturesOnRangeIdx(loc, sel, ctx); + } + + if ( ctx.IsProt() ) { + // Also collect features which this protein is their product. + // Currently there are only two possible candidates: Coding regions + // and Prot features (rare). - CRef idx = ctx.GetSeqEntryIndex(); - if (! idx) return; - CBioseq_Handle hdl = ctx.GetHandle(); - CRef bsx = idx->GetBioseqIndex (hdl); - if (! 
bsx) return; + // look for the Cdregion feature for this protein + CBioseq_Handle handle = ( ctx.CanGetMaster() ? ctx.GetMaster().GetHandle() : ctx.GetHandle() ); + SAnnotSelector sel(CSeqFeatData::e_Cdregion); + sel.SetByProduct().SetResolveDepth(0); + // try first in-TSE CDS + sel.SetLimitTSE(handle.GetTSE_Handle()); + CFeat_CI feat_it(handle, sel); + if ( !feat_it ) { + // then any other CDS + sel.SetLimitNone().ExcludeTSE(handle.GetTSE_Handle()); + feat_it = CFeat_CI(handle, sel); + } + if (feat_it) { + try { + CMappedFeat cds = *feat_it; - // Gaps of length zero are only shown for SwissProt Genpept records - const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot ); + // map CDS location to its location on the product + CSeq_loc_Mapper mapper(*cds.GetOriginalSeq_feat(), + CSeq_loc_Mapper::eLocationToProduct, + &ctx.GetScope()); + mapper.SetFuzzOption( CSeq_loc_Mapper::fFuzzOption_CStyle | CSeq_loc_Mapper::fFuzzOption_RemoveLimTlOrTr ); + CRef cds_prod = mapper.Map(cds.GetLocation()); + cds_prod = cds_prod->Merge( ( s_IsCircularTopology(ctx) ? CSeq_loc::fMerge_All : CSeq_loc::fSortAndMerge_All ), NULL ); - const vector>& gaps = bsx->GetGapIndices(); + // it's a common case that we map one residue past the edge of the protein (e.g. NM_131089). + // In that case, we shrink the cds's location back one residue. + if( cds_prod->IsInt() && cds.GetProduct().IsWhole() ) { + const CSeq_id *cds_prod_seq_id = cds.GetProduct().GetId(); + if( cds_prod_seq_id != NULL ) { + CBioseq_Handle prod_bioseq_handle = ctx.GetScope().GetBioseqHandle( *cds_prod_seq_id ); + if( prod_bioseq_handle ) { + const TSeqPos bioseq_len = prod_bioseq_handle.GetBioseqLength(); + if( cds_prod->GetInt().GetTo() >= bioseq_len ) { + cds_prod->SetInt().SetTo( bioseq_len - 1 ); + } + } + } + } - SGapIdxData gap_data{}; + // if there are any gaps in the location, we know that there was an issue with the mapping, so + // we fall back on the product. 
+ if( s_ContainsGaps(*cds_prod) ) { + cds_prod->Assign( cds.GetProduct() ); + } - gap_data.num_gaps = gaps.size(); - gap_data.next_gap = 0; + // remove fuzz + cds_prod->SetPartialStart( false, eExtreme_Positional ); + cds_prod->SetPartialStop ( false, eExtreme_Positional ); - if (gap_data.num_gaps > 0 && ! ctx.Config().HideGapFeatures()) { - s_SetGapIdxData (gap_data, gaps); + item.Reset( + x_NewFeatureItem(cds, ctx, &*cds_prod, m_Feat_Tree, + CFeatureItem::eMapped_from_cdna) ); + + out << item; + } catch (CAnnotMapperException& e) { + LOG_POST_X(2, Error << e ); + } } - SSeqMapSelector msel; - msel.SetFlags(CSeqMap::fFindAny); - CBioseq_Handle bsh = ctx.GetHandle(); + // look for Prot features (only for RefSeq records or + // GenBank not release_mode). + if ( ctx.IsRefSeq() || !cfg.ForGBRelease() ) { + SAnnotSelector prod_sel(CSeqFeatData::e_Prot, true); + prod_sel.SetLimitTSE(ctx.GetHandle().GetTopLevelEntry()); + prod_sel.SetResolveMethod(SAnnotSelector::eResolve_TSE); + prod_sel.SetOverlapType(SAnnotSelector::eOverlap_Intervals); + CFeat_CI it(ctx.GetHandle(), prod_sel); + ctx.GetFeatTree().AddFeatures(it); + for ( ; it; ++it) { + item.Reset(x_NewFeatureItem(*it, + ctx, + &it->GetProduct(), + m_Feat_Tree, + CFeatureItem::eMapped_from_prot) ); + out << item; + } + } + } +} - bool keepGoing = true; - bool noFeatsSeen = true; - int withoutFeats = 0; +void CFlatGatherer::x_GatherFeatures(void) const +{ + CBioseqContext& ctx = *m_Current; + const CFlatFileConfig& cfg = ctx.Config(); - SetDiagFilter(eDiagFilter_All, "!(1305.28,31)"); + if (cfg.UseSeqEntryIndexer()) { + x_GatherFeaturesIdx(); + return; + } - CConstRef seqmap; - if (ctx.GetLocation().IsWhole()) { - seqmap = &bsh.GetSeqMap(); - } else { - seqmap = CSeqMap::CreateSeqMapForSeq_loc(loc, &ctx.GetScope()); - } + CFlatItemOStream& out = *m_ItemOS; + CConstRef item; - for ( CSeqMap_CI seg(seqmap, &ctx.GetScope(), msel); seg; ++seg ) { - if (seg.GetType() != CSeqMap::eSeqGap) { - if (keepGoing) { - // go over 
each of the segments - ENa_strand strand = eNa_strand_unknown; - if (seg.GetRefMinusStrand()) { - strand = eNa_strand_minus; - } - // cout << "SEG " << seg.GetType() << " @ " << seg.GetPosition() << " - " << seg.GetEndPosition() << " " << seg.GetLength() << endl; - CRef sl = bsh.GetRangeSeq_loc(seg.GetPosition(), seg.GetEndPosition() - 1, strand); - if (sl) { - size_t count = x_GatherFeaturesOnSegmentIdx(*sl, *selp, ctx); - if (count > 0) { - noFeatsSeen = false; - } else if (ctx.IsEMBL() || ctx.IsDDBJ()) { - withoutFeats++; - if (withoutFeats > 20 && noFeatsSeen) { - keepGoing = false; - } - } - } - } - } else { - // cout << "GAP " << seg.GetType() << " @ " << seg.GetPosition() << " - " << seg.GetEndPosition() << " " << seg.GetLength() << endl; - const bool noGapSizeProblem = ( false || (seg.GetPosition() < seg.GetEndPosition()) ); - if( noGapSizeProblem /* && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) { - CConstRef item; - if (gap_data.has_gap) { - const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_data.gap_start < gap_data.gap_end) ); - if( noGapSizeProblem /* && ! 
s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) { - item.Reset( s_NewGapItem(gap_data.gap_start, gap_data.gap_end, gap_data.gap_length, gap_data.gap_type, - gap_data.gap_evidence, gap_data.is_unknown_length, gap_data.is_assembly_gap, ctx) ); - out << item; - } - if (gap_data.next_gap < gap_data.num_gaps) { - s_SetGapIdxData (gap_data, gaps); - } else { - gap_data.has_gap = false; - } - } - } + SAnnotSelector sel; + SAnnotSelector* selp = &sel; + if (ctx.GetAnnotSelector() != NULL) { + selp = &ctx.SetAnnotSelector(); + } + s_SetSelection(*selp, ctx); + + // optionally map gene from genomic onto cDNA + if ( ctx.IsInGPS() && cfg.CopyGeneToCDNA() && + ctx.GetBiomol() == CMolInfo::eBiomol_mRNA ) { + CMappedFeat mrna = GetMappedmRNAForProduct(ctx.GetHandle()); + if (mrna) { + CMappedFeat gene = GetBestGeneForMrna(mrna, &ctx.GetFeatTree()); + if (gene) { + CRef loc(new CSeq_loc); + loc->SetWhole(*ctx.GetPrimaryId()); + item.Reset( + x_NewFeatureItem(gene, ctx, loc, m_Feat_Tree, + CFeatureItem::eMapped_from_genomic) ); + out << item; } } + } - SetDiagFilter(eDiagFilter_All, ""); - + CSeq_loc loc; + if ( ctx.GetMasterLocation() != 0 ) { + loc.Assign(*ctx.GetMasterLocation()); } else { - x_GatherFeaturesOnLocation(loc, *selp, ctx); + loc.Assign(*ctx.GetHandle().GetRangeSeq_loc(0, 0)); } + // collect features + x_GatherFeaturesOnLocation(loc, *selp, ctx); + if ( ctx.IsProt() ) { // Also collect features which this protein is their product. 
// Currently there are only two possible candidates: Coding regions @@ -4334,7 +4284,6 @@ void s_FixIntervalProtToCds( // ============================================================================ void CFlatGatherer::x_GetFeatsOnCdsProductIdx( - CMappedFeat mf, const CSeq_feat& feat, CBioseqContext& ctx, CRef slice_mapper, @@ -4359,42 +4308,25 @@ void CFlatGatherer::x_GetFeatsOnCdsProductIdx( CBioseq_Handle prot; - prot = scope.GetBioseqHandleFromTSE(*prot_id, ctx.GetHandle()); + // prot = scope.GetBioseqHandleFromTSE(*prot_id, ctx.GetHandle()); + prot = scope.GetBioseqHandle(*prot_id); // !!! need a flag for fetching far proteins if (!prot) { return; } - - CRef idx = ctx.GetSeqEntryIndex(); - if (! idx) { - return; - } - - CRef bsx = idx->GetBioseqIndex (prot); - if (! bsx) return; - - /* CFeat_CI it(prot, s_GetCdsProductSel(ctx)); if (!it) { return; } ctx.GetFeatTree().AddFeatures( it ); // !!! - */ // map from cds product to nucleotide CSeq_loc_Mapper prot_to_cds(feat, CSeq_loc_Mapper::eProductToLocation, &scope); prot_to_cds.SetFuzzOption( CSeq_loc_Mapper::fFuzzOption_CStyle ); CSeq_feat_Handle prev; // keep track of the previous feature - /* - for ( ; it; ++it ) - */ - bsx->IterateFeatures([this, &ctx, &scope, &prev, &cfg, &prot_to_cds, &slice_mapper, &cdsFeatureItem, bsx](CFeatureIndex& sfx) { - - CMappedFeat mf = sfx.GetMappedFeat(); - CSeq_feat_Handle curr = sfx.GetSeqFeatHandle(); // it->GetSeq_feat_Handle(); - const CSeq_feat& original_feat = sfx.GetMappedFeat().GetOriginalFeature(); // it->GetOriginalFeature(); - + for ( ; it; ++it ) { + CSeq_feat_Handle curr = it->GetSeq_feat_Handle(); const CSeq_loc& curr_loc = curr.GetLocation(); CSeqFeatData::ESubtype subtype = curr.GetFeatSubtype(); @@ -4406,24 +4338,24 @@ void CFlatGatherer::x_GetFeatsOnCdsProductIdx( subtype != CSeqFeatData::eSubtype_transit_peptide_aa && subtype != CSeqFeatData::eSubtype_preprotein && subtype != CSeqFeatData::eSubtype_propeptide_aa) { - return; + continue; } - if ( 
cfg.HideCDDFeatures() && + if ( ( cfg.HideCDDFeatures() || ! cfg.ShowCDDFeatures() ) && (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) && s_IsCDD(curr) ) { // passing this test prevents mapping of COG CDD region features - return; + continue; } // suppress duplicate features (on protein) if (prev && s_IsDuplicateFeatures(curr, prev)) { - return; + continue; } /// we need to cleanse CDD features - s_CleanCDDFeature(original_feat); + s_CleanCDDFeature(it->GetOriginalFeature()); // map prot location to nuc location CRef loc(prot_to_cds.Map(curr_loc)); @@ -4438,20 +4370,20 @@ void CFlatGatherer::x_GetFeatsOnCdsProductIdx( } } if (!loc || loc->IsNull()) { - return; + continue; } if ( !s_SeqLocEndsOnBioseq(*loc, ctx, eEndsOnBioseqOpt_AnyPartOfSeqLoc, CSeqFeatData::e_Cdregion) ) { - return; + continue; } CConstRef item; // for command-line args "-from" and "-to" - CMappedFeat mapped_feat = mf; + CMappedFeat mapped_feat = *it; if( slice_mapper && loc ) { CRange range = ctx.GetLocation().GetTotalRange(); CRef mapped_loc = slice_mapper->Map(*CFeatTrim::Apply(*loc, range)); if( mapped_loc->IsNull() ) { - return; + continue; } CRef feat(new CSeq_feat()); feat->Assign(mapped_feat.GetMappedFeature()); @@ -4461,7 +4393,7 @@ void CFlatGatherer::x_GetFeatsOnCdsProductIdx( loc = mapped_loc; } - item = ConstRef( x_NewFeatureItem(mapped_feat, ctx, + item = ConstRef( x_NewFeatureItem(*it, ctx, s_NormalizeNullsBetween(loc), m_Feat_Tree, CFeatureItem::eMapped_from_prot, true, cdsFeatureItem ) ); @@ -4469,7 +4401,7 @@ void CFlatGatherer::x_GetFeatsOnCdsProductIdx( *m_ItemOS << item; prev = curr; - }); // end of iterate loop + } } // ============================================================================ diff --git a/c++/src/objtools/format/gbseq_formatter.cpp b/c++/src/objtools/format/gbseq_formatter.cpp index 17e698aa..94b00837 100644 --- a/c++/src/objtools/format/gbseq_formatter.cpp +++ b/c++/src/objtools/format/gbseq_formatter.cpp @@ -1,4 +1,4 
@@ -/* $Id: gbseq_formatter.cpp 601813 2020-02-13 18:41:46Z kans $ +/* $Id: gbseq_formatter.cpp 614619 2020-08-20 13:00:42Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -806,8 +806,8 @@ void CGBSeqFormatter::FormatReference str.append( s_CloseTag(" ", "GBXref")); str.append( s_CloseTag(" ", "GBReference_xref")); } - if ( ref.GetPMID() != 0 ) { - str.append( s_CombineStrings(" ", "GBReference_pubmed", ref.GetPMID())); + if ( ref.GetPMID() != ZERO_ENTREZ_ID ) { + str.append( s_CombineStrings(" ", "GBReference_pubmed", ENTREZ_ID_TO(int, ref.GetPMID()))); } if ( !ref.GetRemark().empty() ) { str.append( s_CombineStrings(" ", "GBReference_remark", ref.GetRemark())); diff --git a/c++/src/objtools/format/genbank_formatter.cpp b/c++/src/objtools/format/genbank_formatter.cpp index e75d7307..54474e0c 100644 --- a/c++/src/objtools/format/genbank_formatter.cpp +++ b/c++/src/objtools/format/genbank_formatter.cpp @@ -1,4 +1,4 @@ -/* $Id: genbank_formatter.cpp 602692 2020-02-28 22:11:47Z kans $ +/* $Id: genbank_formatter.cpp 615046 2020-08-26 13:40:11Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -523,7 +523,7 @@ void CGenbankFormatter::FormatVersion version_line << version.GetAccession(); if ( version.GetGi() > ZERO_GI ) { const CFlatFileConfig& cfg = GetContext().GetConfig(); - if (! cfg.HideGI()) { + if (! 
(cfg.HideGI() || cfg.IsPolicyFtp())) { version_line << " GI:" << version.GetGi(); } } @@ -852,7 +852,7 @@ void CGenbankFormatter::FormatReference x_Consortium(l, ref, ctx); x_Title(l, ref, ctx); x_Journal(l, ref, ctx); - if (ref.GetPMID() == 0) { // suppress MEDLINE if has PUBMED + if (ref.GetPMID() == ZERO_ENTREZ_ID) { // suppress MEDLINE if has PUBMED x_Medline(l, ref, ctx); } x_Pubmed(l, ref, ctx); @@ -1054,10 +1054,10 @@ void CGenbankFormatter::x_Medline bool bHtml = ctx.Config().DoHTML(); string strDummy( "[PUBMED-ID]" ); - if ( ref.GetMUID() != 0 ) { + if ( ref.GetMUID() != ZERO_ENTREZ_ID) { Wrap(l, GetWidth(), "MEDLINE", strDummy, eSubp); } - string strPubmed( NStr::IntToString( ref.GetMUID() ) ); + string strPubmed( NStr::NumericToString( ref.GetMUID() ) ); if ( bHtml ) { string strLink = "Config().GetHTMLFormatter().FormatLocation(strLink, item.GetFeat().GetLocation(), iGi, strRawKey); + item.GetContext()->Config().GetHTMLFormatter().FormatLocation(strLink, item.GetFeat().GetLocation(), GI_TO(TIntId, iGi), strRawKey); #else // check if this is a protein or nucleotide link bool is_prot = false; @@ -1913,7 +1913,7 @@ s_FormatRegularSequencePiece fill(line, line+kLineBufferSize, ' '); // add the span stuff - TSeqPos length_of_span_before_base_count = 0; + length_of_span_before_base_count = 0; if( bHtml ) { string kSpan = " GetLocation().IsWhole()) { diff --git a/c++/src/objtools/format/genome_project_item.cpp b/c++/src/objtools/format/genome_project_item.cpp index 13a25fdc..734804fd 100644 --- a/c++/src/objtools/format/genome_project_item.cpp +++ b/c++/src/objtools/format/genome_project_item.cpp @@ -1,4 +1,4 @@ -/* $Id: genome_project_item.cpp 577454 2019-01-03 22:58:25Z kans $ +/* $Id: genome_project_item.cpp 615791 2020-09-03 18:19:35Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -205,12 +205,16 @@ void CGenomeProjectItem::x_GatherInfo(CBioseqContext& ctx) } string strHeader = 
uo.GetType().GetStr(); if ( NStr::EqualNocase(strHeader, "GenomeProjectsDB")) { - genome_projects_user_obje = &uo; - x_SetObject(*desc); - } else if( NStr::EqualNocase( strHeader, "DBLink" ) ) { - dblink_user_obj = &uo; - x_SetObject(*desc); - } + if (! genome_projects_user_obje) { + genome_projects_user_obje = &uo; + x_SetObject(*desc); + } + } else if( NStr::EqualNocase( strHeader, "DBLink" ) ) { + if (! dblink_user_obj) { + dblink_user_obj = &uo; + x_SetObject(*desc); + } + } } // process GenomeProjectsDB diff --git a/c++/src/objtools/format/inst_info_map.cpp b/c++/src/objtools/format/inst_info_map.cpp index a6c1df84..78e7b30c 100644 --- a/c++/src/objtools/format/inst_info_map.cpp +++ b/c++/src/objtools/format/inst_info_map.cpp @@ -1,4 +1,4 @@ -/* $Id: inst_info_map.cpp 601754 2020-02-12 23:10:12Z kans $ +/* $Id: inst_info_map.cpp 611903 2020-07-13 15:51:00Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -49,7 +49,7 @@ CInstInfoMap::GetInstitutionVoucherInfo( static const string s_acbr_base("http://www.acbr-database.at/BioloMICS.aspx?Link=T&DB=0&Table=0&Descr="); static const string s_atcc_base("http://www.atcc.org/Products/All/"); - static const string s_bccm_base("http://bccm.belspo.be/catalogues/ulc-strain-details?ACCESSION_NUMBER=ULC"); + static const string s_bccm_base("https://bccm.belspo.be/catalogues/bm-details?accession_number=ULC%20"); static const string s_bcrc_base("https://catalog.bcrc.firdi.org.tw/BSAS_cart/controller?event=SEARCH&bcrc_no="); static const string s_cas_base("http://collections.calacademy.org/herp/specimen/"); static const string s_cbs_base("http://www.cbs.knaw.nl/collections/BioloMICS.aspx?Fields=All&ExactMatch=T&Table=CBS+strain+database&Name=CBS+"); @@ -90,9 +90,11 @@ CInstInfoMap::GetInstitutionVoucherInfo( static const string yp0("0"); + static const string s_bccm_trim("ULC"); + static const string s_colon_pfx(":"); static const string s_uscr_pfx("_"); - + 
static const string s_kui_pfx("KUI/"); static const string s_kuit_pfx("KUIT/"); static const string s_psu_pfx("PSU:Mamm:"); @@ -113,114 +115,114 @@ CInstInfoMap::GetInstitutionVoucherInfo( typedef SStaticPair TVoucherInfoElem; static const TVoucherInfoElem sc_voucher_info_map[] = { - { "ACBR", TVoucherInfoRef(new SVoucherInfo(&s_acbr_base, false, false, 0, NULL, NULL, &s_acbr_sfx, "Austrian Center of Biological Resources and Applied Mycology") ) }, - { "ATCC", TVoucherInfoRef(new SVoucherInfo(&s_atcc_base, false, false, 0, NULL, NULL, &s_atcc_sfx, "American Type Culture Collection") ) }, - { "BCCM", TVoucherInfoRef(new SVoucherInfo(&s_bccm_base, false, false, 0, NULL, NULL, NULL, "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) }, - { "BCCM/ULC", TVoucherInfoRef(new SVoucherInfo(&s_bccm_base, false, false, 0, NULL, NULL, NULL, "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) }, - { "BCRC", TVoucherInfoRef(new SVoucherInfo(&s_bcrc_base, false, false, 0, NULL, NULL, &s_bcrc_sfx, "Bioresource Collection and Research Center") ) }, - { "CAS:HERP", TVoucherInfoRef(new SVoucherInfo(&s_cas_base, true, false, 0, NULL, &s_colon_pfx, NULL, "California Academy of Sciences, Herpetology collection") ) }, - { "CBS", TVoucherInfoRef(new SVoucherInfo(&s_cbs_base, false, false, 0, NULL, NULL, NULL, "Westerdijk Fungal Biodiversity Institute") ) }, - { "CCAP", TVoucherInfoRef(new SVoucherInfo(&s_ccap_base, false, false, 0, NULL, NULL, NULL, "Culture Collection of Algae and Protozoa") ) }, - { "CCMP", TVoucherInfoRef(new SVoucherInfo(&s_ccmp_base, false, false, 0, NULL, NULL, NULL, "Provasoli-Guillard National Center for Culture of Marine Phytoplankton") ) }, - { "CCUG", TVoucherInfoRef(new SVoucherInfo(&s_ccug_base, false, false, 0, NULL, NULL, NULL, "Culture Collection, University of Goteborg, Department of Clinical Bacteriology") ) }, - { "CFMR", TVoucherInfoRef(new SVoucherInfo(&s_cfmr_base, false, false, 0, 
NULL, NULL, NULL, "USDA Forest Service, Center for Forest Mycology Research") ) }, - { "CHR", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, &s_uscr_pfx, NULL, "Allan Herbarium, Landcare Research New Zealand Limited") ) }, - { "CRCM:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Washington State University, Charles R. Conner Museum, bird collection") ) }, - { "CUMV:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Cornell University Museum of Vertebrates, Fish Collection") ) }, - { "Coriell", TVoucherInfoRef(new SVoucherInfo(&s_cori_base, false, false, 0, NULL, NULL, NULL, "Coriell Institute for Medical Research") ) }, - { "DGR:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, bird tissue collection") ) }, - { "DGR:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, entomology tissue collection") ) }, - { "DGR:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, fish tissue collection") ) }, - { "DGR:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, herpetology tissue collection") ) }, - { "DGR:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, mammal tissue collection") ) }, - { "DMNS:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Denver Museum of Nature and Science, Ornithology Collections") ) }, - { "DMNS:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, 
"Denver Museum of Nature and Science, Mammology Collection") ) }, - { "DMNS:Para", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Denver Museum of Nature and Science, Parasite Collection") ) }, - { "DSM", TVoucherInfoRef(new SVoucherInfo(&s_dsm_base, false, false, 0, NULL, NULL, NULL, "Deutsche Sammlung von Mikroorganismen und Zellkulturen GmbH") ) }, - { "DSMZ", TVoucherInfoRef(new SVoucherInfo(&s_dsmz_base, false, false, 0, NULL, NULL, NULL, "Deutsche Sammlung von Mikroorganismen und Zellkulturen") ) }, - { "EMEC", TVoucherInfoRef(new SVoucherInfo(&s_emec_base, true, false, 0, NULL, NULL, NULL, "Essig Museum") ) }, - { "EMEC:EMEC", TVoucherInfoRef(new SVoucherInfo(&s_emec_base, false, true, 0, NULL, NULL, NULL, "Essig Museum") ) }, - { "EMEC:UCIS", TVoucherInfoRef(new SVoucherInfo(&s_emec_base, false, true, 0, NULL, NULL, NULL, "Essig Museum") ) }, - { "FRR", TVoucherInfoRef(new SVoucherInfo(&s_frr_base, false, false, 0, NULL, NULL, NULL, "Food Science Australia, Ryde") ) }, - { "FSU", TVoucherInfoRef(new SVoucherInfo(&s_fsu_base, false, false, 0, NULL, NULL, NULL, "Jena Microbial Resource Collection") ) }, - { "ICMP", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, &s_uscr_pfx, NULL, "International Collection of Microorganisms from Plants") ) }, - { "JCM", TVoucherInfoRef(new SVoucherInfo(&s_jcm_base, false, false, 0, NULL, NULL, NULL, "Japan Collection of Microorganisms") ) }, - { "KCTC", TVoucherInfoRef(new SVoucherInfo(&s_kctc_base, false, false, 0, NULL, NULL, NULL, "Korean Collection for Type Cultures") ) }, - { "KNWR:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Kenai National Wildlife Refuge, Entomology Collection") ) }, - { "KU:I", TVoucherInfoRef(new SVoucherInfo(&s_ku_base, false, false, 0, NULL, &s_kui_pfx, &s_ku_sfx, "University of Kansas, Museum of Natural History, Ichthyology collection") ) }, - { "KU:IT", TVoucherInfoRef(new 
SVoucherInfo(&s_ku_base, false, false, 0, NULL, &s_kuit_pfx, &s_ku_sfx, "University of Kansas, Museum of Natural History, Ichthyology tissue collection") ) }, - { "KWP:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Kenelm W. Philip Collection, University of Alaska Museum of the North, Lepidoptera collection") ) }, - { "MAFF", TVoucherInfoRef(new SVoucherInfo(&s_maff_base, false, false, 0, NULL, NULL, NULL, "Genebank, Ministry of Agriculture Forestry and Fisheries") ) }, - { "MCZ:Bird", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Ornithology Collection") ) }, - { "MCZ:Cryo", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Cryogenic Collection") ) }, - { "MCZ:Ent", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Entomology Collection") ) }, - { "MCZ:Fish", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Icthyology Collection") ) }, - { "MCZ:Herp", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Herpetology Collection") ) }, - { "MCZ:IP", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Invertebrate Paleontology Collection") ) }, - { "MCZ:IZ", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Invertebrate Zoology Collection") ) }, - { "MCZ:Ich", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Icthyology Collection") ) }, - { "MCZ:Mala", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, 
false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Malacology Collection") ) }, - { "MCZ:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Mammalogy Collection") ) }, - { "MCZ:Orn", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Ornithology Collection") ) }, - { "MLZ:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Moore Laboratory of Zoology, Occidental College, Bird Collection" ) ) }, - { "MLZ:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Moore Laboratory of Zoology, Occidental College, Mammal Collection" ) ) }, - { "MSB:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Southwestern Biology, Bird Collection") ) }, - { "MSB:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Southwestern Biology, Mammal Collection") ) }, - { "MSB:Para", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Southwestern Biology, Parasitology Collection") ) }, - { "MTCC", TVoucherInfoRef(new SVoucherInfo(&s_mtcc_base, false, false, 0, NULL, NULL, NULL, "Microbial Type Culture Collection & Gene Bank") ) }, - { "MUCL", TVoucherInfoRef(new SVoucherInfo(&s_mucl_base, false, false, 0, NULL, NULL, &s_mucl_sfx, "Mycotheque de l'Universite Catholique de Louvain") ) }, - { "MVZ:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Bird Collection") ) }, - { "MVZ:Egg", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Egg Collection") ) }, - { 
"MVZ:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Herpetology Collection") ) }, - { "MVZ:Hild", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Milton Hildebrand collection") ) }, - { "MVZ:Img", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Image Collection") ) }, - { "MVZ:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Mammal Collection") ) }, - { "MVZ:Page", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Notebook Page Collection") ) }, - { "MVZObs:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Herpetology Collection") ) }, - { "NBRC", TVoucherInfoRef(new SVoucherInfo(&s_nbrc_base, false, false, 8, &yp0, NULL, NULL, "NITE Biological Resource Center") ) }, - { "NBSB:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "National Biomonitoring Specimen Bank, U.S. 
Geological Survey, bird collection") ) }, - { "NCIMB", TVoucherInfoRef(new SVoucherInfo(&s_ncimb_base, false, false, 0, NULL, NULL, NULL, "National Collections of Industrial Food and Marine Bacteria (incorporating the NCFB)") ) }, - { "NCTC", TVoucherInfoRef(new SVoucherInfo(&s_nctc_base, false, false, 0, NULL, NULL, NULL, "National Collection of Type Cultures") ) }, - { "NRRL", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_base, false, false, 0, NULL, NULL, NULL, "Agricultural Research Service Culture Collection") ) }, - { "NRRL:MOLD", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_mold, false, false, 0, NULL, NULL, NULL, "Agricultural Research Service Culture Collection, Mold collection") ) }, - { "NRRL:PROK", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_prok, false, false, 0, NULL, NULL, NULL, "Agricultural Research Service Culture Collection, Prokaryotic collection") ) }, - { "NRRL:YEAST", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_yest, false, false, 0, NULL, NULL, NULL, "Agricultural Research Service Culture Collection, Yeast Collection") ) }, - { "NZAC", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, &s_uscr_pfx, NULL, "New Zealand Arthropod Collection") ) }, - { "PCC", TVoucherInfoRef(new SVoucherInfo(&s_pcc_base, false, false, 0, NULL, NULL, NULL, "Pasteur Culture Collection of Cyanobacteria") ) }, - { "PCMB", TVoucherInfoRef(new SVoucherInfo(&s_pcmb_base, false, false, 0, NULL, NULL, NULL, "The Pacific Center for Molecular Biodiversity") ) }, - { "PDD", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, &s_uscr_pfx, NULL, "New Zealand Fungarium") ) }, - { "PSU:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, false, false, 0, NULL, &s_psu_pfx, NULL, "Portland State University, Vertebrate Biology Museum, Mammal Collection") ) }, - { "PYCC", TVoucherInfoRef(new SVoucherInfo(&s_pycc_base, false, false, 0, NULL, NULL, &s_pycc_sfx, "Portuguese Yeast Culture Collection") ) }, - { "SAG", TVoucherInfoRef(new SVoucherInfo(&s_sag_base, 
false, false, 0, NULL, NULL, NULL, "Sammlung von Algenkulturen at Universitat Gottingen") ) }, - { "TGRC", TVoucherInfoRef(new SVoucherInfo(&s_tgrc_base, false, false, 0, NULL, NULL, NULL, "C.M. Rick Tomato Genetics Resource Center") ) }, - { "UAM:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Bird Collection") ) }, - { "UAM:Bryo", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Bryozoan Collection") ) }, - { "UAM:Crus", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Marine Arthropod Collection") ) }, - { "UAM:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Insect Collection") ) }, - { "UAM:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Fish Collection") ) }, - { "UAM:Herb", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, UAM Herbarium") ) }, - { "UAM:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Amphibian and Reptile Collection") ) }, - { "UAM:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Mammal Collection") ) }, - { "UAM:Moll", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Mollusc Collection") ) }, - { "UAM:Paleo", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, paleontology collection") ) }, - { 
"UAMH", TVoucherInfoRef(new SVoucherInfo(&s_uamh_base, false, false, 0, NULL, NULL, NULL, "Centre for Global Microfungal Biodiversity") ) }, - { "UAMObs:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Mammal Collection") ) }, - { "ULC", TVoucherInfoRef(new SVoucherInfo(&s_bccm_base, false, false, 0, NULL, NULL, NULL, "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) }, - { "USNM:Birds", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Division of Birds") ) }, - { "USNM:ENT", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Entomology Collection") ) }, - { "USNM:Fish", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, National Fish Collection") ) }, - { "USNM:Herp", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Division of Amphibians and Reptiles") ) }, - { "USNM:IZ", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Department of Invertebrate Zoology") ) }, - { "USNM:MAMM", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Division of Mammals") ) }, - { "WNMU:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Western New Mexico University Museum, bird collection") ) }, - { "WNMU:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Western New Mexico 
University Museum, fish collection") ) }, - { "WNMU:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Western New Mexico University Museum, mammal collection") ) }, - { "YPM:ENT", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, &s_ypment_pfx, NULL, "Yale Peabody Museum of Natural History, Entomology Collection") ) }, - { "YPM:HER", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, &s_ypmher_pfx, NULL, "Yale Peabody Museum of Natural History, Herpetology Collection") ) }, - { "YPM:ICH", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, &s_ypmich_pfx, NULL, "Yale Peabody Museum of Natural History, Ichthyology Collection") ) }, - { "YPM:IZ", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, &s_ypmiz_pfx, NULL, "Yale Peabody Museum of Natural History, Invertebrate Zoology Collection") ) }, - { "YPM:MAM", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, &s_ypmmam_pfx, NULL, "Yale Peabody Museum of Natural History, Mammology Collection") ) }, - { "YPM:ORN", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, &s_ypmorn_pfx, NULL, "Yale Peabody Museum of Natural History, Ornithology Collection") ) } + { "ACBR", TVoucherInfoRef(new SVoucherInfo(&s_acbr_base, false, false, 0, NULL, NULL, NULL, &s_acbr_sfx, "Austrian Center of Biological Resources and Applied Mycology") ) }, + { "ATCC", TVoucherInfoRef(new SVoucherInfo(&s_atcc_base, false, false, 0, NULL, NULL, NULL, &s_atcc_sfx, "American Type Culture Collection") ) }, + { "BCCM", TVoucherInfoRef(new SVoucherInfo(&s_bccm_base, false, false, 4, &yp0, NULL, NULL, NULL, "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) }, + { "BCCM/ULC", TVoucherInfoRef(new SVoucherInfo(&s_bccm_base, false, false, 4, &yp0, &s_bccm_trim, NULL, NULL, "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) }, + { "BCRC", 
TVoucherInfoRef(new SVoucherInfo(&s_bcrc_base, false, false, 0, NULL, &s_bccm_trim, NULL, &s_bcrc_sfx, "Bioresource Collection and Research Center") ) }, + { "CAS:HERP", TVoucherInfoRef(new SVoucherInfo(&s_cas_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "California Academy of Sciences, Herpetology collection") ) }, + { "CBS", TVoucherInfoRef(new SVoucherInfo(&s_cbs_base, false, false, 0, NULL, NULL, NULL, NULL, "Westerdijk Fungal Biodiversity Institute") ) }, + { "CCAP", TVoucherInfoRef(new SVoucherInfo(&s_ccap_base, false, false, 0, NULL, NULL, NULL, NULL, "Culture Collection of Algae and Protozoa") ) }, + { "CCMP", TVoucherInfoRef(new SVoucherInfo(&s_ccmp_base, false, false, 0, NULL, NULL, NULL, NULL, "Provasoli-Guillard National Center for Culture of Marine Phytoplankton") ) }, + { "CCUG", TVoucherInfoRef(new SVoucherInfo(&s_ccug_base, false, false, 0, NULL, NULL, NULL, NULL, "Culture Collection, University of Goteborg, Department of Clinical Bacteriology") ) }, + { "CFMR", TVoucherInfoRef(new SVoucherInfo(&s_cfmr_base, false, false, 0, NULL, NULL, NULL, NULL, "USDA Forest Service, Center for Forest Mycology Research") ) }, + { "CHR", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, NULL, &s_uscr_pfx, NULL, "Allan Herbarium, Landcare Research New Zealand Limited") ) }, + { "CRCM:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Washington State University, Charles R. 
Conner Museum, bird collection") ) }, + { "CUMV:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Cornell University Museum of Vertebrates, Fish Collection") ) }, + { "Coriell", TVoucherInfoRef(new SVoucherInfo(&s_cori_base, false, false, 0, NULL, NULL, NULL, NULL, "Coriell Institute for Medical Research") ) }, + { "DGR:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, bird tissue collection") ) }, + { "DGR:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, entomology tissue collection") ) }, + { "DGR:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, fish tissue collection") ) }, + { "DGR:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, herpetology tissue collection") ) }, + { "DGR:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, mammal tissue collection") ) }, + { "DMNS:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Denver Museum of Nature and Science, Ornithology Collections") ) }, + { "DMNS:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Denver Museum of Nature and Science, Mammology Collection") ) }, + { "DMNS:Para", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Denver Museum of Nature and Science, Parasite Collection") ) }, + { "DSM", TVoucherInfoRef(new SVoucherInfo(&s_dsm_base, false, false, 0, NULL, NULL, NULL, 
NULL, "Deutsche Sammlung von Mikroorganismen und Zellkulturen GmbH") ) }, + { "DSMZ", TVoucherInfoRef(new SVoucherInfo(&s_dsmz_base, false, false, 0, NULL, NULL, NULL, NULL, "Deutsche Sammlung von Mikroorganismen und Zellkulturen") ) }, + { "EMEC", TVoucherInfoRef(new SVoucherInfo(&s_emec_base, true, false, 0, NULL, NULL, NULL, NULL, "Essig Museum") ) }, + { "EMEC:EMEC", TVoucherInfoRef(new SVoucherInfo(&s_emec_base, false, true, 0, NULL, NULL, NULL, NULL, "Essig Museum") ) }, + { "EMEC:UCIS", TVoucherInfoRef(new SVoucherInfo(&s_emec_base, false, true, 0, NULL, NULL, NULL, NULL, "Essig Museum") ) }, + { "FRR", TVoucherInfoRef(new SVoucherInfo(&s_frr_base, false, false, 0, NULL, NULL, NULL, NULL, "Food Science Australia, Ryde") ) }, + { "FSU", TVoucherInfoRef(new SVoucherInfo(&s_fsu_base, false, false, 0, NULL, NULL, NULL, NULL, "Jena Microbial Resource Collection") ) }, + { "ICMP", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, NULL, &s_uscr_pfx, NULL, "International Collection of Microorganisms from Plants") ) }, + { "JCM", TVoucherInfoRef(new SVoucherInfo(&s_jcm_base, false, false, 0, NULL, NULL, NULL, NULL, "Japan Collection of Microorganisms") ) }, + { "KCTC", TVoucherInfoRef(new SVoucherInfo(&s_kctc_base, false, false, 0, NULL, NULL, NULL, NULL, "Korean Collection for Type Cultures") ) }, + { "KNWR:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Kenai National Wildlife Refuge, Entomology Collection") ) }, + { "KU:I", TVoucherInfoRef(new SVoucherInfo(&s_ku_base, false, false, 0, NULL, NULL, &s_kui_pfx, &s_ku_sfx, "University of Kansas, Museum of Natural History, Ichthyology collection") ) }, + { "KU:IT", TVoucherInfoRef(new SVoucherInfo(&s_ku_base, false, false, 0, NULL, NULL, &s_kuit_pfx, &s_ku_sfx, "University of Kansas, Museum of Natural History, Ichthyology tissue collection") ) }, + { "KWP:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, 
NULL, "Kenelm W. Philip Collection, University of Alaska Museum of the North, Lepidoptera collection") ) }, + { "MAFF", TVoucherInfoRef(new SVoucherInfo(&s_maff_base, false, false, 0, NULL, NULL, NULL, NULL, "Genebank, Ministry of Agriculture Forestry and Fisheries") ) }, + { "MCZ:Bird", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Ornithology Collection") ) }, + { "MCZ:Cryo", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Cryogenic Collection") ) }, + { "MCZ:Ent", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Entomology Collection") ) }, + { "MCZ:Fish", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Icthyology Collection") ) }, + { "MCZ:Herp", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Herpetology Collection") ) }, + { "MCZ:IP", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Invertebrate Paleontology Collection") ) }, + { "MCZ:IZ", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Invertebrate Zoology Collection") ) }, + { "MCZ:Ich", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Icthyology Collection") ) }, + { "MCZ:Mala", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Malacology Collection") ) }, + { "MCZ:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, 
&s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Mammalogy Collection") ) }, + { "MCZ:Orn", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Ornithology Collection") ) }, + { "MLZ:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Moore Laboratory of Zoology, Occidental College, Bird Collection" ) ) }, + { "MLZ:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Moore Laboratory of Zoology, Occidental College, Mammal Collection" ) ) }, + { "MSB:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Southwestern Biology, Bird Collection") ) }, + { "MSB:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Southwestern Biology, Mammal Collection") ) }, + { "MSB:Para", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Southwestern Biology, Parasitology Collection") ) }, + { "MTCC", TVoucherInfoRef(new SVoucherInfo(&s_mtcc_base, false, false, 0, NULL, NULL, NULL, NULL, "Microbial Type Culture Collection & Gene Bank") ) }, + { "MUCL", TVoucherInfoRef(new SVoucherInfo(&s_mucl_base, false, false, 0, NULL, NULL, NULL, &s_mucl_sfx, "Mycotheque de l'Universite Catholique de Louvain") ) }, + { "MVZ:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Bird Collection") ) }, + { "MVZ:Egg", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Egg Collection") ) }, + { "MVZ:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate 
Zoology, University of California at Berkeley, Herpetology Collection") ) }, + { "MVZ:Hild", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Milton Hildebrand collection") ) }, + { "MVZ:Img", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Image Collection") ) }, + { "MVZ:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Mammal Collection") ) }, + { "MVZ:Page", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Notebook Page Collection") ) }, + { "MVZObs:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Herpetology Collection") ) }, + { "NBRC", TVoucherInfoRef(new SVoucherInfo(&s_nbrc_base, false, false, 8, &yp0, &yp0, NULL, NULL, "NITE Biological Resource Center") ) }, + { "NBSB:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "National Biomonitoring Specimen Bank, U.S. 
Geological Survey, bird collection") ) }, + { "NCIMB", TVoucherInfoRef(new SVoucherInfo(&s_ncimb_base, false, false, 0, NULL, NULL, NULL, NULL, "National Collections of Industrial Food and Marine Bacteria (incorporating the NCFB)") ) }, + { "NCTC", TVoucherInfoRef(new SVoucherInfo(&s_nctc_base, false, false, 0, NULL, NULL, NULL, NULL, "National Collection of Type Cultures") ) }, + { "NRRL", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_base, false, false, 0, NULL, NULL, NULL, NULL, "Agricultural Research Service Culture Collection") ) }, + { "NRRL:MOLD", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_mold, false, false, 0, NULL, NULL, NULL, NULL, "Agricultural Research Service Culture Collection, Mold collection") ) }, + { "NRRL:PROK", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_prok, false, false, 0, NULL, NULL, NULL, NULL, "Agricultural Research Service Culture Collection, Prokaryotic collection") ) }, + { "NRRL:YEAST", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_yest, false, false, 0, NULL, NULL, NULL, NULL, "Agricultural Research Service Culture Collection, Yeast Collection") ) }, + { "NZAC", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, NULL, &s_uscr_pfx, NULL, "New Zealand Arthropod Collection") ) }, + { "PCC", TVoucherInfoRef(new SVoucherInfo(&s_pcc_base, false, false, 0, NULL, NULL, NULL, NULL, "Pasteur Culture Collection of Cyanobacteria") ) }, + { "PCMB", TVoucherInfoRef(new SVoucherInfo(&s_pcmb_base, false, false, 0, NULL, NULL, NULL, NULL, "The Pacific Center for Molecular Biodiversity") ) }, + { "PDD", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, NULL, &s_uscr_pfx, NULL, "New Zealand Fungarium") ) }, + { "PSU:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, false, false, 0, NULL, NULL, &s_psu_pfx, NULL, "Portland State University, Vertebrate Biology Museum, Mammal Collection") ) }, + { "PYCC", TVoucherInfoRef(new SVoucherInfo(&s_pycc_base, false, false, 0, NULL, NULL, NULL, &s_pycc_sfx, "Portuguese Yeast Culture 
Collection") ) }, + { "SAG", TVoucherInfoRef(new SVoucherInfo(&s_sag_base, false, false, 0, NULL, NULL, NULL, NULL, "Sammlung von Algenkulturen at Universitat Gottingen") ) }, + { "TGRC", TVoucherInfoRef(new SVoucherInfo(&s_tgrc_base, false, false, 0, NULL, NULL, NULL, NULL, "C.M. Rick Tomato Genetics Resource Center") ) }, + { "UAM:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Bird Collection") ) }, + { "UAM:Bryo", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Bryozoan Collection") ) }, + { "UAM:Crus", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Marine Arthropod Collection") ) }, + { "UAM:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Insect Collection") ) }, + { "UAM:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Fish Collection") ) }, + { "UAM:Herb", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, UAM Herbarium") ) }, + { "UAM:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Amphibian and Reptile Collection") ) }, + { "UAM:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Mammal Collection") ) }, + { "UAM:Moll", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Mollusc Collection") ) }, + { "UAM:Paleo", TVoucherInfoRef(new 
SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, paleontology collection") ) }, + { "UAMH", TVoucherInfoRef(new SVoucherInfo(&s_uamh_base, false, false, 0, NULL, NULL, NULL, NULL, "Centre for Global Microfungal Biodiversity") ) }, + { "UAMObs:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Mammal Collection") ) }, + { "ULC", TVoucherInfoRef(new SVoucherInfo(&s_bccm_base, false, false, 4, &yp0, &s_bccm_trim, NULL, NULL, "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) }, + { "USNM:Birds", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Division of Birds") ) }, + { "USNM:ENT", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Entomology Collection") ) }, + { "USNM:Fish", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, National Fish Collection") ) }, + { "USNM:Herp", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Division of Amphibians and Reptiles") ) }, + { "USNM:IZ", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Department of Invertebrate Zoology") ) }, + { "USNM:MAMM", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Division of Mammals") ) }, + { "WNMU:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, 
NULL, NULL, &s_colon_pfx, NULL, "Western New Mexico University Museum, bird collection") ) }, + { "WNMU:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Western New Mexico University Museum, fish collection") ) }, + { "WNMU:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Western New Mexico University Museum, mammal collection") ) }, + { "YPM:ENT", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, NULL, &s_ypment_pfx, NULL, "Yale Peabody Museum of Natural History, Entomology Collection") ) }, + { "YPM:HER", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, NULL, &s_ypmher_pfx, NULL, "Yale Peabody Museum of Natural History, Herpetology Collection") ) }, + { "YPM:ICH", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, NULL, &s_ypmich_pfx, NULL, "Yale Peabody Museum of Natural History, Ichthyology Collection") ) }, + { "YPM:IZ", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, NULL, &s_ypmiz_pfx, NULL, "Yale Peabody Museum of Natural History, Invertebrate Zoology Collection") ) }, + { "YPM:MAM", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, NULL, &s_ypmmam_pfx, NULL, "Yale Peabody Museum of Natural History, Mammology Collection") ) }, + { "YPM:ORN", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, NULL, &s_ypmorn_pfx, NULL, "Yale Peabody Museum of Natural History, Ornithology Collection") ) } }; typedef CStaticArrayMap TVoucherInfoMap; DEFINE_STATIC_ARRAY_MAP(TVoucherInfoMap, sc_VoucherInfoMap, sc_voucher_info_map); diff --git a/c++/src/objtools/format/inst_info_map.hpp b/c++/src/objtools/format/inst_info_map.hpp index e8a6ab52..861b3fda 100644 --- a/c++/src/objtools/format/inst_info_map.hpp +++ b/c++/src/objtools/format/inst_info_map.hpp @@ -1,4 +1,4 @@ -/* $Id: inst_info_map.hpp 567275 2018-07-16 20:27:34Z kans $ +/* $Id: inst_info_map.hpp 
611903 2020-07-13 15:51:00Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -49,6 +49,7 @@ public: bool prependCollection, int pad_to, const string *pad_with, + const string *trim, const string *prefix, const string *suffix, const char *inst_full_name ): @@ -57,6 +58,7 @@ public: m_PrependCollection(prependCollection), m_PadTo(pad_to), m_PadWith(pad_with), + m_Trim(trim), m_Prefix(prefix), m_Suffix(suffix), m_InstFullName(inst_full_name) { } @@ -66,6 +68,7 @@ public: bool m_PrependCollection; int m_PadTo; const string *m_PadWith; + const string *m_Trim; const string *m_Prefix; const string *m_Suffix; const char *m_InstFullName; diff --git a/c++/src/objtools/format/primary_item.cpp b/c++/src/objtools/format/primary_item.cpp index 1d0937d2..7d3c5aa7 100644 --- a/c++/src/objtools/format/primary_item.cpp +++ b/c++/src/objtools/format/primary_item.cpp @@ -1,4 +1,4 @@ -/* $Id: primary_item.cpp 577454 2019-01-03 22:58:25Z kans $ +/* $Id: primary_item.cpp 610065 2020-06-10 17:10:26Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -203,13 +203,15 @@ void CPrimaryItem::x_GetStrForPrimary(CBioseqContext& ctx) string str; string s; - s.reserve(80); + string r; + s.reserve(82); CConstRef other_id; TSignedSeqPos last_stop = -1; ITERATE( TAlnConstList, it, seglist ) { s.erase(); + r.erase(); const CSeq_align& align = **it; TSeqPos this_start = align.GetSeqStart(0); @@ -287,13 +289,18 @@ void CPrimaryItem::x_GetStrForPrimary(CBioseqContext& ctx) } s += tid; s.resize(39, ' '); - s += NStr::IntToString(align.GetSeqStart(1) + 1) + '-' + + r = NStr::IntToString(align.GetSeqStart(1) + 1) + '-' + NStr::IntToString(align.GetSeqStop(1) + 1); + s += r; ENa_strand s0 = align.GetSeqStrand(0); ENa_strand s1 = align.GetSeqStrand(1); if (s0 != s1) { - s.resize(59, ' '); + if (r.length() > 20) { + s.resize(61, ' '); + } else { + s.resize(59, ' '); + } s += 'c'; } 
diff --git a/c++/src/objtools/format/qualifiers.cpp b/c++/src/objtools/format/qualifiers.cpp index ea9d9601..737aabfb 100644 --- a/c++/src/objtools/format/qualifiers.cpp +++ b/c++/src/objtools/format/qualifiers.cpp @@ -1,4 +1,4 @@ -/* $Id: qualifiers.cpp 578574 2019-01-22 18:30:29Z kans $ +/* $Id: qualifiers.cpp 615036 2020-08-26 13:38:52Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -144,33 +144,26 @@ static string s_GetGOText( } } else { bool add_dash = false; + // RW-922 only make one link from GO:id - text. + go_text.clear(); if (go_id != NULL) { - go_text = string( "GO:" ); if( is_html ) { go_text += ""; } + go_text += string( "GO:" ); go_text += *go_id; - if( is_html ) { - go_text += ""; - } add_dash = true; - } else { - go_text.clear(); } if ( text_string != 0 && text_string->length() > 0 ) { if (add_dash) { go_text += string( " - " ); } - if( is_html && go_id != NULL ) { - go_text += ""; - } // NO, we NO LONGER have the dash here even if there's no go_id (RETAIN compatibility with CHANGE in C) go_text += *text_string; - if( is_html && go_id != NULL ) { - go_text += ""; - } + } + if( is_html && go_id != NULL ) { + go_text += ""; } if ( evidence != 0 ) { go_text += string( " [Evidence " ) + *evidence + string( "]" ); @@ -1035,11 +1028,11 @@ void CFlatPubSetQVal::Format(TFlatQuals& q, const CTempString& name, string value; string pub_id_str; int serial = (*ref_iter)->GetSerial(); - int pmid = (*ref_iter)->GetPMID(); + TEntrezId pmid = (*ref_iter)->GetPMID(); if (serial) { pub_id_str = NStr::IntToString(serial); - } else if (pmid) { - pub_id_str = NStr::IntToString(pmid); + } else if (pmid != ZERO_ENTREZ_ID) { + pub_id_str = NStr::NumericToString(pmid); } /* string pub_id_str = @@ -1047,10 +1040,10 @@ void CFlatPubSetQVal::Format(TFlatQuals& q, const CTempString& name, NStr::IntToString((*ref_iter)->GetSerial())); */ - if(bHtml && pmid) { + if(bHtml && pmid != ZERO_ENTREZ_ID) { // create a 
link value = "[" + pub_id_str + "]"; + value += strLinkBasePubmed + NStr::NumericToString(pmid) + "\">" + pub_id_str + "]"; } else { value = '[' + pub_id_str + ']'; } @@ -1069,7 +1062,7 @@ void CFlatPubSetQVal::Format(TFlatQuals& q, const CTempString& name, CPub_set_Base::TPub::iterator pub_iter = unusedPubs.begin(); for (; pub_iter != unusedPubs.end(); ++pub_iter) { if ((*pub_iter)->IsPmid()) { - const int pmid = (*pub_iter)->GetPmid().Get(); + const TEntrezId pmid = (*pub_iter)->GetPmid().Get(); string pmid_str = NStr::NumericToString(pmid); pubmed = "[PUBMED "; if (bHtml) { @@ -1118,7 +1111,7 @@ void CFlatSeqIdQVal::Format(TFlatQuals& q, const CTempString& name, if ( m_Value->IsGi() ) { if ( m_GiPrefix ) { id_str = "GI:"; - if (ctx.Config().HideGI() && name == "db_xref") return; + if ((ctx.Config().HideGI() || ctx.Config().IsPolicyFtp()) && name == "db_xref") return; } m_Value->GetLabel(&id_str, CSeq_id::eContent); } else { diff --git a/c++/src/objtools/format/reference_item.cpp b/c++/src/objtools/format/reference_item.cpp index 50cef023..0f585637 100644 --- a/c++/src/objtools/format/reference_item.cpp +++ b/c++/src/objtools/format/reference_item.cpp @@ -1,4 +1,4 @@ -/* $Id: reference_item.cpp 604101 2020-03-23 12:20:44Z ivanov $ +/* $Id: reference_item.cpp 615039 2020-08-26 13:39:14Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -97,10 +97,10 @@ private: ///////////////////////////////////////////////////////////////////////////// CCacheItem::CCacheItem(CBioseqContext& ctx, TCache csh, int length, bool is_prot) : + CFlatItem(&ctx), m_Cache(csh), m_Length(length), - m_IsProt(is_prot), - CFlatItem(&ctx) + m_IsProt(is_prot) { } @@ -192,7 +192,7 @@ void CReferenceItem::FormatAffil(const CAffil& affil, string& result, bool gen_s CReferenceItem::CReferenceItem(const CSeqdesc& desc, CBioseqContext& ctx) : CFlatItem(&ctx), m_PubType(ePub_not_set), m_Category(eUnknown), - m_PatentId(0), m_PMID(0), 
m_MUID(0), m_Serial(kMax_Int), + m_PatentId(0), m_PMID(ZERO_ENTREZ_ID), m_MUID(ZERO_ENTREZ_ID), m_Serial(kMax_Int), m_JustUids(true), m_Elect(false) { _ASSERT(desc.IsPub()); @@ -215,7 +215,7 @@ CReferenceItem::CReferenceItem CBioseqContext& ctx, const CSeq_loc* loc) : CFlatItem(&ctx), m_PubType(ePub_not_set), m_Category(eUnknown), - m_PatentId(0), m_PMID(0), m_MUID(0), m_Serial(kMax_Int), + m_PatentId(0), m_PMID(ZERO_ENTREZ_ID), m_MUID(ZERO_ENTREZ_ID), m_Serial(kMax_Int), m_JustUids(true), m_Elect(false) { _ASSERT(feat.GetData().IsPub()); @@ -242,7 +242,7 @@ CReferenceItem::CReferenceItem CReferenceItem::CReferenceItem(const CSubmit_block& sub, CBioseqContext& ctx) : CFlatItem(&ctx), m_PubType(ePub_sub), m_Category(eSubmission), - m_PatentId(0), m_PMID(0), m_MUID(0), m_Serial(kMax_Int), + m_PatentId(0), m_PMID(ZERO_ENTREZ_ID), m_MUID(ZERO_ENTREZ_ID), m_Serial(kMax_Int), m_JustUids(false), m_Elect(false) { x_SetObject(sub); @@ -322,12 +322,12 @@ static bool s_ShouldRemoveRef }} // same PMID ( and overlap ) - if( curr_ref.GetPMID() != 0 && prev_ref.GetPMID() != 0 ) { + if( curr_ref.GetPMID() != ZERO_ENTREZ_ID && prev_ref.GetPMID() != ZERO_ENTREZ_ID) { return ( curr_ref.GetPMID() == prev_ref.GetPMID() ); } // same MUID ( and overlap ) - if( curr_ref.GetMUID() != 0 && prev_ref.GetMUID() != 0 ) { + if( curr_ref.GetMUID() != ZERO_ENTREZ_ID && prev_ref.GetMUID() != ZERO_ENTREZ_ID) { return ( curr_ref.GetMUID() == prev_ref.GetMUID() ); } @@ -383,8 +383,8 @@ static void s_CombineRefs }} // most merging ops are only done if muid or pmid match - const bool same_muid = ( curr_ref.GetMUID() != 0 && (prev_ref.GetMUID() == curr_ref.GetMUID()) ); - const bool same_pmid = ( curr_ref.GetPMID() != 0 && (prev_ref.GetPMID() == curr_ref.GetPMID()) ); + const bool same_muid = ( curr_ref.GetMUID() != ZERO_ENTREZ_ID && (prev_ref.GetMUID() == curr_ref.GetMUID()) ); + const bool same_pmid = ( curr_ref.GetPMID() != ZERO_ENTREZ_ID && (prev_ref.GetPMID() == curr_ref.GetPMID()) ); if( (same_muid 
|| same_pmid) && ( prev_ref.GetRemark() != curr_ref.GetRemark() ) ) { @@ -612,7 +612,7 @@ bool CReferenceItem::Matches(const CPub& pub) const {{ // you can only compare on unique string if the reference // does not have a pmid or muid (example accession: L40362.1) - if( GetMUID() == 0 && GetPMID() == 0 ) { + if( GetMUID() == ZERO_ENTREZ_ID && GetPMID() == ZERO_ENTREZ_ID) { x_CreateUniqueStr(); const string& uniquestr = m_UniqueStr; @@ -681,7 +681,7 @@ void CReferenceItem::x_GatherInfo(CBioseqContext& ctx) switch(pub.Which()) { case CPub::e_Pmid: { - const int pmid = pub.GetPmid().Get(); + const TEntrezId pmid = pub.GetPmid().Get(); CPubMedId req(pmid); CMLAClient::TReply reply; @@ -690,7 +690,7 @@ void CReferenceItem::x_GatherInfo(CBioseqContext& ctx) break; case CPub::e_Muid: { - const int muid = pub.GetMuid(); + const TEntrezId muid = pub.GetMuid(); // RW-1040: removed mlaClient.AskUidtopmid and AskGetpubpmid } break; @@ -755,7 +755,7 @@ void CReferenceItem::x_Init(const CPub& pub, CBioseqContext& ctx) break; case CPub::e_Muid: - if (m_MUID == 0) { + if (m_MUID == ZERO_ENTREZ_ID) { m_MUID = pub.GetMuid(); m_Category = ePublished; } @@ -800,8 +800,8 @@ void CReferenceItem::x_Init(const CPub& pub, CBioseqContext& ctx) break; case CPub::e_Pmid: - if (m_PMID == 0) { - m_PMID = pub.GetPmid(); + if (m_PMID == ZERO_ENTREZ_ID) { + m_PMID = pub.GetPmid().Get(); m_Category = ePublished; } break; @@ -882,13 +882,13 @@ void CReferenceItem::x_Init(const CCit_gen& gen, CBioseqContext& ctx) } // MUID - if (gen.CanGetMuid() && m_MUID == 0) { + if (gen.CanGetMuid() && m_MUID == ZERO_ENTREZ_ID) { m_MUID = gen.GetMuid(); } // PMID - if (gen.CanGetPmid() && m_PMID == 0) { - m_PMID = gen.GetPmid(); + if (gen.CanGetPmid() && m_PMID == ZERO_ENTREZ_ID) { + m_PMID = gen.GetPmid().Get(); } } @@ -922,12 +922,12 @@ void CReferenceItem::x_Init(const CMedline_entry& mle, CBioseqContext& ctx) { m_Category = ePublished; - if (mle.CanGetUid() && m_MUID == 0) { + if (mle.CanGetUid() && m_MUID == 
ZERO_ENTREZ_ID) { m_MUID = mle.GetUid(); } - if (mle.CanGetPmid() && m_PMID == 0) { - m_PMID = mle.GetPmid(); + if (mle.CanGetPmid() && m_PMID == ZERO_ENTREZ_ID) { + m_PMID = mle.GetPmid().Get(); } if (mle.CanGetCit()) { @@ -1033,13 +1033,13 @@ void CReferenceItem::x_Init(const CCit_art& art, CBioseqContext& ctx) ITERATE (CArticleIdSet::Tdata, it, art.GetIds().Get()) { switch ((*it)->Which()) { case CArticleId::e_Pubmed: - if (m_PMID == 0) { - m_PMID = (*it)->GetPubmed(); + if (m_PMID == ZERO_ENTREZ_ID) { + m_PMID = (*it)->GetPubmed().Get(); } break; case CArticleId::e_Medline: - if (m_MUID == 0) { - m_MUID = (*it)->GetMedline(); + if (m_MUID == ZERO_ENTREZ_ID) { + m_MUID = (*it)->GetMedline().Get(); } break; case CArticleId::e_Doi: @@ -1662,10 +1662,10 @@ void CReferenceItem::x_GatherRemark(CBioseqContext& ctx) // no DOIs pritned if there's a pmid or muid bool hasPmidOrMuid = false; ITERATE( CArticleIdSet_Base::Tdata, it, ids.Get() ) { - if( (*it)->IsPubmed() && (*it)->GetPubmed().Get() != 0 ) { + if( (*it)->IsPubmed() && (*it)->GetPubmed().Get() != ZERO_ENTREZ_ID ) { hasPmidOrMuid = true; break; - } else if( (*it)->IsMedline() && (*it)->GetMedline().Get() != 0 ) { + } else if( (*it)->IsMedline() && (*it)->GetMedline().Get() != ZERO_ENTREZ_ID ) { hasPmidOrMuid = true; break; } @@ -1822,20 +1822,20 @@ bool LessThan::operator() // after: dates are the same, or both missing. // distinguish by uids (swap order for RefSeq) - if ( ref1->GetPMID() != 0 && ref2->GetPMID() != 0 && + if ( ref1->GetPMID() != ZERO_ENTREZ_ID && ref2->GetPMID() != ZERO_ENTREZ_ID && !(ref1->GetPMID() == ref2->GetPMID()) ) { return m_IsRefSeq ? (ref1->GetPMID() > ref2->GetPMID()) : (ref1->GetPMID() < ref2->GetPMID()); } - if ( ref1->GetMUID() != 0 && ref2->GetMUID() != 0 && + if ( ref1->GetMUID() != ZERO_ENTREZ_ID && ref2->GetMUID() != ZERO_ENTREZ_ID && !(ref1->GetMUID() == ref2->GetMUID()) ) { return m_IsRefSeq ? 
(ref1->GetMUID() > ref2->GetMUID()) : (ref1->GetMUID() < ref2->GetMUID()); } // just uids goes last - if ( (ref1->GetPMID() != 0 && ref2->GetPMID() != 0) || - (ref1->GetMUID() != 0 && ref2->GetMUID() != 0) ) { + if ( (ref1->GetPMID() != ZERO_ENTREZ_ID && ref2->GetPMID() != ZERO_ENTREZ_ID) || + (ref1->GetMUID() != ZERO_ENTREZ_ID && ref2->GetMUID() != ZERO_ENTREZ_ID) ) { if ( ref1->IsJustUids() && !ref2->IsJustUids() ) { return true; } else if ( !ref1->IsJustUids() && ref2->IsJustUids() ) { diff --git a/c++/src/objtools/format/source_item.cpp b/c++/src/objtools/format/source_item.cpp index 854d7f96..ce6101e2 100644 --- a/c++/src/objtools/format/source_item.cpp +++ b/c++/src/objtools/format/source_item.cpp @@ -1,4 +1,4 @@ -/* $Id: source_item.cpp 577454 2019-01-03 22:58:25Z kans $ +/* $Id: source_item.cpp 614736 2020-08-21 13:43:48Z fukanchi $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -52,7 +52,7 @@ BEGIN_NCBI_SCOPE BEGIN_SCOPE(objects) -const CSourceItem::TTaxid CSourceItem::kInvalidTaxid = -1; +const CSourceItem::TTaxid CSourceItem::kInvalidTaxid = INVALID_TAX_ID; /////////////////////////////////////////////////////////////////////////// @@ -521,7 +521,7 @@ void CSourceItem::x_SetSource // Taxid {{ TTaxid taxid = org.GetTaxId(); - if (taxid != 0) { + if (taxid != ZERO_TAX_ID) { m_Taxid = taxid; } }} diff --git a/c++/src/objtools/logging/listener.cpp b/c++/src/objtools/logging/listener.cpp index 5b235b8a..c059b36c 100644 --- a/c++/src/objtools/logging/listener.cpp +++ b/c++/src/objtools/logging/listener.cpp @@ -1,5 +1,5 @@ -/* $Id: listener.cpp 600608 2020-01-23 17:32:17Z foleyjp $ +/* $Id: listener.cpp 608330 2020-05-14 16:03:45Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -52,7 +52,7 @@ CObjtoolsListener::~CObjtoolsListener() = default; bool CObjtoolsListener::PutMessage(const IObjtoolsMessage& message) { - 
m_Messages.emplace_back(dynamic_cast(message.Clone())); + m_Messages.emplace_back(message.Clone()); return true; } diff --git a/c++/src/objtools/pubseq_gateway/client/psg_client.cpp b/c++/src/objtools/pubseq_gateway/client/psg_client.cpp index 3e14dd1e..8fd28d60 100644 --- a/c++/src/objtools/pubseq_gateway/client/psg_client.cpp +++ b/c++/src/objtools/pubseq_gateway/client/psg_client.cpp @@ -1,4 +1,4 @@ -/* $Id: psg_client.cpp 605160 2020-04-07 18:06:40Z ivanov $ +/* $Id: psg_client.cpp 612393 2020-07-21 13:51:24Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -308,6 +308,7 @@ string CPSG_Queue::SImpl::x_GetAbsPathRef(shared_ptr user_re } os << ioc.GetClientId(); + if (const auto hops = user_request->m_Hops) os << "&hops=" << hops; return os.str(); } @@ -1060,6 +1061,11 @@ bool CPSG_Queue::IsEmpty() const return m_Impl->Empty(); } +CPSG_Queue::TApiLock CPSG_Queue::GetApiLock() +{ + return SImpl::GetApiLock(); +} + END_NCBI_SCOPE diff --git a/c++/src/objtools/pubseq_gateway/client/psg_client_impl.hpp b/c++/src/objtools/pubseq_gateway/client/psg_client_impl.hpp index e345f7af..37ee6008 100644 --- a/c++/src/objtools/pubseq_gateway/client/psg_client_impl.hpp +++ b/c++/src/objtools/pubseq_gateway/client/psg_client_impl.hpp @@ -1,7 +1,7 @@ #ifndef OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_IMPL_HPP #define OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_IMPL_HPP -/* $Id: psg_client_impl.hpp 598004 2019-12-02 22:13:17Z sadyrovr $ +/* $Id: psg_client_impl.hpp 612393 2020-07-21 13:51:24Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -93,6 +93,8 @@ struct CPSG_Queue::SImpl : CPSG_WaitingStack> bool SendRequest(shared_ptr request, const CDeadline& deadline); + static TApiLock GetApiLock() { return CService::GetMap(); } + private: class CService { @@ -100,7 +102,6 @@ private: using TMap = unordered_map>; SPSG_IoCoordinator& GetIoC(const string& service); - 
static shared_ptr GetMap(); shared_ptr m_Map; static pair> sm_Instance; @@ -109,6 +110,8 @@ private: SPSG_IoCoordinator& ioc; CService(const string& service) : m_Map(GetMap()), ioc(GetIoC(service)) {} + + static shared_ptr GetMap(); }; string x_GetAbsPathRef(shared_ptr user_request); diff --git a/c++/src/objtools/pubseq_gateway/client/psg_client_transport.cpp b/c++/src/objtools/pubseq_gateway/client/psg_client_transport.cpp index 00e3f977..6b6634cb 100644 --- a/c++/src/objtools/pubseq_gateway/client/psg_client_transport.cpp +++ b/c++/src/objtools/pubseq_gateway/client/psg_client_transport.cpp @@ -1,4 +1,4 @@ -/* $Id: psg_client_transport.cpp 608076 2020-05-11 17:59:21Z ivanov $ +/* $Id: psg_client_transport.cpp 609548 2020-06-03 17:22:06Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -53,6 +53,7 @@ #define __STDC_FORMAT_MACROS #include +#include #include #include "psg_client_transport.hpp" diff --git a/c++/src/objtools/readers/aln_reader.cpp b/c++/src/objtools/readers/aln_reader.cpp index f6dd312a..f9691dea 100644 --- a/c++/src/objtools/readers/aln_reader.cpp +++ b/c++/src/objtools/readers/aln_reader.cpp @@ -1,4 +1,4 @@ -/* $Id: aln_reader.cpp 602230 2020-02-19 15:48:48Z foleyjp $ +/* $Id: aln_reader.cpp 610753 2020-06-23 18:10:35Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -170,8 +170,8 @@ void CDefaultIdValidate::operator()( CAlnReader::CAlnReader(CNcbiIstream& is, FValidateIds fValidateIds) : m_fValidateIds(fValidateIds), - m_IS(is), m_ReadDone(false), m_ReadSucceeded(false), m_AlignFormat(EAlignFormat::UNKNOWN), + m_IS(is), m_ReadDone(false), m_ReadSucceeded(false), m_UseNexusInfo(true) { m_Errors.clear(); @@ -290,18 +290,6 @@ sReportError( } -static void -sReportError( - ILineErrorListener* pEC, - EDiagSev severity, - const string& seqId, - int lineNumber, - const string& message, - ILineError::EProblem 
problemType=ILineError::eProblem_GeneralParsingError) -{ - sReportError(pEC, severity, eReader_Alignment, 0, seqId, lineNumber, message, problemType); -} - void CAlnReader::Read( TReadFlags readFlags, ncbi::objects::ILineErrorListener* pErrorListener) @@ -403,7 +391,6 @@ void CAlnReader::x_VerifyAlignmentInfo( "Only one sequence was detected in the alignment file. An alignment file must contain more than one sequence."); } - const auto numSequences = alignmentInfo.NumSequences(); m_Seqs.assign(alignmentInfo.mSequences.begin(), alignmentInfo.mSequences.end()); @@ -750,9 +737,6 @@ CRef CAlnReader::GetSeqEntry(const TFastaFlags fasta_flags, m_Entry = new CSeq_entry(); CRef seq_align = GetSeqAlign(fasta_flags, pErrorListener); - const CDense_seg& denseg = seq_align->GetSegs().GetDenseg(); - _ASSERT(denseg.GetIds().size() == m_Dim); - CRef seq_annot (new CSeq_annot); seq_annot->SetData().SetAlign().push_back(seq_align); @@ -768,7 +752,6 @@ CRef CAlnReader::GetSeqEntry(const TFastaFlags fasta_flags, // seq-id(s) auto& ids = pSubEntry->SetSeq().SetId(); - //ids.push_back(denseg.GetIds()[row_i]); ids = m_Ids[row_i]; // mol diff --git a/c++/src/objtools/readers/aln_scanner_clustal.cpp b/c++/src/objtools/readers/aln_scanner_clustal.cpp index 4e091404..0b90b69f 100644 --- a/c++/src/objtools/readers/aln_scanner_clustal.cpp +++ b/c++/src/objtools/readers/aln_scanner_clustal.cpp @@ -1,5 +1,5 @@ /* - * $Id: aln_scanner_clustal.cpp 589468 2019-07-11 14:51:16Z kornbluh $ + * $Id: aln_scanner_clustal.cpp 610753 2020-06-23 18:10:35Z ivanov $ * * =========================================================================== * @@ -110,10 +110,8 @@ CAlnScannerClustal::xImportAlignmentData( bool inBlock = false; int blockLineLength = 0; int blockCount = 0; - bool firstBlock = true; int numSeqs = 0; int seqCount = 0; - int maxSeqCount = 0; string line; int lineCount = 0; diff --git a/c++/src/objtools/readers/aln_scanner_nexus.cpp b/c++/src/objtools/readers/aln_scanner_nexus.cpp index 
c657d943..2687b660 100644 --- a/c++/src/objtools/readers/aln_scanner_nexus.cpp +++ b/c++/src/objtools/readers/aln_scanner_nexus.cpp @@ -1,5 +1,5 @@ /* - * $Id: aln_scanner_nexus.cpp 599135 2019-12-19 16:40:05Z foleyjp $ + * $Id: aln_scanner_nexus.cpp 610753 2020-06-23 18:10:35Z ivanov $ * * =========================================================================== * @@ -421,7 +421,7 @@ CAlnScannerNexus::xProcessMatrix( } string seqData = NStr::Join(tokens.begin()+1, tokens.end(), ""); - auto dataSize = seqData.size(); + const int dataSize = seqData.size(); @@ -666,7 +666,7 @@ CAlnScannerNexus::xGetArgPos(const TCommandArgs& args, // ---------------------------------------------------------------------------- -int +size_t CAlnScannerNexus::sFindCharOutsideComment( char c, const string& line, @@ -674,7 +674,7 @@ CAlnScannerNexus::sFindCharOutsideComment( size_t startPos) // ---------------------------------------------------------------------------- { - for (int index=startPos; index> commentLimits; - int index=0; - int start=0; - int stop; + list> commentLimits; + size_t index=0; + size_t start=0; + size_t stop; while (index < line.size()) { const auto& c = line[index]; if (c == '[') { @@ -884,9 +883,9 @@ CAlnScannerNexus::sStripCommentsOutsideCommand( return; } - list> commentLimits; - int start=0; - int stop; + list> commentLimits; + size_t start=0; + size_t stop; if (!inCommand && (numUnmatchedLeftBrackets == 0) && @@ -896,7 +895,7 @@ CAlnScannerNexus::sStripCommentsOutsideCommand( const auto len = line.size(); - for (int index=0; indexSetBioSource().SetOrg().SetTaxname(value); if (!preserve_taxid && - m_pDescrCache->SetBioSource().GetOrg().GetTaxId()) { + m_pDescrCache->SetBioSource().GetOrg().GetTaxId() != ZERO_ENTREZ_ID) { // clear taxid if it does not occur in this modifier set - m_pDescrCache->SetBioSource().SetOrg().SetTaxId(0); + m_pDescrCache->SetBioSource().SetOrg().SetTaxId(ZERO_ENTREZ_ID); } return true; } if (name == "taxid") { const auto& value = 
x_GetModValue(mod_entry); - int taxid; + TTaxId taxid; try { - taxid = NStr::StringToInt(value); + taxid = NStr::StringToNumeric(value); } catch (...) { x_ReportInvalidValue(mod_entry.second.front(), "Integer value expected."); @@ -925,9 +925,9 @@ void CDescrModApply::x_SetPMID(const TModEntry& mod_entry) for (const auto& mod : mod_entry.second) { const auto& value = mod.GetValue(); - int pmid; + TEntrezId pmid; try { - pmid = NStr::StringToInt(value); + pmid = NStr::StringToNumeric(value); } catch(...) { x_ReportInvalidValue(mod_entry.second.front(), "Expected integer value."); @@ -1097,7 +1097,7 @@ CUser_object& CDescrCache::SetDBLink() CUser_object& CDescrCache::SetFileTrack() { return x_SetDescriptor(eFileTrack, - [this](const CSeqdesc& desc) { + [](const CSeqdesc& desc) { return (desc.IsUser() && s_IsUserType(desc.GetUser(), "FileTrack")); }, [this]() { @@ -1112,7 +1112,7 @@ CUser_object& CDescrCache::SetFileTrack() CUser_object& CDescrCache::SetTpaAssembly() { return x_SetDescriptor(eTpa, - [this](const CSeqdesc& desc) { + [](const CSeqdesc& desc) { return (desc.IsUser() && s_IsUserType(desc.GetUser(), "TpaAssembly")); }, [this]() { @@ -1127,7 +1127,7 @@ CUser_object& CDescrCache::SetTpaAssembly() CUser_object& CDescrCache::SetGenomeProjects() { return x_SetDescriptor(eGenomeProjects, - [this](const CSeqdesc& desc) { + [](const CSeqdesc& desc) { return (desc.IsUser() && s_IsUserType(desc.GetUser(), "GenomeProjectsDB")); }, [this]() { diff --git a/c++/src/objtools/readers/fasta.cpp b/c++/src/objtools/readers/fasta.cpp index 30044d9a..6d01a708 100644 --- a/c++/src/objtools/readers/fasta.cpp +++ b/c++/src/objtools/readers/fasta.cpp @@ -1,4 +1,4 @@ -/* $Id: fasta.cpp 600608 2020-01-23 17:32:17Z foleyjp $ +/* $Id: fasta.cpp 612524 2020-07-23 11:37:59Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -220,7 +220,6 @@ inline static bool s_ASCII_IsUnAmbigNuc(unsigned char c) 
CFastaReader::CFastaReader(ILineReader& reader, TFlags flags, FIdCheck f_idcheck) : m_LineReader(&reader), m_MaskVec(0), - m_IDGenerator(new CSeqIdGenerator()), m_gapNmin(0), m_gap_Unknown_length(0), m_MaxIDLength(kMax_UI4), m_fIdCheck(f_idcheck) @@ -237,7 +236,6 @@ CFastaReader::CFastaReader(const string& path, TFlags flags, FIdCheck f_idcheck) CFastaReader::CFastaReader(CReaderBase::TReaderFlags fBaseFlags, TFlags flags, FIdCheck f_idcheck) : CReaderBase(fBaseFlags), m_MaskVec(0), - m_IDGenerator(new CSeqIdGenerator), m_gapNmin(0), m_gap_Unknown_length(0), m_MaxIDLength(kMax_UI4), m_fIdCheck(f_idcheck) @@ -352,11 +350,12 @@ CRef CFastaReader::ReadOneSeq(ILineErrorListener * pMessageListener) ParseDefLine(">", pMessageListener); need_defline = false; } else { + const auto lineNum = LineNumber(); GetLineReader().UngetLine(); NCBI_THROW2(CObjReaderParseException, eNoDefline, "CFastaReader: Input doesn't start with" - " a defline or comment around line " + NStr::NumericToString(LineNumber()), - LineNumber() ); + " a defline or comment around line " + NStr::NumericToString(lineNum), + lineNum); } } @@ -506,6 +505,7 @@ void CFastaReader::SetMaxIDLength(Uint4 max_len) CFastaDeflineReader::s_MaxLocalIDLength = CFastaDeflineReader::s_MaxGeneralTagLength = CFastaDeflineReader::s_MaxAccessionLength = m_MaxIDLength = max_len; + m_bModifiedMaxIdLength=true; } @@ -584,10 +584,7 @@ bool CFastaReader::xSetSeqMol(const list>& ids, CSeq_inst_Base::EM void CFastaReader::ParseDefLine(const TStr& s, ILineErrorListener * pMessageListener) { SDefLineParseInfo parseInfo; - parseInfo.fBaseFlags = m_iFlags; - parseInfo.fFastaFlags = GetFlags(); - parseInfo.maxIdLength = m_MaxIDLength; - parseInfo.lineNumber = LineNumber(); + x_SetDeflineParseInfo(parseInfo); CFastaDeflineReader::SDeflineData data; CFastaDeflineReader::ParseDefline(s, parseInfo, data, pMessageListener, m_fIdCheck); @@ -679,14 +676,21 @@ bool CFastaReader::ParseIDs( bool CFastaReader::ParseIDs( const TStr& s, 
ILineErrorListener * pMessageListener) { - SDefLineParseInfo info; + x_SetDeflineParseInfo(info); + + return CFastaDeflineReader::ParseIDs(s, info, m_ignorable, SetIDs(), pMessageListener); +} + + +void CFastaReader::x_SetDeflineParseInfo(SDefLineParseInfo& info) +{ info.fBaseFlags = m_iFlags; info.fFastaFlags = GetFlags(); - info.maxIdLength = m_MaxIDLength; + info.maxIdLength = m_bModifiedMaxIdLength ? + m_MaxIDLength : + 0; info.lineNumber = LineNumber(); - - return CFastaDeflineReader::ParseIDs(s, info, m_ignorable, SetIDs(), pMessageListener); } @@ -2235,12 +2239,8 @@ void CFastaReader::SetGapLinkageEvidence( void CFastaReader::SetGapLinkageEvidences(CSeq_gap::EType type, const set& evidences) { - if (type == -1) - m_gap_type.Release(); - else - m_gap_type.Reset(new SGap::TGapTypeObj(type)); - - + m_gap_type.Reset(new SGap::TGapTypeObj(type)); + m_DefaultLinkageEvidence.clear(); for (const auto& evidence : evidences) { m_DefaultLinkageEvidence.insert(static_cast(evidence)); diff --git a/c++/src/objtools/readers/fasta_exception.cpp b/c++/src/objtools/readers/fasta_exception.cpp index 88bce36d..9e803812 100644 --- a/c++/src/objtools/readers/fasta_exception.cpp +++ b/c++/src/objtools/readers/fasta_exception.cpp @@ -1,4 +1,4 @@ -/* $Id: fasta_exception.cpp 407174 2013-07-18 16:27:25Z gouriano $ +/* $Id: fasta_exception.cpp 610176 2020-06-11 19:24:49Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -141,7 +141,7 @@ void CBadResiduesException::SBadResiduePositions::ConvertBadIndexesToString( pos_prefix = ", "; } - if( rangesFound.size() > maxRanges ) { + if (iRangesFound > maxRanges) { out << ", and more"; return; } diff --git a/c++/src/objtools/readers/fasta_reader_utils.cpp b/c++/src/objtools/readers/fasta_reader_utils.cpp index 594fbdef..131a8689 100644 --- a/c++/src/objtools/readers/fasta_reader_utils.cpp +++ b/c++/src/objtools/readers/fasta_reader_utils.cpp @@ -1,4 +1,4 @@ -/* $Id: 
fasta_reader_utils.cpp 599582 2020-01-02 20:02:39Z foleyjp $ +/* $Id: fasta_reader_utils.cpp 612524 2020-07-23 11:37:59Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -350,23 +350,6 @@ TSeqPos CFastaDeflineReader::ParseRange( return TSeqPos(s.length() - pos); } -static bool s_ASCII_IsUnAmbigNuc(unsigned char c) -{ - switch( c ) { - case 'A': - case 'C': - case 'G': - case 'T': - case 'a': - case 'c': - case 'g': - case 't': - return true; - default: - return false; - } -} - class CIdErrorReporter { @@ -507,7 +490,6 @@ bool CFastaDeflineReader::ParseIDs( return true; } - TSeqPos num_ids = 0; // be generous overall, and give raw local IDs the benefit of the // doubt for now CSeq_id::TParseFlags flags @@ -527,7 +509,8 @@ bool CFastaDeflineReader::ParseIDs( for (auto& ch : local_copy) if (ch == ',') ch = '_'; - num_ids = CSeq_id::ParseIDs(ids, local_copy, flags); + + CSeq_id::ParseIDs(ids, local_copy, flags); const string errMessage = "Near line " + NStr::NumericToString(info.lineNumber) @@ -545,7 +528,7 @@ bool CFastaDeflineReader::ParseIDs( } else { - num_ids = CSeq_id::ParseIDs(ids, s, flags); + CSeq_id::ParseIDs(ids, s, flags); } } catch (CSeqIdException&) { // swap(ids, old_ids); @@ -557,9 +540,11 @@ bool CFastaDeflineReader::ParseIDs( CFastaIdValidate idValidate(info.fFastaFlags); - idValidate.SetMaxLocalIDLength(info.maxIdLength); - idValidate.SetMaxGeneralTagLength(info.maxIdLength); - idValidate.SetMaxAccessionLength(info.maxIdLength); + if (info.maxIdLength) { + idValidate.SetMaxLocalIDLength(info.maxIdLength); + idValidate.SetMaxGeneralTagLength(info.maxIdLength); + idValidate.SetMaxAccessionLength(info.maxIdLength); + } idValidate(ids, info.lineNumber, CIdErrorReporter(pMessageListener, ignoreGeneralParsingError)); return true; @@ -587,6 +572,11 @@ void CSeqIdCheck::operator()(const TIds& ids, } CFastaIdValidate s_IdValidate(info.fFastaFlags); + if (info.maxIdLength) { + 
s_IdValidate.SetMaxLocalIDLength(info.maxIdLength); + s_IdValidate.SetMaxGeneralTagLength(info.maxIdLength); + s_IdValidate.SetMaxAccessionLength(info.maxIdLength); + } s_IdValidate(ids, info.lineNumber, CIdErrorReporter(listener)); } diff --git a/c++/src/objtools/readers/gff2_data.cpp b/c++/src/objtools/readers/gff2_data.cpp index 0c69d6e8..5b6cff76 100644 --- a/c++/src/objtools/readers/gff2_data.cpp +++ b/c++/src/objtools/readers/gff2_data.cpp @@ -1,4 +1,4 @@ -/* $Id: gff2_data.cpp 607807 2020-05-07 18:58:43Z ivanov $ +/* $Id: gff2_data.cpp 610645 2020-06-22 11:31:02Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -821,7 +821,9 @@ bool CGff2Record::xMigrateAttributes( it = attrs_left.find("partial"); if (it != attrs_left.end()) { - pFeature->SetPartial(true); + if (!(flags & CGff2Reader::fGenbankMode)) { + pFeature->AddQualifier("partial", it->second); + } attrs_left.erase(it); } diff --git a/c++/src/objtools/readers/gff2_reader.cpp b/c++/src/objtools/readers/gff2_reader.cpp index 5dfa0c97..4bb0d19c 100644 --- a/c++/src/objtools/readers/gff2_reader.cpp +++ b/c++/src/objtools/readers/gff2_reader.cpp @@ -1,4 +1,4 @@ -/* $Id: gff2_reader.cpp 603569 2020-03-12 18:23:57Z ivanov $ +/* $Id: gff2_reader.cpp 610837 2020-06-24 15:29:29Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -182,6 +182,44 @@ void CGff2Reader::xPostProcessAnnot( } } +// ---------------------------------------------------------------------------- +void +CGff2Reader::xGetData( + ILineReader& lr, + TReaderData& readerData) +// ---------------------------------------------------------------------------- +{ + readerData.clear(); + string line; + if (xGetLine(lr, line)) { + if (xNeedsNewSeqAnnot(line)) { + return; + } + if (xIsTrackLine(line)) { + if (!mCurrentFeatureCount) { + xParseTrackLine(line); + xGetData(lr, readerData); + return; + } + m_PendingLine = line; + 
return; + } + if (xIsTrackTerminator(line)) { + if (!mCurrentFeatureCount) { + xParseTrackLine("track"); + xGetData(lr, readerData); + } + return; + } + if (!xIsCurrentDataType(line)) { + xUngetLine(lr); + return; + } + readerData.push_back(TReaderLine{m_uLineNumber, line}); + } + ++m_uDataCount; +} + // ---------------------------------------------------------------------------- void CGff2Reader::xAssignAnnotId( CSeq_annot& annot, @@ -1347,5 +1385,37 @@ bool CGff2Reader::xIsIgnoredFeatureId( return false; } +// --------------------------------------------------------------------------- +bool +CGff2Reader::xNeedsNewSeqAnnot( + const string& line) +// --------------------------------------------------------------------------- +{ + if (IsInGenbankMode()) { + vector columns; + NStr::Split(line, "\t ", columns, NStr::eMergeDelims); + string seqId = columns[0]; + if (m_CurrentSeqId == seqId) { + return false; + } + m_CurrentSeqId = seqId; + if (mCurrentFeatureCount == 0) { + return false; + } + m_PendingLine = line; + return true; + } + return false; +} + +// ---------------------------------------------------------------------------- +bool CGff2Reader::IsInGenbankMode() const +// ---------------------------------------------------------------------------- +{ + return (m_iFlags & CGff2Reader::fGenbankMode); +} + + + END_objects_SCOPE END_NCBI_SCOPE diff --git a/c++/src/objtools/readers/gff3_reader.cpp b/c++/src/objtools/readers/gff3_reader.cpp index 1e0e5206..aca1edcf 100644 --- a/c++/src/objtools/readers/gff3_reader.cpp +++ b/c++/src/objtools/readers/gff3_reader.cpp @@ -1,4 +1,4 @@ -/* $Id: gff3_reader.cpp 607807 2020-05-07 18:58:43Z ivanov $ +/* $Id: gff3_reader.cpp 610837 2020-06-24 15:29:29Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -224,52 +224,6 @@ CGff3Reader::xProcessData( } } -// ---------------------------------------------------------------------------- -bool 
CGff3Reader::IsInGenbankMode() const -// ---------------------------------------------------------------------------- -{ - return (m_iFlags & CGff3Reader::fGenbankMode); -} - -// ---------------------------------------------------------------------------- -void -CGff3Reader::xGetData( - ILineReader& lr, - TReaderData& readerData) -// ---------------------------------------------------------------------------- -{ - readerData.clear(); - string line; - if (xGetLine(lr, line)) { - if (xNeedsNewSeqAnnot(line)) { - lr.UngetLine(); - return; - } - if (xIsTrackLine(line)) { - if (!mCurrentFeatureCount) { - xParseTrackLine(line); - xGetData(lr, readerData); - return; - } - m_PendingLine = line; - return; - } - if (xIsTrackTerminator(line)) { - if (!mCurrentFeatureCount) { - xParseTrackLine("track"); - xGetData(lr, readerData); - } - return; - } - if (!xIsCurrentDataType(line)) { - xUngetLine(lr); - return; - } - readerData.push_back(TReaderLine{m_uLineNumber, line}); - } - ++m_uDataCount; -} - // ---------------------------------------------------------------------------- void CGff3Reader::xProcessAlignmentData( CSeq_annot& annot) @@ -1080,29 +1034,5 @@ void CGff3Reader::xPostProcessAnnot( return CGff2Reader::xPostProcessAnnot(annot); } -// --------------------------------------------------------------------------- -bool -CGff3Reader::xNeedsNewSeqAnnot( - const string& line) -// --------------------------------------------------------------------------- -{ - if (IsInGenbankMode()) { - vector columns; - NStr::Split(line, "\t ", columns, NStr::eMergeDelims); - string seqId = columns[0]; - if (m_CurrentSeqId == seqId) { - return false; - } - m_CurrentSeqId = seqId; - if (mCurrentFeatureCount == 0) { - return false; - } - m_PendingLine = line; - return true; - } - return false; -} - - END_objects_SCOPE END_NCBI_SCOPE diff --git a/c++/src/objtools/readers/gtf_reader.cpp b/c++/src/objtools/readers/gtf_reader.cpp index e08d008d..64a4a1a4 100644 --- 
a/c++/src/objtools/readers/gtf_reader.cpp +++ b/c++/src/objtools/readers/gtf_reader.cpp @@ -1,4 +1,4 @@ -/* $Id: gtf_reader.cpp 603569 2020-03-12 18:23:57Z ivanov $ +/* $Id: gtf_reader.cpp 610936 2020-06-25 16:26:53Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -169,6 +169,7 @@ CGtfReader::ReadSeqAnnot( ILineErrorListener* pEC ) // ---------------------------------------------------------------------------- { + mCurrentFeatureCount = 0; return CReaderBase::ReadSeqAnnot(lineReader, pEC); } @@ -181,6 +182,9 @@ CGtfReader::xProcessData( { for (const auto& lineData: readerData) { const auto& line = lineData.mData; + if (xIsTrackTerminator(line)) { + continue; + } if (xParseStructuredComment(line)) { continue; } @@ -292,24 +296,7 @@ bool CGtfReader::xUpdateAnnotCds( return false; } } - - if ( xCdsIsPartial( gff ) ) { - CRef pParent = xFindParentMrna(gff); - if (pParent) { - CSeq_loc& loc = pCds->SetLocation(); - size_t uCdsStart = gff.SeqStart(); - size_t uMrnaStart = pParent->GetLocation().GetStart( eExtreme_Positional ); - if ( uCdsStart == uMrnaStart ) { - loc.SetPartialStart( true, eExtreme_Positional ); - } - size_t uCdsStop = gff.SeqStop(); - size_t uMrnaStop = pParent->GetLocation().GetStop( eExtreme_Positional ); - if ( uCdsStop == uMrnaStop && gff.Type() != "stop_codon" ) { - loc.SetPartialStop( true, eExtreme_Positional ); - } - } - } return true; } @@ -950,8 +937,10 @@ bool CGtfReader::xProcessQualifierSpecialCase( return true; } if ( 0 == NStr::CompareNocase(key, "partial")) { - feature.SetPartial( true ); - return true; + // RW-1108 - ignore partial attribute in Genbank mode + if (m_iFlags & CGtfReader::fGenbankMode) { + return true; + } } return false; } diff --git a/c++/src/objtools/readers/line_error.cpp b/c++/src/objtools/readers/line_error.cpp index ba897d65..d29e6108 100644 --- a/c++/src/objtools/readers/line_error.cpp +++ b/c++/src/objtools/readers/line_error.cpp @@ -1,4 +1,4 @@ 
-/* $Id: line_error.cpp 580916 2019-02-22 16:30:37Z foleyjp $ +/* $Id: line_error.cpp 610758 2020-06-23 18:11:06Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -153,9 +153,9 @@ CLineErrorEx::CLineErrorEx( : m_eProblem(eProblem), m_eSeverity( eSeverity ), m_Code(code), m_Subcode(subcode), m_strSeqId(strSeqId), m_uLine( uLine ), - m_strErrorMessage(strErrorMessage), m_strFeatureName(strFeatureName), m_strQualifierName(strQualifierName), m_strQualifierValue(strQualifierValue), + m_strErrorMessage(strErrorMessage), m_vecOfOtherLines(vecOfOtherLines) { } diff --git a/c++/src/objtools/readers/mod_reader.cpp b/c++/src/objtools/readers/mod_reader.cpp index 783ed768..b8e8ec62 100644 --- a/c++/src/objtools/readers/mod_reader.cpp +++ b/c++/src/objtools/readers/mod_reader.cpp @@ -1,4 +1,4 @@ -/* $Id: mod_reader.cpp 600608 2020-01-23 17:32:17Z foleyjp $ +/* $Id: mod_reader.cpp 610749 2020-06-23 18:10:01Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -798,7 +798,6 @@ bool CTitleParser::HasMods(const CTempString& title) bool CTitleParser::x_FindBrackets(const CTempString& line, size_t& start, size_t& stop, size_t& eq_pos) { // Copied from CSourceModParser size_t i = start; - bool found = false; eq_pos = CTempString::npos; const char* s = line.data() + start; diff --git a/c++/src/objtools/readers/readfeat.cpp b/c++/src/objtools/readers/readfeat.cpp index c75de676..01a76cd1 100644 --- a/c++/src/objtools/readers/readfeat.cpp +++ b/c++/src/objtools/readers/readfeat.cpp @@ -769,10 +769,8 @@ static const TOrgModKey orgmod_key_to_subtype [] = { typedef CStaticPairArrayMap TOrgModMap; DEFINE_STATIC_ARRAY_MAP(TOrgModMap, sm_OrgModKeys, orgmod_key_to_subtype); - -typedef SStaticPair TTrnaKey; - -static const TTrnaKey trna_key_to_subtype [] = { +static const map sm_TrnaKeys +{ { "Ala", 'A' }, { "Alanine", 'A' }, { "Arg", 'R' }, @@ -831,13 +829,11 @@ 
static const TTrnaKey trna_key_to_subtype [] = { { "Valine", 'V' }, { "Xle", 'J' }, { "Xxx", 'X' }, + { "Undet", 'X' }, { "fMet", 'M' }, { "iMet", 'M' } }; -typedef CStaticPairArrayMap TTrnaMap; -DEFINE_STATIC_ARRAY_MAP(TTrnaMap, sm_TrnaKeys, trna_key_to_subtype); - static set @@ -1550,7 +1546,8 @@ CFeatureTableReader_Imp::x_ParseTrnaExtString(CTrna_ext & ext_trna, const string seq_start - (aa_start+3); string abbrev = pos_str.substr (aa_start + 3, aa_length); - TTrnaMap::const_iterator t_iter = sm_TrnaKeys.find (abbrev.c_str ()); + //TTrnaMap::const_iterator + auto t_iter = sm_TrnaKeys.find (abbrev.c_str ()); if (t_iter == sm_TrnaKeys.end ()) { // unable to parse return false; @@ -2441,7 +2438,7 @@ bool CFeatureTableReader_Imp::x_AddQualifierToFeature ( case CSeqFeatData::e_Pub: if( qtype == eQual_PubMed ) { CRef new_pub( new CPub ); - new_pub->SetPmid( CPubMedId( x_StringToLongNoThrow(val, feat_name, qual) ) ); + new_pub->SetPmid( CPubMedId( ENTREZ_ID_FROM(long, x_StringToLongNoThrow(val, feat_name, qual)) ) ); sfdata.SetPub().SetPub().Set().push_back( new_pub ); return true; } @@ -2649,25 +2646,31 @@ bool CFeatureTableReader_Imp::x_AddQualifierToFeature ( { if (featType == CSeqFeatData::e_Rna && sfdata.GetRna().GetType() == CRNA_ref::eType_mRNA) { + CBioseq::TId ids; try { - CBioseq::TId ids; CSeq_id::ParseIDs(ids, val, - CSeq_id::fParse_ValidLocal - | CSeq_id::fParse_PartialOK); - for (const auto& id : ids) { - auto id_string = id->GetSeqIdString(true); - auto res = m_ProcessedTranscriptIds.insert(id_string); - if (res.second == false) { // Insertion failed because Seq-id already encountered - x_ProcessMsg( - ILineError::eProblem_DuplicateIDs, eDiag_Error, - feat_name, qual, val, - "Transcript ID " + id_string + " appears on multiple mRNA features" - ); - } - } + CSeq_id::fParse_ValidLocal + | CSeq_id::fParse_PartialOK); } - catch (CException&) { - return false; + catch (CSeqIdException& e) + { + x_ProcessMsg( + ILineError::eProblem_QualifierBadValue, 
eDiag_Error, + feat_name, qual, val, + "Invalid transcript_id : " + val); + return true; + } + + for (const auto& id : ids) { + auto id_string = id->GetSeqIdString(true); + auto res = m_ProcessedTranscriptIds.insert(id_string); + if (res.second == false) { // Insertion failed because Seq-id already encountered + x_ProcessMsg( + ILineError::eProblem_DuplicateIDs, eDiag_Error, + feat_name, qual, val, + "Transcript ID " + id_string + " appears on multiple mRNA features" + ); + } } } x_AddGBQualToFeature(sfp, qual, val); @@ -2681,41 +2684,48 @@ bool CFeatureTableReader_Imp::x_AddQualifierToFeature ( (featType == CSeqFeatData::e_Prot && sfdata.GetProt().IsSetProcessed() && sfdata.GetProt().GetProcessed() == CProt_ref::eProcessed_mature)) - try { + { CBioseq::TId ids; - CSeq_id::ParseIDs(ids, val, - CSeq_id::fParse_ValidLocal | - CSeq_id::fParse_PartialOK); - if (!ids.empty()) { - if (featType == CSeqFeatData::e_Cdregion) { - for (const auto& id : ids) { - auto id_string = id->GetSeqIdString(true); - auto res = m_ProcessedProteinIds.insert(id_string); - if (res.second == false) { // Insertion failed because Seq-id already encountered - x_ProcessMsg( - ILineError::eProblem_DuplicateIDs, eDiag_Error, - feat_name, qual, val, - "Protein ID " + id_string + " appears on multiple CDS features" - ); - } + try { + CSeq_id::ParseIDs(ids, val, + CSeq_id::fParse_ValidLocal | + CSeq_id::fParse_PartialOK); + } + catch (CSeqIdException& e) + { + x_ProcessMsg( + ILineError::eProblem_QualifierBadValue, eDiag_Error, + feat_name, qual, val, + "Invalid protein_id : " + val); + return true; + } + + if (featType == CSeqFeatData::e_Cdregion) { + for (const auto& id : ids) { + auto id_string = id->GetSeqIdString(true); + auto res = m_ProcessedProteinIds.insert(id_string); + if (res.second == false) { // Insertion failed because Seq-id already encountered + x_ProcessMsg( + ILineError::eProblem_DuplicateIDs, eDiag_Error, + feat_name, qual, val, + "Protein ID " + id_string + " appears on multiple 
CDS features" + ); } } + } - if (featType != CSeqFeatData::e_Rna) { - auto pBestId = GetBestId(ids); - if (pBestId) { - sfp->SetProduct().SetWhole(*pBestId); - } - } - - if (featType != CSeqFeatData::e_Prot) { - x_AddGBQualToFeature(sfp, qual, val); + if (featType != CSeqFeatData::e_Rna) { // mRNA only has a protein_id qualifier + auto pBestId = GetBestId(ids); + if (pBestId) { + sfp->SetProduct().SetWhole(*pBestId); } } - return true; - } catch( CSeqIdException & ) { - return false; } + + if (featType != CSeqFeatData::e_Prot) { // Mat-peptide has an instantiated product, but no qualifier + x_AddGBQualToFeature(sfp, qual, val); + } + return true; case eQual_regulatory_class: // This should've been handled up in x_AddQualifierToImp // so it's always a bad value to be here @@ -3298,14 +3308,6 @@ CRef CFeatureTableReader_Imp::ReadSequinFeatureTable ( while ( !m_reader->AtEOF() ) { - // since reader's UngetLine doesn't actually push back - // into the reader's underlying stream, we try to - // be careful to detect the most common case of - // "there's another feature next" - if( m_reader->PeekChar() == '>' ) { - break; - } - CTempString line = *++(*m_reader); if( m_reader->GetLineNumber() % 10000 == 0 && @@ -3349,17 +3351,6 @@ CRef CFeatureTableReader_Imp::ReadSequinFeatureTable ( } } else if (x_ParseFeatureTableLine (line, loc_info, feat, qual, qual_value, offset)) { - // } else if (x_ParseFeatureTableLine (line, &start, &stop, &partial5, &partial3, - // &ispoint, &isminus, feat, qual, qual_value, offset)) { -/* - SFeatLocInfo loc_info; - loc_info.start_pos = start; - loc_info.stop_pos = stop; - loc_info.is_5p_partial = partial5; - loc_info.is_3p_partial = partial3; - loc_info.is_point = ispoint; - loc_info.is_minus_strand = isminus; - */ // process line in feature table replace( qual_value.begin(), qual_value.end(), '\"', '\'' ); @@ -3391,9 +3382,6 @@ CRef CFeatureTableReader_Imp::ReadSequinFeatureTable ( // and add first interval x_AddIntervalToFeature 
(curr_feat_name, sfp, loc_info); - // x_AddIntervalToFeature (curr_feat_name, sfp, - // start, stop, partial5, partial3, ispoint, isminus); - ignore_until_next_feature_key = false; curr_feat_name = feat; @@ -3465,7 +3453,6 @@ CRef CFeatureTableReader_Imp::ReadSequinFeatureTable ( { x_CreateGenesFromCDSs(sap, choiceToFeatMap, flags); } - return sap; } diff --git a/c++/src/objtools/readers/rm_reader.cpp b/c++/src/objtools/readers/rm_reader.cpp index 442e6dd6..bce2ba8e 100644 --- a/c++/src/objtools/readers/rm_reader.cpp +++ b/c++/src/objtools/readers/rm_reader.cpp @@ -1,4 +1,4 @@ -/* $Id: rm_reader.cpp 601856 2020-02-14 14:44:09Z mozese2 $ +/* $Id: rm_reader.cpp 610834 2020-06-24 15:29:06Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -789,6 +789,9 @@ CRepeatMaskerReader::ReadSeqAnnot(ILineReader& lr, ILineErrorListener* pMessageL continue; } ++record_counter; + //if (record_counter == 91555) { + // cerr << ""; + //} SRepeatRegion mask_data; if ( ! ParseRecord( line, mask_data ) ) { @@ -973,15 +976,15 @@ bool CRepeatMaskerReader::ParseRecord(const string& record, SRepeatRegion& mask_ // fields position 12 and 14 flip depending on the strand value. 
string rpt_left; if (mask_data.IsReverseStrand()) { - mask_data.rpt_pos_begin = NStr::StringToUInt( field14 ); + mask_data.rpt_pos_begin = NStr::StringToInt( field14 ); rpt_left = field12; } else { - mask_data.rpt_pos_begin = NStr::StringToUInt( field12 ); + mask_data.rpt_pos_begin = NStr::StringToInt( field12 ); rpt_left = field14; } StripParens(rpt_left); - mask_data.rpt_left = NStr::StringToUInt(rpt_left); + mask_data.rpt_left = NStr::StringToInt(rpt_left); // 15: "ID" ++it; diff --git a/c++/src/objtools/readers/source_mod_parser.cpp b/c++/src/objtools/readers/source_mod_parser.cpp index bbe7d873..4a308400 100644 --- a/c++/src/objtools/readers/source_mod_parser.cpp +++ b/c++/src/objtools/readers/source_mod_parser.cpp @@ -1,4 +1,4 @@ -/* $Id: source_mod_parser.cpp 571491 2018-09-27 16:13:08Z foleyjp $ +/* $Id: source_mod_parser.cpp 610750 2020-06-23 18:10:12Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -1158,11 +1158,11 @@ void CSourceModParser::x_ApplyMods(CAutoInitDesc& bsrc, if ((mod = FindMod(s_Mod_taxid)) != NULL) { - bsrc->SetOrg().SetTaxId( NStr::StringToInt(mod->value, NStr::fConvErr_NoThrow) ); + bsrc->SetOrg().SetTaxId( NStr::StringToNumeric(mod->value, NStr::fConvErr_NoThrow) ); } else - if (reset_taxid && bsrc->IsSetOrgname() && bsrc->GetOrg().GetTaxId() != 0) { - bsrc->SetOrg().SetTaxId(0); + if (reset_taxid && bsrc->IsSetOrgname() && bsrc->GetOrg().GetTaxId() != ZERO_ENTREZ_ID) { + bsrc->SetOrg().SetTaxId(ZERO_ENTREZ_ID); } } @@ -1582,7 +1582,7 @@ void s_ApplyPubMods(CBioseq& bioseq, const CSourceModParser::TModsRange& range) { for (CSourceModParser::TModsCI it = range.first; it != range.second; ++it) { - TIntId pmid = NStr::StringToNumeric(it->value, NStr::fConvErr_NoThrow); + TEntrezId pmid = NStr::StringToNumeric(it->value, NStr::fConvErr_NoThrow); CRef pub(new CPub); pub->SetPmid().Set(pmid); CRef pubdesc(new CSeqdesc); diff --git 
a/c++/src/serial/grpc_integration/grpc_integration.cpp b/c++/src/serial/grpc_integration/grpc_integration.cpp index eb172a62..bebbb1d4 100644 --- a/c++/src/serial/grpc_integration/grpc_integration.cpp +++ b/c++/src/serial/grpc_integration/grpc_integration.cpp @@ -1,4 +1,4 @@ -/* $Id: grpc_integration.cpp 606576 2020-04-23 17:12:06Z ivanov $ +/* $Id: grpc_integration.cpp 608310 2020-05-14 12:35:38Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -458,15 +458,16 @@ bool CGRPCServerCallbacks::x_IsRealRequest(const TGRPCServerContext* sctx) /// (in order of priority): /// - Config file entry "[section] variable" /// - Environment variables: env_var_name (if not empty/NULL); -/// then "NCBI_CONFIG__
__"; then "grpc_proxy" +/// then "NCBI_CONFIG__
__"; then "GRPC_PROXY" /// - The hard-coded NCBI default "linkerd:4142" string g_NCBI_GRPC_GetAddress(const char* section, const char* variable, - const char* env_var_name) + const char* env_var_name, + int* value_source) { - auto addr = g_GetConfigString(section, variable, env_var_name, nullptr); + auto addr = g_GetConfigString(section, variable, env_var_name, nullptr, value_source); if ( addr.empty() ) { - addr = g_GetConfigString(nullptr, nullptr, "grpc_proxy", "linkerd:4142"); + addr = g_GetConfigString(nullptr, nullptr, "GRPC_PROXY", "linkerd:4142", value_source); } return addr; } diff --git a/c++/src/serial/rpcbase.cpp b/c++/src/serial/rpcbase.cpp index 4ea37a55..23157cd8 100644 --- a/c++/src/serial/rpcbase.cpp +++ b/c++/src/serial/rpcbase.cpp @@ -1,4 +1,4 @@ -/* $Id: rpcbase.cpp 604211 2020-03-24 16:03:08Z ivanov $ +/* $Id: rpcbase.cpp 615799 2020-09-03 18:56:59Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -37,14 +37,81 @@ BEGIN_NCBI_SCOPE +static string s_GetConfigString(const string& service, + const string& variable) +{ + if (service.empty() || variable.empty()) return kEmptyStr; + + string env_var = service + "__RPC_CLIENT__" + variable; + NStr::ToUpper(env_var); + const TXChar* str = NcbiSys_getenv(_T_XCSTRING(env_var.c_str())); + + if (str && *str) { + return _T_CSTRING(str); + } + + CNcbiApplicationGuard app = CNcbiApplication::InstanceGuard(); + if (app && app->HasLoadedConfig()) { + return app->GetConfig().Get(service + ".rpc_client", variable); + } + return kEmptyStr; +} + + +static unsigned int s_GetTryLimit(const string& service) +{ + string str = s_GetConfigString(service, "max_try"); + if (!str.empty()) { + try { + unsigned int ret = NStr::StringToNumeric(str); + return ret > 0 ? ret : 3; + } + catch (...) 
{ + ERR_POST(Warning << "Bad " << service << "/max_try value: " << str); + } + } + return 3; +} + + +static CTimeSpan s_GetRetryDelay(const string& service) +{ + CTimeSpan ret; + string str = s_GetConfigString(service, "retry_delay"); + if (!str.empty()) { + try { + double sec = NStr::StringToNumeric(str); + return CTimeSpan(sec > 0 ? sec : 0); + } + catch (...) { + ERR_POST(Warning << "Bad " << service << "/retry_delay value: " << str); + } + } + return ret; +} + + +CRPCClient_Base::CRPCClient_Base(const string& service, + ESerialDataFormat format) + : m_Format(format), + m_RetryDelay(s_GetRetryDelay(service)), + m_TryCount(0), + m_RecursionCount(0), + m_Service(service), + m_TryLimit(s_GetTryLimit(service)) +{ +} + + CRPCClient_Base::CRPCClient_Base(const string& service, - ESerialDataFormat format, - unsigned int retry_limit) + ESerialDataFormat format, + unsigned int try_limit) : m_Format(format), - m_RetryCount(0), + m_RetryDelay(s_GetRetryDelay(service)), + m_TryCount(0), m_RecursionCount(0), m_Service(service), - m_RetryLimit(retry_limit) + m_TryLimit(try_limit > 0 ? try_limit : 3) { } @@ -102,7 +169,7 @@ void CRPCClient_Base::SetAffinity(const string& affinity) { if (m_Affinity != affinity) { if (m_RecursionCount > 1) { - ERR_POST("Affinity can not be changed on a recursive request"); + ERR_POST("Affinity cannot be changed on a recursive request"); return; } Disconnect(); @@ -151,24 +218,26 @@ void CRPCClient_Base::x_Ask(const CSerialObject& request, CSerialObject& reply) { CMutexGuard LOCK(m_Mutex); if (m_RecursionCount == 0) { - m_RetryCount = 0; + m_TryCount = 0; } // Recursion counter needs to be decremented on both success and failure. CCounterGuard recursion_guard(&m_RecursionCount); - const string& request_name = ( request.GetThisTypeInfo() != NULL - ? ("("+request.GetThisTypeInfo()->GetName()+")") : "(no_request_type)"); + const string& request_name = request.GetThisTypeInfo() != NULL + ? 
("("+request.GetThisTypeInfo()->GetName()+")") + : "(no_request_type)"; // Reset headers from previous requests if any. m_RetryCtx.Reset(); - double max_span = m_RetryDelay.GetAsDouble()*m_RetryLimit; + double max_span = m_RetryDelay.GetAsDouble()*m_TryLimit; double span = max_span; bool limit_by_time = !m_RetryDelay.IsEmpty(); // Retry context can be either the default one (m_RetryCtx), or provided // through an exception. for (;;) { if ( IsCanceled() ) { - NCBI_THROW(CRPCClientException, eFailed, "Request canceled "+request_name); + NCBI_THROW(CRPCClientException, eFailed, + "Request canceled " + request_name); } try { SetAffinity(x_GetAffinity(request)); @@ -185,7 +254,8 @@ void CRPCClient_Base::x_Ask(const CSerialObject& request, CSerialObject& reply) } m_Stream->peek(); // send data, read response headers if (!m_Stream->good() && !m_Stream->eof()) { - NCBI_THROW(CRPCClientException, eFailed, "Connection stream is in bad state "+request_name); + NCBI_THROW(CRPCClientException, eFailed, + "Connection stream is in bad state " + request_name); } if (m_RetryCtx.IsSetContentOverride() && m_RetryCtx.GetContentOverride() == CHttpRetryContext::eFromResponse) { @@ -214,7 +284,7 @@ void CRPCClient_Base::x_Ask(const CSerialObject& request, CSerialObject& reply) // proceed to retry } else if ( !dynamic_cast(&e) - && !dynamic_cast(&e) ) { + && !dynamic_cast(&e) ) { // Not a retry related exception, abort. throw; } @@ -228,18 +298,18 @@ void CRPCClient_Base::x_Ask(const CSerialObject& request, CSerialObject& reply) // If using time limit, allow to make more than m_RetryLimit attempts // if the server has set shorter delay. - if ((!limit_by_time && ++m_RetryCount >= m_RetryLimit) || - !x_ShouldRetry(m_RetryCount)) { + if ((!limit_by_time && ++m_TryCount >= m_TryLimit) || + !x_ShouldRetry(m_TryCount)) { NCBI_THROW(CRPCClientException, eFailed, - "Failed to receive reply after " + - NStr::NumericToString(m_RetryCount) + - (m_RetryCount == 1 ? 
" try" : " tries") + - " " + request_name ); + "Failed to receive reply after " + + NStr::NumericToString(m_TryCount) + + (m_TryCount == 1 ? " try " : " tries ") + + request_name ); } if ( m_RetryCtx.IsSetStop() ) { NCBI_THROW(CRPCClientException, eFailed, - "Retrying request stopped by the server: " + - m_RetryCtx.GetStopReason() + " " + request_name); + "Retrying request stopped by the server: " + + m_RetryCtx.GetStopReason() + ' ' + request_name); } CTimeSpan delay = x_GetRetryDelay(span); if ( !delay.IsEmpty() ) { @@ -248,24 +318,26 @@ void CRPCClient_Base::x_Ask(const CSerialObject& request, CSerialObject& reply) span -= delay.GetAsDouble(); if (limit_by_time && span <= 0) { NCBI_THROW(CRPCClientException, eFailed, - "Failed to receive reply in " + - CTimeSpan(max_span).AsSmartString() + - " " + request_name); + "Failed to receive reply in " + + CTimeSpan(max_span).AsSmartString() + + ' ' + request_name); } } // Always reconnect on retry. if ( IsCanceled() ) { - NCBI_THROW(CRPCClientException, eFailed, "Request canceled "+request_name); + NCBI_THROW(CRPCClientException, eFailed, + "Request canceled " + request_name); } try { Reset(); - } STD_CATCH_ALL_XX(Serial_RPCClient, 1 ,"CRPCClient_Base::Reset()"+request_name); + } STD_CATCH_ALL_XX(Serial_RPCClient, 1, + "CRPCClient_Base::Reset() " + request_name); } // Reset retry context when done. m_RetryCtx.Reset(); // If there were any retries, force disconnect to prevent using old // retry url, args etc. with the next request. 
- if ( m_RetryCount > 0 && m_RecursionCount <= 1 ) { + if ( m_TryCount > 0 && m_RecursionCount <= 1 ) { Disconnect(); } } @@ -274,7 +346,7 @@ void CRPCClient_Base::x_Ask(const CSerialObject& request, CSerialObject& reply) bool CRPCClient_Base::x_ShouldRetry(unsigned int tries) /* NCBI_FAKE_WARNING */ { _TRACE("CRPCClient_Base::x_ShouldRetry: retrying after " << tries - << " failures"); + << " failure(s)"); return true; } diff --git a/c++/src/util/format_guess.cpp b/c++/src/util/format_guess.cpp index 854518ad..9f64e4e4 100644 --- a/c++/src/util/format_guess.cpp +++ b/c++/src/util/format_guess.cpp @@ -1,4 +1,4 @@ -/* $Id: format_guess.cpp 600741 2020-01-27 15:56:56Z foleyjp $ +/* $Id: format_guess.cpp 612523 2020-07-23 11:23:30Z ivanov $ * =========================================================================== * * PUBLIC DOMAIN NOTICE @@ -152,9 +152,9 @@ static void init_symbol_type_table(void) } -// Must list all EFormats except eUnknown and eFormat_max. +// Must list all *supported* EFormats except eUnknown and eFormat_max. // Will cause assertion if violated! -int CFormatGuess::s_CheckOrder[] = +vector CFormatGuess::sm_CheckOrder = { eBam, // must precede eGZip! eZip, @@ -180,74 +180,107 @@ int CFormatGuess::s_CheckOrder[] = eHgvs, eDistanceMatrix, eFlatFileSequence, + eFlatFileUniProt, + eFlatFileEna, + eFlatFileGenbank, eFiveColFeatureTable, eSnpMarkers, eFasta, eTextASN, eAlignment, eTaxplot, - ePhrapAce, eTable, eBinaryASN, + ePhrapAce, eUCSCRegion, - eJSON + eJSON, }; // This array must stay in sync with enum EFormat, but that's not // supposed to change in the middle anyway, so the explicit size // should suffice to avoid accidental skew. 
-const char* const CFormatGuess::sm_FormatNames[CFormatGuess::eFormat_max] = -{ - "unknown", - "binary ASN.1", - "RepeatMasker", - "GFF/GTF Poisoned", - "Glimmer3", - "AGP", - "XML", - "WIGGLE", - "BED", - "BED15", - "Newick", - "alignment", - "distance matrix", - "flat-file sequence", - "five-column feature table", - "SNP Markers", - "FASTA", - "text ASN.1", - "Taxplot", - "Phrap ACE", - "table", - "GTF", - "GFF3", - "GFF2", - "HGVS", - "GVF", - "zip", - "gzip", - "bzip2", - "lzo", - "SRA", - "BAM", - "VCF", - "UCSC Region", - "GFF Augustus", - "JSON", - "PSL", +const CFormatGuess::NAME_MAP CFormatGuess::sm_FormatNames = { + {eUnknown, "unknown"}, + {eBinaryASN, "binary ASN.1"}, + {eRmo, "RepeatMasker"}, + {eGtf_POISENED, "GFF/GTF Poisoned"}, + {eGlimmer3, "Glimmer3"}, + {eAgp, "AGP"}, + {eXml, "XML"}, + {eWiggle, "WIGGLE"}, + {eBed, "BED"}, + {eBed15, "BED15"}, + {eNewick, "Newick"}, + {eAlignment, "alignment"}, + {eDistanceMatrix, "distance matrix"}, + {eFlatFileSequence, "flat-file sequence"}, + {eFiveColFeatureTable, "five-column feature table"}, + {eSnpMarkers, "SNP Markers"}, + {eFasta, "FASTA"}, + {eTextASN, "text ASN.1"}, + {eTaxplot, "Taxplot"}, + {ePhrapAce, "Phrap ACE"}, + {eTable, "table"}, + {eGtf, "GTF"}, + {eGff3, "GFF3"}, + {eGff2, "GFF2"}, + {eHgvs, "HGVS"}, + {eGvf, "GVF"}, + {eZip, "zip"}, + {eGZip, "gzip"}, + {eBZip2, "bzip2"}, + {eLzo, "lzo"}, + {eSra, "SRA"}, + {eBam, "BAM"}, + {eVcf, "VCF"}, + {eUCSCRegion, "UCSC Region"}, + {eGffAugustus, "GFF Augustus"}, + {eJSON, "JSON"}, + {ePsl, "PSL"}, + {eAltGraphX, "altGraphX"}, + {eBed5FloatScore, "BED5 float score"}, + {eBedGraph, "BED graph"}, + {eBedRnaElements, "BED Rna elements"}, + {eBigBarChart, "bigBarChart"}, + {eBigBed, "BigBED"}, + {eBigPsl, "BigPSL"}, + {eBigChain, "BigChain"}, + {eBigMaf, "BigMaf"}, + {eBigWig, "BigWig"}, + {eBroadPeak, "BroadPeak"}, + {eChain, "Chain"}, + {eClonePos, "ClonePos"}, + {eColoredExon, "ColoredExon"}, + {eCtgPos, "CtgPos"}, + {eDownloadsOnly, 
"DowloadsOnly"}, + {eEncodeFiveC, "EncodeFiveC"}, + {eExpRatio, "ExpRatio"}, + {eFactorSource, "FactorSource"}, + {eGenePred, "GenePred"}, + {eLd2, "Ld2"}, + {eNarrowPeak, "NarrowPeak"}, + {eNetAlign, "NetAlign"}, + {ePeptideMapping, "PeptideMapping"}, + {eRmsk, "Rmsk"}, + {eSnake, "Snake"}, + {eVcfTabix, "VcfTabix"}, + {eWigMaf, "WigMaf"}, + {eFlatFileGenbank, "Genbank FlatFile"}, + {eFlatFileEna, "ENA FlatFile"}, + {eFlatFileUniProt, "UniProt FlatFile"}, }; const char* CFormatGuess::GetFormatName(EFormat format) { - unsigned int i = static_cast(format); - if (i >= static_cast (eFormat_max)) { + auto formatIt = sm_FormatNames.find(format); + if (formatIt == sm_FormatNames.end()) { NCBI_THROW(CUtilException, eWrongData, "CFormatGuess::GetFormatName: out-of-range format value " - + NStr::IntToString(i)); + + NStr::IntToString(format)); } - return sm_FormatNames[i]; + return formatIt->second; } @@ -381,6 +414,14 @@ CFormatGuess::~CFormatGuess() } } +// ---------------------------------------------------------------------------- +bool +CFormatGuess::IsSupportedFormat(EFormat format) +{ + return (std::find(sm_CheckOrder.begin(), sm_CheckOrder.end(), format) + != sm_CheckOrder.end()); +} + // ---------------------------------------------------------------------------- CFormatGuess::EFormat CFormatGuess::GuessFormat( EMode ) @@ -411,12 +452,12 @@ CFormatGuess::GuessFormat( } EMode mode = eQuick; - size_t uFormatCount = ArraySize(s_CheckOrder); + size_t uFormatCount = sm_CheckOrder.size(); // First, try to use hints if ( !m_Hints.IsEmpty() ) { for (size_t f = 0; f < uFormatCount; ++f) { - EFormat fmt = EFormat( s_CheckOrder[f] ); + EFormat fmt = EFormat( sm_CheckOrder[f] ); if (m_Hints.IsPreferred(fmt) && x_TestFormat(fmt, mode)) { return fmt; } @@ -425,7 +466,7 @@ CFormatGuess::GuessFormat( // Check other formats, skip the ones that are disabled through hints for (size_t f = 0; f < uFormatCount; ++f) { - EFormat fmt = EFormat( s_CheckOrder[f] ); + EFormat fmt = EFormat( 
sm_CheckOrder[f] ); if ( ! m_Hints.IsDisabled(fmt) && x_TestFormat(fmt, mode) ) { return fmt; } @@ -532,6 +573,12 @@ bool CFormatGuess::x_TestFormat(EFormat format, EMode mode) return TestFormatAugustus( mode ); case eJSON: return TestFormatJson( mode ); + case eFlatFileGenbank: + return TestFormatFlatFileGenbank( mode ); + case eFlatFileEna: + return TestFormatFlatFileEna( mode ); + case eFlatFileUniProt: + return TestFormatFlatFileUniProt( mode ); default: NCBI_THROW( CCoreException, eInvalidArg, "CFormatGuess::x_TestFormat(): Unsupported format ID (" + @@ -543,13 +590,8 @@ bool CFormatGuess::x_TestFormat(EFormat format, EMode mode) void CFormatGuess::Initialize() { - NCBI_ASSERT(eFormat_max-2 == sizeof( s_CheckOrder ) / sizeof( int ), - "Indices in s_CheckOrder do not match format count ---" - "update s_CheckOrder to list all formats" - ); - NCBI_ASSERT(eFormat_max == sizeof(sm_FormatNames) / sizeof(const char*) - && sm_FormatNames[eFormat_max - 1] != NULL, - "sm_FormatNames does not list all possible formats"); + NCBI_ASSERT(eFormat_max == sm_FormatNames.size(), + "sm_FormatNames does not list all possible formats"); m_pTestBuffer = 0; m_bStatsAreValid = false; @@ -700,6 +742,33 @@ bool CFormatGuess::TestFormatRepeatMasker( IsInputRepeatMaskerWithoutHeader(); } + +// ---------------------------------------------------------------------------- + +static bool s_LooksLikeNucSeqData(const string& line, size_t minLength=10) { + if (line.size()(c); + if (symbol_type_table[index] & fDNA_Main_Alphabet) { + ++nucCount; + } + continue; + } + + if (!isspace(c)) { + return false; + } + } + + return (nucCount/line.size() > 0.9); +} + + // ---------------------------------------------------------------------------- bool CFormatGuess::TestFormatPhrapAce( @@ -709,9 +778,19 @@ CFormatGuess::TestFormatPhrapAce( return false; } - ITERATE( list, it, m_TestLines ) { - if ( IsLinePhrapId( *it ) ) { - return true; + if (memchr(m_pTestBuffer, 0, m_iTestDataSize)) { // Cannot contain 
NuLL bytes + return false; // RW-1102 + } + + bool foundId = false; + for (const auto& line : m_TestLines) { + if (foundId) { + if (s_LooksLikeNucSeqData(line)) { + return true; + } + } + else if (IsLinePhrapId(line)) { + foundId = true; } } return false; @@ -1994,6 +2073,381 @@ bool CFormatGuess::TestFormatPsl(EMode mode) return (uPslLineCount != 0); } +// ---------------------------------------------------------------------------- +bool +GenbankGetKeywordLine( + list::iterator& lineIt, + list::iterator endIt, + string& keyword, + string& data) +// ---------------------------------------------------------------------------- +{ + if (lineIt == endIt) { + return false; + } + if (lineIt->size() > 79) { + return false; + } + + vector validIndents = {0, 2, 3, 5, 12, 21}; + auto firstNotBlank = lineIt->find_first_not_of(" "); + while (firstNotBlank != 0) { + if (std::find(validIndents.begin(), validIndents.end(), firstNotBlank) == + validIndents.end()) { + auto firstNotBlankOrDigit = lineIt->find_first_not_of(" 1234567890"); + if (firstNotBlankOrDigit != 10) { + return false; + } + } + lineIt++; + if (lineIt == endIt) { + return false; + } + firstNotBlank = lineIt->find_first_not_of(" "); + } + try { + NStr::SplitInTwo( + *lineIt, " ", keyword, data, NStr::fSplit_MergeDelimiters); + } + catch (CException&) { + return false; + } + lineIt++; + return true; +} + +// ---------------------------------------------------------------------------- +bool CFormatGuess::TestFormatFlatFileGenbank( + EMode /*unused*/) +{ + // see ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt + + if ( ! EnsureStats() || ! 
EnsureSplitLines() ) { + return false; + } + + // smell test: + // note: sample size at least 8000 characters, line length soft limited to + // 80 characters + if (m_TestLines.size() < 9) { // number of required records + return false; + } + + string keyword, data, lookingFor; + auto recordIt = m_TestLines.begin(); + auto endIt = m_TestLines.end(); + NStr::SplitInTwo( + *recordIt, " ", keyword, data, NStr::fSplit_MergeDelimiters); + + lookingFor = "LOCUS"; // exactly one + if (keyword != lookingFor) { + return false; + } + recordIt++; + if (!GenbankGetKeywordLine(recordIt, endIt, keyword, data)) { + return false; + } + + lookingFor = "DEFINITION"; // one or more + if (keyword != lookingFor) { + return false; + } + while (keyword == lookingFor) { + if (!GenbankGetKeywordLine(recordIt, endIt, keyword, data)) { + return false; + } + } + + lookingFor = "ACCESSION"; // one or more + if (keyword != lookingFor) { + return false; + } + while (keyword == lookingFor) { + if (!GenbankGetKeywordLine(recordIt, endIt, keyword, data)) { + return false; + } + } + + bool nidSeen = false; + lookingFor = "NID"; // zero or one, can come before or after VERSION + if (keyword == lookingFor) { + nidSeen = true; + if (!GenbankGetKeywordLine(recordIt, endIt, keyword, data)) { + return false; + } + } + + lookingFor = "VERSION"; // exactly one + if (keyword != lookingFor) { + return false; + } + if (!GenbankGetKeywordLine(recordIt, endIt, keyword, data)) { + return false; + } + + if (!nidSeen) { + lookingFor = "NID"; // zero or one + if (keyword == lookingFor) { + if (!GenbankGetKeywordLine(recordIt, endIt, keyword, data)) { + return false; + } + } + } + + lookingFor = "PROJECT"; // zero or more + while (keyword == lookingFor) { + if (!GenbankGetKeywordLine(recordIt, endIt, keyword, data)) { + return false; + } + } + + lookingFor = "DBLINK"; // zero or more + while (keyword == lookingFor) { + if (!GenbankGetKeywordLine(recordIt, endIt, keyword, data)) { + return false; + } + } + + 
lookingFor = "KEYWORDS"; // one or more + if (keyword != lookingFor) { + return false; + } + + // I am convinced now. There may be flaws farther down but this input + // definitely wants to be a Genbank flat file. + return true; +} + +// ---------------------------------------------------------------------------- +bool +EnaGetLineData( + list::iterator& lineIt, + list::iterator endIt, + string& lineCode, + string& lineData) +// ---------------------------------------------------------------------------- +{ + while (lineIt != endIt && NStr::StartsWith(*lineIt, "XX")) { + lineIt++; + } + if (lineIt == endIt) { + return false; + } + try { + NStr::SplitInTwo( + *lineIt, " ", lineCode, lineData, NStr::fSplit_MergeDelimiters); + } + catch(CException&) { + lineCode = *lineIt; + lineData = ""; + } + lineIt++; + return true; +} + +// ---------------------------------------------------------------------------- +bool CFormatGuess::TestFormatFlatFileEna( + EMode /*unused*/) +{ + // see: ftp://ftp.ebi.ac.uk/pub/databases/ena/sequence/release/doc/usrman.txt + + if ( ! EnsureStats() || ! 
EnsureSplitLines() ) { + return false; + } + + // smell test: + // note: sample size at least 8000 characters, line length soft limited to + // 78 characters + if (m_TestLines.size() < 19) { // number of required records + return false; + } + + string lineCode, lineData, lookingFor; + auto recordIt = m_TestLines.begin(); + auto endIt = m_TestLines.end(); + NStr::SplitInTwo( + *recordIt, " ", lineCode, lineData, NStr::fSplit_MergeDelimiters); + + lookingFor = "ID"; // exactly one + if (lineCode != lookingFor) { + return false; + } + recordIt++; + + lookingFor = "AC"; // one or more + if (!EnaGetLineData(recordIt, endIt, lineCode, lineData)) { + return false; + } + if (lineCode != lookingFor) { + return false; + } + while (lineCode == lookingFor) { + if (!EnaGetLineData(recordIt, endIt, lineCode, lineData)) { + return false; + } + } + + lookingFor = "PR"; // zero or more + while (lineCode == lookingFor) { + if (!EnaGetLineData(recordIt, endIt, lineCode, lineData)) { + return false; + } + } + + lookingFor = "DT"; // two (first hard difference from UniProt) + for (int i = 0; i < 2; ++i) { + if (lineCode != lookingFor) { + return false; + } + if (!EnaGetLineData(recordIt, endIt, lineCode, lineData)) { + return false; + } + } + + lookingFor = "DE"; // one or more + if (lineCode != lookingFor) { + return false; + } + while (lineCode == lookingFor) { + if (!EnaGetLineData(recordIt, endIt, lineCode, lineData)) { + return true; + } + } + + lookingFor = "KW"; // one or more + if (lineCode != lookingFor) { + return false; + } + while (lineCode == lookingFor) { + if (!EnaGetLineData(recordIt, endIt, lineCode, lineData)) { + return true; + } + } + + lookingFor = "OS"; // one or more + if (lineCode != lookingFor) { + return false; + } + while (lineCode == lookingFor) { + if (!EnaGetLineData(recordIt, endIt, lineCode, lineData)) { + return true; + } + } + + lookingFor = "OC"; // one or more + if (lineCode != lookingFor) { + return false; + } + while (lineCode == lookingFor) { + 
if (!EnaGetLineData(recordIt, endIt, lineCode, lineData)) { + return true; + } + } + + // once here it's Ena or someone is messing with me + return true; +} + +// ---------------------------------------------------------------------------- +bool +UniProtGetLineData( + list::iterator& lineIt, + list::iterator endIt, + string& lineCode, + string& lineData) +// ---------------------------------------------------------------------------- +{ + if (lineIt == endIt) { + return false; + } + try { + NStr::SplitInTwo( + *lineIt, " ", lineCode, lineData, NStr::fSplit_MergeDelimiters); + } + catch(CException&) { + lineCode = *lineIt; + lineData = ""; + } + lineIt++; + return true; +} + +// ---------------------------------------------------------------------------- +bool CFormatGuess::TestFormatFlatFileUniProt( + EMode /*unused*/) +{ + // see: https://web.expasy.org/docs/userman.html#genstruc + + if ( ! EnsureStats() || ! EnsureSplitLines() ) { + return false; + } + + // smell test: + // note: sample size at least 8000 characters, line length soft limited to + // 75 characters + if (m_TestLines.size() < 15) { // number of required records + return false; + } + + // note: + // we are only trying to assert that the input is *meant* to be uniprot. + // we should not be in the business of validation - this should happen + // downstream, with better error messages than we could possibly provide here. 
+ string lineCode, lineData, lookingFor; + auto recordIt = m_TestLines.begin(); + auto endIt = m_TestLines.end(); + NStr::SplitInTwo( + *recordIt, " ", lineCode, lineData, NStr::fSplit_MergeDelimiters); + + lookingFor = "ID"; // exactly one + if (lineCode != lookingFor) { + return false; + } + recordIt++; + + lookingFor = "AC"; // one or more + if (!UniProtGetLineData(recordIt, endIt, lineCode, lineData)) { + return false; + } + if (lineCode != lookingFor) { + return false; + } + while (lineCode == lookingFor) { + if (!UniProtGetLineData(recordIt, endIt, lineCode, lineData)) { + return false; + } + } + + lookingFor = "DT"; // three (first hard difference from ENA) + for (int i = 0; i < 3; ++i) { + if (lineCode != lookingFor) { + return false; + } + if (!UniProtGetLineData(recordIt, endIt, lineCode, lineData)) { + return false; + } + } + + + lookingFor = "DE"; // one or more + if (lineCode != lookingFor) { + return false; + } + while (lineCode == lookingFor) { + if (!UniProtGetLineData(recordIt, endIt, lineCode, lineData)) { + return true; + } + } + + // optional "GN" line or first "OS" line + if (lineCode != "GN" && lineCode != "OS") { + return false; + } + + // once here it's UniProt or someone is messing with me + return true; +} + // ---------------------------------------------------------------------------- bool CFormatGuess::TestFormatVcf( EMode)