New upstream version 2.11.0+ds

author Aaron M. Ucko <ucko@debian.org>

Wed, 27 Jan 2021 01:57:37 +0000 (20:57 -0500)

committer Aaron M. Ucko <ucko@debian.org>

Wed, 27 Jan 2021 01:57:37 +0000 (20:57 -0500)
author Aaron M. Ucko <ucko@debian.org>
Wed, 27 Jan 2021 01:57:37 +0000 (20:57 -0500)
committer Aaron M. Ucko <ucko@debian.org>
Wed, 27 Jan 2021 01:57:37 +0000 (20:57 -0500)
diff --git a/c++/include/algo/blast/api/blast_aux.hpp b/c++/include/algo/blast/api/blast_aux.hpp

index 778a47fbcdcfb76127f585803c0add6921373b0c..ad815ff7404b3954d75a6365fb3eaf6e5ba8154c 100644 (file)
--- a/c++/include/algo/blast/api/blast_aux.hpp
+++ b/c++/include/algo/blast/api/blast_aux.hpp
@@ -1,4 +1,4 @@
-/*  $Id: blast_aux.hpp 507721 2016-07-21 14:07:53Z fongah2 $
+/*  $Id: blast_aux.hpp 615182 2020-08-28 04:28:48Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -43,6 +43,7 @@
  #include <objects/seqloc/Seq_interval.hpp>
  #include <util/range.hpp>       // For TSeqRange
  #include <objects/seq/seqlocinfo.hpp>
+#include <objects/blast/Blast4_error.hpp>
  #include <objmgr/scope.hpp>
  
  // BLAST includes
@@ -58,6 +59,7 @@
  #include <algo/blast/core/blast_psi.h>
  #include <algo/blast/core/blast_hspstream.h>
  
+
  BEGIN_NCBI_SCOPE
  
  BEGIN_SCOPE(objects)
@@ -240,6 +242,28 @@ private:
      static Uint4 m_RefCounter;
  };
  
+/// Class to capture message from diag handler
+class NCBI_XBLAST_EXPORT CBlastAppDiagHandler : public CDiagHandler
+{
+public:
+       /// Constructor
+       CBlastAppDiagHandler():m_handler(GetDiagHandler(true)), m_save (true) {}
+       /// Destructor
+       ~CBlastAppDiagHandler();
+       /// Save and post diag message
+       virtual void Post (const SDiagMessage & mess);
+       /// Reset message buffer, erase all saved messages
+       void ResetMessages(void);
+       /// Call to turn off saving diag messages, discard all saved messages
+       void DoNotSaveMessages(void);
+       /// Return list of saved diag messages
+       list<CRef<objects::CBlast4_error> > & GetMessages(void) { return m_messages;}
+private :
+       CDiagHandler * m_handler;
+       list<CRef<objects::CBlast4_error> > m_messages;
+       bool m_save;
+};
+
  
  /** Declares class to handle deallocating of the structure using the appropriate
   * function
diff --git a/c++/include/algo/blast/api/blast_node.hpp b/c++/include/algo/blast/api/blast_node.hpp

new file mode 100644 (file)

index 0000000..91ae6f3
--- /dev/null
+++ b/c++/include/algo/blast/api/blast_node.hpp
@@ -0,0 +1,195 @@
+/*  $Id: blast_node.hpp 615348 2020-08-31 15:38:28Z fukanchi $
+ * ===========================================================================
+ *
+ *                            PUBLIC DOMAIN NOTICE
+ *               National Center for Biotechnology Information
+ *
+ *  This software/database is a "United States Government Work" under the
+ *  terms of the United States Copyright Act.  It was written as part of
+ *  the author's official duties as a United States Government employee and
+ *  thus cannot be copyrighted.  This software/database is freely available
+ *  to the public for use. The National Library of Medicine and the U.S.
+ *  Government have not placed any restriction on its use or reproduction.
+ *
+ *  Although all reasonable efforts have been taken to ensure the accuracy
+ *  and reliability of the software and data, the NLM and the U.S.
+ *  Government do not and cannot warrant the performance or results that
+ *  may be obtained by using this software or data. The NLM and the U.S.
+ *  Government disclaim all warranties, express or implied, including
+ *  warranties of performance, merchantability or fitness for any particular
+ *  purpose.
+ *
+ *  Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file blast_node.hpp
+ *  BLAST node api
+ */
+
+#ifndef ALGO_BLAST_API___BLAST_NODE__HPP
+#define ALGO_BLAST_API___BLAST_NODE__HPP
+
+#include <algo/blast/core/blast_export.h>
+#include <algo/blast/api/blast_aux.hpp>
+
+BEGIN_NCBI_SCOPE
+BEGIN_SCOPE(blast)
+
+class NCBI_XBLAST_EXPORT CBlastNodeMsg : public CObject
+{
+public:
+       enum EMsgType {
+               eRunRequest,
+               ePostResult,
+               eErrorExit,
+               ePostLog
+       };
+       CBlastNodeMsg(EMsgType type, void * obj_ptr): m_MsgType(type), m_Obj(obj_ptr) {}
+       EMsgType GetMsgType() { return m_MsgType; }
+       void * GetMsgBody() { return m_Obj; }
+private:
+       EMsgType m_MsgType;
+       void * m_Obj;
+};
+
+class NCBI_XBLAST_EXPORT CBlastNodeMailbox : public CObject
+{
+public:
+       CBlastNodeMailbox(int node_num, CConditionVariable & notify): m_NodeNum(node_num), m_Notify(notify){}
+       void SendMsg(CRef<CBlastNodeMsg> msg);
+       CRef<CBlastNodeMsg> ReadMsg()
+       {
+               CFastMutexGuard guard(m_Mutex);
+               CRef<CBlastNodeMsg> rv;
+               if (! m_MsgQueue.empty()){
+                       rv.Reset(m_MsgQueue.front());
+                       m_MsgQueue.pop_front();
+               }
+               return rv;
+       }
+       void UnreadMsg(CRef<CBlastNodeMsg> msg) { CFastMutexGuard guard(m_Mutex); m_MsgQueue.push_front(msg);}
+       int GetNumMsgs () { CFastMutexGuard guard(m_Mutex); return m_MsgQueue.size(); }
+       int GetNodeNum() { return m_NodeNum; }
+       ~CBlastNodeMailbox() { m_MsgQueue.resize(0); }
+private:
+       int m_NodeNum;
+       CConditionVariable & m_Notify;
+       list <CRef<CBlastNodeMsg> > m_MsgQueue;
+       CFastMutex m_Mutex;
+};
+
+class NCBI_XBLAST_EXPORT CBlastNode : public CThread
+{
+public :
+       enum EState {
+               eInitialized,
+               eRunning,
+               eError,
+               eDone,
+       };
+       CBlastNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args,
+                           CBlastAppDiagHandler & bah, EProgram blast_program,
+                           int query_index, int num_queries, CBlastNodeMailbox * mailbox);
+
+        virtual int GetBlastResults(string & results) = 0;
+        int GetNodeNum() { return m_NodeNum;}
+        EState GetState() { return m_State; }
+        int GetStatus() { return m_Status; }
+        const CArgs & GetArgs() { return m_Args; }
+        CBlastAppDiagHandler & GetDiagHandler() { return m_Bah; }
+        const CNcbiArguments & GetArguments() { return m_NcbiArgs; }
+        void SendMsg(CBlastNodeMsg::EMsgType msg_type, void* ptr = NULL);
+        string & GetNodeIdStr() { return m_NodeIdStr;}
+        int GetNumOfQueries() {return m_NumOfQueries;}
+        int GetQueriesLength() {return m_QueriesLength;}
+protected:
+       virtual ~CBlastNode(void);
+       virtual void* Main(void) = 0;
+       void SetState(EState state) { m_State = state; }
+       void SetStatus(int status) { m_Status = status; }
+       void SetQueriesLength(int l) { m_QueriesLength = l;}
+       int m_NodeNum;
+private:
+       const CNcbiArguments & m_NcbiArgs;
+       const CArgs & m_Args;
+        CBlastAppDiagHandler & m_Bah;
+       EProgram m_BlastProgram;
+       int m_QueryIndex;
+       int m_NumOfQueries;
+       string m_NodeIdStr;
+       CRef<CBlastNodeMailbox> m_Mailbox;
+       EState m_State;
+       int m_Status;
+       int m_QueriesLength;
+};
+
+
+class NCBI_XBLAST_EXPORT CBlastMasterNode
+{
+public:
+       CBlastMasterNode(CNcbiOstream & out_stream, int num_threads);
+       typedef map<int, CRef<CBlastNodeMailbox> > TPostOffice;
+       typedef map<int, CRef<CBlastNode> > TRegisteredNodes;
+       typedef map<int, double> TActiveNodes;
+       typedef map<int, CRef<CBlastNodeMsg> > TFormatQueue;
+       void RegisterNode(CBlastNode * node, CBlastNodeMailbox * mailbox);
+       int GetNumNodes() { return m_RegisteredNodes.size();}
+       int IsFull();
+       void Shutdown() { m_MaxNumNodes = -1; }
+       bool Processing();
+       int IsActive()
+       {
+               if ((m_MaxNumNodes < 0) && (m_RegisteredNodes.size() == 0)){
+                       return false;
+               }
+               return true;
+       }
+       void FormatResults();
+       CConditionVariable & GetBuzzer() {return m_NewEvent;}
+       ~CBlastMasterNode() {}
+       int GetNumOfQueries() { return m_NumQueries; }
+       Int8 GetQueriesLength() { return m_QueriesLength; }
+       int GetNumErrStatus() { return m_NumErrStatus; }
+private:
+       void x_WaitForNewEvent();
+
+       CNcbiOstream & m_OutputStream;
+       int m_MaxNumThreads;
+       int m_MaxNumNodes;
+       CFastMutex m_Mutex;
+       CStopWatch m_StopWatch;
+       TPostOffice m_PostOffice;
+       TRegisteredNodes m_RegisteredNodes;
+       TActiveNodes m_ActiveNodes;
+       TFormatQueue m_FormatQueue;
+       CConditionVariable m_NewEvent;
+       int m_NumErrStatus;
+       int m_NumQueries;
+       Int8 m_QueriesLength;
+};
+
+
+class NCBI_XBLAST_EXPORT CBlastNodeInputReader : public CStreamLineReader
+{
+public:
+
+       CBlastNodeInputReader(CNcbiIstream& is, int batch_size, int est_avg_len) :
+               CStreamLineReader(is), m_QueryBatchSize(batch_size), m_EstAvgQueryLength(est_avg_len), m_QueryCount(0) {}
+
+       int GetQueryBatch(string & queries, int & query_no);
+
+private:
+       const int m_QueryBatchSize;
+       const int m_EstAvgQueryLength;
+       int m_QueryCount;
+};
+
+END_SCOPE(blast)
+END_NCBI_SCOPE
+
+#endif /* ALGO_BLAST_API___BLAST_NODE__HPP */
diff --git a/c++/include/algo/blast/api/blast_usage_report.hpp b/c++/include/algo/blast/api/blast_usage_report.hpp

new file mode 100644 (file)

index 0000000..777605b
--- /dev/null
+++ b/c++/include/algo/blast/api/blast_usage_report.hpp
@@ -0,0 +1,120 @@
+/*  $Id: blast_usage_report.hpp 617231 2020-09-28 18:27:17Z ivanov $
+ * ===========================================================================
+ *
+ *                            PUBLIC DOMAIN NOTICE
+ *               National Center for Biotechnology Information
+ *
+ *  This software/database is a "United States Government Work" under the
+ *  terms of the United States Copyright Act.  It was written as part of
+ *  the author's official duties as a United States Government employee and
+ *  thus cannot be copyrighted.  This software/database is freely available
+ *  to the public for use. The National Library of Medicine and the U.S.
+ *  Government have not placed any restriction on its use or reproduction.
+ *
+ *  Although all reasonable efforts have been taken to ensure the accuracy
+ *  and reliability of the software and data, the NLM and the U.S.
+ *  Government do not and cannot warrant the performance or results that
+ *  may be obtained by using this software or data. The NLM and the U.S.
+ *  Government disclaim all warranties, express or implied, including
+ *  warranties of performance, merchantability or fitness for any particular
+ *  purpose.
+ *
+ *  Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file blast_usage_report.hpp
+ *  BLAST usage report api
+ */
+
+#ifndef ALGO_BLAST_API___BLAST_USAGE_REPORT__HPP
+#define ALGO_BLAST_API___BLAST_USAGE_REPORT__HPP
+
+#include <connect/ncbi_usage_report.hpp>
+#include <algo/blast/core/blast_export.h>
+
+BEGIN_NCBI_SCOPE
+BEGIN_SCOPE(blast)
+
+class NCBI_XBLAST_EXPORT CBlastUsageReport : public CUsageReport
+{
+
+public:
+       enum EUsageParams {
+               eApp,
+               eVersion,
+               eProgram,
+               eTask,
+               eExitStatus,
+               eRunTime,
+               eDBName,
+               eDBLength,
+               eDBNumSeqs,
+               eDBDate,
+               eBl2seq,
+               eNumSubjects,
+               eSubjectsLength,
+               eNumQueries,
+               eTotalQueryLength,
+               eEvalueThreshold,
+               eNumThreads,
+               eHitListSize,
+               eOutputFmt,
+               eTaxIdList,
+               eNegTaxIdList,
+               eGIList,
+               eNegGIList,
+               eSeqIdList,
+               eNegSeqIdList,
+               eIPGList,
+               eNegIPGList,
+               eMaskAlgo,
+               eCompBasedStats,
+               eRange,
+               eMTMode,
+               eNumQueryBatches,
+               eNumErrStatus,
+               ePSSMInput,
+               eConverged,
+               eArchiveInput,
+               eRIDInput,
+               eDBInfo,
+               eDBTaxInfo,
+               eDBEntry,
+               eDBDumpAll,
+               eDBType,
+               eInputType,
+               eParseSeqIDs,
+               eSeqType,
+               eDBTest,
+               eDBAliasMode,
+               eDocker,
+               eGCP,
+               eAWS,
+               eELBJobId,
+               eELBBatchNum
+       };
+
+       CBlastUsageReport();
+       ~CBlastUsageReport();
+       void AddParam(EUsageParams p, int val);
+       void AddParam(EUsageParams p, const string & val);
+       void AddParam(EUsageParams p, const double & val);
+       void AddParam(EUsageParams p, Int8 val);
+       void AddParam(EUsageParams p, bool val);
+
+private:
+       void x_CheckBlastUsageEnv();
+       string x_EUsageParmsToString(EUsageParams p);
+       void x_CheckRunEnv();
+       CUsageReportParameters m_Params;
+};
+
+END_SCOPE(blast)
+END_NCBI_SCOPE
+
+#endif /* ALGO_BLAST_API___BLAST_USAGE_REPORT__HPP */
diff --git a/c++/include/algo/blast/blastinput/blast_input.hpp b/c++/include/algo/blast/blastinput/blast_input.hpp

index 68fad00fb7c1302a7a908838a8278dc37ad4ac3a..e8146bfc072275f32f775a222249b7e020ebc090 100644 (file)
--- a/c++/include/algo/blast/blastinput/blast_input.hpp
+++ b/c++/include/algo/blast/blastinput/blast_input.hpp
@@ -1,4 +1,4 @@
-/*  $Id: blast_input.hpp 575325 2018-11-27 18:22:00Z ucko $
+/*  $Id: blast_input.hpp 615335 2020-08-31 15:36:38Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -310,11 +310,11 @@ public:
      ///               be in a batch of converted sequences
      ///
      CBlastInput(CBlastInputSource* source, int batch_size = kMax_Int)
-        : m_Source(source), m_BatchSize(batch_size) {}
+        : m_Source(source), m_BatchSize(batch_size), m_NumSeqs(0), m_TotalLength(0) {}
  
      /// Destructor
      ///
-    ~CBlastInput() {}
+    ~CBlastInput(){}
  
      /// Read and convert all the sequences from the source
      /// @param scope CScope object to use in return value [in]
@@ -357,6 +357,8 @@ public:
      /// Determine if we have reached the end of the BLAST input
      bool End() { return m_Source->End(); }
  
+    int GetNumSeqsProcessed() const { return m_NumSeqs; }
+    int GetTotalLengthProcessed() const { return m_TotalLength; }
  private:
      CRef<CBlastInputSource> m_Source;  ///< pointer to source of sequences
      TSeqPos m_BatchSize;          ///< total size of one block of sequences
@@ -369,6 +371,12 @@ private:
  
      /// Perform the actual copy for assignment operator and copy constructor
      void do_copy(const CBlastInput& input);
+
+    // # of seqs processed
+    int m_NumSeqs;
+
+    // Total length processed
+    int m_TotalLength;
  };
  
  /// Auxiliary class for creating Bioseqs given SeqIds
diff --git a/c++/include/algo/blast/blastinput/cmdline_flags.hpp b/c++/include/algo/blast/blastinput/cmdline_flags.hpp

index 7fd7e1b153134283cfabc42d9f4291b8d0076c82..f8df6d4f7bd10e56397144924ed8b05003ebfe7e 100644 (file)
--- a/c++/include/algo/blast/blastinput/cmdline_flags.hpp
+++ b/c++/include/algo/blast/blastinput/cmdline_flags.hpp
@@ -1,4 +1,4 @@
-/*  $Id: cmdline_flags.hpp 605536 2020-04-13 11:07:50Z ivanov $
+/*  $Id: cmdline_flags.hpp 615184 2020-08-28 04:29:55Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -469,6 +469,8 @@ NCBI_BLASTINPUT_EXPORT extern const string kArgPrintMdTag;
  NCBI_BLASTINPUT_EXPORT extern const string kArgUnalignedOutput;
  /// Argument to specify format for reporting unaligned reads
  NCBI_BLASTINPUT_EXPORT extern const string kArgUnalignedFormat;
+/// Argument to specify mt mode (split by db or split by queries)
+NCBI_BLASTINPUT_EXPORT extern const string kArgMTMode;
  
  END_SCOPE(blast)
  END_NCBI_SCOPE
diff --git a/c++/include/algo/blast/blastinput/rpsblast_args.hpp b/c++/include/algo/blast/blastinput/rpsblast_args.hpp

index aec42f16161ba3ad2f52afea889a6fe2db852e67..32b528d4e8a5299f41edfaf81c11d12d4ea46eb9 100644 (file)
--- a/c++/include/algo/blast/blastinput/rpsblast_args.hpp
+++ b/c++/include/algo/blast/blastinput/rpsblast_args.hpp
@@ -1,4 +1,4 @@
-/*  $Id: rpsblast_args.hpp 544441 2017-08-23 11:55:51Z camacho $
+/*  $Id: rpsblast_args.hpp 615185 2020-08-28 04:30:03Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -56,6 +56,14 @@ public:
      /// @inheritDoc
      virtual int GetQueryBatchSize() const;
  
+    /// Get the input stream
+    virtual CNcbiIstream& GetInputStream();
+
+    /// Get the output stream
+    virtual CNcbiOstream& GetOutputStream();
+
+    virtual ~CRPSBlastAppArgs() {}
+
  protected:
      /// @inheritDoc
      virtual CRef<CBlastOptionsHandle>
@@ -63,6 +71,35 @@ protected:
                            const CArgs& args);
  };
  
+class NCBI_BLASTINPUT_EXPORT CRPSBlastNodeArgs : public CRPSBlastAppArgs
+{
+public:
+    /// Constructor
+    CRPSBlastNodeArgs(const string & input);
+
+    /// @inheritDoc
+    virtual int GetQueryBatchSize() const;
+
+    /// Get the input stream
+    virtual CNcbiIstream& GetInputStream();
+
+    /// Get the output stream
+    virtual CNcbiOstream& GetOutputStream();
+
+    CNcbiOstrstream & GetOutputStrStream() { return m_OutputStream; }
+
+    virtual ~CRPSBlastNodeArgs();
+
+protected:
+    /// @inheritDoc
+    virtual CRef<CBlastOptionsHandle>
+    x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, const CArgs& args);
+
+private :
+    CNcbiOstrstream m_OutputStream;
+    CNcbiIstrstream * m_InputStream;
+};
+
  
  END_SCOPE(blast)
  END_NCBI_SCOPE
diff --git a/c++/include/algo/blast/blastinput/rpstblastn_args.hpp b/c++/include/algo/blast/blastinput/rpstblastn_args.hpp

index 7887a0ced37252c9f08dd98a96e3402b62d40853..bb199ebfa61fcad7abe41cb9a7bce9bced615d47 100644 (file)
--- a/c++/include/algo/blast/blastinput/rpstblastn_args.hpp
+++ b/c++/include/algo/blast/blastinput/rpstblastn_args.hpp
@@ -1,4 +1,4 @@
-/*  $Id: rpstblastn_args.hpp 161402 2009-05-27 17:35:47Z camacho $
+/*  $Id: rpstblastn_args.hpp 615188 2020-08-28 04:30:31Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -49,6 +49,14 @@ public:
      /// @inheritDoc
      virtual int GetQueryBatchSize() const;
  
+    /// Get the input stream
+    virtual CNcbiIstream& GetInputStream();
+
+    /// Get the output stream
+    virtual CNcbiOstream& GetOutputStream();
+
+    virtual ~CRPSTBlastnAppArgs() {}
+
  protected:
      /// @inheritDoc
      virtual CRef<CBlastOptionsHandle>
@@ -56,6 +64,36 @@ protected:
                            const CArgs& args);
  };
  
+class NCBI_BLASTINPUT_EXPORT CRPSTBlastnNodeArgs : public CRPSTBlastnAppArgs
+{
+public:
+    /// Constructor
+    CRPSTBlastnNodeArgs(const string & input);
+
+    /// @inheritDoc
+    virtual int GetQueryBatchSize() const;
+
+    /// Get the input stream
+    virtual CNcbiIstream& GetInputStream();
+
+    /// Get the output stream
+    virtual CNcbiOstream& GetOutputStream();
+
+    CNcbiOstrstream & GetOutputStrStream() { return m_OutputStream; }
+
+    virtual ~CRPSTBlastnNodeArgs();
+
+protected:
+    /// @inheritDoc
+    virtual CRef<CBlastOptionsHandle>
+    x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, const CArgs& args);
+
+private :
+    CNcbiOstrstream m_OutputStream;
+    CNcbiIstrstream * m_InputStream;
+};
+
+
  
  END_SCOPE(blast)
  END_NCBI_SCOPE
diff --git a/c++/include/algo/blast/format/blast_format.hpp b/c++/include/algo/blast/format/blast_format.hpp

index 9278e4e242cdb7a3037eda211e56ef2131ef83c7..be85afe127cfe9d8926f8c11a06887409c9ef397 100644 (file)
--- a/c++/include/algo/blast/format/blast_format.hpp
+++ b/c++/include/algo/blast/format/blast_format.hpp
@@ -1,4 +1,4 @@
-/* $Id: blast_format.hpp 591152 2019-08-12 11:18:21Z fongah2 $
+/* $Id: blast_format.hpp 615337 2020-08-31 15:36:55Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -55,6 +55,7 @@ Author: Jason Papadopoulos
  #include <algo/blast/api/blast_seqinfosrc.hpp>
  #include <algo/blast/format/sam.hpp>
  #include <objects/blast/blast__.hpp>
+#include <algo/blast/api/blast_usage_report.hpp>
  
  
  BEGIN_NCBI_SCOPE
@@ -323,6 +324,10 @@ public:
  
      static void PrintArchive(CRef<objects::CBlast4_archive> archive,
                                                   CNcbiOstream& out);
+
+    // Extract search info from CBlastFormat and add it to the BLAST usage report
+    void LogBlastSearchInfo(blast::CBlastUsageReport & report);
+
  private:
      /// Format type
      blast::CFormattingArgs::EOutputFormat m_FormatType;
diff --git a/c++/include/common/config/ncbiconf_msvc.h b/c++/include/common/config/ncbiconf_msvc.h

index 2fc3df7555af1a9ed26b0dac9d081367eed4ce5d..98dcae4e55772f0197a3ae29b8a436cd4bc223f4 100644 (file)
--- a/c++/include/common/config/ncbiconf_msvc.h
+++ b/c++/include/common/config/ncbiconf_msvc.h
@@ -1,4 +1,4 @@
-/* $Id: ncbiconf_msvc.h 602172 2020-02-18 15:13:29Z ucko $
+/* $Id: ncbiconf_msvc.h 608266 2020-05-13 18:56:44Z ivanov $
   * By Denis Vakatov, NCBI (vakatov@ncbi.nlm.nih.gov)
   *
   * MS-Win 32/64, MSVC++ 6.0/.NET
@@ -135,7 +135,10 @@ typedef   int   ssize_t;
  #define NETDB_REENTRANT                 1
  
  #if _MSC_VER >= 1400
-
+// need to include some standard header to get all debugging macros
+# ifdef __cplusplus
+#  include <cstdint>
+# endif
  /* Suppress 'deprecated' warning for STD functions */
  #if !defined(_CRT_NONSTDC_DEPRECATE)
  #define _CRT_NONSTDC_DEPRECATE(x)
diff --git a/c++/include/common/ncbi_export.h b/c++/include/common/ncbi_export.h

index 84d9da09e9825ebad5f8c3a97dc4f3586870539a..9f94380b98c23c1bdee4aedc4da0015f8672c893 100644 (file)
--- a/c++/include/common/ncbi_export.h
+++ b/c++/include/common/ncbi_export.h
@@ -1,7 +1,7 @@
  #ifndef COMMON___NCBI_EXPORT__H
  #define COMMON___NCBI_EXPORT__H
  
-/*  $Id: ncbi_export.h 605871 2020-04-16 11:23:58Z ivanov $
+/*  $Id: ncbi_export.h 617033 2020-09-24 18:56:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
diff --git a/c++/include/common/ncbi_package_ver.h b/c++/include/common/ncbi_package_ver.h

index 0ed06d2dce8d5c76e6a5eed0d467cb6866d6c1ce..1bdf142754ca0b93a7ffa8bbc7b4c6fcb6c04aae 100644 (file)
--- a/c++/include/common/ncbi_package_ver.h
+++ b/c++/include/common/ncbi_package_ver.h
@@ -7,8 +7,8 @@
  #define NCBI_PACKAGE                       1
  #define NCBI_PACKAGE_NAME                  "blast"
  #define NCBI_PACKAGE_VERSION_MAJOR         2
-#define NCBI_PACKAGE_VERSION_MINOR         10
-#define NCBI_PACKAGE_VERSION_PATCH         1
+#define NCBI_PACKAGE_VERSION_MINOR         11
+#define NCBI_PACKAGE_VERSION_PATCH         0
  #define NCBI_PACKAGE_CONFIG                ""
  
  #define NCBI_PACKAGE_VERSION_STRINGIFY(x)  #x
diff --git a/c++/include/common/ncbiconf_impl.h b/c++/include/common/ncbiconf_impl.h

index 65daf62d7c68c15c6003759614a26af4140a38f3..58a3cd8bd45ba1167c161a9ed8e3fa9e11fcfd8d 100644 (file)
--- a/c++/include/common/ncbiconf_impl.h
+++ b/c++/include/common/ncbiconf_impl.h
@@ -1,7 +1,7 @@
  #ifndef COMMON___NCBICONF_IMPL__H
  #define COMMON___NCBICONF_IMPL__H
  
-/* $Id: ncbiconf_impl.h 606329 2020-04-20 16:28:09Z ivanov $
+/* $Id: ncbiconf_impl.h 609547 2020-06-03 17:21:47Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -41,8 +41,6 @@
  #  error "The header can be used from <ncbiconf.h> only."
  #endif /*!FORWARDING_NCBICONF_H*/
  
-#include <common/ncbi_build_info.h>
-
  
  /** @addtogroup Portability
   *
diff --git a/c++/include/connect/ncbi_usage_report.hpp b/c++/include/connect/ncbi_usage_report.hpp

index 38d9574f11112233b7650ff8f9c5964e52c24d31..2579a0329eb03b9d3eb0495acc5049a822e99ebb 100644 (file)
--- a/c++/include/connect/ncbi_usage_report.hpp
+++ b/c++/include/connect/ncbi_usage_report.hpp
@@ -1,7 +1,7 @@
  #ifndef CONNECT___NCBI_USAGE_REPORT__HPP
  #define CONNECT___NCBI_USAGE_REPORT__HPP
  
-/* $Id: ncbi_usage_report.hpp 602851 2020-03-03 18:47:23Z ivanov $
+/* $Id: ncbi_usage_report.hpp 617219 2020-09-28 17:23:04Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -29,7 +29,7 @@
   * Authors:  Vladislav Evgeniev, Vladimir Ivanov
   *
   * File Description:
- *   Log usage information to NCBI \93pinger\94.
+ *   Log usage information to NCBI "pinger".
   *
   */
  
@@ -75,7 +75,7 @@ public:
          fOS         = 1 << 3,   ///< OS name ("os")
          fHost       = 1 << 4,   ///< Host name ("host")
          //
-        fDefault    = fAppName | fAppVersion | fOS | fHost
+        fDefault    = fAppName | fAppVersion | fOS
      };
      typedef int TWhat;  ///< Binary OR of "EWhat"
  };
@@ -325,10 +325,10 @@ public:
      /// to allow checking reporting progress or failures, see EState for a list of states.
      /// @sa 
      ///   EState, CUsageReport::Send()
-    virtual void OnStateChange(EState state) {};
+    virtual void OnStateChange(EState /*state*/) {};
  
      /// Copy constructor.
-    CUsageReportJob(const CUsageReportJob& other) { x_CopyFrom(other); };
+    CUsageReportJob(const CUsageReportJob& other) : CUsageReportParameters(other) { m_State = other.m_State; };
      /// Copy assignment operator.
      CUsageReportJob& operator=(const CUsageReportJob& other) { x_CopyFrom(other); return *this; };
      
diff --git a/c++/include/corelib/impl/ncbi_dbsvcmapper.hpp b/c++/include/corelib/impl/ncbi_dbsvcmapper.hpp

index f5dbc2ba826e91b38097ae165a7c046f53c60f2d..f7ec68377b20baffc066bb0c1a8e2daa742584c5 100644 (file)
--- a/c++/include/corelib/impl/ncbi_dbsvcmapper.hpp
+++ b/c++/include/corelib/impl/ncbi_dbsvcmapper.hpp
@@ -1,7 +1,7 @@
  #ifndef CORELIB___DB_SERVICE_MAPPER__HPP
  #define CORELIB___DB_SERVICE_MAPPER__HPP
  
-/*  $Id: ncbi_dbsvcmapper.hpp 586267 2019-05-13 18:15:06Z ucko $
+/*  $Id: ncbi_dbsvcmapper.hpp 610944 2020-06-25 18:30:27Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -34,8 +34,7 @@
   */
  
  
-#include <corelib/ncbistd.hpp>
-#include <corelib/ncbiobj.hpp>
+#include <corelib/ncbimtx.hpp>
  
  #ifdef NCBI_OS_MSWIN
  #  include <winsock2.h>
diff --git a/c++/include/corelib/mswin_no_popup.h b/c++/include/corelib/mswin_no_popup.h

index 2aa87a21d46752978ffadc5e53a59cfdf2988424..7285587a1d20fe747d2e7cb894ad4bf0ca871c50 100644 (file)
--- a/c++/include/corelib/mswin_no_popup.h
+++ b/c++/include/corelib/mswin_no_popup.h
@@ -1,7 +1,7 @@
  #ifndef CORELIB___MSWIN_NO_POPUP__H
  #define CORELIB___MSWIN_NO_POPUP__H
  
-/*  $Id: mswin_no_popup.h 171076 2009-09-21 16:22:34Z ivanov $
+/*  $Id: mswin_no_popup.h 617213 2020-09-28 17:22:30Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -45,7 +45,7 @@
  #define NCBI_MSWIN_NO_POPUP
  
  /* In case anyone needs to always disable the popup messages (regardless of DIAG_SILENT_ABDORT)
-   another pre-processor macro can be defined before #include\92ing either 
+   another pre-processor macro can be defined before #include'ing either 
     <corelib/mswin_no_popup.h> (or <common/test_assert.h>).
  */
  /* #define NCBI_MSWIN_NO_POPUP_EVER */
diff --git a/c++/include/corelib/ncbi_system.hpp b/c++/include/corelib/ncbi_system.hpp

index 01e1acd279243b7f835f53feec3315a89b27ef5c..99552ee11e39c72896508963edbe061be2cb0653 100644 (file)
--- a/c++/include/corelib/ncbi_system.hpp
+++ b/c++/include/corelib/ncbi_system.hpp
@@ -1,7 +1,7 @@
  #ifndef NCBI_SYSTEM__HPP
  #define NCBI_SYSTEM__HPP
  
-/*  $Id: ncbi_system.hpp 603334 2020-03-10 17:10:33Z ivanov $
+/*  $Id: ncbi_system.hpp 613789 2020-08-12 18:02:48Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -63,7 +63,7 @@ BEGIN_NCBI_SCOPE
  enum ELimitsExitCode {
      eLEC_None,    ///< Normal exit.
      eLEC_Memory,  ///< Memory limit.
-    eLEC_Cpu      ///< CPU usage limit.
+    eLEC_Cpu      ///< CPU time usage limit.
  };
  
  /// Type of parameter for print handler.
@@ -166,7 +166,12 @@ extern bool SetMemoryLimit(size_t max_size,
                             TLimitsPrintParameter parameter = NULL);
  
  /// [UNIX only]  Set soft memory limit.
-/// @sa SetMemoryLimit
+/// @note
+///   The soft limit is the value that the kernel enforces for the corresponding resource. 
+///   An unprivileged process may only set its soft limit to a value in the range
+///   from 0 up to the hard limit, and (irreversibly) lower its hard limit.
+///   A privileged process may make arbitrary changes to either limit value. 
+/// @sa SetMemoryLimit, SetMemoryLimitHard
  NCBI_XNCBI_EXPORT
  extern bool SetMemoryLimitSoft(size_t max_size, 
                             TLimitsPrintHandler   handler   = NULL, 
@@ -174,18 +179,58 @@ extern bool SetMemoryLimitSoft(size_t max_size,
  
  /// [UNIX only]  Set hard memory limit.
  /// @note
+///   The hard limit acts as a ceiling for the soft limit: 
  ///   Current soft memory limit will be automatically decreased,
  ///   if it exceed new value for the hard memory limit.
  /// @note
  ///   Only privileged process can increase current hard level limit.
-/// @sa SetMemoryLimit
+/// @sa SetMemoryLimit, SetMemoryLimitSoft
  NCBI_XNCBI_EXPORT
  extern bool SetMemoryLimitHard(size_t max_size, 
                             TLimitsPrintHandler   handler   = NULL, 
                             TLimitsPrintParameter parameter = NULL);
  
  
-/// [UNIX only]  Set CPU usage limit.
+/// [UNIX only]  Get "soft" memory limit of the virtual memory (address space) in bytes for a current process.
+/// @return
+///   Returns "soft" value set by setrlimit(), SetMemoryLimit() or ulimit command
+///   line utility for virtual memory address space.
+///   0 - if an error occurs and CNcbiError is set, or the memory limit is set to "unlimited".
+/// @note 
+///   The implementation of malloc() can be different on many flavors of UNIX, and we
+///   usually don't know how exactly it is implemented on the current system. 
+///   Some systems use an sbrk()-based implementation (heap), others use the mmap() system call
+///   and virtual memory (address space) to allocate memory, and some use a hybrid approach
+///   and may allocate memory in two different ways depending on the requested memory size
+///   and certain parameters.
+///   Almost all modern Unix versions use an mmap()-based approach for all memory allocations,
+///   or at least for big chunks of memory, so virtual memory limits are probably more
+///   important nowadays.
+/// @sa SetMemoryLimit, GetVirtualMemoryLimitHard
+NCBI_XNCBI_EXPORT
+extern size_t GetVirtualMemoryLimitSoft(void);
+
+/// [UNIX only]  Get "hard" memory limit of the virtual memory (address space) in bytes for a current process.
+/// @return
+///   Returns "hard" value set by setrlimit(), SetMemoryLimit() or ulimit command
+///   line utility for virtual memory address space.
+///   0 - if an error occurs and CNcbiError is set, or the memory limit is set to "unlimited".
+/// @note 
+///   The implementation of malloc() can be different on many flavors of UNIX, and we
+///   usually don't know how exactly it is implemented on the current system. 
+///   Some systems use an sbrk()-based implementation (heap), others use the mmap() system call
+///   and virtual memory (address space) to allocate memory, and some use a hybrid approach
+///   and may allocate memory in two different ways depending on the requested memory size
+///   and certain parameters.
+///   Almost all modern Unix versions use an mmap()-based approach for all memory allocations,
+///   or at least for big chunks of memory, so virtual memory limits are probably more
+///   important nowadays.
+/// @sa SetMemoryLimit, GetVirtualMemoryLimitSoft
+NCBI_XNCBI_EXPORT
+extern size_t GetVirtualMemoryLimitHard(void);
+
+
+/// [UNIX only]  Set CPU time usage limit.
  ///
  /// Set the limit for the CPU time that can be consumed by current process.
  /// 
@@ -371,9 +416,13 @@ public:
      ///   Process owner user name, or empty string if it cannot be determined.
      static string GetUserName(void);
  
-    /// Return number of active CPUs (never less than 1).
+    /// Return number of active CPUs/cores (never less than 1).
      static unsigned int GetCpuCount(void);
      
+    /// Return the number of CPUs/cores that the current thread is allowed to use.
+    /// Returns 0 if this information is unavailable on the current OS, or if an error occurs.
+    static unsigned int GetCpuCountAllowed(void);
+    
      /// Get system uptime in seconds.
      /// @return
      ///   Seconds since last boot, or negative number if cannot determine it
diff --git a/c++/include/corelib/ncbiapp_api.hpp b/c++/include/corelib/ncbiapp_api.hpp

index 81b5a622dead355a37c7930986ff22430677c028..fec38c61bb96aaa4350a8d3703b2923189050dc9 100644 (file)
--- a/c++/include/corelib/ncbiapp_api.hpp
+++ b/c++/include/corelib/ncbiapp_api.hpp
@@ -1,7 +1,7 @@
  #ifndef CORELIB___NCBIAPP_API__HPP
  #define CORELIB___NCBIAPP_API__HPP
  
-/*  $Id: ncbiapp_api.hpp 593047 2019-09-11 15:29:02Z grichenk $
+/*  $Id: ncbiapp_api.hpp 610397 2020-06-16 18:45:55Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -395,7 +395,10 @@ public:
      NCBI_DEPRECATED virtual bool SetupDiag_AppSpecific(void);
  
      /// Add callback to be executed from CNcbiApplicationAPI destructor.
-    /// @sa CNcbiActionGuard
+    /// @note The actions actually run earlier than that, in the CNcbiApplication
+    ///       destructor, and may run even earlier, from the destructors of other
+    ///       CNcbiApplicationAPI-derived classes.
+    /// @sa CNcbiActionGuard, ExecuteOnExitActions()
      template<class TFunc> void AddOnExitAction(TFunc func)
      {
          m_OnExitActions.AddAction(func);
@@ -616,6 +619,12 @@ protected:
      typedef int TAppFlags;
      void SetAppFlags(TAppFlags flags) { m_AppFlags = flags; }
  
+    /// Run the on-exit actions now. Should only be called from the destructors
+    /// of classes derived from CNcbiApplicationAPI, when the actions need to access
+    /// their data members and virtual methods, or to dynamic_cast<> from the base app class.
+    /// @sa AddOnExitAction()
+    void ExecuteOnExitActions();
+
  private:
      /// Read standard NCBI application configuration settings.
      ///
diff --git a/c++/include/corelib/ncbidiag.hpp b/c++/include/corelib/ncbidiag.hpp

index 6cc92b0b4b8f2f2b2ab73b2c20154dbc010a5fd8..f6d83373deee78e7a0252c275a65996c66b9f214 100644 (file)
--- a/c++/include/corelib/ncbidiag.hpp
+++ b/c++/include/corelib/ncbidiag.hpp
@@ -1,7 +1,7 @@
  #ifndef CORELIB___NCBIDIAG__HPP
  #define CORELIB___NCBIDIAG__HPP
  
-/*  $Id: ncbidiag.hpp 606470 2020-04-22 14:14:58Z ivanov $
+/*  $Id: ncbidiag.hpp 611708 2020-07-09 17:56:10Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -2565,6 +2565,25 @@ enum EDiagFilter {
  NCBI_XNCBI_EXPORT
  extern void SetDiagFilter(EDiagFilter what, const char* filter_str);
  
+/// Get the current diagnostic filter string.
+///
+/// @param what
+///    Which filter to get. Only eDiagFilter_Trace and eDiagFilter_Post are allowed;
+///    for any other value the function returns an empty string.
+/// @sa SetDiagFilter
+NCBI_XNCBI_EXPORT
+extern string GetDiagFilter(EDiagFilter what);
+
+/// Append to the diagnostic filter
+///
+/// @param what
+///    Which filter the string is appended to
+/// @param filter_str
+///    Filter string to append
+/// @sa SetDiagFilter
+NCBI_XNCBI_EXPORT
+extern void AppendDiagFilter(EDiagFilter what, const char* filter_str);
+
  
  /////////////////////////////////////////////////////////////////////////////
  ///
diff --git a/c++/include/corelib/ncbimisc.hpp b/c++/include/corelib/ncbimisc.hpp

index e424fe88586363319aa34b529ee4377bccfbf63c..72297bb439227a00431f81d333c9339d8d43536f 100644 (file)
--- a/c++/include/corelib/ncbimisc.hpp
+++ b/c++/include/corelib/ncbimisc.hpp
@@ -1,7 +1,7 @@
  #ifndef CORELIB___NCBIMISC__HPP
  #define CORELIB___NCBIMISC__HPP
  
-/*  $Id: ncbimisc.hpp 607883 2020-05-08 15:09:10Z grichenk $
+/*  $Id: ncbimisc.hpp 609927 2020-06-08 16:52:43Z grichenk $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -874,8 +874,13 @@ const TSeqPos kInvalidSeqPos = ((TSeqPos) (-1));
  /// Use this typedef rather than its expansion, which may change.
  typedef int TSignedSeqPos;
  
-/// Type for Taxon1-name.taxid
+/// Taxon id type
  typedef int TTaxId;
+# define TAX_ID_TO(T, tax_id) (static_cast<T>(tax_id))  // cast a tax id to type T
+# define TAX_ID_FROM(T, value) (static_cast<ncbi::TTaxId>(value))  // cast value to TTaxId; T is not used in the expansion
+# define TAX_ID_CONST(id) id  // expands to the bare argument (no cast, no parentheses)
+#define ZERO_TAX_ID TAX_ID_CONST(0)
+#define INVALID_TAX_ID TAX_ID_CONST(-1)
  
  /// Type for sequence GI.
  ///
@@ -1029,25 +1034,25 @@ class CConstGIChecker {
  public:
      static const TIntId value = gi;
  };
-#define GI_CONST(gi) (TGi(CConstGIChecker<gi>::value))
+#define GI_CONST(gi) (ncbi::TGi(CConstGIChecker<gi>::value))
  #define ZERO_GI GI_CONST(0)
  #define INVALID_GI GI_CONST(-1)
  
  /// Temporary macros to convert TGi to other types (int, unsigned etc.).
-#define GI_TO(T, gi) (static_cast<T>(TIntId(gi)))
-#define GI_FROM(T, value) (TGi(static_cast<TIntId>(value)))
+#define GI_TO(T, gi) (static_cast<T>(ncbi::TIntId(gi)))
+#define GI_FROM(T, value) (ncbi::TGi(static_cast<ncbi::TIntId>(value)))
  
-#define ENTREZ_ID_CONST(id) (TEntrezId(CConstGIChecker<id>::value))
+#define ENTREZ_ID_CONST(id) (ncbi::TEntrezId(CConstGIChecker<id>::value))
  #define ZERO_ENTREZ_ID ENTREZ_ID_CONST(0)
  #define INVALID_ENTREZ_ID ENTREZ_ID_CONST(-1)
  
  /// Temporary macros to convert TEntrezId to other types (int, unsigned etc.).
-#define ENTREZ_ID_TO(T, entrez_id) (static_cast<T>(TIntId(entrez_id)))
-#define ENTREZ_ID_FROM(T, value) (TEntrezId(static_cast<TIntId>(value)))
+#define ENTREZ_ID_TO(T, entrez_id) (static_cast<T>(ncbi::TIntId(entrez_id)))
+#define ENTREZ_ID_FROM(T, value) (ncbi::TEntrezId(static_cast<ncbi::TIntId>(value)))
  
  /// Convert gi-compatible int to/from other types.
  #define INT_ID_TO(T, id) (static_cast<T>(id))
-#define INT_ID_FROM(T, value) (static_cast<TIntId>(value))
+#define INT_ID_FROM(T, value) (static_cast<ncbi::TIntId>(value))
  
  
  /// Helper address class
diff --git a/c++/include/corelib/ncbistre.hpp b/c++/include/corelib/ncbistre.hpp

index 14829879ab83e844f9a5b930e6a1d4faaea74e36..e303ebc57e55aa5b63d84c4ec0712ae1eeb40cc4 100644 (file)
--- a/c++/include/corelib/ncbistre.hpp
+++ b/c++/include/corelib/ncbistre.hpp
@@ -1,7 +1,7 @@
  #ifndef CORELIB___NCBISTRE__HPP
  #define CORELIB___NCBISTRE__HPP
  
-/*  $Id: ncbistre.hpp 606328 2020-04-20 16:27:53Z ivanov $
+/*  $Id: ncbistre.hpp 617212 2020-09-28 17:22:22Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -227,41 +227,6 @@ public:
          CNcbiIfstream::open(_Filename.c_str(), _Mode, _Prot);
      }
  };
-#elif defined(NCBI_COMPILER_MSVC)
-#  if _MSC_VER >= 1200  &&  _MSC_VER < 1300
-class CNcbiIfstream : public IO_PREFIX::ifstream
-{
-public:
-    CNcbiIfstream() : m_Fp(0)
-    {
-    }
-
-    explicit CNcbiIfstream(const char* s,
-                           IOS_BASE::openmode mode = IOS_BASE::in)
-    {
-        fastopen(s, mode);
-    }
-
-    void fastopen(const char* s, IOS_BASE::openmode mode = IOS_BASE::in)
-    {
-        if (is_open()  ||  !(m_Fp = __Fiopen(s, mode | in)))
-            setstate(failbit);
-        else
-            (void) new (rdbuf()) basic_filebuf<char, char_traits<char> >(m_Fp);
-    }
-
-    virtual ~CNcbiIfstream(void)
-    {
-        if (m_Fp)
-            fclose(m_Fp);
-    }
-private:
-    FILE* m_Fp;
-};
-#  else
-/// Portable alias for ifstream.
-typedef IO_PREFIX::ifstream      CNcbiIfstream;
-#  endif
  #else
  /// Portable alias for ifstream.
  typedef IO_PREFIX::ifstream      CNcbiIfstream;
@@ -323,41 +288,6 @@ public:
          CNcbiOfstream::open(_Filename.c_str(), _Mode, _Prot);
      }
  };
-#elif defined(NCBI_COMPILER_MSVC)
-#  if _MSC_VER >= 1200  &&  _MSC_VER < 1300
-class CNcbiOfstream : public IO_PREFIX::ofstream
-{
-public:
-    CNcbiOfstream() : m_Fp(0)
-    {
-    }
-
-    explicit CNcbiOfstream(const char* s,
-                           IOS_BASE::openmode mode = IOS_BASE::out)
-    {
-        fastopen(s, mode);
-    }
-
-    void fastopen(const char* s, IOS_BASE::openmode mode = IOS_BASE::out)
-    {
-        if (is_open()  ||  !(m_Fp = __Fiopen(s, mode | out)))
-            setstate(failbit);
-        else
-            (void) new (rdbuf()) basic_filebuf<char, char_traits<char> >(m_Fp);
-    }
-
-    virtual ~CNcbiOfstream(void)
-    {
-        if (m_Fp)
-            fclose(m_Fp);
-    }
-private:
-    FILE* m_Fp;
-};
-#  else
-/// Portable alias for ofstream.
-typedef IO_PREFIX::ofstream      CNcbiOfstream;
-#  endif
  #else
  /// Portable alias for ofstream.
  typedef IO_PREFIX::ofstream      CNcbiOfstream;
@@ -396,43 +326,6 @@ public:
          IO_PREFIX::fstream::open(_Filename,_Mode,_Prot);
      }
  };
-#elif defined(NCBI_COMPILER_MSVC)
-#  if _MSC_VER >= 1200  &&  _MSC_VER < 1300
-class CNcbiFstream : public IO_PREFIX::fstream
-{
-public:
-    CNcbiFstream() : m_Fp(0)
-    {
-    }
-
-    explicit CNcbiFstream(const char* s,
-                          IOS_BASE::openmode
-                          mode = IOS_BASE::in | IOS_BASE::out)
-    {
-        fastopen(s, mode);
-    }
-
-    void fastopen(const char* s, IOS_BASE::openmode
-                  mode = IOS_BASE::in | IOS_BASE::out)
-    {
-        if (is_open()  ||  !(m_Fp = __Fiopen(s, mode)))
-            setstate(failbit);
-        else
-            (void) new (rdbuf()) basic_filebuf<char, char_traits<char> >(m_Fp);
-    }
-
-    virtual ~CNcbiFstream(void)
-    {
-        if (m_Fp)
-            fclose(m_Fp);
-    }
-private:
-    FILE* m_Fp;
-};
-#  else
-/// Portable alias for fstream.
-typedef IO_PREFIX::fstream       CNcbiFstream;
-#  endif
  #else
  /// Portable alias for fstream.
  typedef IO_PREFIX::fstream       CNcbiFstream;
@@ -698,8 +591,7 @@ NCBI_XNCBI_EXPORT
  CNcbiOstream& operator<<(CNcbiOstream& out, const CNcbiOstrstreamToString& s);
  
  inline
-Int8 
-GetOssSize(CNcbiOstrstream& oss)
+Int8 GetOssSize(CNcbiOstrstream& oss)
  {
  #ifdef NCBI_SHUN_OSTRSTREAM
      return NcbiStreamposToInt8(oss.tellp());
@@ -709,8 +601,7 @@ GetOssSize(CNcbiOstrstream& oss)
  }
  
  inline
-bool
-IsOssEmpty(CNcbiOstrstream& oss)
+bool IsOssEmpty(CNcbiOstrstream& oss)
  {
      return GetOssSize(oss) == 0;
  }
@@ -861,13 +752,6 @@ CNcbiOstream& operator<<(CNcbiOstream& out, CPrintableStringConverter s);
  NCBI_XNCBI_EXPORT
  CNcbiOstream& operator<<(CNcbiOstream& out, CPrintableCharPtrConverter s);
  
-#ifdef NCBI_COMPILER_MSVC
-#  if _MSC_VER >= 1200  &&  _MSC_VER < 1300
-NCBI_XNCBI_EXPORT
-CNcbiOstream& operator<<(CNcbiOstream& out, __int64 val);
-#  endif
-#endif
-
  
  /////////////////////////////////////////////////////////////////////////////
  ///
@@ -959,8 +843,8 @@ enum EBOMDiscard {
  ///        CStreamUtils::Pushback().
  /// @sa CStreamUtils::Pushback()
  NCBI_XNCBI_EXPORT
-EEncodingForm GetTextEncodingForm(CNcbiIstream& input,
-                                  EBOMDiscard   discard_bom);
+EEncodingForm GetTextEncodingForm(CNcbiIstream& input, EBOMDiscard discard_bom);
+
  
  /// Byte Order Mark helper class to use in serialization
  ///
@@ -986,9 +870,11 @@ private:
      EEncodingForm m_EncodingForm;
  };
  
+
  /// Write Byte Order Mark into output stream
  NCBI_XNCBI_EXPORT CNcbiOstream& operator<< (CNcbiOstream& str, const CByteOrderMark&  bom);
  
+
  /// Read Byte Order Mark, if present, from input stream
  ///
  /// @note
@@ -1012,10 +898,10 @@ END_NCBI_SCOPE
  // NOTE:  these must have been inside the _NCBI_SCOPE and without the
  //        "ncbi::" and "std::" prefixes, but there is some bug in SunPro 5.0...
  #if defined(NCBI_USE_OLD_IOSTREAM)
-extern NCBI_NS_NCBI::CNcbiOstream& operator<<(NCBI_NS_NCBI::CNcbiOstream& os,
-                                              const NCBI_NS_STD::string& str);
-extern NCBI_NS_NCBI::CNcbiIstream& operator>>(NCBI_NS_NCBI::CNcbiIstream& is,
-                                              NCBI_NS_STD::string& str);
+extern NCBI_NS_NCBI::CNcbiOstream& 
+    operator<<(NCBI_NS_NCBI::CNcbiOstream& os, const NCBI_NS_STD::string& str);
+extern NCBI_NS_NCBI::CNcbiIstream& 
+    operator>>(NCBI_NS_NCBI::CNcbiIstream& is, NCBI_NS_STD::string& str);
  #endif // NCBI_USE_OLD_IOSTREAM
  
  
diff --git a/c++/include/corelib/request_ctx.hpp b/c++/include/corelib/request_ctx.hpp

index f93d84a3f807f50340d2942e2a88bd4e20f6466b..712f5c5d34c5a8ef2cda1f1641fea31a17ac4350 100644 (file)
--- a/c++/include/corelib/request_ctx.hpp
+++ b/c++/include/corelib/request_ctx.hpp
@@ -1,7 +1,7 @@
  #ifndef CORELIB___REQUEST_CTX__HPP
  #define CORELIB___REQUEST_CTX__HPP
  
-/*  $Id: request_ctx.hpp 574926 2018-11-20 20:23:54Z ucko $
+/*  $Id: request_ctx.hpp 617468 2020-10-01 17:54:00Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -64,7 +64,8 @@ class CRequestContext_PassThrough;
  class NCBI_XNCBI_EXPORT CSharedHitId
  {
  public:
-    explicit CSharedHitId(const string& hit) : m_HitId(hit), m_SubHitId(0) {}
+    explicit CSharedHitId(const string& hit)
+        : m_HitId(hit), m_SubHitId(0), m_AppState(GetDiagContext().GetAppState()) {}
      CSharedHitId(void) : m_SubHitId(0) {}
      ~CSharedHitId(void) {}
  
@@ -90,6 +91,7 @@ public:
          m_SharedSubHitId.Reset();
          m_SubHitId = 0;
          m_HitId = hit_id;
+        m_AppState = GetDiagContext().GetAppState();
      }
  
      typedef unsigned int TSubHitId;
@@ -106,12 +108,21 @@ public:
          return IsShared() ? (TSubHitId)m_SharedSubHitId->GetData().Add(1) : ++m_SubHitId;
      }
  
+    /// Check if this hit ID was set at request level.
+    bool IsRequestLevel(void) const
+    {
+        return m_AppState == eDiagAppState_RequestBegin ||
+            m_AppState == eDiagAppState_Request ||
+            m_AppState == eDiagAppState_RequestEnd;
+    }
+
  private:
      typedef CObjectFor<CAtomicCounter> TSharedCounter;
  
      string m_HitId;
      TSubHitId m_SubHitId;
      mutable CRef<TSharedCounter> m_SharedSubHitId;
+    EDiagAppState m_AppState = eDiagAppState_NotSet;  ///< App state captured when the hit ID was set; defaulted because the default ctor does not initialize it
  };
  
  
@@ -678,7 +689,7 @@ bool CRequestContext::IsSetHitID(EHitIDSource src) const
          return true;
      }
      if ((src & eHitID_Request)  &&  x_IsSetProp(eProp_HitID)) {
-        return true;
+        return m_HitID.IsRequestLevel();
      }
      if ((src & eHitID_Default) && GetDiagContext().x_IsSetDefaultHitID()) {
          return true;
diff --git a/c++/include/corelib/test_boost.hpp b/c++/include/corelib/test_boost.hpp

index 2476a3638f5aaef162b9517ecc580f7b6232342b..563cc8a917bbabc6ea92ab277fb8549cfecacb0c 100644 (file)
--- a/c++/include/corelib/test_boost.hpp
+++ b/c++/include/corelib/test_boost.hpp
@@ -1,7 +1,7 @@
  #ifndef CORELIB___TEST_BOOST__HPP
  #define CORELIB___TEST_BOOST__HPP
  
-/*  $Id: test_boost.hpp 604629 2020-03-31 13:43:18Z ivanov $
+/*  $Id: test_boost.hpp 617213 2020-09-28 17:22:30Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -64,7 +64,11 @@
  #endif
  
  #include <boost/version.hpp>
-#include <boost/test/auto_unit_test.hpp>
+#if BOOST_VERSION >= 107000
+#  include <boost/test/unit_test.hpp>
+#else 
+#  include <boost/test/auto_unit_test.hpp>
+#endif
  #include <boost/test/floating_point_comparison.hpp>
  #include <boost/test/framework.hpp>
  #include <boost/test/execution_monitor.hpp>
diff --git a/c++/include/corelib/version.hpp b/c++/include/corelib/version.hpp

index 92eb3d90cf517dd8308e16721f1b84f3cb2582d3..1c1f884556c35694acfc8a3deb9da41f6dff27b6 100644 (file)
--- a/c++/include/corelib/version.hpp
+++ b/c++/include/corelib/version.hpp
@@ -1,7 +1,7 @@
  #ifndef CORELIB___VERSION__HPP
  #define CORELIB___VERSION__HPP
  
-/*  $Id: version.hpp 593438 2019-09-18 18:13:51Z lavr $
+/*  $Id: version.hpp 609546 2020-06-03 17:21:38Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -36,6 +36,7 @@
  
  
  #include <corelib/version_api.hpp>
+#include <common/ncbi_build_info.h>
  
  
  
@@ -51,6 +52,72 @@ BEGIN_NCBI_SCOPE
  # define NCBI_BUILD_TIME __DATE__ " " __TIME__
  #endif
  
+#ifdef NCBI_BUILD_TAG
+#  define NCBI_BUILD_TAG_PROXY  NCBI_AS_STRING(NCBI_BUILD_TAG)
+#else
+#  define NCBI_BUILD_TAG_PROXY  ""
+#endif
+
+// Cope with potentially having an older ncbi_build_info.h
+#ifndef NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO
+#  define NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO \
+    .Extra(SBuildInfo::eTeamCityProjectName, NCBI_TEAMCITY_PROJECT_NAME_PROXY)
+#  define NCBI_TEAMCITY_BUILDCONF_NAME_SBUILDINFO \
+    .Extra(SBuildInfo::eTeamCityBuildConf, NCBI_TEAMCITY_BUILDCONF_NAME_PROXY)
+#  define NCBI_TEAMCITY_BUILD_NUMBER_SBUILDINFO \
+    .Extra(SBuildInfo::eTeamCityBuildNumber, NCBI_TEAMCITY_BUILD_NUMBER_PROXY)
+#  define NCBI_SUBVERSION_REVISION_SBUILDINFO \
+    .Extra(SBuildInfo::eSubversionRevision, NCBI_SUBVERSION_REVISION_PROXY)
+#  define NCBI_SC_VERSION_SBUILDINFO \
+    .Extra(SBuildInfo::eStableComponentsVersion, NCBI_SC_VERSION_PROXY)
+#endif
+
+// Cope with potentially having an older ncbi_source_ver.h
+#ifndef NCBI_SRCTREE_VER_SBUILDINFO
+#  ifdef NCBI_SRCTREE_NAME_PROXY
+#    define NCBI_SRCTREE_VER_SBUILDINFO \
+    .Extra(NCBI_SRCTREE_NAME_PROXY, NCBI_SRCTREE_VER_PROXY)
+#  else
+#    define NCBI_SRCTREE_VER_SBUILDINFO /* empty */
+#  endif
+#endif
+
+#ifdef NCBI_APP_BUILT_AS
+#  define NCBI_BUILT_AS_SBUILDINFO \
+    .Extra(SBuildInfo::eBuiltAs, NCBI_AS_STRING(NCBI_APP_BUILT_AS))
+#else
+#  define NCBI_BUILT_AS_SBUILDINFO /* empty */
+#endif
+
+#ifdef NCBI_TEAMCITY_BUILD_ID
+#  define NCBI_BUILD_ID NCBI_TEAMCITY_BUILD_ID
+#elif defined(NCBI_BUILD_SESSION_ID)
+#  define NCBI_BUILD_ID NCBI_AS_STRING(NCBI_BUILD_SESSION_ID)
+#endif
+#ifdef NCBI_BUILD_ID
+#  define NCBI_BUILD_ID_SBUILDINFO .Extra(SBuildInfo::eBuildID, NCBI_BUILD_ID)
+#else
+#  define NCBI_BUILD_ID_SBUILDINFO /* empty */
+#endif
+
+#define NCBI_SBUILDINFO_DEFAULT_IMPL() \
+    NCBI_SBUILDINFO_DEFAULT_INSTANCE() \
+        NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO \
+        NCBI_TEAMCITY_BUILDCONF_NAME_SBUILDINFO \
+        NCBI_TEAMCITY_BUILD_NUMBER_SBUILDINFO \
+        NCBI_BUILD_ID_SBUILDINFO \
+        NCBI_SUBVERSION_REVISION_SBUILDINFO \
+        NCBI_SC_VERSION_SBUILDINFO \
+        NCBI_SRCTREE_VER_SBUILDINFO \
+        NCBI_BUILT_AS_SBUILDINFO
+
+#if defined(NCBI_USE_PCH) && !defined(NCBI_TEAMCITY_BUILD_NUMBER)
+#define NCBI_SBUILDINFO_DEFAULT() SBuildInfo()
+#else
+#define NCBI_SBUILDINFO_DEFAULT() NCBI_SBUILDINFO_DEFAULT_IMPL()
+#endif
+#define NCBI_APP_SBUILDINFO_DEFAULT() NCBI_SBUILDINFO_DEFAULT_IMPL()
+
  #ifdef NCBI_SBUILDINFO_DEFAULT_INSTANCE
  # undef NCBI_SBUILDINFO_DEFAULT_INSTANCE
  #endif
diff --git a/c++/include/corelib/version_api.hpp b/c++/include/corelib/version_api.hpp

index 1d2202ec4b9b6976e8e256505e3b508d21e3fa24..676433d27045ab59ed7825d96cb80a50ecc2dae7 100644 (file)
--- a/c++/include/corelib/version_api.hpp
+++ b/c++/include/corelib/version_api.hpp
@@ -1,7 +1,7 @@
  #ifndef CORELIB___VERSION_API__HPP
  #define CORELIB___VERSION_API__HPP
  
-/*  $Id: version_api.hpp 591729 2019-08-19 20:52:06Z vasilche $
+/*  $Id: version_api.hpp 609546 2020-06-03 17:21:38Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -96,73 +96,8 @@ struct NCBI_XNCBI_EXPORT SBuildInfo
      string PrintJson(void) const;
  };
  
-#ifdef NCBI_BUILD_TAG
-#  define NCBI_BUILD_TAG_PROXY  NCBI_AS_STRING(NCBI_BUILD_TAG)
-#else
-#  define NCBI_BUILD_TAG_PROXY  ""
-#endif
-
-// Cope with potentially having an older ncbi_build_info.h
-#ifndef NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO
-#  define NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO \
-    .Extra(SBuildInfo::eTeamCityProjectName, NCBI_TEAMCITY_PROJECT_NAME_PROXY)
-#  define NCBI_TEAMCITY_BUILDCONF_NAME_SBUILDINFO \
-    .Extra(SBuildInfo::eTeamCityBuildConf, NCBI_TEAMCITY_BUILDCONF_NAME_PROXY)
-#  define NCBI_TEAMCITY_BUILD_NUMBER_SBUILDINFO \
-    .Extra(SBuildInfo::eTeamCityBuildNumber, NCBI_TEAMCITY_BUILD_NUMBER_PROXY)
-#  define NCBI_SUBVERSION_REVISION_SBUILDINFO \
-    .Extra(SBuildInfo::eSubversionRevision, NCBI_SUBVERSION_REVISION_PROXY)
-#  define NCBI_SC_VERSION_SBUILDINFO \
-    .Extra(SBuildInfo::eStableComponentsVersion, NCBI_SC_VERSION_PROXY)
-#endif
-
-// Cope with potentially having an older ncbi_source_ver.h
-#ifndef NCBI_SRCTREE_VER_SBUILDINFO
-#  ifdef NCBI_SRCTREE_NAME_PROXY
-#    define NCBI_SRCTREE_VER_SBUILDINFO \
-    .Extra(NCBI_SRCTREE_NAME_PROXY, NCBI_SRCTREE_VER_PROXY)
-#  else
-#    define NCBI_SRCTREE_VER_SBUILDINFO /* empty */
-#  endif
-#endif
-
-#ifdef NCBI_APP_BUILT_AS
-#  define NCBI_BUILT_AS_SBUILDINFO \
-    .Extra(SBuildInfo::eBuiltAs, NCBI_AS_STRING(NCBI_APP_BUILT_AS))
-#else
-#  define NCBI_BUILT_AS_SBUILDINFO /* empty */
-#endif
-
-#ifdef NCBI_TEAMCITY_BUILD_ID
-#  define NCBI_BUILD_ID NCBI_TEAMCITY_BUILD_ID
-#elif defined(NCBI_BUILD_SESSION_ID)
-#  define NCBI_BUILD_ID NCBI_AS_STRING(NCBI_BUILD_SESSION_ID)
-#endif
-#ifdef NCBI_BUILD_ID
-#  define NCBI_BUILD_ID_SBUILDINFO .Extra(SBuildInfo::eBuildID, NCBI_BUILD_ID)
-#else
-#  define NCBI_BUILD_ID_SBUILDINFO /* empty */
-#endif
-
  #define NCBI_SBUILDINFO_DEFAULT_INSTANCE() SBuildInfo()
  
-#define NCBI_SBUILDINFO_DEFAULT_IMPL() \
-    NCBI_SBUILDINFO_DEFAULT_INSTANCE() \
-        NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO \
-        NCBI_TEAMCITY_BUILDCONF_NAME_SBUILDINFO \
-        NCBI_TEAMCITY_BUILD_NUMBER_SBUILDINFO \
-        NCBI_BUILD_ID_SBUILDINFO \
-        NCBI_SUBVERSION_REVISION_SBUILDINFO \
-        NCBI_SC_VERSION_SBUILDINFO \
-        NCBI_SRCTREE_VER_SBUILDINFO \
-        NCBI_BUILT_AS_SBUILDINFO
-
-#if defined(NCBI_USE_PCH) && !defined(NCBI_TEAMCITY_BUILD_NUMBER)
-#define NCBI_SBUILDINFO_DEFAULT() SBuildInfo()
-#else
-#define NCBI_SBUILDINFO_DEFAULT() NCBI_SBUILDINFO_DEFAULT_IMPL()
-#endif
-#define NCBI_APP_SBUILDINFO_DEFAULT() NCBI_SBUILDINFO_DEFAULT_IMPL()
  
  /////////////////////////////////////////////////////////////////////////////
  ///
diff --git a/c++/include/dbapi/driver/impl/dbapi_pool_balancer.hpp b/c++/include/dbapi/driver/impl/dbapi_pool_balancer.hpp

new file mode 100644 (file)

index 0000000..56411a6
--- /dev/null
+++ b/c++/include/dbapi/driver/impl/dbapi_pool_balancer.hpp
@@ -0,0 +1,82 @@
+#ifndef DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP
+#define DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP
+
+/*  $Id: dbapi_pool_balancer.hpp 610945 2020-06-25 18:31:37Z ivanov $
+ * ===========================================================================
+ *
+ *                            PUBLIC DOMAIN NOTICE
+ *               National Center for Biotechnology Information
+ *
+ *  This software/database is a "United States Government Work" under the
+ *  terms of the United States Copyright Act.  It was written as part of
+ *  the author's official duties as a United States Government employee and
+ *  thus cannot be copyrighted.  This software/database is freely available
+ *  to the public for use. The National Library of Medicine and the U.S.
+ *  Government have not placed any restriction on its use or reproduction.
+ *
+ *  Although all reasonable efforts have been taken to ensure the accuracy
+ *  and reliability of the software and data, the NLM and the U.S.
+ *  Government do not and cannot warrant the performance or results that
+ *  may be obtained by using this software or data. The NLM and the U.S.
+ *  Government disclaim all warranties, express or implied, including
+ *  warranties of performance, merchantability or fitness for any particular
+ *  purpose.
+ *
+ *  Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author:  Aaron Ucko
+ *
+ */
+
+/// @file dbapi_pool_balancer.hpp
+/// Help distribute connections within a pool across servers.
+
+#include <dbapi/driver/impl/dbapi_driver_utils.hpp>
+
+/** @addtogroup DBAPI
+ *
+ * @{
+ */
+
+BEGIN_NCBI_SCOPE
+
+class CDBPoolBalancer : public CObject  // Helps distribute connections within a pool across servers
+{
+public:
+    CDBPoolBalancer(const string& service_name,
+                    const string& pool_name,
+                    const IDBServiceMapper::TOptions& options,
+                    I_DriverContext* driver_ctx = nullptr);  // driver_ctx is optional (defaults to null)
+
+    TSvrRef GetServer(CDB_Connection** conn, const CDBConnParams* params);
+
+private:
+    struct SEndpointInfo {  // Per-endpoint record (mapped type of TEndpoints)
+        SEndpointInfo()
+            : effective_ranking(0.0), ideal_count(0.0), actual_count(0U),
+              penalty_level(0U)
+            { }  // all numeric fields start at zero; ref is default-constructed
+        
+        CRef<CDBServerOption>  ref;
+        double                 effective_ranking;
+        double                 ideal_count;
+        unsigned int           actual_count;
+        unsigned int           penalty_level;
+    };
+    typedef map<impl::TEndpointKey, SEndpointInfo> TEndpoints;  // endpoint key -> its record
+
+    impl::TEndpointKey x_NameToKey(CTempString& name) const;  // NOTE(review): name is a non-const reference, so it may be modified -- confirm in the .cpp
+    
+    TEndpoints        m_Endpoints;
+    multiset<double>  m_Rankings;    // NOTE(review): presumably the effective_ranking values of m_Endpoints -- confirm
+    I_DriverContext*  m_DriverCtx;   // NOTE(review): presumably the constructor's driver_ctx (may be null) -- confirm
+    unsigned int      m_TotalCount;  // NOTE(review): presumably the sum of actual_count over m_Endpoints -- confirm
+};
+
+END_NCBI_SCOPE
+
+/* @} */
+
+#endif  /* DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP */
diff --git a/c++/include/ncbi_pch.hpp b/c++/include/ncbi_pch.hpp

index b283fde248b1e07ad54b72cbfc18ab9907adc902..4d5d335ff166ef9719ec0d49e78e3194b13e0e8b 100644 (file)
--- a/c++/include/ncbi_pch.hpp
+++ b/c++/include/ncbi_pch.hpp
@@ -1,5 +1,5 @@
  #if defined(NCBI_USE_PCH)  &&  !defined(NCBI_PCH__HPP)
-/*  $Id: ncbi_pch.hpp 608162 2020-05-12 16:01:31Z blastadm $
+/*  $Id: ncbi_pch.hpp 617723 2020-10-06 07:10:56Z blastadm $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
diff --git a/c++/include/ncbi_source_ver.h b/c++/include/ncbi_source_ver.h

index ef07d61c088f021c0d6ccb89fd6acb2b65163845..c445b14056129e1b6a51a9e5369e3b8852cca29f 100644 (file)
--- a/c++/include/ncbi_source_ver.h
+++ b/c++/include/ncbi_source_ver.h
@@ -1,4 +1,4 @@
-/*  $Id: ncbi_source_ver.h 608162 2020-05-12 16:01:31Z blastadm $
+/*  $Id: ncbi_source_ver.h 617723 2020-10-06 07:10:56Z blastadm $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
diff --git a/c++/include/ncbiconf.h b/c++/include/ncbiconf.h

index 6d7f4474fd7c66952cf9ca0ceca12881eeee05fd..0971a38cea3d2deb35f6e8d944f1e8670340ecd5 100644 (file)
--- a/c++/include/ncbiconf.h
+++ b/c++/include/ncbiconf.h
@@ -1,7 +1,7 @@
  #ifndef FORWARDING_NCBICONF_H
  #define FORWARDING_NCBICONF_H
  
-/*  $Id: ncbiconf.h 608162 2020-05-12 16:01:31Z blastadm $
+/*  $Id: ncbiconf.h 617723 2020-10-06 07:10:56Z blastadm $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
diff --git a/c++/include/objects/dbsnp/primary_track/impl/snpptis_impl.hpp b/c++/include/objects/dbsnp/primary_track/impl/snpptis_impl.hpp

index 126d16715767062ff360526d5ee251b15631df0e..2daad0306660aa39dc02c32a84f28140f46abcc9 100644 (file)
--- a/c++/include/objects/dbsnp/primary_track/impl/snpptis_impl.hpp
+++ b/c++/include/objects/dbsnp/primary_track/impl/snpptis_impl.hpp
@@ -1,6 +1,6 @@
  #ifndef SRA__READER__SRA__IMPL__SNPPTIS__HPP
  #define SRA__READER__SRA__IMPL__SNPPTIS__HPP
-/*  $Id: snpptis_impl.hpp 597185 2019-11-18 19:46:30Z vasilche $
+/*  $Id: snpptis_impl.hpp 615550 2020-09-01 13:13:11Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -58,7 +58,16 @@ private:
      typedef ncbi::grpcapi::dbsnp::primary_track::SeqIdRequestStringAccverUnion TRequest;
  
      string x_GetPrimarySnpTrack(const TRequest& request);
-    
+
+    int max_retries;
+    float timeout;
+    float timeout_mul;
+    float timeout_inc;
+    float timeout_max;
+    float wait_time;
+    float wait_time_mul;
+    float wait_time_inc;
+    float wait_time_max;
      shared_ptr<grpc::Channel> channel;
      unique_ptr<ncbi::grpcapi::dbsnp::primary_track::DbSnpPrimaryTrack::Stub> stub;
  };
diff --git a/c++/include/objects/general/Dbtag.hpp b/c++/include/objects/general/Dbtag.hpp

index 9003c632e6cf69240626745172798afdf9208282..c3ebf6632b030835f845fea94fcdc59f73c91d6a 100644 (file)
--- a/c++/include/objects/general/Dbtag.hpp
+++ b/c++/include/objects/general/Dbtag.hpp
@@ -1,4 +1,4 @@
-/* $Id: Dbtag.hpp 591286 2019-08-13 18:04:06Z kans $
+/* $Id: Dbtag.hpp 615789 2020-09-03 18:19:18Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -220,7 +220,8 @@ public:
          eDbtagType_EPDnew,
          eDbtagType_Ensembl,
          eDbtagType_PseudoCAP,
-        eDbtagType_MarpolBase
+        eDbtagType_MarpolBase,
+        eDbtagType_dbVar
      };
  
      enum EDbtagGroup {
diff --git a/c++/include/objects/seqfeat/SeqFeatData.hpp b/c++/include/objects/seqfeat/SeqFeatData.hpp

index cac2d2af48573b7f8bb8ec82c2679c17c1382ad1..ba58d745d25718e47d821a1f3b2856f8233546eb 100644 (file)
--- a/c++/include/objects/seqfeat/SeqFeatData.hpp
+++ b/c++/include/objects/seqfeat/SeqFeatData.hpp
@@ -1,4 +1,4 @@
-/* $Id: SeqFeatData.hpp 597755 2019-11-26 19:03:13Z gotvyans $
+/* $Id: SeqFeatData.hpp 613780 2020-08-12 16:42:40Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -398,6 +398,7 @@ public:
          eQual_mol_type,
          eQual_name,
          eQual_nomenclature,
+        eQual_non_std_residue,
          eQual_ncRNA_class,
          eQual_note,
          eQual_number,
diff --git a/c++/include/objects/taxon1/local_taxon.hpp b/c++/include/objects/taxon1/local_taxon.hpp

index bed177ae3fd1ba6bcce8bbca2b77cd16c150fff8..e75f91d2db24b1e3b5f0733a54c99a17f13cb20f 100644 (file)
--- a/c++/include/objects/taxon1/local_taxon.hpp
+++ b/c++/include/objects/taxon1/local_taxon.hpp
@@ -1,4 +1,4 @@
-/*  $Id: local_taxon.hpp 598592 2019-12-11 15:20:21Z badrazat $
+/*  $Id: local_taxon.hpp 615586 2020-09-01 17:59:29Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -71,6 +71,8 @@ public:
      { return GetAncestorByRank(taxid, "species"); }
      TTaxid GetGenus(TTaxid taxid)
      { return GetAncestorByRank(taxid, "genus"); }
+    TTaxid GetOrder(TTaxid taxid)
+    { return GetAncestorByRank(taxid, "order"); }
  
      TLineage GetLineage(TTaxid taxid);
      TTaxid Join(TTaxid taxid1, TTaxid taxid2);
diff --git a/c++/include/objects/valerr/ValidErrItem.hpp b/c++/include/objects/valerr/ValidErrItem.hpp

index 4cb21b24611332a7d3a496aea4c398ae79dc9249..aef2a9cc0b703eea6130a67f3d25a790a1c76088 100644 (file)
--- a/c++/include/objects/valerr/ValidErrItem.hpp
+++ b/c++/include/objects/valerr/ValidErrItem.hpp
@@ -1,4 +1,4 @@
-/* $Id: ValidErrItem.hpp 597158 2019-11-18 17:58:02Z kans $
+/* $Id: ValidErrItem.hpp 611904 2020-07-13 15:51:08Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -717,6 +717,7 @@ enum EErrType {
      eErr_SEQ_FEAT_TRNAinsideTMRNA,
      eErr_SEQ_FEAT_IncorrectQualifierCapitalization,
      eErr_SEQ_FEAT_CDSdoesNotMatchVDJC,
+    eErr_SEQ_FEAT_GeneOnNucPositionOfPeptide,
      ERR_CODE_END(SEQ_FEAT),
  
      ERR_CODE_BEGIN(SEQ_ALIGN) = 5000,
diff --git a/c++/include/objmgr/impl/scope_impl.hpp b/c++/include/objmgr/impl/scope_impl.hpp

index d9199337ca903e04106be9f6ca01999a8b6cb40f..2dd0498f6127626c08c6e4229af0f17b67594e83 100644 (file)
--- a/c++/include/objmgr/impl/scope_impl.hpp
+++ b/c++/include/objmgr/impl/scope_impl.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJMGR_IMPL_SCOPE_IMPL__HPP
  #define OBJMGR_IMPL_SCOPE_IMPL__HPP
  
-/*  $Id: scope_impl.hpp 606922 2020-04-28 18:58:25Z ivanov $
+/*  $Id: scope_impl.hpp 610058 2020-06-10 16:19:48Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -301,6 +301,7 @@ public:
      void ResetHistory(int action); // CScope::EActionIfLocked
      void ResetDataAndHistory(void);
      void RemoveFromHistory(const CTSE_Handle& tse, int action);
+    void RemoveFromHistory(const CSeq_id_Handle& seq_id);
  
      // Revoke data sources from the scope. Throw exception if the
      // operation fails (e.g. data source is in use or not found).
diff --git a/c++/include/objmgr/scope.hpp b/c++/include/objmgr/scope.hpp

index fa3b68cd95915f23fb557d76c70dbf793f29c9b4..4aacf94ceceeaa65afe6c6d644afcf474dfbf900 100644 (file)
--- a/c++/include/objmgr/scope.hpp
+++ b/c++/include/objmgr/scope.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJMGR_SCOPE__HPP
  #define OBJMGR_SCOPE__HPP
  
-/*  $Id: scope.hpp 575832 2018-12-04 21:08:18Z vasilche $
+/*  $Id: scope.hpp 610058 2020-06-10 16:19:48Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -365,6 +365,14 @@ public:
      ///  Bioseq, which TSE is to be removed from the cache.
      void RemoveFromHistory(const CBioseq_Handle& bioseq,
                             EActionIfLocked action = eKeepIfLocked);
+    /// Remove the Seq-id failed resolution from the scope's history.
+    /// @param seq_id
+    ///  Seq-id that failed resolution
+    void RemoveFromHistory(const CSeq_id_Handle& seq_id);
+    /// Remove the Seq-id failed resolution from the scope's history.
+    /// @param seq_id
+    ///  Seq-id that failed resolution
+    void RemoveFromHistory(const CSeq_id& seq_id);
  
      /// Revoke data loader from the scope. Throw exception if the
      /// operation fails (e.g. data source is in use or not found).
diff --git a/c++/include/objmgr/util/autodef_options.hpp b/c++/include/objmgr/util/autodef_options.hpp

index fca8434e750b0122e52050902e9624f8f0f257ce..dfb612b6b2bf2f857344bc58d7cdd67bdfc0c7da 100644 (file)
--- a/c++/include/objmgr/util/autodef_options.hpp
+++ b/c++/include/objmgr/util/autodef_options.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJMGR_UTIL___AUTODEF_OPTIONS__HPP
  #define OBJMGR_UTIL___AUTODEF_OPTIONS__HPP
  
-/*  $Id: autodef_options.hpp 530276 2017-03-13 18:20:08Z bollin $
+/*  $Id: autodef_options.hpp 611612 2020-07-08 17:43:23Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -104,7 +104,8 @@ public:
          eCompleteGenome,
          ePartialSequence,
          ePartialGenome,
-        eSequence
+        eSequence,
+        eWholeGenomeShotgunSequence
      };
  
      typedef unsigned int TFeatureListType;
diff --git a/c++/include/objmgr/util/indexer.hpp b/c++/include/objmgr/util/indexer.hpp

index 2f45c496d1d186a25c970e93f963ada4742fab12..bcef906efe3338d9a354a679d2396beb789b00cb 100644 (file)
--- a/c++/include/objmgr/util/indexer.hpp
+++ b/c++/include/objmgr/util/indexer.hpp
@@ -58,6 +58,7 @@ class CGapIndex;
  class CDescriptorIndex;
  class CFeatureIndex;
  
+typedef void (*FAddSnpFunc)(CBioseq_Handle bsh, string& na_acc);
  
  // CSeqEntryIndex
  //
@@ -93,7 +94,8 @@ public:
          eInternal = 1,
          eExternal = 2,
          eExhaustive = 3,
-        eIncremental = 4
+        eFtp = 4,
+        eWeb = 5
      };
  
      enum EFlags {
@@ -116,21 +118,21 @@ public:
      // Constructors take the top-level sequence object
  
      // The primary constructor uses an existing CScope created by the application
-    CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
-    CSeqEntryIndex (CBioseq_Handle& bsh, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
+    CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy = eAdaptive, TFlags flags = fDefault);
+    CSeqEntryIndex (CBioseq_Handle& bsh, EPolicy policy = eAdaptive, TFlags flags = fDefault);
  
      // Alternative constructors take an object and create a new local default CScope
-    CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
-    CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
-    CSeqEntryIndex (CBioseq& bioseq, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
-    CSeqEntryIndex (CSeq_submit& submit, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
+    CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy = eAdaptive, TFlags flags = fDefault);
+    CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy = eAdaptive, TFlags flags = fDefault);
+    CSeqEntryIndex (CBioseq& bioseq, EPolicy policy = eAdaptive, TFlags flags = fDefault);
+    CSeqEntryIndex (CSeq_submit& submit, EPolicy policy = eAdaptive, TFlags flags = fDefault);
  
      // Specialized constructors are for streaming through release files, one component at a time
  
      // Submit-block obtained from top of Seq-submit release file
-    CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
+    CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy = eAdaptive, TFlags flags = fDefault);
      // Seq-descr chain obtained from top of Bioseq-set release file
-    CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
+    CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy = eAdaptive, TFlags flags = fDefault);
  
  private:
      // Prohibit copy constructor & assignment operator
@@ -153,14 +155,8 @@ public:
      CRef<CBioseqIndex> GetBioseqIndex (CBioseq_Handle bsh);
      // Get Bioseq index by mapped feature
      CRef<CBioseqIndex> GetBioseqIndex (const CMappedFeat& mf);
-
-    // Subrange processing creates a new CBioseqIndex around a temporary delta Bioseq
-
      // Get Bioseq index by sublocation
      CRef<CBioseqIndex> GetBioseqIndex (const CSeq_loc& loc);
-    // Get Bioseq index by subrange
-    CRef<CBioseqIndex> GetBioseqIndex (const string& accn, int from, int to, bool rev_comp);
-    CRef<CBioseqIndex> GetBioseqIndex (int from, int to, bool rev_comp);
  
      // Seqset exploration iterator
      template<typename Fnc> size_t IterateSeqsets (Fnc m);
@@ -171,6 +167,18 @@ public:
  
      bool DistributedReferences(void);
  
+    void SetSnpFunc(FAddSnpFunc* snp);
+
+    FAddSnpFunc* GetSnpFunc(void);
+
+    void SetFeatDepth(int featDepth);
+
+    int GetFeatDepth(void);
+
+    void SetGapDepth(int gapDepth);
+
+    int GetGapDepth(void);
+
      // Check all Bioseqs for failure to fetch remote sequence components or feature annotation
      bool IsFetchFailure(void);
  
@@ -197,16 +205,16 @@ public:
  
  public:
      // Initializers take the top-level sequence object
-    void x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
-    void x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
+    void x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
+    void x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
  
-    void x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
-    void x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
-    void x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
-    void x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
+    void x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
+    void x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
+    void x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
+    void x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
  
-    void x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
-    void x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
+    void x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
+    void x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
  
  private:
      // Prohibit copy constructor & assignment operator
@@ -229,13 +237,8 @@ public:
      CRef<CBioseqIndex> GetBioseqIndex (string& str);
      // Get Bioseq index by feature
      CRef<CBioseqIndex> GetBioseqIndex (const CMappedFeat& mf);
-
-    // Subrange processing creates a new CBioseqIndex around a temporary delta Bioseq
      // Get Bioseq index by sublocation
      CRef<CBioseqIndex> GetBioseqIndex (const CSeq_loc& loc);
-    // Get Bioseq index by subrange
-    CRef<CBioseqIndex> GetBioseqIndex (const string& accn, int from, int to, bool rev_comp);
-    CRef<CBioseqIndex> GetBioseqIndex (int from, int to, bool rev_comp);
  
      // Seqset exploration iterator
      template<typename Fnc> size_t IterateSeqsets (Fnc m);
@@ -260,6 +263,18 @@ public:
  
      bool DistributedReferences (void) const { return m_DistributedReferences; }
  
+    void SetSnpFunc(FAddSnpFunc* snp);
+
+    FAddSnpFunc* GetSnpFunc(void);
+
+    void SetFeatDepth(int featDepth);
+
+    int GetFeatDepth(void);
+
+    void SetGapDepth(int gapDepth);
+
+    int GetGapDepth(void);
+
      // Check all Bioseqs for failure to fetch remote sequence components or remote feature annotation
      bool IsFetchFailure(void);
  
@@ -274,14 +289,6 @@ private:
      // Recursive exploration to populate vector of index objects for Bioseqs in Seq-entry
      void x_InitSeqs (const CSeq_entry& sep, CRef<CSeqsetIndex> prnt, int level = 0);
  
-    CRef<CSeq_id> x_MakeUniqueId(void);
-
-    // Create delta sequence referring to location, using temporary local ID
-    CRef<CBioseqIndex> x_DeltaIndex(const CSeq_loc& loc);
-
-    // Create location from range, to use in x_DeltaIndex
-    CConstRef<CSeq_loc> x_SubRangeLoc(const string& accn, int from, int to, bool rev_comp);
-
  private:
      CRef<CObjectManager> m_Objmgr;
      CRef<CScope> m_Scope;
@@ -294,7 +301,6 @@ private:
  
      CSeqEntryIndex::EPolicy m_Policy;
      CSeqEntryIndex::TFlags m_Flags;
-    int m_Depth;
  
      vector<CRef<CBioseqIndex>> m_BsxList;
  
@@ -313,6 +319,11 @@ private:
  
      bool m_DistributedReferences;
  
+    FAddSnpFunc* m_SnpFunc;
+
+    int m_FeatDepth;
+    int m_GapDepth;
+
      mutable CAtomicCounter m_Counter;
  
      bool m_IndexFailure;
@@ -391,9 +402,7 @@ public:
                    CRef<CScope> scope,
                    CSeqMasterIndex& idx,
                    CSeqEntryIndex::EPolicy policy,
-                  CSeqEntryIndex::TFlags flags,
-                  int depth,
-                  bool surrogate);
+                  CSeqEntryIndex::TFlags flags);
  
      // Destructor
      ~CBioseqIndex (void);
@@ -412,8 +421,7 @@ public:
  
      // Feature exploration iterator
      template<typename Fnc> size_t IterateFeatures (Fnc m);
-
-    template<typename Fnc> size_t IterateFeaturesByLoc (const CSeq_loc& slp, Fnc m);
+    template<typename Fnc> size_t IterateFeatures (CSeq_loc& slp, Fnc m);
  
      // Getters
      CBioseq_Handle GetBioseqHandle (void) const { return m_Bsh; }
@@ -464,12 +472,14 @@ public:
      CSeq_inst::TLength GetLength (void) const { return m_Length; }
  
      bool IsDelta (void) const { return m_IsDelta; }
+    bool IsDeltaLitOnly (void) const { return m_IsDeltaLitOnly; }
      bool IsVirtual (void) const { return m_IsVirtual; }
      bool IsMap (void) const { return m_IsMap; }
  
      // Seq-id fields
      const string& GetAccession (void) const { return m_Accession; }
  
+    bool IsRefSeq (void) const { return m_IsRefSeq; }
      bool IsNC (void) const { return m_IsNC; }
      bool IsNM (void) const { return m_IsNM; }
      bool IsNR (void) const { return m_IsNR; }
@@ -515,7 +525,7 @@ public:
  
      const string& GetCommon (void);
      const string& GetLineage (void);
-    int GetTaxid (void);
+    TTaxId GetTaxid (void);
      bool IsUsingAnamorph (void);
  
      CTempString GetGenus (void);
@@ -584,8 +594,12 @@ private:
  
      // Common feature collection, delayed until actually needed
      void x_InitFeats (void);
-    // Collect features by location
-    void x_InitFeatsByLoc (const CSeq_loc& slp);
+    void x_InitFeats (CSeq_loc& slp);
+
+    void x_DefaultSelector(SAnnotSelector& sel, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, bool onlyNear, CScope& scope);
+
+    // common implementation method
+    void x_InitFeats (CSeq_loc* slpp);
  
      // Set BioSource flags
      void x_InitSource (void);
@@ -625,7 +639,6 @@ private:
  
      CSeqEntryIndex::EPolicy m_Policy;
      CSeqEntryIndex::TFlags m_Flags;
-    int m_Depth;
  
      bool m_FetchFailure;
  
@@ -637,12 +650,14 @@ private:
      CSeq_inst::TLength m_Length;
  
      bool m_IsDelta;
+    bool m_IsDeltaLitOnly;
      bool m_IsVirtual;
      bool m_IsMap;
  
      // Seq-id fields
      string m_Accession;
  
+    bool m_IsRefSeq;
      bool m_IsNC;
      bool m_IsNM;
      bool m_IsNR;
@@ -690,7 +705,7 @@ private:
  
      string m_Common;
      string m_Lineage;
-    int m_Taxid;
+    TTaxId m_Taxid;
      bool m_UsingAnamorph;
  
      CTempString m_Genus;
@@ -757,9 +772,6 @@ private:
  
      // Map fields
      string m_rEnzyme;
-
-    // true if this index is for a temporary subrange delta Bioseq
-    bool m_Surrogate;
  };
  
  
@@ -855,6 +867,7 @@ public:
      // Constructor
      CFeatureIndex (CSeq_feat_Handle sfh,
                     const CMappedFeat mf,
+                   CConstRef<CSeq_loc> feat_loc,
                     CBioseqIndex& bsx);
  
  private:
@@ -1071,16 +1084,15 @@ size_t CBioseqIndex::IterateFeatures (Fnc m)
      return count;
  }
  
-// Visit CFeatureIndex objects for range of features
  template<typename Fnc>
  inline
-size_t CBioseqIndex::IterateFeaturesByLoc (const CSeq_loc& slp, Fnc m)
+size_t CBioseqIndex::IterateFeatures (CSeq_loc& slp, Fnc m)
  
  {
      int count = 0;
      try {
          // Delay feature collection until first request, but do not bail on m_FeatsInitialized flag
-        x_InitFeatsByLoc(slp);
+        x_InitFeats(slp);
  
          for (auto& sfx : m_SfxList) {
              count++;
@@ -1088,7 +1100,7 @@ size_t CBioseqIndex::IterateFeaturesByLoc (const CSeq_loc& slp, Fnc m)
          }
      }
      catch (CException& e) {
-        LOG_POST(Error << "Error in CBioseqIndex::IterateFeaturesByLoc: " << e.what());
+        LOG_POST(Error << "Error in CBioseqIndex::IterateFeatures: " << e.what());
      }
      return count;
  }
diff --git a/c++/include/objtools/blast/blastdb_format/blastdb_dataextract.hpp b/c++/include/objtools/blast/blastdb_format/blastdb_dataextract.hpp

index 4cf05b5ef819c24805f90853933075025635616a..e2991b7e4896c9ef5c0011802797e9ab20a883f2 100644 (file)
--- a/c++/include/objtools/blast/blastdb_format/blastdb_dataextract.hpp
+++ b/c++/include/objtools/blast/blastdb_format/blastdb_dataextract.hpp
@@ -1,4 +1,4 @@
-/*  $Id: blastdb_dataextract.hpp 591546 2019-08-16 16:59:06Z vasilche $
+/*  $Id: blastdb_dataextract.hpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -141,9 +141,9 @@ protected:
      /// Cache the defline (for membership bits)
      CRef<CBlast_def_line_set> m_Defline;
      /// Pair with a gi2taxid map for one Oid
-    pair<TOID, map<TGi, int> > m_Gi2TaxidMap;
+    pair<TOID, map<TGi, TTaxId> > m_Gi2TaxidMap;
      /// Pair with a gi2taxid-set map for one Oid
-    pair<TOID, map<TGi, set<int> > > m_Gi2TaxidSetMap;
+    pair<TOID, map<TGi, set<TTaxId> > > m_Gi2TaxidSetMap;
      /// Pair with a gi2accesion map for one Oid
      pair<TOID, map<TGi, string> > m_Gi2AccMap;
      /// Pair with a gi2title map for one Oid
@@ -156,8 +156,8 @@ protected:
      bool m_UseLongSeqIds;
  private:
      void x_ExtractMaskingData(CSeqDB::TSequenceRanges &ranges, int algo_id);
-    int x_ExtractTaxId();
-    void x_ExtractLeafTaxIds(set<int>& taxids);
+    TTaxId x_ExtractTaxId();
+    void x_ExtractLeafTaxIds(set<TTaxId>& taxids);
      /// Sets the map
      void x_SetGi2AccMap();
      /// Sets the map
diff --git a/c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp b/c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp

index 18793bdef2ea0db2da8f17761aa6a8af295a68d6..722319efe7c24276e671893510171b83333ea855 100644 (file)
--- a/c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp
+++ b/c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_READERS_BLAST__SEQDB__SEQDB_LMDB_HPP
  #define OBJTOOLS_READERS_BLAST__SEQDB__SEQDB_LMDB_HPP
  
-/*  $Id: seqdb_lmdb.hpp 585739 2019-05-03 11:01:28Z fongah2 $
+/*  $Id: seqdb_lmdb.hpp 616351 2020-09-15 12:19:15Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -44,6 +44,7 @@ class NCBI_XOBJREAD_EXPORT CSeqDBLMDB : public CObject
  {
  public:
      CSeqDBLMDB(const string & fname);
+    virtual ~CSeqDBLMDB();
      CSeqDBLMDB& operator=(const CSeqDBLMDB&) = delete;
      CSeqDBLMDB(const CSeqDBLMDB&) = delete;
  
@@ -82,21 +83,21 @@ public:
      /// Get Oids for Tax Ids list, idenitcal Oids are merged.
      /// @param tax_ids  Input tax ids /Output tax ids found
      /// @param oids  Oids found for input tax ids
-    void GetOidsForTaxIds(const set<Int4> & tax_ids, vector<blastdb::TOid>& oids, vector<Int4> & tax_ids_found) const;
+    void GetOidsForTaxIds(const set<TTaxId> & tax_ids, vector<blastdb::TOid>& oids, vector<TTaxId> & tax_ids_found) const;
  
      /// Get Oids to exclude for Tax ids
      /// @parm ids Input tax ids to exclude /Output tax ids found
      /// @param rv Oids to exclude based on input tax id list
-    void NegativeTaxIdsToOids(const set<Int4>& ids, vector<blastdb::TOid>& rv, vector<Int4> & tax_ids_found) const;
+    void NegativeTaxIdsToOids(const set<TTaxId>& ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const;
  
      /// Get All Unique Tax Ids for db
      /// @parma tax_ids  Return all unique tax ids found in db
-    void GetDBTaxIds(vector<Int4> & tax_ids) const;
+    void GetDBTaxIds(vector<TTaxId> & tax_ids) const;
  
      /// Get Tax Ids for oid list
      /// @param oids Input oid list
      /// @param tax_ids Output tax id list
-    void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int4> & tax_ids) const;
+    void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const;
  
  private:
      string  m_LMDBFile;
@@ -104,6 +105,7 @@ private:
      string  m_Oid2TaxIdsFile;
      string  m_TaxId2OidsFile;
      string  m_TaxId2OffsetsFile;
+    mutable bool m_LMDBFileOpened;
  };
  
  /// Build the canonical LMDB file name for BLAST databases
@@ -137,8 +139,8 @@ class NCBI_XOBJREAD_EXPORT CBlastLMDBManager
  public:
         static CBlastLMDBManager & GetInstance();
         lmdb::env & GetReadEnvVol(const string & fname, MDB_dbi & db_volname, MDB_dbi & db_volinfo);
-       lmdb::env & GetReadEnvAcc(const string & fname, MDB_dbi & db_acc);
-       lmdb::env & GetReadEnvTax(const string & fname, MDB_dbi & db_tax);
+       lmdb::env & GetReadEnvAcc(const string & fname, MDB_dbi & db_acc, bool* opened = 0);
+       lmdb::env & GetReadEnvTax(const string & fname, MDB_dbi & db_tax, bool* opened = 0);
         lmdb::env & GetWriteEnv(const string & fname, Uint8 map_size);
  
         void CloseEnv(const string & fname);
@@ -149,7 +151,7 @@ private:
         public:
                 CBlastEnv(const string & fname, ELMDBFileType file_type, bool read_only = true, Uint8 map_size =0);
                 lmdb::env & GetEnv() { return m_Env; }
-               const string & GetFilename () { return m_Filename; }
+               const string & GetFilename () const { return m_Filename; }
                 ~CBlastEnv();
                 unsigned int AddReference(){ m_Count++; return m_Count;}
                 unsigned int RemoveReference(){ m_Count--; return m_Count;}
@@ -162,17 +164,19 @@ private:
                 };
                 MDB_dbi GetDbi(EDbiType dbi_type);
                 void InitDbi(lmdb::env & env, ELMDBFileType file_type);
+               void SetMapSize(Uint8 map_size);
+               bool IsReadOnly() { return m_ReadOnly; }
+
         private:
                 string m_Filename;
                 ELMDBFileType m_FileType;
                 lmdb::env m_Env;
                 unsigned int m_Count;
                 bool m_ReadOnly;
-               Uint8 m_MapSize;
                 vector<MDB_dbi> m_dbis;
         };
  
-       CBlastEnv* GetBlastEnv(const string & fname, ELMDBFileType file_type);
+       CBlastEnv* GetBlastEnv(const string & fname, ELMDBFileType file_type, bool* opened = 0);
         CBlastLMDBManager(){}
         ~CBlastLMDBManager();
         friend class CSafeStatic_Allocator<CBlastLMDBManager>;
diff --git a/c++/include/objtools/blast/seqdb_reader/impl/seqdbtax.hpp b/c++/include/objtools/blast/seqdb_reader/impl/seqdbtax.hpp

index 72ffb794305a95bab9a920c76b53f48b94664a06..5b4f61b0d847a421ed26ea61375d9ee8aada6bf7 100644 (file)
--- a/c++/include/objtools/blast/seqdb_reader/impl/seqdbtax.hpp
+++ b/c++/include/objtools/blast/seqdb_reader/impl/seqdbtax.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_READERS_SEQDB__SEQDBTAX_HPP
  #define OBJTOOLS_READERS_SEQDB__SEQDBTAX_HPP
  
-/*  $Id: seqdbtax.hpp 553714 2017-12-20 18:36:44Z vakatov $
+/*  $Id: seqdbtax.hpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -69,7 +69,7 @@ public:
      /// @param locked
      ///   The lock holder object for this thread.
      /// @return true if the taxonomic id was found
-    static bool GetTaxNames(Int4 tax_id, SSeqDBTaxInfo  & info);
+    static bool GetTaxNames(TTaxId tax_id, SSeqDBTaxInfo  & info);
      
  
  };
diff --git a/c++/include/objtools/blast/seqdb_reader/seqdb.hpp b/c++/include/objtools/blast/seqdb_reader/seqdb.hpp

index 640ec543c98ec5d498770b90fc9fea09ae670fe5..1aeed62ab1d0341292c726dd07b9e7f487d9ea43 100644 (file)
--- a/c++/include/objtools/blast/seqdb_reader/seqdb.hpp
+++ b/c++/include/objtools/blast/seqdb_reader/seqdb.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_BLAST_SEQDB_READER___SEQDB__HPP
  #define OBJTOOLS_BLAST_SEQDB_READER___SEQDB__HPP
  
-/*  $Id: seqdb.hpp 605340 2020-04-09 16:06:43Z ivanov $
+/*  $Id: seqdb.hpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -532,7 +532,7 @@ public:
      ///
      /// This finds the leaf-node TAXIDS associated with a given OID and
      /// computes a mapping from GI to taxid.  This mapping is added to the
-    /// map<int,set<int>> provided by the user.  If the "persist" flag is
+    /// map<TGi,set<TTaxId>> provided by the user.  If the "persist" flag is
      /// set to true, the new associations will simply be added to the
      /// map.  If it is false (the default), the map will be cleared
      /// first.
@@ -545,7 +545,7 @@ public:
      ///   If false, the map will be cleared before adding new entries.
      void GetLeafTaxIDs(
              int                  oid,
-            map<TGi, set<int> >& gi_to_taxid_set,
+            map<TGi, set<TTaxId> >& gi_to_taxid_set,
              bool                 persist = false
      ) const;
  
@@ -567,7 +567,7 @@ public:
      ///   If false, the map will be cleared before adding new entries.
      void GetLeafTaxIDs(
              int           oid,
-            vector<int> & taxids,
+            vector<TTaxId> & taxids,
              bool          persist = false
      ) const;
  
@@ -587,7 +587,7 @@ public:
      /// @param persist
      ///   If false, the map will be cleared before adding new entries.
      void GetTaxIDs(int             oid,
-                   map<TGi, int> & gi_to_taxid,
+                   map<TGi, TTaxId> & gi_to_taxid,
                     bool            persist = false) const;
  
      /// Get taxids for an OID.
@@ -607,7 +607,7 @@ public:
      /// @param persist
      ///   If false, the map will be cleared before adding new entries.
      void GetTaxIDs(int           oid,
-                   vector<int> & taxids,
+                   vector<TTaxId> & taxids,
                     bool          persist = false) const;
  
      /// Get all tax ids for an oid
@@ -618,7 +618,7 @@ public:
      /// @param taxids
      ///   A returned a set of taxids.
      void GetAllTaxIDs(int           oid,
-                      set<int> & taxids) const;
+                      set<TTaxId> & taxids) const;
  
      /// Get a CBioseq for a sequence.
      ///
@@ -1213,7 +1213,7 @@ public:
      ///   An integer identifying the taxid to fetch.
      /// @param info
      ///   A structure containing taxonomic description strings.
-    static void GetTaxInfo(int taxid, SSeqDBTaxInfo & info);
+    static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo & info);
  
      /// Fetch data as a CSeq_data object.
      ///
@@ -1500,11 +1500,11 @@ public:
      /// Get Oid list for input tax ids
      /// @param tax_ids taxonomy ids, return only tax ids found in db
      // @param rv               oids corrpond to tax ids
-    void TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv) const;
+    void TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const;
  
      /// Get all unique tax ids from db
      /// @param tax_ids return taxonomy ids in db
-    void GetDBTaxIds(set<Int4> & tax_ids) const;
+    void GetDBTaxIds(set<TTaxId> & tax_ids) const;
  
  protected:
      /// Implementation details are hidden.  (See seqdbimpl.hpp).
diff --git a/c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp b/c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp

index aabb60c2dd0d3cae6f92d62b29000ef9b80b29ef..1b4ffe33ae94a2eb3f88bf61f4db10cea3a30d83 100644 (file)
--- a/c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp
+++ b/c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_BLAST_SEQDB_READER___SEQDBCOMMON__HPP
  #define OBJTOOLS_BLAST_SEQDB_READER___SEQDBCOMMON__HPP
  
-/*  $Id: seqdbcommon.hpp 605335 2020-04-09 16:04:38Z ivanov $
+/*  $Id: seqdbcommon.hpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -214,7 +214,7 @@ public:
      };
  
      struct STaxIdsOids {
-       set<int> tax_ids;
+       set<TTaxId> tax_ids;
         vector<blastdb::TOid> oids;
      };
  
@@ -473,7 +473,7 @@ public:
      void GetPigList(vector<TPig>& pigs) const;
  
  
-    set<Int4> & GetTaxIdsList()
+    set<TTaxId> & GetTaxIdsList()
         {
         return m_TaxIdsOids.tax_ids;
         }
@@ -507,9 +507,9 @@ public:
          m_SisOids.push_back(si);
      }
  
-    void AddTaxIds(const set<int> & tax_ids)
+    void AddTaxIds(const set<TTaxId> & tax_ids)
      {
-       set<int> & tids = m_TaxIdsOids.tax_ids;
+       set<TTaxId> & tids = m_TaxIdsOids.tax_ids;
         tids.insert(tax_ids.begin(), tax_ids.end());
      }
  
@@ -1078,12 +1078,12 @@ public:
                 return m_ListInfo;
      }
  
-    void AddTaxIds(const set<int> & tax_ids)
+    void AddTaxIds(const set<TTaxId> & tax_ids)
      {
                 m_TaxIds.insert(tax_ids.begin(), tax_ids.end());
      }
  
-    set<Int4> & GetTaxIdsList()
+    set<TTaxId> & GetTaxIdsList()
      {
                 return m_TaxIds;
      }
@@ -1104,7 +1104,7 @@ protected:
  
      /// SeqIds to exclude from the SeqDB instance.
      vector<string> m_Sis;
-    set<Int4> m_TaxIds;
+    set<TTaxId> m_TaxIds;
  
  private:
      /// Prevent copy constructor.
@@ -1821,13 +1821,13 @@ typedef map< string, TSeqDBAliasFileVersions > TSeqDBAliasFileValues;
  struct SSeqDBTaxInfo {
      /// Default constructor
      /// @param t the taxonomy ID to set for this structure
-    SSeqDBTaxInfo(int t = 0)
+    SSeqDBTaxInfo(TTaxId t = ZERO_TAX_ID)
          : taxid(t)
      {
      }
  
      /// An identifier for this species or taxonomic group.
-    int taxid;
+    TTaxId taxid;
  
      /// Scientific name, such as "Aotus vociferans".
      string scientific_name;
diff --git a/c++/include/objtools/blast/seqdb_writer/taxid_set.hpp b/c++/include/objtools/blast/seqdb_writer/taxid_set.hpp

index 2e21d63e077cb0cd2189db60ca3bc574f2b9cbf1..5e4337c78ddb0335bde8550163d0e5489c86c964 100644 (file)
--- a/c++/include/objtools/blast/seqdb_writer/taxid_set.hpp
+++ b/c++/include/objtools/blast/seqdb_writer/taxid_set.hpp
@@ -1,4 +1,4 @@
-/*  $Id: taxid_set.hpp 208050 2010-10-13 15:48:11Z maning $
+/*  $Id: taxid_set.hpp 616350 2020-09-15 12:19:05Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -44,9 +44,9 @@ BEGIN_NCBI_SCOPE
  
  class NCBI_XOBJWRITE_EXPORT CTaxIdSet : public CObject {
  public:
-    static const int kTaxIdNotSet = 0;
+    static const TTaxId kTaxIdNotSet;
  
-    CTaxIdSet(int global_taxid = kTaxIdNotSet)
+    CTaxIdSet(TTaxId global_taxid = kTaxIdNotSet)
          : m_GlobalTaxId(global_taxid),
            m_Matched(true) {}
      
@@ -60,14 +60,14 @@ public:
      bool HasEverFixedId() const { return m_Matched; };
      
  private:
-    int                m_GlobalTaxId;
-    map< string, int > m_TaxIdMap;
-    bool               m_Matched;
+    TTaxId                m_GlobalTaxId;
+    map< string, TTaxId > m_TaxIdMap;
+    bool                  m_Matched;
  
      /// Selects the most suitable tax id for the input passed in, checking the
      /// global taxid first, then the mapping provided by an input file, and
      /// finally what's found in the defline argument
-    int x_SelectBestTaxid(const objects::CBlast_def_line & defline);
+    TTaxId x_SelectBestTaxid(const objects::CBlast_def_line & defline);
      
  };
  
diff --git a/c++/include/objtools/blast/seqdb_writer/writedb_lmdb.hpp b/c++/include/objtools/blast/seqdb_writer/writedb_lmdb.hpp

index 79913967a5dbabba19115f2f939a2432e21cb955..785c1ee12e0fae013d2066671344f1bb930f2ea4 100644 (file)
--- a/c++/include/objtools/blast/seqdb_writer/writedb_lmdb.hpp
+++ b/c++/include/objtools/blast/seqdb_writer/writedb_lmdb.hpp
@@ -48,6 +48,13 @@ USING_SCOPE(objects);
  
  BEGIN_NCBI_SCOPE
  
+#ifdef NCBI_OS_MSWIN
+#define DEFAULT_LMDB_MAP_SIZE 500000
+#define DEFAULT_TAXID_MAP_SIZE 500000
+#else
+#define DEFAULT_LMDB_MAP_SIZE 300000000000
+#define DEFAULT_TAXID_MAP_SIZE 100000000000
+#endif
  
  /// This class supports creation of a string accession to integer OID
  /// lmdb database
@@ -59,7 +66,7 @@ public:
  
      /// Constructor for LMDB write access
      /// @param dbname Database name
-    CWriteDB_LMDB(const string& dbname, Uint8 map_size = 300000000000, Uint8 capacity = 500000);
+    CWriteDB_LMDB(const string& dbname, Uint8 map_size = DEFAULT_LMDB_MAP_SIZE, Uint8 capacity = 500000);
  
      // Destructor
      ~CWriteDB_LMDB();
@@ -93,11 +100,14 @@ private:
      void x_InsertEntry(const CRef<CSeq_id> &seqid, const blastdb::TOid oid);
      void x_CreateOidToSeqidsLookupFile();
      void x_Resize();
+    void x_IncreaseEnvMapSize();
+    void x_IncreaseEnvMapSize(const vector<string> & vol_names, const vector<blastdb::TOid> & vol_num_oids);
  
      string m_Db;
      lmdb::env  &m_Env;
      Uint8 m_ListCapacity;
      unsigned int m_MaxEntryPerTxn;
+    size_t m_TotalIdsLength;
      struct SKeyValuePair {
         string id;
         blastdb::TOid oid;
@@ -130,7 +140,7 @@ public:
  
      /// Constructor for LMDB write access
      /// @param dbname Database name
-    CWriteDB_TaxID(const string& dbname, Uint8 map_size = 300000000000, Uint8 capacity = 500000);
+    CWriteDB_TaxID(const string& dbname, Uint8 map_size = DEFAULT_TAXID_MAP_SIZE, Uint8 capacity = 500000);
  
      // Destructor
      ~CWriteDB_TaxID();
@@ -143,13 +153,15 @@ public:
      /// @param tax_ids list for oid
      /// @return number of rows added to database
      /// @see InsertEntry
-    int InsertEntries(const set<Int4> & tax_ids, const blastdb::TOid oid);
+    int InsertEntries(const set<TTaxId> & tax_ids, const blastdb::TOid oid);
  
  private:
      void x_CommitTransaction();
      void x_CreateOidToTaxIdsLookupFile();
      void x_CreateTaxIdToOidsLookupFile();
      void x_Resize();
+    void x_IncreaseEnvMapSize();
+
  
      string m_Db;
      lmdb::env  &m_Env;
@@ -157,9 +169,9 @@ private:
      unsigned int m_MaxEntryPerTxn;
      template <class valueType>
      struct SKeyValuePair {
-       Int4 tax_id;
+        TTaxId tax_id;
         valueType value;
-       SKeyValuePair(int t, valueType v) : tax_id(t), value(v) {}
+       SKeyValuePair(TTaxId t, valueType v) : tax_id(t), value(v) {}
         static bool cmp_key(const SKeyValuePair & v, const SKeyValuePair & k) {
                         if(v.tax_id == k.tax_id) {
                                 return v.value < k.value;
diff --git a/c++/include/objtools/cleanup/cleanup.hpp b/c++/include/objtools/cleanup/cleanup.hpp

index 404a3d860b169e17007759248d678974affcf9c6..6d8bb69478ddee1d794188028753237e3a63af43 100644 (file)
--- a/c++/include/objtools/cleanup/cleanup.hpp
+++ b/c++/include/objtools/cleanup/cleanup.hpp
@@ -1,7 +1,7 @@
  #ifndef CLEANUP___CLEANUP__HPP
  #define CLEANUP___CLEANUP__HPP
  
-/*  $Id: cleanup.hpp 605251 2020-04-08 14:24:56Z ivanov $
+/*  $Id: cleanup.hpp 614966 2020-08-25 16:46:33Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -63,6 +63,7 @@ class CSeq_annot_Handle;
  class CSeq_feat_Handle;
  
  class CCleanupChange;
+class IObjtoolsListener;
  
  class NCBI_CLEANUP_EXPORT CCleanup : public CObject 
  {
@@ -414,7 +415,7 @@ public:
  /// Get labels for a pubdesc. To be used in citations.
      static void GetPubdescLabels
          (const CPubdesc& pd,
-        vector<int>& pmids, vector<int>& muids, vector<int>& serials,
+        vector<TEntrezId>& pmids, vector<TEntrezId>& muids, vector<int>& serials,
          vector<string>& published_labels, vector<string>& unpublished_labels);
  
  /// Get list of pubs that can be used for citations for Seq-feat on a Bioseq-handle
@@ -504,7 +505,11 @@ public:
  /// @param str  string from which to parse code break
  /// @param scope scope in which to find sequences referenced (used for location comparisons)
  /// @return bool indicates string was successfully parsed and code break was added
-    static bool ParseCodeBreak(const CSeq_feat& feat, CCdregion& cds, const string& str, CScope& scope);
+    static bool ParseCodeBreak(const CSeq_feat& feat, 
+            CCdregion& cds, 
+            const CTempString& str, 
+            CScope& scope, 
+            IObjtoolsListener* pMessageListener=nullptr);
  
  /// Parses all valid transl_except Gb-quals into code-breaks for cdregion,
  /// then removes the transl_except Gb-quals that were successfully parsed
diff --git a/c++/include/objtools/cleanup/cleanup_message.hpp b/c++/include/objtools/cleanup/cleanup_message.hpp

new file mode 100644 (file)

index 0000000..fd49384
--- /dev/null
+++ b/c++/include/objtools/cleanup/cleanup_message.hpp
@@ -0,0 +1,71 @@
+#ifndef _CLEANUP_MESSAGE_HPP_
+#define _CLEANUP_MESSAGE_HPP_
+
+/*  $Id: cleanup_message.hpp 608332 2020-05-14 16:04:14Z ivanov $
+ * ===========================================================================
+ *
+ *                            PUBLIC DOMAIN NOTICE
+ *               National Center for Biotechnology Information
+ *
+ *  This software/database is a "United States Government Work" under the
+ *  terms of the United States Copyright Act.  It was written as part of
+ *  the author's official duties as a United States Government employee and
+ *  thus cannot be copyrighted.  This software/database is freely available
+ *  to the public for use. The National Library of Medicine and the U.S.
+ *  Government have not placed any restriction on its use or reproduction.
+ *
+ *  Although all reasonable efforts have been taken to ensure the accuracy
+ *  and reliability of the software and data, the NLM and the U.S.
+ *  Government do not and cannot warrant the performance or results that
+ *  may be obtained by using this software or data. The NLM and the U.S.
+ *  Government disclaim all warranties, express or implied, including
+ *  warranties of performance, merchantability or fitness for any particular
+ *  purpose.
+ *
+ *  Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author:  Justin Foley
+ *
+ * File Description:
+ *   Declares CCleanupMessage, a CObjtoolsMessage with cleanup-specific codes.
+ *
+ */
+
+#include <objtools/logging/message.hpp>
+
+BEGIN_NCBI_SCOPE
+BEGIN_SCOPE(objects)
+/// Objtools message that additionally carries a cleanup ECode/ESubcode pair.
+class NCBI_CLEANUP_EXPORT CCleanupMessage : public CObjtoolsMessage
+{
+public:
+    enum class ECode { ///< Value returned (as int) by GetCode().
+        eCodeBreak     ///< NOTE(review): presumably raised by code-break parsing - confirm.
+    };
+    /// Value returned (as int) by GetSubCode().
+    enum class ESubcode {
+        eBadLocation,
+        eParseError
+    };
+    /// Declared only; the definition is not part of this header.
+    CCleanupMessage(string text, EDiagSev sev, ECode code, ESubcode subcode);
+    /// Overrides the base Clone(); the return type is narrowed to CCleanupMessage*.
+    CCleanupMessage *Clone(void) const override;
+    /// The two accessors below expose the scoped enums as plain ints.
+    int GetCode(void) const override {
+        return static_cast<int>(m_Code);
+    }
+    int GetSubCode(void) const override {
+        return static_cast<int>(m_Subcode);
+    }
+private:
+    ECode m_Code;        ///< Read by GetCode().
+    ESubcode m_Subcode;  ///< Read by GetSubCode().
+};
+
+END_SCOPE(objects)
+END_NCBI_SCOPE    
+
+#endif // _CLEANUP_MESSAGE_HPP_
diff --git a/c++/include/objtools/data_loaders/blastdb/bdbloader.hpp b/c++/include/objtools/data_loaders/blastdb/bdbloader.hpp

index 5b7b34e7cafc313fda5da08c94b1eae13e99bebb..546149e13e70ee3a262ef703819eec198ce9a87f 100644 (file)
--- a/c++/include/objtools/data_loaders/blastdb/bdbloader.hpp
+++ b/c++/include/objtools/data_loaders/blastdb/bdbloader.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP
  #define OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP
  
-/*  $Id: bdbloader.hpp 368048 2012-07-02 13:25:25Z camacho $
+/*  $Id: bdbloader.hpp 612733 2020-07-27 11:38:27Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -39,6 +39,7 @@
  #include <objmgr/data_loader.hpp>
  #include <objtools/data_loaders/blastdb/blastdb_adapter.hpp>
  #include <objects/seqset/Seq_entry.hpp>
+#include <util/limited_size_map.hpp>
  
  BEGIN_NCBI_SCOPE
  BEGIN_SCOPE(objects)
@@ -151,7 +152,7 @@ public:
      virtual TTSE_Lock GetBlobById(const TBlobId& blob_id);
      
      /// A mapping from sequence identifier to blob ids.
-    typedef map< CSeq_id_Handle, int > TIdMap;
+    typedef limited_size_map<CSeq_id_Handle, int> TIdMap;
  
      /// @note this is added to temporarily comply with the toolkit's stable
      /// components rule of having backwards compatible APIs
diff --git a/c++/include/objtools/edit/feattable_edit.hpp b/c++/include/objtools/edit/feattable_edit.hpp

index d770c4d27b135e5a564b38653a2c05b33dc80a8c..ddce16cfb7b7800a08efb63dc1d1df244d8c62cb 100644 (file)
--- a/c++/include/objtools/edit/feattable_edit.hpp
+++ b/c++/include/objtools/edit/feattable_edit.hpp
@@ -1,4 +1,4 @@
-/*  $Id: feattable_edit.hpp 593415 2019-09-18 14:52:52Z ludwigf $
+/*  $Id: feattable_edit.hpp 612521 2020-07-23 11:23:16Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -76,6 +76,7 @@ public:
          bool forEukaryote);
      void GenerateMissingParentFeaturesForEukaryote();
      void GenerateMissingParentFeaturesForProkaryote();
+    void ProcessCodonRecognized();
      unsigned int PendingLocusTagNumber() const {
          return mLocusTagNumber;
      }
@@ -113,6 +114,8 @@ protected:
          const CMappedFeat& mrna);
      void xPutErrorDifferingProteinIds(
          const CMappedFeat& mrna);
+    void xPutErrorBadCodonRecognized(
+        const string codonRecognized);
  
  
      void xFeatureAddQualifier(
diff --git a/c++/include/objtools/edit/remote_updater.hpp b/c++/include/objtools/edit/remote_updater.hpp

index ffffb7e88452e244584f1fdfe0040305b9ad094d..83df4ed7a6617274fb8b8bba96dacb597fa2515b 100755 (executable)
--- a/c++/include/objtools/edit/remote_updater.hpp
+++ b/c++/include/objtools/edit/remote_updater.hpp
@@ -1,6 +1,41 @@
+/*  $Id: remote_updater.hpp 614636 2020-08-20 13:02:57Z fukanchi $
+* ===========================================================================
+*
+*                            PUBLIC DOMAIN NOTICE
+*               National Center for Biotechnology Information
+*
+*  This software/database is a "United States Government Work" under the
+*  terms of the United States Copyright Act.  It was written as part of
+*  the author's official duties as a United States Government employee and
+*  thus cannot be copyrighted.  This software/database is freely available
+*  to the public for use. The National Library of Medicine and the U.S.
+*  Government have not placed any restriction on its use or reproduction.
+*
+*  Although all reasonable efforts have been taken to ensure the accuracy
+*  and reliability of the software and data, the NLM and the U.S.
+*  Government do not and cannot warrant the performance or results that
+*  may be obtained by using this software or data. The NLM and the U.S.
+*  Government disclaim all warranties, express or implied, including
+*  warranties of performance, merchantability or fitness for any particular
+*  purpose.
+*
+*  Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+* Authors:  Sergiy Gotvyanskyy, NCBI
+*           Colleen Bolin, NCBI
+*
+* File Description:
+*   Front-end class for making remote request to MLA and taxon
+* 
+* ===========================================================================
+*/
+
  #ifndef __REMOTE_UPDATER_HPP_INCLUDED__
  #define __REMOTE_UPDATER_HPP_INCLUDED__
  
+#include <corelib/ncbimisc.hpp>
  #include<functional>
  
  BEGIN_NCBI_SCOPE
@@ -16,6 +51,8 @@ class CSeq_descr;
  class COrg_ref;
  class CMLAClient;
  class CAuth_list;
+class IObjtoolsListener;
+class CPub;
  
  BEGIN_SCOPE(edit)
  
@@ -27,35 +64,51 @@ public:
  
     using FLogger = function<void(const string&)>;
  
+   // With this constructor, an exception is thrown 
+   // if the updater cannot retrieve a publication for a PMID.
     CRemoteUpdater(bool enable_caching = true);
+   // With this constructor, failure to retrieve 
+   // a publication for a PMID is logged with the supplied message listener.
+   // If no message listener is supplied, an exception is thrown.
+   CRemoteUpdater(IObjtoolsListener* pMessageListener);
     ~CRemoteUpdater();
  
     void UpdatePubReferences(CSerialObject& obj);
     void UpdatePubReferences(CSeq_entry_EditHandle& obj);
+   void SetMaxMlaAttempts(int max);
  
-   void UpdateOrgFromTaxon(FLogger f_logger, CSeq_entry& entry);
+   NCBI_DEPRECATED void UpdateOrgFromTaxon(FLogger /*f_logger*/, CSeq_entry& entry);
     void UpdateOrgFromTaxon(FLogger f_logger, CSeq_entry_EditHandle& obj);
-   void UpdateOrgFromTaxon(FLogger f_logger, CSeqdesc& obj);
+   NCBI_DEPRECATED void UpdateOrgFromTaxon(FLogger f_logger, CSeqdesc& obj);
+
+   void UpdateOrgFromTaxon(CSeq_entry& entry);
+   void UpdateOrgFromTaxon(CSeqdesc& desc);
+
+
     void ClearCache();
     static void ConvertToStandardAuthors(CAuth_list& auth_list);
     static void PostProcessPubs(CSeq_entry_EditHandle& obj);
     static void PostProcessPubs(CSeq_entry& obj);
     static void PostProcessPubs(CPubdesc& pubdesc);
  
+   void SetMLAClient(CMLAClient& mlaClient);
     // Use either shared singleton or individual instances
     static CRemoteUpdater& GetInstance();
  
  private:
     void xUpdatePubReferences(CSeq_entry& entry);
     void xUpdatePubReferences(CSeq_descr& descr);
-   void xUpdateOrgTaxname(FLogger f_logger, COrg_ref& org);
-
+   void xUpdateOrgTaxname(FLogger f_logger, COrg_ref& org); 
+   void xUpdateOrgTaxname(COrg_ref& org);
+   bool xUpdatePubPMID(list<CRef<CPub>>& pubs, TEntrezId id);
  
+   IObjtoolsListener* m_pMessageListener=nullptr;
     CRef<CMLAClient>  m_mlaClient;
     auto_ptr<CCachedTaxon3_impl>  m_taxClient;
-   bool m_enable_caching;
+   bool m_enable_caching=true;
     CMutex m_Mutex;
     DECLARE_CLASS_STATIC_MUTEX(m_static_mutex);
+   int m_MaxMlaAttempts=3;
  };
  
  END_SCOPE(edit)
diff --git a/c++/include/objtools/format/flat_file_config.hpp b/c++/include/objtools/format/flat_file_config.hpp

index 0cf3bb217df986dc639f10bc772ef071253383f2..712ce9bd16c984e78bde248f386203c3d6504db7 100644 (file)
--- a/c++/include/objtools/format/flat_file_config.hpp
+++ b/c++/include/objtools/format/flat_file_config.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_FORMAT___FLAT_FILE_CONFIG__HPP
  #define OBJTOOLS_FORMAT___FLAT_FILE_CONFIG__HPP
  
-/*  $Id: flat_file_config.hpp 607400 2020-05-04 14:18:10Z ivanov $
+/*  $Id: flat_file_config.hpp 614736 2020-08-21 13:43:48Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -89,7 +89,7 @@ public:
      virtual void FormatTranscriptId(string& str, const CSeq_id& seq_id, const string& nuc_id) const = 0;
      virtual void FormatNucSearch(CNcbiOstream& os, const string& id) const = 0;
      virtual void FormatNucId(string& str, const CSeq_id& seq_id, TIntId gi, const string& acc_id) const = 0;
-    virtual void FormatTaxid(string& str, const int taxid, const string& taxname) const = 0;
+    virtual void FormatTaxid(string& str, const TTaxId taxid, const string& taxname) const = 0;
      virtual void FormatLocation(string& str, const CSeq_loc& loc, TIntId gi, const string& visible_text) const = 0;
      virtual void FormatModelEvidence(string& str, const SModelEvidance& me) const = 0;
      virtual void FormatTranscript(string& str, const string& name) const = 0;
@@ -107,7 +107,7 @@ public:
      void FormatTranscriptId(string& str, const CSeq_id& seq_id, const string& nuc_id) const;
      void FormatNucSearch(CNcbiOstream& os, const string& id) const;
      void FormatNucId(string& str, const CSeq_id& seq_id, TIntId gi, const string& acc_id) const;
-    void FormatTaxid(string& str, const int taxid, const string& taxname) const;
+    void FormatTaxid(string& str, const TTaxId taxid, const string& taxname) const;
      void FormatLocation(string& str, const CSeq_loc& loc, TIntId gi, const string& visible_text) const;
      void FormatModelEvidence(string& str, const SModelEvidance& me) const;
      void FormatTranscript(string& str, const string& name) const;
@@ -200,7 +200,8 @@ public:
          fIgnoreExistingTitle   = 1 << 13,
          fGeneRNACDSFeatures    = 1 << 14,
          fShowFtablePeptides    = 1 << 15,
-        fDisableReferenceCache = 1 << 16
+        fDisableReferenceCache = 1 << 16,
+        fShowDeflineModifiers  = 1 << 17
      };
  
      enum EView {
@@ -216,7 +217,9 @@ public:
          ePolicy_Adaptive = 0,
          ePolicy_Internal,
          ePolicy_External,
-        ePolicy_Exhaustive
+        ePolicy_Exhaustive,
+        ePolicy_Ftp,
+        ePolicy_Web
      };
  
      // These flags are used to select the GenBank sections to print or skip.
@@ -390,7 +393,8 @@ public:
                      TStyle  style = eStyle_Normal,
                      TFlags  flags = 0,
                      TView   view = fViewNucleotides,
-                    TPolicy policy = ePolicy_Adaptive);
+                    TPolicy policy = ePolicy_Adaptive,
+                    TCustom custom = 0);
  
      // destructor
      ~CFlatFileConfig(void);
@@ -494,12 +498,16 @@ public:
      bool IsPolicyInternal(void) const { return m_Policy == ePolicy_Internal; }
      bool IsPolicyExternal (void) const { return m_Policy == ePolicy_External;  }
      bool IsPolicyExhaustive (void) const { return m_Policy == ePolicy_Exhaustive;  }
+    bool IsPolicyFtp (void) const { return m_Policy == ePolicy_Ftp;  }
+    bool IsPolicyWeb (void) const { return m_Policy == ePolicy_Web;  }
      // setters
      void SetPolicy(const TPolicy& Policy) { m_Policy = Policy;  }
      void SetPolicyAdaptive (void) { m_Policy = ePolicy_Adaptive;  }
      void SetPolicyInternal(void) { m_Policy = ePolicy_Internal; }
      void SetPolicyExternal (void) { m_Policy = ePolicy_External;  }
      void SetPolicyExhaustive (void) { m_Policy = ePolicy_Exhaustive;  }
+    void SetPolicyFtp (void) { m_Policy = ePolicy_Ftp;  }
+    void SetPolicyWeb (void) { m_Policy = ePolicy_Web;  }
  
      // -- Flags
      // getters
@@ -617,6 +625,7 @@ public:
      bool GeneRNACDSFeatures    (void) const;
      bool ShowFtablePeptides    (void) const;
      bool DisableReferenceCache (void) const;
+    bool ShowDeflineModifiers  (void) const;
  
      // setters
      void SetCustom(const TCustom& custom) { m_Custom = custom; }
@@ -633,10 +642,17 @@ public:
      CFlatFileConfig& SetGeneRNACDSFeatures    (bool val = true);
      CFlatFileConfig& SetShowFtablePeptides    (bool val = true);
      CFlatFileConfig& SetDisableReferenceCache (bool val = true);
+    CFlatFileConfig& SetShowDeflineModifiers  (bool val = true);
  
      // adjust mode dependant flags for RefSeq
      void SetRefSeqConventions(void);
  
+  int GetFeatDepth(void) const { return m_FeatDepth; }
+  void SetFeatDepth(const int featDepth) { m_FeatDepth = featDepth; }
+
+  int GetGapDepth(void) const { return m_GapDepth; }
+  void SetGapDepth(const int gapDepth) { m_GapDepth = gapDepth; }
+
  
      void SetGenbankBlocks(const TGenbankBlocks& genbank_blocks) 
      {
@@ -723,6 +739,8 @@ private:
      const ICanceled * m_pCanceledCallback; // instance does NOT own it
      bool        m_BasicCleanup;
      TCustom     m_Custom;
+    int         m_FeatDepth;
+    int         m_GapDepth;
  #ifdef NEW_HTML_FMT
      CRef<IHTMLFormatter> m_html_formatter;
  #endif
@@ -823,6 +841,7 @@ CUSTOM_ARG_IMP(IgnoreExistingTitle)
  CUSTOM_ARG_IMP(GeneRNACDSFeatures)
  CUSTOM_ARG_IMP(ShowFtablePeptides)
  CUSTOM_ARG_IMP(DisableReferenceCache)
+CUSTOM_ARG_IMP(ShowDeflineModifiers)
  
  #undef FLAG_ARG_IMP
  #undef FLAG_ARG_GET
diff --git a/c++/include/objtools/format/flat_file_generator.hpp b/c++/include/objtools/format/flat_file_generator.hpp

index 722a6f363f8cc6d4114027aef47dad4421347148..ec1edb465be14340f256ac6a31de8f552004427e 100644 (file)
--- a/c++/include/objtools/format/flat_file_generator.hpp
+++ b/c++/include/objtools/format/flat_file_generator.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_FORMAT___FLAT_FILE_GENERATOR__HPP
  #define OBJTOOLS_FORMAT___FLAT_FILE_GENERATOR__HPP
  
-/*  $Id: flat_file_generator.hpp 604097 2020-03-23 12:19:07Z ivanov $
+/*  $Id: flat_file_generator.hpp 613149 2020-08-03 15:02:23Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -69,7 +69,8 @@ public:
          CFlatFileConfig::TStyle    style  = CFlatFileConfig::eStyle_Normal,
          CFlatFileConfig::TFlags    flags  = 0,
          CFlatFileConfig::TView     view   = CFlatFileConfig::fViewNucleotides,
-        CFlatFileConfig::TCustom   custom = 0);
+        CFlatFileConfig::TCustom   custom = 0,
+        CFlatFileConfig::TPolicy   policy = CFlatFileConfig::ePolicy_Adaptive);
      
      // destructor
      ~CFlatFileGenerator(void);
@@ -101,6 +102,7 @@ public:
  
      // Versions that loop through Bioseq components
      void Generate(const CSeq_entry_Handle& entry, CNcbiOstream& os, bool useSeqEntryIndexing);
+    void Generate(const CBioseq_Handle& bsh, CNcbiOstream& os, bool useSeqEntryIndexing);
      void Generate(const CSeq_entry_Handle& entry, CFlatItemOStream& item_os, bool useSeqEntryIndexing);
      void Generate(const CSeq_loc& loc, CScope& scope, CNcbiOstream& os, bool useSeqEntryIndexing);
  
diff --git a/c++/include/objtools/format/gather_items.hpp b/c++/include/objtools/format/gather_items.hpp

index 271782d0dea90ed0bc5dad7b88e678680f568fe5..425d2e93249b1865580b774166cf1d6dc5fcb749 100644 (file)
--- a/c++/include/objtools/format/gather_items.hpp
+++ b/c++/include/objtools/format/gather_items.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_FORMAT___GATHER_ITEMS__HPP
  #define OBJTOOLS_FORMAT___GATHER_ITEMS__HPP
  
-/*  $Id: gather_items.hpp 607397 2020-05-04 14:17:25Z ivanov $
+/*  $Id: gather_items.hpp 610489 2020-06-18 14:52:27Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -123,11 +123,12 @@ protected:
      void x_GatherCDSReferences(TReferences& refs) const;
  
      // features
-    void x_GatherFeatures  (void) const;
+    void x_GatherFeatures (void) const;
+    void x_GatherFeaturesIdx(void) const;
      void x_GetFeatsOnCdsProduct(const CSeq_feat& feat, CBioseqContext& ctx,
          CRef<CSeq_loc_Mapper> slice_mapper,
          CConstRef<CFeatureItem> cdsFeatureItem = CConstRef<CFeatureItem>() ) const;
-    void x_GetFeatsOnCdsProductIdx(CMappedFeat mf,const CSeq_feat& feat, CBioseqContext& ctx,
+    void x_GetFeatsOnCdsProductIdx(const CSeq_feat& feat, CBioseqContext& ctx,
          CRef<CSeq_loc_Mapper> slice_mapper,
          CConstRef<CFeatureItem> cdsFeatureItem = CConstRef<CFeatureItem>() ) const;
      static void x_GiveOneResidueIntervalsBogusFuzz(CSeq_loc & loc);
@@ -142,8 +143,6 @@ protected:
          CBioseqContext& ctx) const;
      void x_GatherFeaturesOnRangeIdx(const CSeq_loc& loc, SAnnotSelector& sel,
          CBioseqContext& ctx) const;
-    size_t x_GatherFeaturesOnSegmentIdx(const CSeq_loc& loc, SAnnotSelector& sel,
-        CBioseqContext& ctx) const;
      void x_GatherFeaturesOnRange(const CSeq_loc& loc, SAnnotSelector& sel,
          CBioseqContext& ctx) const;
  
diff --git a/c++/include/objtools/format/items/feature_item.hpp b/c++/include/objtools/format/items/feature_item.hpp

index 5bf52e3e4cf6502c1f8470bf87298c268792bfbc..e2f51a8b300f4da1f6d5fab3c0d1e68d99e811b8 100644 (file)
--- a/c++/include/objtools/format/items/feature_item.hpp
+++ b/c++/include/objtools/format/items/feature_item.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_FORMAT_ITEMS___FLAT_FEATURE__HPP
  #define OBJTOOLS_FORMAT_ITEMS___FLAT_FEATURE__HPP
  
-/*  $Id: feature_item.hpp 604098 2020-03-23 12:19:42Z ivanov $
+/*  $Id: feature_item.hpp 615031 2020-08-26 13:38:14Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -240,6 +240,8 @@ protected:
      void x_AddQualsRegion( CBioseqContext& );
      void x_AddQualsProt( CBioseqContext&, bool );
      void x_AddQualsPsecStr( CBioseqContext& );
+    void x_AddQualsNonStd(CBioseqContext& ctx );
+
      void x_AddQualsHet( CBioseqContext& ctx );
      void x_AddQualsVariation( CBioseqContext& ctx );
  
@@ -277,6 +279,7 @@ protected:
      void x_AddFTableSiteQuals(const CSeqFeatData::TSite& site);
      void x_AddFTablePsecStrQuals(const CSeqFeatData::TPsec_str& psec_str);
      void x_AddFTablePsecStrQuals(const CSeqFeatData::THet& het);
+    void x_AddFTableNonStdQuals(const CSeqFeatData::TNon_std_residue& res);
      void x_AddFTableBiosrcQuals(const CBioSource& src);
      void x_AddFTableDbxref(const CSeq_feat::TDbxref& dbxref);
      void x_AddFTableExtQuals(const CSeq_feat::TExt& ext);
diff --git a/c++/include/objtools/format/items/flat_qual_slots.hpp b/c++/include/objtools/format/items/flat_qual_slots.hpp

index f00aa58a4185404945a430b6e614038d580f1b1e..c0ad4348f1340aae12aaefd8355cb95257b308ae 100644 (file)
--- a/c++/include/objtools/format/items/flat_qual_slots.hpp
+++ b/c++/include/objtools/format/items/flat_qual_slots.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_FLAT___FLAT_QUAL_SLOTS__HPP
  #define OBJTOOLS_FLAT___FLAT_QUAL_SLOTS__HPP
  
-/*  $Id: flat_qual_slots.hpp 564513 2018-05-29 17:40:10Z kans $
+/*  $Id: flat_qual_slots.hpp 613781 2020-08-12 16:42:43Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -107,6 +107,7 @@ enum EFeatureQualifier {
      eFQ_mol_wt,
      eFQ_ncRNA_class,
      eFQ_nomenclature,
+    eFQ_non_std_residue,
      eFQ_number,
      eFQ_old_locus_tag,
      eFQ_operon,
diff --git a/c++/include/objtools/format/items/reference_item.hpp b/c++/include/objtools/format/items/reference_item.hpp

index 1ed743b5f70ab00a7fb9936462a4d60000138a13..d0a42df3e35db9d26830e5074bbc83d010b75d20 100644 (file)
--- a/c++/include/objtools/format/items/reference_item.hpp
+++ b/c++/include/objtools/format/items/reference_item.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_FORMAT_ITEMS___REFERENCE_ITEM__HPP
  #define OBJTOOLS_FORMAT_ITEMS___REFERENCE_ITEM__HPP
  
-/*  $Id: reference_item.hpp 602636 2020-02-27 20:27:11Z kans $
+/*  $Id: reference_item.hpp 614619 2020-08-20 13:00:42Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -157,8 +157,8 @@ public:
      bool              IsSetJournal (void) const { return m_Journal.NotEmpty(); }
      const CCit_jour&  GetJournal   (void) const { return *m_Journal;           }
  
-    int               GetPMID      (void) const { return m_PMID;               }
-    int               GetMUID      (void) const { return m_MUID;               }
+    TEntrezId         GetPMID      (void) const { return m_PMID;               }
+    TEntrezId         GetMUID      (void) const { return m_MUID;               }
      const string&     GetDOI       (void) const { return m_DOI;                }
      const string&     GetPII       (void) const { return m_ELocationPII;       }
      const string&     GetOldPII    (void) const { return m_OldPII;             }
@@ -236,8 +236,8 @@ private:
      CConstRef<CCit_jour>  m_Journal;
      CConstRef<CSeq_loc>   m_Loc;
      CConstRef<CDate>      m_Date;
-    int                   m_PMID;
-    int                   m_MUID;
+    TEntrezId             m_PMID;
+    TEntrezId             m_MUID;
      string                m_DOI;
      string                m_ELocationPII;
      string                m_OldPII;
@@ -259,7 +259,7 @@ inline
  const string& CReferenceItem::GetUniqueStr(void) const
  {
      // supress creation if other identifiers exist.
-    if (m_MUID == 0  &&  m_PMID == 0) {
+    if (m_MUID == ZERO_ENTREZ_ID  &&  m_PMID == ZERO_ENTREZ_ID) {
          x_CreateUniqueStr();
      }
      return m_UniqueStr;
diff --git a/c++/include/objtools/logging/listener.hpp b/c++/include/objtools/logging/listener.hpp

index ea45c1f57f2135d8367962218c345b4ffc37d09b..ae50a4af74ec668b621b9190fff8880ffd232f10 100644 (file)
--- a/c++/include/objtools/logging/listener.hpp
+++ b/c++/include/objtools/logging/listener.hpp
@@ -1,5 +1,5 @@
  
-/*  $Id: listener.hpp 600659 2020-01-24 15:26:41Z foleyjp $
+/*  $Id: listener.hpp 608330 2020-05-14 16:03:45Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -34,7 +34,7 @@
  #ifndef _OBJTOOLS_LISTENER_HPP_
  #define _OBJTOOLS_LISTENER_HPP_
  
-#include <corelib/ncbi_message.hpp>
+#include <corelib/ncbistd.hpp>
  #include <objtools/logging/message.hpp>
  
  BEGIN_NCBI_SCOPE
diff --git a/c++/include/objtools/logging/message.hpp b/c++/include/objtools/logging/message.hpp

index 9bcd8e62dab242cab5d6b60b2674bf86c6eed289..f0fd15da8ef7802c4974ca022a2d2efc03d9f908 100644 (file)
--- a/c++/include/objtools/logging/message.hpp
+++ b/c++/include/objtools/logging/message.hpp
@@ -1,4 +1,4 @@
-/*  $Id: message.hpp 599046 2019-12-18 18:34:26Z ludwigf $
+/*  $Id: message.hpp 608330 2020-05-14 16:03:45Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -33,23 +33,32 @@
  #ifndef _OBJTOOLS_MESSAGE_HPP_
  #define _OBJTOOLS_MESSAGE_HPP_
  
-#include <corelib/ncbi_message.hpp>
+#include <corelib/ncbistd.hpp>
  
  BEGIN_NCBI_SCOPE
  BEGIN_SCOPE(objects) 
  
  //  ============================================================================
-class NCBI_XOBJUTIL_EXPORT IObjtoolsMessage : public IMessage
+class NCBI_XOBJUTIL_EXPORT IObjtoolsMessage 
  //  ============================================================================
  {
  public:
      virtual ~IObjtoolsMessage(void) = default;
  
+    virtual IObjtoolsMessage *Clone(void) const = 0;
+
+    virtual void Write(CNcbiOstream& out) const = 0;
+
      virtual void Dump(CNcbiOstream& out) const = 0;
  
      virtual void WriteAsXML(CNcbiOstream& out) const = 0;
  
      virtual void DumpAsXML(CNcbiOstream& out) const = 0;
+
+    virtual string GetText(void) const = 0;
+    virtual EDiagSev GetSeverity(void) const = 0;
+    virtual int GetCode(void) const = 0;
+    virtual int GetSubCode(void) const = 0;
  };
  
  
@@ -63,7 +72,7 @@ public:
  
      virtual CObjtoolsMessage *Clone(void) const;
  
-    virtual string Compose(void) const;
+    NCBI_DEPRECATED virtual string Compose(void) const;
  
      virtual void Write(CNcbiOstream& out) const;
  
diff --git a/c++/include/objtools/pubseq_gateway/client/psg_client.hpp b/c++/include/objtools/pubseq_gateway/client/psg_client.hpp

index bed7e4214a08995d31b331ce12dc7e176ae6b080..82dbd2acde064a5045ab4a19d0149f3cea02a4cd 100644 (file)
--- a/c++/include/objtools/pubseq_gateway/client/psg_client.hpp
+++ b/c++/include/objtools/pubseq_gateway/client/psg_client.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_HPP
  #define OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_HPP
  
-/*  $Id: psg_client.hpp 599706 2020-01-06 18:04:04Z sadyrovr $
+/*  $Id: psg_client.hpp 612393 2020-07-21 13:51:24Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -87,6 +87,9 @@ public:
      // Get request ID
      string GetId() const { return x_GetId(); }
  
+    /// Set hops
+    void SetHops(unsigned hops) { m_Hops = hops; }
+
  protected:
      CPSG_Request(shared_ptr<void> user_context = {},
                   CRef<CRequestContext> request_context = {})
@@ -105,6 +108,7 @@ private:
  
      shared_ptr<void> m_UserContext;
      CRef<CRequestContext> m_RequestContext;
+    unsigned m_Hops = 0;
  
      friend class CPSG_Queue;
  };
@@ -858,6 +862,20 @@ public:
      /// Check whether the queue was stopped/reset and is now empty.
      bool IsEmpty() const;
  
+
+    /// Check whether the queue has been initialized.
+    bool IsInitialized() const { return static_cast<bool>(m_Impl); }
+
+
+    /// Get an API lock.
+    /// Holding this API lock is essential if numerous short-lived queue instances are used.
+    /// It prevents an internal I/O implementation (threads, TCP connections, HTTP sessions, etc)
+    /// from being destroyed (on destroying last remaining queue instance)
+    /// and then re-created (with new queue instance).
+    using TApiLock = shared_ptr<void>;
+    static TApiLock GetApiLock();
+
+
      CPSG_Queue(CPSG_Queue&&);
      CPSG_Queue& operator=(CPSG_Queue&&);
  
diff --git a/c++/include/objtools/readers/fasta.hpp b/c++/include/objtools/readers/fasta.hpp

index 7dcc6fd1d9c7bad960159c98a4df91ef0903ac10..0096f38c26c6247d946c06b2aa1c0388ff014a4a 100644 (file)
--- a/c++/include/objtools/readers/fasta.hpp
+++ b/c++/include/objtools/readers/fasta.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_READERS___FASTA__HPP
  #define OBJTOOLS_READERS___FASTA__HPP
  
-/*  $Id: fasta.hpp 598690 2019-12-12 14:55:16Z foleyjp $
+/*  $Id: fasta.hpp 612524 2020-07-23 11:37:59Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -341,6 +341,11 @@ private:
                       TSeqPos line_number,
                       CBioseq& bioseq,
                       ILineErrorListener* pMessageListener);
+
+    void x_SetDeflineParseInfo(SDefLineParseInfo& info);
+
+    bool m_bModifiedMaxIdLength=false;
+
  protected:
      struct SGap : public CObject {
          enum EKnownSize {
diff --git a/c++/include/objtools/readers/fasta_reader_utils.hpp b/c++/include/objtools/readers/fasta_reader_utils.hpp

index 92f0431b31b33a6d7af4a5ffa5134c422198fc69..1c5fa79f5d420d8f965cfc6f46d1dbfb4378ff85 100644 (file)
--- a/c++/include/objtools/readers/fasta_reader_utils.hpp
+++ b/c++/include/objtools/readers/fasta_reader_utils.hpp
@@ -1,7 +1,7 @@
  #ifndef FASTA_READER_UTILS_HPP
  #define FASTA_READER_UTILS_HPP
  
-/*  $Id: fasta_reader_utils.hpp 599727 2020-01-06 20:18:10Z foleyjp $
+/*  $Id: fasta_reader_utils.hpp 612524 2020-07-23 11:37:59Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -76,7 +76,8 @@ public:
      struct SDeflineParseInfo {
          TBaseFlags fBaseFlags;
          TFastaFlags fFastaFlags;
-        TSeqPos maxIdLength;
+        TSeqPos maxIdLength=0; // If maxIdLength is zero, the code uses the 
+                               // default values specified in CSeq_id
          TSeqPos lineNumber;
      };
  
diff --git a/c++/include/objtools/readers/gff2_reader.hpp b/c++/include/objtools/readers/gff2_reader.hpp

index e25d15dc068b64d099782ce427fd7d0dff29e6e0..b13844b415e99fdca74c172eb0c6c4a2e121b141 100644 (file)
--- a/c++/include/objtools/readers/gff2_reader.hpp
+++ b/c++/include/objtools/readers/gff2_reader.hpp
@@ -1,4 +1,4 @@
- /*  $Id: gff2_reader.hpp 603569 2020-03-12 18:23:57Z ivanov $
+ /*  $Id: gff2_reader.hpp 610837 2020-06-24 15:29:29Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -117,6 +117,12 @@ public:
      //
      //  new stuff: 
      //
+    virtual void xGetData(
+        ILineReader&,
+        TReaderData&);
+
+    bool IsInGenbankMode() const;
+
      virtual bool xParseStructuredComment(
          const string&);
      
@@ -277,6 +283,9 @@ protected:
          CSeq_feat&,
          CSeq_feat&);
  
+    bool xNeedsNewSeqAnnot(
+        const string&);
+
      //  data:
      //
  protected:
diff --git a/c++/include/objtools/readers/gff3_reader.hpp b/c++/include/objtools/readers/gff3_reader.hpp

index bad353d889eee9a54aa6890f10bb17f6edc85993..2219b9b36cd04512f1b342e73ab901c5a2f3fc30 100644 (file)
--- a/c++/include/objtools/readers/gff3_reader.hpp
+++ b/c++/include/objtools/readers/gff3_reader.hpp
@@ -1,4 +1,4 @@
- /*  $Id: gff3_reader.hpp 607807 2020-05-07 18:58:43Z ivanov $
+ /*  $Id: gff3_reader.hpp 610837 2020-06-24 15:29:29Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -106,13 +106,7 @@ public:
          ILineReader& lr,
          ILineErrorListener* pErrors=nullptr);
  
-    bool IsInGenbankMode() const;
-
  protected:
-    virtual void xGetData(
-        ILineReader&,
-        TReaderData&);
-
      virtual void xProcessData(
          const TReaderData&,
          CSeq_annot&);
@@ -216,9 +210,6 @@ protected:
      virtual bool xParseAlignment(
          const string& strLine);
  
-    bool xNeedsNewSeqAnnot(
-        const string&);
-
      // Data:
      map<string, string> mCdsParentMap;
      map<string, CRef<CSeq_interval> > mMrnaLocs;
diff --git a/c++/include/objtools/readers/gtf_reader.hpp b/c++/include/objtools/readers/gtf_reader.hpp

index cab127b88e093a79db1934e51f4d2df3cdd075ec..16c05938e50d16fa6bf9300a98d3dca001e7e2fc 100644 (file)
--- a/c++/include/objtools/readers/gtf_reader.hpp
+++ b/c++/include/objtools/readers/gtf_reader.hpp
@@ -1,4 +1,4 @@
- /*  $Id: gtf_reader.hpp 598212 2019-12-05 12:32:15Z ludwigf $
+ /*  $Id: gtf_reader.hpp 610647 2020-06-22 11:31:17Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -310,7 +310,7 @@ protected:
          const CGtfAttributes::MultiValue&,
          CSeq_feat&);
  
-    bool xCdsIsPartial(
+    NCBI_DEPRECATED bool xCdsIsPartial(
          const CGtfReadRecord& );
  
      typedef map< string, CRef< CSeq_feat > > TIdToFeature;
diff --git a/c++/include/objtools/readers/message_listener.hpp b/c++/include/objtools/readers/message_listener.hpp

index 328434fc27014a5df801be471c7126e753592193..fa22fc322fde2c6ac361f8483ab83e0cf21ea8ec 100644 (file)
--- a/c++/include/objtools/readers/message_listener.hpp
+++ b/c++/include/objtools/readers/message_listener.hpp
@@ -1,4 +1,4 @@
-/*  $Id: message_listener.hpp 600664 2020-01-24 15:57:16Z foleyjp $
+/*  $Id: message_listener.hpp 608381 2020-05-15 12:43:35Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -51,13 +51,19 @@ public:
      virtual ~ILineErrorListener() {}
  
      // IListener::Post() implementation
-    virtual void Post(const IMessage& message)
+    NCBI_STD_DEPRECATED("This method is no longer functional and will be removed in SC-25.") 
+    virtual void Post(const IMessage& /*message*/)
      {
-        const ILineError* le = dynamic_cast<const ILineError*>(&message);
-        if (!le) return;
-        PutError(*le);
+        // Remove in SC-25
+        return;
      }
  
+    NCBI_STD_DEPRECATED("This method is redundant and will be removed in SC-25.") 
+    virtual void Push(const IObjtoolsMessage& message) 
+    {
+        // Remove in SC-25
+        PutMessage(message);
+    }
      /// Store error in the container, and 
      /// return true if error was stored fine, and
      /// return false if the caller should terminate all further processing.
@@ -74,13 +80,12 @@ public:
      }
      
      // IListener::Get() implementation
-    virtual const IMessage& Get(size_t index) const
-    { return const_cast<ILineErrorListener*>(this)->GetError(index); }
+    virtual const ILineError& Get(size_t index) const
+    { return this->GetError(index); }
  
      /// 0-based error retrieval.
      virtual const ILineError&
-    GetError(
-        size_t ) =0;
+    GetError(size_t ) const =0;
  
      virtual size_t Count(void) const = 0;
  
@@ -105,7 +110,7 @@ public:
          const Uint8 iNumDone = 0,
          const Uint8 iNumTotal = 0 ) = 0;
  
-    virtual const IMessage& GetMessage(size_t index) const
+    virtual const ILineError& GetMessage(size_t index) const
      { return Get(index); }
  
      virtual void Clear(void)
@@ -150,7 +155,7 @@ public:
      
      const ILineError&
      GetError(
-        size_t uPos ) { 
+        size_t uPos ) const { 
              return *dynamic_cast<ILineError*>(m_Errors[ uPos ].get()); }
      
      virtual void Dump()
diff --git a/c++/include/serial/grpc_integration/grpc_integration.hpp b/c++/include/serial/grpc_integration/grpc_integration.hpp

index 8dde2efbe39bc23291d140934a1d5a545de8e7a0..8291b637d0f7c7a21544f98e5c75b36a713b5199 100644 (file)
--- a/c++/include/serial/grpc_integration/grpc_integration.hpp
+++ b/c++/include/serial/grpc_integration/grpc_integration.hpp
@@ -1,7 +1,7 @@
  #ifndef SERIAL_GRPC_INTEGRATION___GRPC_INTEGRATION__HPP
  #define SERIAL_GRPC_INTEGRATION___GRPC_INTEGRATION__HPP
  
-/*  $Id: grpc_integration.hpp 606576 2020-04-23 17:12:06Z ivanov $
+/*  $Id: grpc_integration.hpp 608310 2020-05-14 12:35:38Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -114,11 +114,13 @@ public:
  /// (in order of priority):
  /// - Config file entry "[section] variable"
  /// - Environment variables: env_var_name (if not empty/NULL);
-///   then "NCBI_CONFIG__<section>__<name>"; then "grpc_proxy"
+///   then "NCBI_CONFIG__<section>__<name>"; then "GRPC_PROXY"
  /// - The hard-coded NCBI default "linkerd:4142"
+/// The value_source (if not null) will get CParamBase::EParamSource value
  string g_NCBI_GRPC_GetAddress(const char* section,
                                const char* variable,
-                              const char* env_var_name = nullptr);
+                              const char* env_var_name = nullptr,
+                              int* value_source = nullptr);
  
  END_NCBI_SCOPE
  
diff --git a/c++/include/serial/grpc_integration/impl/grpc_support.hpp b/c++/include/serial/grpc_integration/impl/grpc_support.hpp

index fc1a7e13c08ab97c5b3ef450903c349fcceeed14..1bf0f39645220477624b6ae1db6a61174809cd49 100644 (file)
--- a/c++/include/serial/grpc_integration/impl/grpc_support.hpp
+++ b/c++/include/serial/grpc_integration/impl/grpc_support.hpp
@@ -1,7 +1,7 @@
  #ifndef SERIAL_GRPC_INTEGRATION_IMPL___GRPC_SUPPORT__HPP
  #define SERIAL_GRPC_INTEGRATION_IMPL___GRPC_SUPPORT__HPP
  
-/*  $Id: grpc_support.hpp 607417 2020-05-04 15:40:44Z ivanov $
+/*  $Id: grpc_support.hpp 608345 2020-05-14 18:36:54Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -38,7 +38,7 @@
  #include <corelib/ncbimtx.hpp>
  #include <corelib/request_ctx.hpp>
  #include <corelib/request_status.hpp>
-#ifdef HAVE_LIBPROTOBUF
+#ifdef HAVE_LIBGRPC // HAVE_LIBPROTOBUF
  #  include <google/protobuf/message.h>
  #  if GOOGLE_PROTOBUF_VERSION >= 3002000
  #    define NCBI_GRPC_GET_BYTE_SIZE(msg) ((msg).ByteSizeLong())
@@ -103,7 +103,7 @@ public:
  private:
      CDiagContext&    m_DiagContext;
      CRequestContext& m_RequestContext;
-#ifdef HAVE_LIBPROTOBUF
+#ifdef HAVE_LIBGRPC // HAVE_LIBPROTOBUF
      const TMessage&  m_Reply;
  #endif
      bool             m_ManagingRequest;
@@ -169,7 +169,7 @@ CGRPCRequestLogger::CGRPCRequestLogger(TGRPCServerContext* sctx,
                                         const TMessage& reply)
      : m_DiagContext(GetDiagContext()),
        m_RequestContext(m_DiagContext.GetRequestContext()),
-#ifdef HAVE_LIBPROTOBUF
+#ifdef HAVE_LIBGRPC // HAVE_LIBPROTOBUF
        m_Reply(reply),
  #endif
        m_ManagingRequest(false)
diff --git a/c++/include/serial/rpcbase.hpp b/c++/include/serial/rpcbase.hpp

index 3d630b903cfbb5f77976bab5877505804901c2b6..1be4fdaad05f6ccc78130a67bab9fc681a169dc9 100644 (file)
--- a/c++/include/serial/rpcbase.hpp
+++ b/c++/include/serial/rpcbase.hpp
@@ -1,7 +1,7 @@
  #ifndef SERIAL___RPCBASE__HPP
  #define SERIAL___RPCBASE__HPP
  
-/*  $Id: rpcbase.hpp 588592 2019-06-26 18:52:32Z ucko $
+/*  $Id: rpcbase.hpp 615694 2020-09-02 18:14:03Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -62,12 +62,21 @@ class CRPCClient : public    CObject,
                     protected CConnIniter
  {
  public:
-    CRPCClient(const string&     service     = kEmptyStr,
-               ESerialDataFormat format      = eSerial_AsnBinary,
-               unsigned int      retry_limit = 3)
-        : CRPCClient_Base(service, format, retry_limit),
+    CRPCClient(const string& service = kEmptyStr)
+        : CRPCClient_Base(service, eSerial_AsnBinary),
            m_Timeout(kDefaultTimeout)
          {}
+    CRPCClient(const string&     service,
+        ESerialDataFormat        format)
+        : CRPCClient_Base(service, format),
+        m_Timeout(kDefaultTimeout)
+    {}
+    CRPCClient(const string& service,
+        ESerialDataFormat    format,
+        unsigned int         try_limit)
+        : CRPCClient_Base(service, format, try_limit),
+        m_Timeout(kDefaultTimeout)
+    {}
      virtual ~CRPCClient(void)
      {
          if ( !sx_IsSpecial(m_Timeout) ) {
diff --git a/c++/include/serial/rpcbase_impl.hpp b/c++/include/serial/rpcbase_impl.hpp

index c895fe07f4dda9c0a549d375e36170f2e97e53de..b58c25cf16251661ceaa3677eb91337c23a26e7f 100644 (file)
--- a/c++/include/serial/rpcbase_impl.hpp
+++ b/c++/include/serial/rpcbase_impl.hpp
@@ -1,7 +1,7 @@
  #ifndef SERIAL___RPCBASE_IMPL__HPP
  #define SERIAL___RPCBASE_IMPL__HPP
  
-/*  $Id: rpcbase_impl.hpp 588592 2019-06-26 18:52:32Z ucko $
+/*  $Id: rpcbase_impl.hpp 615694 2020-09-02 18:14:03Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -55,8 +55,10 @@ class NCBI_XSERIAL_EXPORT CRPCClient_Base
  {
  public:
      CRPCClient_Base(const string&     service,
-                    ESerialDataFormat format,
-                    unsigned int      retry_limit);
+                    ESerialDataFormat format);
+    CRPCClient_Base(const string&     service,
+        ESerialDataFormat format,
+        unsigned int      try_limit);
      virtual ~CRPCClient_Base(void);
  
      void Connect(void);
@@ -75,9 +77,24 @@ public:
      ESerialDataFormat GetFormat(void) const            { return m_Format; }
                   void SetFormat(ESerialDataFormat fmt) { m_Format = fmt; }
  
-    unsigned int GetRetryLimit(void) const     { return m_RetryLimit; }
-            void SetRetryLimit(unsigned int n) { m_RetryLimit = n; }
-
+    /// Get number of request attempts. If not set explicitly through SetTryLimit
+    /// or constructor argument, the following values are used:
+    /// - <upcase_service_name>__RPC_CLIENT__MAX_TRY environment variable
+    /// - [service_name.rpc_client] section, max_try value in the INI file
+    /// - 3 (global default)
+    unsigned int GetTryLimit(void) const     { return m_TryLimit; }
+            void SetTryLimit(unsigned int n) { m_TryLimit = n > 0 ? n : 3; }
+    /// @deprecated Use GetTryLimit()
+    NCBI_DEPRECATED
+    unsigned int GetRetryLimit(void) const { return GetTryLimit(); }
+    /// @deprecated Use SetTryLimit()
+    NCBI_DEPRECATED
+    void SetRetryLimit(unsigned int n) { SetTryLimit(n); }
+
+    /// Get retry delay. If not set explicitly through SetRetryDelay, the following values are used:
+    /// - <upcase_service_name>__RPC_CLIENT__RETRY_DELAY environment variable
+    /// - [service_name.rpc_client] section, retry_delay value in the INI file
+    /// - 0 (global default)
      const CTimeSpan GetRetryDelay(void) const          { return m_RetryDelay; }
      void            SetRetryDelay(const CTimeSpan& ts) { m_RetryDelay = ts; }
  
@@ -111,7 +128,7 @@ private:
      ESerialDataFormat        m_Format;
      CMutex                   m_Mutex;   ///< To allow sharing across threads.
      CTimeSpan                m_RetryDelay;
-    unsigned int             m_RetryCount;
+    unsigned int             m_TryCount;
      int                      m_RecursionCount;
  
  protected:
@@ -121,7 +138,7 @@ protected:
      unique_ptr<CObjectIStream> m_In;
      unique_ptr<CObjectOStream> m_Out;
      string                   m_Affinity;
-    unsigned int             m_RetryLimit;
+    unsigned int             m_TryLimit;
      CHttpRetryContext        m_RetryCtx;
      CConstIRef<ICanceled>    m_Canceler;
  
@@ -129,7 +146,7 @@ protected:
      // true.  May reset the connection (or do anything else, really),
      // but note that Ask() will always automatically reconnect if the
      // stream is explicitly bad.  (Ask() also takes care of enforcing
-    // m_RetryLimit.)
+    // m_TryLimit.)
      virtual bool x_ShouldRetry(unsigned int tries);
  
      // Calculate effective retry delay. Returns value from CRetryContext
diff --git a/c++/include/serial/streamiter.hpp b/c++/include/serial/streamiter.hpp

index ebe11e52c3e2a8f97ecf18ba00666fc06904c312..0c2a40ef60a93646d8bfd20c4fb5b876ae14ab34 100644 (file)
--- a/c++/include/serial/streamiter.hpp
+++ b/c++/include/serial/streamiter.hpp
@@ -1,7 +1,7 @@
  #ifndef STREAMITER__HPP
  #define STREAMITER__HPP
  
-/*  $Id: streamiter.hpp 583376 2019-03-27 18:06:15Z dicuccio $
+/*  $Id: streamiter.hpp 609566 2020-06-03 19:29:58Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -924,7 +924,7 @@ CObjectIStreamIterator<TRoot>::CData::x_NextSeqWithFilter(const CObjectInfo& obj
              case eAllRandom:
                  done.insert(mi_now);
                  // no break
-                /* FALLTHROUGH */
+                NCBI_FALLTHROUGH;
              case eAllSeq:
                  {
                      CObjectInfo oi = minfo.GetMember().GetTypeFamily() == eTypeFamilyPointer ?
diff --git a/c++/include/util/bitset/bm.h b/c++/include/util/bitset/bm.h

index 14822cf1a7c22d09d43f87c4ae761db66dec8d89..e295265875d56f6f9089713456c590c309ad6963 100644 (file)
--- a/c++/include/util/bitset/bm.h
+++ b/c++/include/util/bitset/bm.h
@@ -145,19 +145,19 @@ public:
      class reference
      {
      public:
-        reference(bvector<Alloc>& bv, size_type position)
+        reference(bvector<Alloc>& bv, size_type position) BMNOEXCEPT
          : bv_(bv),
            position_(position)
          {}
  
-        reference(const reference& ref)
+        reference(const reference& ref) BMNOEXCEPT
          : bv_(ref.bv_), 
            position_(ref.position_)
          {
              bv_.set(position_, ref.bv_.get_bit(position_));
          }
          
-        operator bool() const
+        operator bool() const BMNOEXCEPT
          {
              return bv_.get_bit(position_);
          }
@@ -168,13 +168,13 @@ public:
              return *this;
          }
  
-        const reference& operator=(bool value) const
+        const reference& operator=(bool value) const BMNOEXCEPT
          {
              bv_.set(position_, value);
              return *this;
          }
  
-        bool operator==(const reference& ref) const
+        bool operator==(const reference& ref) const BMNOEXCEPT
          {
              return bool(*this) == bool(ref);
          }
@@ -204,13 +204,13 @@ public:
          }
  
          /*! Logical Not operator */
-        bool operator!() const
+        bool operator!() const BMNOEXCEPT
          {
              return !bv_.get_bit(position_);
          }
  
          /*! Bit Not operator */
-        bool operator~() const
+        bool operator~() const BMNOEXCEPT
          {
              return !bv_.get_bit(position_);
          }
@@ -237,34 +237,37 @@ public:
      {
      friend class bvector;
      public:
-        iterator_base() : bv_(0), position_(bm::id_max), block_(0) {}
+        iterator_base() BMNOEXCEPT 
+            : bv_(0), position_(bm::id_max), block_(0), block_type_(0),
+              block_idx_(0)
+        {}
  
-        bool operator==(const iterator_base& it) const
+        bool operator==(const iterator_base& it) const BMNOEXCEPT
          {
              return (position_ == it.position_) && (bv_ == it.bv_);
          }
  
-        bool operator!=(const iterator_base& it) const
+        bool operator!=(const iterator_base& it) const BMNOEXCEPT
          {
              return ! operator==(it);
          }
  
-        bool operator < (const iterator_base& it) const
+        bool operator < (const iterator_base& it) const BMNOEXCEPT
          {
              return position_ < it.position_;
          }
  
-        bool operator <= (const iterator_base& it) const
+        bool operator <= (const iterator_base& it) const BMNOEXCEPT
          {
              return position_ <= it.position_;
          }
  
-        bool operator > (const iterator_base& it) const
+        bool operator > (const iterator_base& it) const BMNOEXCEPT
          {
              return position_ > it.position_;
          }
  
-        bool operator >= (const iterator_base& it) const
+        bool operator >= (const iterator_base& it) const BMNOEXCEPT
          {
              return position_ >= it.position_;
          }
@@ -274,18 +277,19 @@ public:
             \brief Checks if iterator is still valid. Analog of != 0 comparison for pointers.
             \returns true if iterator is valid.
          */
-        bool valid() const { return position_ != bm::id_max; }
+        bool valid() const BMNOEXCEPT { return position_ != bm::id_max; }
  
          /**
             \fn bool bm::bvector::iterator_base::invalidate() 
             \brief Turns iterator into an invalid state.
          */
-        void invalidate() { position_ = bm::id_max; }
+        void invalidate() BMNOEXCEPT
+            { position_ = bm::id_max; block_type_ = ~0u;}
          
          /** \brief Compare FSMs for testing purposes
              \internal
          */
-        bool compare_state(const iterator_base& ib) const
+        bool compare_state(const iterator_base& ib) const BMNOEXCEPT
          {
              if (this->bv_ != ib.bv_)                 return false;
              if (this->position_ != ib.position_)     return false;
@@ -317,7 +321,9 @@ public:
  
      public:
  
-        /** Information about current bitblock. */
+        /** Bit-block descriptor
+            @internal
+        */
          struct bitblock_descr
          {
              const bm::word_t*   ptr;      //!< Word pointer.
@@ -327,7 +333,9 @@ public:
              size_type           pos;      //!< Last bit position decode before
          };
  
-        /** Information about current DGAP block. */
+        /** Information about current DGAP block.
+            @internal
+        */
          struct dgap_descr
          {
              const gap_word_t*   ptr;       //!< Word pointer.
@@ -379,9 +387,9 @@ public:
          typedef void               pointer;
          typedef void               reference;
  
-        insert_iterator() : bvect_(0), max_bit_(0) {}
+        insert_iterator() BMNOEXCEPT : bvect_(0), max_bit_(0) {}
  
-        insert_iterator(bvector<Alloc>& bvect)
+        insert_iterator(bvector<Alloc>& bvect) BMNOEXCEPT
              : bvect_(&bvect), 
                max_bit_(bvect.size())
          {
@@ -463,7 +471,7 @@ public:
          typedef void                     pointer;
          typedef void                     reference;
  
-        bulk_insert_iterator()
+        bulk_insert_iterator() BMNOEXCEPT
              : bvect_(0), buf_(0), buf_size_(0), sorted_(BM_UNKNOWN) {}
          
          ~bulk_insert_iterator()
@@ -473,7 +481,8 @@ public:
                  bvect_->blockman_.get_allocator().free_bit_block((bm::word_t*)buf_);
          }
  
-        bulk_insert_iterator(bvector<Alloc>& bvect, bm::sort_order so = BM_UNKNOWN)
+        bulk_insert_iterator(bvector<Alloc>& bvect,
+                             bm::sort_order so = BM_UNKNOWN) BMNOEXCEPT
              : bvect_(&bvect), sorted_(so)
          {
              bvect_->init();
@@ -499,7 +508,7 @@ public:
              sorted_ = BM_UNKNOWN;
          }
  
-        bulk_insert_iterator(bulk_insert_iterator&& iit) BMNOEXEPT
+        bulk_insert_iterator(bulk_insert_iterator&& iit) BMNOEXCEPT
              : bvect_(iit.bvect_)
          {
              buf_ = iit.buf_; iit.buf_ = 0;
@@ -518,7 +527,7 @@ public:
              return *this;
          }
          
-        bulk_insert_iterator& operator=(bulk_insert_iterator&& ii) BMNOEXEPT
+        bulk_insert_iterator& operator=(bulk_insert_iterator&& ii) BMNOEXCEPT
          {
              bvect_ = ii.bvect_;
              if (buf_)
@@ -562,11 +571,11 @@ public:
              bvect_->sync_size();
          }
          
-        bvector_type* get_bvector() const { return bvect_; }
+        bvector_type* get_bvector() const BMNOEXCEPT { return bvect_; }
          
      protected:
          static
-        size_type buf_size_max()
+        size_type buf_size_max() BMNOEXCEPT
          {
              #ifdef BM64ADDR
                  return bm::set_block_size / 2;
@@ -599,26 +608,40 @@ public:
          typedef unsigned&    reference;
  
      public:
-        enumerator() : iterator_base()
+        enumerator() BMNOEXCEPT : iterator_base()
          {}
          
          /*! @brief Construct enumerator associated with a vector.
              This construction creates unpositioned iterator with status
              valid() == false. It can be re-positioned using go_first() or go_to()
          */
-        enumerator(const bvector<Alloc>* bv)
+        enumerator(const bvector<Alloc>* bv) BMNOEXCEPT
              : iterator_base()
          {
              this->bv_ = const_cast<bvector<Alloc>*>(bv);
          }
  
+        /*! @brief Construct enumerator for bit vector
+            @param bv  bit-vector reference
+            @param pos bit position in the vector
+                       if position is 0, it finds the next 1 or becomes not valid
+                       (en.valid() == false)
+        */
+        enumerator(const bvector<Alloc>& bv, size_type pos = 0) BMNOEXCEPT
+            : iterator_base()
+        {
+            this->bv_ = const_cast<bvector<Alloc>*>(&bv);
+            go_to(pos);
+        }
+
+
          /*! @brief Construct enumerator for bit vector
              @param bv  bit-vector pointer
              @param pos bit position in the vector
                         if position is 0, it finds the next 1 or becomes not valid
                         (en.valid() == false)
          */
-        enumerator(const bvector<Alloc>* bv, size_type pos)
+        enumerator(const bvector<Alloc>* bv, size_type pos) BMNOEXCEPT
              : iterator_base()
          { 
              this->bv_ = const_cast<bvector<Alloc>*>(bv);
@@ -626,505 +649,72 @@ public:
          }
  
          /*! \brief Get current position (value) */
-        size_type operator*() const { return this->position_; }
+        size_type operator*() const BMNOEXCEPT { return this->position_; }
  
          /*! \brief Get current position (value) */
-        size_type value() const { return this->position_; }
+        size_type value() const BMNOEXCEPT { return this->position_; }
          
          /*! \brief Advance enumerator forward to the next available bit */
-        enumerator& operator++() { return this->go_up(); }
+        enumerator& operator++() BMNOEXCEPT { this->go_up(); return *this; }
  
          /*! \brief Advance enumerator forward to the next available bit.
               Possibly do NOT use this operator it is slower than the pre-fix increment.
           */
-        enumerator operator++(int)
+        enumerator operator++(int) BMNOEXCEPT
          {
              enumerator tmp = *this;
              this->go_up();
              return tmp;
          }
  
-
          /*! \brief Position enumerator to the first available bit */
-        void go_first()
-        {
-            BM_ASSERT(this->bv_);
-            
-            blocks_manager_type* bman = &(this->bv_->blockman_);
-            if (!bman->is_init())
-            {
-                this->invalidate();
-                return;
-            }
-            
-            bm::word_t*** blk_root = bman->top_blocks_root();
-
-            this->block_idx_ = this->position_= 0;
-            unsigned i, j;
-
-            for (i = 0; i < bman->top_block_size(); ++i)
-            {
-                bm::word_t** blk_blk = blk_root[i];
-
-                if (blk_blk == 0) // not allocated
-                {
-                    this->block_idx_ += bm::set_sub_array_size;
-                    this->position_ += bm::bits_in_array;
-                    continue;
-                }
-                
-                if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
-                    blk_blk = FULL_SUB_BLOCK_REAL_ADDR;
-
-                for (j = 0; j < bm::set_sub_array_size; ++j,++(this->block_idx_))
-                {
-                    this->block_ = blk_blk[j];
-
-                    if (this->block_ == 0)
-                    {
-                        this->position_ += bits_in_block;
-                        continue;
-                    }
-
-                    if (BM_IS_GAP(this->block_))
-                    {
-                        this->block_type_ = 1;
-                        if (search_in_gapblock())
-                        {
-                            return;
-                        }
-                    }
-                    else
-                    {
-                        if (this->block_ == FULL_BLOCK_FAKE_ADDR)
-                            this->block_ = FULL_BLOCK_REAL_ADDR;
-
-                        this->block_type_ = 0;
-                        if (search_in_bitblock())
-                        {
-                            return;
-                        }
-                    }
-            
-                } // for j
-
-            } // for i
-
-            this->invalidate();
-        }
-        
-        /// advance iterator forward by one
-        void advance() { this->go_up(); }
+        void go_first() BMNOEXCEPT;
  
+        /*! advance iterator forward by one
+            @return true if advance was successful and the enumerator is valid
+        */
+        bool advance() BMNOEXCEPT { return this->go_up(); }
  
          /*! \brief Advance enumerator to the next available bit */
-        enumerator& go_up()
-        {
-            BM_ASSERT(this->valid());
-            BM_ASSERT_THROW(this->valid(), BM_ERR_RANGE);
+        bool go_up() BMNOEXCEPT;
  
-            // Current block search.
-            //
-            
-            block_descr_type* bdescr = &(this->bdescr_);
-            switch (this->block_type_)
-            {
-            case 0:   //  BitBlock
-                {
-                    // check if we can get the value from the bits traversal cache
-                    unsigned short idx = ++(bdescr->bit_.idx);
-                    if (idx < bdescr->bit_.cnt)
-                    {
-                        this->position_ = bdescr->bit_.pos + bdescr->bit_.bits[idx];
-                        return *this;
-                    }
-                    this->position_ +=
-                        (bm::set_bitscan_wave_size * 32) - bdescr->bit_.bits[--idx];
-                    
-                    bdescr->bit_.ptr += bm::set_bitscan_wave_size;
-                    if (decode_bit_group(bdescr))
-                    {
-                        return *this;
-                    }
-                }
-                break;
-            case 1:   // DGAP Block
-                {
-                    ++this->position_;
-                    if (--(bdescr->gap_.gap_len))
-                    {
-                        return *this;
-                    }
-
-                    // next gap is "OFF" by definition.
-                    if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1)
-                    {
-                        break;
-                    }
-                    gap_word_t prev = *(bdescr->gap_.ptr);
-                    unsigned int val = *(++(bdescr->gap_.ptr));
-                    
-                    this->position_ += val - prev;
-                    // next gap is now "ON"
-                    if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1)
-                    {
-                        break;
-                    }
-                    prev = *(bdescr->gap_.ptr);
-                    val = *(++(bdescr->gap_.ptr));
-                    bdescr->gap_.gap_len = (gap_word_t)(val - prev);
-                    return *this;  // next "ON" found;
-                }
-            default:
-                BM_ASSERT(0);
-
-            } // switch
-
-            if (search_in_blocks())
-                return *this;
-            
-            this->invalidate();
-            return *this;
-        }
-        
          /*!
              @brief Skip to specified relative rank
-            @param rank - number of ON bits to go for
+            @param rank - number of ON bits to go for (must be: > 0)
+            @return true if skip was successful and enumerator is valid
          */
-        enumerator& skip_to_rank(size_type rank)
+        bool skip_to_rank(size_type rank) BMNOEXCEPT
          {
+            BM_ASSERT(rank);
              --rank;
              if (!rank)
-                return *this;
+                return this->valid();
              return skip(rank);
          }
          
          /*!
              @brief Skip specified number of bits from enumeration
              @param rank - number of ON bits to skip
+            @return true if skip was successful and enumerator is valid
          */
-        enumerator& skip(size_type rank)
-        {
-            if (!this->valid() || !rank)
-                return *this;
-            for (; rank; --rank)
-            {
-                block_descr_type* bdescr = &(this->bdescr_);
-                switch (this->block_type_)
-                {
-                case 0:   //  BitBlock
-                    for (; rank; --rank)
-                    {
-                        unsigned short idx = ++(bdescr->bit_.idx);
-                        if (idx < bdescr->bit_.cnt)
-                        {
-                            this->position_ = bdescr->bit_.pos + bdescr->bit_.bits[idx];
-                            continue;
-                        }
-                        this->position_ +=
-                            (bm::set_bitscan_wave_size * 32) - bdescr->bit_.bits[--idx];
-                        bdescr->bit_.ptr += bm::set_bitscan_wave_size;
-                        
-                        if (!decode_bit_group(bdescr, rank))
-                            break;
-                    } // for rank
-                    break;
-                case 1:   // DGAP Block
-                    for (; rank; --rank) // TODO: better skip logic
-                    {
-                        ++this->position_;
-                        if (--(bdescr->gap_.gap_len))
-                        {
-                            continue;
-                        }
-
-                        // next gap is "OFF" by definition.
-                        if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1)
-                        {
-                            break;
-                        }
-                        gap_word_t prev = *(bdescr->gap_.ptr);
-                        unsigned int val = *(++(bdescr->gap_.ptr));
-                        
-                        this->position_ += val - prev;
-                        // next gap is now "ON"
-                        if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1)
-                        {
-                            break;
-                        }
-                        prev = *(bdescr->gap_.ptr);
-                        val = *(++(bdescr->gap_.ptr));
-                        bdescr->gap_.gap_len = (gap_word_t)(val - prev);
-                    } // for rank
-                    break;
-                default:
-                    BM_ASSERT(0);
-                } // switch
-                
-                if (!rank)
-                    return *this;
+        bool skip(size_type rank) BMNOEXCEPT;
  
-                if (!search_in_blocks())
-                {
-                    this->invalidate();
-                    return *this;
-                }
-            } // for rank
-            return *this;
-        }
-        
          /*!
              @brief go to a specific position in the bit-vector (or next)
          */
-        enumerator& go_to(size_type pos)
-        {
-            if (pos == 0)
-            {
-                go_first();
-                return *this;
-            }
-
-            size_type new_pos = this->bv_->check_or_next(pos); // find the true pos
-            if (new_pos == 0) // no bits available
-            {
-                this->invalidate();
-                return *this;
-            }
-            BM_ASSERT(new_pos >= pos);
-            pos = new_pos;
-            
-            
-            this->position_ = pos;
-            size_type nb = this->block_idx_ = (pos >> bm::set_block_shift);
-            bm::bvector<Alloc>::blocks_manager_type& bman =
-                                                 this->bv_->get_blocks_manager();
-            unsigned i0, j0;
-            bm::get_block_coord(nb, i0, j0);
-            this->block_ = bman.get_block(i0, j0);
-
-            BM_ASSERT(this->block_);
-            
-            this->block_type_ = (bool)BM_IS_GAP(this->block_);
-
-            block_descr_type* bdescr = &(this->bdescr_);
-            unsigned nbit = unsigned(pos & bm::set_block_mask);
-
-            if (this->block_type_) // gap
-            {
-                this->position_ = nb * bm::set_block_size * 32;
-                search_in_gapblock();
-                
-                if (this->position_ == pos)
-                    return *this;
-                this->position_ = pos;
-
-                gap_word_t* gptr = BMGAP_PTR(this->block_);
-                unsigned is_set;
-                unsigned gpos = bm::gap_bfind(gptr, nbit, &is_set);
-                BM_ASSERT(is_set);
-                
-                bdescr->gap_.ptr = gptr + gpos;
-                if (gpos == 1)
-                {
-                    bdescr->gap_.gap_len = bm::gap_word_t(gptr[gpos] - (nbit - 1));
-                }
-                else
-                {
-                    bm::gap_word_t interval = bm::gap_word_t(gptr[gpos] - gptr[gpos - 1]);
-                    bm::gap_word_t interval2 = bm::gap_word_t(nbit - gptr[gpos - 1]);
-                    bdescr->gap_.gap_len = bm::gap_word_t(interval - interval2 + 1);
-                }
-            }
-            else // bit
-            {
-                if (nbit == 0)
-                {
-                    search_in_bitblock();
-                    return *this;
-                }
-
-                unsigned nword  = unsigned(nbit >> bm::set_word_shift);
-                
-                // check if we need to step back to match the wave
-                unsigned parity = nword % bm::set_bitscan_wave_size;
-                bdescr->bit_.ptr = this->block_ + (nword - parity);
-                bdescr->bit_.cnt = bm::bitscan_wave(bdescr->bit_.ptr, bdescr->bit_.bits);
-                BM_ASSERT(bdescr->bit_.cnt);
-                bdescr->bit_.pos = (nb * bm::set_block_size * 32) + ((nword - parity) * 32);
-                bdescr->bit_.idx = 0;
-                nbit &= bm::set_word_mask;
-                nbit += 32 * parity;
-                for (unsigned i = 0; i < bdescr->bit_.cnt; ++i)
-                {
-                    if (bdescr->bit_.bits[i] == nbit)
-                        return *this;
-                    bdescr->bit_.idx++;
-                } // for
-                BM_ASSERT(0);
-            }
-            return *this;
-        }
-
+        bool go_to(size_type pos) BMNOEXCEPT;
  
      private:
          typedef typename iterator_base::block_descr block_descr_type;
          
-        bool decode_wave(block_descr_type* bdescr)
-        {
-            bdescr->bit_.cnt = bm::bitscan_wave(bdescr->bit_.ptr, bdescr->bit_.bits);
-            if (bdescr->bit_.cnt) // found
-            {
-                bdescr->bit_.idx ^= bdescr->bit_.idx; // = 0;
-                bdescr->bit_.pos = this->position_;
-                this->position_ += bdescr->bit_.bits[0];
-                return true;
-            }
-            return false;
-        }
-        
-        bool decode_bit_group(block_descr_type* bdescr)
-        {
-            const word_t* block_end = this->block_ + bm::set_block_size;
-            for (; bdescr->bit_.ptr < block_end;)
-            {
-                if (decode_wave(bdescr))
-                    return true;
-                this->position_ += bm::set_bitscan_wave_size * 32; // wave size
-                bdescr->bit_.ptr += bm::set_bitscan_wave_size;
-            } // for
-            return false;
-        }
-        
-        bool decode_bit_group(block_descr_type* bdescr, size_type& rank)
-        {
-            const word_t* block_end = this->block_ + bm::set_block_size;
-            
-            for (; bdescr->bit_.ptr < block_end;)
-            {
-                const bm::id64_t* w64_p = (bm::id64_t*)bdescr->bit_.ptr;
-                bm::id64_t w64 = *w64_p;
-                unsigned cnt = bm::word_bitcount64(w64);
-                if (rank > cnt)
-                {
-                    rank -= cnt;
-                }
-                else
-                {
-                    if (decode_wave(bdescr))
-                        return true;
-                }
-                this->position_ += bm::set_bitscan_wave_size * 32; // wave size
-                bdescr->bit_.ptr += bm::set_bitscan_wave_size;
-            } // for
-            return false;
-        }
-
-        bool search_in_bitblock()
-        {
-            BM_ASSERT(this->block_type_ == 0);
-            
-            block_descr_type* bdescr = &(this->bdescr_);
-            bdescr->bit_.ptr = this->block_;
-            
-            return decode_bit_group(bdescr);
-        }
-
-        bool search_in_gapblock()
-        {
-            BM_ASSERT(this->block_type_ == 1);
-
-            block_descr_type* bdescr = &(this->bdescr_);
-            bdescr->gap_.ptr = BMGAP_PTR(this->block_);
-            unsigned bitval = *(bdescr->gap_.ptr) & 1;
-
-            ++(bdescr->gap_.ptr);
-
-            for (;true;)
-            {
-                unsigned val = *(bdescr->gap_.ptr);
-                if (bitval)
-                {
-                    gap_word_t* first = BMGAP_PTR(this->block_) + 1;
-                    if (bdescr->gap_.ptr == first)
-                    {
-                        bdescr->gap_.gap_len = (gap_word_t)(val + 1);
-                    }
-                    else
-                    {
-                        bdescr->gap_.gap_len = 
-                             (gap_word_t)(val - *(bdescr->gap_.ptr-1));
-                    }
-                    return true;
-                }
-                this->position_ += val + 1;
-                if (val == bm::gap_max_bits - 1)
-                    break;
-                bitval ^= 1;
-                ++(bdescr->gap_.ptr);
-            }
-            return false;
-        }
-        
-        bool search_in_blocks()
-        {
-            ++(this->block_idx_);
-            const blocks_manager_type& bman = this->bv_->blockman_;
-            block_idx_type i = this->block_idx_ >> bm::set_array_shift;
-            block_idx_type top_block_size = bman.top_block_size();
-            bm::word_t*** blk_root = bman.top_blocks_root();
-            for (; i < top_block_size; ++i)
-            {
-                bm::word_t** blk_blk = blk_root[i];
-                if (blk_blk == 0)
-                {
-                    // fast scan fwd in top level 
-                    size_type bn = this->block_idx_ + bm::set_sub_array_size;
-                    size_type pos = this->position_ + bm::bits_in_array;
-                    for (++i; i < top_block_size; ++i)
-                    {
-                        if (blk_root[i])
-                            break;
-                        bn += bm::set_sub_array_size;
-                        pos += bm::bits_in_array;
-                    } // for i
-                    this->block_idx_ = bn;
-                    this->position_ = pos;
-                    if ((i < top_block_size) && blk_root[i])
-                        --i;
-                    continue;
-                }
-                if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
-                    blk_blk = FULL_SUB_BLOCK_REAL_ADDR;
-
-                block_idx_type j = this->block_idx_ & bm::set_array_mask;
-
-                for(; j < bm::set_sub_array_size; ++j, ++(this->block_idx_))
-                {
-                    this->block_ = blk_blk[j];
-
-                    if (this->block_ == 0)
-                    {
-                        this->position_ += bm::bits_in_block;
-                        continue;
-                    }
+        static bool decode_wave(block_descr_type* bdescr) BMNOEXCEPT;
+        bool decode_bit_group(block_descr_type* bdescr) BMNOEXCEPT;
+        bool decode_bit_group(block_descr_type* bdescr,
+                              size_type& rank) BMNOEXCEPT;
+        bool search_in_bitblock() BMNOEXCEPT;
+        bool search_in_gapblock() BMNOEXCEPT;
+        bool search_in_blocks() BMNOEXCEPT;
  
-                    this->block_type_ = BM_IS_GAP(this->block_);
-                    if (this->block_type_)
-                    {
-                        if (search_in_gapblock())
-                            return true;
-                    }
-                    else
-                    {
-                        if (this->block_ == FULL_BLOCK_FAKE_ADDR)
-                            this->block_ = FULL_BLOCK_REAL_ADDR;
-                        if (search_in_bitblock())
-                            return true;
-                    }
-                } // for j
-            } // for i
-            return false;
-        }
      };
      
      /*!
@@ -1142,15 +732,14 @@ public:
  #ifndef BM_NO_STL
          typedef std::input_iterator_tag  iterator_category;
  #endif
-        counted_enumerator() : bit_count_(0){}
+        counted_enumerator() BMNOEXCEPT : bit_count_(0){}
          
-        counted_enumerator(const enumerator& en) : enumerator(en)
+        counted_enumerator(const enumerator& en) BMNOEXCEPT : enumerator(en)
          {
-            if (this->valid())
-                bit_count_ = 1;
+            bit_count_ = this->valid(); // 0 || 1
          }
          
-        counted_enumerator& operator=(const enumerator& en)
+        counted_enumerator& operator=(const enumerator& en) BMNOEXCEPT
          {
              enumerator* me = this;
              *me = en;
@@ -1159,11 +748,10 @@ public:
              return *this;
          }
          
-        counted_enumerator& operator++()
+        counted_enumerator& operator++() BMNOEXCEPT
          {
              this->go_up();
-            if (this->valid())
-                ++(this->bit_count_);
+            this->bit_count_ += this->valid();
              return *this;
          }
  
@@ -1171,8 +759,7 @@ public:
          {
              counted_enumerator tmp(*this);
              this->go_up();
-            if (this->valid())
-                ++bit_count_;
+            this->bit_count_ += this->valid();
              return tmp;
          }
          
@@ -1181,7 +768,7 @@ public:
              Method returns number of ON bits fromn the bit 0 to the current bit 
              For the first bit in bitvector it is 1, for the second 2 
          */
-        size_type count() const { return bit_count_; }
+        size_type count() const BMNOEXCEPT { return bit_count_; }
      private:
          /*! Function closed for usage */
          counted_enumerator& go_to(size_type pos);
@@ -1198,10 +785,10 @@ public:
      class mem_pool_guard
      {
      public:
-        mem_pool_guard() : bv_(0)
+        mem_pool_guard() BMNOEXCEPT : bv_(0)
          {}
  
-        mem_pool_guard(allocator_pool_type& pool, bvector<Alloc>& bv)
+        mem_pool_guard(allocator_pool_type& pool, bvector<Alloc>& bv) BMNOEXCEPT
              : bv_(&bv)
          {
              bv.set_allocator_pool(&pool);
@@ -1213,13 +800,14 @@ public:
          }
  
          /// check if vector has no assigned allocator and set one
-        void assign_if_not_set(allocator_pool_type& pool, bvector<Alloc>& bv)
+        void assign_if_not_set(allocator_pool_type& pool,
+                               bvector<Alloc>& bv) BMNOEXCEPT
          {
-            if (bv.get_allocator_pool() == 0) // alloc pool not set yet
+            if (!bv.get_allocator_pool()) // alloc pool not set yet
              {
                  BM_ASSERT(!bv_);
                  bv_ = &bv;
-                bv.set_allocator_pool(&pool);
+                bv_->set_allocator_pool(&pool);
              }
          }
  
@@ -1248,7 +836,7 @@ public:
          const         gap_word_t* glevel_len;
          
          allocation_policy(bm::strategy s=BM_BIT,
-                          const gap_word_t* glevels = bm::gap_len_table<true>::_len)
+            const gap_word_t* glevels = bm::gap_len_table<true>::_len) BMNOEXCEPT
          : strat(s), glevel_len(glevels)
          {}
      };
@@ -1329,7 +917,7 @@ public:
      }
  
      
-    ~bvector() BMNOEXEPT {}
+    ~bvector() BMNOEXCEPT {}
      /*!
          \brief Explicit post-construction initialization
      */
@@ -1353,7 +941,7 @@ public:
      /*!
          \brief Move constructor
      */
-    bvector(bvector<Alloc>&& bvect) BMNOEXEPT
+    bvector(bvector<Alloc>&& bvect) BMNOEXCEPT
      {
          blockman_.move_from(bvect.blockman_);
          size_ = bvect.size_;
@@ -1380,7 +968,7 @@ public:
      /*! 
          \brief Move assignment operator
      */
-    bvector& operator=(bvector<Alloc>&& bvect) BMNOEXEPT
+    bvector& operator=(bvector<Alloc>&& bvect) BMNOEXCEPT
      {
          this->move_from(bvect);
          return *this;
@@ -1389,11 +977,11 @@ public:
      /*!
          \brief Move bvector content from another bvector
      */
-    void move_from(bvector<Alloc>& bvect) BMNOEXEPT;
+    void move_from(bvector<Alloc>& bvect) BMNOEXCEPT;
      
      /*! \brief Exchanges content of bv and this bvector.
      */
-    void swap(bvector<Alloc>& bvect) BMNOEXEPT;
+    void swap(bvector<Alloc>& bvect) BMNOEXCEPT;
  
      /*! \brief Merge/move content from another vector
      
@@ -1419,7 +1007,7 @@ public:
          return reference(*this, n);
      }
  
-    bool operator[](size_type n) const
+    bool operator[](size_type n) const BMNOEXCEPT
      {
          BM_ASSERT(n < size_);
          return get_bit(n);
@@ -1434,25 +1022,23 @@ public:
      bool operator <= (const bvector<Alloc>& bv) const { return compare(bv)<=0; }
      bool operator > (const bvector<Alloc>& bv) const { return compare(bv)>0; }
      bool operator >= (const bvector<Alloc>& bv) const { return compare(bv) >= 0; }
-    bool operator == (const bvector<Alloc>& bv) const { return equal(bv); }
-    bool operator != (const bvector<Alloc>& bv) const { return !equal(bv); }
+    bool operator == (const bvector<Alloc>& bv) const BMNOEXCEPT { return equal(bv); }
+    bool operator != (const bvector<Alloc>& bv) const BMNOEXCEPT { return !equal(bv); }
  
      bvector<Alloc> operator~() const { return bvector<Alloc>(*this).invert(); }
      
      Alloc get_allocator() const
-    {
-        return blockman_.get_allocator();
-    }
+        { return blockman_.get_allocator(); }
  
-    /// Set allocator pool for local (non-threaded) 
+    /// Set allocator pool for local (non-threaded)
      /// memory cyclic(lots of alloc-free ops) opertations
      ///
-    void set_allocator_pool(allocator_pool_type* pool_ptr)
+    void set_allocator_pool(allocator_pool_type* pool_ptr) BMNOEXCEPT
                          { blockman_.get_allocator().set_pool(pool_ptr); }
  
      /// Get curent allocator pool (if set)
      /// @return pointer to the current pool or NULL
-    allocator_pool_type* get_allocator_pool()
+    allocator_pool_type* get_allocator_pool() BMNOEXCEPT
                          { return blockman_.get_allocator().get_pool(); }
  
      // --------------------------------------------------------------------
@@ -1567,6 +1153,10 @@ public:
      */
      void set_bit_no_check(size_type n);
  
+    /**
+        \brief Set specified bit without checking preconditions (size, etc)
+    */
+    bool set_bit_no_check(size_type n, bool val);
  
      /*!
          \brief Sets all bits in the specified closed interval [left,right]
@@ -1596,9 +1186,7 @@ public:
          @sa set_range
      */
      void clear_range(size_type left, size_type right)
-    {
-        set_range(left, right, false);
-    }
+                        { set_range(left, right, false); }
  
  
      /*!
@@ -1642,20 +1230,13 @@ public:
         \param free_mem if "true" (default) bvector frees the memory,
         otherwise sets blocks to 0.
      */
-    void clear(bool free_mem = false)
-    {
-        blockman_.set_all_zero(free_mem);
-    }
+    void clear(bool free_mem = false) { blockman_.set_all_zero(free_mem); }
  
      /*!
         \brief Clears every bit in the bitvector.
         \return *this;
      */
-    bvector<Alloc>& reset()
-    {
-        clear(true);
-        return *this;
-    }
+    bvector<Alloc>& reset() { clear(true); return *this; }
      
      /*!
         \brief Flips bit n
@@ -1688,7 +1269,7 @@ public:
      //size_type capacity() const { return blockman_.capacity(); }
  
      /*! \brief return current size of the vector (bits) */
-    size_type size() const { return size_; }
+    size_type size() const BMNOEXCEPT { return size_; }
  
      /*!
          \brief Change size of the bvector
@@ -1699,15 +1280,16 @@ public:
      //@}
      // --------------------------------------------------------------------
  
-    /*! @name Population counting and ranking methods
+    /*! @name Population counting, ranks, ranges and intervals
      */
      //@{
  
      /*!
         \brief population cout (count of ON bits)
-       \return Total number of bits ON.
+       \sa count_range
+       \return Total number of bits ON
      */
-    size_type count() const;
+    size_type count() const BMNOEXCEPT;
  
      /*! \brief Computes bitcount values for all bvector blocks
          \param arr - pointer on array of block bit counts
@@ -1715,8 +1297,9 @@ public:
          This number +1 gives you number of arr elements initialized during the
          function call.
      */
-    block_idx_type count_blocks(unsigned* arr) const;
-    
+    block_idx_type count_blocks(unsigned* arr) const BMNOEXCEPT;
+
+
      /*!
         \brief Returns count of 1 bits in the given range [left..right]
         Uses rank-select index to accelerate the search
@@ -1729,7 +1312,7 @@ public:
      */
      size_type count_range(size_type left,
                            size_type right,
-                          const rs_index_type&  rs_idx) const;
+                          const rs_index_type&  rs_idx) const BMNOEXCEPT;
      
      /*!
         \brief Returns count of 1 bits in the given range [left..right]
@@ -1739,10 +1322,32 @@ public:
  
         \return population count in the diapason
      */
-    size_type count_range(size_type left,
-                          size_type right) const;
+    size_type count_range(size_type left, size_type right) const BMNOEXCEPT;
+
+    /*!
+       \brief Returns true if all bits in the range are 1s (saturated interval)
+       Function uses closed interval [left, right]
+
+       \param left - index of first bit to start checking
+       \param right - index of last bit
+
+       \return true if all bits are 1, false otherwise
+       @sa any_range, count_range
+    */
+    bool is_all_one_range(size_type left, size_type right) const BMNOEXCEPT;
+
+    /*!
+       \brief Returns true if any bits in the range are 1s (non-empty interval)
+       Function uses closed interval [left, right]
+
+       \param left - index of first bit to start checking
+       \param right - index of last bit
+
+       \return true if at least 1 bit is set
+       @sa is_all_one_range, count_range
+    */
+    bool any_range(size_type left, size_type right) const BMNOEXCEPT;
  
-    
  
      /*! \brief compute running total of all blocks in bit vector (rank-select index)
          \param rs_idx - [out] pointer to index / count structure
@@ -1762,23 +1367,41 @@ public:
                         should be prepared using build_rs_index
         \return population count in the range [0..n]
         \sa build_rs_index
-       \sa count_to_test, select, rank
+       \sa count_to_test, select, rank, rank_corrected
      */
-    size_type count_to(size_type n, const rs_index_type&  rs_idx) const;
+    size_type count_to(size_type n,
+                       const rs_index_type&  rs_idx) const BMNOEXCEPT;
      
      
      /*!
-       \brief Returns rank of specified bit position
+       \brief Returns rank of specified bit position (same as count_to())
       
         \param n - index of bit to rank
         \param rs_idx -  rank-select index
         \return population count in the range [0..n]
         \sa build_rs_index
-       \sa count_to_test, select, rank
+       \sa count_to_test, select, rank, rank_corrected
      */
-    size_type rank(size_type n, const rs_index_type&  rs_idx) const
-                                            {  return count_to(n, rs_idx); }
+    size_type rank(size_type n, 
+                   const rs_index_type&  rs_idx) const BMNOEXCEPT
+                                    {  return count_to(n, rs_idx); }
      
+    /*!
+       \brief Returns rank corrected by the requested border value (as -1)
+
+       This is the rank function (bit-count) minus the value of bit 'n':
+       if bit-n is true the function returns rank()-1, if false it returns rank().
+       It is faster than rank() + test().
+
+
+       \param n - index of bit to rank
+       \param rs_idx -  rank-select index
+       \return population count in the range [0..n] corrected as -1 by the value of n
+       \sa build_rs_index
+       \sa count_to_test, select, rank
+    */
+    size_type rank_corrected(size_type n,
+                 const rs_index_type&  rs_idx) const BMNOEXCEPT;
  
      /*!
          \brief popcount in [0..right] range if test(right) == true
@@ -1787,25 +1410,28 @@ public:
          plus count_to()
       
          \param n - index of bit to test and rank
-        \param blocks_cnt - block count structure to accelerate search
-               should be prepared using running_count_blocks
+        \param rs_idx - rank-select index
+                       (block count structure to accelerate search)
+                        should be prepared using build_rs_index()
  
          \return population count in the diapason or 0 if right bit test failed
  
          \sa build_rs_index
          \sa count_to
      */
-    size_type count_to_test(size_type n, const rs_index_type&  blocks_cnt) const;
+    size_type
+    count_to_test(size_type n, 
+                  const rs_index_type&  rs_idx) const BMNOEXCEPT;
  
  
      /*! Recalculate bitcount (deprecated)
      */
-    size_type recalc_count() { return count(); }
+    size_type recalc_count() BMNOEXCEPT { return count(); }
      
      /*!
          Disables count cache. (deprecated).
      */
-    void forget_count() {}
+    void forget_count() BMNOEXCEPT {}
      
      //@}
      
@@ -1818,14 +1444,14 @@ public:
         \param n - Index of the bit to check.
         \return Bit value (1 or 0)
      */
-    bool get_bit(size_type n) const;
+    bool get_bit(size_type n) const BMNOEXCEPT;
  
      /*!
         \brief returns true if bit n is set and false is bit n is 0. 
         \param n - Index of the bit to check.
         \return Bit value (1 or 0)
      */
-    bool test(size_type n) const { return get_bit(n); }
+    bool test(size_type n) const BMNOEXCEPT { return get_bit(n); }
      
      //@}
      
@@ -1874,12 +1500,12 @@ public:
         \brief Returns true if any bits in this bitset are set, and otherwise returns false.
         \return true if any bit is set
      */
-    bool any() const;
+    bool any() const BMNOEXCEPT;
  
      /*!
          \brief Returns true if no bits are set, otherwise returns false.
      */
-    bool none() const { return !any(); }
+    bool none() const BMNOEXCEPT { return !any(); }
      
      //@}
      // --------------------------------------------------------------------
@@ -1890,21 +1516,22 @@ public:
      /*!
         \fn bool bvector::find(bm::id_t& pos) const
         \brief Finds index of first 1 bit
-       \param pos - index of the found 1 bit
+       \param pos - [out] index of the found 1 bit
         \return true if search returned result
         \sa get_first, get_next, extract_next, find_reverse, find_first_mismatch
      */
-    bool find(size_type& pos) const;
+    bool find(size_type& pos) const BMNOEXCEPT;
  
      /*!
         \fn bool bvector::find(bm::id_t from, bm::id_t& pos) const
-       \brief Finds index of 1 bit starting from position
+       \brief Find index of 1 bit starting from position
         \param from - position to start search from
-       \param pos - index of the found 1 bit
+       \param pos - [out] index of the found 1 bit
         \return true if search returned result
         \sa get_first, get_next, extract_next, find_reverse, find_first_mismatch
      */
-    bool find(size_type from, size_type& pos) const;
+    bool find(size_type from, size_type& pos) const BMNOEXCEPT;
+
  
      /*!
         \fn bm::id_t bvector::get_first() const
@@ -1915,7 +1542,7 @@ public:
         \return Index of the first 1 bit, may return 0
         \sa get_next, find, extract_next, find_reverse
      */
-    size_type get_first() const { return check_or_next(0); }
+    size_type get_first() const BMNOEXCEPT { return check_or_next(0); }
  
      /*!
         \fn bm::id_t bvector::get_next(bm::id_t prev) const
@@ -1924,7 +1551,7 @@ public:
         \return Index of the next bit which is ON or 0 if not found.
         \sa get_first, find, extract_next, find_reverse
      */
-    size_type get_next(size_type prev) const
+    size_type get_next(size_type prev) const BMNOEXCEPT
                  { return (++prev == bm::id_max) ? 0 : check_or_next(prev); }
  
      /*!
@@ -1945,7 +1572,7 @@ public:
         \return true if search returned result
         \sa get_first, get_next, extract_next, find, find_first_mismatch
      */
-    bool find_reverse(size_type& pos) const;
+    bool find_reverse(size_type& pos) const BMNOEXCEPT;
      
      /*!
         \brief Finds dynamic range of bit-vector [first, last]
@@ -1954,7 +1581,7 @@ public:
         \return true if search returned result
         \sa get_first, get_next, extract_next, find, find_reverse
      */
-    bool find_range(size_type& first, size_type& last) const;
+    bool find_range(size_type& first, size_type& last) const BMNOEXCEPT;
      
      /*!
          \brief Find bit-vector position for the specified rank(bitcount)
@@ -1969,7 +1596,8 @@ public:
       
          \return true if requested rank was found
      */
-    bool find_rank(size_type rank, size_type from, size_type& pos) const;
+    bool find_rank(size_type rank, size_type from,
+                   size_type& pos) const BMNOEXCEPT;
  
      /*!
          \brief Find bit-vector position for the specified rank(bitcount)
@@ -1989,7 +1617,7 @@ public:
          \return true if requested rank was found
      */
      bool find_rank(size_type rank, size_type from, size_type& pos,
-                   const rs_index_type&  rs_idx) const;
+                   const rs_index_type&  rs_idx) const BMNOEXCEPT;
      
      /*!
          \brief select bit-vector position for the specified rank(bitcount)
@@ -2007,7 +1635,8 @@ public:
  
          \return true if requested rank was found
      */
-    bool select(size_type rank, size_type& pos, const rs_index_type&  rs_idx) const;
+    bool select(size_type rank, size_type& pos,
+                const rs_index_type&  rs_idx) const BMNOEXCEPT;
  
      //@}
  
@@ -2185,7 +1814,7 @@ public:
  
         @sa statistics
      */
-    void calc_stat(struct bm::bvector<Alloc>::statistics* st) const;
+    void calc_stat(struct bm::bvector<Alloc>::statistics* st) const BMNOEXCEPT;
  
      /*!
         \brief Sets new blocks allocation strategy.
@@ -2200,7 +1829,8 @@ public:
                   1 - Blocks mutation mode (adaptive algorithm)
         \sa set_new_blocks_strat
      */
-    strategy  get_new_blocks_strat() const { return new_blocks_strat_; }
+    strategy  get_new_blocks_strat() const BMNOEXCEPT 
+                             { return new_blocks_strat_; }
  
      /*!
         \brief Optimize memory bitvector's memory allocation.
@@ -2239,7 +1869,7 @@ public:
          Return true if bvector is initialized at all
          @internal
      */
-    bool is_init() const { return blockman_.is_init(); }
+    bool is_init() const BMNOEXCEPT { return blockman_.is_init(); }
      
      //@}
      
@@ -2258,13 +1888,13 @@ public:
          @return 0 if this == arg, -1 if this < arg, 1 if this > arg
          @sa find_first_mismatch
      */
-    int compare(const bvector<Alloc>& bvect) const;
+    int compare(const bvector<Alloc>& bvect) const BMNOEXCEPT;
  
      /*!
          \brief Equal comparison with an agr bit-vector
          @return true if vectors are identical
      */
-    bool equal(const bvector<Alloc>& bvect) const
+    bool equal(const bvector<Alloc>& bvect) const BMNOEXCEPT
      {
          size_type pos;
          bool found = find_first_mismatch(bvect, pos);
@@ -2285,7 +1915,7 @@ public:
      bool find_first_mismatch(const bvector<Alloc>& bvect,
                                          size_type& pos,
                                          size_type  search_to = bm::id_max
-                                        ) const;
+                                        ) const BMNOEXCEPT;
      
      //@}
  
@@ -2305,14 +1935,16 @@ public:
          Use only if you are BitMagic library
          @internal
      */
-    const blocks_manager_type& get_blocks_manager() const { return blockman_; }
+    const blocks_manager_type& get_blocks_manager() const BMNOEXCEPT 
+                                            { return blockman_; }
      
      /**
          \brief get access to memory manager (internal)
          Use only if you are BitMagic library
          @internal
      */
-    blocks_manager_type& get_blocks_manager() { return blockman_; }
+    blocks_manager_type& get_blocks_manager() BMNOEXCEPT 
+                                    { return blockman_; }
  
      //@}
      
@@ -2338,21 +1970,22 @@ protected:
  
  private:
  
-    size_type check_or_next(size_type prev) const;
+    size_type check_or_next(size_type prev) const BMNOEXCEPT;
      
-    /// set bit in GAP block withlength extension control
+    /// set bit in GAP block with GAP block length control
      bool gap_block_set(bm::gap_word_t* gap_blk,
                         bool val, block_idx_type nblock, unsigned nbit);
-    
+
+    /// set bit in GAP block with GAP block length control (no return value)
+    void gap_block_set_no_ret(bm::gap_word_t* gap_blk,
+                              bool val, block_idx_type nblock,
+                              unsigned nbit);
+
      /// check if specified bit is 1, and set it to 0
      /// if specified bit is 0, scan for the next 1 and returns it
      /// if no 1 found returns 0
      size_type check_or_next_extract(size_type prev);
  
-    /**
-        \brief Set specified bit without checking preconditions (size, etc)
-    */
-    bool set_bit_no_check(size_type n, bool val);
  
      /**
          \brief AND specified bit without checking preconditions (size, etc)
@@ -2440,11 +2073,11 @@ private:
      size_type block_count_to(const bm::word_t* block,
                              block_idx_type nb,
                              unsigned nbit_right,
-                            const rs_index_type&  blocks_cnt);
+                            const rs_index_type&  blocks_cnt) BMNOEXCEPT;
      /**
          Return value of first bit in the block
      */
-    bool test_first_block_bit(block_idx_type nb) const;
+    bool test_first_block_bit(block_idx_type nb) const BMNOEXCEPT;
      
  private:
      blocks_manager_type  blockman_;         //!< bitblocks manager
@@ -2510,7 +2143,7 @@ void bvector<Alloc>::init()
  // -----------------------------------------------------------------------
  
  template<typename Alloc>
-void bvector<Alloc>::move_from(bvector<Alloc>& bvect) BMNOEXEPT
+void bvector<Alloc>::move_from(bvector<Alloc>& bvect) BMNOEXCEPT
  {
      if (this != &bvect)
      {
@@ -2572,7 +2205,7 @@ bvector<Alloc>& bvector<Alloc>::set_range(size_type left,
  // -----------------------------------------------------------------------
  
  template<typename Alloc> 
-typename bvector<Alloc>::size_type bvector<Alloc>::count() const
+typename bvector<Alloc>::size_type bvector<Alloc>::count() const BMNOEXCEPT
  {
      if (!blockman_.is_init())
          return 0;
@@ -2592,6 +2225,9 @@ typename bvector<Alloc>::size_type bvector<Alloc>::count() const
              if (!found)
                  break;
              blk_blk = blk_root[i];
+            BM_ASSERT(blk_blk);
+            if (!blk_blk)
+                break;
          }
          if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
          {
@@ -2619,7 +2255,7 @@ typename bvector<Alloc>::size_type bvector<Alloc>::count() const
  // -----------------------------------------------------------------------
  
  template<typename Alloc>
-bool bvector<Alloc>::any() const
+bool bvector<Alloc>::any() const BMNOEXCEPT
  {
      word_t*** blk_root = blockman_.top_blocks_root();
      if (!blk_root)
@@ -2780,7 +2416,7 @@ void bvector<Alloc>::build_rs_index(rs_index_type* rs_idx,
  
  template<typename Alloc>
  typename bvector<Alloc>::block_idx_type
-bvector<Alloc>::count_blocks(unsigned* arr) const
+bvector<Alloc>::count_blocks(unsigned* arr) const BMNOEXCEPT
  {
      bm::word_t*** blk_root = blockman_.top_blocks_root();
      if (blk_root == 0)
@@ -2797,7 +2433,7 @@ typename bvector<Alloc>::size_type
  bvector<Alloc>::block_count_to(const bm::word_t*    block,
                                 block_idx_type       nb,
                                 unsigned             nbit_right,
-                               const rs_index_type& rs_idx)
+                               const rs_index_type& rs_idx) BMNOEXCEPT
  {
      size_type c;
      unsigned sub_range = rs_idx.find_sub_range(nbit_right);
@@ -2909,7 +2545,7 @@ bvector<Alloc>::block_count_to(const bm::word_t*    block,
  template<typename Alloc>
  typename bvector<Alloc>::size_type 
  bvector<Alloc>::count_to(size_type right,
-                         const rs_index_type&  rs_idx) const
+                         const rs_index_type&  rs_idx) const BMNOEXCEPT
  {
      BM_ASSERT(right < bm::id_max);
      if (!blockman_.is_init())
@@ -2963,7 +2599,7 @@ bvector<Alloc>::count_to(size_type right,
  template<typename Alloc>
  typename bvector<Alloc>::size_type 
  bvector<Alloc>::count_to_test(size_type right,
-                              const rs_index_type&  blocks_cnt) const
+                              const rs_index_type&  rs_idx) const BMNOEXCEPT
  {
      BM_ASSERT(right < bm::id_max);
      if (!blockman_.is_init())
@@ -2972,15 +2608,13 @@ bvector<Alloc>::count_to_test(size_type right,
      unsigned nblock_right = unsigned(right >> bm::set_block_shift);
      unsigned nbit_right = unsigned(right & bm::set_block_mask);
  
-    // running count of all blocks before target
-    //
-    size_type cnt = 0;
      unsigned i, j;
      bm::get_block_coord(nblock_right, i, j);
      const bm::word_t* block = blockman_.get_block_ptr(i, j);
  
+    size_type cnt = 0;
      if (!block)
-        return 0;
+        return cnt;
  
      bool gap = BM_IS_GAP(block);
      if (gap)
@@ -2989,7 +2623,7 @@ bvector<Alloc>::count_to_test(size_type right,
          if (bm::gap_test_unr(gap_blk, (gap_word_t)nbit_right))
              cnt = bm::gap_bit_count_to(gap_blk, (gap_word_t)nbit_right);
          else
-            return 0;
+            return cnt;
      }
      else
      {
@@ -3004,14 +2638,16 @@ bvector<Alloc>::count_to_test(size_type right,
              w &= (1u << (nbit_right & bm::set_word_mask));
              if (w)
              {
-                cnt = block_count_to(block, nblock_right, nbit_right, blocks_cnt);
+                cnt = block_count_to(block, nblock_right, nbit_right, rs_idx);
                  BM_ASSERT(cnt == bm::bit_block_calc_count_to(block, nbit_right));
              }
              else
-                return 0;
+            {
+                return cnt;
+            }
          }
      }
-    cnt += nblock_right ? blocks_cnt.rcount(nblock_right - 1) : 0;
+    cnt += nblock_right ? rs_idx.rcount(nblock_right - 1) : 0;
      return cnt;
  }
  
@@ -3019,22 +2655,67 @@ bvector<Alloc>::count_to_test(size_type right,
  
  template<typename Alloc>
  typename bvector<Alloc>::size_type
-bvector<Alloc>::count_range(size_type left, size_type right) const
+bvector<Alloc>::rank_corrected(size_type right,
+                               const rs_index_type&  rs_idx) const BMNOEXCEPT
+{
+  BM_ASSERT(right < bm::id_max);
+  if (!blockman_.is_init())
+      return 0;
+
+  unsigned nblock_right = unsigned(right >> bm::set_block_shift);
+  unsigned nbit_right = unsigned(right & bm::set_block_mask);
+
+  size_type cnt = nblock_right ? rs_idx.rcount(nblock_right - 1) : 0;
+
+  unsigned i, j;
+  bm::get_block_coord(nblock_right, i, j);
+  const bm::word_t* block = blockman_.get_block_ptr(i, j);
+
+  if (!block)
+      return cnt;
+
+  bool gap = BM_IS_GAP(block);
+  if (gap)
+  {
+      cnt += bm::gap_bit_count_to(BMGAP_PTR(block), (gap_word_t)nbit_right,
+                                  true /* rank corrected */);
+  }
+  else
+  {
+      if (block == FULL_BLOCK_FAKE_ADDR)
+          cnt += nbit_right;
+      else
+      {
+          cnt += block_count_to(block, nblock_right, nbit_right, rs_idx);
+          unsigned w = block[nbit_right >> bm::set_word_shift] &
+                       (1u << (nbit_right & bm::set_word_mask));
+          cnt -= bool(w); // rank correction
+      }
+  }
+  return cnt;
+}
+
+
+// -----------------------------------------------------------------------
+
+template<typename Alloc>
+typename bvector<Alloc>::size_type
+bvector<Alloc>::count_range(size_type left, size_type right) const BMNOEXCEPT
  {
      BM_ASSERT(left < bm::id_max && right < bm::id_max);
-    BM_ASSERT(left <= right);
+    if (left > right)
+        bm::xor_swap(left, right);
+    if (right == bm::id_max)
+        --right;
  
-    BM_ASSERT_THROW(right < bm::id_max, BM_ERR_RANGE);
-    BM_ASSERT_THROW(left <= right, BM_ERR_RANGE);
-    
      if (!blockman_.is_init())
          return 0;
  
      size_type cnt = 0;
  
      // calculate logical number of start and destination blocks
-    unsigned nblock_left  = unsigned(left  >>  bm::set_block_shift);
-    unsigned nblock_right = unsigned(right >>  bm::set_block_shift);
+    block_idx_type nblock_left  = (left  >>  bm::set_block_shift);
+    block_idx_type nblock_right = (right >>  bm::set_block_shift);
  
      unsigned i0, j0;
      bm::get_block_coord(nblock_left, i0, j0);
@@ -3076,13 +2757,15 @@ bvector<Alloc>::count_range(size_type left, size_type right) const
      {
          return cnt;
      }
-    
+
+    // process all full mid-blocks
      {
          func.reset();
          word_t*** blk_root = blockman_.top_blocks_root();
-        unsigned top_blocks_size = blockman_.top_block_size();
+        block_idx_type top_blocks_size = blockman_.top_block_size();
          
-        bm::for_each_nzblock_range(blk_root, top_blocks_size, nblock_left+1, nblock_right-1, func);
+        bm::for_each_nzblock_range(blk_root, top_blocks_size,
+                                   nblock_left+1, nblock_right-1, func);
          cnt += func.count();
      }
      
@@ -3098,27 +2781,205 @@ bvector<Alloc>::count_range(size_type left, size_type right) const
                                            (gap_word_t)0,
                                            (gap_word_t)nbit_right);
          }
-        else
+        else
+        {
+            cnt += bm::bit_block_calc_count_range(block, 0, nbit_right);
+        }
+    }
+    return cnt;
+}
+
+// -----------------------------------------------------------------------
+
+template<typename Alloc>
+bool bvector<Alloc>::is_all_one_range(size_type left,
+                                      size_type right) const BMNOEXCEPT
+{
+    if (!blockman_.is_init())
+        return false; // nothing to do
+
+    if (right < left)
+        bm::xor_swap(left, right);
+    if (right == bm::id_max)
+        --right;
+    if (left == right)
+        return test(left);
+
+    BM_ASSERT(left < bm::id_max && right < bm::id_max);
+
+    block_idx_type nblock_left  = (left  >> bm::set_block_shift);
+    block_idx_type nblock_right = (right >> bm::set_block_shift);
+
+    unsigned i0, j0;
+    bm::get_block_coord(nblock_left, i0, j0);
+    const bm::word_t* block = blockman_.get_block(i0, j0);
+
+    if (nblock_left == nblock_right) // hit in the same block
+    {
+        unsigned nbit_left  = unsigned(left  & bm::set_block_mask);
+        unsigned nbit_right = unsigned(right & bm::set_block_mask);
+        return bm::block_is_all_one_range(block, nbit_left, nbit_right);
+    }
+
+    // process entry point block
+    {
+        unsigned nbit_left  = unsigned(left  & bm::set_block_mask);
+        bool all_one = bm::block_is_all_one_range(block,
+                                            nbit_left, (bm::gap_max_bits-1));
+        if (!all_one)
+            return all_one;
+        ++nblock_left;
+    }
+
+    // process tail block
+    {
+        bm::get_block_coord(nblock_right, i0, j0);
+        block = blockman_.get_block(i0, j0);
+        unsigned nbit_right  = unsigned(right  & bm::set_block_mask);
+        bool all_one = bm::block_is_all_one_range(block, 0, nbit_right);
+        if (!all_one)
+            return all_one;
+        --nblock_right;
+    }
+
+    // check all blocks in the middle
+    //
+    if (nblock_left <= nblock_right)
+    {
+        unsigned i_from, j_from, i_to, j_to;
+        bm::get_block_coord(nblock_left, i_from, j_from);
+        bm::get_block_coord(nblock_right, i_to, j_to);
+
+        bm::word_t*** blk_root = blockman_.top_blocks_root();
+
+        for (unsigned i = i_from; i <= i_to; ++i)
+        {
+            bm::word_t** blk_blk = blk_root[i];
+            if (!blk_blk)
+                return false;
+            if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+                continue;
+
+            unsigned j = (i == i_from) ? j_from : 0;
+            unsigned j_limit = (i == i_to) ? j_to+1 : bm::set_sub_array_size;
+            do
+            {
+                bool all_one = bm::check_block_one(blk_blk[j], true);
+                if (!all_one)
+                    return all_one;
+            } while (++j < j_limit);
+        } // for i
+    }
+    return true;
+}
+
+// -----------------------------------------------------------------------
+
+template<typename Alloc>
+bool bvector<Alloc>::any_range(size_type left, size_type right) const BMNOEXCEPT
+{
+    BM_ASSERT(left < bm::id_max && right < bm::id_max);
+
+    if (!blockman_.is_init())
+        return false; // nothing to do
+
+    if (right < left)
+        bm::xor_swap(left, right);
+    if (right == bm::id_max)
+        --right;
+    if (left == right)
+        return test(left);
+
+    block_idx_type nblock_left  = (left  >> bm::set_block_shift);
+    block_idx_type nblock_right = (right >> bm::set_block_shift);
+
+    unsigned i0, j0;
+    bm::get_block_coord(nblock_left, i0, j0);
+    const bm::word_t* block = blockman_.get_block(i0, j0);
+
+    if (nblock_left == nblock_right) // hit in the same block
+    {
+        unsigned nbit_left  = unsigned(left  & bm::set_block_mask);
+        unsigned nbit_right = unsigned(right & bm::set_block_mask);
+        return bm::block_any_range(block, nbit_left, nbit_right);
+    }
+
+    // process entry point block
+    {
+        unsigned nbit_left  = unsigned(left  & bm::set_block_mask);
+        bool any_one = bm::block_any_range(block,
+                                           nbit_left, (bm::gap_max_bits-1));
+        if (any_one)
+            return any_one;
+        ++nblock_left;
+    }
+
+    // process tail block
+    {
+        bm::get_block_coord(nblock_right, i0, j0);
+        block = blockman_.get_block(i0, j0);
+        unsigned nbit_right  = unsigned(right  & bm::set_block_mask);
+        bool any_one = bm::block_any_range(block, 0, nbit_right);
+        if (any_one)
+            return any_one;
+        --nblock_right;
+    }
+
+    // check all blocks in the middle
+    //
+    if (nblock_left <= nblock_right)
+    {
+        unsigned i_from, j_from, i_to, j_to;
+        bm::get_block_coord(nblock_left, i_from, j_from);
+        bm::get_block_coord(nblock_right, i_to, j_to);
+
+        bm::word_t*** blk_root = blockman_.top_blocks_root();
+        {
+            block_idx_type top_size = blockman_.top_block_size();
+            if (i_from >= top_size)
+                return false;
+            if (i_to >= top_size)
+            {
+                i_to = unsigned(top_size-1);
+                j_to = bm::set_sub_array_size-1;
+            }
+        }
+
+        for (unsigned i = i_from; i <= i_to; ++i)
          {
-            cnt += bm::bit_block_calc_count_range(block, 0, nbit_right);
-        }
+            bm::word_t** blk_blk = blk_root[i];
+            if (!blk_blk)
+                continue;
+            if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+                return true;
+
+            unsigned j = (i == i_from) ? j_from : 0;
+            unsigned j_limit = (i == i_to) ? j_to+1 : bm::set_sub_array_size;
+            do
+            {
+                bool any_one = bm::block_any(blk_blk[j]);
+                if (any_one)
+                    return any_one;
+            } while (++j < j_limit);
+        } // for i
      }
-    return cnt;
+    return false;
  }
  
-
  // -----------------------------------------------------------------------
  
  template<typename Alloc>
  typename bvector<Alloc>::size_type
  bvector<Alloc>::count_range(size_type left,
                              size_type right,
-                            const rs_index_type&  rs_idx) const
+                            const rs_index_type&  rs_idx) const BMNOEXCEPT
  {
      BM_ASSERT(left <= right);
  
+    if (left > right)
+        bm::xor_swap(left, right);
+
      BM_ASSERT_THROW(right < bm::id_max, BM_ERR_RANGE);
-    BM_ASSERT_THROW(left <= right, BM_ERR_RANGE);
  
      if (left == right)
          return this->test(left);
@@ -3187,7 +3048,7 @@ bvector<Alloc>& bvector<Alloc>::invert()
  // -----------------------------------------------------------------------
  
  template<typename Alloc> 
-bool bvector<Alloc>::get_bit(size_type n) const
+bool bvector<Alloc>::get_bit(size_type n) const BMNOEXCEPT
  {    
      BM_ASSERT(n < size_);
      BM_ASSERT_THROW((n < size_), BM_ERR_RANGE);
@@ -3309,7 +3170,7 @@ void bvector<Alloc>::set_gap_levels(const gap_word_t* glevel_len)
  // -----------------------------------------------------------------------
  
  template<typename Alloc> 
-int bvector<Alloc>::compare(const bvector<Alloc>& bv) const
+int bvector<Alloc>::compare(const bvector<Alloc>& bv) const BMNOEXCEPT
  {
      int res;
      unsigned top_blocks = blockman_.top_block_size();
@@ -3429,7 +3290,7 @@ int bvector<Alloc>::compare(const bvector<Alloc>& bv) const
  template<typename Alloc>
  bool bvector<Alloc>::find_first_mismatch(
                          const bvector<Alloc>& bvect, size_type& pos,
-                        size_type search_to) const
+                        size_type search_to) const BMNOEXCEPT
  {
      unsigned top_blocks = blockman_.top_block_size();
      bm::word_t*** top_root = blockman_.top_blocks_root();
@@ -3531,7 +3392,7 @@ bool bvector<Alloc>::find_first_mismatch(
  // -----------------------------------------------------------------------
  
  template<typename Alloc>
-void bvector<Alloc>::swap(bvector<Alloc>& bvect) BMNOEXEPT
+void bvector<Alloc>::swap(bvector<Alloc>& bvect) BMNOEXCEPT
  {
      if (this != &bvect)
      {
@@ -3543,7 +3404,8 @@ void bvector<Alloc>::swap(bvector<Alloc>& bvect) BMNOEXEPT
  // -----------------------------------------------------------------------
  
  template<typename Alloc> 
-void bvector<Alloc>::calc_stat(struct bvector<Alloc>::statistics* st) const
+void bvector<Alloc>::calc_stat(
+                   struct bvector<Alloc>::statistics* st) const BMNOEXCEPT
  {
      BM_ASSERT(st);
      
@@ -3572,6 +3434,9 @@ void bvector<Alloc>::calc_stat(struct bvector<Alloc>::statistics* st) const
                  if (!found)
                      break;
                  blk_blk = blk_root[i];
+                BM_ASSERT(blk_blk);
+                if (!blk_blk)
+                    break;
              }
              if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
                  continue;
@@ -3636,8 +3501,7 @@ void bvector<Alloc>::set_bit_no_check(size_type n)
  
      if (block_type) // gap block
      {
-        bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
-        gap_block_set(gap_blk, val, nblock, nbit);
+        this->gap_block_set_no_ret(BMGAP_PTR(blk), val, nblock, nbit);
      }
      else  // bit block
      {
@@ -3798,7 +3662,10 @@ void bvector<Alloc>::import(const size_type* ids, size_type size_in,
          block_idx_type nblock_end = (ids[size_in-1] >> bm::set_block_shift);
          if (nblock == nblock_end) // special case: one block import
          {
-            import_block(ids, nblock, 0, stop);
+            if (stop == 1)
+                set_bit_no_check(ids[0]);
+            else
+                import_block(ids, nblock, 0, stop);
              return;
          }
      }
@@ -3817,7 +3684,11 @@ void bvector<Alloc>::import(const size_type* ids, size_type size_in,
              stop = bm::idx_arr_block_lookup_u32(ids, size_in, nblock, start);
          #endif
          BM_ASSERT(start < stop);
-        import_block(ids, nblock, start, stop);
+
+        if (stop - start == 1 && n < bm::id_max) // just one bit to set
+            set_bit_no_check(n);
+        else
+            import_block(ids, nblock, start, stop);
          start = stop;
      } while (start < size_in);
  }
@@ -3826,17 +3697,22 @@ void bvector<Alloc>::import(const size_type* ids, size_type size_in,
  
  template<class Alloc>
  void bvector<Alloc>::import_block(const size_type* ids,
-                                  block_idx_type nblock,
-                                  size_type start, size_type stop)
+                                  block_idx_type   nblock,
+                                  size_type        start,
+                                  size_type        stop)
  {
+    BM_ASSERT(stop > start);
      int block_type;
      bm::word_t* blk =
-        blockman_.check_allocate_block(nblock, 1, 0, &block_type, true/*allow NULL ret*/);
+        blockman_.check_allocate_block(nblock, 1, 0, &block_type,
+                                       true/*allow NULL ret*/);
      if (!IS_FULL_BLOCK(blk))
      {
+        // TODO: add a special case when we import just a few bits per block
          if (BM_IS_GAP(blk))
+        {
              blk = blockman_.deoptimize_block(nblock); // TODO: try to avoid
-
+        }
          #ifdef BM64ADDR
              bm::set_block_bits_u64(blk, ids, start, stop);
          #else
@@ -3867,62 +3743,71 @@ bool bvector<Alloc>::set_bit_no_check(size_type n, bool val)
          return false;
  
      // calculate word number in block and bit
-    unsigned nbit   = unsigned(n & bm::set_block_mask); 
-
+    unsigned nbit   = unsigned(n & bm::set_block_mask);
      if (block_type) // gap
      {
-        bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
-        unsigned is_set = gap_block_set(gap_blk, val, nblock, nbit);
-        return is_set;
+        return gap_block_set(BMGAP_PTR(blk), val, nblock, nbit);
      }
      else  // bit block
      {
          unsigned nword  = unsigned(nbit >> bm::set_word_shift); 
          nbit &= bm::set_word_mask;
-
          bm::word_t* word = blk + nword;
          bm::word_t  mask = (((bm::word_t)1) << nbit);
  
          if (val)
          {
-            if ( ((*word) & mask) == 0 )
-            {
-                *word |= mask; // set bit
-                return true;
-            }
+            val = ~(*word & mask);
+            *word |= mask; // set bit
+            return val;
          }
          else
          {
-            if ((*word) & mask)
-            {
-                *word &= ~mask; // clear bit
-                return true;
-            }
+            val = ~(*word & mask);
+            *word &= ~mask; // clear bit
+            return val;
          }
      }
-    return false;
+    //return false;
  }
  
  // -----------------------------------------------------------------------
  
  template<class Alloc>
  bool bvector<Alloc>::gap_block_set(bm::gap_word_t* gap_blk,
-                                   bool val, block_idx_type nblock, unsigned nbit)
+                                   bool val, block_idx_type nblock,
+                                   unsigned nbit)
  {
-    unsigned is_set, new_block_len;
-    new_block_len =
-        bm::gap_set_value(val, gap_blk, nbit, &is_set);
-    if (is_set)
+    unsigned is_set, new_len, old_len;
+    old_len = bm::gap_length(gap_blk)-1;
+    new_len = bm::gap_set_value(val, gap_blk, nbit, &is_set);
+    if (old_len < new_len)
      {
          unsigned threshold = bm::gap_limit(gap_blk, blockman_.glen());
-        if (new_block_len > threshold)
-        {
+        if (new_len > threshold)
              blockman_.extend_gap_block(nblock, gap_blk);
-        }
      }
      return is_set;
  }
  
+// -----------------------------------------------------------------------
+
+template<class Alloc>
+void bvector<Alloc>::gap_block_set_no_ret(bm::gap_word_t* gap_blk,
+                        bool val, block_idx_type nblock, unsigned nbit)
+{
+    unsigned new_len, old_len;
+    old_len = bm::gap_length(gap_blk)-1;
+    new_len = bm::gap_set_value(val, gap_blk, nbit);
+    if (old_len < new_len)
+    {
+        unsigned threshold = bm::gap_limit(gap_blk, blockman_.glen());
+        if (new_len > threshold)
+            blockman_.extend_gap_block(nblock, gap_blk);
+    }
+}
+
+
  // -----------------------------------------------------------------------
  
  template<class Alloc>
@@ -4089,11 +3974,11 @@ bool bvector<Alloc>::and_bit_no_check(size_type n, bool val)
  //---------------------------------------------------------------------
  
  template<class Alloc>
-bool bvector<Alloc>::find(size_type from, size_type& pos) const
+bool bvector<Alloc>::find(size_type from, size_type& pos) const BMNOEXCEPT
  {
-    BM_ASSERT_THROW(from < bm::id_max, BM_ERR_RANGE);
-
-    if (from == 0)
+    if (from == bm::id_max)
+        return false;
+    if (!from)
      {
          return find(pos);
      }
@@ -4104,7 +3989,7 @@ bool bvector<Alloc>::find(size_type from, size_type& pos) const
  //---------------------------------------------------------------------
  
  template<class Alloc>
-bool bvector<Alloc>::find_reverse(size_type& pos) const
+bool bvector<Alloc>::find_reverse(size_type& pos) const BMNOEXCEPT
  {
      bool found;
      
@@ -4138,7 +4023,9 @@ bool bvector<Alloc>::find_reverse(size_type& pos) const
                      }
                      if (found)
                      {
-                        block_idx_type base_idx = block_idx_type(i) * bm::set_sub_array_size * bm::gap_max_bits;
+                        block_idx_type base_idx =
+                            block_idx_type(i) * bm::set_sub_array_size *
+                            bm::gap_max_bits;
                          base_idx += j * bm::gap_max_bits;
                          pos = base_idx + block_pos;
                          return found;
@@ -4159,7 +4046,7 @@ bool bvector<Alloc>::find_reverse(size_type& pos) const
  //---------------------------------------------------------------------
  
  template<class Alloc>
-bool bvector<Alloc>::find(size_type& pos) const
+bool bvector<Alloc>::find(size_type& pos) const BMNOEXCEPT
  {
      bool found;
      
@@ -4205,7 +4092,8 @@ bool bvector<Alloc>::find(size_type& pos) const
  //---------------------------------------------------------------------
  
  template<class Alloc>
-bool bvector<Alloc>::find_range(size_type& in_first, size_type& in_last) const
+bool bvector<Alloc>::find_range(size_type& in_first,
+                                size_type& in_last) const BMNOEXCEPT
  {
      bool found = find(in_first);
      if (found)
@@ -4226,7 +4114,7 @@ bool bvector<Alloc>::find_range(size_type& in_first, size_type& in_last) const
  template<class Alloc>
  bool bvector<Alloc>::find_rank(size_type  rank_in, 
                                 size_type  from, 
-                               size_type& pos) const
+                               size_type& pos) const BMNOEXCEPT
  {
      BM_ASSERT_THROW(from < bm::id_max, BM_ERR_RANGE);
  
@@ -4280,7 +4168,7 @@ template<class Alloc>
  bool bvector<Alloc>::find_rank(size_type             rank_in, 
                                 size_type             from, 
                                 size_type&            pos,
-                               const rs_index_type&  rs_idx) const
+                               const rs_index_type&  rs_idx) const BMNOEXCEPT
  {
      BM_ASSERT_THROW(from < bm::id_max, BM_ERR_RANGE);
  
@@ -4349,7 +4237,7 @@ bool bvector<Alloc>::find_rank(size_type             rank_in,
  
  template<class Alloc>
  bool bvector<Alloc>::select(size_type rank_in, size_type& pos,
-                            const rs_index_type&  rs_idx) const
+                            const rs_index_type&  rs_idx) const BMNOEXCEPT
  {
      bool ret = false;
      
@@ -4385,7 +4273,7 @@ bool bvector<Alloc>::select(size_type rank_in, size_type& pos,
  
  template<class Alloc> 
  typename bvector<Alloc>::size_type 
-bvector<Alloc>::check_or_next(size_type prev) const
+bvector<Alloc>::check_or_next(size_type prev) const BMNOEXCEPT
  {
      if (!blockman_.is_init())
          return 0;
@@ -4836,7 +4724,7 @@ void bvector<Alloc>::erase(size_type n)
  //---------------------------------------------------------------------
  
  template<class Alloc>
-bool bvector<Alloc>::test_first_block_bit(block_idx_type nb) const
+bool bvector<Alloc>::test_first_block_bit(block_idx_type nb) const BMNOEXCEPT
  {
      if (nb >= bm::set_total_blocks) // last possible block
          return false;
@@ -6529,7 +6417,10 @@ bvector<Alloc>::combine_operation_with_block(block_idx_type    nb,
                  BM_ASSERT(gfunc);
                  (*gfunc)(blk, BMGAP_PTR(arg_blk));
  
-                blockman_.optimize_bit_block(nb);
+                // TODO: commented out optimization, because it can be very slow
+                // need to take into account previous operation not to make
+                // fruitless attempts here
+                //blockman_.optimize_bit_block(nb);
                  return;
              }
              
@@ -6860,7 +6751,468 @@ void bvector<Alloc>::throw_bad_alloc()
  }
  
  //---------------------------------------------------------------------
+//
+//---------------------------------------------------------------------
+
+template<class Alloc>
+bool bvector<Alloc>::enumerator::go_up() BMNOEXCEPT
+{ // step to the next "ON" bit; returns false and invalidates when none is left
+    BM_ASSERT(this->valid());
+    // bdescr_ keeps the decoding state for the current block (GAP or bit view)
+    block_descr_type* bdescr = &(this->bdescr_);
+    if (this->block_type_) // GAP
+    {
+        BM_ASSERT(this->block_type_ == 1);
+        ++this->position_;
+        if (--(bdescr->gap_.gap_len)) // bits still left in the current "ON" run
+          return true;
+        // the current "ON" run is used up; the run that follows is "OFF" by definition.
+        if (*(bdescr->gap_.ptr) != bm::gap_max_bits - 1) // run did not end on the block's last bit
+        {
+            gap_word_t prev = *(bdescr->gap_.ptr);
+            unsigned val = *(++(bdescr->gap_.ptr));
+            this->position_ += val - prev; // jump over the whole "OFF" run
+            // the run after the "OFF" one is "ON" again (if the block has one)
+            if (*(bdescr->gap_.ptr) != bm::gap_max_bits - 1)
+            {
+                prev = *(bdescr->gap_.ptr);
+                val = *(++(bdescr->gap_.ptr));
+                bdescr->gap_.gap_len = (gap_word_t)(val - prev); // length of the new "ON" run
+                return true;  // next "ON" found;
+            }
+        }
+    }
+    else // BIT
+    {
+        unsigned short idx = ++(bdescr->bit_.idx);
+        if (idx < bdescr->bit_.cnt) // more decoded bits left in the current wave
+        {
+            this->position_ = bdescr->bit_.pos + bdescr->bit_.bits[idx];
+            return true;
+        }
+        this->position_ += // wave exhausted: move position_ to the start of the next wave
+            (bm::set_bitscan_wave_size * 32) - bdescr->bit_.bits[--idx];
+        bdescr->bit_.ptr += bm::set_bitscan_wave_size;
+        if (decode_bit_group(bdescr)) // scans the rest of this block wave by wave
+          return true;
+    }
+    // nothing more in the current block: look through the blocks that follow
+    if (search_in_blocks())
+      return true;
+
+    this->invalidate();
+    return false;
+}
+
+//---------------------------------------------------------------------
+
+
+template<class Alloc>
+bool bvector<Alloc>::enumerator::skip(size_type rank) BMNOEXCEPT
+{ // move forward by `rank` steps; false if the enumerator is or becomes invalid
+    if (!this->valid())
+        return false;
+    if (!rank)
+      return this->valid(); // nothing to do
+    // each pass consumes rank inside the current block, then moves to the next block
+    for (; rank; --rank)
+    {
+          block_descr_type* bdescr = &(this->bdescr_);
+          switch (this->block_type_)
+          {
+          case 0:   //  BitBlock
+              for (; rank; --rank)
+              {
+                  unsigned short idx = ++(bdescr->bit_.idx);
+                  if (idx < bdescr->bit_.cnt) // more decoded bits left in the current wave
+                  {
+                      this->position_ = bdescr->bit_.pos + bdescr->bit_.bits[idx];
+                      continue;
+                  }
+                  this->position_ += // wave exhausted: move position_ to the next wave start
+                      (bm::set_bitscan_wave_size * 32) - bdescr->bit_.bits[--idx];
+                  bdescr->bit_.ptr += bm::set_bitscan_wave_size;
+                  // rank-aware scan: takes rank by reference and may reduce it
+                  if (!decode_bit_group(bdescr, rank))
+                      break; // reached the end of the block
+              } // for rank
+              break;
+          case 1:   // DGAP Block
+              for (; rank; --rank) // TODO: better skip logic
+              {
+                  ++this->position_;
+                  if (--(bdescr->gap_.gap_len)) // bits still left in the current "ON" run
+                      continue;
+
+                  // next gap is "OFF" by definition.
+                  if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1)
+                      break; // the "ON" run ended on the block's last bit
+                  gap_word_t prev = *(bdescr->gap_.ptr);
+                  unsigned int val = *(++(bdescr->gap_.ptr));
+
+                  this->position_ += val - prev; // jump over the whole "OFF" run
+                  // next gap is now "ON"
+                  if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1)
+                      break; // the "OFF" run reached the end of the block
+                  prev = *(bdescr->gap_.ptr);
+                  val = *(++(bdescr->gap_.ptr));
+                  bdescr->gap_.gap_len = (gap_word_t)(val - prev); // length of the new "ON" run
+              } // for rank
+              break;
+          default:
+              BM_ASSERT(0);
+          } // switch
+          // rank used up inside the current block: the enumerator is positioned
+          if (!rank)
+              return true;
+          // block exhausted with rank left over: continue from the next block with bits
+          if (!search_in_blocks())
+          {
+              this->invalidate();
+              return false;
+          }
+    } // for rank
+
+    return this->valid();
+}
+
+
+//---------------------------------------------------------------------
+
+
+template<class Alloc>
+bool bvector<Alloc>::enumerator::go_to(size_type pos) BMNOEXCEPT
+{ // position on `pos`, or on the later position reported by check_or_next()
+    if (pos == 0)
+    {
+        go_first();
+        return this->valid();
+    }
+    // pos == 0 was handled above, so a zero result below is read as "not found"
+    size_type new_pos = this->bv_->check_or_next(pos); // find the true pos
+    if (!new_pos) // no bits available
+    {
+        this->invalidate();
+        return false;
+    }
+    BM_ASSERT(new_pos >= pos);
+    pos = new_pos;
+
+    // locate the block that holds pos and load it into the enumerator state
+    this->position_ = pos;
+    size_type nb = this->block_idx_ = (pos >> bm::set_block_shift);
+    bm::bvector<Alloc>::blocks_manager_type& bman =
+                                       this->bv_->get_blocks_manager();
+    unsigned i0, j0;
+    bm::get_block_coord(nb, i0, j0);
+    this->block_ = bman.get_block(i0, j0);
+    // NOTE(review): go_first() maps FULL_BLOCK_FAKE_ADDR to the real block, this path does not - confirm
+    BM_ASSERT(this->block_);
+
+    this->block_type_ = (bool)BM_IS_GAP(this->block_); // 1 - GAP block, 0 - bit block
+
+    block_descr_type* bdescr = &(this->bdescr_);
+    unsigned nbit = unsigned(pos & bm::set_block_mask); // bit offset inside the block
+
+    if (this->block_type_) // gap
+    {
+        this->position_ = nb * bm::set_block_size * 32; // first bit of block nb
+        search_in_gapblock(); // lands on the block's first "ON" run, if any
+
+        if (this->position_ == pos) // pos is the start of that first run: state is ready
+          return this->valid();
+        this->position_ = pos;
+
+        gap_word_t* gptr = BMGAP_PTR(this->block_);
+        unsigned is_set;
+        unsigned gpos = bm::gap_bfind(gptr, nbit, &is_set); // presumably index of the run covering nbit
+        BM_ASSERT(is_set);
+
+        bdescr->gap_.ptr = gptr + gpos;
+        if (gpos == 1)
+        {
+            bdescr->gap_.gap_len = bm::gap_word_t(gptr[gpos] - (nbit - 1)); // = gptr[gpos] - nbit + 1
+        }
+        else
+        {
+            bm::gap_word_t interval = bm::gap_word_t(gptr[gpos] - gptr[gpos - 1]);
+            bm::gap_word_t interval2 = bm::gap_word_t(nbit - gptr[gpos - 1]);
+            bdescr->gap_.gap_len = bm::gap_word_t(interval - interval2 + 1); // = gptr[gpos] - nbit + 1
+        }
+    }
+    else // bit
+    {
+        if (nbit == 0)
+        {
+            search_in_bitblock(); // scan from the first word of the block
+            return this->valid();
+        }
+
+        unsigned nword  = unsigned(nbit >> bm::set_word_shift);
+
+        // check if we need to step back to match the wave
+        unsigned parity = nword % bm::set_bitscan_wave_size; // word index inside its wave
+        bdescr->bit_.ptr = this->block_ + (nword - parity);
+        bdescr->bit_.cnt = bm::bitscan_wave(bdescr->bit_.ptr, bdescr->bit_.bits);
+        BM_ASSERT(bdescr->bit_.cnt);
+        bdescr->bit_.pos = (nb * bm::set_block_size * 32) + ((nword - parity) * 32);
+        bdescr->bit_.idx = 0;
+        nbit &= bm::set_word_mask;
+        nbit += 32 * parity; // nbit is now the bit offset inside the wave
+        for (unsigned i = 0; i < bdescr->bit_.cnt; ++i) // find that offset among the decoded bits
+        {
+            if (bdescr->bit_.bits[i] == nbit)
+              return this->valid();
+            bdescr->bit_.idx++;
+        } // for
+        BM_ASSERT(0); // the offset was expected to be among the decoded bits
+    }
+    return this->valid();
+}
+
+//---------------------------------------------------------------------
+
+template<class Alloc>
+void bvector<Alloc>::enumerator::go_first() BMNOEXCEPT
+{ // position on the first "ON" bit of the vector, or invalidate if there is none
+    BM_ASSERT(this->bv_);
+
+    blocks_manager_type* bman = &(this->bv_->blockman_);
+    if (!bman->is_init())
+    {
+        this->invalidate();
+        return;
+    }
+    // walk the two-level block table from the very beginning
+    bm::word_t*** blk_root = bman->top_blocks_root();
+    this->block_idx_ = this->position_= 0;
+    unsigned i, j;
+
+    for (i = 0; i < bman->top_block_size(); ++i)
+    {
+        bm::word_t** blk_blk = blk_root[i];
+        if (blk_blk == 0) // not allocated
+        {
+          this->block_idx_ += bm::set_sub_array_size; // skip the whole sub-array of blocks
+          this->position_ += bm::bits_in_array;
+          continue;
+        }
+        // the FAKE address is a marker value: use the REAL sub-array for indexing
+        if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+          blk_blk = FULL_SUB_BLOCK_REAL_ADDR;
+
+        for (j = 0; j < bm::set_sub_array_size; ++j,++(this->block_idx_))
+        {
+            this->block_ = blk_blk[j];
+            if (this->block_ == 0) // no block here: skip its bit range
+            {
+                this->position_ += bits_in_block;
+                continue;
+            }
+            if (BM_IS_GAP(this->block_))
+            {
+                this->block_type_ = 1;
+                if (search_in_gapblock()) // true when an "ON" run was found
+                    return;
+            }
+            else
+            {
+                if (this->block_ == FULL_BLOCK_FAKE_ADDR)
+                  this->block_ = FULL_BLOCK_REAL_ADDR;
+                this->block_type_ = 0;
+                if (search_in_bitblock()) // true when a non-empty wave was found
+                    return;
+            }
+        } // for j
+    } // for i
+    // every block was scanned without finding a bit
+    this->invalidate();
+}
+
+//---------------------------------------------------------------------
+
+template<class Alloc>
+bool
+bvector<Alloc>::enumerator::decode_wave(block_descr_type* bdescr) BMNOEXCEPT
+{ // decode the wave at bit_.ptr into bit_.bits; true if it produced any entries
+    bdescr->bit_.cnt = bm::bitscan_wave(bdescr->bit_.ptr, bdescr->bit_.bits);
+    if (bdescr->bit_.cnt) // found
+    {
+        bdescr->bit_.idx = 0; // start reading the decoded list from its first entry
+        return true;
+    }
+    return false;
+}
+
+//---------------------------------------------------------------------
+
+template<class Alloc>
+bool
+bvector<Alloc>::enumerator::decode_bit_group(block_descr_type* bdescr) BMNOEXCEPT
+{ // scan waves from bit_.ptr to the end of the block until one decodes non-empty
+    const word_t* block_end = this->block_ + bm::set_block_size;
+    for (; bdescr->bit_.ptr < block_end;)
+    {
+        if (decode_wave(bdescr))
+        {
+            bdescr->bit_.pos = this->position_; // remember the wave's base position
+            this->position_ += bdescr->bit_.bits[0]; // land on the first decoded bit
+            return true;
+        }
+        this->position_ += bm::set_bitscan_wave_size * 32; // wave size
+        bdescr->bit_.ptr += bm::set_bitscan_wave_size;
+    } // for
+    return false;
+}
+
+//---------------------------------------------------------------------
+
+template<class Alloc>
+bool
+bvector<Alloc>::enumerator::decode_bit_group(block_descr_type* bdescr,
+                                             size_type& rank) BMNOEXCEPT
+{ // rank-aware scan: waves holding fewer than `rank` bits are counted and skipped
+    const word_t* block_end = this->block_ + bm::set_block_size;
+    for (; bdescr->bit_.ptr < block_end;)
+    {
+        const bm::id64_t* w64_p = (bm::id64_t*)bdescr->bit_.ptr;
+        BM_ASSERT(bm::set_bitscan_wave_size == 4); // TODO: better handle this
+        // the wave is read as two 64-bit words, hence the wave size of 4 asserted above
+        unsigned cnt = bm::word_bitcount64(w64_p[0]);
+        cnt += bm::word_bitcount64(w64_p[1]);
+        if (rank > cnt) // target lies beyond this wave: consume its bits from rank
+        {
+            rank -= cnt;
+        }
+        else
+        {
+            if (decode_wave(bdescr)) // target is inside this wave: decode and stop here
+            {
+                bdescr->bit_.pos = this->position_;
+                this->position_ += bdescr->bit_.bits[0];
+                return true;
+            }
+        }
+        this->position_ += bm::set_bitscan_wave_size * 32; // wave size
+        bdescr->bit_.ptr += bm::set_bitscan_wave_size;
+    } // for
+    return false;
+}
+
+//---------------------------------------------------------------------
+
+template<class Alloc>
+bool bvector<Alloc>::enumerator::search_in_bitblock() BMNOEXCEPT
+{ // scan the current bit block from its first word; true if a bit was found
+    BM_ASSERT(this->block_type_ == 0);
+
+    block_descr_type* bdescr = &(this->bdescr_);
+    bdescr->bit_.ptr = this->block_; // rewind the scan pointer to the block start
+    return decode_bit_group(bdescr);
+}
+
+//---------------------------------------------------------------------
+
+template<class Alloc>
+bool bvector<Alloc>::enumerator::search_in_gapblock() BMNOEXCEPT
+{ // find the first "ON" run of the current GAP block; false if it has none
+    BM_ASSERT(this->block_type_ == 1);
+
+    block_descr_type* bdescr = &(this->bdescr_);
+    bdescr->gap_.ptr = BMGAP_PTR(this->block_);
+    unsigned bitval = *(bdescr->gap_.ptr) & 1; // lowest bit of word 0: value of the first run
+
+    ++(bdescr->gap_.ptr);
+    // at most two passes: bitval flips after the first run, so the second one is "ON"
+    for (;true;)
+    {
+        unsigned val = *(bdescr->gap_.ptr);
+        if (bitval)
+        {
+            gap_word_t* first = BMGAP_PTR(this->block_) + 1;
+            if (bdescr->gap_.ptr == first)
+            {
+                bdescr->gap_.gap_len = (gap_word_t)(val + 1); // first run starts at bit 0
+            }
+            else
+            {
+                bdescr->gap_.gap_len = // run length = difference of consecutive run ends
+                     (gap_word_t)(val - *(bdescr->gap_.ptr-1));
+            }
+            return true;
+        }
+        this->position_ += val + 1; // jump over the leading "OFF" run
+        if (val == bm::gap_max_bits - 1) // the "OFF" run covers the whole block
+            break;
+        bitval ^= 1;
+        ++(bdescr->gap_.ptr);
+    }
+    return false;
+}
+
+//---------------------------------------------------------------------
+
+template<class Alloc>
+bool bvector<Alloc>::enumerator::search_in_blocks() BMNOEXCEPT
+{ // continue the scan in the blocks after the current one; true if a bit was found
+    ++(this->block_idx_); // start from the block right after the current one
+    const blocks_manager_type& bman = this->bv_->blockman_;
+    block_idx_type i = this->block_idx_ >> bm::set_array_shift; // top-level index
+    block_idx_type top_block_size = bman.top_block_size();
+    bm::word_t*** blk_root = bman.top_blocks_root();
+    for (; i < top_block_size; ++i)
+    {
+        bm::word_t** blk_blk = blk_root[i];
+        if (blk_blk == 0)
+        {
+            // fast scan forward over consecutive empty top-level entries
+            size_type bn = this->block_idx_ + bm::set_sub_array_size;
+            size_type pos = this->position_ + bm::bits_in_array;
+            for (++i; i < top_block_size; ++i)
+            {
+                if (blk_root[i])
+                    break;
+                bn += bm::set_sub_array_size;
+                pos += bm::bits_in_array;
+            } // for i
+            this->block_idx_ = bn;
+            this->position_ = pos;
+            if ((i < top_block_size) && blk_root[i])
+                --i; // undo the ++i of the outer loop so entry i gets processed
+            continue;
+        }
+        if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+            blk_blk = FULL_SUB_BLOCK_REAL_ADDR; // marker value: index the REAL sub-array
+
+        block_idx_type j = this->block_idx_ & bm::set_array_mask; // index inside the sub-array
+
+        for(; j < bm::set_sub_array_size; ++j, ++(this->block_idx_))
+        {
+            this->block_ = blk_blk[j];
+            if (this->block_ == 0) // no block here: skip its bit range
+            {
+                this->position_ += bm::bits_in_block;
+                continue;
+            }
+            this->block_type_ = BM_IS_GAP(this->block_);
+            if (this->block_type_)
+            {
+                if (search_in_gapblock()) // true when an "ON" run was found
+                    return true;
+            }
+            else
+            {
+                if (this->block_ == FULL_BLOCK_FAKE_ADDR)
+                    this->block_ = FULL_BLOCK_REAL_ADDR;
+                if (search_in_bitblock()) // true when a non-empty wave was found
+                    return true;
+            }
+        } // for j
+    } // for i
+    return false;
+}
  
+//---------------------------------------------------------------------
  
  
  } // namespace
diff --git a/c++/include/util/bitset/bmaggregator.h b/c++/include/util/bitset/bmaggregator.h

index d171f9ce7db4d40887250080b8cee1aa5c90aab2..a996f9b88840bd79453711202bb2b0d5610b0b1d 100644 (file)
--- a/c++/include/util/bitset/bmaggregator.h
+++ b/c++/include/util/bitset/bmaggregator.h
@@ -89,6 +89,7 @@ public:
      
  public:
  
+    // -----------------------------------------------------------------------
      /*! @name Construction and setup */
      //@{
      aggregator();
@@ -105,6 +106,12 @@ public:
      void set_optimization(
          typename bvector_type::optmode opt = bvector_type::opt_compress)
          { opt_mode_ = opt; }
+
+    void set_compute_count(bool count_mode)
+    { // switch result counting on/off; the stored count is zeroed either way
+        compute_count_ = count_mode; count_ = 0;
+    }
+
      //@}
      
      
@@ -122,12 +129,12 @@ public:
          @return current arg group size (0 if vector was not added (empty))
          @sa reset
      */
-    unsigned add(const bvector_type* bv, unsigned agr_group = 0);
+    unsigned add(const bvector_type* bv, unsigned agr_group = 0) BMNOEXCEPT;
      
      /**
          Reset aggregate groups, forget all attached vectors
      */
-    void reset();
+    void reset() BMNOEXCEPT;
  
      /**
          Aggregate added group of vectors using logical OR
@@ -195,7 +202,9 @@ public:
          Set search hint for the range, where results needs to be searched
          (experimental for internal use).
      */
-    void set_range_hint(size_type from, size_type to);
+    void set_range_hint(size_type from, size_type to) BMNOEXCEPT;
+
+    size_type count() const { return count_; } ///< current value of the search result count
      
      //@}
      
@@ -305,10 +314,10 @@ public:
      //@{
  
      /** Get current operation code */
-    int get_operation() const { return operation_; }
+    int get_operation() const BMNOEXCEPT { return operation_; }
  
      /** Set operation code for the aggregator */
-    void set_operation(int op_code) { operation_ = op_code; }
+    void set_operation(int op_code) BMNOEXCEPT { operation_ = op_code; }
  
      /**
          Prepare operation, create internal resources, analyse dependencies.
@@ -361,19 +370,20 @@ protected:
                             bool init_clear = true);
  
      static
-    unsigned max_top_blocks(const bvector_type_const_ptr* bv_src, unsigned src_size);
+    unsigned max_top_blocks(const bvector_type_const_ptr* bv_src,
+                            unsigned src_size) BMNOEXCEPT;
      
      bm::word_t* sort_input_blocks_or(const bvector_type_const_ptr* bv_src,
                                       unsigned src_size,
                                       unsigned i, unsigned j,
                                       unsigned* arg_blk_count,
-                                     unsigned* arg_blk_gap_count);
+                                     unsigned* arg_blk_gap_count) BMNOEXCEPT;
      
      bm::word_t* sort_input_blocks_and(const bvector_type_const_ptr* bv_src,
                                        unsigned src_size,
                                        unsigned i, unsigned j,
                                        unsigned* arg_blk_count,
-                                      unsigned* arg_blk_gap_count);
+                                      unsigned* arg_blk_gap_count) BMNOEXCEPT;
  
  
      bool process_bit_blocks_or(blocks_manager_type& bman_target,
@@ -396,19 +406,24 @@ protected:
      unsigned find_effective_sub_block_size(unsigned i,
                                             const bvector_type_const_ptr* bv_src,
                                             unsigned src_size,
-                                           bool     top_null_as_zero);
-    
-    bool any_carry_overs(unsigned co_size) const;
+                                           bool     top_null_as_zero) BMNOEXCEPT;
+
+    static
+    bool any_carry_overs(const unsigned char* carry_overs,
+                         unsigned co_size)  BMNOEXCEPT;
      
      /**
          @return carry over
      */
-    bool process_shift_right_and(const bm::word_t* arg_blk,
-                                     digest_type&      digest,
-                                     unsigned          carry_over);
-    
+    static
+    unsigned process_shift_right_and(bm::word_t*       BMRESTRICT blk,
+                                 const bm::word_t* BMRESTRICT arg_blk,
+                                 digest_type&      BMRESTRICT digest,
+                                 unsigned          carry_over) BMNOEXCEPT;
+
+    static
      const bm::word_t* get_arg_block(const bvector_type_const_ptr* bv_src,
-                                    unsigned k, unsigned i, unsigned j);
+                                unsigned k, unsigned i, unsigned j) BMNOEXCEPT;
  
      bvector_type* check_create_target();
      
@@ -418,8 +433,8 @@ private:
      /// @internal
      struct arena
      {
-        BM_DECLARE_TEMP_BLOCK(tb1);
-        BM_DECLARE_TEMP_BLOCK(tb_opt);  ///< temp block for results optimization
+        BM_DECLARE_TEMP_BLOCK(tb1)
+        BM_DECLARE_TEMP_BLOCK(tb_opt)  ///< temp block for results optimization
          const bm::word_t*     v_arg_or_blk[max_aggregator_cap];     ///< source blocks list (OR)
          const bm::gap_word_t* v_arg_or_blk_gap[max_aggregator_cap]; ///< source GAP blocks list (OR)
          const bm::word_t*     v_arg_and_blk[max_aggregator_cap];     ///< source blocks list (AND)
@@ -450,8 +465,9 @@ private:
      size_type            range_from_ = bm::id_max; ///< search from
      size_type            range_to_   = bm::id_max; ///< search to
      
-    typename bvector_type::optmode opt_mode_;
-
+    typename bvector_type::optmode opt_mode_; ///< perform search result optimization
+    bool                           compute_count_; ///< compute search result count
+    size_type                      count_;         ///< search result count
  };
  
  
@@ -515,7 +531,9 @@ void aggregator_pipeline_execute(It  first, It last)
  
  template<typename BV>
  aggregator<BV>::aggregator()
-: opt_mode_(bvector_type::opt_none)
+: opt_mode_(bvector_type::opt_none),
+  compute_count_(false), // counting stays off until set_compute_count(true)
+  count_(0)
  {
      ar_ = (arena*) bm::aligned_new_malloc(sizeof(arena));
  }
@@ -533,18 +551,19 @@ aggregator<BV>::~aggregator()
  // ------------------------------------------------------------------------
  
  template<typename BV>
-void aggregator<BV>::reset()
+void aggregator<BV>::reset() BMNOEXCEPT
  {
      arg_group0_size = arg_group1_size = operation_ = top_block_size_ = 0;
      operation_status_ = op_undefined;
      range_set_ = false;
      range_from_ = range_to_ = bm::id_max;
+    count_ = 0; // drop the stored search result count (compute_count_ is left as is)
  }
  
  // ------------------------------------------------------------------------
  
  template<typename BV>
-void aggregator<BV>::set_range_hint(size_type from, size_type to)
+void aggregator<BV>::set_range_hint(size_type from, size_type to) BMNOEXCEPT
  {
      range_from_ = from; range_to_ = to;
      range_set_ = true;
@@ -553,11 +572,12 @@ void aggregator<BV>::set_range_hint(size_type from, size_type to)
  // ------------------------------------------------------------------------
  
  template<typename BV>
-typename aggregator<BV>::bvector_type* aggregator<BV>::check_create_target()
+typename aggregator<BV>::bvector_type*
+aggregator<BV>::check_create_target()
  {
      if (!bv_target_)
      {
-        bv_target_ = new bvector_type();
+        bv_target_ = new bvector_type(); //TODO: get rid of "new"
          bv_target_->init();
      }
      return bv_target_;
@@ -566,7 +586,8 @@ typename aggregator<BV>::bvector_type* aggregator<BV>::check_create_target()
  // ------------------------------------------------------------------------
  
  template<typename BV>
-unsigned aggregator<BV>::add(const bvector_type* bv, unsigned agr_group)
+unsigned aggregator<BV>::add(const bvector_type* bv,
+                             unsigned agr_group) BMNOEXCEPT
  {
      BM_ASSERT_THROW(agr_group <= 1, BM_ERR_RANGE);
      BM_ASSERT(agr_group <= 1);
@@ -646,6 +667,7 @@ bool aggregator<BV>::find_first_and_sub(size_type& idx)
  template<typename BV>
  void aggregator<BV>::combine_shift_right_and(bvector_type& bv_target)
  {
+    count_ = 0; // restart the search result count for this run
      combine_shift_right_and(bv_target, ar_->arg_bv0, arg_group0_size, false);
  }
  
@@ -890,10 +912,11 @@ bool aggregator<BV>::find_first_and_sub(size_type& idx,
  
  template<typename BV>
  unsigned
-aggregator<BV>::find_effective_sub_block_size(unsigned i,
-                                              const bvector_type_const_ptr* bv_src,
-                                              unsigned src_size,
-                                              bool     top_null_as_zero) 
+aggregator<BV>::find_effective_sub_block_size(
+                                        unsigned i,
+                                        const bvector_type_const_ptr* bv_src,
+                                        unsigned src_size,
+                                        bool     top_null_as_zero) BMNOEXCEPT
  {
      // quick hack to avoid scanning large, arrays, where such scan can be quite
      // expensive by itself (this makes this function approximate)
@@ -924,7 +947,7 @@ aggregator<BV>::find_effective_sub_block_size(unsigned i,
                  max_size = j;
                  break;
              }
-        }
+        } // for j
          if (max_size == bm::set_sub_array_size - 1)
              break;
      } // for k
@@ -992,8 +1015,6 @@ void aggregator<BV>::combine_and(unsigned i, unsigned j,
  {
      BM_ASSERT(src_size);
      
-    typename bvector_type::blocks_manager_type& bman_target = bv_target.get_blocks_manager();
-
      unsigned arg_blk_count = 0;
      unsigned arg_blk_gap_count = 0;
      bm::word_t* blk =
@@ -1012,12 +1033,11 @@ void aggregator<BV>::combine_and(unsigned i, unsigned j,
              if (ar_->v_arg_and_blk[0] == FULL_BLOCK_REAL_ADDR)
              {
                  // another nothing to do: one FULL block
+                blocks_manager_type& bman_target = bv_target.get_blocks_manager();
                  bman_target.check_alloc_top_subblock(i);
                  bman_target.set_block_ptr(i, j, blk);
                  if (++j == bm::set_sub_array_size)
-                {
                      bman_target.validate_top_full(i);
-                }
                  return;
              }
          }
@@ -1032,14 +1052,13 @@ void aggregator<BV>::combine_and(unsigned i, unsigned j,
          //
          if (arg_blk_gap_count)
          {
-            digest =
-                process_gap_blocks_and(arg_blk_gap_count, digest);
+            digest = process_gap_blocks_and(arg_blk_gap_count, digest);
          }
-        if (digest) // some results
+        if (digest) // we have results , allocate block and copy from temp
          {
-            // we have some results, allocate block and copy from temp
+            blocks_manager_type& bman_target = bv_target.get_blocks_manager();
              bman_target.opt_copy_bit_block(i, j, ar_->tb1,
-                                           opt_mode_, ar_->tb_opt);
+                                            opt_mode_, ar_->tb_opt);
          }
      }
  }
@@ -1154,7 +1173,7 @@ aggregator<BV>::process_gap_blocks_and(unsigned    arg_blk_gap_count,
                  bool b = bm::gap_test_unr(ar_->v_arg_and_blk_gap[k], single_bit_idx);
                  if (!b)
                      return 0; // AND 0 causes result to turn 0
-            }
+            } // for k
              break;
          }
      }
@@ -1471,7 +1490,8 @@ unsigned aggregator<BV>::resize_target(bvector_type& bv_target,
  
  template<typename BV>
  unsigned
-aggregator<BV>::max_top_blocks(const bvector_type_const_ptr* bv_src, unsigned src_size)
+aggregator<BV>::max_top_blocks(const bvector_type_const_ptr* bv_src,
+                               unsigned src_size) BMNOEXCEPT
  {
      unsigned top_blocks = 1;
  
@@ -1491,11 +1511,12 @@ aggregator<BV>::max_top_blocks(const bvector_type_const_ptr* bv_src, unsigned sr
  // ------------------------------------------------------------------------
  
  template<typename BV>
-bm::word_t* aggregator<BV>::sort_input_blocks_or(const bvector_type_const_ptr* bv_src,
-                                                 unsigned src_size,
-                                                 unsigned i, unsigned j,
-                                                 unsigned* arg_blk_count,
-                                                 unsigned* arg_blk_gap_count)
+bm::word_t* aggregator<BV>::sort_input_blocks_or(
+                        const bvector_type_const_ptr* bv_src,
+                        unsigned src_size,
+                        unsigned i, unsigned j,
+                        unsigned* arg_blk_count,
+                        unsigned* arg_blk_gap_count) BMNOEXCEPT
  {
      bm::word_t* blk = 0;
      for (unsigned k = 0; k < src_size; ++k)
@@ -1529,11 +1550,12 @@ bm::word_t* aggregator<BV>::sort_input_blocks_or(const bvector_type_const_ptr* b
  // ------------------------------------------------------------------------
  
  template<typename BV>
-bm::word_t* aggregator<BV>::sort_input_blocks_and(const bvector_type_const_ptr* bv_src,
-                                                  unsigned src_size,
-                                                  unsigned i, unsigned j,
-                                                  unsigned* arg_blk_count,
-                                                  unsigned* arg_blk_gap_count)
+bm::word_t* aggregator<BV>::sort_input_blocks_and(
+                                const bvector_type_const_ptr* bv_src,
+                                unsigned src_size,
+                                unsigned i, unsigned j,
+                                unsigned* arg_blk_count,
+                                unsigned* arg_blk_gap_count) BMNOEXCEPT
  {
      unsigned full_blk_cnt = 0;
      bm::word_t* blk = FULL_BLOCK_FAKE_ADDR;
@@ -1683,20 +1705,24 @@ bool aggregator<BV>::combine_shift_right_and(
      {
          if (i > top_block_size_)
          {
-            if (!this->any_carry_overs(src_and_size))
+            if (!any_carry_overs(&ar_->carry_overs_[0], src_and_size))
                  break; // quit early if there is nothing to carry on
          }
  
          unsigned j = 0;
          do
          {
-            bool found = combine_shift_right_and(i, j, bv_target, bv_src_and, src_and_size);
+            bool found =
+            combine_shift_right_and(i, j, bv_target, bv_src_and, src_and_size);
              if (found && any)
                  return found;
          } while (++j < bm::set_sub_array_size);
  
      } // for i
  
+    if (compute_count_)
+        return bool(count_);
+
      return bv_target.any();
  }
  
@@ -1708,7 +1734,6 @@ bool aggregator<BV>::combine_shift_right_and(unsigned i, unsigned j,
                                          const bvector_type_const_ptr* bv_src,
                                          unsigned src_size)
  {
-    blocks_manager_type& bman_target = bv_target.get_blocks_manager();
      bm::word_t* blk = temp_blk_ ? temp_blk_ : ar_->tb1;
      unsigned char* carry_overs = &(ar_->carry_overs_[0]);
  
@@ -1748,18 +1773,33 @@ bool aggregator<BV>::combine_shift_right_and(unsigned i, unsigned j,
          if (blk_zero) // delayed temp block 0-init requested
          {
              bm::bit_block_set(blk, 0);
-            blk_zero = false;
+            blk_zero = !blk_zero; // = false
          }
          const bm::word_t* arg_blk = get_arg_block(bv_src, k, i, j);
-        carry_overs[k] = process_shift_right_and(arg_blk, digest, carry_over);
+        carry_overs[k] = (unsigned char)
+            process_shift_right_and(blk, arg_blk, digest, carry_over);
+        BM_ASSERT(carry_overs[k] == 0 || carry_overs[k] == 1);
      } // for k
-    
+
+    if (blk_zero) // delayed temp block 0-init
+    {
+        bm::bit_block_set(blk, 0);
+    }
      // block now gets emitted into the target bit-vector
      if (digest)
      {
          BM_ASSERT(!bm::bit_is_all_zero(blk));
-        bman_target.opt_copy_bit_block(i, j, blk,
-                                       opt_mode_, ar_->tb_opt);
+
+        if (compute_count_)
+        {
+            unsigned cnt = bm::bit_block_count(blk, digest);
+            count_ += cnt;
+        }
+        else
+        {
+            blocks_manager_type& bman_target = bv_target.get_blocks_manager();
+            bman_target.opt_copy_bit_block(i, j, blk, opt_mode_, ar_->tb_opt);
+        }
          return true;
      }
      return false;
@@ -1768,11 +1808,13 @@ bool aggregator<BV>::combine_shift_right_and(unsigned i, unsigned j,
  // ------------------------------------------------------------------------
  
  template<typename BV>
-bool aggregator<BV>::process_shift_right_and(const bm::word_t* arg_blk,
-                                             digest_type&      digest,
-                                             unsigned          carry_over)
+unsigned aggregator<BV>::process_shift_right_and(
+                            bm::word_t*       BMRESTRICT blk,
+                            const bm::word_t* BMRESTRICT arg_blk,
+                            digest_type&      BMRESTRICT digest,
+                            unsigned                    carry_over) BMNOEXCEPT
  {
-    bm::word_t* blk = temp_blk_ ? temp_blk_ : ar_->tb1;
+    BM_ASSERT(carry_over == 1 || carry_over == 0);
  
      if (BM_IS_GAP(arg_blk)) // GAP argument
      {
@@ -1800,8 +1842,8 @@ bool aggregator<BV>::process_shift_right_and(const bm::word_t* arg_blk,
              if (digest)
              {
                  carry_over =
-                bm::bit_block_shift_r1_and_unr(blk, carry_over, arg_blk,
-                                               &digest);
+                    bm::bit_block_shift_r1_and_unr(blk, carry_over, arg_blk,
+                                                   &digest);
              }
              else // digest == 0
              {
@@ -1813,13 +1855,12 @@ bool aggregator<BV>::process_shift_right_and(const bm::word_t* arg_blk,
          }
          else  // arg is zero - target block => zero
          {
-            unsigned co = blk[bm::set_block_size-1] >> 31; // carry out
+            carry_over = blk[bm::set_block_size-1] >> 31; // carry out
              if (digest)
              {
                  bm::bit_block_set(blk, 0);  // TODO: digest based set
-                digest ^= digest;
+                digest = 0;
              }
-            carry_over = co;
          }
      }
      return carry_over;
@@ -1829,22 +1870,26 @@ bool aggregator<BV>::process_shift_right_and(const bm::word_t* arg_blk,
  
  template<typename BV>
  const bm::word_t* aggregator<BV>::get_arg_block(
-                                        const bvector_type_const_ptr* bv_src,
-                                        unsigned k, unsigned i, unsigned j)
+                                const bvector_type_const_ptr* bv_src,
+                                unsigned k, unsigned i, unsigned j) BMNOEXCEPT
  {
-    const blocks_manager_type& bman_arg = bv_src[k]->get_blocks_manager();
-    return bman_arg.get_block(i, j);
+    return bv_src[k]->get_blocks_manager().get_block(i, j);
  }
  
  // ------------------------------------------------------------------------
  
  template<typename BV>
-bool aggregator<BV>::any_carry_overs(unsigned co_size) const
+bool aggregator<BV>::any_carry_overs(const unsigned char* carry_overs,
+                                     unsigned co_size)  BMNOEXCEPT
  {
-    for (unsigned i = 0; i < co_size; ++i)
-        if (ar_->carry_overs_[i])
-            return true;
-    return false;
+    // TODO: loop unroll?
+    unsigned acc = carry_overs[0];
+    for (unsigned i = 1; i < co_size; ++i)
+        acc |= carry_overs[i];
+//        if (ar_->carry_overs_[i])
+//            return true;
+//    return false;
+    return acc;
  }
  
  // ------------------------------------------------------------------------
@@ -1888,7 +1933,7 @@ aggregator<BV>::run_step(unsigned i, unsigned j)
          {
          if (i > top_block_size_)
          {
-            if (!this->any_carry_overs(arg_group0_size))
+            if (!this->any_carry_overs(&ar_->carry_overs_[0], arg_group0_size))
              {
                  operation_status_ = op_done;
                  return operation_status_;
diff --git a/c++/include/util/bitset/bmalgo.h b/c++/include/util/bitset/bmalgo.h

index a44c5fc47c711ca0bd37c75f8c4a9e60c055909d..669dab93c73c9dcf7366c21874d5ff980ccd3d21 100644 (file)
--- a/c++/include/util/bitset/bmalgo.h
+++ b/c++/include/util/bitset/bmalgo.h
@@ -46,7 +46,7 @@ namespace bm
      \ingroup  setalgo
  */
  template<class BV>
-typename BV::size_type count_and(const BV& bv1, const BV& bv2)
+typename BV::size_type count_and(const BV& bv1, const BV& bv2) BMNOEXCEPT
  {
      return bm::distance_and_operation(bv1, bv2);
  }
@@ -59,7 +59,7 @@ typename BV::size_type count_and(const BV& bv1, const BV& bv2)
     \ingroup  setalgo
  */
  template<class BV>
-typename BV::size_type any_and(const BV& bv1, const BV& bv2)
+typename BV::size_type any_and(const BV& bv1, const BV& bv2) BMNOEXCEPT
  {
      distance_metric_descriptor dmd(bm::COUNT_AND);
  
@@ -78,7 +78,7 @@ typename BV::size_type any_and(const BV& bv1, const BV& bv2)
  */
  template<class BV>
  bm::distance_metric_descriptor::size_type
-count_xor(const BV& bv1, const BV& bv2)
+count_xor(const BV& bv1, const BV& bv2) BMNOEXCEPT
  {
      distance_metric_descriptor dmd(bm::COUNT_XOR);
  
@@ -94,7 +94,7 @@ count_xor(const BV& bv1, const BV& bv2)
     \ingroup  setalgo
  */
  template<class BV>
-typename BV::size_type any_xor(const BV& bv1, const BV& bv2)
+typename BV::size_type any_xor(const BV& bv1, const BV& bv2) BMNOEXCEPT
  {
      distance_metric_descriptor dmd(bm::COUNT_XOR);
  
@@ -112,7 +112,7 @@ typename BV::size_type any_xor(const BV& bv1, const BV& bv2)
     \ingroup  setalgo
  */
  template<class BV>
-typename BV::size_type count_sub(const BV& bv1, const BV& bv2)
+typename BV::size_type count_sub(const BV& bv1, const BV& bv2) BMNOEXCEPT
  {
      distance_metric_descriptor dmd(bm::COUNT_SUB_AB);
  
@@ -129,7 +129,7 @@ typename BV::size_type count_sub(const BV& bv1, const BV& bv2)
     \ingroup  setalgo
  */
  template<class BV>
-typename BV::size_type any_sub(const BV& bv1, const BV& bv2)
+typename BV::size_type any_sub(const BV& bv1, const BV& bv2) BMNOEXCEPT
  {
      distance_metric_descriptor dmd(bm::COUNT_SUB_AB);
  
@@ -146,7 +146,7 @@ typename BV::size_type any_sub(const BV& bv1, const BV& bv2)
     \ingroup  setalgo
  */
  template<class BV>
-typename BV::size_type count_or(const BV& bv1, const BV& bv2)
+typename BV::size_type count_or(const BV& bv1, const BV& bv2) BMNOEXCEPT
  {
      distance_metric_descriptor dmd(bm::COUNT_OR);
  
@@ -162,7 +162,7 @@ typename BV::size_type count_or(const BV& bv1, const BV& bv2)
     \ingroup  setalgo
  */
  template<class BV>
-typename BV::size_type any_or(const BV& bv1, const BV& bv2)
+typename BV::size_type any_or(const BV& bv1, const BV& bv2) BMNOEXCEPT
  {
      distance_metric_descriptor dmd(bm::COUNT_OR);
  
@@ -173,27 +173,28 @@ typename BV::size_type any_or(const BV& bv1, const BV& bv2)
  
  
  #define BM_SCANNER_OP(x) \
-    if (0 != (block = blk_blk[j+x])) \
+if (0 != (block = blk_blk[j+x])) \
+{ \
+    if (BM_IS_GAP(block)) \
      { \
-        if (BM_IS_GAP(block)) \
-        { \
-            bm::for_each_gap_blk(BMGAP_PTR(block), (r+j+x)*bm::bits_in_block,\
-                                 bit_functor); \
-        } \
-        else \
-        { \
-            bm::for_each_bit_blk(block, (r+j+x)*bm::bits_in_block,bit_functor); \
-        } \
-    }
+        bm::for_each_gap_blk(BMGAP_PTR(block), (r+j+x)*bm::bits_in_block,\
+                             bit_functor); \
+    } \
+    else \
+    { \
+        bm::for_each_bit_blk(block, (r+j+x)*bm::bits_in_block,bit_functor); \
+    } \
+}
      
  
  /**
      @brief bit-vector visitor scanner to traverse each 1 bit using C++ visitor
   
      @param bv - bit vector to scan
-    @param bit_functor (should support add_bits() and add_range() methods
+    @param bit_functor - visitor: should support add_bits(), add_range()
   
      \ingroup setalgo
+    @sa for_each_bit_range visit_each_bit
  */
  template<class BV, class Func>
  void for_each_bit(const BV&    bv,
@@ -248,10 +249,100 @@ void for_each_bit(const BV&    bv,
      }  // for i
  }
  
+/**
+    @brief bit-vector range visitor to traverse each 1 bit
+
+    @param bv - bit vector to scan
+    @param left  - start of closed interval [left..right]
+    @param right - end of closed interval [left..right]
+    @param bit_functor - visitor: should support add_bits(), add_range()
+
+    \ingroup setalgo
+    @sa for_each_bit
+*/
+template<class BV, class Func>
+void for_each_bit_range(const BV&             bv,
+                       typename BV::size_type left,
+                       typename BV::size_type right,
+                       Func&                  bit_functor)
+{
+    if (left > right)
+        bm::xor_swap(left, right);
+    if (right == bm::id_max)
+        --right;
+    BM_ASSERT(left < bm::id_max && right < bm::id_max);
+
+    bm::for_each_bit_range_no_check(bv, left, right, bit_functor);
+}
+
+
  #undef BM_SCANNER_OP
  
+
+/// private adaptor for C-style callbacks
+///
+/// @internal
+///
+template <class VCBT, class size_type>
+struct bit_vitor_callback_adaptor
+{
+    typedef VCBT bit_visitor_callback_type;
+
+    bit_vitor_callback_adaptor(void* h, bit_visitor_callback_type cb_func)
+        : handle_(h), func_(cb_func)
+    {}
+
+    void add_bits(size_type offset, const unsigned char* bits, unsigned size)
+    {
+        for (unsigned i = 0; i < size; ++i)
+            func_(handle_, offset + bits[i]);
+    }
+    void add_range(size_type offset, size_type size)
+    {
+        for (size_type i = 0; i < size; ++i)
+            func_(handle_, offset + i);
+    }
+
+    void* handle_;
+    bit_visitor_callback_type func_;
+};
+
+
+/// Functor for bit-copy (for testing)
+///
+/// @internal
+///
+template <class BV>
+struct bit_vistor_copy_functor
+{
+    typedef typename BV::size_type size_type;
+
+    bit_vistor_copy_functor(BV& bv)
+        : bv_(bv)
+    {
+        bv_.init();
+    }
+
+    void add_bits(size_type offset, const unsigned char* bits, unsigned size)
+    {
+        BM_ASSERT(size);
+        for (unsigned i = 0; i < size; ++i)
+            bv_.set_bit_no_check(offset + bits[i]);
+    }
+    void add_range(size_type offset, size_type size)
+    {
+        BM_ASSERT(size);
+        bv_.set_range(offset, offset + size - 1);
+    }
+
+    BV& bv_;
+    bit_visitor_callback_type func_;
+};
+
+
+
  /**
-    @brief bit-vector visitor scanner to traverse each 1 bit using C callback
+    @brief bvector visitor scanner to traverse each 1 bit using C callback
   
      @param bv - bit vector to scan
      @param handle_ptr - handle to private memory used by callback
@@ -267,33 +358,101 @@ void visit_each_bit(const BV&                 bv,
                      bit_visitor_callback_type callback_ptr)
  {
      typedef typename BV::size_type size_type;
-    // private adaptor for C-style callbacks
-    struct callback_adaptor
+    bm::bit_vitor_callback_adaptor<bit_visitor_callback_type, size_type>
+            func(handle_ptr, callback_ptr);
+    bm::for_each_bit(bv, func);
+}
+
+/**
+    @brief bvector visitor scanner to traverse each 1 bit in range (C callback)
+
+    @param bv - bit vector to scan
+    @param left - from [left..right]
+    @param right - to [left..right]
+    @param handle_ptr - handle to private memory used by callback
+    @param callback_ptr - callback function
+
+    \ingroup setalgo
+
+    @sa bit_visitor_callback_type for_each_bit
+*/
+template<class BV>
+void visit_each_bit_range(const BV&                 bv,
+                          typename BV::size_type    left,
+                          typename BV::size_type    right,
+                          void*                     handle_ptr,
+                          bit_visitor_callback_type callback_ptr)
+{
+    typedef typename BV::size_type size_type;
+    bm::bit_vitor_callback_adaptor<bit_visitor_callback_type, size_type>
+            func(handle_ptr, callback_ptr);
+    bm::for_each_bit_range(bv, left, right, func);
+}
+
+/**
+    @brief Algorithm to identify bit-vector ranges (splits) for the rank
+
+    Rank range split algorithm walks the bit-vector to create list of
+    non-overlapping ranges [s1..e1],[s2..e2]...[sN...eN] with requested
+    (rank) number of 1 bits. All ranges should be the same popcount weight,
+    except the last one, which may have less.
+    Scan is progressing from left to right so result ranges will be
+    naturally sorted.
+
+    @param bv       - bit vector to perform the range split scan
+    @param rank     - requested number of bits in each range
+                      if 0 it will create single range [first..last]
+                      to cover the whole bv
+    @param target_v - [out] STL(or STL-like) vector of pairs to keep pairs results
+
+    \ingroup setalgo
+ */
+template<typename BV, typename PairVect>
+void rank_range_split(const BV&              bv,
+                      typename BV::size_type rank,
+                      PairVect&              target_v)
+{
+    target_v.resize(0);
+    typename BV::size_type first, last, pos;
+    bool found = bv.find_range(first, last);
+    if (!found) // empty bit-vector
+        return;
+
+    if (!rank) // if rank is not defined, include the whole vector [first..last]
      {
-        callback_adaptor(void* h, bit_visitor_callback_type cb_func)
-        : handle_(h), func_(cb_func)
-        {}
-        
-        void add_bits(size_type offset, const unsigned char* bits, unsigned size)
+        typename PairVect::value_type pv;
+        pv.first = first; pv.second = last;
+        target_v.push_back(pv);
+        return;
+    }
+
+    while (1)
+    {
+        typename PairVect::value_type pv;
+        found = bv.find_rank(rank, first, pos);
+        if (found)
          {
-            for (unsigned i = 0; i < size; ++i)
-                func_(handle_, offset + bits[i]);
+            pv.first = first; pv.second = pos;
+            target_v.push_back(pv);
+            if (pos >= last)
+                break;
+            first = pos + 1;
+            continue;
          }
-        void add_range(size_type offset, unsigned size)
+        // insufficient rank (last range)
+        found = bv.any_range(first, last);
+        if (found)
          {
-            for (unsigned i = 0; i < size; ++i)
-                func_(handle_, offset + i);
+            pv.first = first; pv.second = last;
+            target_v.push_back(pv);
          }
-        
-        void* handle_;
-        bit_visitor_callback_type func_;
-    };
-    
-    callback_adaptor func(handle_ptr, callback_ptr);
-    bm::for_each_bit(bv, func);
+        break;
+    } // while
+
  }
  
  
+
  /**
      Algorithms for rank compression of bit-vector
  
@@ -562,6 +721,7 @@ void rank_compressor<BV>::compress_by_source(BV& bv_target,
  
  
  
+
  } // bm
  
  #include "bmundef.h"
diff --git a/c++/include/util/bitset/bmalgo_impl.h b/c++/include/util/bitset/bmalgo_impl.h

index b6a245748f4bced386f4b3719c55936d5b11405a..61ef7c97da7968537a5199d4278b580036722721 100644 (file)
--- a/c++/include/util/bitset/bmalgo_impl.h
+++ b/c++/include/util/bitset/bmalgo_impl.h
@@ -70,7 +70,7 @@ enum distance_metric
      \ingroup  distance
  */
  inline
-distance_metric operation2metric(set_operation op)
+distance_metric operation2metric(set_operation op) BMNOEXCEPT
  {
      BM_ASSERT(is_const_set_operation(op));
      if (op == set_COUNT) op = set_COUNT_B;
@@ -95,11 +95,11 @@ struct distance_metric_descriptor
       distance_metric   metric;
       size_type          result;
       
-     distance_metric_descriptor(distance_metric m)
+     distance_metric_descriptor(distance_metric m) BMNOEXCEPT
       : metric(m),
         result(0)
      {}
-    distance_metric_descriptor()
+    distance_metric_descriptor() BMNOEXCEPT
      : metric(bm::COUNT_XOR),
        result(0)
      {}
@@ -107,7 +107,7 @@ struct distance_metric_descriptor
      /*! 
          \brief Sets metric result to 0
      */
-    void reset()
+    void reset() BMNOEXCEPT
      {
          result = 0;
      }
@@ -125,7 +125,7 @@ inline
  void combine_count_operation_with_block(const bm::word_t*           blk,
                                          const bm::word_t*           arg_blk,
                                          distance_metric_descriptor* dmit,
-                                        distance_metric_descriptor* dmit_end)
+                                        distance_metric_descriptor* dmit_end) BMNOEXCEPT
                                              
  {     
       gap_word_t* g1 = BMGAP_PTR(blk);
@@ -340,7 +340,7 @@ void combine_count_operation_with_block(const bm::word_t*           blk,
  */
  inline
  unsigned combine_count_and_operation_with_block(const bm::word_t* blk,
-                                                const bm::word_t* arg_blk)
+                                                const bm::word_t* arg_blk) BMNOEXCEPT
  {
      unsigned gap = BM_IS_GAP(blk);
      unsigned arg_gap = BM_IS_GAP(arg_blk);
@@ -381,7 +381,7 @@ void combine_any_operation_with_block(const bm::word_t* blk,
                                        const bm::word_t* arg_blk,
                                        unsigned arg_gap,
                                        distance_metric_descriptor* dmit,
-                                      distance_metric_descriptor* dmit_end)
+                                      distance_metric_descriptor* dmit_end) BMNOEXCEPT
                                              
  {
       gap_word_t* res=0;
@@ -628,7 +628,7 @@ inline
  unsigned
  combine_count_operation_with_block(const bm::word_t* blk,
                                     const bm::word_t* arg_blk,
-                                   distance_metric metric)
+                                   distance_metric metric) BMNOEXCEPT
  {
      distance_metric_descriptor dmd(metric);
      combine_count_operation_with_block(blk, //gap, 
@@ -649,7 +649,7 @@ combine_any_operation_with_block(const bm::word_t* blk,
                                            unsigned gap,
                                            const bm::word_t* arg_blk,
                                            unsigned arg_gap,
-                                          distance_metric metric)
+                                          distance_metric metric) BMNOEXCEPT
  {
      distance_metric_descriptor dmd(metric);
      combine_any_operation_with_block(blk, gap, 
@@ -668,7 +668,7 @@ combine_any_operation_with_block(const bm::word_t* blk,
  inline
  void distance_stage(const distance_metric_descriptor* dmit,
                      const distance_metric_descriptor* dmit_end,
-                    bool*                             is_all_and)
+                    bool*                             is_all_and) BMNOEXCEPT
  {
      for (const distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
      {
@@ -702,7 +702,7 @@ template<class BV>
  void distance_operation(const BV& bv1, 
                          const BV& bv2, 
                          distance_metric_descriptor* dmit,
-                        distance_metric_descriptor* dmit_end)
+                        distance_metric_descriptor* dmit_end) BMNOEXCEPT
  {
      const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
      const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
@@ -787,7 +787,7 @@ void distance_operation(const BV& bv1,
  */
  template<class BV>
  typename BV::size_type distance_and_operation(const BV& bv1,
-                                              const BV& bv2)
+                                              const BV& bv2) BMNOEXCEPT
  {
      const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
      const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
@@ -858,7 +858,7 @@ template<class BV>
  void distance_operation_any(const BV& bv1, 
                              const BV& bv2, 
                              distance_metric_descriptor* dmit,
-                            distance_metric_descriptor* dmit_end)
+                            distance_metric_descriptor* dmit_end) BMNOEXCEPT
  {
      const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
      const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
@@ -980,7 +980,8 @@ void distance_operation_any(const BV& bv1,
      \internal
  */
  template<typename It, typename SIZE_TYPE>
-It block_range_scan(It  first, It last, SIZE_TYPE nblock, SIZE_TYPE* max_id)
+It block_range_scan(It  first, It last,
+                    SIZE_TYPE nblock, SIZE_TYPE* max_id) BMNOEXCEPT
  {
      SIZE_TYPE m = *max_id;
      It right;
@@ -1333,7 +1334,11 @@ typename BV::size_type count_intervals(const BV& bv)
      typename BV::blocks_manager_type::block_idx_type st = 0;
      bm::for_each_block(blk_root, bman.top_block_size(), func, st);
  
-    return func.count();        
+    typename BV::size_type intervals = func.count();
+    bool last_bit_set = bv.test(bm::id_max-1);
+
+    intervals -= last_bit_set; // correct last (out of range) interval
+    return intervals;
  }
  
  /*!
@@ -1514,7 +1519,7 @@ void export_array(BV& bv, It first, It last)
  
  
  /*!
-   \brief for-each visitor, calls a special visitor functor for each 1 bit group
+   \brief for-each visitor, calls a visitor functor for each 1 bit group
   
     \param block - bit block buffer pointer
     \param offset - global block offset (number of bits)
@@ -1527,6 +1532,7 @@ template<typename Func, typename SIZE_TYPE>
  void for_each_bit_blk(const bm::word_t* block, SIZE_TYPE offset,
                        Func&  bit_functor)
  {
+    BM_ASSERT(block);
      if (IS_FULL_BLOCK(block))
      {
          bit_functor.add_range(offset, bm::gap_max_bits);
@@ -1547,6 +1553,110 @@ void for_each_bit_blk(const bm::word_t* block, SIZE_TYPE offset,
      } while (block < block_end);
  }
  
+/*!
+   \brief for-each range visitor, calls a visitor functor for each 1 bit group
+
+   \param block - bit block buffer pointer
+   \param offset - global block offset (number of bits)
+   \param left - bit address in block, start of closed interval [left..right]
+   \param right - bit address in block, end of closed interval [left..right]
+   \param bit_functor - functor must support .add_bits(offset, bits_ptr, size)
+
+   @ingroup bitfunc
+   @internal
+*/
+template<typename Func, typename SIZE_TYPE>
+void for_each_bit_blk(const bm::word_t* block, SIZE_TYPE offset,
+                      unsigned left, unsigned right,
+                      Func&  bit_functor)
+{
+    BM_ASSERT(block);
+    BM_ASSERT(left <= right);
+    BM_ASSERT(right < bm::bits_in_block);
+
+    if (IS_FULL_BLOCK(block))
+    {
+        unsigned sz = right - left + 1;
+        bit_functor.add_range(offset + left, sz);
+        return;
+    }
+    unsigned char bits[bm::set_bitscan_wave_size*32];
+
+    unsigned cnt, nword, nbit, bitcount, temp;
+    nbit = left & bm::set_word_mask;
+    const bm::word_t* word =
+        block + (nword = unsigned(left >> bm::set_word_shift));
+    if (left == right)  // special case (only 1 bit to check)
+    {
+        if ((*word >> nbit) & 1u)
+        {
+            bits[0] = (unsigned char)nbit;
+            bit_functor.add_bits(offset + (nword * 32), bits, 1);
+        }
+        return;
+    }
+
+    bitcount = right - left + 1u;
+    if (nbit) // starting position is not aligned
+    {
+        unsigned right_margin = nbit + right - left;
+        if (right_margin < 32)
+        {
+            unsigned mask =
+                block_set_table<true>::_right[nbit] &
+                block_set_table<true>::_left[right_margin];
+            temp = (*word & mask);
+            cnt = bm::bitscan_popcnt(temp, bits);
+            if (cnt)
+                bit_functor.add_bits(offset + (nword * 32), bits, cnt);
+
+            return;
+        }
+        temp = *word & block_set_table<true>::_right[nbit];
+        cnt = bm::bitscan_popcnt(temp, bits);
+        if (cnt)
+            bit_functor.add_bits(offset + (nword * 32), bits, cnt);
+        bitcount -= 32 - nbit;
+        ++word; ++nword;
+    }
+    else
+    {
+        bitcount = right - left + 1u;
+    }
+    BM_ASSERT(bm::set_bitscan_wave_size == 4);
+    // now when we are word aligned, we can scan the bit-stream
+    // loop unrolled to evaluate 4 words at a time
+    for ( ;bitcount >= 128;
+           bitcount-=128, word+=bm::set_bitscan_wave_size,
+           nword += bm::set_bitscan_wave_size)
+    {
+        cnt = bm::bitscan_wave(word, bits);
+        if (cnt)
+            bit_functor.add_bits(offset + (nword * 32), bits, cnt);
+    } // for
+
+    for ( ;bitcount >= 32; bitcount-=32, ++word)
+    {
+        temp = *word;
+        cnt = bm::bitscan_popcnt(temp, bits);
+        if (cnt)
+            bit_functor.add_bits(offset + (nword * 32), bits, cnt);
+        ++nword;
+    } // for
+
+    BM_ASSERT(bitcount < 32);
+
+    if (bitcount)  // we have a tail to count
+    {
+        temp = *word & block_set_table<true>::_left[bitcount-1];
+        cnt = bm::bitscan_popcnt(temp, bits);
+        if (cnt)
+            bit_functor.add_bits(offset + (nword * 32), bits, cnt);
+    }
+
+}
+
+
  
  /*!
     \brief for-each visitor, calls a special visitor functor for each 1 bit range
@@ -1577,6 +1687,223 @@ void for_each_gap_blk(const T* buf, SIZE_TYPE offset,
      }
  }
  
+/*!
+   \brief for-each visitor, calls a visitor functor for each run of 1 bits of a GAP block inside [left..right]
+
+   \param buf - GAP block buffer pointer
+   \param offset - global block offset (number of bits)
+   \param left - interval start [left..right]
+   \param right - interval end [left..right]
+   \param bit_functor - functor must support .add_range(offset, size)
+
+   @ingroup gapfunc
+   @internal
+*/
+template<typename T, typename Func, typename SIZE_TYPE>
+void for_each_gap_blk_range(const T* BMRESTRICT buf,
+                            SIZE_TYPE offset,
+                            unsigned left, unsigned right,
+                            Func&  bit_functor)
+{
+    BM_ASSERT(left <= right);
+    BM_ASSERT(right < bm::bits_in_block);
+
+    unsigned is_set;
+    unsigned start_pos = bm::gap_bfind(buf, left, &is_set); // locate 'left'; is_set receives the value at that position
+    const T* BMRESTRICT pcurr = buf + start_pos;
+
+    if (is_set) // 'left' is inside a run of 1s
+    {
+        if (right <= *pcurr) // *pcurr is used as the inclusive end of that run
+        {
+            bit_functor.add_range(offset + left, (right + 1)-left);
+            return;
+        }
+        bit_functor.add_range(offset + left, (*pcurr + 1)-left);
+        ++pcurr;
+    }
+
+    const T* BMRESTRICT pend = buf + (*buf >> 3); // last element; its index is taken from the header word
+    for (++pcurr; pcurr <= pend; pcurr += 2) // two elements per step: the reported run is (prev..*pcurr]
+    {
+        T prev = *(pcurr-1);
+        if (right <= *pcurr) // last candidate run: clip it at 'right'
+        {
+            int sz = int(right) - int(prev);
+            if (sz > 0) // report only when 'right' lies past prev
+                bit_functor.add_range(offset + prev + 1, unsigned(sz));
+            return;
+        }
+        bit_functor.add_range(offset + prev + 1, *pcurr - prev);
+    } // for
+}
+
+
+
+/*! For each non-zero block in [nb_from, nb_to] (block indices, inclusive) executes supplied functor
+    \internal
+*/
+template<typename T, typename N, typename F>
+void for_each_bit_block_range(T*** root,
+                              N top_size, N nb_from, N nb_to, F& f)
+{
+    BM_ASSERT(top_size);
+    if (nb_from > nb_to) // empty range
+        return;
+    unsigned i_from = unsigned(nb_from >> bm::set_array_shift); // top-level index of the first block
+    unsigned j_from = unsigned(nb_from &  bm::set_array_mask); // index of the first block inside its sub-array
+    unsigned i_to = unsigned(nb_to >> bm::set_array_shift);
+    unsigned j_to = unsigned(nb_to &  bm::set_array_mask);
+
+    if (i_from >= top_size) // range starts past the top-level array
+        return;
+    if (i_to >= top_size) // clamp the range end to the last top-level entry
+    {
+        i_to = unsigned(top_size-1);
+        j_to = bm::set_sub_array_size-1;
+    }
+
+    for (unsigned i = i_from; i <= i_to; ++i)
+    {
+        T** blk_blk = root[i];
+        if (!blk_blk) // nothing under this top-level entry
+            continue;
+        if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) // entry holds the FULL marker, not a sub-array
+        {
+            unsigned j = (i == i_from) ? j_from : 0;
+            if (!j && (i != i_to)) // full sub-block
+            {
+                N base_idx = bm::get_super_block_start<N>(i);
+                f.add_range(base_idx, bm::set_sub_total_bits);
+            }
+            else
+            {
+                do // partially covered entry: report it one block at a time
+                {
+                    N base_idx = bm::get_block_start<N>(i, j);
+                    f.add_range(base_idx, bm::gap_max_bits);
+                    if ((i == i_to) && (j == j_to)) // last requested block was reported
+                        return;
+                } while (++j < bm::set_sub_array_size);
+            }
+        }
+        else
+        {
+            unsigned j = (i == i_from) ? j_from : 0;
+            do
+            {
+                const T* block;
+                if (blk_blk[j]) // skip empty slots
+                {
+                    N base_idx = bm::get_block_start<N>(i, j);
+                    if (0 != (block = blk_blk[j])) // NOTE(review): re-tests blk_blk[j], already known non-null here
+                    {
+                        if (BM_IS_GAP(block))
+                        {
+                            bm::for_each_gap_blk(BMGAP_PTR(block), base_idx, f);
+                        }
+                        else
+                        {
+                            bm::for_each_bit_blk(block, base_idx, f);
+                        }
+                    }
+                }
+
+                if ((i == i_to) && (j == j_to)) // last requested block was visited
+                    return;
+            } while (++j < bm::set_sub_array_size);
+        }
+    } // for i
+}
+
+
+/**
+    Implementation of for_each_bit_range without boilerplate checks: visits [left..right] as left block, middle blocks, right block
+    @internal
+*/
+template<class BV, class Func>
+void for_each_bit_range_no_check(const BV&             bv,
+                       typename BV::size_type left,
+                       typename BV::size_type right,
+                       Func&                  bit_functor)
+{
+    typedef typename BV::size_type      size_type;
+    typedef typename BV::block_idx_type block_idx_type;
+
+    const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
+    bm::word_t*** blk_root = bman.top_blocks_root();
+    if (!blk_root) // no block tree root: nothing to visit
+        return;
+        
+    block_idx_type nblock_left  = (left  >> bm::set_block_shift); // block number holding 'left'
+    block_idx_type nblock_right = (right >> bm::set_block_shift); // block number holding 'right'
+
+    unsigned i0, j0;
+    bm::get_block_coord(nblock_left, i0, j0);
+    const bm::word_t* block = bman.get_block_ptr(i0, j0);
+    unsigned nbit_left  = unsigned(left  & bm::set_block_mask); // position of 'left' inside its block
+    size_type offset = nblock_left * bm::bits_in_block; // global index of the first bit of the left block
+
+    if (nblock_left == nblock_right) // hit in the same block
+    {
+        if (!block)
+            return;
+        unsigned nbit_right = unsigned(right & bm::set_block_mask);
+        if (BM_IS_GAP(block))
+        {
+            bm::for_each_gap_blk_range(BMGAP_PTR(block), offset,
+                                       nbit_left, nbit_right, bit_functor);
+        }
+        else
+        {
+            bm::for_each_bit_blk(block, offset, nbit_left, nbit_right,
+                                 bit_functor);
+        }
+        return;
+    }
+    // process left block (only when it is entered mid-block and exists; otherwise the middle pass covers it)
+    if (nbit_left && block)
+    {
+        if (BM_IS_GAP(block))
+        {
+            bm::for_each_gap_blk_range(BMGAP_PTR(block), offset,
+                                nbit_left, bm::bits_in_block-1, bit_functor);
+        }
+        else
+        {
+            bm::for_each_bit_blk(block, offset, nbit_left, bm::bits_in_block-1,
+                                 bit_functor);
+        }
+        ++nblock_left; // left block is done, the middle pass starts after it
+    }
+
+    // process all complete blocks in the middle (the callee returns at once when the range is empty)
+    {
+        block_idx_type top_blocks_size = bman.top_block_size();
+        bm::for_each_bit_block_range(blk_root, top_blocks_size,
+                                nblock_left, nblock_right-1, bit_functor);
+    }
+
+    unsigned nbit_right = unsigned(right & bm::set_block_mask); // position of 'right' inside its block
+    bm::get_block_coord(nblock_right, i0, j0);
+    block = bman.get_block_ptr(i0, j0);
+
+    if (block) // right block: visit [0..nbit_right]
+    {
+        offset = nblock_right * bm::bits_in_block;
+        if (BM_IS_GAP(block))
+        {
+            bm::for_each_gap_blk_range(BMGAP_PTR(block), offset,
+                                       0, nbit_right, bit_functor);
+        }
+        else
+        {
+            bm::for_each_bit_blk(block, offset, 0, nbit_right, bit_functor);
+        }
+    }
+}
+
+
  
  } // namespace bm
  
diff --git a/c++/include/util/bitset/bmalloc.h b/c++/include/util/bitset/bmalloc.h

index b9921f27b01a2dc3bdc425ee4169c8e6c5b52b8e..73a4aacc3e3d789efd95fcbe749d7c568bb217d5 100644 (file)
--- a/c++/include/util/bitset/bmalloc.h
+++ b/c++/include/util/bitset/bmalloc.h
@@ -73,13 +73,10 @@ public:
          ptr = (bm::word_t*) ::_mm_malloc(n * sizeof(bm::word_t), BM_ALLOC_ALIGN);
      #endif
  #else
-            ptr = (bm::word_t*) ::malloc(n * sizeof(bm::word_t));
+        ptr = (bm::word_t*) ::malloc(n * sizeof(bm::word_t));
  #endif
-
          if (!ptr)
-        {
              throw std::bad_alloc();
-        }
          return ptr;
      }
  
@@ -87,7 +84,7 @@ public:
      The member function frees storage for an array of n bm::word_t 
      elements, by calling free. 
      */
-    static void deallocate(bm::word_t* p, size_t)
+    static void deallocate(bm::word_t* p, size_t) BMNOEXCEPT
      {
  #ifdef BM_ALLOC_ALIGN
      # ifdef _MSC_VER
@@ -120,9 +117,7 @@ public:
      {
          void* ptr = ::malloc(n * sizeof(void*));
          if (!ptr)
-        {
              throw std::bad_alloc();
-        }
          return ptr;
      }
  
@@ -130,7 +125,7 @@ public:
      The member function frees storage for an array of n bm::word_t 
      elements, by calling free. 
      */
-    static void deallocate(void* p, size_t)
+    static void deallocate(void* p, size_t) BMNOEXCEPT
      {
          ::free(p);
      }
@@ -147,7 +142,7 @@ public:
          n_pool_max_size = BM_DEFAULT_POOL_SIZE
      };
  
-    pointer_pool_array() : size_(0) 
+    pointer_pool_array() : pool_ptr_(0), size_(0) 
      {
          allocate_pool(n_pool_max_size);
      }
@@ -164,7 +159,7 @@ public:
      /// Push pointer to the pool (if it is not full)
      ///
      /// @return 0 if pointer is not accepted (pool is full)
-    unsigned push(void* ptr)
+    unsigned push(void* ptr) BMNOEXCEPT
      {
          if (size_ == n_pool_max_size - 1)
              return 0;
@@ -174,21 +169,22 @@ public:
  
      /// Get a pointer if there are any vacant
      ///
-    void* pop()
+    void* pop() BMNOEXCEPT
      {
-        if (size_ == 0)
+        if (!size_)
              return 0;
          return pool_ptr_[--size_];
      }
  private:
      void allocate_pool(size_t pool_size)
      {
+        BM_ASSERT(!pool_ptr_);
          pool_ptr_ = (void**)::malloc(sizeof(void*) * pool_size);
          if (!pool_ptr_)
              throw std::bad_alloc();
      }
  
-    void free_pool()
+    void free_pool() BMNOEXCEPT
      {
          ::free(pool_ptr_);
      }
@@ -218,21 +214,19 @@ public:
      bm::word_t* alloc_bit_block()
      {
          bm::word_t* ptr = (bm::word_t*)block_pool_.pop();
-        if (ptr == 0)
+        if (!ptr)
              ptr = block_alloc_.allocate(bm::set_block_size, 0);
          return ptr;
      }
      
-    void free_bit_block(bm::word_t* block)
+    void free_bit_block(bm::word_t* block) BMNOEXCEPT
      {
          BM_ASSERT(IS_VALID_ADDR(block));
          if (!block_pool_.push(block))
-        {
              block_alloc_.deallocate(block, bm::set_block_size);
-        }
      }
  
-    void free_pools()
+    void free_pools() BMNOEXCEPT
      {
          bm::word_t* block;
          do
@@ -267,19 +261,19 @@ public:
  
  public:
  
-    mem_alloc(const BA& block_alloc = BA(), const PA& ptr_alloc = PA())
+    mem_alloc(const BA& block_alloc = BA(), const PA& ptr_alloc = PA()) BMNOEXCEPT
      : block_alloc_(block_alloc),
        ptr_alloc_(ptr_alloc),
        alloc_pool_p_(0)
      {}
  
-    mem_alloc(const mem_alloc& ma)
+    mem_alloc(const mem_alloc& ma) BMNOEXCEPT
          : block_alloc_(ma.block_alloc_),
            ptr_alloc_(ma.ptr_alloc_),
            alloc_pool_p_(0) // do not inherit pool (has to be explicitly defined)
      {}
  
-    mem_alloc& operator=(const mem_alloc& ma)
+    mem_alloc& operator=(const mem_alloc& ma) BMNOEXCEPT
      {
          block_alloc_ = ma.block_alloc_;
          ptr_alloc_ = ma.ptr_alloc_;
@@ -289,26 +283,26 @@ public:
      
      /*! @brief Returns copy of the block allocator object
      */
-    block_allocator_type get_block_allocator() const 
+    block_allocator_type get_block_allocator() const BMNOEXCEPT
      { 
          return BA(block_alloc_); 
      }
  
      /*! @brief Returns copy of the ptr allocator object
      */
-    ptr_allocator_type get_ptr_allocator() const 
+    ptr_allocator_type get_ptr_allocator() const BMNOEXCEPT
      { 
         return PA(block_alloc_); 
      }
  
      /*! @brief set pointer to external pool */
-    void set_pool(allocator_pool_type* pool)
+    void set_pool(allocator_pool_type* pool) BMNOEXCEPT
      {
          alloc_pool_p_ = pool;
      }
  
      /*! @brief get pointer to allocation pool (if set) */
-    allocator_pool_type* get_pool()
+    allocator_pool_type* get_pool() BMNOEXCEPT
      {
          return alloc_pool_p_;
      }
@@ -328,7 +322,7 @@ public:
  
      /*! @brief Frees bit block allocated by alloc_bit_block.
      */
-    void free_bit_block(bm::word_t* block, unsigned alloc_factor = 1)
+    void free_bit_block(bm::word_t* block, unsigned alloc_factor = 1) BMNOEXCEPT
      {
          BM_ASSERT(IS_VALID_ADDR(block));
          if (alloc_pool_p_ && alloc_factor == 1)
@@ -377,7 +371,7 @@ public:
  
      /*! @brief Frees block of pointers.
      */
-    void free_ptr(void* p, size_t size)
+    void free_ptr(void* p, size_t size) BMNOEXCEPT
      {
          if (p)
              ptr_alloc_.deallocate(p, size);
@@ -427,7 +421,7 @@ void* aligned_new_malloc(size_t size)
  ///
  /// @internal
  inline
-void aligned_free(void* ptr)
+void aligned_free(void* ptr) BMNOEXCEPT
  {
      if (!ptr)
          return;
diff --git a/c++/include/util/bitset/bmavx2.h b/c++/include/util/bitset/bmavx2.h

index 04e66dd963c68e0ed65cb438be79102f0dd160af..2e232589191de917e3723be5393ee193dde79ab2 100644 (file)
--- a/c++/include/util/bitset/bmavx2.h
+++ b/c++/include/util/bitset/bmavx2.h
@@ -222,6 +222,66 @@ bm::id_t avx2_bit_count(const __m256i* BMRESTRICT block,
    return (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]);
  }
  
+/*!
+    @brief Calculate population count of a bit-block, reading only the waves whose bit is set in the digest
+
+    @return popcnt (sum over the visited waves)
+    @ingroup AVX2
+*/
+inline
+bm::id_t avx2_bit_block_count(const bm::word_t* const block,
+                              bm::id64_t digest)
+{
+    bm::id_t count = 0;
+    bm::id64_t* cnt64;
+    BM_AVX2_POPCNT_PROLOG;
+    __m256i cnt = _mm256_setzero_si256();
+    while (digest)
+    {
+        bm::id64_t t = bm::bmi_blsi_u64(digest); // isolate the lowest set bit: d & -d
+
+        unsigned wave = _mm_popcnt_u64(t - 1); // number of bits below it, i.e. the index of that bit
+        unsigned off = wave * bm::set_block_digest_wave_size; // word offset of the wave inside the block
+
+        const __m256i* BMRESTRICT wave_src = (__m256i*)&block[off];
+
+        __m256i m1A, m1B, m1C, m1D;
+        m1A = _mm256_load_si256(wave_src);
+        m1B = _mm256_load_si256(wave_src+1);
+        if (!_mm256_testz_si256(m1A, m1A)) // skip the count for an all-zero vector
+        {
+            BM_AVX2_BIT_COUNT(bc, m1A)
+            cnt = _mm256_add_epi64(cnt, bc);
+        }
+        if (!_mm256_testz_si256(m1B, m1B))
+        {
+            BM_AVX2_BIT_COUNT(bc, m1B)
+            cnt = _mm256_add_epi64(cnt, bc);
+        }
+
+        m1C = _mm256_load_si256(wave_src+2);
+        m1D = _mm256_load_si256(wave_src+3);
+        if (!_mm256_testz_si256(m1C, m1C))
+        {
+            BM_AVX2_BIT_COUNT(bc, m1C)
+            cnt = _mm256_add_epi64(cnt, bc);
+        }
+        if (!_mm256_testz_si256(m1D, m1D))
+        {
+            BM_AVX2_BIT_COUNT(bc, m1D)
+            cnt = _mm256_add_epi64(cnt, bc);
+        }
+
+        digest = bm::bmi_bslr_u64(digest); // clear the lowest set bit: d &= d - 1
+    } // while
+    cnt64 = (bm::id64_t*)&cnt; // view the accumulator as four 64-bit lanes
+    count = (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]);
+    return count;
+
+}
+
+
+
  /*!
    @brief AND bit count for two aligned bit-blocks
    @ingroup AVX2
@@ -1367,6 +1427,20 @@ bool avx2_is_all_one(const __m256i* BMRESTRICT block)
      return true;
  }
  
+/*!
+    @brief check if wave of pointers is all 0xFF bytes (one unaligned 256-bit load from ptr)
+    @ingroup AVX2
+*/
+BMFORCEINLINE
+bool avx2_test_all_one_wave(const void* ptr)
+{
+    __m256i maskF = _mm256_set1_epi32(~0u); // broadcast 0xFF to every byte
+   __m256i wcmpA = _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)ptr), maskF); // (w0 == maskF), byte by byte
+    unsigned maskA = unsigned(_mm256_movemask_epi8(wcmpA)); // one result bit per compared byte
+    return (maskA == ~0u); // true only when all 32 bytes matched
+}
+
+
  /*!
      @brief check if wave of pointers is all NULL
      @ingroup AVX2
@@ -2474,16 +2548,24 @@ int avx2_cmpge_u16(__m256i vect16, unsigned short value)
  }
  
  /**
-    hybrid binary search, starts as binary, then switches to scan
- 
+    Hybrid binary search, starts as binary, then switches to scan
+
      NOTE: AVX code uses _mm256_subs_epu16 - saturated substraction
      which gives 0 if A-B=0 if A < B (not negative a value).
- 
+
+   \param buf - GAP buffer pointer.
+   \param pos - index of the element.
+   \param is_set - output. GAP value (0 or 1).
+   \return GAP index.
+
      @ingroup AVX2
  */
  inline
-unsigned avx2_gap_test(const unsigned short* buf, unsigned pos)
+unsigned avx2_gap_bfind(const unsigned short* BMRESTRICT buf,
+                        unsigned pos, unsigned* BMRESTRICT is_set)
  {
+    BM_ASSERT(is_set);
+
      const unsigned linear_cutoff = 48;
      const unsigned unroll_factor = 16;
  
@@ -2500,8 +2582,9 @@ unsigned avx2_gap_test(const unsigned short* buf, unsigned pos)
          {
              if (buf[start] >= pos)
              {
-                res = ((*buf) & 1) ^ ((--start) & 1);
-                return res;
+                res = ((*buf) & 1) ^ ((start-1) & 1);
+                *is_set = res;
+                return start;
              }
          } // for
          BM_ASSERT(0);
@@ -2516,7 +2599,7 @@ unsigned avx2_gap_test(const unsigned short* buf, unsigned pos)
              // but stay within allocated block memory
              //
              dsize = arr_end - start;
-            
+
              __m256i mZ = _mm256_setzero_si256();
              __m256i mPos  = _mm256_set1_epi16((unsigned short)pos);
              __m256i vect16, mSub, mge_mask;
@@ -2532,8 +2615,9 @@ unsigned avx2_gap_test(const unsigned short* buf, unsigned pos)
                  {
                      int lz = _tzcnt_u32(mask) / 2;
                      start += lz;
-                    res = ((*buf) & 1) ^ ((--start) & 1);
-                    return res;
+                    res = ((*buf) & 1) ^ ((start-1) & 1);
+                    *is_set = res;
+                    return start;
                  }
              } // for k
              unsigned tail = unroll_factor - (end - start);
@@ -2544,22 +2628,19 @@ unsigned avx2_gap_test(const unsigned short* buf, unsigned pos)
                  mSub = _mm256_subs_epu16(mPos, vect16);
                  mge_mask = _mm256_cmpeq_epi16(mSub, mZ);
                  int mask = _mm256_movemask_epi8(mge_mask);
-                BM_ASSERT(mask);
-                // TODO: if should be not needed, cleanup
-                if (mask)
-                {
-                    int lz = _tzcnt_u32(mask) / 2;
-                    start += lz;
-                    res = ((*buf) & 1) ^ ((--start) & 1);
-                    return res;
-                }
-                start += unroll_factor; // remove with if when sure
+                BM_ASSERT(mask); // the rersult MUST be here at this point
+
+                int lz = _tzcnt_u32(mask) / 2;
+                start += lz;
+                res = ((*buf) & 1) ^ ((start-1) & 1);
+                *is_set = res;
+                return start;
              }
              for (; start < end; ++start)
              {
                  if (buf[start] >= pos)
                      break;
-            }
+            } // for
              break;
          }
          unsigned curr = (start + end) >> 1;
@@ -2568,8 +2649,22 @@ unsigned avx2_gap_test(const unsigned short* buf, unsigned pos)
          else
              end = curr;
      } // while
-    res = ((*buf) & 1) ^ ((--start) & 1);
-    return res;
+    res = ((*buf) & 1) ^ ((start-1) & 1);
+    *is_set = res;
+    return start;
+}
+
+
+/**
+    GAP test at position pos: hybrid binary search, starts as binary, then switches to scan
+    @ingroup AVX2
+*/
+inline
+unsigned avx2_gap_test(const unsigned short* BMRESTRICT buf, unsigned pos)
+{
+    unsigned is_set;
+    bm::avx2_gap_bfind(buf, pos, &is_set); // the returned GAP index is not needed here
+    return is_set; // GAP value (0 or 1) reported by avx2_gap_bfind for pos
  }
  
  /**
@@ -3024,6 +3119,13 @@ void avx2_bit_block_xor(bm::word_t*  target_block,
  #define VECT_BIT_BLOCK_XOR(t, src, src_xor, d) \
      avx2_bit_block_xor(t, src, src_xor, d)
  
+#define VECT_GAP_BFIND(buf, pos, is_set) \
+    avx2_gap_bfind(buf, pos, is_set)
+
+#define VECT_BIT_COUNT_DIGEST(blk, d) \
+    avx2_bit_block_count(blk, d)
+
+
  } // namespace
  
  
diff --git a/c++/include/util/bitset/bmblocks.h b/c++/include/util/bitset/bmblocks.h

index 3e4c4c613b1a88ace6e28469b677f770501608a3..857d8e9b3c127ac1d809f4e8bb91dc055ad74d73 100644 (file)
--- a/c++/include/util/bitset/bmblocks.h
+++ b/c++/include/util/bitset/bmblocks.h
@@ -59,10 +59,10 @@ public:
      public:
          typedef id_type size_type;
          
-        bm_func_base(blocks_manager& bman) : bm_(bman) {}
+        bm_func_base(blocks_manager& bman) BMNOEXCEPT : bm_(bman) {}
  
-        void on_empty_top(unsigned /* top_block_idx*/ ) {}
-        void on_empty_block(block_idx_type /* block_idx*/ ) {}
+        void on_empty_top(unsigned /* top_block_idx*/ ) BMNOEXCEPT {}
+        void on_empty_block(block_idx_type /* block_idx*/ )BMNOEXCEPT {}
      private:
          bm_func_base(const bm_func_base&);
          bm_func_base& operator=(const bm_func_base&);
@@ -76,13 +76,13 @@ public:
      {
      public:
          typedef id_type size_type;
-        bm_func_base_const(const blocks_manager& bman) : bm_(bman) {}
+        bm_func_base_const(const blocks_manager& bman) BMNOEXCEPT : bm_(bman) {}
  
-        void on_empty_top(unsigned /* top_block_idx*/ ) {}
-        void on_empty_block(block_idx_type /* block_idx*/ ) {}
+        void on_empty_top(unsigned /* top_block_idx*/ ) BMNOEXCEPT {}
+        void on_empty_block(block_idx_type /* block_idx*/ ) BMNOEXCEPT {}
      private:
-        bm_func_base_const(const bm_func_base_const&);
-        bm_func_base_const& operator=(const bm_func_base_const&);
+        bm_func_base_const(const bm_func_base_const&) BMNOEXCEPT;
+        bm_func_base_const& operator=(const bm_func_base_const&) BMNOEXCEPT;
      protected:
          const blocks_manager&  bm_;
      };
@@ -92,10 +92,10 @@ public:
      class block_count_base : public bm_func_base_const
      {
      protected:
-        block_count_base(const blocks_manager& bm) 
+        block_count_base(const blocks_manager& bm) BMNOEXCEPT
              : bm_func_base_const(bm) {}
  
-        bm::id_t block_count(const bm::word_t* block) const
+        bm::id_t block_count(const bm::word_t* block) const BMNOEXCEPT
          {
              return this->bm_.block_bitcount(block);
          }
@@ -108,17 +108,17 @@ public:
      public:
          typedef id_type size_type;
  
-        block_count_func(const blocks_manager& bm) 
+        block_count_func(const blocks_manager& bm) BMNOEXCEPT
              : block_count_base(bm), count_(0) {}
  
-        id_type count() const { return count_; }
+        id_type count() const BMNOEXCEPT { return count_; }
  
-        void operator()(const bm::word_t* block)
+        void operator()(const bm::word_t* block) BMNOEXCEPT
          {
              count_ += this->block_count(block);
          }
-        void add_full(id_type c) { count_ += c; }
-        void reset() { count_ = 0; }
+        void add_full(id_type c) BMNOEXCEPT { count_ += c; }
+        void reset() BMNOEXCEPT { count_ = 0; }
  
      private:
          id_type count_;
@@ -131,24 +131,22 @@ public:
      public:
          typedef id_type size_type;
  
-        block_count_arr_func(const blocks_manager& bm, unsigned* arr) 
+        block_count_arr_func(const blocks_manager& bm, unsigned* arr) BMNOEXCEPT
              : block_count_base(bm), arr_(arr), last_idx_(0) 
          {
              arr_[0] = 0;
          }
  
-        void operator()(const bm::word_t* block, id_type idx)
+        void operator()(const bm::word_t* block, id_type idx) BMNOEXCEPT
          {
              while (++last_idx_ < idx)
-            {
                  arr_[last_idx_] = 0;
-            }
              arr_[idx] = this->block_count(block);
              last_idx_ = idx;
          }
  
-        id_type last_block() const { return last_idx_; }
-        void on_non_empty_top(unsigned) {}
+        id_type last_block() const BMNOEXCEPT { return last_idx_; }
+        void on_non_empty_top(unsigned) BMNOEXCEPT {}
  
      private:
          unsigned*  arr_;
@@ -161,13 +159,14 @@ public:
      public:
          typedef id_type size_type;
  
-        block_count_change_func(const blocks_manager& bm) 
+        block_count_change_func(const blocks_manager& bm) BMNOEXCEPT
              : bm_func_base_const(bm),
                  count_(0),
                  prev_block_border_bit_(0)
          {}
  
-        block_idx_type block_count(const bm::word_t* block, block_idx_type idx)
+        block_idx_type block_count(const bm::word_t* block,
+                                   block_idx_type idx) BMNOEXCEPT
          {
              block_idx_type cnt = 0;
              id_type first_bit;
@@ -187,7 +186,7 @@ public:
                  if (BM_IS_GAP(block))
                  {
                      gap_word_t* gap_block = BMGAP_PTR(block);
-                    cnt = gap_length(gap_block) - 1;
+                    cnt = bm::gap_length(gap_block) - 1;
                      if (idx)
                      {
                          first_bit = bm::gap_test_unr(gap_block, 0);
@@ -213,9 +212,9 @@ public:
              return cnt;
          }
          
-        id_type count() const { return count_; }
+        id_type count() const BMNOEXCEPT { return count_; }
  
-        void operator()(const bm::word_t* block, block_idx_type idx)
+        void operator()(const bm::word_t* block, block_idx_type idx) BMNOEXCEPT
          {
              count_ += block_count(block, idx);
          }
@@ -232,11 +231,12 @@ public:
      public:
          typedef id_type size_type;
  
-        block_any_func(const blocks_manager& bm) 
+        block_any_func(const blocks_manager& bm) BMNOEXCEPT
              : bm_func_base_const(bm) 
          {}
  
-        bool operator()(const bm::word_t* block, block_idx_type /*idx*/)
+        bool operator()
+                (const bm::word_t* block, block_idx_type /*idx*/) BMNOEXCEPT
          {
              if (BM_IS_GAP(block)) // gap block
                  return (!gap_is_all_zero(BMGAP_PTR(block)));
@@ -250,9 +250,9 @@ public:
      class gap_level_func : public bm_func_base
      {
      public:
-        gap_level_func(blocks_manager& bm, const gap_word_t* glevel_len)
-            : bm_func_base(bm),
-                glevel_len_(glevel_len)
+        gap_level_func(blocks_manager& bm,
+                       const gap_word_t* glevel_len) BMNOEXCEPT
+            : bm_func_base(bm), glevel_len_(glevel_len)
          {
              BM_ASSERT(glevel_len);
          }
@@ -282,19 +282,18 @@ public:
                  return;
              }
  
-            unsigned len = gap_length(gap_blk);
-            int level = gap_calc_level(len, glevel_len_);
+            unsigned len = bm::gap_length(gap_blk);
+            int level = bm::gap_calc_level(len, glevel_len_);
              if (level == -1)
              {
-                bm::word_t* blk = 
-                    bman.get_allocator().alloc_bit_block();
+                bm::word_t* blk = bman.get_allocator().alloc_bit_block();
                  bman.set_block_ptr(idx, blk);
                  bm::gap_convert_to_bitset(blk, gap_blk);
              }
              else
              {
                  gap_word_t* gap_blk_new = 
-                    bman.allocate_gap_block(unsigned(level), gap_blk, glevel_len_);
+                bman.allocate_gap_block(unsigned(level), gap_blk, glevel_len_);
  
                  bm::word_t* p = (bm::word_t*) gap_blk_new;
                  BMSET_PTRGAP(p);
@@ -312,7 +311,7 @@ public:
      class block_one_func : public bm_func_base
      {
      public:
-        block_one_func(blocks_manager& bm) : bm_func_base(bm) {}
+        block_one_func(blocks_manager& bm) BMNOEXCEPT : bm_func_base(bm) {}
  
          void operator()(bm::word_t* block, block_idx_type idx)
          {
@@ -357,7 +356,7 @@ public:
      }
      
  #ifndef BM_NO_CXX11
-    blocks_manager(blocks_manager&& blockman) BMNOEXEPT
+    blocks_manager(blocks_manager&& blockman) BMNOEXCEPT
          : max_bits_(blockman.max_bits_),
            top_blocks_(0),
            top_block_size_(blockman.top_block_size_),
@@ -369,7 +368,7 @@ public:
      }
  #endif
  
-    ~blocks_manager() BMNOEXEPT
+    ~blocks_manager() BMNOEXCEPT
      {
          if (temp_block_)
              alloc_.free_bit_block(temp_block_);
@@ -379,7 +378,7 @@ public:
      /*! \brief Swaps content 
          \param bm  another blocks manager
      */
-    void swap(blocks_manager& bm) BMNOEXEPT
+    void swap(blocks_manager& bm) BMNOEXCEPT
      {
          BM_ASSERT(this != &bm);
  
@@ -399,7 +398,7 @@ public:
      
      /*! \brief implementation of moving semantics
      */
-    void move_from(blocks_manager& bm) BMNOEXEPT
+    void move_from(blocks_manager& bm) BMNOEXCEPT
      {
          deinit_tree();
          swap(bm);
@@ -412,9 +411,9 @@ public:
      }
      
  
-    void free_ptr(bm::word_t** ptr)
+    void free_ptr(bm::word_t** ptr) BMNOEXCEPT
      {
-        if (ptr) alloc_.free_ptr(ptr);
+        alloc_.free_ptr(ptr);
      }
  
      /**
@@ -422,7 +421,7 @@ public:
          \param bits_to_store - supposed capacity (number of bits)
          \return size of the top level block
      */
-    unsigned compute_top_block_size(id_type bits_to_store)
+    unsigned compute_top_block_size(id_type bits_to_store) const BMNOEXCEPT
      {
          if (bits_to_store >= bm::id_max)  // working in full-range mode
              return bm::set_top_array_size;
@@ -456,7 +455,8 @@ public:
          \param no_more_blocks - 1 if there are no more blocks at all
          \return block adress or NULL if not yet allocated
      */
-    bm::word_t* get_block(block_idx_type nb, int* no_more_blocks) const
+    const bm::word_t*
+    get_block(block_idx_type nb, int* no_more_blocks) const BMNOEXCEPT
      {
          BM_ASSERT(top_blocks_);
          unsigned i = unsigned(nb >> bm::set_array_shift);
@@ -489,7 +489,7 @@ public:
      @return bm::set_total_blocks - no more blocks
      */
      block_idx_type
-    find_next_nz_block(block_idx_type nb, bool deep_scan = true) const
+    find_next_nz_block(block_idx_type nb, bool deep_scan=true) const BMNOEXCEPT
      {
          if (is_init())
          {
@@ -521,7 +521,7 @@ public:
          \param j - second level block index
          \return block adress or NULL if not yet allocated
      */
-    const bm::word_t* get_block(unsigned i, unsigned j) const
+    const bm::word_t* get_block(unsigned i, unsigned j) const BMNOEXCEPT
      {
          if (!top_blocks_ || i >= top_block_size_) return 0;
          const bm::word_t* const* blk_blk = top_blocks_[i];
@@ -537,7 +537,7 @@ public:
          \param j - second level block index
          \return block adress or NULL if not yet allocated
      */
-    const bm::word_t* get_block_ptr(unsigned i, unsigned j) const
+    const bm::word_t* get_block_ptr(unsigned i, unsigned j) const BMNOEXCEPT
      {
          if (!top_blocks_ || i >= top_block_size_) return 0;
  
@@ -553,9 +553,10 @@ public:
          \param j - second level block index
          \return block adress or NULL if not yet allocated
      */
-    bm::word_t* get_block_ptr(unsigned i, unsigned j)
+    bm::word_t* get_block_ptr(unsigned i, unsigned j) BMNOEXCEPT
      {
-        if (!top_blocks_ || i >= top_block_size_) return 0;
+        if (!top_blocks_ || i >= top_block_size_)
+            return 0;
          bm::word_t* const* blk_blk = top_blocks_[i];
          if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
              return FULL_BLOCK_FAKE_ADDR;
@@ -569,7 +570,7 @@ public:
          \param i - top level block index
          \return block adress or NULL if not yet allocated
      */
-    const bm::word_t* const * get_topblock(unsigned i) const
+    const bm::word_t* const * get_topblock(unsigned i) const BMNOEXCEPT
      {
          return (!top_blocks_ || i >= top_block_size_) ? 0 : top_blocks_[i];
      }
@@ -577,7 +578,7 @@ public:
      /** 
          \brief Returns root block in the tree.
      */
-    bm::word_t*** top_blocks_root() const
+    bm::word_t*** top_blocks_root() const BMNOEXCEPT
      {
          blocks_manager* bm = 
              const_cast<blocks_manager*>(this);
@@ -837,7 +838,7 @@ public:
          {
              gap_res = true;
              new_block = (bm::word_t*)
-                    get_allocator().alloc_gap_block(unsigned(new_level), glen());
+                get_allocator().alloc_gap_block(unsigned(new_level), glen());
              ::memcpy(new_block, gap_block, len * sizeof(bm::gap_word_t));
              bm::set_gap_level(new_block, new_level);
          }
@@ -1124,7 +1125,7 @@ public:
      /*! @brief Fills all blocks with 0.
          @param free_mem - if true function frees the resources (obsolete)
      */
-    void set_all_zero(bool /*free_mem*/)
+    void set_all_zero(bool /*free_mem*/) BMNOEXCEPT
      {
          if (!is_init()) return;
          deinit_tree(); // TODO: optimization of top-level realloc
@@ -1141,7 +1142,7 @@ public:
                                  bm::set_sub_array_size, func);
      }
      
-    void free_top_subblock(unsigned nblk_blk)
+    void free_top_subblock(unsigned nblk_blk) BMNOEXCEPT
      {
          BM_ASSERT(top_blocks_[nblk_blk]);
          if ((bm::word_t*)top_blocks_[nblk_blk] != FULL_BLOCK_FAKE_ADDR)
@@ -1452,7 +1453,7 @@ public:
          Places new block into blocks table.
      */
      BMFORCEINLINE
-    void set_block_ptr(unsigned i, unsigned j, bm::word_t* block)
+    void set_block_ptr(unsigned i, unsigned j, bm::word_t* block) BMNOEXCEPT
      {
          BM_ASSERT(is_init());
          BM_ASSERT(i < top_block_size_);
@@ -1597,7 +1598,7 @@ public:
      /**
      Free block, make it zero pointer in the tree
      */
-    void zero_gap_block_ptr(unsigned i, unsigned j)
+    void zero_gap_block_ptr(unsigned i, unsigned j) BMNOEXCEPT
      {
          BM_ASSERT(top_blocks_ && i < top_block_size_);
          
@@ -1616,19 +1617,15 @@ public:
          Count number of bits ON in the block
      */
      static
-    bm::id_t block_bitcount(const bm::word_t* block)
+    bm::id_t block_bitcount(const bm::word_t* block) BMNOEXCEPT
      {
          BM_ASSERT(block);
          id_t count;
          if (BM_IS_GAP(block))
-        {
              count = bm::gap_bit_count_unr(BMGAP_PTR(block));
-        }
          else // bitset
-        {
              count = (IS_FULL_BLOCK(block)) ? bm::bits_in_block
                                             : bm::bit_block_count(block);
-        }
          return count;
      }
  
@@ -1678,7 +1675,7 @@ public:
      }
      
      /*! deallocate temp block */
-    void free_temp_block()
+    void free_temp_block() BMNOEXCEPT
      {
          if (temp_block_)
          {
@@ -1686,6 +1683,7 @@ public:
              temp_block_ = 0;
          }
      }
+
      /*! Detach and return temp block.
          if temp block is NULL allocates a bit-block
          caller is responsible for returning
@@ -1705,7 +1703,7 @@ public:
      /*! Return temp block
          if temp block already exists - block gets deallocated
      */
-    void return_tempblock(bm::word_t* block)
+    void return_tempblock(bm::word_t* block) BMNOEXCEPT
      {
          BM_ASSERT(block != temp_block_);
          BM_ASSERT(IS_VALID_ADDR(block));
@@ -1717,7 +1715,7 @@ public:
      }
  
      /*! Assigns new GAP lengths vector */
-    void set_glen(const gap_word_t* glevel_len)
+    void set_glen(const gap_word_t* glevel_len) BMNOEXCEPT
      {
          ::memcpy(glevel_len_, glevel_len, sizeof(glevel_len_));
      }
@@ -1745,7 +1743,7 @@ public:
      
      /** Returns true if second level block pointer is 0.
      */
-    bool is_subblock_null(unsigned nsub) const
+    bool is_subblock_null(unsigned nsub) const BMNOEXCEPT
      {
          BM_ASSERT(top_blocks_);
          if (nsub >= top_block_size_)
@@ -1753,14 +1751,14 @@ public:
          return top_blocks_[nsub] == NULL;
      }
  
-    bm::word_t*** top_blocks_root()
+    bm::word_t*** top_blocks_root() BMNOEXCEPT
      {
          return top_blocks_;
      }
  
      /*! \brief Returns current GAP level vector
      */
-    const gap_word_t* glen() const
+    const gap_word_t* glen() const BMNOEXCEPT
      {
          return glevel_len_;
      }
@@ -1768,14 +1766,14 @@ public:
      /*! \brief Returns GAP level length for specified level
          \param level - level number
      */
-    unsigned glen(unsigned level) const
+    unsigned glen(unsigned level) const BMNOEXCEPT
      {
          return glevel_len_[level];
      }
      
      /*! \brief Returns size of the top block array in the tree 
      */
-    unsigned top_block_size() const
+    unsigned top_block_size() const BMNOEXCEPT
      {
          return top_block_size_;
      }
@@ -1829,21 +1827,20 @@ public:
      
      /** \brief Returns reference on the allocator
      */
-    allocator_type& get_allocator() { return alloc_; }
+    allocator_type& get_allocator() BMNOEXCEPT { return alloc_; }
  
      /** \brief Returns allocator
      */
-    allocator_type get_allocator() const { return alloc_; }
+    allocator_type get_allocator() const BMNOEXCEPT { return alloc_; }
      
      
      /// if tree of blocks already up
-    bool is_init() const { return top_blocks_ != 0; }
+    bool is_init() const BMNOEXCEPT { return top_blocks_ != 0; }
      
      /// allocate first level of descr. of blocks 
      void init_tree()
      {
          BM_ASSERT(top_blocks_ == 0);
-        
          if (top_block_size_)
          {
              top_blocks_ = (bm::word_t***) alloc_.alloc_ptr(top_block_size_);
@@ -1865,7 +1862,7 @@ public:
                  alloc_.free_bit_block(blk); \
          }
      
-    void deallocate_top_subblock(unsigned nblk_blk)
+    void deallocate_top_subblock(unsigned nblk_blk) BMNOEXCEPT
      {
          if (!top_blocks_[nblk_blk])
              return;
@@ -1907,7 +1904,7 @@ public:
      /** destroy tree, free memory in all blocks and control structures
          Note: pointers are NOT assigned to zero(!)
      */
-    void destroy_tree() BMNOEXEPT
+    void destroy_tree() BMNOEXCEPT
      {
          if (!top_blocks_)
              return;
@@ -1937,7 +1934,7 @@ public:
      }
      #undef BM_FREE_OP
  
-    void deinit_tree() BMNOEXEPT
+    void deinit_tree() BMNOEXCEPT
      {
          destroy_tree();
          top_blocks_ = 0; top_block_size_ = 0;
@@ -1946,7 +1943,7 @@ public:
      // ----------------------------------------------------------------
      
      /// calculate top blocks which are not NULL and not FULL
-    unsigned find_real_top_blocks() const
+    unsigned find_real_top_blocks() const BMNOEXCEPT
      {
          unsigned cnt = 0;
          unsigned top_blocks = top_block_size();
@@ -1964,7 +1961,7 @@ public:
      // ----------------------------------------------------------------
  
      /// calculate max top blocks size whithout NULL-tail
-    unsigned find_max_top_blocks() const
+    unsigned find_max_top_blocks() const BMNOEXCEPT
      {
          unsigned top_blocks = top_block_size();
          if (!top_blocks)
@@ -1981,11 +1978,11 @@ public:
  
      // ----------------------------------------------------------------
  
-    void validate_top_zero(unsigned i)
+    void validate_top_zero(unsigned i) BMNOEXCEPT
      {
          BM_ASSERT(i < top_block_size());
          bm::word_t** blk_blk = top_blocks_[i];
-        // TODO: SIMD
+        // TODO: SIMD or unroll
          for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
          {
              if (blk_blk[j])
@@ -1997,7 +1994,7 @@ public:
  
      // ----------------------------------------------------------------
  
-    void validate_top_full(unsigned i)
+    void validate_top_full(unsigned i) BMNOEXCEPT
      {
          BM_ASSERT(i < top_block_size());
          bm::word_t** blk_blk = top_blocks_[i];
@@ -2015,7 +2012,7 @@ public:
          Calculate approximate memory needed to serialize big runs
          of 0000s and 111s (as blocks)
      */
-    size_t calc_serialization_null_full() const
+    size_t calc_serialization_null_full() const BMNOEXCEPT
      {
          size_t s_size = sizeof(unsigned);
          if (!top_blocks_)
@@ -2041,6 +2038,9 @@ public:
                  }
                  nb_empty += (i - nb_prev) * bm::set_sub_array_size;
                  blk_blk = top_blocks_[i];
+                BM_ASSERT(blk_blk);
+                if (!blk_blk)
+                    break;
              }
              if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
              {
@@ -2357,7 +2357,7 @@ template<class BlocksManager>
  class bit_block_guard
  {
  public:
-    bit_block_guard(BlocksManager& bman, bm::word_t* blk=0) 
+    bit_block_guard(BlocksManager& bman, bm::word_t* blk=0) BMNOEXCEPT
          : bman_(bman), 
            block_(blk)
      {}
@@ -2366,18 +2366,20 @@ public:
          if (IS_VALID_ADDR(block_))
              bman_.get_allocator().free_bit_block(block_, 3);
      }
-    void attach(bm::word_t* blk)
+
+    void attach(bm::word_t* blk) BMNOEXCEPT
      {
          if (IS_VALID_ADDR(block_))
              bman_.get_allocator().free_bit_block(block_);
          block_ = blk;
      }
+
      bm::word_t* allocate()
      {
          attach(bman_.get_allocator().alloc_bit_block(3));
          return block_;
      }
-    bm::word_t* get() { return block_; }
+    bm::word_t* get() BMNOEXCEPT { return block_; }
  
  private:
      bit_block_guard(const bit_block_guard&);
diff --git a/c++/include/util/bitset/bmbmatrix.h b/c++/include/util/bitset/bmbmatrix.h

index b8b0ed88c165cfe1df1c9c6be8ad19ac5e63decd..06908a806c8802f841a20cc03bea7d9ab88a69d5 100644 (file)
--- a/c++/include/util/bitset/bmbmatrix.h
+++ b/c++/include/util/bitset/bmbmatrix.h
@@ -72,7 +72,7 @@ public:
                    allocation_policy_type ap = allocation_policy_type(),
                    size_type bv_max_size = bm::id_max,
                    const allocator_type&   alloc  = allocator_type());
-    ~basic_bmatrix() BMNOEXEPT;
+    ~basic_bmatrix() BMNOEXCEPT;
      
      /*! copy-ctor */
      basic_bmatrix(const basic_bmatrix<BV>& bbm);
@@ -84,10 +84,10 @@ public:
      
  #ifndef BM_NO_CXX11
      /*! move-ctor */
-    basic_bmatrix(basic_bmatrix<BV>&& bbm) BMNOEXEPT;
+    basic_bmatrix(basic_bmatrix<BV>&& bbm) BMNOEXCEPT;
  
      /*! move assignmment operator */
-    basic_bmatrix<BV>& operator = (basic_bmatrix<BV>&& bbm) BMNOEXEPT
+    basic_bmatrix<BV>& operator = (basic_bmatrix<BV>&& bbm) BMNOEXCEPT
      {
          if (this != &bbm)
          {
@@ -98,7 +98,8 @@ public:
      }
  #endif
  
-    void set_allocator_pool(allocator_pool_type* pool_ptr) { pool_ = pool_ptr; }
+    void set_allocator_pool(allocator_pool_type* pool_ptr) BMNOEXCEPT
+    { pool_ = pool_ptr; }
  
      ///@}
      
@@ -107,7 +108,7 @@ public:
      ///@{
  
      /*! Swap content */
-    void swap(basic_bmatrix<BV>& bbm) BMNOEXEPT;
+    void swap(basic_bmatrix<BV>& bbm) BMNOEXCEPT;
      
      /*! Copy content */
      void copy_from(const basic_bmatrix<BV>& bbm);
@@ -118,17 +119,17 @@ public:
      /*! @name row access                                         */
      ///@{
  
-    /*! Get row bit-vector */
-    const bvector_type* row(size_type i) const;
+    /*! Get row bit-vector. Can return NULL */
+    const bvector_type* row(size_type i) const BMNOEXCEPT;
  
-    /*! Get row bit-vector */
-    bvector_type_const_ptr get_row(size_type i) const;
+    /*! Get row bit-vector. Can return NULL */
+    bvector_type_const_ptr get_row(size_type i) const BMNOEXCEPT;
  
-    /*! Get row bit-vector */
-    bvector_type* get_row(size_type i);
+    /*! Get row bit-vector. Can return NULL */
+    bvector_type* get_row(size_type i) BMNOEXCEPT;
      
      /*! get number of value rows */
-    size_type rows() const { return rsize_; }
+    size_type rows() const BMNOEXCEPT { return rsize_; }
      
      /*! Make sure row is constructed, return bit-vector */
      bvector_type_ptr construct_row(size_type row);
@@ -168,7 +169,7 @@ public:
          @param pos - column position in the matrix
          @param octet_idx - octet based row position (1 octet - 8 rows)
      */
-    unsigned char get_octet(size_type pos, size_type octet_idx) const;
+    unsigned char get_octet(size_type pos, size_type octet_idx) const BMNOEXCEPT;
      
      /*!
          Compare vector[pos] with octet
@@ -183,7 +184,7 @@ public:
          @return 0 - equal, -1 - less(vect[pos] < octet), 1 - greater
      */
      int compare_octet(size_type pos,
-                      size_type octet_idx, char octet) const;
+                      size_type octet_idx, char octet) const BMNOEXCEPT;
      
      ///@}
  
@@ -194,12 +195,13 @@ public:
      ///@{
      
      /// Test if 4 rows from i are not NULL
-    bool test_4rows(unsigned i) const;
+    bool test_4rows(unsigned i) const BMNOEXCEPT;
  
      /// Get low level internal access to
-    const bm::word_t* get_block(size_type p, unsigned i, unsigned j) const;
+    const bm::word_t* get_block(size_type p,
+                                unsigned i, unsigned j) const BMNOEXCEPT;
      
-    unsigned get_half_octet(size_type pos, size_type row_idx) const;
+    unsigned get_half_octet(size_type pos, size_type row_idx) const BMNOEXCEPT;
  
      /*!
          \brief run memory optimization for all bit-vector rows
@@ -222,7 +224,7 @@ public:
  
  protected:
      void allocate_rows(size_type rsize);
-    void free_rows() BMNOEXEPT;
+    void free_rows() BMNOEXCEPT;
  
      bvector_type* construct_bvector(const bvector_type* bv) const;
      void destruct_bvector(bvector_type* bv) const;
@@ -286,7 +288,7 @@ public:
  
  #ifndef BM_NO_CXX11
      /*! move-ctor */
-    base_sparse_vector(base_sparse_vector<Val, BV, MAX_SIZE>&& bsv) BMNOEXEPT
+    base_sparse_vector(base_sparse_vector<Val, BV, MAX_SIZE>&& bsv) BMNOEXCEPT
      {
          bmatr_.swap(bsv.bmatr_);
          size_ = bsv.size_;
@@ -295,19 +297,19 @@ public:
      }
  #endif
  
-    void swap(base_sparse_vector<Val, BV, MAX_SIZE>& bsv) BMNOEXEPT;
+    void swap(base_sparse_vector<Val, BV, MAX_SIZE>& bsv) BMNOEXCEPT;
  
-    size_type size() const { return size_; }
+    size_type size() const BMNOEXCEPT { return size_; }
      
      void resize(size_type new_size);
      
      void clear_range(size_type left, size_type right, bool set_null);
  
      /*! \brief resize to zero, free memory */
-    void clear() BMNOEXEPT;
+    void clear() BMNOEXCEPT;
      
      /*! return true if empty */
-    bool empty() const { return size() == 0; }
+    bool empty() const BMNOEXCEPT { return size() == 0; }
  
  public:
  
@@ -317,13 +319,14 @@ public:
      /**
          \brief check if container supports NULL(unassigned) values
      */
-    bool is_nullable() const { return bmatr_.get_row(this->null_plain()) != 0; }
+    bool is_nullable() const BMNOEXCEPT
+        { return bmatr_.get_row(this->null_plain()) != 0; }
  
      /**
          \brief Get bit-vector of assigned values or NULL
          (if not constructed that way)
      */
-    const bvector_type* get_null_bvector() const
+    const bvector_type* get_null_bvector() const BMNOEXCEPT
          { return bmatr_.get_row(this->null_plain()); }
      
      /** \brief test if specified element is NULL
@@ -331,7 +334,7 @@ public:
          \return true if it is NULL false if it was assigned or container
          is not configured to support assignment flags
      */
-    bool is_null(size_type idx) const;
+    bool is_null(size_type idx) const BMNOEXCEPT;
      
  
      ///@}
@@ -352,25 +355,27 @@ public:
          \return bit-vector for the bit plain or NULL
      */
      bvector_type_const_ptr
-    get_plain(unsigned i) const { return bmatr_.row(i); }
+    get_plain(unsigned i) const BMNOEXCEPT { return bmatr_.row(i); }
  
      /*!
          \brief get total number of bit-plains in the vector
      */
-    static unsigned plains() { return value_bits(); }
+    static unsigned plains() BMNOEXCEPT { return value_bits(); }
  
      /** Number of stored bit-plains (value plains + extra */
-    static unsigned stored_plains() { return value_bits()+1; }
+    static unsigned stored_plains() BMNOEXCEPT { return value_bits()+1; }
  
  
      /** Number of effective bit-plains in the value type */
-    unsigned effective_plains() const { return effective_plains_ + 1; }
+    unsigned effective_plains() const BMNOEXCEPT
+                                { return effective_plains_ + 1; }
  
      /*!
          \brief get access to bit-plain as is (can return NULL)
      */
-    bvector_type_ptr plain(unsigned i) { return bmatr_.get_row(i); }
-    const bvector_type_ptr plain(unsigned i) const { return bmatr_.get_row(i); }
+    bvector_type_ptr plain(unsigned i) BMNOEXCEPT { return bmatr_.get_row(i); }
+    bvector_type_const_ptr plain(unsigned i) const BMNOEXCEPT
+                                    { return bmatr_.get_row(i); }
  
      bvector_type* get_null_bvect() { return bmatr_.get_row(this->null_plain());}
  
@@ -388,12 +393,12 @@ public:
          @return 64-bit mask
          @internal
      */
-    bm::id64_t get_plains_mask(unsigned element_idx) const;
+    bm::id64_t get_plains_mask(unsigned element_idx) const BMNOEXCEPT;
  
      /*!
          get read-only access to inetrnal bit-matrix
      */
-    const bmatrix_type& get_bmatrix() const { return bmatr_; }
+    const bmatrix_type& get_bmatrix() const BMNOEXCEPT { return bmatr_; }
      ///@}
      
      /*!
@@ -417,7 +422,7 @@ public:
  
          @sa statistics
      */
-    void calc_stat(typename bvector_type::statistics* st) const;
+    void calc_stat(typename bvector_type::statistics* st) const BMNOEXCEPT;
  
      /*!
          \brief check if another sparse vector has the same content and size
@@ -429,7 +434,7 @@ public:
          \return true, if it is the same
      */
      bool equal(const base_sparse_vector<Val, BV, MAX_SIZE>& sv,
-               bm::null_support null_able = bm::use_null) const;
+               bm::null_support null_able = bm::use_null) const BMNOEXCEPT;
  
  protected:
      void copy_from(const base_sparse_vector<Val, BV, MAX_SIZE>& bsv);
@@ -463,13 +468,13 @@ protected:
      typedef typename bvector_type::block_idx_type block_idx_type;
  
      /** Number of total bit-plains in the value type*/
-    static unsigned value_bits()
+    static unsigned value_bits() BMNOEXCEPT
      {
          return base_sparse_vector<Val, BV, MAX_SIZE>::sv_value_plains;
      }
      
      /** plain index for the "NOT NULL" flags plain */
-    static unsigned null_plain() { return value_bits(); }
+    static unsigned null_plain() BMNOEXCEPT { return value_bits(); }
      
      /** optimize block in all matrix plains */
      void optimize_block(block_idx_type nb)
@@ -515,7 +520,7 @@ basic_bmatrix<BV>::basic_bmatrix(size_type rsize,
  //---------------------------------------------------------------------
  
  template<typename BV>
-basic_bmatrix<BV>::~basic_bmatrix() BMNOEXEPT
+basic_bmatrix<BV>::~basic_bmatrix() BMNOEXCEPT
  {
      free_rows();
  }
@@ -537,7 +542,7 @@ basic_bmatrix<BV>::basic_bmatrix(const basic_bmatrix<BV>& bbm)
  //---------------------------------------------------------------------
  
  template<typename BV>
-basic_bmatrix<BV>::basic_bmatrix(basic_bmatrix<BV>&& bbm) BMNOEXEPT
+basic_bmatrix<BV>::basic_bmatrix(basic_bmatrix<BV>&& bbm) BMNOEXCEPT
  : bv_size_(bbm.bv_size_),
    alloc_(bbm.alloc_),
    ap_(bbm.ap_),
@@ -552,7 +557,7 @@ basic_bmatrix<BV>::basic_bmatrix(basic_bmatrix<BV>&& bbm) BMNOEXEPT
  
  template<typename BV>
  const typename basic_bmatrix<BV>::bvector_type*
-basic_bmatrix<BV>::row(size_type i) const
+basic_bmatrix<BV>::row(size_type i) const BMNOEXCEPT
  {
      BM_ASSERT(i < rsize_);
      return bv_rows_[i];
@@ -562,7 +567,7 @@ basic_bmatrix<BV>::row(size_type i) const
  
  template<typename BV>
  const typename basic_bmatrix<BV>::bvector_type*
-basic_bmatrix<BV>::get_row(size_type i) const
+basic_bmatrix<BV>::get_row(size_type i) const BMNOEXCEPT
  {
      BM_ASSERT(i < rsize_);
      return bv_rows_[i];
@@ -572,7 +577,7 @@ basic_bmatrix<BV>::get_row(size_type i) const
  
  template<typename BV>
  typename basic_bmatrix<BV>::bvector_type*
-basic_bmatrix<BV>::get_row(size_type i)
+basic_bmatrix<BV>::get_row(size_type i) BMNOEXCEPT
  {
      BM_ASSERT(i < rsize_);
      return bv_rows_[i];
@@ -581,7 +586,7 @@ basic_bmatrix<BV>::get_row(size_type i)
  //---------------------------------------------------------------------
  
  template<typename BV>
-bool basic_bmatrix<BV>::test_4rows(unsigned j) const
+bool basic_bmatrix<BV>::test_4rows(unsigned j) const BMNOEXCEPT
  {
      BM_ASSERT((j + 4) <= rsize_);
  #if defined(BM64_SSE4)
@@ -593,7 +598,8 @@ bool basic_bmatrix<BV>::test_4rows(unsigned j) const
          __m256i w0 = _mm256_loadu_si256((__m256i*)(bv_rows_ + j));
          return !_mm256_testz_si256(w0, w0);
  #else
-        bool b = bv_rows_[j + 0] || bv_rows_[j + 1] || bv_rows_[j + 2] || bv_rows_[j + 3];
+        bool b = bv_rows_[j + 0] || bv_rows_[j + 1] ||
+                 bv_rows_[j + 2] || bv_rows_[j + 3];
          return b;
  #endif
  }
@@ -655,7 +661,7 @@ void basic_bmatrix<BV>::allocate_rows(size_type rsize)
  //---------------------------------------------------------------------
  
  template<typename BV>
-void basic_bmatrix<BV>::free_rows() BMNOEXEPT
+void basic_bmatrix<BV>::free_rows() BMNOEXCEPT
  {
      for (size_type i = 0; i < rsize_; ++i)
      {
@@ -676,7 +682,7 @@ void basic_bmatrix<BV>::free_rows() BMNOEXEPT
  //---------------------------------------------------------------------
  
  template<typename BV>
-void basic_bmatrix<BV>::swap(basic_bmatrix<BV>& bbm) BMNOEXEPT
+void basic_bmatrix<BV>::swap(basic_bmatrix<BV>& bbm) BMNOEXCEPT
  {
      if (this == &bbm)
          return;
@@ -795,12 +801,14 @@ void basic_bmatrix<BV>::destruct_bvector(bvector_type* bv) const
  
  template<typename BV>
  const bm::word_t*
-basic_bmatrix<BV>::get_block(size_type p, unsigned i, unsigned j) const
+basic_bmatrix<BV>::get_block(size_type p,
+                             unsigned i, unsigned j) const BMNOEXCEPT
  {
      bvector_type_const_ptr bv = this->row(p);
      if (bv)
      {
-        const typename bvector_type::blocks_manager_type& bman = bv->get_blocks_manager();
+        const typename bvector_type::blocks_manager_type& bman =
+                                            bv->get_blocks_manager();
          return bman.get_block_ptr(i, j);
      }
      return 0;
@@ -902,7 +910,7 @@ void basic_bmatrix<BV>::insert_octet(size_type pos,
  
  template<typename BV>
  unsigned char
-basic_bmatrix<BV>::get_octet(size_type pos, size_type octet_idx) const
+basic_bmatrix<BV>::get_octet(size_type pos, size_type octet_idx) const BMNOEXCEPT
  {
      unsigned v = 0;
  
@@ -1003,7 +1011,7 @@ basic_bmatrix<BV>::get_octet(size_type pos, size_type octet_idx) const
  template<typename BV>
  int basic_bmatrix<BV>::compare_octet(size_type pos,
                                       size_type octet_idx,
-                                     char      octet) const
+                                     char      octet) const BMNOEXCEPT
  {
      char value = char(get_octet(pos, octet_idx));
      return (value > octet) - (value < octet);
@@ -1013,7 +1021,7 @@ int basic_bmatrix<BV>::compare_octet(size_type pos,
  
  template<typename BV>
  unsigned
-basic_bmatrix<BV>::get_half_octet(size_type pos, size_type row_idx) const
+basic_bmatrix<BV>::get_half_octet(size_type pos, size_type row_idx) const BMNOEXCEPT
  {
      unsigned v = 0;
  
@@ -1198,7 +1206,7 @@ void base_sparse_vector<Val, BV, MAX_SIZE>::copy_from(
  
  template<class Val, class BV, unsigned MAX_SIZE>
  void base_sparse_vector<Val, BV, MAX_SIZE>::swap(
-                 base_sparse_vector<Val, BV, MAX_SIZE>& bsv) BMNOEXEPT
+                 base_sparse_vector<Val, BV, MAX_SIZE>& bsv) BMNOEXCEPT
  {
      if (this != &bsv)
      {
@@ -1212,7 +1220,7 @@ void base_sparse_vector<Val, BV, MAX_SIZE>::swap(
  //---------------------------------------------------------------------
  
  template<class Val, class BV, unsigned MAX_SIZE>
-void base_sparse_vector<Val, BV, MAX_SIZE>::clear() BMNOEXEPT
+void base_sparse_vector<Val, BV, MAX_SIZE>::clear() BMNOEXCEPT
  {
      unsigned plains = value_bits();
      for (size_type i = 0; i < plains; ++i)
@@ -1274,7 +1282,8 @@ void base_sparse_vector<Val, BV, MAX_SIZE>::resize(size_type sz)
  //---------------------------------------------------------------------
  
  template<class Val, class BV, unsigned MAX_SIZE>
-bool base_sparse_vector<Val, BV, MAX_SIZE>::is_null(size_type idx) const
+bool base_sparse_vector<Val, BV, MAX_SIZE>::is_null(
+                                    size_type idx) const BMNOEXCEPT
  {
      const bvector_type* bv_null = get_null_bvector();
      return (bv_null) ? (!bv_null->test(idx)) : false;
@@ -1312,7 +1321,7 @@ typename base_sparse_vector<Val, BV, MAX_SIZE>::bvector_type_ptr
  
  template<class Val, class BV, unsigned MAX_SIZE>
  bm::id64_t base_sparse_vector<Val, BV, MAX_SIZE>::get_plains_mask(
-                                                 unsigned element_idx) const
+                                        unsigned element_idx) const BMNOEXCEPT
  {
      BM_ASSERT(element_idx < MAX_SIZE);
      bm::id64_t mask = 0;
@@ -1364,7 +1373,7 @@ void base_sparse_vector<Val, BV, MAX_SIZE>::optimize(bm::word_t* temp_block,
  
  template<class Val, class BV, unsigned MAX_SIZE>
  void base_sparse_vector<Val, BV, MAX_SIZE>::calc_stat(
-                    typename bvector_type::statistics* st) const
+                    typename bvector_type::statistics* st) const BMNOEXCEPT
  {
      BM_ASSERT(st);
      
@@ -1409,7 +1418,7 @@ void base_sparse_vector<Val, BV, MAX_SIZE>::clear_value_plains_from(
  
  template<class Val, class BV, unsigned MAX_SIZE>
  void base_sparse_vector<Val, BV, MAX_SIZE>::insert_clear_value_plains_from(
-                                            unsigned plain_idx, size_type idx)
+                                        unsigned plain_idx, size_type idx)
  {
      for (unsigned i = plain_idx; i < sv_value_plains; ++i)
      {
@@ -1437,7 +1446,7 @@ void base_sparse_vector<Val, BV, MAX_SIZE>::erase_column(size_type idx)
  template<class Val, class BV, unsigned MAX_SIZE>
  bool base_sparse_vector<Val, BV, MAX_SIZE>::equal(
              const base_sparse_vector<Val, BV, MAX_SIZE>& sv,
-             bm::null_support null_able) const
+             bm::null_support null_able) const BMNOEXCEPT
  {
      size_type arg_size = sv.size();
      if (this->size_ != arg_size)
@@ -1468,11 +1477,6 @@ bool base_sparse_vector<Val, BV, MAX_SIZE>::equal(
          bool eq = bv->equal(*arg_bv);
          if (!eq)
              return false;
-        /*
-        int cmp = bv->compare(*arg_bv);
-        if (cmp != 0)
-            return false;
-        */
      } // for j
      
      if (null_able == bm::use_null)
@@ -1490,11 +1494,6 @@ bool base_sparse_vector<Val, BV, MAX_SIZE>::equal(
          bool eq = bv_null->equal(*bv_null_arg);
          if (!eq)
              return false;
-        /*
-        int cmp = bv_null->compare(*bv_null);
-        if (cmp != 0)
-            return false;
-        */
      }
      return true;
  }
diff --git a/c++/include/util/bitset/bmbuffer.h b/c++/include/util/bitset/bmbuffer.h

index eba6d7e83f710e90b7eab0cf5e96f95555b42b34..8abdcc859ded6ec148b93768ed93aefeb162ccf0 100644 (file)
--- a/c++/include/util/bitset/bmbuffer.h
+++ b/c++/include/util/bitset/bmbuffer.h
@@ -33,35 +33,35 @@ namespace bm
  class byte_buffer_ptr
  {
  public:
-    byte_buffer_ptr()
+    byte_buffer_ptr() BMNOEXCEPT
          : byte_buf_(0), size_(0)
      {}
      
      /// construct byte buffer pointer
      ///
-    byte_buffer_ptr(unsigned char* in_buf, size_t in_size)
+    byte_buffer_ptr(unsigned char* in_buf, size_t in_size) BMNOEXCEPT
          : byte_buf_(in_buf), size_(in_size)
      {}
      
      /// Set buffer pointer
-    void set_buf(unsigned char* in_buf, size_t in_size)
+    void set_buf(unsigned char* in_buf, size_t in_size) BMNOEXCEPT
      {
          byte_buf_ = in_buf; size_= in_size;
      }
  
      /// Get buffer size
-    size_t size() const { return size_; }
+    size_t size() const BMNOEXCEPT { return size_; }
      
      /// Get read access to buffer memory
-    const unsigned char* buf() const { return byte_buf_; }
+    const unsigned char* buf() const BMNOEXCEPT { return byte_buf_; }
  
      /// Get write access to buffer memory
-    unsigned char* data() { return byte_buf_; }
+    unsigned char* data() BMNOEXCEPT { return byte_buf_; }
  
-    bool operator==(const byte_buffer_ptr& lhs) const { return equal(lhs); }
+    bool operator==(const byte_buffer_ptr& lhs) const BMNOEXCEPT { return equal(lhs); }
      
      /// return true if content and size is the same
-    bool equal(const byte_buffer_ptr& lhs) const
+    bool equal(const byte_buffer_ptr& lhs) const BMNOEXCEPT
      {
          if (this == &lhs)
              return true;
@@ -89,7 +89,7 @@ public:
      typedef size_t                                           size_type;
      
  public:
-    byte_buffer() : capacity_(0), alloc_factor_(0)
+    byte_buffer() BMNOEXCEPT : capacity_(0), alloc_factor_(0)
      {}
      
      byte_buffer(size_t in_capacity)
@@ -98,7 +98,7 @@ public:
          allocate(in_capacity);
      }
      
-    byte_buffer(const byte_buffer& lhs)
+    byte_buffer(const byte_buffer& lhs) BMNOEXCEPT
      {
          byte_buf_ = 0;
          size_ = capacity_ = alloc_factor_ = 0;
@@ -110,7 +110,7 @@ public:
      
  #ifndef BM_NO_CXX11
      /// Move constructor
-    byte_buffer(byte_buffer&& in_buf) BMNOEXEPT
+    byte_buffer(byte_buffer&& in_buf) BMNOEXCEPT
      {
          byte_buf_ = in_buf.byte_buf_;
          in_buf.byte_buf_ = 0;
@@ -121,14 +121,14 @@ public:
      }
      
      /// Move assignment operator
-    byte_buffer& operator=(byte_buffer&& lhs) BMNOEXEPT
+    byte_buffer& operator=(byte_buffer&& lhs) BMNOEXCEPT
      {
          move_from(lhs);
          return *this;
      }
  #endif
  
-    byte_buffer& operator=(const byte_buffer& lhs)
+    byte_buffer& operator=(const byte_buffer& lhs) BMNOEXCEPT
      {
          if (this == &lhs)
              return *this;
@@ -143,7 +143,7 @@ public:
      }
      
      /// swap content with another buffer
-    void swap(byte_buffer& other) BMNOEXEPT
+    void swap(byte_buffer& other) BMNOEXCEPT
      {
          if (this == &other)
              return;
@@ -157,7 +157,7 @@ public:
      }
      
      /// take/move content from another buffer
-    void move_from(byte_buffer& other) BMNOEXEPT
+    void move_from(byte_buffer& other) BMNOEXCEPT
      {
          if (this == &other)
              return;
@@ -190,7 +190,7 @@ public:
      
      
      /// Get buffer capacity
-    size_t capacity() const { return capacity_; }
+    size_t capacity() const BMNOEXCEPT { return capacity_; }
  
      /// adjust current size (buffer content preserved)
      void resize(size_t new_size, bool copy_content = true)
@@ -213,6 +213,11 @@ public:
      {
          if (new_capacity <= capacity_)
              return;
+        if (!capacity_)
+        {
+            allocate(new_capacity);
+            return;
+        }
          
          byte_buffer tmp_buffer(new_capacity);
          tmp_buffer = *this;
@@ -247,7 +252,7 @@ public:
      }
      
      /// return memory consumtion
-    size_t mem_usage() const
+    size_t mem_usage() const BMNOEXCEPT
      {
          return sizeof(capacity_) + sizeof(alloc_factor_) +
                 capacity();
@@ -258,7 +263,7 @@ private:
      void set_buf(unsigned char* buf, size_t size);
  
      /// compute number of words for the desired capacity
-    static size_t compute_words(size_t capacity)
+    static size_t compute_words(size_t capacity) BMNOEXCEPT
      {
          size_t words = (capacity / sizeof(bm::word_t))+1;
          return words;
@@ -307,10 +312,8 @@ public:
      typedef Val                                              value_type;
      typedef typename buffer_type::size_type                  size_type;
  
-    heap_vector()
-        : buffer_()    
-    {
-    }
+    heap_vector() BMNOEXCEPT : buffer_()
+    {}
  
      heap_vector(const heap_vector<Val, BVAlloc, trivial_type>& hv)
          : buffer_()
@@ -359,14 +362,14 @@ public:
          }
      }
      
-    value_type* data() { return (value_type*) buffer_.data(); }
+    value_type* data() BMNOEXCEPT { return (value_type*) buffer_.data(); }
  
-    void swap(heap_vector<Val, BVAlloc, trivial_type>& other) BMNOEXEPT
+    void swap(heap_vector<Val, BVAlloc, trivial_type>& other) BMNOEXCEPT
      {
          buffer_.swap(other.buffer_);
      }
  
-    const value_type& operator[](size_type pos) const
+    const value_type& operator[](size_type pos) const BMNOEXCEPT
      {
          BM_ASSERT(pos < size());
          size_type v_size = value_size();
@@ -374,7 +377,7 @@ public:
          return *reinterpret_cast<const value_type*>(p);
      }
  
-    value_type& operator[](size_type pos)
+    value_type& operator[](size_type pos) BMNOEXCEPT
      {
          BM_ASSERT(pos < size());
          size_type v_size = value_size();
@@ -393,22 +396,22 @@ public:
          return *reinterpret_cast<value_type*>(p);
      }
      
-    const value_type* begin() const
+    const value_type* begin() const BMNOEXCEPT
      {
          return (const value_type*) buffer_.buf();
      }
  
-    size_type size() const
+    size_type size() const BMNOEXCEPT
      {
          return buffer_.size() / value_size();
      }
  
-    size_type capacity() const
+    size_type capacity() const BMNOEXCEPT
      {
          return buffer_.capacity() / value_size();
      }
  
-    bool empty() const
+    bool empty() const BMNOEXCEPT
      {
          return (buffer_.size() == 0);
      }
@@ -492,7 +495,7 @@ protected:
          buffer_.resize(new_size * v_size);
      }
  
-    static size_type value_size()
+    static size_type value_size() BMNOEXCEPT
      {
          size_type size_of = sizeof(value_type);
          return size_of;
@@ -537,13 +540,13 @@ public:
          row_size_in_bytes = sizeof(value_type) * COLS
      };
  
-    static size_t rows() { return ROWS; }
-    static size_t cols() { return COLS; }
+    static size_t rows() BMNOEXCEPT { return ROWS; }
+    static size_t cols() BMNOEXCEPT { return COLS; }
  
      /**
          By default object is constructed NOT allocated.
      */
-    heap_matrix()
+    heap_matrix() BMNOEXCEPT
          : buffer_()
      {}
  
@@ -565,12 +568,12 @@ public:
          buffer_.resize(size_in_bytes);
      }
      
-    bool is_init() const
+    bool is_init() const BMNOEXCEPT
      {
          return buffer_.size();
      }
  
-    value_type get(size_type row_idx, size_type col_idx) const
+    value_type get(size_type row_idx, size_type col_idx) const BMNOEXCEPT
      {
          BM_ASSERT(row_idx < ROWS);
          BM_ASSERT(col_idx < COLS);
@@ -579,7 +582,7 @@ public:
          return ((const value_type*)buf)[col_idx];
      }
  
-    const value_type* row(size_type row_idx) const
+    const value_type* row(size_type row_idx) const BMNOEXCEPT
      {
          BM_ASSERT(row_idx < ROWS);
          BM_ASSERT(buffer_.size());
@@ -587,7 +590,7 @@ public:
          return (const value_type*) buf;
      }
  
-    value_type* row(size_type row_idx)
+    value_type* row(size_type row_idx) BMNOEXCEPT
      {
          BM_ASSERT(row_idx < ROWS);
          BM_ASSERT(buffer_.size());
@@ -597,21 +600,21 @@ public:
      }
  
      /** memset all buffer to all zeroes */
-    void set_zero()
+    void set_zero() BMNOEXCEPT
      {
          ::memset(buffer_.data(), 0, size_in_bytes);
      }
      
      /*!  swap content
      */
-    void swap(heap_matrix& other) BMNOEXEPT
+    void swap(heap_matrix& other) BMNOEXCEPT
      {
          buffer_.swap(other.buffer_);
      }
      
      /*!  move content from another matrix
      */
-    void move_from(heap_matrix& other) BMNOEXEPT
+    void move_from(heap_matrix& other) BMNOEXCEPT
      {
          buffer_.move_from(other.buffer_);
      }
@@ -624,7 +627,7 @@ public:
      /*! remapping: vect[idx] = matrix[idx, vect[idx] ]
      */
      template<typename VECT_TYPE>
-    void remap(VECT_TYPE* vect, size_type size) const
+    void remap(VECT_TYPE* vect, size_type size) const BMNOEXCEPT
      {
          BM_ASSERT(size <= ROWS);
          const unsigned char* buf = buffer_.buf();
@@ -641,7 +644,7 @@ public:
      /*! zero-terminated remap: vect[idx] = matrix[idx, vect[idx] ]
      */
      template<typename VECT_TYPE>
-    void remapz(VECT_TYPE* vect) const
+    void remapz(VECT_TYPE* vect) const BMNOEXCEPT
      {
          const unsigned char* buf = buffer_.buf();
          for (size_type i = 0; i < ROWS; ++i)
@@ -704,12 +707,12 @@ public:
          buffer_.resize(size_in_bytes());
      }
      
-    bool is_init() const
+    bool is_init() const BMNOEXCEPT
      {
          return buffer_.size();
      }
  
-    const value_type* row(size_type row_idx) const
+    const value_type* row(size_type row_idx) const BMNOEXCEPT
      {
          BM_ASSERT(row_idx < rows_);
          BM_ASSERT(buffer_.size());
@@ -717,7 +720,7 @@ public:
          return (const value_type*) buf;
      }
  
-    value_type* row(size_type row_idx)
+    value_type* row(size_type row_idx) BMNOEXCEPT
      {
          BM_ASSERT(row_idx < rows_);
          BM_ASSERT(buffer_.size());
@@ -726,15 +729,31 @@ public:
          return (value_type*)buf;
      }
  
+    /**
+        Read a single matrix element.
+        Const-qualified so it can be called on a read-only matrix;
+        it resolves to the const row() accessor and modifies nothing.
+        @param row_idx - row index (asserted to be < rows_)
+        @param col_idx - column index (asserted to be < cols_)
+        @return copy of the element stored at [row_idx, col_idx]
+    */
+    value_type get(size_type row_idx, size_type col_idx) const BMNOEXCEPT
+    {
+        BM_ASSERT(row_idx < rows_);
+        BM_ASSERT(col_idx < cols_);
+        const value_type* r = row(row_idx);
+        return r[col_idx];
+    }
+
+    /**
+        Assign a single matrix element.
+        @param row_idx - row index (asserted to be < rows_)
+        @param col_idx - column index (asserted to be < cols_)
+        @param v - value written to [row_idx, col_idx]
+    */
+    void set(size_type row_idx, size_type col_idx, value_type v) BMNOEXCEPT
+    {
+        BM_ASSERT(row_idx < rows_);
+        BM_ASSERT(col_idx < cols_);
+        row(row_idx)[col_idx] = v;
+    }
+
      /** memset all buffer to all zeroes */
-    void set_zero()
+    void set_zero() BMNOEXCEPT
      {
          ::memset(buffer_.data(), 0, size_in_bytes());
      }
      
      /*!  swap content
      */
-    void swap(dynamic_heap_matrix& other) BMNOEXEPT
+    void swap(dynamic_heap_matrix& other) BMNOEXCEPT
      {
          bm::xor_swap(rows_, other.rows_);
          bm::xor_swap(cols_, other.cols_);
@@ -743,7 +762,7 @@ public:
      
      /*!  move content from another matrix
      */
-    void move_from(dynamic_heap_matrix& other) BMNOEXEPT
+    void move_from(dynamic_heap_matrix& other) BMNOEXCEPT
      {
          rows_ = other.rows_;
          cols_ = other.cols_;
@@ -751,16 +770,46 @@ public:
      }
  
      /** Get low-level buffer access */
-    buffer_type& get_buffer() { return buffer_; }
+    buffer_type& get_buffer() BMNOEXCEPT { return buffer_; }
      /** Get low-level buffer access */
-    const buffer_type& get_buffer() const { return buffer_; }
+    const buffer_type& get_buffer() const BMNOEXCEPT { return buffer_; }
+
+    /**
+        Copy one triangle of the matrix onto the other: every element
+        [r, c] with c > r is overwritten with the value of [c, r];
+        the diagonal and the source triangle are left as they are.
+        (operation specific to matrices with symmetric distances,
+        the matrix is asserted to be square)
+     */
+    void replicate_triange() BMNOEXCEPT
+    {
+        BM_ASSERT(rows_ == cols_);
+        for (size_type r = 0; r < rows_; ++r)
+            for (size_type c = r+1; c < cols_; ++c)
+            {
+                const value_type mirrored = get(c, r);
+                set(r, c, mirrored);
+            }
+    }
+    /**
+        Sum of row elements
+        @param acc - [out] receives the total; its previous value is
+                     overwritten, not added to
+        @param row_idx - row to add up (asserted to be < rows_)
+     */
+    template<typename ACC>
+    void sum(ACC& acc, size_type row_idx) const BMNOEXCEPT
+    {
+        BM_ASSERT(row_idx < rows_);
+        ACC total = 0;
+        const value_type* it = row(row_idx);
+        const value_type* const it_end = it + cols_;
+        for (; it != it_end; ++it)
+            total += *it;
+        acc = total;
+    }
  
  protected:
-    size_type size_in_bytes() const
+
+    size_type size_in_bytes() const BMNOEXCEPT
      {
          return sizeof(value_type) * cols_ * rows_;
      }
-    size_type row_size_in_bytes() const
+    size_type row_size_in_bytes() const BMNOEXCEPT
      {
          return sizeof(value_type) * cols_;
      }
diff --git a/c++/include/util/bitset/bmconst.h b/c++/include/util/bitset/bmconst.h

index e14978657095e2fc7d4a260baf7c6c5db8f1807c..954b223b7f76549f0532b2c5a06792404122e705 100644 (file)
--- a/c++/include/util/bitset/bmconst.h
+++ b/c++/include/util/bitset/bmconst.h
@@ -96,14 +96,14 @@ const unsigned set_array_shift = 8u;
  const unsigned set_array_mask  = 0xFFu;
  
  const unsigned set_total_blocks32 = (bm::set_array_size32 * bm::set_array_size32);
+const unsigned set_sub_total_bits = bm::set_sub_array_size * bm::gap_max_bits;
  
  #ifdef BM64ADDR
  const unsigned set_total_blocks48 = bm::id_max48 / bm::gap_max_bits;
  const unsigned long long id_max = bm::id_max48;
-const unsigned long long set_array_size48 = 1 + (bm::id_max48 / (bm::set_sub_array_size * bm::gap_max_bits));
+const unsigned long long set_array_size48 = 1 + (bm::id_max48 / set_sub_total_bits);
  const unsigned  set_top_array_size = bm::set_array_size48;
  const id64_t set_total_blocks = id64_t(bm::set_top_array_size) * set_sub_array_size;
-//bm::id_max / (bm::gap_max_bits * bm::set_sub_array_size);
  #else
  const unsigned id_max = bm::id_max32;
  const unsigned set_top_array_size = bm::set_array_size32;
@@ -228,8 +228,8 @@ template<bool T> struct _copyright
  };
  
  template<bool T> const char _copyright<T>::_p[] = 
-    "BitMagic C++ Library. v.6.0.0 (c) 2002-2020 Anatoliy Kuznetsov.";
-template<bool T> const unsigned _copyright<T>::_v[3] = {6, 0, 0};
+    "BitMagic C++ Library. v.6.4.0 (c) 2002-2020 Anatoliy Kuznetsov.";
+template<bool T> const unsigned _copyright<T>::_v[3] = {6, 4, 0};
  
  
  
diff --git a/c++/include/util/bitset/bmdbg.h b/c++/include/util/bitset/bmdbg.h

index 4c4e3bbdcf6d115fe709d81bb1b42d8ebd0e98ab..f0796e26e742803d07a76e0e6e64f94ff11d2dc8 100644 (file)
--- a/c++/include/util/bitset/bmdbg.h
+++ b/c++/include/util/bitset/bmdbg.h
@@ -510,15 +510,15 @@ void print_stat(const BV& bv, typename BV::block_idx_type blocks = 0)
  }
  
  template<class BV>
-unsigned compute_serialization_size(const BV& bv)
+size_t compute_serialization_size(const BV& bv)
  {
      BM_DECLARE_TEMP_BLOCK(tb)
      unsigned char*  buf = 0;
-    unsigned blob_size = 0;
+    typename BV::size_type blob_size = 0;
      try
      {
          bm::serializer<BV> bvs(typename BV::allocator_type(), tb);
-        bvs.set_compression_level(4);
+        //bvs.set_compression_level(4);
          
          typename BV::statistics st;
          bv.calc_stat(&st);
@@ -677,12 +677,12 @@ void print_svector_stat(const SV& svect, bool print_sim = false)
                  const typename SV::bvector_type* bv1 = sim_vec[k].get_first();
                  const typename SV::bvector_type* bv2 = sim_vec[k].get_second();
  
-                unsigned bv_size2 = compute_serialization_size(*bv2);
+                auto bv_size2 = compute_serialization_size(*bv2);
                  
                  typename SV::bvector_type bvx(*bv2);
                  bvx ^= *bv1;
                  
-                unsigned bv_size_x = compute_serialization_size(bvx);
+                auto bv_size_x = compute_serialization_size(bvx);
                  if (bv_size_x < bv_size2) // true savings
                  {
                      size_t diff = bv_size2 - bv_size_x;
@@ -904,10 +904,15 @@ int file_save_svector(const SV& sv, const std::string& fname, size_t* sv_blob_si
      BM_ASSERT(!fname.empty());
      
      bm::sparse_vector_serial_layout<SV> sv_lay;
-    
+
+    bm::sparse_vector_serializer<SV> sv_serializer;
+    sv_serializer.set_xor_ref(true);
+
+    sv_serializer.serialize(sv, sv_lay);
+/*
      BM_DECLARE_TEMP_BLOCK(tb)
      bm::sparse_vector_serialize(sv, sv_lay, tb);
-
+*/
      std::ofstream fout(fname.c_str(), std::ios::binary);
      if (!fout.good())
      {
@@ -956,7 +961,7 @@ int file_load_svector(SV& sv, const std::string& fname)
  }
  
  
-// comapre-check if sparse vector is excatly coresponds to vector 
+// compare-check that the sparse vector exactly corresponds to the vector
  //
  // returns 0 - if equal
  //         1 - no size match
@@ -979,6 +984,20 @@ int svector_check(const SV& sv, const V& vect)
  }
  
  
+// Append every value produced by the enumerator of bv to sv,
+// going through the back inserter of sv (flushed once at the end).
+// NOTE(review): enumerator values are presumably the indexes of set bits -
+// confirm against BV::enumerator
+//
+template<class SV, class BV>
+void convert_bv2sv(SV& sv, const BV& bv)
+{
+    typename SV::back_insert_iterator inserter = sv.get_back_inserter();
+    typename BV::enumerator pos = bv.first();
+    while (pos.valid())
+    {
+        auto value = pos.value();
+        inserter = value;
+        ++pos;
+    }
+    inserter.flush();
+}
+
+
  } // namespace
  
  #include "bmundef.h"
diff --git a/c++/include/util/bitset/bmdef.h b/c++/include/util/bitset/bmdef.h

index 4863e63a5773665c78121c2af92480c70b0eb43f..83006e1080c56bd5f869ec3c9a23c167f6f67ec5 100644 (file)
--- a/c++/include/util/bitset/bmdef.h
+++ b/c++/include/util/bitset/bmdef.h
@@ -72,10 +72,15 @@ For more information please visit:  http://bitmagic.io
  // cxx11 features
  //
  #if defined(BM_NO_CXX11) || (defined(_MSC_VER)  &&  _MSC_VER < 1900)
-# define BMNOEXEPT
+# define BMNOEXCEPT
+# define BMNOEXCEPT2
  #else
-# ifndef BMNOEXEPT
-#  define BMNOEXEPT noexcept
+# ifndef BMNOEXCEPT
+#  define BMNOEXCEPT noexcept
+#if defined(__EMSCRIPTEN__)
+#else
+#  define BMNOEXCEPT2
+#endif
  # endif
  #endif
  
@@ -84,16 +89,14 @@ For more information please visit:  http://bitmagic.io
  // detects use of EMSCRIPTEN engine and tweaks settings
  // WebAssemply compiles into 32-bit ptr yet 64-bit wordsize use GCC extensions
  //
+// BMNOEXCEPT2 is to declare "noexcept" for WebAsm only where needed
+// and silence GCC warnings where not
  #if defined(__EMSCRIPTEN__)
  # define BM64OPT
  # define BM_USE_GCC_BUILD
-//# define BM_FORBID_UNALIGNED_ACCESS
-#endif
-
-// disable 'register' keyword, which is obsolete in C++11
-//
-#ifndef BMREGISTER
-# define BMREGISTER
+# define BMNOEXCEPT2 noexcept
+#else
+#  define BMNOEXCEPT2
  #endif
  
  
diff --git a/c++/include/util/bitset/bmfunc.h b/c++/include/util/bitset/bmfunc.h

index 06d213a9dd4d517c80fae872f64f444efdb74d2b..e874b28d2200f5ca01a23292f85e6e70a7fc8898 100644 (file)
--- a/c++/include/util/bitset/bmfunc.h
+++ b/c++/include/util/bitset/bmfunc.h
@@ -39,12 +39,12 @@ namespace bm
  inline 
  bm::id_t bit_block_calc_count_range(const bm::word_t* block,
                                      bm::word_t left,
-                                    bm::word_t right);
+                                    bm::word_t right) BMNOEXCEPT;
  
  inline 
  bm::id_t bit_block_any_range(const bm::word_t* block,
                               bm::word_t left,
-                             bm::word_t right);
+                             bm::word_t right) BMNOEXCEPT;
  
  /*!
      @brief Structure with statistical information about memory
@@ -64,7 +64,7 @@ struct bv_statistics
      unsigned long long gaps_by_level[bm::gap_levels]; ///< number of GAP blocks at each level
  
      /// cound bit block
-    void add_bit_block()
+    void add_bit_block() BMNOEXCEPT
      {
          ++bit_blocks;
          size_t mem_used = sizeof(bm::word_t) * bm::set_block_size;
@@ -73,7 +73,7 @@ struct bv_statistics
      }
  
      /// count gap block
-    void add_gap_block(unsigned capacity, unsigned length)
+    void add_gap_block(unsigned capacity, unsigned length) BMNOEXCEPT
      {
          ++gap_blocks;
          size_t mem_used = (capacity * sizeof(gap_word_t));
@@ -93,7 +93,7 @@ struct bv_statistics
      }
      
      /// Reset statisctics
-    void reset()
+    void reset() BMNOEXCEPT
      {
          bit_blocks = gap_blocks = ptr_sub_blocks = bv_count = 0;
          max_serialize_mem = memory_used = gap_cap_overhead = 0;
@@ -102,10 +102,11 @@ struct bv_statistics
      }
      
      /// Sum data from another sttructure
-    void add(const bv_statistics& st)
+    void add(const bv_statistics& st) BMNOEXCEPT
      {
          bit_blocks += st.bit_blocks;
          gap_blocks += st.gap_blocks;
+        ptr_sub_blocks += st.ptr_sub_blocks;
          bv_count += st.bv_count;
          max_serialize_mem += st.max_serialize_mem + 8;
          memory_used += st.memory_used;
@@ -121,6 +122,8 @@ struct pair
  {
      First   first;
      Second  second;
+    
+    pair(First f, Second s) : first(f), second(s) {}
  };
  
  /**
@@ -141,24 +144,35 @@ struct  bit_decode_cache
      \internal
  */
  template<typename BI_TYPE>
-void get_block_coord(BI_TYPE nb, unsigned& i, unsigned& j)
+BMFORCEINLINE
+void get_block_coord(BI_TYPE nb, unsigned& i, unsigned& j) BMNOEXCEPT
  {
      i = unsigned(nb >> bm::set_array_shift); // top block address
      j = unsigned(nb &  bm::set_array_mask);  // address in sub-block
  }
  
  /**
-    \brief ad-hoc conditional expressions 
+    Compute bit address of the first bit in a superblock
      \internal
  */
-template <bool b> struct conditional
+template<typename RTYPE>
+BMFORCEINLINE RTYPE get_super_block_start(unsigned i) BMNOEXCEPT
  {
-    static bool test() { return true; }
-};
-template <> struct conditional<false>
+    return RTYPE(i) * bm::set_sub_total_bits;
+}
+
+/**
+    Compute bit address of the first bit in a block
+    \internal
+*/
+template<typename RTYPE>
+BMFORCEINLINE RTYPE get_block_start(unsigned i, unsigned j) BMNOEXCEPT
  {
-    static bool test() { return false; }
-};
+    RTYPE idx = bm::get_super_block_start<RTYPE>(i);
+    idx += (j) * bm::gap_max_bits;
+    return idx;
+}
+
  
  /*! 
      @defgroup gapfunc GAP functions
@@ -177,42 +191,12 @@ template <> struct conditional<false>
   */
  
  
-
-
-/*!
-    Returns BSR value
-    @ingroup bitfunc
-*/
-template <class T>
-unsigned bit_scan_reverse(T value)
-{
-    BM_ASSERT(value);
-    
-    if (bm::conditional<sizeof(T)==8>::test())
-    {
-    #if defined(BM_USE_GCC_BUILD)
-        return (unsigned) (63 - __builtin_clzll(value));
-    #else
-        bm::id64_t v8 = value;
-        v8 >>= 32;
-        unsigned v = (unsigned)v8;
-        if (v)
-        {
-            v = bm::bit_scan_reverse32(v);
-            return v + 32;
-        }
-    #endif
-    }
-    return bit_scan_reverse32((unsigned)value);
-}
-
-
  /*!
      Returns bit count
      @ingroup bitfunc 
  */
  BMFORCEINLINE
-bm::id_t word_bitcount(bm::id_t w)
+bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
  {
  #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
      return bm::id_t(_mm_popcnt_u32(w));
@@ -230,7 +214,7 @@ bm::id_t word_bitcount(bm::id_t w)
  }
  
  inline
-int parallel_popcnt_32(unsigned int n) 
+int parallel_popcnt_32(unsigned int n) BMNOEXCEPT
  {
     unsigned int tmp;
  
@@ -245,7 +229,7 @@ int parallel_popcnt_32(unsigned int n)
      @ingroup bitfunc 
  */
  BMFORCEINLINE
-unsigned word_bitcount64(bm::id64_t x)
+unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT
  {
  #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
  #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
@@ -270,7 +254,7 @@ unsigned word_bitcount64(bm::id64_t x)
  
  inline 
  unsigned bitcount64_4way(bm::id64_t x, bm::id64_t y, 
-                    bm::id64_t u, bm::id64_t v)
+                         bm::id64_t u, bm::id64_t v) BMNOEXCEPT
  {
      const bm::id64_t m1 = 0x5555555555555555U;
      const bm::id64_t m2 = 0x3333333333333333U; 
@@ -400,7 +384,8 @@ void bit_for_each(T w, F& func)
  /*! @brief Adaptor to copy 1 bits to array
      @internal
  */
-template<typename B> class copy_to_array_functor
+template<typename B>
+class copy_to_array_functor
  {
  public:
      copy_to_array_functor(B* bits): bp_(bits)
@@ -408,10 +393,10 @@ public:
  
      B* ptr() { return bp_; }
      
-    void operator()(unsigned bit_idx) { *bp_++ = (B)bit_idx; }
+    void operator()(unsigned bit_idx) BMNOEXCEPT { *bp_++ = (B)bit_idx; }
      
      void operator()(unsigned bit_idx0,
-                    unsigned bit_idx1)
+                    unsigned bit_idx1) BMNOEXCEPT
      {
          bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
          bp_+=2;
@@ -419,7 +404,7 @@ public:
      
      void operator()(unsigned bit_idx0,
                      unsigned bit_idx1,
-                    unsigned bit_idx2)
+                    unsigned bit_idx2) BMNOEXCEPT
      {
          bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2;
          bp_+=3;
@@ -428,7 +413,7 @@ public:
      void operator()(unsigned bit_idx0,
                      unsigned bit_idx1,
                      unsigned bit_idx2,
-                    unsigned bit_idx3)
+                    unsigned bit_idx3) BMNOEXCEPT
      {
          bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
          bp_[2] = (B)bit_idx2; bp_[3] = (B)bit_idx3;
@@ -451,7 +436,8 @@ private:
  
     @ingroup bitfunc
  */
-template<typename T,typename B> unsigned bit_list(T w, B* bits)
+template<typename T,typename B>
+unsigned bit_list(T w, B* bits) BMNOEXCEPT
  {
      copy_to_array_functor<B> func(bits);
      bit_for_each(w, func);
@@ -468,7 +454,8 @@ template<typename T,typename B> unsigned bit_list(T w, B* bits)
  
     @ingroup bitfunc
  */
-template<typename T,typename B> unsigned bit_list_4(T w, B* bits)
+template<typename T,typename B>
+unsigned bit_list_4(T w, B* bits) BMNOEXCEPT
  {
      copy_to_array_functor<B> func(bits);
      bit_for_each_4(w, func);
@@ -486,7 +473,8 @@ template<typename T,typename B> unsigned bit_list_4(T w, B* bits)
      @internal
  */
  template<typename B>
-unsigned short bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs)
+unsigned short
+bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs) BMNOEXCEPT
  {
      unsigned pos = 0;
      while (w)
@@ -508,7 +496,7 @@ unsigned short bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs)
      @internal
  */
  template<typename B>
-unsigned short bitscan_popcnt(bm::id_t w, B* bits)
+unsigned short bitscan_popcnt(bm::id_t w, B* bits) BMNOEXCEPT
  {
      unsigned pos = 0;
      while (w)
@@ -529,29 +517,48 @@ unsigned short bitscan_popcnt(bm::id_t w, B* bits)
      @ingroup bitfunc
  */
  template<typename B>
-unsigned short bitscan_popcnt64(bm::id64_t w, B* bits)
+unsigned short bitscan_popcnt64(bm::id64_t w, B* bits) BMNOEXCEPT
  {
      unsigned short pos = 0;
      while (w)
      {
-        bm::id64_t t = w & -w;
+        bm::id64_t t = bmi_blsi_u64(w); // w & -w;
          bits[pos++] = (B) bm::word_bitcount64(t - 1);
-        w &= w - 1;
+        w = bmi_bslr_u64(w); // w &= w - 1;
+    }
+    return pos;
+}
+
+/*!
+  \brief Unpacks 64-bit word into list of ON bit indexes using popcnt method
+  \param w - value
+  \param bits - pointer on the result array
+  \param offs - value to add to bit position (programmed shift)
+  \return number of bits in the list
+  @ingroup bitfunc
+*/
+template<typename B>
+unsigned short
+bitscan_popcnt64(bm::id64_t w, B* bits, unsigned short offs) BMNOEXCEPT
+{
+    unsigned short pos = 0;
+    while (w)
+    {
+        bm::id64_t t = bmi_blsi_u64(w); // w & -w;
+        bits[pos++] = B(bm::word_bitcount64(t - 1) + offs);
+        w = bmi_bslr_u64(w); // w &= w - 1;
      }
      return pos;
  }
  
+
  template<typename V, typename B>
-unsigned short bitscan(V w, B* bits)
+unsigned short bitscan(V w, B* bits) BMNOEXCEPT
  {
      if (bm::conditional<sizeof(V) == 8>::test())
-    {
          return bm::bitscan_popcnt64(w, bits);
-    }
      else
-    {
          return bm::bitscan_popcnt((bm::word_t)w, bits);
-    }
  }
  
  // --------------------------------------------------------------
@@ -566,7 +573,7 @@ unsigned short bitscan(V w, B* bits)
      \return selected value (inxed of bit set)
  */
  inline
-unsigned word_select64_linear(bm::id64_t w, unsigned rank)
+unsigned word_select64_linear(bm::id64_t w, unsigned rank) BMNOEXCEPT
  {
      BM_ASSERT(w);
      BM_ASSERT(rank);
@@ -589,7 +596,7 @@ unsigned word_select64_linear(bm::id64_t w, unsigned rank)
      \return selected value (inxed of bit set)
  */
  inline
-unsigned word_select64_bitscan(bm::id64_t w, unsigned rank)
+unsigned word_select64_bitscan(bm::id64_t w, unsigned rank) BMNOEXCEPT
  {
      BM_ASSERT(w);
      BM_ASSERT(rank);
@@ -616,7 +623,7 @@ unsigned word_select64_bitscan(bm::id64_t w, unsigned rank)
      \return selected value (inxed of bit set)
  */
  inline
-unsigned word_select64(bm::id64_t w, unsigned rank)
+unsigned word_select64(bm::id64_t w, unsigned rank) BMNOEXCEPT
  {
  #if defined(BMI2_SELECT64)
      return BMI2_SELECT64(w, rank);
@@ -642,7 +649,7 @@ unsigned word_select64(bm::id64_t w, unsigned rank)
     @internal
  */
  BMFORCEINLINE
-bm::id64_t widx_to_digest_mask(unsigned w_idx)
+bm::id64_t widx_to_digest_mask(unsigned w_idx) BMNOEXCEPT
  {
      bm::id64_t mask(1ull);
      return mask << (w_idx / bm::set_block_digest_wave_size);
@@ -657,7 +664,7 @@ bm::id64_t widx_to_digest_mask(unsigned w_idx)
     @internal
  */
  BMFORCEINLINE
-bm::id64_t digest_mask(unsigned from, unsigned to)
+bm::id64_t digest_mask(unsigned from, unsigned to) BMNOEXCEPT
  {
      BM_ASSERT(from <= to);
      
@@ -680,7 +687,8 @@ bm::id64_t digest_mask(unsigned from, unsigned to)
     @internal
  */
  inline
-bool check_zero_digest(bm::id64_t digest, unsigned bitpos_from, unsigned bitpos_to)
+bool check_zero_digest(bm::id64_t digest,
+                       unsigned bitpos_from, unsigned bitpos_to) BMNOEXCEPT
  {
      bm::id64_t mask = bm::digest_mask(bitpos_from, bitpos_to);
      return !(digest & mask);
@@ -695,7 +703,7 @@ bool check_zero_digest(bm::id64_t digest, unsigned bitpos_from, unsigned bitpos_
     @internal
  */
  inline
-void block_init_digest0(bm::word_t* const block, bm::id64_t digest)
+void block_init_digest0(bm::word_t* const block, bm::id64_t digest) BMNOEXCEPT
  {
      unsigned   off;
      for (unsigned i = 0; i < 64; ++i)
@@ -725,7 +733,7 @@ void block_init_digest0(bm::word_t* const block, bm::id64_t digest)
     @internal
  */
  inline
-bm::id64_t calc_block_digest0(const bm::word_t* const block)
+bm::id64_t calc_block_digest0(const bm::word_t* const block) BMNOEXCEPT
  {
      bm::id64_t digest0 = 0;
      unsigned   off;
@@ -766,7 +774,8 @@ bm::id64_t calc_block_digest0(const bm::word_t* const block)
     @internal
  */
  inline
-bm::id64_t update_block_digest0(const bm::word_t* const block, bm::id64_t digest)
+bm::id64_t
+update_block_digest0(const bm::word_t* const block, bm::id64_t digest) BMNOEXCEPT
  {
      const bm::id64_t mask(1ull);
      bm::id64_t d = digest;
@@ -807,7 +816,7 @@ bm::id64_t update_block_digest0(const bm::word_t* const block, bm::id64_t digest
  
  /// Returns true if set operation is constant (bitcount)
  inline
-bool is_const_set_operation(set_operation op)
+bool is_const_set_operation(set_operation op) BMNOEXCEPT
  {
      return (int(op) >= int(set_COUNT));
  }
@@ -816,7 +825,7 @@ bool is_const_set_operation(set_operation op)
      Convert set operation to operation
  */
  inline
-bm::operation setop2op(bm::set_operation op)
+bm::operation setop2op(bm::set_operation op) BMNOEXCEPT
  {
      BM_ASSERT(op == set_AND || 
                op == set_OR  || 
@@ -863,7 +872,7 @@ template<bool T> struct all_set
      // version with minimal branching, super-scalar friendly
      //
      inline
-    static bm::id64_t block_type(const bm::word_t* bp)
+    static bm::id64_t block_type(const bm::word_t* bp) BMNOEXCEPT
      {
          bm::id64_t type;
          if (bm::conditional<sizeof(void*) == 8>::test())
@@ -884,11 +893,11 @@ template<bool T> struct all_set
      }
  
      BMFORCEINLINE 
-    static bool is_full_block(const bm::word_t* bp) 
+    static bool is_full_block(const bm::word_t* bp) BMNOEXCEPT
          { return (bp == _block._p || bp == _block._p_fullp); }
  
      BMFORCEINLINE 
-    static bool is_valid_block_addr(const bm::word_t* bp) 
+    static bool is_valid_block_addr(const bm::word_t* bp) BMNOEXCEPT
          { return (bp && !(bp == _block._p || bp == _block._p_fullp)); }
  
      static all_set_block  _block;
@@ -899,7 +908,7 @@ template<bool T> typename all_set<T>::all_set_block all_set<T>::_block;
  
  /// XOR swap two scalar variables
  template<typename W> 
-void xor_swap(W& x, W& y) 
+void xor_swap(W& x, W& y) BMNOEXCEPT
  {
      BM_ASSERT(&x != &y);
      x ^= y;
@@ -913,7 +922,7 @@ void xor_swap(W& x, W& y)
      @internal
  */
  template<typename N>
-bool find_not_null_ptr(bm::word_t*** arr, N start, N size, N* pos)
+bool find_not_null_ptr(bm::word_t*** arr, N start, N size, N* pos) BMNOEXCEPT
  {
      BM_ASSERT(pos);
  //    BM_ASSERT(start < size);
@@ -1035,7 +1044,7 @@ template<typename T> int wordcmp(T a, T b)
      @ingroup bitfunc
  */
  inline
-bool bit_is_all_zero(const bm::word_t* BMRESTRICT start)
+bool bit_is_all_zero(const bm::word_t* BMRESTRICT start) BMNOEXCEPT
  {
  #if defined(VECT_IS_ZERO_BLOCK)
      return VECT_IS_ZERO_BLOCK(start);
@@ -1062,7 +1071,7 @@ bool bit_is_all_zero(const bm::word_t* BMRESTRICT start)
     @ingroup gapfunc
  */
  BMFORCEINLINE
-bool gap_is_all_zero(const bm::gap_word_t* buf)
+bool gap_is_all_zero(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT
  {
      // (almost) branchless variant:
      return (!(*buf & 1u)) & (!(bm::gap_max_bits - 1 - buf[1]));
@@ -1075,7 +1084,7 @@ bool gap_is_all_zero(const bm::gap_word_t* buf)
     @ingroup gapfunc
  */
  BMFORCEINLINE
-bool gap_is_all_one(const bm::gap_word_t* buf)
+bool gap_is_all_one(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT
  {
      return ((*buf & 1u) && (buf[1] == bm::gap_max_bits - 1));
  }
@@ -1088,7 +1097,7 @@ bool gap_is_all_one(const bm::gap_word_t* buf)
     @ingroup gapfunc
  */
  BMFORCEINLINE
-bm::gap_word_t gap_length(const bm::gap_word_t* buf)
+bm::gap_word_t gap_length(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT
  {
      return (bm::gap_word_t)((*buf >> 3) + 1);
  }
@@ -1103,7 +1112,8 @@ bm::gap_word_t gap_length(const bm::gap_word_t* buf)
     @ingroup gapfunc
  */
  template<typename T>
-unsigned gap_capacity(const T* buf, const T* glevel_len)
+unsigned
+gap_capacity(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT
  {
      return glevel_len[(*buf >> 1) & 3];
  }
@@ -1118,7 +1128,8 @@ unsigned gap_capacity(const T* buf, const T* glevel_len)
     @ingroup gapfunc
  */
  template<typename T>
-unsigned gap_limit(const T* buf, const T* glevel_len)
+unsigned
+gap_limit(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT
  {
      return glevel_len[(*buf >> 1) & 3]-4;
  }
@@ -1132,7 +1143,7 @@ unsigned gap_limit(const T* buf, const T* glevel_len)
     @ingroup gapfunc
  */
  template<typename T>
-T gap_level(const T* buf)
+T gap_level(const T* BMRESTRICT buf) BMNOEXCEPT
  {
      return T((*buf >> 1) & 3u);
  }
@@ -1149,7 +1160,8 @@ T gap_level(const T* buf)
      @ingroup gapfunc
  */
  template<typename T>
-unsigned gap_find_last(const T* buf, unsigned* last)
+unsigned
+gap_find_last(const T* BMRESTRICT buf, unsigned* BMRESTRICT last) BMNOEXCEPT
  {
      BM_ASSERT(last);
  
@@ -1179,7 +1191,8 @@ unsigned gap_find_last(const T* buf, unsigned* last)
      @ingroup gapfunc
  */
  template<typename T>
-unsigned gap_find_first(const T* buf, unsigned* first)
+unsigned
+gap_find_first(const T* BMRESTRICT buf, unsigned* BMRESTRICT first) BMNOEXCEPT
  {
      BM_ASSERT(first);
  
@@ -1206,24 +1219,30 @@ unsigned gap_find_first(const T* buf, unsigned* first)
     @ingroup gapfunc
  */
  template<typename T> 
-unsigned gap_bfind(const T* buf, unsigned pos, unsigned* is_set)
+unsigned gap_bfind(const T* BMRESTRICT buf,
+                   unsigned pos, unsigned* BMRESTRICT is_set) BMNOEXCEPT
  {
      BM_ASSERT(pos < bm::gap_max_bits);
-    *is_set = (*buf) & 1;
+    #undef VECT_GAP_BFIND // TODO: VECTOR bfind causes performance degradation
+    #ifdef VECT_GAP_BFIND
+        return VECT_GAP_BFIND(buf, pos, is_set);
+    #else
+        *is_set = (*buf) & 1;
  
-    unsigned start = 1;
-    unsigned end = 1 + ((*buf) >> 3);
+        unsigned start = 1;
+        unsigned end = 1 + ((*buf) >> 3);
  
-    while ( start != end )
-    {
-        unsigned curr = (start + end) >> 1;
-        if ( buf[curr] < pos )
-            start = curr + 1;
-        else
-            end = curr;
-    }
-    *is_set ^= ((start-1) & 1);
-    return start; 
+        while ( start != end )
+        {
+            unsigned curr = (start + end) >> 1;
+            if ( buf[curr] < pos )
+                start = curr + 1;
+            else
+                end = curr;
+        }
+        *is_set ^= ((start-1) & 1);
+        return start;
+    #endif
  }
  
  
@@ -1234,7 +1253,8 @@ unsigned gap_bfind(const T* buf, unsigned pos, unsigned* is_set)
     \return true if position is in "1" gap
     @ingroup gapfunc
  */
-template<typename T> unsigned gap_test(const T* buf, unsigned pos)
+template<typename T>
+unsigned gap_test(const T* BMRESTRICT buf, unsigned pos) BMNOEXCEPT
  {
      BM_ASSERT(pos < bm::gap_max_bits);
  
@@ -1277,7 +1297,7 @@ template<typename T> unsigned gap_test(const T* buf, unsigned pos)
      @ingroup gapfunc
  */
  template<typename T> 
-unsigned gap_test_unr(const T* buf, const unsigned pos)
+unsigned gap_test_unr(const T* BMRESTRICT buf, const unsigned pos) BMNOEXCEPT
  {
      BM_ASSERT(pos < bm::gap_max_bits);
  
@@ -1286,84 +1306,10 @@ unsigned gap_test_unr(const T* buf, const unsigned pos)
          return (*buf) & 1;
      }
  #if defined(BMSSE2OPT)
-    unsigned start = 1;
-    unsigned end = 1 + ((*buf) >> 3);
-    unsigned dsize = end - start;
-
-    if (dsize < 17)
-    {
-        start = bm::sse2_gap_find(buf + 1, (bm::gap_word_t)pos, dsize);
-        unsigned res = ((*buf) & 1) ^ ((start) & 1);
-        BM_ASSERT(buf[start + 1] >= pos);
-        BM_ASSERT(buf[start] < pos || (start == 0));
-        BM_ASSERT(res == bm::gap_test(buf, pos));
-        return res;
-    }
-    unsigned arr_end = end;
-    while (start != end)
-    {
-        unsigned curr = (start + end) >> 1;
-        if (buf[curr] < pos)
-            start = curr + 1;
-        else
-            end = curr;
-
-        unsigned size = end - start;
-        if (size < 16)
-        {
-            size += (end != arr_end);
-            unsigned idx = bm::sse2_gap_find(buf + start, (bm::gap_word_t)pos, size);
-            start += idx;
-
-            BM_ASSERT(buf[start] >= pos);
-            BM_ASSERT(buf[start - 1] < pos || (start == 1));
-            break;
-        }
-    }
-
-    unsigned res = ((*buf) & 1) ^ ((--start) & 1);
-
+    unsigned res = bm::sse2_gap_test(buf, pos);
      BM_ASSERT(res == bm::gap_test(buf, pos));
-    return res;
-//#endif
  #elif defined(BMSSE42OPT)
-    unsigned start = 1;
-    unsigned end = 1 + ((*buf) >> 3);
-    unsigned dsize = end - start;
-
-    if (dsize < 17)
-    {
-        start = bm::sse4_gap_find(buf+1, (bm::gap_word_t)pos, dsize);
-        unsigned res = ((*buf) & 1) ^ ((start) & 1);
-        BM_ASSERT(buf[start+1] >= pos);
-        BM_ASSERT(buf[start] < pos || (start==0));
-        BM_ASSERT(res == bm::gap_test(buf, pos));
-        return res;
-    }
-    unsigned arr_end = end;
-    while (start != end)
-    {
-        unsigned curr = (start + end) >> 1;
-        if (buf[curr] < pos)
-            start = curr + 1;
-        else
-            end = curr;
-
-        unsigned size = end - start;
-        if (size < 16)
-        {
-            size += (end != arr_end);
-            unsigned idx = bm::sse4_gap_find(buf + start, (bm::gap_word_t)pos, size);
-            start += idx;
-
-            BM_ASSERT(buf[start] >= pos);
-            BM_ASSERT(buf[start - 1] < pos || (start == 1));
-            break;
-        }
-    }
-    
-    unsigned res = ((*buf) & 1) ^ ((--start) & 1);
-
+    unsigned res = bm::sse42_gap_test(buf, pos);
      BM_ASSERT(res == bm::gap_test(buf, pos));
  #elif defined(BMAVX2OPT)
      unsigned res = bm::avx2_gap_test(buf, pos);
@@ -1378,21 +1324,22 @@ unsigned gap_test_unr(const T* buf, const unsigned pos)
      \internal
  */
  template<typename T, typename N, typename F>
-void for_each_nzblock_range(T*** root, N top_size, N nb_from, N nb_to, F& f)
+void for_each_nzblock_range(T*** root,
+                            N top_size, N nb_from, N nb_to, F& f) BMNOEXCEPT
  {
      BM_ASSERT(top_size);
      if (nb_from > nb_to)
          return;
-    unsigned i_from = nb_from >> bm::set_array_shift;
-    unsigned j_from = nb_from &  bm::set_array_mask;
-    unsigned i_to = nb_to >> bm::set_array_shift;
-    unsigned j_to = nb_to &  bm::set_array_mask;
+    unsigned i_from = unsigned(nb_from >> bm::set_array_shift);
+    unsigned j_from = unsigned(nb_from &  bm::set_array_mask);
+    unsigned i_to = unsigned(nb_to >> bm::set_array_shift);
+    unsigned j_to = unsigned(nb_to &  bm::set_array_mask);
      
      if (i_from >= top_size)
          return;
      if (i_to >= top_size)
      {
-        i_to = top_size-1;
+        i_to = unsigned(top_size-1);
          j_to = bm::set_sub_array_size-1;
      }
      
@@ -1400,16 +1347,12 @@ void for_each_nzblock_range(T*** root, N top_size, N nb_from, N nb_to, F& f)
      {
          T** blk_blk = root[i];
          if (!blk_blk)
-        {
              continue;
-        }
          if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
          {
              unsigned j = (i == i_from) ? j_from : 0;
              if (!j && (i != i_to)) // full sub-block
-            {
-                f.add_full(bm::set_sub_array_size * bm::gap_max_bits);
-            }
+                f.add_full(bm::set_sub_total_bits);
              else
              {
                  do
@@ -1426,13 +1369,10 @@ void for_each_nzblock_range(T*** root, N top_size, N nb_from, N nb_to, F& f)
              do
              {
                  if (blk_blk[j])
-                {
                      f(blk_blk[j]);
-                }
                  if ((i == i_to) && (j == j_to))
                      return;
-                ++j;
-            } while (j < bm::set_sub_array_size);
+            } while (++j < bm::set_sub_array_size);
          }
      } // for i
  }
@@ -1672,7 +1612,7 @@ void for_each_nzblock2(T*** root, unsigned size1, F& f)
      Function returns if function-predicate returns true
  */
  template<typename T, typename BI, typename F>
-bool for_each_nzblock_if(T*** root, BI size1, F& f)
+bool for_each_nzblock_if(T*** root, BI size1, F& f) BMNOEXCEPT
  {
      BI block_idx = 0;
      for (BI i = 0; i < size1; ++i)
@@ -1754,14 +1694,11 @@ template<class T, class F> F bmfor_each(T first, T last, F f)
  /*! Computes SUM of all elements of the sequence
  */
  template<typename T>
-bm::id64_t sum_arr(T* first, T* last)
+bm::id64_t sum_arr(const T* first, const T* last) BMNOEXCEPT
  {
      bm::id64_t sum = 0;
-    while (first < last)
-    {
+    for (;first < last; ++first)
          sum += *first;
-        ++first;
-    }
      return sum;
  }
  
@@ -1775,7 +1712,8 @@ bm::id64_t sum_arr(T* first, T* last)
      @ingroup gapfunc
  */
  template<typename T>
-void gap_split(const T* buf, T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt)
+void gap_split(const T* buf,
+              T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt) BMNOEXCEPT
  {
      const T* pcurr = buf;
      unsigned len = (*pcurr >> 3);
@@ -1834,7 +1772,7 @@ void gap_split(const T* buf, T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt)
     @ingroup gapfunc
  */
  template<typename T>
-unsigned gap_bit_count(const T* buf, unsigned dsize=0)
+unsigned gap_bit_count(const T* buf, unsigned dsize=0) BMNOEXCEPT
  {
      const T* pcurr = buf;
      if (dsize == 0)
@@ -1850,14 +1788,8 @@ unsigned gap_bit_count(const T* buf, unsigned dsize=0)
          bits_counter += *pcurr + 1;
          ++pcurr;
      }
-    ++pcurr;  // set GAP to 1
-
-    while (pcurr <= pend)
-    {
+    for (++pcurr; pcurr <= pend; pcurr += 2)
          bits_counter += *pcurr - *(pcurr-1);
-        pcurr += 2; // jump to the next positive GAP
-    } 
-
      return bits_counter;
  }
  
@@ -1867,7 +1799,8 @@ unsigned gap_bit_count(const T* buf, unsigned dsize=0)
      \return Number of non-zero bits.
      @ingroup gapfunc
  */
-template<typename T> unsigned gap_bit_count_unr(const T* buf)
+template<typename T>
+unsigned gap_bit_count_unr(const T* buf) BMNOEXCEPT
  {
      const T* pcurr = buf;
      unsigned dsize = (*pcurr >> 3);
@@ -1918,7 +1851,7 @@ template<typename T> unsigned gap_bit_count_unr(const T* buf)
      {
          cnt += *pcurr - *(pcurr - 1);
      }
-    BM_ASSERT(cnt == gap_bit_count(buf));
+    BM_ASSERT(cnt == bm::gap_bit_count(buf));
      return cnt;
  }
  
@@ -1933,9 +1866,11 @@ template<typename T> unsigned gap_bit_count_unr(const T* buf)
     @ingroup gapfunc
  */
  template<typename T>
-unsigned gap_bit_count_range(const T* const buf, unsigned left, unsigned right)
+unsigned gap_bit_count_range(const T* const buf,
+                             unsigned left, unsigned right) BMNOEXCEPT
  {
      BM_ASSERT(left <= right);
+    BM_ASSERT(right < bm::gap_max_bits);
      
      const T* pcurr = buf;
      const T* pend = pcurr + (*pcurr >> 3);
@@ -1965,6 +1900,140 @@ unsigned gap_bit_count_range(const T* const buf, unsigned left, unsigned right)
      return bits_counter;
  }
  
+/*!
+   \brief Test if all bits are 1 in GAP buffer in the closed [left, right] range.
+   \param buf - GAP buffer pointer.
+   \param left - leftmost bit index of the range (must be <= right)
+   \param right - rightmost bit index of the range (must be < bm::gap_max_bits)
+   \return true if every bit in [left, right] is set ("11111")
+   @ingroup gapfunc
+*/
+template<typename T>
+bool gap_is_all_one_range(const T* const BMRESTRICT buf,
+                          unsigned left, unsigned right) BMNOEXCEPT
+{
+    BM_ASSERT(left <= right);
+    BM_ASSERT(right < bm::gap_max_bits);
+
+    unsigned is_set;
+    unsigned start_pos = bm::gap_bfind(buf, left, &is_set); // GAP holding left
+    if (!is_set) // left itself falls into a 0-GAP
+        return false;
+    const T* const pcurr = buf + start_pos;
+    return (right <= *pcurr); // true only if the same 1-GAP also covers right
+}
+
+/*!
+   \brief Test if any bits are 1 in GAP buffer in the closed [left, right] range.
+   \param buf - GAP buffer pointer.
+   \param left - leftmost bit index of the range (must be <= right)
+   \param right - rightmost bit index of the range (must be < bm::gap_max_bits)
+   \return true if at least one bit in [left, right] is set ("00010")
+   @ingroup gapfunc
+*/
+template<typename T>
+bool gap_any_range(const T* const BMRESTRICT buf,
+                    unsigned left, unsigned right) BMNOEXCEPT
+{
+    BM_ASSERT(left <= right);
+    BM_ASSERT(right < bm::gap_max_bits);
+
+    unsigned is_set;
+    unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
+    const T* const pcurr = buf + start_pos;
+
+    if (!is_set) // left falls into a 0-GAP ...
+    {
+        if (right <= *pcurr) // ... and that same 0-GAP also covers right
+            return false; // the whole range is 0
+        return true; // range runs past the 0-GAP into the following 1-GAP
+    }
+    return true; // bit at left is already 1
+}
+
+/*!
+   \brief Test if all bits are 1 in GAP buffer in the [left, right] range
+   and the range is flanked with 0s (bits left-1 and right+1 are 0)
+   \param buf - GAP buffer pointer.
+   \param left - leftmost bit index of the range (must be > 0)
+   \param right - rightmost bit index (must be < bm::gap_max_bits-1)
+   \return true if "011110"
+   @ingroup gapfunc
+*/
+template<typename T>
+bool gap_is_interval(const T* const BMRESTRICT buf,
+                     unsigned left, unsigned right) BMNOEXCEPT
+{
+    BM_ASSERT(left <= right);
+    BM_ASSERT(left > 0); // cannot check left-1 otherwise
+    BM_ASSERT(right < bm::gap_max_bits-1); // cannot check right+1 otherwise
+
+    unsigned is_set;
+    unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
+
+    const T* pcurr = buf + start_pos;
+    if (!is_set || (right != *pcurr) || (start_pos <= 1))
+        return false; // not a 1-GAP ending exactly at right, or no GAP before it
+    --pcurr; // step back to the end of the preceding GAP
+    if (*pcurr != left-1)
+        return false; // the 1-GAP starts somewhere other than left
+    return true;
+}
+
+/**
+    \brief Searches for the last 1 bit in the 111 interval of a GAP block
+    \param buf - GAP block buffer
+    \param nbit - bit index to start checking from (must be < bm::gap_max_bits)
+    \param pos - [out] index of the last bit of the 1-GAP containing nbit
+
+    \return false if bit nbit is 0 (pos is not written in that case)
+    @ingroup gapfunc
+*/
+template<typename T>
+bool gap_find_interval_end(const T* const BMRESTRICT buf,
+                           unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
+{
+    BM_ASSERT(pos);
+    BM_ASSERT(nbit < bm::gap_max_bits);
+
+    unsigned is_set;
+    unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set);
+    if (!is_set)
+        return false;
+    *pos = buf[start_pos]; // GAP word stores the last bit index of the GAP
+    return true;
+}
+
+
+/**
+    \brief Searches for the first 1 bit in the 111 interval of a GAP block
+    \param buf - GAP block buffer
+    \param nbit - bit index to start checking from (must be < bm::gap_max_bits)
+    \param pos - [out] index of the first bit of the 1-GAP containing nbit
+
+    \return false if bit nbit is 0 (pos is not written in that case)
+    @ingroup gapfunc
+*/
+template<typename T>
+bool gap_find_interval_start(const T* const BMRESTRICT buf,
+                           unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
+{
+    BM_ASSERT(pos);
+    BM_ASSERT(nbit < bm::gap_max_bits);
+
+    unsigned is_set;
+    unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set);
+    if (!is_set)
+        return false;
+    --start_pos; // index of the preceding GAP word (index 0 is the header word)
+    if (!start_pos)
+        *pos = 0; // nbit is in the first GAP, so the run starts at bit 0
+    else
+        *pos = buf[start_pos]+1; // run starts right after the previous GAP ends
+    return true;
+}
+
+
  /*!
      \brief GAP block find position for the rank
  
@@ -1982,7 +2051,7 @@ template<typename T, typename SIZE_TYPE>
  SIZE_TYPE gap_find_rank(const T* const block,
                          SIZE_TYPE   rank,
                          unsigned   nbit_from,
-                        unsigned&  nbit_pos)
+                        unsigned&  nbit_pos) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(rank);
@@ -2025,11 +2094,14 @@ SIZE_TYPE gap_find_rank(const T* const block,
      \brief Counts 1 bits in GAP buffer in the closed [0, right] range.
      \param buf - GAP buffer pointer.
      \param right- rightmost bit index
-    \return Number of non-zero bits.
+    \param is_corrected - if true the result will be rank corrected
+                       if right bit == true count=count-1
+    \return Number of non-zero bits
      @ingroup gapfunc
  */
  template<typename T>
-unsigned gap_bit_count_to(const T* const buf, T right)
+unsigned gap_bit_count_to(const T* const buf, T right,
+                          bool is_corrected=false) BMNOEXCEPT
  {
      const T* pcurr = buf;
      const T* pend = pcurr + (*pcurr >> 3);
@@ -2042,6 +2114,7 @@ unsigned gap_bit_count_to(const T* const buf, T right)
      if (right <= *pcurr) // we are in the target block right now
      {
          bits_counter = (right + 1u) & is_set; // & is_set == if (is_set)
+        bits_counter -= (is_set & unsigned(is_corrected));
          return bits_counter;
      }
      bits_counter += (*pcurr + 1u) & is_set;
@@ -2051,10 +2124,14 @@ unsigned gap_bit_count_to(const T* const buf, T right)
      {
          bits_counter += (*pcurr - prev_gap) & is_set;
          if (pcurr == pend)
+        {
+            bits_counter -= (is_set & unsigned(is_corrected));
              return bits_counter;
+        }
          prev_gap = *pcurr++;
      }
      bits_counter += (right - prev_gap) & is_set;
+    bits_counter -= (is_set & unsigned(is_corrected));
      return bits_counter;
  }
  
@@ -2110,7 +2187,8 @@ template<typename T> struct d_copy_func
     @ingroup gapfunc
  */
  template<typename T>
-T* gap_2_dgap(const T* gap_buf, T* dgap_buf, bool copy_head=true)
+T* gap_2_dgap(const T* BMRESTRICT gap_buf,
+              T* BMRESTRICT dgap_buf, bool copy_head=true) BMNOEXCEPT
  {
      if (copy_head) // copy GAP header
      {
@@ -2135,7 +2213,8 @@ T* gap_2_dgap(const T* gap_buf, T* dgap_buf, bool copy_head=true)
     @ingroup gapfunc
  */
  template<typename T>
-void dgap_2_gap(const T* dgap_buf, T* gap_buf, T gap_header=0)
+void dgap_2_gap(const T* BMRESTRICT dgap_buf,
+                T* BMRESTRICT gap_buf, T gap_header=0) BMNOEXCEPT
  {
      const T* pcurr = dgap_buf;
      unsigned len;    
@@ -2175,7 +2254,8 @@ void dgap_2_gap(const T* dgap_buf, T* gap_buf, T gap_header=0)
  
     @ingroup gapfunc
  */
-template<typename T> int gapcmp(const T* buf1, const T* buf2)
+template<typename T>
+int gapcmp(const T* buf1, const T* buf2) BMNOEXCEPT
  {
      const T* pcurr1 = buf1;
      const T* pend1 = pcurr1 + (*pcurr1 >> 3);
@@ -2213,9 +2293,7 @@ template<typename T> int gapcmp(const T* buf1, const T* buf2)
                  return (bitval1) ? 1 : -1;
              }
          }
-
          ++pcurr1; ++pcurr2;
-
          bitval1 ^= 1;
          bitval2 ^= 1;
      }
@@ -2235,7 +2313,7 @@ template<typename T> int gapcmp(const T* buf1, const T* buf2)
  template<typename T>
  bool gap_find_first_diff(const T* BMRESTRICT buf1,
                           const T* BMRESTRICT buf2,
-                         unsigned* BMRESTRICT pos)
+                         unsigned* BMRESTRICT pos) BMNOEXCEPT
  {
      BM_ASSERT(buf1 && buf2 && pos);
  
@@ -2253,7 +2331,8 @@ bool gap_find_first_diff(const T* BMRESTRICT buf1,
      return false;
  }
  
-
+// -------------------------------------------------------------------------
+//
  
  /*!
     \brief Abstract operation for GAP buffers. 
@@ -2264,7 +2343,6 @@ bool gap_find_first_diff(const T* BMRESTRICT buf1,
     can be 0 or 1 (1 inverts the vector)
     \param vect2 - operand 2 GAP encoded buffer.
     \param vect2_mask - same as vect1_mask
-   \param f - operation functor.
     \param dlen - destination length after the operation
  
     \note Internal function.
@@ -2278,8 +2356,7 @@ void gap_buff_op(T*         BMRESTRICT dest,
                   unsigned   vect1_mask, 
                   const T*   BMRESTRICT vect2,
                   unsigned   vect2_mask, 
-                 F&         f,
-                 unsigned&  dlen)
+                 unsigned&  dlen) BMNOEXCEPT2
  {
      const T*  cur1 = vect1;
      const T*  cur2 = vect2;
@@ -2287,7 +2364,7 @@ void gap_buff_op(T*         BMRESTRICT dest,
      T bitval1 = (T)((*cur1++ & 1) ^ vect1_mask);
      T bitval2 = (T)((*cur2++ & 1) ^ vect2_mask);
      
-    T bitval = (T) f(bitval1, bitval2);
+    T bitval = (T) F::op(bitval1, bitval2);
      T bitval_prev = bitval;
  
      T* res = dest;
@@ -2297,7 +2374,7 @@ void gap_buff_op(T*         BMRESTRICT dest,
      T c1 = *cur1; T c2 = *cur2;
      while (1)
      {
-        bitval = (T) f(bitval1, bitval2);
+        bitval = (T) F::op(bitval1, bitval2);
  
          // Check if GAP value changes and we need to 
          // start the next one
@@ -2327,19 +2404,18 @@ void gap_buff_op(T*         BMRESTRICT dest,
              }
              ++cur2; c2 = *cur2;
          }
-
      } // while
  
      dlen = (unsigned)(res - dest);
      *dest = (T)((*dest & 7) + (dlen << 3));
  }
  
+
  /*!
     \brief Abstract operation for GAP buffers (predicts legth)
            Receives functor F as a template argument
     \param vect1 - operand 1 GAP encoded buffer.
     \param vect2 - operand 2 GAP encoded buffer.
-   \param f - operation functor.
     \param dlen - destination length after the operation
     \param limit - maximum target length limit,
                    returns false if limit is reached
@@ -2354,9 +2430,8 @@ void gap_buff_op(T*         BMRESTRICT dest,
  template<typename T, class F>
  bool gap_buff_dry_op(const T*   BMRESTRICT vect1,
                       const T*   BMRESTRICT vect2,
-                          F&         f,
                       unsigned&  dlen,
-                     unsigned limit)
+                     unsigned limit) BMNOEXCEPT2
  {
      const T*  cur1 = vect1;
      const T*  cur2 = vect2;
@@ -2364,7 +2439,7 @@ bool gap_buff_dry_op(const T*   BMRESTRICT vect1,
      T bitval1 = (T)((*cur1++ & 1));
      T bitval2 = (T)((*cur2++ & 1));
  
-    T bitval = (T) f(bitval1, bitval2);
+    T bitval = (T) F::op(bitval1, bitval2);
      T bitval_prev = bitval;
  
      unsigned len = 1;
@@ -2372,7 +2447,7 @@ bool gap_buff_dry_op(const T*   BMRESTRICT vect1,
      T c1 = *cur1; T c2 = *cur2;
      while (1)
      {
-        bitval = (T) f(bitval1, bitval2);
+        bitval = (T) F::op(bitval1, bitval2);
  
          // Check if GAP value changes and we need to
          // start the next one
@@ -2418,7 +2493,6 @@ bool gap_buff_dry_op(const T*   BMRESTRICT vect1,
                         can be 0 or 1 (1 inverts the vector)
     \param vect2 - operand 2 GAP encoded buffer.
     \param vect2_mask - same as vect1_mask
-   \param f - operation functor.
     \note Internal function.
     \return non zero value if operation result returns any 1 bit 
  
@@ -2428,8 +2502,7 @@ template<typename T, class F>
  unsigned gap_buff_any_op(const T*   BMRESTRICT vect1,
                           unsigned              vect1_mask, 
                           const T*   BMRESTRICT vect2,
-                         unsigned              vect2_mask, 
-                         F                     f)
+                         unsigned              vect2_mask) BMNOEXCEPT2
  {
      const T*  cur1 = vect1;
      const T*  cur2 = vect2;
@@ -2437,14 +2510,14 @@ unsigned gap_buff_any_op(const T*   BMRESTRICT vect1,
      unsigned bitval1 = (*cur1++ & 1) ^ vect1_mask;
      unsigned bitval2 = (*cur2++ & 1) ^ vect2_mask;
      
-    unsigned bitval = f(bitval1, bitval2);
+    unsigned bitval = F::op(bitval1, bitval2);
      if (bitval)
          return bitval;
      unsigned bitval_prev = bitval;
  
      while (1)
      {
-        bitval = f(bitval1, bitval2);
+        bitval = F::op(bitval1, bitval2);
          if (bitval)
              return bitval;
  
@@ -2468,10 +2541,8 @@ unsigned gap_buff_any_op(const T*   BMRESTRICT vect1,
                  {
                      break;
                  }
-
                  ++cur1;
-                bitval1 ^= 1;
-                bitval2 ^= 1;
+                bitval1 ^= 1; bitval2 ^= 1;
              }
              ++cur2;
          }
@@ -2488,13 +2559,12 @@ unsigned gap_buff_any_op(const T*   BMRESTRICT vect1,
            Receives functor F as a template argument
     \param vect1 - operand 1 GAP encoded buffer.
     \param vect2 - operand 2 GAP encoded buffer.
-   \param f - operation functor.
     \note Internal function.
  
     @ingroup gapfunc
  */
  template<typename T, class F> 
-unsigned gap_buff_count_op(const T*  vect1, const T*  vect2, F f)
+unsigned gap_buff_count_op(const T*  vect1, const T*  vect2) BMNOEXCEPT2
  {
      unsigned count;// = 0;
      const T* cur1 = vect1;
@@ -2502,18 +2572,15 @@ unsigned gap_buff_count_op(const T*  vect1, const T*  vect2, F f)
  
      unsigned bitval1 = (*cur1++ & 1);
      unsigned bitval2 = (*cur2++ & 1);
-    unsigned bitval = count = f(bitval1, bitval2);
+    unsigned bitval = count = F::op(bitval1, bitval2);
      unsigned bitval_prev = bitval;
  
-    //if (bitval) ++count;
-    
      T res, res_prev;
      res = res_prev = 0;
  
      while (1)
      {
-        bitval = f(bitval1, bitval2);
-
+        bitval = F::op(bitval1, bitval2);
          // Check if GAP value changes and we need to 
          // start the next one.
          if (bitval != bitval_prev)
@@ -2530,8 +2597,7 @@ unsigned gap_buff_count_op(const T*  vect1, const T*  vect2, F f)
                  count += res - res_prev; 
                  res_prev = res;
              }
-            ++cur1;
-            bitval1 ^= 1;
+            ++cur1; bitval1 ^= 1;
          }
          else // >=
          {
@@ -2548,13 +2614,10 @@ unsigned gap_buff_count_op(const T*  vect1, const T*  vect2, F f)
              else  // equal
              {
                  if (*cur2 == (bm::gap_max_bits - 1))
-                {
                      break;
-                }
  
                  ++cur1;
-                bitval1 ^= 1;
-                bitval2 ^= 1;
+                bitval1 ^= 1; bitval2 ^= 1;
              }
              ++cur2;
          }
@@ -2565,6 +2628,10 @@ unsigned gap_buff_count_op(const T*  vect1, const T*  vect2, F f)
  }
  
  
+#ifdef __GNUG__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
  
  /*!
     \brief Sets or clears bit in the GAP buffer.
@@ -2582,11 +2649,11 @@ template<typename T>
  unsigned gap_set_value(unsigned val,
                         T* BMRESTRICT buf,
                         unsigned pos,
-                       unsigned* BMRESTRICT is_set)
+                       unsigned* BMRESTRICT is_set) BMNOEXCEPT
  {
      BM_ASSERT(pos < bm::gap_max_bits);
-    unsigned curr = gap_bfind(buf, pos, is_set);
  
+    unsigned curr = bm::gap_bfind(buf, pos, is_set);
      T end = (T)(*buf >> 3);
      if (*is_set == val)
      {
@@ -2601,10 +2668,10 @@ unsigned gap_set_value(unsigned val,
  
      // Special case, first bit GAP operation. There is no platform beside it.
      // initial flag must be inverted.
-    if (pos == 0)
+    if (!pos)
      {
          *buf ^= 1;
-        if ( buf[1] ) // We need to insert a 1 bit platform here.
+        if (buf[1]) // We need to insert a 1 bit GAP here
          {
              ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
              buf[1] = 0;
@@ -2612,52 +2679,125 @@ unsigned gap_set_value(unsigned val,
          }
          else // Only 1 bit in the GAP. We need to delete the first GAP.
          {
-            pprev = buf + 1;
-            pcurr = pprev + 1;
-            do
-            {
-                *pprev++ = *pcurr++;
-            } while (pcurr < pend);
-            --end;
+            pprev = buf + 1; pcurr = pprev + 1;
+            goto copy_gaps;
          }
      }
-    else if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
+    else
+    if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
      {
         ++(*pprev);
         if (*pprev == *pcurr)  // Curr. GAP to be merged with prev.GAP.
         {
              --end;
-            if (pcurr != pend) // GAP merge: 2 GAPS to be deleted 
+            if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
              {
-                --end;
                  ++pcurr;
-                do
-                {
-                    *pprev++ = *pcurr++;
-                } while (pcurr < pend);
+                copy_gaps:
+                --end;
+                do { *pprev++ = *pcurr++; } while (pcurr < pend);
              }
         }    
      }
-    else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
+    else
+    if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
      {
-        --(*pcurr);       
-        if (pcurr == pend)
+        --(*pcurr);
+        end += (pcurr == pend);
+    }
+    else  // Worst case: split current GAP
+    {
+        if (*pcurr != bm::gap_max_bits-1) // last gap does not need memmove
+            ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T)));
+        end += 2;
+        pcurr[0] = (T)(pos-1);
+        pcurr[1] = (T)pos;
+    }
+
+    // Set correct length word and last border word
+    *buf = (T)((*buf & 7) + (end << 3));
+    buf[end] = bm::gap_max_bits-1;
+    return end;
+}
+
+/*!
+   \brief Sets or clears bit in the GAP buffer.
+
+   \param val - new bit value
+   \param buf - GAP buffer.
+   \param pos - Index of bit to set.
+
+   \return New GAP buffer length.
+
+   @ingroup gapfunc
+*/
+template<typename T>
+unsigned gap_set_value(unsigned val,
+                       T* BMRESTRICT buf,
+                       unsigned pos) BMNOEXCEPT
+{
+    BM_ASSERT(pos < bm::gap_max_bits);
+    unsigned is_set;
+    unsigned curr = bm::gap_bfind(buf, pos, &is_set);
+    T end = (T)(*buf >> 3);
+    if (is_set == val)
+        return end;
+
+    T* pcurr = buf + curr;
+    T* pprev = pcurr - 1;
+    T* pend = buf + end;
+
+    // Special case, first bit GAP operation. There is no platform beside it.
+    // initial flag must be inverted.
+    if (!pos)
+    {
+        *buf ^= 1;
+        if (buf[1]) // We need to insert a 1 bit GAP here
+        {
+            ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
+            buf[1] = 0;
+            ++end;
+        }
+        else // Only 1 bit in the GAP. We need to delete the first GAP.
          {
-           ++end;
+            pprev = buf + 1; pcurr = pprev + 1;
+            goto copy_gaps;
          }
      }
-    else  // Worst case we need to split current block.
+    else
+    if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
+    {
+       ++(*pprev);
+       if (*pprev == *pcurr)  // Curr. GAP to be merged with prev.GAP.
+       {
+            --end;
+            if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
+            {
+                ++pcurr;
+                copy_gaps:
+                --end;
+                do { *pprev++ = *pcurr++; } while (pcurr < pend);
+            }
+       }
+    }
+    else
+    if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
      {
-        ::memmove(pcurr+2, pcurr,(end - curr + 1)*sizeof(T));
-        *pcurr++ = (T)(pos - 1);
-        *pcurr = (T)pos;
-        end = (T)(end + 2);
+        --(*pcurr);
+        end += (pcurr == pend);
+    }
+    else  // Worst case: split current GAP
+    {
+        if (*pcurr != bm::gap_max_bits-1) // last gap does not need memmove
+            ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T)));
+        end += 2;
+        pcurr[0] = (T)(pos-1);
+        pcurr[1] = (T)pos;
      }
  
-    // Set correct length word.
+    // Set correct length word and last border word
      *buf = (T)((*buf & 7) + (end << 3));
-
-    buf[end] = bm::gap_max_bits - 1;
+    buf[end] = bm::gap_max_bits-1;
      return end;
  }
  
@@ -2672,7 +2812,7 @@ unsigned gap_set_value(unsigned val,
     @ingroup gapfunc
  */
  template<typename T> 
-unsigned gap_add_value(T* buf, unsigned pos)
+unsigned gap_add_value(T* buf, unsigned pos) BMNOEXCEPT
  {
      BM_ASSERT(pos < bm::gap_max_bits);
  
@@ -2684,7 +2824,7 @@ unsigned gap_add_value(T* buf, unsigned pos)
  
      // Special case, first bit GAP operation. There is no platform beside it.
      // initial flag must be inverted.
-    if (pos == 0)
+    if (!pos)
      {
          *buf ^= 1;
          if ( buf[1] ) // We need to insert a 1 bit platform here.
@@ -2695,13 +2835,9 @@ unsigned gap_add_value(T* buf, unsigned pos)
          }
          else // Only 1 bit in the GAP. We need to delete the first GAP.
          {
-            pprev = buf + 1;
-            pcurr = pprev + 1;
-            do
-            {
-                *pprev++ = *pcurr++;
-            } while (pcurr < pend);
+            pprev = buf + 1; pcurr = pprev + 1;
              --end;
+            do { *pprev++ = *pcurr++; } while (pcurr < pend);
          }
      }
      else if (((unsigned)(*pprev))+1 == pos && (curr > 1) ) // Left border bit
@@ -2710,40 +2846,32 @@ unsigned gap_add_value(T* buf, unsigned pos)
         if (*pprev == *pcurr)  // Curr. GAP to be merged with prev.GAP.
         {
              --end;
-            if (pcurr != pend) // GAP merge: 2 GAPS to be deleted 
-            {
-                // TODO: should never get here...
-                --end;
-                ++pcurr;
-                do
-                {
-                    *pprev++ = *pcurr++;
-                } while (pcurr < pend);
-            }
-       } 
+            BM_ASSERT(pcurr == pend);
+       }
      }
      else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
      {
          --(*pcurr);       
-        if (pcurr == pend)
-        {
-           ++end;
-        }
+        end += (pcurr == pend);
      }
      else  // Worst case we need to split current block.
      {
-        *pcurr++ = (T)(pos - 1);
-        *pcurr = (T)pos;
+        pcurr[0] = (T)(pos-1);
+        pcurr[1] = (T)pos;
          end = (T)(end+2);
      }
  
      // Set correct length word.
      *buf = (T)((*buf & 7) + (end << 3));
-
      buf[end] = bm::gap_max_bits - 1;
      return end;
  }
  
+#ifdef __GNUG__
+#pragma GCC diagnostic pop
+#endif
+
+
  /*!
      @brief Right shift GAP block by 1 bit
      @param buf - block pointer
@@ -2754,7 +2882,8 @@ unsigned gap_add_value(T* buf, unsigned pos)
      @ingroup gapfunc
  */
  template<typename T>
-bool gap_shift_r1(T* buf, unsigned co_flag, unsigned* new_len)
+bool gap_shift_r1(T* BMRESTRICT buf,
+                  unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT
  {
      BM_ASSERT(new_len);
      bool co;
@@ -2802,7 +2931,8 @@ bool gap_shift_r1(T* buf, unsigned co_flag, unsigned* new_len)
      @ingroup gapfunc
  */
  template<typename T>
-bool gap_shift_l1(T* buf, unsigned co_flag, unsigned* new_len)
+bool gap_shift_l1(T* BMRESTRICT buf,
+                  unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT
  {
      BM_ASSERT(new_len);
      unsigned is_set;
@@ -2860,7 +2990,7 @@ bool gap_shift_l1(T* buf, unsigned co_flag, unsigned* new_len)
  */
  
  template<typename T> 
-unsigned gap_set_array(T* buf, const T* arr, unsigned len)
+unsigned gap_set_array(T* buf, const T* arr, unsigned len) BMNOEXCEPT
  {
      *buf = (T)((*buf & 6u) + (1u << 3)); // gap header setup
  
@@ -2921,8 +3051,7 @@ unsigned gap_set_array(T* buf, const T* arr, unsigned len)
      @ingroup gapfunc
  */
  template<typename T> 
-unsigned bit_array_compute_gaps(const T* arr, 
-                                unsigned len)
+unsigned bit_array_compute_gaps(const T* arr, unsigned len) BMNOEXCEPT
  {
      unsigned gap_count = 1;
      T prev = arr[0];
@@ -2954,9 +3083,9 @@ unsigned bit_array_compute_gaps(const T* arr,
      @ingroup gapfunc
  */
  template<typename T>
-unsigned gap_block_find(const T* buf,
+unsigned gap_block_find(const T* BMRESTRICT buf,
                          unsigned nbit,
-                        bm::id_t* prev)
+                        bm::id_t* BMRESTRICT prev) BMNOEXCEPT
  {
      BM_ASSERT(nbit < bm::gap_max_bits);
  
@@ -2968,19 +3097,20 @@ unsigned gap_block_find(const T* buf,
         *prev = nbit;
         return 1u;
      }
-
      unsigned val = buf[gap_idx] + 1;
      *prev = val;
- 
      return (val != bm::gap_max_bits);  // no bug here.
  }
  
+//------------------------------------------------------------------------
+
+
  /*! 
      \brief Set 1 bit in a block
      @ingroup bitfunc
  */
  BMFORCEINLINE
-void set_bit(unsigned* dest, unsigned  bitpos)
+void set_bit(unsigned* dest, unsigned  bitpos) BMNOEXCEPT
  {
      unsigned nbit  = unsigned(bitpos & bm::set_block_mask); 
      unsigned nword = unsigned(nbit >> bm::set_word_shift); 
@@ -2993,7 +3123,7 @@ void set_bit(unsigned* dest, unsigned  bitpos)
      @ingroup bitfunc
  */
  BMFORCEINLINE
-void clear_bit(unsigned* dest, unsigned  bitpos)
+void clear_bit(unsigned* dest, unsigned  bitpos) BMNOEXCEPT
  {
      unsigned nbit  = unsigned(bitpos & bm::set_block_mask);
      unsigned nword = unsigned(nbit >> bm::set_word_shift);
@@ -3007,7 +3137,7 @@ void clear_bit(unsigned* dest, unsigned  bitpos)
      @ingroup bitfunc
  */
  BMFORCEINLINE
-unsigned test_bit(const unsigned* block, unsigned  bitpos)
+unsigned test_bit(const unsigned* block, unsigned  bitpos) BMNOEXCEPT
  {
      unsigned nbit  = unsigned(bitpos & bm::set_block_mask); 
      unsigned nword = unsigned(nbit >> bm::set_word_shift); 
@@ -3025,7 +3155,7 @@ unsigned test_bit(const unsigned* block, unsigned  bitpos)
     @ingroup bitfunc
  */
  inline
-void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount)
+void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT
  {
      const unsigned maskFF = ~0u;
      
@@ -3072,7 +3202,7 @@ void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount)
     @ingroup bitfunc
  */
  inline
-void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount)
+void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT
  {
      const unsigned maskFF = ~0u;
      
@@ -3121,7 +3251,7 @@ void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount)
  */  
  inline void xor_bit_block(unsigned* dest, 
                            unsigned bitpos, 
-                          unsigned bitcount)
+                          unsigned bitcount) BMNOEXCEPT
  {
      unsigned nbit  = unsigned(bitpos & bm::set_block_mask); 
      unsigned nword = unsigned(nbit >> bm::set_word_shift); 
@@ -3175,7 +3305,8 @@ inline void xor_bit_block(unsigned* dest,
     @ingroup gapfunc
  */
  template<typename T> 
-void gap_sub_to_bitset(unsigned* dest, const T*  pcurr)
+void gap_sub_to_bitset(unsigned* BMRESTRICT dest,
+                       const T* BMRESTRICT pcurr) BMNOEXCEPT
  {
      BM_ASSERT(dest && pcurr);
      
@@ -3203,7 +3334,8 @@ void gap_sub_to_bitset(unsigned* dest, const T*  pcurr)
     @ingroup gapfunc
  */
  template<typename T>
-void gap_sub_to_bitset(unsigned* dest, const T*  pcurr, bm::id64_t digest0)
+void gap_sub_to_bitset(unsigned* BMRESTRICT dest,
+                       const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT
  {
      BM_ASSERT(dest && pcurr);
      
@@ -3261,7 +3393,8 @@ void gap_sub_to_bitset(unsigned* dest, const T*  pcurr, bm::id64_t digest0)
     @ingroup gapfunc
  */
  template<typename T> 
-void gap_xor_to_bitset(unsigned* dest, const T*  pcurr)
+void gap_xor_to_bitset(unsigned* BMRESTRICT dest,
+                       const T* BMRESTRICT pcurr) BMNOEXCEPT
  {
      BM_ASSERT(dest && pcurr);
  
@@ -3288,7 +3421,8 @@ void gap_xor_to_bitset(unsigned* dest, const T*  pcurr)
     @ingroup gapfunc
  */
  template<typename T>
-void gap_add_to_bitset(unsigned* dest, const T*  pcurr, unsigned len)
+void gap_add_to_bitset(unsigned* BMRESTRICT dest,
+                       const T* BMRESTRICT pcurr, unsigned len) BMNOEXCEPT
  {
      BM_ASSERT(dest && pcurr);
      
@@ -3321,7 +3455,8 @@ void gap_add_to_bitset(unsigned* dest, const T*  pcurr, unsigned len)
     @ingroup gapfunc
  */
  template<typename T>
-void gap_add_to_bitset(unsigned* dest, const T*  pcurr)
+void gap_add_to_bitset(unsigned* BMRESTRICT dest,
+                       const T* BMRESTRICT pcurr) BMNOEXCEPT
  {
      unsigned len = (*pcurr >> 3);
      gap_add_to_bitset(dest, pcurr, len);
@@ -3336,7 +3471,8 @@ void gap_add_to_bitset(unsigned* dest, const T*  pcurr)
     @ingroup gapfunc
  */
  template<typename T> 
-void gap_and_to_bitset(unsigned* dest, const T*  pcurr)
+void gap_and_to_bitset(unsigned* BMRESTRICT dest,
+                       const T* BMRESTRICT pcurr) BMNOEXCEPT
  {
      BM_ASSERT(dest && pcurr);
  
@@ -3370,7 +3506,8 @@ void gap_and_to_bitset(unsigned* dest, const T*  pcurr)
     @ingroup gapfunc
  */
  template<typename T>
-void gap_and_to_bitset(unsigned* dest, const T*  pcurr, bm::id64_t digest0)
+void gap_and_to_bitset(unsigned* BMRESTRICT dest,
+                    const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT
  {
      BM_ASSERT(dest && pcurr);
      if (!digest0)
@@ -3431,7 +3568,8 @@ void gap_and_to_bitset(unsigned* dest, const T*  pcurr, bm::id64_t digest0)
     @ingroup gapfunc
  */
  template<typename T> 
-bm::id_t gap_bitset_and_count(const unsigned* block, const T*  pcurr)
+bm::id_t gap_bitset_and_count(const unsigned* BMRESTRICT block,
+                              const T* BMRESTRICT pcurr) BMNOEXCEPT
  {
      BM_ASSERT(block);
      const T* pend = pcurr + (*pcurr >> 3);
@@ -3458,7 +3596,8 @@ bm::id_t gap_bitset_and_count(const unsigned* block, const T*  pcurr)
     @ingroup gapfunc
  */
  template<typename T> 
-bm::id_t gap_bitset_and_any(const unsigned* block, const T* pcurr)
+bm::id_t gap_bitset_and_any(const unsigned* BMRESTRICT block,
+                            const T* BMRESTRICT pcurr) BMNOEXCEPT
  {
      BM_ASSERT(block);
  
@@ -3487,7 +3626,8 @@ bm::id_t gap_bitset_and_any(const unsigned* block, const T* pcurr)
     @ingroup gapfunc
  */
  template<typename T> 
-bm::id_t gap_bitset_sub_count(const unsigned* block, const T*  buf)
+bm::id_t gap_bitset_sub_count(const unsigned* BMRESTRICT block,
+                              const T* BMRESTRICT buf) BMNOEXCEPT
  {
      BM_ASSERT(block);
  
@@ -3521,7 +3661,8 @@ bm::id_t gap_bitset_sub_count(const unsigned* block, const T*  buf)
     @ingroup gapfunc
  */
  template<typename T> 
-bm::id_t gap_bitset_sub_any(const unsigned* block, const T*  buf)
+bm::id_t gap_bitset_sub_any(const unsigned* BMRESTRICT block,
+                            const T* BMRESTRICT buf) BMNOEXCEPT
  {
      BM_ASSERT(block);
  
@@ -3558,7 +3699,8 @@ bm::id_t gap_bitset_sub_any(const unsigned* block, const T*  buf)
     @ingroup gapfunc
  */
  template<typename T> 
-bm::id_t gap_bitset_xor_count(const unsigned* block, const T*  buf)
+bm::id_t gap_bitset_xor_count(const unsigned* BMRESTRICT block,
+                              const T* BMRESTRICT buf) BMNOEXCEPT
  {
      BM_ASSERT(block);
  
@@ -3595,7 +3737,8 @@ bm::id_t gap_bitset_xor_count(const unsigned* block, const T*  buf)
     @ingroup gapfunc
  */
  template<typename T> 
-bm::id_t gap_bitset_xor_any(const unsigned* block, const T*  buf)
+bm::id_t gap_bitset_xor_any(const unsigned* BMRESTRICT block,
+                            const T* BMRESTRICT buf) BMNOEXCEPT
  {
      BM_ASSERT(block);
  
@@ -3632,10 +3775,10 @@ bm::id_t gap_bitset_xor_any(const unsigned* block, const T*  buf)
     @ingroup gapfunc
  */
  template<typename T> 
-bm::id_t gap_bitset_or_count(const unsigned* block, const T*  buf)
+bm::id_t gap_bitset_or_count(const unsigned* BMRESTRICT block,
+                             const T* BMRESTRICT buf) BMNOEXCEPT
  {
      BM_ASSERT(block);
-
      const T* pcurr = buf;
      const T* pend = pcurr + (*pcurr >> 3);
      ++pcurr;
@@ -3664,7 +3807,8 @@ bm::id_t gap_bitset_or_count(const unsigned* block, const T*  buf)
     @ingroup gapfunc
  */
  template<typename T> 
-bm::id_t gap_bitset_or_any(const unsigned* block, const T*  buf)
+bm::id_t gap_bitset_or_any(const unsigned* BMRESTRICT block,
+                           const T* BMRESTRICT buf) BMNOEXCEPT
  {
      bool b = !bm::gap_is_all_zero(buf) ||
               !bm::bit_is_all_zero(block);
@@ -3682,7 +3826,7 @@ bm::id_t gap_bitset_or_any(const unsigned* block, const T*  buf)
     @ingroup bitfunc
  */
  inline 
-void bit_block_set(bm::word_t* BMRESTRICT dst, bm::word_t value)
+void bit_block_set(bm::word_t* BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
  {
  #ifdef BMVECTOPT
      VECT_SET_BLOCK(dst, value);
@@ -3700,7 +3844,8 @@ void bit_block_set(bm::word_t* BMRESTRICT dst, bm::word_t value)
     @ingroup gapfunc
  */
  template<typename T> 
-void gap_convert_to_bitset(unsigned* dest, const T*  buf)
+void gap_convert_to_bitset(unsigned* BMRESTRICT dest,
+                           const T* BMRESTRICT buf) BMNOEXCEPT
  {
      bm::bit_block_set(dest, 0);
      bm::gap_add_to_bitset(dest, buf);
@@ -3721,13 +3866,12 @@ void gap_convert_to_bitset(unsigned* dest, const T*  buf)
     @ingroup gapfunc
  */
  template<typename T> 
-unsigned* gap_convert_to_bitset_smart(unsigned* dest,
-                                      const T* buf,
-                                      id_t set_max)
+unsigned* gap_convert_to_bitset_smart(unsigned* BMRESTRICT dest,
+                                      const T* BMRESTRICT buf,
+                                      id_t set_max) BMNOEXCEPT
  {
      if (buf[1] == set_max - 1)
          return (buf[0] & 1) ? FULL_BLOCK_REAL_ADDR : 0;
-
      bm::gap_convert_to_bitset(dest, buf);
      return dest;
  }
@@ -3742,7 +3886,8 @@ unsigned* gap_convert_to_bitset_smart(unsigned* dest,
     @ingroup gapfunc
     @internal
  */
-template<typename T> unsigned gap_control_sum(const T* buf)
+template<typename T>
+unsigned gap_control_sum(const T* buf) BMNOEXCEPT
  {
      unsigned end = *buf >> 3;
  
@@ -3755,7 +3900,6 @@ template<typename T> unsigned gap_control_sum(const T* buf)
          ++pcurr;
      }
      ++pcurr; // now we are in GAP "1" again
-
      while (pcurr <= pend)
      {
          BM_ASSERT(*pcurr > *(pcurr-1));
@@ -3773,9 +3917,8 @@ template<typename T> unsigned gap_control_sum(const T* buf)
  
     @ingroup gapfunc
  */
-template<class T> void gap_set_all(T* buf, 
-                                   unsigned set_max,
-                                   unsigned value)
+template<class T>
+void gap_set_all(T* buf, unsigned set_max, unsigned value) BMNOEXCEPT
  {
      BM_ASSERT(value == 0 || value == 1);
      *buf = (T)((*buf & 6u) + (1u << 3) + value);
@@ -3796,8 +3939,7 @@ template<class T>
  void gap_init_range_block(T* buf,
                            T  from,
                            T  to,
-                          T  value)
-                          //unsigned set_max)
+                          T  value) BMNOEXCEPT
  {
      BM_ASSERT(value == 0 || value == 1);
      const unsigned set_max = bm::bits_in_block;
@@ -3844,7 +3986,7 @@ void gap_init_range_block(T* buf,
  
     @ingroup gapfunc
  */
-template<typename T> void gap_invert(T* buf)
+template<typename T> void gap_invert(T* buf) BMNOEXCEPT
  { 
      *buf ^= 1;
  }
@@ -3863,7 +4005,7 @@ template<typename T> void gap_invert(T* buf)
     @ingroup gapfunc
  */
  template<typename T> 
-void set_gap_level(T* buf, int level)
+void set_gap_level(T* buf, int level) BMNOEXCEPT
  {
      BM_ASSERT(level >= 0);
      BM_ASSERT(unsigned(level) < bm::gap_levels);
@@ -3885,7 +4027,7 @@ void set_gap_level(T* buf, int level)
     @ingroup gapfunc
  */
  template<typename T>
-inline int gap_calc_level(unsigned len, const T* glevel_len)
+int gap_calc_level(unsigned len, const T* glevel_len) BMNOEXCEPT
  {
      if (len <= unsigned(glevel_len[0]-4)) return 0;
      if (len <= unsigned(glevel_len[1]-4)) return 1;
@@ -3906,10 +4048,11 @@ inline int gap_calc_level(unsigned len, const T* glevel_len)
      @ingroup gapfunc
  */
  template<typename T>
-inline unsigned gap_free_elements(const T* buf, const T* glevel_len)
+inline unsigned gap_free_elements(const T* BMRESTRICT buf,
+                                  const T* BMRESTRICT glevel_len) BMNOEXCEPT
  {
-    unsigned len = gap_length(buf);
-    unsigned capacity = gap_capacity(buf, glevel_len);
+    unsigned len = bm::gap_length(buf);
+    unsigned capacity = bm::gap_capacity(buf, glevel_len);
      return capacity - len;
  }
  
@@ -3923,7 +4066,7 @@ inline unsigned gap_free_elements(const T* buf, const T* glevel_len)
     @ingroup bitfunc 
  */
  template<typename T> 
-int bitcmp(const T* buf1, const T* buf2, unsigned len)
+int bitcmp(const T* buf1, const T* buf2, unsigned len) BMNOEXCEPT
  {
      BM_ASSERT(len);
      const T* pend1 = buf1 + len; 
@@ -3948,8 +4091,9 @@ int bitcmp(const T* buf1, const T* buf2, unsigned len)
     @ingroup bitfunc
  */
  inline
-bool bit_find_first_diff(const bm::word_t* blk1, const bm::word_t* blk2,
-                         unsigned* pos)
+bool bit_find_first_diff(const bm::word_t* BMRESTRICT blk1,
+                         const bm::word_t* BMRESTRICT blk2,
+                         unsigned* BMRESTRICT pos) BMNOEXCEPT
  {
      BM_ASSERT(blk1 && blk2 && pos);
  #ifdef VECT_BIT_FIND_DIFF
@@ -3969,7 +4113,7 @@ bool bit_find_first_diff(const bm::word_t* blk1, const bm::word_t* blk2,
          if (diff)
          {
              unsigned idx = bm::count_trailing_zeros_u64(diff);
-            *pos = unsigned(idx + (i * 8u * sizeof(bm::wordop_t)));
+            *pos = unsigned(idx + (i * 8u * unsigned(sizeof(bm::wordop_t))));
              return true;
          }
      } // for
@@ -3997,7 +4141,7 @@ bool bit_find_first_diff(const bm::word_t* blk1, const bm::word_t* blk2,
     \brief Converts bit block to GAP.
     \param dest - Destinatio GAP buffer.
     \param block - Source bitblock buffer.
-   \param dest_len - length of the dest. buffer.
+   \param dest_len - length of the destination buffer.
     \return  New length of GAP block or 0 if conversion failed
     (insufficicent space).
  
@@ -4006,7 +4150,7 @@ bool bit_find_first_diff(const bm::word_t* blk1, const bm::word_t* blk2,
  inline
  unsigned bit_block_to_gap(gap_word_t* BMRESTRICT dest,
                            const unsigned* BMRESTRICT block,
-                          unsigned dest_len)
+                          unsigned dest_len) BMNOEXCEPT
  {
      const unsigned* BMRESTRICT block_end = block + bm::set_block_size;
      gap_word_t* BMRESTRICT pcurr = dest;
@@ -4083,10 +4227,15 @@ complete:
  }
  #endif
  
+/**
+   Convert bit block to GAP representation
+   @internal
+   @ingroup bitfunc
+*/
  inline
  unsigned bit_to_gap(gap_word_t* BMRESTRICT dest,
                      const unsigned* BMRESTRICT block,
-                    unsigned dest_len)
+                    unsigned dest_len) BMNOEXCEPT
  {
  #if defined(VECT_BIT_TO_GAP)
      return VECT_BIT_TO_GAP(dest, block, dest_len);
@@ -4159,10 +4308,10 @@ template<typename D, typename T>
  D gap_convert_to_arr(D* BMRESTRICT       dest, 
                       const T* BMRESTRICT buf,
                       unsigned            dest_len,
-                     bool                invert = false)
+                     bool                invert = false) BMNOEXCEPT
  {
-    BMREGISTER const T* BMRESTRICT pcurr = buf;
-    BMREGISTER const T* pend = pcurr + (*pcurr >> 3);
+    const T* BMRESTRICT pcurr = buf;
+    const T* pend = pcurr + (*pcurr >> 3);
  
      D* BMRESTRICT dest_curr = dest;
      ++pcurr;
@@ -4215,7 +4364,7 @@ D gap_convert_to_arr(D* BMRESTRICT       dest,
      @ingroup bitfunc 
  */
  inline 
-bm::id_t bit_block_count(const bm::word_t* block)
+bm::id_t bit_block_count(const bm::word_t* block) BMNOEXCEPT
  {
      const bm::word_t* block_end = block + bm::set_block_size;
      bm::id_t count = 0;
@@ -4278,8 +4427,12 @@ bm::id_t bit_block_count(const bm::word_t* block)
      @ingroup bitfunc
  */
  inline
-bm::id_t bit_block_count(const bm::word_t* const block, bm::id64_t digest)
+bm::id_t bit_block_count(const bm::word_t* const block,
+                         bm::id64_t digest) BMNOEXCEPT
  {
+#ifdef VECT_BIT_COUNT_DIGEST
+    return VECT_BIT_COUNT_DIGEST(block, digest);
+#else
      bm::id_t count = 0;
      bm::id64_t d = digest;
      while (d)
@@ -4304,6 +4457,7 @@ bm::id_t bit_block_count(const bm::word_t* const block, bm::id64_t digest)
          d = bm::bmi_bslr_u64(d); // d &= d - 1;
      } // while
      return count;
+#endif
  }
  
  
@@ -4318,7 +4472,7 @@ bm::id_t bit_block_count(const bm::word_t* const block, bm::id64_t digest)
  */
  inline
  bm::id_t bit_block_calc_count(const bm::word_t* block,
-                              const bm::word_t* block_end)
+                              const bm::word_t* block_end) BMNOEXCEPT
  {
      bm::id_t count = 0;
      bm::word_t  acc = *block++;
@@ -4352,7 +4506,7 @@ bm::id_t bit_block_calc_count(const bm::word_t* block,
      @ingroup bitfunc 
  */
  inline 
-bm::id_t bit_count_change(bm::word_t w)
+bm::id_t bit_count_change(bm::word_t w) BMNOEXCEPT
  {
      unsigned count = 1;
      w ^= (w >> 1);
@@ -4368,7 +4522,7 @@ bm::id_t bit_count_change(bm::word_t w)
      @internal
  */
  inline
-unsigned bit_block_change32(const bm::word_t* block, unsigned size)
+unsigned bit_block_change32(const bm::word_t* block, unsigned size) BMNOEXCEPT
  {
      unsigned gap_count = 1;
  
@@ -4416,7 +4570,8 @@ unsigned bit_block_change32(const bm::word_t* block, unsigned size)
      @internal
  */
  inline
-void bit_block_change_bc(const bm::word_t* block, unsigned* gc, unsigned* bc)
+void bit_block_change_bc(const bm::word_t* BMRESTRICT block,
+                unsigned* BMRESTRICT gc, unsigned* BMRESTRICT bc) BMNOEXCEPT
  {
      BM_ASSERT(gc);
      BM_ASSERT(bc);
@@ -4441,7 +4596,7 @@ void bit_block_change_bc(const bm::word_t* block, unsigned* gc, unsigned* bc)
      @ingroup bitfunc
  */
  inline
-unsigned bit_block_calc_change(const bm::word_t* block)
+unsigned bit_block_calc_change(const bm::word_t* block) BMNOEXCEPT
  {
  #if defined(VECT_BLOCK_CHANGE)
      return VECT_BLOCK_CHANGE(block, bm::set_block_size);
@@ -4450,6 +4605,78 @@ unsigned bit_block_calc_change(const bm::word_t* block)
  #endif
  }
  
+/*!
+    Check if all bits are 1 in [left, right] range
+    @ingroup bitfunc
+*/
+inline
+bool bit_block_is_all_one_range(const bm::word_t* const BMRESTRICT block,
+                                bm::word_t left,
+                                bm::word_t right) BMNOEXCEPT
+{
+    BM_ASSERT(left <= right);
+    BM_ASSERT(right <= bm::gap_max_bits-1);
+
+    unsigned nword, nbit, bitcount, temp;
+    nbit = left & bm::set_word_mask;
+    const bm::word_t* word =
+        block + (nword = unsigned(left >> bm::set_word_shift));
+    if (left == right)  // special case (only 1 bit to check)
+        return (*word >> nbit) & 1u;
+
+    if (nbit) // starting position is not aligned
+    {
+        unsigned right_margin = nbit + right - left;
+        if (right_margin < 32)
+        {
+            unsigned mask =
+                block_set_table<true>::_right[nbit] &
+                block_set_table<true>::_left[right_margin];
+            return mask == (*word & mask);
+        }
+        temp = *word & block_set_table<true>::_right[nbit];
+        if (temp != block_set_table<true>::_right[nbit])
+            return false;
+        bitcount = (right - left + 1u) - (32 - nbit);
+        ++word;
+    }
+    else
+    {
+        bitcount = right - left + 1u;
+    }
+
+    // now when we are word aligned, we can scan the bit-stream
+    const bm::id64_t maskFF64 = ~0ull;
+    const bm::word_t maskFF = ~0u;
+    // loop unrolled to evaluate 4 words at a time
+    // SIMD showed no advantage, unless evaluate sub-wave intervals
+    //
+    for ( ;bitcount >= 128; bitcount-=128, word+=4)
+    {
+        bm::id64_t w64_0 = bm::id64_t(word[0]) + (bm::id64_t(word[1]) << 32);
+        bm::id64_t w64_1 = bm::id64_t(word[2]) + (bm::id64_t(word[3]) << 32);
+        if ((w64_0 ^ maskFF64) | (w64_1 ^ maskFF64))
+            return false;
+    } // for
+
+    for ( ;bitcount >= 32; bitcount-=32, ++word)
+    {
+        if (*word != maskFF)
+            return false;
+    } // for
+    BM_ASSERT(bitcount < 32);
+
+    if (bitcount)  // we have a tail to count
+    {
+        temp = *word & block_set_table<true>::_left[bitcount-1];
+        if (temp != block_set_table<true>::_left[bitcount-1])
+            return false;
+    }
+
+    return true;
+}
+
+
  
  
  /*!
@@ -4462,7 +4689,7 @@ unsigned bit_block_calc_change(const bm::word_t* block)
  inline 
  bm::id_t bit_block_calc_count_range(const bm::word_t* block,
                                      bm::word_t left,
-                                    bm::word_t right)
+                                    bm::word_t right) BMNOEXCEPT
  {
      BM_ASSERT(left <= right);
      BM_ASSERT(right <= bm::gap_max_bits-1);
@@ -4530,7 +4757,7 @@ bm::id_t bit_block_calc_count_range(const bm::word_t* block,
  */
  inline
  bm::id_t bit_block_calc_count_to(const bm::word_t*  block,
-                                 bm::word_t         right)
+                                 bm::word_t         right) BMNOEXCEPT
  {
      BM_ASSERT(block);
      if (!right) // special case, first bit check
@@ -4586,7 +4813,7 @@ bm::id_t bit_block_calc_count_to(const bm::word_t*  block,
      @ingroup bitfunc
  */
  inline
-void bit_block_rotate_left_1(bm::word_t* block)
+void bit_block_rotate_left_1(bm::word_t* block) BMNOEXCEPT
  {
      bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
      for (unsigned i = 0; i < bm::set_block_size-1; ++i)
@@ -4602,7 +4829,7 @@ void bit_block_rotate_left_1(bm::word_t* block)
      @ingroup bitfunc
  */
  inline
-void bit_block_rotate_left_1_unr(bm::word_t* block)
+void bit_block_rotate_left_1_unr(bm::word_t* block) BMNOEXCEPT
  {
      bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
      const unsigned unroll_factor = 4;
@@ -4638,7 +4865,8 @@ void bit_block_rotate_left_1_unr(bm::word_t* block)
      @ingroup bitfunc
  */
  inline
-bm::word_t bit_block_insert(bm::word_t* block, unsigned bitpos, bool value)
+bm::word_t bit_block_insert(bm::word_t* BMRESTRICT block,
+                            unsigned bitpos, bool value) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(bitpos < 65536);
@@ -4686,8 +4914,9 @@ bm::word_t bit_block_insert(bm::word_t* block, unsigned bitpos, bool value)
      @ingroup bitfunc
  */
  inline
-bool bit_block_shift_r1(bm::word_t* block,
-                        bm::word_t* empty_acc, bm::word_t co_flag)
+bool bit_block_shift_r1(bm::word_t* BMRESTRICT block,
+                        bm::word_t* BMRESTRICT empty_acc,
+                        bm::word_t             co_flag) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(empty_acc);
@@ -4715,8 +4944,9 @@ bool bit_block_shift_r1(bm::word_t* block,
      @ingroup bitfunc
  */
  inline
-bool bit_block_shift_r1_unr(bm::word_t* block,
-                            bm::word_t* empty_acc, bm::word_t co_flag)
+bool bit_block_shift_r1_unr(bm::word_t* BMRESTRICT block,
+                            bm::word_t* BMRESTRICT empty_acc,
+                            bm::word_t             co_flag) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(empty_acc);
@@ -4740,7 +4970,7 @@ bool bit_block_shift_r1_unr(bm::word_t* block,
  */
  inline
  bool bit_block_shift_l1(bm::word_t* block,
-                        bm::word_t* empty_acc, bm::word_t co_flag)
+                        bm::word_t* empty_acc, bm::word_t co_flag) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(empty_acc);
@@ -4770,7 +5000,8 @@ bool bit_block_shift_l1(bm::word_t* block,
  */
  inline
  bool bit_block_shift_l1_unr(bm::word_t* block,
-                            bm::word_t* empty_acc, bm::word_t co_flag)
+                            bm::word_t* empty_acc,
+                            bm::word_t  co_flag) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(empty_acc);
@@ -4791,7 +5022,9 @@ bool bit_block_shift_l1_unr(bm::word_t* block,
      @ingroup bitfunc
  */
  inline
-void bit_block_erase(bm::word_t* block, unsigned bitpos, bool carry_over)
+void bit_block_erase(bm::word_t* block,
+                     unsigned    bitpos,
+                     bool        carry_over) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(bitpos < 65536);
@@ -4799,7 +5032,7 @@ void bit_block_erase(bm::word_t* block, unsigned bitpos, bool carry_over)
      if (!bitpos)
      {
          bm::word_t acc;
-        bit_block_shift_l1_unr(block, &acc, carry_over);
+        bm::bit_block_shift_l1_unr(block, &acc, carry_over);
          return;
      }
      
@@ -4848,7 +5081,7 @@ inline
  bool bit_block_shift_r1_and(bm::word_t* BMRESTRICT block,
                              bm::word_t co_flag,
                              const bm::word_t* BMRESTRICT mask_block,
-                            bm::id64_t* BMRESTRICT digest)
+                            bm::id64_t* BMRESTRICT digest) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(mask_block);
@@ -4896,7 +5129,7 @@ bool bit_block_shift_r1_and(bm::word_t* BMRESTRICT block,
                  
                  block[d_base] = co_flag & mask_block[d_base];
                  if (block[d_base])
-                    d |= dmask; // update d
+                    d |= dmask; // update digest
                  co_flag = 0;
              }
          }
@@ -4920,7 +5153,7 @@ inline
  bool bit_block_shift_r1_and_unr(bm::word_t* BMRESTRICT block,
                                  bm::word_t co_flag,
                                  const bm::word_t* BMRESTRICT mask_block,
-                                bm::id64_t* BMRESTRICT digest)
+                                bm::id64_t* BMRESTRICT digest) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(mask_block);
@@ -4942,9 +5175,9 @@ bool bit_block_shift_r1_and_unr(bm::word_t* BMRESTRICT block,
      @ingroup bitfunc
  */
  inline 
-bm::id_t bit_block_any_range(const bm::word_t* block,
+bm::id_t bit_block_any_range(const bm::word_t* const BMRESTRICT block,
                               bm::word_t left,
-                             bm::word_t right)
+                             bm::word_t right) BMNOEXCEPT
  {
      BM_ASSERT(left <= right);
      
@@ -4969,8 +5202,7 @@ bm::id_t bit_block_any_range(const bm::word_t* block,
              unsigned mask =
                  block_set_table<true>::_right[nbit] &
                  block_set_table<true>::_left[right_margin];
-            acc = *word & mask;
-            return acc;
+            return *word & mask;
          }
          else
          {
@@ -4982,22 +5214,26 @@ bm::id_t bit_block_any_range(const bm::word_t* block,
          ++word;
      }
  
-    // now when we are word aligned, we can check bits the usual way
-    for ( ;bitcount >= 32; bitcount -= 32)
+    // loop unrolled to evaluate 4 words at a time
+    // SIMD showed no advantage, unless evaluate sub-wave intervals
+    //
+    for ( ;bitcount >= 128; bitcount-=128, word+=4)
      {
-        acc = *word++;
-        if (acc) 
+        acc = word[0] | word[1] | word[2] | word[3];
+        if (acc)
              return acc;
-    }
+    } // for
  
-    if (bitcount)  // we have a tail to count
+    acc = 0;
+    for ( ;bitcount >= 32; bitcount -= 32)
      {
-        acc = (*word) & block_set_table<true>::_left[bitcount-1];
-        if (acc) 
-            return acc;
-    }
+        acc |= *word++;
+    } // for
  
-    return 0;
+    if (bitcount)  // we have a tail to count
+        acc |= (*word) & block_set_table<true>::_left[bitcount-1];
+
+    return acc;
  }
  
  // ----------------------------------------------------------------------
@@ -5005,7 +5241,8 @@ bm::id_t bit_block_any_range(const bm::word_t* block,
  /*! Function inverts block of bits 
      @ingroup bitfunc 
  */
-template<typename T> void bit_invert(T* start)
+template<typename T>
+void bit_invert(T* start) BMNOEXCEPT
  {
      BM_ASSERT(IS_VALID_ADDR((bm::word_t*)start));
  #ifdef BMVECTOPT
@@ -5029,7 +5266,7 @@ template<typename T> void bit_invert(T* start)
      @ingroup bitfunc 
  */
  inline
-bool is_bits_one(const bm::wordop_t* start)
+bool is_bits_one(const bm::wordop_t* start) BMNOEXCEPT
  {
  #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
      return VECT_IS_ONE_BLOCK(start);
@@ -5050,35 +5287,311 @@ bool is_bits_one(const bm::wordop_t* start)
  
  // ----------------------------------------------------------------------
  
-// GAP blocks manipulation functions:
+/*! @brief Returns "true" if every bit in the closed range [left, right] of the block is 1
+    Dispatches on the block variety: NULL pointer (false), GAP block, FULL block address (true) or plain bit-block
+    @internal
+*/
+inline
+bool block_is_all_one_range(const bm::word_t* const BMRESTRICT block,
+                            unsigned left, unsigned right) BMNOEXCEPT
+{
+    BM_ASSERT(left <= right);
+    BM_ASSERT(right < bm::gap_max_bits);
+    if (block)
+    {
+        if (BM_IS_GAP(block)) // GAP-encoded block: delegate to the GAP range check
+            return bm::gap_is_all_one_range(BMGAP_PTR(block), left, right);
+        if (block == FULL_BLOCK_FAKE_ADDR) // FULL block address is never dereferenced
+            return true;
+        return bm::bit_block_is_all_one_range(block, left, right);
+    }
+    return false; // NULL block is reported as "not all ones"
+}
+
+/*! @brief Returns "true" if all bits are 1 in the block [left, right]
+    and border bits (left-1 and right+1) are 0; NULL and FULL blocks return false
+    @internal
+*/
+inline
+bool block_is_interval(const bm::word_t* const BMRESTRICT block,
+                       unsigned left, unsigned right) BMNOEXCEPT
+{
+    BM_ASSERT(left <= right);
+    BM_ASSERT(right < bm::gap_max_bits-1); // stricter than "< gap_max_bits" because bit right+1 is read below
+
+    if (block)
+    {
+        bool is_left, is_right, all_one;
+        if (BM_IS_GAP(block))
+        {
+            const bm::gap_word_t* gap = BMGAP_PTR(block);
+            all_one = bm::gap_is_interval(gap, left, right); // GAP variety: whole check is delegated
+            return all_one;
+        }
+        else // bit-block
+        {
+            if (block == FULL_BLOCK_FAKE_ADDR)
+                return false;
+            unsigned nword = ((left-1) >> bm::set_word_shift); // NOTE(review): left == 0 makes (left-1) wrap; no guard is visible here - confirm callers pass left > 0
+            is_left = block[nword] & (1u << ((left-1) & bm::set_word_mask)); // state of the bit just before the range
+            if (is_left == false)
+            {
+                nword = ((right + 1) >> bm::set_word_shift);
+                is_right = block[nword] & (1u << ((right + 1) & bm::set_word_mask)); // state of the bit just after the range
+                if (is_right == false)
+                {
+                    all_one = bm::bit_block_is_all_one_range(block, left, right); // borders are 0, now test the range itself
+                    return all_one;
+                }
+            }
+        }
+    }
+
+    return false; // NULL block, or one of the border bits is set
+}
+
+// ----------------------------------------------------------------------
+
+/**
+    \brief Searches for the last 1 bit in the 111 interval of a BIT block
+    \param block - BIT buffer (asserted non-NULL)
+    \param nbit - bit index to start checking from
+    \param pos - [out] index of the last consecutive 1 bit at or after nbit; not written when false is returned
+    If no 0 bit follows nbit inside the block, pos is set to bm::gap_max_bits-1
+    \return false if bit nbit itself is 0 (nbit is not inside an interval), true otherwise
+    @ingroup bitfunc
+*/
+inline
+bool bit_block_find_interval_end(const bm::word_t* BMRESTRICT block,
+                           unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
+{
+    BM_ASSERT(block);
+    BM_ASSERT(pos);
+
+    unsigned nword  = unsigned(nbit >> bm::set_word_shift);
+    unsigned bit_pos = (nbit & bm::set_word_mask);
+    bm::word_t w = block[nword];
+    w &= (1u << bit_pos); // isolate the starting bit
+    if (!w)
+        return false;
+
+    if (nbit == bm::gap_max_bits-1) // starting bit is the last bit handled here: nothing further to scan
+    {
+        *pos = bm::gap_max_bits-1;
+        return true;
+    }
+    *pos = nbit;
+
+    ++nbit; // continue from the bit after the starting one
+    nword  = unsigned(nbit >> bm::set_word_shift);
+    bit_pos = (nbit & bm::set_word_mask);
+
+    w = (~block[nword]) >> bit_pos; // inverted word: set bits now mark 0s of the block
+    w <<= bit_pos; // clear the trailing bits
+    if (w)
+    {
+        bit_pos = bm::bit_scan_forward32(w); // trailing zeros
+        *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1); // bit just before the first 0
+        return true;
+    }
+
+    for (++nword; nword < bm::set_block_size; ++nword) // remaining whole words of the block
+    {
+        w = ~block[nword];
+        if (w)
+        {
+            bit_pos = bm::bit_scan_forward32(w); // trailing zeros
+            *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1); // bit just before the first 0
+            return true;
+        }
+    } // for nword
+
+    // 0 not found, all block is 1s...
+    *pos = bm::gap_max_bits-1;
+    return true;
+}
+
+
+/*! @brief Find end of the current 111 interval (handles GAP, FULL and plain bit-blocks; block is asserted non-NULL)
+    @return search result code 0 - not found, 1 - found, 2 - found at the last bit of the block (bm::gap_max_bits-1)
+    @internal
+*/
+inline
+unsigned block_find_interval_end(const bm::word_t* BMRESTRICT block,
+                                 unsigned  nbit_from,
+                                 unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
+{
+    BM_ASSERT(block && found_nbit);
+    BM_ASSERT(nbit_from < bm::gap_max_bits);
+
+    bool b;
+    if (BM_IS_GAP(block))
+    {
+        const bm::gap_word_t* gap = BMGAP_PTR(block);
+        b = bm::gap_find_interval_end(gap, nbit_from, found_nbit);
+        if (b && *found_nbit == bm::gap_max_bits-1)
+            return 2; // end of block, keep searching
+    }
+    else // bit-block
+    {
+        if (IS_FULL_BLOCK(block)) // FULL block: answered without reading the block
+        {
+            *found_nbit = bm::gap_max_bits-1;
+            return 2;
+        }
+        b = bm::bit_block_find_interval_end(block, nbit_from, found_nbit);
+        if (b && *found_nbit == bm::gap_max_bits-1)
+            return 2; // end of block, keep searching
+    }
+    return b; // bool converts to code 0 (not found) or 1 (found)
+}
  
-/*! \brief GAP and functor */
-BMFORCEINLINE unsigned and_op(unsigned v1, unsigned v2)
+// ----------------------------------------------------------------------
+
+/**
+    \brief Searches for the first 1 bit in the 111 interval of a BIT block
+    \param block - BIT buffer (asserted non-NULL)
+    \param nbit - bit index to start checking from
+    \param pos - [out] index of the first consecutive 1 bit at or before nbit; not written when false is returned
+    If no 0 bit precedes nbit inside the block, pos is set to 0
+    \return false if bit nbit itself is 0 (nbit is not inside an interval), true otherwise
+    @ingroup bitfunc
+*/
+inline
+bool bit_block_find_interval_start(const bm::word_t* BMRESTRICT block,
+                           unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
  {
-    return v1 & v2;
+    BM_ASSERT(block);
+    BM_ASSERT(pos);
+
+    unsigned nword  = unsigned(nbit >> bm::set_word_shift);
+    unsigned bit_pos = (nbit & bm::set_word_mask);
+    bm::word_t w = block[nword];
+    w &= (1u << bit_pos); // isolate the starting bit
+    if (!w)
+        return false;
+
+    if (nbit == 0) // already at the first bit of the block: nothing to scan backward
+    {
+        *pos = 0;
+        return true;
+    }
+    *pos = nbit;
+
+    --nbit; // continue from the bit before the starting one
+    nword  = unsigned(nbit >> bm::set_word_shift);
+    bit_pos = (nbit & bm::set_word_mask);
+
+    w = (~block[nword]) & block_set_table<true>::_left[bit_pos]; // inverted word; mask presumably keeps bits 0..bit_pos - confirm table layout
+    if (w)
+    {
+        bit_pos = bm::bit_scan_reverse32(w); // presumably index of the highest set bit, i.e. the nearest 0 of the block
+        *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1); // bit just after that 0
+        return true;
+    }
+
+    if (nword)
+    {
+        for (--nword; true; --nword) // walk whole words backward; exit is the explicit break at word 0
+        {
+            w = ~block[nword];
+            if (w)
+            {
+                bit_pos = bm::bit_scan_reverse32(w); // reverse scan of the inverted word (not trailing zeros)
+                *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1); // bit just after that 0
+                return true;
+            }
+            if (!nword)
+                break;
+        } // for nword
+    }
+
+    // 0 not found, all block is 1s...
+    *pos = 0;
+    return true;
  }
  
  
-/*! \brief GAP xor functor */
-BMFORCEINLINE unsigned xor_op(unsigned v1, unsigned v2)
+/*! @brief Find start of the current 111 interval (handles GAP, FULL and plain bit-blocks; block is asserted non-NULL)
+    @return search result code 0 - not found, 1 - found, 2 - found at the first bit of the block (bit 0)
+    @internal
+*/
+inline
+unsigned block_find_interval_start(const bm::word_t* BMRESTRICT block,
+                                   unsigned  nbit_from,
+                                   unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
  {
-    return v1 ^ v2;
+    BM_ASSERT(block && found_nbit);
+    BM_ASSERT(nbit_from < bm::gap_max_bits);
+    bool b;
+    if (BM_IS_GAP(block))
+    {
+        const bm::gap_word_t* gap = BMGAP_PTR(block);
+        b = bm::gap_find_interval_start(gap, nbit_from, found_nbit);
+        if (b && *found_nbit == 0)
+            return 2; // start of block, keep searching
+    }
+    else // bit-block
+    {
+        if (IS_FULL_BLOCK(block)) // FULL block: answered without reading the block
+        {
+            *found_nbit = 0;
+            return 2;
+        }
+        b = bm::bit_block_find_interval_start(block, nbit_from, found_nbit);
+        if (b && *found_nbit == 0)
+            return 2; // start of block, keep searching
+    }
+    return b; // bool converts to code 0 (not found) or 1 (found)
  }
  
+// ----------------------------------------------------------------------
  
-/*! \brief GAP or functor */
-BMFORCEINLINE unsigned or_op(unsigned v1, unsigned v2)
+/*! @brief Returns "true" if at least one bit is set in the block range [left, right]
+    Function checks the block variety: NULL (false), GAP, FULL (true) or plain bit-block
+    @internal
+*/
+inline
+bool block_any_range(const bm::word_t* const BMRESTRICT block,
+                            unsigned left, unsigned right) BMNOEXCEPT
  {
-    return v1 | v2;
+    BM_ASSERT(left <= right);
+    BM_ASSERT(right < bm::gap_max_bits);
+    if (!block)
+        return false;
+    if (BM_IS_GAP(block)) // GAP-encoded block: delegate to the GAP range check
+        return bm::gap_any_range(BMGAP_PTR(block), left, right);
+    if (IS_FULL_BLOCK(block)) // FULL block: answered without reading the block
+        return true;
+    return bm::bit_block_any_range(block, left, right); // non-zero result converts to true
  }
  
-/*! \brief GAP or functor */
-BMFORCEINLINE unsigned sub_op(unsigned v1, unsigned v2)
+// ----------------------------------------------------------------------
+
+/*! @brief Returns "true" if at least one bit is set in the block
+    Function checks the block variety: NULL (false), FULL (true), GAP or plain bit-block
+    @internal
+*/
+inline
+bool block_any(const bm::word_t* const BMRESTRICT block) BMNOEXCEPT
  {
-    return v1 & ~v2;
+    if (!block)
+        return false;
+    if (IS_FULL_BLOCK(block)) // tested before BM_IS_GAP, so the FULL address never reaches the GAP/bit scans
+        return true;
+    bool all_zero = (BM_IS_GAP(block)) ?
+                        bm::gap_is_all_zero(BMGAP_PTR(block))
+                      : bm::bit_is_all_zero(block);
+    return !all_zero; // "any bit set" is the negation of "all zero"
  }
  
  
+
+// ----------------------------------------------------------------------
+
+// GAP blocks manipulation functions:
+
+
  /*!
     \brief GAP AND operation.
     
@@ -5095,13 +5608,14 @@ BMFORCEINLINE unsigned sub_op(unsigned v1, unsigned v2)
  
     @ingroup gapfunc
  */
-BMFORCEINLINE 
+inline
  gap_word_t* gap_operation_and(const gap_word_t* BMRESTRICT vect1,
                                const gap_word_t* BMRESTRICT vect2,
                                gap_word_t*       BMRESTRICT tmp_buf,
-                              unsigned&         dsize)
+                              unsigned&         dsize) BMNOEXCEPT
  {
-    bm::gap_buff_op(tmp_buf, vect1, 0, vect2, 0, bm::and_op, dsize);
+    bm::gap_buff_op<bm::gap_word_t, bm::and_func>( // operation is now the template argument and_func, no longer the call argument and_op
+                                        tmp_buf, vect1, 0, vect2, 0, dsize);
      return tmp_buf;
  }
  
@@ -5119,11 +5633,11 @@ gap_word_t* gap_operation_and(const gap_word_t* BMRESTRICT vect1,
  
     @ingroup gapfunc
  */
-BMFORCEINLINE 
+inline
  unsigned gap_operation_any_and(const gap_word_t* BMRESTRICT vect1,
-                                      const gap_word_t* BMRESTRICT vect2)
+                               const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
  {
-    return gap_buff_any_op(vect1, 0, vect2, 0, and_op);
+    return gap_buff_any_op<bm::gap_word_t, bm::and_func>(vect1, 0, vect2, 0); // and_func moved from call argument to template argument; both flag arguments stay 0
  }
  
  
@@ -5138,9 +5652,9 @@ unsigned gap_operation_any_and(const gap_word_t* BMRESTRICT vect1,
  */
  inline
  unsigned gap_count_and(const gap_word_t* BMRESTRICT vect1,
-                       const gap_word_t* BMRESTRICT vect2)
+                       const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
  {
-    return gap_buff_count_op(vect1, vect2, and_op);
+    return bm::gap_buff_count_op<bm::gap_word_t, bm::and_func>(vect1, vect2); // counting helper instantiated with and_func (was the and_op call argument)
  }
  
  
@@ -5165,9 +5679,10 @@ inline
  gap_word_t* gap_operation_xor(const gap_word_t*  BMRESTRICT vect1,
                                const gap_word_t*  BMRESTRICT vect2,
                                gap_word_t*        BMRESTRICT tmp_buf,
-                              unsigned&                     dsize)
+                              unsigned&                     dsize) BMNOEXCEPT
  {
-    gap_buff_op(tmp_buf, vect1, 0, vect2, 0, bm::xor_op, dsize);
+    bm::gap_buff_op<bm::gap_word_t, bm::xor_func>(
+                                        tmp_buf, vect1, 0, vect2, 0, dsize);
      return tmp_buf;
  }
  
@@ -5178,9 +5693,10 @@ inline
  bool gap_operation_dry_xor(const gap_word_t*  BMRESTRICT vect1,
                             const gap_word_t*  BMRESTRICT vect2,
                             unsigned&                     dsize,
-                           unsigned limit)
+                           unsigned limit) BMNOEXCEPT
  {
-    return gap_buff_dry_op(vect1, vect2, bm::xor_op, dsize, limit);
+    return
+    bm::gap_buff_dry_op<bm::gap_word_t, bm::xor_func>(vect1, vect2, dsize, limit);
  }
  
  
@@ -5200,9 +5716,9 @@ bool gap_operation_dry_xor(const gap_word_t*  BMRESTRICT vect1,
  */
  BMFORCEINLINE 
  unsigned gap_operation_any_xor(const gap_word_t* BMRESTRICT vect1,
-                               const gap_word_t* BMRESTRICT vect2)
+                               const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
  {
-    return gap_buff_any_op(vect1, 0, vect2, 0, bm::xor_op);
+    return gap_buff_any_op<bm::gap_word_t, bm::xor_func>(vect1, 0, vect2, 0); // xor_func moved from call argument to template argument; both flag arguments stay 0
  }
  
  /*!
@@ -5214,11 +5730,11 @@ unsigned gap_operation_any_xor(const gap_word_t* BMRESTRICT vect1,
  
     @ingroup gapfunc
  */
-BMFORCEINLINE 
+BMFORCEINLINE
  unsigned gap_count_xor(const gap_word_t* BMRESTRICT vect1,
-                       const gap_word_t* BMRESTRICT vect2)
+                       const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
  {
-    return gap_buff_count_op(vect1, vect2, bm::xor_op);
+    return bm::gap_buff_count_op<bm::gap_word_t, bm::xor_func>(vect1, vect2); // counting helper instantiated with xor_func (was the xor_op call argument)
  }
  
  
@@ -5243,10 +5759,10 @@ inline
  gap_word_t* gap_operation_or(const gap_word_t*  BMRESTRICT vect1,
                               const gap_word_t*  BMRESTRICT vect2,
                               gap_word_t*        BMRESTRICT tmp_buf,
-                             unsigned&                     dsize)
+                             unsigned&                     dsize) BMNOEXCEPT
  {
-    gap_buff_op(tmp_buf, vect1, 1, vect2, 1, bm::and_op, dsize);
-    gap_invert(tmp_buf);
+    bm::gap_buff_op<bm::gap_word_t, bm::and_func>(tmp_buf, vect1, 1, vect2, 1, dsize);
+    bm::gap_invert(tmp_buf);
      return tmp_buf;
  }
  
@@ -5261,9 +5777,9 @@ gap_word_t* gap_operation_or(const gap_word_t*  BMRESTRICT vect1,
  */
  BMFORCEINLINE 
  unsigned gap_count_or(const gap_word_t* BMRESTRICT vect1,
-                      const gap_word_t* BMRESTRICT vect2)
+                      const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
  {
-    return gap_buff_count_op(vect1, vect2, bm::or_op);
+    return gap_buff_count_op<bm::gap_word_t, bm::or_func>(vect1, vect2); // counting helper instantiated with or_func (was the or_op call argument)
  }
  
  
@@ -5285,12 +5801,14 @@ unsigned gap_count_or(const gap_word_t* BMRESTRICT vect1,
  
     @ingroup gapfunc
  */
-inline gap_word_t* gap_operation_sub(const gap_word_t*  BMRESTRICT vect1,
-                                     const gap_word_t*  BMRESTRICT vect2,
-                                     gap_word_t*        BMRESTRICT tmp_buf,
-                                     unsigned&                     dsize)
+inline
+gap_word_t* gap_operation_sub(const gap_word_t*  BMRESTRICT vect1,
+                              const gap_word_t*  BMRESTRICT vect2,
+                              gap_word_t*        BMRESTRICT tmp_buf,
+                              unsigned&                     dsize) BMNOEXCEPT
  {
-    gap_buff_op(tmp_buf, vect1, 0, vect2, 1, and_op, dsize);    
+    bm::gap_buff_op<bm::gap_word_t, bm::and_func>( // no bug here: SUB deliberately uses and_func with the vect2 flag set to 1 (presumably an inversion mask, i.e. vect1 AND NOT vect2)
+                                        tmp_buf, vect1, 0, vect2, 1, dsize);
      return tmp_buf;
  }
  
@@ -5309,11 +5827,13 @@ inline gap_word_t* gap_operation_sub(const gap_word_t*  BMRESTRICT vect1,
  
     @ingroup gapfunc
  */
-BMFORCEINLINE 
+inline
  unsigned gap_operation_any_sub(const gap_word_t* BMRESTRICT vect1,
-                               const gap_word_t* BMRESTRICT vect2)
+                               const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
  {
-    return gap_buff_any_op(vect1, 0, vect2, 1, bm::and_op);    
+    return
+    bm::gap_buff_any_op<bm::gap_word_t, bm::and_func>( // no bug here: SUB deliberately uses and_func with the vect2 flag set to 1 (presumably an inversion mask, i.e. vect1 AND NOT vect2)
+                                               vect1, 0, vect2, 1);
  }
  
  
@@ -5328,9 +5848,9 @@ unsigned gap_operation_any_sub(const gap_word_t* BMRESTRICT vect1,
  */
  BMFORCEINLINE 
  unsigned gap_count_sub(const gap_word_t* BMRESTRICT vect1,
-                       const gap_word_t* BMRESTRICT vect2)
+                       const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
  {
-    return gap_buff_count_op(vect1, vect2, bm::sub_op);
+    return bm::gap_buff_count_op<bm::gap_word_t, bm::sub_func>(vect1, vect2); // counting helper instantiated with sub_func (was the sub_op call argument)
  }
  
  
@@ -5348,7 +5868,8 @@ unsigned gap_count_sub(const gap_word_t* BMRESTRICT vect1,
     @ingroup bitfunc
  */
  inline 
-void bit_block_copy(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src)
+void bit_block_copy(bm::word_t* BMRESTRICT dst,
+                    const bm::word_t* BMRESTRICT src) BMNOEXCEPT
  {
  #ifdef BMVECTOPT
      VECT_COPY_BLOCK(dst, src);
@@ -5366,7 +5887,8 @@ void bit_block_copy(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src
     @ingroup bitfunc
  */
  inline
-void bit_block_stream(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src)
+void bit_block_stream(bm::word_t* BMRESTRICT dst,
+                      const bm::word_t* BMRESTRICT src) BMNOEXCEPT
  {
  #ifdef VECT_STREAM_BLOCK
      VECT_STREAM_BLOCK(dst, src);
@@ -5388,7 +5910,8 @@ void bit_block_stream(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT s
     @ingroup bitfunc
  */
  inline 
-bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src)
+bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst,
+                         const bm::word_t* BMRESTRICT src) BMNOEXCEPT
  {
      BM_ASSERT(dst);
      BM_ASSERT(src);
@@ -5428,7 +5951,7 @@ bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRIC
  inline
  bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst,
                           const bm::word_t* BMRESTRICT src,
-                         bm::id64_t digest)
+                         bm::id64_t digest) BMNOEXCEPT
  {
      BM_ASSERT(dst);
      BM_ASSERT(src);
@@ -5486,7 +6009,7 @@ bm::id64_t bit_block_and_5way(bm::word_t* BMRESTRICT dst,
                                const bm::word_t* BMRESTRICT src1,
                                const bm::word_t* BMRESTRICT src2,
                                const bm::word_t* BMRESTRICT src3,
-                              bm::id64_t digest)
+                              bm::id64_t digest) BMNOEXCEPT
  {
      BM_ASSERT(dst);
      BM_ASSERT(src0 && src1 && src2 && src3);
@@ -5551,7 +6074,7 @@ inline
  bm::id64_t bit_block_and_2way(bm::word_t* BMRESTRICT dst,
                                const bm::word_t* BMRESTRICT src1,
                                const bm::word_t* BMRESTRICT src2,
-                              bm::id64_t digest)
+                              bm::id64_t digest) BMNOEXCEPT
  {
      BM_ASSERT(dst);
      BM_ASSERT(src1 && src2);
@@ -5612,7 +6135,7 @@ bm::id64_t bit_block_and_2way(bm::word_t* BMRESTRICT dst,
  */
  inline 
  unsigned bit_block_and_count(const bm::word_t* BMRESTRICT src1,
-                             const bm::word_t* BMRESTRICT src2)
+                             const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      unsigned count;
      const bm::word_t* src1_end = src1 + bm::set_block_size;
@@ -5661,7 +6184,7 @@ unsigned bit_block_and_count(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  unsigned bit_block_and_any(const bm::word_t* src1, 
-                           const bm::word_t* src2)
+                           const bm::word_t* src2) BMNOEXCEPT
  {
      unsigned count = 0;
      const bm::word_t* src1_end = src1 + bm::set_block_size;
@@ -5691,7 +6214,7 @@ unsigned bit_block_and_any(const bm::word_t* src1,
  */
  inline 
  unsigned bit_block_xor_count(const bm::word_t* BMRESTRICT src1,
-                             const bm::word_t* BMRESTRICT src2)
+                             const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      unsigned count;
      const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
@@ -5740,7 +6263,7 @@ unsigned bit_block_xor_count(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  unsigned bit_block_xor_any(const bm::word_t* BMRESTRICT src1,
-                           const bm::word_t* BMRESTRICT src2)
+                           const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      unsigned count = 0;
      const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
@@ -5767,7 +6290,7 @@ unsigned bit_block_xor_any(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  unsigned bit_block_sub_count(const bm::word_t* BMRESTRICT src1,
-                             const bm::word_t* BMRESTRICT src2)
+                             const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      unsigned count;
      const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
@@ -5815,7 +6338,7 @@ unsigned bit_block_sub_count(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  unsigned bit_block_sub_any(const bm::word_t* BMRESTRICT src1,
-                           const bm::word_t* BMRESTRICT src2)
+                           const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      unsigned count = 0;
      const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
@@ -5844,7 +6367,7 @@ unsigned bit_block_sub_any(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  unsigned bit_block_or_count(const bm::word_t* src1, 
-                            const bm::word_t* src2)
+                            const bm::word_t* src2) BMNOEXCEPT
  {
      unsigned count;
      const bm::word_t* src1_end = src1 + bm::set_block_size;
@@ -5892,7 +6415,7 @@ unsigned bit_block_or_count(const bm::word_t* src1,
  */
  inline 
  unsigned bit_block_or_any(const bm::word_t* BMRESTRICT src1,
-                          const bm::word_t* BMRESTRICT src2)
+                          const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      unsigned count = 0;
      const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
@@ -5924,7 +6447,7 @@ unsigned bit_block_or_any(const bm::word_t* BMRESTRICT src1,
     @ingroup bitfunc
  */
  inline bm::word_t* bit_operation_and(bm::word_t* BMRESTRICT dst, 
-                                     const bm::word_t* BMRESTRICT src)
+                                     const bm::word_t* BMRESTRICT src) BMNOEXCEPT
  {
      BM_ASSERT(dst || src);
  
@@ -5988,7 +6511,7 @@ inline bm::word_t* bit_operation_and(bm::word_t* BMRESTRICT dst,
  */
  inline 
  bm::id_t bit_operation_and_count(const bm::word_t* BMRESTRICT src1,
-                                 const bm::word_t* BMRESTRICT src2)
+                                 const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
          return 0;
@@ -6012,7 +6535,7 @@ bm::id_t bit_operation_and_count(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  bm::id_t bit_operation_and_any(const bm::word_t* BMRESTRICT src1,
-                               const bm::word_t* BMRESTRICT src2)
+                               const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
          return 0;
@@ -6037,7 +6560,7 @@ bm::id_t bit_operation_and_any(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  bm::id_t bit_operation_sub_count(const bm::word_t* BMRESTRICT src1, 
-                                 const bm::word_t* BMRESTRICT src2)
+                                 const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      if (src1 == src2)
          return 0;
@@ -6074,7 +6597,7 @@ bm::id_t bit_operation_sub_count(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  bm::id_t bit_operation_sub_count_inv(const bm::word_t* BMRESTRICT src1, 
-                                     const bm::word_t* BMRESTRICT src2)
+                                     const bm::word_t* BMRESTRICT src2) BMNOEXCEPT // "inverse" SUB count: the body forwards to bit_operation_sub_count with the operands swapped
  {
      return bit_operation_sub_count(src2, src1);
  }
@@ -6092,7 +6615,7 @@ bm::id_t bit_operation_sub_count_inv(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  bm::id_t bit_operation_sub_any(const bm::word_t* BMRESTRICT src1, 
-                               const bm::word_t* BMRESTRICT src2)
+                               const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      if (IS_EMPTY_BLOCK(src1))
          return 0;
@@ -6127,7 +6650,7 @@ bm::id_t bit_operation_sub_any(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  bm::id_t bit_operation_or_count(const bm::word_t* BMRESTRICT src1,
-                                const bm::word_t* BMRESTRICT src2)
+                                const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      if (IS_FULL_BLOCK(src1) || IS_FULL_BLOCK(src2))
          return bm::gap_max_bits;
@@ -6164,7 +6687,7 @@ bm::id_t bit_operation_or_count(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  bm::id_t bit_operation_or_any(const bm::word_t* BMRESTRICT src1,
-                              const bm::word_t* BMRESTRICT src2)
+                              const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      if (IS_EMPTY_BLOCK(src1))
      {
@@ -6197,7 +6720,7 @@ bm::id_t bit_operation_or_any(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  bool bit_block_or(bm::word_t* BMRESTRICT dst, 
-                  const bm::word_t* BMRESTRICT src)
+                  const bm::word_t* BMRESTRICT src) BMNOEXCEPT
  {
  #ifdef BMVECTOPT
      return VECT_OR_BLOCK(dst, src);
@@ -6235,7 +6758,7 @@ bool bit_block_or(bm::word_t* BMRESTRICT dst,
  inline
  bool bit_block_or_2way(bm::word_t* BMRESTRICT dst,
                          const bm::word_t* BMRESTRICT src1,
-                        const bm::word_t* BMRESTRICT src2)
+                        const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
  #ifdef BMVECTOPT
      return VECT_OR_BLOCK_2WAY(dst, src1, src2);
@@ -6274,7 +6797,7 @@ bool bit_block_or_2way(bm::word_t* BMRESTRICT dst,
  inline
  bm::id64_t bit_block_xor_2way(bm::word_t* BMRESTRICT dst,
                                const bm::word_t* BMRESTRICT src1,
-                              const bm::word_t* BMRESTRICT src2)
+                              const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
  #ifdef BMVECTOPT
      return VECT_XOR_BLOCK_2WAY(dst, src1, src2);
@@ -6315,7 +6838,7 @@ bm::id64_t bit_block_xor_2way(bm::word_t* BMRESTRICT dst,
  inline
  bool bit_block_or_3way(bm::word_t* BMRESTRICT dst,
                          const bm::word_t* BMRESTRICT src1,
-                        const bm::word_t* BMRESTRICT src2)
+                        const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
  #ifdef BMVECTOPT
      return VECT_OR_BLOCK_3WAY(dst, src1, src2);
@@ -6361,7 +6884,7 @@ bool bit_block_or_5way(bm::word_t* BMRESTRICT dst,
                          const bm::word_t* BMRESTRICT src1,
                          const bm::word_t* BMRESTRICT src2,
                          const bm::word_t* BMRESTRICT src3,
-                        const bm::word_t* BMRESTRICT src4)
+                        const bm::word_t* BMRESTRICT src4) BMNOEXCEPT
  {
  #ifdef BMVECTOPT
      return VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4);
@@ -6407,7 +6930,7 @@ bool bit_block_or_5way(bm::word_t* BMRESTRICT dst,
  */
  inline 
  bm::word_t* bit_operation_or(bm::word_t* BMRESTRICT dst, 
-                             const bm::word_t* BMRESTRICT src)
+                             const bm::word_t* BMRESTRICT src) BMNOEXCEPT
  {
      BM_ASSERT(dst || src);
  
@@ -6467,7 +6990,7 @@ bm::word_t* bit_operation_or(bm::word_t* BMRESTRICT dst,
  */
  inline
  bm::id64_t bit_block_sub(bm::word_t* BMRESTRICT dst,
-                         const bm::word_t* BMRESTRICT src)
+                         const bm::word_t* BMRESTRICT src) BMNOEXCEPT
  {
  #ifdef BMVECTOPT
      bm::id64_t acc = VECT_SUB_BLOCK(dst, src);
@@ -6504,7 +7027,7 @@ bm::id64_t bit_block_sub(bm::word_t* BMRESTRICT dst,
  inline
  bm::id64_t bit_block_sub(bm::word_t* BMRESTRICT dst,
                           const bm::word_t* BMRESTRICT src,
-                         bm::id64_t digest)
+                         bm::id64_t digest) BMNOEXCEPT
  {
      BM_ASSERT(dst);
      BM_ASSERT(src);
@@ -6565,7 +7088,7 @@ inline
  bm::id64_t bit_block_sub_2way(bm::word_t* BMRESTRICT dst,
                           const bm::word_t* BMRESTRICT src1,
                           const bm::word_t* BMRESTRICT src2,
-                         bm::id64_t digest)
+                         bm::id64_t digest) BMNOEXCEPT
  {
      BM_ASSERT(dst);
      BM_ASSERT(src1 && src2);
@@ -6630,7 +7153,7 @@ bm::id64_t bit_block_sub_2way(bm::word_t* BMRESTRICT dst,
  */
  inline 
  bm::word_t* bit_operation_sub(bm::word_t* BMRESTRICT dst, 
-                              const bm::word_t* BMRESTRICT src)
+                              const bm::word_t* BMRESTRICT src) BMNOEXCEPT
  {
      BM_ASSERT(dst || src);
  
@@ -6688,7 +7211,7 @@ bm::word_t* bit_operation_sub(bm::word_t* BMRESTRICT dst,
  */
  inline 
  bm::id64_t bit_block_xor(bm::word_t* BMRESTRICT dst,
-                         const bm::word_t* BMRESTRICT src)
+                         const bm::word_t* BMRESTRICT src) BMNOEXCEPT
  {
      BM_ASSERT(dst);
      BM_ASSERT(src);
@@ -6724,7 +7247,7 @@ bm::id64_t bit_block_xor(bm::word_t* BMRESTRICT dst,
  */
  inline
  void bit_andnot_arr_ffmask(bm::word_t* BMRESTRICT dst,
-                           const bm::word_t* BMRESTRICT src)
+                           const bm::word_t* BMRESTRICT src) BMNOEXCEPT
  {
      const bm::word_t* BMRESTRICT src_end = src + bm::set_block_size;
  #ifdef BMVECTOPT
@@ -6759,7 +7282,7 @@ void bit_andnot_arr_ffmask(bm::word_t* BMRESTRICT dst,
  */
  inline 
  bm::word_t* bit_operation_xor(bm::word_t* BMRESTRICT dst, 
-                              const bm::word_t* BMRESTRICT src)
+                              const bm::word_t* BMRESTRICT src) BMNOEXCEPT
  {
      BM_ASSERT(dst || src);
      if (src == dst) return 0;  // XOR rule  
@@ -6797,7 +7320,7 @@ bm::word_t* bit_operation_xor(bm::word_t* BMRESTRICT dst,
  */
  inline 
  bm::id_t bit_operation_xor_count(const bm::word_t* BMRESTRICT src1,
-                                 const bm::word_t* BMRESTRICT src2)
+                                 const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      if (src1 == src2)
          return 0;
@@ -6829,7 +7352,7 @@ bm::id_t bit_operation_xor_count(const bm::word_t* BMRESTRICT src1,
  */
  inline 
  bm::id_t bit_operation_xor_any(const bm::word_t* BMRESTRICT src1,
-                               const bm::word_t* BMRESTRICT src2)
+                               const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
  {
      if (src1 == src2)
          return 0;
@@ -6854,7 +7377,7 @@ bm::id_t bit_operation_xor_any(const bm::word_t* BMRESTRICT src1,
      @ingroup bitfunc
  */
  template<class T>
-unsigned bit_count_nonzero_size(const T*  blk, unsigned  data_size)
+unsigned bit_count_nonzero_size(const T* blk, unsigned  data_size) BMNOEXCEPT
  {
      BM_ASSERT(blk && data_size);
      unsigned count = 0;
@@ -6914,7 +7437,8 @@ unsigned bit_count_nonzero_size(const T*  blk, unsigned  data_size)
      @ingroup bitfunc
  */
  inline
-unsigned bit_block_find(const bm::word_t* block, unsigned nbit, unsigned* pos)
+unsigned bit_block_find(const bm::word_t* BMRESTRICT block,
+                        unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(pos);
@@ -6934,7 +7458,7 @@ unsigned bit_block_find(const bm::word_t* block, unsigned nbit, unsigned* pos)
      if (w)
      {
          bit_pos = bm::bit_scan_forward32(w); // trailing zeros
-        *pos = unsigned(bit_pos + (nword * 8u * sizeof(bm::word_t)));
+        *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t))));
          return 1;
      }
      
@@ -6944,7 +7468,7 @@ unsigned bit_block_find(const bm::word_t* block, unsigned nbit, unsigned* pos)
          if (w)
          {
              bit_pos = bm::bit_scan_forward32(w); // trailing zeros
-            *pos = unsigned(bit_pos + (i * 8u * sizeof(bm::word_t)));
+            *pos = unsigned(bit_pos + (i * 8u * unsigned(sizeof(bm::word_t))));
              return w;
          }
      } // for i
@@ -6952,6 +7476,8 @@ unsigned bit_block_find(const bm::word_t* block, unsigned nbit, unsigned* pos)
  }
  
  
+
+
  /*!
      \brief BIT block find the last set bit (backward search)
  
@@ -6962,7 +7488,8 @@ unsigned bit_block_find(const bm::word_t* block, unsigned nbit, unsigned* pos)
      @ingroup bitfunc
  */
  inline
-unsigned bit_find_last(const bm::word_t* block, unsigned* last)
+unsigned bit_find_last(const bm::word_t* BMRESTRICT block,
+                       unsigned* BMRESTRICT last) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(last);
@@ -6975,7 +7502,7 @@ unsigned bit_find_last(const bm::word_t* block, unsigned* last)
          if (w)
          {
              unsigned idx = bm::bit_scan_reverse(w);
-            *last = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
+            *last = unsigned(idx + (i * 8u * unsigned(sizeof(bm::word_t))));
              return w;
          }
          if (i == 0)
@@ -6995,7 +7522,8 @@ unsigned bit_find_last(const bm::word_t* block, unsigned* last)
      @internal
  */
  inline
-bool bit_find_first(const bm::word_t* block, unsigned* pos)
+bool bit_find_first(const bm::word_t* BMRESTRICT block,
+                    unsigned* BMRESTRICT pos) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(pos);
@@ -7009,7 +7537,7 @@ bool bit_find_first(const bm::word_t* block, unsigned* pos)
          if (w)
          {
              unsigned idx = bm::bit_scan_forward32(w); // trailing zeros
-            *pos = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
+            *pos = unsigned(idx + (i * 8u * unsigned(sizeof(bm::word_t))));
              return w;
          }
      } // for i
@@ -7029,9 +7557,9 @@ bool bit_find_first(const bm::word_t* block, unsigned* pos)
      @ingroup bitfunc
  */
  inline
-unsigned bit_find_first(const bm::word_t* block,
-                        unsigned*         first,
-                        bm::id64_t        digest)
+unsigned bit_find_first(const bm::word_t* BMRESTRICT block,
+                        unsigned*         BMRESTRICT first,
+                        bm::id64_t        digest) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(first);
@@ -7047,7 +7575,7 @@ unsigned bit_find_first(const bm::word_t* block,
          if (w)
          {
              unsigned idx = bit_scan_forward32(w); // trailing zeros
-            *first = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
+            *first = unsigned(idx + (i * 8u * unsigned(sizeof(bm::word_t))));
              return w;
          }
      } // for i
@@ -7067,9 +7595,9 @@ unsigned bit_find_first(const bm::word_t* block,
      @ingroup bitfunc
  */
  inline
-bool bit_find_first_if_1(const bm::word_t* block,
-                         unsigned*         first,
-                         bm::id64_t        digest)
+bool bit_find_first_if_1(const bm::word_t* BMRESTRICT block,
+                         unsigned*         BMRESTRICT first,
+                         bm::id64_t        digest) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(first);
@@ -7127,7 +7655,7 @@ template<typename SIZE_TYPE>
  SIZE_TYPE bit_find_rank(const bm::word_t* const block,
                          SIZE_TYPE               rank,
                          unsigned                nbit_from,
-                        unsigned&               nbit_pos)
+                        unsigned&               nbit_pos) BMNOEXCEPT
  {
      BM_ASSERT(block);
      BM_ASSERT(rank);
@@ -7206,7 +7734,7 @@ template<typename SIZE_TYPE>
  SIZE_TYPE block_find_rank(const bm::word_t* const block,
                            SIZE_TYPE               rank,
                            unsigned                nbit_from,
-                          unsigned&               nbit_pos)
+                          unsigned&               nbit_pos) BMNOEXCEPT
  {
      if (BM_IS_GAP(block))
      {
@@ -7230,7 +7758,7 @@ inline
  bm::set_representation best_representation(unsigned bit_count,
                                             unsigned total_possible_bitcount,
                                             unsigned gap_count,
-                                           unsigned block_size)
+                                           unsigned block_size) BMNOEXCEPT
  {
      unsigned arr_size = unsigned(sizeof(bm::gap_word_t) * bit_count + sizeof(bm::gap_word_t));
      unsigned gap_size = unsigned(sizeof(bm::gap_word_t) * gap_count + sizeof(bm::gap_word_t));
@@ -7268,15 +7796,16 @@ T bit_convert_to_arr(T* BMRESTRICT dest,
                       const unsigned* BMRESTRICT src,
                       bm::id_t bits,
                       unsigned dest_len,
-                     unsigned mask = 0)
+                     unsigned mask = 0) BMNOEXCEPT
  {
      T* BMRESTRICT pcurr = dest;
-    for (unsigned bit_idx=0; bit_idx < bits; ++src,bit_idx += unsigned(sizeof(*src) * 8))
+    for (unsigned bit_idx=0; bit_idx < bits;
+                                ++src,bit_idx += unsigned(sizeof(*src) * 8))
      {
          unsigned val = *src ^ mask; // invert value by XOR 0xFF..
          if (val == 0) 
              continue;
-        if (pcurr + sizeof(val)*8 >= dest + dest_len) // insufficient space
+        if (pcurr + unsigned(sizeof(val)*8) >= dest + dest_len) // insufficient space
              return 0;
          // popscan loop to decode bits in a word
          while (val)
@@ -7299,7 +7828,7 @@ T bit_convert_to_arr(T* BMRESTRICT dest,
      @internal
  */
  inline
-bool check_block_zero(const bm::word_t* blk, bool  deep_scan)
+bool check_block_zero(const bm::word_t* blk, bool  deep_scan) BMNOEXCEPT
  {
      if (!blk) return true;
      if (IS_FULL_BLOCK(blk)) return false;
@@ -7323,7 +7852,7 @@ bool check_block_zero(const bm::word_t* blk, bool  deep_scan)
      @internal
  */
  inline
-bool check_block_one(const bm::word_t* blk, bool deep_scan)
+bool check_block_one(const bm::word_t* blk, bool deep_scan) BMNOEXCEPT
  {
      if (blk == 0) return false;
  
@@ -7348,7 +7877,7 @@ bool check_block_one(const bm::word_t* blk, bool deep_scan)
  template<typename T> 
  unsigned gap_overhead(const T* length, 
                        const T* length_end, 
-                      const T* glevel_len)
+                      const T* glevel_len) BMNOEXCEPT
  {
      BM_ASSERT(length && length_end && glevel_len);
  
@@ -7375,7 +7904,7 @@ unsigned gap_overhead(const T* length,
  template<typename T>
  bool improve_gap_levels(const T* length,
                          const T* length_end,
-                        T*       glevel_len)
+                        T*       glevel_len) BMNOEXCEPT
  {
      BM_ASSERT(length && length_end && glevel_len);
  
@@ -7473,7 +8002,7 @@ bool improve_gap_levels(const T* length,
  inline
  bool block_find_first_diff(const bm::word_t* BMRESTRICT blk,
                             const bm::word_t* BMRESTRICT arg_blk,
-                           unsigned* BMRESTRICT pos)
+                           unsigned* BMRESTRICT pos) BMNOEXCEPT
  {
      // If one block is zero we check if the other one has at least
      // one bit ON
@@ -7568,7 +8097,7 @@ public:
      bitblock_get_adapter(const bm::word_t* bit_block) : b_(bit_block) {}
      
      BMFORCEINLINE
-    bm::word_t get_32() { return *b_++; }
+    bm::word_t get_32() BMNOEXCEPT { return *b_++; }
  private:
      const bm::word_t*  b_;
  };
@@ -7597,9 +8126,9 @@ class bitblock_sum_adapter
  public:
      bitblock_sum_adapter() : sum_(0) {}
      BMFORCEINLINE
-    void push_back(bm::word_t w) { this->sum_+= w; }
+    void push_back(bm::word_t w) BMNOEXCEPT { this->sum_+= w; }
      /// Get accumulated sum
-    bm::word_t sum() const { return this->sum_; }
+    bm::word_t sum() const BMNOEXCEPT { return this->sum_; }
  private:
      bm::word_t sum_;
  };
@@ -7619,7 +8148,7 @@ public:
        cnt_(0)
      {}
  
-    bm::word_t get_32()
+    bm::word_t get_32() BMNOEXCEPT
      {
          if (cnt_ < from_ || cnt_ > to_)
          {    
@@ -7645,7 +8174,7 @@ template<class It1, class It2, class BinaryOp, class Encoder>
  void bit_recomb(It1& it1, It2& it2, 
                  BinaryOp& op, 
                  Encoder& enc, 
-                unsigned block_size = bm::set_block_size)
+                unsigned block_size = bm::set_block_size) BMNOEXCEPT
  {
      for (unsigned i = 0; i < block_size; ++i)
      {
@@ -7659,37 +8188,37 @@ void bit_recomb(It1& it1, It2& it2,
  /// Bit AND functor
  template<typename W> struct bit_AND
  {
-    W operator()(W w1, W w2) { return w1 & w2; }
+    W operator()(W w1, W w2) BMNOEXCEPT { return w1 & w2; }
  };
  
  /// Bit OR functor
  template<typename W> struct bit_OR
  {
-    W operator()(W w1, W w2) { return w1 | w2; }
+    W operator()(W w1, W w2) BMNOEXCEPT { return w1 | w2; }
  };
  
  /// Bit SUB functor
  template<typename W> struct bit_SUB
  {
-    W operator()(W w1, W w2) { return w1 & ~w2; }
+     W operator()(W w1, W w2) BMNOEXCEPT { return w1 & ~w2; }
  };
  
  /// Bit XOR functor
  template<typename W> struct bit_XOR
  {
-    W operator()(W w1, W w2) { return w1 ^ w2; }
+     W operator()(W w1, W w2) BMNOEXCEPT { return w1 ^ w2; }
  };
  
  /// Bit ASSIGN functor
  template<typename W> struct bit_ASSIGN
  {
-    W operator()(W, W w2) { return w2; }
+     W operator()(W, W w2) BMNOEXCEPT { return w2; }
  };
  
  /// Bit COUNT functor
  template<typename W> struct bit_COUNT
  {
-    W operator()(W w1, W w2) 
+    W operator()(W w1, W w2) BMNOEXCEPT
      {
          w1 = 0;
          BM_INCWORD_BITCOUNT(w1, w2);
@@ -7700,7 +8229,7 @@ template<typename W> struct bit_COUNT
  /// Bit COUNT AND functor
  template<typename W> struct bit_COUNT_AND
  {
-    W operator()(W w1, W w2) 
+    W operator()(W w1, W w2) BMNOEXCEPT
      {
          W r = 0;
          BM_INCWORD_BITCOUNT(r, w1 & w2);
@@ -7711,7 +8240,7 @@ template<typename W> struct bit_COUNT_AND
  /// Bit COUNT XOR functor
  template<typename W> struct bit_COUNT_XOR
  {
-    W operator()(W w1, W w2) 
+    W operator()(W w1, W w2) BMNOEXCEPT
      {
          W r = 0;
          BM_INCWORD_BITCOUNT(r, w1 ^ w2);
@@ -7722,7 +8251,7 @@ template<typename W> struct bit_COUNT_XOR
  /// Bit COUNT OR functor
  template<typename W> struct bit_COUNT_OR
  {
-    W operator()(W w1, W w2) 
+    W operator()(W w1, W w2) BMNOEXCEPT
      {
          W r = 0;
          BM_INCWORD_BITCOUNT(r, w1 | w2);
@@ -7734,7 +8263,7 @@ template<typename W> struct bit_COUNT_OR
  /// Bit COUNT SUB AB functor
  template<typename W> struct bit_COUNT_SUB_AB
  {
-    W operator()(W w1, W w2) 
+    W operator()(W w1, W w2) BMNOEXCEPT
      {
          W r = 0;
          BM_INCWORD_BITCOUNT(r, w1 & (~w2));
@@ -7745,7 +8274,7 @@ template<typename W> struct bit_COUNT_SUB_AB
  /// Bit SUB BA functor
  template<typename W> struct bit_COUNT_SUB_BA
  {
-    W operator()(W w1, W w2) 
+    W operator()(W w1, W w2) BMNOEXCEPT
      {
          W r = 0;
          BM_INCWORD_BITCOUNT(r, w2 & (~w1));
@@ -7756,7 +8285,7 @@ template<typename W> struct bit_COUNT_SUB_BA
  /// Bit COUNT A functor
  template<typename W> struct bit_COUNT_A
  {
-    W operator()(W w1, W )
+    W operator()(W w1, W ) BMNOEXCEPT
      {
          W r = 0;
          BM_INCWORD_BITCOUNT(r, w1);
@@ -7767,7 +8296,7 @@ template<typename W> struct bit_COUNT_A
  /// Bit COUNT B functor
  template<typename W> struct bit_COUNT_B
  {
-    W operator()(W, W w2)
+    W operator()(W, W w2) BMNOEXCEPT
      {
          W r = 0;
          BM_INCWORD_BITCOUNT(r, w2);
@@ -7858,8 +8387,11 @@ operation_functions<T>::bit_op_count_table_[bm::set_END] = {
      0,                            // set_COUNT_B
  };
  
-
-const unsigned short set_bitscan_wave_size = 2;
+/**
+    Size of bit decode wave in words
+    @internal
+ */
+const unsigned short set_bitscan_wave_size = 4;
  /*!
      \brief Unpacks word wave (Nx 32-bit words)
      \param w_ptr - pointer on wave start
@@ -7870,10 +8402,12 @@ const unsigned short set_bitscan_wave_size = 2;
      @internal
  */
  inline
-unsigned short bitscan_wave(const bm::word_t* w_ptr, unsigned char* bits)
+unsigned short
+bitscan_wave(const bm::word_t* BMRESTRICT w_ptr,
+             unsigned char* BMRESTRICT bits) BMNOEXCEPT
  {
      bm::word_t w0, w1;
-    unsigned short cnt0;
+    unsigned int cnt0;
  
      w0 = w_ptr[0];
      w1 = w_ptr[1];
@@ -7881,15 +8415,23 @@ unsigned short bitscan_wave(const bm::word_t* w_ptr, unsigned char* bits)
  #if defined(BMAVX512OPT) || defined(BMAVX2OPT) || defined(BMSSE42OPT)
      // combine into 64-bit word and scan (when HW popcnt64 is available)
      bm::id64_t w = (bm::id64_t(w1) << 32) | w0;
-    cnt0 = (unsigned short) bm::bitscan_popcnt64(w, bits);
+    cnt0 = bm::bitscan_popcnt64(w, bits);
+
+    w0 = w_ptr[2];
+    w1 = w_ptr[3];
+    w = (bm::id64_t(w1) << 32) | w0;
+    cnt0 += bm::bitscan_popcnt64(w, bits + cnt0, 64);
  #else
-    unsigned short cnt1;
      // decode wave as two 32-bit bitscan decodes
-    cnt0 = w0 ? bm::bitscan_popcnt(w0, bits) : 0;
-    cnt1 = w1 ? bm::bitscan_popcnt(w1, bits + cnt0, 32) : 0;
-    cnt0 = (unsigned short)(cnt0 + cnt1);
+    cnt0 = bm::bitscan_popcnt(w0, bits);
+    cnt0 += bm::bitscan_popcnt(w1, bits + cnt0, 32);
+
+    w0 = w_ptr[2];
+    w1 = w_ptr[3];
+    cnt0 += bm::bitscan_popcnt(w0, bits + cnt0, 64);
+    cnt0 += bm::bitscan_popcnt(w1, bits + cnt0, 64+32);
  #endif
-    return cnt0;
+    return static_cast<unsigned short>(cnt0);
  }
  
  #if defined (BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
@@ -7899,9 +8441,11 @@ unsigned short bitscan_wave(const bm::word_t* w_ptr, unsigned char* bits)
      @internal
  */
  inline
-void bit_block_gather_scatter(unsigned* arr, const bm::word_t* blk,
-                              const unsigned* idx, unsigned size, unsigned start,
-                              unsigned bit_idx)
+void bit_block_gather_scatter(unsigned* BMRESTRICT arr,
+                              const bm::word_t* BMRESTRICT blk,
+                              const unsigned* BMRESTRICT idx,
+                              unsigned size, unsigned start,
+                              unsigned bit_idx) BMNOEXCEPT
  {
  typedef unsigned TRGW;
  typedef unsigned IDX;
@@ -7930,8 +8474,10 @@ typedef unsigned IDX;
      @internal
  */
  template<typename TRGW, typename IDX, typename SZ>
-void bit_block_gather_scatter(TRGW* arr, const bm::word_t* blk,
-                              const IDX* idx, SZ size, SZ start, unsigned bit_idx)
+void bit_block_gather_scatter(TRGW* BMRESTRICT arr,
+                              const bm::word_t* BMRESTRICT blk,
+                              const IDX* BMRESTRICT idx,
+                              SZ size, SZ start, unsigned bit_idx) BMNOEXCEPT
  {
      // TODO: SIMD for 64-bit index sizes and 64-bit target value size
      //
@@ -7943,15 +8489,17 @@ void bit_block_gather_scatter(TRGW* arr, const bm::word_t* blk,
      {
          const SZ base = start + k;
          const unsigned nbitA = unsigned(idx[base] & bm::set_block_mask);
-        arr[base]   |= (TRGW(bool(blk[nbitA >> bm::set_word_shift] & (mask1 << (nbitA & bm::set_word_mask)))) << bit_idx);
+        arr[base]  |= (TRGW(bool(blk[nbitA >> bm::set_word_shift] &
+                       (mask1 << (nbitA & bm::set_word_mask)))) << bit_idx);
          const unsigned nbitB = unsigned(idx[base + 1] & bm::set_block_mask);
-        arr[base+1] |= (TRGW(bool(blk[nbitB >> bm::set_word_shift] & (mask1 << (nbitB & bm::set_word_mask)))) << bit_idx);
+        arr[base+1] |= (TRGW(bool(blk[nbitB >> bm::set_word_shift] &
+                        (mask1 << (nbitB & bm::set_word_mask)))) << bit_idx);
      } // for k
-
      for (; k < len; ++k)
      {
          unsigned nbit = unsigned(idx[start + k] & bm::set_block_mask);
-        arr[start + k] |= (TRGW(bool(blk[nbit >> bm::set_word_shift] & (mask1 << (nbit & bm::set_word_mask)))) << bit_idx);
+        arr[start + k] |= (TRGW(bool(blk[nbit >> bm::set_word_shift] &
+                         (mask1 << (nbit & bm::set_word_mask)))) << bit_idx);
      } // for k
  }
  
@@ -7968,7 +8516,8 @@ void bit_block_gather_scatter(TRGW* arr, const bm::word_t* blk,
      @internal
  */
  inline
-bm::id64_t idx_arr_block_lookup_u64(const bm::id64_t* idx, bm::id64_t size, bm::id64_t nb, bm::id64_t start)
+bm::id64_t idx_arr_block_lookup_u64(const bm::id64_t* idx,
+                bm::id64_t size, bm::id64_t nb, bm::id64_t start) BMNOEXCEPT
  {
      BM_ASSERT(idx);
      BM_ASSERT(start < size);
@@ -7993,7 +8542,8 @@ bm::id64_t idx_arr_block_lookup_u64(const bm::id64_t* idx, bm::id64_t size, bm::
      @internal
  */
  inline
-unsigned idx_arr_block_lookup_u32(const unsigned* idx, unsigned size, unsigned nb, unsigned start)
+unsigned idx_arr_block_lookup_u32(const unsigned* idx,
+                unsigned size, unsigned nb, unsigned start) BMNOEXCEPT
  {
      BM_ASSERT(idx);
      BM_ASSERT(start < size);
@@ -8027,7 +8577,7 @@ unsigned idx_arr_block_lookup_u32(const unsigned* idx, unsigned size, unsigned n
  inline
  void set_block_bits_u64(bm::word_t* BMRESTRICT block,
                          const bm::id64_t* BMRESTRICT idx,
-                        bm::id64_t start, bm::id64_t stop)
+                        bm::id64_t start, bm::id64_t stop) BMNOEXCEPT
  {
      // TODO: SIMD for 64-bit mode
      for (bm::id64_t i = start; i < stop; ++i)
@@ -8036,8 +8586,7 @@ void set_block_bits_u64(bm::word_t* BMRESTRICT block,
          unsigned nbit = unsigned(n & bm::set_block_mask);
          unsigned nword  = nbit >> bm::set_word_shift;
          nbit &= bm::set_word_mask;
-        bm::word_t mask = (1u << nbit);
-        block[nword] |= mask;
+        block[nword] |= (1u << nbit);
      } // for i
  }
  
@@ -8058,7 +8607,7 @@ void set_block_bits_u64(bm::word_t* BMRESTRICT block,
  inline
  void set_block_bits_u32(bm::word_t* BMRESTRICT block,
                          const unsigned* BMRESTRICT idx,
-                        unsigned start, unsigned stop )
+                        unsigned start, unsigned stop ) BMNOEXCEPT
  {
  #if defined(VECT_SET_BLOCK_BITS)
      VECT_SET_BLOCK_BITS(block, idx, start, stop);
@@ -8069,8 +8618,7 @@ void set_block_bits_u32(bm::word_t* BMRESTRICT block,
          unsigned nbit = unsigned(n & bm::set_block_mask);
          unsigned nword  = nbit >> bm::set_word_shift;
          nbit &= bm::set_word_mask;
-        bm::word_t mask = (1u << nbit);
-        block[nword] |= mask;
+        block[nword] |= (1u << nbit);
      } // for i
  #endif
  }
@@ -8084,7 +8632,8 @@ void set_block_bits_u32(bm::word_t* BMRESTRICT block,
      @internal
  */
  inline
-bool block_ptr_array_range(bm::word_t** arr, unsigned& left, unsigned& right)
+bool block_ptr_array_range(bm::word_t** arr,
+                           unsigned& left, unsigned& right) BMNOEXCEPT
  {
      BM_ASSERT(arr);
      
@@ -8119,7 +8668,7 @@ bool block_ptr_array_range(bm::word_t** arr, unsigned& left, unsigned& right)
  */
  inline
  unsigned lower_bound_linear_u32(const unsigned* arr,  unsigned target,
-                                unsigned        from, unsigned to)
+                                unsigned        from, unsigned to) BMNOEXCEPT
  {
      BM_ASSERT(arr);
      BM_ASSERT(from <= to);
@@ -8141,8 +8690,9 @@ unsigned lower_bound_linear_u32(const unsigned* arr,  unsigned target,
      @internal
  */
  inline
-unsigned lower_bound_linear_u64(const unsigned long long* arr, unsigned long long target,
-                                unsigned        from, unsigned to)
+unsigned lower_bound_linear_u64(const unsigned long long* arr,
+                                unsigned long long target,
+                                unsigned from, unsigned to) BMNOEXCEPT
  {
      BM_ASSERT(arr);
      BM_ASSERT(from <= to);
@@ -8166,7 +8716,7 @@ unsigned lower_bound_linear_u64(const unsigned long long* arr, unsigned long lon
  */
  inline
  unsigned lower_bound_u32(const unsigned* arr,  unsigned target,
-                         unsigned        from, unsigned to)
+                         unsigned        from, unsigned to) BMNOEXCEPT
  {
      BM_ASSERT(arr);
      BM_ASSERT(from <= to);
@@ -8200,8 +8750,9 @@ unsigned lower_bound_u32(const unsigned* arr,  unsigned target,
      @internal
  */
  inline
-unsigned lower_bound_u64(const unsigned long long* arr, unsigned long long target,
-                         unsigned        from, unsigned to)
+unsigned lower_bound_u64(const unsigned long long* arr,
+                         unsigned long long target,
+                         unsigned from, unsigned to) BMNOEXCEPT
  {
      BM_ASSERT(arr);
      BM_ASSERT(from <= to);
@@ -8238,7 +8789,8 @@ unsigned lower_bound_u64(const unsigned long long* arr, unsigned long long targe
  */
  #ifdef BM64ADDR
  inline
-bm::id64_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx)
+bm::id64_t block_to_global_index(unsigned i, unsigned j,
+                                 unsigned block_idx) BMNOEXCEPT
  {
      bm::id64_t base_idx = bm::id64_t(i) * bm::set_sub_array_size * bm::gap_max_bits;
      base_idx += j * bm::gap_max_bits;
@@ -8246,7 +8798,8 @@ bm::id64_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx)
  }
  #else
  inline
-bm::id_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx)
+bm::id_t block_to_global_index(unsigned i, unsigned j,
+                               unsigned block_idx) BMNOEXCEPT
  {
      unsigned base_idx = i * bm::set_sub_array_size * bm::gap_max_bits;
      base_idx += j * bm::gap_max_bits;
@@ -8274,7 +8827,7 @@ union ptr_payload_t
      @internal
  */
  inline
-bm::id64_t ptrp_test(ptr_payload_t ptr, bm::gap_word_t v)
+bm::id64_t ptrp_test(ptr_payload_t ptr, bm::gap_word_t v) BMNOEXCEPT
  {
      if (v == 0)
      {
diff --git a/c++/include/util/bitset/bmgamma.h b/c++/include/util/bitset/bmgamma.h

index 1b632e9eaaccab298cb269bd3a2a0b50f380b5ba..641123c2ce774f8a43d18b92b97efd946eac4af4 100644 (file)
--- a/c++/include/util/bitset/bmgamma.h
+++ b/c++/include/util/bitset/bmgamma.h
@@ -42,25 +42,25 @@ template<typename T, typename TBitIO>
  class gamma_decoder
  {
  public:
-    gamma_decoder(TBitIO& bin) : bin_(bin) 
+    gamma_decoder(TBitIO& bin) BMNOEXEPT : bin_(bin)
      {}
      
      /**
          Start encoding sequence
      */
-    void start()
+    void start() BMNOEXEPT
      {}
      
      /**
          Stop decoding sequence
      */
-    void stop()
+    void stop() BMNOEXEPT
      {}
      
      /**
          Decode word
      */
-    T operator()(void)
+    T operator()(void) BMNOEXEPT
      {
          unsigned l = bin_.eat_zero_bits();
          bin_.get_bit(); // get border bit
diff --git a/c++/include/util/bitset/bmintervals.h b/c++/include/util/bitset/bmintervals.h

new file mode 100644 (file)

index 0000000..7ef9804
--- /dev/null
+++ b/c++/include/util/bitset/bmintervals.h
@@ -0,0 +1,781 @@
+#ifndef BMINTERVALS__H__INCLUDED__
+#define BMINTERVALS__H__INCLUDED__
+
+/*
+Copyright(c) 2002-2020 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+For more information please visit:  http://bitmagic.io
+*/
+/*! \file bmintervals.h
+    \brief Algorithms for bit ranges and intervals
+*/
+
+#ifndef BM__H__INCLUDED__
+// BitMagic utility headers do not include main "bm.h" declaration
+// #include "bm.h" or "bm64.h" explicitly
+# error missing include (bm.h or bm64.h)
+#endif
+
+#include "bmdef.h"
+
+/** \defgroup bvintervals Algorithms for bit intervals
+    Algorithms and iterators for bit ranges and intervals
+    @ingroup bvector
+ */
+
+
+namespace bm
+{
+
+/*!
+    \brief forward iterator class to traverse bit-vector as ranges
+
+    Traverse enumerator for forward walking bit-vector as intervals:
+    series of consecutive 1111s flanked with zeroes.
+    Enumerator can traverse the whole bit-vector or jump(go_to) to position.
+
+   \ingroup bvintervals
+*/
+template<typename BV>
+class interval_enumerator
+{
+public:
+#ifndef BM_NO_STL
+        typedef std::input_iterator_tag  iterator_category;
+#endif
+        typedef BV                                         bvector_type;
+        typedef typename bvector_type::size_type           size_type;
+        typedef typename bvector_type::allocator_type      allocator_type;
+        typedef bm::byte_buffer<allocator_type>            buffer_type;
+        typedef bm::pair<size_type, size_type>             pair_type;
+
+public:
+    /*! @name Construction and assignment */
+    //@{
+
+    /**
+        Default constructor: enumerator is not attached to a bit-vector
+        (bv_ is 0), interval is set to [bm::id_max, bm::id_max]
+    */
+    interval_enumerator()
+        : bv_(0), interval_(bm::id_max, bm::id_max), gap_ptr_(0)
+    {}
+
+    /**
+        Construct enumerator for the bit-vector, positioned from bit 0
+    */
+    interval_enumerator(const BV& bv)
+        : bv_(&bv), interval_(bm::id_max, bm::id_max), gap_ptr_(0)
+    {
+        go_to_impl(0, false);
+    }
+
+    /**
+        Construct enumerator for the specified position
+        @param bv - source bit-vector
+        @param start_pos - position on bit-vector to search for interval
+        @param extend_start - flag to extend the interval start back to its
+            true start if that happens to be less than start_pos
+        @sa go_to
+    */
+    interval_enumerator(const BV& bv, size_type start_pos, bool extend_start)
+        : bv_(&bv), interval_(bm::id_max, bm::id_max), gap_ptr_(0)
+    {
+        go_to_impl(start_pos, extend_start);
+    }
+
+    /**
+        Copy constructor.
+        Does not copy the GAP buffer: attaches to the same bit-vector and
+        re-positions at ien.start().
+        NOTE(review): copying a default-constructed enumerator calls
+        go_to_impl() with bv_ == 0 - confirm go_to_impl() tolerates that.
+    */
+    interval_enumerator(const interval_enumerator<BV>& ien)
+        : bv_(ien.bv_), interval_(bm::id_max, bm::id_max), gap_ptr_(0)
+    {
+        go_to_impl(ien.start(), false);
+    }
+
+    /**
+        Assignment operator.
+        Attaches to the bit-vector of ien and re-positions at ien.start()
+        @return reference to this enumerator
+    */
+    interval_enumerator& operator=(const interval_enumerator<BV>& ien)
+    {
+        bv_ = ien.bv_; gap_ptr_ = 0;
+        go_to_impl(ien.start(), false);
+        // the original fell off the end of a non-void function (UB)
+        return *this;
+    }
+
+#ifndef BM_NO_CXX11
+    /** move-ctor: starts detached, then swaps state with ien */
+    interval_enumerator(interval_enumerator<BV>&& ien) BMNOEXCEPT
+        : bv_(0), interval_(bm::id_max, bm::id_max), gap_ptr_(0)
+    {
+        this->swap(ien);
+    }
+
+    /** move assignment operator (implemented as swap) */
+    interval_enumerator<BV>& operator=(interval_enumerator<BV>&& ien) BMNOEXCEPT
+    {
+        if (this != &ien)
+            this->swap(ien);
+        return *this;
+    }
+#endif
+
+    //@}
+
+
+    // -----------------------------------------------------------------
+
+    /*! @name Comparison methods all use start position to compare  */
+    //@{
+
+    bool operator==(const interval_enumerator<BV>& ien) const BMNOEXCEPT
+                            { return (start() == ien.start()); }
+    bool operator!=(const interval_enumerator<BV>& ien) const BMNOEXCEPT
+                            { return (start() != ien.start()); }
+    bool operator < (const interval_enumerator<BV>& ien) const BMNOEXCEPT
+                            { return (start() < ien.start()); }
+    bool operator <= (const interval_enumerator<BV>& ien) const BMNOEXCEPT
+                            { return (start() <= ien.start()); }
+    bool operator > (const interval_enumerator<BV>& ien) const BMNOEXCEPT
+                            { return (start() > ien.start()); }
+    bool operator >= (const interval_enumerator<BV>& ien) const BMNOEXCEPT
+                            { return (start() >= ien.start()); }
+    //@}
+
+
+    /// Return interval start/left as bit-vector coordinate 011110 [left..right]
+    size_type start() const BMNOEXCEPT;
+    /// Return interval end/right as bit-vector coordinate 011110 [left..right]
+    size_type end() const BMNOEXCEPT;
+
+    /// Dereference: get the current interval pair (same as get())
+    const pair_type& operator*() const BMNOEXCEPT { return interval_; }
+
+    /// Get interval pair
+    const pair_type& get() const BMNOEXCEPT { return interval_; }
+
+    /// Returns true if enumerator is valid (false if traversal is done)
+    bool valid() const BMNOEXCEPT;
+
+    // -----------------------------------------------------------------
+
+    /*! @name enumerator positioning  */
+    //@{
+
+    /*!
+        @brief Go to interval at specified position
+        Jump to position with interval. If interval is not available at
+        the specified position (0 bit) enumerator will find the next interval.
+        If interval is present we have an option to find interval start [left..]
+        and set enumerator from the effective start coordinate
+
+        @param pos - position on bit-vector
+        @param extend_start - find effective start if it is less than the
+                              go to position
+        @return true if enumerator remains valid after the jump
+    */
+    bool go_to(size_type pos, bool extend_start = true);
+
+    /*! Advance to the next interval
+        @return true if interval is available
+        @sa valid
+    */
+    bool advance();
+
+    /*! \brief Advance enumerator forward to the next available interval
+        NOTE(review): declared BMNOEXCEPT while advance() is not - confirm
+        advance() cannot throw.
+    */
+    interval_enumerator<BV>& operator++() BMNOEXCEPT
+        { advance(); return *this; }
+
+    /*! \brief Advance enumerator forward to the next available interval,
+        return a copy of the enumerator as it was before the advance
+        NOTE(review): declared BMNOEXCEPT but uses the copy constructor,
+        which is not declared BMNOEXCEPT - confirm it cannot throw.
+    */
+    interval_enumerator<BV> operator++(int) BMNOEXCEPT
+    {
+        interval_enumerator<BV> tmp = *this;
+        advance();
+        return tmp;
+    }
+    //@}
+
+    /**
+        swap enumerator with another one
+    */
+    void swap(interval_enumerator<BV>& ien) BMNOEXCEPT;
+
+protected:
+    typedef typename bvector_type::block_idx_type       block_idx_type;
+    typedef typename bvector_type::allocator_type       bv_allocator_type;
+    typedef bm::heap_vector<unsigned short, bv_allocator_type, true>
+                                                    gap_vector_type;
+
+
+    /// Positioning implementation shared by constructors and assignment
+    bool go_to_impl(size_type pos, bool extend_start);
+
+    /// Turn FSM into invalid state (out of range)
+    void invalidate() BMNOEXCEPT;
+
+private:
+    const BV*                  bv_;      ///< bit-vector for traversal
+    gap_vector_type            gap_buf_; ///< GAP buf.vector for bit-block
+    pair_type                  interval_; ///< current interval
+    const bm::gap_word_t*      gap_ptr_; ///< current pointer in GAP block
+};
+
+//----------------------------------------------------------------------------
+
+/*!
+    \brief Returns true if range is all 1s flanked with 0s
+    Function performs the test on a closed range [left, right]
+    true interval is all 1s AND test(left-1)==false AND test(right+1)==false
+    Examples:
+        01110 [1,3] - true
+        11110 [0,3] - true
+        11110 [1,3] - false
+    \param bv   - bit-vector for check
+   \param left - index of first bit start checking
+   \param right - index of last bit
+   \return true/false
+
+   \ingroup bvintervals
+
+   @sa is_all_one_range
+*/
+template<class BV>
+bool is_interval(const BV& bv,
+                 typename BV::size_type left,
+                 typename BV::size_type right) BMNOEXCEPT
+{
+    typedef typename BV::block_idx_type block_idx_type;
+
+    const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
+    if (!bman.is_init())
+        return false; // blocks manager is not initialized: nothing to test
+
+    // normalize arguments into an ordered closed range [left, right]
+    if (right < left)
+        bm::xor_swap(left, right);
+    if (left == bm::id_max) // out of range
+        return false;
+    if (right == bm::id_max)
+        --right;
+
+    block_idx_type nb_from = (left >> bm::set_block_shift);
+    block_idx_type nb_to = (right >> bm::set_block_shift);
+
+    // fast case: range and both of its flanks are inside a single block
+    if (nb_from == nb_to)
+    {
+        unsigned bit_from = unsigned(left  & bm::set_block_mask);
+        unsigned bit_to = unsigned(right  & bm::set_block_mask);
+        if ((bit_from > 0) && (bit_to < bm::gap_max_bits-1))
+        {
+            unsigned i0, j0;
+            bm::get_block_coord(nb_from, i0, j0);
+            const bm::word_t* blk = bman.get_block_ptr(i0, j0);
+            bool res = bm::block_is_interval(blk, bit_from, bit_to);
+            return res;
+        }
+    }
+
+    // general case: the flanking bits (when they exist) must be 0 ...
+    if ((left > 0) && bv.test(left-1))
+        return false;
+    if ((right < (bm::id_max - 1)) && bv.test(right + 1))
+        return false;
+    // ... and everything in between must be 1
+    bool all_one = bv.is_all_one_range(left, right);
+    return all_one;
+}
+
+
+//----------------------------------------------------------------------------
+
+/*!
+
+    \brief Reverse find index of first 1 bit gap (01110) starting from position
+    Reverse scan for the first 1 in a block of continuous 1s.
+    Method employs closed interval semantics: 0[pos..from]
+
+    The block holding \p from is examined first. When the block-level search
+    asks to keep scanning, preceding blocks are visited one by one (towards
+    block 0) and \p pos is moved to the start of every block that continues
+    the run. The scan stops on a NULL block / NULL top-level entry, on a
+    block that reports the run start, or after block 0.
+
+    \param bv   - bit-vector for search
+    \param from - position to start reverse search from
+    \param pos - [out] index of the found first 1 bit in a gap of bits.
+                 Assigned 0 when from == 0 (whatever the result); otherwise
+                 left untouched when the function returns false
+    \return true if search returned result, false if not found
+           (bit-vector is not initialized, the block holding the start
+            point is NULL, or the block-level search reports no interval)
+
+    \sa is_interval, find_interval_end
+    \ingroup bvintervals
+*/
+template<class BV>
+bool find_interval_start(const BV& bv,
+                         typename BV::size_type from,
+                         typename BV::size_type& pos)  BMNOEXCEPT
+{
+    typedef typename BV::size_type size_type;
+    typedef typename BV::block_idx_type block_idx_type;
+
+    const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
+
+    if (!bman.is_init())
+        return false; // no blocks allocated: nothing to search
+    if (!from)
+    {
+        pos = from; // position 0 has no predecessor to scan
+        return bv.test(from);
+    }
+
+    block_idx_type nb = (from >> bm::set_block_shift); // block holding from
+    unsigned i0, j0;
+    bm::get_block_coord(nb, i0, j0);
+
+    size_type base_idx;
+    unsigned found_nbit;
+
+    const bm::word_t* block = bman.get_block_ptr(i0, j0);
+    if (!block)
+        return false; // NULL block: no interval at the start position
+    unsigned nbit = unsigned(from & bm::set_block_mask); // offset of from in its block
+    unsigned res = bm::block_find_interval_start(block, nbit, &found_nbit);
+
+    switch (res)
+    {
+    case 0: // not interval
+        return false;
+    case 1: // interval found
+        pos = found_nbit + (nb * bm::gap_max_bits);
+        return true;
+    case 2: // keep scanning
+        base_idx = bm::get_block_start<size_type>(i0, j0);
+        pos = base_idx + found_nbit;
+        if (!nb) // already in block 0: nothing precedes it
+            return true;
+        break;
+    default:
+        BM_ASSERT(0);
+    } // switch
+
+    --nb; // continue with the preceding block
+    bm::get_block_coord(nb, i0, j0);
+    bm::word_t*** blk_root = bman.top_blocks_root();
+
+    for (unsigned i = i0; true; --i) // loop is left via return or the !i breaks
+    {
+        bm::word_t** blk_blk = blk_root[i];
+        if (!blk_blk)
+            return true; // NULL top-level entry: pos keeps the last value found
+        if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+        {
+            pos = bm::get_super_block_start<size_type>(i); // whole super-block continues the run
+            if (!i)
+                break;
+            continue;
+        }
+        unsigned j = (i == i0) ? j0 : 255; // NOTE(review): 255 is presumably bm::set_sub_array_size-1 - confirm
+        for (; true; --j)
+        {
+            if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) // NOTE(review): the same test made the outer loop `continue` above, so this branch looks unreachable - confirm
+            {
+                pos = bm::get_block_start<size_type>(i, j);
+                goto loop_j_end; // continue
+            }
+
+            block = blk_blk[j];
+            if (!block)
+                return true; // NULL block: pos keeps the last value found
+
+            res = bm::block_find_interval_start(block,
+                                            bm::gap_max_bits-1, &found_nbit); // search from the last bit of the block
+            switch (res)
+            {
+            case 0: // this block does not continue the run: pos set earlier is the result
+                return true;
+            case 1: // interval found
+                base_idx = bm::get_block_start<size_type>(i, j);
+                pos = base_idx + found_nbit;
+                return true;
+            case 2: // keep scanning
+                pos = bm::get_block_start<size_type>(i, j);
+                break;
+            default:
+                BM_ASSERT(0);
+            } // switch
+
+            loop_j_end: // continue point
+            if (!j) // checked before --j so the unsigned index never wraps
+                break;
+        } // for j
+
+        if (!i) // checked before --i so the unsigned index never wraps
+            break;
+    } // for i
+
+    return true;
+}
+
+
+//----------------------------------------------------------------------------
+
+/*!
+   \brief Forward find index of the last 1 bit of a gap (01110) starting from position
+   Forward scan for the last 1 in a block of continuous 1s.
+   Method employs closed interval semantics: [from..pos]0
+
+   The block holding \p from is examined first. When the block-level search
+   asks to keep scanning, the following blocks are visited one by one and
+   \p pos is advanced over every block that continues the run. The scan
+   stops on a NULL block / NULL top-level entry, on a block that reports
+   the run end, or after the last top-level entry considered.
+
+   \param bv   - bit-vector for search
+   \param from - position to start forward search from
+   \param pos - [out] index of the found last 1 bit in a gap of bits
+   \return true if search returned result, false if not found
+           (from is bm::id_max, bit-vector is not initialized, the block
+            holding the start point is NULL, or the block-level search
+            reports no interval)
+
+   \sa is_interval, find_interval_start
+    \ingroup bvintervals
+*/
+template <typename BV>
+bool find_interval_end(const BV& bv,
+                       typename BV::size_type from,
+                       typename BV::size_type & pos)  BMNOEXCEPT
+{
+    typedef typename BV::block_idx_type block_idx_type;
+
+    if (from == bm::id_max)
+        return false; // start position is out of range
+    const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
+
+    if (!bman.is_init())
+        return false; // no blocks allocated: nothing to search
+    if (from == bm::id_max-1)
+    {
+        pos = from; // last position handled here: no forward scan from it
+        return bv.test(from);
+    }
+
+    block_idx_type nb = (from >> bm::set_block_shift); // block holding from
+    unsigned i0, j0;
+    bm::get_block_coord(nb, i0, j0);
+
+    unsigned found_nbit;
+
+    const bm::word_t* block = bman.get_block_ptr(i0, j0);
+    if (!block)
+        return false; // NULL block: no interval at the start position
+    unsigned nbit = unsigned(from & bm::set_block_mask); // offset of from in its block
+    unsigned res = bm::block_find_interval_end(block, nbit, &found_nbit);
+    switch (res)
+    {
+    case 0: // not interval
+        return false;
+    case 1: // interval found
+        pos = found_nbit + (nb * bm::gap_max_bits);
+        return true;
+    case 2: // keep scanning
+        pos = found_nbit + (nb * bm::gap_max_bits);
+        break;
+    default:
+        BM_ASSERT(0);
+    } // switch
+
+    block_idx_type nblock_right = (bm::id_max >> bm::set_block_shift);
+    unsigned i_from, j_from, i_to, j_to;
+    bm::get_block_coord(nblock_right, i_to, j_to);
+    block_idx_type top_size = bman.top_block_size();
+    if (i_to >= top_size)
+        i_to = unsigned(top_size-1); // clamp the scan to the top-level size reported by the manager
+
+    ++nb; // continue with the following block
+    bm::word_t*** blk_root = bman.top_blocks_root();
+    bm::get_block_coord(nb, i_from, j_from);
+
+    for (unsigned i = i_from; i <= i_to; ++i)
+    {
+        bm::word_t** blk_blk = blk_root[i];
+        if (!blk_blk)
+            return true; // NULL top-level entry: pos keeps the last value found
+        if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+        {
+            if (i > i_from)
+            {
+                pos += bm::gap_max_bits * bm::set_sub_array_size; // skip the whole super-block at once
+                continue;
+            }
+            else
+            {
+                // TODO: optimization to avoid scanning rest of the super block
+            }
+        }
+
+        unsigned j = (i == i_from) ? j_from : 0; // first visited super-block is entered at j_from
+        do
+        {
+            if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+            {
+                pos += bm::gap_max_bits; // advance pos by one block
+                continue; // in a do-while this jumps to the ++j condition below
+            }
+
+            block = blk_blk[j];
+            if (!block)
+                return true; // NULL block: pos keeps the last value found
+
+            res = bm::block_find_interval_end(block, 0, &found_nbit); // search from the first bit of the block
+            switch (res)
+            {
+            case 0: // this block does not continue the run: pos set earlier is the result
+                return true;
+            case 1: // interval found
+                pos += found_nbit+1; // pos was advanced by whole blocks so far: +1 steps into this block
+                return true;
+            case 2: // keep scanning
+                pos += bm::gap_max_bits;
+                break;
+            default:
+                BM_ASSERT(0);
+            } // switch
+        } while (++j < bm::set_sub_array_size);
+    } // for i
+
+    return true;
+}
+
+
+
+//----------------------------------------------------------------------------
+//
+//----------------------------------------------------------------------------
+
+template<typename BV>
+typename interval_enumerator<BV>::size_type
+interval_enumerator<BV>::start() const BMNOEXCEPT
+{
+    // first coordinate of the current interval pair
+    const size_type interval_from = this->interval_.first;
+    return interval_from;
+}
+
+//----------------------------------------------------------------------------
+
+template<typename BV>
+typename interval_enumerator<BV>::size_type
+interval_enumerator<BV>::end() const BMNOEXCEPT
+{
+    // second coordinate of the current interval pair
+    const size_type interval_to = this->interval_.second;
+    return interval_to;
+}
+
+//----------------------------------------------------------------------------
+
+template<typename BV>
+bool interval_enumerator<BV>::valid() const BMNOEXCEPT
+{
+    // bm::id_max in the start coordinate is the marker written by invalidate()
+    const bool is_invalidated = (interval_.first == bm::id_max);
+    return !is_invalidated;
+}
+
+//----------------------------------------------------------------------------
+
+template<typename BV>
+void interval_enumerator<BV>::invalidate() BMNOEXCEPT
+{
+    // both coordinates receive the out-of-range marker tested by valid()
+    interval_.second = bm::id_max;
+    interval_.first = bm::id_max;
+}
+
+//----------------------------------------------------------------------------
+
+template<typename BV>
+bool interval_enumerator<BV>::go_to(size_type pos, bool extend_start)
+{
+    // public entry point: all positioning work is done by go_to_impl()
+    const bool positioned = this->go_to_impl(pos, extend_start);
+    return positioned;
+}
+
+//----------------------------------------------------------------------------
+
+template<typename BV>
+bool interval_enumerator<BV>::go_to_impl(size_type pos, bool extend_start)
+{ // Positions the enumerator using pos; returns false (after invalidate()) when no position is found
+    if (!bv_ || !bv_->is_init() || (pos >= bm::id_max)) // no vector, no blocks or position out of range
+    {
+        invalidate();
+        return false;
+    }
+
+    bool found;
+    size_type start_pos;
+
+    // go to prolog: identify the true interval start position
+    //
+    if (extend_start)
+    {
+        found = bm::find_interval_start(*bv_, pos, start_pos); // try to extend the start backwards from pos
+        if (!found)
+        {
+            found = bv_->find(pos, start_pos); // fall back to find(); presumably the first set bit at/after pos - confirm
+            if (!found)
+            {
+                invalidate();
+                return false;
+            }
+        }
+    }
+    else
+    {
+        found = bv_->find(pos, start_pos); // presumably the first set bit at/after pos - confirm
+        if (!found)
+        {
+            invalidate();
+            return false;
+        }
+    }
+
+    // start position established, start decoding from it
+    interval_.first = pos = start_pos;
+
+    block_idx_type nb = (pos >> bm::set_block_shift); // block holding the interval start
+    const typename BV::blocks_manager_type& bman = bv_->get_blocks_manager();
+    unsigned i0, j0;
+    bm::get_block_coord(nb, i0, j0);
+    const bm::word_t* block = bman.get_block_ptr(i0, j0);
+    BM_ASSERT(block);
+
+    if (block == FULL_BLOCK_FAKE_ADDR)
+    {
+        // super-long interval, find the end of it
+        found = bm::find_interval_end(*bv_, pos, interval_.second);
+        BM_ASSERT(found);
+        gap_ptr_ = 0; // no GAP cursor: advance() will re-position through go_to_impl()
+        return true;
+    }
+
+    if (BM_IS_GAP(block))
+    {
+        const bm::gap_word_t* BMRESTRICT gap_block = BMGAP_PTR(block);
+        unsigned nbit = unsigned(pos  & bm::set_block_mask); // offset of the start in its block
+
+        unsigned is_set;
+        unsigned gap_pos = bm::gap_bfind(gap_block, nbit, &is_set);
+        BM_ASSERT(is_set);
+
+        interval_.second = (nb * bm::gap_max_bits) + gap_block[gap_pos]; // block base + value stored at gap_pos
+        if (gap_block[gap_pos] == bm::gap_max_bits-1)
+        {
+            // it is the end of the GAP block - run search
+            //
+            if (interval_.second == bm::id_max-1) // last addressable bit: nothing to search beyond it
+            {
+                gap_ptr_ = 0;
+                return true;
+            }
+            found = bm::find_interval_end(*bv_, interval_.second + 1, start_pos); // start_pos is reused as the output slot
+            if (found)
+                interval_.second = start_pos; // the run continues in the following block(s)
+            gap_ptr_ = 0;
+            return true;
+        }
+        gap_ptr_ = gap_block + gap_pos; // cursor used by advance() to step through this GAP block
+        return true;
+    }
+
+    // bit-block: turn to GAP and position there
+    //
+    if (gap_buf_.size() == 0) // scratch buffer is sized once, on first use
+    {
+        gap_buf_.resize(bm::gap_max_bits+64);
+    }
+    bm::gap_word_t* gap_tmp = gap_buf_.data();
+    unsigned len = bm::bit_to_gap(gap_tmp, block, bm::gap_max_bits+64);
+    BM_ASSERT(len);
+
+
+    size_type base_idx = (nb * bm::gap_max_bits);
+    for (unsigned i = 1; i <= len; ++i) // element 0 is skipped; presumably the GAP header word - confirm
+    {
+        size_type gap_pos = base_idx + gap_tmp[i];
+        if (gap_pos >= pos) // first element whose value reaches pos
+        {
+            if (gap_tmp[i] == bm::gap_max_bits - 1)
+            {
+                found = bm::find_interval_end(*bv_, gap_pos, interval_.second); // value is the last bit of the block: search on from it
+                BM_ASSERT(found);
+                gap_ptr_ = 0;
+                return true;
+            }
+
+            gap_ptr_ = &gap_tmp[i]; // cursor into gap_buf_
+            interval_.second = gap_pos;
+            return true;
+        }
+        if (gap_tmp[i] == bm::gap_max_bits - 1) // value of the last bit in the block: stop scanning
+            break;
+    } // for
+
+    BM_ASSERT(0); // a set bit was located above, so the scan is expected to return
+
+    return false;
+}
+
+//----------------------------------------------------------------------------
+
+/*!
+    \brief Move the enumerator to the next interval
+    The enumerator must be valid. While a GAP cursor (gap_ptr_) is
+    available the next interval is decoded from it; otherwise (or when the
+    current block is exhausted) the search is delegated to go_to_impl().
+    \return true if the enumerator is positioned on the next interval,
+            false if there are no more intervals (enumerator is invalidated)
+*/
+template<typename BV>
+bool interval_enumerator<BV>::advance()
+{
+    BM_ASSERT(valid());
+
+    // the current interval ends at the last addressable bit: nothing follows
+    if (interval_.second == bm::id_max-1)
+    {
+        invalidate();
+        return false;
+    }
+    block_idx_type nb = (interval_.first >> bm::set_block_shift);
+
+    bool found;
+    if (gap_ptr_) // in GAP block
+    {
+        ++gap_ptr_; // 0 - GAP
+        if (*gap_ptr_ == bm::gap_max_bits-1) // GAP block end
+        {
+            // In the last block (nb+1) * gap_max_bits does not fit into
+            // the address range; with 32-bit addressing the product wraps
+            // around to 0 and would restart the enumeration from the
+            // first interval. Stop explicitly instead.
+            if (nb >= (bm::id_max >> bm::set_block_shift))
+            {
+                invalidate();
+                return false;
+            }
+            return go_to_impl(((nb+1) * bm::gap_max_bits), false);
+        }
+        unsigned prev = *gap_ptr_; // end of the 0-run preceding the next interval
+
+        ++gap_ptr_; // 1 - GAP
+        BM_ASSERT(*gap_ptr_ > prev);
+        interval_.first = (nb * bm::gap_max_bits) + prev + 1;
+        if (*gap_ptr_ == bm::gap_max_bits-1) // GAP block end
+        {
+            // the run reaches the block end: it may continue further
+            found = bm::find_interval_end(*bv_, interval_.first, interval_.second);
+            BM_ASSERT(found); (void)found;
+            gap_ptr_ = 0;
+            return true;
+        }
+        interval_.second = (nb * bm::gap_max_bits) + *gap_ptr_;
+        return true;
+    }
+    // no GAP cursor: search for the next interval right after the current one
+    return go_to_impl(interval_.second + 1, false);
+}
+
+//----------------------------------------------------------------------------
+
+template<typename BV>
+void interval_enumerator<BV>::swap(interval_enumerator<BV>& ien) BMNOEXCEPT
+{
+    // interval coordinates and the GAP decoding buffers
+    bm::xor_swap(interval_.first, ien.interval_.first);
+    bm::xor_swap(interval_.second, ien.interval_.second);
+    gap_buf_.swap(ien.gap_buf_);
+
+    // raw pointers are exchanged through temporaries
+    const bm::gap_word_t* other_gap_ptr = ien.gap_ptr_;
+    ien.gap_ptr_ = gap_ptr_;
+    gap_ptr_ = other_gap_ptr;
+
+    const BV* other_bv = ien.bv_;
+    ien.bv_ = bv_;
+    bv_ = other_bv;
+}
+
+//----------------------------------------------------------------------------
+//
+//----------------------------------------------------------------------------
+
+
+} // namespace bm
+
+#include "bmundef.h"
+
+#endif
diff --git a/c++/include/util/bitset/bmrandom.h b/c++/include/util/bitset/bmrandom.h

index aca0829d74ac06c50487b45212b7217602ed5007..3f0e23f1b2599400c36279e3e7c4242e7dc19636 100644 (file)
--- a/c++/include/util/bitset/bmrandom.h
+++ b/c++/include/util/bitset/bmrandom.h
@@ -97,7 +97,7 @@ private:
      unsigned process_word(bm::word_t*       blk_out, 
                            const bm::word_t* blk_src,
                            unsigned          nword,
-                          unsigned          take_count);
+                          unsigned          take_count) BMNOEXCEPT;
  
      static
      void get_random_array(bm::word_t*       blk_out, 
@@ -106,7 +106,7 @@ private:
                            unsigned          count);
      static
      unsigned compute_take_count(unsigned bc,
-                        size_type in_count, size_type sample_count);
+                size_type in_count, size_type sample_count) BMNOEXCEPT;
  
  
  private:
@@ -357,9 +357,10 @@ void random_subset<BV>::get_subset(BV&        bv_out,
  }
  
  template<class BV>
-unsigned random_subset<BV>::compute_take_count(unsigned bc,
-                                               size_type in_count,
-                                               size_type sample_count)
+unsigned random_subset<BV>::compute_take_count(
+                                    unsigned bc,
+                                    size_type in_count,
+                                    size_type sample_count) BMNOEXCEPT
  {
      float block_percent = float(bc) / float(in_count);
      float bits_to_take = float(sample_count) * block_percent;
@@ -404,7 +405,7 @@ void random_subset<BV>::get_block_subset(bm::word_t*       blk_out,
          }
          // now transform vacant bits to array, then pick random elements
          //
-        unsigned arr_len = bit_convert_to_arr(bit_list_, 
+        unsigned arr_len = bm::bit_convert_to_arr(bit_list_,
                                                sub_block_, 
                                                bm::gap_max_bits, 
                                                bm::gap_max_bits,
@@ -418,7 +419,7 @@ template<class BV>
  unsigned random_subset<BV>::process_word(bm::word_t*       blk_out, 
                                           const bm::word_t* blk_src,
                                           unsigned          nword,
-                                         unsigned          take_count)
+                                         unsigned          take_count) BMNOEXCEPT
  {
      unsigned new_bits, mask;
      do 
diff --git a/c++/include/util/bitset/bmrs.h b/c++/include/util/bitset/bmrs.h

index 14cce61e84cc80dffdf860f9d8eaebdc6f5ec7ad..08659062db1fef5951b48fc097d81b22740253e1 100644 (file)
--- a/c++/include/util/bitset/bmrs.h
+++ b/c++/include/util/bitset/bmrs.h
@@ -59,7 +59,7 @@ public:
      rs_index(const rs_index& rsi);
      
      /// init arrays to zeros
-    void init() BMNOEXEPT;
+    void init() BMNOEXCEPT;
  
      /// copy rs index
      void copy_from(const rs_index& rsi);
@@ -165,7 +165,7 @@ rs_index<BVAlloc>::rs_index(const rs_index<BVAlloc>& rsi)
  
  
  template<typename BVAlloc>
-void rs_index<BVAlloc>::init() BMNOEXEPT
+void rs_index<BVAlloc>::init() BMNOEXCEPT
  {
      sblock_count_.resize(0);
      sblock_row_idx_.resize(0);
diff --git a/c++/include/util/bitset/bmserial.h b/c++/include/util/bitset/bmserial.h

index ee65f1b05b9ca50d2e34f238c3d1911f16b38b3c..5ab728380df3ccb054d01a50175869c055911a19 100644 (file)
--- a/c++/include/util/bitset/bmserial.h
+++ b/c++/include/util/bitset/bmserial.h
@@ -75,12 +75,12 @@ template<class BV>
  class serializer
  {
  public:
-    typedef BV                                                bvector_type;
-    typedef typename bvector_type::allocator_type             allocator_type;
-    typedef typename bvector_type::blocks_manager_type        blocks_manager_type;
-    typedef typename bvector_type::statistics                 statistics_type;
-    typedef typename bvector_type::block_idx_type             block_idx_type;
-    typedef typename bvector_type::size_type                  size_type;
+    typedef BV                                              bvector_type;
+    typedef typename bvector_type::allocator_type           allocator_type;
+    typedef typename bvector_type::blocks_manager_type      blocks_manager_type;
+    typedef typename bvector_type::statistics               statistics_type;
+    typedef typename bvector_type::block_idx_type           block_idx_type;
+    typedef typename bvector_type::size_type                size_type;
  
      typedef byte_buffer<allocator_type>     buffer;
      typedef bm::bv_ref_vector<BV>           bv_ref_vector_type;
@@ -113,7 +113,7 @@ public:
          @param clevel - compression level (0-5)
          @sa get_compression_level
      */
-    void set_compression_level(unsigned clevel);
+    void set_compression_level(unsigned clevel) BMNOEXCEPT;
  
      /**
          Get compression level (0-5), Default 5 (recommended)
@@ -127,7 +127,8 @@ public:
          Recommended: use 3 or 5
  
      */
-    unsigned get_compression_level() const { return compression_level_; }
+    unsigned get_compression_level() const BMNOEXCEPT
+        { return compression_level_; }
  
  
      //@}
@@ -189,20 +190,21 @@ public:
          Return serialization counter vector
          @internal
      */
-    const size_type* get_compression_stat() const { return compression_stat_; }
+    const size_type* get_compression_stat() const BMNOEXCEPT
+                                    { return compression_stat_; }
      
      /**
          Set GAP length serialization (serializes GAP levels of the original vector)
                  
          @param value - when TRUE serialized vector includes GAP levels parameters
      */
-    void gap_length_serialization(bool value);
+    void gap_length_serialization(bool value) BMNOEXCEPT;
      
      /**
          Set byte-order serialization (for cross platform compatibility)
          @param value - TRUE serialization format includes byte-order marker
      */
-    void byte_order_serialization(bool value);
+    void byte_order_serialization(bool value) BMNOEXCEPT;
  
      /**
          Add skip-markers to serialization BLOB for faster range decode
@@ -214,7 +216,7 @@ public:
          smaller interval means more bookmarks added to the skip list thus
          more increasing the BLOB size
      */
-    void set_bookmarks(bool enable, unsigned bm_interval = 256);
+    void set_bookmarks(bool enable, unsigned bm_interval = 256) BMNOEXCEPT;
  
      /**
          Attach collection of reference vectors for XOR serialization
@@ -227,20 +229,21 @@ public:
          Set current index in rer.vector collection
          (not a row idx or plain idx)
      */
-    void set_curr_ref_idx(size_type ref_idx);
+    void set_curr_ref_idx(size_type ref_idx) BMNOEXCEPT;
  
  
  protected:
      /**
          Encode serialization header information
      */
-    void encode_header(const BV& bv, bm::encoder& enc);
+    void encode_header(const BV& bv, bm::encoder& enc) BMNOEXCEPT;
      
      /*! Encode GAP block */
      void encode_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc);
  
      /*! Encode GAP block with Elias Gamma coder */
-    void gamma_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc);
+    void gamma_gap_block(const bm::gap_word_t* gap_block,
+                         bm::encoder&          enc) BMNOEXCEPT;
  
      /**
          Encode GAP block as delta-array with Elias Gamma coder
@@ -248,29 +251,30 @@ protected:
      void gamma_gap_array(const bm::gap_word_t* gap_block, 
                           unsigned              arr_len, 
                           bm::encoder&          enc,
-                         bool                  inverted = false);
+                         bool                  inverted = false) BMNOEXCEPT;
      
      /// Encode bit-block as an array of bits
      void encode_bit_array(const bm::word_t* block,
-                          bm::encoder& enc, bool inverted);
+                          bm::encoder& enc, bool inverted) BMNOEXCEPT;
      
      void gamma_gap_bit_block(const bm::word_t* block,
-                             bm::encoder&      enc);
+                             bm::encoder&      enc) BMNOEXCEPT;
      
      void gamma_arr_bit_block(const bm::word_t* block,
-                          bm::encoder& enc, bool inverted);
+                          bm::encoder& enc, bool inverted) BMNOEXCEPT;
  
      void bienc_arr_bit_block(const bm::word_t* block,
-                            bm::encoder& enc, bool inverted);
+                            bm::encoder& enc, bool inverted) BMNOEXCEPT;
  
      /// encode bit-block as interpolated bit block of gaps
-    void bienc_gap_bit_block(const bm::word_t* block, bm::encoder& enc);
+    void bienc_gap_bit_block(const bm::word_t* block,
+                             bm::encoder& enc) BMNOEXCEPT;
  
      void interpolated_arr_bit_block(const bm::word_t* block,
-                            bm::encoder& enc, bool inverted);
+                            bm::encoder& enc, bool inverted) BMNOEXCEPT;
      /// encode bit-block as interpolated gap block
      void interpolated_gap_bit_block(const bm::word_t* block,
-                                    bm::encoder&      enc);
+                                    bm::encoder&      enc) BMNOEXCEPT;
  
      /**
          Encode GAP block as an array with binary interpolated coder
@@ -278,29 +282,29 @@ protected:
      void interpolated_gap_array(const bm::gap_word_t* gap_block,
                                  unsigned              arr_len,
                                  bm::encoder&          enc,
-                                bool                  inverted);
+                                bool                  inverted) BMNOEXCEPT;
      void interpolated_gap_array_v0(const bm::gap_word_t* gap_block,
                                     unsigned              arr_len,
                                     bm::encoder&          enc,
-                                   bool                  inverted);
+                                   bool                  inverted) BMNOEXCEPT;
  
  
      /*! Encode GAP block with using binary interpolated encoder */
      void interpolated_encode_gap_block(
-                const bm::gap_word_t* gap_block, bm::encoder& enc);
+                const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT;
  
      /**
          Encode BIT block with repeatable runs of zeroes
      */
      void encode_bit_interval(const bm::word_t* blk, 
                               bm::encoder&      enc,
-                             unsigned          size_control);
+                             unsigned          size_control) BMNOEXCEPT;
      /**
          Encode bit-block using digest (hierarchical compression)
      */
      void encode_bit_digest(const bm::word_t*  blk,
-                             bm::encoder&     enc,
-                             bm::id64_t       d0);
+                           bm::encoder&       enc,
+                           bm::id64_t         d0) BMNOEXCEPT;
  
      /**
          Determine best representation for GAP block based
@@ -314,25 +318,26 @@ protected:
       
          @internal
      */
-    unsigned char find_gap_best_encoding(const bm::gap_word_t* gap_block);
+    unsigned char
+    find_gap_best_encoding(const bm::gap_word_t* gap_block) BMNOEXCEPT;
      
      /// Determine best representation for a bit-block
-    unsigned char find_bit_best_encoding(const bm::word_t* block);
+    unsigned char find_bit_best_encoding(const bm::word_t* block) BMNOEXCEPT;
  
      /// Determine best representation for a bit-block (level 5)
-    unsigned char find_bit_best_encoding_l5(const bm::word_t* block);
+    unsigned char find_bit_best_encoding_l5(const bm::word_t* block) BMNOEXCEPT;
  
      /// Reset all accumulated compression statistics
-    void reset_compression_stats();
+    void reset_compression_stats() BMNOEXCEPT;
      
-    void reset_models() { mod_size_ = 0; }
-    void add_model(unsigned char mod, unsigned score);
+    void reset_models() BMNOEXCEPT { mod_size_ = 0; }
+    void add_model(unsigned char mod, unsigned score) BMNOEXCEPT;
  protected:
  
      /// Bookmark state structure
      struct bookmark_state
      {
-        bookmark_state(block_idx_type nb_range)
+        bookmark_state(block_idx_type nb_range) BMNOEXCEPT
              : ptr_(0), nb_(0),
                nb_range_(nb_range), bm_type_(0)
          {
@@ -364,7 +369,7 @@ protected:
      */
      static
      void process_bookmark(block_idx_type nb, bookmark_state& bookm,
-                          bm::encoder& enc);
+                          bm::encoder&   enc) BMNOEXCEPT;
  
  private:
      serializer(const serializer&);
@@ -421,6 +426,8 @@ class deseriaizer_base
  protected:
      typedef DEC       decoder_type;
      typedef BLOCK_IDX block_idx_type;
+    typedef bm::bit_in<DEC> bit_in_type;
+
  protected:
      deseriaizer_base()
          : id_array_(0), bookmark_idx_(0), skip_offset_(0), skip_pos_(0)
@@ -440,29 +447,31 @@ protected:
                            bm::gap_word_t* dst_arr);
      
      /// Read binary interpolated list into a bit-set
-    void read_bic_arr(decoder_type&   decoder, bm::word_t* blk);
+    void read_bic_arr(decoder_type&   decoder, bm::word_t* blk) BMNOEXCEPT;
  
      /// Read binary interpolated gap blocks into a bitset
-    void read_bic_gap(decoder_type&   decoder, bm::word_t* blk);
+    void read_bic_gap(decoder_type&   decoder, bm::word_t* blk) BMNOEXCEPT;
  
      /// Read inverted binary interpolated list into a bit-set
-    void read_bic_arr_inv(decoder_type&   decoder, bm::word_t* blk);
+    void read_bic_arr_inv(decoder_type&   decoder, bm::word_t* blk) BMNOEXCEPT;
      
      /// Read digest0-type bit-block
-    void read_digest0_block(decoder_type& decoder, bm::word_t* blk);
+    void read_digest0_block(decoder_type& decoder, bm::word_t* blk) BMNOEXCEPT;
      
      
      /// read bit-block encoded as runs
      static
-    void read_0runs_block(decoder_type& decoder, bm::word_t* blk);
+    void read_0runs_block(decoder_type& decoder, bm::word_t* blk) BMNOEXCEPT;
      
      static
-    const char* err_msg() { return "BM::Invalid serialization format"; }
+    const char* err_msg() BMNOEXCEPT { return "BM::Invalid serialization format"; }
  
      /// Try to skip if skip bookmark is available within reach
      /// @return new block idx if skip went well
      ///
-    block_idx_type try_skip(decoder_type& decoder, block_idx_type nb, block_idx_type expect_nb);
+    block_idx_type try_skip(decoder_type&  decoder,
+                            block_idx_type nb,
+                            block_idx_type expect_nb) BMNOEXCEPT;
  
  protected:
      bm::gap_word_t*   id_array_; ///< ptr to idx array for temp decode use
@@ -519,7 +528,7 @@ public:
          is not guaranteed to be absent
          @sa unset_range()
      */
-    void set_range(size_type from, size_type to)
+    void set_range(size_type from, size_type to) BMNOEXCEPT
      {
          is_range_set_ = 1; idx_from_ = from; idx_to_ = to;
      }
@@ -528,7 +537,7 @@ public:
          Disable range deserialization
          @sa set_range()
      */
-    void unset_range() { is_range_set_ = 0; }
+    void unset_range() BMNOEXCEPT { is_range_set_ = 0; }
  
  protected:
     typedef typename BV::blocks_manager_type blocks_manager_type;
@@ -608,7 +617,7 @@ public:
      void set_range(size_type from, size_type to);
  
      /// disable range filtration
-    void unset_range() { is_range_set_ = false; }
+    void unset_range() BMNOEXCEPT { is_range_set_ = false; }
  
      size_type deserialize(bvector_type&         bv,
                            serial_iterator_type& sit,
@@ -639,7 +648,8 @@ private:
                                serial_iterator_type& sit,
                                set_operation         op);
      static
-    const char* err_msg() { return "BM::de-serialization format error"; }
+    const char* err_msg() BMNOEXCEPT
+                { return "BM::de-serialization format error"; }
  private:
      bool                       is_range_set_ = false;
      size_type                  nb_range_from_ = 0;
@@ -675,7 +685,7 @@ public:
      void next();
  
         /// skip all zero or all-one blocks
-       block_idx_type skip_mono_blocks();
+       block_idx_type skip_mono_blocks() BMNOEXCEPT;
  
      /// read bit block, using logical operation
      unsigned get_bit_block(bm::word_t*       dst_block, 
@@ -708,17 +718,17 @@ public:
      };
  
      /// Returns iterator internal state
-    iterator_state state() const { return this->state_; }
+    iterator_state state() const BMNOEXCEPT { return this->state_; }
  
-    iterator_state get_state() const { return this->state_; }
+    iterator_state get_state() const BMNOEXCEPT { return this->state_; }
      /// Number of ids in the inverted list (valid for e_list_ids)
-    unsigned get_id_count() const { return this->id_cnt_; }
+    unsigned get_id_count() const BMNOEXCEPT { return this->id_cnt_; }
  
      /// Get last id from the id list
-    bm::id_t get_id() const { return this->last_id_; }
+    bm::id_t get_id() const BMNOEXCEPT { return this->last_id_; }
  
      /// Get current block index 
-    block_idx_type block_idx() const { return this->block_idx_; }
+    block_idx_type block_idx() const BMNOEXCEPT { return this->block_idx_; }
  
  public:
      /// member function pointer for bitset-bitset get operations
@@ -761,19 +771,19 @@ public:
      /// (Converts inverted list into bits)
      /// Returns number of words (bits) being read
      unsigned get_arr_bit(bm::word_t* dst_block, 
-                         bool clear_target=true);
+                         bool clear_target=true) BMNOEXCEPT;
  
         /// Get current block type
-       unsigned get_block_type() const { return block_type_; }
+       unsigned get_block_type() const BMNOEXCEPT { return block_type_; }
  
-       unsigned get_bit();
+       unsigned get_bit() BMNOEXCEPT;
   
-    void get_inv_arr(bm::word_t* block);
+    void get_inv_arr(bm::word_t* block) BMNOEXCEPT;
  
      /// Try to skip if skip bookmark is available within reach
      /// @return true if skip went well
      ///
-    bool try_skip(block_idx_type nb, block_idx_type expect_nb)
+    bool try_skip(block_idx_type nb, block_idx_type expect_nb) BMNOEXCEPT
      {
          block_idx_type new_nb = parent_type::try_skip(decoder_, nb, expect_nb);
          if (new_nb)
@@ -1064,6 +1074,7 @@ serializer<BV>::serializer(bm::word_t*    temp_block)
    gap_serial_(false),
    byte_order_serial_(true),
    sb_bookmarks_(false),
+  sb_range_(0),
    compression_level_(bm::set_compression_default),
    ref_vect_(0),
    ref_idx_(0),
@@ -1097,7 +1108,7 @@ serializer<BV>::~serializer()
  
  
  template<class BV>
-void serializer<BV>::reset_compression_stats()
+void serializer<BV>::reset_compression_stats() BMNOEXCEPT
  {
      for (unsigned i = 0; i < 256; ++i)
          compression_stat_[i] = 0;
@@ -1105,30 +1116,30 @@ void serializer<BV>::reset_compression_stats()
  
  
  template<class BV>
-void serializer<BV>::set_compression_level(unsigned clevel)
+void serializer<BV>::set_compression_level(unsigned clevel) BMNOEXCEPT
  {
      if (clevel <= bm::set_compression_max)
          compression_level_ = clevel;
  }
  
  template<class BV>
-void serializer<BV>::gap_length_serialization(bool value)
+void serializer<BV>::gap_length_serialization(bool value) BMNOEXCEPT
  {
      gap_serial_ = value;
  }
  
  template<class BV>
-void serializer<BV>::byte_order_serialization(bool value)
+void serializer<BV>::byte_order_serialization(bool value) BMNOEXCEPT
  {
      byte_order_serial_ = value;
  }
  
  template<class BV>
-void serializer<BV>::set_bookmarks(bool enable, unsigned bm_interval)
+void serializer<BV>::set_bookmarks(bool enable, unsigned bm_interval) BMNOEXCEPT
  {
      sb_bookmarks_ = enable;
      if (enable)
-        {
+    {
          if (bm_interval > 512)
              bm_interval = 512;
          else
@@ -1148,13 +1159,13 @@ void serializer<BV>::set_ref_vectors(const bv_ref_vector_type* ref_vect)
  }
  
  template<class BV>
-void serializer<BV>::set_curr_ref_idx(size_type ref_idx)
+void serializer<BV>::set_curr_ref_idx(size_type ref_idx) BMNOEXCEPT
  {
      ref_idx_ = ref_idx;
  }
  
  template<class BV>
-void serializer<BV>::encode_header(const BV& bv, bm::encoder& enc)
+void serializer<BV>::encode_header(const BV& bv, bm::encoder& enc) BMNOEXCEPT
  {
      const blocks_manager_type& bman = bv.get_blocks_manager();
  
@@ -1207,7 +1218,7 @@ void serializer<BV>::encode_header(const BV& bv, bm::encoder& enc)
  
  template<class BV>
  void serializer<BV>::interpolated_encode_gap_block(
-            const bm::gap_word_t* gap_block, bm::encoder& enc)
+            const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT
  {
      unsigned len = bm::gap_length(gap_block);
      if (len > 4) // BIC encoding
@@ -1266,7 +1277,8 @@ void serializer<BV>::interpolated_encode_gap_block(
  
  
  template<class BV>
-void serializer<BV>::gamma_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc)
+void serializer<BV>::gamma_gap_block(const bm::gap_word_t* gap_block,
+                                     bm::encoder& enc) BMNOEXCEPT
  {
      unsigned len = gap_length(gap_block);
      if (len > 3 && (compression_level_ > 3)) // Use Elias Gamma encoding
@@ -1307,7 +1319,7 @@ template<class BV>
  void serializer<BV>::gamma_gap_array(const bm::gap_word_t* gap_array, 
                                       unsigned              arr_len, 
                                       bm::encoder&          enc,
-                                     bool                  inverted)
+                                     bool                  inverted) BMNOEXCEPT
  {
      unsigned char scode = inverted ? bm::set_block_arrgap_egamma_inv
                                     : bm::set_block_arrgap_egamma;
@@ -1349,10 +1361,11 @@ void serializer<BV>::gamma_gap_array(const bm::gap_word_t* gap_array,
  
  
  template<class BV>
-void serializer<BV>::interpolated_gap_array_v0(const bm::gap_word_t* gap_block,
-                                            unsigned              arr_len,
-                                            bm::encoder&          enc,
-                                            bool                  inverted)
+void serializer<BV>::interpolated_gap_array_v0(
+                                const bm::gap_word_t* gap_block,
+                                unsigned              arr_len,
+                                bm::encoder&          enc,
+                                bool                  inverted) BMNOEXCEPT
  {
      BM_ASSERT(arr_len <= 65535);
      unsigned char scode = inverted ? bm::set_block_arrgap_bienc_inv
@@ -1399,7 +1412,7 @@ template<class BV>
  void serializer<BV>::interpolated_gap_array(const bm::gap_word_t* gap_block,
                                              unsigned              arr_len,
                                              bm::encoder&          enc,
-                                            bool                  inverted)
+                                            bool                  inverted) BMNOEXCEPT
  {
      BM_ASSERT(arr_len <= 65535);
  
@@ -1471,7 +1484,7 @@ void serializer<BV>::interpolated_gap_array(const bm::gap_word_t* gap_block,
  
  
  template<class BV>
-void serializer<BV>::add_model(unsigned char mod, unsigned score)
+void serializer<BV>::add_model(unsigned char mod, unsigned score) BMNOEXCEPT
  {
      BM_ASSERT(mod_size_ < 64); // too many models (memory corruption?)
      scores_[mod_size_] = score; models_[mod_size_] = mod;
@@ -1479,7 +1492,8 @@ void serializer<BV>::add_model(unsigned char mod, unsigned score)
  }
  
  template<class BV>
-unsigned char serializer<BV>::find_bit_best_encoding_l5(const bm::word_t* block)
+unsigned char
+serializer<BV>::find_bit_best_encoding_l5(const bm::word_t* block) BMNOEXCEPT
  {
      unsigned bc, bit_gaps;
      
@@ -1566,7 +1580,8 @@ unsigned char serializer<BV>::find_bit_best_encoding_l5(const bm::word_t* block)
  }
  
  template<class BV>
-unsigned char serializer<BV>::find_bit_best_encoding(const bm::word_t* block)
+unsigned char
+serializer<BV>::find_bit_best_encoding(const bm::word_t* block) BMNOEXCEPT
  {
      reset_models();
      
@@ -1672,7 +1687,7 @@ unsigned char serializer<BV>::find_bit_best_encoding(const bm::word_t* block)
  
  template<class BV>
  unsigned char
-serializer<BV>::find_gap_best_encoding(const bm::gap_word_t* gap_block)
+serializer<BV>::find_gap_best_encoding(const bm::gap_word_t* gap_block)BMNOEXCEPT
  {
      // heuristics and hard-coded rules to determine
      // the best representation for d-GAP block
@@ -1731,9 +1746,9 @@ void serializer<BV>::encode_gap_block(const bm::gap_word_t* gap_block, bm::encod
          break;
          
      case bm::set_block_bit_1bit:
-        arr_len = gap_convert_to_arr(gap_temp_block,
-                                     gap_block,
-                                     bm::gap_equiv_len-10);
+        arr_len = bm::gap_convert_to_arr(gap_temp_block,
+                                         gap_block,
+                                         bm::gap_equiv_len-10);
          BM_ASSERT(arr_len == 1);
          enc.put_8(bm::set_block_bit_1bit);
          enc.put_16(gap_temp_block[0]);
@@ -1779,7 +1794,7 @@ template<class BV>
  void serializer<BV>::encode_bit_interval(const bm::word_t* blk, 
                                           bm::encoder&      enc,
                                           unsigned          //size_control
-                                         )
+                                         ) BMNOEXCEPT
  {
      enc.put_8(bm::set_block_bit_0runs);
      enc.put_8((blk[0]==0) ? 0 : 1); // encode start
@@ -1830,7 +1845,7 @@ void serializer<BV>::encode_bit_interval(const bm::word_t* blk,
  template<class BV>
  void serializer<BV>::encode_bit_digest(const bm::word_t* block,
                                         bm::encoder&     enc,
-                                       bm::id64_t       d0)
+                                       bm::id64_t       d0) BMNOEXCEPT
  {
      // evaluate a few "sure" models here and pick the best
      //
@@ -1927,16 +1942,16 @@ void serializer<BV>::optimize_serialize_destroy(BV& bv,
  template<class BV>
  void serializer<BV>::encode_bit_array(const bm::word_t* block,
                                        bm::encoder&      enc,
-                                      bool              inverted)
+                                      bool              inverted) BMNOEXCEPT
  {
      unsigned arr_len;
      unsigned mask = inverted ? ~0u : 0u;
      // TODO: get rid of max bits
-    arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
-                                 block,
-                                 bm::gap_max_bits,
-                                 bm::gap_max_bits_cmrz,
-                                 mask);
+    arr_len = bm::bit_convert_to_arr(bit_idx_arr_.data(),
+                                     block,
+                                     bm::gap_max_bits,
+                                     bm::gap_max_bits_cmrz,
+                                     mask);
      if (arr_len)
      {
          unsigned char scode =
@@ -1950,7 +1965,7 @@ void serializer<BV>::encode_bit_array(const bm::word_t* block,
  
  template<class BV>
  void serializer<BV>::gamma_gap_bit_block(const bm::word_t* block,
-                                         bm::encoder&      enc)
+                                         bm::encoder&      enc) BMNOEXCEPT
  {
      unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_equiv_len);
      BM_ASSERT(len); (void)len;
@@ -1959,7 +1974,8 @@ void serializer<BV>::gamma_gap_bit_block(const bm::word_t* block,
  
  template<class BV>
  void serializer<BV>::gamma_arr_bit_block(const bm::word_t* block,
-                                         bm::encoder& enc, bool inverted)
+                                         bm::encoder&      enc,
+                                         bool              inverted) BMNOEXCEPT
  {
      unsigned mask = inverted ? ~0u : 0u;
      unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
@@ -1978,7 +1994,8 @@ void serializer<BV>::gamma_arr_bit_block(const bm::word_t* block,
  
  template<class BV>
  void serializer<BV>::bienc_arr_bit_block(const bm::word_t* block,
-                                        bm::encoder& enc, bool inverted)
+                                        bm::encoder&       enc,
+                                        bool               inverted) BMNOEXCEPT
  {
      unsigned mask = inverted ? ~0u : 0u;
      unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
@@ -1996,7 +2013,7 @@ void serializer<BV>::bienc_arr_bit_block(const bm::word_t* block,
  
  template<class BV>
  void serializer<BV>::interpolated_gap_bit_block(const bm::word_t* block,
-                                                bm::encoder&      enc)
+                                                bm::encoder&      enc) BMNOEXCEPT
  {
      unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
      BM_ASSERT(len); (void)len;
@@ -2006,7 +2023,7 @@ void serializer<BV>::interpolated_gap_bit_block(const bm::word_t* block,
  
  template<class BV>
  void serializer<BV>::bienc_gap_bit_block(const bm::word_t* block,
-                                         bm::encoder& enc)
+                                         bm::encoder& enc) BMNOEXCEPT
  {
      unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
      BM_ASSERT(len); (void)len;
@@ -2052,8 +2069,10 @@ void serializer<BV>::bienc_gap_bit_block(const bm::word_t* block,
  
  
  template<class BV>
-void serializer<BV>::interpolated_arr_bit_block(const bm::word_t* block,
-                                                bm::encoder& enc, bool inverted)
+void
+serializer<BV>::interpolated_arr_bit_block(const bm::word_t* block,
+                                           bm::encoder&      enc,
+                                           bool              inverted) BMNOEXCEPT
  {
      unsigned mask = inverted ? ~0u : 0u;
      unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
@@ -2134,7 +2153,7 @@ void serializer<BV>::interpolated_arr_bit_block(const bm::word_t* block,
  template<class BV>
  void serializer<BV>::process_bookmark(block_idx_type   nb,
                                        bookmark_state&  bookm,
-                                      bm::encoder&     enc)
+                                      bm::encoder&     enc) BMNOEXCEPT
  {
      BM_ASSERT(bookm.nb_range_);
  
@@ -2786,8 +2805,6 @@ unsigned deseriaizer_base<DEC, BLOCK_IDX>::read_id_list(
                                                                             unsigned        block_type,
                                                                                 bm::gap_word_t* dst_arr)
  {
-    typedef bit_in<DEC> bit_in_type;
-
         bm::gap_word_t len = 0;
  
      switch (block_type)
@@ -2867,12 +2884,12 @@ unsigned deseriaizer_base<DEC, BLOCK_IDX>::read_id_list(
  }
  
  template<typename DEC, typename BLOCK_IDX>
-void deseriaizer_base<DEC, BLOCK_IDX>::read_bic_arr(decoder_type& dec,
-                                                    bm::word_t*   blk)
+void
+deseriaizer_base<DEC, BLOCK_IDX>::read_bic_arr(decoder_type& dec,
+                                               bm::word_t*   blk) BMNOEXCEPT
  {
      BM_ASSERT(!BM_IS_GAP(blk));
      
-    typedef bit_in<DEC> bit_in_type;
      bm::gap_word_t min_v = dec.get_16();
      bm::gap_word_t max_v = dec.get_16();
      unsigned arr_len = dec.get_16();
@@ -2890,7 +2907,9 @@ void deseriaizer_base<DEC, BLOCK_IDX>::read_bic_arr(decoder_type& dec,
  }
  
  template<typename DEC, typename BLOCK_IDX>
-void deseriaizer_base<DEC, BLOCK_IDX>::read_bic_arr_inv(decoder_type&   decoder, bm::word_t* blk)
+void
+deseriaizer_base<DEC, BLOCK_IDX>::read_bic_arr_inv(decoder_type&   decoder,
+                                                   bm::word_t* blk) BMNOEXCEPT
  {
      // TODO: optimization
      bm::bit_block_set(blk, 0);
@@ -2899,18 +2918,16 @@ void deseriaizer_base<DEC, BLOCK_IDX>::read_bic_arr_inv(decoder_type&   decoder,
  }
  
  template<typename DEC, typename BLOCK_IDX>
-void deseriaizer_base<DEC, BLOCK_IDX>::read_bic_gap(decoder_type& dec, bm::word_t* blk)
+void deseriaizer_base<DEC, BLOCK_IDX>::read_bic_gap(decoder_type& dec,
+                                                    bm::word_t*   blk) BMNOEXCEPT
  {
      BM_ASSERT(!BM_IS_GAP(blk));
      
-    typedef bit_in<DEC> bit_in_type;
-
      bm::gap_word_t head = dec.get_8();
      unsigned arr_len = dec.get_16();
      bm::gap_word_t min_v = dec.get_16();
      
      BM_ASSERT(arr_len <= bie_cut_off);
-
      
      id_array_[0] = head;
      id_array_[1] = min_v;
@@ -2920,15 +2937,14 @@ void deseriaizer_base<DEC, BLOCK_IDX>::read_bic_gap(decoder_type& dec, bm::word_
      bin.bic_decode_u16(&id_array_[2], arr_len-2, min_v, 65535);
  
      if (!IS_VALID_ADDR(blk))
-    {
          return;
-    }
      bm::gap_add_to_bitset(blk, id_array_, arr_len);
  }
  
  template<typename DEC, typename BLOCK_IDX>
-void deseriaizer_base<DEC, BLOCK_IDX>::read_digest0_block(decoder_type& dec,
-                                                          bm::word_t*   block)
+void deseriaizer_base<DEC, BLOCK_IDX>::read_digest0_block(
+                                                decoder_type& dec,
+                                                bm::word_t*   block) BMNOEXCEPT
  {
      bm::id64_t d0 = dec.get_64();
      while (d0)
@@ -2966,8 +2982,9 @@ void deseriaizer_base<DEC, BLOCK_IDX>::read_digest0_block(decoder_type& dec,
  }
  
  template<typename DEC, typename BLOCK_IDX>
-void deseriaizer_base<DEC, BLOCK_IDX>::read_0runs_block(decoder_type& dec,
-                                                        bm::word_t* blk)
+void deseriaizer_base<DEC, BLOCK_IDX>::read_0runs_block(
+                                            decoder_type& dec,
+                                            bm::word_t* blk) BMNOEXCEPT
  {
      //TODO: optimization if block exists and it is OR-ed read
      bm::bit_block_set(blk, 0);
@@ -2995,13 +3012,13 @@ void deseriaizer_base<DEC, BLOCK_IDX>::read_0runs_block(decoder_type& dec,
  
  
  template<typename DEC, typename BLOCK_IDX>
-void deseriaizer_base<DEC, BLOCK_IDX>::read_gap_block(decoder_type&   decoder,
+void
+deseriaizer_base<DEC, BLOCK_IDX>::read_gap_block(decoder_type&   decoder,
                                             unsigned        block_type, 
                                             bm::gap_word_t* dst_block,
                                             bm::gap_word_t& gap_head)
  {
-    typedef bit_in<DEC> bit_in_type;
-
+//    typedef bit_in<DEC> bit_in_type;
      switch (block_type)
      {
      case set_block_gap:
@@ -3028,7 +3045,7 @@ void deseriaizer_base<DEC, BLOCK_IDX>::read_gap_block(decoder_type&   decoder,
              for (gap_word_t k = 0; k < len; ++k)
              {
                  gap_word_t bit_idx = decoder.get_16();
-                               gap_add_value(dst_block, bit_idx);
+                               bm::gap_add_value(dst_block, bit_idx);
              } // for
          }
          break;
@@ -3126,7 +3143,7 @@ typename deseriaizer_base<DEC, BLOCK_IDX>::block_idx_type
  deseriaizer_base<DEC, BLOCK_IDX>::try_skip(
                                          decoder_type&   decoder,
                                          block_idx_type nb,
-                                        block_idx_type expect_nb)
+                                        block_idx_type expect_nb) BMNOEXCEPT
  {
      if (skip_offset_) // skip bookmark is available
      {
@@ -3156,26 +3173,22 @@ deseriaizer_base<DEC, BLOCK_IDX>::try_skip(
              nb_sync = decoder.get_32();
              break;
          case set_nb_sync_mark48:
+            nb_sync = block_idx_type(decoder.get_48());
              #ifndef BM64ADDR
                  BM_ASSERT(0);
-                #ifndef BM_NO_STL
-                    throw std::logic_error(this->err_msg());
-                #else
-                    BM_THROW(BM_ERR_SERIALFORMAT);
-                #endif
+                decoder.set_pos(save_pos);
+                skip_offset_ = 0;
+                return 0; // invalid bookmark from 64-bit serialization
              #endif
-            nb_sync = block_idx_type(decoder.get_48());
              break;
          case set_nb_sync_mark64:
+            nb_sync = block_idx_type(decoder.get_64());
              #ifndef BM64ADDR
                  BM_ASSERT(0);
-                #ifndef BM_NO_STL
-                    throw std::logic_error(this->err_msg());
-                #else
-                    BM_THROW(BM_ERR_SERIALFORMAT);
-                #endif
+                decoder.set_pos(save_pos);
+                skip_offset_ = 0;
+                return 0; // invalid bookmark from 64-bit serialization
              #endif
-            nb_sync = block_idx_type(decoder.get_64());
              break;
          default:
              BM_ASSERT(0);
@@ -3187,8 +3200,6 @@ deseriaizer_base<DEC, BLOCK_IDX>::try_skip(
          nb_sync += nb;
          if (nb_sync <= expect_nb) // within reach
          {
-            //block_idx_ = nb_sync;
-            //state_ = e_blocks;
              skip_offset_ = 0;
              return nb_sync;
          }
@@ -3593,6 +3604,7 @@ size_t deserializer<BV, DEC>::deserialize(bvector_type&        bv,
          {
              // 64-bit vector cannot be deserialized into 32-bit
              BM_ASSERT(sizeof(block_idx_type)==8);
+            bv_size = (block_idx_type)dec.get_64();
              #ifndef BM64ADDR
                  #ifndef BM_NO_STL
                      throw std::logic_error(this->err_msg());
@@ -3600,7 +3612,6 @@ size_t deserializer<BV, DEC>::deserialize(bvector_type&        bv,
                      BM_THROW(BM_ERR_SERIALFORMAT);
                  #endif
              #endif
-            bv_size = (block_idx_type)dec.get_64();
          }
          else
              bv_size = dec.get_32();
@@ -3715,12 +3726,12 @@ size_t deserializer<BV, DEC>::deserialize(bvector_type&        bv,
              goto process_full_blocks;
      #else
              BM_ASSERT(0); // 32-bit vector cannot read 64-bit
+            dec.get_64();
              #ifndef BM_NO_STL
                  throw std::logic_error(this->err_msg());
              #else
                  BM_THROW(BM_ERR_SERIALFORMAT);
              #endif
-            dec.get_64();
      #endif
              process_full_blocks:
              {
@@ -3957,7 +3968,7 @@ size_t deserializer<BV, DEC>::deserialize(bvector_type&        bv,
  template<class BV, class DEC>
  void deserializer<BV, DEC>::xor_decode(size_type x_ref_idx, bm::id64_t x_ref_d64,
                                         blocks_manager_type& bman,
-                                       block_idx_type nb)
+                                       block_idx_type       nb)
  {
      BM_ASSERT(ref_vect_);
  
@@ -4098,7 +4109,15 @@ serial_stream_iterator<DEC, BLOCK_IDX>::serial_stream_iterator(const unsigned ch
          }
          state_ = e_blocks;
      }
-    block_idx_arr_ = (gap_word_t*) ::malloc(sizeof(gap_word_t) * bm::gap_max_bits);
+    block_idx_arr_=(gap_word_t*)::malloc(sizeof(gap_word_t) * bm::gap_max_bits);
+    if (!block_idx_arr_)
+    {
+        #ifndef BM_NO_STL
+            throw std::bad_alloc();
+        #else
+            BM_THROW(BM_ERR_BADALLOC);
+        #endif
+    }
      this->id_array_ = block_idx_arr_;
  }
  
@@ -4335,7 +4354,7 @@ void serial_stream_iterator<DEC, BLOCK_IDX>::next()
  
  template<typename DEC, typename BLOCK_IDX>
  typename serial_stream_iterator<DEC, BLOCK_IDX>::block_idx_type
-serial_stream_iterator<DEC, BLOCK_IDX>::skip_mono_blocks()
+serial_stream_iterator<DEC, BLOCK_IDX>::skip_mono_blocks() BMNOEXCEPT
  {
         BM_ASSERT(state_ == e_zero_blocks || state_ == e_one_blocks);
      if (!mono_block_cnt_)
@@ -4350,7 +4369,8 @@ serial_stream_iterator<DEC, BLOCK_IDX>::skip_mono_blocks()
  }
  
  template<typename DEC, typename BLOCK_IDX>
-void serial_stream_iterator<DEC, BLOCK_IDX>::get_inv_arr(bm::word_t* block)
+void
+serial_stream_iterator<DEC, BLOCK_IDX>::get_inv_arr(bm::word_t* block) BMNOEXCEPT
  {
      gap_word_t len = decoder_.get_16();
      if (block)
@@ -4358,7 +4378,7 @@ void serial_stream_iterator<DEC, BLOCK_IDX>::get_inv_arr(bm::word_t* block)
          bm::bit_block_set(block, ~0u);
          for (unsigned k = 0; k < len; ++k)
          {
-            gap_word_t bit_idx = decoder_.get_16();
+            bm::gap_word_t bit_idx = decoder_.get_16();
              bm::clear_bit(block, bit_idx);
          }
      }
@@ -5519,8 +5539,8 @@ serial_stream_iterator<DEC, BLOCK_IDX>::get_bit_block_COUNT_SUB_BA(
  
  template<typename DEC, typename BLOCK_IDX>
  unsigned serial_stream_iterator<DEC, BLOCK_IDX>::get_arr_bit(
-                                                  bm::word_t* dst_block,
-                                                  bool        clear_target)
+                                          bm::word_t* dst_block,
+                                          bool        clear_target) BMNOEXCEPT
  {
      BM_ASSERT(this->block_type_ == set_block_arrbit || 
                this->block_type_ == set_block_bit_1bit);
@@ -5547,17 +5567,16 @@ unsigned serial_stream_iterator<DEC, BLOCK_IDX>::get_arr_bit(
      else
      {
          if (this->block_type_ == set_block_bit_1bit)
-        {
-            return 1; // nothing to do: len var already consumed 16bits
-        }
-        // fwd the decocing stream
+            return 1; // nothing to do: len var already consumed 16 bits
+
+        // fwd the decode stream
          decoder_.seek(len * 2);
      }
      return len;
  }
  
  template<typename DEC, typename BLOCK_IDX>
-unsigned serial_stream_iterator<DEC, BLOCK_IDX>::get_bit()
+unsigned serial_stream_iterator<DEC, BLOCK_IDX>::get_bit() BMNOEXCEPT
  {
      BM_ASSERT(this->block_type_ == set_block_bit_1bit);
      ++(this->block_idx_);
diff --git a/c++/include/util/bitset/bmsparsevec.h b/c++/include/util/bitset/bmsparsevec.h

index fed6f242d0cfa224a2b4c0a2e40d3bb26b6dcee5..536ae7b52e2a3c2bc486e213927bb732f87851e6 100644 (file)
--- a/c++/include/util/bitset/bmsparsevec.h
+++ b/c++/include/util/bitset/bmsparsevec.h
@@ -37,7 +37,7 @@ For more information please visit:  http://bitmagic.io
  
  
  #include "bmtrans.h"
-#include "bmalgo.h"
+#include "bmalgo_impl.h"
  #include "bmbuffer.h"
  #include "bmbmatrix.h"
  #include "bmdef.h"
@@ -113,10 +113,10 @@ public:
      class reference
      {
      public:
-        reference(sparse_vector<Val, BV>& sv, size_type idx) BMNOEXEPT
+        reference(sparse_vector<Val, BV>& sv, size_type idx) BMNOEXCEPT
          : sv_(sv), idx_(idx)
          {}
-        operator value_type() const { return sv_.get(idx_); }
+        operator value_type() const BMNOEXCEPT { return sv_.get(idx_); }
          reference& operator=(const reference& ref)
          {
              sv_.set(idx_, (value_type)ref);
@@ -127,9 +127,9 @@ public:
              sv_.set(idx_, val);
              return *this;
          }
-        bool operator==(const reference& ref) const
+        bool operator==(const reference& ref) const BMNOEXCEPT
                                  { return bool(*this) == bool(ref); }
-        bool is_null() const { return sv_.is_null(idx_); }
+        bool is_null() const BMNOEXCEPT { return sv_.is_null(idx_); }
      private:
          sparse_vector<Val, BV>& sv_;
          size_type               idx_;
@@ -169,30 +169,30 @@ public:
          typedef value_type&                 reference;
  
      public:
-        const_iterator();
-        const_iterator(const sparse_vector_type* sv);
-        const_iterator(const sparse_vector_type* sv, size_type pos);
-        const_iterator(const const_iterator& it);
+        const_iterator() BMNOEXCEPT;
+        const_iterator(const sparse_vector_type* sv) BMNOEXCEPT;
+        const_iterator(const sparse_vector_type* sv, size_type pos) BMNOEXCEPT;
+        const_iterator(const const_iterator& it) BMNOEXCEPT;
          
-        bool operator==(const const_iterator& it) const
+        bool operator==(const const_iterator& it) const BMNOEXCEPT
                                  { return (pos_ == it.pos_) && (sv_ == it.sv_); }
-        bool operator!=(const const_iterator& it) const
+        bool operator!=(const const_iterator& it) const BMNOEXCEPT
                                  { return ! operator==(it); }
-        bool operator < (const const_iterator& it) const
+        bool operator < (const const_iterator& it) const BMNOEXCEPT
                                  { return pos_ < it.pos_; }
-        bool operator <= (const const_iterator& it) const
+        bool operator <= (const const_iterator& it) const BMNOEXCEPT
                                  { return pos_ <= it.pos_; }
-        bool operator > (const const_iterator& it) const
+        bool operator > (const const_iterator& it) const BMNOEXCEPT
                                  { return pos_ > it.pos_; }
-        bool operator >= (const const_iterator& it) const
+        bool operator >= (const const_iterator& it) const BMNOEXCEPT
                                  { return pos_ >= it.pos_; }
  
          /// \brief Get current position (value)
-        value_type operator*() const { return this->value(); }
+        value_type operator*() const  { return this->value(); }
          
          
          /// \brief Advance to the next available value
-        const_iterator& operator++() { this->advance(); return *this; }
+        const_iterator& operator++() BMNOEXCEPT { this->advance(); return *this; }
  
          /// \brief Advance to the next available value
          const_iterator& operator++(int)
@@ -203,24 +203,25 @@ public:
          value_type value() const;
          
          /// \brief Get NULL status
-        bool is_null() const;
+        bool is_null() const BMNOEXCEPT;
          
          /// Returns true if iterator is at a valid position
-        bool valid() const { return pos_ != bm::id_max; }
+        bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
          
          /// Invalidate current iterator
-        void invalidate() { pos_ = bm::id_max; }
+        void invalidate() BMNOEXCEPT { pos_ = bm::id_max; }
          
          /// Current position (index) in the vector
-        size_type pos() const { return pos_; }
+        size_type pos() const BMNOEXCEPT{ return pos_; }
          
          /// re-position to a specified position
-        void go_to(size_type pos);
+        void go_to(size_type pos) BMNOEXCEPT;
          
          /// advance iterator forward by one
-        void advance();
+        /// @return true if it is still valid
+        bool advance() BMNOEXCEPT;
          
-        void skip_zero_values();
+        void skip_zero_values() BMNOEXCEPT;
      private:
          enum buf_size_e
          {
@@ -232,7 +233,6 @@ public:
          size_type                         pos_;     ///!< Position
          mutable buffer_type               buffer_;  ///!< value buffer
          mutable value_type*               buf_ptr_; ///!< position in the buffer
-        mutable allocator_pool_type       pool_;
      };
      
      /**
@@ -313,7 +313,7 @@ public:
              Get access to not-null vector
              @internal
          */
-        bvector_type* get_null_bvect() const { return bv_null_; }
+        bvector_type* get_null_bvect() const BMNOEXCEPT { return bv_null_; }
          
          /** add value to the buffer without changing the NULL vector
              @param v - value to push back
@@ -323,9 +323,9 @@ public:
          size_type add_value_no_null(value_type v);
          
          /**
-            Reconf back inserter not to touch the NULL vector
+            Reconfigure back inserter not to touch the NULL vector
          */
-        void disable_set_null() { set_not_null_ = false; }
+        void disable_set_null() BMNOEXCEPT { set_not_null_ = false; }
          // ---------------------------------------------------------------
          
      protected:
@@ -387,11 +387,11 @@ public:
  
  #ifndef BM_NO_CXX11
      /*! move-ctor */
-    sparse_vector(sparse_vector<Val, BV>&& sv) BMNOEXEPT;
+    sparse_vector(sparse_vector<Val, BV>&& sv) BMNOEXCEPT;
  
  
      /*! move assignmment operator */
-    sparse_vector<Val,BV>& operator = (sparse_vector<Val, BV>&& sv) BMNOEXEPT
+    sparse_vector<Val,BV>& operator = (sparse_vector<Val, BV>&& sv) BMNOEXCEPT
      {
          if (this != &sv)
          {
@@ -402,7 +402,7 @@ public:
      }
  #endif
  
-    ~sparse_vector() BMNOEXEPT;
+    ~sparse_vector() BMNOEXCEPT;
      ///@}
  
      
@@ -411,14 +411,16 @@ public:
      ///@{
  
      /** \brief Operator to get write access to an element  */
-    reference operator[](size_type idx) { return reference(*this, idx); }
+    reference operator[](size_type idx) BMNOEXCEPT
+                            { return reference(*this, idx); }
  
      /*!
          \brief get specified element without bounds checking
          \param idx - element index
          \return value of the element
      */
-    value_type operator[](size_type idx) const { return this->get(idx); }
+    value_type operator[](size_type idx) const BMNOEXCEPT
+                                    { return this->get(idx); }
  
      /*!
          \brief access specified element with bounds checking
@@ -431,7 +433,7 @@ public:
          \param idx - element index
          \return value of the element
      */
-    value_type get(size_type idx) const;
+    value_type get(size_type idx) const BMNOEXCEPT;
  
      /*!
          \brief set specified element with bounds checking and automatic resize
@@ -485,21 +487,24 @@ public:
      //@{
  
      /** Provide const iterator access to container content  */
-    const_iterator begin() const;
+    const_iterator begin() const BMNOEXCEPT;
  
      /** Provide const iterator access to the end    */
-    const_iterator end() const { return const_iterator(this, bm::id_max); }
+    const_iterator end() const BMNOEXCEPT
+        { return const_iterator(this, bm::id_max); }
  
      /** Get const_itertor re-positioned to specific element
      @param idx - position in the sparse vector
      */
-    const_iterator get_const_iterator(size_type idx) const { return const_iterator(this, idx); }
+    const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
+        { return const_iterator(this, idx); }
   
      /** Provide back insert iterator
          Back insert iterator implements buffered insertion,
          which is faster, than random access or push_back
      */
-    back_insert_iterator get_back_inserter() { return back_insert_iterator(this); }
+    back_insert_iterator get_back_inserter()
+        { return back_insert_iterator(this); }
      ///@}
  
  
@@ -515,7 +520,7 @@ public:
      /** \brief trait if sparse vector is "compressed" (false)
      */
      static
-    bool is_compressed() { return false; }
+    bool is_compressed() BMNOEXCEPT { return false; }
      
      ///@}
  
@@ -608,14 +613,14 @@ public:
  
      /*! \brief content exchange
      */
-    void swap(sparse_vector<Val, BV>& sv) BMNOEXEPT;
+    void swap(sparse_vector<Val, BV>& sv) BMNOEXCEPT;
  
      // ------------------------------------------------------------
      /*! @name Clear                                              */
      ///@{
  
      /*! \brief resize to zero, free memory */
-    void clear() BMNOEXEPT;
+    void clear() BMNOEXCEPT;
  
      /*!
          \brief clear range (assign bit 0 for all plains)
@@ -636,12 +641,12 @@ public:
      /*! \brief return size of the vector
          \return size of sparse vector
      */
-    size_type size() const { return this->size_; }
+    size_type size() const BMNOEXCEPT { return this->size_; }
      
      /*! \brief return true if vector is empty
          \return true if empty
      */
-    bool empty() const { return (size() == 0); }
+    bool empty() const BMNOEXCEPT { return (size() == 0); }
      
      /*! \brief resize vector
          \param sz - new size
@@ -663,7 +668,7 @@ public:
          \return true, if it is the same
      */
      bool equal(const sparse_vector<Val, BV>& sv,
-               bm::null_support null_able = bm::use_null) const;
+               bm::null_support null_able = bm::use_null) const BMNOEXCEPT;
  
      ///@}
  
@@ -679,7 +684,7 @@ public:
       
          \return 0 - equal, < 0 - vect[i] < str, >0 otherwise
      */
-    int compare(size_type idx, const value_type val) const;
+    int compare(size_type idx, const value_type val) const BMNOEXCEPT;
      
      ///@}
  
@@ -694,8 +699,9 @@ public:
          \param stat - memory allocation statistics after optimization
      */
      void optimize(bm::word_t* temp_block = 0,
-                  typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
-                  typename sparse_vector<Val, BV>::statistics* stat = 0);
+          typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
+          typename sparse_vector<Val, BV>::statistics* stat = 0);
+
      /*!
         \brief Optimize sizes of GAP blocks
  
@@ -715,7 +721,8 @@ public:
  
          @sa statistics
      */
-    void calc_stat(struct sparse_vector<Val, BV>::statistics* st) const;
+    void calc_stat(
+        struct sparse_vector<Val, BV>::statistics* st) const BMNOEXCEPT;
      ///@}
  
      // ------------------------------------------------------------
@@ -789,7 +796,6 @@ public:
          \param offset - target index in the sparse vector to export from
          \param zero_mem - set to false if target array is pre-initialized
                            with 0s to avoid performance penalty   
-        \param pool_ptr - optional pointer to block allocation pool
          \return number of exported elements
       
          \sa decode
@@ -799,8 +805,7 @@ public:
      size_type extract(value_type* arr,
                        size_type size,
                        size_type offset = 0,
-                      bool      zero_mem = true,
-                      allocator_pool_type* pool_ptr = 0) const;
+                      bool      zero_mem = true) const BMNOEXCEPT2;
  
      /** \brief extract small window without use of masking vector
          \sa decode
@@ -824,7 +829,7 @@ public:
          \internal
      */
      static
-    size_type translate_address(size_type i) { return i; }
+    size_type translate_address(size_type i) BMNOEXCEPT { return i; }
      
      /**
          \brief throw range error
@@ -845,24 +850,24 @@ public:
      \brief find position of compressed element by its rank
      */
      static
-    bool find_rank(size_type rank, size_type& pos);
+    bool find_rank(size_type rank, size_type& pos) BMNOEXCEPT;
  
      /**
          \brief size of sparse vector (may be different for RSC)
      */
-    size_type effective_size() const { return size(); }
+    size_type effective_size() const BMNOEXCEPT { return size(); }
  
      /**
          \brief Always 1 (non-matrix type)
      */
-    size_type effective_vector_max() const { return 1; }
+    size_type effective_vector_max() const BMNOEXCEPT { return 1; }
  
      ///@}
  
      /// Set allocator pool for local (non-threaded)
      /// memory cyclic(lots of alloc-free ops) opertations
      ///
-    void set_allocator_pool(allocator_pool_type* pool_ptr);
+    void set_allocator_pool(allocator_pool_type* pool_ptr) BMNOEXCEPT;
      
  protected:
      enum octet_plains
@@ -886,20 +891,26 @@ protected:
      void insert_value_no_null(size_type idx, value_type v);
  
      void resize_internal(size_type sz) { resize(sz); }
-    size_type size_internal() const { return size(); }
+    size_type size_internal() const BMNOEXCEPT { return size(); }
  
-    bool is_remap() const { return false; }
-    size_t remap_size() const { return 0; }
-    const unsigned char* get_remap_buffer() const { return 0; }
-    unsigned char* init_remap_buffer() { return 0; }
-    void set_remap() { }
+    bool is_remap() const BMNOEXCEPT { return false; }
+    size_t remap_size() const BMNOEXCEPT { return 0; }
+    const unsigned char* get_remap_buffer() const BMNOEXCEPT { return 0; }
+    unsigned char* init_remap_buffer() BMNOEXCEPT { return 0; }
+    void set_remap() BMNOEXCEPT { }
  
      bool resolve_range(size_type from, size_type to,
-                       size_type* idx_from, size_type* idx_to) const
+                       size_type* idx_from, size_type* idx_to) const BMNOEXCEPT
      {
          *idx_from = from; *idx_to = to; return true;
      }
  
+    /// Increment element by 1 without changing NULL vector or size
+    void inc_no_null(size_type idx);
+
+    /// Increment by v without changing NULL vector or size
+    void inc_no_null(size_type idx, value_type v);
+
  protected:
      template<class V, class SV> friend class rsc_sparse_vector;
      template<class SVect> friend class sparse_vector_scanner;
@@ -933,7 +944,7 @@ sparse_vector<Val, BV>::sparse_vector(const sparse_vector<Val, BV>& sv)
  #ifndef BM_NO_CXX11
  
  template<class Val, class BV>
-sparse_vector<Val, BV>::sparse_vector(sparse_vector<Val, BV>&& sv) BMNOEXEPT
+sparse_vector<Val, BV>::sparse_vector(sparse_vector<Val, BV>&& sv) BMNOEXCEPT
  {
      parent_type::swap(sv);
  }
@@ -944,13 +955,13 @@ sparse_vector<Val, BV>::sparse_vector(sparse_vector<Val, BV>&& sv) BMNOEXEPT
  //---------------------------------------------------------------------
  
  template<class Val, class BV>
-sparse_vector<Val, BV>::~sparse_vector() BMNOEXEPT
+sparse_vector<Val, BV>::~sparse_vector() BMNOEXCEPT
  {}
  
  //---------------------------------------------------------------------
  
  template<class Val, class BV>
-void sparse_vector<Val, BV>::swap(sparse_vector<Val, BV>& sv) BMNOEXEPT
+void sparse_vector<Val, BV>::swap(sparse_vector<Val, BV>& sv) BMNOEXCEPT
  {
      parent_type::swap(sv);
  }
@@ -1080,19 +1091,7 @@ sparse_vector<Val, BV>::decode(value_type* arr,
                                 size_type   dec_size,
                                 bool        zero_mem) const
  {
-    if (dec_size < 32)
-    {
-        return extract_range(arr, dec_size, idx_from, zero_mem);
-    }
-    return extract_plains(arr, dec_size, idx_from, zero_mem);
-    // TODO: write proper extract() based on for_each_range() and a visitor
-    /*
-    if (dec_size < 1024)
-    {
-        return extract_plains(arr, dec_size, idx_from, zero_mem);
-    }
      return extract(arr, dec_size, idx_from, zero_mem);
-    */
  }
  
  //---------------------------------------------------------------------
@@ -1373,89 +1372,65 @@ sparse_vector<Val, BV>::extract_plains(value_type* arr,
  
  template<class Val, class BV>
  typename sparse_vector<Val, BV>::size_type
-sparse_vector<Val, BV>::extract(value_type* arr,
+sparse_vector<Val, BV>::extract(value_type* BMRESTRICT arr,
                                  size_type   size,
                                  size_type   offset,
-                                bool        zero_mem,
-                                allocator_pool_type* pool_ptr) const
+                                bool        zero_mem) const BMNOEXCEPT2
  {
      /// Decoder functor
      /// @internal
      ///
      struct sv_decode_visitor_func
      {
-        sv_decode_visitor_func(value_type* varr,
+        sv_decode_visitor_func(value_type* BMRESTRICT varr,
                                 value_type  mask,
-                               size_type   off)
-        : arr_(varr), mask_(mask), off_(off)
+                               size_type   off) BMNOEXCEPT2
+        : arr_(varr), mask_(mask), sv_off_(off)
          {}
-        
-        void add_bits(size_type arr_offset, const unsigned char* bits, unsigned bits_size)
+
+        void add_bits(size_type bv_offset,
+                      const unsigned char* bits, unsigned bits_size) BMNOEXCEPT
          {
-            size_type idx_base = arr_offset - off_;
-            const value_type m = mask_;
-            unsigned i = 0;
-            for (; i < bits_size; ++i)
-                arr_[idx_base + bits[i]] |= m;
+            // can be negative (-1) when bv base offset = 0 and sv = 1,2..
+            size_type base = bv_offset - sv_off_; 
+            value_type m = mask_;
+            for (unsigned i = 0; i < bits_size; ++i)
+                arr_[bits[i] + base] |= m;
          }
-        
-        void add_range(size_type arr_offset, unsigned sz)
+        void add_range(size_type bv_offset, size_type sz) BMNOEXCEPT
          {
-            size_type idx_base = arr_offset - off_;
-            const value_type m = mask_;
-            for (unsigned i = 0; i < sz; ++i)
-                arr_[i + idx_base] |= m;
+            auto base = bv_offset - sv_off_;
+            value_type m = mask_;
+            for (size_type i = 0; i < sz; ++i)
+                arr_[i + base] |= m;
          }
-        value_type*  arr_;
-        value_type   mask_;
-        size_type    off_;
-    };
  
+        value_type* BMRESTRICT arr_;       ///< target array for reverse transpose
+        value_type             mask_;      ///< bit-plane mask 
+        size_type              sv_off_;    ///< SV read offset
+    };
  
-    if (size == 0)
+    if (!size)
          return 0;
  
      if (zero_mem)
          ::memset(arr, 0, sizeof(value_type)*size);
      
-    size_type start = offset;
-    size_type end = start + size;
+    size_type end = offset + size;
      if (end > this->size_)
-    {
          end = this->size_;
-    }
-    
-       bool masked_scan = !(offset == 0 && size == this->size());
-    if (masked_scan) // use temp vector to decompress the area
-    {
-        bvector_type bv_mask;
-        bv_mask.set_allocator_pool(pool_ptr);
-        
-        for (size_type i = 0; i < parent_type::value_bits(); ++i)
-        {
-            const bvector_type* bv = this->bmatr_.get_row(i);
-            if (bv)
-            {
-                bv_mask.copy_range(*bv, offset, end - 1);
-                sv_decode_visitor_func func(arr, (value_type(1) << i), offset);
-                bm::for_each_bit(bv_mask, func);
-            }
-        } // for i
-    }
-    else
-    {
-        for (size_type i = 0; i < parent_type::value_bits(); ++i)
-        {
-            const bvector_type* bv = this->bmatr_.get_row(i);
-            if (bv)
-            {
-                sv_decode_visitor_func func(arr, (value_type(1) << i), 0);
-                bm::for_each_bit(*bv, func);
-            }
-        } // for i
-    }
  
-    return end - start;
+    sv_decode_visitor_func func(arr, 0, offset);
+
+    for (size_type i = 0; i < parent_type::value_bits(); ++i)
+    {
+        const bvector_type* bv = this->bmatr_.get_row(i);
+        if (!bv)
+            continue;
+        func.mask_ = (value_type(1) << i); // set target plane OR mask
+        bm::for_each_bit_range_no_check(*bv, offset, end-1, func);
+    } // for i
+    return end - offset;
  }
  
  //---------------------------------------------------------------------
@@ -1473,7 +1448,8 @@ sparse_vector<Val, BV>::at(typename sparse_vector<Val, BV>::size_type idx) const
  
  template<class Val, class BV>
  typename sparse_vector<Val, BV>::value_type
-sparse_vector<Val, BV>::get(typename sparse_vector<Val, BV>::size_type i) const
+sparse_vector<Val, BV>::get(
+        typename sparse_vector<Val, BV>::size_type i) const BMNOEXCEPT
  {
      BM_ASSERT(i < bm::id_max);
      BM_ASSERT(i < size());
@@ -1485,7 +1461,7 @@ sparse_vector<Val, BV>::get(typename sparse_vector<Val, BV>::size_type i) const
          bool b = this->bmatr_.test_4rows(j);
          if (b)
          {
-            value_type vm = this->bmatr_.get_half_octet(i, j);
+            value_type vm = (value_type)this->bmatr_.get_half_octet(i, j);
              v |= vm << j;
          }
      } // for j
@@ -1692,7 +1668,17 @@ void sparse_vector<Val, BV>::inc(size_type idx)
  {
      if (idx >= this->size_)
          this->size_ = idx+1;
+    inc_no_null(idx);
+    bvector_type* bv_null = this->get_null_bvect();
+    if (bv_null)
+        bv_null->set_bit_no_check(idx);
+}
+
+//---------------------------------------------------------------------
  
+template<class Val, class BV>
+void sparse_vector<Val, BV>::inc_no_null(size_type idx)
+{
      for (unsigned i = 0; i < parent_type::sv_value_plains; ++i)
      {
          bvector_type* bv = this->get_plain(i);
@@ -1700,15 +1686,21 @@ void sparse_vector<Val, BV>::inc(size_type idx)
          if (!carry_over)
              break;
      }
-    bvector_type* bv_null = this->get_null_bvect();
-    if (bv_null)
-        bv_null->set_bit_no_check(idx);
  }
  
  //---------------------------------------------------------------------
  
  template<class Val, class BV>
-void sparse_vector<Val, BV>::clear() BMNOEXEPT
+void sparse_vector<Val, BV>::inc_no_null(size_type idx, value_type v)
+{
+    value_type v_prev = get(idx);
+    set_value_no_null(idx, v + v_prev);
+}
+
+//---------------------------------------------------------------------
+
+template<class Val, class BV>
+void sparse_vector<Val, BV>::clear() BMNOEXCEPT
  {
      parent_type::clear();
  }
@@ -1716,7 +1708,7 @@ void sparse_vector<Val, BV>::clear() BMNOEXEPT
  //---------------------------------------------------------------------
  
  template<class Val, class BV>
-bool sparse_vector<Val, BV>::find_rank(size_type rank, size_type& pos)
+bool sparse_vector<Val, BV>::find_rank(size_type rank, size_type& pos) BMNOEXCEPT
  {
      BM_ASSERT(rank);
      pos = rank - 1; 
@@ -1740,7 +1732,7 @@ sparse_vector<Val, BV>::clear_range(
  
  template<class Val, class BV>
  void sparse_vector<Val, BV>::calc_stat(
-     struct sparse_vector<Val, BV>::statistics* st) const
+     struct sparse_vector<Val, BV>::statistics* st) const BMNOEXCEPT
  {
      BM_ASSERT(st);
      typename bvector_type::statistics stbv;
@@ -1906,7 +1898,8 @@ void sparse_vector<Val, BV>::filter(
  //---------------------------------------------------------------------
  
  template<class Val, class BV>
-int sparse_vector<Val, BV>::compare(size_type idx, const value_type val) const
+int sparse_vector<Val, BV>::compare(size_type idx,
+                                    const value_type val) const BMNOEXCEPT
  {
      // TODO: consider bit-by-bit comparison to minimize CPU hit miss in plans get()
      value_type sv_value = get(idx);
@@ -1917,7 +1910,7 @@ int sparse_vector<Val, BV>::compare(size_type idx, const value_type val) const
  
  template<class Val, class BV>
  bool sparse_vector<Val, BV>::equal(const sparse_vector<Val, BV>& sv,
-                                   bm::null_support null_able) const
+                                   bm::null_support null_able) const BMNOEXCEPT
  {
      return parent_type::equal(sv, null_able);
  }
@@ -1926,7 +1919,7 @@ bool sparse_vector<Val, BV>::equal(const sparse_vector<Val, BV>& sv,
  
  template<class Val, class BV>
  typename sparse_vector<Val, BV>::const_iterator
-sparse_vector<Val, BV>::begin() const
+sparse_vector<Val, BV>::begin() const BMNOEXCEPT
  {
      typedef typename sparse_vector<Val, BV>::const_iterator it_type;
      return it_type(this);
@@ -1936,7 +1929,7 @@ sparse_vector<Val, BV>::begin() const
  
  template<class Val, class BV>
  void sparse_vector<Val, BV>::set_allocator_pool(
-    typename sparse_vector<Val, BV>::allocator_pool_type* pool_ptr)
+    typename sparse_vector<Val, BV>::allocator_pool_type* pool_ptr) BMNOEXCEPT
  {
      this->bmatr_.set_allocator_pool(pool_ptr);
  }
@@ -1948,7 +1941,7 @@ void sparse_vector<Val, BV>::set_allocator_pool(
  
  
  template<class Val, class BV>
-sparse_vector<Val, BV>::const_iterator::const_iterator()
+sparse_vector<Val, BV>::const_iterator::const_iterator() BMNOEXCEPT
  : sv_(0), pos_(bm::id_max), buf_ptr_(0)
  {}
  
@@ -1956,7 +1949,7 @@ sparse_vector<Val, BV>::const_iterator::const_iterator()
  
  template<class Val, class BV>
  sparse_vector<Val, BV>::const_iterator::const_iterator(
-                        const typename sparse_vector<Val, BV>::const_iterator& it)
+    const typename sparse_vector<Val, BV>::const_iterator& it) BMNOEXCEPT
  : sv_(it.sv_), pos_(it.pos_), buf_ptr_(0)
  {}
  
@@ -1964,7 +1957,8 @@ sparse_vector<Val, BV>::const_iterator::const_iterator(
  
  template<class Val, class BV>
  sparse_vector<Val, BV>::const_iterator::const_iterator(
-  const typename sparse_vector<Val, BV>::const_iterator::sparse_vector_type* sv)
+  const typename sparse_vector<Val, BV>::const_iterator::sparse_vector_type* sv
+  ) BMNOEXCEPT
  : sv_(sv), buf_ptr_(0)
  {
      BM_ASSERT(sv_);
@@ -1976,7 +1970,7 @@ sparse_vector<Val, BV>::const_iterator::const_iterator(
  template<class Val, class BV>
  sparse_vector<Val, BV>::const_iterator::const_iterator(
   const typename sparse_vector<Val, BV>::const_iterator::sparse_vector_type* sv,
- typename sparse_vector<Val, BV>::size_type pos)
+ typename sparse_vector<Val, BV>::size_type pos) BMNOEXCEPT
  : sv_(sv), buf_ptr_(0)
  {
      BM_ASSERT(sv_);
@@ -1986,7 +1980,7 @@ sparse_vector<Val, BV>::const_iterator::const_iterator(
  //---------------------------------------------------------------------
  
  template<class Val, class BV>
-void sparse_vector<Val, BV>::const_iterator::go_to(size_type pos)
+void sparse_vector<Val, BV>::const_iterator::go_to(size_type pos) BMNOEXCEPT
  {
      pos_ = (!sv_ || pos >= sv_->size()) ? bm::id_max : pos;
      buf_ptr_ = 0;
@@ -1995,22 +1989,23 @@ void sparse_vector<Val, BV>::const_iterator::go_to(size_type pos)
  //---------------------------------------------------------------------
  
  template<class Val, class BV>
-void sparse_vector<Val, BV>::const_iterator::advance()
+bool sparse_vector<Val, BV>::const_iterator::advance() BMNOEXCEPT
  {
      if (pos_ == bm::id_max) // nothing to do, we are at the end
-        return;
+        return false;
      ++pos_;
      if (pos_ >= sv_->size())
+    {
          this->invalidate();
-    else
+        return false;
+    }
+    if (buf_ptr_)
      {
-        if (buf_ptr_)
-        {
-            ++buf_ptr_;
-            if (buf_ptr_ - ((value_type*)buffer_.data()) >= n_buf_size)
-                buf_ptr_ = 0;
-        }
+        ++buf_ptr_;
+        if (buf_ptr_ - ((value_type*)buffer_.data()) >= n_buf_size)
+            buf_ptr_ = 0;
      }
+    return true;
  }
  
  //---------------------------------------------------------------------
@@ -2026,7 +2021,7 @@ sparse_vector<Val, BV>::const_iterator::value() const
      {
          buffer_.reserve(n_buf_size * sizeof(value_type));
          buf_ptr_ = (value_type*)(buffer_.data());
-        sv_->extract(buf_ptr_, n_buf_size, pos_, true, &pool_);
+        sv_->extract(buf_ptr_, n_buf_size, pos_, true);
      }
      v = *buf_ptr_;
      return v;
@@ -2035,7 +2030,7 @@ sparse_vector<Val, BV>::const_iterator::value() const
  //---------------------------------------------------------------------
  
  template<class Val, class BV>
-void sparse_vector<Val, BV>::const_iterator::skip_zero_values()
+void sparse_vector<Val, BV>::const_iterator::skip_zero_values() BMNOEXCEPT
  {
      value_type v = value();
      if (buf_ptr_)
@@ -2063,7 +2058,7 @@ void sparse_vector<Val, BV>::const_iterator::skip_zero_values()
  //---------------------------------------------------------------------
  
  template<class Val, class BV>
-bool sparse_vector<Val, BV>::const_iterator::is_null() const
+bool sparse_vector<Val, BV>::const_iterator::is_null() const BMNOEXCEPT
  {
      return sv_->is_null(pos_);
  }
diff --git a/c++/include/util/bitset/bmsparsevec_algo.h b/c++/include/util/bitset/bmsparsevec_algo.h

index 70269d15486eeaeb2a4d30022001b89fd54b9d4a..2be5afecd38e9ba4bed65deb64da244ac4279565 100644 (file)
--- a/c++/include/util/bitset/bmsparsevec_algo.h
+++ b/c++/include/util/bitset/bmsparsevec_algo.h
@@ -18,7 +18,7 @@ limitations under the License.
  For more information please visit:  http://bitmagic.io
  */
  /*! \file bmsparsevec_algo.h
-    \brief Algorithms for sparse_vector<>
+    \brief Algorithms for bm::sparse_vector
  */
  
  #ifndef BM__H__INCLUDED__
@@ -31,6 +31,7 @@ For more information please visit:  http://bitmagic.io
  #include "bmsparsevec.h"
  #include "bmaggregator.h"
  #include "bmbuffer.h"
+#include "bmalgo.h"
  #include "bmdef.h"
  
  #ifdef _MSC_VER
@@ -231,7 +232,7 @@ bool sparse_vector_find_first_mismatch(const SV& sv1,
          } // null_proc
      }
  
-    for (unsigned i = 0; mismatch & (i < plains1); ++i)
+    for (unsigned i = 0; mismatch && (i < plains1); ++i)
      {
          typename SV::bvector_type_const_ptr bv1 = sv1.get_plain(i);
          typename SV::bvector_type_const_ptr bv2 = sv2.get_plain(i);
@@ -503,7 +504,7 @@ public:
      /**
          \brief reset sparse vector binding
      */
-    void reset_binding();
+    void reset_binding() BMNOEXCEPT;
  
      /**
          \brief find all sparse vector elements EQ to search value
@@ -709,7 +710,7 @@ protected:
      int compare_str(const SV& sv, size_type idx, const value_type* str);
  
      /// compare sv[idx] with input value
-    int compare(const SV& sv, size_type idx, const value_type val);
+    int compare(const SV& sv, size_type idx, const value_type val) BMNOEXCEPT;
  
  protected:
      sparse_vector_scanner(const sparse_vector_scanner&) = delete;
@@ -1047,7 +1048,7 @@ void set2set_11_transform<SV>::remap(const bvector_type&        bv_in,
              {
                  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
                  bv_out.set(&gb_->buffer_[0], buf_cnt, BM_SORTED);
-                buf_cnt ^= buf_cnt;
+                buf_cnt = 0;
              }
              nb_old = nb;
              gb_->gather_idx_[buf_cnt++] = idx;
@@ -1061,7 +1062,7 @@ void set2set_11_transform<SV>::remap(const bvector_type&        bv_in,
          {
              sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
              bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED);
-            buf_cnt ^= buf_cnt;
+            buf_cnt = 0;
          }
      } // for en
      if (buf_cnt)
@@ -1157,7 +1158,7 @@ void sparse_vector_scanner<SV>::bind(const SV&  sv, bool sorted)
  //----------------------------------------------------------------------------
  
  template<typename SV>
-void sparse_vector_scanner<SV>::reset_binding()
+void sparse_vector_scanner<SV>::reset_binding() BMNOEXCEPT
  {
      bound_sv_ = 0;
      effective_str_max_ = 0;
@@ -2020,7 +2021,7 @@ int sparse_vector_scanner<SV>::compare_str(const SV& sv,
  template<typename SV>
  int sparse_vector_scanner<SV>::compare(const SV& sv,
                                         size_type idx,
-                                       const value_type val)
+                                       const value_type val) BMNOEXCEPT
  {
      // TODO: implement sentinel elements cache (similar to compare_str())
      return sv.compare(idx, val);
diff --git a/c++/include/util/bitset/bmsparsevec_compr.h b/c++/include/util/bitset/bmsparsevec_compr.h

index e0930820213e1f35a1a758d2614481806cfac373..ecc791cc80843c3f8ff0c4d4c229435835627e18 100644 (file)
--- a/c++/include/util/bitset/bmsparsevec_compr.h
+++ b/c++/include/util/bitset/bmsparsevec_compr.h
@@ -98,18 +98,120 @@ public:
      class reference
      {
      public:
-        reference(rsc_sparse_vector<Val, SV>& csv, size_type idx) BMNOEXEPT
+        reference(rsc_sparse_vector<Val, SV>& csv, size_type idx) BMNOEXCEPT
          : csv_(csv), idx_(idx)
          {}
-        operator value_type() const { return csv_.get(idx_); }
-        bool operator==(const reference& ref) const
+        operator value_type() const BMNOEXCEPT { return csv_.get(idx_); }
+        bool operator==(const reference& ref) const BMNOEXCEPT
                                  { return bool(*this) == bool(ref); }
-        bool is_null() const { return csv_.is_null(idx_); }
+        bool is_null() const BMNOEXCEPT { return csv_.is_null(idx_); }
      private:
          rsc_sparse_vector<Val, SV>& csv_;
          size_type                   idx_;
      };
  
+    /**
+        Const iterator to traverse the rsc sparse vector.
+
+        Implementation uses a buffer for decoding, so competing changes
+        to the original vector may not match the iterator returned values.
+
+        This iterator keeps an operational buffer, so its memory footprint
+        is not negligible.
+
+        @ingroup sv
+    */
+    class const_iterator
+    {
+    public:
+    friend class rsc_sparse_vector;
+
+#ifndef BM_NO_STL
+        typedef std::input_iterator_tag  iterator_category;
+#endif
+        typedef rsc_sparse_vector<Val, SV>           rsc_sparse_vector_type;
+        typedef rsc_sparse_vector_type*              rsc_sparse_vector_type_ptr;
+        typedef typename rsc_sparse_vector_type::value_type    value_type;
+        typedef typename rsc_sparse_vector_type::size_type     size_type;
+        typedef typename rsc_sparse_vector_type::bvector_type  bvector_type;
+        typedef typename bvector_type::allocator_type          allocator_type;
+        typedef typename
+        bvector_type::allocator_type::allocator_pool_type allocator_pool_type;
+        typedef bm::byte_buffer<allocator_type>            buffer_type;
+
+        typedef unsigned                    difference_type;
+        typedef unsigned*                   pointer;
+        typedef value_type&                 reference;
+
+    public:
+        const_iterator() BMNOEXCEPT;
+        const_iterator(const rsc_sparse_vector_type* csv) BMNOEXCEPT;
+        const_iterator(const rsc_sparse_vector_type* csv, size_type pos) BMNOEXCEPT;
+        const_iterator(const const_iterator& it) BMNOEXCEPT;
+
+        bool operator==(const const_iterator& it) const BMNOEXCEPT
+                                { return (pos_ == it.pos_) && (csv_ == it.csv_); }
+        bool operator!=(const const_iterator& it) const BMNOEXCEPT
+                                { return ! operator==(it); }
+        bool operator < (const const_iterator& it) const BMNOEXCEPT
+                                { return pos_ < it.pos_; }
+        bool operator <= (const const_iterator& it) const BMNOEXCEPT
+                                { return pos_ <= it.pos_; }
+        bool operator > (const const_iterator& it) const BMNOEXCEPT
+                                { return pos_ > it.pos_; }
+        bool operator >= (const const_iterator& it) const BMNOEXCEPT
+                                { return pos_ >= it.pos_; }
+
+        /// \brief Get current position (value)
+        value_type operator*() const { return this->value(); }
+
+
+        /// \brief Advance to the next available value
+        const_iterator& operator++() BMNOEXCEPT { this->advance(); return *this; }
+
+        /// \brief Advance to the next available value
+        const_iterator& operator++(int)
+            { const_iterator tmp(*this);this->advance(); return tmp; }
+
+
+        /// \brief Get current position (value)
+        value_type value() const;
+
+        /// \brief Get NULL status
+        bool is_null() const BMNOEXCEPT;
+
+        /// Returns true if iterator is at a valid position
+        bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
+
+        /// Invalidate current iterator
+        void invalidate() BMNOEXCEPT { pos_ = bm::id_max; }
+
+        /// Current position (index) in the vector
+        size_type pos() const BMNOEXCEPT{ return pos_; }
+
+        /// re-position to a specified position
+        void go_to(size_type pos) BMNOEXCEPT;
+
+        /// advance iterator forward by one
+        /// @return true if it is still valid
+        bool advance() BMNOEXCEPT;
+
+        void skip_zero_values() BMNOEXCEPT;
+    private:
+        enum buf_size_e
+        {
+            n_buf_size = 1024 * 8
+        };
+
+    private:
+        const rsc_sparse_vector_type*     csv_;     ///!< ptr to parent
+        size_type                         pos_;     ///!< Position
+        mutable buffer_type               vbuffer_; ///!< value buffer
+        mutable buffer_type               tbuffer_; ///!< temp buffer
+        mutable value_type*               buf_ptr_; ///!< position in the buffer
+    };
+
+
  
      /**
          Back insert iterator implements buffered insert, faster than generic
@@ -141,8 +243,8 @@ public:
          typedef void reference;
          
      public:
-        back_insert_iterator();
-        back_insert_iterator(rsc_sparse_vector_type* csv);
+        back_insert_iterator() BMNOEXCEPT;
+        back_insert_iterator(rsc_sparse_vector_type* csv) BMNOEXCEPT;
          
          back_insert_iterator& operator=(const back_insert_iterator& bi)
          {
@@ -154,7 +256,8 @@ public:
          ~back_insert_iterator();
          
          /** push value to the vector */
-        back_insert_iterator& operator=(value_type v) { this->add(v); return *this; }
+        back_insert_iterator& operator=(value_type v)
+            { this->add(v); return *this; }
          /** noop */
          back_insert_iterator& operator*() { return *this; }
          /** noop */
@@ -166,10 +269,10 @@ public:
          void add(value_type v);
          
          /** add NULL (no-value) to the container */
-        void add_null();
+        void add_null() BMNOEXCEPT;
          
          /** add a series of consequitve NULLs (no-value) to the container */
-        void add_null(size_type count);
+        void add_null(size_type count) BMNOEXCEPT;
          
          /** flush the accumulated buffer */
          void flush();
@@ -183,7 +286,8 @@ public:
          ///size_type add_value(value_type v);
          
          typedef rsc_sparse_vector_type::sparse_vector_type sparse_vector_type;
-        typedef typename sparse_vector_type::back_insert_iterator   sparse_vector_bi;
+        typedef
+        typename sparse_vector_type::back_insert_iterator  sparse_vector_bi;
      private:
          rsc_sparse_vector_type* csv_;      ///!< pointer on the parent vector
          sparse_vector_bi        sv_bi_;
@@ -192,12 +296,25 @@ public:
  public:
      // ------------------------------------------------------------
      /*! @name Construction and assignment  */
+
      //@{
  
      rsc_sparse_vector(bm::null_support null_able = bm::use_null,
                        allocation_policy_type ap = allocation_policy_type(),
                        size_type bv_max_size = bm::id_max,
                        const allocator_type&   alloc  = allocator_type());
+
+    /**
+        Constructor to pre-initialize the list of assigned (not NULL) elements.
+
+        If the list of not NULL elements is known upfront, it helps to
+        pre-declare it, enable the rank-select index and then use set().
+        This scenario gives a significant speed boost compared to random assignment.
+
+        @param bv_null - not NULL vector for the container
+    */
+    rsc_sparse_vector(const bvector_type& bv_null);
+
      ~rsc_sparse_vector();
      
      /*! copy-ctor */
@@ -205,7 +322,7 @@ public:
      
      
      /*! copy assignmment operator */
-    rsc_sparse_vector<Val,SV>& operator = (const rsc_sparse_vector<Val, SV>& csv)
+    rsc_sparse_vector<Val,SV>& operator=(const rsc_sparse_vector<Val, SV>& csv)
      {
          if (this != &csv)
          {
@@ -219,13 +336,13 @@ public:
          }
          return *this;
      }
-    
+
  #ifndef BM_NO_CXX11
      /*! move-ctor */
-    rsc_sparse_vector(rsc_sparse_vector<Val,SV>&& csv) BMNOEXEPT;
+    rsc_sparse_vector(rsc_sparse_vector<Val,SV>&& csv) BMNOEXCEPT;
  
      /*! move assignmment operator */
-    rsc_sparse_vector<Val,SV>& operator=(rsc_sparse_vector<Val,SV>&& csv) BMNOEXEPT
+    rsc_sparse_vector<Val,SV>& operator=(rsc_sparse_vector<Val,SV>&& csv) BMNOEXCEPT
      {
          if (this != &csv)
          {
@@ -249,7 +366,7 @@ public:
      /*! \brief return size of the vector
          \return size of sparse vector
      */
-    size_type size() const;
+    size_type size() const BMNOEXCEPT;
      
      /*! \brief return true if vector is empty
          \return true if empty
@@ -281,7 +398,7 @@ public:
          \param idx - element index
          \return value of the element
      */
-    value_type get(size_type idx) const;
+    value_type get(size_type idx) const BMNOEXCEPT;
      
      /*!
          \brief set specified element with bounds checking and automatic resize
@@ -301,6 +418,29 @@ public:
      */
      void set(size_type idx, value_type v);
      
+
+    /*!
+        \brief increment specified element by one
+        \param idx - element index
+    */
+    void inc(size_type idx);
+
+    /*!
+        \brief increment specified element by the given value
+        \param idx - element index
+        \param v - increment value
+    */
+    void inc(size_type idx, value_type v);
+
+    /*!
+        \brief increment specified element by v, element MUST be NOT NULL
+        Faster than plain inc(); if the element is NULL the behavior is undefined
+        \param idx - element index
+        \param v - increment value
+        @sa inc
+    */
+    void inc_not_null(size_type idx, value_type v);
+
      /*!
          \brief set specified element to NULL
          RSC vector actually erases element when it is set to NULL (expensive).
@@ -309,37 +449,67 @@ public:
      void set_null(size_type idx);
  
  
-    
      /** \brief test if specified element is NULL
          \param idx - element index
          \return true if it is NULL false if it was assigned or container
          is not configured to support assignment flags
      */
-    bool is_null(size_type idx) const;
+    bool is_null(size_type idx) const BMNOEXCEPT;
      
      /**
          \brief Get bit-vector of assigned values (or NULL)
      */
-    const bvector_type* get_null_bvector() const;
+    const bvector_type* get_null_bvector() const BMNOEXCEPT;
  
      /**
          \brief find position of compressed element by its rank
          \param rank - rank  (virtual index in sparse vector)
          \param idx  - index (true position)
      */
-    bool find_rank(size_type rank, size_type& idx) const;
+    bool find_rank(size_type rank, size_type& idx) const BMNOEXCEPT;
  
      //@}
      
      // ------------------------------------------------------------
      /*! @name Export content to C-stype array                    */
      ///@{
-    
+
+    /**
+        \brief C-style decode
+        \param arr - decode target array (must be properly sized)
+        \param idx_from - start address to decode
+        \param size - number of elements to decode
+        \param zero_mem - flag if array needs to be set to zeros first
+
+        @return actual decoded size
+        @sa decode_buf
+     */
      size_type decode(value_type* arr,
                       size_type   idx_from,
                       size_type   size,
                       bool        zero_mem = true) const;
  
+
+    /**
+        \brief C-style decode (variant with external memory)
+         Analog of decode, but requires two arrays.
+         Faster than decode in many cases.
+
+        \param arr - decode target array (must be properly sized)
+        \param arr_buf_tmp - decode temp buffer (must be the same size as arr)
+        \param idx_from - start address to decode
+        \param size - number of elements to decode
+        \param zero_mem - flag if array needs to be set to zeros first
+
+        @return actual decoded size
+        @sa decode
+     */
+    size_type decode_buf(value_type* arr,
+                         value_type* arr_buf_tmp,
+                         size_type   idx_from,
+                         size_type   size,
+                         bool        zero_mem = true) const BMNOEXCEPT;
+
      ///@}
  
      
@@ -367,7 +537,7 @@ public:
          \brief check if another vector has the same content
          \return true, if it is the same
      */
-    bool equal(const rsc_sparse_vector<Val, SV>& csv) const;
+    bool equal(const rsc_sparse_vector<Val, SV>& csv) const BMNOEXCEPT;
      //@}
  
  
@@ -395,6 +565,20 @@ public:
      /*! @name Iterator access */
      //@{
  
+    /** Provide const iterator access to container content  */
+    const_iterator begin() const BMNOEXCEPT
+        { return const_iterator(this); }
+        
+    /** Provide const iterator access to the end    */
+    const_iterator end() const BMNOEXCEPT
+        { return const_iterator(this, bm::id_max); }
+
+    /** Get const_iterator re-positioned to specific element
+    @param idx - position in the sparse vector
+    */
+    const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
+        { return const_iterator(this, idx); }
+
      back_insert_iterator get_back_inserter() { return back_insert_iterator(this); }
      ///@}
  
@@ -408,13 +592,14 @@ public:
          \param opt_mode - requested compression depth
          \param stat - memory allocation statistics after optimization
      */
-    void optimize(bm::word_t* temp_block = 0,
-                  typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
-                  statistics* stat = 0);
+    void optimize(
+        bm::word_t* temp_block = 0,
+        typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
+        statistics* stat = 0);
      
      /*! \brief resize to zero, free memory
      */
-    void clear() BMNOEXEPT;
+    void clear() BMNOEXCEPT;
      
      /*!
          @brief Calculates memory statistics.
@@ -427,7 +612,8 @@ public:
  
          @sa statistics
      */
-    void calc_stat(struct rsc_sparse_vector<Val, SV>::statistics* st) const;
+    void calc_stat(
+           struct rsc_sparse_vector<Val, SV>::statistics* st) const BMNOEXCEPT;
  
      ///@}
  
@@ -448,6 +634,14 @@ public:
      void copy_range(const rsc_sparse_vector<Val, SV>& csv,
          size_type left, size_type right);
  
+    /**
+        @brief merge two vectors (argument gets destroyed)
+        It is important that both vectors have the same NULL vectors
+        @param csv - [in,out] argument vector to merge
+                     (works like move so arg should not be used after the merge)
+     */
+    void merge_not_null(rsc_sparse_vector<Val, SV>& csv);
+
      ///@}
  
      // ------------------------------------------------------------
@@ -467,12 +661,12 @@ public:
      /*!
          \brief returns true if prefix sum table is in sync with the vector
      */
-    bool in_sync() const { return in_sync_; }
+    bool in_sync() const BMNOEXCEPT { return in_sync_; }
      
      /*!
          \brief Unsync the prefix sum table
      */
-    void unsync() { in_sync_ = false; }
+    void unsync() BMNOEXCEPT { in_sync_ = false; }
      ///@}
  
      // ------------------------------------------------------------
@@ -483,19 +677,23 @@ public:
          \brief get access to bit-plain, function checks and creates a plain
          \return bit-vector for the bit plain
      */
-    bvector_type_const_ptr get_plain(unsigned i) const { return sv_.get_plain(i); }
+    bvector_type_const_ptr get_plain(unsigned i) const BMNOEXCEPT
+        { return sv_.get_plain(i); }
  
-    bvector_type_ptr get_plain(unsigned i)  { return sv_.get_plain(i); }
+    bvector_type_ptr get_plain(unsigned i) BMNOEXCEPT
+        { return sv_.get_plain(i); }
      
      /*!
          Number of effective bit-plains in the value type
      */
-    unsigned effective_plains() const { return sv_.effective_plains(); }
+    unsigned effective_plains() const BMNOEXCEPT
+        { return sv_.effective_plains(); }
      
      /*!
          \brief get total number of bit-plains in the vector
      */
-    static unsigned plains() { return sparse_vector_type::plains(); }
+    static unsigned plains() BMNOEXCEPT
+        { return sparse_vector_type::plains(); }
  
      /** Number of stored bit-plains (value plains + extra */
      static unsigned stored_plains()
@@ -504,22 +702,23 @@ public:
      /*!
          \brief access dense vector
      */
-    const sparse_vector_type& get_sv() const { return sv_; }
+    const sparse_vector_type& get_sv() const BMNOEXCEPT { return sv_; }
  
      /*!
          \brief size of internal dense vector
      */
-    size_type effective_size() const { return sv_.size(); }
+    size_type effective_size() const BMNOEXCEPT { return sv_.size(); }
  
      /**
          \brief Always 1 (non-matrix type)
      */
-    size_type effective_vector_max() const { return 1; }
+    size_type effective_vector_max() const BMNOEXCEPT { return 1; }
  
      /*!
          get read-only access to inetrnal bit-matrix
      */
-    const bmatrix_type& get_bmatrix() const { return sv_.get_bmatrix(); }
+    const bmatrix_type& get_bmatrix() const BMNOEXCEPT
+        { return sv_.get_bmatrix(); }
  
      ///@}
      
@@ -537,26 +736,29 @@ protected:
       
          \return true if id is known and resolved successfully
      */
-    bool resolve(size_type idx, size_type* idx_to) const;
+    bool resolve(size_type idx, size_type* idx_to) const BMNOEXCEPT;
  
      bool resolve_range(size_type from, size_type to, 
-                       size_type* idx_from, size_type* idx_to) const;
+                       size_type* idx_from, size_type* idx_to) const BMNOEXCEPT;
      
      void resize_internal(size_type sz) { sv_.resize_internal(sz); }
-    size_type size_internal() const { return sv_.size(); }
+    size_type size_internal() const BMNOEXCEPT { return sv_.size(); }
  
-    bool is_remap() const { return false; }
-    size_t remap_size() const { return 0; }
-    const unsigned char* get_remap_buffer() const { return 0; }
-    unsigned char* init_remap_buffer() { return 0; }
-    void set_remap() { }
+    bool is_remap() const BMNOEXCEPT { return false; }
+    size_t remap_size() const BMNOEXCEPT { return 0; }
+    const unsigned char* get_remap_buffer() const BMNOEXCEPT { return 0; }
+    unsigned char* init_remap_buffer() BMNOEXCEPT { return 0; }
+    void set_remap() BMNOEXCEPT { }
      
      void push_back_no_check(size_type idx, value_type v);
  
  
  private:
-    void construct_bv_blocks();
-    void free_bv_blocks();
+
+    /// Allocate memory for RS index
+    void construct_rs_index();
+    /// Free rs-index
+    void free_rs_index();
  
  protected:
      template<class SVect> friend class sparse_vector_scanner;
@@ -580,13 +782,37 @@ rsc_sparse_vector<Val, SV>::rsc_sparse_vector(bm::null_support null_able,
                                                allocation_policy_type ap,
                                                size_type bv_max_size,
                                                const allocator_type&   alloc)
-: sv_(null_able, ap, bv_max_size, alloc),
-  in_sync_(false)
+: sv_(null_able, ap, bv_max_size, alloc), in_sync_(false)
  {
      BM_ASSERT(null_able == bm::use_null);
      BM_ASSERT(int(sv_value_plains) == int(SV::sv_value_plains));
      size_ = max_id_ = 0;
-    construct_bv_blocks();
+    construct_rs_index();
+}
+
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+rsc_sparse_vector<Val, SV>::rsc_sparse_vector(const bvector_type& bv_null)
+: sv_(bm::use_null), in_sync_(false)
+{
+    construct_rs_index();
+    bvector_type* bv = sv_.get_null_bvect();
+    BM_ASSERT(bv);
+    *bv = bv_null;
+
+    bool found = bv->find_reverse(max_id_);
+    if (found)
+    {
+        size_ = max_id_ + 1;
+        size_type sz = bv->count();
+        sv_.resize(sz);
+    }
+    else
+    {
+        BM_ASSERT(!bv->any());
+        size_ = max_id_ = 0;
+    }
  }
  
  //---------------------------------------------------------------------
@@ -594,7 +820,7 @@ rsc_sparse_vector<Val, SV>::rsc_sparse_vector(bm::null_support null_able,
  template<class Val, class SV>
  rsc_sparse_vector<Val, SV>::~rsc_sparse_vector()
  {
-    free_bv_blocks();
+    free_rs_index();
  }
  
  //---------------------------------------------------------------------
@@ -602,24 +828,20 @@ rsc_sparse_vector<Val, SV>::~rsc_sparse_vector()
  template<class Val, class SV>
  rsc_sparse_vector<Val, SV>::rsc_sparse_vector(
                            const rsc_sparse_vector<Val, SV>& csv)
-: sv_(csv.sv_),
-  size_(csv.size_),
-  max_id_(csv.max_id_),
-  in_sync_(csv.in_sync_)
+: sv_(csv.sv_), size_(csv.size_), max_id_(csv.max_id_), in_sync_(csv.in_sync_)
  {
      BM_ASSERT(int(sv_value_plains) == int(SV::sv_value_plains));
      
-    construct_bv_blocks();
+    construct_rs_index();
      if (in_sync_)
-    {
          bv_blocks_ptr_->copy_from(*(csv.bv_blocks_ptr_));
-    }
  }
  
  //---------------------------------------------------------------------
  
  template<class Val, class SV>
-rsc_sparse_vector<Val, SV>::rsc_sparse_vector(rsc_sparse_vector<Val,SV>&& csv) BMNOEXEPT
+rsc_sparse_vector<Val, SV>::rsc_sparse_vector(
+                            rsc_sparse_vector<Val,SV>&& csv) BMNOEXCEPT
  : sv_(bm::use_null),
    size_(0),
    max_id_(0), in_sync_(false)
@@ -636,7 +858,7 @@ rsc_sparse_vector<Val, SV>::rsc_sparse_vector(rsc_sparse_vector<Val,SV>&& csv) B
  
  template<class Val, class SV>
  typename rsc_sparse_vector<Val, SV>::size_type
-rsc_sparse_vector<Val, SV>::size() const
+rsc_sparse_vector<Val, SV>::size() const BMNOEXCEPT
  {
      return size_;
  }
@@ -686,9 +908,93 @@ void rsc_sparse_vector<Val, SV>::set_null(size_type idx)
          size_type sv_idx = bv_null->count_range(0, idx);
          bv_null->clear_bit_no_check(idx);
          sv_.erase(--sv_idx);
+        in_sync_ = false;
      }
  }
  
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::inc(size_type idx)
+{
+    bvector_type* bv_null = sv_.get_null_bvect();
+    BM_ASSERT(bv_null);
+
+    size_type sv_idx;
+    bool found = bv_null->test(idx);
+
+    sv_idx = in_sync_ ? bv_null->count_to(idx, *bv_blocks_ptr_)
+                      : bv_null->count_range(0, idx); // TODO: make test'n'count
+
+    if (found)
+    {
+        sv_.inc_no_null(--sv_idx);
+    }
+    else
+    {
+        sv_.insert_value_no_null(sv_idx, 1);
+        bv_null->set_bit_no_check(idx);
+
+        if (idx > max_id_)
+        {
+            max_id_ = idx;
+            size_ = max_id_ + 1;
+        }
+        in_sync_ = false;
+    }
+}
+
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::inc(size_type idx, value_type v)
+{
+    bvector_type* bv_null = sv_.get_null_bvect();
+    BM_ASSERT(bv_null);
+
+    size_type sv_idx;
+    bool found = bv_null->test(idx);
+
+    sv_idx = in_sync_ ? bv_null->count_to(idx, *bv_blocks_ptr_)
+                      : bv_null->count_range(0, idx); // TODO: make test'n'count
+
+    if (found)
+    {
+        sv_.inc_no_null(--sv_idx, v);
+    }
+    else
+    {
+        sv_.insert_value_no_null(sv_idx, v);
+        bv_null->set_bit_no_check(idx);
+
+        if (idx > max_id_)
+        {
+            max_id_ = idx;
+            size_ = max_id_ + 1;
+        }
+        in_sync_ = false;
+    }
+}
+
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::inc_not_null(size_type idx, value_type v)
+{
+    bvector_type* bv_null = sv_.get_null_bvect();
+    BM_ASSERT(bv_null->test(idx)); // idx must be NOT NULL
+
+    size_type sv_idx;
+    sv_idx = in_sync_ ? bv_null->count_to(idx, *bv_blocks_ptr_)
+                      : bv_null->count_range(0, idx); // TODO: make test'n'count
+    --sv_idx;
+    if (v == 1)
+        sv_.inc_no_null(sv_idx);
+    else
+        sv_.inc_no_null(sv_idx, v);
+}
+
+
  //---------------------------------------------------------------------
  
  template<class Val, class SV>
@@ -696,15 +1002,15 @@ void rsc_sparse_vector<Val, SV>::set(size_type idx, value_type v)
  {
      bvector_type* bv_null = sv_.get_null_bvect();
      BM_ASSERT(bv_null);
-    
+
+    size_type sv_idx;
      bool found = bv_null->test(idx);
-    size_type sv_idx = bv_null->count_range(0, idx); // TODO: make test'n'count
-//    size_type sv_idx;
-//    bool found = resolve(idx, &sv_idx);
+
+    sv_idx = in_sync_ ? bv_null->count_to(idx, *bv_blocks_ptr_)
+                      : bv_null->count_range(0, idx); // TODO: make test'n'count
  
      if (found)
      {
-        //sv_.set(--sv_idx, v);
          sv_.set_value_no_null(--sv_idx, v);
      }
      else
@@ -725,7 +1031,7 @@ void rsc_sparse_vector<Val, SV>::set(size_type idx, value_type v)
  
  template<class Val, class SV>
  bool rsc_sparse_vector<Val, SV>::equal(
-                    const rsc_sparse_vector<Val, SV>& csv) const
+                    const rsc_sparse_vector<Val, SV>& csv) const BMNOEXCEPT
  {
      if (this == &csv)
          return true;
@@ -739,7 +1045,7 @@ bool rsc_sparse_vector<Val, SV>::equal(
  
  template<class Val, class SV>
  void rsc_sparse_vector<Val, SV>::load_from(
-                                    const sparse_vector_type& sv_src)
+                                        const sparse_vector_type& sv_src)
  {
      max_id_ = size_ = 0;
  
@@ -837,10 +1143,10 @@ void rsc_sparse_vector<Val, SV>::sync(bool force)
  //---------------------------------------------------------------------
  
  template<class Val, class SV>
-bool rsc_sparse_vector<Val, SV>::resolve(size_type idx, size_type* idx_to) const
+bool rsc_sparse_vector<Val, SV>::resolve(size_type idx,
+                                         size_type* idx_to) const BMNOEXCEPT
  {
      BM_ASSERT(idx_to);
-    
      const bvector_type* bv_null = sv_.get_null_bvector();
      if (in_sync_)
      {
@@ -849,23 +1155,17 @@ bool rsc_sparse_vector<Val, SV>::resolve(size_type idx, size_type* idx_to) const
      else  // slow access
      {
          bool found = bv_null->test(idx);
-        if (!found)
-        {
-            *idx_to = 0;
-        }
-        else
-        {
-            *idx_to = bv_null->count_range(0, idx);
-        }
+        *idx_to = found ? bv_null->count_range(0, idx) : 0;
      }
      return bool(*idx_to);
  }
+
  //---------------------------------------------------------------------
  
  template<class Val, class SV>
  bool rsc_sparse_vector<Val, SV>::resolve_range(
      size_type from, size_type to,
-    size_type* idx_from, size_type* idx_to) const
+    size_type* idx_from, size_type* idx_to) const BMNOEXCEPT
  {
      BM_ASSERT(idx_to && idx_from);
      const bvector_type* bv_null = sv_.get_null_bvector();
@@ -876,12 +1176,15 @@ bool rsc_sparse_vector<Val, SV>::resolve_range(
          copy_sz = bv_null->count_range(from, to);
      if (!copy_sz)
          return false;
+
      if (in_sync_)
-        sv_left = bv_null->count_range(0, from, *bv_blocks_ptr_);
+        sv_left = bv_null->rank_corrected(from, *bv_blocks_ptr_);
      else
+    {
          sv_left = bv_null->count_range(0, from);
-    bool tl = bv_null->test(from); // TODO: add count and test
-    sv_left -= tl; // rank correction
+        bool tl = bv_null->test(from); // TODO: add count and test
+        sv_left -= tl; // rank correction
+    }
  
      *idx_from = sv_left; *idx_to = sv_left + copy_sz - 1;
      return true;
@@ -910,7 +1213,7 @@ rsc_sparse_vector<Val, SV>::at(size_type idx) const
  
  template<class Val, class SV>
  typename rsc_sparse_vector<Val, SV>::value_type
-rsc_sparse_vector<Val, SV>::get(size_type idx) const
+rsc_sparse_vector<Val, SV>::get(size_type idx) const BMNOEXCEPT
  {
      size_type sv_idx;
      bool found = resolve(idx, &sv_idx);
@@ -923,7 +1226,7 @@ rsc_sparse_vector<Val, SV>::get(size_type idx) const
  //---------------------------------------------------------------------
  
  template<class Val, class SV>
-bool rsc_sparse_vector<Val, SV>::is_null(size_type idx) const
+bool rsc_sparse_vector<Val, SV>::is_null(size_type idx) const BMNOEXCEPT
  {
      const bvector_type* bv_null = sv_.get_null_bvector();
      BM_ASSERT(bv_null);
@@ -950,7 +1253,7 @@ void rsc_sparse_vector<Val, SV>::optimize(bm::word_t*  temp_block,
  //---------------------------------------------------------------------
  
  template<class Val, class SV>
-void rsc_sparse_vector<Val, SV>::clear() BMNOEXEPT
+void rsc_sparse_vector<Val, SV>::clear() BMNOEXCEPT
  {
      sv_.clear();
      in_sync_ = false;  max_id_ = size_ = 0;
@@ -960,7 +1263,7 @@ void rsc_sparse_vector<Val, SV>::clear() BMNOEXEPT
  
  template<class Val, class SV>
  void rsc_sparse_vector<Val, SV>::calc_stat(
-            struct rsc_sparse_vector<Val, SV>::statistics* st) const
+            struct rsc_sparse_vector<Val, SV>::statistics* st) const BMNOEXCEPT
  {
      BM_ASSERT(st);
      sv_.calc_stat((typename sparse_vector_type::statistics*)st);
@@ -977,7 +1280,7 @@ void rsc_sparse_vector<Val, SV>::calc_stat(
  
  template<class Val, class SV>
  const typename rsc_sparse_vector<Val, SV>::bvector_type*
-rsc_sparse_vector<Val, SV>::get_null_bvector() const
+rsc_sparse_vector<Val, SV>::get_null_bvector() const BMNOEXCEPT
  {
      return sv_.get_null_bvector();
  }
@@ -986,7 +1289,8 @@ rsc_sparse_vector<Val, SV>::get_null_bvector() const
  
  template<class Val, class SV>
  bool
-rsc_sparse_vector<Val, SV>::find_rank(size_type rank, size_type& idx) const
+rsc_sparse_vector<Val, SV>::find_rank(size_type rank,
+                                      size_type& idx) const BMNOEXCEPT
  {
      BM_ASSERT(rank);
      bool b;
@@ -1006,7 +1310,7 @@ typename rsc_sparse_vector<Val, SV>::size_type
  rsc_sparse_vector<Val, SV>::decode(value_type* arr,
                                     size_type   idx_from,
                                     size_type   size,
-                                   bool        /*zero_mem*/) const
+                                   bool        zero_mem) const
  {
      if (size == 0)
          return 0;
@@ -1020,51 +1324,104 @@ rsc_sparse_vector<Val, SV>::decode(value_type* arr,
      
      if ((bm::id_max - size) <= idx_from)
          size = bm::id_max - idx_from;
+    if ((idx_from + size) > this->size())
+        size = this->size() - idx_from;
  
      const bvector_type* bv_null = sv_.get_null_bvector();
+    size_type rank = bv_null->rank_corrected(idx_from, *bv_blocks_ptr_);
+
+    BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from));
  
-    size_type rank = bv_null->count_to(idx_from, *bv_blocks_ptr_);
-    bool b = bv_null->test(idx_from);
-    
      bvector_enumerator_type en_i = bv_null->get_enumerator(idx_from);
-    size_type i = *en_i;
-    if (idx_from + size <= i)  // empty space (all zeros)
+    BM_ASSERT(en_i.valid());
+
+    if (zero_mem)
+        ::memset(arr, 0, sizeof(value_type)*size);
+
+    sparse_vector_const_iterator it = sv_.get_const_iterator(rank);
+    size_type i = 0;
+    if (it.valid())
      {
+        do
+        {
+            size_type en_idx = *en_i;
+            size_type delta = en_idx - idx_from;
+            idx_from += delta;
+            i += delta;
+            if (i >= size)
+                return size;
+            arr[i++] = it.value();
+            if (!en_i.advance())
+                break;
+            if (!it.advance())
+                break;
+            ++idx_from;
+        } while (i < size);
+    }
+    return i;
+}
+
+
+template<class Val, class SV>
+typename rsc_sparse_vector<Val, SV>::size_type
+rsc_sparse_vector<Val, SV>::decode_buf(value_type*     arr,
+                                       value_type*     arr_buf_tmp,
+                                       size_type       idx_from,
+                                       size_type       size,
+                                       bool            zero_mem) const BMNOEXCEPT
+{
+    if (!size || (idx_from >= this->size()))
+        return 0;
+
+    BM_ASSERT(arr && arr_buf_tmp);
+    BM_ASSERT(arr != arr_buf_tmp);
+    BM_ASSERT(in_sync_);  // call sync() before decoding
+    BM_ASSERT(bv_blocks_ptr_);
+
+    if ((bm::id_max - size) <= idx_from)
+        size = bm::id_max - idx_from;
+    if ((idx_from + size) > this->size())
+        size = this->size() - idx_from;
+
+    if (zero_mem)
          ::memset(arr, 0, sizeof(value_type)*size);
+
+    const bvector_type* bv_null = sv_.get_null_bvector();
+    size_type rank = bv_null->rank_corrected(idx_from, *bv_blocks_ptr_);
+
+    BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from));
+
+    bvector_enumerator_type en_i = bv_null->get_enumerator(idx_from);
+    if (!en_i.valid())
          return size;
-    }
-    rank -= b;
-    sparse_vector_const_iterator it = sv_.get_const_iterator(rank);
-    i = 0;
-    while (it.valid())
+
+    size_type i = en_i.value();
+    if (idx_from + size <= i)  // empty space (all zeros)
+        return size;
+
+    size_type extract_cnt =
+        bv_null->count_range(idx_from, idx_from + size - 1, *bv_blocks_ptr_);
+
+    BM_ASSERT(extract_cnt <= this->size());
+    auto ex_sz = sv_.decode(arr_buf_tmp, rank, extract_cnt, true);
+    BM_ASSERT(ex_sz == extract_cnt); (void) ex_sz;
+
+    for (i = 0; i < extract_cnt; ++i)
      {
-        if (!en_i.valid())
-            break;
+        BM_ASSERT(en_i.valid());
          size_type en_idx = *en_i;
-        while (idx_from < en_idx) // zero the empty prefix
-        {
-            arr[i] ^= arr[i];
-            ++i; ++idx_from;
-            if (i == size)
-                return i;
-        }
-        BM_ASSERT(idx_from == en_idx);
-        arr[i] = *it;
-        ++i; ++idx_from;
-        if (i == size)
-            return i;
-        
+        arr[en_idx-idx_from] = arr_buf_tmp[i];
          en_i.advance();
-        it.advance();
-    } // while
-    
-    return i;
+    } // for i
+
+    return size;
  }
  
+
  //---------------------------------------------------------------------
  
  template<class Val, class SV>
-void rsc_sparse_vector<Val, SV>::construct_bv_blocks()
+void rsc_sparse_vector<Val, SV>::construct_rs_index()
  {
      if (bv_blocks_ptr_)
          return;
@@ -1076,7 +1433,7 @@ void rsc_sparse_vector<Val, SV>::construct_bv_blocks()
  //---------------------------------------------------------------------
  
  template<class Val, class SV>
-void rsc_sparse_vector<Val, SV>::free_bv_blocks()
+void rsc_sparse_vector<Val, SV>::free_rs_index()
  {
      if (bv_blocks_ptr_)
      {
@@ -1085,13 +1442,57 @@ void rsc_sparse_vector<Val, SV>::free_bv_blocks()
      }
  }
  
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::copy_range(
+                            const rsc_sparse_vector<Val, SV>& csv,
+                            size_type left, size_type right)
+{
+    if (left > right)
+        bm::xor_swap(left, right);
+
+    if (left >= csv.size())
+        return;
+
+    size_ = csv.size_; max_id_ = csv.max_id_;
+    in_sync_ = false;
+
+    const bvector_type* arg_bv_null = csv.sv_.get_null_bvector();
+    size_type sv_left, sv_right;
+    bool range_valid = csv.resolve_range(left, right, &sv_left, &sv_right);
+    if (!range_valid)
+    {
+        sv_.clear(); sv_.resize(size_);
+        bvector_type* bv_null = sv_.get_null_bvect();
+        bv_null->copy_range(*arg_bv_null, 0, right);
+        return;
+    }
+    bvector_type* bv_null = sv_.get_null_bvect();
+    bv_null->copy_range(*arg_bv_null, 0, right); // not NULL vector gets a full copy
+    sv_.copy_range(csv.sv_, sv_left, sv_right, bm::no_null); // don't copy NULL
+}
+
+
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::merge_not_null(rsc_sparse_vector<Val, SV>& csv)
+{
+    // MUST have the same NULL to work
+    BM_ASSERT(sv_.get_null_bvector()->equal(*csv.sv_.get_null_bvector()));
+
+    sv_.merge(csv.sv_);
+}
+
+
  //---------------------------------------------------------------------
  //
  //---------------------------------------------------------------------
  
  
  template<class Val, class SV>
-rsc_sparse_vector<Val, SV>::back_insert_iterator::back_insert_iterator()
+rsc_sparse_vector<Val, SV>::back_insert_iterator::back_insert_iterator() BMNOEXCEPT
  : csv_(0)
  {}
  
@@ -1100,7 +1501,7 @@ rsc_sparse_vector<Val, SV>::back_insert_iterator::back_insert_iterator()
  
  template<class Val, class SV>
  rsc_sparse_vector<Val, SV>::back_insert_iterator::back_insert_iterator
-                                                 (rsc_sparse_vector_type* csv)
+                                (rsc_sparse_vector_type* csv) BMNOEXCEPT
  {
      csv_ = csv;
      sv_bi_ = csv->sv_.get_back_inserter();
@@ -1134,7 +1535,7 @@ void rsc_sparse_vector<Val, SV>::back_insert_iterator::add(
  //---------------------------------------------------------------------
  
  template<class Val, class SV>
-void rsc_sparse_vector<Val, SV>::back_insert_iterator::add_null()
+void rsc_sparse_vector<Val, SV>::back_insert_iterator::add_null() BMNOEXCEPT
  {
      BM_ASSERT(csv_);
      csv_->max_id_++;
@@ -1145,7 +1546,7 @@ void rsc_sparse_vector<Val, SV>::back_insert_iterator::add_null()
  
  template<class Val, class SV>
  void rsc_sparse_vector<Val, SV>::back_insert_iterator::add_null(
-                rsc_sparse_vector<Val, SV>::back_insert_iterator::size_type count)
+    rsc_sparse_vector<Val, SV>::back_insert_iterator::size_type count) BMNOEXCEPT
  {
      BM_ASSERT(csv_);
      csv_->max_id_+=count;
@@ -1161,39 +1562,140 @@ void rsc_sparse_vector<Val, SV>::back_insert_iterator::flush()
      csv_->in_sync_ = false;
  }
  
+//---------------------------------------------------------------------
+//
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+rsc_sparse_vector<Val, SV>::const_iterator::const_iterator() BMNOEXCEPT
+: csv_(0), pos_(bm::id_max), buf_ptr_(0) // detached: no container, pos_ at the end marker advance() tests for, no decoded buffer
+{}
+
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+rsc_sparse_vector<Val, SV>::const_iterator::const_iterator(
+    const typename rsc_sparse_vector<Val, SV>::const_iterator& it) BMNOEXCEPT
+: csv_(it.csv_), pos_(it.pos_), buf_ptr_(0) // copies container + position only; buf_ptr_ is reset so value() decodes again
+{}
+
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+rsc_sparse_vector<Val, SV>::const_iterator::const_iterator(
+  const typename rsc_sparse_vector<Val, SV>::const_iterator::rsc_sparse_vector_type* csv
+  ) BMNOEXCEPT
+: csv_(csv), buf_ptr_(0) // pos_ is assigned in the body; no decoded buffer yet
+{
+    BM_ASSERT(csv_); // csv must not be null: it is dereferenced on the next line
+    pos_ = csv_->empty() ? bm::id_max : 0u; // empty container -> end marker, otherwise first element
+}
+
  //---------------------------------------------------------------------
  
  template<class Val, class SV>
-void rsc_sparse_vector<Val, SV>::copy_range(
-                            const rsc_sparse_vector<Val, SV>& csv,
-                            size_type left, size_type right)
+rsc_sparse_vector<Val, SV>::const_iterator::const_iterator(
+ const typename rsc_sparse_vector<Val, SV>::const_iterator::rsc_sparse_vector_type* csv,
+ typename rsc_sparse_vector<Val, SV>::size_type pos) BMNOEXCEPT
+: csv_(csv), buf_ptr_(0) // pos_ is set by go_to() in the body
  {
-    if (left > right)
-        bm::xor_swap(left, right);
+    BM_ASSERT(csv_); // a null container is a caller error here (go_to() itself tolerates it)
+    this->go_to(pos); // out-of-range pos ends up as bm::id_max
+}
  
-    if (left >= csv.size())
-        return;
-    
-    size_ = csv.size_; max_id_ = csv.max_id_;
-    in_sync_ = false;
+//---------------------------------------------------------------------
  
-    const bvector_type* arg_bv_null = csv.sv_.get_null_bvector();
-    size_type sv_left, sv_right;
-    bool range_valid = csv.resolve_range(left, right, &sv_left, &sv_right);
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::const_iterator::go_to(size_type pos) BMNOEXCEPT
+{
+    pos_ = (!csv_ || pos >= csv_->size()) ? bm::id_max : pos; // no container or out of range -> end marker
+    buf_ptr_ = 0; // drop the decoded-buffer cursor; value() decodes again from the new pos_
+}
  
-    if (!range_valid)
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+bool rsc_sparse_vector<Val, SV>::const_iterator::advance() BMNOEXCEPT
+{
+    if (pos_ == bm::id_max) // nothing to do, we are at the end
+        return false;
+    ++pos_; // step to the next index
+    if (pos_ >= csv_->size()) // walked past the last element
      {
-        sv_.clear();
-        sv_.resize(size_);
-        bvector_type* bv_null = sv_.get_null_bvect();
-        bv_null->copy_range(*arg_bv_null, 0, right);
-        return;
+        this->invalidate(); // presumably resets pos_ to bm::id_max - confirm in the class body
+        return false;
+    }
+    if (buf_ptr_) // a decoded window is active: keep its cursor in step with pos_
+    {
+        ++buf_ptr_;
+        if (buf_ptr_ - ((value_type*)vbuffer_.data()) >= n_buf_size)
+            buf_ptr_ = 0; // window exhausted; value() decodes a fresh one on demand
+    }
+    return true; // pos_ addresses an in-range element
+}
+
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+typename rsc_sparse_vector<Val, SV>::const_iterator::value_type
+rsc_sparse_vector<Val, SV>::const_iterator::value() const
+{
+    BM_ASSERT(this->valid()); // must not be called on an end/invalid iterator
+    value_type v;
+
+    if (!buf_ptr_) // no decoded window yet, or it was dropped by go_to()/advance()
+    {
+        vbuffer_.reserve(n_buf_size * sizeof(value_type)); // size argument is in bytes for n_buf_size values
+        tbuffer_.reserve(n_buf_size * sizeof(value_type)); // second buffer, handed to decode_buf as temp space
+        buf_ptr_ = (value_type*)(vbuffer_.data()); // cursor starts at the first decoded value
+        value_type* tmp_buf_ptr = (value_type*) (tbuffer_.data());
+
+        csv_->decode_buf(buf_ptr_, tmp_buf_ptr, pos_, n_buf_size, true); // presumably decodes n_buf_size values from pos_ - see decode_buf
+    }
+    v = *buf_ptr_; // value under the cursor corresponds to pos_
+    return v;
+}
+
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::const_iterator::skip_zero_values() BMNOEXCEPT
+{
+    value_type v = value(); // NOTE(review): value() is not BMNOEXCEPT and reserves buffers - confirm it cannot throw here
+    if (buf_ptr_)
+    {
+        v = *buf_ptr_;
+        value_type* buf_end = ((value_type*)vbuffer_.data()) + n_buf_size; // one past the decoded window
+        while(!v) // move forward while the current value is zero
+        {
+            ++pos_;
+            if (++buf_ptr_ < buf_end)
+                v = *buf_ptr_;
+            else
+                break; // end of the decoded window: the scan stops here, it does not continue into the next window
+        }
+        if (pos_ >= csv_->size()) // ran past the last element
+        {
+            pos_ = bm::id_max; // park at the end marker
+            return;
+        }
+        if (buf_ptr_ >= buf_end)
+            buf_ptr_ = 0; // window consumed; the next value() call decodes again
      }
-    bvector_type* bv_null = sv_.get_null_bvect();
-    bv_null->copy_range(*arg_bv_null, 0, right); // not NULL vector gets a full copy
-    sv_.copy_range(csv.sv_, sv_left, sv_right, bm::no_null); // don't copy NULL
  }
  
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+bool rsc_sparse_vector<Val, SV>::const_iterator::is_null() const BMNOEXCEPT
+{
+    return csv_->is_null(pos_); // delegates to the container for the current position; csv_ is dereferenced unchecked
+}
+
+
+//---------------------------------------------------------------------
+
+
  
  } // namespace bm
  
diff --git a/c++/include/util/bitset/bmsparsevec_serial.h b/c++/include/util/bitset/bmsparsevec_serial.h

index 95fc3af2d7ce639d5c681b3426658e1230b285dc..958b44e95005500d691cc603d1aa733c781aa19c 100644 (file)
--- a/c++/include/util/bitset/bmsparsevec_serial.h
+++ b/c++/include/util/bitset/bmsparsevec_serial.h
@@ -927,7 +927,8 @@ unsigned sparse_vector_deserializer<SV>::load_header(
  
      BM_ASSERT(h1 == 'B' && (h2 == 'M' || h2 == 'C'));
  
-    if (h1 != 'B' && (h2 != 'M' || h2 != 'C'))  // no magic header?
+    bool sig2_ok = (h2 == 'M' || h2 == 'C');
+    if (h1 != 'B' || !sig2_ok) //&& (h2 != 'M' || h2 != 'C'))  // no magic header?
          raise_invalid_header();
  
      unsigned char bv_bo = dec.get_8(); (void) bv_bo;
diff --git a/c++/include/util/bitset/bmsparsevec_util.h b/c++/include/util/bitset/bmsparsevec_util.h

index 7cec62559aa2c93395c742b05c2671185e5882d5..f61143a51dd6247f1bd85c7e560a7e70a1e8eea7 100644 (file)
--- a/c++/include/util/bitset/bmsparsevec_util.h
+++ b/c++/include/util/bitset/bmsparsevec_util.h
@@ -70,7 +70,7 @@ public:
      /*!
          \brief Move content from the argument address resolver
      */
-    void move_from(bvps_addr_resolver& addr_res) BMNOEXEPT;
+    void move_from(bvps_addr_resolver& addr_res) BMNOEXCEPT;
      
      /*!
          \brief Resolve id to integer id (address)
@@ -82,7 +82,7 @@ public:
       
          \return true if id is known and resolved successfully
      */
-    bool resolve(size_type id_from, size_type* id_to) const;
+    bool resolve(size_type id_from, size_type* id_to) const BMNOEXCEPT;
      
      /*!
          \brief Resolve id to integer id (address) without sync check
@@ -94,7 +94,7 @@ public:
       
          \return true if id is known and resolved successfully
      */
-    bool get(size_type id_from, size_type* id_to) const;
+    bool get(size_type id_from, size_type* id_to) const BMNOEXCEPT;
      
      /*!
          \brief Set id (bit) to address resolver
@@ -146,7 +146,7 @@ public:
      /*!
          \brief equality comparison
      */
-    bool equal(const bvps_addr_resolver& addr_res) const;
+    bool equal(const bvps_addr_resolver& addr_res) const BMNOEXCEPT;
      
  protected:
      void construct_rs_index();
@@ -437,7 +437,7 @@ bvps_addr_resolver<BV>::bvps_addr_resolver(const bvps_addr_resolver& addr_res)
  
  
  template<class BV>
-void bvps_addr_resolver<BV>::move_from(bvps_addr_resolver& addr_res) BMNOEXEPT
+void bvps_addr_resolver<BV>::move_from(bvps_addr_resolver& addr_res) BMNOEXCEPT
  {
      if (this != &addr_res)
      {
@@ -459,7 +459,8 @@ void bvps_addr_resolver<BV>::move_from(bvps_addr_resolver& addr_res) BMNOEXEPT
  //---------------------------------------------------------------------
  
  template<class BV>
-bool bvps_addr_resolver<BV>::resolve(size_type id_from, size_type* id_to) const
+bool bvps_addr_resolver<BV>::resolve(size_type id_from,
+                                    size_type* id_to) const BMNOEXCEPT
  {
      BM_ASSERT(id_to);
      if (in_sync_)
@@ -484,7 +485,8 @@ bool bvps_addr_resolver<BV>::resolve(size_type id_from, size_type* id_to) const
  //---------------------------------------------------------------------
  
  template<class BV>
-bool bvps_addr_resolver<BV>::get(size_type id_from, size_type* id_to) const
+bool bvps_addr_resolver<BV>::get(size_type id_from,
+                                 size_type* id_to) const BMNOEXCEPT
  {
      BM_ASSERT(id_to);
      BM_ASSERT(in_sync_);
@@ -529,10 +531,10 @@ void bvps_addr_resolver<BV>::optimize(bm::word_t* temp_block)
  //---------------------------------------------------------------------
  
  template<class BV>
-bool bvps_addr_resolver<BV>::equal(const bvps_addr_resolver& addr_res) const
+bool bvps_addr_resolver<BV>::equal(
+                    const bvps_addr_resolver& addr_res) const BMNOEXCEPT
  {
-    int cmp = addr_bv_.compare(addr_res.addr_bv_);
-    return (cmp == 0);
+    return addr_bv_.equal(addr_res.addr_bv_); // only the address bit-vectors are compared
  }
  
  //---------------------------------------------------------------------
diff --git a/c++/include/util/bitset/bmsse2.h b/c++/include/util/bitset/bmsse2.h

index 6748e05fbd65f66d62b4611fd594e3d07ce420b8..09998d7394734ba9dd08be24cdfe9421ca7bbc18 100644 (file)
--- a/c++/include/util/bitset/bmsse2.h
+++ b/c++/include/util/bitset/bmsse2.h
@@ -398,6 +398,74 @@ unsigned sse2_gap_find(const bm::gap_word_t* BMRESTRICT pbuf, const bm::gap_word
      }
      return size;
  }
+
+/**
+    Hybrid search in a GAP buffer: binary narrowing, then a sse2_gap_find() scan
+    once the window is short enough (whole buffer is scanned if it has < 17 entries).
+   \param buf - GAP buffer pointer; entry count is read from the header word (*buf >> 3).
+   \param pos - position to look up (narrowed to bm::gap_word_t for the scan).
+   \param is_set - output. GAP value (0 or 1).
+   \return GAP index: index in buf of the first entry >= pos (see the BM_ASSERTs).
+
+    @ingroup SSE2
+*/
+inline
+unsigned sse2_gap_bfind(const unsigned short* BMRESTRICT buf,
+                         unsigned pos, unsigned* BMRESTRICT is_set)
+{
+    unsigned start = 1; // entries begin at buf[1]; buf[0] is the header word
+    unsigned end = 1 + ((*buf) >> 3);
+    unsigned dsize = end - start;
+
+    if (dsize < 17) // short buffer: scan it directly, no binary phase
+    {
+        start = bm::sse2_gap_find(buf+1, (bm::gap_word_t)pos, dsize); // start is now an offset relative to buf+1
+        *is_set = ((*buf) & 1) ^ (start & 1); // header bit 0 XOR parity of the offset
+        BM_ASSERT(buf[start+1] >= pos);
+        BM_ASSERT(buf[start] < pos || (start==0));
+
+        return start+1; // convert the offset back to an index in buf
+    }
+    unsigned arr_end = end; // remember the original end: 'end' shrinks during the search
+    while (start != end)
+    {
+        unsigned curr = (start + end) >> 1;
+        if (buf[curr] < pos)
+            start = curr + 1;
+        else
+            end = curr;
+
+        unsigned size = end - start;
+        if (size < 16) // window is small enough: finish with the scan
+        {
+            size += (end != arr_end); // widen by one entry unless the window already ends at the array end
+            unsigned idx =
+                bm::sse2_gap_find(buf + start, (bm::gap_word_t)pos, size);
+            start += idx;
+
+            BM_ASSERT(buf[start] >= pos);
+            BM_ASSERT(buf[start - 1] < pos || (start == 1));
+            break;
+        }
+    }
+
+    *is_set = ((*buf) & 1) ^ ((start-1) & 1); // same parity rule as above, with start as an index in buf
+    return start;
+}
+
+/**
+    Return the GAP value (0 or 1) at pos; thin wrapper over sse2_gap_bfind()
+    @ingroup SSE2
+*/
+inline
+unsigned sse2_gap_test(const unsigned short* BMRESTRICT buf, unsigned pos)
+{
+    unsigned is_set; // written by sse2_gap_bfind() on both of its return paths
+    bm::sse2_gap_bfind(buf, pos, &is_set); // the returned index is not needed here
+    return is_set;
+}
+
+
  #ifdef __GNUG__
  #pragma GCC diagnostic pop
  #endif
@@ -460,6 +528,8 @@ unsigned sse2_gap_find(const bm::gap_word_t* BMRESTRICT pbuf, const bm::gap_word
  #define VECT_SET_BLOCK(dst, value) \
      sse2_set_block((__m128i*) dst, value)
  
+#define VECT_GAP_BFIND(buf, pos, is_set) \
+    sse2_gap_bfind(buf, pos, is_set)
  
  
  } // namespace
diff --git a/c++/include/util/bitset/bmsse4.h b/c++/include/util/bitset/bmsse4.h

index 0f2e03d91688332f4a72ce02523871b5c7e19f3b..d5362f15843208b06084cbf5ddf3eac4ae2587db 100644 (file)
--- a/c++/include/util/bitset/bmsse4.h
+++ b/c++/include/util/bitset/bmsse4.h
@@ -576,6 +576,17 @@ bool sse4_is_all_one(const __m128i* BMRESTRICT block)
      return true;
  }
  
+/*!
+    @brief check if a 128-bit SSE wave (loaded unaligned from ptr) is all 0xFFFF...FFF
+    @ingroup SSE4
+*/
+BMFORCEINLINE
+bool sse42_test_all_one_wave(const void* ptr)
+{
+    return _mm_test_all_ones(_mm_loadu_si128((const __m128i*)ptr)); // cast keeps const: the load only reads
+}
+
+
  /*!
      @brief check if wave of pointers is all NULL
      @ingroup SSE4
@@ -973,12 +984,14 @@ bool sse42_bit_find_first(const __m128i* BMRESTRICT block,
  #endif
  
  /*!
-     SSE4.2 check for one to two (variable len) 128 bit SSE lines for gap search results (8 elements)
+     SSE4.2 check for one to two (variable len) 128 bit SSE lines
+     for gap search results (8 elements)
       @ingroup SSE4
       \internal
  */
  inline
-unsigned sse4_gap_find(const bm::gap_word_t* BMRESTRICT pbuf, const bm::gap_word_t pos, const unsigned size)
+unsigned sse4_gap_find(const bm::gap_word_t* BMRESTRICT pbuf,
+                       const bm::gap_word_t pos, const unsigned size)
  {
      BM_ASSERT(size <= 16);
      BM_ASSERT(size);
@@ -1031,6 +1044,74 @@ unsigned sse4_gap_find(const bm::gap_word_t* BMRESTRICT pbuf, const bm::gap_word
      return size - bc;
  }
  
+/**
+    Hybrid search in a GAP buffer: binary narrowing, then a sse4_gap_find() scan
+    once the window is short enough (whole buffer is scanned if it has < 17 entries).
+   \param buf - GAP buffer pointer; entry count is read from the header word (*buf >> 3).
+   \param pos - position to look up (narrowed to bm::gap_word_t for the scan).
+   \param is_set - output. GAP value (0 or 1).
+   \return GAP index: index in buf of the first entry >= pos (see the BM_ASSERTs).
+
+    @ingroup SSE4
+*/
+inline
+unsigned sse42_gap_bfind(const unsigned short* BMRESTRICT buf,
+                         unsigned pos, unsigned* BMRESTRICT is_set)
+{
+    unsigned start = 1; // entries begin at buf[1]; buf[0] is the header word
+    unsigned end = 1 + ((*buf) >> 3);
+    unsigned dsize = end - start;
+
+    if (dsize < 17) // short buffer: scan it directly, no binary phase
+    {
+        start = bm::sse4_gap_find(buf+1, (bm::gap_word_t)pos, dsize); // start is now an offset relative to buf+1
+        *is_set = ((*buf) & 1) ^ (start & 1); // header bit 0 XOR parity of the offset
+        BM_ASSERT(buf[start+1] >= pos);
+        BM_ASSERT(buf[start] < pos || (start==0));
+
+        return start+1; // convert the offset back to an index in buf
+    }
+    unsigned arr_end = end; // remember the original end: 'end' shrinks during the search
+    while (start != end)
+    {
+        unsigned curr = (start + end) >> 1;
+        if (buf[curr] < pos)
+            start = curr + 1;
+        else
+            end = curr;
+
+        unsigned size = end - start;
+        if (size < 16) // window is small enough: finish with the scan
+        {
+            size += (end != arr_end); // widen by one entry unless the window already ends at the array end
+            unsigned idx =
+                bm::sse4_gap_find(buf + start, (bm::gap_word_t)pos, size);
+            start += idx;
+
+            BM_ASSERT(buf[start] >= pos);
+            BM_ASSERT(buf[start - 1] < pos || (start == 1));
+            break;
+        }
+    }
+
+    *is_set = ((*buf) & 1) ^ ((start-1) & 1); // same parity rule as above, with start as an index in buf
+    return start;
+}
+
+/**
+    Return the GAP value (0 or 1) at pos; thin wrapper over sse42_gap_bfind()
+    @ingroup SSE4
+*/
+inline
+unsigned sse42_gap_test(const unsigned short* BMRESTRICT buf, unsigned pos)
+{
+    unsigned is_set; // written by sse42_gap_bfind() on both of its return paths
+    bm::sse42_gap_bfind(buf, pos, &is_set); // the returned index is not needed here
+    return is_set;
+}
+
+
+
  /**
      Experimental (test) function to do SIMD vector search (lower bound)
      in sorted, growing array
@@ -1751,6 +1832,8 @@ void sse42_bit_block_xor(bm::word_t*  target_block,
  #define VECT_BIT_BLOCK_XOR(t, src, src_xor, d) \
      sse42_bit_block_xor(t, src, src_xor, d)
  
+#define VECT_GAP_BFIND(buf, pos, is_set) \
+    sse42_gap_bfind(buf, pos, is_set)
  
  #ifdef __GNUG__
  #pragma GCC diagnostic pop
diff --git a/c++/include/util/bitset/bmsse_util.h b/c++/include/util/bitset/bmsse_util.h

index 92ad1300f5aab945d25987a51bdf300af6b65548..e25046247d4f57ae99ae836421d2525358a28972 100644 (file)
--- a/c++/include/util/bitset/bmsse_util.h
+++ b/c++/include/util/bitset/bmsse_util.h
@@ -823,9 +823,6 @@ void sse2_stream_block(__m128i* BMRESTRICT dst,
  inline 
  void sse2_invert_block(__m128i* dst)
  {
-    //__m128i mZ = _mm_setzero_si128();
-    //__m128i maskF = _mm_cmpeq_epi8(mZ, mZ); // 0xFF..
-
      __m128i maskF = _mm_set1_epi32(~0u);
      __m128i* BMRESTRICT dst_end =
          (__m128i*)((bm::word_t*)(dst) + bm::set_block_size);
diff --git a/c++/include/util/bitset/bmstrsparsevec.h b/c++/include/util/bitset/bmstrsparsevec.h

index 82ef9e2a09b770656d80a0376dd89eb07956e84f..a2a0da377a3fee0694683ef939ba41e038e26405 100644 (file)
--- a/c++/include/util/bitset/bmstrsparsevec.h
+++ b/c++/include/util/bitset/bmstrsparsevec.h
@@ -95,19 +95,19 @@ public:
      {
      public:
          const_reference(const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv,
-                  size_type idx) BMNOEXEPT
+                  size_type idx) BMNOEXCEPT
          : str_sv_(str_sv), idx_(idx)
          {}
          
-        operator const value_type*() const
+        operator const value_type*() const BMNOEXCEPT
          {
              str_sv_.get(idx_, buf_, MAX_STR_SIZE);
              return &(buf_[0]);
          }
  
-        bool operator==(const const_reference& ref) const
+        bool operator==(const const_reference& ref) const BMNOEXCEPT // NOTE(review): bool() of each side goes through operator const value_type*(), i.e. pointer-to-bool, not a string compare - confirm intent
                                  { return bool(*this) == bool(ref); }
-        bool is_null() const { return str_sv_.is_null(idx_); }
+        bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
      private:
          const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv_;
          size_type                                            idx_;
@@ -122,11 +122,11 @@ public:
      {
      public:
          reference(str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv,
-                  size_type idx) BMNOEXEPT
+                  size_type idx) BMNOEXCEPT
          : str_sv_(str_sv), idx_(idx)
          {}
          
-        operator const value_type*() const
+        operator const value_type*() const BMNOEXCEPT
          {
              str_sv_.get(idx_, buf_, MAX_STR_SIZE);
              return &(buf_[0]);
@@ -144,9 +144,9 @@ public:
              str_sv_.set(idx_, str);
              return *this;
          }
-        bool operator==(const reference& ref) const
+        bool operator==(const reference& ref) const BMNOEXCEPT
                                  { return bool(*this) == bool(ref); }
-        bool is_null() const { return str_sv_.is_null(idx_); }
+        bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
      private:
          str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv_;
          size_type                                      idx_;
@@ -183,55 +183,56 @@ public:
          typedef CharType*                   pointer;
          typedef CharType*&                  reference;
      public:
-        const_iterator();
-        const_iterator(const str_sparse_vector_type* sv);
-        const_iterator(const str_sparse_vector_type* sv, size_type pos);
-        const_iterator(const const_iterator& it);
+        const_iterator() BMNOEXCEPT;
+        const_iterator(const str_sparse_vector_type* sv) BMNOEXCEPT;
+        const_iterator(const str_sparse_vector_type* sv, size_type pos) BMNOEXCEPT;
+        const_iterator(const const_iterator& it) BMNOEXCEPT;
          
-        bool operator==(const const_iterator& it) const
+        bool operator==(const const_iterator& it) const BMNOEXCEPT
                                  { return (pos_ == it.pos_) && (sv_ == it.sv_); }
-        bool operator!=(const const_iterator& it) const
+        bool operator!=(const const_iterator& it) const BMNOEXCEPT
                                  { return ! operator==(it); }
-        bool operator < (const const_iterator& it) const
+        bool operator < (const const_iterator& it) const BMNOEXCEPT
                                  { return pos_ < it.pos_; }
-        bool operator <= (const const_iterator& it) const
+        bool operator <= (const const_iterator& it) const BMNOEXCEPT
                                  { return pos_ <= it.pos_; }
-        bool operator > (const const_iterator& it) const
+        bool operator > (const const_iterator& it) const BMNOEXCEPT
                                  { return pos_ > it.pos_; }
-        bool operator >= (const const_iterator& it) const
+        bool operator >= (const const_iterator& it) const BMNOEXCEPT
                                  { return pos_ >= it.pos_; }
  
          /// \brief Get current position (value)
-        const value_type* operator*() const { return this->value(); }
+        const value_type* operator*() const BMNOEXCEPT { return this->value(); }
  
          /// \brief Advance to the next available value
-        const_iterator& operator++() { this->advance(); return *this; }
+        const_iterator& operator++() BMNOEXCEPT
+            { this->advance(); return *this; }
  
          /// \brief Advance to the next available value
-        const_iterator& operator++(int)
+        const_iterator operator++(int) BMNOEXCEPT // postfix: returns the pre-increment copy by value (tmp is a local, a reference would dangle)
              { const_iterator tmp(*this);this->advance(); return tmp; }
  
  
          /// \brief Get current position (value)
-        const value_type* value() const;
+        const value_type* value() const BMNOEXCEPT;
  
          /// \brief Get NULL status
-        bool is_null() const { return sv_->is_null(this->pos_); }
+        bool is_null() const BMNOEXCEPT { return sv_->is_null(this->pos_); }
  
          /// Returns true if iterator is at a valid position
-        bool valid() const { return pos_ != bm::id_max; }
+        bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
  
          /// Invalidate current iterator
-        void invalidate() { pos_ = bm::id_max; }
+        void invalidate() BMNOEXCEPT { pos_ = bm::id_max; }
  
          /// Current position (index) in the vector
-        size_type pos() const { return pos_; }
+        size_type pos() const BMNOEXCEPT { return pos_; }
  
          /// re-position to a specified position
-        void go_to(size_type pos);
+        void go_to(size_type pos) BMNOEXCEPT;
  
          /// advance iterator forward by one
-        void advance();
+        void advance() BMNOEXCEPT;
  
      protected:
          typedef bm::heap_matrix<CharType,
@@ -279,9 +280,9 @@ public:
          typedef void reference;
          
      public:
-        back_insert_iterator();
-        back_insert_iterator(str_sparse_vector_type* sv);
-        back_insert_iterator(const back_insert_iterator& bi);
+        back_insert_iterator() BMNOEXCEPT;
+        back_insert_iterator(str_sparse_vector_type* sv) BMNOEXCEPT;
+        back_insert_iterator(const back_insert_iterator& bi) BMNOEXCEPT;
          
          back_insert_iterator& operator=(const back_insert_iterator& bi)
          {
@@ -321,7 +322,7 @@ public:
          void add_null(size_type count);
  
          /** return true if insertion buffer is empty */
-        bool empty() const;
+        bool empty() const BMNOEXCEPT;
          
          /** flush the accumulated buffer */
          void flush();
@@ -393,7 +394,7 @@ public:
      }
  #ifndef BM_NO_CXX11
      /*! move-ctor */
-    str_sparse_vector(str_sparse_vector<CharType, BV, MAX_STR_SIZE>&& str_sv) BMNOEXEPT
+    str_sparse_vector(str_sparse_vector<CharType, BV, MAX_STR_SIZE>&& str_sv) BMNOEXCEPT
      {
          parent_type::swap(str_sv);
          remap_flags_ = str_sv.remap_flags_;
@@ -403,7 +404,7 @@ public:
  
      /*! move assignmment operator */
      str_sparse_vector<CharType, BV, MAX_STR_SIZE>& operator =
-            (str_sparse_vector<CharType, BV, MAX_STR_SIZE>&& str_sv) BMNOEXEPT
+            (str_sparse_vector<CharType, BV, MAX_STR_SIZE>&& str_sv) BMNOEXCEPT
      {
          if (this != &str_sv)
          {
@@ -475,7 +476,8 @@ public:
       
          @return string length
      */
-    size_type get(size_type idx, value_type* str, size_type buf_size) const;
+    size_type get(size_type idx,
+                 value_type* str, size_type buf_size) const BMNOEXCEPT;
      
      /*!
          \brief set specified element with bounds checking and automatic resize
@@ -568,7 +570,7 @@ public:
      }
  
      /*! Swap content */
-    void swap(str_sparse_vector& str_sv) BMNOEXEPT;
+    void swap(str_sparse_vector& str_sv) BMNOEXCEPT;
  
      ///@}
      
@@ -589,14 +591,14 @@ public:
       
          \return 0 - equal, < 0 - vect[i] < str, >0 otherwise
      */
-    int compare(size_type idx, const value_type* str) const;
+    int compare(size_type idx, const value_type* str) const BMNOEXCEPT;
      
      
      /**
          \brief Find size of common prefix between two vector elements in octets
          \return size of common prefix
      */
-    unsigned common_prefix_length(size_type idx1, size_type idx2) const;
+    unsigned common_prefix_length(size_type idx1, size_type idx2) const BMNOEXCEPT;
  
      ///@}
  
@@ -606,7 +608,7 @@ public:
      ///@{
  
      /*! \brief resize to zero, free memory */
-    void clear() BMNOEXEPT;
+    void clear() BMNOEXCEPT;
  
      /*!
          \brief clear range (assign bit 0 for all plains)
@@ -650,13 +652,11 @@ public:
      static size_type max_str() { return sv_octet_plains; }
      
      /*! \brief get effective string length used in vector
-    
-        Method returns efficiency, how close are we
-        to reserved maximum.
-    
+        Calculate and returns efficiency, how close are we
+        to the reserved maximum.
          \return current string length maximum
      */
-    size_type effective_max_str() const;
+    size_type effective_max_str() const BMNOEXCEPT;
      
      /*! \brief get effective string length used in vector
          \return current string length maximum
@@ -691,7 +691,9 @@ public:
  
          @sa statistics
      */
-    void calc_stat(struct str_sparse_vector<CharType, BV, MAX_STR_SIZE>::statistics* st) const;
+    void calc_stat(
+        struct str_sparse_vector<CharType, BV, MAX_STR_SIZE>::statistics* st
+        ) const BMNOEXCEPT;
      
      
      ///@}
@@ -701,15 +703,15 @@ public:
      //@{
  
      /** Provide const iterator access to container content  */
-    const_iterator begin() const;
+    const_iterator begin() const BMNOEXCEPT;
  
      /** Provide const iterator access to the end    */
-    const_iterator end() const { return const_iterator(this, bm::id_max); }
+    const_iterator end() const BMNOEXCEPT { return const_iterator(this, bm::id_max); }
  
      /** Get const_itertor re-positioned to specific element
      @param idx - position in the sparse vector
      */
-    const_iterator get_const_iterator(size_type idx) const
+    const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
          { return const_iterator(this, idx); }
      
       /** Provide back insert iterator
@@ -730,7 +732,7 @@ public:
      /** \brief trait if sparse vector is "compressed" (false)
      */
      static
-    bool is_compressed() { return false; }
+    bool is_compressed() BMNOEXCEPT { return false; }
  
      ///@}
  
@@ -745,7 +747,7 @@ public:
      /**
          Get remapping status (true|false)
      */
-    bool is_remap() const { return remap_flags_ != 0; }
+    bool is_remap() const BMNOEXCEPT { return remap_flags_ != 0; }
      
      /**
          Build remapping profile and load content from another sparse vector
@@ -757,7 +759,7 @@ public:
          Calculate flags which octets are present on each byte-plain.
          @internal
      */
-    void calc_octet_stat(plain_octet_matrix_type& octet_matrix) const;
+    void calc_octet_stat(plain_octet_matrix_type& octet_matrix) const BMNOEXCEPT;
  
      static
      void build_octet_remap(
@@ -771,10 +773,11 @@ public:
          @internal
      */
      static
-    bool remap_tosv(value_type*       sv_str,
-                    size_type         buf_size,
-                    const value_type* str,
-                    const plain_octet_matrix_type& octet_remap_matrix2);
+    bool remap_tosv(value_type*  BMRESTRICT      sv_str,
+                    size_type                    buf_size,
+                    const value_type* BMRESTRICT str,
+                    const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix2
+                    ) BMNOEXCEPT;
      
      /*!
          remap string from external (ASCII) system to matrix internal code
@@ -782,7 +785,7 @@ public:
      */
      bool remap_tosv(value_type*       sv_str,
                      size_type         buf_size,
-                    const value_type* str) const
+                    const value_type* str) const BMNOEXCEPT
      {
          return remap_tosv(sv_str, buf_size, str, remap_matrix2_);
      }
@@ -793,10 +796,12 @@ public:
          @internal
      */
      static
-    bool remap_fromsv(value_type*       str,
-                      size_type         buf_size,
-                      const value_type* sv_str,
-                      const plain_octet_matrix_type& octet_remap_matrix1);
+    bool remap_fromsv(
+            value_type*   BMRESTRICT     str,
+            size_type                    buf_size,
+            const value_type* BMRESTRICT sv_str,
+            const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix1
+            ) BMNOEXCEPT;
      /*!
          re-calculate remap matrix2 based on matrix1
          @internal
@@ -949,18 +954,18 @@ public:
          \return true, if it is the same
      */
      bool equal(const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& sv,
-               bm::null_support null_able = bm::use_null) const;
+               bm::null_support null_able = bm::use_null) const BMNOEXCEPT;
  
      /**
          \brief find position of compressed element by its rank
      */
      static
-    bool find_rank(size_type rank, size_type& pos);
+    bool find_rank(size_type rank, size_type& pos) BMNOEXCEPT;
      
      /**
          \brief size of sparse vector (may be different for RSC)
      */
-    size_type effective_size() const { return size(); }
+    size_type effective_size() const BMNOEXCEPT { return size(); }
  
  protected:
  
@@ -1138,7 +1143,8 @@ str_sparse_vector<CharType, BV, MAX_STR_SIZE>::str_sparse_vector(
  //---------------------------------------------------------------------
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
-void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::swap(str_sparse_vector& str_sv) BMNOEXEPT
+void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::swap(
+                                str_sparse_vector& str_sv) BMNOEXCEPT
  {
      parent_type::swap(str_sv);
      bm::xor_swap(remap_flags_, str_sv.remap_flags_);
@@ -1287,7 +1293,7 @@ void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::insert_value_no_null(
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type
  str_sparse_vector<CharType, BV, MAX_STR_SIZE>::get(
-            size_type idx, value_type* str, size_type buf_size) const
+            size_type idx, value_type* str, size_type buf_size) const BMNOEXCEPT
  {
      size_type i = 0;
      for (; i < MAX_STR_SIZE; ++i)
@@ -1330,7 +1336,8 @@ void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::optimize(
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::calc_stat(
-    struct str_sparse_vector<CharType, BV, MAX_STR_SIZE>::statistics* st) const
+    struct str_sparse_vector<CharType, BV, MAX_STR_SIZE>::statistics* st
+    ) const BMNOEXCEPT
  {
      BM_ASSERT(st);
      typename bvector_type::statistics stbv;
@@ -1362,7 +1369,7 @@ void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::calc_stat(
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  int str_sparse_vector<CharType, BV, MAX_STR_SIZE>::compare(
                       size_type idx,
-                     const value_type* str) const
+                     const value_type* str) const BMNOEXCEPT
  {
      BM_ASSERT(str);
      int res = 0;
@@ -1390,7 +1397,7 @@ int str_sparse_vector<CharType, BV, MAX_STR_SIZE>::compare(
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  unsigned str_sparse_vector<CharType, BV, MAX_STR_SIZE>::common_prefix_length(
-                                          size_type idx1, size_type idx2) const
+                                size_type idx1, size_type idx2) const BMNOEXCEPT
  {
      unsigned i = 0;
      for (; i < MAX_STR_SIZE; ++i)
@@ -1416,8 +1423,9 @@ unsigned str_sparse_vector<CharType, BV, MAX_STR_SIZE>::common_prefix_length(
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  bool 
-str_sparse_vector<CharType, BV, MAX_STR_SIZE>::find_rank(size_type rank,
-                                                         size_type& pos)
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::find_rank(
+                                                size_type rank,
+                                                size_type& pos) BMNOEXCEPT
  {
      BM_ASSERT(rank);
      pos = rank - 1;
@@ -1428,7 +1436,8 @@ str_sparse_vector<CharType, BV, MAX_STR_SIZE>::find_rank(size_type rank,
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type
-str_sparse_vector<CharType, BV, MAX_STR_SIZE>::effective_max_str() const
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::effective_max_str()
+                                                        const BMNOEXCEPT
  {
      for (int i = MAX_STR_SIZE-1; i >= 0; --i)
      {
@@ -1446,7 +1455,7 @@ str_sparse_vector<CharType, BV, MAX_STR_SIZE>::effective_max_str() const
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::calc_octet_stat(
-                    plain_octet_matrix_type& octet_matrix) const
+                    plain_octet_matrix_type& octet_matrix) const BMNOEXCEPT
  {
      octet_matrix.init();
      octet_matrix.set_zero();
@@ -1531,10 +1540,10 @@ void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::recalc_remap_matrix2()
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_tosv(
-                   value_type*       sv_str,
-                   size_type         buf_size,
-                   const value_type* str,
-                   const plain_octet_matrix_type& octet_remap_matrix2)
+       value_type*   BMRESTRICT     sv_str,
+       size_type                    buf_size,
+       const value_type* BMRESTRICT str,
+       const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
  {
      for (unsigned i = 0; i < buf_size; ++i)
      {
@@ -1559,10 +1568,11 @@ bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_tosv(
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_fromsv(
-                         value_type* str,
-                         size_type         buf_size,
-                         const value_type* sv_str,
-                         const plain_octet_matrix_type& octet_remap_matrix1)
+         value_type* BMRESTRICT str,
+         size_type         buf_size,
+         const value_type* BMRESTRICT sv_str,
+         const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix1
+         ) BMNOEXCEPT
  {
      for (unsigned i = 0; i < buf_size; ++i)
      {
@@ -1586,7 +1596,8 @@ bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_fromsv(
  //---------------------------------------------------------------------
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
-void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_from(const str_sparse_vector& str_sv)
+void
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_from(const str_sparse_vector& str_sv)
  {
      if (str_sv.is_remap())
      {
@@ -1639,7 +1650,7 @@ void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::sync(bool /*force*/)
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::equal(
                  const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& sv,
-                bm::null_support null_able) const
+                bm::null_support null_able) const BMNOEXCEPT
  {
      // at this point both vectors should have the same remap settings
      // to be considered "equal".
@@ -1686,7 +1697,7 @@ void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::copy_range(
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator
-str_sparse_vector<CharType, BV, MAX_STR_SIZE>::begin() const
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::begin() const BMNOEXCEPT
  {
      typedef typename
          str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator it_type;
@@ -1696,7 +1707,7 @@ str_sparse_vector<CharType, BV, MAX_STR_SIZE>::begin() const
  //---------------------------------------------------------------------
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
-void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::clear() BMNOEXEPT
+void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::clear() BMNOEXCEPT // forwards to parent_type::clear(); no work of its own
  {
      parent_type::clear();
  }
@@ -1736,7 +1747,7 @@ void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::throw_bad_value(
  
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
-str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator()
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator() BMNOEXCEPT // detached iterator: null sv_, pos_ = bm::id_max, pos_in_buf_ = ~size_type(0)
  : sv_(0), pos_(bm::id_max), pos_in_buf_(~size_type(0))
  {}
  
@@ -1744,7 +1755,7 @@ str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator()
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator(
-   const str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator& it)
+   const str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator& it) BMNOEXCEPT // copies sv_ and pos_ only; pos_in_buf_ is reset to ~size_type(0), not copied
  : sv_(it.sv_), pos_(it.pos_), pos_in_buf_(~size_type(0))
  {}
  
@@ -1752,7 +1763,7 @@ str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator(
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator(
-    const str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv)
+    const str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv) BMNOEXCEPT // sv is dereferenced unchecked (must be non-null); pos_ = 0, or bm::id_max when *sv is empty
  : sv_(sv), pos_(sv->empty() ? bm::id_max : 0), pos_in_buf_(~size_type(0))
  {}
  
@@ -1761,7 +1772,7 @@ str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator(
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator(
      const str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv,
-    typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type pos)
+    typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type pos) BMNOEXCEPT // sv is dereferenced unchecked (must be non-null); pos >= sv->size() is stored as bm::id_max
  : sv_(sv), pos_(pos >= sv->size() ? bm::id_max : pos), pos_in_buf_(~size_type(0))
  {}
  
@@ -1769,7 +1780,7 @@ str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator(
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  const typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::value_type*
-str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::value() const
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::value() const BMNOEXCEPT
  {
      BM_ASSERT(sv_);
      BM_ASSERT(this->valid());
@@ -1791,8 +1802,10 @@ str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::value() const
  //---------------------------------------------------------------------
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
-void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::go_to(
-    typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type pos)
+void
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::go_to(
+   typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type pos
+   ) BMNOEXCEPT
  {
      pos_ = (!sv_ || pos >= sv_->size()) ? bm::id_max : pos;
      pos_in_buf_ = ~size_type(0);
@@ -1801,7 +1814,8 @@ void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::go_to(
  //---------------------------------------------------------------------
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
-void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::advance()
+void
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::advance() BMNOEXCEPT
  {
      if (pos_ == bm::id_max) // nothing to do, we are at the end
          return;
@@ -1825,7 +1839,7 @@ void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::advance()
  //---------------------------------------------------------------------
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
-str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert_iterator()
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert_iterator() BMNOEXCEPT // detached inserter: null sv_ and bv_null_, pos_in_buf_ = ~size_type(0), prev_nb_ = 0
  : sv_(0), bv_null_(0), pos_in_buf_(~size_type(0)), prev_nb_(0)
  {}
  
@@ -1833,7 +1847,7 @@ str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert_iterator(
-           str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv)
+           str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv) BMNOEXCEPT
  : sv_(sv), pos_in_buf_(~size_type(0))
  {
      if (sv)
@@ -1851,7 +1865,7 @@ str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
  str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert_iterator(
-const str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator& bi)
+const str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator& bi) BMNOEXCEPT
  : sv_(bi.sv_), bv_null_(bi.bv_null_), pos_in_buf_(~size_type(0)), prev_nb_(bi.prev_nb_)
  {
      BM_ASSERT(bi.empty());
@@ -1868,7 +1882,9 @@ str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::~back_inser
  //---------------------------------------------------------------------
  
  template<class CharType, class BV, unsigned MAX_STR_SIZE>
-bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::empty() const
+bool // true while pos_in_buf_ still holds the ~size_type(0) value set by the constructors, or when sv_ is null
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::empty()
+                                                                const BMNOEXCEPT
  {
      return (pos_in_buf_ == ~size_type(0) || !sv_);
  }
diff --git a/c++/include/util/bitset/bmtimer.h b/c++/include/util/bitset/bmtimer.h

index 3c02262d0d2b4edf8dd201215a97e67e7d39ea62..d8d34f777d16ef3182f3ca4d0d689ef75ff69944 100644 (file)
--- a/c++/include/util/bitset/bmtimer.h
+++ b/c++/include/util/bitset/bmtimer.h
@@ -46,7 +46,7 @@ public:
          std::chrono::duration<double, std::milli>  duration;
          unsigned                                   repeats;
          
-        statistics() : repeats(1) {}
+        statistics() : duration(0), repeats(1) {} // duration is zeroed explicitly: a defaulted std::chrono::duration does not zero its tick count
          
          statistics(std::chrono::duration<double, std::milli> d, unsigned r)
          : duration(d), repeats(r)
@@ -147,7 +147,13 @@ public:
                  if (ms > 1000)
                  {
                      double sec = ms / 1000;
-                    std::cout << it->first << "; " << std::setprecision(4) << sec << " sec" << std::endl;
+                    if (sec > 60)
+                    {
+                        double min = sec / 60;
+                        std::cout << it->first << "; " << std::setprecision(4) << min << " min" << std::endl;
+                    }
+                    else
+                        std::cout << it->first << "; " << std::setprecision(4) << sec << " sec" << std::endl;
                  }
                  else
                      std::cout << it->first << "; " << it->second.duration.count() << " ms" << std::endl;
diff --git a/c++/include/util/bitset/bmundef.h b/c++/include/util/bitset/bmundef.h

index 324a7c23a6d8a9baa19d8e129b47aa7cdab95e81..7a1796a0eee4d42056a4c9778c02ef31ec6b99d4 100644 (file)
--- a/c++/include/util/bitset/bmundef.h
+++ b/c++/include/util/bitset/bmundef.h
@@ -72,7 +72,10 @@ For more information please visit:  http://bitmagic.io
  
  #undef VECT_ARR_BLOCK_LOOKUP
  #undef VECT_SET_BLOCK_BITS
+
  #undef VECT_BLOCK_CHANGE
+#undef VECT_BLOCK_CHANGE_BC
+
  #undef VECT_BIT_TO_GAP
  
  #undef VECT_AND_DIGEST
@@ -80,7 +83,12 @@ For more information please visit:  http://bitmagic.io
  #undef VECT_AND_DIGEST_5WAY
  #undef VECT_BLOCK_SET_DIGEST
  
+#undef VECT_BLOCK_XOR_CHANGE
+#undef VECT_BIT_BLOCK_XOR
+
+#undef VECT_BIT_FIND_FIRST
  #undef VECT_BIT_FIND_DIFF
+#undef VECT_GAP_BFIND
  
  #undef BMI1_SELECT64
  #undef BMI2_SELECT64
diff --git a/c++/include/util/bitset/bmutil.h b/c++/include/util/bitset/bmutil.h

index 7dba20dbfdf1d47665dec92b79bc60f69e28e646..6091861508f7ebfca3cf5fd34e765f7d20de2c53 100644 (file)
--- a/c++/include/util/bitset/bmutil.h
+++ b/c++/include/util/bitset/bmutil.h
@@ -94,22 +94,34 @@ namespace bm
              bm::word_t* end() { return (b_.w32 + bm::set_block_size); }
          };
      
-    
  /**
      Get minimum of 2 values
  */
  template<typename T>
-T min_value(T v1, T v2)
+T min_value(T v1, T v2) BMNOEXCEPT // needs only operator< on T; returns v2 when neither argument is less than the other
  {
      return v1 < v2 ? v1 : v2;
  }
  
+/**
+    \brief Compile-time boolean selector: conditional<B>::test() returns B as a run-time bool
+    \internal
+*/
+template <bool b> struct conditional
+{
+    static bool test() BMNOEXCEPT { return true; }  // primary template, used when b == true
+};
+template <> struct conditional<false>
+{
+    static bool test() BMNOEXCEPT { return false; } // explicit specialization for b == false
+};
+
  
  /**
      Fast loop-less function to find LOG2
  */
  template<typename T>
-T ilog2(T x)
+T ilog2(T x) BMNOEXCEPT
  {
      unsigned int l = 0;
      
@@ -122,7 +134,7 @@ T ilog2(T x)
  }
  
  template<>
-inline bm::gap_word_t ilog2(gap_word_t x)
+inline bm::gap_word_t ilog2(gap_word_t x) BMNOEXCEPT
  {
      unsigned int l = 0;
      if (x >= 1<<8)  { x = (bm::gap_word_t)(x >> 8); l |= 8; }
@@ -140,7 +152,7 @@ template<class T>
  class ptr_guard
  {
  public:
-    ptr_guard(T* p) : ptr_(p) {}
+    ptr_guard(T* p) BMNOEXCEPT : ptr_(p) {}
      ~ptr_guard() { delete ptr_; }
  private:
      ptr_guard(const ptr_guard<T>& p);
@@ -154,8 +166,7 @@ private:
      @ingroup bitfunc
      @internal
  */
-inline 
-unsigned count_leading_zeros(unsigned x) 
+inline unsigned count_leading_zeros(unsigned x) BMNOEXCEPT
  {
      unsigned n =
          (x >= (1U << 16)) ?
@@ -171,7 +182,7 @@ unsigned count_leading_zeros(unsigned x)
      @internal
  */
  inline
-unsigned count_trailing_zeros(unsigned v)
+unsigned count_trailing_zeros(unsigned v) BMNOEXCEPT
  {
      // (v & -v) isolates the last set bit
      return unsigned(bm::tzcnt_table<true>::_lut[(-v & v) % 37]);
@@ -181,7 +192,7 @@ unsigned count_trailing_zeros(unsigned v)
      Lookup table based integer LOG2
  */
  template<typename T>
-T ilog2_LUT(T x)
+T ilog2_LUT(T x) BMNOEXCEPT
  {
      unsigned l = 0;
      if (x & 0xffff0000) 
@@ -200,7 +211,7 @@ T ilog2_LUT(T x)
      Lookup table based short integer LOG2
  */
  template<>
-inline bm::gap_word_t ilog2_LUT<bm::gap_word_t>(bm::gap_word_t x)
+inline bm::gap_word_t ilog2_LUT<bm::gap_word_t>(bm::gap_word_t x) BMNOEXCEPT
  {
      bm::gap_word_t l = 0;
      if (x & 0xff00) 
@@ -218,7 +229,7 @@ inline bm::gap_word_t ilog2_LUT<bm::gap_word_t>(bm::gap_word_t x)
  #ifdef __GNUG__
  
  BMFORCEINLINE
-unsigned bsf_asm32(unsigned int v)
+unsigned bsf_asm32(unsigned int v) BMNOEXCEPT
  {
      unsigned r;
      asm volatile(" bsfl %1, %0": "=r"(r): "rm"(v) );
@@ -226,7 +237,7 @@ unsigned bsf_asm32(unsigned int v)
  }
   
  BMFORCEINLINE
-unsigned bsr_asm32(unsigned int v)
+unsigned bsr_asm32(unsigned int v) BMNOEXCEPT
  {
      unsigned r;
      asm volatile(" bsrl %1, %0": "=r"(r): "rm"(v) );
@@ -240,7 +251,7 @@ unsigned bsr_asm32(unsigned int v)
  #if defined(_M_AMD64) || defined(_M_X64) // inline assembly not supported
  
  BMFORCEINLINE
-unsigned int bsr_asm32(unsigned int value)
+unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
  {
      unsigned long r;
      _BitScanReverse(&r, value);
@@ -248,7 +259,7 @@ unsigned int bsr_asm32(unsigned int value)
  }
  
  BMFORCEINLINE
-unsigned int bsf_asm32(unsigned int value)
+unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
  {
      unsigned long r;
      _BitScanForward(&r, value);
@@ -258,13 +269,13 @@ unsigned int bsf_asm32(unsigned int value)
  #else
  
  BMFORCEINLINE
-unsigned int bsr_asm32(unsigned int value)
+unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT // non-x64 branch: the BSR result is left in EAX and there is no return statement; NOTE(review): relies on the compiler treating EAX as the return value
  {   
    __asm  bsr  eax, value
  }
  
  BMFORCEINLINE
-unsigned int bsf_asm32(unsigned int value)
+unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT // non-x64 branch: the BSF result is left in EAX and there is no return statement; NOTE(review): relies on the compiler treating EAX as the return value
  {   
    __asm  bsf  eax, value
  }
@@ -280,14 +291,14 @@ unsigned int bsf_asm32(unsigned int value)
  // http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.8562
  //
  template<typename T>
-T bit_scan_fwd(T v)
+T bit_scan_fwd(T v) BMNOEXCEPT // (v & -v) keeps only the lowest set bit; the product with 0x077CB531U, shifted right by 27, indexes the De Bruijn position table
  {
      return
          DeBruijn_bit_position<true>::_multiply[(((v & -v) * 0x077CB531U)) >> 27];
  }
  
  inline
-unsigned bit_scan_reverse32(unsigned value)
+unsigned bit_scan_reverse32(unsigned value) BMNOEXCEPT
  {
      BM_ASSERT(value);
  #if defined(BM_USE_GCC_BUILD)
@@ -302,7 +313,7 @@ unsigned bit_scan_reverse32(unsigned value)
  }
  
  inline
-unsigned bit_scan_forward32(unsigned value)
+unsigned bit_scan_forward32(unsigned value) BMNOEXCEPT
  {
      BM_ASSERT(value);
  #if defined(BM_USE_GCC_BUILD)
@@ -318,7 +329,7 @@ unsigned bit_scan_forward32(unsigned value)
  
  
  BMFORCEINLINE
-unsigned long long bmi_bslr_u64(unsigned long long w)
+unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
  {
  #if defined(BMAVX2OPT) || defined (BMAVX512OPT)
      return _blsr_u64(w);
@@ -339,7 +350,7 @@ unsigned long long bmi_blsi_u64(unsigned long long w)
  
  /// 64-bit bit-scan reverse
  inline
-unsigned count_leading_zeros_u64(bm::id64_t w)
+unsigned count_leading_zeros_u64(bm::id64_t w) BMNOEXCEPT
  {
      BM_ASSERT(w);
  #if defined(BMAVX2OPT) || defined (BMAVX512OPT)
@@ -367,7 +378,7 @@ unsigned count_leading_zeros_u64(bm::id64_t w)
  
  /// 64-bit bit-scan fwd
  inline
-unsigned count_trailing_zeros_u64(bm::id64_t w)
+unsigned count_trailing_zeros_u64(bm::id64_t w) BMNOEXCEPT
  {
      BM_ASSERT(w);
  
@@ -396,6 +407,72 @@ unsigned count_trailing_zeros_u64(bm::id64_t w)
  
  
  
+/*!
+    Bit-scan-reverse (BSR): 0-based index of the highest set bit of value, which must be non-zero
+    @ingroup bitfunc
+*/
+template <class T>
+unsigned bit_scan_reverse(T value) BMNOEXCEPT
+{
+    BM_ASSERT(value);
+    // sizeof(T)==8 is a compile-time constant; conditional<>::test() hands it back as a plain bool
+    if (bm::conditional<sizeof(T)==8>::test())
+    {
+    #if defined(BM_USE_GCC_BUILD)
+        return (unsigned) (63 - __builtin_clzll(value)); // 63 minus the leading-zero count = highest set bit
+    #else
+        bm::id64_t v8 = value;
+        v8 >>= 32;                  // keep only the upper 32 bits
+        unsigned v = (unsigned)v8;
+        if (v)                      // some bit is set in the upper half
+        {
+            v = bm::bit_scan_reverse32(v);
+            return v + 32;          // rebase the upper-half result onto the 64-bit index range
+        }
+    #endif
+    }
+    return bm::bit_scan_reverse32((unsigned)value); // sizeof(T) != 8, or (non-GCC path) a 64-bit value whose upper half is zero
+}
+
+/*! \brief Bitwise AND functor: op(v1, v2) returns v1 & v2
+    \internal
+ */
+struct and_func
+{
+    static
+    BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
+        { return v1 & v2; }
+};
+/*! \brief Bitwise XOR functor: op(v1, v2) returns v1 ^ v2
+    \internal
+ */
+struct xor_func
+{
+    static
+    BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
+        { return v1 ^ v2; }
+};
+/*! \brief Bitwise OR functor: op(v1, v2) returns v1 | v2
+    \internal
+ */
+struct or_func
+{
+    static
+    BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
+        { return v1 | v2; }
+};
+/*! \brief Bitwise SUB (AND-NOT) functor: op(v1, v2) returns v1 & ~v2, i.e. the bits of v1 that are not set in v2
+    \internal
+ */
+struct sub_func
+{
+    static
+    BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
+        { return v1 & ~v2; }
+};
+
+
+
  #ifdef __GNUG__
  #pragma GCC diagnostic pop
  #endif
diff --git a/c++/include/util/bitset/bmxor.h b/c++/include/util/bitset/bmxor.h

index b5bd1a4ee3e64c36efdba549fc28af8bbd8cfe85..6017304249de207d9bd5496f0eee29bcd7a44d3e 100644 (file)
--- a/c++/include/util/bitset/bmxor.h
+++ b/c++/include/util/bitset/bmxor.h
@@ -36,7 +36,7 @@ namespace bm
  inline
  unsigned bit_block_xor_change32(const bm::word_t* BMRESTRICT block,
                                  const bm::word_t* BMRESTRICT xor_block,
-                                unsigned size)
+                                unsigned size) BMNOEXCEPT
  {
      unsigned gap_count = 1;
  
@@ -80,7 +80,7 @@ unsigned bit_block_xor_change32(const bm::word_t* BMRESTRICT block,
  inline
  unsigned bit_block_xor_change(const bm::word_t* BMRESTRICT block,
                                const bm::word_t* BMRESTRICT xor_block,
-                              unsigned size)
+                              unsigned size) BMNOEXCEPT
  {
  #ifdef VECT_BLOCK_XOR_CHANGE
      return VECT_BLOCK_XOR_CHANGE(block, xor_block, size);
@@ -112,7 +112,7 @@ struct block_waves_xor_descr
  inline
  void compute_complexity_descr(
                          const bm::word_t* BMRESTRICT block,
-                        block_waves_xor_descr& BMRESTRICT x_descr)
+                        block_waves_xor_descr& BMRESTRICT x_descr) BMNOEXCEPT
  {
      for (unsigned i = 0; i < bm::block_waves; ++i)
      {
@@ -146,7 +146,7 @@ bm::id64_t compute_xor_complexity_descr(
                          const bm::word_t* BMRESTRICT block,
                          const bm::word_t* BMRESTRICT xor_block,
                          block_waves_xor_descr& BMRESTRICT x_descr,
-                        unsigned& block_gain)
+                        unsigned& BMRESTRICT block_gain) BMNOEXCEPT
  {
      block_gain = 0; // approximate block gain (sum of sub-waves)
      bm::id64_t digest = 0;
@@ -191,7 +191,7 @@ bm::id64_t compute_xor_complexity_descr(
  inline
  void bit_block_xor(bm::word_t*  target_block,
                     const bm::word_t*  block, const bm::word_t*  xor_block,
-                   bm::id64_t digest)
+                   bm::id64_t digest) BMNOEXCEPT
  {
      BM_ASSERT(target_block);
      BM_ASSERT(block);
@@ -267,21 +267,23 @@ public:
      }
  
      /// Get reference list size
-    size_type size() const { return (size_type)ref_bvects_.size(); }
+    size_type size() const BMNOEXCEPT { return (size_type)ref_bvects_.size(); }
  
      /// Get reference vector by the index in this ref-vector
-    const bvector_type* get_bv(size_type idx) const { return ref_bvects_[idx]; }
+    const bvector_type* get_bv(size_type idx) const BMNOEXCEPT
+                                        { return ref_bvects_[idx]; }
  
      /// Get reference row index by the index in this ref-vector
-    size_type get_row_idx(size_type idx) const { return (size_type)ref_bvects_rows_[idx]; }
+    size_type get_row_idx(size_type idx) const BMNOEXCEPT
+                        { return (size_type)ref_bvects_rows_[idx]; }
  
      /// not-found value for find methods
      static
-    size_type not_found() { return ~(size_type(0)); }
+    size_type not_found() BMNOEXCEPT { return ~(size_type(0)); }
  
      /// Find vector index by the reference index
      /// @return ~0 if not found
-    size_type find(std::size_t ref_idx) const
+    size_type find(std::size_t ref_idx) const BMNOEXCEPT
      {
          size_type sz = size();
          for (size_type i = 0; i < sz; ++i)
@@ -333,13 +335,16 @@ public:
      typedef typename bvector_type::size_type     size_type;
  
  public:
-    void set_ref_vector(const bv_ref_vector_type* ref_vect) { ref_vect_ = ref_vect; }
-    const bv_ref_vector_type& get_ref_vector() const { return *ref_vect_; }
+    void set_ref_vector(const bv_ref_vector_type* ref_vect) BMNOEXCEPT
+    { ref_vect_ = ref_vect; } // stores the pointer only: no copy is made and no null check is done
+
+    const bv_ref_vector_type& get_ref_vector() const BMNOEXCEPT
+    { return *ref_vect_; } // unchecked dereference: ref_vect_ is initialized to 0, so set_ref_vector() must be called with a non-null pointer first
  
      /** Compute statistics for the anchor search vector
          @param block - bit-block target
      */
-    void compute_x_block_stats(const bm::word_t* block);
+    void compute_x_block_stats(const bm::word_t* block) BMNOEXCEPT;
  
      /** Scan for all candidate bit-blocks to find mask or match
          @return true if XOR complement or matching vector found
@@ -360,23 +365,26 @@ public:
      /**
          Validate serialization target
      */
-    bool validate_found(bm::word_t* xor_block, const bm::word_t* block) const;
+    bool validate_found(bm::word_t* xor_block,
+                        const bm::word_t* block) const BMNOEXCEPT;
  
-    size_type found_ridx() const { return found_ridx_; }
-    const bm::word_t* get_found_block() const { return found_block_xor_; }
-    unsigned get_x_best_metric() const { return x_best_metric_; }
-    bm::id64_t get_xor_digest() const { return x_d64_; }
+    size_type found_ridx() const BMNOEXCEPT { return found_ridx_; }
+    const bm::word_t* get_found_block() const BMNOEXCEPT
+    { return found_block_xor_; }
+    unsigned get_x_best_metric() const BMNOEXCEPT { return x_best_metric_; }
+    bm::id64_t get_xor_digest() const BMNOEXCEPT { return x_d64_; }
  
      /// true if completely identical vector found
-    bool is_eq_found() const { return !x_best_metric_; }
+    bool is_eq_found() const BMNOEXCEPT { return !x_best_metric_; }
  
  
-    unsigned get_x_bc() const { return x_bc_; }
-    unsigned get_x_gc() const { return x_gc_; }
-    unsigned get_x_block_best() const { return x_block_best_metric_; }
+    unsigned get_x_bc() const BMNOEXCEPT { return x_bc_; }
+    unsigned get_x_gc() const BMNOEXCEPT { return x_gc_; }
+    unsigned get_x_block_best() const BMNOEXCEPT
+                    { return x_block_best_metric_; }
  
  
-    bm::block_waves_xor_descr& get_descr() { return x_descr_; }
+    bm::block_waves_xor_descr& get_descr() BMNOEXCEPT { return x_descr_; }
  
  private:
      const bv_ref_vector_type*        ref_vect_ = 0; ///< ref.vect for XOR filter
@@ -400,7 +408,7 @@ private:
  // --------------------------------------------------------------------------
  
  template<typename BV>
-void xor_scanner<BV>::compute_x_block_stats(const bm::word_t* block)
+void xor_scanner<BV>::compute_x_block_stats(const bm::word_t* block) BMNOEXCEPT
  {
      BM_ASSERT(IS_VALID_ADDR(block));
      BM_ASSERT(!BM_IS_GAP(block));
@@ -439,7 +447,8 @@ bool xor_scanner<BV>::search_best_xor_mask(const bm::word_t* block,
      {
          const bvector_type* bv = ref_vect_->get_bv(ri);
          BM_ASSERT(bv);
-        const typename bvector_type::blocks_manager_type& bman = bv->get_blocks_manager();
+        const typename bvector_type::blocks_manager_type& bman =
+                                                bv->get_blocks_manager();
          const bm::word_t* block_xor = bman.get_block_ptr(i, j);
          if (!IS_VALID_ADDR(block_xor) || BM_IS_GAP(block_xor))
              continue;
@@ -487,7 +496,7 @@ bool xor_scanner<BV>::search_best_xor_mask(const bm::word_t* block,
              if (!xor_bc) // completely identical block?
              {
                  unsigned pos;
-                bool f = bit_find_first_diff(block, block_xor, &pos);
+                bool f = bm::bit_find_first_diff(block, block_xor, &pos);
                  x_best_metric_ += f;
              }
          }
@@ -555,7 +564,7 @@ bool xor_scanner<BV>::search_best_xor_gap(const bm::word_t* block,
  
  template<typename BV>
  bool xor_scanner<BV>::validate_found(bm::word_t* xor_block,
-                                     const bm::word_t* block) const
+                                     const bm::word_t* block) const BMNOEXCEPT
  {
      bm::id64_t d64 = get_xor_digest();
      BM_ASSERT(d64);
diff --git a/c++/include/util/bitset/encoding.h b/c++/include/util/bitset/encoding.h

index b30ac412829cfe9be8823a91652a85edf9f96ef9..19c9112d571721b6c8768856bd95f37cde04616b 100644 (file)
--- a/c++/include/util/bitset/encoding.h
+++ b/c++/include/util/bitset/encoding.h
@@ -51,24 +51,24 @@ class encoder
  public:
      typedef unsigned char* position_type;
  public:
-    encoder(unsigned char* buf, size_t size);
-    void put_8(unsigned char c);
-    void put_16(bm::short_t  s);
-    void put_16(const bm::short_t* s, unsigned count);
-    void put_24(bm::word_t  w);
-    void put_32(bm::word_t  w);
-    void put_32(const bm::word_t* w, unsigned count);
-    void put_48(bm::id64_t w);
-    void put_64(bm::id64_t w);
+    encoder(unsigned char* buf, size_t size) BMNOEXCEPT;
+    void put_8(unsigned char c) BMNOEXCEPT;
+    void put_16(bm::short_t  s) BMNOEXCEPT;
+    void put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT;
+    void put_24(bm::word_t  w) BMNOEXCEPT;
+    void put_32(bm::word_t  w) BMNOEXCEPT;
+    void put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT;
+    void put_48(bm::id64_t w) BMNOEXCEPT;
+    void put_64(bm::id64_t w) BMNOEXCEPT;
      void put_prefixed_array_32(unsigned char c, 
-                               const bm::word_t* w, unsigned count);
+                               const bm::word_t* w, unsigned count) BMNOEXCEPT;
      void put_prefixed_array_16(unsigned char c, 
                                 const bm::short_t* s, unsigned count,
-                               bool encode_count);
-    void memcpy(const unsigned char* src, size_t count);
-    size_t size() const;
-    unsigned char* get_pos() const;
-    void set_pos(unsigned char* buf_pos);
+                               bool encode_count) BMNOEXCEPT;
+    void memcpy(const unsigned char* src, size_t count) BMNOEXCEPT;
+    size_t size() const BMNOEXCEPT;
+    unsigned char* get_pos() const BMNOEXCEPT;
+    void set_pos(unsigned char* buf_pos) BMNOEXCEPT;
  private:
      unsigned char*  buf_;
      unsigned char*  start_;
@@ -83,25 +83,25 @@ private:
  class decoder_base
  {
  public:
-    decoder_base(const unsigned char* buf) { buf_ = start_ = buf; }
+    decoder_base(const unsigned char* buf) BMNOEXCEPT { buf_ = start_ = buf; }
      
      /// Reads character from the decoding buffer. 
-    unsigned char get_8() { return *buf_++; }
+    unsigned char get_8() BMNOEXCEPT { return *buf_++; }
      
      /// Returns size of the current decoding stream.
-    size_t size() const { return size_t(buf_ - start_); }
+    size_t size() const BMNOEXCEPT { return size_t(buf_ - start_); }
      
      /// change current position
-    void seek(int delta) { buf_ += delta; }
+    void seek(int delta) BMNOEXCEPT { buf_ += delta; }
      
      /// read bytes from the decode buffer
-    void memcpy(unsigned char* dst, size_t count);
+    void memcpy(unsigned char* dst, size_t count) BMNOEXCEPT;
      
      /// Return current buffer pointer
-    const unsigned char* get_pos() const { return buf_; }
+    const unsigned char* get_pos() const BMNOEXCEPT { return buf_; }
  
      /// Set current buffer pointer
-    void set_pos(const unsigned char* pos) { buf_ = pos; }
+    void set_pos(const unsigned char* pos) BMNOEXCEPT { buf_ = pos; }
  protected:
     const unsigned char*   buf_;
     const unsigned char*   start_;
@@ -117,16 +117,16 @@ protected:
  class decoder : public decoder_base
  {
  public:
-    decoder(const unsigned char* buf);
-    bm::short_t get_16();
-    bm::word_t get_24();
-    bm::word_t get_32();
-    bm::id64_t get_48();
-    bm::id64_t get_64();
-    void get_32(bm::word_t* w, unsigned count);
-    bool get_32_OR(bm::word_t* w, unsigned count);
-    void get_32_AND(bm::word_t* w, unsigned count);
-    void get_16(bm::short_t* s, unsigned count);
+    decoder(const unsigned char* buf) BMNOEXCEPT;
+    bm::short_t get_16() BMNOEXCEPT;
+    bm::word_t get_24() BMNOEXCEPT;
+    bm::word_t get_32() BMNOEXCEPT;
+    bm::id64_t get_48() BMNOEXCEPT;
+    bm::id64_t get_64() BMNOEXCEPT;
+    void get_32(bm::word_t* w, unsigned count) BMNOEXCEPT;
+    bool get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT;
+    void get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT;
+    void get_16(bm::short_t* s, unsigned count) BMNOEXCEPT;
  };
  
  // ----------------------------------------------------------------
@@ -181,23 +181,23 @@ public:
      ~bit_out() { flush(); }
      
      /// issue single bit into encode bit-stream
-    void put_bit(unsigned value);
+    void put_bit(unsigned value) BMNOEXCEPT;
  
      /// issue count bits out of value
-    void put_bits(unsigned value, unsigned count);
+    void put_bits(unsigned value, unsigned count) BMNOEXCEPT;
  
      /// issue 0 into output stream
-    void put_zero_bit();
+    void put_zero_bit() BMNOEXCEPT;
  
      /// issue specified number of 0s
-    void put_zero_bits(unsigned count);
+    void put_zero_bits(unsigned count) BMNOEXCEPT;
  
      /// Elias Gamma encode the specified value
-    void gamma(unsigned value);
+    void gamma(unsigned value) BMNOEXCEPT;
      
      /// Binary Interpolative array decode
      void bic_encode_u16(const bm::gap_word_t* arr, unsigned sz,
-                        bm::gap_word_t lo, bm::gap_word_t hi)
+                        bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
      {
          bic_encode_u16_cm(arr, sz, lo, hi);
      }
@@ -205,24 +205,24 @@ public:
      /// Binary Interpolative encoding (array of 16-bit ints)
      void bic_encode_u16_rg(const bm::gap_word_t* arr, unsigned sz,
                             bm::gap_word_t lo,
-                           bm::gap_word_t hi);
+                           bm::gap_word_t hi) BMNOEXCEPT;
      
      /// Binary Interpolative encoding (array of 16-bit ints)
      /// cm - "center-minimal"
      void bic_encode_u16_cm(const bm::gap_word_t* arr, unsigned sz,
                             bm::gap_word_t lo,
-                           bm::gap_word_t hi);
+                           bm::gap_word_t hi) BMNOEXCEPT;
  
      /// Binary Interpolative encoding (array of 32-bit ints)
      /// cm - "center-minimal"
      void bic_encode_u32_cm(const bm::word_t* arr, unsigned sz,
-                           bm::word_t lo, bm::word_t hi);
+                           bm::word_t lo, bm::word_t hi) BMNOEXCEPT;
  
      /// Flush the incomplete 32-bit accumulator word
-    void flush() { if (used_bits_) flush_accum(); }
+    void flush() BMNOEXCEPT { if (used_bits_) flush_accum(); }
  
  private:
-    void flush_accum()
+    void flush_accum() BMNOEXCEPT
      {
          dest_.put_32(accum_);
          used_bits_ = accum_ = 0;
@@ -248,31 +248,32 @@ template<class TDecoder>
  class bit_in
  {
  public:
-    bit_in(TDecoder& decoder)
+    bit_in(TDecoder& decoder) BMNOEXCEPT
          : src_(decoder),
            used_bits_(unsigned(sizeof(accum_) * 8)),
-          accum_(0)
+          accum_(0) 
      {}
  
      /// decode unsigned value using Elias Gamma coding
-    unsigned gamma();
+    unsigned gamma() BMNOEXCEPT;
      
      /// read number of bits out of the stream
-    unsigned get_bits(unsigned count);
+    unsigned get_bits(unsigned count) BMNOEXCEPT;
  
      /// Binary Interpolative array decode
      void bic_decode_u16(bm::gap_word_t* arr, unsigned sz,
-                        bm::gap_word_t lo, bm::gap_word_t hi)
+                        bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
      {
          bic_decode_u16_cm(arr, sz, lo, hi);
      }
      
      void bic_decode_u16_bitset(bm::word_t* block, unsigned sz,
-                               bm::gap_word_t lo, bm::gap_word_t hi)
+                               bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
      {
          bic_decode_u16_cm_bitset(block, sz, lo, hi);
      }
-    void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
+    void bic_decode_u16_dry(unsigned sz,
+                            bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
      {
          bic_decode_u16_cm_dry(sz, lo, hi);
      }
@@ -280,29 +281,32 @@ public:
  
      /// Binary Interpolative array decode
      void bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz,
-                           bm::gap_word_t lo, bm::gap_word_t hi);
+                           bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT;
      /// Binary Interpolative array decode
      void bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz,
-                           bm::gap_word_t lo, bm::gap_word_t hi);
+                           bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT;
  
      /// Binary Interpolative array decode (32-bit)
      void bic_decode_u32_cm(bm::word_t* arr, unsigned sz,
-                           bm::word_t lo, bm::word_t hi);
+                           bm::word_t lo, bm::word_t hi) BMNOEXCEPT;
  
  
      /// Binary Interpolative array decode into bitset (32-bit based)
      void bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz,
-                                  bm::gap_word_t lo, bm::gap_word_t hi);
+                                  bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT;
  
      /// Binary Interpolative array decode into /dev/null
-    void bic_decode_u16_rg_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi);
+    void bic_decode_u16_rg_dry(unsigned sz,
+                               bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT;
  
      /// Binary Interpolative array decode into bitset (32-bit based)
      void bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz,
-                                  bm::gap_word_t lo, bm::gap_word_t hi);
+                                  bm::gap_word_t lo,
+                                  bm::gap_word_t hi) BMNOEXCEPT;
  
      /// Binary Interpolative array decode into /dev/null
-    void bic_decode_u16_cm_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi);
+    void bic_decode_u16_cm_dry(unsigned sz,
+                               bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT;
  
  private:
      bit_in(const bit_in&);
@@ -377,7 +381,7 @@ private:
      \param buf - memory buffer pointer.
      \param size - size of the buffer
  */
-inline encoder::encoder(unsigned char* buf, size_t a_size)
+inline encoder::encoder(unsigned char* buf, size_t a_size) BMNOEXCEPT
  : buf_(buf), start_(buf)
  {
      size_ = a_size;
@@ -387,7 +391,7 @@ inline encoder::encoder(unsigned char* buf, size_t a_size)
  */
  inline void encoder::put_prefixed_array_32(unsigned char c, 
                                             const bm::word_t* w, 
-                                           unsigned count)
+                                           unsigned count) BMNOEXCEPT
  {
      put_8(c);
      put_32(w, count);
@@ -399,7 +403,7 @@ inline void encoder::put_prefixed_array_32(unsigned char c,
  inline void encoder::put_prefixed_array_16(unsigned char c, 
                                             const bm::short_t* s, 
                                             unsigned count,
-                                           bool encode_count)
+                                           bool encode_count) BMNOEXCEPT
  {
      put_8(c);
      if (encode_count)
@@ -413,7 +417,7 @@ inline void encoder::put_prefixed_array_16(unsigned char c,
     \brief Puts one character into the encoding buffer.
     \param c - character to encode
  */
-BMFORCEINLINE void encoder::put_8(unsigned char c)
+BMFORCEINLINE void encoder::put_8(unsigned char c) BMNOEXCEPT
  {
      *buf_++ = c;
  }
@@ -423,7 +427,7 @@ BMFORCEINLINE void encoder::put_8(unsigned char c)
     \brief Puts short word (16 bits) into the encoding buffer.
     \param s - short word to encode
  */
-BMFORCEINLINE void encoder::put_16(bm::short_t s)
+BMFORCEINLINE void encoder::put_16(bm::short_t s) BMNOEXCEPT
  {
  #if (BM_UNALIGNED_ACCESS_OK == 1)
      ::memcpy(buf_, &s, sizeof(bm::short_t)); // optimizer takes care of it
@@ -438,7 +442,7 @@ BMFORCEINLINE void encoder::put_16(bm::short_t s)
  /*!
     \brief Method puts array of short words (16 bits) into the encoding buffer.
  */
-inline void encoder::put_16(const bm::short_t* s, unsigned count)
+inline void encoder::put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT
  {
  #if (BM_UNALIGNED_ACCESS_OK == 1)
      ::memcpy(buf_, s, sizeof(bm::short_t)*count);
@@ -465,7 +469,7 @@ inline void encoder::put_16(const bm::short_t* s, unsigned count)
      \brief copy bytes into target buffer or just rewind if src is NULL
  */
  inline
-void encoder::memcpy(const unsigned char* src, size_t count)
+void encoder::memcpy(const unsigned char* src, size_t count) BMNOEXCEPT
  {
      BM_ASSERT((buf_ + count) < (start_ + size_));
      if (src)
@@ -478,7 +482,7 @@ void encoder::memcpy(const unsigned char* src, size_t count)
     \fn unsigned encoder::size() const
     \brief Returns size of the current encoding stream.
  */
-inline size_t encoder::size() const
+inline size_t encoder::size() const BMNOEXCEPT
  {
      return size_t(buf_ - start_);
  }
@@ -486,7 +490,7 @@ inline size_t encoder::size() const
  /**
      \brief Get current memory stream position
  */
-inline encoder::position_type encoder::get_pos() const
+inline encoder::position_type encoder::get_pos() const BMNOEXCEPT
  {
      return buf_;
  }
@@ -494,7 +498,7 @@ inline encoder::position_type encoder::get_pos() const
  /**
      \brief Set current memory stream position
  */
-inline void encoder::set_pos(encoder::position_type buf_pos)
+inline void encoder::set_pos(encoder::position_type buf_pos) BMNOEXCEPT
  {
      buf_ = buf_pos;
  }
@@ -504,7 +508,7 @@ inline void encoder::set_pos(encoder::position_type buf_pos)
     \brief Puts 24 bits word into encoding buffer.
     \param w - word to encode.
  */
-inline void encoder::put_24(bm::word_t w)
+inline void encoder::put_24(bm::word_t w) BMNOEXCEPT
  {
      BM_ASSERT((w & ~(0xFFFFFFU)) == 0);
  
@@ -520,7 +524,7 @@ inline void encoder::put_24(bm::word_t w)
     \brief Puts 32 bits word into encoding buffer.
     \param w - word to encode.
  */
-inline void encoder::put_32(bm::word_t w)
+inline void encoder::put_32(bm::word_t w) BMNOEXCEPT
  {
  #if (BM_UNALIGNED_ACCESS_OK == 1)
      ::memcpy(buf_, &w, sizeof(bm::word_t));
@@ -538,7 +542,7 @@ inline void encoder::put_32(bm::word_t w)
     \brief Puts 48 bits word into encoding buffer.
     \param w - word to encode.
  */
-inline void encoder::put_48(bm::id64_t w)
+inline void encoder::put_48(bm::id64_t w) BMNOEXCEPT
  { 
      BM_ASSERT((w & ~(0xFFFFFFFFFFFFUL)) == 0);
      *buf_++ = (unsigned char)w;
@@ -555,7 +559,7 @@ inline void encoder::put_48(bm::id64_t w)
     \brief Puts 64 bits word into encoding buffer.
     \param w - word to encode.
  */
-inline void encoder::put_64(bm::id64_t w)
+inline void encoder::put_64(bm::id64_t w) BMNOEXCEPT
  {
  #if (BM_UNALIGNED_ACCESS_OK == 1)
      ::memcpy(buf_, &w, sizeof(bm::id64_t));
@@ -576,10 +580,10 @@ inline void encoder::put_64(bm::id64_t w)
  /*!
      \brief Encodes array of 32-bit words
  */
-inline 
-void encoder::put_32(const bm::word_t* w, unsigned count)
+inline void encoder::put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT
  {
  #if (BM_UNALIGNED_ACCESS_OK == 1)
+    // use memcpy() because compilers now understand it as an idiom and inline
      ::memcpy(buf_, w, sizeof(bm::word_t) * count);
      buf_ += sizeof(bm::word_t) * count;
  #else
@@ -611,7 +615,7 @@ void encoder::put_32(const bm::word_t* w, unsigned count)
      Load bytes from the decode buffer
  */
  inline
-void decoder_base::memcpy(unsigned char* dst, size_t count)
+void decoder_base::memcpy(unsigned char* dst, size_t count) BMNOEXCEPT
  {
      if (dst)
          ::memcpy(dst, buf_, count);
@@ -623,7 +627,7 @@ void decoder_base::memcpy(unsigned char* dst, size_t count)
     \brief Construction
     \param buf - pointer to the decoding memory. 
  */
-inline decoder::decoder(const unsigned char* buf) 
+inline decoder::decoder(const unsigned char* buf) BMNOEXCEPT
  : decoder_base(buf)
  {
  }
@@ -632,7 +636,7 @@ inline decoder::decoder(const unsigned char* buf)
     \fn bm::short_t decoder::get_16()
     \brief Reads 16-bit word from the decoding buffer.
  */
-BMFORCEINLINE bm::short_t decoder::get_16() 
+BMFORCEINLINE bm::short_t decoder::get_16() BMNOEXCEPT
  {
  #if (BM_UNALIGNED_ACCESS_OK == 1)
      bm::short_t a;
@@ -648,7 +652,7 @@ BMFORCEINLINE bm::short_t decoder::get_16()
     \fn bm::word_t decoder::get_24()
     \brief Reads 32-bit word from the decoding buffer.
  */
-inline bm::word_t decoder::get_24()
+inline bm::word_t decoder::get_24() BMNOEXCEPT
  {
      bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
          ((unsigned)buf_[2] << 16);
@@ -661,7 +665,7 @@ inline bm::word_t decoder::get_24()
     \fn bm::word_t decoder::get_32()
     \brief Reads 32-bit word from the decoding buffer.
  */
-BMFORCEINLINE bm::word_t decoder::get_32() 
+BMFORCEINLINE bm::word_t decoder::get_32() BMNOEXCEPT
  {
  #if (BM_UNALIGNED_ACCESS_OK == 1)
      bm::word_t a;
@@ -679,7 +683,7 @@ BMFORCEINLINE bm::word_t decoder::get_32()
     \brief Reads 64-bit word from the decoding buffer.
  */
  inline
-bm::id64_t decoder::get_48()
+bm::id64_t decoder::get_48() BMNOEXCEPT
  {
      bm::id64_t a = buf_[0] +
          ((bm::id64_t)buf_[1] << 8) +
@@ -696,7 +700,7 @@ bm::id64_t decoder::get_48()
     \brief Reads 64-bit word from the decoding buffer.
  */
  inline
-bm::id64_t decoder::get_64()
+bm::id64_t decoder::get_64() BMNOEXCEPT
  {
  #if (BM_UNALIGNED_ACCESS_OK == 1)
      bm::id64_t a;
@@ -722,7 +726,7 @@ bm::id64_t decoder::get_64()
     \param w - pointer on memory block to read into.
     \param count - size of memory block in words.
  */
-inline void decoder::get_32(bm::word_t* w, unsigned count)
+inline void decoder::get_32(bm::word_t* w, unsigned count) BMNOEXCEPT
  {
      if (!w) 
      {
@@ -754,7 +758,7 @@ inline void decoder::get_32(bm::word_t* w, unsigned count)
     \param count - should match bm::set_block_size
  */
  inline
-bool decoder::get_32_OR(bm::word_t* w, unsigned count)
+bool decoder::get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT
  {
      if (!w)
      {
@@ -795,7 +799,7 @@ bool decoder::get_32_OR(bm::word_t* w, unsigned count)
     \param count - should match bm::set_block_size
  */
  inline
-void decoder::get_32_AND(bm::word_t* w, unsigned count)
+void decoder::get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT
  {
      if (!w)
      {
@@ -833,7 +837,7 @@ void decoder::get_32_AND(bm::word_t* w, unsigned count)
     \param s - pointer on memory block to read into.
     \param count - size of memory block in words.
  */
-inline void decoder::get_16(bm::short_t* s, unsigned count)
+inline void decoder::get_16(bm::short_t* s, unsigned count) BMNOEXCEPT
  {
      if (!s) 
      {
@@ -1004,7 +1008,7 @@ void decoder_little_endian::get_16(bm::short_t* s, unsigned count)
  //
  
  template<typename TEncoder>
-void bit_out<TEncoder>::put_bit(unsigned value)
+void bit_out<TEncoder>::put_bit(unsigned value) BMNOEXCEPT
  {
      BM_ASSERT(value <= 1);
      accum_ |= (value << used_bits_);
@@ -1015,7 +1019,7 @@ void bit_out<TEncoder>::put_bit(unsigned value)
  // ----------------------------------------------------------------------
  
  template<typename TEncoder>
-void bit_out<TEncoder>::put_bits(unsigned value, unsigned count)
+void bit_out<TEncoder>::put_bits(unsigned value, unsigned count) BMNOEXCEPT
  {
      unsigned used = used_bits_;
      unsigned acc = accum_;
@@ -1057,7 +1061,7 @@ void bit_out<TEncoder>::put_bits(unsigned value, unsigned count)
  // ----------------------------------------------------------------------
  
  template<typename TEncoder>
-void bit_out<TEncoder>::put_zero_bit()
+void bit_out<TEncoder>::put_zero_bit() BMNOEXCEPT
  {
      if (++used_bits_ == (sizeof(accum_) * 8))
          flush_accum();
@@ -1066,7 +1070,7 @@ void bit_out<TEncoder>::put_zero_bit()
  // ----------------------------------------------------------------------
  
  template<typename TEncoder>
-void bit_out<TEncoder>::put_zero_bits(unsigned count)
+void bit_out<TEncoder>::put_zero_bits(unsigned count) BMNOEXCEPT
  {
      unsigned used = used_bits_;
      unsigned free_bits = (sizeof(accum_) * 8) - used;
@@ -1096,7 +1100,7 @@ void bit_out<TEncoder>::put_zero_bits(unsigned count)
  // ----------------------------------------------------------------------
  
  template<typename TEncoder>
-void bit_out<TEncoder>::gamma(unsigned value)
+void bit_out<TEncoder>::gamma(unsigned value) BMNOEXCEPT
  {
      BM_ASSERT(value);
  
@@ -1168,9 +1172,10 @@ void bit_out<TEncoder>::gamma(unsigned value)
  // ----------------------------------------------------------------------
  
  template<typename TEncoder>
-void bit_out<TEncoder>::bic_encode_u16_rg(const bm::gap_word_t* arr,
-                                          unsigned sz,
-                                          bm::gap_word_t lo, bm::gap_word_t hi)
+void bit_out<TEncoder>::bic_encode_u16_rg(
+                                const bm::gap_word_t* arr,
+                                unsigned sz,
+                                bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
  {
      for (;sz;)
      {
@@ -1204,7 +1209,8 @@ void bit_out<TEncoder>::bic_encode_u16_rg(const bm::gap_word_t* arr,
  template<typename TEncoder>
  void bit_out<TEncoder>::bic_encode_u32_cm(const bm::word_t* arr,
                                            unsigned sz,
-                                          bm::word_t lo, bm::word_t hi)
+                                          bm::word_t lo,
+                                          bm::word_t hi) BMNOEXCEPT
  {
      for (;sz;)
      {
@@ -1292,7 +1298,7 @@ template<typename TEncoder>
  void bit_out<TEncoder>::bic_encode_u16_cm(const bm::gap_word_t* arr,
                                            unsigned sz_i,
                                            bm::gap_word_t lo_i,
-                                          bm::gap_word_t hi_i)
+                                          bm::gap_word_t hi_i) BMNOEXCEPT
  {
      BM_ASSERT(sz_i <= 65535);
  
@@ -1329,7 +1335,8 @@ void bit_out<TEncoder>::bic_encode_u16_cm(const bm::gap_word_t* arr,
  template<typename TEncoder>
  void bit_out<TEncoder>::bic_encode_u16_cm(const bm::gap_word_t* arr,
                                            unsigned sz,
-                                          bm::gap_word_t lo, bm::gap_word_t hi)
+                                          bm::gap_word_t lo,
+                                          bm::gap_word_t hi) BMNOEXCEPT
  {
      for (;sz;)
      {
@@ -1379,7 +1386,8 @@ void bit_out<TEncoder>::bic_encode_u16_cm(const bm::gap_word_t* arr,
  
  template<class TDecoder>
  void bit_in<TDecoder>::bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz,
-                                         bm::gap_word_t lo, bm::gap_word_t hi)
+                                         bm::gap_word_t lo,
+                                         bm::gap_word_t hi) BMNOEXCEPT
  {
      for (;sz;)
      {
@@ -1421,7 +1429,8 @@ void bit_in<TDecoder>::bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz,
  
  template<class TDecoder>
  void bit_in<TDecoder>::bic_decode_u32_cm(bm::word_t* arr, unsigned sz,
-                                         bm::word_t lo, bm::word_t hi)
+                                         bm::word_t lo,
+                                         bm::word_t hi) BMNOEXCEPT
  {
      for (;sz;)
      {
@@ -1472,7 +1481,8 @@ void bit_in<TDecoder>::bic_decode_u32_cm(bm::word_t* arr, unsigned sz,
  
  template<class TDecoder>
  void bit_in<TDecoder>::bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz,
-                                         bm::gap_word_t lo, bm::gap_word_t hi)
+                                         bm::gap_word_t lo,
+                                         bm::gap_word_t hi) BMNOEXCEPT
  {
      for (;sz;)
      {
@@ -1523,7 +1533,8 @@ void bit_in<TDecoder>::bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz,
  
  template<class TDecoder>
  void bit_in<TDecoder>::bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz,
-                              bm::gap_word_t lo, bm::gap_word_t hi)
+                              bm::gap_word_t lo,
+                              bm::gap_word_t hi) BMNOEXCEPT
  {
      for (;sz;)
      {
@@ -1579,7 +1590,8 @@ void bit_in<TDecoder>::bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz,
  
  template<class TDecoder>
  void bit_in<TDecoder>::bic_decode_u16_cm_dry(unsigned sz,
-                              bm::gap_word_t lo, bm::gap_word_t hi)
+                              bm::gap_word_t lo,
+                              bm::gap_word_t hi) BMNOEXCEPT
  {
      for (;sz;)
      {
@@ -1630,7 +1642,8 @@ void bit_in<TDecoder>::bic_decode_u16_cm_dry(unsigned sz,
  
  template<class TDecoder>
  void bit_in<TDecoder>::bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz,
-                                                bm::gap_word_t lo, bm::gap_word_t hi)
+                                                bm::gap_word_t lo,
+                                                bm::gap_word_t hi) BMNOEXCEPT
  {
      for (;sz;)
      {
@@ -1665,7 +1678,7 @@ void bit_in<TDecoder>::bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz,
          if (sz == 1)
              return;
          bic_decode_u16_rg_bitset(block, mid_idx, lo, bm::gap_word_t(val - 1));
-        // tail recursion:
+        // tail recursion of:
          //bic_decode_u16_bitset(block, sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
          sz  -= mid_idx + 1;
          lo = bm::gap_word_t(val + 1);
@@ -1676,7 +1689,8 @@ void bit_in<TDecoder>::bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz,
  
  template<class TDecoder>
  void bit_in<TDecoder>::bic_decode_u16_rg_dry(unsigned sz,
-                                   bm::gap_word_t lo, bm::gap_word_t hi)
+                                   bm::gap_word_t lo,
+                                   bm::gap_word_t hi) BMNOEXCEPT
  {
      for (;sz;)
      {
@@ -1705,7 +1719,6 @@ void bit_in<TDecoder>::bic_decode_u16_rg_dry(unsigned sz,
          if (sz == 1)
              return;
          bic_decode_u16_rg_dry(mid_idx, lo, bm::gap_word_t(val - 1));
-        //bic_decode_u16_dry(sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
          sz  -= mid_idx + 1;
          lo = bm::gap_word_t(val + 1);
      } // for sz
@@ -1716,7 +1729,7 @@ void bit_in<TDecoder>::bic_decode_u16_rg_dry(unsigned sz,
  // ----------------------------------------------------------------------
  
  template<class TDecoder>
-unsigned bit_in<TDecoder>::gamma()
+unsigned bit_in<TDecoder>::gamma() BMNOEXCEPT
  {
      unsigned acc = accum_;
      unsigned used = used_bits_;
@@ -1801,7 +1814,7 @@ ret:
  // ----------------------------------------------------------------------
  
  template<class TDecoder>
-unsigned bit_in<TDecoder>::get_bits(unsigned count)
+unsigned bit_in<TDecoder>::get_bits(unsigned count) BMNOEXCEPT
  {
      BM_ASSERT(count);
      const unsigned maskFF = ~0u;
diff --git a/c++/include/util/format_guess.hpp b/c++/include/util/format_guess.hpp

index d943a76403e5c734e8d8ab3f745c8e2c248e1f13..823af862b209493d7d71abd70a497d765afc5f16 100644 (file)
--- a/c++/include/util/format_guess.hpp
+++ b/c++/include/util/format_guess.hpp
@@ -1,7 +1,7 @@
  #ifndef FORMATGUESS__HPP
  #define FORMATGUESS__HPP
  
-/*  $Id: format_guess.hpp 596735 2019-11-12 16:36:21Z ludwigf $
+/*  $Id: format_guess.hpp 612523 2020-07-23 11:23:30Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -92,6 +92,51 @@ public:
          eGffAugustus         = 34, ///< GFFish output of Augustus Gene Prediction
          eJSON                = 35, ///< JSON 
          ePsl                 = 36, ///< PSL alignment format
+        // The following formats are not yet recognized by CFormatGuess - CXX-10039
+        eAltGraphX           = 37, 
+        eBed5FloatScore      = 38,
+        eBedGraph            = 39,      
+        eBedRnaElements      = 40,
+        eBigBarChart         = 41,
+        eBigBed              = 42,
+        eBigPsl              = 43,    
+        eBigChain            = 44,
+        eBigMaf              = 45,
+        eBigWig              = 46,
+        eBroadPeak           = 47,
+        eChain               = 48,
+        eClonePos            = 49,
+        eColoredExon         = 50,
+        eCtgPos              = 51,
+        eDownloadsOnly       = 52,
+        eEncodeFiveC         = 53,
+        eExpRatio            = 54,
+        eFactorSource        = 55,
+        eGenePred            = 56,
+        eLd2                 = 57,
+        eNarrowPeak          = 58,
+        eNetAlign            = 59,
+        ePeptideMapping      = 60,
+        eRmsk                = 61,
+        eSnake               = 62,
+        eVcfTabix            = 63,
+        eWigMaf              = 64,
+
+        // The following formats *are* recognized by CFormatGuess:
+        eFlatFileGenbank     = 65,
+        eFlatFileEna         = 66,
+        eFlatFileUniProt     = 67,
+        
+        // ***  Adding new format codes?  ***
+        //  (1) A sanity check in the  implementation depends on the format codes being 
+        //      consecutive. Hence no gaps allowed!
+        //  (2) Heed the warning above about never changing an already existing
+        //      format code!
+        //  (3) You must provide a display name for the new format. Do that in 
+        //      sm_FormatNames.
+        //  (4) You must add your new format to sm_CheckOrder (unless you don't want your 
+        //      format actually being checked and recognized.
+
          /// Max value of EFormat
          eFormat_max
      };
@@ -118,6 +163,8 @@ public:
          eThrowOnBadSource, ///< Throw an exception if the data source (stream, file) can't be read
      };
  
+    static bool IsSupportedFormat(EFormat format);
+
      /// Hints for guessing formats. Two hint types can be used: preferred and
      /// disabled. Preferred are checked before any other formats. Disabled
      /// formats are not checked at all.
@@ -195,6 +242,7 @@ public:
  
      ~CFormatGuess();
  
+
      NCBI_DEPRECATED EFormat GuessFormat(EMode);
      NCBI_DEPRECATED bool TestFormat(EFormat, EMode);
  
@@ -259,6 +307,10 @@ protected:
      bool TestFormatJson(EMode);
      bool TestFormatPsl(EMode);
  
+    bool TestFormatFlatFileGenbank(EMode);
+    bool TestFormatFlatFileEna(EMode);
+    bool TestFormatFlatFileUniProt(EMode);
+
      bool IsInputRepeatMaskerWithoutHeader();
      bool IsInputRepeatMaskerWithHeader();
  
@@ -322,17 +374,19 @@ private:
      bool x_IsBlankOrNumbers(const string& testString) const;
  
      // data:
-    static const char* const sm_FormatNames[eFormat_max];
+    using NAME_MAP = map<EFormat, const char*>;
+    static const NAME_MAP sm_FormatNames;
  
      bool x_TryProcessCLUSTALSeqData(const string& line, string& id, size_t& seg_length) const;
  
      bool x_LooksLikeCLUSTALConservedInfo(const string& line) const;
  
  protected:
-    static int s_CheckOrder[];
+    static vector<int> sm_CheckOrder;
      
      static const streamsize s_iTestBufferGranularity = 8096;
  
+
      CNcbiIstream& m_Stream;
      bool m_bOwnsStream;
      char* m_pTestBuffer;
diff --git a/c++/include/util/limited_size_map.hpp b/c++/include/util/limited_size_map.hpp

index ad5074867d584f8ae04c4ddd401dcf586ae3115d..ab5e6303f69dc926f05be76cf950efb553a7467f 100644 (file)
--- a/c++/include/util/limited_size_map.hpp
+++ b/c++/include/util/limited_size_map.hpp
@@ -1,7 +1,7 @@
  #ifndef UTIL__LIMITED_SIZE_MAP__HPP
  #define UTIL__LIMITED_SIZE_MAP__HPP
  
-/*  $Id: limited_size_map.hpp 402322 2013-06-06 17:13:46Z vasilche $
+/*  $Id: limited_size_map.hpp 612734 2020-07-27 11:38:33Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -165,7 +165,8 @@ public:
              return !(*this == a);
          }
      };
-    
+
+    explicit
      limited_size_map(size_t size_limit = 0)
          : m_SizeLimit(size_limit)
          {
diff --git a/c++/scripts/common/check/inspxe.sh b/c++/scripts/common/check/inspxe.sh

index 97b66896ed678d8ea125e14c10f3549f7c83fab2..11ad6d0956e6140649f8b32691dd94382e6e8eb9 100755 (executable)
--- a/c++/scripts/common/check/inspxe.sh
+++ b/c++/scripts/common/check/inspxe.sh
@@ -37,7 +37,7 @@ exe=$1.exe
  shift
  
  # Run test
-"$inspxe" -collect mi3 -knob detect-leaks-on-exit=false -knob enable-memory-growth-detection=false -knob enable-on-demand-leak-detection=false -knob still-allocated-memory=false -knob detect-resource-leaks=false -knob stack-depth=32 -result-dir $rd -return-app-exitcode -suppression-file "$suppress_dir" -- $exe "$@"
+"$inspxe" -collect mi3 -knob detect-leaks-on-exit=false -knob enable-memory-growth-detection=false -knob enable-on-demand-leak-detection=false -knob still-allocated-memory=false -knob detect-resource-leaks=false -knob stack-depth=16 -result-dir $rd -return-app-exitcode -suppression-file "$suppress_dir" -- $exe "$@"
  app_result=$?
  sleep 5
  if test ! -d $rd; then
diff --git a/c++/scripts/common/check/tsan.supp b/c++/scripts/common/check/tsan.supp

index 81534f312ef7d933ed5442fe529fd9180248df09..654e475ed09c11735077278a42aab4bfa0e7bad2 100644 (file)
--- a/c++/scripts/common/check/tsan.supp
+++ b/c++/scripts/common/check/tsan.supp
@@ -19,6 +19,9 @@ race:corelib/test/test_ncbidiag_mt.cpp
  
  race:^ncbi::CDiagContext::ApproveMessage(ncbi::SDiagMessage&,
  
+# ncbi_url.cpp default encoder. Leave as is. CXX-10543
+race:^ncbi::CSafeStatic<ncbi::CDefaultUrlEncoder, ncbi::CSafeStatic_Callbacks<ncbi::CDefaultUrlEncoder> >::x_Init()$
+
  
  ################################################################
  # ivanov
diff --git a/c++/scripts/common/impl/install.sh b/c++/scripts/common/impl/install.sh

index ccc41deb96b5ba2fd2dfa7662a62d46f0d83057a..4cbbbe38a2bcc2a032a115cd9afd496f7ac91a6d 100755 (executable)
--- a/c++/scripts/common/impl/install.sh
+++ b/c++/scripts/common/impl/install.sh
@@ -16,7 +16,7 @@
  
  echo "[`date`]"
  
-svn_location=`echo '$URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.10.1/c++/scripts/common/impl/install.sh $' | sed "s%\\$[U]RL: *\\([^$][^$]*\\) \\$.*%\\1%"`
+svn_location=`echo '$URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.11.0/c++/scripts/common/impl/install.sh $' | sed "s%\\$[U]RL: *\\([^$][^$]*\\) \\$.*%\\1%"`
  svn_revision=`echo '$Revision: 429376 $' | sed "s%\\$[R]evision: *\\([^$][^$]*\\) \\$.*%\\1%"`
  
  script_name=`basename $0`
diff --git a/c++/scripts/projects/blast/Manifest b/c++/scripts/projects/blast/Manifest

index d3976454a73eeca5fa6ae76fc7efcb20fd264d7e..c520c215c13c42a3622886b9e598defb066b3706 100644 (file)
--- a/c++/scripts/projects/blast/Manifest
+++ b/c++/scripts/projects/blast/Manifest
@@ -1,7 +1,7 @@
  #
  # Filename: Manifest
  #
-# $Id: Manifest 598028 2019-12-03 15:46:49Z camacho $
+# $Id: Manifest 615066 2020-08-26 16:41:29Z fongah2 $
  #
  # Author: Christiam Camacho
  #
@@ -44,6 +44,7 @@ DEFAULT_CONFIGURE_FLAGS: --without-debug --with-strip --with-openmp --with-mt --
  # that. The build-root is needed so that rpmbuild can find the proper directories
  # to copy the binaries from
  Linux64-Centos     : icc : ICC.sh 1900 --with-static --without-dll --with-bin-release --with-strip --without-debug --without-pcre --with-mt --with-openmp --with-flat-makefile --with-experimental=Int8GI --without-vdb --without-gnutls --without-gcrypt <ENV>OPENMP_FLAGS='-qopenmp -qopenmp-link=static';LDFLAGS=-Wl,--as-needed</ENV>
+
  #Linux64-Centos     : gcc : GCC.sh       --with-static --without-dll --with-bin-release --with-strip --without-debug --without-pcre --with-mt --with-openmp --with-flat-makefile --with-experimental=Int8GI --without-vdb --without-gnutls --without-gcrypt
  #Linux64-Centos     : gcc-debug : GCC.sh                        --with-strip --with-debug --without-dll    --without-pcre --with-mt --with-openmp --with-flat-makefile --with-experimental=Int8GI --without-vdb --without-gnutls --without-gcrypt
  
diff --git a/c++/scripts/projects/blast/components.link b/c++/scripts/projects/blast/components.link

index 70b291610fc989ebd2555d65de7de71104ee3197..6d1d217f5e07c9b19f423ab75d005c1149bfb16d 100644 (file)
--- a/c++/scripts/projects/blast/components.link
+++ b/c++/scripts/projects/blast/components.link
@@ -4,6 +4,6 @@ core            24.0
  dbase           24.0
  web             24.0
  objects         24.0
-objtools        24.0
-algo            24.0
-app             24.0
+objtools        24.1
+algo            24.1
+app             24.1
diff --git a/c++/scripts/projects/igblast/edit_imgt_file.pl b/c++/scripts/projects/igblast/edit_imgt_file.pl

index bdfe4636e8ddf67683832cfaf9aa87f72afd5af7..69ba2b81f79ec02a48d594b13a84f27f39ac722c 100755 (executable)
--- a/c++/scripts/projects/igblast/edit_imgt_file.pl
+++ b/c++/scripts/projects/igblast/edit_imgt_file.pl
@@ -1,7 +1,8 @@
-#!/usr/bin/perl  -w
+#!/usr/bin/env perl
  
  
  use strict;
+use warnings;
  my $inputfile=shift (@ARGV);
  
  open(in_handle, $inputfile);
diff --git a/c++/scripts/projects/ncbi_gbench.lst b/c++/scripts/projects/ncbi_gbench.lst

index 91b553c6147638141c25c7ba6dac0568d35e8e67..9f7d2a838f955eab2c825bda6acfdff9b6107e07 100644 (file)
--- a/c++/scripts/projects/ncbi_gbench.lst
+++ b/c++/scripts/projects/ncbi_gbench.lst
@@ -15,3 +15,7 @@ misc/third_party
  -[^gi].*/app
  -[^g].*/unit_test
  -algo/ms
+-internal/gbench/app/msaviewer
+-internal/gbench/app/sviewer
+-internal/gbench/app/treeview
+-internal/gbench/app/uud
diff --git a/c++/scripts/projects/netschedule/ChangeLog b/c++/scripts/projects/netschedule/ChangeLog

index a74068b51b673b45bbbc4f1412b58d761d022bd7..dc87b7dfb023de447ce362ead46258e0879da3e0 100644 (file)
--- a/c++/scripts/projects/netschedule/ChangeLog
+++ b/c++/scripts/projects/netschedule/ChangeLog
@@ -1,3 +1,10 @@
+Release 4.42.1 cloned from 4.41.0 (2020-09-21)
+
+    * NetSchedule: cannot start with string to unsigned int conversion
+      (CXX-11350)
+    * NetSchedule: update program name when a client changes its session
+      (CXX-11283)
+
  Release 4.42.0 cloned from 4.41.0 (2020-02-11)
  
      * NetSchedule: extend DUMP command (CXX-10344)
diff --git a/c++/scripts/projects/pubseq_gateway/ChangeLog b/c++/scripts/projects/pubseq_gateway/ChangeLog

index 29ac7d6d5cecadb029b7fce2321eeda015a701c6..ec20ce2487fcdce2a723493969582b05201e8137 100644 (file)
--- a/c++/scripts/projects/pubseq_gateway/ChangeLog
+++ b/c++/scripts/projects/pubseq_gateway/ChangeLog
@@ -1,3 +1,14 @@
+Release 1.10.0 (2020-07-10)
+
+    * PSG: add anti recursion flag (CXX-11438)
+    * PSG: create and use a high level Reply object at the very beginning
+      (CXX-11425)
+    * PSG server MaxDebug configuration segfault (CXX-11402)
+    * PSG server get_na processor filter (CXX-11401)
+    * PSG: incorrect handling of the CHttpReply<> instance (CXX-11397)
+    * PSG - add API to retrieve biodata from other (non-Cassandra/LMDB) sources
+      (CXX-11312)
+
  Release 1.9.0 (2020-04-09)
  
      * Updated libuv 1.35.0, datastax 2.15.1, lmdb 0.9.24 (CXX-11268)
diff --git a/c++/scripts/projects/pubseq_gateway/project.lst b/c++/scripts/projects/pubseq_gateway/project.lst

index ad5f1707d4f48e19b558f5255686805573437e9f..6524c3c665ef8c604f06fada899fd286d57b7f56 100644 (file)
--- a/c++/scripts/projects/pubseq_gateway/project.lst
+++ b/c++/scripts/projects/pubseq_gateway/project.lst
@@ -18,6 +18,11 @@ misc/third_party_static
  app$
  app/pubseq_gateway$
  app/pubseq_gateway/server$
+app/pubseq_gateway/server/test$
+app/pubseq_gateway/server/test/input$
+app/pubseq_gateway/server/integrationsmoketest$
+app/pubseq_gateway/server/integrationsmoketest/input$
+app/pubseq_gateway/server/integrationsmoketest/baseline$
  
  objtools$
  objtools/pubseq_gateway$
diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt

index ae4ca1b93aa20592877e02edcfee16db9931dd22..09d447bd5f0f812649a59f3552049629468cbccb 100644 (file)
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -1,6 +1,9 @@
  #############################################################################
-# $Id: CMakeLists.txt 608162 2020-05-12 16:01:31Z blastadm $
+# $Id: CMakeLists.txt 617723 2020-10-06 07:10:56Z blastadm $
  #############################################################################
  
-cmake_minimum_required(VERSION 3.3)
-include(build-system/cmake/CMakeLists.top_builddir.txt)
+cmake_minimum_required(VERSION 3.7)
+if ("${PROJECT_NAME}" STREQUAL "")
+    project(ncbi_cpp)
+endif()
+include(${CMAKE_CURRENT_LIST_DIR}/build-system/cmake/CMakeLists.top_builddir.txt)
diff --git a/c++/src/Makefile.in b/c++/src/Makefile.in

index fd0a86f20abbda4abb3eab5d77a859af59fa1881..2f66472fe4bb77c6a20c3d9e3ce9f0e81e87ce38 100644 (file)
--- a/c++/src/Makefile.in
+++ b/c++/src/Makefile.in
@@ -1,4 +1,4 @@
-# $Id: Makefile.in 608162 2020-05-12 16:01:31Z blastadm $
+# $Id: Makefile.in 617723 2020-10-06 07:10:56Z blastadm $
  
  # Master (top-level) makefile for all NCBI C++ projects
  ##################################################################
diff --git a/c++/src/algo/blast/api/CMakeLists.xblast.lib.txt b/c++/src/algo/blast/api/CMakeLists.xblast.lib.txt

index 15432cdbc9e3094717af8cb903e97392157fb6dd..faf61fb70a29c8c61844d2348ebafc53a828458c 100644 (file)
--- a/c++/src/algo/blast/api/CMakeLists.xblast.lib.txt
+++ b/c++/src/algo/blast/api/CMakeLists.xblast.lib.txt
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMakeLists.xblast.lib.txt 594157 2019-09-30 18:28:48Z gouriano $
+# $Id: CMakeLists.xblast.lib.txt 615334 2020-08-31 15:35:33Z fukanchi $
  #############################################################################
  
  set(SRC_BLAST_CXX_CORE
@@ -75,6 +75,8 @@ set(SRC_BLAST_CXX_CORE
      deltablast
      magicblast_options
      magicblast
+    blast_node
+    blast_usage_report
  )
  
  
diff --git a/c++/src/algo/blast/api/Makefile.xblast.lib b/c++/src/algo/blast/api/Makefile.xblast.lib

index d8689276135c450fdffb241ca17242f345f53f04..ca8bc44291abb484b38ed3a076b1dff68fb06bea 100644 (file)
--- a/c++/src/algo/blast/api/Makefile.xblast.lib
+++ b/c++/src/algo/blast/api/Makefile.xblast.lib
@@ -1,4 +1,4 @@
-# $Id: Makefile.xblast.lib 553565 2017-12-18 22:23:38Z fongah2 $
+# $Id: Makefile.xblast.lib 615334 2020-08-31 15:35:33Z fukanchi $
  
  include $(srcdir)/../core/Makefile.blast.lib
  
@@ -75,7 +75,9 @@ cdd_pssm_input \
  deltablast_options \
  deltablast \
  magicblast_options \
-magicblast
+magicblast \
+blast_node \
+blast_usage_report
  
  SRC  = $(SRC_C:%=.core_%) $(SRC_CXX)
  
diff --git a/c++/src/algo/blast/api/blast_aux.cpp b/c++/src/algo/blast/api/blast_aux.cpp

index 9c92813555a7b52468c9b3b6e86132cefa28f39c..6bb9cac4f7776f1bb03871f1bca123d2e3df3852 100644 (file)
--- a/c++/src/algo/blast/api/blast_aux.cpp
+++ b/c++/src/algo/blast/api/blast_aux.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blast_aux.cpp 519527 2016-11-16 14:19:45Z camacho $
+/*  $Id: blast_aux.cpp 615182 2020-08-28 04:28:48Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -1152,6 +1152,47 @@ LoadSequencesToScope(CScope::TIds& ids, vector<TSeqRange>& ranges, CRef<CScope>
         top_bh.GetSeqMap().CanResolveRange(&*scope, sel);
  }
  
+/// Single lock guarding CBlastAppDiagHandler::m_messages.  Post() and
+/// ResetMessages() have to contend on the SAME mutex; giving each of them its
+/// own function-local static mutex would not exclude one from the other.
+/// NOTE(review): any other code that reads or writes m_messages (not visible
+/// here) should take this lock as well -- confirm.
+DEFINE_STATIC_MUTEX(s_DiagMessagesMutex);
+
+/// Pass the message on to the wrapped handler (when there is one) and, while
+/// saving is enabled, append a sanitized copy of it to m_messages.
+/// @param mess  diagnostic message being posted
+void CBlastAppDiagHandler::Post(const SDiagMessage & mess)
+{
+       if(m_handler != NULL) {
+               m_handler->Post(mess);
+       }
+       if(m_save) {
+               // Build the record before locking so the critical section
+               // only covers the container update.
+               CRef<CBlast4_error> d(new CBlast4_error);
+               string m;
+               mess.Write(m);
+               d->SetMessage(NStr::Sanitize(m));
+               d->SetCode((int)mess.m_Severity);
+               CMutexGuard guard(s_DiagMessagesMutex);
+               m_messages.push_back(d);
+       }
+}
+
+/// Discard every message saved so far.
+void CBlastAppDiagHandler::ResetMessages()
+{
+       CMutexGuard guard(s_DiagMessagesMutex);
+       m_messages.clear();
+}
+
+/// Hand the handler kept in m_handler back to SetDiagHandler() and drop our
+/// pointer to it; does nothing when m_handler is not set.
+CBlastAppDiagHandler::~CBlastAppDiagHandler()
+{
+       if(!m_handler) {
+               return;
+       }
+       SetDiagHandler(m_handler);
+       m_handler = NULL;
+}
+
+void CBlastAppDiagHandler::DoNotSaveMessages(void) // stop saving messages and drop the ones already saved
+{
+       m_save = false; // Post() checks this flag before storing a message
+       ResetMessages(); // clears m_messages
+}
+
+
  END_SCOPE(blast)
  END_NCBI_SCOPE
  
diff --git a/c++/src/algo/blast/api/blast_node.cpp b/c++/src/algo/blast/api/blast_node.cpp

new file mode 100644 (file)

index 0000000..b568c0f
--- /dev/null
+++ b/c++/src/algo/blast/api/blast_node.cpp
@@ -0,0 +1,293 @@
+/*  $Id:
+ * ===========================================================================
+ *
+ *                            PUBLIC DOMAIN NOTICE
+ *               National Center for Biotechnology Information
+ *
+ *  This software/database is a "United States Government Work" under the
+ *  terms of the United States Copyright Act.  It was written as part of
+ *  the author's official duties as a United States Government employee and
+ *  thus cannot be copyrighted.  This software/database is freely available
+ *  to the public for use. The National Library of Medicine and the U.S.
+ *  Government have not placed any restriction on its use or reproduction.
+ *
+ *  Although all reasonable efforts have been taken to ensure the accuracy
+ *  and reliability of the software and data, the NLM and the U.S.
+ *  Government do not and cannot warrant the performance or results that
+ *  may be obtained by using this software or data. The NLM and the U.S.
+ *  Government disclaim all warranties, express or implied, including
+ *  warranties of performance, merchantability or fitness for any particular
+ *  purpose.
+ *
+ *  Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors:  Amelia Fong
+ *
+ */
+
+/** @file blast_node.cpp
+ *  BLAST node api
+ */
+
+#include <ncbi_pch.hpp>
+#include <corelib/ncbiapp.hpp>
+#include <algo/blast/api/remote_blast.hpp>
+#include <algo/blast/blastinput/blast_fasta_input.hpp>
+#include <algo/blast/api/blast_node.hpp>
+
+#if defined(NCBI_OS_UNIX)
+#include <unistd.h>
+#endif
+
+#ifndef SKIP_DOXYGEN_PROCESSING
+USING_NCBI_SCOPE;
+USING_SCOPE(blast);
+USING_SCOPE(objects);
+#endif
+
+void CBlastNodeMailbox::SendMsg(CRef<CBlastNodeMsg> msg) // queue msg in this mailbox and signal m_Notify
+{
+       CFastMutexGuard guard(m_Mutex); // held for both the queue update and the signal
+       m_MsgQueue.push_back(msg);
+       m_Notify.SignalSome(); // NOTE(review): presumably wakes the thread waiting on this condition -- confirm
+}
+
+/// Store the node's parameters, attach the mailbox when one is given and
+/// build the id string "Query <first>-<last>", where <first> is query_index
+/// and <last> is query_index + num_queries - 1.
+CBlastNode::CBlastNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args,
+                               CBlastAppDiagHandler & bah, EProgram blast_program,
+                        int query_index, int num_queries, CBlastNodeMailbox * mailbox):
+                        m_NodeNum(node_num),
+                        m_NcbiArgs(ncbi_args),
+                        m_Args(args),
+                        m_Bah(bah),
+                        m_BlastProgram(blast_program),
+                        m_QueryIndex(query_index),
+                        m_NumOfQueries(num_queries),
+                        m_QueriesLength(0)
+{
+       if(mailbox != NULL) {
+               m_Mailbox.Reset(mailbox);
+       }
+       const string first_query = NStr::IntToString(query_index);
+       const string last_query = NStr::IntToString(query_index + num_queries -1);
+       m_NodeIdStr = "Query " + first_query + "-" + last_query;
+}
+
+/// Release this node's reference to its mailbox, if it holds one.
+CBlastNode::~CBlastNode ()
+{
+       if(!m_Mailbox.Empty()) {
+               m_Mailbox.Reset();
+       }
+}
+
+/// Wrap (msg_type, ptr) in a CBlastNodeMsg and send it through the node's
+/// mailbox.  Nothing is sent when the node has no mailbox.
+/// @param msg_type  kind of message
+/// @param ptr       opaque message body handed to CBlastNodeMsg unchanged
+void CBlastNode::SendMsg(CBlastNodeMsg::EMsgType msg_type, void* ptr)
+{
+       if (m_Mailbox.Empty()) {
+               return;
+       }
+       CRef<CBlastNodeMsg> outgoing( new CBlastNodeMsg(msg_type, ptr));
+       m_Mailbox->SendMsg(outgoing);
+}
+
+/// Remember the output stream and thread limit, zero the counters and start
+/// the stop watch.  The node limit is set to num_threads + 2.
+CBlastMasterNode::CBlastMasterNode(CNcbiOstream & out_stream, int num_threads):
+               m_OutputStream(out_stream),
+               m_MaxNumThreads(num_threads),
+               m_MaxNumNodes(num_threads + 2),
+               m_NumErrStatus(0),
+               m_NumQueries(0),
+               m_QueriesLength(0)
+{
+       m_StopWatch.Start();
+}
+
+void
+CBlastMasterNode::x_WaitForNewEvent() // block on m_NewEvent; the signalling side is not visible in this file section
+{
+       CFastMutexGuard guard(m_Mutex); // the wait below is given this same mutex
+       m_NewEvent.WaitForSignal(m_Mutex);
+}
+
+/// Record a node together with its mailbox under the node's number.
+/// @throws CBlastException (eInvalidArgument) for a NULL node, a NULL mailbox
+///         or a node number that is already registered;
+///         (eCoreBlastError) when mailbox and node disagree on the number.
+void
+CBlastMasterNode::RegisterNode(CBlastNode * node, CBlastNodeMailbox * mailbox)
+{
+       if(node == NULL) {
+                NCBI_THROW(CBlastException, eInvalidArgument, "Empty Node" );
+       }
+       if(mailbox == NULL) {
+                NCBI_THROW(CBlastException, eInvalidArgument, "Empty mailbox" );
+       }
+       if(mailbox->GetNodeNum() != node->GetNodeNum()) {
+                NCBI_THROW(CBlastException, eCoreBlastError, "Invalid mailbox node number" );
+       }
+
+       // Both tables are checked and updated under the master node's mutex.
+       CFastMutexGuard guard(m_Mutex);
+       const int node_num = node->GetNodeNum();
+       const bool has_mailbox = m_PostOffice.count(node_num) > 0;
+       const bool has_node = m_RegisteredNodes.count(node_num) > 0;
+       if (has_mailbox || has_node) {
+               NCBI_THROW(CBlastException, eInvalidArgument, "Duplicate chunk num" );
+       }
+       m_PostOffice[node_num]= mailbox;
+       m_RegisteredNodes[node_num] = node;
+}
+
+/// Make one pass over the registered mailboxes, reading at most one message
+/// from each, then format whatever results are ready.
+///  - eRunRequest: run the node if fewer than m_MaxNumThreads nodes are
+///    active; otherwise put the message back, format results, optionally wait
+///    for a new event and return true immediately.
+///  - ePostResult / eErrorExit: store the message in the format queue and
+///    drop the node from the active set.
+///  - ePostLog: ignored.
+/// @return true when a run request had to be deferred, else IsActive().
+/// @throws CBlastException (eCoreBlastError) on a run request without a node
+///         or on an unknown message type.
+bool CBlastMasterNode::Processing()
+{
+       NON_CONST_ITERATE(TPostOffice, box, m_PostOffice) {
+               if(!(box->second->GetNumMsgs() > 0)) {
+                       continue;
+               }
+               CRef<CBlastNodeMsg> msg = box->second->ReadMsg();
+               const int chunk_num = box->first;
+               if (msg.Empty()) {
+                       continue;
+               }
+               switch (msg->GetMsgType()) {
+                       case CBlastNodeMsg::eRunRequest:
+                       {
+                               if ((int) m_ActiveNodes.size() >= m_MaxNumThreads) {
+                                       // Every thread slot is taken: leave the request in the
+                                       // mailbox and use the time to flush finished results.
+                                       box->second->UnreadMsg(msg);
+                                       FormatResults();
+                                       if (IsFull()) {
+                                               x_WaitForNewEvent();
+                                       }
+                                       return true;
+                               }
+                               CBlastNode * node = (CBlastNode *) msg->GetMsgBody();
+                               if(node == NULL) {
+                                       NCBI_THROW(CBlastException, eCoreBlastError, "Invalid mailbox node number" );
+                               }
+                               double start_time = m_StopWatch.Elapsed();
+                               node->Run();
+                               m_ActiveNodes.insert(pair< int, double >(chunk_num, start_time));
+                               // An empty message reserves this chunk's position in the
+                               // format queue until its result arrives.
+                               CRef<CBlastNodeMsg> placeholder;
+                               m_FormatQueue.insert(pair<int,CRef<CBlastNodeMsg> >(chunk_num, placeholder));
+                               _TRACE("Starting Chunk # " << chunk_num) ;
+                               break;
+                       }
+                       case CBlastNodeMsg::ePostResult:
+                       case CBlastNodeMsg::eErrorExit:
+                       {
+                               m_FormatQueue[chunk_num] = msg;
+                               double elapsed = m_StopWatch.Elapsed() - m_ActiveNodes[chunk_num];
+                               m_ActiveNodes.erase(chunk_num);
+                               CTimeSpan s(elapsed);
+                               _TRACE("Chunk #" << chunk_num << " completed in " << s.AsSmartString());
+                               break;
+                       }
+                       case CBlastNodeMsg::ePostLog:
+                       {
+                               break;
+                       }
+                       default:
+                       {
+                               NCBI_THROW(CBlastException, eCoreBlastError, "Invalid node message type");
+                               break;
+                       }
+               }
+       }
+       FormatResults();
+       return IsActive();
+}
+
+/// Walk the format queue from its first entry and handle every entry that
+/// already carries a message, stopping at the first one that does not.
+/// For each handled entry the node's results are written to the output stream
+/// (ePostResult) or an error is logged and counted (eErrorExit); the query
+/// totals are updated, the node is detached and unregistered.  Handled entries
+/// are then removed from the queue.
+/// @throws CBlastException (eCoreBlastError) on a message without a node or
+///         with an unexpected type.
+void CBlastMasterNode::FormatResults()
+{
+       TFormatQueue::iterator pending = m_FormatQueue.begin();
+
+       for (; pending != m_FormatQueue.end(); ++pending) {
+               CRef<CBlastNodeMsg> msg(pending->second);
+               if(msg.Empty()) {
+                       // Still a placeholder: later entries have to wait for it.
+                       break;
+               }
+               CBlastNode * node = (CBlastNode *) msg->GetMsgBody();
+               if(node == NULL) {
+                       string err_msg = "Empty formatting msg for chunk num # " + NStr::IntToString(pending->first);
+                       NCBI_THROW(CBlastException, eCoreBlastError, err_msg);
+               }
+               const int node_num = node->GetNodeNum();
+               switch (msg->GetMsgType()) {
+                       case CBlastNodeMsg::ePostResult:
+                       {
+                               string results;
+                               node->GetBlastResults(results);
+                               if (!results.empty()) {
+                                       m_OutputStream << results;
+                               }
+                               break;
+                       }
+                       case CBlastNodeMsg::eErrorExit:
+                       {
+                               m_NumErrStatus++;
+                               ERR_POST("Chunk # " << node_num << " exit with error (" << node->GetStatus() << ")");
+                               break;
+                       }
+                       default:
+                       {
+                               NCBI_THROW(CBlastException, eCoreBlastError, "Invalid msg type");
+                       }
+               }
+               m_NumQueries += node->GetNumOfQueries();
+               m_QueriesLength += node->GetQueriesLength();
+               node->Detach();
+               m_PostOffice.erase(node_num);
+               m_RegisteredNodes.erase(node_num);
+       }
+
+       if (pending != m_FormatQueue.begin()) {
+               m_FormatQueue.erase(m_FormatQueue.begin(), pending);
+       }
+}
+
+/// Tell whether the number of active nodes plus the "in buffer" count has
+/// reached m_MaxNumNodes.  The in-buffer count is the difference between the
+/// highest registered node number and the highest active node number, or
+/// m_MaxNumThreads when either table is empty.
+/// @return non-zero when the limit is reached, 0 otherwise
+int CBlastMasterNode::IsFull()
+{
+       unsigned int in_buffer = m_MaxNumThreads;
+       if (!(m_RegisteredNodes.empty() || m_ActiveNodes.empty())) {
+               TRegisteredNodes::reverse_iterator last_registered = m_RegisteredNodes.rbegin();
+               TActiveNodes::reverse_iterator last_active = m_ActiveNodes.rbegin();
+               in_buffer = last_registered->first - last_active->first;
+       }
+       const int occupied = (int) (m_ActiveNodes.size() + in_buffer);
+       return (occupied >= m_MaxNumNodes);
+}
+
+
+/// Report whether a digit or '|' occurs at index kMainAccSize or earlier in
+/// the line (find_last_of searches backwards starting from that index).
+/// @param line  input line; it is only read
+bool s_IsSeqID(string & line)
+{
+    static const int kMainAccSize = 32;
+    return line.find_last_of("0123456789|", kMainAccSize) != NPOS;
+}
+
+/// Read lines until the batch is large enough or the input ends, and return
+/// them as one newline-separated string.
+///  - Leading blanks are trimmed; empty lines and lines starting with
+///    '!', '#' or ';' are skipped.
+///  - A line starts a new query when s_IsSeqID() accepts it or it begins with
+///    '>'.  If the accumulated size has already reached m_QueryBatchSize at
+///    that point, the line is pushed back and the batch is closed.
+///  - Lines not starting with '>' add to the size: m_EstAvgQueryLength for an
+///    id line, the line length otherwise.
+/// @param queries   [out] batch text; left empty when no query was read
+/// @param query_no  [out] m_QueryCount + 1 for the first query in the batch,
+///                  -1 when no query was read
+/// @return number of queries in the batch
+int
+CBlastNodeInputReader::GetQueryBatch(string & queries, int & query_no)
+{
+       CNcbiOstrstream batch;
+       int batch_size = 0;
+       int num_queries = 0;
+       queries.clear();
+       query_no = -1;
+
+    while ( !AtEOF()) {
+               string line = NStr::TruncateSpaces_Unsafe(*++(*this), NStr::eTrunc_Begin);
+           if (line.empty()) {
+               continue;
+           }
+           const char first = line[0];
+           switch (first) {
+               case '!':
+               case '#':
+               case ';':
+                       continue;   // comment line: skip it (continues the while loop)
+               default:
+                       break;
+           }
+           const bool is_defline = (first == '>');
+           const bool isId = s_IsSeqID(line);
+           if ( isId || is_defline ) {
+               if (batch_size >= m_QueryBatchSize) {
+                       UngetLine();
+                       break;
+               }
+               ++num_queries;
+           }
+           if (!is_defline) {
+               batch_size += isId? m_EstAvgQueryLength : line.size();
+           }
+       batch << line << endl;
+    }
+    // NOTE(review): if CNcbiOstrstream is a std::ostringstream in this build,
+    // std::ends puts a '\0' character at the end of the batch text -- confirm
+    // that this is intended.
+    batch << std::ends;
+    batch.flush();
+    if (num_queries > 0){
+       queries = batch.str();
+       query_no = m_QueryCount +1;
+       m_QueryCount +=num_queries;
+    }
+    return num_queries;
+}
diff --git a/c++/src/algo/blast/api/blast_usage_report.cpp b/c++/src/algo/blast/api/blast_usage_report.cpp

new file mode 100644 (file)

index 0000000..0c8d431
--- /dev/null
+++ b/c++/src/algo/blast/api/blast_usage_report.cpp
@@ -0,0 +1,228 @@
+/*  $Id:
+ * ===========================================================================
+ *
+ *                            PUBLIC DOMAIN NOTICE
+ *               National Center for Biotechnology Information
+ *
+ *  This software/database is a "United States Government Work" under the
+ *  terms of the United States Copyright Act.  It was written as part of
+ *  the author's official duties as a United States Government employee and
+ *  thus cannot be copyrighted.  This software/database is freely available
+ *  to the public for use. The National Library of Medicine and the U.S.
+ *  Government have not placed any restriction on its use or reproduction.
+ *
+ *  Although all reasonable efforts have been taken to ensure the accuracy
+ *  and reliability of the software and data, the NLM and the U.S.
+ *  Government do not and cannot warrant the performance or results that
+ *  may be obtained by using this software or data. The NLM and the U.S.
+ *  Government disclaim all warranties, express or implied, including
+ *  warranties of performance, merchantability or fitness for any particular
+ *  purpose.
+ *
+ *  Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors:  Amelia Fong
+ *
+ */
+
+/** @file blast_usage_report.cpp
+ *  BLAST usage report api
+ */
+
+#include <ncbi_pch.hpp>
+#include <algo/blast/api/blast_usage_report.hpp>
+#include <corelib/ncbifile.hpp>
+
+#ifndef SKIP_DOXYGEN_PROCESSING
+USING_NCBI_SCOPE;
+USING_SCOPE(blast);
+#endif
+
+/// Value reported for the eApp parameter.
+static const string kNcbiAppName="standalone-blast";
+/// File whose first line is inspected for a cloud vendor name.
+static const string kIdFile="/sys/class/dmi/id/sys_vendor";
+
+/// Add run-environment parameters to the report:
+///  - eDocker when BLAST_DOCKER is set in the environment;
+///  - eGCP or eAWS when the first line of kIdFile (upper-cased) contains
+///    "GOOGLE" or "AMAZON";
+///  - eELBJobId / eELBBatchNum from BLAST_ELB_JOB_ID / BLAST_ELB_BATCH_NUM.
+void CBlastUsageReport::x_CheckRunEnv()
+{
+       if(getenv("BLAST_DOCKER") != NULL){
+               AddParam(eDocker, true);
+       }
+
+       CFile vendor_file(kIdFile);
+       if(vendor_file.Exists()){
+               CNcbiIfstream vendor_stream(vendor_file.GetPath().c_str(), IOS_BASE::in);
+               string vendor;
+               NcbiGetlineEOL(vendor_stream, vendor);
+               NStr::ToUpper(vendor);
+               const bool on_gcp = (vendor.find("GOOGLE") != NPOS);
+               if (on_gcp) {
+                       AddParam(eGCP, true);
+               }
+               else if (vendor.find("AMAZON")!= NPOS){
+                       AddParam(eAWS, true);
+               }
+       }
+
+       char* job_id_env = getenv("BLAST_ELB_JOB_ID");
+       if(job_id_env != NULL){
+               string job_id(job_id_env);
+               AddParam(eELBJobId, job_id);
+       }
+       char* batch_num_env = getenv("BLAST_ELB_BATCH_NUM");
+       if(batch_num_env != NULL){
+               // fConvErr_NoThrow: conversion errors do not raise an exception here.
+               int batch_num = NStr::StringToInt(CTempString(batch_num_env), NStr::fConvErr_NoThrow);
+               AddParam(eELBBatchNum, batch_num);
+       }
+}
+
+CBlastUsageReport::CBlastUsageReport() // decide whether reporting is enabled, then add the fixed parameters
+{
+       x_CheckBlastUsageEnv(); // must run first: the AddParam calls below only record while IsEnabled()
+       AddParam(eApp, kNcbiAppName);
+       x_CheckRunEnv(); // adds docker / cloud vendor / ELB parameters
+}
+
+/// When reporting is enabled, send the collected parameters, then call
+/// Wait() and Finish() before the object goes away.
+CBlastUsageReport::~CBlastUsageReport()
+{
+       if (!IsEnabled()) {
+               return;
+       }
+       Send(m_Params);
+       Wait();
+       Finish();
+}
+
+/// Map a usage-report parameter id to the name used in the report.
+/// An id without a mapping is logged as a warning and the process is aborted.
+/// @param p  parameter id
+/// @return   report name for p
+string CBlastUsageReport::x_EUsageParmsToString(EUsageParams p)
+{
+    switch (p) {
+       case eApp:              return "ncbi_app";
+       case eVersion:          return "version";
+       case eProgram:          return "program";
+       case eTask:             return "task";
+       case eExitStatus:       return "exit_status";
+       case eRunTime:          return "run_time";
+       case eDBName:           return "db_name";
+       case eDBLength:         return "db_length";
+       case eDBNumSeqs:        return "db_num_seqs";
+       case eDBDate:           return "db_date";
+       case eBl2seq:           return "bl2seq";
+       case eNumSubjects:      return "num_subjects";
+       case eSubjectsLength:   return "subjects_length";
+       case eNumQueries:       return "num_queries";
+       case eTotalQueryLength: return "queries_length";
+       case eEvalueThreshold:  return "evalue_threshold";
+       case eNumThreads:       return "num_threads";
+       case eHitListSize:      return "hitlist_size";
+       case eOutputFmt:        return "output_fmt";
+       case eTaxIdList:        return "taxidlist";
+       case eNegTaxIdList:     return "negative_taxidlist";
+       case eGIList:           return "gilist";
+       case eNegGIList:        return "negative_gilist";
+       case eSeqIdList:        return "seqidlist";
+       case eNegSeqIdList:     return "negative_seqidlist";
+       case eIPGList:          return "ipglist";
+       case eNegIPGList:       return "negative_ipglist";
+       case eMaskAlgo:         return "mask_algo";
+       case eCompBasedStats:   return "comp_based_stats";
+       case eRange:            return "range";
+       case eMTMode:           return "mt_mode";
+       case eNumQueryBatches:  return "num_query_batches";
+       case eNumErrStatus:     return "num_error_status";
+       case ePSSMInput:        return "pssm_input";
+       case eConverged:        return "converged";
+       case eArchiveInput:     return "archive";
+       case eRIDInput:         return "rid";
+       case eDBInfo:           return "db_info";
+       case eDBTaxInfo:        return "db_tax_info";
+       case eDBEntry:          return "db_entry";
+       case eDBDumpAll:        return "db_entry_all";
+       case eDBType:           return "db_type";
+       case eInputType:        return "input_type";
+       case eParseSeqIDs:      return "parse_seqids";
+       case eSeqType:          return "seq_type";
+       case eDBTest:           return "db_test";
+       case eDBAliasMode:      return "db_alias_mode";
+       case eDocker:           return "docker";
+       case eGCP:              return "gcp";
+       case eAWS:              return "aws";
+       case eELBJobId:         return "elb_job_id";
+       case eELBBatchNum:      return "elb_batch_num";
+       default:
+               LOG_POST(Warning <<"Invalid usage params: " << (int)p);
+               abort();
+               break;
+    }
+    // Only reached if abort() were to return.
+    return string();
+}
+
+/// Record an integer parameter (stored as its decimal string); ignored while
+/// reporting is disabled.
+void CBlastUsageReport::AddParam(EUsageParams p, int val)
+{
+       if (!IsEnabled()){
+               return;
+       }
+       const string name = x_EUsageParmsToString(p);
+       m_Params.Add(name, NStr::IntToString(val));
+}
+
+/// Record a string parameter; ignored while reporting is disabled.
+void CBlastUsageReport::AddParam(EUsageParams p, const string & val)
+{
+       if (!IsEnabled()) {
+               return;
+       }
+       const string name = x_EUsageParmsToString(p);
+       m_Params.Add(name, val);
+}
+
+/// Record a floating-point parameter; ignored while reporting is disabled.
+void CBlastUsageReport::AddParam(EUsageParams p, const double & val)
+{
+       if (!IsEnabled()) {
+               return;
+       }
+       const string name = x_EUsageParmsToString(p);
+       m_Params.Add(name, val);
+}
+
+/// Decide whether usage reporting is on.
+/// The BLAST_USAGE_REPORT environment variable is consulted first, then the
+/// [BLAST] BLAST_USAGE_REPORT registry entry; a false value in either one
+/// disables reporting.  Otherwise reporting is enabled.
+/// NOTE(review): NStr::StringToBool is called without a no-throw flag, so a
+/// value it cannot parse presumably raises an exception -- confirm.
+void CBlastUsageReport::x_CheckBlastUsageEnv()
+{
+       char * env_setting = getenv("BLAST_USAGE_REPORT");
+       if(env_setting != NULL){
+               const bool env_enabled = NStr::StringToBool(env_setting);
+               if (!env_enabled) {
+                       SetEnabled(false);
+                       CUsageReportAPI::SetEnabled(false);
+                       LOG_POST(Info <<"Phone home disabled");
+                       return ;
+               }
+       }
+
+       // Registry built from an empty stream with the fWithNcbirc flag.
+       CNcbiIstrstream empty_stream(kEmptyCStr);
+       CRef<CNcbiRegistry> registry(new CNcbiRegistry(empty_stream, IRegistry::fWithNcbirc));
+       if (registry->HasEntry("BLAST", "BLAST_USAGE_REPORT")) {
+               const bool cfg_enabled = NStr::StringToBool(registry->Get("BLAST", "BLAST_USAGE_REPORT"));
+               if (!cfg_enabled) {
+                       SetEnabled(false);
+                       CUsageReportAPI::SetEnabled(false);
+                       LOG_POST(Info <<"Phone home disabled by config setting");
+                       return ;
+               }
+       }
+
+       CUsageReportAPI::SetEnabled(true);
+       SetEnabled(true);
+       LOG_POST(Info <<"Phone home enabled");
+}
+
+/// Record a 64-bit integer parameter; ignored while reporting is disabled.
+void CBlastUsageReport::AddParam(EUsageParams p, Int8 val)
+{
+       if (!IsEnabled()) {
+               return;
+       }
+       const string name = x_EUsageParmsToString(p);
+       m_Params.Add(name, val);
+}
+
+/// Record a boolean parameter; ignored while reporting is disabled.
+void CBlastUsageReport::AddParam(EUsageParams p, bool val)
+{
+       if (!IsEnabled()) {
+               return;
+       }
+       const string name = x_EUsageParmsToString(p);
+       m_Params.Add(name, val);
+}
diff --git a/c++/src/algo/blast/blastinput/blast_input.cpp b/c++/src/algo/blast/blastinput/blast_input.cpp

index ad81a464ef27d645ffd89c9ee99b2eb02d99de09..a74337db3e930c4b0d389edc0c97c17cf2015316 100644 (file)
--- a/c++/src/algo/blast/blastinput/blast_input.cpp
+++ b/c++/src/algo/blast/blastinput/blast_input.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blast_input.cpp 550028 2017-10-30 16:49:00Z rackerst $
+/*  $Id: blast_input.cpp 615335 2020-08-31 15:36:38Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -165,6 +165,8 @@ CBlastInput::GetNextSeqBatch(CScope& scope)
  
          retval->AddQuery(q);
      }
+    m_NumSeqs +=retval->Size();
+    m_TotalLength += size_read;
      _TRACE("Read " << retval->Size() << " queries");
      return retval;
  }
diff --git a/c++/src/algo/blast/blastinput/cmdline_flags.cpp b/c++/src/algo/blast/blastinput/cmdline_flags.cpp

index a2575940745498ac0733924b2b730753d1bc5a45..179fe960f0f73b7d9763dc884897fbe7d7ae16ea 100644 (file)
--- a/c++/src/algo/blast/blastinput/cmdline_flags.cpp
+++ b/c++/src/algo/blast/blastinput/cmdline_flags.cpp
@@ -1,4 +1,4 @@
-/*  $Id: cmdline_flags.cpp 605536 2020-04-13 11:07:50Z ivanov $
+/*  $Id: cmdline_flags.cpp 615184 2020-08-28 04:29:55Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -268,6 +268,8 @@ const string kArgPrintMdTag("md_tag");
  const string kArgUnalignedOutput("out_unaligned");
  const string kArgUnalignedFormat("unaligned_fmt");
  
+const string kArgMTMode("mt_mode");
+
  END_SCOPE(blast)
  END_NCBI_SCOPE
  
diff --git a/c++/src/algo/blast/blastinput/rpsblast_args.cpp b/c++/src/algo/blast/blastinput/rpsblast_args.cpp

index 68403aad306cc9889787485be91e0528943ebb0e..7caf41cc711bb6071ea584abe38781978fe82044 100644 (file)
--- a/c++/src/algo/blast/blastinput/rpsblast_args.cpp
+++ b/c++/src/algo/blast/blastinput/rpsblast_args.cpp
@@ -1,4 +1,4 @@
-/*  $Id: rpsblast_args.cpp 544441 2017-08-23 11:55:51Z camacho $
+/*  $Id: rpsblast_args.cpp 615193 2020-08-28 04:31:11Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -55,6 +55,14 @@ CRPSBlastMTArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
                             NStr::IntToString(kDfltRpsThreadingMode));
      arg_desc.SetConstraint(kArgNumThreads, 
                             new CArgAllowValuesGreaterThanOrEqual(0));
+    arg_desc.AddDefaultKey(kArgMTMode, "int_value",
+                           "Multi-thread mode to use in RPS BLAST search:\n "
+                           "0 (auto) split by database vols\n "
+                           "1 split by queries",
+                            CArgDescriptions::eInteger,
+                            NStr::IntToString(0));
+       arg_desc.SetConstraint(kArgMTMode,
+                              new CArgAllowValuesBetween(0, 1, true));
  #endif
      arg_desc.SetCurrentGroup("");
  }
@@ -140,6 +148,63 @@ CRPSBlastAppArgs::GetQueryBatchSize() const
      return blast::GetQueryBatchSize(eRPSBlast, m_IsUngapped, is_remote);
  }
  
+/// Return the query input stream by forwarding to the CBlastAppArgs
+/// implementation.
+CNcbiIstream& CRPSBlastAppArgs::GetInputStream()
+{
+    CNcbiIstream& in_stream = CBlastAppArgs::GetInputStream();
+    return in_stream;
+}
+/// Return the output stream by forwarding to the CBlastAppArgs
+/// implementation.
+CNcbiOstream& CRPSBlastAppArgs::GetOutputStream()
+{
+    CNcbiOstream& out_stream = CBlastAppArgs::GetOutputStream();
+    return out_stream;
+}
+
+/// Return the stream held in m_InputStream.
+/// The process is aborted when no stream is present.
+CNcbiIstream& CRPSBlastNodeArgs::GetInputStream()
+{
+    if (m_InputStream == NULL) {
+        abort();
+    }
+    CNcbiIstream& in_stream = *m_InputStream;
+    return in_stream;
+}
+/// Return the output stream owned by this object (m_OutputStream).
+CNcbiOstream& CRPSBlastNodeArgs::GetOutputStream()
+{
+    CNcbiOstream& out_stream = m_OutputStream;
+    return out_stream;
+}
+
+/// Construct node arguments whose input stream reads from a string.
+/// @param input Text exposed through GetInputStream() [in]
+CRPSBlastNodeArgs::CRPSBlastNodeArgs(const string & input)
+{
+    // NOTE(review): whether CNcbiIstrstream copies the buffer is not visible
+    // here -- confirm that `input` outlives this object if it does not.
+    const char* buffer = input.c_str();
+    m_InputStream = new CNcbiIstrstream(buffer, input.length());
+}
+
+/// Release the input stream allocated by the constructor.
+CRPSBlastNodeArgs::~CRPSBlastNodeArgs()
+{
+    // Deleting a null pointer is a no-op, so no explicit check is needed.
+    delete m_InputStream;
+    m_InputStream = NULL;
+}
+
+/// Return the batch size that blast::GetQueryBatchSize() computes for
+/// eRPSBlast, given the ungapped flag and whether remote execution is set.
+int CRPSBlastNodeArgs::GetQueryBatchSize() const
+{
+    const bool run_remotely =
+        m_RemoteArgs.NotEmpty() ? m_RemoteArgs->ExecuteRemotely() : false;
+    return blast::GetQueryBatchSize(eRPSBlast, m_IsUngapped, run_remotely);
+}
+
+/// Create a CBlastRPSOptionsHandle for the requested locality.
+/// The command line arguments are not consulted.
+/// @param locality Locality passed to the options handle constructor [in]
+CRef<CBlastOptionsHandle>
+CRPSBlastNodeArgs::x_CreateOptionsHandle(CBlastOptions::EAPILocality locality,
+                                         const CArgs& /*args*/)
+{
+    return CRef<CBlastOptionsHandle>(new CBlastRPSOptionsHandle(locality));
+}
  END_SCOPE(blast)
  END_NCBI_SCOPE
  
diff --git a/c++/src/algo/blast/blastinput/rpstblastn_args.cpp b/c++/src/algo/blast/blastinput/rpstblastn_args.cpp

index 98ae1ab8620158153f4a2559335d2c698b48c0dd..e37743a50805f32afe0c48650614274833577ff8 100644 (file)
--- a/c++/src/algo/blast/blastinput/rpstblastn_args.cpp
+++ b/c++/src/algo/blast/blastinput/rpstblastn_args.cpp
@@ -1,4 +1,4 @@
-/*  $Id: rpstblastn_args.cpp 505234 2016-06-23 13:16:57Z fongah2 $
+/*  $Id: rpstblastn_args.cpp 615193 2020-08-28 04:31:11Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -35,6 +35,7 @@
  #include <algo/blast/blastinput/rpstblastn_args.hpp>
  #include <algo/blast/api/rpstblastn_options.hpp>
  #include <algo/blast/blastinput/blast_input_aux.hpp>
+#include <algo/blast/blastinput/rpsblast_args.hpp>
  #include <algo/blast/api/version.hpp>
  
  BEGIN_NCBI_SCOPE
@@ -92,7 +93,7 @@ CRPSTBlastnAppArgs::CRPSTBlastnAppArgs()
      arg.Reset(m_FormattingArgs);
      m_Args.push_back(arg);
  
-    m_MTArgs.Reset(new CMTArgs(true));
+    m_MTArgs.Reset(new CRPSBlastMTArgs());
      arg.Reset(m_MTArgs);
      m_Args.push_back(arg); 
  
@@ -123,6 +124,64 @@ CRPSTBlastnAppArgs::GetQueryBatchSize() const
      return blast::GetQueryBatchSize(eRPSTblastn, m_IsUngapped, is_remote);
  }
  
+/// Return the query input stream by forwarding to the CBlastAppArgs
+/// implementation.
+CNcbiIstream& CRPSTBlastnAppArgs::GetInputStream()
+{
+    CNcbiIstream& in_stream = CBlastAppArgs::GetInputStream();
+    return in_stream;
+}
+/// Return the output stream by forwarding to the CBlastAppArgs
+/// implementation.
+CNcbiOstream& CRPSTBlastnAppArgs::GetOutputStream()
+{
+    CNcbiOstream& out_stream = CBlastAppArgs::GetOutputStream();
+    return out_stream;
+}
+
+/// Return the stream held in m_InputStream.
+/// The process is aborted when no stream is present.
+CNcbiIstream& CRPSTBlastnNodeArgs::GetInputStream()
+{
+    if (m_InputStream == NULL) {
+        abort();
+    }
+    CNcbiIstream& in_stream = *m_InputStream;
+    return in_stream;
+}
+/// Return the output stream owned by this object (m_OutputStream).
+CNcbiOstream& CRPSTBlastnNodeArgs::GetOutputStream()
+{
+    CNcbiOstream& out_stream = m_OutputStream;
+    return out_stream;
+}
+
+/// Construct node arguments whose input stream reads from a string.
+/// @param input Text exposed through GetInputStream() [in]
+CRPSTBlastnNodeArgs::CRPSTBlastnNodeArgs(const string & input)
+{
+    // NOTE(review): whether CNcbiIstrstream copies the buffer is not visible
+    // here -- confirm that `input` outlives this object if it does not.
+    const char* buffer = input.c_str();
+    m_InputStream = new CNcbiIstrstream(buffer, input.length());
+}
+
+/// Release the input stream allocated by the constructor.
+CRPSTBlastnNodeArgs::~CRPSTBlastnNodeArgs()
+{
+    // Deleting a null pointer is a no-op, so no explicit check is needed.
+    delete m_InputStream;
+    m_InputStream = NULL;
+}
+
+/// Return the batch size that blast::GetQueryBatchSize() computes for
+/// eRPSTblastn, given the ungapped flag and whether remote execution is set.
+int CRPSTBlastnNodeArgs::GetQueryBatchSize() const
+{
+    const bool run_remotely =
+        m_RemoteArgs.NotEmpty() ? m_RemoteArgs->ExecuteRemotely() : false;
+    return blast::GetQueryBatchSize(eRPSTblastn, m_IsUngapped, run_remotely);
+}
+
+/// Create a CRPSTBlastnOptionsHandle for the requested locality.
+/// The command line arguments are not consulted.
+/// @param locality Locality passed to the options handle constructor [in]
+CRef<CBlastOptionsHandle>
+CRPSTBlastnNodeArgs::x_CreateOptionsHandle(CBlastOptions::EAPILocality locality,
+                                           const CArgs& /*args*/)
+{
+    return CRef<CBlastOptionsHandle>(new CRPSTBlastnOptionsHandle(locality));
+}
+
  END_SCOPE(blast)
  END_NCBI_SCOPE
  
diff --git a/c++/src/algo/blast/core/blast_engine.c b/c++/src/algo/blast/core/blast_engine.c

index caeffd91bc7d950e623cfc0d6e7ce45636406777..797d17a52a9d013a033788b403f732e990bb75d4 100644 (file)
--- a/c++/src/algo/blast/core/blast_engine.c
+++ b/c++/src/algo/blast/core/blast_engine.c
@@ -1,4 +1,4 @@
-/* $Id: blast_engine.c 604741 2020-04-01 15:15:25Z ivanov $
+/* $Id: blast_engine.c 617226 2020-09-28 18:25:19Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -79,9 +79,9 @@
  #define CONV_NUCL2PROT_COORDINATES(length) (length) / CODON_LENGTH
  
  NCBI_XBLAST_EXPORT const int   kBlastMajorVersion = 2;
-NCBI_XBLAST_EXPORT const int   kBlastMinorVersion = 10;
-NCBI_XBLAST_EXPORT const int   kBlastPatchVersion = 1;
-NCBI_XBLAST_EXPORT const char* kBlastReleaseDate = "June-01-2020";
+NCBI_XBLAST_EXPORT const int   kBlastMinorVersion = 11;
+NCBI_XBLAST_EXPORT const int   kBlastPatchVersion = 0;
+NCBI_XBLAST_EXPORT const char* kBlastReleaseDate = "Oct-15-2020";
  
  /** Structure to be passed to s_BlastSearchEngineCore, containing pointers
      to various preallocated structures and arrays. */
@@ -992,6 +992,9 @@ s_BlastSetUpAuxStructures(const BlastSeqSrc* seq_src,
      Boolean jumper = (ext_options->ePrelimGapExt == eJumperWithTraceback);
      Int4 offset_array_size = GetOffsetArraySize(lookup_wrap);
  
+    if(phi_lookup) {
+       offset_array_size = PHI_MAX_HIT;
+    }
      ASSERT(seq_src);
  
      *aux_struct_ptr = aux_struct = (BlastCoreAuxStruct*)
diff --git a/c++/src/algo/blast/core/blast_kappa.c b/c++/src/algo/blast/core/blast_kappa.c

index 8cad34476d65e58f48c5ff777f729a424675f9b0..4a4447e6b30e2e4a863c22aa1807e08b83817e66 100644 (file)
--- a/c++/src/algo/blast/core/blast_kappa.c
+++ b/c++/src/algo/blast/core/blast_kappa.c
@@ -1,4 +1,4 @@
-/* $Id: blast_kappa.c 605341 2020-04-09 16:06:51Z ivanov $
+/* $Id: blast_kappa.c 616357 2020-09-15 12:19:52Z ivanov $
   * ==========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -3468,7 +3468,9 @@ Blast_RedoAlignmentCore_MT(EBlastProgramType program_number,
  
                  int tid = 0;
  #ifdef _OPENMP
-                tid = omp_get_thread_num();
+                if(actual_num_threads > 1) {
+                       tid = omp_get_thread_num();
+                }
  #endif
                  seqSrc               = seqsrc_tld[tid];
                  scoringParams        = score_params_tld[tid];
@@ -3492,10 +3494,12 @@ Blast_RedoAlignmentCore_MT(EBlastProgramType program_number,
                      if (seqSrc) {
                          continue;
                      }
+                    if(actual_num_threads > 1) {
  #pragma omp critical(intrpt)
-                    interrupt = TRUE;
+                       interrupt = TRUE;
  #pragma omp flush(interrupt)
-                    continue;
+                       continue;
+                    }
                  }
  
                  if (BlastCompo_EarlyTermination(
@@ -3507,10 +3511,12 @@ Blast_RedoAlignmentCore_MT(EBlastProgramType program_number,
                      if (seqSrc) {
                          continue;
                      }
+                    if(actual_num_threads > 1) {
  #pragma omp critical(intrpt)
-                    interrupt = TRUE;
+                       interrupt = TRUE;
  #pragma omp flush(interrupt)
-                    continue;
+                       continue;
+                    }
                  }
  
                  query_index = localMatch->query_index;
@@ -3728,7 +3734,8 @@ match_loop_cleanup:
                  }
                  s_MatchingSequenceRelease(&matchingSeq);
                  BlastCompo_AlignmentsFree(&incoming_aligns, NULL);
-                if (*pStatusCode != 0 || !seqSrc) {
+                if ((actual_num_threads > 1) &&
+                       (*pStatusCode != 0 || !seqSrc)) {
  #pragma omp critical(intrpt)
                      interrupt = TRUE;
  #pragma omp flush(interrupt)
diff --git a/c++/src/algo/blast/format/blast_format.cpp b/c++/src/algo/blast/format/blast_format.cpp

index aad39515e2fa742f517a17bcb1002767c8270e93..1b0289ecd1495b7007ef369f9d07789c77981fd3 100644 (file)
--- a/c++/src/algo/blast/format/blast_format.cpp
+++ b/c++/src/algo/blast/format/blast_format.cpp
@@ -2416,3 +2416,95 @@ void CBlastFormat::x_InitSAMFormatter()
      m_SamFormatter.reset(new CBlast_SAM_Formatter(m_Outfile, *m_Scope,
                                                           m_CustomOutputFormatSpec, pg));
  }
+
+bool s_SetCompBasedStats(EProgram program)
+{
+        if (program == eBlastp || program == eTblastn ||
+               program == ePSIBlast || program == ePSITblastn ||
+               program == eRPSBlast || program == eRPSTblastn ||
+               program == eBlastx  ||  program == eDeltaBlast) {
+                return true;
+        }
+        return false;
+}
+
+void CBlastFormat::LogBlastSearchInfo(CBlastUsageReport & report)
+{
+       if (report.IsEnabled()) {
+               report.AddParam(CBlastUsageReport::eProgram, m_Program);
+               EProgram task = m_Options->GetProgram();
+               string task_str =  EProgramToTaskName(task);
+               report.AddParam(CBlastUsageReport::eTask, task_str);
+               report.AddParam(CBlastUsageReport::eEvalueThreshold, m_Options->GetEvalueThreshold());
+               report.AddParam(CBlastUsageReport::eHitListSize, m_Options->GetHitlistSize());
+               report.AddParam(CBlastUsageReport::eOutputFmt, m_FormatType);
+
+               if (s_SetCompBasedStats(task)) {
+                       report.AddParam(CBlastUsageReport::eCompBasedStats, m_Options->GetCompositionBasedStats());
+               }
+
+               int num_seqs = 0;
+           for (size_t i = 0; i < m_DbInfo.size(); i++) {
+               num_seqs += m_DbInfo[i].number_seqs;
+           }
+               if( m_IsBl2Seq) {
+                       report.AddParam(CBlastUsageReport::eBl2seq, "true");
+                       if (m_IsDbScan) {
+                               report.AddParam(CBlastUsageReport::eNumSubjects, num_seqs);
+                               report.AddParam(CBlastUsageReport::eSubjectsLength, GetDbTotalLength());
+                       }
+                       else if (m_SeqInfoSrc.NotEmpty()){
+                               report.AddParam(CBlastUsageReport::eNumSubjects, (int) m_SeqInfoSrc->Size());
+                               int total_subj_length = 0;
+                               for (size_t i = 0; i < m_SeqInfoSrc->Size(); i++) {
+                                      total_subj_length += m_SeqInfoSrc->GetLength(i);
+                               }
+                               report.AddParam(CBlastUsageReport::eSubjectsLength, total_subj_length);
+                       }
+               }
+               else {
+                       string dir = kEmptyStr;
+                       CFile::SplitPath(m_DbName, &dir);
+                       string db_name = m_DbName;
+                       if (dir != kEmptyStr) {
+                               db_name = m_DbName.substr(dir.length());
+                       }
+                       report.AddParam(CBlastUsageReport::eDBName, db_name);
+                       report.AddParam(CBlastUsageReport::eDBLength, GetDbTotalLength());
+                       report.AddParam(CBlastUsageReport::eDBNumSeqs, num_seqs);
+                       report.AddParam(CBlastUsageReport::eDBDate, m_DbInfo[0].date);
+                       if(m_SearchDb.NotEmpty()){
+                               if(m_SearchDb->GetGiList().NotEmpty()) {
+                                        CRef<CSeqDBGiList>  l = m_SearchDb->GetGiList();
+                                        if (l->GetNumGis()) {
+                                                report.AddParam(CBlastUsageReport::eGIList, true);
+                                        }
+                                        if (l->GetNumSis()){
+                                                report.AddParam(CBlastUsageReport::eSeqIdList, true);
+                                        }
+                                        if (l->GetNumTaxIds()){
+                                                report.AddParam(CBlastUsageReport::eTaxIdList, true);
+                                        }
+                                        if (l->GetNumPigs()) {
+                                                report.AddParam(CBlastUsageReport::eIPGList, true);
+                                        }
+                               }
+                               if(m_SearchDb->GetNegativeGiList().NotEmpty()) {
+                                        CRef<CSeqDBGiList>  l = m_SearchDb->GetNegativeGiList();
+                                        if (l->GetNumGis()) {
+                                                report.AddParam(CBlastUsageReport::eNegGIList, true);
+                                        }
+                                        if (l->GetNumSis()){
+                                                report.AddParam(CBlastUsageReport::eNegSeqIdList, true);
+                                        }
+                                        if (l->GetNumTaxIds()){
+                                                report.AddParam(CBlastUsageReport::eNegTaxIdList, true);
+                                        }
+                                        if (l->GetNumPigs()) {
+                                                report.AddParam(CBlastUsageReport::eNegIPGList, true);
+                                        }
+                               }
+                       }
+               }
+       }
+}
diff --git a/c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp b/c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp

index 23c25e73926aef8db86466dd6487a995b14dc4b7..3e8d753e866d4672b1d10941c23ae52d274aa69b 100644 (file)
--- a/c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp
+++ b/c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp
@@ -1,4 +1,4 @@
-/*  $Id: version_reference_unit_test.cpp 604741 2020-04-01 15:15:25Z ivanov $
+/*  $Id: version_reference_unit_test.cpp 617227 2020-09-28 18:26:44Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -43,8 +43,8 @@ BOOST_AUTO_TEST_SUITE(version_reference)
  
  BOOST_AUTO_TEST_CASE(testVersion) {
      const int kMajor = 2;
-    const int kMinor = 10;
-    const int kPatch = 1;
+    const int kMinor = 11;
+    const int kPatch = 0;
      blast::CBlastVersion v;
      BOOST_REQUIRE_EQUAL(kMajor, v.GetMajor());
      BOOST_REQUIRE_EQUAL(kMinor, v.GetMinor());
diff --git a/c++/src/app/CMakeLists.txt b/c++/src/app/CMakeLists.txt

index f080ea2acf6bc3c727fd55e083f064b27c35808f..cd1b238738a3e761484a2deba7a1c6d1cac89ca3 100644 (file)
--- a/c++/src/app/CMakeLists.txt
+++ b/c++/src/app/CMakeLists.txt
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMakeLists.txt 594373 2019-10-03 13:30:50Z gouriano $
+# $Id: CMakeLists.txt 612980 2020-07-30 19:13:50Z ivanov $
  #############################################################################
  
  # Include projects from this directory
@@ -23,6 +23,7 @@ NCBI_add_subdirectory(
    convert_seq
    discrepancy_report
    dustmasker
+  flat2asn
    formatguess
    gap_stats
    gi2taxid
diff --git a/c++/src/app/Makefile.in b/c++/src/app/Makefile.in

index 5650214166794f4fe22b62d1ce4a7e84ddde8a8a..feadce627bbac0671752dadb70a778db9dc7c4bb 100644 (file)
--- a/c++/src/app/Makefile.in
+++ b/c++/src/app/Makefile.in
@@ -1,9 +1,9 @@
-# $Id: Makefile.in 591515 2019-08-16 14:37:05Z ludwigf $
+# $Id: Makefile.in 612980 2020-07-30 19:13:50Z ivanov $
  
  # Miscellaneous applications
  #################################
  
-SUB_PROJ = asn2asn asn2fasta asn2flat asnval asn_cleanup \
+SUB_PROJ = asn2asn asn2fasta asn2flat flat2asn asnval asn_cleanup \
             id1_fetch blast convert_seq \
             nmer_repeats objmgr gi2taxid netschedule grid netstorage igblast \
             winmasker dustmasker segmasker blastdb vecscreen \
@@ -15,7 +15,7 @@ SUB_PROJ = asn2asn asn2fasta asn2flat asnval asn_cleanup \
             srcchk tableval ncbi_encrypt ssub_fork asn_cache magicblast \
             multipattern prt2fsm \
             pub_report gff_deconcat sub_fuse \
-                  feat_import
+           feat_import
  
  EXPENDABLE_SUB_PROJ = split_cache wig2table netcache rmblastn dblb tls idfetch pubseq_gateway
  
diff --git a/c++/src/app/blast/CMakeLists.rpsblast.app.txt b/c++/src/app/blast/CMakeLists.rpsblast.app.txt

index 8baf4a1a7dac5a316a6827c9ca518aad0ab3eecc..9e28aa9673cf3680b355e844fa4491a07478adcf 100644 (file)
--- a/c++/src/app/blast/CMakeLists.rpsblast.app.txt
+++ b/c++/src/app/blast/CMakeLists.rpsblast.app.txt
@@ -1,9 +1,9 @@
  #############################################################################
-# $Id: CMakeLists.rpsblast.app.txt 593591 2019-09-20 14:53:34Z gouriano $
+# $Id: CMakeLists.rpsblast.app.txt 615197 2020-08-28 04:31:45Z fukanchi $
  #############################################################################
  
  NCBI_begin_app(rpsblast)
-  NCBI_sources(rpsblast_app)
+  NCBI_sources(rpsblast_node rpsblast_app)
    NCBI_add_definitions(NCBI_MODULE=BLAST)
    NCBI_uses_toolkit_libraries(blast_app_util)
    NCBI_requires(-Cygwin)
diff --git a/c++/src/app/blast/CMakeLists.rpstblastn.app.txt b/c++/src/app/blast/CMakeLists.rpstblastn.app.txt

index b87f1f737780e02086079691cadacef97a3fb2ef..de8d7b0cf1fa64d940d9bc16c1fc48c5fd66f4dd 100644 (file)
--- a/c++/src/app/blast/CMakeLists.rpstblastn.app.txt
+++ b/c++/src/app/blast/CMakeLists.rpstblastn.app.txt
@@ -1,9 +1,9 @@
  #############################################################################
-# $Id: CMakeLists.rpstblastn.app.txt 593591 2019-09-20 14:53:34Z gouriano $
+# $Id: CMakeLists.rpstblastn.app.txt 615200 2020-08-28 04:32:09Z fukanchi $
  #############################################################################
  
  NCBI_begin_app(rpstblastn)
-  NCBI_sources(rpstblastn_app)
+  NCBI_sources(rpstblastn_node rpstblastn_app)
    NCBI_add_definitions(NCBI_MODULE=BLAST)
    NCBI_uses_toolkit_libraries(blast_app_util)
    NCBI_requires(-Cygwin)
diff --git a/c++/src/app/blast/Makefile.rpsblast.app b/c++/src/app/blast/Makefile.rpsblast.app

index 5cd6a080d77e0c8f07b60fe5213d76ea6b533ea6..3ffc95ee0c7f7a4baa708ec7f623de2a907420b6 100644 (file)
--- a/c++/src/app/blast/Makefile.rpsblast.app
+++ b/c++/src/app/blast/Makefile.rpsblast.app
@@ -1,7 +1,7 @@
  WATCHERS = camacho madden fongah2
  
  APP = rpsblast
-SRC = rpsblast_app
+SRC = rpsblast_node rpsblast_app
  LIB_ = $(BLAST_INPUT_LIBS) $(BLAST_LIBS) xregexp $(PCRE_LIB) $(OBJMGR_LIBS)
  LIB = blast_app_util $(LIB_:%=%$(STATIC))
  
diff --git a/c++/src/app/blast/Makefile.rpstblastn.app b/c++/src/app/blast/Makefile.rpstblastn.app

index c5b6fa130edecaa4ea464824f6d42be0a6355f3a..4c0df46622444b3275fbbbc60debb005f8b46373 100644 (file)
--- a/c++/src/app/blast/Makefile.rpstblastn.app
+++ b/c++/src/app/blast/Makefile.rpstblastn.app
@@ -1,7 +1,7 @@
  WATCHERS = camacho madden fongah2
  
  APP = rpstblastn
-SRC = rpstblastn_app
+SRC = rpstblastn_node rpstblastn_app
  LIB_ = $(BLAST_INPUT_LIBS) $(BLAST_LIBS) xregexp $(PCRE_LIB) $(OBJMGR_LIBS)
  LIB = blast_app_util $(LIB_:%=%$(STATIC))
  
diff --git a/c++/src/app/blast/blast_app_util.cpp b/c++/src/app/blast/blast_app_util.cpp

index 729f353f44adb4e3c45e6d480ac7dc69e9ffd93a..b9eee79cb58b9af4702a7613293532dc8e70e733 100644 (file)
--- a/c++/src/app/blast/blast_app_util.cpp
+++ b/c++/src/app/blast/blast_app_util.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blast_app_util.cpp 592833 2019-09-09 13:01:28Z fongah2 $
+/*  $Id: blast_app_util.cpp 615351 2020-08-31 15:38:53Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -915,47 +915,6 @@ GetSubjectFile(const CArgs& args)
         return filename;
  }
  
-
-void CBlastAppDiagHandler::Post(const SDiagMessage & mess)
-{
-       if(m_handler != NULL) {
-               m_handler->Post(mess);
-       }
-       if(m_save) {
-               CRef<CBlast4_error> d(new CBlast4_error);
-               string m;
-               mess.Write(m);
-               d->SetMessage(NStr::Sanitize(m));
-               d->SetCode((int)mess.m_Severity);
-               {
-                       DEFINE_STATIC_MUTEX(mx);
-                       CMutexGuard guard(mx);
-                       m_messages.push_back(d);
-               }
-       }
-}
-
-void CBlastAppDiagHandler::ResetMessages()
-{
-       DEFINE_STATIC_MUTEX(mx);
-       CMutexGuard guard(mx);
-       m_messages.clear();
-}
-
-CBlastAppDiagHandler::~CBlastAppDiagHandler()
-{
-       if(m_handler) {
-               SetDiagHandler(m_handler);
-               m_handler = NULL;
-       }
-}
-
-void CBlastAppDiagHandler::DoNotSaveMessages(void)
-{
-       m_save = false;
-       ResetMessages();
-}
-
  void PrintErrorArchive(const CArgs & a, const list<CRef<CBlast4_error> > & msg)
  {
         try {
@@ -983,4 +942,41 @@ void QueryBatchCleanup()
  
  }
  
+/// Add the total query length and the number of queries processed by a
+/// CBlastInput to a usage report.
+/// @param report Usage report that receives the parameters [in|out]
+/// @param q_info Query input source the counters are read from [in]
+void LogQueryInfo(CBlastUsageReport & report, const CBlastInput & q_info)
+{
+    typedef CBlastUsageReport TReport;
+
+    report.AddParam(TReport::eTotalQueryLength,
+                    q_info.GetTotalLengthProcessed());
+    report.AddParam(TReport::eNumQueries,
+                    q_info.GetNumSeqsProcessed());
+}
+
+
+/// Add program name, e-value threshold, hit list size and composition based
+/// statistics setting, all read from the BLAST options, to a usage report.
+/// @param report Usage report that receives the parameters [in|out]
+/// @param opt BLAST options the values are read from [in]
+void LogRPSBlastOptions(blast::CBlastUsageReport & report, const CBlastOptions & opt)
+{
+    typedef CBlastUsageReport TReport;
+
+    report.AddParam(TReport::eProgram,
+                    Blast_ProgramNameFromType(opt.GetProgramType()));
+    report.AddParam(TReport::eEvalueThreshold,
+                    opt.GetEvalueThreshold());
+    report.AddParam(TReport::eHitListSize,
+                    opt.GetHitlistSize());
+    report.AddParam(TReport::eCompBasedStats,
+                    opt.GetCompositionBasedStats());
+}
+
+void LogRPSCmdOptions(blast::CBlastUsageReport & report, const CBlastAppArgs & args)
+{
+       if (args.GetBlastDatabaseArgs().NotEmpty() &&
+               args.GetBlastDatabaseArgs()->GetSearchDatabase().NotEmpty() &&
+               args.GetBlastDatabaseArgs()->GetSearchDatabase()->GetSeqDb().NotEmpty()) {
+
+               CRef<CSeqDB> db = args.GetBlastDatabaseArgs()->GetSearchDatabase()->GetSeqDb();
+               string db_name = db->GetDBNameList();
+               int off = db_name.find_last_of(CFile::GetPathSeparator());
+           if (off != -1) {
+               db_name.erase(0, off+1);
+               }
+               report.AddParam(CBlastUsageReport::eDBName, db_name);
+               report.AddParam(CBlastUsageReport::eDBLength, (Int8) db->GetTotalLength());
+               report.AddParam(CBlastUsageReport::eDBNumSeqs, db->GetNumSeqs());
+               report.AddParam(CBlastUsageReport::eDBDate, db->GetDate());
+       }
+
+       if(args.GetFormattingArgs().NotEmpty()){
+               report.AddParam(CBlastUsageReport::eOutputFmt, args.GetFormattingArgs()->GetFormattedOutputChoice());
+       }
+}
  END_NCBI_SCOPE
diff --git a/c++/src/app/blast/blast_app_util.hpp b/c++/src/app/blast/blast_app_util.hpp

index ace25cbd86e14b10598702c47f272aed83e2a039..998b65afbcaf14fa0e822606e7850d5aac8d4a75 100644 (file)
--- a/c++/src/app/blast/blast_app_util.hpp
+++ b/c++/src/app/blast/blast_app_util.hpp
@@ -1,4 +1,4 @@
-/*  $Id: blast_app_util.hpp 570350 2018-09-07 12:47:53Z fongah2 $
+/*  $Id: blast_app_util.hpp 615351 2020-08-31 15:38:53Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -38,6 +38,7 @@
  #include <objtools/readers/reader_exception.hpp>        
  #include <objtools/blast/seqdb_reader/seqdb.hpp>
  #include <algo/blast/blastinput/blast_args.hpp>
+#include <algo/blast/blastinput/blast_input.hpp>
  
  #include <objects/blast/Blast4_request.hpp>
  #include <algo/blast/api/uniform_search.hpp>
@@ -48,6 +49,7 @@
  #include <objtools/blast/seqdb_writer/writedb_error.hpp>
  #include <algo/blast/format/blastfmtutil.hpp>   // for CBlastFormatUtil
  #include <algo/blast/blastinput/blast_scope_src.hpp>    // for SDataLoaderConfig
+#include <algo/blast/api/blast_usage_report.hpp>
  
  BEGIN_NCBI_SCOPE
  
@@ -302,28 +304,6 @@ UseXInclude(const blast::CFormattingArgs & f, const string & s);
  string
  GetSubjectFile(const CArgs& args);
  
-/// Class to capture message from diag handler
-class CBlastAppDiagHandler : public CDiagHandler
-{
-public:
-       /// Constructor
-       CBlastAppDiagHandler():m_handler(GetDiagHandler(true)), m_save (true) {}
-       /// Destructor
-       ~CBlastAppDiagHandler();
-       /// Save and post diag message
-       virtual void Post (const SDiagMessage & mess);
-       /// Reset messgae buffer, erase all saved message
-       void ResetMessages(void);
-       /// Call to turn off saving diag message, discard all saved message
-       void DoNotSaveMessages(void);
-       /// Return list of saved diag messages
-       list<CRef<CBlast4_error> > & GetMessages(void) { return m_messages;}
-private :
-       CDiagHandler * m_handler;
-       list<CRef<CBlast4_error> > m_messages;
-       bool m_save;
-};
-
  /// Function to print blast archive with only error messages (search failed)
  /// to output stream
  /// @param a cmdline args [in]
@@ -333,6 +313,12 @@ void PrintErrorArchive(const CArgs & a, const list<CRef<CBlast4_error> > & msg);
  /// Clean up formatter scope and release
  void QueryBatchCleanup();
  
+void LogQueryInfo(blast::CBlastUsageReport & report, const blast::CBlastInput & q_info);
+
+/// Log blast usage opts for rpsblast apps
+void LogRPSBlastOptions(blast::CBlastUsageReport & report, const blast::CBlastOptions & opt);
+void LogRPSCmdOptions(blast::CBlastUsageReport & report, const blast::CBlastAppArgs & args);
+
  END_NCBI_SCOPE
  
  #endif /* APP__BLAST_APP_UTIL__HPP */
diff --git a/c++/src/app/blast/blast_formatter.cpp b/c++/src/app/blast/blast_formatter.cpp

index e8474b45424b43a9625489f24d9e5e4d5477e7a4..85e43bc5ebe8e05584beb4a717024998b81553d3 100644 (file)
--- a/c++/src/app/blast/blast_formatter.cpp
+++ b/c++/src/app/blast/blast_formatter.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blast_formatter.cpp 591152 2019-08-12 11:18:21Z fongah2 $
+/*  $Id: blast_formatter.cpp 616875 2020-09-22 13:14:55Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -40,6 +40,9 @@
  #include <algo/blast/blastinput/blast_input_aux.hpp>
  #include <algo/blast/format/blast_format.hpp>
  #include <algo/blast/api/objmgr_query_data.hpp>
+#include <objtools/data_loaders/blastdb/bdbloader_rmt.hpp>
+#include <objtools/data_loaders/genbank/gbloader.hpp>
+#include <objtools/data_loaders/genbank/id2/reader_id2.hpp>
  #include "blast_app_util.hpp"
  
  
@@ -57,8 +60,18 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
-       m_LoadFromArchive = false;
+        m_LoadFromArchive = false;
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+               m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blast_formatter");
+        }
+    }
+
+    /// On destruction, add the stopwatch's elapsed time to the usage report.
+    ~CBlastFormatterApp() {
+        const double elapsed = m_StopWatch.Elapsed();
+        m_UsageReport.AddParam(CBlastUsageReport::eRunTime, elapsed);
      }
+
  private:
      /** @inheritDoc */
      virtual void Init();
@@ -82,6 +95,8 @@ private:
      /// @param scope Scope object to add the sequence data to [in|out]
      SSeqLoc x_QueryBioseqToSSeqLoc(const CBioseq& bioseq, CRef<CScope> scope);
  
+    void x_AddCmdOptions();
+
      /// Our link to the NCBI BLAST service
      CRef<CRemoteBlast> m_RmtBlast;
  
@@ -90,6 +105,8 @@ private:
  
      /// Tracks whether results come from an archive file.
      bool m_LoadFromArchive;
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  void CBlastFormatterApp::Init()
@@ -237,6 +254,78 @@ s_ConvertSubjects2TSeqLocVector(CRef<CRemoteBlast> remote_blast)
      return retval;
  }
  
+bool
+s_InitializeSubject(CRef<blast::CBlastDatabaseArgs> db_args,
+                  CRef<blast::CBlastOptionsHandle> opts_hndl,
+                  CRef<blast::CLocalDbAdapter>& db_adapter,
+                  CRef<objects::CScope>& scope)
+{
+       bool isRemote = false;
+    db_adapter.Reset();
+
+    _ASSERT(db_args.NotEmpty());
+    CRef<CSearchDatabase> search_db = db_args->GetSearchDatabase();
+
+    if (scope.Empty()) {
+          scope.Reset(new CScope(*CObjectManager::GetInstance()));
+    }
+
+    CRef<IQueryFactory> subjects;
+    if ( (subjects = db_args->GetSubjects(scope)) ) {
+        _ASSERT(search_db.Empty());
+        char* bl2seq_legacy = getenv("BL2SEQ_LEGACY");
+        if (bl2seq_legacy) {
+               db_adapter.Reset(new CLocalDbAdapter(subjects, opts_hndl, false));
+        }
+        else {
+            db_adapter.Reset(new CLocalDbAdapter(subjects, opts_hndl, true));
+        }
+    } else {
+        _ASSERT(search_db.NotEmpty());
+        try {
+            // Try to open the BLAST database even for remote searches, as if
+            // it is available locally, it will be better to fetch the
+            // sequence data for formatting from this (local) source
+            CRef<CSeqDB> seqdb = search_db->GetSeqDb();
+            db_adapter.Reset(new CLocalDbAdapter(*search_db));
+            scope->AddDataLoader(RegisterOMDataLoader(seqdb), CBlastDatabaseArgs::kSubjectsDataLoaderPriority);
+            LOG_POST(Info <<"Add local loader " << search_db->GetDatabaseName());
+        } catch (const CSeqDBException&) {
+               SetDiagPostLevel(eDiag_Critical);
+            string remote_loader = kEmptyStr;
+            try {
+            db_adapter.Reset(new CLocalDbAdapter(*search_db));
+            remote_loader = CRemoteBlastDbDataLoader::RegisterInObjectManager
+                                    (*( CObjectManager::GetInstance()),
+                                    search_db->GetDatabaseName(),
+                                    search_db->IsProtein()  ? CBlastDbDataLoader::eProtein : CBlastDbDataLoader::eNucleotide,
+                                    true, CObjectManager::eDefault, CBlastDatabaseArgs::kSubjectsDataLoaderPriority)
+                                    .GetLoader()->GetName();
+            scope->AddDataLoader(remote_loader, CBlastDatabaseArgs::kSubjectsDataLoaderPriority);
+            SetDiagPostLevel(eDiag_Warning);
+            isRemote = true;
+            LOG_POST(Info <<"Remote " << search_db->GetDatabaseName());
+            }
+            catch (CException & e) {
+               SetDiagPostLevel(eDiag_Warning);
+               NCBI_THROW(CException, eUnknown, "Fail to initialize local or remote DB" );
+            }
+        }
+    }
+    try {
+       const int kGenbankLoaderPriority = 99;
+        CRef<CReader> reader(new CId2Reader);
+        reader->SetPreopenConnection(false);
+        string genbank_loader = CGBDataLoader::RegisterInObjectManager
+            (*( CObjectManager::GetInstance()), reader,CObjectManager::eNonDefault).GetLoader()->GetName();
+        scope->AddDataLoader(genbank_loader, kGenbankLoaderPriority);
+    } catch (const CException& e) {
+       LOG_POST(Info << "Failed to add genbank dataloader");
+       // It's ok not to have genbank loader
+    }
+    return isRemote;
+}
+
  int CBlastFormatterApp::PrintFormattedOutput(void)
  {
      int retval = 0;
@@ -306,7 +395,7 @@ int CBlastFormatterApp::PrintFormattedOutput(void)
      }
  
      CRef<CLocalDbAdapter> db_adapter;
-    InitializeSubject(db_args, opts_handle, true, db_adapter, scope);
+    bool isRemoteLoader = s_InitializeSubject(db_args, opts_handle, db_adapter, scope);
  
      const string kTask = m_RmtBlast->GetTask();
  
@@ -323,7 +412,7 @@ int CBlastFormatterApp::PrintFormattedOutput(void)
                             opts.GetQueryGeneticCode(),
                             opts.GetDbGeneticCode(),
                             opts.GetSumStatisticsMode(),
-                           !kRid.empty(),
+                           (!kRid.empty() || isRemoteLoader),
                             filtering_algorithm,
                             fmt_args.GetCustomOutputFormatSpec(),
                             kTask == "megablast",
@@ -396,7 +485,7 @@ int CBlastFormatterApp::PrintFormattedOutput(void)
                  else {
                     scope->AddScope(*(queries->GetScope(0)));
                  }
-               InitializeSubject(db_args, opts_handle, true, db_adapter, scope);
+                   s_InitializeSubject(db_args, opts_handle, db_adapter, scope);
         }
      }
      formatter.PrintEpilog(opts);
@@ -475,9 +564,26 @@ int CBlastFormatterApp::Run(void)
          }
  
      } CATCH_ALL(status)
+    x_AddCmdOptions();
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
+/// Adds the user's command-line choices to the usage report: the RID when one
+/// was given (otherwise the fact that an archive was supplied), and the value
+/// of the "outfmt" argument when it has one.
+void CBlastFormatterApp::x_AddCmdOptions()
+{
+    const CArgs& cmd_args = GetArgs();
+
+    // An RID takes precedence; archive input is only reported without one.
+    const CArgValue& rid = cmd_args[kArgRid];
+    if (rid.HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eRIDInput, rid.AsString());
+    } else {
+        if (cmd_args[kArgArchive].HasValue()) {
+            m_UsageReport.AddParam(CBlastUsageReport::eArchiveInput, true);
+        }
+    }
+
+    const CArgValue& out_fmt = cmd_args["outfmt"];
+    if (out_fmt.HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eOutputFmt, out_fmt.AsString());
+    }
+}
+
  
  #ifndef SKIP_DOXYGEN_PROCESSING
  int main(int argc, const char* argv[] /*, const char* envp[]*/)
diff --git a/c++/src/app/blast/blastn_app.cpp b/c++/src/app/blast/blastn_app.cpp

index ef3e0d61a907ece9e8660d08ff2b660087fe0e96..821d9ccf75c0f3f7aad9f621ead04be414a327ec 100644 (file)
--- a/c++/src/app/blast/blastn_app.cpp
+++ b/c++/src/app/blast/blastn_app.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blastn_app.cpp 574693 2018-11-16 17:46:37Z zaretska $
+/*  $Id: blastn_app.cpp 615344 2020-08-31 15:37:55Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -55,6 +55,14 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+        }
+    }
+
+    /// Destructor: adds the stopwatch's elapsed time to the usage report
+    /// under CBlastUsageReport::eRunTime.
+    ~CBlastnApp() {
+       m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
      }
  private:
      /** @inheritDoc */
@@ -64,6 +72,8 @@ private:
  
      /// This application's command line args
      CRef<CBlastnAppArgs> m_CmdLineArgs; 
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  void CBlastnApp::Init()
@@ -111,6 +121,7 @@ int CBlastnApp::Run(void)
          /*** Get the query sequence(s) ***/
          CRef<CQueryOptionsArgs> query_opts = 
              m_CmdLineArgs->GetQueryOptionsArgs();
+
          SDataLoaderConfig dlconfig =
              InitializeQueryDataLoaderConfiguration(query_opts->QueryIsProtein(),
                                                     db_adapter);
@@ -223,12 +234,17 @@ int CBlastnApp::Run(void)
              opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
          }
  
+        LogQueryInfo(m_UsageReport, input);
+        formatter.LogBlastSearchInfo(m_UsageReport);
      } CATCH_ALL(status)
  
      if(!bah.GetMessages().empty()) {
         const CArgs & a = GetArgs();
         PrintErrorArchive(a, bah.GetMessages());
      }
+
+       m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
diff --git a/c++/src/app/blast/blastp_app.cpp b/c++/src/app/blast/blastp_app.cpp

index 4be0f568c81822bd6556c41633e20b1b728e1909..36ff687ee5fe4de85e6dcc1d2b9ebe8ffe82a5bb 100644 (file)
--- a/c++/src/app/blast/blastp_app.cpp
+++ b/c++/src/app/blast/blastp_app.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blastp_app.cpp 574693 2018-11-16 17:46:37Z zaretska $
+/*  $Id: blastp_app.cpp 616355 2020-09-15 12:19:36Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -55,6 +55,14 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+        }
+    }
+
+    /// Destructor: adds the stopwatch's elapsed time to the usage report
+    /// under CBlastUsageReport::eRunTime.
+    ~CBlastpApp() {
+       m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
      }
  private:
      /** @inheritDoc */
@@ -64,6 +72,8 @@ private:
  
      /// This application's command line args
      CRef<CBlastpAppArgs> m_CmdLineArgs;
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  void CBlastpApp::Init()
@@ -202,11 +212,16 @@ int CBlastpApp::Run(void)
              opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
          }
  
+        LogQueryInfo(m_UsageReport, input);
+        formatter.LogBlastSearchInfo(m_UsageReport);
      } CATCH_ALL(status)
      if(!bah.GetMessages().empty()) {
                 const CArgs & a = GetArgs();
                 PrintErrorArchive(a, bah.GetMessages());
      }
+
+    m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
diff --git a/c++/src/app/blast/blastx_app.cpp b/c++/src/app/blast/blastx_app.cpp

index 3e4e9eeb9a8d220e6d418b2eeb4185f0ad8e613b..c6d82121ee19233adba1511bd8c5917aaf6cb3d3 100644 (file)
--- a/c++/src/app/blast/blastx_app.cpp
+++ b/c++/src/app/blast/blastx_app.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blastx_app.cpp 574693 2018-11-16 17:46:37Z zaretska $
+/*  $Id: blastx_app.cpp 615342 2020-08-31 15:37:39Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -55,7 +55,15 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+        }
      }
+
+    /// Destructor: adds the stopwatch's elapsed time to the usage report
+    /// under CBlastUsageReport::eRunTime.
+    ~CBlastxApp() {
+       m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
+     }
  private:
      /** @inheritDoc */
      virtual void Init();
@@ -64,6 +72,8 @@ private:
  
      /// This application's command line args
      CRef<CBlastxAppArgs> m_CmdLineArgs;
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  void CBlastxApp::Init()
@@ -202,11 +212,15 @@ int CBlastxApp::Run(void)
              opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
          }
  
+        LogQueryInfo(m_UsageReport, input);
+        formatter.LogBlastSearchInfo(m_UsageReport);
      } CATCH_ALL(status)
      if(!bah.GetMessages().empty()) {
                 const CArgs & a = GetArgs();
                 PrintErrorArchive(a, bah.GetMessages());
      }
+       m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
diff --git a/c++/src/app/blast/deltablast_app.cpp b/c++/src/app/blast/deltablast_app.cpp

index e8a39e905c67f94bacb454957f4955f1210daa8b..20adaaa54ead6c73d0422e5b0fb146bdd5726759 100644 (file)
--- a/c++/src/app/blast/deltablast_app.cpp
+++ b/c++/src/app/blast/deltablast_app.cpp
@@ -1,4 +1,4 @@
-/*  $Id: deltablast_app.cpp 570608 2018-09-12 12:17:57Z fongah2 $
+/*  $Id: deltablast_app.cpp 615345 2020-08-31 15:38:03Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -63,6 +63,13 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+        }
+    }
+    /// Destructor: adds the stopwatch's elapsed time to the usage report
+    /// under CBlastUsageReport::eRunTime.
+    ~CDeltaBlastApp() {
+       m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
      }
  private:
      /** @inheritDoc */
@@ -100,6 +107,8 @@ private:
      CRef<CBlastAncillaryData> m_AncillaryData;
  
      CBlastAppDiagHandler m_bah;
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  void CDeltaBlastApp::Init()
@@ -440,11 +449,15 @@ int CDeltaBlastApp::Run(void)
              opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
          }
  
+        LogQueryInfo(m_UsageReport, input);
+        formatter.LogBlastSearchInfo(m_UsageReport);
      } CATCH_ALL(status)
      if(!m_bah.GetMessages().empty()) {
         const CArgs & a = GetArgs();
         PrintErrorArchive(a, m_bah.GetMessages());
      }
+       m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
diff --git a/c++/src/app/blast/get_species_taxids.sh b/c++/src/app/blast/get_species_taxids.sh

index 10275b8f2eec388fd9bc89f28d24ac36ddaaa2f5..60995cd7e8fb58ad61a78dd91159032937645355 100755 (executable)
--- a/c++/src/app/blast/get_species_taxids.sh
+++ b/c++/src/app/blast/get_species_taxids.sh
@@ -1,5 +1,5 @@
  #!/bin/bash
-# $Id: get_species_taxids.sh 588462 2019-06-24 18:46:42Z camacho $
+# $Id: get_species_taxids.sh 617228 2020-09-28 18:26:52Z ivanov $
  # ===========================================================================
  #
  #                            PUBLIC DOMAIN NOTICE
@@ -141,8 +141,9 @@ if [ ! -z "${NAME}" ]; then
          error_exit "esummary error" $?
      fi
          
+    sed -i 's/,\|{/\n/g' $TMP 
      grep 'uid\|rank\|division\|scientificname\|commonname' $TMP | \
-    grep -v "uids\|genbankdivision" | tr -d '"\|,' | tr -s ' ' | \
+    grep -v "uids\|genbankdivision" | tr  '"\|,'  " " | tr -s ' ' | \
      sed 's/uid/\nTaxid/g;s/name/ name/g' > $OUTPUT
  
      echo -e "\n$NUM_RESULTS matche(s) found.\n" >> $OUTPUT
diff --git a/c++/src/app/blast/legacy_blast.pl b/c++/src/app/blast/legacy_blast.pl

index 6422cf7e1cb1e22aa7dc599c5687b46eec62ab2b..876a1ca56809818dce561ea798c582504d44af28 100755 (executable)
--- a/c++/src/app/blast/legacy_blast.pl
+++ b/c++/src/app/blast/legacy_blast.pl
@@ -1,5 +1,5 @@
-#! /usr/bin/perl -w
-# $Id: legacy_blast.pl 195935 2010-06-28 20:32:08Z camacho $
+#! /usr/bin/env perl
+# $Id: legacy_blast.pl 609147 2020-05-27 11:52:21Z ivanov $
  # ===========================================================================
  #
  #                            PUBLIC DOMAIN NOTICE
@@ -68,7 +68,7 @@ if ($application eq "blastall") {
  } elsif ($application eq "seedtop") {
      $cmd = &handle_seedtop(\$print_only);
  } elsif ($application =~ /version/) {
-    my $revision = '$Revision: 195935 $';
+    my $revision = '$Revision: 609147 $';
      $revision =~ s/\$Revision: | \$//g;
      print "$0 version $revision\n";
      goto CLEAN_UP;
diff --git a/c++/src/app/blast/psiblast_app.cpp b/c++/src/app/blast/psiblast_app.cpp

index f80fd71aa1360c888e09fe4c4afb61b75ca01810..96a6cabb98326103387159d8e571dc5b79066430 100644 (file)
--- a/c++/src/app/blast/psiblast_app.cpp
+++ b/c++/src/app/blast/psiblast_app.cpp
@@ -1,4 +1,4 @@
-/*  $Id: psiblast_app.cpp 570608 2018-09-12 12:17:57Z fongah2 $
+/*  $Id: psiblast_app.cpp 617621 2020-10-05 13:24:26Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -62,6 +62,13 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+        }
+    }
+    /// Destructor: adds the stopwatch's elapsed time to the usage report
+    /// under CBlastUsageReport::eRunTime.
+    ~CPsiBlastApp() {
+       m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
      }
  private:
      /** @inheritDoc */
@@ -113,6 +120,8 @@ private:
      CConstRef<CBlastAncillaryData> m_AncillaryData;
  
      CBlastAppDiagHandler m_bah;
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  void CPsiBlastApp::Init()
@@ -368,6 +377,7 @@ CPsiBlastApp::DoIterations(CRef<CBlastOptionsHandle> opts_hndl,
          retval = x_RunLocalPsiBlastIterations(query, pssm, scope, db_adapter,
                                                opts_hndl, formatter, kNumIterations);
      }
+       m_UsageReport.AddParam(CBlastUsageReport::eConverged, retval);
      return retval;
  }
  
@@ -459,6 +469,7 @@ int CPsiBlastApp::Run(void)
              _TRACE("PSI-BLAST running with FASTA input");
          } else {
              _TRACE("PSI-BLAST running with PSSM input");
+               m_UsageReport.AddParam(CBlastUsageReport::ePSSMInput, true);
          } 
  
          /*** Get the formatting options ***/
@@ -552,12 +563,19 @@ int CPsiBlastApp::Run(void)
  
          if (m_CmdLineArgs->ProduceDebugOutput())
              opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
+        if(input) {
+            LogQueryInfo(m_UsageReport, *input);
+        }
+        
+        formatter.LogBlastSearchInfo(m_UsageReport);
  
      } CATCH_ALL(status)
      if(!m_bah.GetMessages().empty()) {
         const CArgs & a = GetArgs();
         PrintErrorArchive(a, m_bah.GetMessages());
      }
+       m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
diff --git a/c++/src/app/blast/rpsblast_app.cpp b/c++/src/app/blast/rpsblast_app.cpp

index 32dd5a32dc28642525406e085cc7f910fa611c05..7814eac35eec92e27f1218959f53720c6561b690 100644 (file)
--- a/c++/src/app/blast/rpsblast_app.cpp
+++ b/c++/src/app/blast/rpsblast_app.cpp
@@ -1,4 +1,4 @@
-/*  $Id: rpsblast_app.cpp 570608 2018-09-12 12:17:57Z fongah2 $
+/*  $Id: rpsblast_app.cpp 615351 2020-08-31 15:38:53Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -40,6 +40,7 @@
  #include <algo/blast/api/objmgr_query_data.hpp>
  #include <algo/blast/format/blast_format.hpp>
  #include "blast_app_util.hpp"
+#include "rpsblast_node.hpp"
  #include <algo/blast/api/rpsblast_local.hpp>
  #include <algo/blast/api/rps_aux.hpp>
  
@@ -57,6 +58,13 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+        }
+    }
+    /// Destructor: adds the stopwatch's elapsed time to the usage report
+    /// under CBlastUsageReport::eRunTime.
+    ~CRPSBlastApp() {
+       m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
      }
  private:
      /** @inheritDoc */
@@ -64,8 +72,13 @@ private:
      /** @inheritDoc */
      virtual int Run();
  
+    int x_RunMTBySplitDB();
+    int x_RunMTBySplitQuery();
+
      /// This application's command line args
      CRef<CRPSBlastAppArgs> m_CmdLineArgs;
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  void CRPSBlastApp::Init()
@@ -81,6 +94,18 @@ void CRPSBlastApp::Init()
  }
  
  int CRPSBlastApp::Run(void)
+{
+       const CArgs& args = GetArgs();
+       if ((args[kArgMTMode].AsInteger() == 0)  || (args[kArgNumThreads].AsInteger() <= 1)){
+               return x_RunMTBySplitDB();
+       }
+       else {
+               m_UsageReport.AddParam(CBlastUsageReport::eMTMode, args[kArgMTMode].AsInteger());
+               return x_RunMTBySplitQuery();
+       }
+}
+
+int CRPSBlastApp::x_RunMTBySplitDB(void)
  {
      int status = BLAST_EXIT_SUCCESS;
      CBlastAppDiagHandler bah;
@@ -205,14 +230,91 @@ int CRPSBlastApp::Run(void)
              opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
          }
  
+        LogQueryInfo(m_UsageReport, input);
+        formatter.LogBlastSearchInfo(m_UsageReport);
      } CATCH_ALL(status)
      if(!bah.GetMessages().empty()) {
         const CArgs & a = GetArgs();
         PrintErrorArchive(a, bah.GetMessages());
      }
+       m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
+
+/// Runs the search by cutting the query stream into batches and handing each
+/// batch to its own CRPSBlastNode, coordinated by a CBlastMasterNode that is
+/// given the application's output stream.
+///
+/// The batch size passed to CBlastNodeInputReader defaults to 3600 and can be
+/// overridden through the BLAST_MT_QUERY_BATCH_SIZE environment variable.  A
+/// value that does not parse as a positive integer is ignored with a warning
+/// and the default is kept.
+///
+/// @return BLAST_EXIT_SUCCESS, or the status set by CATCH_ALL when an
+///         exception was caught.
+int CRPSBlastApp::x_RunMTBySplitQuery(void)
+{
+    int status = BLAST_EXIT_SUCCESS;
+    CBlastAppDiagHandler bah;
+    int batch_size = 3600;
+
+    const char* mt_query_batch_env = getenv("BLAST_MT_QUERY_BATCH_SIZE");
+    if (mt_query_batch_env) {
+        // Parse without throwing: this code runs outside the try block below,
+        // so an exception here would bypass CATCH_ALL and the usage-report
+        // updates at the end of the function.
+        const int env_batch_size =
+            NStr::StringToInt(mt_query_batch_env, NStr::fConvErr_NoThrow);
+        if (env_batch_size > 0) {
+            batch_size = env_batch_size;
+        }
+        else {
+            // NOTE(review): zero/negative sizes are rejected on the assumption
+            // that CBlastNodeInputReader needs a positive batch size - confirm.
+            ERR_POST(Warning << "Ignoring invalid BLAST_MT_QUERY_BATCH_SIZE value '"
+                     << mt_query_batch_env << "'; using " << batch_size);
+        }
+    }
+    cerr << "Batch Size: " << batch_size << endl;
+    // Allow the fasta reader to complain on invalid sequence input
+    SetDiagPostLevel(eDiag_Warning);
+    SetDiagPostPrefix("rpsblast");
+    SetDiagHandler(&bah, false);
+
+    try {
+        const CArgs& args = GetArgs();
+        const int kMaxNumOfThreads = args[kArgNumThreads].AsInteger();
+        CRef<CBlastOptionsHandle> opts_hndl;
+        if(RecoverSearchStrategy(args, m_CmdLineArgs)) {
+            opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
+        }
+        else {
+            opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
+        }
+        if(IsIStreamEmpty(m_CmdLineArgs->GetInputStream())){
+            // This early return skips the usage-report updates made at the
+            // end of the function.
+            ERR_POST(Warning << "Query is Empty!");
+            return BLAST_EXIT_SUCCESS;
+        }
+        CNcbiOstream & out_stream = m_CmdLineArgs->GetOutputStream();
+        CBlastMasterNode master_node(out_stream, kMaxNumOfThreads);
+        int chunk_num = 0;
+
+        LogRPSBlastOptions(m_UsageReport, opts_hndl->GetOptions());
+        LogRPSCmdOptions(m_UsageReport, *m_CmdLineArgs);
+        CBlastNodeInputReader input(m_CmdLineArgs->GetInputStream(), batch_size, 360);
+        // Keep polling the master node.  A new batch is only handed out while
+        // input remains and the master node reports that it is not full.
+        while (master_node.Processing()) {
+            if (!input.AtEOF()) {
+                if (!master_node.IsFull()) {
+                    string qb;
+                    int q_index = 0;
+                    int num_q = input.GetQueryBatch(qb, q_index);
+                    if (num_q > 0) {
+                        // Both objects are handed to the master node via RegisterNode().
+                        CBlastNodeMailbox * mb(new CBlastNodeMailbox(chunk_num, master_node.GetBuzzer()));
+                        CRPSBlastNode * t(new CRPSBlastNode(chunk_num, GetArguments(), args, bah, qb, q_index, num_q, mb));
+                        master_node.RegisterNode(t, mb);
+                        chunk_num ++;
+                    }
+                }
+            }
+            else {
+                // Input exhausted: request shutdown and record the totals
+                // reported by the master node.
+                master_node.Shutdown();
+                m_UsageReport.AddParam(CBlastUsageReport::eNumQueries, master_node.GetNumOfQueries());
+                m_UsageReport.AddParam(CBlastUsageReport::eTotalQueryLength, master_node.GetQueriesLength());
+                m_UsageReport.AddParam(CBlastUsageReport::eNumErrStatus, master_node.GetNumErrStatus());
+                m_UsageReport.AddParam(CBlastUsageReport::eNumQueryBatches, chunk_num);
+            }
+        }
+
+    } CATCH_ALL (status)
+
+    if(!bah.GetMessages().empty()) {
+        const CArgs & a = GetArgs();
+        PrintErrorArchive(a, bah.GetMessages());
+    }
+    m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
+    return status;
+
+}
+
  #ifndef SKIP_DOXYGEN_PROCESSING
  int main(int argc, const char* argv[] /*, const char* envp[]*/)
  {
diff --git a/c++/src/app/blast/rpsblast_node.cpp b/c++/src/app/blast/rpsblast_node.cpp

new file mode 100644 (file)

index 0000000..2a3c9a1
--- /dev/null
+++ b/c++/src/app/blast/rpsblast_node.cpp
@@ -0,0 +1,210 @@
+/*  $Id:
+ * ===========================================================================
+ *
+ *                            PUBLIC DOMAIN NOTICE
+ *               National Center for Biotechnology Information
+ *
+ *  This software/database is a "United States Government Work" under the
+ *  terms of the United States Copyright Act.  It was written as part of
+ *  the author's official duties as a United States Government employee and
+ *  thus cannot be copyrighted.  This software/database is freely available
+ *  to the public for use. The National Library of Medicine and the U.S.
+ *  Government have not placed any restriction on its use or reproduction.
+ *
+ *  Although all reasonable efforts have been taken to ensure the accuracy
+ *  and reliability of the software and data, the NLM and the U.S.
+ *  Government do not and cannot warrant the performance or results that
+ *  may be obtained by using this software or data. The NLM and the U.S.
+ *  Government disclaim all warranties, express or implied, including
+ *  warranties of performance, merchantability or fitness for any particular
+ *  purpose.
+ *
+ *  Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file rpsblast_node.cpp
+ * RPSBLAST node api
+ */
+
+#include <ncbi_pch.hpp>
+#include <corelib/ncbiapp.hpp>
+#include <algo/blast/api/local_blast.hpp>
+#include <algo/blast/api/remote_blast.hpp>
+#include <algo/blast/blastinput/blast_fasta_input.hpp>
+#include <algo/blast/blastinput/rpsblast_args.hpp>
+#include <algo/blast/api/objmgr_query_data.hpp>
+#include <algo/blast/format/blast_format.hpp>
+#include "blast_app_util.hpp"
+#include "rpsblast_node.hpp"
+#include <algo/blast/api/rpsblast_local.hpp>
+#include <algo/blast/api/rps_aux.hpp>
+
+#ifndef SKIP_DOXYGEN_PROCESSING
+USING_NCBI_SCOPE;
+USING_SCOPE(blast);
+USING_SCOPE(objects);
+#endif
+
+/// Builds a node for one batch of queries.
+///
+/// All arguments except @a input are forwarded to CBlastNode together with
+/// the eRPSBlast program type.  @a input is copied into m_Input and used to
+/// construct this node's CRPSBlastNodeArgs.  The node is then marked
+/// eInitialized and a run request carrying this node's address is sent.
+CRPSBlastNode::CRPSBlastNode(int node_num,
+                             const CNcbiArguments & ncbi_args,
+                             const CArgs& args,
+                             CBlastAppDiagHandler & bah,
+                             const string & input,
+                             int query_index,
+                             int num_queries,
+                             CBlastNodeMailbox * mailbox)
+    : CBlastNode(node_num, ncbi_args, args, bah, eRPSBlast,
+                 query_index, num_queries, mailbox),
+      m_Input(input)
+{
+    // The argument object is built from the node's own copy of the input text.
+    m_CmdLineArgs.Reset(new CRPSBlastNodeArgs(m_Input));
+    SetState(eInitialized);
+    SendMsg(CBlastNodeMsg::eRunRequest, static_cast<void*>(this));
+}
+
+/// Copies the output accumulated by this node into @a results.
+///
+/// @param results  receives the contents of the node's output string stream;
+///                 left untouched unless the node's state is eDone
+/// @return the node's status when its state is eDone, otherwise -1
+int CRPSBlastNode::GetBlastResults(string & results)
+{
+    if (GetState() != eDone) {
+        return -1;
+    }
+    results = CNcbiOstrstreamToString(m_CmdLineArgs->GetOutputStrStream());
+    return GetStatus();
+}
+
+/// Destructor: explicitly drops this node's reference to its
+/// command-line argument object.
+CRPSBlastNode::~CRPSBlastNode()
+{
+       m_CmdLineArgs.Reset();
+}
+
+/// Runs the RPS-BLAST search for this node's queries and formats the results
+/// through m_CmdLineArgs->GetOutputStream().
+///
+/// The node's state is set to eRunning on entry.  When the run finishes with
+/// BLAST_EXIT_SUCCESS the state becomes eDone and an ePostResult message is
+/// sent; otherwise the state becomes eError and an eErrorExit message is sent.
+/// Exceptions raised inside the try block are turned into a status value by
+/// CATCH_ALL.
+///
+/// NOTE(review): presumably this is the thread entry point inherited via
+/// CBlastNode - confirm against blast_node.hpp.
+///
+/// @return always NULL; the outcome is reported through SetStatus()/SendMsg().
+void *
+CRPSBlastNode::Main()
+{
+    int status = BLAST_EXIT_SUCCESS;
+    CBlastAppDiagHandler & bah = GetDiagHandler();
+       SetDiagPostPrefix(GetNodeIdStr().c_str());
+
+    SetState(eRunning);
+       try {
+               const CArgs& args = GetArgs();
+       CRef<CBlastOptionsHandle> opts_hndl;
+        // Options come either from a recovered search strategy or from the
+        // command-line arguments.
+        if(RecoverSearchStrategy(args, m_CmdLineArgs)) {
+               opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
+        }
+        else {
+               opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
+        }
+
+        CheckForFreqRatioFile(m_CmdLineArgs->GetBlastDatabaseArgs()->GetDatabaseName(),
+                                      opts_hndl, true);
+        const CBlastOptions& opt = opts_hndl->GetOptions();
+
+        /*** Initialize the database ***/
+        CRef<CBlastDatabaseArgs> db_args(m_CmdLineArgs->GetBlastDatabaseArgs());
+        CRef<CLocalDbAdapter> db_adapter;
+        CRef<CScope> scope;
+        InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(),
+                         db_adapter, scope);
+        _ASSERT(db_adapter && scope);
+
+        /*** Get the query sequence(s) ***/
+        CRef<CQueryOptionsArgs> query_opts =
+            m_CmdLineArgs->GetQueryOptionsArgs();
+        SDataLoaderConfig dlconfig =
+            InitializeQueryDataLoaderConfiguration(query_opts->QueryIsProtein(),
+                                                   db_adapter);
+        CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
+                                     query_opts->UseLowercaseMasks(),
+                                     query_opts->GetParseDeflines(),
+                                     query_opts->GetRange());
+        CBlastFastaInputSource fasta(m_CmdLineArgs->GetInputStream(), iconfig);
+        CBlastInput input(&fasta, m_CmdLineArgs->GetQueryBatchSize());
+
+        /*** Get the formatting options ***/
+        CRef<CFormattingArgs> fmt_args(m_CmdLineArgs->GetFormattingArgs());
+        bool isArchiveFormat = fmt_args->ArchiveFormatRequested(args);
+        // Saved diagnostic messages are only consumed by WriteArchive() below,
+        // so saving is turned off for every other output format.
+        if(!isArchiveFormat) {
+               bah.DoNotSaveMessages();
+        }
+        CBlastFormat formatter(opt, *db_adapter,
+                               fmt_args->GetFormattedOutputChoice(),
+                               query_opts->GetParseDeflines(),
+                               m_CmdLineArgs->GetOutputStream(),
+                               fmt_args->GetNumDescriptions(),
+                               fmt_args->GetNumAlignments(),
+                               *scope,
+                               opt.GetMatrixName(),
+                               fmt_args->ShowGis(),
+                               fmt_args->DisplayHtmlOutput(),
+                               opt.GetQueryGeneticCode(),
+                               opt.GetDbGeneticCode(),
+                               opt.GetSumStatisticsMode(),
+                               m_CmdLineArgs->ExecuteRemotely(),
+                               db_adapter->GetFilteringAlgorithm(),
+                               fmt_args->GetCustomOutputFormatSpec(),
+                               false, false, NULL, NULL,
+                               GetCmdlineArgs(GetArguments()));
+
+        formatter.SetQueryRange(query_opts->GetRange());
+        formatter.SetLineLength(fmt_args->GetLineLength());
+        if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) {
+               formatter.SetBaseFile(args[kArgOutput].AsString());
+        }
+        formatter.PrintProlog();
+
+        /*** Process the input ***/
+        // After each batch the formatter's scope history is reset and
+        // QueryBatchCleanup() is called.
+        for (; !input.End(); formatter.ResetScopeHistory(), QueryBatchCleanup()) {
+
+            CRef<CBlastQueryVector> query_batch(input.GetNextSeqBatch(*scope));
+            CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(*query_batch));
+
+            SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl);
+
+            CRef<CSearchResultSet> results;
+
+            if (m_CmdLineArgs->ExecuteRemotely())
+            {
+                CRef<CRemoteBlast> rmt_blast =
+                    InitializeRemoteBlast(queries, db_args, opts_hndl,
+                          m_CmdLineArgs->ProduceDebugRemoteOutput(),
+                          m_CmdLineArgs->GetClientId());
+                results = rmt_blast->GetResultSet();
+            }
+            else
+            {
+               // NOTE(review): the trailing 1 is presumably the thread count
+               // for this node's local search - confirm in rpsblast_local.hpp.
+               CLocalRPSBlast  local_search (query_batch, db_args->GetDatabaseName(), opts_hndl, 1);
+               results = local_search.Run();
+            }
+
+            if (fmt_args->ArchiveFormatRequested(args)) {
+                // The archive carries the saved diagnostics; they are cleared
+                // once written.
+                formatter.WriteArchive(*queries, *opts_hndl, *results, 0, bah.GetMessages());
+                bah.ResetMessages();
+            } else {
+               BlastFormatter_PreFetchSequenceData(*results, scope,
+                                                      fmt_args->GetFormattedOutputChoice());
+                ITERATE(CSearchResultSet, result, *results) {
+                    formatter.PrintOneResultSet(**result, query_batch);
+                }
+            }
+        }
+
+        formatter.PrintEpilog(opt);
+
+        if (m_CmdLineArgs->ProduceDebugOutput()) {
+            opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
+        }
+        // Record the total query length processed by this node.
+        SetQueriesLength(input.GetTotalLengthProcessed());
+    } CATCH_ALL(status)
+
+       // Publish the outcome: status first, then the state and its message.
+       SetStatus(status);
+       if (status == BLAST_EXIT_SUCCESS) {
+               SetState(eDone);
+               SendMsg(CBlastNodeMsg::ePostResult, (void *) this);
+
+       }
+       else {
+               SetState(eError);
+               SendMsg(CBlastNodeMsg::eErrorExit, (void *) this);
+
+       }
+
+    return NULL;
+}
+
diff --git a/c++/src/app/blast/rpsblast_node.hpp b/c++/src/app/blast/rpsblast_node.hpp

new file mode 100644 (file)

index 0000000..112a7a2
--- /dev/null
+++ b/c++/src/app/blast/rpsblast_node.hpp
@@ -0,0 +1,62 @@
+/*  $Id:
+ * ===========================================================================
+ *
+ *                            PUBLIC DOMAIN NOTICE
+ *               National Center for Biotechnology Information
+ *
+ *  This software/database is a "United States Government Work" under the
+ *  terms of the United States Copyright Act.  It was written as part of
+ *  the author's official duties as a United States Government employee and
+ *  thus cannot be copyrighted.  This software/database is freely available
+ *  to the public for use. The National Library of Medicine and the U.S.
+ *  Government have not placed any restriction on its use or reproduction.
+ *
+ *  Although all reasonable efforts have been taken to ensure the accuracy
+ *  and reliability of the software and data, the NLM and the U.S.
+ *  Government do not and cannot warrant the performance or results that
+ *  may be obtained by using this software or data. The NLM and the U.S.
+ *  Government disclaim all warranties, express or implied, including
+ *  warranties of performance, merchantability or fitness for any particular
+ *  purpose.
+ *
+ *  Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file rpsblast_node.hpp
+ * RPSBLAST node api
+ */
+
+#ifndef APP__RPSBLAST_NODE__HPP
+#define APP__RPSBLAST_NODE__HPP
+
+#include <algo/blast/blastinput/rpsblast_args.hpp>
+#include <algo/blast/api/blast_node.hpp>
+
+BEGIN_NCBI_SCOPE
+BEGIN_SCOPE(blast)
+
+/// Node that runs an RPS-BLAST search on one batch of queries and keeps the
+/// formatted output until it is collected with GetBlastResults().
+class CRPSBlastNode : public CBlastNode
+{
+public :
+
+       /// Constructor.
+       /// @param node_num     number of this node, forwarded to CBlastNode
+       /// @param ncbi_args    application arguments, forwarded to CBlastNode
+       /// @param args         parsed command-line arguments, forwarded to CBlastNode
+       /// @param bah          diagnostic handler, forwarded to CBlastNode
+       /// @param input        query text for this node; a copy is stored in m_Input
+       /// @param query_index  forwarded to CBlastNode
+       /// @param num_queries  forwarded to CBlastNode
+       /// @param mailbox      forwarded to CBlastNode; may be NULL
+       CRPSBlastNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args,
+                              CBlastAppDiagHandler & bah, const string & input,
+                              int query_index, int num_queries, CBlastNodeMailbox * mailbox = NULL);
+       /// Copies the node's output into @a results once the node is done.
+       /// @return the node's status when its state is eDone, otherwise -1
+       virtual int GetBlastResults(string & results);
+protected:
+       /// Destructor is protected, so the node cannot be destroyed directly
+       /// from outside the class hierarchy.
+       virtual ~CRPSBlastNode(void);
+       /// Runs the search and formats its results; always returns NULL.
+       virtual void* Main(void);
+private:
+       /// Copy of the query text.  Declared before m_CmdLineArgs, which is
+       /// built from it in the constructor.
+       string m_Input;
+       /// Argument object constructed from m_Input.
+       CRef<CRPSBlastNodeArgs>  m_CmdLineArgs;
+};
+
+END_SCOPE(blast)
+END_NCBI_SCOPE
+
+#endif /* APP__RPSBLAST_NODE__HPP */
diff --git a/c++/src/app/blast/rpstblastn_app.cpp b/c++/src/app/blast/rpstblastn_app.cpp

index 8dafa55eb57907eef01fe42872247e4113e7f709..1f9ddd4e570588d7a85e1dfd369f2e64a27ac752 100644 (file)
--- a/c++/src/app/blast/rpstblastn_app.cpp
+++ b/c++/src/app/blast/rpstblastn_app.cpp
@@ -1,4 +1,4 @@
-/*  $Id: rpstblastn_app.cpp 570608 2018-09-12 12:17:57Z fongah2 $
+/*  $Id: rpstblastn_app.cpp 615352 2020-08-31 15:39:03Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -40,6 +40,7 @@
  #include <algo/blast/api/objmgr_query_data.hpp>
  #include <algo/blast/format/blast_format.hpp>
  #include "blast_app_util.hpp"
+#include "rpstblastn_node.hpp"
  #include <objtools/blast/seqdb_reader/seqdb.hpp>
  #include <algo/blast/api/rpsblast_local.hpp>
  
@@ -57,6 +58,13 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+        }
+    }
+    ~CRPSTBlastnApp() {   // adds the time elapsed on m_StopWatch (started in the constructor) as eRunTime
+       m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
      }
  private:
      /** @inheritDoc */
@@ -64,8 +72,13 @@ private:
      /** @inheritDoc */
      virtual int Run();
  
+    int x_RunMTBySplitDB();
+    int x_RunMTBySplitQuery();
+
      /// This application's command line args
      CRef<CRPSTBlastnAppArgs> m_CmdLineArgs;
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  void CRPSTBlastnApp::Init()
@@ -80,7 +93,20 @@ void CRPSTBlastnApp::Init()
      SetupArgDescriptions(m_CmdLineArgs->SetCommandLine());
  }
  
+
  int CRPSTBlastnApp::Run(void)
+{
+       const CArgs& args = GetArgs();
+       if ((args[kArgMTMode].AsInteger() == 0)  || (args[kArgNumThreads].AsInteger() <= 1)){
+               return x_RunMTBySplitDB();
+       }
+       else {
+               m_UsageReport.AddParam(CBlastUsageReport::eMTMode, args[kArgMTMode].AsInteger());
+               return x_RunMTBySplitQuery();
+       }
+}
+
+int CRPSTBlastnApp::x_RunMTBySplitDB(void)
  {
      int status = BLAST_EXIT_SUCCESS;
      CBlastAppDiagHandler bah;
@@ -198,14 +224,91 @@ int CRPSTBlastnApp::Run(void)
              opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
          }
  
+        LogQueryInfo(m_UsageReport, input);
+        formatter.LogBlastSearchInfo(m_UsageReport);
      } CATCH_ALL(status)
      if(!bah.GetMessages().empty()) {
         const CArgs & a = GetArgs();
         PrintErrorArchive(a, bah.GetMessages());
      }
+       m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
+    return status;
+}
+
+int CRPSTBlastnApp::x_RunMTBySplitQuery(void)
+{   // Reads the query stream in batches and registers one CRPSTBlastnNode per batch with a CBlastMasterNode.
+    int status = BLAST_EXIT_SUCCESS;
+    CBlastAppDiagHandler bah;
+    int batch_size = 8000;   // default batch size handed to CBlastNodeInputReader below
+    // BLAST_MT_QUERY_BATCH_SIZE, when set in the environment, replaces the default batch size.
+       char * mt_query_batch_env = getenv("BLAST_MT_QUERY_BATCH_SIZE");
+       if (mt_query_batch_env) {
+               batch_size = NStr::StringToInt(mt_query_batch_env);   // NOTE(review): runs before the try block below
+       }
+       cerr << "Batch Size: " << batch_size << endl;   // NOTE(review): written to stderr unconditionally
+    // Allow the fasta reader to complain on invalid sequence input
+    SetDiagPostLevel(eDiag_Warning);
+    SetDiagPostPrefix("rpstblastn_mt");
+    SetDiagHandler(&bah, false);
+
+       try {
+       const CArgs& args = GetArgs();
+       const int kMaxNumOfThreads = args[kArgNumThreads].AsInteger();
+       CRef<CBlastOptionsHandle> opts_hndl;
+        if(RecoverSearchStrategy(args, m_CmdLineArgs)) {
+               opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
+        }
+        else {
+               opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
+        }
+       if(IsIStreamEmpty(m_CmdLineArgs->GetInputStream())){
+                       ERR_POST(Warning << "Query is Empty!");
+                       return BLAST_EXIT_SUCCESS;   // returns before the error-archive and usage-report code after the try
+       }
+       CNcbiOstream & out_stream = m_CmdLineArgs->GetOutputStream();
+               CBlastMasterNode master_node(out_stream, kMaxNumOfThreads);
+               int chunk_num = 0;   // batches registered so far; also the number given to each mailbox and node
+
+               LogRPSBlastOptions(m_UsageReport, opts_hndl->GetOptions());
+               LogRPSCmdOptions(m_UsageReport, *m_CmdLineArgs);
+               CBlastNodeInputReader input(m_CmdLineArgs->GetInputStream(), batch_size, 4500);   // NOTE(review): meaning of 4500 is not visible here - confirm
+               while (master_node.Processing()) {
+                       if (!input.AtEOF()) {
+                               if (!master_node.IsFull()) {   // only read another batch when the master node reports it is not full
+                                       int q_index = 0;
+                                       string qb;
+                                       int num_q = input.GetQueryBatch(qb, q_index);
+                                       if (num_q > 0) {
+                                               CBlastNodeMailbox * mb(new CBlastNodeMailbox(chunk_num, master_node.GetBuzzer()));
+                                               CRPSTBlastnNode * t(new CRPSTBlastnNode(chunk_num, GetArguments(), args, bah, qb, q_index, num_q, mb));
+                                               master_node.RegisterNode(t, mb);   // presumably takes over the raw pointers - confirm in CBlastMasterNode
+                                               chunk_num ++;
+                                       }
+                               }
+                       }
+                       else {   // input exhausted: call Shutdown() and record the totals in the usage report
+                               master_node.Shutdown();
+                               m_UsageReport.AddParam(CBlastUsageReport::eNumQueries, master_node.GetNumOfQueries());
+                               m_UsageReport.AddParam(CBlastUsageReport::eTotalQueryLength, master_node.GetQueriesLength());
+                               m_UsageReport.AddParam(CBlastUsageReport::eNumErrStatus, master_node.GetNumErrStatus());
+                               m_UsageReport.AddParam(CBlastUsageReport::eNumQueryBatches, chunk_num);
+                       }
+
+       }
+
+       } CATCH_ALL (status)   // presumably stores an error code in status - confirm in the macro definition
+
+    if(!bah.GetMessages().empty()) {
+       const CArgs & a = GetArgs();
+       PrintErrorArchive(a, bah.GetMessages());
+    }
+       m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
+
  #ifndef SKIP_DOXYGEN_PROCESSING
  int main(int argc, const char* argv[] /*, const char* envp[]*/)
  {
diff --git a/c++/src/app/blast/rpstblastn_node.cpp b/c++/src/app/blast/rpstblastn_node.cpp

new file mode 100644 (file)

index 0000000..860f30c
--- /dev/null
+++ b/c++/src/app/blast/rpstblastn_node.cpp
@@ -0,0 +1,209 @@
+/*  $Id:
+ * ===========================================================================
+ *
+ *                            PUBLIC DOMAIN NOTICE
+ *               National Center for Biotechnology Information
+ *
+ *  This software/database is a "United States Government Work" under the
+ *  terms of the United States Copyright Act.  It was written as part of
+ *  the author's official duties as a United States Government employee and
+ *  thus cannot be copyrighted.  This software/database is freely available
+ *  to the public for use. The National Library of Medicine and the U.S.
+ *  Government have not placed any restriction on its use or reproduction.
+ *
+ *  Although all reasonable efforts have been taken to ensure the accuracy
+ *  and reliability of the software and data, the NLM and the U.S.
+ *  Government do not and cannot warrant the performance or results that
+ *  may be obtained by using this software or data. The NLM and the U.S.
+ *  Government disclaim all warranties, express or implied, including
+ *  warranties of performance, merchantability or fitness for any particular
+ *  purpose.
+ *
+ *  Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file rpstblastn_node.cpp
+ * RPSTBLASTN node implementation used by the multi-threaded (split-by-query) mode
+ */
+
+#include <ncbi_pch.hpp>
+#include <corelib/ncbiapp.hpp>
+#include <algo/blast/api/local_blast.hpp>
+#include <algo/blast/api/remote_blast.hpp>
+#include <algo/blast/blastinput/blast_fasta_input.hpp>
+#include <algo/blast/blastinput/rpstblastn_args.hpp>
+#include <algo/blast/api/objmgr_query_data.hpp>
+#include <algo/blast/format/blast_format.hpp>
+#include "blast_app_util.hpp"
+#include "rpstblastn_node.hpp"
+#include <algo/blast/api/rpsblast_local.hpp>
+
+#ifndef SKIP_DOXYGEN_PROCESSING
+USING_NCBI_SCOPE;
+USING_SCOPE(blast);
+USING_SCOPE(objects);
+#endif
+
+CRPSTBlastnNode::CRPSTBlastnNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args,   // builds a node for one query batch
+                                         CBlastAppDiagHandler & bah, const string & input,
+                                 int query_index, int num_queries,CBlastNodeMailbox * mailbox):
+                              CBlastNode(node_num, ncbi_args, args, bah, eRPSTblastn, query_index, num_queries, mailbox), m_Input(input)   // m_Input keeps its own copy of input
+{
+       m_CmdLineArgs.Reset(new CRPSTBlastnNodeArgs(m_Input));   // per-node argument object constructed from the stored input
+       SetState(eInitialized);
+       SendMsg(CBlastNodeMsg::eRunRequest, (void*) this);   // sent last, after the state has been set to eInitialized
+}
+
+int CRPSTBlastnNode::GetBlastResults(string & results)
+{
+       if (GetState() != eDone) {   // not finished: leave results untouched and return -1
+               return -1;
+       }
+       results = CNcbiOstrstreamToString(m_CmdLineArgs->GetOutputStrStream());
+       return GetStatus();
+}
+
+CRPSTBlastnNode::~CRPSTBlastnNode()
+{
+       m_CmdLineArgs.Reset();   // explicitly drops this node's reference to its argument object
+}
+
+void *   // always returns NULL; the outcome is reported through SetStatus/SetState/SendMsg
+CRPSTBlastnNode::Main()
+{   // Runs the search for this node's input and formats the results to the node's output stream.
+    int status = BLAST_EXIT_SUCCESS;
+    CBlastAppDiagHandler & bah = GetDiagHandler();
+       SetDiagPostPrefix(GetNodeIdStr().c_str());
+    // The node is marked as running before any option or database work starts.
+    SetState(eRunning);
+       try {
+               const CArgs& args = GetArgs();
+       CRef<CBlastOptionsHandle> opts_hndl;
+        if(RecoverSearchStrategy(args, m_CmdLineArgs)) {
+               opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
+        }
+        else {
+               opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
+        }
+
+        CheckForFreqRatioFile(m_CmdLineArgs->GetBlastDatabaseArgs()->GetDatabaseName(),
+                                      opts_hndl, true);
+        const CBlastOptions& opt = opts_hndl->GetOptions();
+
+        /*** Initialize the database ***/
+        CRef<CBlastDatabaseArgs> db_args(m_CmdLineArgs->GetBlastDatabaseArgs());
+        CRef<CLocalDbAdapter> db_adapter;
+        CRef<CScope> scope;
+        InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(),
+                         db_adapter, scope);
+        _ASSERT(db_adapter && scope);
+
+        /*** Get the query sequence(s) ***/
+        CRef<CQueryOptionsArgs> query_opts =
+            m_CmdLineArgs->GetQueryOptionsArgs();
+        SDataLoaderConfig dlconfig =
+            InitializeQueryDataLoaderConfiguration(query_opts->QueryIsProtein(),
+                                                   db_adapter);
+        CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
+                                     query_opts->UseLowercaseMasks(),
+                                     query_opts->GetParseDeflines(),
+                                     query_opts->GetRange());
+        CBlastFastaInputSource fasta(m_CmdLineArgs->GetInputStream(), iconfig);
+        CBlastInput input(&fasta, m_CmdLineArgs->GetQueryBatchSize());
+
+        /*** Get the formatting options ***/
+        CRef<CFormattingArgs> fmt_args(m_CmdLineArgs->GetFormattingArgs());
+        bool isArchiveFormat = fmt_args->ArchiveFormatRequested(args);
+        if(!isArchiveFormat) {
+               bah.DoNotSaveMessages();   // saved messages are only consumed by WriteArchive() below
+        }
+        CBlastFormat formatter(opt, *db_adapter,
+                               fmt_args->GetFormattedOutputChoice(),
+                               query_opts->GetParseDeflines(),
+                               m_CmdLineArgs->GetOutputStream(),
+                               fmt_args->GetNumDescriptions(),
+                               fmt_args->GetNumAlignments(),
+                               *scope,
+                               opt.GetMatrixName(),
+                               fmt_args->ShowGis(),
+                               fmt_args->DisplayHtmlOutput(),
+                               opt.GetQueryGeneticCode(),
+                               opt.GetDbGeneticCode(),
+                               opt.GetSumStatisticsMode(),
+                               m_CmdLineArgs->ExecuteRemotely(),
+                               db_adapter->GetFilteringAlgorithm(),
+                               fmt_args->GetCustomOutputFormatSpec(),
+                               false, false, NULL, NULL,
+                               GetCmdlineArgs(GetArguments()));
+
+        formatter.SetQueryRange(query_opts->GetRange());
+        formatter.SetLineLength(fmt_args->GetLineLength());
+        if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) {
+               formatter.SetBaseFile(args[kArgOutput].AsString());
+        }
+        formatter.PrintProlog();
+
+        /*** Process the input ***/
+        for (; !input.End(); formatter.ResetScopeHistory(), QueryBatchCleanup()) {   // per-batch cleanup runs in the loop's increment step
+
+            CRef<CBlastQueryVector> query_batch(input.GetNextSeqBatch(*scope));
+            CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(*query_batch));
+
+            SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl);
+
+            CRef<CSearchResultSet> results;
+
+            if (m_CmdLineArgs->ExecuteRemotely())
+            {
+                CRef<CRemoteBlast> rmt_blast =
+                    InitializeRemoteBlast(queries, db_args, opts_hndl,
+                          m_CmdLineArgs->ProduceDebugRemoteOutput(),
+                          m_CmdLineArgs->GetClientId());
+                results = rmt_blast->GetResultSet();
+            }
+            else
+            {
+               CLocalRPSBlast  local_search (query_batch, db_args->GetDatabaseName(), opts_hndl, 1);   // NOTE(review): last argument 1 is presumably the thread count - confirm
+               results = local_search.Run();
+            }
+
+            if (fmt_args->ArchiveFormatRequested(args)) {   // same query as isArchiveFormat above, evaluated again per batch
+                formatter.WriteArchive(*queries, *opts_hndl, *results, 0, bah.GetMessages());
+                bah.ResetMessages();
+            } else {
+               BlastFormatter_PreFetchSequenceData(*results, scope,
+                                                      fmt_args->GetFormattedOutputChoice());
+                ITERATE(CSearchResultSet, result, *results) {
+                    formatter.PrintOneResultSet(**result, query_batch);
+                }
+            }
+        }
+
+        formatter.PrintEpilog(opt);
+
+        if (m_CmdLineArgs->ProduceDebugOutput()) {
+            opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
+        }
+
+        SetQueriesLength(input.GetTotalLengthProcessed());
+    } CATCH_ALL(status)   // presumably stores an error code in status - confirm in the macro definition
+    // Publish the outcome: eDone/ePostResult on success, eError/eErrorExit otherwise.
+       SetStatus(status);
+       if (status == BLAST_EXIT_SUCCESS) {
+               SetState(eDone);
+               SendMsg(CBlastNodeMsg::ePostResult, (void *) this);
+
+       }
+       else {
+               SetState(eError);
+               SendMsg(CBlastNodeMsg::eErrorExit, (void *) this);
+
+       }
+
+    return NULL;
+}
diff --git a/c++/src/app/blast/rpstblastn_node.hpp b/c++/src/app/blast/rpstblastn_node.hpp

new file mode 100644 (file)

index 0000000..9ecfcaa
--- /dev/null
+++ b/c++/src/app/blast/rpstblastn_node.hpp
@@ -0,0 +1,62 @@
+/*  $Id:
+ * ===========================================================================
+ *
+ *                            PUBLIC DOMAIN NOTICE
+ *               National Center for Biotechnology Information
+ *
+ *  This software/database is a "United States Government Work" under the
+ *  terms of the United States Copyright Act.  It was written as part of
+ *  the author's official duties as a United States Government employee and
+ *  thus cannot be copyrighted.  This software/database is freely available
+ *  to the public for use. The National Library of Medicine and the U.S.
+ *  Government have not placed any restriction on its use or reproduction.
+ *
+ *  Although all reasonable efforts have been taken to ensure the accuracy
+ *  and reliability of the software and data, the NLM and the U.S.
+ *  Government do not and cannot warrant the performance or results that
+ *  may be obtained by using this software or data. The NLM and the U.S.
+ *  Government disclaim all warranties, express or implied, including
+ *  warranties of performance, merchantability or fitness for any particular
+ *  purpose.
+ *
+ *  Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file rpstblastn_node.hpp
+ * RPSTBLASTN node api
+ */
+
+#ifndef APP__RPSTBLASTN_NODE__HPP
+#define APP__RPSTBLASTN_NODE__HPP
+
+#include <algo/blast/blastinput/rpstblastn_args.hpp>
+#include <algo/blast/api/blast_node.hpp>
+
+BEGIN_NCBI_SCOPE
+BEGIN_SCOPE(blast)
+
+class CRPSTBlastnNode : public CBlastNode   // node type declared for RPSTBLASTN (see @file note above)
+{
+public :
+       /// Constructor; mailbox is optional and defaults to NULL.
+       CRPSTBlastnNode (int check_num, const CNcbiArguments & ncbi_args, const CArgs& args,   // NOTE(review): first parameter is named node_num in the definition
+                                CBlastAppDiagHandler & bah, const string & input,
+                                int query_index, int num_queries, CBlastNodeMailbox * mailbox = NULL);
+       virtual int GetBlastResults(string & results);   // the definition fills results and returns GetStatus() once the state is eDone, else -1
+protected:   // destructor and Main() are not part of the public interface
+       virtual ~CRPSTBlastnNode(void);
+       virtual void* Main(void);
+private:
+       string m_Input;   // copy of the constructor's input string
+       CRef<CRPSTBlastnNodeArgs>  m_CmdLineArgs;   // argument object created from m_Input in the constructor
+};
+
+END_SCOPE(blast)
+END_NCBI_SCOPE
+
+#endif /* APP__RPSTBLASTN_NODE__HPP */
diff --git a/c++/src/app/blast/tblastn_app.cpp b/c++/src/app/blast/tblastn_app.cpp

index e3e000e1508587dc86980908e8e119de314cd573..4ffb4033e1cd6c03753a24d7f80acfa2ac1a66ab 100644 (file)
--- a/c++/src/app/blast/tblastn_app.cpp
+++ b/c++/src/app/blast/tblastn_app.cpp
@@ -1,4 +1,4 @@
-/*  $Id: tblastn_app.cpp 574693 2018-11-16 17:46:37Z zaretska $
+/*  $Id: tblastn_app.cpp 616358 2020-09-15 12:19:53Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -55,6 +55,14 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+        }
+    }
+
+    ~CTblastnApp() {   // adds the time elapsed on m_StopWatch (started in the constructor) as eRunTime
+       m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
      }
  private:
      /** @inheritDoc */
@@ -63,6 +71,8 @@ private:
      virtual int Run();
      /// This application's command line args
      CRef<CTblastnAppArgs> m_CmdLineArgs;
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  void CTblastnApp::Init()
@@ -258,11 +268,18 @@ int CTblastnApp::Run(void)
          if (m_CmdLineArgs->ProduceDebugOutput()) {
              opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
          }
+        if (input) {
+               LogQueryInfo(m_UsageReport, *input);
+        }
+        formatter.LogBlastSearchInfo(m_UsageReport);
      } CATCH_ALL(status)
      if(!bah.GetMessages().empty()) {
         const CArgs & a = GetArgs();
         PrintErrorArchive(a, bah.GetMessages());
      }
+
+       m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
diff --git a/c++/src/app/blast/tblastx_app.cpp b/c++/src/app/blast/tblastx_app.cpp

index 4f56902242cef1a90aad3568d7e6a4f11d39afc2..fc1e67bdb0e10d9ad7b3b62cf1139e567243ebf4 100644 (file)
--- a/c++/src/app/blast/tblastx_app.cpp
+++ b/c++/src/app/blast/tblastx_app.cpp
@@ -1,4 +1,4 @@
-/*  $Id: tblastx_app.cpp 574693 2018-11-16 17:46:37Z zaretska $
+/*  $Id: tblastx_app.cpp 615343 2020-08-31 15:37:47Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -55,6 +55,14 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+        }
+    }
+
+    ~CTblastxApp() {   // adds the time elapsed on m_StopWatch (started in the constructor) as eRunTime
+       m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
      }
  private:
      /** @inheritDoc */
@@ -64,6 +72,8 @@ private:
  
      /// This application's command line args
      CRef<CTblastxAppArgs> m_CmdLineArgs;
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  void CTblastxApp::Init()
@@ -202,11 +212,15 @@ int CTblastxApp::Run(void)
              opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
          }
  
+        LogQueryInfo(m_UsageReport, input);
+        formatter.LogBlastSearchInfo(m_UsageReport);
      } CATCH_ALL(status)
      if(!bah.GetMessages().empty()) {
         const CArgs & a = GetArgs();
         PrintErrorArchive(a, bah.GetMessages());
      }
+       m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
diff --git a/c++/src/app/blast/update_blastdb.pl b/c++/src/app/blast/update_blastdb.pl

index 8bc065f98a72e67dedafcf59dd79210c20cb43e1..212d2f08d72a3a252b6a5a03391c02944e440b10 100755 (executable)
--- a/c++/src/app/blast/update_blastdb.pl
+++ b/c++/src/app/blast/update_blastdb.pl
@@ -1,5 +1,5 @@
  #!/usr/bin/env perl
-# $Id: update_blastdb.pl 608134 2020-05-12 15:44:10Z ivanov $
+# $Id: update_blastdb.pl 608596 2020-05-19 10:56:17Z ivanov $
  # ===========================================================================
  #
  #                            PUBLIC DOMAIN NOTICE
@@ -114,7 +114,7 @@ my $exit_code = 0;
  $|++;
  
  if ($opt_show_version) {
-    my $revision = '$Revision: 608134 $';
+    my $revision = '$Revision: 608596 $';
      $revision =~ s/\$Revision: | \$//g;
      print "$0 version $revision\n";
      exit($exit_code);
@@ -135,20 +135,27 @@ if (defined($opt_source)) {
      # Try to auto-detect whether we're on the cloud
      if (defined($curl)) {
          my $tmpfile = File::Temp->new();
-        my $gcp_cmd = "$curl --connect-timeout 1 -sfo $tmpfile -H 'Metadata-Flavor: Google' " . GCP_URL;
-        my $aws_cmd = "$curl --connect-timeout 1 -sfo /dev/null " . AMI_URL;
+        my $gcp_cmd = "$curl --connect-timeout 3 --retry 3 --retry-max-time 30 -sfo $tmpfile -H 'Metadata-Flavor: Google' " . GCP_URL;
+        my $aws_cmd = "$curl --connect-timeout 3 --retry 3 --retry-max-time 30 -sfo /dev/null " . AMI_URL;
          print "$gcp_cmd\n" if DEBUG;
          if (system($gcp_cmd) == 0) { 
-       # status not always reliable.  Chekc that return is all digits.
-               my $tmpfile_content = do { local $/; <$tmpfile>};
-               print "tempfile: $tmpfile_content\n" if DEBUG;
-               if ($tmpfile_content =~ m/^(\d+)$/) {
-                       $location = "GCP";
-                }
+            # status not always reliable.  Check that curl output is all digits.
+            my $tmpfile_content = do { local $/; <$tmpfile>};
+            print "curl output $tmpfile_content\n" if DEBUG;
+            $location = "GCP" if ($tmpfile_content =~ m/^(\d+)$/);
+        } elsif (DEBUG) {
+            # Consult https://ec.haxx.se/usingcurl/usingcurl-returns
+            print "curl to GCP metadata server returned ", $?>>8, "\n";
          }
+
          print "$aws_cmd\n" if DEBUG;
-        $location = "AWS" if (system($aws_cmd) == 0);
-        print "Loation is $location\n" if DEBUG;
+        if (system($aws_cmd) == 0) {
+            $location = "AWS";
+        } elsif (DEBUG) {
+            # Consult https://ec.haxx.se/usingcurl/usingcurl-returns
+            print "curl to AWS metadata server returned ", $?>>8, "\n";
+        }
+        print "Location is $location\n" if DEBUG;
      }
  }
  if ($location =~ /aws|gcp/i and not defined $curl) {
diff --git a/c++/src/app/blastdb/CMakeLists.convert2blastmask.app.txt b/c++/src/app/blastdb/CMakeLists.convert2blastmask.app.txt

index 390775bf2d40bacd06defb1e92ad512fd5f0b7c5..3cdcfcaca052683ebd42e5bad9adefd443aa94d8 100644 (file)
--- a/c++/src/app/blastdb/CMakeLists.convert2blastmask.app.txt
+++ b/c++/src/app/blastdb/CMakeLists.convert2blastmask.app.txt
@@ -1,10 +1,10 @@
  #############################################################################
-# $Id: CMakeLists.convert2blastmask.app.txt 593591 2019-09-20 14:53:34Z gouriano $
+# $Id: CMakeLists.convert2blastmask.app.txt 615546 2020-09-01 12:05:24Z ivanov $
  #############################################################################
  
  NCBI_begin_app(convert2blastmask)
    NCBI_sources(convert2blastmask)
-  NCBI_uses_toolkit_libraries(blast seqmasks_io)
+  NCBI_uses_toolkit_libraries(blast seqmasks_io xblast)
    NCBI_add_definitions(NCBI_MODULE=BLASTDB)
    NCBI_project_watchers(camacho fongah2)
  NCBI_end_app()
diff --git a/c++/src/app/blastdb/blastdb_aliastool.cpp b/c++/src/app/blastdb/blastdb_aliastool.cpp

index 569c1e21e50027a7d824334c525b03ecd35ad349..c27e9d9f8810d3ae636305958a6f3d4dd84e86e6 100644 (file)
--- a/c++/src/app/blastdb/blastdb_aliastool.cpp
+++ b/c++/src/app/blastdb/blastdb_aliastool.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blastdb_aliastool.cpp 593112 2019-09-12 12:56:14Z fongah2 $
+/*  $Id: blastdb_aliastool.cpp 615362 2020-08-31 15:39:55Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -59,6 +59,14 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+               m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdb_aliastool");
+        }
+    }
+    ~CBlastDBAliasApp() {   // adds the time elapsed on m_StopWatch (started in the constructor) as eRunTime
+       m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
      }
  private:
      /** @inheritDoc */
@@ -81,6 +89,7 @@ private:
      int x_ConvertSeqIDFile() const;
      void x_SeqIDFileInfo() const;
  
+    void x_AddCmdOptions();
      /// Documentation for this program
      static const char * const DOCUMENTATION;
  
@@ -108,6 +117,9 @@ private:
      }
      vector<string> x_GetDbsToAggregate(const string dbs, const string file) const;
      void x_AddVDBsToAliasFile( string filename, bool append, string title = kEmptyStr) const;
+
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  const char * const CBlastDBAliasApp::DOCUMENTATION = "\n\n"
@@ -619,9 +631,48 @@ int CBlastDBAliasApp::Run(void)
          }
  
      } CATCH_ALL(status)
+    x_AddCmdOptions();
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
+void CBlastDBAliasApp::x_AddCmdOptions()
+{   // Records the operating mode(s) implied by the command-line arguments as eDBAliasMode.
+       const CArgs & args = GetArgs();
+        if (args["gi_file_in"].HasValue()) {   // group 1: file conversion / file info modes
+        m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "gi_file_conversion");
+        }
+        else if (args["seqid_file_in"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "seqid_file_conversion");
+        }
+        else if (args["seqid_file_info"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "get_seqid_file_info");
+        }
+        // Group 2: alias creation modes. NOTE(review): all three groups write the same key - confirm how repeated AddParam calls are treated.
+        if (args["dblist"].HasValue() || args["dblist_file"].HasValue() || args["num_volumes"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "create_alias_db");
+        }
+        else if (args[kArgDb].HasValue() && args[kArgGiList]){   // NOTE(review): tested without .HasValue(), unlike the other arguments here
+        m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "create_gilist_alias_db");
+        }
+        else if (args[kArgDb].HasValue() && args[kArgSeqIdList]){
+        m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "create_seqidlist_alias_db");
+        }
+        else if (args[kArgDb].HasValue() && args[kArgTaxIdListFile]) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "create_taxidlist_alias_db");
+        }
+        // Group 3: VDB list modes; the value depends on whether a regular db list was also given.
+        if (args["vdblist"].HasValue() || args["vdblist_file"].HasValue()) {
+               if (args["dblist"].HasValue() || args["dblist_file"].HasValue()) {
+                       m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "add_vdblist");
+               }
+               else {
+                       m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, (string) "create_vdb_alias_db");
+               }
+        }
+}
+
+
  
  #ifndef SKIP_DOXYGEN_PROCESSING
  int main(int argc, const char* argv[] /*, const char* envp[]*/)
diff --git a/c++/src/app/blastdb/blastdb_convert.cpp b/c++/src/app/blastdb/blastdb_convert.cpp

index da0cda883072dd8442dd6bfe035fa22c64452304..03246dc370ebd524c6083d341f3e2804b39a327c 100644 (file)
--- a/c++/src/app/blastdb/blastdb_convert.cpp
+++ b/c++/src/app/blastdb/blastdb_convert.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blastdb_convert.cpp 598221 2019-12-05 15:33:01Z fongah2 $
+/*  $Id: blastdb_convert.cpp 615364 2020-08-31 15:40:14Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -91,6 +91,14 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+               m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdb_convert");
+        }
+    }
+    ~CBlastdbConvertApp() {   // adds the time elapsed on m_StopWatch (started in the constructor) as eRunTime
+       m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
      }
  
  private:
@@ -100,6 +108,8 @@ private:
      virtual int Run();
  
      CNcbiOstream * m_LogFile;
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  void CBlastdbConvertApp::Init()
diff --git a/c++/src/app/blastdb/blastdbcheck.cpp b/c++/src/app/blastdb/blastdbcheck.cpp

index 2273c5616edb396eec6fcae0e94fb88ce1b4f057..34c2e668116cf3e4df30918f493c5967431c1360 100644 (file)
--- a/c++/src/app/blastdb/blastdbcheck.cpp
+++ b/c++/src/app/blastdb/blastdbcheck.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blastdbcheck.cpp 538739 2017-06-13 18:26:55Z rackerst $
+/*  $Id: blastdbcheck.cpp 615362 2020-08-31 15:39:55Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -64,6 +64,14 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+               m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdbcheck");
+        }
+    }
+    ~CBlastDbCheckApplication() {   // adds the time elapsed on m_StopWatch (started in the constructor) as eRunTime
+       m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
      }
      
  private:
@@ -73,6 +81,11 @@ private:
      virtual int  Run(void);
      /** @inheritDoc */
      virtual void Exit(void);
+
+    void x_AddCmdOptions();
+
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  
@@ -1491,10 +1504,33 @@ int CBlastDbCheckApplication::Run(void)
          
          status = okay ? 0 : 1;
      } CATCH_ALL(status)
+
+    x_AddCmdOptions();
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
  
+void CBlastDbCheckApplication::x_AddCmdOptions()
+{
+       const CArgs & args = GetArgs();
+    if(args["random"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBTest, (string) "random");
+    }
+    else if (args["full"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBTest, (string) "full");
+    }
+    else if (args["stride"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBTest, (string) "stride");
+    }
+    else if(args["ends"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBTest, (string) "end");
+    }
+    else {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBTest, (string) "default");
+    }
+}
+
  /////////////////////////////////////////////////////////////////////////////
  //  Cleanup
  
diff --git a/c++/src/app/blastdb/blastdbcmd.cpp b/c++/src/app/blastdb/blastdbcmd.cpp

index dd6557ee3b474670461a7fd46036c46c56e0ba09..77789783f7244084d386bad9d8341328752d03f2 100644 (file)
--- a/c++/src/app/blastdb/blastdbcmd.cpp
+++ b/c++/src/app/blastdb/blastdbcmd.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blastdbcmd.cpp 598336 2019-12-06 18:17:01Z merezhuk $
+/*  $Id: blastdbcmd.cpp 616873 2020-09-22 13:14:39Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -64,6 +64,14 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+               m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdbcmd");
+        }
+    }
+    /// Destructor: stores the stopwatch's elapsed time in the usage report
+    /// under CBlastUsageReport::eRunTime.
+    ~CBlastDBCmdApp() {
+        const double elapsed = m_StopWatch.Elapsed();
+        m_UsageReport.AddParam(CBlastUsageReport::eRunTime, elapsed);
      }
  private:
      /** @inheritDoc */
@@ -88,6 +96,9 @@ private:
  
      set<Int4> m_TaxIdList;
  
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
+
      /// Initializes Blast DB
      void x_InitBlastDB();
      void x_InitBlastDB_TaxIdList();
@@ -129,8 +140,11 @@ private:
      void x_PrintBlastDatabaseTaxInformation();
  
      int x_ProcessBatchPig(CBlastDB_Formatter & fmt);
+
+    void x_AddCmdOptions();
  };
  
+
  string s_PreProcessAccessionsForDBv5(const string & id)
  {
         string rv = id;
@@ -162,6 +176,7 @@ string s_PreProcessAccessionsForDBv5(const string & id)
  
  }
  
+
  bool
  CBlastDBCmdApp::x_GetOids(const string & id, vector<int> & oids)
  {
@@ -362,7 +377,14 @@ CBlastDBCmdApp::x_ProcessBatchEntry_NoDup(CBlastDB_Formatter & fmt)
                 ids[i] = s_PreProcessAccessionsForDBv5(ids[i]);
         }
      }
+    try {
      m_BlastDb->AccessionsToOids(ids, oids);
+    }
+    catch (CSeqDBException & e) {
+       if (e.GetMsg().find("DB contains no accession info") == NPOS){
+               NCBI_RETHROW_SAME(e, e.GetMsg());
+       }
+    }
      for(unsigned i=0; i < ids.size(); i++) {
         if(oids[i] == kSeqDBEntryNotFound) {
                 Int8 num_id = NStr::StringToNumeric<Int8>(ids[i], NStr::fConvErr_NoThrow);
@@ -1127,12 +1149,55 @@ int CBlastDBCmdApp::Run(void)
                 x_InitBlastDB();
                         status = x_ProcessSearchRequest();
          }
+       x_AddCmdOptions();
  
      } CATCH_ALL(status)
  
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
+void CBlastDBCmdApp::x_AddCmdOptions()
+{
+       const CArgs & args = GetArgs();
+    if (args["info"]) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBInfo, true);
+    }
+    else if (args["tax_info"]) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBTaxInfo, true);
+    }
+    else if(args[kArgTaxIdList].HasValue() || args[kArgTaxIdListFile].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eTaxIdList, true);
+       }
+    else if(args["ipg"].HasValue() || args["ipg_batch"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eIPGList, true);
+    }
+    else if(args["entry"].HasValue() || args["entry_batch"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBEntry, true);
+        if (args["entry"].HasValue() && args["entry"].AsString() == "all") {
+               m_UsageReport.AddParam(CBlastUsageReport::eDBDumpAll, true);
+       }
+               else {
+               m_UsageReport.AddParam(CBlastUsageReport::eDBEntry, true);
+               }
+    }
+    if(args["outfmt"].HasValue()) {
+       m_UsageReport.AddParam(CBlastUsageReport::eOutputFmt, args["outfmt"].AsString());
+    }
+
+
+       string db_name = m_BlastDb->GetDBNameList();
+       int off = db_name.find_last_of(CFile::GetPathSeparator());
+    if (off != -1) {
+       db_name.erase(0, off+1);
+       }
+       m_UsageReport.AddParam(CBlastUsageReport::eDBName, db_name);
+       m_UsageReport.AddParam(CBlastUsageReport::eDBLength, (Int8) m_BlastDb->GetTotalLength());
+       m_UsageReport.AddParam(CBlastUsageReport::eDBNumSeqs, m_BlastDb->GetNumSeqs());
+       m_UsageReport.AddParam(CBlastUsageReport::eDBDate, m_BlastDb->GetDate());
+}
+
+
  
  #ifndef SKIP_DOXYGEN_PROCESSING
  int main(int argc, const char* argv[] /*, const char* envp[]*/)
diff --git a/c++/src/app/blastdb/blastdbcp.cpp b/c++/src/app/blastdb/blastdbcp.cpp

index 7e5c4894302a14fd65c599fdeb9e6ad8e5e46015..c52bfe4acd363a9aabda018e699e80e6ca3acb79 100644 (file)
--- a/c++/src/app/blastdb/blastdbcp.cpp
+++ b/c++/src/app/blastdb/blastdbcp.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blastdbcp.cpp 605535 2020-04-13 11:07:03Z ivanov $
+/*  $Id: blastdbcp.cpp 615363 2020-08-31 15:40:04Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -34,6 +34,7 @@
  #include <objtools/blast/seqdb_writer/build_db.hpp>
  #include <objtools/blast/seqdb_writer/impl/criteria.hpp>
  #include <objtools/blast/blastdb_format/invalid_data_exception.hpp>
+#include <algo/blast/api/blast_usage_report.hpp>
  
  USING_NCBI_SCOPE;
  USING_SCOPE(blast);
@@ -46,6 +47,9 @@ class BlastdbCopyApplication : public CNcbiApplication
  {
  public:
      BlastdbCopyApplication();
+    /// Destructor: stores the stopwatch's elapsed time in the usage report
+    /// under CBlastUsageReport::eRunTime.
+    ~BlastdbCopyApplication() {
+        const double elapsed = m_StopWatch.Elapsed();
+        m_UsageReport.AddParam(CBlastUsageReport::eRunTime, elapsed);
+    }
  
  private: /* Private Methods */
      virtual void Init(void);
@@ -69,6 +73,9 @@ private: /* Private Data */
      const string kTargetOnly;
      const string kMembershipBits;
      const string kCopyOnly;
+
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  /////////////////////////////////////////////////////////////////////////////
@@ -82,6 +89,11 @@ BlastdbCopyApplication::BlastdbCopyApplication()
      CRef<CVersion> version(new CVersion());
      version->SetVersionInfo(1, 0);
      SetFullVersion(version);
+    m_StopWatch.Start();
+    if (m_UsageReport.IsEnabled()) {
+       m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+       m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdbcp");
+    }
  }
  
  
diff --git a/c++/src/app/blastdb/convert2blastmask.cpp b/c++/src/app/blastdb/convert2blastmask.cpp

index 954ff686d46b0012a65a70d3595a3ad179d29ae1..71b44b98ed8e2ff495383e6073ce54c07054972b 100644 (file)
--- a/c++/src/app/blastdb/convert2blastmask.cpp
+++ b/c++/src/app/blastdb/convert2blastmask.cpp
@@ -1,4 +1,4 @@
-/*  # $Id: convert2blastmask.cpp 492284 2016-02-16 16:55:37Z camacho $
+/*  # $Id: convert2blastmask.cpp 615362 2020-08-31 15:39:55Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -112,6 +112,14 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+               m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "convert2blastmask");
+        }
+    }
+    /// Destructor: stores the stopwatch's elapsed time in the usage report
+    /// under CBlastUsageReport::eRunTime.
+    ~CConvert2BlastMaskApplication() {
+        const double elapsed = m_StopWatch.Elapsed();
+        m_UsageReport.AddParam(CBlastUsageReport::eRunTime, elapsed);
      }
  
  private:
@@ -122,8 +130,12 @@ private:
      CMaskFromFasta* x_GetReader();
      CMaskWriterBlastDbMaskInfo* x_GetWriter();
  
+    void x_AddCmdOptions();
+
      /// Contains the description of this application
      static const char * const USAGE_LINE;
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  const char * const CConvert2BlastMaskApplication::USAGE_LINE 
@@ -218,6 +230,8 @@ int CConvert2BlastMaskApplication::Run(void) {
          cerr << e.what() << endl;
          retval = 1;
      }
+    x_AddCmdOptions();
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, retval);
      return retval;
  }
  
@@ -226,6 +240,21 @@ void CConvert2BlastMaskApplication::Exit(void)
      SetDiagStream(0);
  }
  
+/// Report the convert2blastmask command-line options that were supplied.
+///
+/// The masking algorithm and the output format are reported as strings when
+/// they hold a value; parse_seqids is reported as true when it holds a value.
+void CConvert2BlastMaskApplication::x_AddCmdOptions()
+{
+    const CArgs & cmdline = GetArgs();
+
+    const CArgValue & mask_algo = cmdline["masking_algorithm"];
+    if (mask_algo.HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eMaskAlgo, mask_algo.AsString());
+    }
+
+    const CArgValue & out_format = cmdline["outfmt"];
+    if (out_format.HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eOutputFmt, out_format.AsString());
+    }
+
+    const bool parse_ids_given = cmdline["parse_seqids"].HasValue();
+    if (parse_ids_given) {
+        m_UsageReport.AddParam(CBlastUsageReport::eParseSeqIDs, true);
+    }
+}
+
  #ifndef SKIP_DOXYGEN_PROCESSING
  int main(int argc, const char* argv[])
  {
diff --git a/c++/src/app/blastdb/makeblastdb.cpp b/c++/src/app/blastdb/makeblastdb.cpp

index 323e28b5e15b0f6874f6daf9791b772f6a0a761f..a9a82cfc38b70c81720375639031705746292d2a 100644 (file)
--- a/c++/src/app/blastdb/makeblastdb.cpp
+++ b/c++/src/app/blastdb/makeblastdb.cpp
@@ -1,4 +1,4 @@
-/*  $Id: makeblastdb.cpp 592321 2019-08-29 17:58:35Z fongah2 $
+/*  $Id: makeblastdb.cpp 615359 2020-08-31 15:39:39Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -91,6 +91,14 @@ public:
          CRef<CVersion> version(new CVersion());
          version->SetVersionInfo(new CBlastVersion());
          SetFullVersion(version);
+        m_StopWatch.Start();
+        if (m_UsageReport.IsEnabled()) {
+               m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+               m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "makeblastdb");
+        }
+    }
+    /// Destructor: stores the stopwatch's elapsed time in the usage report
+    /// under CBlastUsageReport::eRunTime.
+    ~CMakeBlastDBApp() {
+        const double elapsed = m_StopWatch.Elapsed();
+        m_UsageReport.AddParam(CBlastUsageReport::eRunTime, elapsed);
      }
  
  private:
@@ -124,6 +132,8 @@ private:
      void x_VerifyInputFilesType(const vector<CTempString>& filenames,
                                  CMakeBlastDBApp::ESupportedInputFormats input_type);
  
+    void x_AddCmdOptions();
+
      // Data
  
      CNcbiOstream * m_LogFile;
@@ -135,6 +145,8 @@ private:
      bool m_IsModifyMode;
  
      bool m_SkipUnver;
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  /// Reads an object defined in a NCBI ASN.1 spec from a stream in multiple
@@ -1206,9 +1218,34 @@ int CMakeBlastDBApp::Run(void)
      int status = 0;
      try { x_BuildDatabase(); }
      CATCH_ALL(status)
+    x_AddCmdOptions();
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
      return status;
  }
  
+void CMakeBlastDBApp::x_AddCmdOptions()
+{
+       const CArgs & args = GetArgs();
+    if (args["input_type"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eInputType, args["input_type"].AsString());
+    }
+    if (args[kArgDbType].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eSeqType, args[kArgDbType].AsString());
+    }
+    if(args["taxid"].HasValue() || args["taxid_map"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eTaxIdList, true);
+       }
+    if(args["parse_seqids"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eParseSeqIDs, args["parse_seqids"].AsBoolean());
+    }
+    if (args["gi_mask"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eGIList, true);
+    }
+    else if(args["mask_data"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eMaskAlgo, true);
+       }
+}
+
  
  #ifndef SKIP_DOXYGEN_PROCESSING
  int main(int argc, const char* argv[] /*, const char* envp[]*/)
diff --git a/c++/src/app/blastdb/makeprofiledb.cpp b/c++/src/app/blastdb/makeprofiledb.cpp

index 4421e88a3c8eebf0577a290f50a29384e06bac32..aabf04fcb995f59f7e1f5b37f0b4ad7fd56006eb 100644 (file)
--- a/c++/src/app/blastdb/makeprofiledb.cpp
+++ b/c++/src/app/blastdb/makeprofiledb.cpp
@@ -1,4 +1,4 @@
-/*  $Id: makeprofiledb.cpp 596198 2019-11-04 15:01:48Z boratyng $
+/*  $Id: makeprofiledb.cpp 615360 2020-08-31 15:39:46Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -244,6 +244,8 @@ private:
  
      int x_Run(void);
  
+    void x_AddCmdOptions();
+
      // Data
      CNcbiOstream * m_LogFile;
      CNcbiIstream * m_InPssmList;
@@ -276,6 +278,9 @@ private:
  
         bool m_UpdateFreqRatios;
         bool m_UseModelThreshold;
+
+    CBlastUsageReport m_UsageReport;
+    CStopWatch m_StopWatch;
  };
  
  CMakeProfileDBApp::CMakeProfileDBApp(void)
@@ -291,6 +296,11 @@ CMakeProfileDBApp::CMakeProfileDBApp(void)
         CRef<CVersion> version(new CVersion());
         version->SetVersionInfo(new CBlastVersion());
         SetFullVersion(version);
+    m_StopWatch.Start();
+    if (m_UsageReport.IsEnabled()) {
+       m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+       m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "makeprofiledb");
+    }
  }
  
  CMakeProfileDBApp::~CMakeProfileDBApp()
@@ -348,6 +358,7 @@ CMakeProfileDBApp::~CMakeProfileDBApp()
                  string pog_str = m_OutDbName + ".pog";
                  CFile(pog_str).Remove();
          }
+        m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
  }
  
  void CMakeProfileDBApp::x_SetupArgDescriptions(void)
@@ -1732,9 +1743,24 @@ int CMakeProfileDBApp::Run(void)
             LOG_POST(Error << "Error: Unknown exception");
             status = BLAST_UNKNOWN_ERROR;
         }
+
+       x_AddCmdOptions();
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
         return status;
  }
  
+/// Report the makeprofiledb command-line options that were supplied.
+///
+/// The dbtype value is reported as a string when it holds a value, and a
+/// tax ID marker is reported when either taxid or taxid_map holds a value.
+void CMakeProfileDBApp::x_AddCmdOptions()
+{
+    const CArgs & cmdline = GetArgs();
+
+    const CArgValue & db_type = cmdline["dbtype"];
+    if (db_type.HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBType, db_type.AsString());
+    }
+
+    const bool has_taxids =
+        cmdline["taxid"].HasValue() || cmdline["taxid_map"].HasValue();
+    if (has_taxids) {
+        m_UsageReport.AddParam(CBlastUsageReport::eTaxIdList, true);
+    }
+}
+
+
  #ifndef SKIP_DOXYGEN_PROCESSING
  int main(int argc, const char* argv[] /*, const char* envp[]*/)
  {
diff --git a/c++/src/build-system/Makefile.mk.in b/c++/src/build-system/Makefile.mk.in

index 2768fc6b1e0603437895bf67ec6d53ae872c3646..a780b435e8eecc1dc0bfd8ab41d6b7e78b765d21 100644 (file)
--- a/c++/src/build-system/Makefile.mk.in
+++ b/c++/src/build-system/Makefile.mk.in
@@ -1,5 +1,5 @@
  #################################
-# $Id: Makefile.mk.in 606338 2020-04-20 16:30:59Z ivanov $
+# $Id: Makefile.mk.in 616396 2020-09-15 18:22:00Z ivanov $
  # Author:  Denis Vakatov (vakatov@ncbi.nlm.nih.gov)
  #################################
  #
@@ -378,6 +378,8 @@ BZ2_LIBS    = @BZ2_LIBS@
  BZ2_LIB     = @BZ2_LIB@
  LZO_INCLUDE = @LZO_INCLUDE@
  LZO_LIBS    = @LZO_LIBS@
+ZSTD_INCLUDE= @ZSTD_INCLUDE@
+ZSTD_LIBS   = @ZSTD_LIBS@
  
  CMPRS_INCLUDE = $(Z_INCLUDE) $(BZ2_INCLUDE) $(LZO_INCLUDE)
  CMPRS_LIBS    = $(Z_LIBS) $(BZ2_LIBS) $(LZO_LIBS)
@@ -784,6 +786,19 @@ HIREDIS_INCLUDE     = @HIREDIS_INCLUDE@
  HIREDIS_LIBS        = @HIREDIS_LIBS@
  HIREDIS_STATIC_LIBS = @HIREDIS_STATIC_LIBS@
  
+# Apache Arrow (specifically focusing on Parquet)
+APACHE_ARROW_INCLUDE     = @APACHE_ARROW_INCLUDE@
+APACHE_ARROW_LIBS        = @APACHE_ARROW_LIBS@
+APACHE_ARROW_STATIC_LIBS = @APACHE_ARROW_STATIC_LIBS@
+
+# Kafka
+LIBRDKAFKA_INCLUDE   = @LIBRDKAFKA_INCLUDE@
+LIBRDKAFKA_LIBS      = @LIBRDKAFKA_LIBS@
+LIBRDKAFKA_STATIC_LIBS = @LIBRDKAFKA_STATIC_LIBS@
+CPPKAFKA_INCLUDE     = @CPPKAFKA_INCLUDE@
+CPPKAFKA_LIBS        = @CPPKAFKA_LIBS@
+CPPKAFKA_STATIC_LIBS = @CPPKAFKA_STATIC_LIBS@
+
  # Compress
  COMPRESS_LDEP = $(CMPRS_LIB)
  COMPRESS_LIBS = xcompress $(COMPRESS_LDEP)
@@ -840,10 +855,10 @@ EUTILS_LIBS = eutils egquery elink epost esearch espell esummary \
  OBJREAD_LIBS = xobjread variation submit xlogging
  
  # formatting code
-XFORMAT_LIBS = xformat xcleanup gbseq mlacli mla medlars pubmed valid $(OBJEDIT_LIBS)
+XFORMAT_LIBS = xformat xcleanup gbseq $(OBJEDIT_LIBS)
  
  # object editing library
-OBJEDIT_LIBS = xobjedit $(OBJREAD_LIBS) taxon3
+OBJEDIT_LIBS = xobjedit $(OBJREAD_LIBS) taxon3 mlacli mla medlars pubmed valid 
  
  # standard data loader configuration, plus supporting libraries
  DATA_LOADERS_UTIL_LIB = data_loaders_util \
diff --git a/c++/src/build-system/Makefile.xcode.tmpl b/c++/src/build-system/Makefile.xcode.tmpl

index 6f7991797b9db8d0bf691c421c8ae99bf2692fac..ea9dd58b9cb67383be380ce34064999eb0b7e151 100644 (file)
--- a/c++/src/build-system/Makefile.xcode.tmpl
+++ b/c++/src/build-system/Makefile.xcode.tmpl
@@ -1,12 +1,12 @@
-#  $Id: Makefile.xcode.tmpl 563416 2018-05-09 11:59:33Z ivanov $
+#  $Id: Makefile.xcode.tmpl 608826 2020-05-21 18:14:06Z ivanov $
  #  Makefile template for Xcode
  #######################################################################
  
  include ./Makefile.mk
  
-DEVSDK = /Developer/SDKs
-SDKDIR = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform
-SDK = $(firstword $(wildcard $(DEVSDK)/*.sdk) $(wildcard $(SDKDIR)/*.sdk))
+# DEVSDK = /Developer/SDKs
+# SDKDIR = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform
+# SDK = $(firstword $(wildcard $(DEVSDK)/*.sdk) $(wildcard $(SDKDIR)/*.sdk))
  ifneq "" "$(wildcard ${SDK})"
    SDKFLAG = -sdk ${SDK}
  else
diff --git a/c++/src/build-system/cmake/CMake.NCBIComponents.cmake b/c++/src/build-system/cmake/CMake.NCBIComponents.cmake

index c87171036e1818d3555298c17d5bf138969d760f..278ec9120e0fccd4236521d154189af7c67697b4 100644 (file)
--- a/c++/src/build-system/cmake/CMake.NCBIComponents.cmake
+++ b/c++/src/build-system/cmake/CMake.NCBIComponents.cmake
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMake.NCBIComponents.cmake 607658 2020-05-06 12:48:50Z ivanov $
+# $Id: CMake.NCBIComponents.cmake 609371 2020-06-01 14:13:18Z ivanov $
  #############################################################################
  
  ##
@@ -43,7 +43,7 @@ endif()
  if(WIN32)
    set(NCBI_COMPONENT_local_lbsm_FOUND NO)
  else()
-  if (EXISTS ${NCBI_SRC_ROOT}/connect/ncbi_lbsm.c)
+  if (EXISTS ${NCBITK_SRC_ROOT}/connect/ncbi_lbsm.c)
  #    message("local_lbsm found at ${NCBI_SRC_ROOT}/connect")
      set(NCBI_COMPONENT_local_lbsm_FOUND YES)
      set(HAVE_LOCAL_LBSM 1)
@@ -56,9 +56,9 @@ endif()
  
  #############################################################################
  # LocalPCRE
-if (EXISTS ${includedir}/util/regexp)
+if (EXISTS ${NCBITK_INC_ROOT}/util/regexp)
    set(NCBI_COMPONENT_LocalPCRE_FOUND YES)
-  set(NCBI_COMPONENT_LocalPCRE_INCLUDE ${includedir}/util/regexp)
+  set(NCBI_COMPONENT_LocalPCRE_INCLUDE ${NCBITK_INC_ROOT}/util/regexp)
    set(NCBI_COMPONENT_LocalPCRE_NCBILIB regexp)
  else()
    set(NCBI_COMPONENT_LocalPCRE_FOUND NO)
@@ -66,9 +66,9 @@ endif()
  
  #############################################################################
  # LocalZ
-if (EXISTS ${includedir}/util/compress/zlib)
+if (EXISTS ${NCBITK_INC_ROOT}/util/compress/zlib)
    set(NCBI_COMPONENT_LocalZ_FOUND YES)
-  set(NCBI_COMPONENT_LocalZ_INCLUDE ${includedir}/util/compress/zlib)
+  set(NCBI_COMPONENT_LocalZ_INCLUDE ${NCBITK_INC_ROOT}/util/compress/zlib)
    set(NCBI_COMPONENT_LocalZ_NCBILIB z)
  else()
    set(NCBI_COMPONENT_LocalZ_FOUND NO)
@@ -76,9 +76,9 @@ endif()
  
  #############################################################################
  # LocalBZ2
-if (EXISTS ${includedir}/util/compress/bzip2)
+if (EXISTS ${NCBITK_INC_ROOT}/util/compress/bzip2)
    set(NCBI_COMPONENT_LocalBZ2_FOUND YES)
-  set(NCBI_COMPONENT_LocalBZ2_INCLUDE ${includedir}/util/compress/bzip2)
+  set(NCBI_COMPONENT_LocalBZ2_INCLUDE ${NCBITK_INC_ROOT}/util/compress/bzip2)
    set(NCBI_COMPONENT_LocalBZ2_NCBILIB bz2)
  else()
    set(NCBI_COMPONENT_LocalBZ2_FOUND NO)
@@ -86,9 +86,9 @@ endif()
  
  #############################################################################
  #LocalLMDB
-if (EXISTS ${includedir}/util/lmdb)
+if (EXISTS ${NCBITK_INC_ROOT}/util/lmdb)
    set(NCBI_COMPONENT_LocalLMDB_FOUND YES)
-  set(NCBI_COMPONENT_LocalLMDB_INCLUDE ${includedir}/util/lmdb)
+  set(NCBI_COMPONENT_LocalLMDB_INCLUDE ${NCBITK_INC_ROOT}/util/lmdb)
    set(NCBI_COMPONENT_LocalLMDB_NCBILIB lmdb)
  else()
    set(NCBI_COMPONENT_LocalLMDB_FOUND NO)
@@ -96,8 +96,8 @@ endif()
  
  #############################################################################
  # FreeTDS
-set(FTDS95_INCLUDE  ${includedir}/dbapi/driver/ftds95  ${includedir}/dbapi/driver/ftds95/freetds)
-set(FTDS100_INCLUDE ${includedir}/dbapi/driver/ftds100 ${includedir}/dbapi/driver/ftds100/freetds)
+set(FTDS95_INCLUDE  ${NCBITK_INC_ROOT}/dbapi/driver/ftds95  ${NCBITK_INC_ROOT}/dbapi/driver/ftds95/freetds)
+set(FTDS100_INCLUDE ${NCBITK_INC_ROOT}/dbapi/driver/ftds100 ${NCBITK_INC_ROOT}/dbapi/driver/ftds100/freetds)
  
  set(NCBI_COMPONENT_FreeTDS_FOUND   YES)
  set(NCBI_COMPONENT_FreeTDS_INCLUDE ${FTDS100_INCLUDE})
@@ -115,7 +115,7 @@ if (NCBI_EXPERIMENTAL_DISABLE_HUNTER)
  
  if (MSVC)
    include(${NCBI_TREE_CMAKECFG}/CMake.NCBIComponentsMSVC.cmake)
-elseif (XCODE)
+elseif (APPLE)
    include(${NCBI_TREE_CMAKECFG}/CMake.NCBIComponentsXCODE.cmake)
  else()
      if(NCBI_EXPERIMENTAL_CFG)
@@ -132,8 +132,8 @@ endif()
  
  #############################################################################
  # FreeTDS
-set(FTDS95_INCLUDE  ${includedir}/dbapi/driver/ftds95  ${includedir}/dbapi/driver/ftds95/freetds)
-set(FTDS100_INCLUDE ${includedir}/dbapi/driver/ftds100 ${includedir}/dbapi/driver/ftds100/freetds)
+set(FTDS95_INCLUDE  ${NCBITK_INC_ROOT}/dbapi/driver/ftds95  ${NCBITK_INC_ROOT}/dbapi/driver/ftds95/freetds)
+set(FTDS100_INCLUDE ${NCBITK_INC_ROOT}/dbapi/driver/ftds100 ${NCBITK_INC_ROOT}/dbapi/driver/ftds100/freetds)
  
  #############################################################################
  list(SORT NCBI_ALL_COMPONENTS)
diff --git a/c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake b/c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake

index 509d065aa9e8c8c5b1ac9487e8e2b9dc89269894..3e3895003a026b9511b99e2b232f2fc6950c8188 100644 (file)
--- a/c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake
+++ b/c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMake.NCBIComponentsMSVC.cmake 607786 2020-05-07 15:35:50Z ivanov $
+# $Id: CMake.NCBIComponentsMSVC.cmake 609371 2020-06-01 14:13:18Z ivanov $
  #############################################################################
  
  ##
@@ -15,7 +15,7 @@
  ##  HAVE_XXX
  
  
-set(NCBI_COMPONENT_MSWin_FOUND YES)
+set(NCBI_REQUIRE_MSWin_FOUND YES)
  #to debug
  #set(NCBI_TRACE_COMPONENT_GRPC ON)
  #############################################################################
diff --git a/c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake b/c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake

index 4c08f9c2a6634bd24f6506ff4092b1b4ae891aa9..41972faf2e9c5ffdd1066ce503e0ce0c9eeca64f 100644 (file)
--- a/c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake
+++ b/c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMake.NCBIComponentsUNIX.cmake 605517 2020-04-12 00:56:13Z ucko $
+# $Id: CMake.NCBIComponentsUNIX.cmake 611999 2020-07-14 15:30:59Z ivanov $
  #############################################################################
  
  ##
@@ -610,8 +610,8 @@ if (WIN32)
         find_external_library(VDB
                 INCLUDES sra/sradb.h
                 LIBS ncbi-vdb
-               INCLUDE_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.10.5\\interfaces"
-               LIBS_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.10.5\\win\\release\\x86_64\\lib")
+               INCLUDE_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.10.8\\interfaces"
+               LIBS_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.10.8\\win\\release\\x86_64\\lib")
  else (WIN32)
         find_external_library(VDB
                 INCLUDES sra/sradb.h
diff --git a/c++/src/build-system/cmake/CMake.NCBIComponentsUNIXex.cmake b/c++/src/build-system/cmake/CMake.NCBIComponentsUNIXex.cmake

index 66bb91d2111046d677c6ecff6c19c38a54921c56..af4e6b4900a0add217ba214aba02900206fbb1df 100644 (file)
--- a/c++/src/build-system/cmake/CMake.NCBIComponentsUNIXex.cmake
+++ b/c++/src/build-system/cmake/CMake.NCBIComponentsUNIXex.cmake
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMake.NCBIComponentsUNIXex.cmake 607786 2020-05-07 15:35:50Z ivanov $
+# $Id: CMake.NCBIComponentsUNIXex.cmake 609371 2020-06-01 14:13:18Z ivanov $
  #############################################################################
  
  ##
@@ -14,8 +14,10 @@
  ##  HAVE_LIBXXX
  ##  HAVE_XXX
  
-set(NCBI_COMPONENT_unix_FOUND YES)
-set(NCBI_COMPONENT_Linux_FOUND YES)
+set(NCBI_REQUIRE_unix_FOUND YES)
+if(NOT APPLE)
+set(NCBI_REQUIRE_Linux_FOUND YES)
+endif()
  option(USE_LOCAL_BZLIB "Use a local copy of libbz2")
  option(USE_LOCAL_PCRE "Use a local copy of libpcre")
  #to debug
@@ -292,21 +294,20 @@ if(NOT NCBI_COMPONENT_BACKWARD_DISABLED)
      if(EXISTS ${NCBI_ThirdParty_BACKWARD}/include)
          set(LIBBACKWARD_INCLUDE ${NCBI_ThirdParty_BACKWARD}/include)
          set(HAVE_LIBBACKWARD_CPP YES)
+        set(NCBI_COMPONENT_BACKWARD_FOUND YES)
+        set(NCBI_COMPONENT_BACKWARD_INCLUDE ${LIBBACKWARD_INCLUDE})
+        list(APPEND NCBI_ALL_COMPONENTS BACKWARD)
+    else()
+        message("NOT FOUND BACKWARD")
      endif()
      find_library(LIBBACKWARD_LIBS NAMES backward HINTS ${NCBI_ThirdParty_BACKWARD}/lib)
      find_library(LIBDW_LIBS NAMES dw)
      if (LIBDW_LIBS)
          set(HAVE_LIBDW YES)
      endif()
-
      if(HAVE_LIBBACKWARD_CPP AND HAVE_LIBDW)
-        set(NCBI_COMPONENT_BACKWARD_FOUND YES)
-        set(NCBI_COMPONENT_BACKWARD_INCLUDE ${LIBBACKWARD_INCLUDE})
          set(NCBI_COMPONENT_BACKWARD_LIBS ${LIBDW_LIBS})
  #        set(NCBI_COMPONENT_BACKWARD_LIBS ${LIBBACKWARD_LIBS} ${LIBDW_LIBS})
-        list(APPEND NCBI_ALL_COMPONENTS BACKWARD)
-    else()
-        message("NOT FOUND BACKWARD")
      endif()
  else(NOT NCBI_COMPONENT_BACKWARD_DISABLED)
      message("DISABLED BACKWARD")
diff --git a/c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake b/c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake

index cc8944b40a5b1de499c4f45c81113d859e6b0106..b496332f739ac9496df493fc408685e1666b13b5 100644 (file)
--- a/c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake
+++ b/c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMake.NCBIComponentsXCODE.cmake 607661 2020-05-06 12:49:33Z ivanov $
+# $Id: CMake.NCBIComponentsXCODE.cmake 611999 2020-07-14 15:30:59Z ivanov $
  #############################################################################
  
  ##
@@ -15,8 +15,12 @@
  ##  HAVE_XXX
  
  
-set(NCBI_COMPONENT_XCODE_FOUND YES)
-set(NCBI_COMPONENT_unix_FOUND YES)
+set(NCBI_REQUIRE_unix_FOUND YES)
+if(XCODE)
+set(NCBI_REQUIRE_XCODE_FOUND YES)
+endif()
+#to debug
+#set(NCBI_TRACE_COMPONENT_JPEG ON)
  #############################################################################
  # common settings
  set(NCBI_TOOLS_ROOT $ENV{NCBI})
@@ -55,6 +59,7 @@ set(KRB5_LIBS "-framework Kerberos" -liconv)
  ############################################################################
  set(NCBI_ThirdPartyBasePath ${NCBI_TOOLS_ROOT})
  
+set(NCBI_ThirdParty_BACKWARD   ${NCBI_ThirdPartyBasePath}/backward-cpp-1.3.20180206-44ae960)
  set(NCBI_ThirdParty_TLS        ${NCBI_ThirdPartyBasePath}/gnutls-3.4.0)
  #set(NCBI_ThirdParty_FASTCGI 
  set(NCBI_ThirdParty_Boost      ${NCBI_ThirdPartyBasePath}/boost-1.62.0-ncbi1)
@@ -71,19 +76,23 @@ set(NCBI_ThirdParty_TIFF       ${NCBI_ThirdPartyBasePath}/safe-sw)
  set(NCBI_ThirdParty_XML        ${NCBI_ThirdPartyBasePath}/libxml-2.7.8)
  set(NCBI_ThirdParty_XSLT       ${NCBI_ThirdPartyBasePath}/libxml-2.7.8)
  set(NCBI_ThirdParty_EXSLT      ${NCBI_ThirdParty_XSLT})
-set(NCBI_ThirdParty_SQLITE3    ${NCBI_ThirdPartyBasePath}/sqlite-3.8.10.1-ncbi1)
+set(NCBI_ThirdParty_SQLITE3    ${NCBI_ThirdPartyBasePath}/sqlite-3.26.0-ncbi1)
  #set(NCBI_ThirdParty_Sybase
-set(NCBI_ThirdParty_VDB        "/net/snowman/vol/projects/trace_software/vdb/vdb-versions/2.10.5")
+set(NCBI_ThirdParty_VDB        "/net/snowman/vol/projects/trace_software/vdb/vdb-versions/2.10.8")
  set(NCBI_ThirdParty_VDB_ARCH x86_64)
  set(NCBI_ThirdParty_wxWidgets ${NCBI_ThirdPartyBasePath}/wxWidgets-3.1.3-ncbi1)
  set(NCBI_ThirdParty_GLEW      ${NCBI_ThirdPartyBasePath}/glew-1.5.8)
  set(NCBI_ThirdParty_FTGL      ${NCBI_ThirdPartyBasePath}/ftgl-2.1.3-rc5)
  set(NCBI_ThirdParty_FreeType  ${NCBI_OPT_ROOT})
+set(NCBI_ThirdParty_NGHTTP2   ${NCBI_ThirdPartyBasePath}/nghttp2-1.40.0)
+set(NCBI_ThirdParty_UV        ${NCBI_ThirdPartyBasePath}/libuv-1.35.0)
+set(NCBI_ThirdParty_GL2PS     ${NCBI_ThirdPartyBasePath}/gl2ps-1.4.0)
+set(NCBI_ThirdParty_Nettle    ${NCBI_ThirdPartyBasePath}/nettle-3.1.1)
+set(NCBI_ThirdParty_GMP       ${NCBI_ThirdPartyBasePath}/gmp-6.0.0a)
  
  #############################################################################
  #############################################################################
  
-set(_XCODE_EXTRA_LIBS)
  function(NCBI_define_component _name)
  
      if(NCBI_COMPONENT_${_name}_DISABLED)
@@ -120,8 +129,8 @@ function(NCBI_define_component _name)
          set(_suffixes .a .dylib)
      endif()
      set(_roots ${_root})
-#    set(_subdirs Release${NCBI_PlatformBits}/lib lib64 lib)
-    set(_subdirs Release${NCBI_PlatformBits}/lib lib64 ${_XCODE_EXTRA_LIBS})
+    set(_subdirs Release${NCBI_PlatformBits}/lib lib64 lib)
+#    set(_subdirs Release${NCBI_PlatformBits}/lib lib64 ${_XCODE_EXTRA_LIBS})
      if (BUILD_SHARED_LIBS AND DEFINED NCBI_ThirdParty_${_name}_SHLIB)
          set(_roots ${NCBI_ThirdParty_${_name}_SHLIB} ${_roots})
          set(_subdirs shlib64 shlib lib64 lib)
@@ -135,11 +144,18 @@ function(NCBI_define_component _name)
              set(_all_libs "")
              foreach(_lib IN LISTS _args)
                  set(_this_found NO)
+                if(NCBI_TRACE_COMPONENT_${_name})
+                    message("${_name}: checking ${_root}/${_libdir}/lib${_lib}")
+                endif()
                  foreach(_sfx IN LISTS _suffixes)
                      if(EXISTS ${_root}/${_libdir}/lib${_lib}${_sfx})
                          list(APPEND _all_libs ${_root}/${_libdir}/lib${_lib}${_sfx})
                          set(_this_found YES)
                          break()
+                    else()
+                        if(NCBI_TRACE_COMPONENT_${_name})
+                            message("${_name}: ${_root}/${_libdir}/lib${_lib}${_sfx} not found")
+                        endif()
                      endif()
                  endforeach()
                  if(NOT _this_found)
@@ -203,8 +219,20 @@ endmacro()
  set(NCBI_COMPONENT_NCBI_C_FOUND NO)
  
  #############################################################################
-# STACKTRACE
-set(NCBI_COMPONENT_STACKTRACE_FOUND NO)
+# BACKWARD, UNWIND  (only BACKWARD is configured in this stanza)
+if(NOT NCBI_COMPONENT_BACKWARD_DISABLED)
+    if(EXISTS ${NCBI_ThirdParty_BACKWARD}/include)  # only the include directory is checked; no library lookup happens here
+        set(LIBBACKWARD_INCLUDE ${NCBI_ThirdParty_BACKWARD}/include)
+        set(HAVE_LIBBACKWARD_CPP YES)
+        set(NCBI_COMPONENT_BACKWARD_FOUND YES)
+        set(NCBI_COMPONENT_BACKWARD_INCLUDE ${LIBBACKWARD_INCLUDE})
+        list(APPEND NCBI_ALL_COMPONENTS BACKWARD)  # add BACKWARD to the list of available components
+    else()
+        message("NOT FOUND BACKWARD")  # NCBI_COMPONENT_BACKWARD_FOUND is not set in this branch
+    endif()
+else(NOT NCBI_COMPONENT_BACKWARD_DISABLED)
+    message("DISABLED BACKWARD")  # NCBI_COMPONENT_BACKWARD_FOUND is not set in this branch either
+endif(NOT NCBI_COMPONENT_BACKWARD_DISABLED)
  
  #############################################################################
  #LMDB
@@ -252,7 +280,6 @@ else()
    set(NCBI_COMPONENT_Boost.Test.Included_FOUND NO)
  endif()
  
-set(_XCODE_EXTRA_LIBS lib)
  #############################################################################
  # Boost.Test
  NCBI_define_component(Boost.Test boost_unit_test_framework)
@@ -260,7 +287,6 @@ NCBI_define_component(Boost.Test boost_unit_test_framework)
  #############################################################################
  # Boost.Spirit
  NCBI_define_component(Boost.Spirit boost_thread-mt)
-set(_XCODE_EXTRA_LIBS "")
  
  #############################################################################
  # JPEG
@@ -432,10 +458,27 @@ NCBI_define_component(FTGL ftgl)
  
  #############################################################################
  # FreeType
-set(_XCODE_EXTRA_LIBS lib)
  NCBI_define_component(FreeType freetype)
  if(NCBI_COMPONENT_FreeType_FOUND)
      set(NCBI_COMPONENT_FreeType_INCLUDE ${NCBI_COMPONENT_FreeType_INCLUDE} ${NCBI_COMPONENT_FreeType_INCLUDE}/freetype2)
  endif()
-set(_XCODE_EXTRA_LIBS "")
  
+#############################################################################
+# NGHTTP2 (looks for libnghttp2)
+NCBI_define_component(NGHTTP2 nghttp2)
+
+#############################################################################
+# UV (looks for libuv)
+NCBI_define_component(UV uv)
+
+#############################################################################
+# GL2PS (looks for libgl2ps)
+NCBI_define_component(GL2PS gl2ps)
+
+#############################################################################
+# Nettle (looks for both libnettle and libhogweed)
+NCBI_define_component(Nettle nettle hogweed)
+
+#############################################################################
+# GMP (the definition below is commented out, so no GMP component is defined here)
+#NCBI_define_component(GMP gmp)
diff --git a/c++/src/build-system/cmake/CMake.NCBIptb.cmake b/c++/src/build-system/cmake/CMake.NCBIptb.cmake

index 178c229830f4e8d05e48faba2ca83c6e5c329e74..8cd375b1ad8c555ce1b8f29e58cf2a6e551e00a2 100644 (file)
--- a/c++/src/build-system/cmake/CMake.NCBIptb.cmake
+++ b/c++/src/build-system/cmake/CMake.NCBIptb.cmake
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMake.NCBIptb.cmake 607660 2020-05-06 12:49:19Z ivanov $
+# $Id: CMake.NCBIptb.cmake 609379 2020-06-01 14:15:14Z ivanov $
  #############################################################################
  #############################################################################
  ##
@@ -100,6 +100,7 @@
  #############################################################################
  # deprecated
  macro(NCBI_add_root_subdirectory)
+    message(WARNING "NCBI_add_root_subdirectory is deprecated, use NCBI_add_subdirectory instead")  # emitted on every call before forwarding the arguments unchanged
      NCBI_add_subdirectory(${ARGV})
  endmacro()
  
@@ -108,10 +109,22 @@ function(NCBI_add_subdirectory)
      if(NCBI_PTBMODE_PARTS)
          return()
      endif()
-    if(NCBI_PTBCFG_ENABLE_COLLECTOR AND NOT NCBI_PTB_HAS_ROOT)
+
+    if(NOT DEFINED NCBI_CURRENT_SOURCE_DIR)
          set(NCBI_CURRENT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+    endif()
+    if("${NCBI_CURRENT_SOURCE_DIR}" STREQUAL "${NCBITK_SRC_ROOT}")
+        set(NCBI_TREE_ROOT    ${NCBITK_TREE_ROOT})
+        set(NCBI_SRC_ROOT     ${NCBITK_SRC_ROOT})
+        set(NCBI_INC_ROOT     ${NCBITK_INC_ROOT})
+    elseif("${NCBI_CURRENT_SOURCE_DIR}" STREQUAL "${NCBITK_TREE_ROOT}")
+        set(NCBI_TREE_ROOT    ${NCBITK_TREE_ROOT})
+        set(NCBI_SRC_ROOT     ${NCBITK_TREE_ROOT})
+        set(NCBI_INC_ROOT     ${NCBITK_TREE_ROOT})
+    endif()
+
+    if(NOT NCBI_PTB_HAS_ROOT)
          NCBI_internal_analyze_tree()
-        variable_watch(CMAKE_CURRENT_LIST_DIR NCBI_internal_end_of_config)
      endif()
  
      if(NCBI_PTBMODE_COLLECT_DEPS)
@@ -130,13 +143,25 @@ function(NCBI_add_subdirectory)
              if(DEFINED NCBI_PTB_ALLOWED_DIRS)
                  set(_is_good FALSE)
                  foreach(_dir IN LISTS NCBI_PTB_ALLOWED_DIRS)
-                    NCBI_util_match_path(${_dir} ${NCBI_CURRENT_SOURCE_DIR} _is_good)
-                    if(_is_good)
+                    string(FIND ${_dir} ${NCBI_CURRENT_SOURCE_DIR} _pos)
+                    if(${_pos} EQUAL 0)
+                        set(_is_good TRUE)
                          break()
                      endif()
                  endforeach()
              else()
-                set(_is_good TRUE)
+                NCBI_internal_process_project_filters( _is_good)
+                if(NOT _is_good)
+                    if(NOT "${NCBI_PTBCFG_PROJECT_LIST}" STREQUAL "")
+                        foreach(_dir IN LISTS NCBI_PTBCFG_PROJECT_LIST)
+                            string(FIND "${NCBI_SRC_ROOT}/${_dir}" "${NCBI_CURRENT_SOURCE_DIR}" _pos)
+                            if(${_pos} EQUAL 0)
+                                set(_is_good TRUE)
+                                break()
+                            endif()
+                        endforeach()
+                    endif()
+                endif()
              endif()
              if (_is_good AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_sub}/CMakeLists.txt")
                  add_subdirectory(${_sub})
@@ -302,6 +327,10 @@ macro(NCBI_begin_app _name)
                  if (NCBI_PTBMODE_COLLECT_DEPS OR TARGET ${_name})
                      set(_appname ${_appname}-app)
                  endif()
+            elseif(NOT NCBI_PTBCFG_ENABLE_COLLECTOR)
+                if (TARGET ${_name})
+                    set(_appname ${_appname}-app)
+                endif()
              endif()
          endif()
          set(NCBI_PROJECT ${_appname})
@@ -616,6 +645,18 @@ function(NCBI_register_hook _event _callback)
      endif()
  endfunction()
  
+##############################################################################
+macro(NCBI_util_elapsed _value)  # stores the current clock time, plus elapsed seconds when a start time is known, in the variable named by _value
+    if(DEFINED NCBI_TIMESTAMP_START)
+        string(TIMESTAMP _curtime "%s")  # current time as seconds since the epoch
+        math(EXPR _delta "${_curtime} - ${NCBI_TIMESTAMP_START}")  # seconds elapsed since NCBI_TIMESTAMP_START
+        string(TIMESTAMP _curtime "%H:%M:%S")  # _curtime is reused for the human-readable clock time
+        set(${_value} "${_curtime} (${_delta}s)")  # result has the form "HH:MM:SS (Ns)"
+    else()
+        string(TIMESTAMP ${_value} "%H:%M:%S")  # no start time recorded: clock time only
+    endif()
+endmacro()  # this is a macro, so _curtime and _delta also remain set in the caller's scope
+
  ##############################################################################
  macro(NCBI_util_parse_sign _input _value _negative)
      string(SUBSTRING ${_input} 0 1 _sign)
@@ -667,53 +708,63 @@ macro(NCBI_internal_analyze_tree)
          set_property(GLOBAL PROPERTY NCBI_PTBPROP_COUNT_${_type} 0)
      endforeach()
  
-    if(NOT DEFINED NCBI_PTBCFG_KNOWN_FOLDERS OR "${NCBI_PTBCFG_KNOWN_FOLDERS}" STREQUAL "")
-        file(GLOB _files LIST_DIRECTORIES TRUE "${NCBI_CURRENT_SOURCE_DIR}/*")
-        foreach(_file IN LISTS _files)
-            if(IS_DIRECTORY ${_file} AND EXISTS ${_file}/CMakeLists.txt)
-                get_filename_component(_basename ${_file} NAME)
-                list(APPEND NCBI_PTBCFG_KNOWN_FOLDERS ${_basename})
-            endif()
-        endforeach()
+    if( "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "" AND
+        "${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "" AND
+        "${NCBI_PTBCFG_PROJECT_LIST}" STREQUAL "")
+        set(NCBI_PTB_NOFILTERS TRUE)
+    endif()
+    if (NCBI_PTBCFG_ENABLE_COLLECTOR AND NCBI_PTB_NOFILTERS AND NOT NCBI_PTBCFG_ALLOW_COMPOSITE)
+        set(NCBI_PTBCFG_ENABLE_COLLECTOR FALSE)
+        set(NCBI_PTBCFG_ENABLE_COLLECTOR FALSE PARENT_SCOPE)
      endif()
  
-    message("Analyzing source tree...")
-    set_property(GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS "")
+    if(NCBI_PTBCFG_ENABLE_COLLECTOR)
+        if(NOT DEFINED NCBI_PTBCFG_KNOWN_FOLDERS OR "${NCBI_PTBCFG_KNOWN_FOLDERS}" STREQUAL "")
+            file(GLOB _files LIST_DIRECTORIES TRUE "${NCBI_CURRENT_SOURCE_DIR}/*")
+            foreach(_file IN LISTS _files)
+                if(IS_DIRECTORY ${_file} AND EXISTS ${_file}/CMakeLists.txt)
+                    get_filename_component(_basename ${_file} NAME)
+                    list(APPEND NCBI_PTBCFG_KNOWN_FOLDERS ${_basename})
+                endif()
+            endforeach()
+        endif()
+        list(LENGTH NCBI_PTBCFG_KNOWN_FOLDERS _count)
+        if(NOT ${_count} EQUAL 1)
+            set(NCBI_PTB_THIS_SRC_ROOT ${NCBI_SRC_ROOT} PARENT_SCOPE)
+            set(NCBI_PTB_THIS_SRC_ROOT ${NCBI_SRC_ROOT})
+        endif()
  
-    set(NCBI_PTBMODE_COLLECT_DEPS ON)
-    NCBI_add_subdirectory(${NCBI_PTBCFG_KNOWN_FOLDERS})
-    set(NCBI_PTB_CALLBACK_ALL_PARSED TRUE)
-    set(NCBI_PTBMODE_COLLECT_DEPS OFF)
+        NCBI_util_elapsed(_elapsed)
+        message("${_elapsed}: Analyzing source tree...")
+        set_property(GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS "")
  
-    get_property(_allprojects     GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS)
-    get_property(_allowedprojects GLOBAL PROPERTY NCBI_PTBPROP_ALLOWED_PROJECTS)
+        set(NCBI_PTBMODE_COLLECT_DEPS ON)
+        set(_known ${NCBI_PTBCFG_KNOWN_FOLDERS})
+        unset(NCBI_PTBCFG_KNOWN_FOLDERS)
+        NCBI_add_subdirectory(${_known})
+        set(NCBI_PTB_CALLBACK_ALL_PARSED TRUE)
+        set(NCBI_PTBMODE_COLLECT_DEPS OFF)
  
-if(OFF)
-message("NCBI_PTBPROP_ALL_PROJECTS: ${_allprojects}")
-foreach(_prj IN LISTS _allprojects)
-    get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
-    message("NCBI_PTBPROP_DEPS_${_prj}: ${_prjdeps}")
-endforeach()
-message("NCBI_PTBPROP_ALLOWED_PROJECTS: ${_allowedprojects}")
-endif()
+        get_property(_allprojects     GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS)
+        get_property(_allowedprojects GLOBAL PROPERTY NCBI_PTBPROP_ALLOWED_PROJECTS)
  
-    if("${_allowedprojects}" STREQUAL "")
-        message(FATAL_ERROR "List of projects is empty")
-        return()
-    endif()
+        if(OFF)
+            message("NCBI_PTBPROP_ALL_PROJECTS: ${_allprojects}")
+            foreach(_prj IN LISTS _allprojects)
+                get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
+                message("NCBI_PTBPROP_DEPS_${_prj}: ${_prjdeps}")
+            endforeach()
+            message("NCBI_PTBPROP_ALLOWED_PROJECTS: ${_allowedprojects}")
+        endif()
  
-    message("Collecting projects...")
-    list(REMOVE_DUPLICATES _allowedprojects)
-    foreach(_prj IN LISTS _allowedprojects)
-        NCBI_internal_collect_dependencies(${_prj})
-        get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
-        get_property(_host GLOBAL PROPERTY NCBI_PTBPROP_HOST_${_prj})
-        set(NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PTB_ALLOWED_PROJECTS} ${_host} ${_prj} ${_prjdeps})
-    endforeach()
-    list(SORT NCBI_PTB_ALLOWED_PROJECTS)
-    list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_PROJECTS)
-    if(NCBI_PTBCFG_ALLOW_COMPOSITE)
-        set(_allowedprojects ${NCBI_PTB_ALLOWED_PROJECTS})
+        if("${_allowedprojects}" STREQUAL "")
+            message(FATAL_ERROR "List of projects is empty")
+            return()
+        endif()
+
+        NCBI_util_elapsed(_elapsed)
+        message("${_elapsed}: Collecting projects...")
+        list(REMOVE_DUPLICATES _allowedprojects)
          foreach(_prj IN LISTS _allowedprojects)
              NCBI_internal_collect_dependencies(${_prj})
              get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
@@ -722,35 +773,53 @@ endif()
          endforeach()
          list(SORT NCBI_PTB_ALLOWED_PROJECTS)
          list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_PROJECTS)
-    endif()
-    foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
-        get_property(_dir GLOBAL PROPERTY NCBI_PTBPROP_DIR_${_prj})
-        list(APPEND NCBI_PTB_ALLOWED_DIRS ${_dir})
-    endforeach()
-    list(SORT NCBI_PTB_ALLOWED_DIRS)
-    list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_DIRS)
  
-if(OFF)
-message("NCBI_PTB_ALLOWED_PROJECTS: ${NCBI_PTB_ALLOWED_PROJECTS}")
-foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
-    get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
-    message("NCBI_PTBPROP_DEPS_${_prj}: ${_prjdeps}")
-endforeach()
-endif()
+        if(NCBI_PTBCFG_ALLOW_COMPOSITE)
+            set(_allowedprojects ${NCBI_PTB_ALLOWED_PROJECTS})
+            foreach(_prj IN LISTS _allowedprojects)
+                NCBI_internal_collect_dependencies(${_prj})
+                get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
+                get_property(_host GLOBAL PROPERTY NCBI_PTBPROP_HOST_${_prj})
+                set(NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PTB_ALLOWED_PROJECTS} ${_host} ${_prj} ${_prjdeps})
+            endforeach()
+            list(SORT NCBI_PTB_ALLOWED_PROJECTS)
+            list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_PROJECTS)
+        endif()
+        if(NOT NCBI_PTB_NOFILTERS)
+            foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
+                get_property(_dir GLOBAL PROPERTY NCBI_PTBPROP_DIR_${_prj})
+                list(APPEND NCBI_PTB_ALLOWED_DIRS ${_dir})
+            endforeach()
+            list(SORT NCBI_PTB_ALLOWED_DIRS)
+            list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_DIRS)
+            set(NCBI_PTB_ALLOWED_DIRS ${NCBI_PTB_ALLOWED_DIRS} PARENT_SCOPE)
+        endif()
  
-    foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
-        NCBI_internal_collect_requires(${_prj})
-    endforeach()
-    set(NCBI_PTB_CALLBACK_COLLECTED TRUE)
-    foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
-        if (NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${_prj})
-            NCBI_internal_print_project_info(${_prj})
+        if(OFF)
+            message("NCBI_PTB_ALLOWED_PROJECTS: ${NCBI_PTB_ALLOWED_PROJECTS}")
+            foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
+                get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
+                message("NCBI_PTBPROP_DEPS_${_prj}: ${_prjdeps}")
+            endforeach()
          endif()
-    endforeach()
  
-    set(NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PTB_ALLOWED_PROJECTS} PARENT_SCOPE)
-    set(NCBI_PTB_ALLOWED_DIRS ${NCBI_PTB_ALLOWED_DIRS} PARENT_SCOPE)
-    message("Configuring projects...")
+        foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
+            NCBI_internal_collect_requires(${_prj})
+        endforeach()
+        set(NCBI_PTB_CALLBACK_COLLECTED TRUE)
+        foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
+            if (NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${_prj})
+                NCBI_internal_print_project_info(${_prj})
+            endif()
+        endforeach()
+
+        set(NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PTB_ALLOWED_PROJECTS} PARENT_SCOPE)
+    else()
+        message("Source tree analysis skipped")
+    endif()
+    NCBI_util_elapsed(_elapsed)
+    message("${_elapsed}: Configuring projects...")
+    variable_watch(CMAKE_CURRENT_LIST_DIR NCBI_internal_end_of_config)
  endmacro()
  
  #############################################################################
@@ -761,6 +830,8 @@ function(NCBI_internal_end_of_config _variable _access _value)
      set(NCBI_PTB_CALLBACK_ALL_ADDED TRUE)
      NCBI_internal_print_report("Processed" TOTAL)
      NCBI_internal_print_report("Added" COUNT)
+    NCBI_util_elapsed(_elapsed)
+    message("${_elapsed}: Done")
  endfunction()
  
  #############################################################################
@@ -1351,7 +1422,7 @@ macro(NCBI_internal_process_parts _result)
      NCBI_internal_collect_parts(_result)
  
      if(_result)
-        if (NCBI_PTBMODE_COLLECT_DEPS)
+        if (NCBI_PTBMODE_COLLECT_DEPS OR NOT NCBI_PTBCFG_ENABLE_COLLECTOR)
  #set_property(GLOBAL PROPERTY NCBI_PTBPROP_PARTS_${NCBI_PROJECT_ID} ${NCBITMP_PROJECT_PART_IDS})
              foreach(_part IN LISTS NCBITMP_PROJECT_PART_IDS)
                  set_property(GLOBAL PROPERTY NCBI_PTBPROP_HOSTID_${_part} ${NCBI_PROJECT_ID})
@@ -1406,7 +1477,10 @@ endfunction()
  ##############################################################################
  function(NCBI_internal_verify_libs)
      set(_optimize NO)
-    if (WIN32 AND NOT NCBI_PTBMODE_COLLECT_DEPS AND NOT DEFINED NCBI_EXTERNAL_TREE_ROOT AND NOT DEFINED NCBI_PTBCFG_DOINSTALL)
+    if (WIN32 AND NCBI_PTBCFG_ENABLE_COLLECTOR
+        AND NOT NCBI_PTBMODE_COLLECT_DEPS
+        AND NOT DEFINED NCBI_EXTERNAL_TREE_ROOT
+        AND NOT DEFINED NCBI_PTBCFG_DOINSTALL)
          if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "STATIC")
  #            set(_ncbilib ${NCBITMP_NCBILIB})
              get_property(_ncbilib GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${NCBI_PROJECT})
@@ -1499,7 +1573,7 @@ endfunction()
  ##############################################################################
  function(NCBI_internal_process_project_filters _result)
  
-    if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "")
+    if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "" AND NOT "${NCBI_PROJECT}" STREQUAL "")
          foreach(_prj IN LISTS NCBI_PTBCFG_PROJECT_TARGETS)
              if("${_prj}" STREQUAL "")
                  continue()
@@ -1516,7 +1590,7 @@ function(NCBI_internal_process_project_filters _result)
          endforeach()
      endif()
  
-    if(NOT "${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "")
+    if(NOT "${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "" AND NOT "${NCBI_PROJECT}" STREQUAL "")
          set(_alltags ${NCBI__PROJTAG} ${NCBI_${NCBI_PROJECT}_PROJTAG})
          if("${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "-")
              if(NOT "${_alltags}" STREQUAL "")
@@ -1556,6 +1630,11 @@ function(NCBI_internal_process_project_filters _result)
      if(NOT "${NCBI_PTBCFG_PROJECT_LIST}" STREQUAL "")
          set(_is_good FALSE)
          set(_hasp FALSE)
+        if(DEFINED NCBI_PTB_THIS_SRC_ROOT)
+            set(_src_root ${NCBI_PTB_THIS_SRC_ROOT})
+        else()
+            set(_src_root ${NCBI_SRC_ROOT})
+        endif()
          foreach(_dir IN LISTS NCBI_PTBCFG_PROJECT_LIST)
              if("${_dir}" STREQUAL "")
                  continue()
@@ -1566,14 +1645,14 @@ function(NCBI_internal_process_project_filters _result)
              endif()
              NCBI_util_parse_sign( ${_dir} _value _negate)
              if(_negate)
-                NCBI_util_match_path(${NCBI_CURRENT_SOURCE_DIR} ${NCBI_SRC_ROOT}/${_value} _match)
+                NCBI_util_match_path(${NCBI_CURRENT_SOURCE_DIR} ${_src_root}/${_value} _match)
                  if(_match)
                      set(${_result} FALSE PARENT_SCOPE)
                      return()
                  endif()
              else()
                  set(_hasp TRUE)
-                NCBI_util_match_path(${NCBI_CURRENT_SOURCE_DIR} ${NCBI_SRC_ROOT}/${_value} _match)
+                NCBI_util_match_path(${NCBI_CURRENT_SOURCE_DIR} ${_src_root}/${_value} _match)
                  if(_match)
                      set(_is_good TRUE)
                  endif()
@@ -1585,7 +1664,7 @@ function(NCBI_internal_process_project_filters _result)
          endif()
      endif()
  
-    if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "")
+    if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "" AND NOT "${NCBI_PROJECT}" STREQUAL "")
          set(_is_good FALSE)
          set(_hasp FALSE)
          foreach(_prj IN LISTS NCBI_PTBCFG_PROJECT_TARGETS)
@@ -1713,7 +1792,7 @@ function(NCBI_internal_print_report _caption _counter)
      set(_report "")
      foreach( _type IN ITEMS CONSOLEAPP GUIAPP STATIC SHARED CUSTOM)
          get_property(_cnt GLOBAL PROPERTY NCBI_PTBPROP_${_counter}_${_type})
-        if( ${_cnt} GREATER 0)
+        if( NOT "${_cnt}" STREQUAL "" AND "${_cnt}" GREATER 0)
              if( NOT "${_report}" STREQUAL "")
                  string(APPEND _report ",")
              endif()
@@ -1780,7 +1859,7 @@ function(NCBI_internal_add_project)
          get_property(_hosted GLOBAL PROPERTY NCBI_PTBPROP_HOST_${NCBI_PROJECT})
      endif()
  
-    if (NOT NCBI_PTBMODE_PARTS AND NOT NCBI_PTBMODE_COLLECT_DEPS AND NCBI_PTBCFG_ENABLE_COLLECTOR)
+    if (NCBI_PTBCFG_ENABLE_COLLECTOR AND NOT NCBI_PTBMODE_PARTS AND NOT NCBI_PTBMODE_COLLECT_DEPS)
          if(DEFINED NCBI_PTB_ALLOWED_PROJECTS)
              if(NOT ${NCBI_PROJECT} IN_LIST NCBI_PTB_ALLOWED_PROJECTS)
                  if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT})
@@ -1804,6 +1883,19 @@ function(NCBI_internal_add_project)
          endif()
      endif()
  
+    if(NOT NCBI_PTBCFG_ENABLE_COLLECTOR AND NOT NCBI_PTBMODE_PARTS)
+        get_property(_count  GLOBAL PROPERTY NCBI_PTBPROP_TOTAL_${NCBI_${NCBI_PROJECT}_TYPE})
+        math(EXPR _count "${_count} + 1")
+        set_property(GLOBAL PROPERTY NCBI_PTBPROP_TOTAL_${NCBI_${NCBI_PROJECT}_TYPE} ${_count})
+        NCBI_internal_process_project_filters(_allowed)
+        if (NOT _allowed)
+            if ("${ARGC}" GREATER "0")
+                set(${ARGV0} FALSE PARENT_SCOPE)
+            endif()
+            return()
+        endif()
+    endif()
+
       if (NCBI_PTBMODE_COLLECT_DEPS)
          get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DIRECT_DEPS_${NCBI_PROJECT} SET)
          if (_prjdeps AND NOT DEFINED NCBI_${NCBI_PROJECT}_PARTS)
@@ -1918,6 +2010,9 @@ endif()
              endif()
              return()
          endif()
+    elseif(NOT NCBI_PTBCFG_ENABLE_COLLECTOR AND NCBI_PTBMODE_PARTS)
+        set(NCBITMP_PROJECT_PART_IDS  ${NCBITMP_PROJECT_PART_IDS}  ${NCBI_PROJECT_ID}       PARENT_SCOPE )
+        set(NCBITMP_PROJECT_PARTS     ${NCBITMP_PROJECT_PARTS}     ${NCBI_PROJECT_PARTNAME} PARENT_SCOPE )
      endif()
  
  #message("processing ${NCBI_PROJECT_ID}")
@@ -2008,6 +2103,10 @@ endif()
                  message("WARNING: App target ${NCBI_${NCBI_PROJECT}_OUTPUT} (${NCBI_CURRENT_SOURCE_DIR}) cannot be created")
                  message("         because there is already a target with the same name in ${_dir}")
                  message("         App target ${NCBI_${NCBI_PROJECT}_OUTPUT} will be renamed into ${NCBI_PROJECT}")
+            elseif(NOT NCBI_PTBCFG_ENABLE_COLLECTOR)
+                message("WARNING: App target ${NCBI_${NCBI_PROJECT}_OUTPUT} (${NCBI_CURRENT_SOURCE_DIR}) cannot be created")
+                message("         because there is already a target with the same name elsewhere")
+                message("         App target ${NCBI_${NCBI_PROJECT}_OUTPUT} will be renamed into ${NCBI_PROJECT}")
              endif()
          endif()
          set_target_properties(${NCBI_PROJECT} PROPERTIES OUTPUT_NAME ${NCBI_${NCBI_PROJECT}_OUTPUT})
@@ -2022,6 +2121,8 @@ message("  ADDED: ${NCBI_CURRENT_SOURCE_DIR}/${NCBI_PROJECT_ID}")
  message("  NCBITMP_PROJECT_SOURCES ${NCBITMP_PROJECT_SOURCES}")
  message("  NCBITMP_PROJECT_HEADERS ${NCBITMP_PROJECT_HEADERS}")
  message("  NCBITMP_PROJECT_RESOURCES ${NCBITMP_PROJECT_RESOURCES}")
+#message("  NCBI_SRC_ROOT ${NCBI_SRC_ROOT}")
+#message("  NCBI_INC_ROOT ${NCBI_INC_ROOT}")
  endif()
  
      if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "CUSTOM")
diff --git a/c++/src/build-system/cmake/CMake.NCBIptb.ntest.cmake b/c++/src/build-system/cmake/CMake.NCBIptb.ntest.cmake

index 7de1a464588d51e0066f9c3de208e5d1421d1627..5ee24bab7daf85659191a3781ff8118cd4514521 100644 (file)
--- a/c++/src/build-system/cmake/CMake.NCBIptb.ntest.cmake
+++ b/c++/src/build-system/cmake/CMake.NCBIptb.ntest.cmake
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMake.NCBIptb.ntest.cmake 607666 2020-05-06 12:51:46Z ivanov $
+# $Id: CMake.NCBIptb.ntest.cmake 609363 2020-06-01 14:11:57Z ivanov $
  #############################################################################
  #############################################################################
  ##
@@ -98,11 +98,7 @@ endfunction()
  
  ##############################################################################
  function(NCBI_internal_add_ncbi_checktarget)
-    if(DEFINED NCBI_EXTERNAL_TREE_ROOT)
-        set(SCRIPT_NAME "${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_COMMON_SCRIPTS}/check/check_make_unix_cmake.sh")
-    else()
-        set(SCRIPT_NAME "${NCBI_TREE_ROOT}/${NCBI_DIRNAME_COMMON_SCRIPTS}/check/check_make_unix_cmake.sh")
-    endif()
+    set(SCRIPT_NAME "${NCBITK_TREE_ROOT}/${NCBI_DIRNAME_COMMON_SCRIPTS}/check/check_make_unix_cmake.sh")
      set(WORKDIR ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD})
      set(_checkdir ../check)
      set(_checkroot ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/${_checkdir})
diff --git a/c++/src/build-system/cmake/CMake.NCBItoolkit.cmake b/c++/src/build-system/cmake/CMake.NCBItoolkit.cmake

index 691382a129fd1364b7a4bd78c0106eeeaedcd11c..a4d03da487734e201fbcee4dcebc6323d7d8f548 100644 (file)
--- a/c++/src/build-system/cmake/CMake.NCBItoolkit.cmake
+++ b/c++/src/build-system/cmake/CMake.NCBItoolkit.cmake
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMake.NCBItoolkit.cmake 603345 2020-03-10 17:24:45Z ivanov $
+# $Id: CMake.NCBItoolkit.cmake 609379 2020-06-01 14:15:14Z ivanov $
  #############################################################################
  
  if(NOT DEFINED NCBI_TOOLKIT_NCBIPTB_BUILD_SYSTEM_INCLUDED)
@@ -38,7 +38,11 @@ if(NCBI_EXPERIMENTAL)
      set(NCBI_EXPERIMENTAL_SUBDIRS          ON)
      set(NCBI_EXPERIMENTAL_DISABLE_HUNTER   ON)
      set(NCBI_VERBOSE_ALLPROJECTS           OFF)
-    set(NCBI_PTBCFG_ENABLE_COLLECTOR       ON)
+    if(NCBI_PTBCFG_SKIP_ANALYSIS)
+        set(NCBI_PTBCFG_ENABLE_COLLECTOR       OFF)
+    else()
+        set(NCBI_PTBCFG_ENABLE_COLLECTOR       ON)
+    endif()
  
      if(BUILD_SHARED_LIBS)
          if(WIN32 OR XCODE)
@@ -80,32 +84,25 @@ if (WIN32)
  endif()
  endif()
  
-if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
-    set(_prefix "${NCBI_EXTERNAL_TREE_ROOT}/src/")
-else()
-    if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/build-system/cmake/CMake.NCBIptb.cmake")
-        set(_prefix "")
-    elseif (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/build-system/cmake/CMake.NCBIptb.cmake")
-        set(_prefix "src/")
-    else()
-        message(FATAL_ERROR "Cannot find NCBIptb build system in ${CMAKE_SOURCE_DIR}")
-    endif()
+set(_listdir "${CMAKE_CURRENT_LIST_DIR}")
+if (NOT EXISTS "${_listdir}/CMake.NCBIptb.cmake")
+    message(FATAL_ERROR "Cannot find NCBIptb build system in ${_listdir}")
  endif()
  
-include(${_prefix}build-system/cmake/CMakeMacros.cmake)
-include(${_prefix}build-system/cmake/CMakeChecks.cmake)
-include(${_prefix}build-system/cmake/CMake.NCBIptb.cmake)
-include(${_prefix}build-system/cmake/CMake.NCBIptb.ncbi.cmake)
-include(${_prefix}build-system/cmake/CMake.NCBIptb.datatool.cmake)
-include(${_prefix}build-system/cmake/CMake.NCBIptb.grpc.cmake)
-include(${_prefix}build-system/cmake/CMake.NCBIptb.ctest.cmake)
+include(${_listdir}/CMakeMacros.cmake)
+include(${_listdir}/CMakeChecks.cmake)
+include(${_listdir}/CMake.NCBIptb.cmake)
+include(${_listdir}/CMake.NCBIptb.ncbi.cmake)
+include(${_listdir}/CMake.NCBIptb.datatool.cmake)
+include(${_listdir}/CMake.NCBIptb.grpc.cmake)
+include(${_listdir}/CMake.NCBIptb.ctest.cmake)
  if(NCBI_PTBCFG_ADDCHECK)
-    include(${_prefix}build-system/cmake/CMake.NCBIptb.ntest.cmake)
+    include(${_listdir}/CMake.NCBIptb.ntest.cmake)
  endif()
  if(NCBI_PTBCFG_DOINSTALL)
-    include(${_prefix}build-system/cmake/CMake.NCBIptb.install.cmake)
+    include(${_listdir}/CMake.NCBIptb.install.cmake)
  endif()
-include(${_prefix}build-system/cmake/CMake.NCBIptb.legacy.cmake)
+include(${_listdir}/CMake.NCBIptb.legacy.cmake)
  
  if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
      if (EXISTS ${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}/${NCBI_PTBCFG_INSTALL_EXPORT}.cmake)
@@ -116,5 +113,5 @@ if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
      NCBI_import_hostinfo(${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}/${NCBI_PTBCFG_INSTALL_EXPORT}.hostinfo)
  endif()
  
-include(${_prefix}build-system/cmake/CMakeChecks.final-message.cmake)
+include(${_listdir}/CMakeChecks.final-message.cmake)
  endif(NOT DEFINED NCBI_TOOLKIT_NCBIPTB_BUILD_SYSTEM_INCLUDED)
diff --git a/c++/src/build-system/cmake/CMakeChecks.boost.cmake b/c++/src/build-system/cmake/CMakeChecks.boost.cmake

index 9ad928dca6d01c96c57d8d7a73756ddb1a5f7c07..ddf73f2bf13dc9c55b5f64e2c9c2338ada55c0b1 100644 (file)
--- a/c++/src/build-system/cmake/CMakeChecks.boost.cmake
+++ b/c++/src/build-system/cmake/CMakeChecks.boost.cmake
@@ -35,7 +35,7 @@ endif()
  
  #set(Boost_DEBUG ON)
  find_package(Boost
-             COMPONENTS filesystem iostreams date_time regex system serialization
+             COMPONENTS filesystem iostreams date_time regex system serialization thread
               REQUIRED)
  set(CMAKE_PREFIX_PATH ${_foo_CMAKE_PREFIX_PATH})
  
diff --git a/c++/src/build-system/cmake/CMakeChecks.cmake b/c++/src/build-system/cmake/CMakeChecks.cmake

index 5661b7b1b4fd17ef7bad42850e0532de74f6712e..82a779b9c8c2201acfdb1388ec714908144999f8 100644 (file)
--- a/c++/src/build-system/cmake/CMakeChecks.cmake
+++ b/c++/src/build-system/cmake/CMakeChecks.cmake
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMakeChecks.cmake 607666 2020-05-06 12:51:46Z ivanov $
+# $Id: CMakeChecks.cmake 609374 2020-06-01 14:13:44Z ivanov $
  #############################################################################
  #
  # Note:
@@ -13,6 +13,10 @@ if("${CMAKE_GENERATOR}" STREQUAL "Xcode")
      endif()
  endif()
  
+string(TIMESTAMP NCBI_TIMESTAMP_START "%s")  # start time in epoch seconds; NCBI_util_elapsed subtracts it to report elapsed time
+string(TIMESTAMP _start)  # no format given, so CMake's default timestamp format is used for the banner
+message("Started: ${_start}")
+
  #############################################################################
  # Source tree description
  #
@@ -37,18 +41,19 @@ set(NCBI_DIRNAME_CMAKECFG ${NCBI_DIRNAME_SRC}/build-system/cmake)
  
  
  if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/build-system/cmake/CMake.NCBIptb.cmake")
-    set(top_src_dir     ${CMAKE_CURRENT_SOURCE_DIR}/..)
-    set(abs_top_src_dir ${CMAKE_CURRENT_SOURCE_DIR}/..)
+    set(_this_root     ${CMAKE_CURRENT_SOURCE_DIR}/..)
  else()
-    set(top_src_dir     ${CMAKE_SOURCE_DIR})
-    set(abs_top_src_dir ${CMAKE_SOURCE_DIR})
+    set(_this_root     ${CMAKE_SOURCE_DIR})
  endif()
-get_filename_component(top_src_dir     "${top_src_dir}"     ABSOLUTE)
-get_filename_component(abs_top_src_dir "${abs_top_src_dir}" ABSOLUTE)
-
-set(NCBI_TREE_ROOT  ${top_src_dir})
-set(NCBI_SRC_ROOT   ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_SRC})
-set(NCBI_INC_ROOT   ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE})
+get_filename_component(_this_root  "${_this_root}"     ABSOLUTE)
+get_filename_component(top_src_dir "${CMAKE_CURRENT_LIST_DIR}/../../.."   ABSOLUTE)
+
+set(NCBI_TREE_ROOT    ${_this_root})
+set(NCBI_SRC_ROOT     ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_SRC})
+set(NCBI_INC_ROOT     ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE})
+set(NCBITK_TREE_ROOT  ${top_src_dir})
+set(NCBITK_SRC_ROOT   ${NCBITK_TREE_ROOT}/${NCBI_DIRNAME_SRC})
+set(NCBITK_INC_ROOT   ${NCBITK_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE})
  if (NOT EXISTS "${NCBI_SRC_ROOT}")
      set(NCBI_SRC_ROOT   ${NCBI_TREE_ROOT})
  endif()
@@ -63,9 +68,9 @@ set(includedir      ${NCBI_INC_ROOT})
  set(incdir          ${CMAKE_BINARY_DIR}/${NCBI_DIRNAME_CFGINC})
  set(incinternal     ${NCBI_INC_ROOT}/${NCBI_DIRNAME_INTERNAL})
  
-
  set(NCBI_DIRNAME_BUILD  build)
-if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
+#if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
+if (OFF)
      string(FIND ${CMAKE_BINARY_DIR} ${NCBI_TREE_ROOT} _pos_root)
      string(FIND ${CMAKE_BINARY_DIR} ${NCBI_SRC_ROOT}  _pos_src)
      if(NOT "${_pos_root}" LESS "0" AND "${_pos_src}" LESS "0" AND NOT "${CMAKE_BINARY_DIR}" STREQUAL "${NCBI_TREE_ROOT}")
@@ -95,9 +100,6 @@ else()
      endif()
  endif()
  endif()
-if (NOT IS_DIRECTORY ${incinternal})
-    set(incinternal     "")
-endif()
  
  if (NCBI_EXPERIMENTAL_CFG)
      set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_RUNTIME}")
@@ -111,26 +113,9 @@ else()
      set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${LIBRARY_OUTPUT_PATH}")
  endif()
  
-if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
-    set(NCBI_TREE_BUILDCFG "${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_BUILDCFG}")
-    set(NCBI_TREE_CMAKECFG "${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_CMAKECFG}")
-    set(NCBI_TREE_COMMON_INCLUDE  ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}/common)
-else()
-    set(NCBI_TREE_BUILDCFG "${NCBI_TREE_ROOT}/${NCBI_DIRNAME_BUILDCFG}")
-    set(NCBI_TREE_CMAKECFG "${NCBI_TREE_ROOT}/${NCBI_DIRNAME_CMAKECFG}")
-    set(NCBI_TREE_COMMON_INCLUDE  ${NCBI_INC_ROOT}/common)
-endif()
-if(OFF)
-message("CMAKE_SOURCE_DIR    = ${CMAKE_SOURCE_DIR}")
-message("NCBI_TREE_ROOT      = ${NCBI_TREE_ROOT}")
-message("NCBI_SRC_ROOT       = ${NCBI_SRC_ROOT}")
-message("NCBI_INC_ROOT       = ${NCBI_INC_ROOT}")
-message("NCBI_BUILD_ROOT     = ${NCBI_BUILD_ROOT}")
-message("NCBI_CFGINC_ROOT    = ${NCBI_CFGINC_ROOT}")
-message("NCBI_TREE_BUILDCFG  = ${NCBI_TREE_BUILDCFG}")
-message("NCBI_TREE_CMAKECFG  = ${NCBI_TREE_CMAKECFG}")
-message("NCBI_TREE_COMMON_INCLUDE = ${NCBI_TREE_COMMON_INCLUDE}")
-endif()
+set(NCBI_TREE_CMAKECFG "${CMAKE_CURRENT_LIST_DIR}")
+get_filename_component(NCBI_TREE_BUILDCFG "${CMAKE_CURRENT_LIST_DIR}/.."   ABSOLUTE)
+
  if(EXISTS ${NCBI_TREE_ROOT}/CMake.CustomConfig.txt)
         include(${NCBI_TREE_ROOT}/CMake.CustomConfig.txt)
  endif()
@@ -174,23 +159,38 @@ else()
  endif()
  set(NCBI_DIRNAME_PREBUILT  ${_prebuilt_loc})
  
+set(_tk_includedir      ${NCBITK_INC_ROOT})
+set(_tk_incinternal     ${NCBITK_INC_ROOT}/${NCBI_DIRNAME_INTERNAL})
+set(_inc_dirs)
+foreach( _inc IN ITEMS ${includedir} ${incinternal} ${_tk_includedir} ${_tk_incinternal})
+    if (IS_DIRECTORY ${_inc})
+        list(APPEND _inc_dirs ${_inc})
+    endif()
+endforeach()
+list(REMOVE_DUPLICATES _inc_dirs)
+include_directories(${incdir} ${_inc_dirs})
+include_regular_expression("^.*[.](h|hpp|c|cpp|inl|inc)$")
+if(OFF)
+message("CMAKE_SOURCE_DIR    = ${CMAKE_SOURCE_DIR}")
+message("NCBI_TREE_ROOT      = ${NCBI_TREE_ROOT}")
+message("NCBI_SRC_ROOT       = ${NCBI_SRC_ROOT}")
+message("NCBI_INC_ROOT       = ${NCBI_INC_ROOT}")
+message("NCBITK_TREE_ROOT    = ${NCBITK_TREE_ROOT}")
+message("NCBITK_SRC_ROOT     = ${NCBITK_SRC_ROOT}")
+message("NCBITK_INC_ROOT     = ${NCBITK_INC_ROOT}")
+message("NCBI_BUILD_ROOT     = ${NCBI_BUILD_ROOT}")
+message("NCBI_CFGINC_ROOT    = ${NCBI_CFGINC_ROOT}")
+message("NCBI_TREE_BUILDCFG  = ${NCBI_TREE_BUILDCFG}")
+message("NCBI_TREE_CMAKECFG  = ${NCBI_TREE_CMAKECFG}")
+message("include_directories(${incdir} ${_inc_dirs})")
+endif()
+
  if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
      set(NCBI_EXTERNAL_BUILD_ROOT  ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_PREBUILT})
-
-    if (IS_DIRECTORY ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE})
-        set(_ext_includedir0 ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE})
-        if (IS_DIRECTORY ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}/${NCBI_DIRNAME_INTERNAL})
-            set(_ext_incinternal ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}/${NCBI_DIRNAME_INTERNAL})
-        endif()
-    endif()
      if (NOT EXISTS ${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}/${NCBI_PTBCFG_INSTALL_EXPORT}.cmake)
          message(FATAL_ERROR "${NCBI_PTBCFG_INSTALL_EXPORT} was not found in ${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}")
      endif()
-    include_directories(${incdir} ${NCBI_INC_ROOT} ${incinternal} ${_ext_includedir0} ${_ext_incinternal})
-else()
-    include_directories(${incdir} ${includedir0} ${incinternal})
  endif()
-include_regular_expression("^.*[.](h|hpp|c|cpp|inl|inc)$")
  
  #set(CMAKE_MODULE_PATH "${NCBI_SRC_ROOT}/build-system/cmake/" ${CMAKE_MODULE_PATH})
  list(APPEND CMAKE_MODULE_PATH "${NCBI_TREE_CMAKECFG}")
@@ -228,7 +228,7 @@ include(${NCBI_TREE_CMAKECFG}/CMake.NCBIComponents.cmake)
  # This sets a version to be used throughout our config process
  # NOTE: Adjust as needed
  #
-set(NCBI_CPP_TOOLKIT_VERSION_MAJOR 23)
+set(NCBI_CPP_TOOLKIT_VERSION_MAJOR 24)
  set(NCBI_CPP_TOOLKIT_VERSION_MINOR 0)
  set(NCBI_CPP_TOOLKIT_VERSION_PATCH 0)
  set(NCBI_CPP_TOOLKIT_VERSION_EXTRA "")
@@ -318,6 +318,7 @@ string(REPLACE ";" " " FEATURES "${NCBI_ALL_COMPONENTS}")
  
  if (NCBI_EXPERIMENTAL_CFG)
  
+    set(_tk_common_include "${NCBITK_INC_ROOT}/common")
      if (WIN32 OR XCODE)
          foreach(_cfg ${NCBI_CONFIGURATION_TYPES})
  
@@ -346,11 +347,15 @@ else()
                  configure_file(${NCBI_TREE_CMAKECFG}/config.cmake.h.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/config/ncbiconf_xcode.h)
              endif()
  endif()
-            if (EXISTS ${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in)
-                configure_file(${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/config/ncbicfg.cfg.c)
+            if (EXISTS ${NCBITK_SRC_ROOT}/corelib/ncbicfg.c.in)
+                configure_file(${NCBITK_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/config/ncbicfg.cfg.c)
+            endif()
+            configure_file(${_tk_common_include}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/ncbi_build_ver.h)
+            if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
+                configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h)
+            else()
+                configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBITK_INC_ROOT}/common/ncbi_revision.h)
              endif()
-            configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/ncbi_build_ver.h)
-            configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h)
          endforeach()
          if(NOT EXISTS ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c)
              file(WRITE ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c "#include <common/config/ncbicfg.cfg.c>\n")
@@ -371,12 +376,17 @@ endif()
  
          set(NCBI_SIGNATURE "${NCBI_COMPILER}_${NCBI_COMPILER_VERSION}-${NCBI_BUILD_TYPE}--${HOST_CPU}-${HOST_OS_WITH_VERSION}-${_local_host_name}")
          configure_file(${NCBI_TREE_CMAKECFG}/config.cmake.h.in ${NCBI_CFGINC_ROOT}/ncbiconf_unix.h)
-        if (EXISTS ${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in)
-            configure_file(${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c)
+        if (EXISTS ${NCBITK_SRC_ROOT}/corelib/ncbicfg.c.in)
+            configure_file(${NCBITK_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c)
          endif()
  
-        configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/common/ncbi_build_ver.h)
-        configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h)
+        configure_file(${_tk_common_include}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/common/ncbi_build_ver.h)
+        # Generate ncbi_revision.h once: under NCBI_INC_ROOT for external-tree builds, under NCBITK_INC_ROOT otherwise (same rule as the WIN32/XCODE branch).
+        if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
+            configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h)
+        else()
+            configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBITK_INC_ROOT}/common/ncbi_revision.h)
+        endif()
      endif()
  
  else (NCBI_EXPERIMENTAL_CFG)
diff --git a/c++/src/build-system/cmake/CMakeChecks.compiler.cmake b/c++/src/build-system/cmake/CMakeChecks.compiler.cmake

index bf8d5f05cf4985e8d3f3a8240b55557df09bfeae..9178e638e14277090577ab4e5d1a6e0e7401cbde 100644 (file)
--- a/c++/src/build-system/cmake/CMakeChecks.compiler.cmake
+++ b/c++/src/build-system/cmake/CMakeChecks.compiler.cmake
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMakeChecks.compiler.cmake 608131 2020-05-12 15:15:17Z ivanov $
+# $Id: CMakeChecks.compiler.cmake 609369 2020-06-01 14:12:55Z ivanov $
  #############################################################################
  #
  # This config is designed to capture all compiler and linker definitions and search parameters
@@ -262,6 +262,7 @@ if (NOT buildconf)
    set(buildconf0 ${CMAKE_BUILD_TYPE})
    set(NCBI_BUILD_TYPE "${CMAKE_BUILD_TYPE}MT64")
  endif (NOT buildconf)
+set(NCBI_CONFIGURATION_TYPES "${CMAKE_BUILD_TYPE}")
  
  if(MaxDebug IN_LIST NCBI_PTBCFG_PROJECT_FEATURES)
      add_definitions(-D_GLIBCXX_DEBUG)
@@ -469,7 +470,7 @@ message(STATUS "NCBI_COMPILER_WRAPPER = ${NCBI_COMPILER_WRAPPER}")
  
  set(CMAKE_SHARED_LINKER_FLAGS_RDYNAMIC "${CMAKE_SHARED_LINKER_FLAGS}") # for smooth transition, please don't use
  set(CMAKE_SHARED_LINKER_FLAGS_ALLOW_UNDEFINED "${CMAKE_SHARED_LINKER_FLAGS}")
-if ((NOT DEFINED ${APPLE}) OR (NOT ${APPLE}))
+if (NOT APPLE)
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined")
  endif ()
  
@@ -484,7 +485,7 @@ SET(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
  SET(CMAKE_INSTALL_RPATH "/$ORIGIN/../lib")
  
  #this add RUNPATH to binaries (RPATH is already there anyway), which makes it more like binaries built by C++ Toolkit
-if (NOT WIN32)
+if (NOT WIN32 AND NOT APPLE)
  SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--enable-new-dtags")
  endif()
  
diff --git a/c++/src/build-system/cmake/CMakeLists.top_builddir.txt b/c++/src/build-system/cmake/CMakeLists.top_builddir.txt

index be02b053a6b28c5f2bcc858b20eaf140751e9dcf..b44cf14fa60d351fa9c867b478650ba3c1b252ea 100644 (file)
--- a/c++/src/build-system/cmake/CMakeLists.top_builddir.txt
+++ b/c++/src/build-system/cmake/CMakeLists.top_builddir.txt
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMakeLists.top_builddir.txt 603341 2020-03-10 17:23:52Z ivanov $
+# $Id: CMakeLists.top_builddir.txt 609363 2020-06-01 14:11:57Z ivanov $
  #############################################################################
  ##############################################################################
  
@@ -28,5 +28,5 @@ set(NCBI_PTBCFG_KNOWN_FOLDERS
    ${NCBI_PTBCFG_KNOWN_FOLDERS}
  )
  
-include(build-system/cmake/CMake.NCBItoolkit.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/CMake.NCBItoolkit.cmake)
  NCBI_add_subdirectory( ${NCBI_PTBCFG_KNOWN_FOLDERS})
diff --git a/c++/src/build-system/cmake/cmake-cfg-unix.sh b/c++/src/build-system/cmake/cmake-cfg-unix.sh

index 55bc3291c9b1e277e839da977fa3369ebb122f55..38cd2482185cbac46fa997c8b125d36bded4a77d 100755 (executable)
--- a/c++/src/build-system/cmake/cmake-cfg-unix.sh
+++ b/c++/src/build-system/cmake/cmake-cfg-unix.sh
@@ -1,6 +1,6 @@
  #!/bin/sh
  #############################################################################
-# $Id: cmake-cfg-unix.sh 607664 2020-05-06 12:50:47Z ivanov $
+# $Id: cmake-cfg-unix.sh 609379 2020-06-01 14:15:14Z ivanov $
  #   Configure NCBI C++ toolkit using CMake build system.
  #   Author: Andrei Gourianov, gouriano@ncbi
  #############################################################################
@@ -13,7 +13,7 @@ extension="cmake_configure_ext.sh"
  NCBI_EXPERIMENTAL="ON"
  
  host_os=`uname`
-if test $host_os = "Darwin"; then
+if test -z "${CMAKE_CMD}" -a $host_os = "Darwin"; then
    CMAKE_CMD=/Applications/CMake.app/Contents/bin/cmake
  fi
  if [ -z "${CMAKE_CMD}" ]; then
@@ -30,6 +30,7 @@ BUILD_TYPE="Debug"
  BUILD_SHARED_LIBS="OFF"
  USE_CCACHE="ON"
  USE_DISTCC="ON"
+SKIP_ANALYSIS="OFF"
  
  ############################################################################# 
  Check_function_exists() {
@@ -72,6 +73,7 @@ OPTIONS:
    --with-build-root=name     -- specify a non-default build directory name
    --without-ccache           -- do not use ccache
    --without-distcc           -- do not use distcc
+  --without-analysis         -- skip source tree analysis
    --with-generator="X"       -- use generator X
  EOF
  
@@ -149,21 +151,24 @@ while [ $# -ne 0 ]; do
      --without-distcc)
        USE_DISTCC="OFF"
        ;;
+    --without-analysis)
+      SKIP_ANALYSIS="ON"
+      ;;
      --with-projects=*)
        PROJECT_LIST=${1#*=}
-      if [ -e "${tree_root}/$PROJECT_LIST" ]; then
+      if [ -f "${tree_root}/$PROJECT_LIST" ]; then
          PROJECT_LIST="${tree_root}/$PROJECT_LIST"
        fi
        ;; 
      --with-tags=*)
        PROJECT_TAGS=${1#*=}
-      if [ -e "${tree_root}/$PROJECT_TAGS" ]; then
+      if [ -f "${tree_root}/$PROJECT_TAGS" ]; then
          PROJECT_TAGS="${tree_root}/$PROJECT_TAGS"
        fi
        ;; 
      --with-targets=*)
        PROJECT_TARGETS=${1#*=}
-      if [ -e "${tree_root}/$PROJECT_TARGETS" ]; then
+      if [ -f "${tree_root}/$PROJECT_TARGETS" ]; then
          PROJECT_TARGETS="${tree_root}/$PROJECT_TARGETS"
        fi
        ;; 
@@ -269,6 +274,9 @@ if [ -n "$CC" ]; then
        if test $host_os = "Darwin"; then
          CC_NAME=`$CC --version 2>/dev/null | awk 'NR==1{print $2}'`
          CC_VERSION=`$CC --version 2>/dev/null | awk 'NR==1{print $4}' | sed 's/[.]//g'`
+        if [ $CC_NAME = "clang" ]; then
+          CC_NAME="Clang"
+        fi
        else
          CC_NAME=`$CC --version | awk 'NR==1{print $1}' | tr '[:lower:]' '[:upper:]'`
          ver=`$CC -dumpfullversion 2>/dev/null || $CC -dumpversion 2>/dev/null`
@@ -303,6 +311,7 @@ CMAKE_ARGS="$CMAKE_ARGS  -DNCBI_PTBCFG_PROJECT_LIST=$(Quote "${PROJECT_LIST}")"
  CMAKE_ARGS="$CMAKE_ARGS  -DNCBI_PTBCFG_PROJECT_TAGS=$(Quote "${PROJECT_TAGS}")"
  CMAKE_ARGS="$CMAKE_ARGS  -DNCBI_PTBCFG_PROJECT_TARGETS=$(Quote "${PROJECT_TARGETS}")"
  CMAKE_ARGS="$CMAKE_ARGS  -DNCBI_VERBOSE_PROJECTS=$(Quote "${PROJECT_DETAILS}")"
+CMAKE_ARGS="$CMAKE_ARGS  -DNCBI_PTBCFG_SKIP_ANALYSIS=$(Quote "${SKIP_ANALYSIS}")"
  if [ -n "$INSTALL_PATH" ]; then
    CMAKE_ARGS="$CMAKE_ARGS  -DNCBI_PTBCFG_INSTALL_PATH=$(Quote "${INSTALL_PATH}")"
  fi
diff --git a/c++/src/build-system/cmake/cmake-cfg-vs.bat b/c++/src/build-system/cmake/cmake-cfg-vs.bat

index 5b12e6735b557b1ed3aca2999e71e0a4a5facaef..0c89d50fbf97fe8927d5d79d10bacf6b24fb834f 100644 (file)
--- a/c++/src/build-system/cmake/cmake-cfg-vs.bat
+++ b/c++/src/build-system/cmake/cmake-cfg-vs.bat
@@ -1,7 +1,7 @@
  @echo off
  setlocal ENABLEDELAYEDEXPANSION
  REM #########################################################################
-REM  $Id: cmake-cfg-vs.bat 607666 2020-05-06 12:51:46Z ivanov $
+REM  $Id: cmake-cfg-vs.bat 609379 2020-06-01 14:15:14Z ivanov $
  REM  Configure NCBI C++ toolkit for Visual Studio using CMake build system.
  REM  Author: Andrei Gourianov, gouriano@ncbi
  REM #########################################################################
@@ -30,6 +30,7 @@ REM #########################################################################
  REM defaults
  set BUILD_SHARED_LIBS=OFF
  set VISUAL_STUDIO=2017
+set SKIP_ANALYSIS=OFF
  
  goto :RUN
  REM #########################################################################
@@ -63,6 +64,7 @@ echo                  examples:    --with-components="-Z"
  echo   --with-features="LIST"   -- specify compilation features
  echo                  examples:    --with-features="StrictGI"
  echo   --with-build-root=name   -- specify a non-default build directory name
+echo   --without-analysis       -- skip source tree analysis
  echo   --with-vs=N              -- use Visual Studio N generator 
  echo                  examples:    --with-vs=2017  (default)
  echo                               --with-vs=2019
@@ -128,6 +130,7 @@ if "%1"=="--with-targets"      (set PROJECT_TARGETS=%~2&   shift& goto :CONTINUE
  if "%1"=="--with-details"      (set PROJECT_DETAILS=%~2&   shift& goto :CONTINUEPARSEARGS)
  if "%1"=="--with-vs"           (set VISUAL_STUDIO=%~2&     shift& goto :CONTINUEPARSEARGS)
  if "%1"=="--with-install"      (set INSTALL_PATH=%~2&      shift& goto :CONTINUEPARSEARGS)
+if "%1"=="--without-analysis"  (set SKIP_ANALYSIS=ON&             goto :CONTINUEPARSEARGS)
  if "%1"=="--with-generator"    (set CMAKE_GENERATOR=%~2&   shift& goto :CONTINUEPARSEARGS)
  if "%1"=="--with-prebuilt"     (set prebuilt_dir=%~dp2& set prebuilt_name=%~nx2&   shift& goto :CONTINUEPARSEARGS)
  set unknown=%unknown% %1
@@ -195,17 +198,26 @@ if "%CMAKE_GENERATOR%"=="Visual Studio 14 2015 Win64" (
  
  if not "%PROJECT_LIST%"=="" (
    if exist "%tree_root%\%PROJECT_LIST%" (
-    set PROJECT_LIST=%tree_root%\%PROJECT_LIST%
+    type "%tree_root%\%PROJECT_LIST%" >NUL 2>&1
+    if not errorlevel 1 (
+      set PROJECT_LIST=%tree_root%\%PROJECT_LIST%
+    )
    )
  )
  if not "%PROJECT_TAGS%"=="" (
    if exist "%tree_root%\%PROJECT_TAGS%" (
-    set PROJECT_TAGS=%tree_root%\%PROJECT_TAGS%
+    type "%tree_root%\%PROJECT_TAGS%" >NUL 2>&1
+    if not errorlevel 1 (
+      set PROJECT_TAGS=%tree_root%\%PROJECT_TAGS%
+    )
    )
  )
  if not "%PROJECT_TARGETS%"=="" (
    if exist "%tree_root%\%PROJECT_TARGETS%" (
-    set PROJECT_TARGETS=%tree_root%\%PROJECT_TARGETS%
+    type "%tree_root%\%PROJECT_TARGETS%" >NUL 2>&1
+    if not errorlevel 1 (
+      set PROJECT_TARGETS=%tree_root%\%PROJECT_TARGETS%
+    )
    )
  )
  
@@ -222,6 +234,7 @@ set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_PROJECT_LIST="%PROJECT_LIST%"
  set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_PROJECT_TAGS="%PROJECT_TAGS%"
  set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_PROJECT_TARGETS="%PROJECT_TARGETS%"
  set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_VERBOSE_PROJECTS="%PROJECT_DETAILS%"
+set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_SKIP_ANALYSIS=%SKIP_ANALYSIS%
  if not "%INSTALL_PATH%"=="" (
    set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_INSTALL_PATH="%INSTALL_PATH%"
  )
diff --git a/c++/src/build-system/cmake/cmake-cfg-xcode.sh b/c++/src/build-system/cmake/cmake-cfg-xcode.sh

index 75cfd14b33160ab0506a1824b3e88c2b568bdda0..c48c5ee5bd4b26c081cc6e9ede9b9fdebbe67e9a 100755 (executable)
--- a/c++/src/build-system/cmake/cmake-cfg-xcode.sh
+++ b/c++/src/build-system/cmake/cmake-cfg-xcode.sh
@@ -1,6 +1,6 @@
  #!/bin/sh
  #############################################################################
-# $Id: cmake-cfg-xcode.sh 603557 2020-03-12 16:26:27Z ivanov $
+# $Id: cmake-cfg-xcode.sh 609379 2020-06-01 14:15:14Z ivanov $
  #   Configure NCBI C++ toolkit for XCode using CMake build system.
  #   Author: Andrei Gourianov, gouriano@ncbi
  #############################################################################
@@ -26,6 +26,7 @@ fi
  ############################################################################# 
  # defaults
  BUILD_SHARED_LIBS="OFF"
+SKIP_ANALYSIS="OFF"
  
  ############################################################################# 
  Check_function_exists() {
@@ -62,6 +63,7 @@ OPTIONS:
    --with-features="LIST"     -- specify compilation features
                      examples:   --with-features="StrictGI"
    --with-build-root=name     -- specify a non-default build directory name
+  --without-analysis         -- skip source tree analysis
  EOF
  
    Check_function_exists configure_ext_Usage && configure_ext_Usage
@@ -115,19 +117,19 @@ while [ $# != 0 ]; do
        ;; 
      --with-projects=*)
        PROJECT_LIST=${1#*=}
-      if [ -e "${tree_root}/$PROJECT_LIST" ]; then
+      if [ -f "${tree_root}/$PROJECT_LIST" ]; then
          PROJECT_LIST="${tree_root}/$PROJECT_LIST"
        fi
        ;; 
      --with-tags=*)
        PROJECT_TAGS=${1#*=}
-      if [ -e "${tree_root}/$PROJECT_TAGS" ]; then
+      if [ -f "${tree_root}/$PROJECT_TAGS" ]; then
          PROJECT_TAGS="${tree_root}/$PROJECT_TAGS"
        fi
        ;; 
      --with-targets=*)
        PROJECT_TARGETS=${1#*=}
-      if [ -e "${tree_root}/$PROJECT_TARGETS" ]; then
+      if [ -f "${tree_root}/$PROJECT_TARGETS" ]; then
          PROJECT_TARGETS="${tree_root}/$PROJECT_TARGETS"
        fi
        ;; 
@@ -151,6 +153,9 @@ while [ $# != 0 ]; do
        prebuilt_dir=`dirname $prebuilt_path`
        prebuilt_name=`basename $prebuilt_path`
        ;; 
+    --without-analysis)
+      SKIP_ANALYSIS="ON"
+      ;;
      *) 
        unknown="$unknown $1"
        ;; 
@@ -205,6 +210,7 @@ CMAKE_ARGS="$CMAKE_ARGS  -DNCBI_PTBCFG_PROJECT_LIST=$(Quote "${PROJECT_LIST}")"
  CMAKE_ARGS="$CMAKE_ARGS  -DNCBI_PTBCFG_PROJECT_TAGS=$(Quote "${PROJECT_TAGS}")"
  CMAKE_ARGS="$CMAKE_ARGS  -DNCBI_PTBCFG_PROJECT_TARGETS=$(Quote "${PROJECT_TARGETS}")"
  CMAKE_ARGS="$CMAKE_ARGS  -DNCBI_VERBOSE_PROJECTS=$(Quote "${PROJECT_DETAILS}")"
+CMAKE_ARGS="$CMAKE_ARGS  -DNCBI_PTBCFG_SKIP_ANALYSIS=$(Quote "${SKIP_ANALYSIS}")"
  if [ -n "$INSTALL_PATH" ]; then
    CMAKE_ARGS="$CMAKE_ARGS  -DNCBI_PTBCFG_INSTALL_PATH=$(Quote "${INSTALL_PATH}")"
  fi
diff --git a/c++/src/build-system/cmake/cmake_configure_ext_gpipe.sh b/c++/src/build-system/cmake/cmake_configure_ext_gpipe.sh

index 1a335e5a67613d4c2a09ea6e7c2c329337b5ae71..ffb66054643923c0a6912309da4f71f285d9cb8f 100755 (executable)
--- a/c++/src/build-system/cmake/cmake_configure_ext_gpipe.sh
+++ b/c++/src/build-system/cmake/cmake_configure_ext_gpipe.sh
@@ -1,7 +1,7 @@
  #!/bin/sh
  
  #############################################################################
-# $Id: cmake_configure_ext_gpipe.sh 600386 2020-01-16 17:00:37Z gouriano $
+# $Id: cmake_configure_ext_gpipe.sh 609574 2020-06-03 20:26:39Z whlavina $
  #############################################################################
  
  _ext_check=`type -t Check_function_exists`
@@ -40,28 +40,28 @@ configure_ext_ParseArgs()
        BUILD_TYPE="Release"
        BUILD_SHARED_LIBS="ON"
        PROJECT_FEATURES="${PROJECT_FEATURES};Int8GI"
-      BUILD_ROOT="Release"
+      : "${BUILD_ROOT:=../Release}"
        add_gpipe_warnings
        ;; 
      "--gpipe-dev")
        BUILD_TYPE="Debug"
        BUILD_SHARED_LIBS="ON"
        PROJECT_FEATURES="${PROJECT_FEATURES};StrictGI"
-      BUILD_ROOT="Debug"
+      : "${BUILD_ROOT:=../Debug}"
        add_gpipe_warnings
        ;; 
      "--gpipe-cgi")
        BUILD_TYPE="Release"
        BUILD_SHARED_LIBS="OFF"
        PROJECT_FEATURES="${PROJECT_FEATURES};Int8GI"
-      BUILD_ROOT="Static"
+      : "${BUILD_ROOT:=../Static}"
        add_gpipe_warnings
        ;; 
      "--gpipe-distrib")
        BUILD_TYPE="Release"
        BUILD_SHARED_LIBS="OFF"
        PROJECT_COMPONENTS="${PROJECT_COMPONENTS};-PCRE"
-      BUILD_ROOT="Distrib"
+      : "${BUILD_ROOT:=../Distrib}"
        add_gpipe_warnings
        ;; 
      *) 
diff --git a/c++/src/build-system/config.h.in b/c++/src/build-system/config.h.in

index c207c1c4d109547ee1a59b6cc47bbd7929114533..fd02d1410c53125b0ee86d7899fe9bbe0ac274e6 100644 (file)
--- a/c++/src/build-system/config.h.in
+++ b/c++/src/build-system/config.h.in
@@ -303,6 +303,9 @@
  /* Define to 1 if you have the `lchown' function. */
  #undef HAVE_LCHOWN
  
+/* Define to 1 if Apache Arrow (libparquet) is available. */
+#undef HAVE_LIBAPACHE_ARROW
+
  /* Define to 1 if libavrocpp is available. */
  #undef HAVE_LIBAVRO
  
@@ -321,6 +324,9 @@
  /* Define to 1 if non-public CONNECT extensions are available. */
  #undef HAVE_LIBCONNEXT
  
+/* Define to 1 if libcppkafka is available. */
+#undef HAVE_LIBCPPKAFKA
+
  /* Define to 1 if CRYPT is available, either in its own library or as part of
     the standard libraries. */
  #undef HAVE_LIBCRYPT
@@ -468,6 +474,9 @@
  /* Define to 1 if libprotobuf$PROTOBUF_SFX is available. */
  #undef HAVE_LIBPROTOBUF
  
+/* Define to 1 if librdkafka is available. */
+#undef HAVE_LIBRDKAFKA
+
  /* Define to 1 if RPCSVC is available, either in its own library or as part of
     the standard libraries. */
  #undef HAVE_LIBRPCSVC
@@ -533,6 +542,9 @@
  /* Define to 1 if libz is available. */
  #undef HAVE_LIBZ
  
+/* Define to 1 if libzstd is available. */
+#undef HAVE_LIBZSTD
+
  /* Define to 1 if you have the <limits> header file. */
  #undef HAVE_LIMITS
  
diff --git a/c++/src/build-system/configure b/c++/src/build-system/configure

index 65d42e0d4fd6ef5aca0b759786e56495ee72fcf0..7b0c8344e4e98908e4a9b48a69dc20687a03b4a7 100755 (executable)
--- a/c++/src/build-system/configure
+++ b/c++/src/build-system/configure
@@ -666,6 +666,9 @@ PERL_INCLUDE
  UNLESS_PUBSEQOS
  ncbi_xreader_pubseqos2
  ncbi_xreader_pubseqos
+CPPKAFKA_STATIC_LIBS
+LIBRDKAFKA_STATIC_LIBS
+APACHE_ARROW_STATIC_LIBS
  HIREDIS_STATIC_LIBS
  AWS_SDK_STATIC_LIBS
  MSGSL_INCLUDE
@@ -913,6 +916,12 @@ srcdir
  top_srcdir
  build_root
  signature
+CPPKAFKA_LIBS
+CPPKAFKA_INCLUDE
+LIBRDKAFKA_LIBS
+LIBRDKAFKA_INCLUDE
+APACHE_ARROW_LIBS
+APACHE_ARROW_INCLUDE
  HIREDIS_LIBS
  HIREDIS_INCLUDE
  AWS_SDK_LIBS
@@ -1061,6 +1070,8 @@ MBEDTLS_LIBS
  MBEDTLS_INCLUDE
  PCRE_LIBS
  PCRE_INCLUDE
+ZSTD_LIBS
+ZSTD_INCLUDE
  LZO_LIBS
  LZO_INCLUDE
  BZ2_LIBS
@@ -1243,6 +1254,7 @@ with_backward_cpp_sig
  with_z
  with_bz2
  with_lzo
+with_zstd
  with_pcre
  with_mbedtls
  with_gmp
@@ -1322,6 +1334,9 @@ with_grpc
  with_msgsl
  with_aws_sdk
  with_hiredis
+with_apache_arrow
+with_librdkafka
+with_cppkafka
  with_3psw
  with_local_lbsm
  with_ncbi_crypt
@@ -1889,7 +1904,8 @@ check ncbi-public strip pch caution ccache distcc \
  ncbi-c wxwidgets wxwidgets-ucs fastcgi sss sssdb sssutils included-sss \
  geo included-geo vdb downloaded-vdb static-vdb ngs libunwind libdw \
  backward-cpp backward-cpp-sig \
-z bz2 lzo pcre mbedtls gmp gcrypt nettle gnutls static-gnutls openssl krb5 \
+z bz2 lzo zstd pcre mbedtls \
+gmp gcrypt nettle gnutls static-gnutls openssl krb5 \
  sybase sybase-local sybase-new ftds mysql \
  orbacus freetype ftgl opengl mesa glut glew glew-mx gl2ps \
  bdb python perl jni sqlite3 icu boost boost-tag \
@@ -1900,6 +1916,7 @@ magic curl mimetic gsoap avro cereal sasl2 \
  mongodb mongodb3 leveldb gmock lapack lmdb \
  libuv libssh2 cassandra nghttp2 h2o influxdb \
  libxlsxwriter protobuf grpc msgsl aws-sdk hiredis \
+apache-arrow librdkafka cppkafka \
  3psw local-lbsm ncbi-crypt connext \
  serial objects dbapi app ctools gui algo internal gbench"
  
@@ -1952,7 +1969,7 @@ for x_arg in "$@" ; do
        --srcdir=* | --x-includes=* | --x-libraries=* | --with-tcheck=* \
        | --with-ncbi-c=* | --with-sss=* | --with-vdb=* | --with-ngs=* \
        | --with-libunwind=* | --with-libdw=* | --with-backward-cpp=* \
-      | --with-z=* | --with-bz2=* | --with-lzo=* \
+      | --with-z=* | --with-bz2=* | --with-lzo=* | --with-zstd=* \
        | --with-pcre=* | --with-mbedtls=* \
        | --with-gmp=* | --with-gcrypt=* | --with-nettle=* \
        | --with-gnutls=* | --with-openssl=* | --with-krb5=* \
@@ -2182,6 +2199,8 @@ Optional Packages:
   --without-bz2           use internal copy of bzlib
   --with-lzo=DIR          use LZO installation in DIR (requires 2.x or up)
   --without-lzo           do not use LZO
+ --with-zstd=DIR         use Zstandard installation in DIR
+ --without-zstd          do not use Zstandard
   --with-pcre=DIR         use PCRE installation in DIR
   --without-pcre          use internal copy of PCRE
   --with-mbedtls(=DIR)    use external mbedTLS installation (in DIR)
@@ -2331,6 +2350,12 @@ Optional Packages:
   --without-aws-sdk       do not use the Amazon Web Services SDK
   --with-hiredis=DIR      use Hiredis installation in DIR
   --without-hiredis       do not use Hiredis
+ --with-apache-arrow=DIR use Apache Arrow installation in DIR
+ --without-apache-arrow  do not use Apache Arrow
+ --with-librdkafka=DIR   use librdkafka installation in DIR
+ --without-librdkafka    do not use librdkafka
+ --with-cppkafka=DIR     use cppkafka installation in DIR
+ --without-cppkafka      do not use cppkafka
   --with-3psw=std:netopt  favor standard (system) builds of the above pkgs.
   --without-3psw          do not use any of the above packages
   --without-local-lbsm    turn off support for IPC with locally running LBSMD
@@ -3590,6 +3615,11 @@ case "$with_3psw" in
           else
              with_lzo=no
           fi
+        if test "${with_zstd-no}" != "no"; then
+            as_fn_error $? "incompatible options: --with-zstd but --without-3psw"
+         else
+            with_zstd=no
+         fi
          if test "${with_pcre-no}" != "no"; then
              as_fn_error $? "incompatible options: --with-pcre but --without-3psw"
           else
@@ -3950,6 +3980,21 @@ case "$with_3psw" in
           else
              with_hiredis=no
           fi
+        if test "${with_apache-arrow-no}" != "no"; then
+            as_fn_error $? "incompatible options: --with-apache-arrow but --without-3psw"
+         else
+            with_apache-arrow=no
+         fi
+        if test "${with_librdkafka-no}" != "no"; then
+            as_fn_error $? "incompatible options: --with-librdkafka but --without-3psw"
+         else
+            with_librdkafka=no
+         fi
+        if test "${with_cppkafka-no}" != "no"; then
+            as_fn_error $? "incompatible options: --with-cppkafka but --without-3psw"
+         else
+            with_cppkafka=no
+         fi
  
        { NCBI=; unset NCBI;}
        ;;
@@ -4801,6 +4846,18 @@ if test "${with_lzo+set}" = set; then :
  fi
  
  
+# Check whether --with-zstd was given.
+if test "${with_zstd+set}" = set; then :
+  withval=$with_zstd;
+fi
+
+
+# Check whether --with-zstd was given.
+if test "${with_zstd+set}" = set; then :
+  withval=$with_zstd;
+fi
+
+
  # Check whether --with-pcre was given.
  if test "${with_pcre+set}" = set; then :
    withval=$with_pcre;
@@ -5689,9 +5746,45 @@ if test "${with_hiredis+set}" = set; then :
  fi
  
  
-# Check whether --with-grpc was given.
-if test "${with_grpc+set}" = set; then :
-  withval=$with_grpc;
+# Check whether --with-hiredis was given.
+if test "${with_hiredis+set}" = set; then :
+  withval=$with_hiredis;
+fi
+
+
+# Check whether --with-apache-arrow was given.
+if test "${with_apache_arrow+set}" = set; then :
+  withval=$with_apache_arrow;
+fi
+
+
+# Check whether --with-apache-arrow was given.
+if test "${with_apache_arrow+set}" = set; then :
+  withval=$with_apache_arrow;
+fi
+
+
+# Check whether --with-librdkafka was given.
+if test "${with_librdkafka+set}" = set; then :
+  withval=$with_librdkafka;
+fi
+
+
+# Check whether --with-librdkafka was given.
+if test "${with_librdkafka+set}" = set; then :
+  withval=$with_librdkafka;
+fi
+
+
+# Check whether --with-cppkafka was given.
+if test "${with_cppkafka+set}" = set; then :
+  withval=$with_cppkafka;
+fi
+
+
+# Check whether --with-cppkafka was given.
+if test "${with_cppkafka+set}" = set; then :
+  withval=$with_cppkafka;
  fi
  
  
@@ -17543,6 +17636,152 @@ if test -n "$LZO_LIBS" -a "x$with_bin_release" = xyes \
     LZO_LIBS="$LZO_LIBPATH -llzo2-static"
  fi
  
+if test -d "$ZSTD_PATH"; then
+   ncbi_fix_dir_tmp=`if cd $ZSTD_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi`
+ case "$ncbi_fix_dir_tmp" in
+    /.*) ncbi_fix_dir_tmp2=`cd $ZSTD_PATH && $smart_pwd 2>/dev/null`
+         if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+            ZSTD_PATH=$ncbi_fix_dir_tmp2
+         else
+            case "$ZSTD_PATH" in
+               /*) ;;
+               * ) ZSTD_PATH=$ncbi_fix_dir_tmp ;;
+            esac
+         fi
+         ;;
+    /*) ZSTD_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+fi
+if test "$with_zstd" != "no"; then
+    case "$ZSTD_PATH:$with_zstd" in
+       *:yes | *: | $with_zstd* ) ;;
+       * ) ZSTD_PATH=$with_zstd ;;
+    esac
+    if test "$ZSTD_PATH" != /usr -a -d "$ZSTD_PATH"; then
+       in_path=" in $ZSTD_PATH"
+       if test -z "$ZSTD_INCLUDE" -a -d "$ZSTD_PATH/include"; then
+          ZSTD_INCLUDE="-I$ZSTD_PATH/include"
+       fi
+       if test -n "$ZSTD_LIBPATH"; then
+          :
+       elif test -d "$ZSTD_PATH/lib${bit64_sfx}"; then
+          ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
+    for x in $ZSTD_PATH/lib${bit64_sfx}; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+    done
+    ZSTD_LIBPATH="${ncbi_rp_L_flags}"
+ else
+    ncbi_rp_R_flags=
+    ncbi_rp_R_sep=" $CONF_f_runpath"
+    for x in $ZSTD_PATH/lib${bit64_sfx}; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+       x=`echo $x | sed -e "$ncbi_rpath_sed"`
+       ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+       ncbi_rp_R_sep=:
+    done
+    ZSTD_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+       elif test -d "$ZSTD_PATH/lib"; then
+          ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
+    for x in $ZSTD_PATH/lib; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+    done
+    ZSTD_LIBPATH="${ncbi_rp_L_flags}"
+ else
+    ncbi_rp_R_flags=
+    ncbi_rp_R_sep=" $CONF_f_runpath"
+    for x in $ZSTD_PATH/lib; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+       x=`echo $x | sed -e "$ncbi_rpath_sed"`
+       ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+       ncbi_rp_R_sep=:
+    done
+    ZSTD_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+       fi
+       ZSTD_LIBS="$ZSTD_LIBPATH -lzstd "
+    else
+       ZSTD_INCLUDE=""
+       ZSTD_LIBS="-lzstd "
+       in_path=
+    fi
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libzstd$in_path" >&5
+$as_echo_n "checking for libzstd$in_path... " >&6; }
+if ${ncbi_cv_lib_zstd+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  CPPFLAGS=" $ZSTD_INCLUDE $orig_CPPFLAGS"
+       LIBS="$ZSTD_LIBS  $orig_LIBS"
+       cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <zstd.h>
+int
+main ()
+{
+ZSTD_CCtx* cctx = ZSTD_createCCtx();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  ncbi_cv_lib_zstd=yes
+else
+  ncbi_cv_lib_zstd=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ncbi_cv_lib_zstd" >&5
+$as_echo "$ncbi_cv_lib_zstd" >&6; }
+    if test "$ncbi_cv_lib_zstd" = "no"; then
+       if test "${with_zstd:=no}" != no; then
+       as_fn_error $? "--with-zstd explicitly specified, but no usable version found." "$LINENO" 5
+    fi
+    fi
+ fi
+ if test "$with_zstd" = "no"; then
+    ZSTD_PATH="No_ZSTD"
+    ZSTD_INCLUDE=
+    ZSTD_LIBS=
+ else
+              WithPackages="$WithPackages${WithPackagesSep}ZSTD"; WithPackagesSep=" "
+    ZSTD_INCLUDE=" $ZSTD_INCLUDE"
+
+$as_echo "#define HAVE_LIBZSTD 1" >>confdefs.h
+
+ fi
+
+
+
+
  if test -z "$PCRE_PATH"  &&  pcre-config --version >/dev/null 2>&1; then
      p=`pcre-config --prefix`
      test "x$p" = "x/usr"  ||  PCRE_PATH=$p
@@ -28462,8 +28701,13 @@ fi
  
  ## FreeType and FTGL
  if test "$with_freetype" != "no" ; then
-   : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin}
-   # Extract the first word of "freetype-config", so it can be a program name with args.
+   ft2pc="env PKG_CONFIG_PATH=$FREETYPE_PATH/lib/pkgconfig pkg-config freetype2"
+   if $ft2pc --exists >/dev/null 2>&1; then
+      freetype_config=$ft2pc
+      FREETYPE_PATH=`$ft2pc --variable=exec_prefix`
+   else
+      : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin}
+      # Extract the first word of "freetype-config", so it can be a program name with args.
  set dummy freetype-config; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
@@ -28504,8 +28748,8 @@ $as_echo "no" >&6; }
  fi
  
  
+   fi
     if test -n "$freetype_config" ; then
-      : ${FREETYPE_BINPATH=`dirname $freetype_config`}
        : ${FREETYPE_INCLUDE=`$freetype_config --cflags`}
        if test -z "${FREETYPE_LIBS+set}"; then
      if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
@@ -32463,12 +32707,15 @@ if test -n "$GRPC_PATH"; then
     done
  fi
  if $grpc_pc grpc++ --exists 2>/dev/null; then
+   GRPC_SED=sed
     if test -f "$GRPC_PATH/lib/libboringssl.a"; then
        GRPC_SED="sed -e s/-lssl/-lboringssl/g -e s/-lcrypto/-lboringcrypto/g"
-   elif test -f /usr/lib/libssl.dylib -a \
+   fi
+   if test -f /usr/lib/libssl.dylib -a \
               x"`$grpc_pc grpc++ --variable=prefix`" != x/sw; then
-      GRPC_SED="sed -e s,-L/sw/lib,,"
-   else
+      GRPC_SED="$GRPC_SED -e s,-L/sw/lib,,"
+   fi
+   if test "$GRPC_SED" = sed; then
        GRPC_SED=cat
     fi
     GRPC_CONFIG_LIBS="`$grpc_pc grpc++ grpc --libs | $GRPC_SED`"
@@ -32651,10 +32898,12 @@ if test -n "$GRPC_LIBS"; then
     if test -n "$GRPC_CONFIG_LIBS"; then
        GRPC_LIBS="$GRPC_CONFIG_LIBS $PROTOBUF_LIBS $GRPC_LDEP"
        GRPC_UNSECURE_LIBS="`$grpc_pc grpc++_unsecure grpc_unsecure --libs`"
-      case " $GRPC_LIBS ::: $GRPC_UNSECURE_LIBS " in
-         *:::*" -lupb "* ) ;;
-         *" -lupb "* ) GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS -lupb" ;;
-      esac
+      for x in address_sorting upb cares; do
+         case " $GRPC_LIBS ::: $GRPC_UNSECURE_LIBS " in
+            *:::*" -l$x "* ) ;;
+            *" -l$x "* ) GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS -l$x" ;;
+         esac
+      done
        GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS $PROTOBUF_LIBS $GRPC_LDEP"
     else
        LDFLAGS="$orig_LDFLAGS $GRPC_LIBPATH"
@@ -32827,6 +33076,20 @@ if test -d "$AWS_SDK_PATH"; then
        fi
     done
  fi
+AWS_SDK_LDEP=
+AWS_SDK_STATIC_LDEP=
+for d in "$AWS_SDK_PATH/lib$bit64_sfx" "$AWS_SDK_PATH/lib" \
+         /usr/lib/$multiarch /usr/lib$bit64_sfx /usr/lib \
+         /usr/local/lib$bit64_sfx /usr/local/lib; do
+    if test -f "$d/libaws-cpp-sdk-s3.a"; then
+       AWS_SDK_LIBDIR=$d
+       if test -f "$AWS_SDK_LIBDIR/libaws-c-event-stream.a"; then
+          AWS_SDK_LDEP="-laws-c-event-stream -laws-checksums -laws-c-common"
+          AWS_SDK_STATIC_LDEP="-laws-c-event-stream-static -laws-checksums-static -laws-c-common-static"
+       fi
+       break
+    fi
+done
  if test "$with_aws_sdk" != "no"; then
      case "$AWS_SDK_PATH:$with_aws_sdk" in
         *:yes | *: | $with_aws_sdk* ) ;;
@@ -32902,10 +33165,10 @@ if test "$with_aws_sdk" != "no"; then
      AWS_SDK_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
   fi
         fi
-       AWS_SDK_LIBS="$AWS_SDK_LIBPATH -laws-cpp-sdk-s3 -laws-cpp-sdk-ec2 -laws-cpp-sdk-core"
+       AWS_SDK_LIBS="$AWS_SDK_LIBPATH -laws-cpp-sdk-s3 -laws-cpp-sdk-ec2 -laws-cpp-sdk-core $AWS_SDK_LDEP"
      else
         AWS_SDK_INCLUDE=""
-       AWS_SDK_LIBS="-laws-cpp-sdk-s3 -laws-cpp-sdk-ec2 -laws-cpp-sdk-core"
+       AWS_SDK_LIBS="-laws-cpp-sdk-s3 -laws-cpp-sdk-ec2 -laws-cpp-sdk-core $AWS_SDK_LDEP"
         in_path=
      fi
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libaws-cpp-sdk-s3$in_path" >&5
@@ -32959,8 +33222,8 @@ $as_echo "#define HAVE_LIBAWS_SDK 1" >>confdefs.h
  
  
  if test "$with_aws_sdk" != no -a \
-     -f "$AWS_SDK_PATH/lib$bit64_sfx/libaws-cpp-sdk-s3-static.a"; then
-    AWS_SDK_STATIC_LIBS="-L$AWS_SDK_PATH/lib -laws-cpp-sdk-s3-static -laws-cpp-sdk-ec2-static -laws-cpp-sdk-core-static"
+     -f "$AWS_SDK_LIBDIR/libaws-cpp-sdk-s3-static.a"; then
+    AWS_SDK_STATIC_LIBS="-L$AWS_SDK_LIBDIR -laws-cpp-sdk-s3-static -laws-cpp-sdk-ec2-static -laws-cpp-sdk-core-static $AWS_SDK_STATIC_LDEP"
  else
      AWS_SDK_STATIC_LIBS=$AWS_SDK_LIBS
  fi
@@ -33101,6 +33364,550 @@ else
      HIREDIS_STATIC_LIBS=$HIREDIS_LIBS
  fi
  
+case "$with_apache_arrow" in
+   yes | no | '' ) ;;
+   *             ) APACHE_ARROW_PATH=$with_apache_arrow ;;
+esac
+if test -d "$APACHE_ARROW_PATH"; then
+   ncbi_fix_dir_tmp=`if cd $APACHE_ARROW_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi`
+ case "$ncbi_fix_dir_tmp" in
+    /.*) ncbi_fix_dir_tmp2=`cd $APACHE_ARROW_PATH && $smart_pwd 2>/dev/null`
+         if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+            APACHE_ARROW_PATH=$ncbi_fix_dir_tmp2
+         else
+            case "$APACHE_ARROW_PATH" in
+               /*) ;;
+               * ) APACHE_ARROW_PATH=$ncbi_fix_dir_tmp ;;
+            esac
+         fi
+         ;;
+    /*) APACHE_ARROW_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+   for d in "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx$asan_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx$asan_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$asan_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$asan_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do
+      if test -d "$d"; then
+         APACHE_ARROW_PATH=$d
+         ncbi_fix_dir_tmp=`if cd $APACHE_ARROW_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi`
+ case "$ncbi_fix_dir_tmp" in
+    /.*) ncbi_fix_dir_tmp2=`cd $APACHE_ARROW_PATH && $smart_pwd 2>/dev/null`
+         if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+            APACHE_ARROW_PATH=$ncbi_fix_dir_tmp2
+         else
+            case "$APACHE_ARROW_PATH" in
+               /*) ;;
+               * ) APACHE_ARROW_PATH=$ncbi_fix_dir_tmp ;;
+            esac
+         fi
+         ;;
+    /*) APACHE_ARROW_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+         break
+      fi
+   done
+fi
+
+if test "$with_apache_arrow" != "no"; then
+    case "$APACHE_ARROW_PATH:$with_apache_arrow" in
+       *:yes | *: | $with_apache_arrow* ) ;;
+       * ) APACHE_ARROW_PATH=$with_apache_arrow ;;
+    esac
+    if test "$APACHE_ARROW_PATH" != /usr -a -d "$APACHE_ARROW_PATH"; then
+       in_path=" in $APACHE_ARROW_PATH"
+       if test -z "$APACHE_ARROW_INCLUDE" -a -d "$APACHE_ARROW_PATH/include"; then
+          APACHE_ARROW_INCLUDE="-I$APACHE_ARROW_PATH/include"
+       fi
+       if test -n "$APACHE_ARROW_LIBPATH"; then
+          :
+       elif test -d "$APACHE_ARROW_PATH/lib${bit64_sfx}"; then
+          ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
+    for x in $APACHE_ARROW_PATH/lib${bit64_sfx}; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+    done
+    APACHE_ARROW_LIBPATH="${ncbi_rp_L_flags}"
+ else
+    ncbi_rp_R_flags=
+    ncbi_rp_R_sep=" $CONF_f_runpath"
+    for x in $APACHE_ARROW_PATH/lib${bit64_sfx}; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+       x=`echo $x | sed -e "$ncbi_rpath_sed"`
+       ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+       ncbi_rp_R_sep=:
+    done
+    APACHE_ARROW_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+       elif test -d "$APACHE_ARROW_PATH/lib"; then
+          ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
+    for x in $APACHE_ARROW_PATH/lib; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+    done
+    APACHE_ARROW_LIBPATH="${ncbi_rp_L_flags}"
+ else
+    ncbi_rp_R_flags=
+    ncbi_rp_R_sep=" $CONF_f_runpath"
+    for x in $APACHE_ARROW_PATH/lib; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+       x=`echo $x | sed -e "$ncbi_rpath_sed"`
+       ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+       ncbi_rp_R_sep=:
+    done
+    APACHE_ARROW_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+       fi
+       APACHE_ARROW_LIBS="$APACHE_ARROW_LIBPATH -lparquet -larrow"
+    else
+       APACHE_ARROW_INCLUDE=""
+       APACHE_ARROW_LIBS="-lparquet -larrow"
+       in_path=
+    fi
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libparquet$in_path" >&5
+$as_echo_n "checking for libparquet$in_path... " >&6; }
+if ${ncbi_cv_lib_apache_arrow+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  CPPFLAGS=" $APACHE_ARROW_INCLUDE $orig_CPPFLAGS"
+       LIBS="$APACHE_ARROW_LIBS  $orig_LIBS"
+       cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <parquet/api/reader.h>
+int
+main ()
+{
+parquet::ParquetFileReader pfr;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  ncbi_cv_lib_apache_arrow=yes
+else
+  ncbi_cv_lib_apache_arrow=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ncbi_cv_lib_apache_arrow" >&5
+$as_echo "$ncbi_cv_lib_apache_arrow" >&6; }
+    if test "$ncbi_cv_lib_apache_arrow" = "no"; then
+       if test "${with_apache_arrow:=no}" != no; then
+       as_fn_error $? "--with-apache_arrow explicitly specified, but no usable version found." "$LINENO" 5
+    fi
+    fi
+ fi
+ if test "$with_apache_arrow" = "no"; then
+    APACHE_ARROW_PATH="No_APACHE_ARROW"
+    APACHE_ARROW_INCLUDE=
+    APACHE_ARROW_LIBS=
+ else
+              WithPackages="$WithPackages${WithPackagesSep}APACHE_ARROW"; WithPackagesSep=" "
+    APACHE_ARROW_INCLUDE=" $APACHE_ARROW_INCLUDE"
+
+$as_echo "#define HAVE_LIBAPACHE_ARROW 1" >>confdefs.h
+
+ fi
+
+
+
+if test "$with_apache_arrow" != no -a \
+     -f "$APACHE_ARROW_LIBDIR/libparquet-static.a"; then
+    APACHE_ARROW_STATIC_LIBS="-L$APACHE_ARROW_LIBDIR -lparquet-static -larrow-static -larrow_bundled_dependencies-static $BZ2_LIBS $Z_LIBS -lzstd"
+else
+    APACHE_ARROW_STATIC_LIBS=$APACHE_ARROW_LIBS
+fi
+
+case "$with_librdkafka" in
+   yes | no | '' ) ;;
+   *             ) LIBRDKAFKA_PATH=$with_librdkafka ;;
+esac
+if test -d "$LIBRDKAFKA_PATH"; then
+   ncbi_fix_dir_tmp=`if cd $LIBRDKAFKA_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi`
+ case "$ncbi_fix_dir_tmp" in
+    /.*) ncbi_fix_dir_tmp2=`cd $LIBRDKAFKA_PATH && $smart_pwd 2>/dev/null`
+         if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+            LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp2
+         else
+            case "$LIBRDKAFKA_PATH" in
+               /*) ;;
+               * ) LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+            esac
+         fi
+         ;;
+    /*) LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+   for d in "$LIBRDKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+            "$LIBRDKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+            "$LIBRDKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \
+            "$LIBRDKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do
+      if test -d "$d"; then
+         LIBRDKAFKA_PATH=$d
+         ncbi_fix_dir_tmp=`if cd $LIBRDKAFKA_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi`
+ case "$ncbi_fix_dir_tmp" in
+    /.*) ncbi_fix_dir_tmp2=`cd $LIBRDKAFKA_PATH && $smart_pwd 2>/dev/null`
+         if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+            LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp2
+         else
+            case "$LIBRDKAFKA_PATH" in
+               /*) ;;
+               * ) LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+            esac
+         fi
+         ;;
+    /*) LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+         break
+      fi
+   done
+fi
+
+if test "$with_librdkafka" != "no"; then
+    case "$LIBRDKAFKA_PATH:$with_librdkafka" in
+       *:yes | *: | $with_librdkafka* ) ;;
+       * ) LIBRDKAFKA_PATH=$with_librdkafka ;;
+    esac
+    if test "$LIBRDKAFKA_PATH" != /usr -a -d "$LIBRDKAFKA_PATH"; then
+       in_path=" in $LIBRDKAFKA_PATH"
+       if test -z "$LIBRDKAFKA_INCLUDE" -a -d "$LIBRDKAFKA_PATH/include"; then
+          LIBRDKAFKA_INCLUDE="-I$LIBRDKAFKA_PATH/include"
+       fi
+       if test -n "$LIBRDKAFKA_LIBPATH"; then
+          :
+       elif test -d "$LIBRDKAFKA_PATH/lib${bit64_sfx}"; then
+          ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
+    for x in $LIBRDKAFKA_PATH/lib${bit64_sfx}; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+    done
+    LIBRDKAFKA_LIBPATH="${ncbi_rp_L_flags}"
+ else
+    ncbi_rp_R_flags=
+    ncbi_rp_R_sep=" $CONF_f_runpath"
+    for x in $LIBRDKAFKA_PATH/lib${bit64_sfx}; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+       x=`echo $x | sed -e "$ncbi_rpath_sed"`
+       ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+       ncbi_rp_R_sep=:
+    done
+    LIBRDKAFKA_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+       elif test -d "$LIBRDKAFKA_PATH/lib"; then
+          ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
+    for x in $LIBRDKAFKA_PATH/lib; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+    done
+    LIBRDKAFKA_LIBPATH="${ncbi_rp_L_flags}"
+ else
+    ncbi_rp_R_flags=
+    ncbi_rp_R_sep=" $CONF_f_runpath"
+    for x in $LIBRDKAFKA_PATH/lib; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+       x=`echo $x | sed -e "$ncbi_rpath_sed"`
+       ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+       ncbi_rp_R_sep=:
+    done
+    LIBRDKAFKA_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+       fi
+       LIBRDKAFKA_LIBS="$LIBRDKAFKA_LIBPATH -lrdkafka "
+    else
+       LIBRDKAFKA_INCLUDE=""
+       LIBRDKAFKA_LIBS="-lrdkafka "
+       in_path=
+    fi
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for librdkafka$in_path" >&5
+$as_echo_n "checking for librdkafka$in_path... " >&6; }
+if ${ncbi_cv_lib_librdkafka+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  CPPFLAGS=" $LIBRDKAFKA_INCLUDE $orig_CPPFLAGS"
+       LIBS="$LIBRDKAFKA_LIBS  $orig_LIBS"
+       cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <librdkafka/rdkafka.h>
+int
+main ()
+{
+rd_kafka_conf_t *conf = rd_kafka_conf_new();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  ncbi_cv_lib_librdkafka=yes
+else
+  ncbi_cv_lib_librdkafka=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ncbi_cv_lib_librdkafka" >&5
+$as_echo "$ncbi_cv_lib_librdkafka" >&6; }
+    if test "$ncbi_cv_lib_librdkafka" = "no"; then
+       if test "${with_librdkafka:=no}" != no; then
+       as_fn_error $? "--with-librdkafka explicitly specified, but no usable version found." "$LINENO" 5
+    fi
+    fi
+ fi
+ if test "$with_librdkafka" = "no"; then
+    LIBRDKAFKA_PATH="No_LIBRDKAFKA"
+    LIBRDKAFKA_INCLUDE=
+    LIBRDKAFKA_LIBS=
+ else
+              WithPackages="$WithPackages${WithPackagesSep}LIBRDKAFKA"; WithPackagesSep=" "
+    LIBRDKAFKA_INCLUDE=" $LIBRDKAFKA_INCLUDE"
+
+$as_echo "#define HAVE_LIBRDKAFKA 1" >>confdefs.h
+
+ fi
+
+
+
+if test "$with_librdkafka" != no -a \
+     -f "$LIBRDKAFKA_PATH/lib$bit64_sfx/librdkafka-static.a"; then
+   LIBRDKAFKA_STATIC_LIBS="-L$LIBRDKAFKA_PATH/lib$bit64_sfx -lrdkafka-static"
+else
+   LIBRDKAFKA_STATIC_LIBS=$LIBRDKAFKA_LIBS
+fi
+
+case "$with_cppkafka" in
+   yes | no | '' ) ;;
+   *             ) CPPKAFKA_PATH=$with_cppkafka ;;
+esac
+if test -d "$CPPKAFKA_PATH"; then
+   ncbi_fix_dir_tmp=`if cd $CPPKAFKA_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi`
+ case "$ncbi_fix_dir_tmp" in
+    /.*) ncbi_fix_dir_tmp2=`cd $CPPKAFKA_PATH && $smart_pwd 2>/dev/null`
+         if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+            CPPKAFKA_PATH=$ncbi_fix_dir_tmp2
+         else
+            case "$CPPKAFKA_PATH" in
+               /*) ;;
+               * ) CPPKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+            esac
+         fi
+         ;;
+    /*) CPPKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+   for d in "$CPPKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+            "$CPPKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+            "$CPPKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \
+            "$CPPKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do
+      if test -d "$d"; then
+         CPPKAFKA_PATH=$d
+         ncbi_fix_dir_tmp=`if cd $CPPKAFKA_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi`
+ case "$ncbi_fix_dir_tmp" in
+    /.*) ncbi_fix_dir_tmp2=`cd $CPPKAFKA_PATH && $smart_pwd 2>/dev/null`
+         if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+            CPPKAFKA_PATH=$ncbi_fix_dir_tmp2
+         else
+            case "$CPPKAFKA_PATH" in
+               /*) ;;
+               * ) CPPKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+            esac
+         fi
+         ;;
+    /*) CPPKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+         break
+      fi
+   done
+fi
+
+if test "$with_cppkafka" != "no"; then
+    case "$CPPKAFKA_PATH:$with_cppkafka" in
+       *:yes | *: | $with_cppkafka* ) ;;
+       * ) CPPKAFKA_PATH=$with_cppkafka ;;
+    esac
+    if test "$CPPKAFKA_PATH" != /usr -a -d "$CPPKAFKA_PATH"; then
+       in_path=" in $CPPKAFKA_PATH"
+       if test -z "$CPPKAFKA_INCLUDE" -a -d "$CPPKAFKA_PATH/include"; then
+          CPPKAFKA_INCLUDE="-I$CPPKAFKA_PATH/include"
+       fi
+       if test -n "$CPPKAFKA_LIBPATH"; then
+          :
+       elif test -d "$CPPKAFKA_PATH/lib${bit64_sfx}"; then
+          ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
+    for x in $CPPKAFKA_PATH/lib${bit64_sfx}; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+    done
+    CPPKAFKA_LIBPATH="${ncbi_rp_L_flags}"
+ else
+    ncbi_rp_R_flags=
+    ncbi_rp_R_sep=" $CONF_f_runpath"
+    for x in $CPPKAFKA_PATH/lib${bit64_sfx}; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+       x=`echo $x | sed -e "$ncbi_rpath_sed"`
+       ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+       ncbi_rp_R_sep=:
+    done
+    CPPKAFKA_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+       elif test -d "$CPPKAFKA_PATH/lib"; then
+          ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
+    for x in $CPPKAFKA_PATH/lib; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+    done
+    CPPKAFKA_LIBPATH="${ncbi_rp_L_flags}"
+ else
+    ncbi_rp_R_flags=
+    ncbi_rp_R_sep=" $CONF_f_runpath"
+    for x in $CPPKAFKA_PATH/lib; do
+       case "$x" in
+          /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+             continue
+             ;;
+       esac
+       ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+       ncbi_rp_L_sep=" $CONF_f_libpath"
+       x=`echo $x | sed -e "$ncbi_rpath_sed"`
+       ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+       ncbi_rp_R_sep=:
+    done
+    CPPKAFKA_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+       fi
+       CPPKAFKA_LIBS="$CPPKAFKA_LIBPATH -lcppkafka $LIBRDKAFKA_LIBS"
+    else
+       CPPKAFKA_INCLUDE=""
+       CPPKAFKA_LIBS="-lcppkafka $LIBRDKAFKA_LIBS"
+       in_path=
+    fi
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libcppkafka$in_path" >&5
+$as_echo_n "checking for libcppkafka$in_path... " >&6; }
+if ${ncbi_cv_lib_cppkafka+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  CPPFLAGS="$LIBRDKAFKA_INCLUDE $CPPKAFKA_INCLUDE $orig_CPPFLAGS"
+       LIBS="$CPPKAFKA_LIBS  $orig_LIBS"
+       cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <cppkafka/configuration.h>
+int
+main ()
+{
+cppkafka::Configuration cfg; cfg.set("foo", "bar");
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  ncbi_cv_lib_cppkafka=yes
+else
+  ncbi_cv_lib_cppkafka=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ncbi_cv_lib_cppkafka" >&5
+$as_echo "$ncbi_cv_lib_cppkafka" >&6; }
+    if test "$ncbi_cv_lib_cppkafka" = "no"; then
+       if test "${with_cppkafka:=no}" != no; then
+       as_fn_error $? "--with-cppkafka explicitly specified, but no usable version found." "$LINENO" 5
+    fi
+    fi
+ fi
+ if test "$with_cppkafka" = "no"; then
+    CPPKAFKA_PATH="No_CPPKAFKA"
+    CPPKAFKA_INCLUDE=
+    CPPKAFKA_LIBS=
+ else
+              WithPackages="$WithPackages${WithPackagesSep}CPPKAFKA"; WithPackagesSep=" "
+    CPPKAFKA_INCLUDE="$LIBRDKAFKA_INCLUDE $CPPKAFKA_INCLUDE"
+
+$as_echo "#define HAVE_LIBCPPKAFKA 1" >>confdefs.h
+
+ fi
+
+
+
+if test "$with_cppkafka" != no -a \
+     -f "$CPPKAFKA_PATH/lib$bit64_sfx/libcppkafka-static.a"; then
+   CPPKAFKA_STATIC_LIBS="-L$CPPKAFKA_PATH/lib$bit64_sfx -lcppkafka-static $LIBRDKAFKA_STATIC_LIBS"
+else
+   CPPKAFKA_STATIC_LIBS=$CPPKAFKA_LIBS
+fi
+
  ### Restore original compiler/linker flags
  LIBS="$orig_LIBS"
  CPPFLAGS="$orig_CPPFLAGS"
@@ -33526,7 +34333,7 @@ for x in ChaosMonkey Int8GI StrictGI PSGLoader GCC KCC ICC AppleClang LLVMClang
            ;;
        esac
     done
-  for x in UUID FUSE Iconv LIBUNWIND LIBDW BACKWARD_CPP Z LocalZ BZ2 LocalBZ2 LZO PCRE LocalPCRE MBEDTLS GMP GCRYPT NETTLE GNUTLS OPENSSL KRB5 CURL Sybase DBLib FreeTDS MySQL BerkeleyDB BerkeleyDB++ ODBC PYTHON PYTHON25 PYTHON26 PYTHON27 PYTHON3 PERL Boost.Chrono Boost.Filesystem Boost.Iostreams Boost.Program-Options Boost.Regex Boost.Serialization Boost.Spirit Boost.System Boost.Test Boost.Test.Included Boost.Thread C-Toolkit OpenGL MESA GLUT GLEW wxWidgets wx2.8 Fast-CGI LocalSSS LocalMSGMAIL2 SSSUTILS LocalNCBILS NCBILS2 SSSDB SP ORBacus ICU EXPAT SABLOT LIBXML LIBXSLT LIBEXSLT Xerces Xalan Zorba SQLITE3 SQLITE3ASYNC VDB NGS OECHEM SGE MUPARSER HDF5 JPEG PNG TIFF GIF UNGIF XPM GL2PS FreeType FTGL MAGIC MIMETIC GSOAP AVRO Cereal SASL2 MONGODB MONGODB3 LEVELDB GMOCK LAPACK LMDB LocalLMDB LIBUV LIBSSH2 CASSANDRA NGHTTP2 H2O INFLUXDB LIBXLSXWRITER PROTOBUF GRPC MSGSL AWS_SDK HIREDIS; do
+  for x in UUID FUSE Iconv LIBUNWIND LIBDW BACKWARD_CPP Z LocalZ BZ2 LocalBZ2 LZO ZSTD PCRE LocalPCRE MBEDTLS GMP GCRYPT NETTLE GNUTLS OPENSSL KRB5 CURL Sybase DBLib FreeTDS MySQL BerkeleyDB BerkeleyDB++ ODBC PYTHON PYTHON25 PYTHON26 PYTHON27 PYTHON3 PERL Boost.Chrono Boost.Filesystem Boost.Iostreams Boost.Program-Options Boost.Regex Boost.Serialization Boost.Spirit Boost.System Boost.Test Boost.Test.Included Boost.Thread C-Toolkit OpenGL MESA GLUT GLEW wxWidgets wx2.8 Fast-CGI LocalSSS LocalMSGMAIL2 SSSUTILS LocalNCBILS NCBILS2 SSSDB SP ORBacus ICU EXPAT SABLOT LIBXML LIBXSLT LIBEXSLT Xerces Xalan Zorba SQLITE3 SQLITE3ASYNC VDB NGS OECHEM SGE MUPARSER HDF5 JPEG PNG TIFF GIF UNGIF XPM GL2PS FreeType FTGL MAGIC MIMETIC GSOAP AVRO Cereal SASL2 MONGODB MONGODB3 LEVELDB GMOCK LAPACK LMDB LocalLMDB LIBUV LIBSSH2 CASSANDRA NGHTTP2 H2O INFLUXDB LIBXLSXWRITER PROTOBUF GRPC MSGSL AWS_SDK HIREDIS APACHE_ARROW LIBRDKAFKA CPPKAFKA; do
        case " $WithPackages " in
           *" $x "*) ;;
           *) WithoutPackages="$WithoutPackages$WithoutPackagesSep$x"
@@ -33935,6 +34742,9 @@ c_ncbi_runpath=`echo "$ncbi_runpath" | sed -e 's:\\$\\$:\\$:g'`
  
  
  
+
+
+
  
  
  
diff --git a/c++/src/build-system/configure.ac b/c++/src/build-system/configure.ac

index cb3fe35e59a11d5201b100faf587c63518aa15f5..9c72b8ea25c575770c8cdf51abdc5cce6c7163f9 100644 (file)
--- a/c++/src/build-system/configure.ac
+++ b/c++/src/build-system/configure.ac
@@ -1,5 +1,5 @@
  #############################################################################
-#  $Id: configure.ac 608058 2020-05-11 16:30:05Z ivanov $
+#  $Id: configure.ac 616396 2020-09-15 18:22:00Z ivanov $
  #  Derived from configure.in version 1.173.
  # ==========================================================================
  #
@@ -65,7 +65,7 @@ case "$with_3psw" in
           with_ncbi_c=no
        fi
        m4_foreach(X, [sss, sssutils, sssdb, vdb, ngs, libunwind,
-                     z, bz2, lzo, pcre, mbedtls,
+                     z, bz2, lzo, zstd, pcre, mbedtls,
                       gmp, gcrypt, nettle, gnutls, openssl, krb5, boost, lmdb,
                       sybase, ftds, mysql, opengl, mesa, glut, glew, gl2ps,
                       wxwidgets, freetype, ftgl, fastcgi, bdb, orbacus, odbc,
@@ -75,7 +75,8 @@ case "$with_3psw" in
                       curl, gsoap, avro, cereal, sasl2,
                       mongodb, mongodb3, leveldb, gmock, lapack,
                       libuv, libssh2, cassandra, nghttp2, h2o, influxdb,
-                     libxlsxwriter, protobuf, grpc, msgsl, aws-sdk, hiredis],
+                     libxlsxwriter, protobuf, grpc, msgsl, aws-sdk, hiredis,
+                     apache-arrow, librdkafka, cppkafka],
          [if test "${[with_]X-no}" != "no"; then
              AC_MSG_ERROR([incompatible options: --with-]X[ but --without-3psw])
           else
@@ -283,6 +284,10 @@ AC_ARG_WITH(lzo,
     [ --with-lzo=DIR          use LZO installation in DIR (requires 2.x or up)])
  AC_ARG_WITH(lzo,
     [ --without-lzo           do not use LZO])
+AC_ARG_WITH(zstd,
+   [ --with-zstd=DIR         use Zstandard installation in DIR])
+AC_ARG_WITH(zstd,
+   [ --without-zstd          do not use Zstandard])
  AC_ARG_WITH(pcre,
     [ --with-pcre=DIR         use PCRE installation in DIR])
  AC_ARG_WITH(pcre,
@@ -579,8 +584,20 @@ AC_ARG_WITH(aws-sdk,
     [ --without-aws-sdk       do not use the Amazon Web Services SDK])
  AC_ARG_WITH(hiredis,
     [ --with-hiredis=DIR      use Hiredis installation in DIR])
-AC_ARG_WITH(grpc,
+AC_ARG_WITH(hiredis,
     [ --without-hiredis       do not use Hiredis])
+AC_ARG_WITH(apache-arrow,
+   [ --with-apache-arrow=DIR use Apache Arrow installation in DIR])
+AC_ARG_WITH(apache-arrow,
+   [ --without-apache-arrow  do not use Apache Arrow])
+AC_ARG_WITH(librdkafka,
+   [ --with-librdkafka=DIR   use librdkafka installation in DIR])
+AC_ARG_WITH(librdkafka,
+   [ --without-librdkafka    do not use librdkafka])
+AC_ARG_WITH(cppkafka,
+   [ --with-cppkafka=DIR     use cppkafka installation in DIR])
+AC_ARG_WITH(cppkafka,
+   [ --without-cppkafka      do not use cppkafka])
  AC_ARG_WITH(3psw,
     [ --with-3psw=std:netopt  favor standard (system) builds of the above pkgs.])
  AC_ARG_WITH(3psw,
@@ -638,7 +655,8 @@ check ncbi-public strip pch caution ccache distcc \
  ncbi-c wxwidgets wxwidgets-ucs fastcgi sss sssdb sssutils included-sss \
  geo included-geo vdb downloaded-vdb static-vdb ngs libunwind libdw \
  backward-cpp backward-cpp-sig \
-z bz2 lzo pcre mbedtls gmp gcrypt nettle gnutls static-gnutls openssl krb5 \
+z bz2 lzo zstd pcre mbedtls \
+gmp gcrypt nettle gnutls static-gnutls openssl krb5 \
  sybase sybase-local sybase-new ftds mysql \
  orbacus freetype ftgl opengl mesa glut glew glew-mx gl2ps \
  bdb python perl jni sqlite3 icu boost boost-tag \
@@ -649,6 +667,7 @@ magic curl mimetic gsoap avro cereal sasl2 \
  mongodb mongodb3 leveldb gmock lapack lmdb \
  libuv libssh2 cassandra nghttp2 h2o influxdb \
  libxlsxwriter protobuf grpc msgsl aws-sdk hiredis \
+apache-arrow librdkafka cppkafka \
  3psw local-lbsm ncbi-crypt connext \
  serial objects dbapi app ctools gui algo internal gbench"
  
@@ -703,7 +722,7 @@ for x_arg in "$@" ; do
        --srcdir=* | --x-includes=* | --x-libraries=* | --with-tcheck=* \
        | --with-ncbi-c=* | --with-sss=* | --with-vdb=* | --with-ngs=* \
        | --with-libunwind=* | --with-libdw=* | --with-backward-cpp=* \
-      | --with-z=* | --with-bz2=* | --with-lzo=* \
+      | --with-z=* | --with-bz2=* | --with-lzo=* | --with-zstd=* \
        | --with-pcre=* | --with-mbedtls=* \
        | --with-gmp=* | --with-gcrypt=* | --with-nettle=* \
        | --with-gnutls=* | --with-openssl=* | --with-krb5=* \
@@ -4517,6 +4536,13 @@ if test -n "$LZO_LIBS" -a "x$with_bin_release" = xyes \
     LZO_LIBS="$LZO_LIBPATH -llzo2-static"
  fi
  
+if test -d "$ZSTD_PATH"; then
+   NCBI_FIX_DIR(ZSTD_PATH)
+fi
+NCBI_CHECK_THIRD_PARTY_LIB(zstd,
+ [AC_LANG_PROGRAM([@%:@include <zstd.h>],
+      [[ZSTD_CCtx* cctx = ZSTD_createCCtx();]])])
+
  if test -z "$PCRE_PATH"  &&  pcre-config --version >/dev/null 2>&1; then
      p=`pcre-config --prefix`
      test "x$p" = "x/usr"  ||  PCRE_PATH=$p
@@ -7638,11 +7664,16 @@ fi
  
  ## FreeType and FTGL
  if test "$with_freetype" != "no" ; then
-   : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin}
-   AC_PATH_PROG(freetype_config, freetype-config, [],
-                [$FREETYPE_BINPATH:$PATH])
+   ft2pc="env PKG_CONFIG_PATH=$FREETYPE_PATH/lib/pkgconfig pkg-config freetype2"
+   if $ft2pc --exists >/dev/null 2>&1; then
+      freetype_config=$ft2pc
+      FREETYPE_PATH=`$ft2pc --variable=exec_prefix`
+   else
+      : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin}
+      AC_PATH_PROG(freetype_config, freetype-config, [],
+                   [$FREETYPE_BINPATH:$PATH])
+   fi
     if test -n "$freetype_config" ; then
-      : ${FREETYPE_BINPATH=`dirname $freetype_config`}
        : ${FREETYPE_INCLUDE=`$freetype_config --cflags`}
        NCBI_RPATHIFY_OUTPUT_COND(FREETYPE_LIBS, $freetype_config --libs,
           [$no_usr_lib])
@@ -8321,12 +8352,15 @@ if test -n "$GRPC_PATH"; then
     done
  fi
  if $grpc_pc grpc++ --exists 2>/dev/null; then
+   GRPC_SED=sed
     if test -f "$GRPC_PATH/lib/libboringssl.a"; then
        GRPC_SED="sed -e s/-lssl/-lboringssl/g -e s/-lcrypto/-lboringcrypto/g"
-   elif test -f /usr/lib/libssl.dylib -a \
+   fi
+   if test -f /usr/lib/libssl.dylib -a \
               x"`$grpc_pc grpc++ --variable=prefix`" != x/sw; then
-      GRPC_SED="sed -e s,-L/sw/lib,,"
-   else
+      GRPC_SED="$GRPC_SED -e s,-L/sw/lib,,"
+   fi
+   if test "$GRPC_SED" = sed; then
        GRPC_SED=cat
     fi
     GRPC_CONFIG_LIBS="`$grpc_pc grpc++ grpc --libs | $GRPC_SED`"
@@ -8368,10 +8402,12 @@ if test -n "$GRPC_LIBS"; then
     if test -n "$GRPC_CONFIG_LIBS"; then
        GRPC_LIBS="$GRPC_CONFIG_LIBS $PROTOBUF_LIBS $GRPC_LDEP"
        GRPC_UNSECURE_LIBS="`$grpc_pc grpc++_unsecure grpc_unsecure --libs`"
-      case " $GRPC_LIBS ::: $GRPC_UNSECURE_LIBS " in
-         *:::*" -lupb "* ) ;;
-         *" -lupb "* ) GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS -lupb" ;;
-      esac
+      for x in address_sorting upb cares; do
+         case " $GRPC_LIBS ::: $GRPC_UNSECURE_LIBS " in
+            *:::*" -l$x "* ) ;;
+            *" -l$x "* ) GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS -l$x" ;;
+         esac
+      done
        GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS $PROTOBUF_LIBS $GRPC_LDEP"
     else
        LDFLAGS="$orig_LDFLAGS $GRPC_LIBPATH"
@@ -8425,15 +8461,30 @@ if test -d "$AWS_SDK_PATH"; then
        fi
     done
  fi
+AWS_SDK_LDEP=
+AWS_SDK_STATIC_LDEP=
+for d in "$AWS_SDK_PATH/lib$bit64_sfx" "$AWS_SDK_PATH/lib" \
+         /usr/lib/$multiarch /usr/lib$bit64_sfx /usr/lib \
+         /usr/local/lib$bit64_sfx /usr/local/lib; do
+    if test -f "$d/libaws-cpp-sdk-s3.a"; then
+       AWS_SDK_LIBDIR=$d
+       if test -f "$AWS_SDK_LIBDIR/libaws-c-event-stream.a"; then
+          AWS_SDK_LDEP="-laws-c-event-stream -laws-checksums -laws-c-common"
+          AWS_SDK_STATIC_LDEP="-laws-c-event-stream-static -laws-checksums-static -laws-c-common-static"
+       fi
+       break
+    fi
+done
  NCBI_CHECK_THIRD_PARTY_LIB_EX(aws_sdk, AWS_SDK, aws-cpp-sdk-s3,
     [AC_LANG_PROGRAM([[@%:@include <aws/s3/S3Client.h>
                        @%:@include <aws/ec2/EC2Client.h>]],
         [[Aws::S3::S3Client s3cli;
           Aws::EC2::EC2Client ec2cli;]])],
-   [-laws-cpp-sdk-ec2 -laws-cpp-sdk-core], [$CURL_LIBS $OPENSSL_LIBS $Z_LIBS])
+   [-laws-cpp-sdk-ec2 -laws-cpp-sdk-core $AWS_SDK_LDEP],
+   [$CURL_LIBS $OPENSSL_LIBS $Z_LIBS])
  if test "$with_aws_sdk" != no -a \
-     -f "$AWS_SDK_PATH/lib$bit64_sfx/libaws-cpp-sdk-s3-static.a"; then
-    AWS_SDK_STATIC_LIBS="-L$AWS_SDK_PATH/lib -laws-cpp-sdk-s3-static -laws-cpp-sdk-ec2-static -laws-cpp-sdk-core-static"
+     -f "$AWS_SDK_LIBDIR/libaws-cpp-sdk-s3-static.a"; then
+    AWS_SDK_STATIC_LIBS="-L$AWS_SDK_LIBDIR -laws-cpp-sdk-s3-static -laws-cpp-sdk-ec2-static -laws-cpp-sdk-core-static $AWS_SDK_STATIC_LDEP"
  else
      AWS_SDK_STATIC_LIBS=$AWS_SDK_LIBS
  fi
@@ -8448,6 +8499,96 @@ else
      HIREDIS_STATIC_LIBS=$HIREDIS_LIBS
  fi
  
+case "$with_apache_arrow" in
+   yes | no | '' ) ;;
+   *             ) APACHE_ARROW_PATH=$with_apache_arrow ;;
+esac
+if test -d "$APACHE_ARROW_PATH"; then
+   NCBI_FIX_DIR(APACHE_ARROW_PATH)
+   for d in "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx$asan_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx$asan_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$asan_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$asan_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \
+            "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do
+      if test -d "$d"; then
+         APACHE_ARROW_PATH=$d
+         NCBI_FIX_DIR(APACHE_ARROW_PATH)
+         break
+      fi
+   done
+fi
+
+NCBI_CHECK_THIRD_PARTY_LIB_EX(apache_arrow, APACHE_ARROW, parquet,
+  [AC_LANG_PROGRAM([[@%:@include <parquet/api/reader.h>]],
+     [[parquet::ParquetFileReader pfr;]])],
+  [-larrow])
+if test "$with_apache_arrow" != no -a \
+     -f "$APACHE_ARROW_LIBDIR/libparquet-static.a"; then
+    APACHE_ARROW_STATIC_LIBS="-L$APACHE_ARROW_LIBDIR -lparquet-static -larrow-static -larrow_bundled_dependencies-static $BZ2_LIBS $Z_LIBS -lzstd"
+else
+    APACHE_ARROW_STATIC_LIBS=$APACHE_ARROW_LIBS
+fi
+
+case "$with_librdkafka" in
+   yes | no | '' ) ;;
+   *             ) LIBRDKAFKA_PATH=$with_librdkafka ;;
+esac
+if test -d "$LIBRDKAFKA_PATH"; then
+   NCBI_FIX_DIR(LIBRDKAFKA_PATH)
+   for d in "$LIBRDKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+            "$LIBRDKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+            "$LIBRDKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \
+            "$LIBRDKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do
+      if test -d "$d"; then
+         LIBRDKAFKA_PATH=$d
+         NCBI_FIX_DIR(LIBRDKAFKA_PATH)
+         break
+      fi
+   done
+fi
+
+NCBI_CHECK_THIRD_PARTY_LIB_EX(librdkafka, LIBRDKAFKA, rdkafka,
+  [AC_LANG_PROGRAM([[@%:@include <librdkafka/rdkafka.h>]],
+      [[rd_kafka_conf_t *conf = rd_kafka_conf_new();]])])
+if test "$with_librdkafka" != no -a \
+     -f "$LIBRDKAFKA_PATH/lib$bit64_sfx/librdkafka-static.a"; then
+   LIBRDKAFKA_STATIC_LIBS="-L$LIBRDKAFKA_PATH/lib$bit64_sfx -lrdkafka-static"
+else
+   LIBRDKAFKA_STATIC_LIBS=$LIBRDKAFKA_LIBS
+fi
+
+case "$with_cppkafka" in
+   yes | no | '' ) ;;
+   *             ) CPPKAFKA_PATH=$with_cppkafka ;;
+esac
+if test -d "$CPPKAFKA_PATH"; then
+   NCBI_FIX_DIR(CPPKAFKA_PATH)
+   for d in "$CPPKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+            "$CPPKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+            "$CPPKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \
+            "$CPPKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do
+      if test -d "$d"; then
+         CPPKAFKA_PATH=$d
+         NCBI_FIX_DIR(CPPKAFKA_PATH)
+         break
+      fi
+   done
+fi
+
+NCBI_CHECK_THIRD_PARTY_LIB(cppkafka,
+  [AC_LANG_PROGRAM([[@%:@include <cppkafka/configuration.h>]],
+      [[cppkafka::Configuration cfg; cfg.set("foo", "bar");]])],
+  [$LIBRDKAFKA_LIBS], [], [$LIBRDKAFKA_INCLUDE])
+if test "$with_cppkafka" != no -a \
+     -f "$CPPKAFKA_PATH/lib$bit64_sfx/libcppkafka-static.a"; then
+   CPPKAFKA_STATIC_LIBS="-L$CPPKAFKA_PATH/lib$bit64_sfx -lcppkafka-static $LIBRDKAFKA_STATIC_LIBS"
+else
+   CPPKAFKA_STATIC_LIBS=$CPPKAFKA_LIBS
+fi
+
  ### Restore original compiler/linker flags
  LIBS="$orig_LIBS"
  CPPFLAGS="$orig_CPPFLAGS"
@@ -9208,6 +9349,9 @@ AC_SUBST(GRPC_BIN)
  AC_SUBST(MSGSL_INCLUDE)
  AC_SUBST(AWS_SDK_STATIC_LIBS)
  AC_SUBST(HIREDIS_STATIC_LIBS)
+AC_SUBST(APACHE_ARROW_STATIC_LIBS)
+AC_SUBST(LIBRDKAFKA_STATIC_LIBS)
+AC_SUBST(CPPKAFKA_STATIC_LIBS)
  AC_SUBST(ncbi_xreader_pubseqos)
  AC_SUBST(ncbi_xreader_pubseqos2)
  AC_SUBST(UNLESS_PUBSEQOS)
diff --git a/c++/src/build-system/install.sh.in b/c++/src/build-system/install.sh.in

index a256f94d135861828c95bc8bc8cccce6c40a36ce..bd2b07bf5776e4680586a8f1c867a800a9f6326c 100644 (file)
--- a/c++/src/build-system/install.sh.in
+++ b/c++/src/build-system/install.sh.in
@@ -17,7 +17,7 @@
  
  echo "[`date`]"
  
-svn_location=`echo '$HeadURL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.10.1/c++/src/build-system/install.sh.in $' | sed "s%\\$[H]eadURL: *\\([^$][^$]*\\) \\$.*%\\1%"`
+svn_location=`echo '$HeadURL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.11.0/c++/src/build-system/install.sh.in $' | sed "s%\\$[H]eadURL: *\\([^$][^$]*\\) \\$.*%\\1%"`
  svn_revision=`echo '$Revision: 541872 $' | sed "s%\\$[R]evision: *\\([^$][^$]*\\) \\$.*%\\1%"`
  
  script_name=`basename $0`
diff --git a/c++/src/build-system/ncbi_package_version b/c++/src/build-system/ncbi_package_version

index 8bbb6e406a7332cc0a8793e19dae32dc6fc1c0d4..46b81d815a23b1a6b60bc9160f21295a5f9e4e75 100644 (file)
--- a/c++/src/build-system/ncbi_package_version
+++ b/c++/src/build-system/ncbi_package_version
@@ -1 +1 @@
-2.10.1
+2.11.0
diff --git a/c++/src/build-system/project_tree_builder.ini b/c++/src/build-system/project_tree_builder.ini

index c103c0f7dee1aaaa42298b7f32a6cba9b970c5db..839d9042a2de7a8fd6d04eae39199389d33d8ae7 100644 (file)
--- a/c++/src/build-system/project_tree_builder.ini
+++ b/c++/src/build-system/project_tree_builder.ini
@@ -1,4 +1,4 @@
-#  $Id: project_tree_builder.ini 607715 2020-05-06 17:37:02Z ivanov $
+#  $Id: project_tree_builder.ini 617210 2020-09-28 17:22:08Z ivanov $
  ###############################################################################
  
  
@@ -27,7 +27,7 @@ ThirdParty_C_ncbi = \\\\snowman\\win-coremake\\Lib\\Ncbi\\C\\$(msvc_3rd)\\c.sc-2
  
  #----------------------------------------------------------------------------
  # Location of custom code generators
-CustomCodeGenerator.proto = \\\\snowman\\win-coremake\\Lib\\ThirdParty\\grpc\\$(msvc_3rd)\\1.21.1-ncbi1\\bin\\ReleaseDLL
+CustomCodeGenerator.proto = \\\\snowman\\win-coremake\\Lib\\ThirdParty\\grpc\\$(msvc_3rd)\\1.28.1\\bin\\ReleaseDLL
  XCode_CustomCodeGenerator.proto = /netopt/ncbi_tools/grpc-1.28.1-ncbi1/Release/bin
  
  #----------------------------------------------------------------------------
@@ -217,7 +217,7 @@ ThirdParty_GIF          = $(ThirdPartyBasePath)\\gif\\$(msvc_3rd)\\4.1.3
  ThirdParty_GLEW         = $(ThirdPartyBasePath)\\glew\\$(msvc_3rd)\\1.5.8
  ThirdParty_GL2PS        = $(ThirdPartyBasePath)\\gl2ps\\$(msvc_3rd)\\1.4.0
  ThirdParty_GNUTLS       = $(ThirdPartyBasePath)\\gnutls\\$(msvc_3rd)\\3.4.9
-ThirdParty_GRPC         = $(ThirdPartyBasePath)\\grpc\\$(msvc_3rd)\\1.21.1-ncbi1
+ThirdParty_GRPC         = $(ThirdPartyBasePath)\\grpc\\$(msvc_3rd)\\1.28.1
  ThirdParty_INFLUXDB     = $(ThirdPartyBasePath)\\influxdb\\$(msvc_3rd)\\20190426
  ###ThirdParty_ICU          = $(ThirdPartyBasePath)\\icu\\$(msvc_3rd)\\3.2
  ThirdParty_JDK          = $(ThirdPartyBasePath)\\jdk\\1.6.0_25
@@ -243,7 +243,7 @@ ThirdParty_Xerces       = $(ThirdPartyBasePath)\\xerces\\$(msvc_3rd)\\2.8.0
  ThirdParty_XML          = $(ThirdPartyBasePath)\\xml\\$(msvc_3rd)\\2.7.8
  ThirdParty_XSLT         = $(ThirdPartyBasePath)\\xslt\\$(msvc_3rd)\\1.1.26
  ThirdParty_Z            = $(ThirdPartyBasePath)\\z\\$(msvc_3rd)\\1.2.11
-ThirdParty_VDB          = $(ThirdPartyVDBBasePath)\\vdb\\vdb-versions\\2.10.5
+ThirdParty_VDB          = $(ThirdPartyVDBBasePath)\\vdb\\vdb-versions\\2.10.8
  
  PYTHON_PATH = $(ThirdPartyAppsBasePath)\\Python252\\$(msvc_3rd)
  
@@ -475,7 +475,7 @@ ThirdParty_GL2PS      = $(XCode_ThirdPartyBasePath)/gl2ps-1.4.0
  ThirdParty_wxWidgets  = $(XCode_ThirdPartyBasePath)/wxWidgets-3.1.3-ncbi1
  ThirdParty_FreeType   = /opt/X11
  ThirdParty_FTGL       = $(XCode_ThirdPartyBasePath)/ftgl-2.1.3-rc5
-ThirdParty_VDB        = $(XCode_ThirdPartyVDBBasePath)/vdb/vdb-versions/2.10.5
+ThirdParty_VDB        = $(XCode_ThirdPartyVDBBasePath)/vdb/vdb-versions/2.10.8
  ThirdParty_GMP        = $(Xcode_ThirdPartyBasePath)/gmp-6.0.0a
  ThirdParty_Nettle     = $(Xcode_ThirdPartyBasePath)/nettle-3.1.1
  ThirdParty_GNUTLS     = $(Xcode_ThirdPartyBasePath)/gnutls-3.4.0
@@ -2116,20 +2116,20 @@ INCLUDE = $(ThirdParty_GRPC)\\include
  DEFINES = _WIN32_WINNT=0x0600
  [GRPC.debug]
  LIBPATH = $(ThirdParty_GRPC)\\lib\\DebugDLL
-LIB     = grpc++.lib grpc.lib gpr.lib address_sorting.lib cares.lib libprotobufd.lib boringssl.lib boringcrypto.lib
+LIB     = grpc++.lib grpc.lib gpr.lib address_sorting.lib cares.lib libprotobufd.lib upb.lib crypto.lib ssl.lib absl_throw_delegate.lib absl_strings.lib absl_bad_optional_access.lib  absl_str_format_internal.lib absl_raw_logging_internal.lib absl_int128.lib
  [GRPC.release]
  LIBPATH = $(ThirdParty_GRPC)\\lib\\ReleaseDLL
-LIB     = grpc++.lib grpc.lib gpr.lib address_sorting.lib cares.lib libprotobuf.lib boringssl.lib boringcrypto.lib
+LIB     = grpc++.lib grpc.lib gpr.lib address_sorting.lib cares.lib libprotobuf.lib  upb.lib crypto.lib ssl.lib absl_throw_delegate.lib absl_strings.lib absl_bad_optional_access.lib  absl_str_format_internal.lib absl_raw_logging_internal.lib absl_int128.lib
  [GRPC.xcode]
  INCLUDE = $(ThirdParty_GRPC)/include
  [GRPC.xcode.debug]
  INCLUDE = $(ThirdParty_GRPC)/Debug/include
  LIBPATH = $(ThirdParty_GRPC)/Debug/lib
-LIB     = -lgrpc++ -lgrpc -lgpr -laddress_sorting -lupb -lcares -lprotobufd -labsl_bad_optional_access -labsl_str_format_internal -labsl_strings -labsl_strings_internal -labsl_base -labsl_spinlock_wait -labsl_dynamic_annotations -labsl_int128 -labsl_throw_delegate -labsl_raw_logging_internal -labsl_log_severity -lssl -lcrypto
+LIB     = -lgrpc++ -lgrpc -lgpr -laddress_sorting -lupb -lcares -lprotobufd -labsl_bad_optional_access -labsl_str_format_internal -labsl_strings -labsl_strings_internal -labsl_base -labsl_spinlock_wait -labsl_dynamic_annotations -labsl_int128 -labsl_throw_delegate -labsl_raw_logging_internal -labsl_log_severity -lboringssl -lboringcrypto
  [GRPC.xcode.release]
  INCLUDE = $(ThirdParty_GRPC)/Release/include
  LIBPATH = $(ThirdParty_GRPC)/Release/lib
-LIB     = -lgrpc++ -lgrpc -lgpr -laddress_sorting -lupb -lcares -lprotobuf -labsl_bad_optional_access -labsl_str_format_internal -labsl_strings -labsl_strings_internal -labsl_base -labsl_spinlock_wait -labsl_dynamic_annotations -labsl_int128 -labsl_throw_delegate -labsl_raw_logging_internal -labsl_log_severity -lssl -lcrypto
+LIB     = -lgrpc++ -lgrpc -lgpr -laddress_sorting -lupb -lcares -lprotobuf -labsl_bad_optional_access -labsl_str_format_internal -labsl_strings -labsl_strings_internal -labsl_base -labsl_spinlock_wait -labsl_dynamic_annotations -labsl_int128 -labsl_throw_delegate -labsl_raw_logging_internal -labsl_log_severity -lboringssl -lboringcrypto
  
  [HAVE_LIBGRPC]
  Component = PROTOBUF GRPC
diff --git a/c++/src/build-system/relocate.sh.in b/c++/src/build-system/relocate.sh.in

index f9d4b2f6751513399009cf08c684ab3d39dc54dc..1ee97fb47f667acfc91e2176c93890b2355fde31 100644 (file)
--- a/c++/src/build-system/relocate.sh.in
+++ b/c++/src/build-system/relocate.sh.in
@@ -1,6 +1,6 @@
  @script_shell@
  
-# $Id: relocate.sh.in 608163 2020-05-12 16:03:04Z blastadm $
+# $Id: relocate.sh.in 617724 2020-10-06 07:11:17Z blastadm $
  # Author:  Denis Vakatov, NCBI 
  # 
  #  Adjust paths to this build tree and the relevant source tree
diff --git a/c++/src/corelib/ncbi_param.cpp b/c++/src/corelib/ncbi_param.cpp

index 3b2f6af84ed19b87917d3e86e67250dc174d8872..6fde6d82ccc78c86a6e1e91df2d7015f0fe7c5e2 100644 (file)
--- a/c++/src/corelib/ncbi_param.cpp
+++ b/c++/src/corelib/ncbi_param.cpp
@@ -1,4 +1,4 @@
-/*  $Id: ncbi_param.cpp 598497 2019-12-10 14:23:27Z grichenk $
+/*  $Id: ncbi_param.cpp 608309 2020-05-14 12:35:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -446,6 +446,7 @@ string NCBI_XNCBI_EXPORT g_GetConfigString(const char* section,
          }
      }
      const char* dvalue = default_value? default_value: "";
+    if ( src ) *src = default_value? CParamBase::eSource_Default: CParamBase::eSource_NotSet;
  #ifdef NCBI_PARAM_ENABLE_CONFIG_DUMP
      if ( s_CanDumpConfig() ) {
          if ( section  &&  *section ) {
diff --git a/c++/src/corelib/ncbi_stack.cpp b/c++/src/corelib/ncbi_stack.cpp

index 5b9361e4fc7a834e530d81b640825987f6650318..510ffd50d1445a04f6d8fdfa726018bd94163843 100644 (file)
--- a/c++/src/corelib/ncbi_stack.cpp
+++ b/c++/src/corelib/ncbi_stack.cpp
@@ -1,4 +1,4 @@
-/*  $Id: ncbi_stack.cpp 569055 2018-08-15 17:40:18Z vasilche $
+/*  $Id: ncbi_stack.cpp 613683 2020-08-11 17:27:52Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -119,6 +119,11 @@ void CStackTrace::x_ExpandStackTrace(void) const
  }
  
  
+static const vector<string> s_StackFilters {  // CStackTrace::Write() omits frames whose text contains any of these
+    "ncbi::CStackTrace::", "ncbi::CStackTraceImpl::", "ncbi::CException::",
+    "backward::"
+};
+
  void CStackTrace::Write(CNcbiOstream& os) const
  {
      x_ExpandStackTrace();
@@ -129,7 +134,16 @@ void CStackTrace::Write(CNcbiOstream& os) const
      }
  
      ITERATE(TStack, it, m_Stack) {
-        os << m_Prefix << it->AsString() << endl;
+        string s = it->AsString();
+        bool skip = false;
+        for (auto filter : s_StackFilters) {
+            if (s.find(filter) != NPOS) {
+                skip = true;
+                break;
+            }
+        }
+        if (skip) continue;
+        os << m_Prefix << s << endl;
      }
  }
  
diff --git a/c++/src/corelib/ncbi_system.cpp b/c++/src/corelib/ncbi_system.cpp

index 4bb589c17e6515430b3246c574b6198c90a80e98..b7beb86f005f6fd0105839ed6934474f9358a481 100644 (file)
--- a/c++/src/corelib/ncbi_system.cpp
+++ b/c++/src/corelib/ncbi_system.cpp
@@ -1,4 +1,4 @@
-/* $Id: ncbi_system.cpp 601275 2020-02-04 21:52:35Z vakatov $
+/* $Id: ncbi_system.cpp 613789 2020-08-12 18:02:48Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -71,6 +71,10 @@ extern "C" {
  #  define HAVE_MADVISE 1
  #endif //NCBI_OS_UNIX
  
+#if defined(NCBI_OS_LINUX)
+#  include <sched.h>
+#endif
+
  #ifdef NCBI_OS_DARWIN
  extern "C" {
  #  include <mach/mach.h>
@@ -253,7 +257,7 @@ static bool s_SetExitHandler(TLimitsPrintHandler handler,
  
  /////////////////////////////////////////////////////////////////////////////
  //
-// SetHeapLimit
+// Memory limits
  //
  
  #ifdef USE_SETMEMLIMIT
@@ -287,10 +291,12 @@ bool SetMemoryLimit(size_t max_size,
          rl.rlim_cur = rl.rlim_max = RLIM_INFINITY;
      }
      if (setrlimit(RLIMIT_DATA, &rl) != 0) {
+        CNcbiError::SetFromErrno();
          return false;
      }
  #  if !defined(NCBI_OS_SOLARIS)
      if (setrlimit(RLIMIT_AS, &rl) != 0) {
+        CNcbiError::SetFromErrno();
          return false;
      }
  #  endif //NCBI_OS_SOLARIS
@@ -320,6 +326,7 @@ bool SetMemoryLimitSoft(size_t max_size,
  
      rlimit rl;
      if (getrlimit(RLIMIT_DATA, &rl) != 0) {
+        CNcbiError::SetFromErrno();
          return false;
      }
      if ( max_size ) {
@@ -328,15 +335,18 @@ bool SetMemoryLimitSoft(size_t max_size,
          rl.rlim_cur = RLIM_INFINITY;
      }
      if (setrlimit(RLIMIT_DATA, &rl) != 0) {
+        CNcbiError::SetFromErrno();
          return false;
      }
  #  if !defined(NCBI_OS_SOLARIS)
      rlimit rlas;
      if (getrlimit(RLIMIT_AS, &rlas) != 0) {
+        CNcbiError::SetFromErrno();
          return false;
      }
      rl.rlim_max = rlas.rlim_max;
      if (setrlimit(RLIMIT_AS, &rl) != 0) {
+        CNcbiError::SetFromErrno();
          return false;
      }
  #  endif //NCBI_OS_SOLARIS
@@ -366,6 +376,7 @@ bool SetMemoryLimitHard(size_t max_size,
      size_t cur_soft_limit = 0;
      rlimit rl;
      if (getrlimit(RLIMIT_DATA, &rl) != 0) {
+        CNcbiError::SetFromErrno();
          return false;
      }
      if ( max_size ) {
@@ -378,11 +389,13 @@ bool SetMemoryLimitHard(size_t max_size,
          rl.rlim_max = RLIM_INFINITY;
      }
      if (setrlimit(RLIMIT_DATA, &rl) != 0) {
+        CNcbiError::SetFromErrno();
          return false;
      }
  #  if !defined(NCBI_OS_SOLARIS)
      rlimit rlas;
      if (getrlimit(RLIMIT_AS, &rlas) != 0) {
+        CNcbiError::SetFromErrno();
          return false;
      }
      if ( max_size ) {
@@ -399,6 +412,7 @@ bool SetMemoryLimitHard(size_t max_size,
          rlas.rlim_max = RLIM_INFINITY;
      }
      if (setrlimit(RLIMIT_AS, &rlas) != 0) {
+        CNcbiError::SetFromErrno();
          return false;
      }
  #  endif //NCBI_OS_SOLARIS
@@ -434,6 +448,7 @@ bool SetHeapLimit(size_t max_size,
          rl.rlim_cur = rl.rlim_max = RLIM_INFINITY;
      }
      if (setrlimit(RLIMIT_DATA, &rl) != 0) {
+        CNcbiError::SetFromErrno();
          return false;
      }
      s_MemoryLimitSoft = max_size;
@@ -446,34 +461,88 @@ bool SetHeapLimit(size_t max_size,
  }
  
  
+size_t GetVirtualMemoryLimitSoft(void)
+{
+    // Query the soft RLIMIT_AS from the kernel; the s_MemoryLimit* values may not reflect the real limits.
+    rlimit rl = {0,0};
+#  if !defined(NCBI_OS_SOLARIS)
+    if (getrlimit(RLIMIT_AS, &rl) != 0) {
+        CNcbiError::SetFromErrno();
+        return 0;  // failure is reported as 0 ...
+    }
+    if (rl.rlim_cur == RLIM_INFINITY) {
+        return 0;  // ... and so is "no limit"
+    }
+#else // NCBI_OS_SOLARIS: RLIMIT_AS is not queried, rl stays {0,0}
+    CNcbiError::Set(CNcbiError::eNotSupported);
+#endif // !NCBI_OS_SOLARIS
+    return rl.rlim_cur;
+}
+
+
+size_t GetVirtualMemoryLimitHard(void)
+{
+    // Query the hard RLIMIT_AS from the kernel; the s_MemoryLimit* values may not reflect the real limits.
+    rlimit rl = {0,0};
+#  if !defined(NCBI_OS_SOLARIS)
+    if (getrlimit(RLIMIT_AS, &rl) != 0) {
+        CNcbiError::SetFromErrno();
+        return 0;  // failure is reported as 0 ...
+    }
+    if (rl.rlim_max == RLIM_INFINITY) {
+        return 0;  // ... and so is "no limit"
+    }
+#else // NCBI_OS_SOLARIS: RLIMIT_AS is not queried, rl stays {0,0}
+    CNcbiError::Set(CNcbiError::eNotSupported);
+#endif // !NCBI_OS_SOLARIS
+    return rl.rlim_max;
+}
+
+
  #else
  
  bool SetMemoryLimit(size_t max_size, 
                      TLimitsPrintHandler handler, 
                      TLimitsPrintParameter parameter)
  {
-  return false;
+    CNcbiError::Set(CNcbiError::eNotSupported);
+    return false;
  }
  
  bool SetMemoryLimitSoft(size_t max_size, 
                      TLimitsPrintHandler handler, 
                      TLimitsPrintParameter parameter)
  {
-  return false;
+    CNcbiError::Set(CNcbiError::eNotSupported);
+    return false;
  }
  
  bool SetMemoryLimitHard(size_t max_size, 
                      TLimitsPrintHandler handler, 
                      TLimitsPrintParameter parameter)
  {
-  return false;
+    CNcbiError::Set(CNcbiError::eNotSupported);
+    return false;
  }
  
  bool SetHeapLimit(size_t max_size, 
                    TLimitsPrintHandler handler, 
                    TLimitsPrintParameter parameter)
  {
-  return false;
+    CNcbiError::Set(CNcbiError::eNotSupported);
+    return false;
+}
+
+size_t GetVirtualMemoryLimitSoft(void)
+{
+    CNcbiError::Set(CNcbiError::eNotSupported);
+    return 0;
+}
+
+size_t GetVirtualMemoryLimitHard(void)
+{
+    CNcbiError::Set(CNcbiError::eNotSupported);
+    return 0;
  }
  
  #endif //USE_SETMEMLIMIT
@@ -621,6 +690,53 @@ unsigned int CSystemInfo::GetCpuCount(void)
  }
  
  
+unsigned int CSystemInfo::GetCpuCountAllowed(void)
+{
+
+#if defined(NCBI_OS_MSWIN)
+
+    DWORD_PTR proc_mask = 0, sys_mask = 0;
+    if (!::GetProcessAffinityMask(::GetCurrentProcess(), &proc_mask, &sys_mask)) {
+        return 0;
+    }
+    unsigned int n = 0;  // number of bits set in proc_mask
+    for (; proc_mask; proc_mask >>= 1) {
+        n += proc_mask & 1;
+    }
+    return n;
+
+#elif defined(NCBI_OS_LINUX)
+  
+    unsigned int total_cpus = CSystemInfo::GetCpuCount();
+    if (total_cpus == 1) {
+        // GetCpuCount() returns 1 if unable to get real number
+        return 1;
+    }
+    // Standard type cpu_set_t can be limited if used directly,
+    // so use dynamic allocation approach
+    cpu_set_t* cpuset_ptr = CPU_ALLOC(total_cpus);
+    if (cpuset_ptr == NULL) {
+        return 0;
+    }
+    size_t cpuset_size = CPU_ALLOC_SIZE(total_cpus);
+    CPU_ZERO_S(cpuset_size, cpuset_ptr);
+   
+    if (sched_getaffinity(getpid(), cpuset_size, cpuset_ptr) != 0) {
+        CPU_FREE(cpuset_ptr);
+        return 0;
+    }
+    int n = CPU_COUNT_S(cpuset_size, cpuset_ptr);
+    CPU_FREE(cpuset_ptr);
+    return (n < 0) ? 0 : static_cast<unsigned int>(n);
+
+#endif //NCBI_OS_...
+
+    // TODO: add support for other UNIX versions where possible
+
+    return 0;
+}
+
+
  double CSystemInfo::GetUptime(void)
  {
  #if defined(NCBI_OS_MSWIN)
diff --git a/c++/src/corelib/ncbiapp.cpp b/c++/src/corelib/ncbiapp.cpp

index e6fc3aa841b44a497856ade270a759ca00ea6c8b..cde3db09c87c28461d4ea3e2d9c382b6fbf533a7 100644 (file)
--- a/c++/src/corelib/ncbiapp.cpp
+++ b/c++/src/corelib/ncbiapp.cpp
@@ -1,4 +1,4 @@
-/*  $Id: ncbiapp.cpp 604618 2020-03-31 13:29:46Z ivanov $
+/*  $Id: ncbiapp.cpp 610397 2020-06-16 18:45:55Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -69,11 +69,13 @@ BEGIN_NCBI_SCOPE
  //  Constants
  //
  
-static const char* s_ArgLogFile     = "-logfile";
-static const char* s_ArgCfgFile     = "-conffile";
-static const char* s_ArgVersion     = "-version";
-static const char* s_ArgFullVersion = "-version-full";
-static const char* s_ArgDryRun      = "-dryrun";
+static const char* s_ArgLogFile         = "-logfile";
+static const char* s_ArgCfgFile         = "-conffile";
+static const char* s_ArgVersion         = "-version";
+static const char* s_ArgFullVersion     = "-version-full";
+static const char* s_ArgFullVersionXml  = "-version-full-xml";
+static const char* s_ArgFullVersionJson = "-version-full-json";
+static const char* s_ArgDryRun          = "-dryrun";
  
  
  /////////////////////////////////////////////////////////////////////////////
@@ -192,12 +194,21 @@ CNcbiApplicationAPI::CNcbiApplicationAPI(const SBuildInfo& build_info)
      m_DryRun = false;
  }
  
+void CNcbiApplicationAPI::ExecuteOnExitActions()
+{
+    m_OnExitActions.ExecuteActions();
+}
+
  
  CNcbiApplicationAPI::~CNcbiApplicationAPI(void)
  {
      CThread::sm_IsExiting = true;
+
      // Execute exit actions before waiting for all threads to stop.
-    m_OnExitActions.ExecuteActions();
+    // NOTE: The exit actions may already be executed by higher-level
+    //       destructors. This is a final fail-safe place for this.
+    ExecuteOnExitActions();
+
  #if defined(NCBI_THREADS)
      CThread::WaitForAllThreads();
  #endif
@@ -240,6 +251,11 @@ CNcbiApplication::CNcbiApplication(const SBuildInfo& build_info)
  
  CNcbiApplication::~CNcbiApplication()
  {
+    // This earlier execution of the actions allows a safe use of
+    // CNcbiApplication::Instance() from the exit action functions. Instance()
+    // can return NULL pointer if called as part of CNcbiApplicationAPI dtor
+    // when the CNcbiApplication dtor already finished.
+    ExecuteOnExitActions();
  }
  
  
@@ -886,8 +902,7 @@ int CNcbiApplicationAPI::AppMain
              } else if ( NStr::strcmp(argv[i], s_ArgVersion) == 0 ) {
                  delete[] v;
                  // Print VERSION
-                cout << GetFullVersion().Print( appname,
-                    CVersionAPI::fVersionInfo | CVersionAPI::fPackageShort );
+                cout << GetFullVersion().Print( appname, CVersionAPI::fVersionInfo | CVersionAPI::fPackageShort );
                  diag_context.DiscardMessages();
                  return 0;
  
@@ -898,6 +913,18 @@ int CNcbiApplicationAPI::AppMain
                  cout << GetFullVersion().Print( appname );
                  diag_context.DiscardMessages();
                  return 0;
+            } else if ( NStr::strcmp(argv[i], s_ArgFullVersionXml) == 0 ) {
+                delete[] v;
+                // Print full VERSION in XML format
+                cout << GetFullVersion().PrintXml( appname );
+                diag_context.DiscardMessages();
+                return 0;
+            } else if ( NStr::strcmp(argv[i], s_ArgFullVersionJson) == 0 ) {
+                delete[] v;
+                // Print full VERSION in JSON format
+                cout << GetFullVersion().PrintJson( appname );
+                diag_context.DiscardMessages();
+                return 0;
  
                  // Dry run
              } else if ( NStr::strcmp(argv[i], s_ArgDryRun) == 0 ) {
@@ -1283,82 +1310,99 @@ void CNcbiApplicationAPI::x_SetupStdio(void)
  void CNcbiApplicationAPI::x_AddDefaultArgs(void)
  {
      if ( !m_DisableArgDesc ) {
-        for(CArgDescriptions* desc : m_ArgDesc->GetAllDescriptions()) {
-        if (desc->IsAutoHelpEnabled()) {
-            if ((m_HideArgs & fHideHelp) != 0) {
-                if (desc->Exist("h")) {
-                    desc->Delete("h");
+        for(CArgDescriptions* desc : m_ArgDesc->GetAllDescriptions())
+        {
+            if (desc->IsAutoHelpEnabled()) {
+                if ((m_HideArgs & fHideHelp) != 0) {
+                    if (desc->Exist("h")) {
+                        desc->Delete("h");
+                    }
                  }
              }
-        }
-        if ((m_HideArgs & fHideFullHelp) != 0) {
-            if (desc->Exist("help")) {
-                desc->Delete("help");
-            }
-        }
-        if ((m_HideArgs & fHideXmlHelp) != 0) {
-            if (desc->Exist("xmlhelp")) {
-                desc->Delete("xmlhelp");
-            }
-        }
-        if ((m_HideArgs & fHideLogfile) != 0) {
-            if (desc->Exist(s_ArgLogFile + 1)) {
-                desc->Delete(s_ArgLogFile + 1);
-            }
-        } else {
-            if (!desc->Exist(s_ArgLogFile + 1)) {
-                desc->AddOptionalKey
-                    (s_ArgLogFile+1, "File_Name",
-                        "File to which the program log should be redirected",
-                        CArgDescriptions::eOutputFile);
-            }
-        }
-        if ((m_HideArgs & fHideConffile) != 0) {
-            if (desc->Exist(s_ArgCfgFile + 1)) {
-                desc->Delete(s_ArgCfgFile + 1);
-            }
-        } else {
-            if (!desc->Exist(s_ArgCfgFile + 1)) {
-                desc->AddOptionalKey
-                    (s_ArgCfgFile + 1, "File_Name",
-                        "Program's configuration (registry) data file",
-                        CArgDescriptions::eInputFile);
+            if ((m_HideArgs & fHideFullHelp) != 0) {
+                if (desc->Exist("help")) {
+                    desc->Delete("help");
+                }
              }
-        }
-        if ((m_HideArgs & fHideVersion) != 0) {
-            if (desc->Exist(s_ArgVersion + 1)) {
-                desc->Delete(s_ArgVersion + 1);
+            if ((m_HideArgs & fHideXmlHelp) != 0) {
+                if (desc->Exist("xmlhelp")) {
+                    desc->Delete("xmlhelp");
+                }
              }
-        } else {
-            if (!desc->Exist(s_ArgVersion + 1)) {
-                desc->AddFlag
-                    (s_ArgVersion + 1,
-                        "Print version number;  ignore other arguments");
+            if ((m_HideArgs & fHideLogfile) != 0) {
+                if (desc->Exist(s_ArgLogFile + 1)) {
+                    desc->Delete(s_ArgLogFile + 1);
+                }
+            } else {
+                if (!desc->Exist(s_ArgLogFile + 1)) {
+                    desc->AddOptionalKey
+                        (s_ArgLogFile+1, "File_Name",
+                            "File to which the program log should be redirected",
+                            CArgDescriptions::eOutputFile);
+                }
              }
-        }
-        if ((m_HideArgs & fHideFullVersion) != 0) {
-            if (desc->Exist(s_ArgFullVersion + 1)) {
-                desc->Delete(s_ArgFullVersion + 1);
+            if ((m_HideArgs & fHideConffile) != 0) {
+                if (desc->Exist(s_ArgCfgFile + 1)) {
+                    desc->Delete(s_ArgCfgFile + 1);
+                }
+            } else {
+                if (!desc->Exist(s_ArgCfgFile + 1)) {
+                    desc->AddOptionalKey
+                        (s_ArgCfgFile + 1, "File_Name",
+                            "Program's configuration (registry) data file",
+                            CArgDescriptions::eInputFile);
+                }
              }
-        } else {
-            if (!desc->Exist(s_ArgFullVersion + 1)) {
-                desc->AddFlag
-                    (s_ArgFullVersion + 1,
-                        "Print extended version data;  ignore other arguments");
+            if ((m_HideArgs & fHideVersion) != 0) {
+                if (desc->Exist(s_ArgVersion + 1)) {
+                    desc->Delete(s_ArgVersion + 1);
+                }
+            } else {
+                if (!desc->Exist(s_ArgVersion + 1)) {
+                    desc->AddFlag
+                        (s_ArgVersion + 1,
+                            "Print version number;  ignore other arguments");
+                }
              }
-        }
-        if ((m_HideArgs & fHideDryRun) != 0) {
-            if (desc->Exist(s_ArgDryRun + 1)) {
-                desc->Delete(s_ArgDryRun + 1);
+            if ((m_HideArgs & fHideFullVersion) != 0) {
+                if (desc->Exist(s_ArgFullVersion + 1)) {
+                    desc->Delete(s_ArgFullVersion + 1);
+                }
+                if (desc->Exist(s_ArgFullVersionXml+ 1)) {
+                    desc->Delete(s_ArgFullVersionXml + 1);
+                }
+                if (desc->Exist(s_ArgFullVersionJson + 1)) {
+                    desc->Delete(s_ArgFullVersionJson + 1);
+                }
+            } else {
+                if (!desc->Exist(s_ArgFullVersion + 1)) {
+                    desc->AddFlag
+                        (s_ArgFullVersion + 1,
+                            "Print extended version data;  ignore other arguments");
+                }
+                if (!desc->Exist(s_ArgFullVersionXml + 1)) {
+                    desc->AddFlag
+                        (s_ArgFullVersionXml + 1,
+                            "Print extended version data in XML format;  ignore other arguments");
+                }
+                if (!desc->Exist(s_ArgFullVersionJson + 1)) {
+                    desc->AddFlag
+                        (s_ArgFullVersionJson + 1,
+                            "Print extended version data in JSON format;  ignore other arguments");
+                }
              }
-        } else {
-            if (!desc->Exist(s_ArgDryRun + 1)) {
-                desc->AddFlag
-                    (s_ArgDryRun + 1,
-                        "Dry run the application: do nothing, only test all preconditions");
+            if ((m_HideArgs & fHideDryRun) != 0) {
+                if (desc->Exist(s_ArgDryRun + 1)) {
+                    desc->Delete(s_ArgDryRun + 1);
+                }
+            } else {
+                if (!desc->Exist(s_ArgDryRun + 1)) {
+                    desc->AddFlag
+                        (s_ArgDryRun + 1,
+                            "Dry run the application: do nothing, only test all preconditions");
+                }
              }
          }
-        }
      }
  }
  
diff --git a/c++/src/corelib/ncbiargs.cpp b/c++/src/corelib/ncbiargs.cpp

index e408147b6854165dca28cb948beab92c7baefd6a..09b019f847513c3aa7399c64fd2aa5fdca85e76e 100644 (file)
--- a/c++/src/corelib/ncbiargs.cpp
+++ b/c++/src/corelib/ncbiargs.cpp
@@ -1,4 +1,4 @@
-/*  $Id: ncbiargs.cpp 604618 2020-03-31 13:29:46Z ivanov $
+/*  $Id: ncbiargs.cpp 609368 2020-06-01 14:12:44Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -2159,10 +2159,10 @@ CArgValue* CArgErrorHandler::HandleError(const CArgDesc& arg_desc,
  {
      if ((arg_desc.GetFlags() & CArgDescriptions::fIgnoreInvalidValue) == 0) {
          // Re-process invalid value to throw the same exception
-        arg_desc.ProcessArgument(value);
+        return arg_desc.ProcessArgument(value);
          // Should never get past ProcessArgument()
      }
-    if ((arg_desc.GetFlags() & CArgDescriptions::fWarnOnInvalidValue) == 0) {
+    if ((arg_desc.GetFlags() & CArgDescriptions::fWarnOnInvalidValue) != 0) {
          ERR_POST_X(22, Warning << "Invalid value " << value <<
              " for argument " << arg_desc.GetName() <<
              " - argument will be ignored.");
diff --git a/c++/src/corelib/ncbidiag.cpp b/c++/src/corelib/ncbidiag.cpp

index c9cca4e494db1f51b8177ad088a191206ffa6011..de6fd6fc56e2be07dbc88daf15e02d5648666119 100644 (file)
--- a/c++/src/corelib/ncbidiag.cpp
+++ b/c++/src/corelib/ncbidiag.cpp
@@ -1,4 +1,4 @@
-/*  $Id: ncbidiag.cpp 606469 2020-04-22 14:13:58Z ivanov $
+/*  $Id: ncbidiag.cpp 615738 2020-09-03 11:26:10Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -35,6 +35,7 @@
  #include <common/ncbi_source_ver.h>
  #include <common/ncbi_package_ver.h>
  #include <corelib/ncbiexpt.hpp>
+#include <corelib/version.hpp>
  #include <corelib/ncbi_process.hpp>
  #include <corelib/ncbifile.hpp>
  #include <corelib/syslog.hpp>
@@ -255,7 +256,8 @@ extern "C" {
  NCBI_PARAM_DECL(bool, Diag, Old_Post_Format);
  NCBI_PARAM_DEF_EX(bool, Diag, Old_Post_Format, true, eParam_NoThread,
                    DIAG_OLD_POST_FORMAT);
-static CSafeStatic<NCBI_PARAM_TYPE(Diag, Old_Post_Format)> s_OldPostFormat;
+static CSafeStatic<NCBI_PARAM_TYPE(Diag, Old_Post_Format)> s_OldPostFormat(
+    CSafeStaticLifeSpan(CSafeStaticLifeSpan::eLifeSpan_Long, 2));
  
  // Auto-print context properties on set/change.
  NCBI_PARAM_DECL(bool, Diag, AutoWrite_Context);
@@ -2138,7 +2140,7 @@ CDiagContext_Extra& CDiagContext_Extra::PrintNcbiAppInfoOnStart(void)
      CNcbiApplication* ins = CNcbiApplication::Instance();
      if (ins) {
          Print("ncbi_app_path", ins->GetProgramExecutablePath());
-        const CVersion& ver = ins->GetFullVersion();
+        const CVersionAPI& ver = ins->GetFullVersion();
          if (!ver.GetBuildInfo().date.empty()) {
              Print("ncbi_app_build_date", ver.GetBuildInfo().date);
          }
@@ -2186,7 +2188,7 @@ CDiagContext_Extra& CDiagContext_Extra::PrintNcbiAppInfoOnRequest(void)
  {
      CNcbiApplication* ins = CNcbiApplication::Instance();
      if (ins) {
-        const CVersion& ver = ins->GetFullVersion();
+        const CVersionAPI& ver = ins->GetFullVersion();
          const CVersionInfo& vi = ver.GetVersionInfo();
  //#if defined (NCBI_SC_VERSION) && NCBI_SC_VERSION <= 21
  #if 1
@@ -7514,6 +7516,30 @@ extern void SetDiagFilter(EDiagFilter what, const char* filter_str)
  }
  
  
+extern string GetDiagFilter(EDiagFilter what)
+{
+    CDiagLock lock(CDiagLock::eWrite);
+    if (what == eDiagFilter_Trace)
+        return s_TraceFilter->GetFilterStr();
+
+    if (what == eDiagFilter_Post)
+        return s_PostFilter->GetFilterStr();
+
+    return kEmptyStr;
+}
+
+
+extern void AppendDiagFilter(EDiagFilter what, const char* filter_str)
+{
+    CDiagLock lock(CDiagLock::eWrite);
+    if (what == eDiagFilter_Trace || what == eDiagFilter_All)
+        s_TraceFilter->Append(filter_str);
+
+    if (what == eDiagFilter_Post || what == eDiagFilter_All)
+        s_PostFilter->Append(filter_str);
+}
+
+
  
  ///////////////////////////////////////////////////////
  //  CNcbiDiag::
diff --git a/c++/src/corelib/ncbidiag_p.cpp b/c++/src/corelib/ncbidiag_p.cpp

index 33d1a5808550031cc1ab230e130ffe7cf8e12c64..1aa5c5d6eaecc9acac9a37f8f5297dfe7f5cba1d 100644 (file)
--- a/c++/src/corelib/ncbidiag_p.cpp
+++ b/c++/src/corelib/ncbidiag_p.cpp
@@ -1,4 +1,4 @@
-/*  $Id: ncbidiag_p.cpp 486111 2015-12-01 17:17:39Z grichenk $
+/*  $Id: ncbidiag_p.cpp 611708 2020-07-09 17:56:10Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -311,10 +311,12 @@ void CDiagFilter::Clean(void)
  void CDiagFilter::Fill(const char* filter_string)
  {
      try {
+        m_Filter.clear();
          CDiagSyntaxParser parser;
          CNcbiIstrstream in(filter_string);
  
          parser.Parse(in, *this);
+        m_Filter = filter_string;
      }
      catch (const CDiagSyntaxParser::TErrorInfo& err_info) {
          CNcbiOstrstream message;
@@ -326,6 +328,13 @@ void CDiagFilter::Fill(const char* filter_string)
      }
  }
  
+void CDiagFilter::Append(const char* filter_string)
+{
+    string new_filter = m_Filter + " " + filter_string;
+    Fill(new_filter.c_str());
+}
+
+
  EDiagFilterAction CDiagFilter::Check(const CNcbiDiag&  msg,
                                       const CException* ex) const
  {
@@ -642,7 +651,9 @@ CDiagLexParser::ESymbol CDiagLexParser::Parse(istream& in)
              if ( !isspace((unsigned char) symbol) ) {
                  if ( symbol == '[' ||
                       symbol == '(' ||
-                    (symbol == '!' && CT_TO_CHAR_TYPE(in.peek()) == '(')) {
+                     symbol == '/' ||
+                    (symbol == '!' && CT_TO_CHAR_TYPE(in.peek()) == '(') ||
+                    (symbol == '!' && CT_TO_CHAR_TYPE(in.peek()) == '/')) {
                      in.putback( symbol );
                      --m_Pos;
                      state = eStart;
diff --git a/c++/src/corelib/ncbidiag_p.hpp b/c++/src/corelib/ncbidiag_p.hpp

index 047e8f5913041321e4e0a8ba5d56ace82de3b8fa..170069ac8d11ec2e8dfbfafe9d5407a480c58222 100644 (file)
--- a/c++/src/corelib/ncbidiag_p.hpp
+++ b/c++/src/corelib/ncbidiag_p.hpp
@@ -1,7 +1,7 @@
  #ifndef CORELIB___NCBIDIAG_P__HPP
  #define CORELIB___NCBIDIAG_P__HPP
  
-/*  $Id: ncbidiag_p.hpp 505891 2016-06-29 17:58:41Z gouriano $
+/*  $Id: ncbidiag_p.hpp 611708 2020-07-09 17:56:10Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -260,6 +260,10 @@ public:
      /// Print state
      void Print(ostream& out) const;
  
+    const string& GetFilterStr(void) const { return m_Filter; }
+
+    void Append(const char* filter_string);
+
  private:
      /// Check if the filter accepts errcode
      EDiagFilterAction x_CheckErrCode(int code, int subcode, EDiagSev sev) const;
@@ -296,6 +300,7 @@ private:
  
  private:
      typedef deque< AutoPtr<CDiagMatcher> >  TMatchers;
+    string m_Filter;
      TMatchers m_Matchers;
      size_t    m_NotMatchersNum;
  };
diff --git a/c++/src/corelib/ncbifile.cpp b/c++/src/corelib/ncbifile.cpp

index f72409a98ed61a8583346cac4c1dcf5873bdc2d6..d6b30d21de12c5ce80419c787740aa6ff253cbfc 100644 (file)
--- a/c++/src/corelib/ncbifile.cpp
+++ b/c++/src/corelib/ncbifile.cpp
@@ -1,4 +1,4 @@
-/*  $Id: ncbifile.cpp 604618 2020-03-31 13:29:46Z ivanov $
+/*  $Id: ncbifile.cpp 610319 2020-06-15 17:06:08Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -4744,7 +4744,7 @@ static const SFileSystem s_FileSystem[] = {
      memset(&st, 0, sizeof(st));                                \
      if (statvfs(path.c_str(), &st) != 0) {                     \
          CNcbiError::SetFromErrno();                            \
-        NCBI_THROW(CFileErrnoException, eFileSystemInfo, msg); \
+        NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path); \
      }                                                          \
      info->total_space  = (Uint8)st.f_bsize * st.f_blocks;      \
      if (st.f_frsize) {                                         \
@@ -4762,7 +4762,7 @@ static const SFileSystem s_FileSystem[] = {
      memset(&st, 0, sizeof(st));                                \
      if (statfs(path.c_str(), &st) != 0) {                      \
          CNcbiError::SetFromErrno();                            \
-        NCBI_THROW(CFileErrnoException, eFileSystemInfo, msg); \
+        NCBI_THROW(CFileErrnoException, eFileSystemInfo,  string(msg) + path); \
      }                                                          \
      info->total_space  = (Uint8)st.f_bsize * st.f_blocks;      \
      info->free_space   = (Uint8)st.f_bsize * st.f_bavail;      \
@@ -5076,7 +5076,7 @@ void s_GetFileSystemInfo(const string&               path,
                                       &fs_flags,
                                       fs_name,
                                       sizeof(fs_name)/sizeof(fs_name[0])) ) {
-            NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + xpath);
+            NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path);
          }
          info->filename_max = filename_max;
          ufs_name = _T_CSTRING(fs_name);
@@ -5088,7 +5088,7 @@ void s_GetFileSystemInfo(const string&               path,
          if ( !::GetDiskFreeSpaceEx(_T_XCSTRING(xpath),
                                     (PULARGE_INTEGER)&info->free_space,
                                     (PULARGE_INTEGER)&info->total_space, 0) ) {
-            NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + xpath);
+            NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path);
          }
      }
  
@@ -5099,7 +5099,7 @@ void s_GetFileSystemInfo(const string&               path,
          if ( !::GetDiskFreeSpace(_T_XCSTRING(xpath),
                                   &dwSectPerClust, &dwBytesPerSect,
                                   NULL, NULL) ) {
-            NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + xpath);
+            NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path);
          }
          info->block_size = dwBytesPerSect * dwSectPerClust;
      }
@@ -5203,7 +5203,7 @@ void s_GetFileSystemInfo(const string&               path,
  #  elif defined(NCBI_OS_DARWIN)  &&  defined(HAVE_STATFS)
  
      GET_STATFS_INFO;
-    // Seems statfs structure on Darwin dont have any information 
+    // Seems statfs structure on Darwin doesn't have any information 
      // about name length, so rely on pathconf() only.
      //if (need_name_max) {
      //    info->filename_max = (unsigned long)st.f_namelen;
diff --git a/c++/src/corelib/version.cpp b/c++/src/corelib/version.cpp

index 3c924e3561812671d09ba82987f1f84eff153dd6..fe77f0894a203725e3156a5180f710ab6a47200f 100644 (file)
--- a/c++/src/corelib/version.cpp
+++ b/c++/src/corelib/version.cpp
@@ -1,4 +1,4 @@
-/*  $Id: version.cpp 591546 2019-08-16 16:59:06Z vasilche $
+/*  $Id: version.cpp 612086 2020-07-15 11:49:39Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -31,7 +31,7 @@
   */
  
  #include <ncbi_pch.hpp>
-#include <corelib/version_api.hpp>
+#include <corelib/version.hpp>
  #include <common/ncbi_package_ver.h>
  #include <common/ncbi_source_ver.h>
  
@@ -159,10 +159,10 @@ string CVersionInfo::PrintJson(void) const
      os << "{";
      bool need_separator = false;
      if (m_Major >= 0) {
-        os << "\"major\": \"" << m_Major <<
-            "\", \"minor\": \"" << (m_Minor >= 0 ? m_Minor : 0) << "\"";
+        os << "\"major\": " << m_Major <<
+            ", \"minor\": " << (m_Minor >= 0 ? m_Minor : 0);
          if (m_PatchLevel >= 0) {
-            os << ", \"patch_level\": \"" << m_PatchLevel << "\"";
+            os << ", \"patch_level\": " << m_PatchLevel;
          }
          need_separator = true;
      }
@@ -435,8 +435,9 @@ string CComponentVersionInfoAPI::PrintJson(void) const
      os << "{ \"name\": \"" <<
          NStr::JsonEncode(GetComponentName()) <<
              "\", \"version_info\": " <<
-            CVersionInfo::PrintJson() << endl <<
-            m_BuildInfo.PrintJson() << "}" << endl;
+            CVersionInfo::PrintJson() << ",\n" <<
+            "        \"build_info\": " <<
+            m_BuildInfo.PrintJson() << "}";
      return CNcbiOstrstreamToString(os);
  }
  
@@ -573,22 +574,21 @@ string SBuildInfo::PrintJson(void) const
  {
      CNcbiOstrstream os;
      bool need_separator = false;
-    os << '{' << endl;
+    os << '{';
      if ( !date.empty() ) {
          os << "\"" << ExtraNameJson(eBuildDate) << "\": \"" << NStr::JsonEncode(date) << '\"';
          need_separator = true;
      }
      if ( !tag.empty() ) {
-        if ( need_separator ) os << ',' << endl;
+        if ( need_separator ) os << ", ";
          os << '\"' << ExtraNameJson(eBuildTag) << "\": \"" << NStr::JsonEncode(tag) << '\"';
          need_separator = true;
      }
      for( const auto& e : m_extra) {
-        if ( need_separator ) os << "," << endl;
+        if ( need_separator ) os << ", ";
          os << '\"' << ExtraNameJson(e.first) << "\": \"" << NStr::JsonEncode(e.second) << '\"';
          need_separator = true;
      }
-    if ( need_separator ) os << endl;
      os << '}';
      return CNcbiOstrstreamToString(os);
  }
@@ -810,7 +810,7 @@ string CVersionAPI::PrintJson(const string& appname, TPrintFlags flags) const
  
      if (flags & fComponents) {
          if ( need_separator ) os << ",\n";
-        os << "    \"components\": [";
+        os << "    \"component\": [";
          need_separator = false;
          for (const auto& c : m_Components) {
              if ( need_separator ) os << ",";
diff --git a/c++/src/dbapi/driver/dbapi_conn_factory.cpp b/c++/src/dbapi/driver/dbapi_conn_factory.cpp

index 321ac0d5d5123853e8927548a4effcdeb50479aa..eff961ec313707fb41d2e907ad63e2add74bb6f8 100644 (file)
--- a/c++/src/dbapi/driver/dbapi_conn_factory.cpp
+++ b/c++/src/dbapi/driver/dbapi_conn_factory.cpp
@@ -1,4 +1,4 @@
-/*  $Id: dbapi_conn_factory.cpp 600085 2020-01-11 15:56:54Z mcelhany $
+/*  $Id: dbapi_conn_factory.cpp 610945 2020-06-25 18:31:37Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -35,9 +35,9 @@
  #include <dbapi/driver/impl/dbapi_driver_utils.hpp>
  #include <dbapi/driver/impl/dbapi_impl_connection.hpp>
  #include <dbapi/driver/impl/dbapi_impl_context.hpp>
+#include <dbapi/driver/impl/dbapi_pool_balancer.hpp>
  #include <dbapi/driver/public.hpp>
  #include <dbapi/error_codes.hpp>
-#include "dbapi_pool_balancer.hpp"
  #include <corelib/ncbiapp.hpp>
  #include <corelib/request_ctx.hpp>
  
@@ -472,8 +472,8 @@ CDBConnectionFactory::DispatchServerName(
          &&  !service_name.empty()  ) {
          balancer.Reset(new CDBPoolBalancer
                         (service_name, params.GetParam("pool_name"),
-                        ctx.driver_ctx,
-                        rt_data.GetServerOptions(service_name)));
+                        rt_data.GetServerOptions(service_name),
+                        &ctx.driver_ctx));
      }
      for ( ; !t_con && alternatives > 0; --alternatives ) {
          TSvrRef dsp_srv;
@@ -488,7 +488,7 @@ CDBConnectionFactory::DispatchServerName(
          // In this case we even won't try to map it.
          else if (!service_name.empty()) {
              if (balancer.NotEmpty()) {
-                dsp_srv = balancer->GetServer(&t_con, params);
+                dsp_srv = balancer->GetServer(&t_con, &params);
              }
              if (dsp_srv.Empty()) {
                  dsp_srv = rt_data.GetDispatchedServer(service_name);
@@ -537,8 +537,8 @@ CDBConnectionFactory::DispatchServerName(
                          balancer.Reset
                              (new CDBPoolBalancer
                               (service_name, params.GetParam("pool_name"),
-                              ctx.driver_ctx,
-                              rt_data.GetServerOptions(service_name, true)));
+                              rt_data.GetServerOptions(service_name, true),
+                              &ctx.driver_ctx));
                      }
                      full_retry_made = true;
                      continue;
diff --git a/c++/src/dbapi/driver/dbapi_impl_context.cpp b/c++/src/dbapi/driver/dbapi_impl_context.cpp

index 06f35c31f3be745cba5ca5232172ae310c10758f..67c0fb40377482893f985ee19def8b039f3bf7e6 100644 (file)
--- a/c++/src/dbapi/driver/dbapi_impl_context.cpp
+++ b/c++/src/dbapi/driver/dbapi_impl_context.cpp
@@ -1,4 +1,4 @@
-/*  $Id: dbapi_impl_context.cpp 600087 2020-01-11 19:46:51Z mcelhany $
+/*  $Id: dbapi_impl_context.cpp 610920 2020-06-25 13:37:30Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -966,6 +966,7 @@ CDriverContext::SatisfyPoolMinimum(const CDBConnParams& params)
              ++total_cnt;
          }
      }
+    mg.Release();
      vector< AutoPtr<CDB_Connection> > conns(pool_min);
      for (int i = total_cnt; i < pool_min; ++i) {
          try {
diff --git a/c++/src/dbapi/driver/dbapi_pool_balancer.cpp b/c++/src/dbapi/driver/dbapi_pool_balancer.cpp

index ed656b3f194236e1dcc631baf334ebbae57d7195..2a45ed7680c342beb472bd3c2fb3e00a21a6321a 100644 (file)
--- a/c++/src/dbapi/driver/dbapi_pool_balancer.cpp
+++ b/c++/src/dbapi/driver/dbapi_pool_balancer.cpp
@@ -1,4 +1,4 @@
-/*  $Id: dbapi_pool_balancer.cpp 548289 2017-10-12 14:54:18Z ucko $
+/*  $Id: dbapi_pool_balancer.cpp 610945 2020-06-25 18:31:37Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -33,7 +33,7 @@
  
  #include <ncbi_pch.hpp>
  
-#include "dbapi_pool_balancer.hpp"
+#include <dbapi/driver/impl/dbapi_pool_balancer.hpp>
  #include <dbapi/driver/dbapi_conn_factory.hpp>
  #include <dbapi/driver/impl/dbapi_impl_context.hpp>
  #include <dbapi/error_codes.hpp>
@@ -88,11 +88,12 @@ public:
  
  CDBPoolBalancer::CDBPoolBalancer(const string& service_name,
                                   const string& pool_name,
-                                 I_DriverContext& driver_ctx,
-                                 const IDBServiceMapper::TOptions& options)
+                                 const IDBServiceMapper::TOptions& options,
+                                 I_DriverContext* driver_ctx)
      : m_DriverCtx(driver_ctx), m_TotalCount(0U)
  {
-    bool is_ftds = NStr::StartsWith(driver_ctx.GetDriverName(), "ftds");
+    bool is_ftds = (driver_ctx == nullptr
+                    ||  NStr::StartsWith(driver_ctx->GetDriverName(), "ftds"));
      for (auto it : options) {
          CTempString name = it->GetName();
          auto key = impl::MakeEndpointKey(it->GetHost(), it->GetPort());
@@ -127,10 +128,13 @@ CDBPoolBalancer::CDBPoolBalancer(const string& service_name,
      }
      
      const impl::CDriverContext* ctx_impl
-        = dynamic_cast<const impl::CDriverContext*>(&driver_ctx);
+        = dynamic_cast<const impl::CDriverContext*>(driver_ctx);
      impl::CDriverContext::TCounts counts;
      if (ctx_impl == NULL) {
-        ERR_POST_X(1, Warning << "Called with non-standard IDriverContext");
+        if (driver_ctx != nullptr) {
+            ERR_POST_X(1, Warning <<
+                       "Called with non-standard IDriverContext");
+        }
      } else if (pool_name.empty()) {
          ctx_impl->GetCountsForService(service_name, &counts);
      } else {
@@ -177,7 +181,7 @@ CDBPoolBalancer::CDBPoolBalancer(const string& service_name,
  }
  
  TSvrRef CDBPoolBalancer::GetServer(CDB_Connection** conn,
-                                   const CDBConnParams& params)
+                                   const CDBConnParams* params)
  {
      TSvrRef             result;
      impl::TEndpointKey  conn_key = 0;
@@ -196,10 +200,11 @@ TSvrRef CDBPoolBalancer::GetServer(CDB_Connection** conn,
          return result;
      }
  
-    if (/* m_TotalCount > 1  && */  conn != NULL) {
-        string pool_name = params.GetParam("pool_name");
-        CDBConnParams_DNC dnc_params(params);
-        *conn = IDBConnectionFactory::CtxMakeConnection(m_DriverCtx,
+    if (/* m_TotalCount > 1  && */  conn != nullptr  &&  params != nullptr
+        &&  m_DriverCtx != nullptr) {
+        string pool_name = params->GetParam("pool_name");
+        CDBConnParams_DNC dnc_params(*params);
+        *conn = IDBConnectionFactory::CtxMakeConnection(*m_DriverCtx,
                                                          dnc_params);
          if (*conn != NULL) {
              const string&  server_name  = (*conn)->ServerName();
@@ -214,7 +219,7 @@ TSvrRef CDBPoolBalancer::GetServer(CDB_Connection** conn,
                             "Unrecognized endpoint for existing connection to "
                             << impl::ConvertN2A(host) << ":" << port
                             << " (" << server_name << ')');
-                excess = m_DriverCtx.NofConnections(server_name, pool_name);
+                excess = m_DriverCtx->NofConnections(server_name, pool_name);
                  result.Reset(&*it->second.ref);
              } else {
                  double scale_factor = m_TotalCount / total_ranking;
@@ -227,7 +232,7 @@ TSvrRef CDBPoolBalancer::GetServer(CDB_Connection** conn,
                     << ":" << port << " (" << server_name
                     << ") for turnover; projected excess count " << excess);
              if (excess > 0.0) {
-                string        pool_max_str  = params.GetParam("pool_maxsize");
+                string        pool_max_str  = params->GetParam("pool_maxsize");
                  unsigned int  pool_max      = 0u;
                  if ( !pool_max_str.empty()  &&  pool_max_str != "default") {
                      NStr::StringToNumeric(pool_max_str, &pool_max,
@@ -306,8 +311,8 @@ TSvrRef CDBPoolBalancer::GetServer(CDB_Connection** conn,
                  // This call might not close the exact connection we
                  // considered, but closing any connection to the
                  // relevant server is sufficient here.
-                m_DriverCtx.CloseUnusedConnections
-                    (server_name, params.GetParam("pool_name"), 1u);
+                m_DriverCtx->CloseUnusedConnections
+                    (server_name, params->GetParam("pool_name"), 1u);
              }
          }
      }
diff --git a/c++/src/dbapi/driver/dbapi_pool_balancer.hpp b/c++/src/dbapi/driver/dbapi_pool_balancer.hpp

deleted file mode 100644 (file)

index b26d6b2..0000000
--- a/c++/src/dbapi/driver/dbapi_pool_balancer.hpp
+++ /dev/null
@@ -1,82 +0,0 @@
-#ifndef DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP
-#define DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP
-
-/*  $Id: dbapi_pool_balancer.hpp 548289 2017-10-12 14:54:18Z ucko $
- * ===========================================================================
- *
- *                            PUBLIC DOMAIN NOTICE
- *               National Center for Biotechnology Information
- *
- *  This software/database is a "United States Government Work" under the
- *  terms of the United States Copyright Act.  It was written as part of
- *  the author's official duties as a United States Government employee and
- *  thus cannot be copyrighted.  This software/database is freely available
- *  to the public for use. The National Library of Medicine and the U.S.
- *  Government have not placed any restriction on its use or reproduction.
- *
- *  Although all reasonable efforts have been taken to ensure the accuracy
- *  and reliability of the software and data, the NLM and the U.S.
- *  Government do not and cannot warrant the performance or results that
- *  may be obtained by using this software or data. The NLM and the U.S.
- *  Government disclaim all warranties, express or implied, including
- *  warranties of performance, merchantability or fitness for any particular
- *  purpose.
- *
- *  Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Author:  Aaron Ucko
- *
- */
-
-/// @file dbapi_pool_balancer.hpp
-/// Help distribute connections within a pool across servers.
-
-#include <dbapi/driver/impl/dbapi_driver_utils.hpp>
-
-/** @addtogroup DBAPI
- *
- * @{
- */
-
-BEGIN_NCBI_SCOPE
-
-class CDBPoolBalancer : public CObject
-{
-public:
-    CDBPoolBalancer(const string& service_name,
-                    const string& pool_name,
-                    I_DriverContext& driver_ctx,
-                    const IDBServiceMapper::TOptions& options);
-
-    TSvrRef GetServer(CDB_Connection** conn, const CDBConnParams& params);
-
-private:
-    struct SEndpointInfo {
-        SEndpointInfo()
-            : effective_ranking(0.0), ideal_count(0.0), actual_count(0U),
-              penalty_level(0U)
-            { }
-        
-        CRef<CDBServerOption>  ref;
-        double                 effective_ranking;
-        double                 ideal_count;
-        unsigned int           actual_count;
-        unsigned int           penalty_level;
-    };
-    typedef map<impl::TEndpointKey, SEndpointInfo> TEndpoints;
-
-    impl::TEndpointKey x_NameToKey(CTempString& name) const;
-    
-    TEndpoints        m_Endpoints;
-    multiset<double>  m_Rankings;
-    I_DriverContext&  m_DriverCtx;
-    unsigned int      m_TotalCount;
-};
-
-END_NCBI_SCOPE
-
-/* @} */
-
-#endif  /* DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP */
diff --git a/c++/src/objects/dbsnp/primary_track/snpptis.cpp b/c++/src/objects/dbsnp/primary_track/snpptis.cpp

index b0f1007eb5645d393a0938249efe8428c3a611a3..85c30bd63404c1b05516f5646149014941f982c8 100644 (file)
--- a/c++/src/objects/dbsnp/primary_track/snpptis.cpp
+++ b/c++/src/objects/dbsnp/primary_track/snpptis.cpp
@@ -1,4 +1,5 @@
-/* ===========================================================================
+/*  $Id: snpptis.cpp 615550 2020-09-01 13:13:11Z fukanchi $
+ * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
   *               National Center for Biotechnology Information
@@ -34,6 +35,7 @@
  #ifdef HAVE_LIBGRPC
  # include <objects/dbsnp/primary_track/impl/snpptis_impl.hpp>
  # include <corelib/ncbi_param.hpp>
+# include <corelib/ncbi_system.hpp>
  #endif
  
  BEGIN_NCBI_NAMESPACE;
@@ -50,10 +52,46 @@ CSnpPtisClient::~CSnpPtisClient()
  }
  
  
+#ifdef HAVE_LIBGRPC
+const char* const kSection = "ID2SNP";
+const char* const kParam_PTISName = "PTIS_NAME";
+const char* const kParam_Retry = "RETRY";
+const char* const kParam_Timeout    = "TIMEOUT";
+const char* const kParam_TimeoutMul = "TIMEOUT_MULTIPLIER";
+const char* const kParam_TimeoutInc = "TIMEOUT_INCREMENT";
+const char* const kParam_TimeoutMax = "TIMEOUT_MAX";
+const char* const kParam_WaitTime    = "WAIT_TIME";
+const char* const kParam_WaitTimeMul = "WAIT_TIME_MULTIPLIER";
+const char* const kParam_WaitTimeInc = "WAIT_TIME_INCREMENT";
+const char* const kParam_WaitTimeMax = "WAIT_TIME_MAX";
+const int kDefault_Retry = 5;
+const float kDefault_Timeout    = 1;
+const float kDefault_TimeoutMul = 1.5;
+const float kDefault_TimeoutInc = 0;
+const float kDefault_TimeoutMax = 10;
+const float kDefault_WaitTime    = 0.5;
+const float kDefault_WaitTimeMul = 1.2;
+const float kDefault_WaitTimeInc = 0.2;
+const float kDefault_WaitTimeMax = 5;
+#endif
+
+
  bool CSnpPtisClient::IsEnabled()
  {
  #ifdef HAVE_LIBGRPC
-    return CGRPCClientContext::IsImplemented();
+    if ( !CGRPCClientContext::IsImplemented() ) {
+        return false;
+    }
+    // check if there's valid address
+    int source;
+    auto addr = g_NCBI_GRPC_GetAddress(kSection, kParam_PTISName, nullptr, &source);
+#ifndef NCBI_OS_LINUX
+    if ( source == CParamBase::eSource_Default ) {
+        // default grpc link to linkerd daemon works on Linux only
+        return false;
+    }
+#endif
+    return !addr.empty();
  #else
      return false;
  #endif
@@ -96,9 +134,20 @@ string CSnpPtisClient::GetPrimarySnpTrackForId(const CSeq_id& id)
  #ifdef HAVE_LIBGRPC
  CSnpPtisClient_Impl::CSnpPtisClient_Impl()
  {
-    channel = grpc::CreateChannel(g_NCBI_GRPC_GetAddress("ID2SNP", "PTIS_NAME"),
-                                  grpc::InsecureChannelCredentials());
-
+    grpc::ChannelArguments args;
+    string address = g_NCBI_GRPC_GetAddress(kSection, kParam_PTISName);
+    //LOG_POST(Trace<<"CSnpPtisClient: connecting to "<<address);
+    channel = grpc::CreateCustomChannel(address, grpc::InsecureChannelCredentials(), args);
+    max_retries = g_GetConfigInt(kSection, kParam_Retry, nullptr, kDefault_Retry);
+    timeout     = g_GetConfigDouble(kSection, kParam_Timeout   , nullptr, kDefault_Timeout   );
+    timeout_mul = g_GetConfigDouble(kSection, kParam_TimeoutMul, nullptr, kDefault_TimeoutMul);
+    timeout_inc = g_GetConfigDouble(kSection, kParam_TimeoutInc, nullptr, kDefault_TimeoutInc);
+    timeout_max = g_GetConfigDouble(kSection, kParam_TimeoutMax, nullptr, kDefault_TimeoutMax);
+    wait_time     = g_GetConfigDouble(kSection, kParam_WaitTime   , nullptr, kDefault_WaitTime   );
+    wait_time_mul = g_GetConfigDouble(kSection, kParam_WaitTimeMul, nullptr, kDefault_WaitTimeMul);
+    wait_time_inc = g_GetConfigDouble(kSection, kParam_WaitTimeInc, nullptr, kDefault_WaitTimeInc);
+    wait_time_max = g_GetConfigDouble(kSection, kParam_WaitTimeMax, nullptr, kDefault_WaitTimeMax);
+    
      stub = ncbi::grpcapi::dbsnp::primary_track::DbSnpPrimaryTrack::NewStub(channel);
  }
  
@@ -125,22 +174,36 @@ string CSnpPtisClient_Impl::GetPrimarySnpTrackForAccVer(const string& acc_ver)
  
  string CSnpPtisClient_Impl::x_GetPrimarySnpTrack(const TRequest& request)
  {
-    CGRPCClientContext context;
-    
-    ncbi::grpcapi::dbsnp::primary_track::PrimaryTrackReply reply;
-
-    auto status = stub->ForSeqId(&context, request, &reply);
+    int cur_retry = 0;
+    float cur_timeout = timeout;
+    float cur_wait_time = wait_time;
+    for ( ;; ) {
+        CGRPCClientContext context;
+        std::chrono::system_clock::time_point deadline =
+            std::chrono::system_clock::now() + std::chrono::microseconds(Int8(cur_timeout*1e6));
+        context.set_deadline(deadline);
      
-    if ( !status.ok() ) {
+        ncbi::grpcapi::dbsnp::primary_track::PrimaryTrackReply reply;
+        
+        auto status = stub->ForSeqId(&context, request, &reply);
+        
+        if ( status.ok() ) {
+            return reply.na_track_acc_with_filter();
+        }
+        
          if ( status.error_code() == grpc::StatusCode::NOT_FOUND ) {
              return string();
          }
-        NCBI_THROW(CException, eUnknown, status.error_message());
+        if ( ++cur_retry >= max_retries ) {
+            NCBI_THROW(CException, eUnknown, status.error_message());
+        }
+        LOG_POST(Trace<<
+                 "CSnpPtisClient: failed : "<<status.error_message()<<". "
+                 "Waiting "<<cur_wait_time<<" seconds before retry...");
+        SleepMicroSec(Int8(cur_wait_time*1e6));
+        cur_timeout = min(cur_timeout*timeout_mul + timeout_inc, timeout_max);
+        cur_wait_time = min(cur_wait_time*wait_time_mul + wait_time_inc, wait_time_max);
      }
-
-    // cout << reply.na_track_acc_with_filter() << "\t" << reply.tms_track_id() << endl;
-
-    return reply.na_track_acc_with_filter();
  }
  #endif
  
diff --git a/c++/src/objects/general/Dbtag.cpp b/c++/src/objects/general/Dbtag.cpp

index e4e2e368fb6db5ff7a2ff03d992c37099267e13f..f1801fcf7cee4ce70fc09ecf2c4b41c724a41f65 100644 (file)
--- a/c++/src/objects/general/Dbtag.cpp
+++ b/c++/src/objects/general/Dbtag.cpp
@@ -1,4 +1,4 @@
-/* $Id: Dbtag.cpp 600775 2020-01-27 19:10:07Z kans $
+/* $Id: Dbtag.cpp 617215 2020-09-28 17:22:41Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -182,6 +182,7 @@ static const TDbxrefPair kApprovedDbXrefs[] = {
      { "dbProbe", CDbtag::eDbtagType_dbProbe },
      { "dbSNP", CDbtag::eDbtagType_dbSNP },
      { "dbSTS", CDbtag::eDbtagType_dbSTS },
+    { "dbVar", CDbtag::eDbtagType_dbVar },
      { "dictyBase", CDbtag::eDbtagType_dictyBase },
      { "miRBase", CDbtag::eDbtagType_miRBase },
      { "niaEST", CDbtag::eDbtagType_niaEST }, 
@@ -569,7 +570,7 @@ void CDbtag::InvalidateType(void)
  //=========================================================================//
  
  // special case URLs
-static const char kFBan[] = "http://www.fruitfly.org/cgi-bin/annot/fban?";  // url not found \93Internal Server Error\94 tested 7/13/2016
+static const char kFBan[] = "http://www.fruitfly.org/cgi-bin/annot/fban?";  // url not found "Internal Server Error" tested 7/13/2016
  static const char kHInvDbHIT[] = "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=30&KEN_STR="; // access forbidden 7/13/2016
  static const char kHInvDbHIX[] = "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=31&KEN_STR=";  // \93Internal Server Error\94 tested 7/13/2016
  static const char kDictyPrim[] = "http://dictybase.org/db/cgi-bin/gene_page.pl?primary_id=";  // url not found tested 7/13/2016
@@ -722,6 +723,7 @@ static const TDbtUrl sc_url_prefix[] = {
      { CDbtag::eDbtagType_EPDnew, "http://epd.vital-it.ch/cgi-bin/get_doc?format=genome&entry=" },
      { CDbtag::eDbtagType_Ensembl, "https://www.ensembl.org/id/" }, // url seems incorrect, includes msg user has been redirected and  \93Error 404 Page not found\94 tested 7/13/2016  
      { CDbtag::eDbtagType_PseudoCAP, "http://www.pseudomonas.com/primarySequenceFeature/list?c1=name&e1=1&v1=" }, // url not found tested 7/13/2016
+    { CDbtag::eDbtagType_dbVar, "https://www.ncbi.nlm.nih.gov/dbvar/variants/" }
  };
  
  typedef CStaticPairArrayMap<CDbtag::EDbtagType, const char*> TUrlPrefixMap;
diff --git a/c++/src/objects/genomecoll/genomic_collections_cli.cpp b/c++/src/objects/genomecoll/genomic_collections_cli.cpp

index 2a47c5ee75acc20c65f0c40d1f968f1cddf10045..8a015efa6758a1aba5fe746ed3d0bd195afdac49 100644 (file)
--- a/c++/src/objects/genomecoll/genomic_collections_cli.cpp
+++ b/c++/src/objects/genomecoll/genomic_collections_cli.cpp
@@ -1,4 +1,4 @@
-/* $Id: genomic_collections_cli.cpp 603970 2020-03-19 15:32:22Z ivanov $
+/* $Id: genomic_collections_cli.cpp 617470 2020-10-01 17:56:09Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -94,7 +94,7 @@ CGenomicCollectionsService::CGenomicCollectionsService(const CArgs& args)
  void CGenomicCollectionsService::x_ConfigureConnection()
  {
      SetTimeout(&kTimeout);
-    SetRetryLimit(20);
+    SetRetryLimit(40);
  
      // it's a backward-compatibility fix for old versions of server (no much harm to leave it - only little data overhead is expected)
      // always send request and get response in ASN text format so that server can properly parse request
@@ -222,21 +222,34 @@ string CGenomicCollectionsService::ValidateChrType(const string& chrType, const
  {
      CGCClient_ValidateChrTypeLocRequest req;
      CGCClientResponse reply;
-
      req.SetType(chrType);
      req.SetLocation(chrLoc);
  
      LogRequest(req);
-
-    try {
-        return AskGet_chrtype_valid(req, &reply);
-    } catch (CException& ) {
-        if (reply.IsSrvr_error())
-            throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
-        throw;
-    }
+    
+    int retry_counter=0;
+    const int RETRY_MAX = 3;
+    for(retry_counter=1; retry_counter <= RETRY_MAX; retry_counter++) {
+        try {
+            return AskGet_chrtype_valid(req, &reply);
+        } catch (const CException& e) {
+            if( retry_counter == RETRY_MAX) {
+                if (reply.IsSrvr_error())
+                    throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
+                throw e;
+            } else {
+                ERR_POST(Warning <<"Try "<<retry_counter<<":"<<e.GetMsg());
+                SleepSec(10);
+            }
+        } // end catch
+    } // end retry for
+                
+    if (reply.IsSrvr_error())
+        throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
+    NCBI_THROW(CException, eUnknown, "ValidateChrType ran out of retries.");
  }
  
+
  CRef<CGCClient_AssemblyInfo> CGenomicCollectionsService::FindOneAssemblyBySequences(const string& sequence_acc, int filter, CGCClient_GetAssemblyBySequenceRequest::ESort sort)
  {
      CRef<CGCClient_AssemblySequenceInfo> asmseq_info(FindOneAssemblyBySequences(list<string>(1, sequence_acc), filter, sort));
@@ -283,13 +296,26 @@ CRef<CGCClient_AssembliesForSequences> CGenomicCollectionsService::x_FindAssembl
  
      LogRequest(req);
  
-    try {
-        return AskGet_assembly_by_sequence(req, &reply);
-    } catch (const CException& ) {
-        if (reply.IsSrvr_error())
-            throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
-        throw;
-    }
+    int retry_counter=0;
+    const int RETRY_MAX = 3;
+    for(retry_counter=1; retry_counter <= RETRY_MAX; retry_counter++) {
+        try {
+            return AskGet_assembly_by_sequence(req, &reply);
+        } catch (const CException& e) {
+            if( retry_counter == RETRY_MAX) {
+                if (reply.IsSrvr_error())
+                    throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
+                throw e;
+            } else {
+                ERR_POST(Warning <<"Try "<<retry_counter<<":"<<e.GetMsg());
+                SleepSec(10);
+            }
+        } // end catch
+    } // end retry for
+                
+    if (reply.IsSrvr_error())
+        throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
+    NCBI_THROW(CException, eUnknown, "FindAssembliesBySequences ran out of retries.");
  }
  
  
@@ -302,14 +328,27 @@ CRef<CGCClient_EquivalentAssemblies> CGenomicCollectionsService::GetEquivalentAs
      req.SetEquivalency(equivalency);
  
      LogRequest(req);
-
-    try {
-        return AskGet_equivalent_assemblies(req, &reply);
-    } catch (const CException& ) {
-        if (reply.IsSrvr_error())
-            throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
-        throw;
-    }
+        
+    int retry_counter=0;
+    const int RETRY_MAX = 3;
+    for(retry_counter=1; retry_counter <= RETRY_MAX; retry_counter++) {
+        try {
+            return AskGet_equivalent_assemblies(req, &reply);
+        } catch (const CException& e) {
+            if( retry_counter == RETRY_MAX) {
+                if (reply.IsSrvr_error())
+                    throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
+                throw e;
+            } else {
+                ERR_POST(Warning <<"Try "<<retry_counter<<":"<<e.GetMsg());
+                SleepSec(10);
+            }
+        } // end catch
+    } // end retry for
+                
+    if (reply.IsSrvr_error())
+        throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
+    NCBI_THROW(CException, eUnknown, "GetEquivalentAssemblies ran out of retries.");
  }
  
  
diff --git a/c++/src/objects/seq/Bioseq.cpp b/c++/src/objects/seq/Bioseq.cpp

index 662029b93d1afca14182d03df369f00be7a51cc0..97fa4c44711df1b85559c433ef2531ec4e48dca2 100644 (file)
--- a/c++/src/objects/seq/Bioseq.cpp
+++ b/c++/src/objects/seq/Bioseq.cpp
@@ -1,4 +1,4 @@
-/* $Id: Bioseq.cpp 502444 2016-05-24 18:46:25Z kans $
+/* $Id: Bioseq.cpp 614732 2020-08-21 13:43:27Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -178,8 +178,8 @@ int CBioseq::GetTaxId() const
  {
      /// A taxid can be found either in a source descriptor (the newer form) or in a
      /// org descriptor. If both are there, the source descriptor should have precedence.
-    int taxid_from_source = 0,
-        taxid_from_org = 0;
+    TTaxId taxid_from_source = ZERO_TAX_ID,
+        taxid_from_org = ZERO_TAX_ID;
  
      if (IsSetDescr()) {
          ITERATE (TDescr::Tdata, it, GetDescr().Get()) {
@@ -189,13 +189,13 @@ int CBioseq::GetTaxId() const
              } else if (desc.IsSource() && desc.GetSource().IsSetOrg()) {
                  taxid_from_source = desc.GetSource().GetOrg().GetTaxId();
              }
-            if (taxid_from_source) {
+            if (taxid_from_source != ZERO_TAX_ID) {
                  break;
              }
          }
      }
  
-    return taxid_from_source ? taxid_from_source : taxid_from_org;
+    return TAX_ID_TO(int, taxid_from_source != ZERO_TAX_ID ? taxid_from_source : taxid_from_org);
  }
  
  
diff --git a/c++/src/objects/seq/so_map.cpp b/c++/src/objects/seq/so_map.cpp

index 205ddb297478486289e953ffd395afcd70a01693..00b1b60a84e9523c55483f00331052de51ab2ae2 100644 (file)
--- a/c++/src/objects/seq/so_map.cpp
+++ b/c++/src/objects/seq/so_map.cpp
@@ -1,4 +1,4 @@
-/*  $Id: so_map.cpp 607816 2020-05-07 19:01:26Z ivanov $
+/*  $Id: so_map.cpp 617358 2020-09-30 12:55:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -65,6 +65,32 @@ bool CompareNoCase::operator()(
      return (tolower(*pLhs) < tolower(*pRhs)); 
  }
  
+//  ----------------------------------------------------------------------------
+string GetUnambiguousNamedQual(
+    const CSeq_feat& feature, 
+    const string& qualName)
+//  ----------------------------------------------------------------------------
+{
+    string namedQual;
+    const auto& quals = feature.GetQual();
+    for (const auto& qual: quals) {
+        if (!qual->CanGetQual()  ||  !qual->CanGetVal()) {
+            continue;
+        }
+        if (qual->GetQual() != qualName) {
+            continue;
+        }
+        if (namedQual.empty()) {
+            namedQual = qual->GetVal();
+            continue;
+        }
+        if (namedQual != qual->GetVal()) {
+            return "";
+        }
+    }
+    return namedQual;
+}
+
  //  ----------------------------------------------------------------------------
  CSoMap::TYPEMAP CSoMap::mMapSoTypeToId;
  //  ----------------------------------------------------------------------------
@@ -440,7 +466,7 @@ bool CSoMap::xFeatureMakeNcRna(
      CSeq_feat& feature)
  //  ----------------------------------------------------------------------------
  {
-    static const map<string, string, CompareNoCase> mTypeToClass = {
+    static const TYPEMAP mTypeToClass = {
          {"ncRNA", "other"},
      };
      feature.SetData().SetRna().SetType(CRNA_ref::eType_ncRNA);
@@ -491,7 +517,7 @@ bool CSoMap::xFeatureMakeMiscFeature(
      CSeq_feat& feature)
  //  ----------------------------------------------------------------------------
  {
-    static const map<string, string, CompareNoCase> mapTypeToQual = {
+    static const TYPEMAP mapTypeToQual = {
          {"TSS", "transcription_start_site"},
      };
      feature.SetData().SetImp().SetKey("misc_feature");
@@ -517,7 +543,7 @@ bool CSoMap::xFeatureMakeMiscRecomb(
      CSeq_feat& feature)
  //  ----------------------------------------------------------------------------
  {
-    static const map<string, string, CompareNoCase> mapTypeToQual = {
+    static const TYPEMAP mapTypeToQual = {
          {"meiotic_recombination_region", "meiotic"},
          {"mitotic_recombination_region", "mitotic"},
          {"non_allelic_homologous_recombination", "non_allelic_homologous"},
@@ -556,7 +582,7 @@ bool CSoMap::xFeatureMakeImp(
      CSeq_feat& feature)
  //  ----------------------------------------------------------------------------
  {
-    static const map<string, string, CompareNoCase> mapTypeToKey = {
+    static const TYPEMAP mapTypeToKey = {
          {"C_gene_segment", "C_region"},
          {"D_gene_segment", "D_segment"},
          {"D_loop", "D-loop"},
@@ -612,7 +638,7 @@ bool CSoMap::xFeatureMakeRegulatory(
      CSeq_feat& feature)
  //  ----------------------------------------------------------------------------
  {
-    static const map<string, string, CompareNoCase> mapTypeToQual = {
+    static const TYPEMAP mapTypeToQual = {
          {"DNAsel_hypersensitive_site", "DNase_I_hypersensitive_site"}, 
          {"GC_rich_promoter_region", "GC_signal"},
          {"boundary_element", "insulator"},
@@ -639,12 +665,12 @@ bool CSoMap::xFeatureMakeRepeatRegion(
      CSeq_feat& feature)
  //  ----------------------------------------------------------------------------
  {
-    static const map<string, string, CompareNoCase> mapTypeToSatellite = {
+    static const TYPEMAP mapTypeToSatellite = {
          {"microsatellite", "microsatellite"},
          {"minisatellite", "minisatellite"},
          {"satellite_DNA", "satellite"},
      };
-    static const map<string, string, CompareNoCase> mapTypeToRptType = {
+    static const TYPEMAP mapTypeToRptType = {
          {"tandem_repeat", "tandem"},
          {"inverted_repeat", "inverted"},
          {"direct_repeat", "direct"},
@@ -814,7 +840,7 @@ bool CSoMap::xMapGeneric(
          {CSeqFeatData::eSubtype_primer_bind, "primer_binding_site"}, 
          {CSeqFeatData::eSubtype_promoter, "promoter"}, 
          {CSeqFeatData::eSubtype_propeptide, "propeptide"}, 
-        {CSeqFeatData::eSubtype_prot, "protein"},
+        {CSeqFeatData::eSubtype_prot, "polypeptide"},
          {CSeqFeatData::eSubtype_protein_bind, "protein_binding_site"},
          {CSeqFeatData::eSubtype_rep_origin, "origin_of_replication"},
          {CSeqFeatData::eSubtype_S_region, "S_region"},
@@ -849,7 +875,7 @@ bool CSoMap::xMapRegion(
      string& so_type)
  //  ----------------------------------------------------------------------------
  {
-    so_type = "region";
+    so_type = "biological_region";
      return true;
  }
  
@@ -952,11 +978,11 @@ bool CSoMap::xMapMiscFeature(
      string& so_type)
  //  ----------------------------------------------------------------------------
  {
-    map<string, string> mapFeatClassToSoType = {
+    static const TYPEMAP mapFeatClassToSoType = {
          {"transcription_start_site", "TSS"},
          {"other", "sequence_feature"},
      };
-    string feat_class = feature.GetNamedQual("feat_class");
+    string feat_class = GetUnambiguousNamedQual(feature, "feat_class");
      if (feat_class.empty()) {
          so_type = "sequence_feature";
          return true;
@@ -976,7 +1002,7 @@ bool CSoMap::xMapMiscRecomb(
      string& so_type)
  //  ----------------------------------------------------------------------------
  {
-    map<string, string> mapRecombClassToSoType = {
+    static const TYPEMAP mapRecombClassToSoType = {
          {"meiotic", "meiotic_recombination_region"},
          {"mitotic", "mitotic_recombination_region"},
          {"non_allelic_homologous", "non_allelic_homologous_recombination_region"},
@@ -985,7 +1011,7 @@ bool CSoMap::xMapMiscRecomb(
          {"non_allelic_homologous_recombination", "non_allelic_homologous_recombination_region"},
          {"other", "recombination_feature"},
      };
-    string recomb_class = feature.GetNamedQual("recombination_class");
+    string recomb_class = GetUnambiguousNamedQual(feature, "recombination_class");
      if (recomb_class.empty()) {
          so_type = "recombination_feature";
          return true;
@@ -1022,7 +1048,7 @@ bool CSoMap::xMapNcRna(
      string& so_type)
  //  ----------------------------------------------------------------------------
  {
-    map<string, string> mapNcRnaClassToSoType = {
+    static const TYPEMAP mapNcRnaClassToSoType = {
          {"antisense_RNA", "antisense_RNA"},
          {"autocatalytically_spliced_intron", "autocatalytically_spliced_intron"},
          {"guide_RNA", "guide_RNA"},
@@ -1044,7 +1070,7 @@ bool CSoMap::xMapNcRna(
          {"vault_RNA", "vault_RNA"},
          {"Y_RNA", "Y_RNA"},
      };
-    string ncrna_class = feature.GetNamedQual("ncRNA_class");
+    string ncrna_class = GetUnambiguousNamedQual(feature, "ncRNA_class");
      if (ncrna_class.empty()) {
          if (feature.IsSetData()  &&
                  feature.GetData().IsRna()  &&
@@ -1084,8 +1110,8 @@ bool CSoMap::xMapRegulatory(
      string& so_type)
  //  ----------------------------------------------------------------------------
  {
-    map<string, string> mapRegulatoryClassToSoType = {
-        {"DNase_I_hypersensitive_site", "DNAseI_hypersensitive_site"},
+    static const TYPEMAP mapRegulatoryClassToSoType = {
+        {"DNase_I_hypersensitive_site", "DNaseI_hypersensitive_site"},
          {"GC_signal", "GC_rich_promoter_region"},
          {"enhancer_blocking_element", "enhancer_blocking_element"},
          {"epigenetically_modified_region", "epigenetically_modified_region"},
@@ -1096,7 +1122,7 @@ bool CSoMap::xMapRegulatory(
          {"ribosome_binding_site", "ribosome_entry_site"},
      };
  
-    string regulatory_class = feature.GetNamedQual("regulatory_class");
+    string regulatory_class = GetUnambiguousNamedQual(feature, "regulatory_class");
      if (regulatory_class.empty()) {
          so_type = "regulatory_region";
          return true;
@@ -1124,11 +1150,11 @@ bool CSoMap::xMapBond(
      string& so_type)
  //  ----------------------------------------------------------------------------
  {
-    map<string, string> mapBondTypeToSoType = {
+    static const TYPEMAP mapBondTypeToSoType = {
          {"disulfide", "disulfide_bond"},
          {"xlink", "cross_link"},
      };
-    string bond_type = feature.GetNamedQual("bond_type");
+    string bond_type = GetUnambiguousNamedQual(feature, "bond_type");
      if (bond_type.empty()) {
          return false;
      }
@@ -1141,18 +1167,19 @@ bool CSoMap::xMapBond(
      return true;
  }
  
+
  //  ----------------------------------------------------------------------------
  bool CSoMap::xMapRepeatRegion(
      const CSeq_feat& feature,
      string& so_type)
  //  ----------------------------------------------------------------------------
  {
-    map<string, string> mapSatelliteToSoType = {
+    static const TYPEMAP mapSatelliteToSoType = {
          {"satellite", "satellite_DNA"},
          {"microsatellite", "microsatellite"},
          {"minisatellite", "minisatellite"},
      };
-    string satellite = feature.GetNamedQual("satellite");
+    string satellite = GetUnambiguousNamedQual(feature, "satellite");
      if (!satellite.empty()) {
          auto cit = mapSatelliteToSoType.find(satellite);
          if (cit == mapSatelliteToSoType.end()) {
@@ -1162,7 +1189,7 @@ bool CSoMap::xMapRepeatRegion(
          return true;
      }
  
-    map<string, string> mapRptTypeToSoType = {
+    static const TYPEMAP mapRptTypeToSoType = {
          {"tandem", "tandem_repeat"},
          {"inverted", "inverted_repeat"},
          {"flanking", "repeat_region"},
@@ -1175,7 +1202,7 @@ bool CSoMap::xMapRepeatRegion(
          {"y_prime_element", "Y_prime_element"},
          {"other", "repeat_region"},
      };
-    string rpt_type = feature.GetNamedQual("rpt_type");
+    string rpt_type = GetUnambiguousNamedQual(feature, "rpt_type");
      if (rpt_type.empty()) {
          so_type = "repeat_region";
          return true;
diff --git a/c++/src/objects/seqfeat/OrgMod.cpp b/c++/src/objects/seqfeat/OrgMod.cpp

index 993fc0c408c8b49a46422fe5199efd5f9e23fe21..fd204e140716010c3e4bf17e005934c2fdf3014c 100644 (file)
--- a/c++/src/objects/seqfeat/OrgMod.cpp
+++ b/c++/src/objects/seqfeat/OrgMod.cpp
@@ -1,4 +1,4 @@
-/* $Id: OrgMod.cpp 602802 2020-03-02 23:09:16Z kans $
+/* $Id: OrgMod.cpp 613887 2020-08-13 18:36:41Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -67,7 +67,8 @@ COrgMod::TSubtype COrgMod::GetSubtypeValue(const string& str,
      replace(name.begin(), name.end(), ' ', '-');
      
      if (name == "note" ||
-        NStr::EqualNocase(name, "orgmod-note")) {
+        NStr::EqualNocase(name, "orgmod-note") ||
+        NStr::EqualNocase(name, "note-orgmod")) {
          return eSubtype_other;
      } else if (vocabulary == eVocabulary_insdc) {
          if (name == "host" || name == "specific-host") {
@@ -90,7 +91,8 @@ bool COrgMod::IsValidSubtypeName(const string& str,
      replace(name.begin(), name.end(), ' ', '-');
  
      if (name == "note" ||
-        name == "orgmod-note") {
+        name == "orgmod-note" ||
+        name == "note-orgmod") {
          return true;
      } else if (vocabulary == eVocabulary_insdc) {
          if (name == "host" || name == "sub-strain") {
diff --git a/c++/src/objects/seqfeat/SeqFeatData.cpp b/c++/src/objects/seqfeat/SeqFeatData.cpp

index 90251befe9e3fffaaad31848de3b229edc4dc3d6..3f55821b9aa526e15e08759f6b351132dfe15133 100644 (file)
--- a/c++/src/objects/seqfeat/SeqFeatData.cpp
+++ b/c++/src/objects/seqfeat/SeqFeatData.cpp
@@ -1,4 +1,4 @@
-/* $Id: SeqFeatData.cpp 599381 2019-12-26 23:31:18Z kans $
+/* $Id: SeqFeatData.cpp 613780 2020-08-12 16:42:40Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -169,7 +169,7 @@ static const TInfoPair kInfoPairs[] = {
      FEAT_INFO_PAIR(Txinit, txinit, "TxInit", "promoter"),
      FEAT_INFO_PAIR(Num, num, "Num", "misc_feature"),
      FEAT_INFO_PAIR(Psec_str, psec_str, "SecStr", "SecStr"),
-    FEAT_INFO_PAIR(Non_std_residue, non_std_residue, "NonStdRes", "misc_feature"),
+    FEAT_INFO_PAIR(Non_std_residue, non_std_residue, "NonStdRes", "NonStdRes"),
      FEAT_INFO_PAIR(Het, het, "Het", "Het"),
      FEAT_INFO_PAIR(Biosrc, biosrc, "Src", "source"),
      FEAT_INFO_PAIR(Clone, clone, "CloneRef", "misc_feature"),
@@ -2646,8 +2646,30 @@ const CSeqFeatData::TSubTypeQualifiersMap& CSeqFeatData::s_GetLegalQualMap() noe
             eQual_usedin,
  } },
  
-//{ eSubtype_non_std_residue, {
-//},
+{ eSubtype_non_std_residue, {
+           eQual_allele,
+           eQual_citation,
+           eQual_db_xref,
+           eQual_exception,
+           eQual_experiment,
+           eQual_function,
+           eQual_gene,
+           eQual_gene_synonym,
+           eQual_inference,
+           eQual_label,
+           eQual_locus_tag,
+           eQual_map,
+           eQual_non_std_residue,
+           eQual_note,
+           eQual_number,
+           eQual_old_locus_tag,
+           eQual_phenotype,
+           eQual_product,
+           eQual_pseudo,
+           eQual_pseudogene,
+           eQual_standard_name,
+           eQual_usedin,
+} },
  
  //sameasmisc_feature
  { eSubtype_het, {
@@ -3063,6 +3085,7 @@ MAKE_TWOWAY_CONST_MAP(sc_QualPairs, CSeqFeatData::EQualifier, ct::tagStrNocase,
      { CSeqFeatData::eQual_mol_type, "mol_type" },
      { CSeqFeatData::eQual_name, "name" },
      { CSeqFeatData::eQual_nomenclature, "nomenclature" },
+    { CSeqFeatData::eQual_non_std_residue, "non_std_residue" },
      { CSeqFeatData::eQual_ncRNA_class, "ncRNA_class" },
      { CSeqFeatData::eQual_note, "note" },
      { CSeqFeatData::eQual_number, "number" },
@@ -4428,6 +4451,7 @@ CSeqFeatData::EFeatureLocationAllowed CSeqFeatData::AllowedFeatureLocation(ESubt
          case eSubtype_propeptide_aa:
          case eSubtype_bond:
          case eSubtype_psec_str:
+        case eSubtype_non_std_residue:
              rval = eFeatureLocationAllowed_ProtOnly;
              break;
          case eSubtype_region:
diff --git a/c++/src/objects/seqfeat/SubSource.cpp b/c++/src/objects/seqfeat/SubSource.cpp

index a0a811b95c86001f317eee3e75aa0b7e31d7957c..8aa5ed46eb8492ee7a1bd2358b910d677e3b5cf1 100644 (file)
--- a/c++/src/objects/seqfeat/SubSource.cpp
+++ b/c++/src/objects/seqfeat/SubSource.cpp
@@ -1,4 +1,4 @@
-/* $Id: SubSource.cpp 605788 2020-04-15 14:55:53Z ivanov $
+/* $Id: SubSource.cpp 615787 2020-09-03 18:18:36Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -97,7 +97,8 @@ CSubSource::TSubtype CSubSource::GetSubtypeValue(const string& str,
  
      if ( NStr::EqualNocase(name, "note") ||
           NStr::EqualNocase(name, "subsource-note") ||
-         NStr::EqualNocase(name, "subsrc-note")) {
+         NStr::EqualNocase(name, "subsrc-note") ||
+         NStr::EqualNocase(name, "note-subsource")) {
          return eSubtype_other;
      } else if (vocabulary == eVocabulary_insdc) {
          // consider a table if more special cases arise.
@@ -126,7 +127,8 @@ bool CSubSource::IsValidSubtypeName(const string& str,
  
      if ( NStr::EqualNocase(name, "note") ||
           NStr::EqualNocase(name, "subsource-note") ||
-         NStr::EqualNocase(name, "subsrc-note")) {
+         NStr::EqualNocase(name, "subsrc-note") ||
+         NStr::EqualNocase(name, "note-subsource")) {
           return true;
      }
      if (vocabulary == eVocabulary_insdc) {
@@ -2084,15 +2086,6 @@ string CSubSource::ValidateLatLonCountry (const string& input_countryname, strin
          return kEmptyStr;
      }
  
-
-    if (NStr::EqualNocase (country, "China") && NStr::EqualNocase (cguess, "Hong Kong")) {
-        delete id;
-        return kEmptyStr;
-    }
-    if (NStr::EqualNocase (country, "USA") && NStr::EqualNocase (cguess, "Puerto Rico")) {
-        delete id;
-        return kEmptyStr;
-    }
      if (NStr::EqualNocase (country, "State of Palestine") &&
          (NStr::EqualNocase (cguess, "Gaza Strip") ||
           NStr::EqualNocase (cguess, "West Bank"))) {
@@ -2645,11 +2638,12 @@ bool CSubSource::IsEndogenousVirusNameValid(const string& value)
  //   7.        Spaces and other printable characters are permitted
  //   8.        Must not contain the word "plasmid" (ignoring case)
  //   9.        Must not contain the word "chromosome" (ignoring case)
-//   10.       Must not contain the phrase "linkage group" (ignoring case)
-//   11.       Must not contain the series of letters "chr" (ignoring case)
-//   12.       Must not contain the taxname (ignoring case)
-//   14.  Must not contain the genus (ignoring case)
+//   10. Must not contain the phrase "linkage group" (ignoring case)
+//   11. Must not contain the series of letters "chr" (ignoring case)
+//   12. Must not contain the taxname (ignoring case)
+//   14. Must not contain the genus (ignoring case)
  //   15. Must not contain the species (ignoring case)
+//       except when the species first occurs right after a leading lowercase 'p' (e.g., JX416328)
  //   16. Must not contain the series of letters "chrm" (ignoring case)
  //   17. Must not contain the series of letters "chrom" (ignoring case)
  //   18. Must not contain the phrase "linkage-group" (ignoring case)
@@ -2676,13 +2670,18 @@ bool CSubSource::x_MeetsCommonChromosomeLinkageGroupPlasmidNameRules(const strin
          }
          size_t pos = NStr::Find(taxname, " ");
          if (pos != NPOS) {
-            if (NStr::FindNoCase(value, taxname.substr(0, pos)) != NPOS) {
+            string genus = taxname.substr(0, pos);
+            if (NStr::FindNoCase(value, genus) != NPOS) {
                  // B.14
                  return false;
              }
-            if (NStr::FindNoCase(value, taxname.substr(pos + 1)) != NPOS) {
-                // B.15
-                return false;
+            string species = taxname.substr(pos + 1);
+            pos = NStr::FindNoCase(value, species);
+            if (pos != NPOS) {
+                if (pos != 1 || value[0] != 'p') {
+                    // B.15
+                    return false;
+                }
              }
          }
      }
diff --git a/c++/src/objects/seqfeat/ecnum_ambiguous.inc b/c++/src/objects/seqfeat/ecnum_ambiguous.inc

index e8b03951d961ee0bff3c08da4611b9bc8772ca54..5ce04fbf32f86dff74dadf67f815dd06422d362e 100644 (file)
--- a/c++/src/objects/seqfeat/ecnum_ambiguous.inc
+++ b/c++/src/objects/seqfeat/ecnum_ambiguous.inc
@@ -1,4 +1,4 @@
-/*  $Id: ecnum_ambiguous.inc 578243 2019-01-15 21:20:22Z kans $
+/*  $Id: ecnum_ambiguous.inc 615790 2020-09-03 18:19:26Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -45,6 +45,8 @@ static const char* const kECNum_ambiguous[] = {
      "1.1.4.n\tWith a disulfide as acceptor",
      "1.1.5.-\tWith a quinone or similar compound as acceptor",
      "1.1.5.n\tWith a quinone or similar compound as acceptor",
+    "1.1.7.-\tWith an iron-sulfur protein as acceptor",
+    "1.1.7.n\tWith an iron-sulfur protein as acceptor",
      "1.1.9.-\tWith a copper protein as acceptor",
      "1.1.9.n\tWith a copper protein as acceptor",
      "1.1.98.-\tWith other, known, acceptors",
@@ -763,6 +765,8 @@ static const char* const kECNum_ambiguous[] = {
      "6.2.n.n\tForming carbon-sulfur bonds",
      "6.2.1.-\tAcid--thiol ligases",
      "6.2.1.n\tAcid--thiol ligases",
+    "6.2.2.-\tAmide--thiol ligases",
+    "6.2.2.n\tAmide--thiol ligases",
      "6.3.-.-\tForming carbon-nitrogen bonds",
      "6.3.n.n\tForming carbon-nitrogen bonds",
      "6.3.1.-\tAcid--ammonia (or amine) ligases (amide synthases)",
diff --git a/c++/src/objects/seqfeat/ecnum_ambiguous.txt b/c++/src/objects/seqfeat/ecnum_ambiguous.txt

index 08af4f2cf0175edb78a76f5f8275dcf41bc375d4..f5731d2836e1e5f225441fa6853872c5a9380ee4 100644 (file)
--- a/c++/src/objects/seqfeat/ecnum_ambiguous.txt
+++ b/c++/src/objects/seqfeat/ecnum_ambiguous.txt
@@ -12,6 +12,8 @@
  1.1.4.n        With a disulfide as acceptor
  1.1.5.-        With a quinone or similar compound as acceptor
  1.1.5.n        With a quinone or similar compound as acceptor
+1.1.7.-        With an iron-sulfur protein as acceptor
+1.1.7.n        With an iron-sulfur protein as acceptor
  1.1.9.-        With a copper protein as acceptor
  1.1.9.n        With a copper protein as acceptor
  1.1.98.-       With other, known, acceptors
@@ -730,6 +732,8 @@
  6.2.n.n        Forming carbon-sulfur bonds
  6.2.1.-        Acid--thiol ligases
  6.2.1.n        Acid--thiol ligases
+6.2.2.-        Amide--thiol ligases
+6.2.2.n        Amide--thiol ligases
  6.3.-.-        Forming carbon-nitrogen bonds
  6.3.n.n        Forming carbon-nitrogen bonds
  6.3.1.-        Acid--ammonia (or amine) ligases (amide synthases)
diff --git a/c++/src/objects/seqfeat/ecnum_replaced.inc b/c++/src/objects/seqfeat/ecnum_replaced.inc

index 6247a763f2f5cfad2ae5faf20fa6ee0c7739410f..385a290fd0340f8f1f9a3644caee007eca6a4551 100644 (file)
--- a/c++/src/objects/seqfeat/ecnum_replaced.inc
+++ b/c++/src/objects/seqfeat/ecnum_replaced.inc
@@ -1,4 +1,4 @@
-/*  $Id: ecnum_replaced.inc 604099 2020-03-23 12:20:07Z ivanov $
+/*  $Id: ecnum_replaced.inc 612554 2020-07-23 15:34:08Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -119,6 +119,7 @@ static const char* const kECNum_replaced[] = {
      "1.3.1.52\t1.3.8.5",
      "1.3.1.63\t1.21.1.2",
      "1.3.1.80\t1.3.7.12",
+    "1.3.1.99\t1.3.1.122",
      "1.3.1.n1\t1.3.1.87",
      "1.3.1.n2\t1.14.19.52",
      "1.3.2.1\t1.3.8.1",
@@ -230,6 +231,7 @@ static const char* const kECNum_replaced[] = {
      "1.8.6.1\t2.5.1.18",
      "1.8.99.3\t1.8.99.5",
      "1.8.99.4\t1.8.4.8",
+    "1.9.3.1\t7.1.1.9",
      "1.9.3.2\t1.7.2.1",
      "1.9.99.1\t1.9.98.1",
      "1.10.2.2\t7.1.1.8",
@@ -244,6 +246,7 @@ static const char* const kECNum_replaced[] = {
      "1.10.99.2\t1.10.5.1",
      "1.10.99.3\t1.23.5.1",
      "1.11.1.4\t1.13.11.11",
+    "1.11.1.15\t1.11.1.24",
      "1.12.1.1\t1.12.7.2",
      "1.12.7.1\t1.12.7.2",
      "1.12.99.1\t1.12.98.1",
@@ -696,7 +699,7 @@ static const char* const kECNum_replaced[] = {
      "2.7.7.17\t4.6.1.19",
      "2.7.7.21\t2.7.7.72",
      "2.7.7.25\t2.7.7.72",
-    "2.7.7.26\t3.1.27.3",
+    "2.7.7.26\t4.6.1.24",
      "2.7.7.29\t2.7.7.28",
      "2.7.7.54\t6.3.2.40",
      "2.7.7.55\t6.3.2.40",
@@ -742,7 +745,7 @@ static const char* const kECNum_replaced[] = {
      "3.1.4.5\t3.1.21.1",
      "3.1.4.6\t3.1.22.1",
      "3.1.4.7\t3.1.31.1",
-    "3.1.4.8\t3.1.27.3",
+    "3.1.4.8\t4.6.1.24",
      "3.1.4.9\t3.1.30.2",
      "3.1.4.10\t4.6.1.13",
      "3.1.4.15\t2.7.7.89",
@@ -762,6 +765,9 @@ static const char* const kECNum_replaced[] = {
      "3.1.4.n1\t3.1.4.53",
      "3.1.7.4\t4.2.1.133\t4.2.3.141",
      "3.1.7.7\t4.2.3.194",
+    "3.1.11.7\t3.6.1.71",
+    "3.1.11.8\t3.6.1.70",
+    "3.1.12.2\t3.6.1.72",
      "3.1.22.3\t3.1.21.7",
      "3.1.23.1\t3.1.21.4",
      "3.1.23.2\t3.1.21.4",
@@ -830,6 +836,7 @@ static const char* const kECNum_replaced[] = {
      "3.1.26.n1\t3.1.26.12",
      "3.1.27.1\t4.6.1.19",
      "3.1.27.2\t4.6.1.22",
+    "3.1.27.3\t4.6.1.24",
      "3.1.27.4\t4.6.1.20",
      "3.1.27.5\t4.6.1.18",
      "3.1.27.6\t4.6.1.21",
@@ -840,6 +847,7 @@ static const char* const kECNum_replaced[] = {
      "3.2.1.29\t3.2.1.52",
      "3.2.1.30\t3.2.1.52",
      "3.2.1.34\t3.2.1.35",
+    "3.2.1.44\t3.2.1.211",
      "3.2.1.69\t3.2.1.41",
      "3.2.1.79\t3.2.1.55",
      "3.2.1.110\t3.2.1.97",
@@ -1098,6 +1106,7 @@ static const char* const kECNum_replaced[] = {
      "4.1.2.31\t4.1.3.16",
      "4.1.2.37\t4.1.2.46\t4.1.2.47",
      "4.1.2.39\t4.1.2.46\t4.1.2.47",
+    "4.1.2.41\t4.1.2.61",
      "4.1.2.n1\t4.1.2.44",
      "4.1.2.n3\t4.1.2.53",
      "4.1.2.n4\t4.1.2.52",
diff --git a/c++/src/objects/seqfeat/ecnum_replaced.txt b/c++/src/objects/seqfeat/ecnum_replaced.txt

index 4381f940670345025a539d49bd5159f99ae89fcf..256739191c641e611155059c68fe4d846672bf2f 100644 (file)
--- a/c++/src/objects/seqfeat/ecnum_replaced.txt
+++ b/c++/src/objects/seqfeat/ecnum_replaced.txt
@@ -86,6 +86,7 @@
  1.3.1.52       1.3.8.5
  1.3.1.63       1.21.1.2
  1.3.1.80       1.3.7.12
+1.3.1.99       1.3.1.122
  1.3.1.n1       1.3.1.87
  1.3.1.n2       1.14.19.52
  1.3.2.1        1.3.8.1
@@ -197,6 +198,7 @@
  1.8.6.1        2.5.1.18
  1.8.99.3       1.8.99.5
  1.8.99.4       1.8.4.8
+1.9.3.1        7.1.1.9
  1.9.3.2        1.7.2.1
  1.9.99.1       1.9.98.1
  1.10.2.2       7.1.1.8
@@ -211,6 +213,7 @@
  1.10.99.2      1.10.5.1
  1.10.99.3      1.23.5.1
  1.11.1.4       1.13.11.11
+1.11.1.15      1.11.1.24
  1.12.1.1       1.12.7.2
  1.12.7.1       1.12.7.2
  1.12.99.1      1.12.98.1
@@ -663,7 +666,7 @@
  2.7.7.17       4.6.1.19
  2.7.7.21       2.7.7.72
  2.7.7.25       2.7.7.72
-2.7.7.26       3.1.27.3
+2.7.7.26       4.6.1.24
  2.7.7.29       2.7.7.28
  2.7.7.54       6.3.2.40
  2.7.7.55       6.3.2.40
@@ -709,7 +712,7 @@
  3.1.4.5        3.1.21.1
  3.1.4.6        3.1.22.1
  3.1.4.7        3.1.31.1
-3.1.4.8        3.1.27.3
+3.1.4.8        4.6.1.24
  3.1.4.9        3.1.30.2
  3.1.4.10       4.6.1.13
  3.1.4.15       2.7.7.89
@@ -729,6 +732,9 @@
  3.1.4.n1       3.1.4.53
  3.1.7.4        4.2.1.133       4.2.3.141
  3.1.7.7        4.2.3.194
+3.1.11.7       3.6.1.71
+3.1.11.8       3.6.1.70
+3.1.12.2       3.6.1.72
  3.1.22.3       3.1.21.7
  3.1.23.1       3.1.21.4
  3.1.23.2       3.1.21.4
@@ -797,6 +803,7 @@
  3.1.26.n1      3.1.26.12
  3.1.27.1       4.6.1.19
  3.1.27.2       4.6.1.22
+3.1.27.3       4.6.1.24
  3.1.27.4       4.6.1.20
  3.1.27.5       4.6.1.18
  3.1.27.6       4.6.1.21
@@ -807,6 +814,7 @@
  3.2.1.29       3.2.1.52
  3.2.1.30       3.2.1.52
  3.2.1.34       3.2.1.35
+3.2.1.44       3.2.1.211
  3.2.1.69       3.2.1.41
  3.2.1.79       3.2.1.55
  3.2.1.110      3.2.1.97
@@ -1065,6 +1073,7 @@
  4.1.2.31       4.1.3.16
  4.1.2.37       4.1.2.46        4.1.2.47
  4.1.2.39       4.1.2.46        4.1.2.47
+4.1.2.41       4.1.2.61
  4.1.2.n1       4.1.2.44
  4.1.2.n3       4.1.2.53
  4.1.2.n4       4.1.2.52
diff --git a/c++/src/objects/seqfeat/ecnum_specific.inc b/c++/src/objects/seqfeat/ecnum_specific.inc

index 31b7c274481de0c6b2b1dcd64c10c8550499569e..f5a794121fc4b505e506a22f353bfc1136c3d3b0 100644 (file)
--- a/c++/src/objects/seqfeat/ecnum_specific.inc
+++ b/c++/src/objects/seqfeat/ecnum_specific.inc
@@ -1,4 +1,4 @@
-/*  $Id: ecnum_specific.inc 604099 2020-03-23 12:20:07Z ivanov $
+/*  $Id: ecnum_specific.inc 615790 2020-09-03 18:19:26Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -429,6 +429,10 @@ static const char* const kECNum_specific[] = {
      "1.1.1.417\t3-beta-hydroxysteroid-4-beta-carboxylate 3-dehydrogenase (decarboxylating)",
      "1.1.1.418\tPlant 3-beta-hydroxysteroid-4-alpha-carboxylate 3-dehydrogenase (decarboxylating)",
      "1.1.1.419\tNepetalactol dehydrogenase",
+    "1.1.1.420\tD-apiose dehydrogenase",
+    "1.1.1.421\tD-apionate oxidoisomerase",
+    "1.1.1.422\tPseudoephedrine dehydrogenase",
+    "1.1.1.423\tEphedrine dehydrogenase",
      "1.1.1.n4\t(-)-trans-carveol dehydrogenase",
      "1.1.1.n5\t3-methylmalate dehydrogenase",
      "1.1.1.n11\tSuccinic semialdehyde reductase",
@@ -441,6 +445,7 @@ static const char* const kECNum_specific[] = {
      "1.1.2.7\tMethanol dehydrogenase (cytochrome c)",
      "1.1.2.8\tAlcohol dehydrogenase (cytochrome c)",
      "1.1.2.9\t1-butanol dehydrogenase (cytochrome c)",
+    "1.1.2.10\tLanthanide-dependent methanol dehydrogenase",
      "1.1.3.4\tGlucose oxidase",
      "1.1.3.5\tHexose oxidase",
      "1.1.3.6\tCholesterol oxidase",
@@ -495,6 +500,7 @@ static const char* const kECNum_specific[] = {
      "1.1.98.4\tF420H(2):quinone oxidoreductase",
      "1.1.98.5\tSecondary-alcohol dehydrogenase (coenzyme-F420)",
      "1.1.98.6\tRibonucleoside-triphosphate reductase (formate)",
+    "1.1.98.7\tSerine-type anaerobic sulfatase-maturating enzyme",
      "1.1.99.1\tCholine dehydrogenase",
      "1.1.99.2\tL-2-hydroxyglutarate dehydrogenase",
      "1.1.99.3\tGluconate 2-dehydrogenase (acceptor)",
@@ -614,6 +620,7 @@ static const char* const kECNum_specific[] = {
      "1.2.1.100\t5-formyl-3-hydroxy-2-methylpyridine 4-carboxylate 5-dehydrogenase",
      "1.2.1.101\tL-tyrosine reductase",
      "1.2.1.102\tIsopyridoxal dehydrogenase (5-pyridoxate-forming)",
+    "1.2.1.103\t[Amino-group carrier protein]-6-phospho-L-2-aminoadipate reductase",
      "1.2.1.n2\tFatty acyl-CoA reductase",
      "1.2.2.1\tFormate dehydrogenase (cytochrome)",
      "1.2.2.4\tCarbon-monoxide dehydrogenase (cytochrome b-561)",
@@ -678,7 +685,7 @@ static const char* const kECNum_specific[] = {
      "1.3.1.31\t2-enoate reductase",
      "1.3.1.32\tMaleylacetate reductase",
      "1.3.1.33\tProtochlorophyllide reductase",
-    "1.3.1.34\t2,4-dienoyl-CoA reductase (NADPH)",
+    "1.3.1.34\t2,4-dienoyl-CoA reductase ((2E)-enoyl-CoA-producing)",
      "1.3.1.36\tGeissoschizine dehydrogenase",
      "1.3.1.37\tCis-2-enoyl-CoA reductase (NADPH)",
      "1.3.1.38\tTrans-2-enoyl-CoA reductase (NADPH)",
@@ -735,7 +742,6 @@ static const char* const kECNum_specific[] = {
      "1.3.1.96\tBotryococcus squalene synthase",
      "1.3.1.97\tBotryococcene synthase",
      "1.3.1.98\tUDP-N-acetylmuramate dehydrogenase",
-    "1.3.1.99\tIridoid synthase",
      "1.3.1.100\tChanoclavine-I aldehyde reductase",
      "1.3.1.101\t2,3-bis-O-geranylgeranyl-sn-glycerol 1-phosphate reductase (NAD(P)H)",
      "1.3.1.102\t2-alkenal reductase (NADP(+))",
@@ -757,6 +763,10 @@ static const char* const kECNum_specific[] = {
      "1.3.1.118\tMeromycolic acid enoyl-[acyl-carrier-protein] reductase",
      "1.3.1.119\tChlorobenzene dihydrodiol dehydrogenase",
      "1.3.1.120\tCyclohexane-1-carbonyl-CoA reductase (NADP(+))",
+    "1.3.1.121\t4-amino-4-deoxyprephenate dehydrogenase",
+    "1.3.1.122\t(S)-8-oxocitronellyl enol synthase",
+    "1.3.1.123\t7-epi-iridoid synthase",
+    "1.3.1.124\t2,4-dienoyl-CoA reductase ((3E)-enoyl-CoA-producing)",
      "1.3.1.n3\tCurcumin reductase",
      "1.3.2.3\tL-galactonolactone dehydrogenase",
      "1.3.3.3\tCoproporphyrinogen oxidase",
@@ -806,6 +816,7 @@ static const char* const kECNum_specific[] = {
      "1.3.8.12\t(2S)-methylsuccinyl-CoA dehydrogenase",
      "1.3.8.13\tCrotonobetainyl-CoA reductase",
      "1.3.8.14\tL-prolyl-[peptidyl-carrier protein] dehydrogenase",
+    "1.3.8.15\t3-(aryl)acrylate reductase",
      "1.3.98.1\tDihydroorotate oxidase (fumarate)",
      "1.3.98.3\tCoproporphyrinogen dehydrogenase",
      "1.3.98.4\t5a,11a-dehydrotetracycline reductase",
@@ -1084,6 +1095,8 @@ static const char* const kECNum_specific[] = {
      "1.8.4.12\tPeptide-methionine (R)-S-oxide reductase",
      "1.8.4.13\tL-methionine (S)-S-oxide reductase",
      "1.8.4.14\tL-methionine (R)-S-oxide reductase",
+    "1.8.4.15\tProtein dithiol oxidoreductase (disulfide-forming)",
+    "1.8.4.16\tThioredoxin:protein disulfide reductase",
      "1.8.5.1\tGlutathione dehydrogenase (ascorbate)",
      "1.8.5.2\tThiosulfate dehydrogenase (quinone)",
      "1.8.5.3\tRespiratory dimethylsulfoxide reductase",
@@ -1092,6 +1105,7 @@ static const char* const kECNum_specific[] = {
      "1.8.5.6\tSulfite dehydrogenase (quinone)",
      "1.8.5.7\tGlutathionyl-hydroquinone reductase",
      "1.8.5.8\tEukaryotic sulfide quinone oxidoreductase",
+    "1.8.5.9\tProtein dithiol:quinone oxidoreductase DsbB",
      "1.8.7.1\tAssimilatory sulfite reductase (ferredoxin)",
      "1.8.7.2\tFerredoxin:thioredoxin reductase",
      "1.8.7.3\tFerredoxin:CoB-CoM heterodisulfide reductase",
@@ -1101,9 +1115,9 @@ static const char* const kECNum_specific[] = {
      "1.8.98.4\tCoenzyme F420:CoB-CoM heterodisulfide,ferredoxin reductase",
      "1.8.98.5\tH(2):CoB-CoM heterodisulfide,ferredoxin reductase",
      "1.8.98.6\tFormate:CoB-CoM heterodisulfide,ferredoxin reductase",
+    "1.8.98.7\tCysteine-type anaerobic sulfatase-maturating enzyme",
      "1.8.99.2\tAdenylyl-sulfate reductase",
      "1.8.99.5\tDissimilatory sulfite reductase",
-    "1.9.3.1\tCytochrome-c oxidase",
      "1.9.6.1\tNitrate reductase (cytochrome)",
      "1.9.98.1\tIron--cytochrome-c reductase",
      "1.10.1.1\tTrans-acenaphthene-1,2-diol dehydrogenase",
@@ -1118,6 +1132,7 @@ static const char* const kECNum_specific[] = {
      "1.10.3.11\tUbiquinol oxidase (non-electrogenic)",
      "1.10.3.15\tGrixazone synthase",
      "1.10.3.16\tDihydrophenazinedicarboxylate synthase",
+    "1.10.3.17\tSuperoxide oxidase",
      "1.10.5.1\tRibosyldihydronicotinamide dehydrogenase (quinone)",
      "1.11.1.1\tNADH peroxidase",
      "1.11.1.2\tNADPH peroxidase",
@@ -1132,7 +1147,6 @@ static const char* const kECNum_specific[] = {
      "1.11.1.12\tPhospholipid-hydroperoxide glutathione peroxidase",
      "1.11.1.13\tManganese peroxidase",
      "1.11.1.14\tLignin peroxidase",
-    "1.11.1.15\tPeroxiredoxin",
      "1.11.1.16\tVersatile peroxidase",
      "1.11.1.17\tGlutathione amide-dependent peroxidase",
      "1.11.1.18\tBromide peroxidase",
@@ -1141,6 +1155,12 @@ static const char* const kECNum_specific[] = {
      "1.11.1.21\tCatalase peroxidase",
      "1.11.1.22\tHydroperoxy fatty acid reductase",
      "1.11.1.23\t(S)-2-hydroxypropylphosphonic acid epoxidase",
+    "1.11.1.24\tThioredoxin-dependent peroxiredoxin",
+    "1.11.1.25\tGlutaredoxin-dependent peroxiredoxin",
+    "1.11.1.26\tNADH-dependent peroxiredoxin",
+    "1.11.1.27\tGlutathione-dependent peroxiredoxin",
+    "1.11.1.28\tLipoyl-dependent peroxiredoxin",
+    "1.11.1.29\tMycoredoxin-dependent peroxiredoxin",
      "1.11.2.1\tUnspecific peroxygenase",
      "1.11.2.2\tMyeloperoxidase",
      "1.11.2.3\tPlant seed peroxygenase",
@@ -1325,6 +1345,8 @@ static const char* const kECNum_specific[] = {
      "1.14.11.67\t[Histone H3]-trimethyl-L-lysine(4) demethylase",
      "1.14.11.68\t[Histone H3]-trimethyl-L-lysine(27) demethylase",
      "1.14.11.69\t[Histone H3]-trimethyl-L-lysine(36) demethylase",
+    "1.14.11.70\t7-deoxycylindrospermopsin hydroxylase",
+    "1.14.11.71\tMethylphosphonate hydroxylase",
      "1.14.11.n2\tMethylcytosine dioxygenase",
      "1.14.11.n4\tAnkyrin-repeat-histidine dioxagenase",
      "1.14.12.1\tAnthranilate 1,2-dioxygenase (deaminating, decarboxylating)",
@@ -1471,6 +1493,7 @@ static const char* const kECNum_specific[] = {
      "1.14.13.244\tPhenol 2-monooxygenase (NADH)",
      "1.14.13.245\tAssimilatory dimethylsulfide S-monooxygenase",
      "1.14.13.246\t4-beta-methylsterol monooxygenase",
+    "1.14.13.247\tStachydrine N-demethylase",
      "1.14.13.n6\tHexahomomethionine N-hydroxylase",
      "1.14.13.n7\t4-nitrophenol 2-hydroxylase",
      "1.14.14.1\tUnspecific monooxygenase",
@@ -1696,6 +1719,7 @@ static const char* const kECNum_specific[] = {
      "1.14.18.9\t4-alpha-methylsterol monooxygenase",
      "1.14.18.10\tPlant 4,4-dimethylsterol C-4-alpha-methyl-monooxygenase",
      "1.14.18.11\tPlant 4-alpha-monomethylsterol monooxygenase",
+    "1.14.18.12\t2-hydroxy fatty acid dioxygenase",
      "1.14.19.1\tStearoyl-CoA 9-desaturase",
      "1.14.19.2\tStearoyl-[acyl-carrier-protein] 9-desaturase",
      "1.14.19.3\tAcyl-CoA 6-desaturase",
@@ -1881,6 +1905,7 @@ static const char* const kECNum_specific[] = {
      "1.17.99.4\tUracil/thymine dehydrogenase",
      "1.17.99.6\tEpoxyqueuosine reductase",
      "1.17.99.7\tFormate dehydrogenase (acceptor)",
+    "1.17.99.8\tLimonene dehydrogenase",
      "1.18.1.1\tRubredoxin--NAD(+) reductase",
      "1.18.1.2\tFerredoxin--NADP(+) reductase",
      "1.18.1.3\tFerredoxin--NAD(+) reductase",
@@ -1894,7 +1919,7 @@ static const char* const kECNum_specific[] = {
      "1.19.6.1\tNitrogenase (flavodoxin)",
      "1.20.1.1\tPhosphonate dehydrogenase",
      "1.20.2.1\tArsenate reductase (cytochrome c)",
-    "1.20.4.1\tArsenate reductase (glutaredoxin)",
+    "1.20.4.1\tArsenate reductase (glutathione/glutaredoxin)",
      "1.20.4.2\tMethylarsonate reductase",
      "1.20.4.3\tMycoredoxin",
      "1.20.4.4\tArsenate reductase (thioredoxin)",
@@ -2270,13 +2295,14 @@ static const char* const kECNum_specific[] = {
      "2.1.1.360\t[Histone H3]-lysine(79) N-trimethyltransferase",
      "2.1.1.361\t[Histone H4]-lysine(20) N-methyltransferase",
      "2.1.1.362\t[Histone H4]-N-methyl-L-lysine(20) N-methyltransferase",
+    "2.1.1.363\tPre-sodorifen synthase",
      "2.1.1.n1\tResorcinol O-methyltransferase",
      "2.1.1.n4\tThiocyanate methyltransferase",
      "2.1.1.n7\t5-pentadecatrienyl resorcinol O-methyltransferase",
      "2.1.1.n8\tSmall RNA 2'-O-methyltransferase",
      "2.1.1.n11\tMethylphosphotriester-DNA--[protein]-cysteine S-methyltransferase",
      "2.1.2.1\tGlycine hydroxymethyltransferase",
-    "2.1.2.2\tPhosphoribosylglycinamide formyltransferase",
+    "2.1.2.2\tPhosphoribosylglycinamide formyltransferase 1",
      "2.1.2.3\tPhosphoribosylaminoimidazolecarboxamide formyltransferase",
      "2.1.2.4\tGlycine formimidoyltransferase",
      "2.1.2.5\tGlutamate formimidoyltransferase",
@@ -2548,7 +2574,7 @@ static const char* const kECNum_specific[] = {
      "2.3.1.242\tKdo(2)-lipid IV(A) palmitoleoyltransferase",
      "2.3.1.243\tLauroyl-Kdo(2)-lipid IV(A) myristoyltransferase",
      "2.3.1.244\t2-methylbutanoate polyketide synthase",
-    "2.3.1.245\t3-hydroxy-5-phosphonooxypentane-2,4-dione thiolase",
+    "2.3.1.245\t3-hydroxy-5-phosphooxypentane-2,4-dione thiolase",
      "2.3.1.246\t3,5-dihydroxyphenylacetyl-CoA synthase",
      "2.3.1.247\t3-keto-5-aminohexanoate cleavage enzyme",
      "2.3.1.248\tSpermidine disinapoyl transferase",
@@ -2597,6 +2623,8 @@ static const char* const kECNum_specific[] = {
      "2.3.1.291\tSphingoid base N-palmitoyltransferase",
      "2.3.1.292\t(Phenol)carboxyphthiodiolenone synthase",
      "2.3.1.293\tMeromycolic acid 3-oxoacyl-(acyl carrier protein) synthase I",
+    "2.3.1.294\tMeromycolic acid 3-oxoacyl-(acyl carrier protein) synthase II",
+    "2.3.1.295\tMycoketide-CoA synthase",
      "2.3.1.296\tOmega-hydroxyceramide transacylase",
      "2.3.1.297\tVery-long-chain ceramide synthase",
      "2.3.1.298\tUltra-long-chain ceramide synthase",
@@ -2637,6 +2665,7 @@ static const char* const kECNum_specific[] = {
      "2.3.2.30\tL-ornithine N(alpha)-acyltransferase",
      "2.3.2.31\tRBR-type E3 ubiquitin transferase",
      "2.3.2.32\tCullin-RING-type E3 NEDD8 transferase",
+    "2.3.2.33\tRCR-type E3 ubiquitin transferase",
      "2.3.3.1\tCitrate (Si)-synthase",
      "2.3.3.2\tDecylcitrate synthase",
      "2.3.3.3\tCitrate (Re)-synthase",
@@ -2993,6 +3022,10 @@ static const char* const kECNum_specific[] = {
      "2.4.1.368\tOleanolate 3-O-glucosyltransferase",
      "2.4.1.369\tEnterobactin C-glucosyltransferase",
      "2.4.1.370\tInositol phosphorylceramide mannosyltransferase",
+    "2.4.1.371\tPolymannosyl GlcNAc-diphospho-ditrans,octacis-undecaprenol 2,3-alpha-mannosylpolymerase",
+    "2.4.1.372\tMutansucrase",
+    "2.4.1.373\tAlpha-(1->2) branching sucrase",
+    "2.4.1.374\tBeta-1,2-mannooligosaccharide synthase",
      "2.4.1.n2\tLoliose synthase",
      "2.4.2.1\tPurine-nucleoside phosphorylase",
      "2.4.2.2\tPyrimidine-nucleoside phosphorylase",
@@ -3053,7 +3086,7 @@ static const char* const kECNum_specific[] = {
      "2.4.2.60\tCysteine-dependent adenosine diphosphate thiazole synthase",
      "2.4.2.61\tAlpha-dystroglycan beta-1,4-xylosyltransferase",
      "2.4.2.n2\tGlucoside xylosyltransferase",
-    "2.4.2.n3\tXyloside xylosyltransferase",
+    "2.4.2.n3\tXylosyl alpha-1,3-xylosyltransferase",
      "2.4.99.1\tBeta-galactoside alpha-(2,6)-sialyltransferase",
      "2.4.99.2\tBeta-D-galactosyl-(1->3)-N-acetyl-beta-D-galactosaminide alpha-2,3-sialyltransferase",
      "2.4.99.3\tAlpha-N-acetylgalactosaminide alpha-2,6-sialyltransferase",
@@ -3325,6 +3358,7 @@ static const char* const kECNum_specific[] = {
      "2.6.1.115\t5-hydroxydodecatetraenal 1-aminotransferase",
      "2.6.1.116\t6-aminohexanoate aminotransferase",
      "2.6.1.117\tL-glutamine--4-(methylsulfanyl)-2-oxobutanoate aminotransferase",
+    "2.6.1.118\t[Amino group carrier protein]-gamma-(L-lysyl)-L-glutamate aminotransferase",
      "2.6.3.1\tOximinotransferase",
      "2.6.99.1\tdATP(dGTP)--DNA purinetransferase",
      "2.6.99.2\tPyridoxine 5'-phosphate synthase",
@@ -3373,7 +3407,7 @@ static const char* const kECNum_specific[] = {
      "2.7.1.45\t2-dehydro-3-deoxygluconokinase",
      "2.7.1.46\tL-arabinokinase",
      "2.7.1.47\tD-ribulokinase",
-    "2.7.1.48\tUridine kinase",
+    "2.7.1.48\tUridine/cytidine kinase",
      "2.7.1.49\tHydroxymethylpyrimidine kinase",
      "2.7.1.50\tHydroxyethylthiazole kinase",
      "2.7.1.51\tL-fuculokinase",
@@ -3443,7 +3477,7 @@ static const char* const kECNum_specific[] = {
      "2.7.1.144\tTagatose-6-phosphate kinase",
      "2.7.1.145\tDeoxynucleoside kinase",
      "2.7.1.146\tADP-specific phosphofructokinase",
-    "2.7.1.147\tADP-specific glucokinase",
+    "2.7.1.147\tADP-specific glucose/glucosamine kinase",
      "2.7.1.148\t4-(cytidine 5'-diphospho)-2-C-methyl-D-erythritol kinase",
      "2.7.1.149\t1-phosphatidylinositol-5-phosphate 4-kinase",
      "2.7.1.150\t1-phosphatidylinositol-3-phosphate 5-kinase",
@@ -3524,6 +3558,7 @@ static const char* const kECNum_specific[] = {
      "2.7.1.227\tInositol phosphorylceramide synthase",
      "2.7.1.228\tMannosyl-inositol-phosphoceramide inositolphosphotransferase",
      "2.7.1.229\tDeoxyribokinase",
+    "2.7.1.230\tAmicoumacin kinase",
      "2.7.2.1\tAcetate kinase",
      "2.7.2.2\tCarbamate kinase",
      "2.7.2.3\tPhosphoglycerate kinase",
@@ -3537,6 +3572,8 @@ static const char* const kECNum_specific[] = {
      "2.7.2.13\tGlutamate 1-kinase",
      "2.7.2.14\tBranched-chain-fatty-acid kinase",
      "2.7.2.15\tPropionate kinase",
+    "2.7.2.16\t2-phosphoglycerate kinase",
+    "2.7.2.17\t[Amino-group carrier protein]-L-2-aminoadipate 6-kinase",
      "2.7.3.1\tGuanidinoacetate kinase",
      "2.7.3.2\tCreatine kinase",
      "2.7.3.3\tArginine kinase",
@@ -3846,6 +3883,7 @@ static const char* const kECNum_specific[] = {
      "2.8.3.23\tCaffeate CoA-transferase",
      "2.8.3.24\t(R)-2-hydroxy-4-methylpentanoate CoA-transferase",
      "2.8.3.25\tBile acid CoA-transferase",
+    "2.8.3.26\tSuccinyl-CoA:mesaconate CoA transferase",
      "2.8.4.1\tCoenzyme-B sulfoethylthiotransferase",
      "2.8.4.2\tArsenate-mycothiol transferase",
      "2.8.4.3\ttRNA-2-methylthio-N(6)-dimethylallyladenosine synthase",
@@ -3855,6 +3893,7 @@ static const char* const kECNum_specific[] = {
      "2.8.5.2\tL-cysteine S-thiosulfotransferase",
      "2.9.1.1\tL-seryl-tRNA(Sec) selenium transferase",
      "2.9.1.2\tO-phospho-L-seryl-tRNA(Sec):L-selenocysteinyl-tRNA synthase",
+    "2.9.1.3\ttRNA 2-selenouridine synthase",
      "2.10.1.1\tMolybdopterin molybdotransferase",
      "3.1.1.1\tCarboxylesterase",
      "3.1.1.2\tArylesterase",
@@ -4092,6 +4131,8 @@ static const char* const kECNum_specific[] = {
      "3.1.3.104\t5-amino-6-(5-phospho-D-ribitylamino)uracil phosphatase",
      "3.1.3.105\tN-acetyl-D-muramate 6-phosphate phosphatase",
      "3.1.3.106\t2-lysophosphatidate phosphatase",
+    "3.1.3.107\tAmicoumacin phosphatase",
+    "3.1.3.108\tNocturnin",
      "3.1.4.1\tPhosphodiesterase I",
      "3.1.4.2\tGlycerophosphocholine phosphodiesterase",
      "3.1.4.3\tPhospholipase C",
@@ -4164,10 +4205,7 @@ static const char* const kECNum_specific[] = {
      "3.1.11.4\tExodeoxyribonuclease (phage SP3-induced)",
      "3.1.11.5\tExodeoxyribonuclease V",
      "3.1.11.6\tExodeoxyribonuclease VII",
-    "3.1.11.7\tAdenosine-5'-diphospho-5'-(DNA) diphosphatase",
-    "3.1.11.8\tGuaosine-5'-diphospho-5'-(DNA) diphosphatase",
      "3.1.12.1\t5' to 3' exodeoxyribonuclease (nucleoside 3'-phosphate-forming)",
-    "3.1.12.2\tDNA-3'-diphospho-5'-guanosine diphosphatase",
      "3.1.13.1\tExoribonuclease II",
      "3.1.13.2\tExoribonuclease H",
      "3.1.13.3\tOligonucleotidase",
@@ -4204,7 +4242,6 @@ static const char* const kECNum_specific[] = {
      "3.1.26.12\tRibonuclease E",
      "3.1.26.13\tRetroviral ribonuclease H",
      "3.1.26.n2\tArgonaute-2",
-    "3.1.27.3\tRibonuclease T(1)",
      "3.1.27.7\tRibonuclease F",
      "3.1.27.8\tRibonuclease V",
      "3.1.30.1\tAspergillus nuclease S(1)",
@@ -4243,7 +4280,6 @@ static const char* const kECNum_specific[] = {
      "3.2.1.41\tPullulanase",
      "3.2.1.42\tGDP-glucosidase",
      "3.2.1.43\tBeta-L-rhamnosidase",
-    "3.2.1.44\tFucoidanase",
      "3.2.1.45\tGlucosylceramidase",
      "3.2.1.46\tGalactosylceramidase",
      "3.2.1.47\tGalactosylgalactosylglucosylceramidase",
@@ -4348,7 +4384,7 @@ static const char* const kECNum_specific[] = {
      "3.2.1.152\tMannosylglycoprotein endo-beta-mannosidase",
      "3.2.1.153\tFructan beta-(2,1)-fructosidase",
      "3.2.1.154\tFructan beta-(2,6)-fructosidase",
-    "3.2.1.155\tXyloglucan-specific exo-beta-1,4-glucanase",
+    "3.2.1.155\tXyloglucan-specific endo-processive beta-1,4-glucanase",
      "3.2.1.156\tOligosaccharide reducing-end xylanase",
      "3.2.1.157\tIota-carrageenase",
      "3.2.1.158\tAlpha-agarase",
@@ -4403,6 +4439,9 @@ static const char* const kECNum_specific[] = {
      "3.2.1.208\tGlucosylglycerate hydrolase",
      "3.2.1.209\tEndoplasmic reticulum Man(9)GlcNAc(2) 1,2-alpha-mannosidase",
      "3.2.1.210\tEndoplasmic reticulum Man(8)GlcNAc(2) 1,2-alpha-mannosidase",
+    "3.2.1.211\tEndo-(1->3)-fucoidanase",
+    "3.2.1.212\tEndo-(1->4)-fucoidanase",
+    "3.2.1.213\tGalactan exo-1,6-beta-galactobiohydrolase (non-reducing end)",
      "3.2.1.n1\tBlood group B branched chain alpha-1,3-galactosidase",
      "3.2.1.n2\tBlood group B linear chain alpha-1,3-galactosidase",
      "3.2.1.n3\tDictyostelium lysozyme A",
@@ -4525,6 +4564,7 @@ static const char* const kECNum_specific[] = {
      "3.4.17.21\tGlutamate carboxypeptidase II",
      "3.4.17.22\tMetallocarboxypeptidase D",
      "3.4.17.23\tAngiotensin-converting enzyme 2",
+    "3.4.17.24\tTubulin-glutamate carboxypeptidase",
      "3.4.18.1\tCathepsin X",
      "3.4.19.1\tAcylaminoacyl-peptidase",
      "3.4.19.2\tPeptidyl-glycinamidase",
@@ -4925,7 +4965,7 @@ static const char* const kECNum_specific[] = {
      "3.5.1.107\tMaleamate amidohydrolase",
      "3.5.1.108\tUDP-3-O-acyl-N-acetylglucosamine deacetylase",
      "3.5.1.109\tSphingomyelin deacylase",
-    "3.5.1.110\tPeroxyureidoacrylate/ureidoacrylate amidohydrolase",
+    "3.5.1.110\tUreidoacrylate amidohydrolase",
      "3.5.1.111\t2-oxoglutaramate amidase",
      "3.5.1.112\t2'-N-acetylparomamine deacetylase",
      "3.5.1.113\t2'''-acetyl-6'''-hydroxyneomycin C deacetylase",
@@ -4944,8 +4984,9 @@ static const char* const kECNum_specific[] = {
      "3.5.1.127\tJasmonoyl-L-amino acid hydrolase",
      "3.5.1.128\tDeaminated glutathione amidase",
      "3.5.1.129\tN(5)-(cytidine 5'-diphosphoramidyl)-L-glutamine hydrolase",
-    "3.5.1.130\t[Lysine-biosynthesis-protein LysW]-lysine/ornithine hydrolase",
+    "3.5.1.130\t[Amino group carrier protein]-lysine hydrolase",
      "3.5.1.131\t1-carboxybiuret hydrolase",
+    "3.5.1.132\t[Amino group carrier protein]-ornithine hydrolase",
      "3.5.1.133\tN(alpha)-acyl-L-glutamine aminoacylase",
      "3.5.1.134\t(Indol-3-yl)acetyl-L-aspartate hydrolase",
      "3.5.1.n3\t4-deoxy-4-formamido-L-arabinose-phosphoundecaprenol deformylase",
@@ -5106,6 +5147,11 @@ static const char* const kECNum_specific[] = {
      "3.6.1.66\tXTP/dITP diphosphatase",
      "3.6.1.67\tDihydroneopterin triphosphate diphosphatase",
      "3.6.1.68\tGeranyl diphosphate phosphohydrolase",
+    "3.6.1.69\t8-oxo-(d)GTP phosphatase",
+    "3.6.1.70\tGuaosine-5'-diphospho-5'-(DNA) diphosphatase",
+    "3.6.1.71\tAdenosine-5'-diphospho-5'-(DNA) diphosphatase",
+    "3.6.1.72\tDNA-3'-diphospho-5'-guanosine diphosphatase",
+    "3.6.1.73\tInosine/xanthosine triphosphatase",
      "3.6.1.n1\tD-tyrosyl-tRNA(Tyr) hydrolase",
      "3.6.1.n2\tL-cysteinyl-tRNA(Pro)",
      "3.6.1.n3\tL-cysteinyl-tRNA(Cys) hydrolase",
@@ -5147,6 +5193,7 @@ static const char* const kECNum_specific[] = {
      "3.7.1.23\tMaleylpyruvate hydrolase",
      "3.7.1.24\t2,4-diacetylphloroglucinol hydrolase",
      "3.7.1.25\t2-hydroxy-6-oxohepta-2,4-dienoate hydrolase",
+    "3.7.1.26\t2,4-didehydro-3-deoxy-L-rhamnonate hydrolase",
      "3.8.1.1\tAlkylhalidase",
      "3.8.1.2\t(S)-2-haloacid dehalogenase",
      "3.8.1.3\tHaloacetate dehalogenase",
@@ -5173,6 +5220,7 @@ static const char* const kECNum_specific[] = {
      "3.13.1.6\t[CysO sulfur-carrier protein]-S-L-cysteine hydrolase",
      "3.13.1.7\tCarbonyl sulfide hydrolase",
      "3.13.1.8\tS-adenosyl-L-methionine hydrolase (adenosine-forming)",
+    "3.13.1.9\tS-inosyl-L-homocysteine hydrolase",
      "4.1.1.1\tPyruvate decarboxylase",
      "4.1.1.2\tOxalate decarboxylase",
      "4.1.1.4\tAcetoacetate decarboxylase",
@@ -5283,6 +5331,7 @@ static const char* const kECNum_specific[] = {
      "4.1.1.116\tD-ornithine/D-lysine decarboxylase",
      "4.1.1.117\t2-((L-alanin-3-ylcarbamoyl)methyl)-2-hydroxybutanedioate decarboxylase",
      "4.1.1.118\tIsophthalyl-CoA decarboxylase",
+    "4.1.1.119\tPhenylacetate decarboxylase",
      "4.1.2.2\tKetotetrose-phosphate aldolase",
      "4.1.2.4\tDeoxyribose-phosphate aldolase",
      "4.1.2.5\tL-threonine aldolase",
@@ -5313,7 +5362,6 @@ static const char* const kECNum_specific[] = {
      "4.1.2.36\tLactate aldolase",
      "4.1.2.38\tBenzoin aldolase",
      "4.1.2.40\tTagatose-bisphosphate aldolase",
-    "4.1.2.41\tVanillin synthase",
      "4.1.2.42\tD-threonine aldolase",
      "4.1.2.43\t3-hexulose-6-phosphate synthase",
      "4.1.2.44\t2,3-epoxybenzoyl-CoA dihydrolase",
@@ -5333,6 +5381,7 @@ static const char* const kECNum_specific[] = {
      "4.1.2.58\t2-dehydro-3,6-dideoxy-6-sulfogluconate aldolase",
      "4.1.2.59\tDihydroneopterin phosphate aldolase",
      "4.1.2.60\tDihydroneopterin triphosphate aldolase",
+    "4.1.2.61\tFeruloyl-CoA hydratase/lyase",
      "4.1.2.n2\t2-hydroxyphytanoyl-CoA lyase",
      "4.1.3.1\tIsocitrate lyase",
      "4.1.3.3\tN-acetylneuraminate lyase",
@@ -5444,7 +5493,7 @@ static const char* const kECNum_specific[] = {
      "4.2.1.84\tNitrile hydratase",
      "4.2.1.85\tDimethylmaleate hydratase",
      "4.2.1.87\tOctopamine dehydratase",
-    "4.2.1.88\t(R)-synephrine",
+    "4.2.1.88\tSynephrine dehydratase",
      "4.2.1.90\tL-rhamnonate dehydratase",
      "4.2.1.91\tArogenate dehydratase",
      "4.2.1.92\tHydroperoxide dehydratase",
@@ -5555,6 +5604,7 @@ static const char* const kECNum_specific[] = {
      "4.2.2.24\tRhamnogalacturonan exolyase",
      "4.2.2.25\tGellan lyase",
      "4.2.2.26\tOligo-alginate lyase",
+    "4.2.2.27\tPectin monosaccharide-lyase",
      "4.2.2.n1\tPeptidoglycan lytic exotransglycosylase",
      "4.2.2.n2\tPeptidoglycan lytic endotransglycosylase",
      "4.2.3.1\tThreonine synthase",
@@ -5689,7 +5739,7 @@ static const char* const kECNum_specific[] = {
      "4.2.3.131\tMiltiradiene synthase",
      "4.2.3.132\tNeoabietadiene synthase",
      "4.2.3.133\tAlpha-copaene synthase",
-    "4.2.3.134\t5-phosphonooxy-L-lysine phospho-lyase",
+    "4.2.3.134\t5-phosphooxy-L-lysine phospho-lyase",
      "4.2.3.135\tDelta(6)-protoilludene synthase",
      "4.2.3.136\tAlpha-isocomene synthase",
      "4.2.3.137\t(E)-2-epi-beta-caryophyllene synthase",
@@ -5868,6 +5918,8 @@ static const char* const kECNum_specific[] = {
      "4.6.1.21\tEnterobacter ribonuclease",
      "4.6.1.22\tBacillus subtilis ribonuclease",
      "4.6.1.23\tRibotoxin",
+    "4.6.1.24\tRibonuclease T(1)",
+    "4.6.1.25\tBacteriophage T(4) restriction endoribonuclease RegB",
      "4.7.1.1\tAlpha-D-ribose 1-methylphosphonate 5-phosphate C-P-lyase",
      "4.99.1.1\tProtoporphyrin ferrochelatase",
      "4.99.1.2\tAlkylmercury lyase",
@@ -5999,7 +6051,7 @@ static const char* const kECNum_specific[] = {
      "5.3.1.29\tRibose 1,5-bisphosphate isomerase",
      "5.3.1.30\t5-deoxy-glucuronate isomerase",
      "5.3.1.31\tSulfoquinovose isomerase",
-    "5.3.1.32\t(4S)-4-hydroxy-5-phosphonooxypentane-2,3-dione isomerase",
+    "5.3.1.32\t(4S)-4-hydroxy-5-phosphooxypentane-2,3-dione isomerase",
      "5.3.1.33\tL-erythrulose 1-phosphate isomerase",
      "5.3.1.34\tD-erythrulose 4-phosphate isomerase",
      "5.3.1.35\t2-dehydrotetronate isomerase",
@@ -6171,6 +6223,8 @@ static const char* const kECNum_specific[] = {
      "5.5.1.31\tHapalindole H synthase",
      "5.5.1.32\t12-epi-hapalindole U synthase",
      "5.5.1.33\t12-epi-fischerindole U synthase",
+    "5.5.1.34\t(+)-cis,trans-nepetalactol synthase",
+    "5.5.1.35\t(+)-cis,cis-nepetalactol synthase",
      "5.6.1.1\tMicrotubule-severing ATPase",
      "5.6.1.2\tDynein ATPase",
      "5.6.1.3\tPlus-end-directed kinesin ATPase",
@@ -6270,6 +6324,9 @@ static const char* const kECNum_specific[] = {
      "6.2.1.58\tIsophthalate--CoA ligase",
      "6.2.1.59\tLong-chain fatty acid adenylase/transferase FadD26",
      "6.2.1.60\tMarinolic acid--CoA ligase",
+    "6.2.1.61\tSalicylate--[aryl-carrier protein] ligase",
+    "6.2.1.62\t3,4-dihydroxybenzoate--[aryl-carrier protein] ligase",
+    "6.2.1.63\tL-arginine--[L-arginyl-carrier protein] ligase",
      "6.2.1.n2\tAmino acid--[acyl-carrier-protein] ligase",
      "6.2.1.n3\tMalonate--CoA ligase",
      "6.3.1.1\tAspartate--ammonia ligase",
@@ -6326,7 +6383,7 @@ static const char* const kECNum_specific[] = {
      "6.3.2.40\tCyclopeptine synthase",
      "6.3.2.41\tN-acetylaspartylglutamate synthase",
      "6.3.2.42\tN-acetylaspartylglutamylglutamate synthase",
-    "6.3.2.43\t[Amino group carrier protein]--L-2-aminoadipate ligase",
+    "6.3.2.43\t[Amino-group carrier protein]--L-2-aminoadipate ligase",
      "6.3.2.44\tPantoate--beta-alanine ligase (ADP-forming)",
      "6.3.2.45\tUDP-N-acetylmuramate L-alanyl-gamma-D-glutamyl-meso-2,6-diaminoheptanedioate ligase",
      "6.3.2.46\tFumarate--(S)-2,3-diaminopropanoate ligase",
@@ -6335,7 +6392,7 @@ static const char* const kECNum_specific[] = {
      "6.3.2.49\tL-alanine--L-anticapsin ligase",
      "6.3.2.50\tTenuazonic acid synthetase",
      "6.3.2.51\tPhosphopantothenate--cysteine ligase (ATP)",
-    "6.3.2.52\tJasmonoyl--L-amino acid synthetase",
+    "6.3.2.52\tJasmonoyl--L-amino acid ligase",
      "6.3.2.53\tUDP-N-acetylmuramoyl-L-alanine--L-glutamate ligase",
      "6.3.2.54\tL-2,3-diaminopropanoate--citrate ligase",
      "6.3.2.55\t2-((L-alanin-3-ylcarbamoyl)methyl)-3-(2-aminoethylcarbamoyl)-2-hydroxypropanoate synthase",
@@ -6412,6 +6469,7 @@ static const char* const kECNum_specific[] = {
      "7.1.1.6\tPlastoquinol--plastocyanin reductase",
      "7.1.1.7\tUbiquinol oxidase (electrogenic, proton-motive force generating)",
      "7.1.1.8\tQuinol--cytochrome-c reductase",
+    "7.1.1.9\tCytochrome-c oxidase",
      "7.1.2.1\tP-type H(+)-exporting transporter",
      "7.1.2.2\tH(+)-transporting two-sector ATPase",
      "7.1.3.1\tH(+)-exporting diphosphatase",
@@ -6463,6 +6521,7 @@ static const char* const kECNum_specific[] = {
      "7.4.2.10\tABC-type glutathione transporter",
      "7.4.2.11\tABC-type methionine transporter",
      "7.4.2.12\tABC-type cystine transporter",
+    "7.4.2.13\tABC-type tyrosine transporter",
      "7.5.2.1\tABC-type maltose transporter",
      "7.5.2.2\tABC-type oligosaccharide transporter",
      "7.5.2.3\tABC-type beta-glucan transporter",
diff --git a/c++/src/objects/seqfeat/ecnum_specific.txt b/c++/src/objects/seqfeat/ecnum_specific.txt

index cc45423faf545a52c346fa30e0ca32315c81e27e..088ef8ec43d45302d716e4ad55468c7cdc178169 100644 (file)
--- a/c++/src/objects/seqfeat/ecnum_specific.txt
+++ b/c++/src/objects/seqfeat/ecnum_specific.txt
@@ -396,6 +396,10 @@
  1.1.1.417      3-beta-hydroxysteroid-4-beta-carboxylate 3-dehydrogenase (decarboxylating)
  1.1.1.418      Plant 3-beta-hydroxysteroid-4-alpha-carboxylate 3-dehydrogenase (decarboxylating)
  1.1.1.419      Nepetalactol dehydrogenase
+1.1.1.420      D-apiose dehydrogenase
+1.1.1.421      D-apionate oxidoisomerase
+1.1.1.422      Pseudoephedrine dehydrogenase
+1.1.1.423      Ephedrine dehydrogenase
  1.1.1.n4       (-)-trans-carveol dehydrogenase
  1.1.1.n5       3-methylmalate dehydrogenase
  1.1.1.n11      Succinic semialdehyde reductase
@@ -408,6 +412,7 @@
  1.1.2.7        Methanol dehydrogenase (cytochrome c)
  1.1.2.8        Alcohol dehydrogenase (cytochrome c)
  1.1.2.9        1-butanol dehydrogenase (cytochrome c)
+1.1.2.10       Lanthanide-dependent methanol dehydrogenase
  1.1.3.4        Glucose oxidase
  1.1.3.5        Hexose oxidase
  1.1.3.6        Cholesterol oxidase
@@ -462,6 +467,7 @@
  1.1.98.4       F420H(2):quinone oxidoreductase
  1.1.98.5       Secondary-alcohol dehydrogenase (coenzyme-F420)
  1.1.98.6       Ribonucleoside-triphosphate reductase (formate)
+1.1.98.7       Serine-type anaerobic sulfatase-maturating enzyme
  1.1.99.1       Choline dehydrogenase
  1.1.99.2       L-2-hydroxyglutarate dehydrogenase
  1.1.99.3       Gluconate 2-dehydrogenase (acceptor)
@@ -581,6 +587,7 @@
  1.2.1.100      5-formyl-3-hydroxy-2-methylpyridine 4-carboxylate 5-dehydrogenase
  1.2.1.101      L-tyrosine reductase
  1.2.1.102      Isopyridoxal dehydrogenase (5-pyridoxate-forming)
+1.2.1.103      [Amino-group carrier protein]-6-phospho-L-2-aminoadipate reductase
  1.2.1.n2       Fatty acyl-CoA reductase
  1.2.2.1        Formate dehydrogenase (cytochrome)
  1.2.2.4        Carbon-monoxide dehydrogenase (cytochrome b-561)
@@ -645,7 +652,7 @@
  1.3.1.31       2-enoate reductase
  1.3.1.32       Maleylacetate reductase
  1.3.1.33       Protochlorophyllide reductase
-1.3.1.34       2,4-dienoyl-CoA reductase (NADPH)
+1.3.1.34       2,4-dienoyl-CoA reductase ((2E)-enoyl-CoA-producing)
  1.3.1.36       Geissoschizine dehydrogenase
  1.3.1.37       Cis-2-enoyl-CoA reductase (NADPH)
  1.3.1.38       Trans-2-enoyl-CoA reductase (NADPH)
@@ -702,7 +709,6 @@
  1.3.1.96       Botryococcus squalene synthase
  1.3.1.97       Botryococcene synthase
  1.3.1.98       UDP-N-acetylmuramate dehydrogenase
-1.3.1.99       Iridoid synthase
  1.3.1.100      Chanoclavine-I aldehyde reductase
  1.3.1.101      2,3-bis-O-geranylgeranyl-sn-glycerol 1-phosphate reductase (NAD(P)H)
  1.3.1.102      2-alkenal reductase (NADP(+))
@@ -724,6 +730,10 @@
  1.3.1.118      Meromycolic acid enoyl-[acyl-carrier-protein] reductase
  1.3.1.119      Chlorobenzene dihydrodiol dehydrogenase
  1.3.1.120      Cyclohexane-1-carbonyl-CoA reductase (NADP(+))
+1.3.1.121      4-amino-4-deoxyprephenate dehydrogenase
+1.3.1.122      (S)-8-oxocitronellyl enol synthase
+1.3.1.123      7-epi-iridoid synthase
+1.3.1.124      2,4-dienoyl-CoA reductase ((3E)-enoyl-CoA-producing)
  1.3.1.n3       Curcumin reductase
  1.3.2.3        L-galactonolactone dehydrogenase
  1.3.3.3        Coproporphyrinogen oxidase
@@ -773,6 +783,7 @@
  1.3.8.12       (2S)-methylsuccinyl-CoA dehydrogenase
  1.3.8.13       Crotonobetainyl-CoA reductase
  1.3.8.14       L-prolyl-[peptidyl-carrier protein] dehydrogenase
+1.3.8.15       3-(aryl)acrylate reductase
  1.3.98.1       Dihydroorotate oxidase (fumarate)
  1.3.98.3       Coproporphyrinogen dehydrogenase
  1.3.98.4       5a,11a-dehydrotetracycline reductase
@@ -1051,6 +1062,8 @@
  1.8.4.12       Peptide-methionine (R)-S-oxide reductase
  1.8.4.13       L-methionine (S)-S-oxide reductase
  1.8.4.14       L-methionine (R)-S-oxide reductase
+1.8.4.15       Protein dithiol oxidoreductase (disulfide-forming)
+1.8.4.16       Thioredoxin:protein disulfide reductase
  1.8.5.1        Glutathione dehydrogenase (ascorbate)
  1.8.5.2        Thiosulfate dehydrogenase (quinone)
  1.8.5.3        Respiratory dimethylsulfoxide reductase
@@ -1059,6 +1072,7 @@
  1.8.5.6        Sulfite dehydrogenase (quinone)
  1.8.5.7        Glutathionyl-hydroquinone reductase
  1.8.5.8        Eukaryotic sulfide quinone oxidoreductase
+1.8.5.9        Protein dithiol:quinone oxidoreductase DsbB
  1.8.7.1        Assimilatory sulfite reductase (ferredoxin)
  1.8.7.2        Ferredoxin:thioredoxin reductase
  1.8.7.3        Ferredoxin:CoB-CoM heterodisulfide reductase
@@ -1068,9 +1082,9 @@
  1.8.98.4       Coenzyme F420:CoB-CoM heterodisulfide,ferredoxin reductase
  1.8.98.5       H(2):CoB-CoM heterodisulfide,ferredoxin reductase
  1.8.98.6       Formate:CoB-CoM heterodisulfide,ferredoxin reductase
+1.8.98.7       Cysteine-type anaerobic sulfatase-maturating enzyme
  1.8.99.2       Adenylyl-sulfate reductase
  1.8.99.5       Dissimilatory sulfite reductase
-1.9.3.1        Cytochrome-c oxidase
  1.9.6.1        Nitrate reductase (cytochrome)
  1.9.98.1       Iron--cytochrome-c reductase
  1.10.1.1       Trans-acenaphthene-1,2-diol dehydrogenase
@@ -1085,6 +1099,7 @@
  1.10.3.11      Ubiquinol oxidase (non-electrogenic)
  1.10.3.15      Grixazone synthase
  1.10.3.16      Dihydrophenazinedicarboxylate synthase
+1.10.3.17      Superoxide oxidase
  1.10.5.1       Ribosyldihydronicotinamide dehydrogenase (quinone)
  1.11.1.1       NADH peroxidase
  1.11.1.2       NADPH peroxidase
@@ -1099,7 +1114,6 @@
  1.11.1.12      Phospholipid-hydroperoxide glutathione peroxidase
  1.11.1.13      Manganese peroxidase
  1.11.1.14      Lignin peroxidase
-1.11.1.15      Peroxiredoxin
  1.11.1.16      Versatile peroxidase
  1.11.1.17      Glutathione amide-dependent peroxidase
  1.11.1.18      Bromide peroxidase
@@ -1108,6 +1122,12 @@
  1.11.1.21      Catalase peroxidase
  1.11.1.22      Hydroperoxy fatty acid reductase
  1.11.1.23      (S)-2-hydroxypropylphosphonic acid epoxidase
+1.11.1.24      Thioredoxin-dependent peroxiredoxin
+1.11.1.25      Glutaredoxin-dependent peroxiredoxin
+1.11.1.26      NADH-dependent peroxiredoxin
+1.11.1.27      Glutathione-dependent peroxiredoxin
+1.11.1.28      Lipoyl-dependent peroxiredoxin
+1.11.1.29      Mycoredoxin-dependent peroxiredoxin
  1.11.2.1       Unspecific peroxygenase
  1.11.2.2       Myeloperoxidase
  1.11.2.3       Plant seed peroxygenase
@@ -1292,6 +1312,8 @@
  1.14.11.67     [Histone H3]-trimethyl-L-lysine(4) demethylase
  1.14.11.68     [Histone H3]-trimethyl-L-lysine(27) demethylase
  1.14.11.69     [Histone H3]-trimethyl-L-lysine(36) demethylase
+1.14.11.70     7-deoxycylindrospermopsin hydroxylase
+1.14.11.71     Methylphosphonate hydroxylase
  1.14.11.n2     Methylcytosine dioxygenase
  1.14.11.n4     Ankyrin-repeat-histidine dioxagenase
  1.14.12.1      Anthranilate 1,2-dioxygenase (deaminating, decarboxylating)
@@ -1438,6 +1460,7 @@
  1.14.13.244    Phenol 2-monooxygenase (NADH)
  1.14.13.245    Assimilatory dimethylsulfide S-monooxygenase
  1.14.13.246    4-beta-methylsterol monooxygenase
+1.14.13.247    Stachydrine N-demethylase
  1.14.13.n6     Hexahomomethionine N-hydroxylase
  1.14.13.n7     4-nitrophenol 2-hydroxylase
  1.14.14.1      Unspecific monooxygenase
@@ -1663,6 +1686,7 @@
  1.14.18.9      4-alpha-methylsterol monooxygenase
  1.14.18.10     Plant 4,4-dimethylsterol C-4-alpha-methyl-monooxygenase
  1.14.18.11     Plant 4-alpha-monomethylsterol monooxygenase
+1.14.18.12     2-hydroxy fatty acid dioxygenase
  1.14.19.1      Stearoyl-CoA 9-desaturase
  1.14.19.2      Stearoyl-[acyl-carrier-protein] 9-desaturase
  1.14.19.3      Acyl-CoA 6-desaturase
@@ -1848,6 +1872,7 @@
  1.17.99.4      Uracil/thymine dehydrogenase
  1.17.99.6      Epoxyqueuosine reductase
  1.17.99.7      Formate dehydrogenase (acceptor)
+1.17.99.8      Limonene dehydrogenase
  1.18.1.1       Rubredoxin--NAD(+) reductase
  1.18.1.2       Ferredoxin--NADP(+) reductase
  1.18.1.3       Ferredoxin--NAD(+) reductase
@@ -1861,7 +1886,7 @@
  1.19.6.1       Nitrogenase (flavodoxin)
  1.20.1.1       Phosphonate dehydrogenase
  1.20.2.1       Arsenate reductase (cytochrome c)
-1.20.4.1       Arsenate reductase (glutaredoxin)
+1.20.4.1       Arsenate reductase (glutathione/glutaredoxin)
  1.20.4.2       Methylarsonate reductase
  1.20.4.3       Mycoredoxin
  1.20.4.4       Arsenate reductase (thioredoxin)
@@ -2237,13 +2262,14 @@
  2.1.1.360      [Histone H3]-lysine(79) N-trimethyltransferase
  2.1.1.361      [Histone H4]-lysine(20) N-methyltransferase
  2.1.1.362      [Histone H4]-N-methyl-L-lysine(20) N-methyltransferase
+2.1.1.363      Pre-sodorifen synthase
  2.1.1.n1       Resorcinol O-methyltransferase
  2.1.1.n4       Thiocyanate methyltransferase
  2.1.1.n7       5-pentadecatrienyl resorcinol O-methyltransferase
  2.1.1.n8       Small RNA 2'-O-methyltransferase
  2.1.1.n11      Methylphosphotriester-DNA--[protein]-cysteine S-methyltransferase
  2.1.2.1        Glycine hydroxymethyltransferase
-2.1.2.2        Phosphoribosylglycinamide formyltransferase
+2.1.2.2        Phosphoribosylglycinamide formyltransferase 1
  2.1.2.3        Phosphoribosylaminoimidazolecarboxamide formyltransferase
  2.1.2.4        Glycine formimidoyltransferase
  2.1.2.5        Glutamate formimidoyltransferase
@@ -2515,7 +2541,7 @@
  2.3.1.242      Kdo(2)-lipid IV(A) palmitoleoyltransferase
  2.3.1.243      Lauroyl-Kdo(2)-lipid IV(A) myristoyltransferase
  2.3.1.244      2-methylbutanoate polyketide synthase
-2.3.1.245      3-hydroxy-5-phosphonooxypentane-2,4-dione thiolase
+2.3.1.245      3-hydroxy-5-phosphooxypentane-2,4-dione thiolase
  2.3.1.246      3,5-dihydroxyphenylacetyl-CoA synthase
  2.3.1.247      3-keto-5-aminohexanoate cleavage enzyme
  2.3.1.248      Spermidine disinapoyl transferase
@@ -2564,6 +2590,8 @@
  2.3.1.291      Sphingoid base N-palmitoyltransferase
  2.3.1.292      (Phenol)carboxyphthiodiolenone synthase
  2.3.1.293      Meromycolic acid 3-oxoacyl-(acyl carrier protein) synthase I
+2.3.1.294      Meromycolic acid 3-oxoacyl-(acyl carrier protein) synthase II
+2.3.1.295      Mycoketide-CoA synthase
  2.3.1.296      Omega-hydroxyceramide transacylase
  2.3.1.297      Very-long-chain ceramide synthase
  2.3.1.298      Ultra-long-chain ceramide synthase
@@ -2604,6 +2632,7 @@
  2.3.2.30       L-ornithine N(alpha)-acyltransferase
  2.3.2.31       RBR-type E3 ubiquitin transferase
  2.3.2.32       Cullin-RING-type E3 NEDD8 transferase
+2.3.2.33       RCR-type E3 ubiquitin transferase
  2.3.3.1        Citrate (Si)-synthase
  2.3.3.2        Decylcitrate synthase
  2.3.3.3        Citrate (Re)-synthase
@@ -2960,6 +2989,10 @@
  2.4.1.368      Oleanolate 3-O-glucosyltransferase
  2.4.1.369      Enterobactin C-glucosyltransferase
  2.4.1.370      Inositol phosphorylceramide mannosyltransferase
+2.4.1.371      Polymannosyl GlcNAc-diphospho-ditrans,octacis-undecaprenol 2,3-alpha-mannosylpolymerase
+2.4.1.372      Mutansucrase
+2.4.1.373      Alpha-(1->2) branching sucrase
+2.4.1.374      Beta-1,2-mannooligosaccharide synthase
  2.4.1.n2       Loliose synthase
  2.4.2.1        Purine-nucleoside phosphorylase
  2.4.2.2        Pyrimidine-nucleoside phosphorylase
@@ -3020,7 +3053,7 @@
  2.4.2.60       Cysteine-dependent adenosine diphosphate thiazole synthase
  2.4.2.61       Alpha-dystroglycan beta-1,4-xylosyltransferase
  2.4.2.n2       Glucoside xylosyltransferase
-2.4.2.n3       Xyloside xylosyltransferase
+2.4.2.n3       Xylosyl alpha-1,3-xylosyltransferase
  2.4.99.1       Beta-galactoside alpha-(2,6)-sialyltransferase
  2.4.99.2       Beta-D-galactosyl-(1->3)-N-acetyl-beta-D-galactosaminide alpha-2,3-sialyltransferase
  2.4.99.3       Alpha-N-acetylgalactosaminide alpha-2,6-sialyltransferase
@@ -3292,6 +3325,7 @@
  2.6.1.115      5-hydroxydodecatetraenal 1-aminotransferase
  2.6.1.116      6-aminohexanoate aminotransferase
  2.6.1.117      L-glutamine--4-(methylsulfanyl)-2-oxobutanoate aminotransferase
+2.6.1.118      [Amino group carrier protein]-gamma-(L-lysyl)-L-glutamate aminotransferase
  2.6.3.1        Oximinotransferase
  2.6.99.1       dATP(dGTP)--DNA purinetransferase
  2.6.99.2       Pyridoxine 5'-phosphate synthase
@@ -3340,7 +3374,7 @@
  2.7.1.45       2-dehydro-3-deoxygluconokinase
  2.7.1.46       L-arabinokinase
  2.7.1.47       D-ribulokinase
-2.7.1.48       Uridine kinase
+2.7.1.48       Uridine/cytidine kinase
  2.7.1.49       Hydroxymethylpyrimidine kinase
  2.7.1.50       Hydroxyethylthiazole kinase
  2.7.1.51       L-fuculokinase
@@ -3410,7 +3444,7 @@
  2.7.1.144      Tagatose-6-phosphate kinase
  2.7.1.145      Deoxynucleoside kinase
  2.7.1.146      ADP-specific phosphofructokinase
-2.7.1.147      ADP-specific glucokinase
+2.7.1.147      ADP-specific glucose/glucosamine kinase
  2.7.1.148      4-(cytidine 5'-diphospho)-2-C-methyl-D-erythritol kinase
  2.7.1.149      1-phosphatidylinositol-5-phosphate 4-kinase
  2.7.1.150      1-phosphatidylinositol-3-phosphate 5-kinase
@@ -3491,6 +3525,7 @@
  2.7.1.227      Inositol phosphorylceramide synthase
  2.7.1.228      Mannosyl-inositol-phosphoceramide inositolphosphotransferase
  2.7.1.229      Deoxyribokinase
+2.7.1.230      Amicoumacin kinase
  2.7.2.1        Acetate kinase
  2.7.2.2        Carbamate kinase
  2.7.2.3        Phosphoglycerate kinase
@@ -3504,6 +3539,8 @@
  2.7.2.13       Glutamate 1-kinase
  2.7.2.14       Branched-chain-fatty-acid kinase
  2.7.2.15       Propionate kinase
+2.7.2.16       2-phosphoglycerate kinase
+2.7.2.17       [Amino-group carrier protein]-L-2-aminoadipate 6-kinase
  2.7.3.1        Guanidinoacetate kinase
  2.7.3.2        Creatine kinase
  2.7.3.3        Arginine kinase
@@ -3813,6 +3850,7 @@
  2.8.3.23       Caffeate CoA-transferase
  2.8.3.24       (R)-2-hydroxy-4-methylpentanoate CoA-transferase
  2.8.3.25       Bile acid CoA-transferase
+2.8.3.26       Succinyl-CoA:mesaconate CoA transferase
  2.8.4.1        Coenzyme-B sulfoethylthiotransferase
  2.8.4.2        Arsenate-mycothiol transferase
  2.8.4.3        tRNA-2-methylthio-N(6)-dimethylallyladenosine synthase
@@ -3822,6 +3860,7 @@
  2.8.5.2        L-cysteine S-thiosulfotransferase
  2.9.1.1        L-seryl-tRNA(Sec) selenium transferase
  2.9.1.2        O-phospho-L-seryl-tRNA(Sec):L-selenocysteinyl-tRNA synthase
+2.9.1.3        tRNA 2-selenouridine synthase
  2.10.1.1       Molybdopterin molybdotransferase
  3.1.1.1        Carboxylesterase
  3.1.1.2        Arylesterase
@@ -4059,6 +4098,8 @@
  3.1.3.104      5-amino-6-(5-phospho-D-ribitylamino)uracil phosphatase
  3.1.3.105      N-acetyl-D-muramate 6-phosphate phosphatase
  3.1.3.106      2-lysophosphatidate phosphatase
+3.1.3.107      Amicoumacin phosphatase
+3.1.3.108      Nocturnin
  3.1.4.1        Phosphodiesterase I
  3.1.4.2        Glycerophosphocholine phosphodiesterase
  3.1.4.3        Phospholipase C
@@ -4131,10 +4172,7 @@
  3.1.11.4       Exodeoxyribonuclease (phage SP3-induced)
  3.1.11.5       Exodeoxyribonuclease V
  3.1.11.6       Exodeoxyribonuclease VII
-3.1.11.7       Adenosine-5'-diphospho-5'-(DNA) diphosphatase
-3.1.11.8       Guaosine-5'-diphospho-5'-(DNA) diphosphatase
  3.1.12.1       5' to 3' exodeoxyribonuclease (nucleoside 3'-phosphate-forming)
-3.1.12.2       DNA-3'-diphospho-5'-guanosine diphosphatase
  3.1.13.1       Exoribonuclease II
  3.1.13.2       Exoribonuclease H
  3.1.13.3       Oligonucleotidase
@@ -4171,7 +4209,6 @@
  3.1.26.12      Ribonuclease E
  3.1.26.13      Retroviral ribonuclease H
  3.1.26.n2      Argonaute-2
-3.1.27.3       Ribonuclease T(1)
  3.1.27.7       Ribonuclease F
  3.1.27.8       Ribonuclease V
  3.1.30.1       Aspergillus nuclease S(1)
@@ -4210,7 +4247,6 @@
  3.2.1.41       Pullulanase
  3.2.1.42       GDP-glucosidase
  3.2.1.43       Beta-L-rhamnosidase
-3.2.1.44       Fucoidanase
  3.2.1.45       Glucosylceramidase
  3.2.1.46       Galactosylceramidase
  3.2.1.47       Galactosylgalactosylglucosylceramidase
@@ -4315,7 +4351,7 @@
  3.2.1.152      Mannosylglycoprotein endo-beta-mannosidase
  3.2.1.153      Fructan beta-(2,1)-fructosidase
  3.2.1.154      Fructan beta-(2,6)-fructosidase
-3.2.1.155      Xyloglucan-specific exo-beta-1,4-glucanase
+3.2.1.155      Xyloglucan-specific endo-processive beta-1,4-glucanase
  3.2.1.156      Oligosaccharide reducing-end xylanase
  3.2.1.157      Iota-carrageenase
  3.2.1.158      Alpha-agarase
@@ -4370,6 +4406,9 @@
  3.2.1.208      Glucosylglycerate hydrolase
  3.2.1.209      Endoplasmic reticulum Man(9)GlcNAc(2) 1,2-alpha-mannosidase
  3.2.1.210      Endoplasmic reticulum Man(8)GlcNAc(2) 1,2-alpha-mannosidase
+3.2.1.211      Endo-(1->3)-fucoidanase
+3.2.1.212      Endo-(1->4)-fucoidanase
+3.2.1.213      Galactan exo-1,6-beta-galactobiohydrolase (non-reducing end)
  3.2.1.n1       Blood group B branched chain alpha-1,3-galactosidase
  3.2.1.n2       Blood group B linear chain alpha-1,3-galactosidase
  3.2.1.n3       Dictyostelium lysozyme A
@@ -4492,6 +4531,7 @@
  3.4.17.21      Glutamate carboxypeptidase II
  3.4.17.22      Metallocarboxypeptidase D
  3.4.17.23      Angiotensin-converting enzyme 2
+3.4.17.24      Tubulin-glutamate carboxypeptidase
  3.4.18.1       Cathepsin X
  3.4.19.1       Acylaminoacyl-peptidase
  3.4.19.2       Peptidyl-glycinamidase
@@ -4892,7 +4932,7 @@
  3.5.1.107      Maleamate amidohydrolase
  3.5.1.108      UDP-3-O-acyl-N-acetylglucosamine deacetylase
  3.5.1.109      Sphingomyelin deacylase
-3.5.1.110      Peroxyureidoacrylate/ureidoacrylate amidohydrolase
+3.5.1.110      Ureidoacrylate amidohydrolase
  3.5.1.111      2-oxoglutaramate amidase
  3.5.1.112      2'-N-acetylparomamine deacetylase
  3.5.1.113      2'''-acetyl-6'''-hydroxyneomycin C deacetylase
@@ -4911,8 +4951,9 @@
  3.5.1.127      Jasmonoyl-L-amino acid hydrolase
  3.5.1.128      Deaminated glutathione amidase
  3.5.1.129      N(5)-(cytidine 5'-diphosphoramidyl)-L-glutamine hydrolase
-3.5.1.130      [Lysine-biosynthesis-protein LysW]-lysine/ornithine hydrolase
+3.5.1.130      [Amino group carrier protein]-lysine hydrolase
  3.5.1.131      1-carboxybiuret hydrolase
+3.5.1.132      [Amino group carrier protein]-ornithine hydrolase
  3.5.1.133      N(alpha)-acyl-L-glutamine aminoacylase
  3.5.1.134      (Indol-3-yl)acetyl-L-aspartate hydrolase
  3.5.1.n3       4-deoxy-4-formamido-L-arabinose-phosphoundecaprenol deformylase
@@ -5073,6 +5114,11 @@
  3.6.1.66       XTP/dITP diphosphatase
  3.6.1.67       Dihydroneopterin triphosphate diphosphatase
  3.6.1.68       Geranyl diphosphate phosphohydrolase
+3.6.1.69       8-oxo-(d)GTP phosphatase
+3.6.1.70       Guaosine-5'-diphospho-5'-(DNA) diphosphatase
+3.6.1.71       Adenosine-5'-diphospho-5'-(DNA) diphosphatase
+3.6.1.72       DNA-3'-diphospho-5'-guanosine diphosphatase
+3.6.1.73       Inosine/xanthosine triphosphatase
  3.6.1.n1       D-tyrosyl-tRNA(Tyr) hydrolase
  3.6.1.n2       L-cysteinyl-tRNA(Pro)
  3.6.1.n3       L-cysteinyl-tRNA(Cys) hydrolase
@@ -5114,6 +5160,7 @@
  3.7.1.23       Maleylpyruvate hydrolase
  3.7.1.24       2,4-diacetylphloroglucinol hydrolase
  3.7.1.25       2-hydroxy-6-oxohepta-2,4-dienoate hydrolase
+3.7.1.26       2,4-didehydro-3-deoxy-L-rhamnonate hydrolase
  3.8.1.1        Alkylhalidase
  3.8.1.2        (S)-2-haloacid dehalogenase
  3.8.1.3        Haloacetate dehalogenase
@@ -5140,6 +5187,7 @@
  3.13.1.6       [CysO sulfur-carrier protein]-S-L-cysteine hydrolase
  3.13.1.7       Carbonyl sulfide hydrolase
  3.13.1.8       S-adenosyl-L-methionine hydrolase (adenosine-forming)
+3.13.1.9       S-inosyl-L-homocysteine hydrolase
  4.1.1.1        Pyruvate decarboxylase
  4.1.1.2        Oxalate decarboxylase
  4.1.1.4        Acetoacetate decarboxylase
@@ -5250,6 +5298,7 @@
  4.1.1.116      D-ornithine/D-lysine decarboxylase
  4.1.1.117      2-((L-alanin-3-ylcarbamoyl)methyl)-2-hydroxybutanedioate decarboxylase
  4.1.1.118      Isophthalyl-CoA decarboxylase
+4.1.1.119      Phenylacetate decarboxylase
  4.1.2.2        Ketotetrose-phosphate aldolase
  4.1.2.4        Deoxyribose-phosphate aldolase
  4.1.2.5        L-threonine aldolase
@@ -5280,7 +5329,6 @@
  4.1.2.36       Lactate aldolase
  4.1.2.38       Benzoin aldolase
  4.1.2.40       Tagatose-bisphosphate aldolase
-4.1.2.41       Vanillin synthase
  4.1.2.42       D-threonine aldolase
  4.1.2.43       3-hexulose-6-phosphate synthase
  4.1.2.44       2,3-epoxybenzoyl-CoA dihydrolase
@@ -5300,6 +5348,7 @@
  4.1.2.58       2-dehydro-3,6-dideoxy-6-sulfogluconate aldolase
  4.1.2.59       Dihydroneopterin phosphate aldolase
  4.1.2.60       Dihydroneopterin triphosphate aldolase
+4.1.2.61       Feruloyl-CoA hydratase/lyase
  4.1.2.n2       2-hydroxyphytanoyl-CoA lyase
  4.1.3.1        Isocitrate lyase
  4.1.3.3        N-acetylneuraminate lyase
@@ -5411,7 +5460,7 @@
  4.2.1.84       Nitrile hydratase
  4.2.1.85       Dimethylmaleate hydratase
  4.2.1.87       Octopamine dehydratase
-4.2.1.88       (R)-synephrine
+4.2.1.88       Synephrine dehydratase
  4.2.1.90       L-rhamnonate dehydratase
  4.2.1.91       Arogenate dehydratase
  4.2.1.92       Hydroperoxide dehydratase
@@ -5522,6 +5571,7 @@
  4.2.2.24       Rhamnogalacturonan exolyase
  4.2.2.25       Gellan lyase
  4.2.2.26       Oligo-alginate lyase
+4.2.2.27       Pectin monosaccharide-lyase
  4.2.2.n1       Peptidoglycan lytic exotransglycosylase
  4.2.2.n2       Peptidoglycan lytic endotransglycosylase
  4.2.3.1        Threonine synthase
@@ -5656,7 +5706,7 @@
  4.2.3.131      Miltiradiene synthase
  4.2.3.132      Neoabietadiene synthase
  4.2.3.133      Alpha-copaene synthase
-4.2.3.134      5-phosphonooxy-L-lysine phospho-lyase
+4.2.3.134      5-phosphooxy-L-lysine phospho-lyase
  4.2.3.135      Delta(6)-protoilludene synthase
  4.2.3.136      Alpha-isocomene synthase
  4.2.3.137      (E)-2-epi-beta-caryophyllene synthase
@@ -5835,6 +5885,8 @@
  4.6.1.21       Enterobacter ribonuclease
  4.6.1.22       Bacillus subtilis ribonuclease
  4.6.1.23       Ribotoxin
+4.6.1.24       Ribonuclease T(1)
+4.6.1.25       Bacteriophage T(4) restriction endoribonuclease RegB
  4.7.1.1        Alpha-D-ribose 1-methylphosphonate 5-phosphate C-P-lyase
  4.99.1.1       Protoporphyrin ferrochelatase
  4.99.1.2       Alkylmercury lyase
@@ -5966,7 +6018,7 @@
  5.3.1.29       Ribose 1,5-bisphosphate isomerase
  5.3.1.30       5-deoxy-glucuronate isomerase
  5.3.1.31       Sulfoquinovose isomerase
-5.3.1.32       (4S)-4-hydroxy-5-phosphonooxypentane-2,3-dione isomerase
+5.3.1.32       (4S)-4-hydroxy-5-phosphooxypentane-2,3-dione isomerase
  5.3.1.33       L-erythrulose 1-phosphate isomerase
  5.3.1.34       D-erythrulose 4-phosphate isomerase
  5.3.1.35       2-dehydrotetronate isomerase
@@ -6138,6 +6190,8 @@
  5.5.1.31       Hapalindole H synthase
  5.5.1.32       12-epi-hapalindole U synthase
  5.5.1.33       12-epi-fischerindole U synthase
+5.5.1.34       (+)-cis,trans-nepetalactol synthase
+5.5.1.35       (+)-cis,cis-nepetalactol synthase
  5.6.1.1        Microtubule-severing ATPase
  5.6.1.2        Dynein ATPase
  5.6.1.3        Plus-end-directed kinesin ATPase
@@ -6237,6 +6291,9 @@
  6.2.1.58       Isophthalate--CoA ligase
  6.2.1.59       Long-chain fatty acid adenylase/transferase FadD26
  6.2.1.60       Marinolic acid--CoA ligase
+6.2.1.61       Salicylate--[aryl-carrier protein] ligase
+6.2.1.62       3,4-dihydroxybenzoate--[aryl-carrier protein] ligase
+6.2.1.63       L-arginine--[L-arginyl-carrier protein] ligase
  6.2.1.n2       Amino acid--[acyl-carrier-protein] ligase
  6.2.1.n3       Malonate--CoA ligase
  6.3.1.1        Aspartate--ammonia ligase
@@ -6293,7 +6350,7 @@
  6.3.2.40       Cyclopeptine synthase
  6.3.2.41       N-acetylaspartylglutamate synthase
  6.3.2.42       N-acetylaspartylglutamylglutamate synthase
-6.3.2.43       [Amino group carrier protein]--L-2-aminoadipate ligase
+6.3.2.43       [Amino-group carrier protein]--L-2-aminoadipate ligase
  6.3.2.44       Pantoate--beta-alanine ligase (ADP-forming)
  6.3.2.45       UDP-N-acetylmuramate L-alanyl-gamma-D-glutamyl-meso-2,6-diaminoheptanedioate ligase
  6.3.2.46       Fumarate--(S)-2,3-diaminopropanoate ligase
@@ -6302,7 +6359,7 @@
  6.3.2.49       L-alanine--L-anticapsin ligase
  6.3.2.50       Tenuazonic acid synthetase
  6.3.2.51       Phosphopantothenate--cysteine ligase (ATP)
-6.3.2.52       Jasmonoyl--L-amino acid synthetase
+6.3.2.52       Jasmonoyl--L-amino acid ligase
  6.3.2.53       UDP-N-acetylmuramoyl-L-alanine--L-glutamate ligase
  6.3.2.54       L-2,3-diaminopropanoate--citrate ligase
  6.3.2.55       2-((L-alanin-3-ylcarbamoyl)methyl)-3-(2-aminoethylcarbamoyl)-2-hydroxypropanoate synthase
@@ -6379,6 +6436,7 @@
  7.1.1.6        Plastoquinol--plastocyanin reductase
  7.1.1.7        Ubiquinol oxidase (electrogenic, proton-motive force generating)
  7.1.1.8        Quinol--cytochrome-c reductase
+7.1.1.9        Cytochrome-c oxidase
  7.1.2.1        P-type H(+)-exporting transporter
  7.1.2.2        H(+)-transporting two-sector ATPase
  7.1.3.1        H(+)-exporting diphosphatase
@@ -6430,6 +6488,7 @@
  7.4.2.10       ABC-type glutathione transporter
  7.4.2.11       ABC-type methionine transporter
  7.4.2.12       ABC-type cystine transporter
+7.4.2.13       ABC-type tyrosine transporter
  7.5.2.1        ABC-type maltose transporter
  7.5.2.2        ABC-type oligosaccharide transporter
  7.5.2.3        ABC-type beta-glucan transporter
diff --git a/c++/src/objects/seqfeat/gc.inc b/c++/src/objects/seqfeat/gc.inc

index 5acdcaa641cd5779b1549430204e7126eb1f2c2d..2ceaed2d3aa3cfb35895e30fc5f99785a432f704 100644 (file)
--- a/c++/src/objects/seqfeat/gc.inc
+++ b/c++/src/objects/seqfeat/gc.inc
@@ -1,4 +1,4 @@
-/*  $Id: gc.inc 585639 2019-05-01 19:41:04Z fukanchi $
+/*  $Id: gc.inc 610069 2020-06-10 17:10:47Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -145,7 +145,7 @@ static const char* const s_GenCodeTblMemStr[] = {
      "  sncbieaa \"--*-------**--*-----------------M--M---------------M------------\"",
      " } ,",
      " {",
-    "  name \"Pterobranchia Mitochondrial\" ,",
+    "  name \"Rhabdopleuridae Mitochondrial\" ,",
      "  id 24 ,",
      "  ncbieaa  \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG\",",
      "  sncbieaa \"---M------**-------M---------------M---------------M------------\"",
diff --git a/c++/src/objects/seqfeat/gc.prt b/c++/src/objects/seqfeat/gc.prt

index 6738f01d45616c82e2fd3ad22f38fbab467b117c..ffb786f65f30cd3df2e19a9962b643b7631eaaab 100644 (file)
--- a/c++/src/objects/seqfeat/gc.prt
+++ b/c++/src/objects/seqfeat/gc.prt
@@ -6,6 +6,9 @@
  --    readability at the suggestion of Peter Rice, EMBL
  --  Later additions by Taxonomy Group staff at NCBI
  --
+--  Version 4.6
+--     Renamed genetic code 24 to Rhabdopleuridae Mitochondrial
+--
  --  Version 4.5
  --     Added Cephalodiscidae mitochondrial genetic code 33
  --
@@ -263,7 +266,7 @@ Genetic-code-table ::= {
    -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
   } ,
   {
-  name "Pterobranchia Mitochondrial" ,
+  name "Rhabdopleuridae Mitochondrial" ,
    id 24 ,
    ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
    sncbieaa "---M------**-------M---------------M---------------M------------"
diff --git a/c++/src/objects/seqfeat/institution_codes.inc b/c++/src/objects/seqfeat/institution_codes.inc

index 9a2d161bda2873088e38aa35b581f6c43a972164..b67fe886391ca5523df889b9c3f3cbb865f9bc19 100644 (file)
--- a/c++/src/objects/seqfeat/institution_codes.inc
+++ b/c++/src/objects/seqfeat/institution_codes.inc
@@ -1,4 +1,4 @@
-/*  $Id: institution_codes.inc 607542 2020-05-05 14:51:12Z ivanov $
+/*  $Id: institution_codes.inc 616908 2020-09-22 18:24:46Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -31,7 +31,7 @@
   */
  
  static const char* const kInstitutionCollectionCodeList[] = {
-"# $Id: institution_codes.inc 607542 2020-05-05 14:51:12Z ivanov $",
+"# $Id: institution_codes.inc 616908 2020-09-22 18:24:46Z ivanov $",
  "A\ts\tArnold Arboretum, Harvard University\t\t\t",
  "AA\ts\tMinistry of Science, Academy of Sciences\t\t\t",
  "AAC\tc\tArignar Anna College\t\t\t",
@@ -74,11 +74,11 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "ABTRI\tc\tApex Biotechnology Training and Research Institute\t\t\t",
  "ABU<NGA>\ts\tAhmadu Bello University Herbarium\t\t\t",
  "AC\ts\tAmherst College\t\t\t",
-"ACA\ts\tAgricultural University of Athens\t\t\t",
  "ACA-DC\tc\tGreek Coordinated Collections of Microorganisms\t\t\t",
  "ACAD\ts\tAcadia University, K. C. Irving Environmental Science Centre & Harriet Irving Botanical Gardens\t\t\t",
  "ACAD<AUS>\tsb\tAustralian Centre for Ancient DNA\t\t\t",
  "ACAM\tc\tThe Australian Collection of Antarctic Microorganisms, Cooperative Research Center for the Antarctic and Southern Ocean Environment\t\t\t",
+"ACAM<GRC>\ts\tAgricultural University of Athens\tACA\t\t",
  "ACAP\ts\tAquaculture Center of Aomori Prefecture\t\t\t",
  "ACBC\ts\tAgriculture Canada Research Station\t\t\t",
  "ACBR\tc\tAustrian Center of Biological Resources and Applied Mycology\t\t\t",
@@ -465,7 +465,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "BCCM/ITM\tc\tBelgian Coordinated Collections of Microorganisms / ITM Mycobacteria Collection\tITM\t\t",
  "BCCM/LMG\tc\tBelgian Coordinated Collections of Microorganisms/ LMG Bacteria Collection\tLMG\thttp://bccm.belspo.be/catalogues/lmg-strain-details?NUM=\t",
  "BCCM/MUCL\tc\tBelgian Coordinated Collections of Microorganisms / MUCL Agro-food & Environmental Fungal Collection\tMUCL\t\t",
-"BCCM/ULC\tc\tBelgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection\tULC\thttp://bccm.belspo.be/catalogues/ulc-strain-details?ACCESSION_NUMBER=\t",
+"BCCM/ULC\tc\tBelgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection\tULC\thttps://bccm.belspo.be/catalogues/bm-details?accession_number=\t",
  "BCCN\tc\tBrucella Culture Collection\t\t\t",
  "BCCUSP\tc\tBrazilian Cyanobacteria Collection - University of Sao Paulo\t\t\t",
  "BCF\ts\tUniversitat de Barcelona, Laboratori de Botanica\t\t\t",
@@ -682,7 +682,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "BPBM\ts\tBernice P. Bishop Museum\tBISHOP\t\t",
  "BPBM:Fish\ts\tBernice P. Bishop Museum, Fish Collection\t\t",
  "BPBM:IZ\ts\tBernice P. Bishop Museum, Invertebrate Zoology\t\t",
-"BPI\tsc\tU.S. National Fungus Collections, Systematic Botany and Mycology Laboratory\t\thttp://nt.ars-grin.gov/fungaldatabases/specimens/new_rptSpecimenOneRec.cfm?thisrec=BPI+&spec;\t",
+"BPI\tsc\tU.S. National Fungus Collections, Systematic Botany and Mycology Laboratory\t\t\t",
  "BPI<ZAF>\ts\tBernard Price Institute for Palaeontological Research\t\t\t",
  "BPIC\tc\tBenaki Phytopathological Institute Collection\t\t\t",
  "BPL\ts\tMuseum of Barnstaple & North Devon\t\t\t",
@@ -972,6 +972,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "CCM\tc\tCzech Collection of Microorganisms\tCCM<CZE>\t\t",
  "CCM-A\tc\tColeccion de Cultivos Microbianos\t\t\t",
  "CCM-CIBE\tc\tEscuela Superior Politecnica del Litoral\t\t\t",
+"CCM-UFV\tc\tCollection of Cyanobacteria and Microalgae at the Universidade Federal de Vicosa\t\t\t",
  "CCM<CHN>\ts\tChangchun College of Traditional Chinese Medicine, Department of Chinese Materia Medica\t\t\t",
  "CCM<USA-MT>\ts\tCarter County Museum\t\t\t",
  "CCMA-UFSCar\tc\tCulture Collection of Freshwater Microalgae\t\t\t",
@@ -1076,7 +1077,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "CGEC\ts\tChina Entomological Research Institute\t\t\t",
  "CGG\ts\tCambridge University Botanic Garden\t\t\t",
  "CGH\ts\tNational Museum of Prague\t\t\t",
-"CGMCC\tc\tChina General Microbiological Culture Collection Center\tAS\t\t",
+"CGMCC\tc\tChina General Microbiological Culture Collection Center\tAS\thttp://www.cgmcc.net/english/cata.php?stn=CGMCC%20\t",
  "CGMS\ts\tUniversidade Federal de Mato Grosso do Sul, Departamento de Biologia\t\t\t",
  "CGN\ts\tCentre for Genetic Resources, The Netherlands\t\t\t",
  "CGRIS\tb\tChinese Crop Germplasm Resources Information Network\t\t\t",
@@ -1098,6 +1099,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "CHELB\ts\tCheltenham College for Boys\t\t\t",
  "CHEP\ts\tEscuela Superior Politecnica del Chimborazo\t\t\t",
  "CHER\ts\tYu. Fedcovich Chernivtsi State University, Botany Department\t\t\t",
+"CHFC-EA\tsc\tChilean Fungal Collection\tCHFC,ChFC\t\t",
  "CHFD\ts\tChelmsford and Essex Museum\t\t\t",
  "CHI\ts\tUniversity of Illinois, Biological Sciences Department\t\t\t",
  "CHIA\ts\tNational Chiayi Agricultural College, Forestry Department\t\t\t",
@@ -1139,7 +1141,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "CIBM\ts\tCentro Invest. Biol. Noroeste\t\t\t",
  "CIC\ts\tAlbertson College of Idaho, Biology Department\t\t\t",
  "CICC\tc\tChina Center for Industrial Culture Collection\t\t\t",
-"CICCM\tc\tCawthron Institute Culture Collection of Micro-algae\t\t\t",
+"CICCM\tc\tCawthron Institute Culture Collection of Micro-algae\tCAWD\t\t",
  "CICESE\ts\tCentro de Investigacion Cientifica y de Educacion Superior de Ensenada\t\t\t",
  "CICIM\tc\tCulture and Information Centre of Industrial Microorganisms of China's Univeristies\t\t\t",
  "CICIMAR\ts\tCentro Interdisciplinario de Ciencias Marinas\t\t\t",
@@ -1422,6 +1424,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "CSAT\ts\tColegio de Postgraduados, Campus Tabasco\t\t\t",
  "CSAU\ts\tNational Agrarian University, Southern Branch \"Crimean Agrotechnological University\", Department of Botany, Plant Physiology and Genetics\t\t\t",
  "CSB\ts\tSt. John's University/College of Saint Benedict, Biology Department\t\t\t",
+"CSBD\ts\tCentre for Study of Biological Diversity\t\t\t",
  "CSC\ts\tColegio del Sagrado Corazon\t\t\t",
  "CSC-CLCH\tc\tCentro Substrati Cellulari, Cell Lines Collection and Hybridomas\t\t\t",
  "CSCA\ts\tCalifornia State Collection of Arthropods\t\t\t",
@@ -1474,6 +1477,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "CTNRC\ts\tThai National Reference Collections\t\t\t",
  "CTR\ts\tCharles T. Ramsden historical collection\t\t\t",
  "CTS\ts\tChongqing Teachers College\t\t\t",
+"CTUA\ts\tColeccio&#769;n Teriolo&#769;gica de la Universidad de Antioquia\t\t\t",
  "CTY\ts\tCanterbury Literary and Philosophical Institution\t\t\t",
  "CU\tsb\tCornell University\t\t\t",
  "CUAC\ts\tClemson University\t\t\t",
@@ -1658,6 +1662,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "DKG\ts\tJuniper Hall Field Centre\t\t\t",
  "DLF\ts\tStetson University, Biology Department\t\t\t",
  "DLU\ts\tDa Lat University\t\t\t",
+"DLUCC\tc\tDali University Culture Collection\t\t\t",
  "DLY\ts\tDudley and Midland Geological and Scientific Society and Field Club\t\t\t",
  "DM<NZ>\ts\tDominion Museum\t\t\t",
  "DM<USA-UT>\ts\tThe Dinosaur Museum\t\t\t",
@@ -1821,6 +1826,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "ECK\ts\tBuffalo State College\t\t\t",
  "ECM\ts\tHubei College of Traditional Chinese Medicine, Department of Chinese Materia Medica\t\t\t",
  "ECNB\ts\tEscuela Nacional Ciencias\t\t\t",
+"ECNU\ts\tMuseum of Biology, East China Normal University, School of Life Sciences\t\t\t",
  "ECOCHM\ts\tColeccion de Mamiferos del Museo de Zoologia-ECOSUR\t\t\t",
  "ECOL\ts\tCollection du Laborataire d'Ecologie\t\t\t",
  "ECOMAR<FRA>\ts\tECOMAR lab University of Reunion\t\t\t",
@@ -1844,7 +1850,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "EELM\ts\tEstacion Experimental Agricola de la Molina\t\t\t",
  "EERU\ts\tEconomic Entomology  Research Unit\t\t\t",
  "EFC\ts\tEscola de Florestas\t\t\t",
-"EFCC\ts\tEpping Forest Conservation Centre\t\t\t",
+"EFCC\tc\tEntomopathogenic Fungal Collection\t\t\t",
  "EFH\ts\tForestry Commission\t\t\t",
  "EFM\ts\tEpping Forest Museum, Corporation of London\t\t\t",
  "EFWM\ts\tDepartment of Entomology\t\t\t",
@@ -1971,7 +1977,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "F\ts\tField Museum of Natural History, Botany Department\tFMNH:F\t\t",
  "FAA\ts\tUniversidad Nacional del Centro de la Provincia de Buenos Aires\t\t\t",
  "FABR\ts\tHarmas de J. H. Fabre\t\t\t",
-"FACHB\tc\tFreshwater Algae Culture Collection\t\t\t",
+"FACHB\tc\tFreshwater Algae Culture Collection\tCHAB<China> \t\t",
  "FACS\ts\tFujian Agricultural College\t\t\t",
  "FAK\ts\tDepartment of Fisheries, Faculty of Agriculture\t\t\t",
  "FAKOU\ts\tFaculty of Agriculture, Kochi Univerisity\t\t\t",
@@ -3710,6 +3716,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "LECB\ts\tSaint Petersburg State University, Botany Department\t\t\t",
  "LEDLIE\ts\tPatricia Ledlie Herbarium\t\t\t",
  "LEF\ts\tEconomic Forestry Institute of Liaoning Province\t\t\t",
+"LEGEcc\tc\tBlue Biotechnology and Ecotoxicology Culture Collection\t\t\t",
  "LEH\ts\tLehigh University\t\t\t",
  "LEI\ts\tLeicester Literary and Philosophical Society\t\t\t",
  "LEISHCRYOBANK\tc\tInternational Cryobank of Leishmania\t\t\t",
@@ -3961,7 +3968,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "MADM\ts\tMuseu Municipal do Funchal\t\t\t",
  "MADS\ts\tMuseu de Historia Natural do Seminario do Funchal\t\t\t",
  "MAF\ts\tUniversidad Complutense, Departamento de Biologia Vegetal II\t\t\t",
-"MAFF\tc\tMAFF Genebank, Ministry of Agriculture Forestry and Fisheries\t\t\t",
+"MAFF\tc\tMAFF Genebank, Ministry of Agriculture Forestry and Fisheries\t\thttps://www.gene.affrc.go.jp/databases-micro_search_detail_en.php?maff=\t",
  "MAFF<FJI>\ts\tColo-i-Suva Silvicultural Station\t\t\t",
  "MAFI\ts\tMagyar Allami Foeldtani Intezet, Budapest - Hungarian Geological Survey\t\t\t",
  "MAFST\ts\tInstituto Forestal de la Moncloa\t\t\t",
@@ -4152,6 +4159,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "MDTN\ts\tMiddleton Botanical Society\t\t\t",
  "MDUG\ts\tUniversidad Guanajuato, Museo Alfredo Duges\t\t\t",
  "MDZAU\ts\tMuseum Deptartment of Zoology\t\t\t",
+"MEAN\tc\tMicoteca da Estacao Agronomica Nacional\t\t\t",
  "MECB\ts\tUniversidade Federal de Pelotas, Museu Entomologico Ceslau Biezanko\t\t\t",
  "MECG\ts\tMedical Entomology Collection Gallery\t\t\t",
  "MECN\ts\tMuseo Ecuadoriano de Ciencias Naturales\tDHMECN\t\t",
@@ -4616,6 +4624,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "MU<TUR>\tc\tMugla Sitki Kocman University\t\t\t",
  "MU<USA-OH>\ts\tMiami University, Botany Department, Willard Sherman Turrell Herbarium\t\thttp://herbarium.muohio.edu/herbariummu/\t",
  "MU<USA-TX>\ts\tMidwestern University\t\t\t",
+"MUA-AVP\ts\tMuseo Universitario de la Universidad de Antioquia\t\t\t",
  "MUACC\tc\tMurdoch University Algal Culture Collection\t\t\t",
  "MUAF\tc\tCulture collection of Mendel University of Agriculture and Forestry in Brno\t\t\t",
  "MUAP\ts\tMuseo del Mar Universidad Arturo Prat\t\t\t",
@@ -4710,6 +4719,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "MZCR\ts\tMuseo de Zoologia\t\t\t",
  "MZFC\ts\tMuseo de Zoologia \"Alfonso L. Herrera\"\t\t\t",
  "MZFN\ts\tMuseo Zoologico dell'Universita \"Federico II\"\t\t\t",
+"MZFS-DAR\ts\tMuseu de Zoologia da Universidade Estadual de Feira de Santana\t\t\t",
  "MZGZ\ts\tMuseum Zoologia del Giardino Zoologico\t\t\t",
  "MZH\ts\tZoolgical Museum, Finnish Museum of Natural History\t\t\t",
  "MZKI\tc\tMicrobial Culture Collection of National Institute of Chemistry\t\t\t",
@@ -5000,6 +5010,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "NLU\ts\tUniversity of Louisiana at Monroe, Museum of Natural History\t\t\t",
  "NLUH\ts\tUniversity of the Philippines College Baguio\t\t\t",
  "NM\ts\tNorthern Michigan University, Biology Department\t\t\t",
+"NMA\ts\tNational Museum Australia -\t\t\t",
  "NMAC\ts\tInner Mongolia Agricultural University, Department of Pratacultural Science\t\t\t",
  "NMAG\ts\tNaturhistorisches Museum, Augsburg\t\t\t",
  "NMB\tc\tNingbo Marine Biotechnology\t\t\t",
@@ -5167,10 +5178,11 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "NRC<EGY>\ts\tNational Research Centre\t\t\t",
  "NRCC\ts\tNational Research Council of Canada\t\t\t",
  "NRCS\tc\tNational Reference Center for Streptococci in Aachen\t\t\t",
+"NRI\ts\tTexas A&M Natural Resources Institute\t\t\t",
  "NRIBAS\ts\tNational Research Institute of Biology, Academia Sinica\t\t\t",
  "NRIC\tc\tNODAI Research Institute Culture Collection\t\t\t",
  "NRL\tc\tNeisseria Reference Laboratory\t\t\t",
-"NRM\ts\tSwedish Museum of Natural History\t\t\t",
+"NRM\ts\tSwedish Museum of Natural History\tSMNH\t\t",
  "NRN\ts\tNairn Literary Society Library, Public Library\t\t\t",
  "NRNZ\ts\tNorthland Regional Museum\t\t\t",
  "NRPSU\tc\tDepartment of Agro-industry, Faculty of Natural Resources\t\t\t",
@@ -5220,9 +5232,10 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "NTOU\tsc\tInstitute of Marine Biology, National Taiwan Ocean University\t\t\t",
  "NTS\ts\tNevada Operations Office, U.S. Department of Energy\t\t\t",
  "NTSC\ts\tUniversity of North Texas, Biological Sciences Department\t\t\t",
+"NTUCC\tc\tPlant Pathology and Microbiology, National Taiwan University Culture Collection\t\t\t",
  "NTUF\ts\tNational Taiwan University, Forestry Department\t\t\t",
-"NTUM\ts\tNational Taiwan University\t\t\t",
-"NTUMA\ts\tNational Taiwan University\t\t\t",
+"NTUH\ts\tHerbarium of the Department of Plant Pathology and Microbiology, National Taiwan University\t\t\t",
+"NTUM\ts\tNational Taiwan University Museum\t\t\t",
  "NU<THA>\tc\tDepartment of Microbiology, Faculty of Science\t\t\t",
  "NU<ZAF>\ts\tUniversity of Natal, School of Botany and Zoology\t\t\t",
  "NUA\tc\tDepartment of Microbiology, National University of Athens\t\t\t",
@@ -7165,7 +7178,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "UHCC\tc\tUniversity of Helsinki Cyanobacteria Culture Collection\t\t\t",
  "UHI\ts\tUssishkin House, Botany Department\t\t\t",
  "UHM\ts\tManoa, College of Tropical Agriculture, Department of Entomology\t\t\t",
-"UI<NGA>\ts\tUniversity of Ibadan\t\t\t",
+"UI<NGA>\tsc\tUniversity of Ibadan\t\t\t",
  "UI<USA-UT>\ts\tBureau of Land Management (Uinta Herbarium)\t\t\t",
  "UICC\tc\tUniversity of Indonesia Culture Collection\t\t\t",
  "UIDA\ts\tUniversity of Idaho, Bird and Mammal Museum\t\t\t",
@@ -7358,7 +7371,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "UPEI\ts\tUniversity of Prince Edward Island, Biology Department\t\t\t",
  "UPF\ts\tUniversite de Polynesie Francaise Herbarium\t\t\t",
  "UPIE\tb\tUnidad de Patologia Infecciosa y Epidemiologia\t\t\t",
-"UPLB\ts\tMuseum of Natural History, University of the Philippines\t\t\t",
+"UPLB\ts\tUniversity of Philippines Los Banos\t\t\t",
  "UPM<FRA>\ts\tDepartement des Siences de la Terre\t\t\t",
  "UPM<MYS>\ts\tUniversiti Pertanian Malaysia, Biology Department\t\t\t",
  "UPM<RUS>\ts\tUdory Paleontological Museum\t\t\t",
@@ -7652,7 +7665,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "VPCI\tc\tFungal Culture Collection\t\t\t",
  "VPH\ts\tVan Pharmaceutical Herbarium, Yuzuncu Yil University\t\t\t",
  "VPI\tsc\tVirginia Polytechnic Institute and State University\tVTMH\t\t",
-"VPI:F\ts\tVirginia Polytechnic Institute and State University, Fungal Collection\t",
+"VPI:F\ts\tVirginia Polytechnic Institute and State University, Fungal Collection\t\t",
  "VPIC\ts\tVirginia Polytechnic Institute and State University\t\t\t",
  "VPIMM\ts\tVirginia Polytechnic University, Mammal Museum\t\t\t",
  "VPM\ts\tVolgograd Provincial Museum\t\t\t",
@@ -7747,6 +7760,7 @@ static const char* const kInstitutionCollectionCodeList[] = {
  "WFPL\tc\tWestern Forest Products Laboratory\t\t\t",
  "WFU\ts\tWake Forest University, Biology Department\t\t\t",
  "WFUVC\ts\tWake Forest University, Vertebrate Collection\t\t\t",
+"WFVZ\ts\tWestern Foundation of Vertebrate Zoology\t\t\t",
  "WGC\ts\tState University of West Georgia, Biology Department\t\t\t",
  "WGCH\ts\tWilton Garden Club\t\t\t",
  "WGD\ts\tWashington Game Department\t\t\t",
diff --git a/c++/src/objects/seqfeat/institution_codes.txt b/c++/src/objects/seqfeat/institution_codes.txt

index 03d296c1c59ea29fb0e7256ec826651eafd8c2ca..e63a4495910d2adca64af1a844d4c821b4baafc0 100644 (file)
--- a/c++/src/objects/seqfeat/institution_codes.txt
+++ b/c++/src/objects/seqfeat/institution_codes.txt
@@ -1,4 +1,4 @@
-# $Id: institution_codes.txt 607542 2020-05-05 14:51:12Z ivanov $
+# $Id: institution_codes.txt 616908 2020-09-22 18:24:46Z ivanov $
  A      s       Arnold Arboretum, Harvard University                    
  AA     s       Ministry of Science, Academy of Sciences                        
  AAC    c       Arignar Anna College                    
@@ -41,11 +41,11 @@ ABTC        s       Australian Biological Tissue Collection, South Australian Museum        SAMA:ABT
  ABTRI  c       Apex Biotechnology Training and Research Institute                      
  ABU<NGA>       s       Ahmadu Bello University Herbarium                       
  AC     s       Amherst College                 
-ACA    s       Agricultural University of Athens                       
  ACA-DC c       Greek Coordinated Collections of Microorganisms                 
  ACAD   s       Acadia University, K. C. Irving Environmental Science Centre & Harriet Irving Botanical Gardens                 
  ACAD<AUS>      sb      Australian Centre for Ancient DNA                       
  ACAM   c       The Australian Collection of Antarctic Microorganisms, Cooperative Research Center for the Antarctic and Southern Ocean Environment                     
+ACAM<GRC>      s       Agricultural University of Athens       ACA             
  ACAP   s       Aquaculture Center of Aomori Prefecture                 
  ACBC   s       Agriculture Canada Research Station                     
  ACBR   c       Austrian Center of Biological Resources and Applied Mycology                    
@@ -432,7 +432,7 @@ BCCM/IHEM   c       Belgian Coordinated Collections of Microorganisms / IHEM Fungi colle
  BCCM/ITM       c       Belgian Coordinated Collections of Microorganisms / ITM Mycobacteria Collection ITM             
  BCCM/LMG       c       Belgian Coordinated Collections of Microorganisms/ LMG Bacteria Collection      LMG     http://bccm.belspo.be/catalogues/lmg-strain-details?NUM=        
  BCCM/MUCL      c       Belgian Coordinated Collections of Microorganisms / MUCL Agro-food & Environmental Fungal Collection    MUCL            
-BCCM/ULC       c       Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection        ULC     http://bccm.belspo.be/catalogues/ulc-strain-details?ACCESSION_NUMBER=   
+BCCM/ULC       c       Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection        ULC     https://bccm.belspo.be/catalogues/bm-details?accession_number=  
  BCCN   c       Brucella Culture Collection                     
  BCCUSP c       Brazilian Cyanobacteria Collection - University of Sao Paulo                    
  BCF    s       Universitat de Barcelona, Laboratori de Botanica                        
@@ -649,7 +649,7 @@ BP  s       Hungarian Natural History Museum, Botanical Department
  BPBM   s       Bernice P. Bishop Museum        BISHOP          
  BPBM:Fish      s       Bernice P. Bishop Museum, Fish Collection               
  BPBM:IZ        s       Bernice P. Bishop Museum, Invertebrate Zoology          
-BPI    sc      U.S. National Fungus Collections, Systematic Botany and Mycology Laboratory             http://nt.ars-grin.gov/fungaldatabases/specimens/new_rptSpecimenOneRec.cfm?thisrec=BPI+&spec;   
+BPI    sc      U.S. National Fungus Collections, Systematic Botany and Mycology Laboratory                     
  BPI<ZAF>       s       Bernard Price Institute for Palaeontological Research                   
  BPIC   c       Benaki Phytopathological Institute Collection                   
  BPL    s       Museum of Barnstaple & North Devon                      
@@ -939,6 +939,7 @@ CCIM        c       Culture Collection of Industrial Microorganisms
  CCM    c       Czech Collection of Microorganisms      CCM<CZE>                
  CCM-A  c       Coleccion de Cultivos Microbianos                       
  CCM-CIBE       c       Escuela Superior Politecnica del Litoral                        
+CCM-UFV        c       Collection of Cyanobacteria and Microalgae at the Universidade Federal de Vicosa                        
  CCM<CHN>       s       Changchun College of Traditional Chinese Medicine, Department of Chinese Materia Medica                 
  CCM<USA-MT>    s       Carter County Museum                    
  CCMA-UFSCar    c       Culture Collection of Freshwater Microalgae                     
@@ -1043,7 +1044,7 @@ CGE       s       Cambridge University, Department of Plant Sciences
  CGEC   s       China Entomological Research Institute                  
  CGG    s       Cambridge University Botanic Garden                     
  CGH    s       National Museum of Prague                       
-CGMCC  c       China General Microbiological Culture Collection Center AS              
+CGMCC  c       China General Microbiological Culture Collection Center AS      http://www.cgmcc.net/english/cata.php?stn=CGMCC%20      
  CGMS   s       Universidade Federal de Mato Grosso do Sul, Departamento de Biologia                    
  CGN    s       Centre for Genetic Resources, The Netherlands                   
  CGRIS  b       Chinese Crop Germplasm Resources Information Network                    
@@ -1065,6 +1066,7 @@ CHEL      s       Chelsea Physic Garden
  CHELB  s       Cheltenham College for Boys                     
  CHEP   s       Escuela Superior Politecnica del Chimborazo                     
  CHER   s       Yu. Fedcovich Chernivtsi State University, Botany Department                    
+CHFC-EA        sc      Chilean Fungal Collection       CHFC,ChFC               
  CHFD   s       Chelmsford and Essex Museum                     
  CHI    s       University of Illinois, Biological Sciences Department                  
  CHIA   s       National Chiayi Agricultural College, Forestry Department                       
@@ -1106,7 +1108,7 @@ CIBIO     s       Centro de Investigacao em Biodiversidade e Recursos Geneticos
  CIBM   s       Centro Invest. Biol. Noroeste                   
  CIC    s       Albertson College of Idaho, Biology Department                  
  CICC   c       China Center for Industrial Culture Collection                  
-CICCM  c       Cawthron Institute Culture Collection of Micro-algae                    
+CICCM  c       Cawthron Institute Culture Collection of Micro-algae    CAWD            
  CICESE s       Centro de Investigacion Cientifica y de Educacion Superior de Ensenada                  
  CICIM  c       Culture and Information Centre of Industrial Microorganisms of China's Univeristies                     
  CICIMAR        s       Centro Interdisciplinario de Ciencias Marinas                   
@@ -1389,6 +1391,7 @@ CSAEG     c       Culture Collection of Phytopathogenic Fungi  at the Colegio Superior Agr
  CSAT   s       Colegio de Postgraduados, Campus Tabasco                        
  CSAU   s       National Agrarian University, Southern Branch "Crimean Agrotechnological University", Department of Botany, Plant Physiology and Genetics                       
  CSB    s       St. John's University/College of Saint Benedict, Biology Department                     
+CSBD   s       Centre for Study of Biological Diversity                        
  CSC    s       Colegio del Sagrado Corazon                     
  CSC-CLCH       c       Centro Substrati Cellulari, Cell Lines Collection and Hybridomas                        
  CSCA   s       California State Collection of Arthropods                       
@@ -1441,6 +1444,7 @@ CTN       s       Free Library and Museum
  CTNRC  s       Thai National Reference Collections                     
  CTR    s       Charles T. Ramsden historical collection                        
  CTS    s       Chongqing Teachers College                      
+CTUA   s       Coleccio&#769;n Teriolo&#769;gica de la Universidad de Antioquia                        
  CTY    s       Canterbury Literary and Philosophical Institution                       
  CU     sb      Cornell University                      
  CUAC   s       Clemson University                      
@@ -1625,6 +1629,7 @@ DIX       s       Dixie College, Natural History Museum
  DKG    s       Juniper Hall Field Centre                       
  DLF    s       Stetson University, Biology Department                  
  DLU    s       Da Lat University                       
+DLUCC  c       Dali University Culture Collection                      
  DLY    s       Dudley and Midland Geological and Scientific Society and Field Club                     
  DM<NZ> s       Dominion Museum                 
  DM<USA-UT>     s       The Dinosaur Museum                     
@@ -1788,6 +1793,7 @@ ECH       s       Elmira College
  ECK    s       Buffalo State College                   
  ECM    s       Hubei College of Traditional Chinese Medicine, Department of Chinese Materia Medica                     
  ECNB   s       Escuela Nacional Ciencias                       
+ECNU   s       Museum of Biology, East China Normal University, School of Life Sciences                        
  ECOCHM s       Coleccion de Mamiferos del Museo de Zoologia-ECOSUR                     
  ECOL   s       Collection du Laborataire d'Ecologie                    
  ECOMAR<FRA>    s       ECOMAR lab University of Reunion                        
@@ -1811,7 +1817,7 @@ EEBP      s       Estacao Experimental de Biologia e Piscicultura de Pirassununga
  EELM   s       Estacion Experimental Agricola de la Molina                     
  EERU   s       Economic Entomology  Research Unit                      
  EFC    s       Escola de Florestas                     
-EFCC   s       Epping Forest Conservation Centre                       
+EFCC   c       Entomopathogenic Fungal Collection                      
  EFH    s       Forestry Commission                     
  EFM    s       Epping Forest Museum, Corporation of London                     
  EFWM   s       Department of Entomology                        
@@ -1938,7 +1944,7 @@ EXR       s       University of Exeter, Biological Sciences Department
  F      s       Field Museum of Natural History, Botany Department      FMNH:F          
  FAA    s       Universidad Nacional del Centro de la Provincia de Buenos Aires                 
  FABR   s       Harmas de J. H. Fabre                   
-FACHB  c       Freshwater Algae Culture Collection                     
+FACHB  c       Freshwater Algae Culture Collection     CHAB<China>             
  FACS   s       Fujian Agricultural College                     
  FAK    s       Department of Fisheries, Faculty of Agriculture                 
  FAKOU  s       Faculty of Agriculture, Kochi Univerisity                       
@@ -3677,6 +3683,7 @@ LEC       s       Universita degli Studi di Lecce, Dipartimento di Biologia
  LECB   s       Saint Petersburg State University, Botany Department                    
  LEDLIE s       Patricia Ledlie Herbarium                       
  LEF    s       Economic Forestry Institute of Liaoning Province                        
+LEGEcc c       Blue Biotechnology and Ecotoxicology Culture Collection                 
  LEH    s       Lehigh University                       
  LEI    s       Leicester Literary and Philosophical Society                    
  LEISHCRYOBANK  c       International Cryobank of Leishmania                    
@@ -3928,7 +3935,7 @@ MADJ      s       Jardim Botanico da Madeira
  MADM   s       Museu Municipal do Funchal                      
  MADS   s       Museu de Historia Natural do Seminario do Funchal                       
  MAF    s       Universidad Complutense, Departamento de Biologia Vegetal II                    
-MAFF   c       MAFF Genebank, Ministry of Agriculture Forestry and Fisheries                   
+MAFF   c       MAFF Genebank, Ministry of Agriculture Forestry and Fisheries           https://www.gene.affrc.go.jp/databases-micro_search_detail_en.php?maff= 
  MAFF<FJI>      s       Colo-i-Suva Silvicultural Station                       
  MAFI   s       Magyar Allami Foeldtani Intezet, Budapest - Hungarian Geological Survey                 
  MAFST  s       Instituto Forestal de la Moncloa                        
@@ -4119,6 +4126,7 @@ MDRG      s       Museum voor Dierkunde, Rijksuniversiteit
  MDTN   s       Middleton Botanical Society                     
  MDUG   s       Universidad Guanajuato, Museo Alfredo Duges                     
  MDZAU  s       Museum Deptartment of Zoology                   
+MEAN   c       Micoteca da Estacao Agronomica Nacional                 
  MECB   s       Universidade Federal de Pelotas, Museu Entomologico Ceslau Biezanko                     
  MECG   s       Medical Entomology Collection Gallery                   
  MECN   s       Museo Ecuadoriano de Ciencias Naturales DHMECN          
@@ -4583,6 +4591,7 @@ MTUF      s       University Museum, Tokyo University of Fisheries
  MU<TUR>        c       Mugla Sitki Kocman University                   
  MU<USA-OH>     s       Miami University, Botany Department, Willard Sherman Turrell Herbarium          http://herbarium.muohio.edu/herbariummu/        
  MU<USA-TX>     s       Midwestern University                   
+MUA-AVP        s       Museo Universitario de la Universidad de Antioquia                      
  MUACC  c       Murdoch University Algal Culture Collection                     
  MUAF   c       Culture collection of Mendel University of Agriculture and Forestry in Brno                     
  MUAP   s       Museo del Mar Universidad Arturo Prat                   
@@ -4677,6 +4686,7 @@ MZCP      s       Universidade de Coimbra
  MZCR   s       Museo de Zoologia                       
  MZFC   s       Museo de Zoologia "Alfonso L. Herrera"                  
  MZFN   s       Museo Zoologico dell'Universita "Federico II"                   
+MZFS-DAR       s       Museu de Zoologia da Universidade Estadual de Feira de Santana                  
  MZGZ   s       Museum Zoologia del Giardino Zoologico                  
  MZH    s       Zoolgical Museum, Finnish Museum of Natural History                     
  MZKI   c       Microbial Culture Collection of National Institute of Chemistry                 
@@ -4967,6 +4977,7 @@ NLSN      s       Notre Dame University, Biological Sciences Department
  NLU    s       University of Louisiana at Monroe, Museum of Natural History                    
  NLUH   s       University of the Philippines College Baguio                    
  NM     s       Northern Michigan University, Biology Department                        
+NMA    s       National Museum Australia -                     
  NMAC   s       Inner Mongolia Agricultural University, Department of Pratacultural Science                     
  NMAG   s       Naturhistorisches Museum, Augsburg                      
  NMB    c       Ningbo Marine Biotechnology                     
@@ -5134,10 +5145,11 @@ NRC     c       Division of Biological Sciences, National Research Council of Canada
  NRC<EGY>       s       National Research Centre                        
  NRCC   s       National Research Council of Canada                     
  NRCS   c       National Reference Center for Streptococci in Aachen                    
+NRI    s       Texas A&M Natural Resources Institute                   
  NRIBAS s       National Research Institute of Biology, Academia Sinica                 
  NRIC   c       NODAI Research Institute Culture Collection                     
  NRL    c       Neisseria Reference Laboratory                  
-NRM    s       Swedish Museum of Natural History                       
+NRM    s       Swedish Museum of Natural History       SMNH            
  NRN    s       Nairn Literary Society Library, Public Library                  
  NRNZ   s       Northland Regional Museum                       
  NRPSU  c       Department of Agro-industry, Faculty of Natural Resources                       
@@ -5187,9 +5199,10 @@ NTNU-VM  s       Norwegian University of Science and Technology, Museum of Natural Hist
  NTOU   sc      Institute of Marine Biology, National Taiwan Ocean University                   
  NTS    s       Nevada Operations Office, U.S. Department of Energy                     
  NTSC   s       University of North Texas, Biological Sciences Department                       
+NTUCC  c       Plant Pathology and Microbiology, National Taiwan University Culture Collection                 
  NTUF   s       National Taiwan University, Forestry Department                 
-NTUM   s       National Taiwan University                      
-NTUMA  s       National Taiwan University                      
+NTUH   s       Herbarium of the Department of Plant Pathology and Microbiology, National Taiwan University                     
+NTUM   s       National Taiwan University Museum                       
  NU<THA>        c       Department of Microbiology, Faculty of Science                  
  NU<ZAF>        s       University of Natal, School of Botany and Zoology                       
  NUA    c       Department of Microbiology, National University of Athens                       
@@ -7132,7 +7145,7 @@ UH        s       University of Hawaii
  UHCC   c       University of Helsinki Cyanobacteria Culture Collection                 
  UHI    s       Ussishkin House, Botany Department                      
  UHM    s       Manoa, College of Tropical Agriculture, Department of Entomology                        
-UI<NGA>        s       University of Ibadan                    
+UI<NGA>        sc      University of Ibadan                    
  UI<USA-UT>     s       Bureau of Land Management (Uinta Herbarium)                     
  UICC   c       University of Indonesia Culture Collection                      
  UIDA   s       University of Idaho, Bird and Mammal Museum                     
@@ -7325,7 +7338,7 @@ UPCT      s       Universidad Politecnica De Cartagena
  UPEI   s       University of Prince Edward Island, Biology Department                  
  UPF    s       Universite de Polynesie Francaise Herbarium                     
  UPIE   b       Unidad de Patologia Infecciosa y Epidemiologia                  
-UPLB   s       Museum of Natural History, University of the Philippines                        
+UPLB   s       University of Philippines Los Banos                     
  UPM<FRA>       s       Departement des Siences de la Terre                     
  UPM<MYS>       s       Universiti Pertanian Malaysia, Biology Department                       
  UPM<RUS>       s       Udory Paleontological Museum                    
@@ -7619,7 +7632,7 @@ VPB       c       Veterinary Pathology and Bacteriology Collection
  VPCI   c       Fungal Culture Collection                       
  VPH    s       Van Pharmaceutical Herbarium, Yuzuncu Yil University                    
  VPI    sc      Virginia Polytechnic Institute and State University     VTMH            
-VPI:F  s       Virginia Polytechnic Institute and State University, Fungal Collection  
+VPI:F  s       Virginia Polytechnic Institute and State University, Fungal Collection          
  VPIC   s       Virginia Polytechnic Institute and State University                     
  VPIMM  s       Virginia Polytechnic University, Mammal Museum                  
  VPM    s       Volgograd Provincial Museum                     
@@ -7714,6 +7727,7 @@ WFIS      s       Wagner Free Institute of Science
  WFPL   c       Western Forest Products Laboratory                      
  WFU    s       Wake Forest University, Biology Department                      
  WFUVC  s       Wake Forest University, Vertebrate Collection                   
+WFVZ   s       Western Foundation of Vertebrate Zoology                        
  WGC    s       State University of West Georgia, Biology Department                    
  WGCH   s       Wilton Garden Club                      
  WGD    s       Washington Game Department                      
diff --git a/c++/src/objects/seqfeat/lat_lon_country.inc b/c++/src/objects/seqfeat/lat_lon_country.inc

index 4c7f464cc34564afdf0da45c6df9057b1f15a562..551edebd369bc04fd7276b5f0112419359d0f094 100644 (file)
--- a/c++/src/objects/seqfeat/lat_lon_country.inc
+++ b/c++/src/objects/seqfeat/lat_lon_country.inc
@@ -1,4 +1,4 @@
-/*  $Id: lat_lon_country.inc 599818 2020-01-07 20:09:07Z kans $
+/*  $Id: lat_lon_country.inc 612552 2020-07-23 15:34:00Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
diff --git a/c++/src/objects/seqloc/Seq_id.cpp b/c++/src/objects/seqloc/Seq_id.cpp

index 529e63150f8f13dfc0651471a201e4b34cbfae2c..861f3c0ed2b71e104258fe9dbaf9d11e0143fd5d 100644 (file)
--- a/c++/src/objects/seqloc/Seq_id.cpp
+++ b/c++/src/objects/seqloc/Seq_id.cpp
@@ -1,4 +1,4 @@
-/* $Id: Seq_id.cpp 603822 2020-03-17 17:37:01Z ivanov $
+/* $Id: Seq_id.cpp 617367 2020-09-30 12:57:11Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -2276,8 +2276,10 @@ SIZE_TYPE CSeq_id::ParseIDs(CBioseq::TId& ids, const CTempString& s,
          E_Choice     type = WhichInverseSeqId(fasta_pieces.front());
          ETypeVariant tv;
          if (type == e_not_set) {
-            // unknown database are reported as 'general'
-            type = e_General;
+            if (fasta_pieces.size() == 2) {
+                // unknown database are reported as 'general'
+                type = e_General;
+            }
              tv   = eTV_plain;
          } else {
              tv = x_IdentifyTypeVariant(type, fasta_pieces.front());
@@ -2306,8 +2308,19 @@ SIZE_TYPE CSeq_id::ParseIDs(CBioseq::TId& ids, const CTempString& s,
                  ids.push_back(id);
                  ++count;
              } catch (std::exception& e) {
+                if (fasta_pieces.empty()) {
+                    throw;
+                }
                  if ((flags & fParse_PartialOK) != 0) {
                      ERR_POST_X(7, Warning << e.what());
+                    do {
+                        auto l = fasta_pieces.front().size();
+                        if (l != 2  &&  l != 3) {
+                            fasta_pieces.pop_front();
+                        } else {
+                            break;
+                        }
+                    } while ( !fasta_pieces.empty() );
                  } else {
                      throw;
                  }
diff --git a/c++/src/objects/seqloc/accguide.inc b/c++/src/objects/seqloc/accguide.inc

index 2038a529b3d64b742eb074f3d665ea2c6e203b03..84b0dece4d8cbbfe6ea36cf12d2eaa0b87b937d0 100644 (file)
--- a/c++/src/objects/seqloc/accguide.inc
+++ b/c++/src/objects/seqloc/accguide.inc
@@ -1,4 +1,4 @@
-/*  $Id: accguide.inc 603797 2020-03-17 13:51:04Z ucko $
+/*  $Id: accguide.inc 615212 2020-08-28 13:43:44Z ucko $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -31,7 +31,7 @@
   */
  
  static const char* const kBuiltInGuide[] = {
-    "# $Id: accguide.inc 603797 2020-03-17 13:51:04Z ucko $",
+    "# $Id: accguide.inc 615212 2020-08-28 13:43:44Z ucko $",
      "version  1 # of file format",
      "",
      "# three-letter-prefix protein accessions (traditionally with five digits)",
@@ -1181,7 +1181,7 @@ static const char* const kBuiltInGuide[] = {
      "2+6  LQ  embl_patent",
      "2+8  LQ  embl_other_nuc",
      "2+10 LQ  embl_other_nuc",
-    "2+6  LR  embl_dirsub # embl_patent?",
+    "2+6  LR  embl_dirsub * # embl_patent?",
      "2+8  LR  embl_other_nuc",
      "2+10 LR  embl_other_nuc",
      "2+6  LS  embl_dirsub # embl_patent?",
@@ -1220,7 +1220,7 @@ static const char* const kBuiltInGuide[] = {
      "2+6  MD  ddbj_patent",
      "2+8  MD  ddbj_other_nuc",
      "2+10 MD  ddbj_other_nuc",
-    "2+6  ME  ddbj_other_nuc",
+    "2+6  ME  ddbj_patent",
      "2+8  ME  ddbj_other_nuc",
      "2+10 ME  ddbj_other_nuc",
      "2+6  MF  gb_dirsub",
@@ -1271,6 +1271,12 @@ static const char* const kBuiltInGuide[] = {
      "2+6  MU  gb_con",
      "2+8  MU  gb_other_nuc",
      "2+10 MU  gb_other_nuc",
+    "2+6  MV  gb_patent",
+    "2+8  MV  gb_other_nuc",
+    "2+10 MV  gb_other_nuc",
+    "2+6  MW  gb_dirsub",
+    "2+8  MW  gb_other_nuc",
+    "2+10 MW  gb_other_nuc",
      "2+6  M?  gb_other_nuc",
      "2+8  M?  gb_other_nuc",
      "2+10 M?  gb_other_nuc",
@@ -3508,6 +3514,9 @@ static const char* const kBuiltInGuide[] = {
      "special  LN901386-LN901412  embl_est",
      "",
      "# Nominally embl_dirsub.",
+    "special  LR594708-LR594709  embl_tpa_nuc",
+    "",
+    "# Nominally embl_dirsub.",
      "special  LT159851-LT159865  embl_est",
      "special  LT548096-LT548244  embl_tpa_nuc",
      "special  LT556286-LT558089  embl_est",
@@ -3522,7 +3531,15 @@ static const char* const kBuiltInGuide[] = {
      "special  OB000001-OB660024  embl_con",
      "",
      "# Some \"EMBL\" WGS nucleotide accessions are really third-party annotations.",
-    "special  CAADVW000000000-CAADVX999999999  embl_tpa_wgs_nuc # 6+9",
+    "special  CAADSF000000000-CAADSF999999999  embl_tpa_wgs_nuc # 6+9",
+    "special  CAADSM000000000-CAAGJX999999999  embl_tpa_wgs_nuc # 6+9",
+    "special  CAAGKD000000000-CAAGKQ999999999  embl_tpa_wgs_nuc # 6+9",
+    "special  CAAGKS000000000-CAAGRI999999999  embl_tpa_wgs_nuc # 6+9",
+    "special  CAAGRK000000000-CAAGSH999999999  embl_tpa_wgs_nuc # 6+9",
+    "special  CAAHDL000000000-CAAHDL999999999  embl_tpa_wgs_nuc # 6+9",
+    "special  CAAHDO000000000-CAAHFA999999999  embl_tpa_wgs_nuc # 6+9",
+    "special  CADEPO000000000-CADEVH999999999  embl_tpa_wgs_nuc # 6+9",
+    "special  CADEVJ000000000-CADFGZ999999999  embl_tpa_wgs_nuc # 6+9",
      "",
      "# Some \"EMBL\" 8-character protein accessions are really third-party",
      "# annotations.",
diff --git a/c++/src/objects/seqloc/accguide.txt b/c++/src/objects/seqloc/accguide.txt

index cc4b76339b4d50678caefa8c49bb8c83dece9624..7b6107ea44e4dc11f5ba0c71c29160412ace547b 100644 (file)
--- a/c++/src/objects/seqloc/accguide.txt
+++ b/c++/src/objects/seqloc/accguide.txt
@@ -1,4 +1,4 @@
-# $Id: accguide.txt 603797 2020-03-17 13:51:04Z ucko $
+# $Id: accguide.txt 615212 2020-08-28 13:43:44Z ucko $
  version  1 # of file format
  
  # three-letter-prefix protein accessions (traditionally with five digits)
@@ -1148,7 +1148,7 @@ version  1 # of file format
  2+6  LQ  embl_patent
  2+8  LQ  embl_other_nuc
  2+10 LQ  embl_other_nuc
-2+6  LR  embl_dirsub # embl_patent?
+2+6  LR  embl_dirsub * # embl_patent?
  2+8  LR  embl_other_nuc
  2+10 LR  embl_other_nuc
  2+6  LS  embl_dirsub # embl_patent?
@@ -1187,7 +1187,7 @@ version  1 # of file format
  2+6  MD  ddbj_patent
  2+8  MD  ddbj_other_nuc
  2+10 MD  ddbj_other_nuc
-2+6  ME  ddbj_other_nuc
+2+6  ME  ddbj_patent
  2+8  ME  ddbj_other_nuc
  2+10 ME  ddbj_other_nuc
  2+6  MF  gb_dirsub
@@ -1238,6 +1238,12 @@ version  1 # of file format
  2+6  MU  gb_con
  2+8  MU  gb_other_nuc
  2+10 MU  gb_other_nuc
+2+6  MV  gb_patent
+2+8  MV  gb_other_nuc
+2+10 MV  gb_other_nuc
+2+6  MW  gb_dirsub
+2+8  MW  gb_other_nuc
+2+10 MW  gb_other_nuc
  2+6  M?  gb_other_nuc
  2+8  M?  gb_other_nuc
  2+10 M?  gb_other_nuc
@@ -3474,6 +3480,9 @@ special  LN898187-LN898198  embl_tpa_nuc
  special  LN901194-LN901210  embl_tpa_nuc
  special  LN901386-LN901412  embl_est
  
+# Nominally embl_dirsub.
+special  LR594708-LR594709  embl_tpa_nuc
+
  # Nominally embl_dirsub.
  special  LT159851-LT159865  embl_est
  special  LT548096-LT548244  embl_tpa_nuc
@@ -3489,7 +3498,15 @@ special  LT990249-LT990597  embl_tpa_nuc
  special  OB000001-OB660024  embl_con
  
  # Some "EMBL" WGS nucleotide accessions are really third-party annotations.
-special  CAADVW000000000-CAADVX999999999  embl_tpa_wgs_nuc # 6+9
+special  CAADSF000000000-CAADSF999999999  embl_tpa_wgs_nuc # 6+9
+special  CAADSM000000000-CAAGJX999999999  embl_tpa_wgs_nuc # 6+9
+special  CAAGKD000000000-CAAGKQ999999999  embl_tpa_wgs_nuc # 6+9
+special  CAAGKS000000000-CAAGRI999999999  embl_tpa_wgs_nuc # 6+9
+special  CAAGRK000000000-CAAGSH999999999  embl_tpa_wgs_nuc # 6+9
+special  CAAHDL000000000-CAAHDL999999999  embl_tpa_wgs_nuc # 6+9
+special  CAAHDO000000000-CAAHFA999999999  embl_tpa_wgs_nuc # 6+9
+special  CADEPO000000000-CADEVH999999999  embl_tpa_wgs_nuc # 6+9
+special  CADEVJ000000000-CADFGZ999999999  embl_tpa_wgs_nuc # 6+9
  
  # Some "EMBL" 8-character protein accessions are really third-party
  # annotations.
diff --git a/c++/src/objects/valerr/ValidErrItem.cpp b/c++/src/objects/valerr/ValidErrItem.cpp

index 077ef84729cdecb96e7e96367e8d9b942dc79ac2..d59d4551687ca43685e5103cac4daa7cef7960b7 100644 (file)
--- a/c++/src/objects/valerr/ValidErrItem.cpp
+++ b/c++/src/objects/valerr/ValidErrItem.cpp
@@ -1,4 +1,4 @@
-/* $Id: ValidErrItem.cpp 597158 2019-11-18 17:58:02Z kans $
+/* $Id: ValidErrItem.cpp 611904 2020-07-13 15:51:08Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -2205,6 +2205,9 @@ same id type" } },
     { eErr_SEQ_FEAT_CDSdoesNotMatchVDJC,
     { "CDSdoesNotMatchVDJC",
       "The CDS does not have a parent VDJ or C segment" } },
+   { eErr_SEQ_FEAT_GeneOnNucPositionOfPeptide,
+   { "GeneOnNucPositionOfPeptide",
+     "Peptide under CDS matches small Gene" } },
  
  /* SEQ_ALIGN */
  
diff --git a/c++/src/objmgr/scope.cpp b/c++/src/objmgr/scope.cpp

index 27d9279679b377ef293ac2dcf9183bc48cab69d5..3e1a9d95a753fa9e2a9a57f764b0f5c5336a5023 100644 (file)
--- a/c++/src/objmgr/scope.cpp
+++ b/c++/src/objmgr/scope.cpp
@@ -1,4 +1,4 @@
-/*  $Id: scope.cpp 603742 2020-03-16 17:25:41Z ivanov $
+/*  $Id: scope.cpp 610058 2020-06-10 16:19:48Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -328,6 +328,18 @@ void CScope::ResetDataAndHistory(ERemoveDataLoaders)
  }
  
  
+void CScope::RemoveFromHistory(const CSeq_id_Handle& seq_id)  // Overload taking a Seq-id handle.
+{
+    m_Impl->RemoveFromHistory(seq_id);  // Thin wrapper: forwards the handle unchanged to the object behind m_Impl.
+}
+
+
+void CScope::RemoveFromHistory(const CSeq_id& seq_id)  // Overload taking a plain CSeq_id.
+{
+    RemoveFromHistory(CSeq_id_Handle::GetHandle(seq_id));  // Convert to a CSeq_id_Handle, then call the handle-based overload.
+}
+
+
  void CScope::RemoveFromHistory(const CBioseq_Handle& bioseq,
                                 EActionIfLocked action)
  {
diff --git a/c++/src/objmgr/scope_impl.cpp b/c++/src/objmgr/scope_impl.cpp

index 4d4439e64699e1a47432c80584a32492513398ab..5d6b86423100456be785d39b39dfcb502801a1b0 100644 (file)
--- a/c++/src/objmgr/scope_impl.cpp
+++ b/c++/src/objmgr/scope_impl.cpp
@@ -1,4 +1,4 @@
-/*  $Id: scope_impl.cpp 602775 2020-03-02 19:52:55Z grichenk $
+/*  $Id: scope_impl.cpp 610058 2020-06-10 16:19:48Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -2587,6 +2587,28 @@ void CScope_Impl::RemoveFromHistory(const CTSE_Handle& tse, int action)
  }
  
  
+void CScope_Impl::RemoveFromHistory(const CSeq_id_Handle& seq_id)  // Reset (and possibly erase) the m_Seq_idMap entry for one Seq-id.
+{
+    if ( !seq_id ) {  // Handle tests false (presumably unset): nothing to do.
+        return;
+    }
+    TConfWriteLockGuard guard(m_ConfLock);  // Guard on m_ConfLock lives until the function returns; the map is modified below.
+    // Clear removed bioseq handles
+    TSeq_idMap::iterator it = m_Seq_idMap.find(seq_id);
+    if ( it != m_Seq_idMap.end() ) {  // An id that is not in the map is silently ignored.
+        it->second.x_ResetAnnotRef_Info();  // Reset the annot-ref info kept on the map entry itself...
+        if ( it->second.m_Bioseq_Info ) {
+            CBioseq_ScopeInfo& binfo = *it->second.m_Bioseq_Info;
+            binfo.x_ResetAnnotRef_Info();  // ...and the one kept on the associated bioseq info.
+            if ( binfo.IsDetached() ) {  // The map entry is erased only when its bioseq info reports detached.
+                binfo.m_SynCache.Reset();
+                m_Seq_idMap.erase(it);  // 'it' is invalid after this; NOTE(review): 'binfo' may dangle too - do not use either below.
+            }
+        }
+    }
+}
+
+
  void CScope_Impl::ResetHistory(int action)
  {
      TConfWriteLockGuard guard(m_ConfLock);
diff --git a/c++/src/objmgr/tse_info.cpp b/c++/src/objmgr/tse_info.cpp

index 230b877404f9170506dedaac8443d9742fa1133c..bd9b4ebe8fa57ba611d8a593837a8169ca387d84 100644 (file)
--- a/c++/src/objmgr/tse_info.cpp
+++ b/c++/src/objmgr/tse_info.cpp
@@ -1,4 +1,4 @@
-/*  $Id: tse_info.cpp 606922 2020-04-28 18:58:25Z ivanov $
+/*  $Id: tse_info.cpp 611227 2020-07-01 11:37:30Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -428,7 +428,7 @@ CBioObjectId CTSE_Info::x_RegisterBioObject(CTSE_Info_Object& info)
      }
          
      uniq_id = CBioObjectId(CBioObjectId::eUniqNumber,
-                           m_InternalBioObjNumber++);
+                           ++m_InternalBioObjNumber);
      m_BioObjects[uniq_id] = &info;
      return uniq_id;
  }
diff --git a/c++/src/objmgr/util/autodef.cpp b/c++/src/objmgr/util/autodef.cpp

index 7b2b6478dc14be5f9dfd5583e6b5c9987cb26186..0ca537eaf243f37a89f7edef742881a9523524dc 100644 (file)
--- a/c++/src/objmgr/util/autodef.cpp
+++ b/c++/src/objmgr/util/autodef.cpp
@@ -1,4 +1,4 @@
-/*  $Id: autodef.cpp 607821 2020-05-07 19:13:41Z ivanov $
+/*  $Id: autodef.cpp 611612 2020-07-08 17:43:23Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -880,6 +880,9 @@ string CAutoDef::x_GetNonFeatureListEnding()
          case CAutoDefOptions::eListAllFeatures:
              end = " sequence.";
              break;
+        case CAutoDefOptions::eWholeGenomeShotgunSequence:
+            end = " whole genome shotgun sequence.";
+            break;
          default:
              break;
      }
diff --git a/c++/src/objmgr/util/autodef_options.cpp b/c++/src/objmgr/util/autodef_options.cpp

index 7020c8deaa757e138cfcb1c1eb3ba947857bb012..a13933a65896c36f7e7edb3ab704663fe93adac6 100644 (file)
--- a/c++/src/objmgr/util/autodef_options.cpp
+++ b/c++/src/objmgr/util/autodef_options.cpp
@@ -1,4 +1,4 @@
-/*  $Id: autodef_options.cpp 530196 2017-03-13 12:59:43Z bollin $
+/*  $Id: autodef_options.cpp 611612 2020-07-08 17:43:23Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -217,7 +217,8 @@ const TNameValPair sc_FeatureListTypeStr[] = {
          { "List All Features", CAutoDefOptions::eListAllFeatures },
          { "Partial Genome", CAutoDefOptions::ePartialGenome },
          { "Partial Sequence", CAutoDefOptions::ePartialSequence },
-        { "Sequence", CAutoDefOptions::eSequence }
+        { "Sequence", CAutoDefOptions::eSequence },
+        { "Whole Genome Shotgun Sequence", CAutoDefOptions::eWholeGenomeShotgunSequence }
  };
  DEFINE_STATIC_ARRAY_MAP_WITH_COPY(TNameValPairMap, sc_FeatureListTypeStrsMap, sc_FeatureListTypeStr);
  
diff --git a/c++/src/objmgr/util/create_defline.cpp b/c++/src/objmgr/util/create_defline.cpp

index 7d532895da2e7d8a4d2a54ff180b81f5f45869c0..2d574511bb3c2a19add87aa5feefb6323dcf29cb 100644 (file)
--- a/c++/src/objmgr/util/create_defline.cpp
+++ b/c++/src/objmgr/util/create_defline.cpp
@@ -1432,7 +1432,7 @@ static bool x_EndsWithStrain (
          return false;
      }
  
-    pos = NStr::FindNoCase (taxname, strain, 0, taxname.size() - 1, NStr::eLast);
+    pos = NStr::Find (taxname, strain, NStr::eNocase, NStr::eReverseSearch);
      if (pos == taxname.size() - strain.size()) {
          // check for space to avoid fortuitous match to end of taxname
          char ch = taxname[pos - 1];
@@ -2047,7 +2047,7 @@ static string s_RemoveBracketedOrgFromEnd (string str, string taxname)
      int len = str.length();
      if (len < 5) return str;
      if (str [len - 1] != ']') return str;
-    SIZE_TYPE cp = NStr::Find(str, "[", 0, NPOS, NStr::eLast);
+    SIZE_TYPE cp = NStr::Find(str, "[", NStr::eNocase, NStr::eReverseSearch);
      if (cp == NPOS) return str;
      string suffix = str.substr(cp+1);
      if (NStr::StartsWith(suffix, "NAD")) return str;
@@ -2953,7 +2953,7 @@ static size_t s_TitleEndsInOrganism (
  
      idx = len1 - len2 - 3;
      if (len1 > len2 + 4 && title [idx] == ' ' && title [idx + 1] == '[' && title [len1 - 1] == ']') {
-        pos = NStr::FindNoCase(title, taxname, 0, NPOS, NStr::eLast);
+        pos = NStr::Find(title, taxname, NStr::eNocase, NStr::eReverseSearch);
          if (pos == idx + 2) {
              return pos - 1;
          }
@@ -3009,7 +3009,7 @@ void CDeflineGenerator::x_AdjustProteinTitleSuffixIdx (
              tpos = s_TitleEndsInOrganism(m_MainTitle, binomial);
              if (tpos == NPOS) {
                  if (m_IsCrossKingdom) {
-                    pos = NStr::FindNoCase(m_MainTitle, "][", 0, NPOS, NStr::eLast);
+                    pos = NStr::Find(m_MainTitle, "][", NStr::eNocase, NStr::eReverseSearch);
                      if (pos != NPOS) {
                          m_MainTitle.erase (pos + 1);
                          s_TrimMainTitle (m_MainTitle);
@@ -3162,7 +3162,7 @@ void CDeflineGenerator::x_AdjustProteinTitleSuffix (
              tpos = s_TitleEndsInOrganism(m_MainTitle, binomial);
              if (tpos == NPOS) {
                  if (m_IsCrossKingdom) {
-                    pos = NStr::FindNoCase(m_MainTitle, "][", 0, NPOS, NStr::eLast);
+                    pos = NStr::Find(m_MainTitle, "][", NStr::eNocase, NStr::eReverseSearch);
                      if (pos != NPOS) {
                          m_MainTitle.erase (pos + 1);
                          s_TrimMainTitle (m_MainTitle);
@@ -3412,6 +3412,51 @@ string CDeflineGenerator::x_GetModifiers(const CBioseq_Handle & bsh)
                  }
              }
          }
+        if ( bios && bios->IsSetPcr_primers() ) {
+            const CBioSource_Base::TPcr_primers & primers = bios->GetPcr_primers();
+            if ( primers.CanGet() ) {
+                ITERATE( CBioSource_Base::TPcr_primers::Tdata, it, primers.Get() ) {
+
+                    // bool has_fwd_seq = false;
+                    // bool has_rev_seq = false;
+
+                    if( (*it)->IsSetForward() ) {
+                        const CPCRReaction_Base::TForward &forward = (*it)->GetForward();
+                        if( forward.CanGet() ) {
+                            ITERATE( CPCRReaction_Base::TForward::Tdata, it2, forward.Get() ) {
+                                const string &fwd_name = ( (*it2)->CanGetName() ? (*it2)->GetName().Get() : kEmptyStr );
+                                if( ! fwd_name.empty() ) {
+                                    joiner.Add("fwd-primer-name", fwd_name);
+                                }
+                                const string &fwd_seq = ( (*it2)->CanGetSeq() ? (*it2)->GetSeq().Get() : kEmptyStr );
+                                // NStr::ToLower( fwd_seq );
+                                if( ! fwd_seq.empty() ) {
+                                    joiner.Add("fwd-primer-seq", fwd_seq);
+                                    // has_fwd_seq = true;
+                                }
+                            }
+                        }
+                    }
+                    if( (*it)->IsSetReverse() ) {
+                        const CPCRReaction_Base::TReverse &reverse = (*it)->GetReverse();
+                        if( reverse.CanGet() ) {
+                            ITERATE( CPCRReaction_Base::TReverse::Tdata, it2, reverse.Get() ) {
+                                const string &rev_name = ((*it2)->CanGetName() ? (*it2)->GetName().Get() : kEmptyStr );
+                                if( ! rev_name.empty() ) {
+                                    joiner.Add("rev-primer-name", rev_name);
+                                }
+                                const string &rev_seq = ( (*it2)->CanGetSeq() ? (*it2)->GetSeq().Get() : kEmptyStr );
+                                // NStr::ToLower( rev_seq ); // do we need this? 
+                                if( ! rev_seq.empty() ) {
+                                    joiner.Add("rev-primer-seq", rev_seq);
+                                    // has_rev_seq = true;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
      }
      catch (CException &) {
          // ignore exception; it probably just means there's no org-ref
diff --git a/c++/src/objmgr/util/feature_edit.cpp b/c++/src/objmgr/util/feature_edit.cpp

index 1035d23e9fc8fe7498a59cccb73b18179ef876c0..e8541345c2884c76b8ee1eafe98e3fd76810ea1f 100644 (file)
--- a/c++/src/objmgr/util/feature_edit.cpp
+++ b/c++/src/objmgr/util/feature_edit.cpp
@@ -1,5 +1,5 @@
  
-/*  $Id: feature_edit.cpp 599823 2020-01-07 21:35:24Z foleyjp $
+/*  $Id: feature_edit.cpp 610146 2020-06-11 11:11:01Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -282,6 +282,59 @@ void CFeatTrim::x_TrimLocation(const TSeqPos from, const TSeqPos to,
  }
  
  
+static TSeqPos s_GetTrimmedLength(const CSeq_loc& trimmed_loc)  // Total length covered by a Seq-loc, summed part by part.
+{
+
+    if (trimmed_loc.IsEmpty() || trimmed_loc.IsNull()) {  // Empty and null locations contribute nothing.
+        return 0;
+    }
+
+    if (trimmed_loc.IsPnt()) {  // A single point counts as length 1.
+        return 1;
+    }
+
+    if (trimmed_loc.IsInt()) {  // Plain interval: use its own length.
+        return trimmed_loc.GetInt().GetLength();
+    }
+
+    if (trimmed_loc.IsPacked_int()) {  // Packed intervals: add up the length of every member interval.
+        TSeqPos length=0;
+        for (auto pSubInt : trimmed_loc.GetPacked_int().Get()) {  // NOTE(review): 'auto' copies each element; 'const auto&' would presumably suffice - confirm.
+            length += pSubInt->GetLength();
+        }
+        return length;
+    }
+
+    if (trimmed_loc.IsPacked_pnt()) {  // Packed points: one unit of length per point.
+        return trimmed_loc.GetPacked_pnt().GetPoints().size();  // NOTE(review): size() is converted to TSeqPos implicitly.
+    }
+
+    if (trimmed_loc.IsMix()) {  // Mix: recurse into each sub-location and add up the results.
+        TSeqPos length=0;
+        for (auto pSubLoc : trimmed_loc.GetMix().Get()) {  // NOTE(review): same by-value element copy as in the packed-int loop.
+            length += s_GetTrimmedLength(*pSubLoc);
+        }
+        return length;
+    }
+
+    return 0;  // Any location kind not handled above is treated as length 0.
+}
+
+static TSeqPos s_GetTrimmedLength(const CSeq_loc& loc, TSeqPos from, TSeqPos to)  // Length of the part of 'loc' that intersects the interval from..to.
+{
+    auto pTrimmedInt = Ref(new CSeq_loc());  // Interval location that serves as the clipping window.
+    CSeq_loc_CI loc_it(loc);
+    pTrimmedInt->SetInt().SetId().Assign(loc_it.GetSeq_id());  // Id at the iterator's initial position; NOTE(review): assumes 'loc' is non-empty and uses one Seq-id - confirm.
+    pTrimmedInt->SetInt().SetFrom(from);
+    pTrimmedInt->SetInt().SetTo(to);
+    auto pTrimmedLoc = loc.Intersect(*pTrimmedInt, CSeq_loc::fStrand_Ignore, nullptr);  // Strand is ignored when intersecting.
+    if (pTrimmedLoc) {
+        return s_GetTrimmedLength(*pTrimmedLoc);  // Measure the intersection with the single-argument overload.
+    }
+    return 0;  // Intersect() gave back no location object.
+}
+
+
  TSeqPos CFeatTrim::x_GetStartOffset(const CSeq_feat& feat,
      TSeqPos from, TSeqPos to) 
  {
@@ -292,13 +345,19 @@ TSeqPos CFeatTrim::x_GetStartOffset(const CSeq_feat& feat,
      if (strand != eNa_strand_minus) {
          TSeqPos feat_from = feat_range.GetFrom();
          if (feat_from < from) {
-            offset = from - feat_from;
+            if (feat.GetLocation().IsInt()) {
+                return (from - feat_from);
+            }
+            return s_GetTrimmedLength(feat.GetLocation(), feat_from, from-1);
          }
      }
      else { // eNa_strand_minus
          TSeqPos feat_to = feat_range.GetTo();
          if (feat_to > to) {
-            offset = feat_to - to;
+            if (feat.GetLocation().IsInt()) {
+                return (feat_to - to);
+            }
+            return s_GetTrimmedLength(feat.GetLocation(), to+1, feat_to);
          }
      }
      return offset;
@@ -326,7 +385,6 @@ TSeqPos CFeatTrim::x_GetFrame(const CCdregion& cds)
  CCdregion::EFrame CFeatTrim::GetCdsFrame(const CSeq_feat& cds_feature, const CRange<TSeqPos>& range)
  {
      const TSeqPos offset = x_GetStartOffset(cds_feature, range.GetFrom(), range.GetTo());
-
      return x_GetNewFrame(offset, cds_feature.GetData().GetCdregion());
  }
  
@@ -340,7 +398,12 @@ CCdregion::EFrame CFeatTrim::x_GetNewFrame(const TSeqPos offset, const CCdregion
      }
  
      const TSeqPos old_frame = x_GetFrame(cdregion);
-    const TSeqPos new_frame = (old_frame + frame_change)%3;
+
+    // RW-1098 
+    const TSeqPos new_frame = 3 - ((3 + offset - old_frame)%3); 
+    // Note new_frame, thus defined, takes values 1,2,3,
+    // whereas old_frame takes values 0,1,2.
+    // However, 0 == 3 in modulo 3 arithmetic.
      if (new_frame == 1) {
          return CCdregion::eFrame_two;
      }
diff --git a/c++/src/objmgr/util/indexer.cpp b/c++/src/objmgr/util/indexer.cpp

index a493dbd30235ecb347e449ffabdf01eb538da2f6..f23f70a4f8e9f39de1cc5e24e510126a1bc9c8dd 100644 (file)
--- a/c++/src/objmgr/util/indexer.cpp
+++ b/c++/src/objmgr/util/indexer.cpp
@@ -42,6 +42,7 @@
  
  #include <objmgr/util/indexer.hpp>
  #include <objmgr/util/sequence.hpp>
+#include <objmgr/util/feature_edit.hpp>
  
  #define NCBI_USE_ERRCODE_X  ObjMgr_Indexer
  
@@ -53,60 +54,60 @@ BEGIN_SCOPE(objects)
  // CSeqEntryIndex
  
  // Constructors take top-level sequence object, create a CRef<CSeqMasterIndex>, and call its initializer
-CSeqEntryIndex::CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy, TFlags flags)
  
  {
      m_Idx.Reset(new CSeqMasterIndex);
-    m_Idx->x_Initialize(topseh, policy, flags, depth);
+    m_Idx->x_Initialize(topseh, policy, flags);
  }
  
-CSeqEntryIndex::CSeqEntryIndex (CBioseq_Handle& bsh, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CBioseq_Handle& bsh, EPolicy policy, TFlags flags)
  
  {
      m_Idx.Reset(new CSeqMasterIndex);
-    m_Idx->x_Initialize(bsh, policy, flags, depth);
+    m_Idx->x_Initialize(bsh, policy, flags);
  }
  
-CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy, TFlags flags)
  
  {
      m_Idx.Reset(new CSeqMasterIndex);
-    m_Idx->x_Initialize(topsep, policy, flags, depth);
+    m_Idx->x_Initialize(topsep, policy, flags);
  }
  
-CSeqEntryIndex::CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy, TFlags flags)
  
  {
      m_Idx.Reset(new CSeqMasterIndex);
-    m_Idx->x_Initialize(seqset, policy, flags, depth);
+    m_Idx->x_Initialize(seqset, policy, flags);
  }
  
-CSeqEntryIndex::CSeqEntryIndex (CBioseq& bioseq, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CBioseq& bioseq, EPolicy policy, TFlags flags)
  
  {
      m_Idx.Reset(new CSeqMasterIndex);
-    m_Idx->x_Initialize(bioseq, policy, flags, depth);
+    m_Idx->x_Initialize(bioseq, policy, flags);
  }
  
-CSeqEntryIndex::CSeqEntryIndex (CSeq_submit& submit, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CSeq_submit& submit, EPolicy policy, TFlags flags)
  
  {
      m_Idx.Reset(new CSeqMasterIndex);
-    m_Idx->x_Initialize(submit, policy, flags, depth);
+    m_Idx->x_Initialize(submit, policy, flags);
  }
  
-CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy, TFlags flags)
  
  {
      m_Idx.Reset(new CSeqMasterIndex);
-    m_Idx->x_Initialize(topsep, sblock, policy, flags, depth);
+    m_Idx->x_Initialize(topsep, sblock, policy, flags);
  }
  
-CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy, TFlags flags)
  
  {
      m_Idx.Reset(new CSeqMasterIndex);
-    m_Idx->x_Initialize(topsep, descr, policy, flags, depth);
+    m_Idx->x_Initialize(topsep, descr, policy, flags);
  }
  
  // Get first Bioseq index
@@ -151,35 +152,58 @@ CRef<CBioseqIndex> CSeqEntryIndex::GetBioseqIndex (const CSeq_loc& loc)
      return m_Idx->GetBioseqIndex(loc);
  }
  
-// Get Bioseq index by subrange
-CRef<CBioseqIndex> CSeqEntryIndex::GetBioseqIndex (const string& accn, int from, int to, bool rev_comp)
+const vector<CRef<CBioseqIndex>>& CSeqEntryIndex::GetBioseqIndices(void)
  
  {
-    return m_Idx->GetBioseqIndex(accn, from, to, rev_comp);
+    return m_Idx->GetBioseqIndices();
  }
  
-CRef<CBioseqIndex> CSeqEntryIndex::GetBioseqIndex (int from, int to, bool rev_comp)
+const vector<CRef<CSeqsetIndex>>& CSeqEntryIndex::GetSeqsetIndices(void)
  
  {
-    return m_Idx->GetBioseqIndex("", from, to, rev_comp);
+    return m_Idx->GetSeqsetIndices();
  }
  
-const vector<CRef<CBioseqIndex>>& CSeqEntryIndex::GetBioseqIndices(void)
+bool CSeqEntryIndex::DistributedReferences(void)
  
  {
-    return m_Idx->GetBioseqIndices();
+    return m_Idx->DistributedReferences();
  }
  
-const vector<CRef<CSeqsetIndex>>& CSeqEntryIndex::GetSeqsetIndices(void)
+void CSeqEntryIndex::SetSnpFunc(FAddSnpFunc* snp)
  
  {
-    return m_Idx->GetSeqsetIndices();
+    m_Idx->SetSnpFunc (snp);
  }
  
-bool CSeqEntryIndex::DistributedReferences(void)
+FAddSnpFunc* CSeqEntryIndex::GetSnpFunc(void)
  
  {
-    return m_Idx->DistributedReferences();
+    return m_Idx->GetSnpFunc();
+}
+
+void CSeqEntryIndex::SetFeatDepth(int featDepth)
+
+{
+    m_Idx->SetFeatDepth (featDepth);
+}
+
+int CSeqEntryIndex::GetFeatDepth(void)
+
+{
+    return m_Idx->GetFeatDepth();
+}
+
+void CSeqEntryIndex::SetGapDepth(int featDepth)
+
+{
+    m_Idx->SetGapDepth (featDepth);
+}
+
+int CSeqEntryIndex::GetGapDepth(void)
+
+{
+    return m_Idx->GetGapDepth();
  }
  
  bool CSeqEntryIndex::IsFetchFailure(void)
@@ -198,11 +222,10 @@ bool CSeqEntryIndex::IsIndexFailure(void)
  // CSeqMasterIndex
  
  // Initializers take top-level sequence object, create Seq-entry wrapper if necessary
-void CSeqMasterIndex::x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
  {
      m_Policy = policy;
      m_Flags = flags;
-    m_Depth = depth;
  
      m_Tseh = topseh.GetTopLevelEntry();
      CConstRef<CSeq_entry> tcsep = m_Tseh.GetCompleteSeq_entry();
@@ -215,6 +238,9 @@ void CSeqMasterIndex::x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::E
      m_HasOperon = false;
      m_IsSmallGenomeSet = false;
      m_DistributedReferences = false;
+    m_SnpFunc = 0;
+    m_FeatDepth = 0;
+    m_GapDepth = 0;
      m_IndexFailure = false;
  
      try {
@@ -243,11 +269,10 @@ void CSeqMasterIndex::x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::E
      }
  }
  
-void CSeqMasterIndex::x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
  {
      m_Policy = policy;
      m_Flags = flags;
-    m_Depth = depth;
  
      m_Tseh = bsh.GetTopLevelEntry();
      CConstRef<CSeq_entry> tcsep = m_Tseh.GetCompleteSeq_entry();
@@ -260,6 +285,9 @@ void CSeqMasterIndex::x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy
      m_HasOperon = false;
      m_IsSmallGenomeSet = false;
      m_DistributedReferences = false;
+    m_SnpFunc = 0;
+    m_FeatDepth = 0;
+    m_GapDepth = 0;
      m_IndexFailure = false;
  
      try {
@@ -288,11 +316,10 @@ void CSeqMasterIndex::x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy
      }
  }
  
-void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
  {
      m_Policy = policy;
      m_Flags = flags;
-    m_Depth = depth;
  
      topsep.Parentize();
      m_Tsep.Reset(&topsep);
@@ -300,11 +327,10 @@ void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy
      x_Init();
  }
  
-void CSeqMasterIndex::x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
  {
      m_Policy = policy;
      m_Flags = flags;
-    m_Depth = depth;
  
      CSeq_entry* parent = seqset.GetParentEntry();
      if (parent) {
@@ -320,11 +346,10 @@ void CSeqMasterIndex::x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy
      x_Init();
  }
  
-void CSeqMasterIndex::x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
  {
      m_Policy = policy;
      m_Flags = flags;
-    m_Depth = depth;
  
      CSeq_entry* parent = bioseq.GetParentEntry();
      if (parent) {
@@ -340,11 +365,10 @@ void CSeqMasterIndex::x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy pol
      x_Init();
  }
  
-void CSeqMasterIndex::x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
  {
      m_Policy = policy;
      m_Flags = flags;
-    m_Depth = depth;
  
      _ASSERT(submit.CanGetData());
      _ASSERT(submit.CanGetSub());
@@ -359,11 +383,10 @@ void CSeqMasterIndex::x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy
      x_Init();
  }
  
-void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
  {
      m_Policy = policy;
      m_Flags = flags;
-    m_Depth = depth;
  
      topsep.Parentize();
      m_Tsep.Reset(&topsep);
@@ -372,11 +395,10 @@ void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, C
      x_Init();
  }
  
-void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
  {
      m_Policy = policy;
      m_Flags = flags;
-    m_Depth = depth;
  
      topsep.Parentize();
      m_Tsep.Reset(&topsep);
@@ -385,6 +407,43 @@ void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqE
      x_Init();
  }
  
+void CSeqMasterIndex::SetSnpFunc (FAddSnpFunc* snp)
+
+{
+    m_SnpFunc = snp;
+}
+
+FAddSnpFunc* CSeqMasterIndex::GetSnpFunc (void)
+
+{
+    return m_SnpFunc;
+}
+
+void CSeqMasterIndex::SetFeatDepth (int featDepth)
+
+{
+    m_FeatDepth = featDepth;
+}
+
+int CSeqMasterIndex::GetFeatDepth (void)
+
+{
+    return m_FeatDepth;
+}
+
+void CSeqMasterIndex::SetGapDepth (int gapDepth)
+
+{
+    m_GapDepth = gapDepth;
+}
+
+int CSeqMasterIndex::GetGapDepth (void)
+
+{
+    return m_GapDepth;
+}
+
+
  // At end of program, poll all Bioseqs to check for far fetch failure flag
  bool CSeqMasterIndex::IsFetchFailure (void)
  
@@ -459,7 +518,7 @@ void CSeqMasterIndex::x_InitSeqs (const CSeq_entry& sep, CRef<CSeqsetIndex> prnt
          CBioseq_Handle bsh = m_Scope->GetBioseqHandle(bsp);
          if (bsh) {
              // create CBioseqIndex object for current Bioseq
-            CRef<CBioseqIndex> bsx(new CBioseqIndex(bsh, bsp, bsh, prnt, m_Tseh, m_Scope, *this, m_Policy, m_Flags, m_Depth, false));
+            CRef<CBioseqIndex> bsx(new CBioseqIndex(bsh, bsp, bsh, prnt, m_Tseh, m_Scope, *this, m_Policy, m_Flags));
  
              // record CBioseqIndex in vector for IterateBioseqs or GetBioseqIndex
              m_BsxList.push_back(bsx);
@@ -573,6 +632,9 @@ void CSeqMasterIndex::x_Init (void)
      m_HasOperon = false;
      m_IsSmallGenomeSet = false;
      m_DistributedReferences = false;
+    m_SnpFunc = 0;
+    m_FeatDepth = 0;
+    m_GapDepth = 0;
      m_IndexFailure = false;
  
      try {
@@ -604,92 +666,6 @@ void CSeqMasterIndex::x_Init (void)
      }
  }
  
-// Support for temporary delta sequence referring to subrange of original sequence
-CRef<CSeq_id> CSeqMasterIndex::x_MakeUniqueId(void)
-{
-    CRef<CSeq_id> id(new CSeq_id());
-    bool good = false;
-    while (!good) {
-        id->SetLocal().SetStr("tmp_delta_subset_" + NStr::NumericToString(m_Counter.Add(1)));
-        CBioseq_Handle bsh = m_Scope->GetBioseqHandle(*id);
-        if (! bsh) {
-            good = true;
-        }
-    }
-    return id;
-}
-
-CRef<CBioseqIndex> CSeqMasterIndex::x_DeltaIndex(const CSeq_loc& loc)
-
-{
-    try {
-        // create delta sequence referring to location or range, using temporary local Seq-id
-        CBioseq_Handle bsh = m_Scope->GetBioseqHandle(loc);
-        CRef<CBioseq> delta(new CBioseq());
-        delta->SetId().push_back(x_MakeUniqueId());
-        delta->SetInst().Assign(bsh.GetInst());
-        delta->SetInst().ResetSeq_data();
-        delta->SetInst().ResetExt();
-        delta->SetInst().SetRepr(CSeq_inst::eRepr_delta);
-        CRef<CDelta_seq> element(new CDelta_seq());
-        element->SetLoc().Assign(loc);
-        delta->SetInst().SetExt().SetDelta().Set().push_back(element);
-        delta->SetInst().SetLength(sequence::GetLength(loc, m_Scope));
-
-        // add to scope
-        CBioseq_Handle deltaBsh = m_Scope->AddBioseq(*delta);
-
-        if (deltaBsh) {
-            // create CBioseqIndex object for delta Bioseq
-            CRef<CSeqsetIndex> noparent;
-
-            CRef<CBioseqIndex> bsx(new CBioseqIndex(deltaBsh, *delta, bsh, noparent, m_Tseh, m_Scope, *this, m_Policy, m_Flags, m_Depth, true));
-
-           return bsx;
-        }
-    }
-    catch (CException& e) {
-        LOG_POST_X(2, Error << "Error in CSeqMasterIndex::x_DeltaIndex: " << e.what());
-    }
-    return CRef<CBioseqIndex> ();
-}
-
-CConstRef<CSeq_loc> CSeqMasterIndex::x_SubRangeLoc(const string& accn, int from, int to, bool rev_comp)
-
-{
-    TAccnIndexMap::iterator it = m_AccnIndexMap.find(accn);
-    if (it != m_AccnIndexMap.end()) {
-        CRef<CBioseqIndex> bsx = it->second;
-        for (const CRef<CSeq_id>& id : bsx->GetBioseq().GetId()) {
-            switch (id->Which()) {
-                case CSeq_id::e_Other:
-                case CSeq_id::e_Genbank:
-                case CSeq_id::e_Embl:
-                case CSeq_id::e_Ddbj:
-                case CSeq_id::e_Tpg:
-                case CSeq_id::e_Tpe:
-                case CSeq_id::e_Tpd:
-                    {
-                        CSeq_loc::TStrand strand = eNa_strand_unknown;
-                        if (rev_comp) {
-                            strand = eNa_strand_minus;
-                        }
-                        CSeq_id& nc_id = const_cast<CSeq_id&>(*id);
-                        // create location from range
-                        CConstRef<CSeq_loc> loc(new CSeq_loc(nc_id, from, to, strand));
-                        if (loc) {
-                           return loc;
-                        }
-                    }
-                    break;
-                default:
-                    break;
-            }
-        }
-    }
-    return CConstRef<CSeq_loc> ();
-}
-
  // Get first Bioseq index
  CRef<CBioseqIndex> CSeqMasterIndex::GetBioseqIndex (void)
  
@@ -762,40 +738,8 @@ CRef<CBioseqIndex> CSeqMasterIndex::GetBioseqIndex (const CMappedFeat& mf)
  CRef<CBioseqIndex> CSeqMasterIndex::GetBioseqIndex (const CSeq_loc& loc)
  
  {
-    CRef<CBioseqIndex> bsx = x_DeltaIndex(loc);
-
-    if (bsx) {
-        return bsx;
-    }
-    return CRef<CBioseqIndex> ();
-}
-
-// Get Bioseq index by subrange
-CRef<CBioseqIndex> CSeqMasterIndex::GetBioseqIndex (const string& accn, int from, int to, bool rev_comp)
-
-{
-    string accession = accn;
-    if (accession.empty()) {
-        CRef<CBioseqIndex> bsx = GetBioseqIndex();
-        if (bsx) {
-            accession = bsx->GetAccession();
-        }
-    }
-
-    if (! accession.empty()) {
-        CConstRef<CSeq_loc> loc = x_SubRangeLoc(accession, from, to, rev_comp);
-
-        if (loc) {
-            return GetBioseqIndex(*loc);
-        }
-    }
-    return CRef<CBioseqIndex> ();
-}
-
-CRef<CBioseqIndex> CSeqMasterIndex::GetBioseqIndex (int from, int to, bool rev_comp)
-
-{
-    return GetBioseqIndex("", from, to, rev_comp);
+    CBioseq_Handle bsh = m_Scope->GetBioseqHandle(loc);
+    return GetBioseqIndex(bsh);
  }
  
  // Allow access to internal vectors for application to use in iterators
@@ -841,9 +785,7 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh,
                              CRef<CScope> scope,
                              CSeqMasterIndex& idx,
                              CSeqEntryIndex::EPolicy policy,
-                            CSeqEntryIndex::TFlags flags,
-                            int depth,
-                            bool surrogate)
+                            CSeqEntryIndex::TFlags flags)
      : m_Bsh(bsh),
        m_Bsp(bsp),
        m_OrigBsh(obsh),
@@ -852,9 +794,7 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh,
        m_Scope(scope),
        m_Idx(&idx),
        m_Policy(policy),
-      m_Flags(flags),
-      m_Depth(depth),
-      m_Surrogate(surrogate)
+      m_Flags(flags)
  {
      m_FetchFailure = false;
  
@@ -873,6 +813,7 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh,
      m_Topology = NCBI_SEQTOPOLOGY(not_set);
  
      m_IsDelta = false;
+    m_IsDeltaLitOnly = false;
      m_IsVirtual = false;
      m_IsMap = false;
  
@@ -885,6 +826,7 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh,
  
      m_Accession.clear();
  
+    m_IsRefSeq = false;
      m_IsNC = false;
      m_IsNM = false;
      m_IsNR = false;
@@ -931,7 +873,7 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh,
      m_Taxname.clear();
      m_Common.clear();
      m_Lineage.clear();
-    m_Taxid = 0;
+    m_Taxid = ZERO_TAX_ID;
      m_UsingAnamorph = false;
      m_Genus.clear();
      m_Species.clear();
@@ -1000,16 +942,44 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh,
              m_IsVirtual = (repr == CSeq_inst::eRepr_virtual);
              m_IsMap = (repr == CSeq_inst::eRepr_map);
          }
+        if (m_IsDelta && m_Bsh.IsSetInst_Ext()) {
+            const CBioseq_Handle::TInst_Ext& ext = m_Bsh.GetInst_Ext();
+            bool hasLoc = false;
+            if ( ext.IsDelta() ) {
+                ITERATE (CDelta_ext::Tdata, it, ext.GetDelta().Get()) {
+                    if ( (*it)->IsLoc() ) {
+                        const CSeq_loc& loc = (*it)->GetLoc();
+                        if (loc.IsNull()) continue;
+                        hasLoc = true;
+                    }
+                }
+            }
+            if (! hasLoc) {
+                m_IsDeltaLitOnly = true;
+            }
+        }
      }
  
      // process Seq-ids
      for (CSeq_id_Handle sid : obsh.GetId()) {
+        // first switch to set RefSeq and ThirdParty flags
          switch (sid.Which()) {
+            case NCBI_SEQID(Other):
+                m_IsRefSeq = true;
+                break;
              case NCBI_SEQID(Tpg):
              case NCBI_SEQID(Tpe):
              case NCBI_SEQID(Tpd):
                  m_ThirdParty = true;
-                // fall through
+                break;
+            default:
+                break;
+        }
+        // second switch now avoids complicated flag setting logic
+        switch (sid.Which()) {
+            case NCBI_SEQID(Tpg):
+            case NCBI_SEQID(Tpe):
+            case NCBI_SEQID(Tpd):
              case NCBI_SEQID(Other):
              case NCBI_SEQID(Genbank):
              case NCBI_SEQID(Embl):
@@ -1117,13 +1087,6 @@ CBioseqIndex::CBioseqIndex (CBioseq_Handle bsh,
  CBioseqIndex::~CBioseqIndex (void)
  
  {
-    if (m_Surrogate) {
-        try {
-            m_Scope->RemoveBioseq(m_Bsh);
-        } catch (CException&) {
-            // presumably still in use; let it be
-        }
-    }
  }
  
  // Gap collection (delayed until needed)
@@ -1144,11 +1107,12 @@ void CBioseqIndex::x_InitGaps (void)
          SSeqMapSelector sel;
  
          size_t resolveCount = 0;
-        /*
-        if (m_Policy == CSeqEntryIndex::eInternal) {
-            resolveCount = 0;
+
+        CWeakRef<CSeqMasterIndex> idx = GetSeqMasterIndex();
+        auto idxl = idx.Lock();
+        if (idxl) {
+            resolveCount = idxl->GetGapDepth();
          }
-        */
  
          sel.SetFlags(CSeqMap::fFindGap)
             .SetResolveCount(resolveCount);
@@ -1855,339 +1819,187 @@ void CBioseqIndex::x_InitDescs (void)
      }
  }
  
-// Feature collection (delayed until needed)
-void CBioseqIndex::x_InitFeats (void)
+void CBioseqIndex::x_DefaultSelector(SAnnotSelector& sel, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, bool onlyNear, CScope& scope)
  
  {
-    try {
-        if (m_FeatsInitialized) {
-           return;
-        }
+    bool snpOK = false;
+    bool cddOK = false;
  
-        if (! m_DescsInitialized) {
-            // initialize descriptors first to get m_ForceOnlyNearFeats flag
-            x_InitDescs();
-        }
+    if (policy == CSeqEntryIndex::eExhaustive) {
  
-        m_FeatsInitialized = true;
+        // experimental policy forces collection of features from all sequence levels
+        sel.SetResolveAll();
+        sel.SetResolveDepth(kMax_Int);
+        // ignores RefSeq/INSD barrier, overrides far fetch policy user object
+        // for now, always excludes external annots, ignores custom enable bits
  
-        SAnnotSelector sel;
+    } else if (policy == CSeqEntryIndex::eInternal || onlyNear) {
  
-        if (m_Policy != CSeqEntryIndex::eExternal) {
-            // unless explicitly desired, exclude external annots - need explicit show flags
-            if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) != 0) {
-                sel.ExcludeNamedAnnots("SNP");
-            }
-            if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) != 0) {
-                sel.ExcludeNamedAnnots("CDD");
-            }
-            sel.ExcludeNamedAnnots("STS");
-        }
+        // do not fetch features from underlying sequence component records
+        sel.SetResolveDepth(0);
+        sel.SetExcludeExternal(true);
+        // always excludes external annots, ignores custom enable bits
  
-        if (m_Policy == CSeqEntryIndex::eInternal || m_ForceOnlyNearFeats) {
+    } else if (policy == CSeqEntryIndex::eAdaptive) {
  
-            // do not fetch features from underlying sequence component records
-            if (m_Surrogate) {
-                // delta with sublocation needs to map features from original Bioseq
-                sel.SetResolveAll();
-                sel.SetResolveDepth(1);
-                sel.SetExcludeExternal();
-            } else {
-                // otherwise limit collection to local records in top-level Seq-entry
-                sel.SetResolveDepth(0);
-                sel.SetExcludeExternal();
-            }
+        sel.SetResolveAll();
+        // normal situation uses adaptive depth for feature collection,
+        // includes barrier between RefSeq and INSD accession types
+        sel.SetAdaptiveDepth(true);
  
-        } else if (m_Policy == CSeqEntryIndex::eExhaustive) {
+        // conditionally allows external annots, based on custom enable bits
+        if ((flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
+            snpOK = true;
+        }
+        if ((flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
+            cddOK = true;
+        }
  
-            sel.SetResolveAll();
-             // experimental flag forces collection of features from all levels
-            sel.SetResolveDepth(kMax_Int);
-            // also ignores RefSeq/INSD barrier, far fetch policy user object
+    } else if (policy == CSeqEntryIndex::eExternal) {
  
-        } else if (m_Policy == CSeqEntryIndex::eExternal) {
+        // same as eAdaptive
+        sel.SetResolveAll();
+        sel.SetAdaptiveDepth(true);
  
-            // same as eAdaptive, except also allows external annots
-            sel.SetResolveAll();
-            sel.SetAdaptiveDepth(true);
-            // needs to be here
-            sel.AddUnnamedAnnots();
-            // allow external SNPs
-            if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
-                sel.IncludeNamedAnnotAccession("SNP");
-                sel.AddNamedAnnots("SNP");
-            }
-            if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
-                sel.IncludeNamedAnnotAccession("CDD");
-                sel.AddNamedAnnots("CDD");
-            }
-            m_Scope->SetKeepExternalAnnotsForEdit();
-            // obey flag to hide CDD features by default in the web display
-            if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) != 0) {
-                sel.ExcludeNamedAnnots("CDD");
-            }
+        // but always allows external annots without need for custom enable bits
+        snpOK = true;
+        cddOK = true;
  
-        } else if (m_Depth > -1) {
+    } else if (policy == CSeqEntryIndex::eFtp) {
  
+        // for public ftp releases
+        if (m_IsRefSeq) {
              sel.SetResolveAll();
-            // explicit depth setting overrides adaptive depth (probably only needed for debugging)
-            sel.SetResolveDepth(m_Depth);
+            sel.SetAdaptiveDepth(true);
+        } else if (m_IsDeltaLitOnly) {
+            sel.SetResolveDepth(0);
+            sel.SetExcludeExternal(true);
+        } else {
+            sel.SetResolveDepth(0);
+            sel.SetExcludeExternal(true);
+        }
  
-        } else if (m_Policy == CSeqEntryIndex::eAdaptive) {
+    } else if (policy == CSeqEntryIndex::eWeb) {
  
+        // for public web pages
+        if (m_IsRefSeq) {
              sel.SetResolveAll();
-            // normal situation uses adaptive depth for feature collection,
-            // includes barrier between RefSeq and INSD accession types
              sel.SetAdaptiveDepth(true);
-
-            // calling AddUnnamedAnnots once again suppresses tRNA features in a ("tRNAscan-SE") named annot
-            // but commenting it out allows external variations in NG_008330 to override internal gene, mRNA, CDS, and exon features
-            sel.AddUnnamedAnnots();
-
-            // allow external SNPs - testing for now, probably needs to be in external policy
-            if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
-                sel.IncludeNamedAnnotAccession("SNP");
-                sel.AddNamedAnnots("SNP");
-            }
-            if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
-                sel.IncludeNamedAnnotAccession("CDD");
-                sel.AddNamedAnnots("CDD");
-            }
-            m_Scope->SetKeepExternalAnnotsForEdit();
-
-        } else if (m_Policy == CSeqEntryIndex::eIncremental) {
-
-            // do not fetch features from underlying sequence component records
-            if (m_Surrogate) {
-                // delta with sublocation needs to map features from original Bioseq
-                sel.SetResolveAll();
-                sel.SetResolveDepth(1);
-                sel.SetExcludeExternal();
-            } else {
-                // otherwise limit collection to local records in top-level Seq-entry
-                sel.SetResolveAll();
-                sel.SetResolveDepth(0);
-                sel.SetExcludeExternal();
-            }
-
-            /*
+        } else if (m_IsDeltaLitOnly) {
              sel.SetResolveAll();
-            // flatfile generator now needs to do its own exploration of far delta components
-            // and needs to implement barrier between RefSeq and INSD accession types
-            sel.SetResolveDepth(1);
-
-            // calling AddUnnamedAnnots once again suppresses tRNA features in a ("tRNAscan-SE") named annot
-            // sel.AddUnnamedAnnots();
-
-            // allow external SNPs - testing for now, probably needs to be in external policy
-            if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
-                sel.IncludeNamedAnnotAccession("SNP");
-                sel.AddNamedAnnots("SNP");
-            }
-            if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
-                sel.IncludeNamedAnnotAccession("CDD");
-                sel.AddNamedAnnots("CDD");
-            }
-            m_Scope->SetKeepExternalAnnotsForEdit();
-            */
+            sel.SetAdaptiveDepth(true);
+        } else {
+            sel.SetResolveAll();
+            sel.SetAdaptiveDepth(true);
          }
  
-        // bit flags exclude specific features
-        if ((m_Flags & CSeqEntryIndex::fHideImpFeats) != 0) {
-            sel.ExcludeFeatType(CSeqFeatData::e_Imp);
-        }
-        if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) != 0) {
-            sel.ExcludeFeatType(CSeqFeatData::e_Variation);
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_variation);
+        // conditionally allows external annots, based on custom enable bits
+        if ((flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
+            snpOK = true;
          }
-        if ((m_Flags & CSeqEntryIndex::fHideSTSFeats) != 0) {
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_STS);
-        }
-        if ((m_Flags & CSeqEntryIndex::fHideExonFeats) != 0) {
-            sel.ExcludeNamedAnnots("Exon");
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon);
-        }
-        if ((m_Flags & CSeqEntryIndex::fHideIntronFeats) != 0) {
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron);
-        }
-        if ((m_Flags & CSeqEntryIndex::fHideMiscFeats) != 0) {
-            sel.ExcludeFeatType(CSeqFeatData::e_Site);
-            sel.ExcludeFeatType(CSeqFeatData::e_Bond);
-            sel.ExcludeFeatType(CSeqFeatData::e_Region);
-            sel.ExcludeFeatType(CSeqFeatData::e_Comment);
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature);
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_preprotein);
-        }
-        if ((m_Flags & CSeqEntryIndex::fHideGapFeats) != 0) {
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_gap);
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_assembly_gap);
-        }
-
-        // additional common settings
-        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue)
-           .ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite)
-           .ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq);
-
-        sel.SetFeatComparator(new feature::CFeatComparatorByLabel);
-
-        // request exception to capture fetch failure
-        sel.SetFailUnresolved();
-
-        bool onlyGeneRNACDS = false;
-        if ((m_Flags & CSeqEntryIndex::fGeneRNACDSOnly) != 0) {
-            onlyGeneRNACDS = true;
+        if ((flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
+            cddOK = true;
          }
+    }
  
-        // variables for setting m_BestProteinFeature
-        TSeqPos longest = 0;
-        CProt_ref::EProcessed bestprocessed = CProt_ref::eProcessed_not_set;
-        CProt_ref::EProcessed processed;
+    // fHideSNPFeats and fHideCDDFeats flags override any earlier settings
+    if ((flags & CSeqEntryIndex::fHideSNPFeats) != 0) {
+        snpOK = false;
+    }
+    if ((flags & CSeqEntryIndex::fHideCDDFeats) != 0) {
+        cddOK = false;
+    }
  
-        // next gap
-        CGapIndex* sgx = NULL;
-        if (m_GapList.size() > 0) {
-            sgx = m_GapList[0];
-        }
+    // configure remote annot settings in selector
+    if ( snpOK ) {
  
          CWeakRef<CSeqMasterIndex> idx = GetSeqMasterIndex();
          auto idxl = idx.Lock();
          if (idxl) {
-            /*
-            if (! idxl->IsSmallGenomeSet()) {
-                // limit feature collection to immediate Bioseq-set parent
-                CRef<CSeqsetIndex> prnt = GetParent();
-                if (prnt) {
-                    CBioseq_set_Handle bssh = prnt->GetSeqsetHandle();
-                    if (bssh) {
-                        CSeq_entry_Handle pseh = bssh.GetParentEntry();
-                        if (pseh) {
-                            sel.SetLimitSeqEntry(pseh);
-                        }
-                    }
+            FAddSnpFunc* func = idxl->GetSnpFunc();
+            if (func) {
+                // under PubSeq Gateway, need to get exact accession for SNP retrieval
+                CBioseq_Handle bsh = GetBioseqHandle();
+                string na_acc;
+                (*func) (bsh, na_acc);
+                if (na_acc.length() > 0) {
+                    sel.IncludeNamedAnnotAccession(na_acc);
                  }
+            } else {
+                // otherwise just give SNP name
+                sel.IncludeNamedAnnotAccession("SNP");
              }
-            */
-
-            CRef<feature::CFeatTree> ft = idxl->GetFeatTree();
-
-            // iterate features on Bioseq
-            for (CFeat_CI feat_it(m_Bsh, sel); feat_it; ++feat_it) {
-                const CMappedFeat mf = *feat_it;
-
-                if (onlyGeneRNACDS) {
-                    const CSeqFeatData& data = mf.GetData();
-                    CSeqFeatData::E_Choice type = data.Which();
-                    if (type != CSeqFeatData::e_Gene &&
-                        type != CSeqFeatData::e_Rna &&
-                        type != CSeqFeatData::e_Cdregion) {
-                        continue;
-                    }
-                }
-
-                CSeq_feat_Handle hdl = mf.GetSeq_feat_Handle();
-
-                CRef<CFeatureIndex> sfx(new CFeatureIndex(hdl, mf, *this));
-                m_SfxList.push_back(sfx);
-
-                ft->AddFeature(mf);
-
-                // CFeatureIndex from CMappedFeat for use with GetBestGene
-                m_FeatIndexMap[mf] = sfx;
+        }
  
-                // set specific flags for various feature types
-                CSeqFeatData::E_Choice type = sfx->GetType();
-                CSeqFeatData::ESubtype subtype = sfx->GetSubtype();
+    } else {
+        sel.ExcludeNamedAnnotAccession("SNP");
+    }
  
-                if (type == CSeqFeatData::e_Biosrc) {
-                    m_HasSource = true;
-                    if (! m_BioSource) {
-                        if (! mf.IsSetData ()) continue;
-                        const CSeqFeatData& sfdata = mf.GetData();
-                        const CBioSource& biosrc = sfdata.GetBiosrc();
-                        m_BioSource.Reset (&biosrc);
-                    }
-                    continue;
-                }
+    if ( cddOK ) {
+        sel.IncludeNamedAnnotAccession("CDD");
+    } else {
+        sel.ExcludeNamedAnnotAccession("CDD");
+    }
  
-                if (type == CSeqFeatData::e_Gene) {
-                    m_HasGene = true;
-                    if (m_HasMultiIntervalGenes) {
-                        continue;
-                    }
-                    const CSeq_loc& loc = mf.GetLocation ();
-                    switch (loc.Which()) {
-                        case CSeq_loc::e_Packed_int:
-                        case CSeq_loc::e_Packed_pnt:
-                        case CSeq_loc::e_Mix:
-                        case CSeq_loc::e_Equiv:
-                            m_HasMultiIntervalGenes = true;
-                            break;
-                        default:
-                            break;
-                    }
-                    continue;
-                }
+    CWeakRef<CSeqMasterIndex> idx = GetSeqMasterIndex();
+    auto idxl = idx.Lock();
+    if (idxl) {
+        int featDepth = idxl->GetFeatDepth();
+        if (featDepth > 0) {
+            sel.SetResolveDepth(featDepth);
+        }
+    }
  
-                if (subtype == CSeqFeatData::eSubtype_operon) {
-                    idxl->SetHasOperon(true);
-                    continue;
-                }
+    // bit flags exclude specific features
+    // source features are collected elsewhere
+    sel.ExcludeFeatType(CSeqFeatData::e_Biosrc);
+    // pub features are used in the REFERENCES section
+    sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_pub);
+    // some feature types are always excluded (deprecated?)
+    // sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue)
+    sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite)
+       .ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq);
+    // exclude other types based on user flags
+    if ((flags & CSeqEntryIndex::fHideImpFeats) != 0) {
+        sel.ExcludeFeatType(CSeqFeatData::e_Imp);
+    }
+    if ((flags & CSeqEntryIndex::fHideSTSFeats) != 0) {
+        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_STS);
+    }
+    if ((flags & CSeqEntryIndex::fHideExonFeats) != 0) {
+        sel.ExcludeNamedAnnots("Exon");
+        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon);
+    }
+    if ((flags & CSeqEntryIndex::fHideIntronFeats) != 0) {
+        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron);
+    }
+    if ((flags & CSeqEntryIndex::fHideMiscFeats) != 0) {
+        sel.ExcludeFeatType(CSeqFeatData::e_Site);
+        sel.ExcludeFeatType(CSeqFeatData::e_Bond);
+        sel.ExcludeFeatType(CSeqFeatData::e_Region);
+        sel.ExcludeFeatType(CSeqFeatData::e_Comment);
+        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature);
+        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_preprotein);
+    }
+    if ((flags & CSeqEntryIndex::fHideGapFeats) != 0) {
+        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_gap);
+        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_assembly_gap);
+    }
  
-                if (type == CSeqFeatData::e_Prot && IsAA()) {
-                    if (! mf.IsSetData ()) continue;
-                    const CSeqFeatData& sfdata = mf.GetData();
-                    const CProt_ref& prp = sfdata.GetProt();
-                    processed = CProt_ref::eProcessed_not_set;
-                    if (prp.IsSetProcessed()) {
-                        processed = prp.GetProcessed();
-                    }
-                    const CSeq_loc& loc = mf.GetLocation ();
-                    TSeqPos prot_length = sequence::GetLength(loc, m_Scope);
-                    if (prot_length > longest) {
-                        m_BestProtFeatInitialized = true;
-                        m_BestProteinFeature = sfx;
-                        longest = prot_length;
-                        bestprocessed = processed;
-                    } else if (prot_length == longest) {
-                        // unprocessed 0 > preprotein 1 > mat peptide 2
-                        if (processed < bestprocessed) {
-                            m_BestProtFeatInitialized = true;
-                            m_BestProteinFeature = sfx;
-                            longest = prot_length;
-                            bestprocessed = processed;
-                        }
-                    }
-                    continue;
-                }
+    // additional common settings
+    sel.SetFeatComparator(new feature::CFeatComparatorByLabel);
  
-                if (type == CSeqFeatData::e_Cdregion && IsNA()) {
-                } else if (type == CSeqFeatData::e_Rna && IsNA()) {
-                } else if (type == CSeqFeatData::e_Prot && IsAA()) {
-                } else {
-                    continue;
-                }
+    // limit exploration of far deltas with no features to avoid timeout
+    sel.SetMaxSearchSegments(500);
+    sel.SetMaxSearchSegmentsAction(SAnnotSelector::eMaxSearchSegmentsSilent);
+    sel.SetMaxSearchTime(25);
  
-                // index feature for (local) product Bioseq (CDS -> protein, mRNA -> cDNA, or Prot -> peptide)
-                CSeq_id_Handle idh = mf.GetProductId();
-                if (idh) {
-                    string str = idh.AsString();
-                    CRef<CBioseqIndex> bsxp = idxl->GetBioseqIndex(str);
-                    if (bsxp) {
-                        bsxp->m_FeatForProdInitialized = true;
-                        bsxp->m_FeatureForProduct = sfx;
-                    }
-                }
-            }
-        }
-    }
-    catch (CException& e) {
-        m_FetchFailure = true;
-        LOG_POST_X(6, Error << "Error in CBioseqIndex::x_InitFeats: " << e.what());
-    }
+    // request exception to capture fetch failure
+    sel.SetFailUnresolved();
  }
  
-// Feature collection (delayed until needed)
-void CBioseqIndex::x_InitFeatsByLoc (const CSeq_loc& slp)
+// Feature collection common implementation method (delayed until needed)
+void CBioseqIndex::x_InitFeats (CSeq_loc* slpp)
  
  {
      try {
@@ -2202,165 +2014,7 @@ void CBioseqIndex::x_InitFeatsByLoc (const CSeq_loc& slp)
  
          SAnnotSelector sel;
  
-        if (m_Policy != CSeqEntryIndex::eExternal) {
-            // unless explicitly desired, exclude external annots - need explicit show flags
-            if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) != 0) {
-                sel.ExcludeNamedAnnots("SNP");
-            }
-            if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) != 0) {
-                sel.ExcludeNamedAnnots("CDD");
-            }
-            sel.ExcludeNamedAnnots("STS");
-        }
-
-        if (m_Policy == CSeqEntryIndex::eExhaustive) {
-
-            sel.SetResolveAll();
-             // experimental flag forces collection of features from all levels
-            sel.SetResolveDepth(kMax_Int);
-            // also ignores RefSeq/INSD barrier, far fetch policy user object
-
-        } else if (m_Policy == CSeqEntryIndex::eExternal) {
-
-            // same as eAdaptive, except also allows external annots
-            sel.SetResolveAll();
-            sel.SetAdaptiveDepth(true);
-            // needs to be here
-            sel.AddUnnamedAnnots();
-            // allow external SNPs
-            if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
-                sel.IncludeNamedAnnotAccession("SNP");
-                sel.AddNamedAnnots("SNP");
-            }
-            if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
-                sel.IncludeNamedAnnotAccession("CDD");
-                sel.AddNamedAnnots("CDD");
-            }
-            m_Scope->SetKeepExternalAnnotsForEdit();
-            // obey flag to hide CDD features by default in the web display
-            if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) != 0) {
-                sel.ExcludeNamedAnnots("CDD");
-            }
-
-        } else if (m_Policy == CSeqEntryIndex::eInternal || m_ForceOnlyNearFeats) {
-
-            // do not fetch features from underlying sequence component records
-            if (m_Surrogate) {
-                // delta with sublocation needs to map features from original Bioseq
-                sel.SetResolveAll();
-                sel.SetResolveDepth(1);
-                sel.SetExcludeExternal();
-            } else {
-                // otherwise limit collection to local records in top-level Seq-entry
-                sel.SetResolveDepth(0);
-                sel.SetExcludeExternal();
-            }
-
-        } else if (m_Depth > -1) {
-
-            sel.SetResolveAll();
-            // explicit depth setting overrides adaptive depth (probably only needed for debugging)
-            sel.SetResolveDepth(m_Depth);
-
-        } else if (m_Policy == CSeqEntryIndex::eAdaptive) {
-
-            sel.SetResolveAll();
-            // normal situation uses adaptive depth for feature collection,
-            // includes barrier between RefSeq and INSD accession types
-            sel.SetAdaptiveDepth(true);
-
-            // calling AddUnnamedAnnots once again suppresses tRNA features in a ("tRNAscan-SE") named annot
-            // but commenting it out allows external variations in NG_008330 to override internal gene, mRNA, CDS, and exon features
-            sel.AddUnnamedAnnots();
-
-            // allow external SNPs - testing for now, probably needs to be in external policy
-            if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
-                sel.IncludeNamedAnnotAccession("SNP");
-                sel.AddNamedAnnots("SNP");
-            }
-            if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
-                sel.IncludeNamedAnnotAccession("CDD");
-                sel.AddNamedAnnots("CDD");
-            }
-            m_Scope->SetKeepExternalAnnotsForEdit();
-
-        } else if (m_Policy == CSeqEntryIndex::eIncremental) {
-
-            // do not fetch features from underlying sequence component records
-            if (m_Surrogate) {
-                // delta with sublocation needs to map features from original Bioseq
-                sel.SetResolveAll();
-                sel.SetResolveDepth(1);
-                sel.SetExcludeExternal();
-            } else {
-                // otherwise limit collection to local records in top-level Seq-entry
-                sel.SetResolveAll();
-                sel.SetResolveDepth(0);
-                sel.SetExcludeExternal();
-            }
-
-            /*
-            sel.SetResolveAll();
-            // flatfile generator now needs to do its own exploration of far delta components
-            // and needs to implement barrier between RefSeq and INSD accession types
-            sel.SetResolveDepth(1);
-
-            // calling AddUnnamedAnnots once again suppresses tRNA features in a ("tRNAscan-SE") named annot
-            // sel.AddUnnamedAnnots();
-
-            // allow external SNPs - testing for now, probably needs to be in external policy
-            if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
-                sel.IncludeNamedAnnotAccession("SNP");
-                sel.AddNamedAnnots("SNP");
-            }
-            if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
-                sel.IncludeNamedAnnotAccession("CDD");
-                sel.AddNamedAnnots("CDD");
-            }
-            m_Scope->SetKeepExternalAnnotsForEdit();
-            */
-        }
-
-        // bit flags exclude specific features
-        if ((m_Flags & CSeqEntryIndex::fHideImpFeats) != 0) {
-            sel.ExcludeFeatType(CSeqFeatData::e_Imp);
-        }
-        if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) != 0) {
-            sel.ExcludeFeatType(CSeqFeatData::e_Variation);
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_variation);
-        }
-        if ((m_Flags & CSeqEntryIndex::fHideSTSFeats) != 0) {
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_STS);
-        }
-        if ((m_Flags & CSeqEntryIndex::fHideExonFeats) != 0) {
-            sel.ExcludeNamedAnnots("Exon");
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon);
-        }
-        if ((m_Flags & CSeqEntryIndex::fHideIntronFeats) != 0) {
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron);
-        }
-        if ((m_Flags & CSeqEntryIndex::fHideMiscFeats) != 0) {
-            sel.ExcludeFeatType(CSeqFeatData::e_Site);
-            sel.ExcludeFeatType(CSeqFeatData::e_Bond);
-            sel.ExcludeFeatType(CSeqFeatData::e_Region);
-            sel.ExcludeFeatType(CSeqFeatData::e_Comment);
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature);
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_preprotein);
-        }
-        if ((m_Flags & CSeqEntryIndex::fHideGapFeats) != 0) {
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_gap);
-            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_assembly_gap);
-        }
-
-        // additional common settings
-        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue)
-           .ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite)
-           .ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq);
-
-        sel.SetFeatComparator(new feature::CFeatComparatorByLabel);
-
-        // request exception to capture fetch failure
-        sel.SetFailUnresolved();
+        x_DefaultSelector(sel, m_Policy, m_Flags, m_ForceOnlyNearFeats, *m_Scope);
  
          bool onlyGeneRNACDS = false;
          if ((m_Flags & CSeqEntryIndex::fGeneRNACDSOnly) != 0) {
@@ -2372,12 +2026,6 @@ void CBioseqIndex::x_InitFeatsByLoc (const CSeq_loc& slp)
          CProt_ref::EProcessed bestprocessed = CProt_ref::eProcessed_not_set;
          CProt_ref::EProcessed processed;
  
-        // next gap
-        CGapIndex* sgx = NULL;
-        if (m_GapList.size() > 0) {
-            sgx = m_GapList[0];
-        }
-
          CWeakRef<CSeqMasterIndex> idx = GetSeqMasterIndex();
          auto idxl = idx.Lock();
          if (idxl) {
@@ -2402,23 +2050,58 @@ void CBioseqIndex::x_InitFeatsByLoc (const CSeq_loc& slp)
              // start collection over on each segment
              m_SfxList.clear();
  
+            // iterate features on Bioseq or sublocation
+            CFeat_CI feat_it;
+            CRef<CSeq_loc_Mapper> slice_mapper;
+            if (slpp == 0) {
+                feat_it = CFeat_CI(m_Bsh, sel);
+            } else {
+                SAnnotSelector sel_cpy = sel;
+                sel_cpy.SetIgnoreStrand();
+                /*
+                if (slpp->IsSetStrand() && slpp->GetStrand() == eNa_strand_minus) {
+                    sel_cpy.SetSortOrder(SAnnotSelector::eSortOrder_Reverse);
+                }
+                */
+                CConstRef<CSeq_id> bsid = m_Bsh.GetSeqId();
+                if (bsid) {
+                    SetDiagFilter(eDiagFilter_All, "!(1305.28,31)");
+                    CSeq_id seq_id;
+                    seq_id.Assign( *bsid );
+                    CSeq_loc old_loc;
+                    old_loc.SetInt().SetId( seq_id );
+                    old_loc.SetInt().SetFrom( 0 );
+                    old_loc.SetInt().SetTo( m_Length - 1 );
+                    slice_mapper = new CSeq_loc_Mapper( *slpp, old_loc, m_Scope );
+                    slice_mapper->SetFuzzOption( CSeq_loc_Mapper::fFuzzOption_RemoveLimTlOrTr );
+                    slice_mapper->TruncateNonmappingRanges();
+                    SetDiagFilter(eDiagFilter_All, "");
+                }
+                feat_it = CFeat_CI(*m_Scope, *slpp, sel_cpy);
+            }
+
              // iterate features on Bioseq
-            for (CFeat_CI feat_it(*m_Scope, slp, sel); feat_it; ++feat_it) {
+            for (; feat_it; ++feat_it) {
                  const CMappedFeat mf = *feat_it;
  
+                const CSeqFeatData& data = mf.GetData();
+                CSeqFeatData::E_Choice typ = data.Which();
                  if (onlyGeneRNACDS) {
-                    const CSeqFeatData& data = mf.GetData();
-                    CSeqFeatData::E_Choice type = data.Which();
-                    if (type != CSeqFeatData::e_Gene &&
-                        type != CSeqFeatData::e_Rna &&
-                        type != CSeqFeatData::e_Cdregion) {
+                    if (typ != CSeqFeatData::e_Gene &&
+                        typ != CSeqFeatData::e_Rna &&
+                        typ != CSeqFeatData::e_Cdregion) {
                          continue;
                      }
                  }
  
                  CSeq_feat_Handle hdl = mf.GetSeq_feat_Handle();
  
-                CRef<CFeatureIndex> sfx(new CFeatureIndex(hdl, mf, *this));
+                CConstRef<CSeq_loc> feat_loc(&mf.GetLocation());
+                if (slpp) {
+                    feat_loc.Reset( slice_mapper->Map( mf.GetLocation() ) );
+                }
+
+                CRef<CFeatureIndex> sfx(new CFeatureIndex(hdl, mf, feat_loc, *this));
                  m_SfxList.push_back(sfx);
  
                  ft->AddFeature(mf);
@@ -2514,10 +2197,23 @@ void CBioseqIndex::x_InitFeatsByLoc (const CSeq_loc& slp)
      }
      catch (CException& e) {
          m_FetchFailure = true;
-        LOG_POST_X(6, Error << "Error in CBioseqIndex::x_InitFeatsByLoc: " << e.what());
+        LOG_POST_X(6, Error << "Error in CBioseqIndex::x_InitFeats: " << e.what());
      }
  }
  
+// Feature collection over the whole Bioseq (delayed until needed)
+void CBioseqIndex::x_InitFeats (void)
+
+{
+    x_InitFeats(nullptr);  // null sublocation: the CSeq_loc* overload then iterates features on m_Bsh itself
+}
+
+void CBioseqIndex::x_InitFeats (CSeq_loc& slp)  // feature collection restricted to sublocation slp
+
+{
+    x_InitFeats(&slp);  // forwards to the CSeq_loc* overload, which takes its sublocation branch for a non-null pointer
+}
+
  // GetFeatureForProduct allows hypothetical protein defline generator to obtain gene locus tag
  CRef<CFeatureIndex> CBioseqIndex::GetFeatureForProduct (void)
  
@@ -2756,7 +2452,7 @@ const string& CBioseqIndex::GetLineage (void)
      return m_Lineage;
  }
  
-int CBioseqIndex::GetTaxid (void)
+TTaxId CBioseqIndex::GetTaxid (void)
  
  {
      if (! m_SourcesInitialized) {
@@ -3338,14 +3034,14 @@ CGapIndex::CGapIndex (TSeqPos start,
                        bool isUnknownLength,
                        bool isAssemblyGap,
                        CBioseqIndex& bsx)
-    : m_Start(start),
+    : m_Bsx(&bsx),
+      m_Start(start),
        m_End(end),
        m_Length(length),
        m_GapType(type),
        m_GapEvidence(evidence),
        m_IsUnknownLength(isUnknownLength),
-      m_IsAssemblyGap(isAssemblyGap),
-      m_Bsx(&bsx)
+      m_IsAssemblyGap(isAssemblyGap)
  {
  }
  
@@ -3367,6 +3063,7 @@ CDescriptorIndex::CDescriptorIndex (const CSeqdesc& sd,
  // Constructor
  CFeatureIndex::CFeatureIndex (CSeq_feat_Handle sfh,
                                const CMappedFeat mf,
+                              CConstRef<CSeq_loc> feat_loc,
                                CBioseqIndex& bsx)
      : m_Sfh(sfh),
        m_Mf(mf),
@@ -3375,11 +3072,9 @@ CFeatureIndex::CFeatureIndex (CSeq_feat_Handle sfh,
      const CSeqFeatData& data = m_Mf.GetData();
      m_Type = data.Which();
      m_Subtype = data.GetSubtype();
-    const CSeq_feat& mpd = m_Mf.GetMappedFeature();
-    CConstRef<CSeq_loc> fl(&mpd.GetLocation());
-    m_Fl = fl;
-    m_Start = fl->GetStart(eExtreme_Positional);
-    m_End = fl->GetStop(eExtreme_Positional);
+    m_Fl = feat_loc;
+    m_Start = m_Fl->GetStart(eExtreme_Positional);
+    m_End = m_Fl->GetStop(eExtreme_Positional);
  }
  
  // Find CFeatureIndex object for best gene using internal CFeatTree
diff --git a/c++/src/objtools/CMakeLists.txt b/c++/src/objtools/CMakeLists.txt

index b7f920448a349c7dace94e820c620fa45f00df57..f70a294c79e6f66e110e8d4e29617e32283e5358 100644 (file)
--- a/c++/src/objtools/CMakeLists.txt
+++ b/c++/src/objtools/CMakeLists.txt
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMakeLists.txt 593577 2019-09-20 12:22:42Z gouriano $
+# $Id: CMakeLists.txt 612973 2020-07-30 19:13:00Z ivanov $
  #############################################################################
  
  NCBI_add_subdirectory(
@@ -7,5 +7,5 @@ NCBI_add_subdirectory(
    alnmgr cddalignview test manip cleanup format edit validator
    asniotest align seqmasks_io eutils
    align_format snputil uudutil variation writers pubseq_gateway
-  logging import
+  logging import flatfile
  )
diff --git a/c++/src/objtools/Makefile.in b/c++/src/objtools/Makefile.in

index bdaf1aff32bea5fb1e11503ce6a45fe3ea82b1e8..ce3274a53db6d0420cf05f52d6f50a34176d9225 100644 (file)
--- a/c++/src/objtools/Makefile.in
+++ b/c++/src/objtools/Makefile.in
@@ -1,4 +1,4 @@
-# $Id: Makefile.in 586035 2019-05-08 18:29:07Z vakatov $
+# $Id: Makefile.in 612973 2020-07-30 19:13:00Z ivanov $
  
  # Meta-makefile("objtools" project)
  #################################
@@ -10,7 +10,7 @@ SUB_PROJ = logging unit_test_util readers blast lds2 pubseq_gateway \
             alnmgr cddalignview test manip edit cleanup format validator \
             asniotest align seqmasks_io eutils \
             align_format snputil uudutil variation writers \
-           import      
+           import flatfile
  
  srcdir = @srcdir@
  include @builddir@/Makefile.meta
diff --git a/c++/src/objtools/alnmgr/alnvec.cpp b/c++/src/objtools/alnmgr/alnvec.cpp

index 8d2355c34b87b8e7f72c0516c5e9696f73f7d29f..2ef9995337456da3e20c2ab3b56c8b60ac2b848a 100644 (file)
--- a/c++/src/objtools/alnmgr/alnvec.cpp
+++ b/c++/src/objtools/alnmgr/alnvec.cpp
@@ -1,4 +1,4 @@
-/*  $Id: alnvec.cpp 577167 2018-12-31 20:16:49Z dicuccio $
+/*  $Id: alnvec.cpp 608806 2020-05-21 14:51:55Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -233,8 +233,8 @@ string& CAlnVec::GetWholeAlnSeqString(TNumrow       row,
      const bool record_coords  = scrn_width && scrn_lefts && scrn_rights;
  
      // allocate space for the row
-    char* c_buff = new char[aln_len + 1];
-    char* c_buff_ptr = c_buff;
+    buffer.clear();
+    buffer.reserve(aln_len);
      string buff;
      
      const TNumseg& left_seg = x_GetSeqLeftSeg(row);
@@ -282,12 +282,10 @@ string& CAlnVec::GetWholeAlnSeqString(TNumrow       row,
                  // add regular sequence to buffer
                  GetSeqString(buff, row, start, stop);
                  TSeqPos buf_len = min<TSeqPos>(buff.size(), seg_len);
-                memcpy(c_buff_ptr, buff.c_str(), buf_len);
-                c_buff_ptr += buf_len;
+                buffer += buff;
                  if (buf_len < seg_len) {
                      // Not enough chars in the sequence, add gap
                      buf_len = seg_len - buf_len;
-                    char* ch_buff = new char[buf_len + 1];
                      char fill_ch;
  
                      if (seg < left_seg  ||  seg > right_seg) {
@@ -296,11 +294,9 @@ string& CAlnVec::GetWholeAlnSeqString(TNumrow       row,
                          fill_ch = GetGapChar(row);
                      }
  
-                    memset(ch_buff, fill_ch, buf_len);
-                    ch_buff[buf_len] = 0;
-                    memcpy(c_buff_ptr, ch_buff, buf_len);
-                    c_buff_ptr += buf_len;
-                    delete[] ch_buff;
+                    for (size_t i = 0; i < buf_len; ++i) {
+                        buffer += fill_ch;
+                    }
                  }
  
                  // take care of coords if necessary
@@ -364,7 +360,6 @@ string& CAlnVec::GetWholeAlnSeqString(TNumrow       row,
              } else {
                  // add appropriate number of gap/end chars
                  
-                char* ch_buff = new char[seg_len + 1];
                  char fill_ch;
                  
                  if (seg < left_seg  ||  seg > right_seg) {
@@ -373,11 +368,9 @@ string& CAlnVec::GetWholeAlnSeqString(TNumrow       row,
                      fill_ch = GetGapChar(row);
                  }
                  
-                memset(ch_buff, fill_ch, seg_len);
-                ch_buff[seg_len] = 0;
-                memcpy(c_buff_ptr, ch_buff, seg_len);
-                c_buff_ptr += seg_len;
-                delete[] ch_buff;
+                for (size_t i = 0; i < seg_len; ++i) {
+                    buffer += fill_ch;
+                }
              }
              aln_pos += len;
          }
@@ -403,9 +396,6 @@ string& CAlnVec::GetWholeAlnSeqString(TNumrow       row,
              }
          }
      }
-    c_buff[aln_len] = '\0';
-    buffer = c_buff;
-    delete [] c_buff;
      return buffer;
  }
  
diff --git a/c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp b/c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp

index 8db09d82430b14e60f240399e50847a70b6c94e7..5dd4eb3bed5fe58a4d3f3783e7aef8c427c76c83 100644 (file)
--- a/c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp
+++ b/c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp
@@ -1,4 +1,4 @@
-/*  $Id: blastdb_dataextract.cpp 591961 2019-08-23 13:08:25Z madden $
+/*  $Id: blastdb_dataextract.cpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -184,13 +184,13 @@ string CBlastDBExtractor::ExtractLinksInteger()
              if (seqid->IsGi()) {
                  if (seqid->GetGi() == m_Gi) {
                      ITERATE(CBlast_def_line::TLinks, links_int, (*itr)->GetLinks()) {
-                        retval += NStr::IntToString(*links_int) + SEPARATOR;
+                        retval += NStr::NumericToString(*links_int) + SEPARATOR;
                      }
                      break;
                  }
              } else {
                  ITERATE(CBlast_def_line::TLinks, links_int, (*itr)->GetLinks()) {
-                    retval += NStr::IntToString(*links_int) + SEPARATOR;
+                    retval += NStr::NumericToString(*links_int) + SEPARATOR;
                  }
              }
          }
@@ -384,28 +384,28 @@ string CBlastDBExtractor::ExtractTitle() {
  }
  
  string CBlastDBExtractor::ExtractTaxId() {
-    return NStr::IntToString(x_ExtractTaxId());
+    return NStr::NumericToString(x_ExtractTaxId());
  }
  
  string CBlastDBExtractor::ExtractLeafTaxIds() {
-    set<int> taxids;
+    set<TTaxId> taxids;
      x_ExtractLeafTaxIds(taxids);
      if (taxids.empty()) {
          return ExtractTaxId();
      }
      string retval;
-    ITERATE(set<int>, taxids_iter, taxids) {
+    ITERATE(set<TTaxId>, taxids_iter, taxids) {
          if (retval.empty()) {
-            retval = NStr::IntToString(*taxids_iter);
+            retval = NStr::NumericToString(*taxids_iter);
          } else {
-            retval += SEPARATOR + NStr::IntToString(*taxids_iter);
+            retval += SEPARATOR + NStr::NumericToString(*taxids_iter);
          }
      }
      return retval;
  }
  
  string CBlastDBExtractor::ExtractCommonTaxonomicName() {
-    const int kTaxID = x_ExtractTaxId();
+    const TTaxId kTaxID = x_ExtractTaxId();
      SSeqDBTaxInfo tax_info;
      string retval(NOT_AVAILABLE);
      try {
@@ -417,12 +417,12 @@ string CBlastDBExtractor::ExtractCommonTaxonomicName() {
  }
  
  string CBlastDBExtractor::ExtractLeafCommonTaxonomicNames() {
-    set<int> taxids;
+    set<TTaxId> taxids;
      x_ExtractLeafTaxIds(taxids);
      SSeqDBTaxInfo tax_info;
      string retval;
-    ITERATE(set<int>, taxid_iter, taxids) {
-        const int kTaxID = *taxid_iter;
+    ITERATE(set<TTaxId>, taxid_iter, taxids) {
+        const TTaxId kTaxID = *taxid_iter;
          try {
              m_BlastDb.GetTaxInfo(kTaxID, tax_info);
              _ASSERT(kTaxID == tax_info.taxid);
@@ -441,7 +441,7 @@ string CBlastDBExtractor::ExtractLeafCommonTaxonomicNames() {
  }
  
  string CBlastDBExtractor::ExtractScientificName() {
-    const int kTaxID = x_ExtractTaxId();
+    const TTaxId kTaxID = x_ExtractTaxId();
      SSeqDBTaxInfo tax_info;
      string retval(NOT_AVAILABLE);
      try {
@@ -453,12 +453,12 @@ string CBlastDBExtractor::ExtractScientificName() {
  }
  
  string CBlastDBExtractor::ExtractLeafScientificNames() {
-    set<int> taxids;
+    set<TTaxId> taxids;
      x_ExtractLeafTaxIds(taxids);
      SSeqDBTaxInfo tax_info;
      string retval;
-    ITERATE(set<int>, taxid_iter, taxids) {
-        const int kTaxID = *taxid_iter;
+    ITERATE(set<TTaxId>, taxid_iter, taxids) {
+        const TTaxId kTaxID = *taxid_iter;
          try {
              m_BlastDb.GetTaxInfo(kTaxID, tax_info);
              _ASSERT(kTaxID == tax_info.taxid);
@@ -477,7 +477,7 @@ string CBlastDBExtractor::ExtractLeafScientificNames() {
  }
  
  string CBlastDBExtractor::ExtractBlastName() {
-    const int kTaxID = x_ExtractTaxId();
+    const TTaxId kTaxID = x_ExtractTaxId();
      SSeqDBTaxInfo tax_info;
      string retval(NOT_AVAILABLE);
      try {
@@ -513,7 +513,7 @@ string CBlastDBExtractor::ExtractBlastName() {
  //}
  
  string CBlastDBExtractor::ExtractSuperKingdom() {
-    const int kTaxID = x_ExtractTaxId();
+    const TTaxId kTaxID = x_ExtractTaxId();
      SSeqDBTaxInfo tax_info;
      string retval(NOT_AVAILABLE);
      try {
@@ -739,7 +739,7 @@ string CBlastDBExtractor::ExtractFasta(const CBlastDBSeqId &id) {
      return out.str();
  }
  
-int CBlastDBExtractor::x_ExtractTaxId()
+TTaxId CBlastDBExtractor::x_ExtractTaxId()
  {
      x_SetGi();
  
@@ -752,12 +752,12 @@ int CBlastDBExtractor::x_ExtractTaxId()
          return m_Gi2TaxidMap.second[m_Gi];
      }
      // for database without Gi:
-    vector<int> taxid;
+    vector<TTaxId> taxid;
      m_BlastDb.GetTaxIDs(m_Oid, taxid);
-    return taxid.size() ? taxid[0] : 0;
+    return taxid.size() ? taxid[0] : ZERO_TAX_ID;
  }
  
-void CBlastDBExtractor::x_ExtractLeafTaxIds(set<int>& taxids)
+void CBlastDBExtractor::x_ExtractLeafTaxIds(set<TTaxId>& taxids)
  {
      x_SetGi();
  
@@ -768,12 +768,12 @@ void CBlastDBExtractor::x_ExtractLeafTaxIds(set<int>& taxids)
              m_BlastDb.GetLeafTaxIDs(m_Oid, m_Gi2TaxidSetMap.second);
          }
          taxids.clear();
-        const set<int>& taxid_set = m_Gi2TaxidSetMap.second[m_Gi];
+        const set<TTaxId>& taxid_set = m_Gi2TaxidSetMap.second[m_Gi];
          taxids.insert(taxid_set.begin(), taxid_set.end());
          return;
      }
      // for database without Gi:
-    vector<int> taxid;
+    vector<TTaxId> taxid;
      m_BlastDb.GetLeafTaxIDs(m_Oid, taxid);
      taxids.clear();
      taxids.insert(taxid.begin(), taxid.end());
@@ -861,7 +861,7 @@ void CBlastDeflineUtil::ExtractDataFromBlastDefline(const CBlast_def_line & dl,
                 }
         }
         if ((fields.tax_id == 1) || (fields.tax_names == 1)) {
-               unsigned int tax_id = 0;
+        TTaxId tax_id = ZERO_TAX_ID;
                 if (dl.IsSetTaxid()) {
                         tax_id = dl.GetTaxid();
                 }
@@ -888,18 +888,18 @@ void CBlastDeflineUtil::ExtractDataFromBlastDefline(const CBlast_def_line & dl,
         }
  
         if ((fields.leaf_node_tax_ids == 1) || (fields.leaf_node_tax_names == 1)) {
-               set<int>  tax_id_set = dl.GetLeafTaxIds();
+               set<TTaxId>  tax_id_set = dl.GetLeafTaxIds();
                 if (tax_id_set.empty()) {
                         if (dl.IsSetTaxid()) {
                                 tax_id_set.insert(dl.GetTaxid());
                         }
                         else {
-                               tax_id_set.insert(0);
+                               tax_id_set.insert(ZERO_TAX_ID);
                         }
                 }
  
                 string separator = kEmptyStr;
-               ITERATE(set<int>, itr, tax_id_set) {
+               ITERATE(set<TTaxId>, itr, tax_id_set) {
                         if (fields.leaf_node_tax_names == 1) {
                                 try {
                                         SSeqDBTaxInfo taxinfo;
@@ -941,7 +941,7 @@ void CBlastDeflineUtil::ExtractDataFromBlastDefline(const CBlast_def_line & dl,
         if(fields.links == 1) {
                 if (dl.IsSetLinks()) {
                         ITERATE(CBlast_def_line::TLinks, links_int, dl.GetLinks()) {
-                               results[CBlastDeflineUtil::links] += NStr::IntToString(*links_int) + SEPARATOR;
+                               results[CBlastDeflineUtil::links] += NStr::NumericToString(*links_int) + SEPARATOR;
                         }
                 }
                 else {
diff --git a/c++/src/objtools/blast/seqdb_reader/seqdb.cpp b/c++/src/objtools/blast/seqdb_reader/seqdb.cpp

index dec197584da3cac5461e42f23009e9cb40b2ec11..82a345c75936550bca1395519ab4da7d84c00126 100644 (file)
--- a/c++/src/objtools/blast/seqdb_reader/seqdb.cpp
+++ b/c++/src/objtools/blast/seqdb_reader/seqdb.cpp
@@ -1,4 +1,4 @@
-/*  $Id: seqdb.cpp 605340 2020-04-09 16:06:43Z ivanov $
+/*  $Id: seqdb.cpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -254,12 +254,12 @@ void CSeqDB::AccessionsToOids(const vector<string>& accs, vector<blastdb::TOid>&
       m_Impl->AccessionsToOids(accs, oids);
  }
  
-void CSeqDB::TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv) const
+void CSeqDB::TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const
  {
       m_Impl->TaxIdsToOids(tax_ids, rv);
  }
  
-void CSeqDB::GetDBTaxIds(set<Int4> & tax_ids) const
+void CSeqDB::GetDBTaxIds(set<TTaxId> & tax_ids) const
  {
       m_Impl->GetDBTaxIds(tax_ids);
  }
@@ -434,11 +434,11 @@ CSeqDB::ESeqType CSeqDB::GetSequenceType() const
  }
  
  void CSeqDB::GetTaxIDs(int             oid,
-                       map<TGi, int> & gi_to_taxid,
+                       map<TGi, TTaxId> & gi_to_taxid,
                         bool            persist) const
  {
      ////m_Impl->Verify();
-    typedef map<TGi, int> TmpMap;
+    typedef map<TGi, TTaxId> TmpMap;
      TmpMap gi_to_taxid_tmp;
      m_Impl->GetTaxIDs(oid, gi_to_taxid_tmp, persist);
      if ( !persist ) {
@@ -451,7 +451,7 @@ void CSeqDB::GetTaxIDs(int             oid,
  }
  
  void CSeqDB::GetTaxIDs(int           oid,
-                       vector<int> & taxids,
+                       vector<TTaxId> & taxids,
                         bool          persist) const
  {
      ////m_Impl->Verify();
@@ -460,19 +460,19 @@ void CSeqDB::GetTaxIDs(int           oid,
  }
  
  void CSeqDB::GetAllTaxIDs(int           oid,
-                          set<int> & taxids) const
+                          set<TTaxId> & taxids) const
  {
      m_Impl->GetAllTaxIDs(oid, taxids);
  }
  
  void CSeqDB::GetLeafTaxIDs(
          int                  oid,
-        map<TGi, set<int> >& gi_to_taxid_set,
+        map<TGi, set<TTaxId> >& gi_to_taxid_set,
          bool                 persist
  ) const
  {
      ////m_Impl->Verify();
-    typedef map<TGi, set<int> > TmpMap;
+    typedef map<TGi, set<TTaxId> > TmpMap;
      TmpMap gi_to_taxid_set_tmp;
      m_Impl->GetLeafTaxIDs(oid, gi_to_taxid_set_tmp, persist);
      if ( !persist ) {
@@ -486,7 +486,7 @@ void CSeqDB::GetLeafTaxIDs(
  
  void CSeqDB::GetLeafTaxIDs(
          int          oid,
-        vector<int>& taxids,
+        vector<TTaxId>& taxids,
          bool         persist
  ) const
  {
@@ -1100,7 +1100,7 @@ void CSeqDB::GetAliasFileValues(TAliasFileValues & afv)
      ////m_Impl->Verify();
  }
  
-void CSeqDB::GetTaxInfo(int taxid, SSeqDBTaxInfo & info)
+void CSeqDB::GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo & info)
  {
      CSeqDBImpl::GetTaxInfo(taxid, info);
  }
diff --git a/c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp b/c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp

index 0fc86119f5b91a828ebc187753cc5339eec54357..a19f0dd7c0c7da404bc8f6731722298cb4d3c45d 100644 (file)
--- a/c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp
+++ b/c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp
@@ -1,4 +1,4 @@
-/*  $Id: seqdb_lmdb.cpp 595902 2019-10-29 17:32:09Z fongah2 $
+/*  $Id: seqdb_lmdb.cpp 616872 2020-09-22 13:14:27Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -75,22 +75,23 @@ void CBlastLMDBManager::CBlastEnv::InitDbi(lmdb::env & env, ELMDBFileType file_t
  }
  
  CBlastLMDBManager::CBlastEnv::CBlastEnv(const string & fname, ELMDBFileType file_type, bool read_only, Uint8 map_size) :
-               m_Filename(fname), m_FileType(file_type),m_Env(lmdb::env::create()), m_Count(1), m_ReadOnly(read_only), m_MapSize(map_size)
+               m_Filename(fname), m_FileType(file_type),m_Env(lmdb::env::create()), m_Count(1), m_ReadOnly(read_only)
  {
         const MDB_dbi num_db(3);
         m_Env.set_max_dbs(num_db);
         m_dbis.resize(eDbiMax, UINT_MAX);
         if(m_ReadOnly) {
                 CFile tf(fname);
-               m_MapSize = (tf.GetLength()/10000 + 1) *10000;
-               m_Env.set_mapsize(m_MapSize);
+               Uint8 readMapSize = (tf.GetLength()/10000 + 1) *10000;
+               m_Env.set_mapsize(readMapSize);
                 m_Env.open(m_Filename.c_str(), MDB_NOSUBDIR|MDB_NOLOCK|MDB_RDONLY, 0664);
          InitDbi(m_Env,file_type);
         }
         else {
+               LOG_POST(Info <<"Initial Map Size: " << map_size);
                 /// map_size 0 means use lmdb default
-               if(m_MapSize != 0) {
-                       m_Env.set_mapsize(m_MapSize);
+               if(map_size != 0) {
+                       m_Env.set_mapsize(map_size);
                 }
                 m_Env.open(m_Filename.c_str(), MDB_NOSUBDIR , 0664);
         }
@@ -130,6 +131,13 @@ MDB_dbi CBlastLMDBManager::CBlastEnv::GetDbi(EDbiType dbi_type)
         return m_dbis[dbi_type];
  }
  
+void CBlastLMDBManager::CBlastEnv::SetMapSize(Uint8 map_size)
+{
+       if(!m_ReadOnly) {
+               m_Env.set_mapsize(map_size);
+       }
+}
+
  CBlastLMDBManager & CBlastLMDBManager::GetInstance() {
         static CSafeStatic<CBlastLMDBManager> lmdb_manager;
         return lmdb_manager.Get();
@@ -142,31 +150,41 @@ lmdb::env & CBlastLMDBManager::GetReadEnvVol(const string & fname,  MDB_dbi & db
         db_volname = p->GetDbi(CBlastEnv::eDbiVolname);
         return p->GetEnv();
  }
-lmdb::env & CBlastLMDBManager::GetReadEnvAcc(const string & fname, MDB_dbi & db_acc)
+lmdb::env & CBlastLMDBManager::GetReadEnvAcc(const string & fname, MDB_dbi & db_acc, bool* opened)
  {
-       CBlastEnv* p = GetBlastEnv(fname, eLMDB);
+       CBlastEnv* p = GetBlastEnv(fname, eLMDB, opened);
         db_acc = p->GetDbi(CBlastEnv::eDbiAcc2oid);
         return p->GetEnv();
  }
-lmdb::env & CBlastLMDBManager::GetReadEnvTax(const string & fname, MDB_dbi & db_tax)
+lmdb::env & CBlastLMDBManager::GetReadEnvTax(const string & fname, MDB_dbi & db_tax, bool* opened)
  {
-       CBlastEnv* p = GetBlastEnv(fname, eTaxId2Offsets);
+       CBlastEnv* p = GetBlastEnv(fname, eTaxId2Offsets, opened);
         db_tax = p->GetDbi(CBlastEnv::eDbiTaxid2offset);
         return p->GetEnv();
  }
  
  
-CBlastLMDBManager::CBlastEnv* CBlastLMDBManager::GetBlastEnv(const string & fname, ELMDBFileType file_type)
+CBlastLMDBManager::CBlastEnv* CBlastLMDBManager::GetBlastEnv(const string & fname,
+                                                             ELMDBFileType file_type,
+                                                             bool* opened)
  {
         CFastMutexGuard guard(m_Mutex);
         NON_CONST_ITERATE(list <CBlastEnv* >, itr, m_EnvList) {
                 if((*itr)->GetFilename() == fname)  {
                         (*itr)->AddReference();
+            if ( opened && !*opened ) {
+               (*itr)->AddReference();
+                *opened = true;
+            }
                         return (*itr);
                 }
         }
         CBlastEnv * p (new CBlastEnv(fname, file_type));
         m_EnvList.push_back(p);
+    if ( opened && !*opened ) {
+        p->AddReference();
+        *opened = true;
+    }
         return p;
  }
  
@@ -212,9 +230,17 @@ CSeqDBLMDB::CSeqDBLMDB(const string & fname)
        m_Oid2SeqIdsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eOid2SeqIds)),
        m_Oid2TaxIdsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eOid2TaxIds)),
        m_TaxId2OidsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eTaxId2Oids)),
-      m_TaxId2OffsetsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eTaxId2Offsets))
+      m_TaxId2OffsetsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eTaxId2Offsets)),
+      m_LMDBFileOpened(false)
+{
+}
  
+CSeqDBLMDB::~CSeqDBLMDB()
  {
+    if ( m_LMDBFileOpened ) {
+        CBlastLMDBManager::GetInstance().CloseEnv(m_LMDBFile);
+        m_LMDBFileOpened = false;
+    }
  }
  
  void 
@@ -224,7 +250,7 @@ CSeqDBLMDB::GetOid(const string & accession, vector<blastdb::TOid> & oids, const
      oids.clear();
      {
      MDB_dbi dbi_handle;
-       lmdb::env & env = CBlastLMDBManager::GetInstance().GetReadEnvAcc(m_LMDBFile, dbi_handle);
+       lmdb::env & env = CBlastLMDBManager::GetInstance().GetReadEnvAcc(m_LMDBFile, dbi_handle, &m_LMDBFileOpened);
      lmdb::dbi dbi(dbi_handle);
      auto txn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
      auto cursor = lmdb::cursor::open(txn, dbi);
@@ -319,7 +345,7 @@ CSeqDBLMDB::GetOids(const vector<string>& accessions, vector<blastdb::TOid>& oid
      oids.resize(accessions.size(), kSeqDBEntryNotFound);
  
      MDB_dbi dbi_handle;
-       lmdb::env & env = CBlastLMDBManager::GetInstance().GetReadEnvAcc(m_LMDBFile, dbi_handle);
+       lmdb::env & env = CBlastLMDBManager::GetInstance().GetReadEnvAcc(m_LMDBFile, dbi_handle, &m_LMDBFileOpened);
         {
      lmdb::dbi dbi(dbi_handle);
      auto txn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
@@ -510,7 +536,7 @@ CSeqDBLMDB::NegativeSeqIdsToOids(const vector<string>& ids, vector<blastdb::TOid
  
  }
  
-void CSeqDBLMDB::GetDBTaxIds(vector<Int4> & tax_ids) const
+void CSeqDBLMDB::GetDBTaxIds(vector<TTaxId> & tax_ids) const
  {
  
         tax_ids.clear();
@@ -523,7 +549,7 @@ void CSeqDBLMDB::GetDBTaxIds(vector<Int4> & tax_ids) const
         auto cursor = lmdb::cursor::open(txn, dbi);
         lmdb::val key;
          while (cursor.get(key, MDB_NEXT)) {
-               Int4 taxid = *((Int4 *)key.data());
+               TTaxId taxid = TAX_ID_FROM(Int4, *((Int4 *)key.data()));
                 tax_ids.push_back(taxid);
          }
          cursor.close();
@@ -540,9 +566,10 @@ void CSeqDBLMDB::GetDBTaxIds(vector<Int4> & tax_ids) const
                 NCBI_THROW( CSeqDBException, eArgErr, "Taxonomy Id to Oids lookup error in " + dbname);
                 }
      }
+    CBlastLMDBManager::GetInstance().CloseEnv(m_TaxId2OffsetsFile);
  }
  
-void CSeqDBLMDB::GetOidsForTaxIds(const set<Int4> & tax_ids, vector<blastdb::TOid>& oids, vector<Int4> & tax_ids_found) const
+void CSeqDBLMDB::GetOidsForTaxIds(const set<TTaxId> & tax_ids, vector<blastdb::TOid>& oids, vector<TTaxId> & tax_ids_found) const
  {
  
      try {
@@ -555,8 +582,8 @@ void CSeqDBLMDB::GetOidsForTaxIds(const set<Int4> & tax_ids, vector<blastdb::TOi
      auto txn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
         lmdb::dbi dbi(dbi_handle);
      auto cursor = lmdb::cursor::open(txn, dbi);
-    ITERATE(set<Int4>, itr, tax_ids) {
-       Int4 tax_id = *itr;
+    ITERATE(set<TTaxId>, itr, tax_ids) {
+       Int4 tax_id = TAX_ID_TO(Int4, *itr);
          lmdb::val data2find(tax_id);
  
          if (cursor.get(data2find, MDB_SET)) {
@@ -626,14 +653,14 @@ public:
                 m_DataStart += (2* (num_of_oids + 1));
         }
  
-       inline void GetTaxIdListForOid(blastdb::TOid oid, vector<Int4> & taxid_list);
+       inline void GetTaxIdListForOid(blastdb::TOid oid, vector<TTaxId> & taxid_list);
  private:
  
         Uint8 * m_IndexStart;
         Int4 * m_DataStart;
  };
  
-void CLookupTaxIds::GetTaxIdListForOid(blastdb::TOid oid, vector<Int4> & taxid_list)
+void CLookupTaxIds::GetTaxIdListForOid(blastdb::TOid oid, vector<TTaxId> & taxid_list)
  {
         taxid_list.clear();
         Uint8 * index_ptr = m_IndexStart + oid;
@@ -641,23 +668,23 @@ void CLookupTaxIds::GetTaxIdListForOid(blastdb::TOid oid, vector<Int4> & taxid_l
         index_ptr--;
         Int4 * begin = (oid == 0) ? m_DataStart:m_DataStart + (*index_ptr);
         while (begin < end) {
-               taxid_list.push_back(*begin);
+               taxid_list.push_back(TAX_ID_FROM(Int4, *begin));
                 begin++;
         }
  }
  
  void
-CSeqDBLMDB::NegativeTaxIdsToOids(const set<Int4>& tax_ids, vector<blastdb::TOid>& rv, vector<Int4> & tax_ids_found) const
+CSeqDBLMDB::NegativeTaxIdsToOids(const set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const
  {
         rv.clear();
         vector<blastdb::TOid> oids;
         GetOidsForTaxIds(tax_ids, oids, tax_ids_found);
  
         CMemoryFile oid_file(m_Oid2TaxIdsFile);
-       set<Int4> tax_id_list(tax_ids.begin(), tax_ids.end());
+       set<TTaxId> tax_id_list(tax_ids.begin(), tax_ids.end());
         CLookupTaxIds lookup(oid_file);
         for(unsigned int i=0; i < oids.size(); i++) {
-               vector<Int4>  file_list;
+               vector<TTaxId>  file_list;
                 lookup.GetTaxIdListForOid(oids[i], file_list);
                 if(file_list.size() > tax_ids.size()) {
                         continue;
@@ -676,12 +703,12 @@ CSeqDBLMDB::NegativeTaxIdsToOids(const set<Int4>& tax_ids, vector<blastdb::TOid>
         }
  }
  
-void CSeqDBLMDB::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int4> & tax_ids) const
+void CSeqDBLMDB::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const
  {
         CMemoryFile oid_file(m_Oid2TaxIdsFile);
         CLookupTaxIds lookup(oid_file);
         for(unsigned int i=0; i < oids.size(); i++) {
-               vector<Int4>  taxid_list;
+               vector<TTaxId>  taxid_list;
                 lookup.GetTaxIdListForOid(oids[i], taxid_list);
                 tax_ids.insert(taxid_list.begin(), taxid_list.end());
         }
diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp

index eb34f9a87297683263143f15343190654997db09..072277fa2f1ea3ec69a606f5e8fd35be6bb954cc 100644 (file)
--- a/c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp
+++ b/c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp
@@ -1,4 +1,4 @@
-/*  $Id: seqdbcommon.cpp 605336 2020-04-09 16:04:52Z ivanov $
+/*  $Id: seqdbcommon.cpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -1102,7 +1102,7 @@ void SeqDB_ReadMemoryTaxIdList(const char * fbeginp,
          }
  
          for(Int4 * elem = (bbeginp + 2); elem < bendp; ++elem) {
-           taxids.tax_ids.insert(SeqDB_GetStdOrd(elem));
+           taxids.tax_ids.insert(TAX_ID_FROM(Int4, SeqDB_GetStdOrd(elem)));
          }
      } else {
          Int4 elem(0);
@@ -1113,7 +1113,7 @@ void SeqDB_ReadMemoryTaxIdList(const char * fbeginp,
              if (dig == -1) {
                  // Skip blank lines or comments by ignoring zero.
                  if (elem != 0) {
-                    taxids.tax_ids.insert(elem);
+                    taxids.tax_ids.insert(TAX_ID_FROM(Int4, elem));
                  }
                  elem = 0;
                  continue;
diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp

index e0097aa27df93de120b449c9f6745b215f3cc7d7..38fcf76ec60941737efcd14d13dd472a62202bd5 100644 (file)
--- a/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp
+++ b/c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp
@@ -1,4 +1,4 @@
-/*  $Id: seqdbgilistset.cpp 597735 2019-11-26 17:53:47Z fongah2 $
+/*  $Id: seqdbgilistset.cpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -254,7 +254,7 @@ CSeqDBGiListSet::x_ResolvePositiveList(CSeqDBAtlas            & atlas,
                 }
                 if(user_list->GetNumTaxIds() > 0) {
                         vector<blastdb::TOid> & oids = user_list->SetOidsForTaxIdsList();
-                       set<Int4> &  tax_ids = user_list->GetTaxIdsList();
+                       set<TTaxId> &  tax_ids = user_list->GetTaxIdsList();
                         lmdb_set.TaxIdsToOids(tax_ids, oids);
                 }
                 if((user_list->GetNumGis() == 0) && (user_list->GetNumTis() == 0) &&
@@ -332,7 +332,7 @@ CSeqDBGiListSet::x_ResolveNegativeList(CSeqDBAtlas            & atlas,
                 }
                 if(m_NegativeList->GetNumTaxIds() > 0) {
                         vector<blastdb::TOid> & oids = m_NegativeList->SetExcludedOids();
-                       set<Int4> &  tax_ids = m_NegativeList->GetTaxIdsList();
+                       set<TTaxId> &  tax_ids = m_NegativeList->GetTaxIdsList();
                         lmdb_set.NegativeTaxIdsToOids(tax_ids, oids);
                 }
  
diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp

index 65741950f487926e3e72afbecd4af7ace9f44461..0c6b94ddad81c2092a8c2f541743fed6692b76d1 100644 (file)
--- a/c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp
+++ b/c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp
@@ -1,4 +1,4 @@
-/*  $Id: seqdbimpl.cpp 607218 2020-04-30 18:42:35Z ivanov $
+/*  $Id: seqdbimpl.cpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -438,7 +438,7 @@ int CSeqDBImpl::GetSeqLengthApprox(int oid) const
  }
  
  void CSeqDBImpl::GetTaxIDs(int             oid,
-                           map<TGi, int> & gi_to_taxid,
+                           map<TGi, TTaxId> & gi_to_taxid,
                             bool            persist)
  {
      CSeqDBLockHold locked(m_Atlas);
@@ -474,7 +474,7 @@ void CSeqDBImpl::GetTaxIDs(int             oid,
  }
  
  void CSeqDBImpl::GetTaxIDs(int           oid,
-                           vector<int> & taxids,
+                           vector<TTaxId> & taxids,
                             bool          persist)
  {
      CSeqDBLockHold locked(m_Atlas);
@@ -499,7 +499,7 @@ void CSeqDBImpl::GetTaxIDs(int           oid,
  }
  
  void CSeqDBImpl::GetAllTaxIDs(int           oid,
-                              set<int> & taxids)
+                              set<TTaxId> & taxids)
  {
      CSeqDBLockHold locked(m_Atlas);
  
@@ -516,7 +516,7 @@ void CSeqDBImpl::GetAllTaxIDs(int           oid,
  
  void CSeqDBImpl::GetLeafTaxIDs(
          int                  oid,
-        map<TGi, set<int> >& gi_to_taxid_set,
+        map<TGi, set<TTaxId> >& gi_to_taxid_set,
          bool                 persist
  )
  {
@@ -553,7 +553,7 @@ void CSeqDBImpl::GetLeafTaxIDs(
  
  void CSeqDBImpl::GetLeafTaxIDs(
          int          oid,
-        vector<int>& taxids,
+        vector<TTaxId>& taxids,
          bool         persist
  )
  {
@@ -1317,7 +1317,7 @@ void CSeqDBImpl::AccessionToOids(const string & acc,
  }
  
  
-void CSeqDBImpl::TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv)
+void CSeqDBImpl::TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv)
  {
      CHECK_MARKER();
      rv.clear();
@@ -1339,7 +1339,7 @@ void CSeqDBImpl::TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv)
      return;
  }
  
-void CSeqDBImpl::GetDBTaxIds(set<Int4> & tax_ids)
+void CSeqDBImpl::GetDBTaxIds(set<TTaxId> & tax_ids)
  {
      CHECK_MARKER();
      CSeqDBLockHold locked(m_Atlas);
@@ -1629,7 +1629,7 @@ void CSeqDBImpl::x_ScanTotals(bool             approx,
      }
  }
  
-void CSeqDBImpl::GetTaxInfo(int taxid, SSeqDBTaxInfo & info)
+void CSeqDBImpl::GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo & info)
  {
      if (! CSeqDBTaxInfo::GetTaxNames(taxid, info)) {
          CNcbiOstrstream oss;
diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp b/c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp

index 9f9400dccc7e05a116158ccd72c01db21a78ba76..285f412d279f766f2fa832626de1fcc37c94e842 100644 (file)
--- a/c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp
+++ b/c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_READERS_SEQDB__SEQDBIMPL_HPP
  #define OBJTOOLS_READERS_SEQDB__SEQDBIMPL_HPP
  
-/*  $Id: seqdbimpl.hpp 605340 2020-04-09 16:06:43Z ivanov $
+/*  $Id: seqdbimpl.hpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -244,7 +244,7 @@ public:
      /// @param persist
      ///   If false, the map will be cleared before adding new entries.
      void GetTaxIDs(int             oid,
-                   map<TGi, int> & gi_to_taxid,
+                   map<TGi, TTaxId> & gi_to_taxid,
                     bool            persist);
  
      /// Get taxids for an OID.
@@ -264,7 +264,7 @@ public:
      /// @param persist
      ///   If false, the map will be cleared before adding new entries.
      void GetTaxIDs(int           oid,
-                   vector<int> & taxids,
+                   vector<TTaxId> & taxids,
                     bool          persist);
  
      /// Get gi to taxid map for an OID.
@@ -284,13 +284,13 @@ public:
      ///   If false, the map will be cleared before adding new entries.
      void GetLeafTaxIDs(
              int                  oid,
-            map<TGi, set<int> >& gi_to_taxid_set,
+            map<TGi, set<TTaxId> >& gi_to_taxid_set,
              bool                 persist
      );
  
      /// Get all tax ids (leaf and non-leaf for an oid
      void GetAllTaxIDs(int           oid,
-                      set<int> & taxids);
+                      set<TTaxId> & taxids);
  
      /// Get gi to taxid map for an OID.
      ///
@@ -310,7 +310,7 @@ public:
      ///   If false, the map will be cleared before adding new entries.
      void GetLeafTaxIDs(
              int          oid,
-            vector<int>& gi_to_taxid_set,
+            vector<TTaxId>& gi_to_taxid_set,
              bool         persist
      );
  
@@ -727,7 +727,7 @@ public:
      ///   An integer identifying the taxid to fetch.
      /// @param info
      ///   A structure containing taxonomic description strings.
-    static void GetTaxInfo(int taxid, SSeqDBTaxInfo & info);
+    static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo & info);
  
      /// Returns the sum of the sequence lengths.
      ///
@@ -1066,11 +1066,11 @@ public:
      /// Get Oid list for input tax ids
      /// @param tax_ids taxonomy ids
      /// @param rv              oids corrpond to tax ids
-    void TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv);
+    void TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv);
  
      /// Get all unique tax ids from db
      /// @param tax_ids return taxonomy ids in db
-    void GetDBTaxIds(set<Int4> & tax_ids);
+    void GetDBTaxIds(set<TTaxId> & tax_ids);
  
  private:
      CLASS_MARKER_FIELD("IMPL")
diff --git a/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp b/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp

index aeb7c5e37288f7f144ea47d36a03854d9bb0023b..e2436ffb7454a9e9c08d0c4f99c403a220ab3c71 100644 (file)
--- a/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp
+++ b/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp
@@ -161,24 +161,24 @@ void CSeqDBLMDBEntry::NegativeSeqIdsToOids(const vector<string>& ids, vector<bla
         x_AdjustOidsOffset(rv);
  }
  
-void CSeqDBLMDBEntry::TaxIdsToOids(const set<Int4>& tax_ids, vector<blastdb::TOid>& rv, vector<Int4> & tax_ids_found) const
+void CSeqDBLMDBEntry::TaxIdsToOids(const set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const
  {
         m_LMDB->GetOidsForTaxIds(tax_ids, rv, tax_ids_found);
         x_AdjustOidsOffset_TaxList(rv);
  }
  
-void CSeqDBLMDBEntry::NegativeTaxIdsToOids(const set<Int4>& tax_ids, vector<blastdb::TOid>& rv, vector<Int4> & tax_ids_found) const
+void CSeqDBLMDBEntry::NegativeTaxIdsToOids(const set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const
  {
         m_LMDB->NegativeTaxIdsToOids(tax_ids, rv, tax_ids_found);
         x_AdjustOidsOffset_TaxList(rv);
  }
  
-void CSeqDBLMDBEntry::GetDBTaxIds(vector<Int4> & tax_ids) const
+void CSeqDBLMDBEntry::GetDBTaxIds(vector<TTaxId> & tax_ids) const
  {
         m_LMDB->GetDBTaxIds(tax_ids);
  }
  
-void CSeqDBLMDBEntry::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int4> & tax_ids) const
+void CSeqDBLMDBEntry::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const
  {
         if(m_isPartial) {
                 vector<TOid> tmp;
@@ -311,10 +311,10 @@ void CSeqDBLMDBSet::NegativeSeqIdsToOids(const vector<string>& ids, vector<blast
  
  }
  
-void CSeqDBLMDBSet::TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv) const
+void CSeqDBLMDBSet::TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const
  {
-       vector<Int4> tax_ids_found;
-       set<Int4> rv_tax_ids;
+       vector<TTaxId> tax_ids_found;
+       set<TTaxId> rv_tax_ids;
         m_LMDBEntrySet[0]->TaxIdsToOids(tax_ids, rv, tax_ids_found);
         rv_tax_ids.insert(tax_ids_found.begin(), tax_ids_found.end());
         for(unsigned int i=1; i < m_LMDBEntrySet.size(); i++) {
@@ -331,10 +331,10 @@ void CSeqDBLMDBSet::TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv)
         tax_ids.swap(rv_tax_ids);
  }
  
-void CSeqDBLMDBSet::NegativeTaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv) const
+void CSeqDBLMDBSet::NegativeTaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const
  {
-       vector<Int4> tax_ids_found;
-       set<Int4> rv_tax_ids;
+       vector<TTaxId> tax_ids_found;
+       set<TTaxId> rv_tax_ids;
         m_LMDBEntrySet[0]->NegativeTaxIdsToOids(tax_ids, rv, tax_ids_found);
         rv_tax_ids.insert(tax_ids_found.begin(), tax_ids_found.end());
         for(unsigned int i=1; i < m_LMDBEntrySet.size(); i++) {
@@ -352,9 +352,9 @@ void CSeqDBLMDBSet::NegativeTaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOi
         tax_ids.swap(rv_tax_ids);
  }
  
-void CSeqDBLMDBSet::GetDBTaxIds(set<Int4> & tax_ids) const
+void CSeqDBLMDBSet::GetDBTaxIds(set<TTaxId> & tax_ids) const
  {
-       vector<Int4> t;
+       vector<TTaxId> t;
         m_LMDBEntrySet[0]->GetDBTaxIds(t);
         tax_ids.insert(t.begin(), t.end());
         for(unsigned int i=1; i < m_LMDBEntrySet.size(); i++) {
@@ -365,7 +365,7 @@ void CSeqDBLMDBSet::GetDBTaxIds(set<Int4> & tax_ids) const
  }
  
  
-void CSeqDBLMDBSet::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int4> & tax_ids) const
+void CSeqDBLMDBSet::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const
  {
         if (m_LMDBEntrySet.size() > 1) {
         vector<TOid> t;
@@ -373,7 +373,7 @@ void CSeqDBLMDBSet::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int
         for(unsigned int i =0; i < oids.size(); i++){
                         if (oids[i] >= m_LMDBEntrySet[j]->GetOIDEnd()){
                                 if (t.size() > 0){
-                                       set<Int4> t_set;
+                                       set<TTaxId> t_set;
                                         m_LMDBEntrySet[j]->GetTaxIdsForOids(t, t_set);
                                         t.clear();
                                         tax_ids.insert(t_set.begin(), t_set.end());
@@ -383,7 +383,7 @@ void CSeqDBLMDBSet::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int
                         t.push_back(oids[i] - m_LMDBEntrySet[j]->GetOIDStart());
                 }
         if (t.size() > 0){
-               set<Int4> t_set;
+               set<TTaxId> t_set;
                 m_LMDBEntrySet[j]->GetTaxIdsForOids(t, t_set);
                 tax_ids.insert(t_set.begin(), t_set.end());
         }
diff --git a/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp b/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp

index 0eb441bd50e98bec604f860b84f558ff8ef37698..8477af837c6762abb835539c11dfd430d4709712 100644 (file)
--- a/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp
+++ b/c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp
@@ -100,13 +100,13 @@ public:
  
      void NegativeSeqIdsToOids(const vector<string>& ids, vector<blastdb::TOid>& rv) const;
  
-    void TaxIdsToOids(const set<Int4>& tax_ids, vector<blastdb::TOid>& rv, vector<Int4> & tax_ids_found) const;
+    void TaxIdsToOids(const set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const;
  
-    void NegativeTaxIdsToOids(const set<Int4>& tax_ids, vector<blastdb::TOid>& rv, vector<Int4> & tax_ids_found) const;
+    void NegativeTaxIdsToOids(const set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const;
  
-    void GetDBTaxIds(vector<Int4> & tax_ids) const;
+    void GetDBTaxIds(vector<TTaxId> & tax_ids) const;
  
-    void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int4> & tax_ids) const;
+    void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const;
  
  private:
      void x_AdjustOidsOffset(vector<TOid> & oids) const;
@@ -171,13 +171,13 @@ public:
  
      void NegativeSeqIdsToOids(const vector<string>& ids, vector<blastdb::TOid>& rv) const;
  
-    void TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv) const;
+    void TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const;
  
-    void NegativeTaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv) const;
+    void NegativeTaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const;
  
-    void GetDBTaxIds(set<Int4> & tax_ids) const;
+    void GetDBTaxIds(set<TTaxId> & tax_ids) const;
  
-    void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int4> & tax_ids) const;
+    void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const;
  
  private:
      vector<CRef<CSeqDBLMDBEntry> >  m_LMDBEntrySet;
diff --git a/c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp b/c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp

index de3d0f57c0c9d8a77338b0d31c5d0da3b97b0775..fbfb13bc34485f04887b6c2a0eeb3c32691c7fb6 100644 (file)
--- a/c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp
+++ b/c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp
@@ -1,4 +1,4 @@
-/*  $Id: seqdboidlist.cpp 579001 2019-01-29 13:54:57Z fongah2 $
+/*  $Id: seqdboidlist.cpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -641,11 +641,11 @@ void s_ProcessTaxIdFilters(const vector<string> &     fnames,
                 return;
         }
  
-       set<Int4> user_taxids;
+       set<TTaxId> user_taxids;
         if(!user_list.Empty() && (user_list->GetNumTaxIds() > 0)) {
                 user_taxids = user_list->GetTaxIdsList();
         }
-       set<Int4> neg_user_taxids;
+       set<TTaxId> neg_user_taxids;
         if(!neg_user_list.Empty() && (neg_user_list->GetNumTaxIds() > 0)) {
                 neg_user_taxids = neg_user_list->GetTaxIdsList();
         }
@@ -655,15 +655,15 @@ void s_ProcessTaxIdFilters(const vector<string> &     fnames,
                 vector<blastdb::TOid> oids;
                 CRef<CSeqDBGiList> list(new CSeqDBFileGiList(fnames[k], CSeqDBFileGiList::eTaxIdList));
                 s_GetFilteredOidRange(volset, fnames_vols[k], excluded_vols, list);
-               set<Int4> taxids;
+               set<TTaxId> taxids;
                 taxids = list->GetTaxIdsList();
                 if(taxids.size() == 0){
                         continue;
                 }
                 if(user_taxids.size() > 0){
-                       vector<Int4> common;
+                       vector<TTaxId> common;
                         common.resize(taxids.size());
-                       vector<Int4>::iterator itr = set_intersection(taxids.begin(), taxids.end(),
+                       vector<TTaxId>::iterator itr = set_intersection(taxids.begin(), taxids.end(),
                                                                               user_taxids.begin(), user_taxids.end(), common.begin());
                         common.resize(itr-common.begin());
                         if( common.size() == 0) {
@@ -673,9 +673,9 @@ void s_ProcessTaxIdFilters(const vector<string> &     fnames,
                         taxids.insert(common.begin(), common.end());
                 }
                 if(neg_user_taxids.size() > 0) {
-                       vector<Int4> difference;
+                       vector<TTaxId> difference;
                         difference.resize(taxids.size());
-                       vector<Int4>::iterator itr = set_difference(taxids.begin(), taxids.end(),
+                       vector<TTaxId>::iterator itr = set_difference(taxids.begin(), taxids.end(),
                                                                                         neg_user_taxids.begin(), neg_user_taxids.end(), difference.begin());
                         difference.resize(itr-difference.begin());
                         if(difference.size() == 0){
diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbtax.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbtax.cpp

index ada5e6c1bef8ff4979afa145c9258db4541749f5..1ee99291d3b33fd42123090bf78b5156dc1e46a5 100644 (file)
--- a/c++/src/objtools/blast/seqdb_reader/seqdbtax.cpp
+++ b/c++/src/objtools/blast/seqdb_reader/seqdbtax.cpp
@@ -1,4 +1,4 @@
-/*  $Id: seqdbtax.cpp 530943 2017-03-20 12:53:37Z fongah2 $
+/*  $Id: seqdbtax.cpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -61,9 +61,9 @@ public:
      }
  
      /// Return the taxonomic identifier field (in host order)
-    Int4 GetTaxId()const
+    TTaxId GetTaxId()const
      {
-        return SeqDB_GetStdOrd(& m_Taxid);
+        return TAX_ID_FROM(Int4, SeqDB_GetStdOrd(& m_Taxid));
      }
  
      /// Return the offset field (in host order)
@@ -216,7 +216,7 @@ CTaxDBFileInfo::~CTaxDBFileInfo()
  }
  
  
-bool CSeqDBTaxInfo::GetTaxNames(Int4             tax_id,
+bool CSeqDBTaxInfo::GetTaxNames(TTaxId           tax_id,
                                  SSeqDBTaxInfo  & info )
  {
         static CTaxDBFileInfo t;
@@ -227,8 +227,8 @@ bool CSeqDBTaxInfo::GetTaxNames(Int4             tax_id,
      
      const char * Data = t.GetDataPtr();
      const CSeqDBTaxId*  Index = t.GetIndexPtr();
-    Int4 low_taxid  = Index[low_index ].GetTaxId();
-    Int4 high_taxid = Index[high_index].GetTaxId();
+    TTaxId low_taxid  = Index[low_index ].GetTaxId();
+    TTaxId high_taxid = Index[high_index].GetTaxId();
  
      if((tax_id < low_taxid) || (tax_id > high_taxid))
          return false;
@@ -237,7 +237,7 @@ bool CSeqDBTaxInfo::GetTaxNames(Int4             tax_id,
      Int4 old_index = new_index;
      
      while(1) {
-        Int4 curr_taxid = Index[new_index].GetTaxId();
+        TTaxId curr_taxid = Index[new_index].GetTaxId();
          
          if (tax_id < curr_taxid) {
              high_index = new_index;
diff --git a/c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp b/c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp

index d30a42690358ce088c6762484fc6f336f2b5d055..7d58c7d3fcfd12ae99f1a3953d290ab10de2aa1f 100644 (file)
--- a/c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp
+++ b/c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp
@@ -1,4 +1,4 @@
-/*  $Id: seqdbvol.cpp 607218 2020-04-30 18:42:35Z ivanov $
+/*  $Id: seqdbvol.cpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -1082,18 +1082,18 @@ CSeqDBVol::x_GetTaxonomy(int                    oid,
      //m_Atlas.Lock(locked);
  
      for(TBDLLConstIter iter = dl.begin(); iter != dl.end(); iter ++) {
-        int taxid = 0;
+        TTaxId taxid = ZERO_TAX_ID;
  
          if ((*iter)->CanGetTaxid()) {
              taxid = (*iter)->GetTaxid();
          }
-        if (taxid <= 0) {
+        if (taxid <= ZERO_TAX_ID) {
              continue;
          }
  
          bool have_org_desc = false;
  
-        if (use_taxinfo_cache && m_TaxCache.Lookup(taxid).NotEmpty()) {
+        if (use_taxinfo_cache && m_TaxCache.Lookup(TAX_ID_TO(int, taxid)).NotEmpty()) {
              have_org_desc = true;
          }
  
@@ -1110,11 +1110,11 @@ CSeqDBVol::x_GetTaxonomy(int                    oid,
  
          if (provide_new_taxonomy_info) {
              if (have_org_desc) {
-                taxonomy.push_back(m_TaxCache.Lookup(taxid));
+                taxonomy.push_back(m_TaxCache.Lookup(TAX_ID_TO(int, taxid)));
              } else {
                  CRef<CDbtag> org_tag(new CDbtag);
                  org_tag->SetDb(TAX_ORGREF_DB_NAME);
-                org_tag->SetTag().SetId(taxid);
+                org_tag->SetTag().SetId(TAX_ID_TO(int, taxid));
  
                  CRef<COrg_ref> org(new COrg_ref);
                  if (found_taxid_in_taxonomy_blastdb) {
@@ -1133,7 +1133,7 @@ CSeqDBVol::x_GetTaxonomy(int                    oid,
                  taxonomy.push_back(desc);
  
                  if (use_taxinfo_cache) {
-                    m_TaxCache.Lookup(taxid) = desc;
+                    m_TaxCache.Lookup(TAX_ID_TO(int, taxid)) = desc;
                  }
              }
          }
@@ -1845,7 +1845,7 @@ CSeqDBVol::GetFilteredHeader(int                    oid,
      return x_GetFilteredHeader(oid, NULL);
  }
  
-bool s_IncludeDefline_Taxid(const CBlast_def_line & def, const set<int> & user_tax_ids)
+bool s_IncludeDefline_Taxid(const CBlast_def_line & def, const set<TTaxId> & user_tax_ids)
  {
         CBlast_def_line::TTaxIds tax_ids;
         if (def.IsSetTaxid()) {
@@ -1853,8 +1853,12 @@ bool s_IncludeDefline_Taxid(const CBlast_def_line & def, const set<int> & user_t
         }
         if(def.IsSetLinks()) {
                 CBlast_def_line::TLinks leaf_ids = def.GetLinks();
-               tax_ids.insert(leaf_ids.begin(), leaf_ids.end());
-       }
+#ifdef NCBI_STRICT_TAX_ID
+        ITERATE(CBlast_def_line::TLinks, it, leaf_ids) tax_ids.insert(TAX_ID_FROM(int, *it));
+#else
+        tax_ids.insert(leaf_ids.begin(), leaf_ids.end());
+#endif
+    }
  
         if(user_tax_ids.size() > tax_ids.size()) {
                 ITERATE(CBlast_def_line::TTaxIds, itr, tax_ids) {
@@ -1865,7 +1869,7 @@ bool s_IncludeDefline_Taxid(const CBlast_def_line & def, const set<int> & user_t
  
         }
         else {
-               ITERATE(set<int>, itr, user_tax_ids) {
+               ITERATE(set<TTaxId>, itr, user_tax_ids) {
                         if(tax_ids.find(*itr) != tax_ids.end()) {
                                 return true;
                         }
@@ -1874,7 +1878,7 @@ bool s_IncludeDefline_Taxid(const CBlast_def_line & def, const set<int> & user_t
         return false;
  }
  
-bool s_IncludeDefline_NegativeTaxid(const CBlast_def_line & def, const set<int> & user_tax_ids)
+bool s_IncludeDefline_NegativeTaxid(const CBlast_def_line & def, const set<TTaxId> & user_tax_ids)
  {
         CBlast_def_line::TTaxIds taxid_set = def.GetTaxIds();
         if(taxid_set.size() > user_tax_ids.size()) {
diff --git a/c++/src/objtools/blast/seqdb_writer/build-alias-index b/c++/src/objtools/blast/seqdb_writer/build-alias-index

index 0e93a356c136f5bf2d383c04fd8281c58f1668eb..ad30891505412b04f705e7214e0c892e63bd0a4e 100755 (executable)
--- a/c++/src/objtools/blast/seqdb_writer/build-alias-index
+++ b/c++/src/objtools/blast/seqdb_writer/build-alias-index
@@ -3,7 +3,7 @@
  # subdirectory
  # Author: Kevin Bealer
  # Original date: 10/21/2005
-# $URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.10.1/c++/src/objtools/blast/seqdb_writer/build-alias-index $
+# $URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.11.0/c++/src/objtools/blast/seqdb_writer/build-alias-index $
  
  INDEX_NAME=index.alx
  OUTNAME=index.alx.new
diff --git a/c++/src/objtools/blast/seqdb_writer/taxid_set.cpp b/c++/src/objtools/blast/seqdb_writer/taxid_set.cpp

index cab433347418f8a55a49ddef3cc6411621a10faa..c629bbde22181dd128e265422fb563fa1ad4cd64 100644 (file)
--- a/c++/src/objtools/blast/seqdb_writer/taxid_set.cpp
+++ b/c++/src/objtools/blast/seqdb_writer/taxid_set.cpp
@@ -1,4 +1,4 @@
-/*  $Id: taxid_set.cpp 548810 2017-10-18 13:38:41Z ivanov $
+/*  $Id: taxid_set.cpp 616350 2020-09-15 12:19:05Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -40,6 +40,8 @@ BEGIN_NCBI_SCOPE
  USING_SCOPE(objects);
  #endif
  
+const TTaxId CTaxIdSet::kTaxIdNotSet = ZERO_TAX_ID;
+
  void CTaxIdSet::SetMappingFromFile(CNcbiIstream & f)
  {
      while(f && (! f.eof())) {
@@ -62,7 +64,7 @@ void CTaxIdSet::SetMappingFromFile(CNcbiIstream & f)
          }
          
          if (gi_str.size() && tx_str.size()) {
-            int taxid = NStr::StringToInt(tx_str, NStr::fAllowLeadingSpaces);
+            TTaxId taxid = NStr::StringToNumeric<TTaxId>(tx_str, NStr::fAllowLeadingSpaces);
              string key = AccessionToKey(gi_str);
              
              m_TaxIdMap[key] = taxid;
@@ -71,9 +73,9 @@ void CTaxIdSet::SetMappingFromFile(CNcbiIstream & f)
      m_Matched = (m_GlobalTaxId != kTaxIdNotSet) || m_TaxIdMap.empty();
  }
  
-int CTaxIdSet::x_SelectBestTaxid(const objects::CBlast_def_line & defline) 
+TTaxId CTaxIdSet::x_SelectBestTaxid(const objects::CBlast_def_line & defline) 
  {
-    int retval = m_GlobalTaxId;
+    TTaxId retval = m_GlobalTaxId;
  
      if (retval != kTaxIdNotSet) {
          return retval;
@@ -87,7 +89,7 @@ int CTaxIdSet::x_SelectBestTaxid(const objects::CBlast_def_line & defline)
              if (key->empty())
                  continue;
              
-            map<string, int>::const_iterator item = m_TaxIdMap.find(*key);
+            map<string, TTaxId>::const_iterator item = m_TaxIdMap.find(*key);
              
              if (item != m_TaxIdMap.end()) {
                  retval = item->second;
diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_lmdb_unit_test.cpp b/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_lmdb_unit_test.cpp

index b2d166ba057361b8d6aab0d67f604c9fde94af20..fdc399a488696ea2114bbe0b7eb1eeeb8124c8a3 100644 (file)
--- a/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_lmdb_unit_test.cpp
+++ b/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_lmdb_unit_test.cpp
@@ -72,9 +72,9 @@ BOOST_AUTO_TEST_CASE(CreateLMDBFile)
                 test_db.InsertVolumesInfo(vol_names, vol_num_oids);
  
                 CWriteDB_TaxID taxdb(tax_lmdb,100000);
-           const int taxids[5] = { 9606, 562, 0, 2, 10239 };
+           const TTaxId taxids[5] = { TAX_ID_CONST(9606), TAX_ID_CONST(562), TAX_ID_CONST(0), TAX_ID_CONST(2), TAX_ID_CONST(10239) };
                 for (int i=0; i < source_db.GetNumOIDs(); i++) {
-                       set<int> t;
+                       set<TTaxId> t;
                         for(int j=0; j < (i % 5 + 1); j++) {
                                 t.insert(taxids[j]);
                         }
@@ -122,9 +122,9 @@ BOOST_AUTO_TEST_CASE(CreateLMDBFile)
  
                 /* Test Tax Ids */
                 vector<blastdb::TOid> tax_oids;
-               set<Int4> tax_ids;
-               tax_ids.insert(10239);
-               vector<Int4> rv_tax_ids;
+               set<TTaxId> tax_ids;
+               tax_ids.insert(TAX_ID_CONST(10239));
+               vector<TTaxId> rv_tax_ids;
                 test_db.GetOidsForTaxIds(tax_ids, tax_oids, rv_tax_ids);
                 for(unsigned int i=0; i < tax_ids.size(); i++) {
                         BOOST_REQUIRE_EQUAL(tax_oids[i] % 5, 4);
@@ -146,6 +146,103 @@ BOOST_AUTO_TEST_CASE(CreateLMDBFile)
  }
  
  
+BOOST_AUTO_TEST_CASE(TestLMDBMapSize)
+{      // Writes seq-id, volume and tax-id data with a small size argument (10), then reads everything back and checks it.
+       const string base_name = "tmp_lmdb";
+       DeleteLMDBFiles(true, base_name);
+       const string lmdb_name = BuildLMDBFileName(base_name, true);
+       const string tax_lmdb = GetFileNameFromExistingLMDBFile(lmdb_name, ELMDBFileType::eTaxId2Offsets);
+       const int kNumVols = 4;
+       CSeqDB source_db("data/writedb_prot",CSeqDB::eProtein);
+       vector<string> vol_names;
+       vector<blastdb::TOid> vol_num_oids;
+       for(unsigned int k=0; k < kNumVols; k++) {
+               vol_names.push_back("tmp_lmdb" + NStr::IntToString(k));
+               vol_num_oids.push_back(k*1234);
+       }
+
+       {      // Writer scope: both writer objects are destroyed at the closing brace, before the read-back block below
+               CWriteDB_LMDB test_db(lmdb_name, 10); // second argument is map_size; 10 is far below what the data needs
+               for (int i=0; i < source_db.GetNumOIDs(); i++) {
+                       list< CRef<CSeq_id> >  ids = source_db.GetSeqIDs(i);
+                       test_db.InsertEntries(ids, i);
+               }
+               test_db.InsertVolumesInfo(vol_names, vol_num_oids);
+
+               CWriteDB_TaxID taxdb(tax_lmdb,10); // presumably also a map size (CreateLMDBFile passes 100000 here) -- confirm
+           const TTaxId taxids[5] = { TAX_ID_CONST(9606), TAX_ID_CONST(562), TAX_ID_CONST(0), TAX_ID_CONST(2), TAX_ID_CONST(10239) };
+               for (int i=0; i < source_db.GetNumOIDs(); i++) {
+                       set<TTaxId> t;
+                       for(int j=0; j < (i % 5 + 1); j++) { // oid i receives the first (i % 5 + 1) tax ids of the table
+                               t.insert(taxids[j]);
+                       }
+                       taxdb.InsertEntries(t, i);
+               }
+       }
+
+       {
+               vector<string> test_neg_accs;
+               CSeqDBLMDB test_db(lmdb_name);
+
+               /* Test GetOids from Seq IDs */
+               for(int i=0; i < source_db.GetNumOIDs(); i++) {
+                       vector<string> test_accs;
+                       vector<blastdb::TOid> test_oids;
+                       list< CRef<CSeq_id> >  ids = source_db.GetSeqIDs(i);
+                       CRef<CSeq_id> n_id = FindBestChoice(ids, CSeq_id::WorstRank);
+                       test_neg_accs.push_back(n_id->GetSeqIdString(false));
+                       ITERATE(list< CRef<CSeq_id> >, itr, ids) {
+                               if((*itr)->IsGi()) {
+                                       continue;
+                               }
+                               test_accs.push_back((*itr)->GetSeqIdString(true));
+                               test_accs.push_back((*itr)->GetSeqIdString(false));
+                       }
+                       test_db.GetOids(test_accs, test_oids);
+                       for(unsigned int j=0; j < test_accs.size(); j++) {
+                               BOOST_REQUIRE_EQUAL(test_oids[j], i);
+                       }
+               }
+
+               /* Test Negative Seq IDs  to OIDs */
+               vector<blastdb::TOid> neg_oids;
+               test_db.NegativeSeqIdsToOids(test_neg_accs, neg_oids);
+               BOOST_REQUIRE_EQUAL(neg_oids.size(), 65);
+
+               /* Test Vol Info */
+               vector<string> test_vol_names;
+               vector<blastdb::TOid> test_vol_num_oids;
+               test_db.GetVolumesInfo(test_vol_names, test_vol_num_oids);
+               for(unsigned int k=0; k < kNumVols; k++) {
+                       BOOST_REQUIRE_EQUAL(test_vol_num_oids[k], vol_num_oids[k]);
+                       BOOST_REQUIRE_EQUAL(test_vol_names[k], vol_names[k]);
+               }
+
+               /* Test Tax Ids */
+               vector<blastdb::TOid> tax_oids;
+               set<TTaxId> tax_ids;
+               tax_ids.insert(TAX_ID_CONST(10239));
+               vector<TTaxId> rv_tax_ids;
+               test_db.GetOidsForTaxIds(tax_ids, tax_oids, rv_tax_ids);
+               for(unsigned int i=0; i < tax_ids.size(); i++) { // 10239 is taxids[4], so it was only inserted for oids with oid % 5 == 4
+                       BOOST_REQUIRE_EQUAL(tax_oids[i] % 5, 4);
+               }
+
+               test_db.NegativeTaxIdsToOids(tax_ids, tax_oids, rv_tax_ids);
+               BOOST_REQUIRE_EQUAL(tax_oids.size(), 0);
+
+               tax_ids.clear();
+               tax_ids.insert(TAX_ID_CONST(9606)); // TAX_ID_CONST as above; a bare int may not convert to TTaxId under NCBI_STRICT_TAX_ID
+               tax_ids.insert(TAX_ID_CONST(562));
+               test_db.NegativeTaxIdsToOids(tax_ids, tax_oids, rv_tax_ids);
+               for(unsigned int i=0; i < rv_tax_ids.size(); i++) {
+                       BOOST_REQUIRE((tax_oids[i] % 5 < 2));
+               }
+
+       }
+       DeleteLMDBFiles(true, base_name);
+}
+
  
  BOOST_AUTO_TEST_SUITE_END()
  
diff --git a/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp b/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp

index 74e659bb8bf9a4cd91d436140dcc258dcf2717da..898a6a09adc51592e975b7e09f4f5d2781c0fc3a 100644 (file)
--- a/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp
+++ b/c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp
@@ -1,4 +1,4 @@
-/*  $Id: writedb_unit_test.cpp 588813 2019-07-01 12:29:54Z fongah2 $
+/*  $Id: writedb_unit_test.cpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -2537,7 +2537,7 @@ BOOST_AUTO_TEST_CASE(CBuildDatabase_WriteToInvalidPathUnix)
  
  BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomy)
  {
-    const int kTaxId(9986);
+    const TTaxId kTaxId = TAX_ID_CONST(9986);
      CTaxIdSet tis(kTaxId);
      const string kDbName("foo");
      CWriteDB blastdb(kDbName, CWriteDB::eNucleotide, kDbName);
@@ -2563,7 +2563,7 @@ BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomy)
      int total=db.GetNumSeqs();
      for (int oid=0; oid<total; oid++)
      {
-            vector<int> taxids;
+            vector<TTaxId> taxids;
              db.GetTaxIDs(oid, taxids);
              BOOST_REQUIRE(taxids.size() == 1);
              BOOST_REQUIRE_EQUAL(kTaxId, taxids.front());
@@ -2573,7 +2573,7 @@ BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomy)
  
  BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMap)
  {
-    const int kTaxId(9986);
+    const TTaxId kTaxId = TAX_ID_CONST(9986);
      CRef<CTaxIdSet> tis(new CTaxIdSet());
      const string kDbName("foo");
      CWriteDB blastdb(kDbName, CWriteDB::eNucleotide, kDbName);
@@ -2601,7 +2601,7 @@ BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMap)
      int total=db.GetNumSeqs();
      for (int oid=0; oid<total; oid++)
      {
-          vector<int> taxids;
+          vector<TTaxId> taxids;
            db.GetTaxIDs(oid, taxids);
            BOOST_REQUIRE(taxids.size() == 1);
            BOOST_REQUIRE_EQUAL(kTaxId, taxids.front());
@@ -2611,7 +2611,7 @@ BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMap)
  
  BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMapLclIds)
  {
-    const int kTaxId(382);
+    const TTaxId kTaxId = TAX_ID_CONST(382);
      CRef<CTaxIdSet> tis(new CTaxIdSet());
      const string kDbName("foo");
      CWriteDB blastdb(kDbName, CWriteDB::eProtein, kDbName);
@@ -2637,7 +2637,7 @@ BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMapLclIds)
      int total=db.GetNumSeqs();
      for (int oid=0; oid<total; oid++)
      {
-          vector<int> taxids;
+          vector<TTaxId> taxids;
            db.GetTaxIDs(oid, taxids);
            BOOST_REQUIRE(taxids.size() == 1);
            BOOST_REQUIRE_EQUAL(kTaxId, taxids.front());
@@ -3577,7 +3577,7 @@ BOOST_AUTO_TEST_CASE(LimitProteinDeflines)
         CSeqDB readdb(dbname, CSeqDB::eProtein);
         for(unsigned int i=0; i < kNumOfDeflines; i++){
                 CRef<CBlast_def_line_set> new_set = readdb.GetHdr(i);
-               set<int> t;
+               set<TTaxId> t;
                 readdb.GetAllTaxIDs(i, t);
                 BOOST_REQUIRE_EQUAL(num_taxids[i], t.size());
                 BOOST_REQUIRE_EQUAL(num_deflines[i], new_set->Set().size());
diff --git a/c++/src/objtools/blast/seqdb_writer/writedb_impl.cpp b/c++/src/objtools/blast/seqdb_writer/writedb_impl.cpp

index bb14e692602b2f3d9448abfebe87bbf34e245507..0c3abbe82877e9e6d34d6b9afbddfea5b4f446d1 100644 (file)
--- a/c++/src/objtools/blast/seqdb_writer/writedb_impl.cpp
+++ b/c++/src/objtools/blast/seqdb_writer/writedb_impl.cpp
@@ -1,4 +1,4 @@
-/*  $Id: writedb_impl.cpp 588812 2019-07-01 12:29:10Z fongah2 $
+/*  $Id: writedb_impl.cpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -583,7 +583,7 @@ CWriteDB_Impl::x_BuildDeflinesFromBioseq(const CBioseq                  & bioseq
          return;
      }
  
-    vector<int> taxids;
+    vector<TTaxId> taxids;
      string titles;
  
      // Scan the CBioseq for taxids and the title string.
@@ -616,7 +616,7 @@ CWriteDB_Impl::x_BuildDeflinesFromBioseq(const CBioseq                  & bioseq
  
                          if (oi.IsId()) {
                              //defline->SetTaxid(oi.GetId());
-                            taxids.push_back(oi.GetId());
+                            taxids.push_back(TAX_ID_FROM(CObject_id::TId, oi.GetId()));
                          }
                      }
                  }
@@ -752,7 +752,7 @@ CWriteDB_Impl::x_ExtractDeflines(CConstRef<CBioseq>             & bioseq,
                                   const vector< vector<int> >    & membbits,
                                   const vector< vector<int> >    & linkouts,
                                   int                              pig,
-                                 set<Int4>                      & tax_ids,
+                                 set<TTaxId>                    & tax_ids,
                                   int                              OID,
                                   bool                             parse_ids,
                                   bool                             long_ids,
@@ -1553,7 +1553,7 @@ CWriteDB_Impl::ExtractBioseqDeflines(const CBioseq & bs, bool parse_ids,
      CConstRef<CBlast_def_line_set> deflines;
      string binary_header;
      vector< vector<int> > v1, v2;
-    set<Int4> t;
+    set<TTaxId> t;
  
      CConstRef<CBioseq> bsref(& bs);
      x_ExtractDeflines(bsref, deflines, binary_header, v2, v2, 0, t, -1, parse_ids,
diff --git a/c++/src/objtools/blast/seqdb_writer/writedb_impl.hpp b/c++/src/objtools/blast/seqdb_writer/writedb_impl.hpp

index f62085aca09b064ae156b0adb144c77018490d9d..4012c8c971bfc5734b024d4a26383f8ac9e9d261 100644 (file)
--- a/c++/src/objtools/blast/seqdb_writer/writedb_impl.hpp
+++ b/c++/src/objtools/blast/seqdb_writer/writedb_impl.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS_WRITERS_WRITEDB__WRITEDB_IMPL_HPP
  #define OBJTOOLS_WRITERS_WRITEDB__WRITEDB_IMPL_HPP
  
-/*  $Id: writedb_impl.hpp 588812 2019-07-01 12:29:10Z fongah2 $
+/*  $Id: writedb_impl.hpp 616350 2020-09-15 12:19:05Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -530,7 +530,7 @@ private:
                                    const vector< vector<int> >    & membbits,
                                    const vector< vector<int> >    & linkouts,
                                    int                              pig,
-                                  set<Int4>                      & tax_ids,
+                                  set<TTaxId>                    & tax_ids,
                                    int                              OID=-1,
                                    bool                             parse_ids=true,
                                    bool                             long_seqid=false,
@@ -610,7 +610,7 @@ private:
      /// Binary header in format that will be written to disk.
      string m_BinHdr;
  
-    set<Int4> m_TaxIds;
+    set<TTaxId> m_TaxIds;
  
      // Volumes
  
diff --git a/c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp b/c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp

index 12fe387cad5912cc038f928d6cbaeada2910e4ce..98d2547b0b7252d9905013c6eaee2300c4506e0b 100644 (file)
--- a/c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp
+++ b/c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp
@@ -51,7 +51,8 @@ BEGIN_NCBI_SCOPE
  CWriteDB_LMDB::CWriteDB_LMDB(const string& dbname,  Uint8 map_size, Uint8 capacity): m_Db(dbname),
                               m_Env(CBlastLMDBManager::GetInstance().GetWriteEnv(dbname, map_size)),
                               m_ListCapacity(capacity),
-                             m_MaxEntryPerTxn(DEFAULT_MAX_ENTRY_PER_TXN)
+                             m_MaxEntryPerTxn(DEFAULT_MAX_ENTRY_PER_TXN),
+                             m_TotalIdsLength(0)
  {
         m_list.reserve(m_ListCapacity);
         char* max_entry_str = getenv("MAX_LMDB_TXN_ENTRY");
@@ -72,6 +73,8 @@ CWriteDB_LMDB::~CWriteDB_LMDB()
  
  void CWriteDB_LMDB::InsertVolumesInfo(const vector<string> & vol_names, const vector<blastdb::TOid> & vol_num_oids)
  {
+       x_IncreaseEnvMapSize(vol_names, vol_num_oids);
+
      lmdb::txn txn = lmdb::txn::begin(m_Env);
      lmdb::dbi volinfo = lmdb::dbi::open(txn, blastdb::volinfo_str.c_str(), MDB_CREATE | MDB_INTEGERKEY);
      lmdb::dbi volname = lmdb::dbi::open(txn, blastdb::volname_str.c_str(), MDB_CREATE | MDB_INTEGERKEY);
@@ -90,7 +93,6 @@ void CWriteDB_LMDB::InsertVolumesInfo(const vector<string> & vol_names, const ve
         txn.commit();
  }
  
-
  int CWriteDB_LMDB::InsertEntries(const list<CRef<CSeq_id>> & seqids, const blastdb::TOid oid)
  {
      int count = 0;
@@ -193,6 +195,60 @@ void CWriteDB_LMDB::x_InsertEntry(const CRef<CSeq_id> &seqid, const blastdb::TOi
      return;
  }
  
+void CWriteDB_LMDB::x_IncreaseEnvMapSize(const vector<string> & vol_names, const vector<blastdb::TOid> & vol_num_oids)
+{      // Estimates the pages needed for the volume name/info records and enlarges the LMDB map if they would not fit. vol_num_oids is not read.
+       // 2 meta pages (NOTE(review): the constant below is 3 -- confirm what the extra page accounts for)
+       const size_t MIN_PAGES = 3;
+       const size_t BRANCH_PAGES = 2;
+       // Each entry has 8 byte overhead + size of (key + entry); every name is costed at the length of the first one
+       size_t vol_name_size = vol_names.empty() ? 0 : (vol_names.front().size() + 24)* vol_names.size(); // front() on an empty vector is undefined
+       size_t vol_info_size = 24* vol_names.size();
+
+       MDB_env *env = m_Env.handle();
+       MDB_stat stat;
+       MDB_envinfo info;
+       lmdb::env_stat(env, &stat);
+       lmdb::env_info(env, &info);
+       size_t page_size = stat.ms_psize;
+       // For each page 16 byte header
+       size_t page_max_size = page_size -16;
+       size_t last_page_num = info.me_last_pgno;
+       size_t max_num_pages = info.me_mapsize/page_size;
+       size_t leaf_pages_needed = vol_name_size/page_max_size + vol_info_size/page_max_size + 2; // +2: one extra page for each of the two record sets
+       size_t total_pages_needed = MIN_PAGES + BRANCH_PAGES + leaf_pages_needed;
+       if( (total_pages_needed + last_page_num) > max_num_pages ) { // grow only when the estimate exceeds the current map
+               size_t newMapSize = (total_pages_needed + last_page_num) * page_size;
+               m_Env.set_mapsize(newMapSize);
+               LOG_POST(Info << "Increased lmdb mapsize to " << newMapSize);
+       }
+
+}
+
+void CWriteDB_LMDB::x_IncreaseEnvMapSize()
+{      // Estimates the pages needed to store every entry of m_list and enlarges the LMDB map if they would not fit.
+       size_t size = m_TotalIdsLength  + m_list.size() * 16; // total id bytes plus 16 bytes per entry
+       size_t avg_id_length = m_list.empty() ? 0 : m_TotalIdsLength/m_list.size(); // guard: dividing by an empty list's size is undefined
+       MDB_env *env = m_Env.handle();
+       MDB_stat stat;
+       MDB_envinfo info;
+       lmdb::env_stat(env, &stat);
+       lmdb::env_info(env, &info);
+       size_t page_size = stat.ms_psize;
+       // 16 byte header for each page
+       size_t page_max_size = page_size -16;
+       size_t last_page_num = info.me_last_pgno;
+       size_t max_num_pages = info.me_mapsize/page_size;
+       size_t leaf_pages_needed = size/page_max_size + 1;
+       size_t dup_pages = (leaf_pages_needed > 200) ? 14: 7; // fixed allowance, doubled for larger inputs (presumably for duplicate-key pages -- confirm)
+       size_t branch_pages_needed = (avg_id_length + 16)* leaf_pages_needed/page_max_size + 1; // one (average id + 16 byte) slot per leaf page
+       size_t total_pages_needed = leaf_pages_needed + branch_pages_needed + dup_pages;
+       if( (total_pages_needed + last_page_num) > max_num_pages) { // grow only when the estimate exceeds the current map
+               size_t newMapSize = (total_pages_needed + last_page_num) * page_size;
+               m_Env.set_mapsize(newMapSize);
+               LOG_POST(Info << "Increased lmdb mapsize to " << newMapSize);
+       }
+}
+
  void CWriteDB_LMDB::x_Split(vector<SKeyValuePair>::iterator  b, vector<SKeyValuePair>::iterator e, const unsigned int min_chunk_size)
  {
  #ifdef _OPENMP
@@ -250,6 +306,9 @@ void CWriteDB_LMDB::x_CommitTransaction()
  #else
         std::sort (m_list.begin(), m_list.end(), SKeyValuePair::cmp_key);
  #endif
+
+       x_IncreaseEnvMapSize();
+
      unsigned int j=0;
      while (j < m_list.size()){
         lmdb::txn txn = lmdb::txn::begin(m_Env);
@@ -334,6 +393,7 @@ void CWriteDB_LMDB::x_CreateOidToSeqidsLookupFile()
                         count++;
                         tmp_ids.clear();
                 }
+               m_TotalIdsLength +=m_list[i].id.size();
                 if(!m_list[i].saveToOidList) {
                         continue;
                 }
@@ -383,17 +443,17 @@ CWriteDB_TaxID::~CWriteDB_TaxID()
      CFile(m_Db+"-lock").Remove();
  }
  
-int CWriteDB_TaxID::InsertEntries(const set<Int4> & tax_ids, const blastdb::TOid oid)
+int CWriteDB_TaxID::InsertEntries(const set<TTaxId> & tax_ids, const blastdb::TOid oid)
  {
      int count = 0;
      if(tax_ids.size() == 0) {
         x_Resize();
-       SKeyValuePair<blastdb::TOid>  kv(0, oid);
+       SKeyValuePair<blastdb::TOid>  kv(ZERO_TAX_ID, oid);
         m_TaxId2OidList.push_back(kv);
         return 1;
      }
  
-    ITERATE(set<Int4>, itr, tax_ids) {
+    ITERATE(set<TTaxId>, itr, tax_ids) {
         x_Resize();
         SKeyValuePair<blastdb::TOid> kv(*itr, oid);
         m_TaxId2OidList.push_back(kv);
@@ -403,11 +463,37 @@ int CWriteDB_TaxID::InsertEntries(const set<Int4> & tax_ids, const blastdb::TOid
      return count;
  }
  
+/// Enlarge the LMDB memory map when the entries in m_TaxId2OffsetsList are
+/// estimated not to fit between the last used page and the current map end.
+///
+/// The estimate is a heuristic: 32 bytes are budgeted per list entry, each
+/// page is treated as holding (page size - 16) bytes (presumably the page
+/// header -- TODO confirm), 24 bytes of branch data are budgeted per leaf
+/// page, and 4 spare pages are added on top.  If the total exceeds the number
+/// of pages the current map size allows, the map is resized to exactly that
+/// many pages and an Info message is logged.
+void CWriteDB_TaxID::x_IncreaseEnvMapSize()
+{
+       const size_t kSparePages = 4;
+       MDB_env* env_handle = m_Env.handle();
+       MDB_stat db_stat;
+       MDB_envinfo db_info;
+       lmdb::env_stat(env_handle, &db_stat);
+       lmdb::env_info(env_handle, &db_info);
+
+       // Page geometry reported by the environment.
+       const size_t bytes_per_page = db_stat.ms_psize;
+       const size_t usable_per_page = db_stat.ms_psize - 16;
+       const size_t pages_available = db_info.me_mapsize/bytes_per_page;
+
+       // Estimated footprint of the pending entries.
+       const size_t payload_bytes = m_TaxId2OffsetsList.size()*32;
+       const size_t leaf_pages = payload_bytes/usable_per_page + 1;
+       const size_t branch_pages = 24 * leaf_pages/usable_per_page + 1;
+       const size_t pages_required =
+               leaf_pages + branch_pages + kSparePages + db_info.me_last_pgno;
+
+       if (pages_required <= pages_available) {
+               // Current map is already large enough.
+               return;
+       }
+       const size_t newMapSize = pages_required * bytes_per_page;
+       m_Env.set_mapsize(newMapSize);
+       LOG_POST(Info << "Increased lmdb mapsize to " << newMapSize);
+}
+
+
  void CWriteDB_TaxID::x_CommitTransaction()
  {
         _ASSERT(m_TaxId2OffsetsList.size());
      sort (m_TaxId2OffsetsList.begin(), m_TaxId2OffsetsList.end(), SKeyValuePair<Uint8>::cmp_key);
  
+    x_IncreaseEnvMapSize();
+
      unsigned int j=0;
      while (j < m_TaxId2OffsetsList.size()){
         lmdb::txn txn = lmdb::txn::begin(m_Env);
@@ -420,25 +506,25 @@ void CWriteDB_TaxID::x_CommitTransaction()
         }
         for(; i < j; i++){
                 Uint8 & offset = m_TaxId2OffsetsList[i].value;
-               Int4 & tax_id = m_TaxId2OffsetsList[i].tax_id;
+            TTaxId & tax_id = m_TaxId2OffsetsList[i].tax_id;
                 //cerr << m_list[i].id << endl;
                         lmdb::val value{&offset, sizeof(offset)};
                         lmdb::val key{&tax_id, sizeof(tax_id)};
                         bool rc = lmdb::dbi_put(txn, dbi.handle(), key, value, MDB_APPENDDUP);
                         if (!rc) {
-                               NCBI_THROW( CSeqDBException, eArgErr, "taxid2offset error for tax id " + tax_id);
+                               NCBI_THROW( CSeqDBException, eArgErr, "taxid2offset error for tax id " + NStr::NumericToString(tax_id));
                         }
                 }
         txn.commit();
      }
      return;
-
  }
  
+// Write every element of tax_ids to os as a raw 4-byte Int4 (the bytes of the
+// in-memory value are copied as-is, so the output is in host byte order).
+// Returns tax_ids.size(), i.e. the number of ids handed to the stream; the
+// stream state is not checked here.
-Uint4 s_WirteTaxIds(CNcbiOfstream & os, vector<Int4> & tax_ids)
+Uint4 s_WirteTaxIds(CNcbiOfstream & os, vector<TTaxId> & tax_ids)
  {
         for(unsigned int j =0; j < tax_ids.size(); j++) {
-               os.write((char *)&tax_ids[j], 4);
+        // Convert to a plain Int4 first so that exactly 4 bytes are written
+        // whatever the underlying representation of TTaxId is.
+        Int4 tid = TAX_ID_TO(Int4, tax_ids[j]);
+               os.write((char *)&tid, 4);
         }
         return tax_ids.size();
  }
@@ -462,7 +548,7 @@ void CWriteDB_TaxID::x_CreateOidToTaxIdsLookupFile()
         os.flush();
  
         blastdb::TOid count = 0;
-       vector<Int4> tmp_tax_ids;
+       vector<TTaxId> tmp_tax_ids;
         for(unsigned int i = 0; i < m_TaxId2OidList.size(); i++) {
                 if(i > 0 && m_TaxId2OidList[i].value != m_TaxId2OidList[i-1].value ) {
                         if((m_TaxId2OidList[i].value - m_TaxId2OidList[i-1].value) != 1) {
diff --git a/c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt b/c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt

index 3cb87a8611d73f41a686405b6a0fb4c0a4eded0b..8eec048429dc41ef8fecebc42d9203612e127469 100644 (file)
--- a/c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt
+++ b/c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMakeLists.cleanup.lib.txt 594157 2019-09-30 18:28:48Z gouriano $
+# $Id: CMakeLists.cleanup.lib.txt 608332 2020-05-14 16:04:14Z ivanov $
  #############################################################################
  
  NCBI_begin_lib(xcleanup)
@@ -7,6 +7,7 @@ NCBI_begin_lib(xcleanup)
      autogenerated_cleanup autogenerated_extended_cleanup cleanup
      cleanup_utils gene_qual_normalization cleanup_user_object cleanup_author
      cleanup_pub newcleanupp capitalization_string fix_feature_id
+    cleanup_message
    )
    NCBI_uses_toolkit_libraries(xobjedit)
    NCBI_project_watchers(bollin kans)
diff --git a/c++/src/objtools/cleanup/Makefile.cleanup.lib b/c++/src/objtools/cleanup/Makefile.cleanup.lib

index e63390626e608a9a88192ba82767307e35e7488d..0d8299a147271072c46e32fdc6415abb1ac3914b 100644 (file)
--- a/c++/src/objtools/cleanup/Makefile.cleanup.lib
+++ b/c++/src/objtools/cleanup/Makefile.cleanup.lib
@@ -1,4 +1,4 @@
-# $Id: Makefile.cleanup.lib 581537 2019-03-01 21:27:51Z ucko $
+# $Id: Makefile.cleanup.lib 608332 2020-05-14 16:04:14Z ivanov $
  
  # Build library "xcleanup"
  ###############################
@@ -8,7 +8,8 @@ WATCHERS = bollin kans
  ASN_DEP = submit taxon3 valid
  SRC = autogenerated_cleanup autogenerated_extended_cleanup cleanup \
        cleanup_utils gene_qual_normalization cleanup_user_object cleanup_author \
-      cleanup_pub newcleanupp capitalization_string fix_feature_id
+      cleanup_pub newcleanupp capitalization_string fix_feature_id \
+      cleanup_message
  
  DLL_LIB = $(OBJEDIT_LIBS) xregexp $(PCRE_LIB)
  LIB = xcleanup
diff --git a/c++/src/objtools/cleanup/cleanup.cpp b/c++/src/objtools/cleanup/cleanup.cpp

index fb02b6879edc6d9961c70dcab42fd3200fd9b362..7b129d82b08823c5f616276c33827bf5fae52ee4 100644 (file)
--- a/c++/src/objtools/cleanup/cleanup.cpp
+++ b/c++/src/objtools/cleanup/cleanup.cpp
@@ -1,4 +1,4 @@
-/* $Id: cleanup.cpp 608035 2020-05-11 13:51:46Z ivanov $
+/* $Id: cleanup.cpp 614966 2020-08-25 16:46:33Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -66,11 +66,14 @@
  #include <objtools/edit/cds_fix.hpp>
  #include <objtools/cleanup/cleanup.hpp>
  #include "cleanup_utils.hpp"
+#include <objtools/cleanup/cleanup_message.hpp>
  
  #include <util/strsearch.hpp>
  
  #include "newcleanupp.hpp"
  
+#include <objtools/logging/listener.hpp>
+
  BEGIN_NCBI_SCOPE
  BEGIN_SCOPE(objects)
  
@@ -2637,7 +2640,20 @@ static bool s_CleanupIsShortrRNA(const CSeq_feat& f, CScope* scope) // used in f
      }
      bool is_bad = false;
      size_t len = sequence::GetLength(f.GetLocation(), scope);
-    string rrna_name = f.GetData().GetRna().GetRnaProductName();
+    const CRNA_ref& rrna = f.GetData().GetRna();
+    string rrna_name = rrna.GetRnaProductName();
+    if (rrna_name.empty()) {
+        // RNA name may still be in product GBQual
+        if (f.IsSetQual()) {
+            for (auto qit : f.GetQual()) {
+                const CGb_qual& gbq = *qit;
+                if ( gbq.IsSetQual() && gbq.GetQual() == "product" ) {
+                    rrna_name = gbq.GetVal();
+                    break;
+                }
+            }
+        }
+    }
      ITERATE (TRNALengthMap, it, kTrnaLengthMap) {
          SIZE_TYPE pos = NStr::FindNoCase(rrna_name, it->first);
          if (pos != string::npos && len < it->second.first && !(it->second.second && f.IsSetPartial() && f.GetPartial()) ) {
@@ -2764,26 +2780,12 @@ bool CCleanup::WGSCleanup(CSeq_entry_Handle entry, bool instantiate_missing_prot
  
      CTSE_Handle tse = entry.GetTSE_Handle();
  
-    for (CFeat_CI gene_it(entry, SAnnotSelector(CSeqFeatData::e_Gene)); gene_it; ++gene_it) {
-        bool change_this_gene;
-        CRef<CSeq_feat> new_gene(new CSeq_feat());
-        new_gene->Assign(*(gene_it->GetSeq_feat()));
-
-        change_this_gene = ExpandGeneToIncludeChildren(*new_gene, tse);
-
-        change_this_gene |= SetGenePartialByLongestContainedFeature(*new_gene, entry.GetScope());
-
-        if (change_this_gene) {
-            CSeq_feat_EditHandle gene_h(*gene_it);
-            gene_h.Replace(*new_gene);
-            any_changes = true;
-        }
-    }
-
      for (CFeat_CI rna_it(entry, SAnnotSelector(CSeqFeatData::e_Rna)); rna_it; ++rna_it) {
  
          const CSeq_feat& rna_feat = *(rna_it->GetSeq_feat());
-        if (rna_feat.IsSetData() && rna_feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_rRNA && !rna_feat.IsSetPartial() && s_CleanupIsShortrRNA(rna_feat, &(entry.GetScope()))) {
+        if (rna_feat.IsSetData() && 
+            rna_feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_rRNA &&
+            s_CleanupIsShortrRNA(rna_feat, &(entry.GetScope()))) {
  
              bool change_this_rrna = false;
              CRef<CSeq_feat> new_rrna(new CSeq_feat());
@@ -2818,6 +2820,22 @@ bool CCleanup::WGSCleanup(CSeq_entry_Handle entry, bool instantiate_missing_prot
         }
      }
  
+    for (CFeat_CI gene_it(entry, SAnnotSelector(CSeqFeatData::e_Gene)); gene_it; ++gene_it) {
+        bool change_this_gene;
+        CRef<CSeq_feat> new_gene(new CSeq_feat());
+        new_gene->Assign(*(gene_it->GetSeq_feat()));
+
+        change_this_gene = ExpandGeneToIncludeChildren(*new_gene, tse);
+
+        change_this_gene |= SetGenePartialByLongestContainedFeature(*new_gene, entry.GetScope());
+
+        if (change_this_gene) {
+            CSeq_feat_EditHandle gene_h(*gene_it);
+            gene_h.Replace(*new_gene);
+            any_changes = true;
+        }
+    }
+
      NormalizeDescriptorOrder(entry);
  
      for (CBioseq_CI bi(entry, CSeq_inst::eMol_na); bi; ++bi) {
@@ -3135,7 +3153,7 @@ void s_GetAuthorsString(
  
  void CCleanup::GetPubdescLabels
  (const CPubdesc& pd,
-vector<int>& pmids, vector<int>& muids, vector<int>& serials,
+vector<TEntrezId>& pmids, vector<TEntrezId>& muids, vector<int>& serials,
  vector<string>& published_labels,
  vector<string>& unpublished_labels)
  {
@@ -3208,8 +3226,8 @@ vector<CConstRef<CPub> > CCleanup::GetCitationList(CBioseq_Handle bsh)
      // first get descriptor pubs
      CSeqdesc_CI di(bsh, CSeqdesc::e_Pub);
      while (di) {
-        vector<int> pmids;
-        vector<int> muids;
+        vector<TEntrezId> pmids;
+        vector<TEntrezId> muids;
          vector<int> serials;
          vector<string> published_labels;
          vector<string> unpublished_labels;
@@ -3241,8 +3259,8 @@ vector<CConstRef<CPub> > CCleanup::GetCitationList(CBioseq_Handle bsh)
      // now get pub features
      CFeat_CI fi(bsh, SAnnotSelector(CSeqFeatData::e_Pub));
      while (fi) {
-        vector<int> pmids;
-        vector<int> muids;
+        vector<TEntrezId> pmids;
+        vector<TEntrezId> muids;
          vector<int> serials;
          vector<string> published_labels;
          vector<string> unpublished_labels;
@@ -4417,7 +4435,11 @@ bool CCleanup::ConvertDeltaSeqToRaw(CSeq_entry_Handle seh, CSeq_inst::EMol filte
  }
  
  
-bool CCleanup::ParseCodeBreak(const CSeq_feat& feat, CCdregion& cds, const string& str, CScope& scope)
+bool CCleanup::ParseCodeBreak(const CSeq_feat& feat, 
+        CCdregion& cds, 
+        const CTempString& str, 
+        CScope& scope,
+        IObjtoolsListener* pMessageListener)
  {
      if (str.empty() || !feat.IsSetLocation()) {
          return false;
@@ -4459,7 +4481,19 @@ bool CCleanup::ParseCodeBreak(const CSeq_feat& feat, CCdregion& cds, const strin
      }
  
      loc_pos = NStr::Find(str, "(pos:");
+
+    using TSubcode = CCleanupMessage::ESubcode;
+    auto postMessage =
+        [pMessageListener](string msg, TSubcode subcode) {
+            pMessageListener->PutMessage(
+                    CCleanupMessage(msg, eDiag_Error, CCleanupMessage::ECode::eCodeBreak, subcode));
+        };
+
      if (loc_pos == string::npos) {
+        if (pMessageListener) {
+            string msg = "Unable to identify code-break location in '" + str + "'";
+            postMessage(msg, TSubcode::eParseError);
+        }
          return false;
      }
      loc_pos += 5;
@@ -4485,11 +4519,26 @@ bool CCleanup::ParseCodeBreak(const CSeq_feat& feat, CCdregion& cds, const strin
      break_loc = ReadLocFromText(pos, feat_loc_seq_id, &scope);
  
      if (break_loc == NULL) {
+        if (pMessageListener) {
+            string msg = "Unable to extract code-break location from '" + str + "'";
+            postMessage(msg, TSubcode::eParseError);
+        }
          return false;
-    } else if (break_loc->IsInt() && sequence::GetLength(*break_loc, &scope) > 3) {
+    }
+
+    if (break_loc->IsInt() && sequence::GetLength(*break_loc, &scope) > 3) {
+        if (pMessageListener) {
+            string msg = "code-break location exceeds 3 bases";
+            postMessage(msg, TSubcode::eBadLocation);
+        }
          return false;
-    } else if ((break_loc->IsInt() || break_loc->IsPnt()) &&
+    } 
+    if ((break_loc->IsInt() || break_loc->IsPnt()) &&
           sequence::Compare(*break_loc, feat.GetLocation(), &scope, sequence::fCompareOverlapping) != sequence::eContained) {
+        if (pMessageListener) {
+            string msg = "code-break location lies outside of coding region";
+            postMessage(msg, TSubcode::eBadLocation);
+        }
          return false;
      }
  
diff --git a/c++/src/objtools/cleanup/cleanup_message.cpp b/c++/src/objtools/cleanup/cleanup_message.cpp

new file mode 100644 (file)

index 0000000..297bced
--- /dev/null
+++ b/c++/src/objtools/cleanup/cleanup_message.cpp
@@ -0,0 +1,48 @@
+/*  $Id: cleanup_message.cpp 608332 2020-05-14 16:04:14Z ivanov $
+ * ===========================================================================
+ *
+ *                            PUBLIC DOMAIN NOTICE
+ *               National Center for Biotechnology Information
+ *
+ *  This software/database is a "United States Government Work" under the
+ *  terms of the United States Copyright Act.  It was written as part of
+ *  the author's official duties as a United States Government employee and
+ *  thus cannot be copyrighted.  This software/database is freely available
+ *  to the public for use. The National Library of Medicine and the U.S.
+ *  Government have not placed any restriction on its use or reproduction.
+ *
+ *  Although all reasonable efforts have been taken to ensure the accuracy
+ *  and reliability of the software and data, the NLM and the U.S.
+ *  Government do not and cannot warrant the performance or results that
+ *  may be obtained by using this software or data. The NLM and the U.S.
+ *  Government disclaim all warranties, express or implied, including
+ *  warranties of performance, merchantability or fitness for any particular
+ *  purpose.
+ *
+ *  Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author:  Justin Foley
+ *
+ * File Description:
+ *   Out-of-line members of CCleanupMessage (constructor and Clone).
+ *
+ */
+#include <ncbi_pch.hpp>
+#include <objtools/cleanup/cleanup_message.hpp>
+
+BEGIN_NCBI_SCOPE
+BEGIN_SCOPE(objects)
+
+/// Build a cleanup message: text and severity are passed to the
+/// CObjtoolsMessage base, code and subcode are stored in this object.
+CCleanupMessage::CCleanupMessage(string text, EDiagSev sev, ECode code, ESubcode subcode)
+    : CObjtoolsMessage(text, sev),
+      m_Code(code),
+      m_Subcode(subcode)
+{
+}
+
+/// Return a copy of this message, allocated with new, that carries the same
+/// text, severity, code and subcode.
+CCleanupMessage* CCleanupMessage::Clone(void) const 
+{
+    CCleanupMessage* copy =
+        new CCleanupMessage(GetText(), GetSeverity(), m_Code, m_Subcode);
+    return copy;
+}
+
+END_SCOPE(objects)
+END_NCBI_SCOPE    
+
diff --git a/c++/src/objtools/cleanup/cleanup_pub.cpp b/c++/src/objtools/cleanup/cleanup_pub.cpp

index 60fb375fe02fc58406a61e69c88fd8f090d04d6a..424d9c05d2a4c6484fe3022e3bb2d777f0759e0c 100644 (file)
--- a/c++/src/objtools/cleanup/cleanup_pub.cpp
+++ b/c++/src/objtools/cleanup/cleanup_pub.cpp
@@ -1,4 +1,4 @@
-/* $Id: cleanup_pub.cpp 591351 2019-08-14 14:26:28Z bollin $
+/* $Id: cleanup_pub.cpp 614966 2020-08-25 16:46:33Z fukanchi $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -190,8 +190,8 @@ bool CPubEquivCleaner::Clean(bool fix_initials, bool strip_serial)
  
      // we keep the last of these because we might transfer one
      // to the other as necessary to fill in gaps.
-    int last_pmid = 0;
-    int last_article_pubmed_id = 0; // the last from a journal
+    TEntrezId last_pmid = ZERO_ENTREZ_ID;
+    TEntrezId last_article_pubmed_id = ZERO_ENTREZ_ID; // the last from a journal
      CRef<CCit_art> last_article;
  
      auto& pe_set = m_Equiv.Set();
@@ -239,13 +239,13 @@ bool CPubEquivCleaner::Clean(bool fix_initials, bool strip_serial)
      }
  
      // Now, we might have to transfer data to fill in missing information
-    if (last_pmid == 0 && last_article_pubmed_id > 0) {
+    if (last_pmid == ZERO_ENTREZ_ID && last_article_pubmed_id > ZERO_ENTREZ_ID) {
          CRef<CPub> new_pub(new CPub);
          new_pub->SetPmid().Set(last_article_pubmed_id);
          m_Equiv.Set().insert(m_Equiv.Set().begin(), new_pub);
          change = true;
      }
-    else if (last_pmid > 0 && last_article_pubmed_id == 0 && last_article) {
+    else if (last_pmid > ZERO_ENTREZ_ID && last_article_pubmed_id == ZERO_ENTREZ_ID && last_article) {
          CRef<CArticleId> new_article_id(new CArticleId);
          new_article_id->SetPubmed().Set(last_pmid);
          last_article->SetIds().Set().push_back(new_article_id);
@@ -403,7 +403,7 @@ bool CCitGenCleaner::IsEmpty()
  {
      return (!m_Gen.IsSetCit()) &&
          !m_Gen.IsSetAuthors() &&
-        (!m_Gen.IsSetMuid() || m_Gen.GetMuid() <= 0) &&
+        (!m_Gen.IsSetMuid() || m_Gen.GetMuid() <= ZERO_ENTREZ_ID) &&
          !m_Gen.IsSetJournal() &&
          (!m_Gen.IsSetVolume() || m_Gen.GetVolume().empty()) &&
          (!m_Gen.IsSetIssue() || m_Gen.GetIssue().empty()) &&
@@ -411,7 +411,7 @@ bool CCitGenCleaner::IsEmpty()
          !m_Gen.IsSetDate() &&
          (!m_Gen.IsSetSerial_number() || m_Gen.GetSerial_number() <= 0) &&
          (!m_Gen.IsSetTitle() || m_Gen.GetTitle().empty()) &&
-        (!m_Gen.IsSetPmid() || m_Gen.GetPmid() <= 0);
+        (!m_Gen.IsSetPmid() || m_Gen.GetPmid().Get() <= ZERO_ENTREZ_ID);
  }
  
  
diff --git a/c++/src/objtools/cleanup/cleanup_utils.hpp b/c++/src/objtools/cleanup/cleanup_utils.hpp

index d5d36e30498cfc97a5465a72c904417d6e3bd53f..af4b38e01b353253636e77e743e69513186fe142 100644 (file)
--- a/c++/src/objtools/cleanup/cleanup_utils.hpp
+++ b/c++/src/objtools/cleanup/cleanup_utils.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJECTS_GENERAL___CLEANUP_UTILS__HPP
  #define OBJECTS_GENERAL___CLEANUP_UTILS__HPP
  
-/* $Id: cleanup_utils.hpp 581496 2019-03-01 16:42:04Z bollin $
+/* $Id: cleanup_utils.hpp 613129 2020-08-03 12:12:19Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -379,7 +379,7 @@ if ((o).IsSet##x()) { \
  CRef<CSeq_loc> ReadLocFromText(const string& text, const CSeq_id *id, CScope *scope);
  
  // for finding the correct amino acid letter given an abbreviation
-char ValidAminoAcid (const string& abbrev);
+char NCBI_CLEANUP_EXPORT ValidAminoAcid (const string& abbrev);
  
  // for sorting and uniquing dbtags
  bool s_DbtagCompare (const CRef<CDbtag>& dbt1, const CRef<CDbtag>& dbt2);
diff --git a/c++/src/objtools/cleanup/newcleanupp.cpp b/c++/src/objtools/cleanup/newcleanupp.cpp

index 653e9683595c65bc7825743eaa4cd59c79be1650..1b1f427ebce417247984d6e9623a84c5d4ec0f9f 100644 (file)
--- a/c++/src/objtools/cleanup/newcleanupp.cpp
+++ b/c++/src/objtools/cleanup/newcleanupp.cpp
@@ -5972,6 +5972,10 @@ void CNewCleanup_imp::Except_textBC (
  
  {
      if (NStr::Find (except_text, "ribosome slippage") == NPOS &&
+        NStr::Find (except_text, "ribosome-slippage") == NPOS &&
+        NStr::Find (except_text, "ribosome_slippage") == NPOS &&
+        NStr::Find (except_text, "ribosomal-slippage") == NPOS &&
+        NStr::Find (except_text, "ribosomal_slippage") == NPOS &&
          NStr::Find (except_text, "trans splicing") == NPOS &&
          NStr::Find (except_text, "trans_splicing") == NPOS &&
          NStr::Find (except_text, "alternate processing") == NPOS &&
@@ -5991,7 +5995,8 @@ void CNewCleanup_imp::Except_textBC (
              ChangeMade (CCleanupChange::eTrimSpaces);
          }
          if (! text.empty()) {
-            if (text == "ribosome slippage") {
+            if (text == "ribosome slippage" || text == "ribosome-slippage" || text == "ribosome_slippage" ||
+                text == "ribosomal-slippage" || text == "ribosomal_slippage") {
                  text = "ribosomal slippage";
                  ChangeMade (CCleanupChange::eChangeException);
              } else if (text == "trans splicing" || text == "trans_splicing") {
@@ -8540,12 +8545,12 @@ void CNewCleanup_imp::x_PostProcessing(void)
      if( ! m_MuidPubContainer.empty() ) {
          NON_CONST_ITERATE( TMuidPubContainer, pub_iter, m_MuidPubContainer ) {
              CPub &pub = **pub_iter;
-            const int muid = pub.GetMuid();
+            const TEntrezId muid = pub.GetMuid();
              
              // attempt to find that muid in the muid-to-pmid mapping created earlier
-            TMuidToPmidMap::const_iterator map_iter = m_MuidToPmidMap.find(muid);
+            TMuidToPmidMap::const_iterator map_iter = m_MuidToPmidMap.find(ENTREZ_ID_TO(int, muid));
              if( map_iter != m_MuidToPmidMap.end() ) {
-                const int pmid = map_iter->second;
+                const TEntrezId pmid = ENTREZ_ID_FROM(int, map_iter->second);
                  pub.SetPmid().Set(pmid);
                  ChangeMade(CCleanupChange::eChangePublication);
              }
@@ -8686,16 +8691,16 @@ void CNewCleanup_imp::x_NotePubdescOrAnnotPubs(
  }
  
  void CNewCleanup_imp::x_NotePubdescOrAnnotPubs_RecursionHelper(
-    const CPub_equiv &pub_equiv, int &muid, int &pmid ) 
+    const CPub_equiv &pub_equiv, int &muid, int &pmid )
  {
      FOR_EACH_PUB_ON_PUBEQUIV(pub_iter, pub_equiv) {
          const CPub &pub = **pub_iter;
          switch( pub.Which() ) {
          case NCBI_PUB(Muid):
-            muid = pub.GetMuid();
+            muid = ENTREZ_ID_TO(int, pub.GetMuid());
              break;
          case NCBI_PUB(Pmid):
-            pmid = pub.GetPmid().Get();
+            pmid = ENTREZ_ID_TO(int, pub.GetPmid().Get());
              break;
          case NCBI_PUB(Gen): 
              {
@@ -10083,9 +10088,9 @@ bool CNewCleanup_imp::x_IsPubContentBad(const CPub& pub)
  {
      if (pub.IsGen() && IsMinimal(pub.GetGen())) {
          return true;
-    } else if (pub.IsMuid() && pub.GetMuid() == 0) {
+    } else if (pub.IsMuid() && pub.GetMuid() == ZERO_ENTREZ_ID) {
          return true;
-    } else if (pub.IsPmid() && pub.GetPmid() == 0) {
+    } else if (pub.IsPmid() && pub.GetPmid() == ZERO_ENTREZ_ID) {
          return true;
      } else if (pub.IsPat_id() && x_IsPubContentBad(pub.GetPat_id())) {
          return true;
diff --git a/c++/src/objtools/data_loaders/blastdb/bdbloader.cpp b/c++/src/objtools/data_loaders/blastdb/bdbloader.cpp

index 00e58d4e86c63c3c4ccb39a20cccd011c8bde6e1..c19f215dd1c864e5b480ad345f9de9b2b800d51a 100644 (file)
--- a/c++/src/objtools/data_loaders/blastdb/bdbloader.cpp
+++ b/c++/src/objtools/data_loaders/blastdb/bdbloader.cpp
@@ -1,4 +1,4 @@
-/*  $Id: bdbloader.cpp 500404 2016-05-04 14:59:01Z camacho $
+/*  $Id: bdbloader.cpp 612733 2020-07-27 11:38:27Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -167,6 +167,7 @@ CBlastDbDataLoader::CBlastDbDataLoader(const string        & loader_name,
        m_DBName              (param.m_DbName),
        m_DBType              (param.m_DbType),
        m_BlastDb             (0),
+      m_Ids                 (1000),
        m_UseFixedSizeSlices  (param.m_UseFixedSizeSlices)
  {
      if (param.m_BlastDbHandle.NotEmpty()) {
diff --git a/c++/src/objtools/data_loaders/genbank/reader.cpp b/c++/src/objtools/data_loaders/genbank/reader.cpp

index 9398fa041ad765a2fc43ed596fabaeaf440757d2..f399331c4c99d6b3f18ffc4703193dab7fd3c42e 100644 (file)
--- a/c++/src/objtools/data_loaders/genbank/reader.cpp
+++ b/c++/src/objtools/data_loaders/genbank/reader.cpp
@@ -1,4 +1,4 @@
-/*  $Id: reader.cpp 578792 2019-01-25 16:39:00Z vasilche $
+/*  $Id: reader.cpp 610682 2020-06-22 17:47:10Z ivanov $
   * ===========================================================================
   *                            PUBLIC DOMAIN NOTICE
   *               National Center for Biotechnology Information
@@ -77,6 +77,7 @@ CReader::CDebugPrinter::~CDebugPrinter()
  }
  
  
+#define DEFAULT_PREOPEN false
  #define DEFAULT_RETRY_COUNT 5
  #define DEFAULT_WAIT_TIME_ERRORS 2
  #define DEFAULT_WAIT_TIME 1
@@ -110,7 +111,7 @@ static CIncreasingTime::SAllParams s_WaitTimeParams = {
  CReader::CReader(void)
      : m_Dispatcher(0),
        m_MaxConnections(0),
-      m_PreopenConnection(true),
+      m_PreopenConnection(DEFAULT_PREOPEN),
        m_NextNewConnection(0),
        m_NumFreeConnections(0, 1000),
        m_MaximumRetryCount(3),
@@ -140,7 +141,7 @@ void CReader::InitParams(CConfig& conf,
          conf.GetBool(driver_name,
                       NCBI_GBLOADER_READER_PARAM_PREOPEN,
                       CConfig::eErr_NoThrow,
-                     true);
+                     DEFAULT_PREOPEN);
      SetPreopenConnection(open_initial_connection);
      m_WaitTimeErrors =
          conf.GetInt(driver_name,
diff --git a/c++/src/objtools/edit/CMakeLists.edit.lib.txt b/c++/src/objtools/edit/CMakeLists.edit.lib.txt

index 30578d648f6e56c544930b72a9216ed719d7973a..51165d0b4251b179c920ad407ee1cd39413c6ab7 100644 (file)
--- a/c++/src/objtools/edit/CMakeLists.edit.lib.txt
+++ b/c++/src/objtools/edit/CMakeLists.edit.lib.txt
@@ -1,5 +1,5 @@
  #############################################################################
-# $Id: CMakeLists.edit.lib.txt 594157 2019-09-30 18:28:48Z gouriano $
+# $Id: CMakeLists.edit.lib.txt 615131 2020-08-27 17:51:01Z fukanchi $
  #############################################################################
  
  NCBI_begin_lib(xobjedit)
@@ -13,7 +13,7 @@ NCBI_begin_lib(xobjedit)
      external_annots feature_propagate text_object_description
      seq_edit
    )
-  NCBI_uses_toolkit_libraries(mlacli taxon3 valid xobjread xobjutil)
+  NCBI_uses_toolkit_libraries(mlacli taxon3 valid xobjread xobjutil xlogging)
    NCBI_project_watchers(bollin gotvyans foleyjp)
  NCBI_end_lib()
  
diff --git a/c++/src/objtools/edit/feattable_edit.cpp b/c++/src/objtools/edit/feattable_edit.cpp

index 737655e369a9f5c0cad3792c1c4cb580085b3947..561a0dc5ff8feced8d57b955c52584c2e69d429e 100644 (file)
--- a/c++/src/objtools/edit/feattable_edit.cpp
+++ b/c++/src/objtools/edit/feattable_edit.cpp
@@ -1,4 +1,4 @@
-/*  $Id: feattable_edit.cpp 594944 2019-10-11 12:07:51Z ludwigf $
+/*  $Id: feattable_edit.cpp 612522 2020-07-23 11:23:26Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -38,6 +38,8 @@
  #include <objects/seqfeat/Feat_id.hpp>
  #include <objects/seqfeat/Gb_qual.hpp>
  #include <objects/seqfeat/SeqFeatXref.hpp>
+#include <objects/seqfeat/Genetic_code_table.hpp>
+#include <objects/seqfeat/Trna_ext.hpp>
  
  #include <objmgr/object_manager.hpp>
  #include <objmgr/scope.hpp>
@@ -355,7 +357,75 @@ void CFeatTableEdit::EliminateBadQualifiers()
      }
  }
  
+//  ----------------------------------------------------------------------------
+//  Convert "codon_recognized" qualifiers on tRNA features into tRNA codon data.
+//
+//  For every tRNA feature found through mHandle that has a non-empty
+//  "codon_recognized" qualifier, the three-character value is upper-cased and
+//  each position is expanded through the IUPAC table below.  The index of every
+//  resulting codon is appended to the tRNA extension of an edited copy of the
+//  feature, the copy replaces the original, and the qualifier is removed.
+//
+//  A value that is not exactly three characters long, or that contains a
+//  character missing from the table, is reported through
+//  xPutErrorBadCodonRecognized() and processing stops at that feature.
+void CFeatTableEdit::ProcessCodonRecognized()
+//  ----------------------------------------------------------------------------
+{
+    // Accepted nucleotide codes and the concrete bases each one stands for.
+    // The table is never modified, hence const.
+    static const map<char, list<char>> sIUPACmap {
+        {'A', list<char>({'A'})},
+        {'G', list<char>({'G'})},
+        {'C', list<char>({'C'})},
+        {'T', list<char>({'T'})},
+        {'U', list<char>({'U'})},
+        {'M', list<char>({'A', 'C'})},
+        {'R', list<char>({'A', 'G'})},
+        {'W', list<char>({'A', 'T'})},
+        {'S', list<char>({'C', 'G'})},
+        {'Y', list<char>({'C', 'T'})},
+        {'K', list<char>({'G', 'T'})},
+        {'V', list<char>({'A', 'C', 'G'})},
+        {'H', list<char>({'A', 'C', 'T'})},
+        {'D', list<char>({'A', 'G', 'T'})},
+        {'B', list<char>({'C', 'G', 'T'})},
+        {'N', list<char>({'A', 'C', 'G', 'T'})}
+    };
  
+    SAnnotSelector sel;
+    sel.IncludeFeatSubtype(CSeqFeatData::eSubtype_tRNA);
+    CFeat_CI it(mHandle, sel);
+    for (; it; ++it) {
+        CMappedFeat mf = *it;
+        auto codonRecognized = mf.GetNamedQual("codon_recognized");
+        if (codonRecognized.empty()) {
+            continue;
+        }
+        if (codonRecognized.size() != 3) {
+            xPutErrorBadCodonRecognized(codonRecognized);
+            return;
+        }
+        NStr::ToUpper(codonRecognized);
+
+        const CSeq_feat& origFeat = mf.GetOriginalFeature();
+
+        // Work on a copy; the original is only replaced once the codon list
+        // has been built successfully.
+        CRef<CSeq_feat> pEditedFeat(new CSeq_feat);
+        pEditedFeat->Assign(origFeat);
+        CRNA_ref::C_Ext::TTRNA & extTrna = pEditedFeat->SetData().SetRna().SetExt().SetTRNA();
+
+        // A set, so that ambiguity codes expanding to the same codon index
+        // contribute it only once.
+        set<int> codons;
+        try {
+            for (char char1 : sIUPACmap.at(codonRecognized[0])) {
+                for (char char2 : sIUPACmap.at(codonRecognized[1])) {
+                    for (char char3 : sIUPACmap.at(codonRecognized[2])) {
+                        const auto codonIndex = CGen_code_table::CodonToIndex(char1, char2, char3);
+                        codons.insert(codonIndex);
+                    }
+                }
+            }
+        }
+        // map::at() reports an unknown character with std::out_of_range, which
+        // is not a CException; catching std::exception handles that case (and
+        // still covers CException, which derives from std::exception).
+        catch(const std::exception&) {
+            xPutErrorBadCodonRecognized(codonRecognized);
+            return;
+        }
+        if (!codons.empty()) {
+            for (const auto codonIndex : codons) {
+                extTrna.SetCodon().push_back(codonIndex);
+            }
+            CSeq_feat_EditHandle feh(mpScope->GetObjectHandle(origFeat));
+            feh.Replace(*pEditedFeat);
+            feh.RemoveQualifier("codon_recognized");
+        }
+    }
+}
  
  // ---------------------------------------------------------------------------
  void CFeatTableEdit::GenerateProteinAndTranscriptIds()
@@ -1580,6 +1650,20 @@ CFeatTableEdit::xPutErrorMissingTranscriptId(
      xPutError(message);
  }
  
+//  ----------------------------------------------------------------------------
+//  Report a tRNA whose "codon_recognized" value could not be interpreted.
+//  The offending value is quoted in the message passed to xPutError().
+//  Nothing is reported when mpMessageListener is not set.
+void
+CFeatTableEdit::xPutErrorBadCodonRecognized(
+    const string codonRecognized)
+//  ----------------------------------------------------------------------------
+{
+    if (mpMessageListener) {
+        string text("tRNA with bad codon recognized attribute \"");
+        text += codonRecognized;
+        text += "\".";
+        xPutError(text);
+    }
+}
+
  //  ----------------------------------------------------------------------------
  void
  CFeatTableEdit::xPutErrorMissingProteinId(
@@ -1606,7 +1690,7 @@ CFeatTableEdit::xPutErrorMissingProteinId(
  void
  CFeatTableEdit::xPutErrorDifferingProteinIds(
      const CMappedFeat& mrna)
-    //  ----------------------------------------------------------------------------
+//  ----------------------------------------------------------------------------
  {
      if (!mpMessageListener) {
          return;
@@ -1624,7 +1708,7 @@ CFeatTableEdit::xPutErrorDifferingProteinIds(
  void
  CFeatTableEdit::xPutErrorDifferingTranscriptIds(
      const CMappedFeat& mrna)
-    //  ----------------------------------------------------------------------------
+//  ----------------------------------------------------------------------------
  {
      if (!mpMessageListener) {
          return;
diff --git a/c++/src/objtools/edit/loc_edit.cpp b/c++/src/objtools/edit/loc_edit.cpp

index 7a9e26b647f6e37b68cd815fd4ff5d11ae8c4d45..a0ff2cdc2ff2413894683dc322fddb825809ca21 100644 (file)
--- a/c++/src/objtools/edit/loc_edit.cpp
+++ b/c++/src/objtools/edit/loc_edit.cpp
@@ -1,4 +1,4 @@
-/*  $Id: loc_edit.cpp 601240 2020-02-04 16:06:49Z ludwigf $
+/*  $Id: loc_edit.cpp 609624 2020-06-04 15:45:32Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -2608,8 +2608,10 @@ bool ExtendPartialFeatureEnds(CBioseq_Handle bsh)
              CRef<CSeq_feat> new_cds(new CSeq_feat());
              new_cds->Assign(*(f->GetOriginalSeq_feat()));
  
-            if (AdjustFeatureEnd5(*new_cds, related_features, bsh.GetScope()) ||
-                AdjustFeatureEnd3(*new_cds, related_features, bsh.GetScope())) {
+            const bool adjusted_5prime = AdjustFeatureEnd5(*new_cds, related_features, bsh.GetScope());
+            const bool adjusted_3prime = AdjustFeatureEnd3(*new_cds, related_features, bsh.GetScope());
+
+            if (adjusted_5prime || adjusted_3prime) {
                  feature::RetranslateCDS(*new_cds, bsh.GetScope());
                  CSeq_feat_EditHandle feh(*f);
                  feh.Replace(*new_cds);
diff --git a/c++/src/objtools/edit/remote_updater.cpp b/c++/src/objtools/edit/remote_updater.cpp

index 91b2e56c8879f0c6df0e48c66606f1a2be3b6a8b..7fc406f687972b2a4da8c49697132e5bd7643a8a 100644 (file)
--- a/c++/src/objtools/edit/remote_updater.cpp
+++ b/c++/src/objtools/edit/remote_updater.cpp
@@ -1,4 +1,4 @@
-/*  $Id: remote_updater.cpp 605109 2020-04-07 11:01:53Z ivanov $
+/*  $Id: remote_updater.cpp 614634 2020-08-20 13:02:41Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -57,6 +57,8 @@
  #include <objects/general/Name_std.hpp>
  
  #include <objtools/edit/remote_updater.hpp>
+#include <objtools/edit/edit_error.hpp>
+#include <objtools/logging/listener.hpp>
  
  #include <common/test_assert.h>  /* This header must go last */
  
@@ -69,7 +71,7 @@ DEFINE_CLASS_STATIC_MUTEX(CRemoteUpdater::m_static_mutex);
  namespace
  {
  
-int FindPMID(CMLAClient& mlaClient, const CPub_equiv::Tdata& arr)
+TEntrezId FindPMID(const list<CRef<CPub>>& arr)
  {
      for (auto pPub : arr) {
          if (pPub->IsPmid()) {
@@ -77,36 +79,93 @@ int FindPMID(CMLAClient& mlaClient, const CPub_equiv::Tdata& arr)
          }
  
      }
-    return 0;
+    return ZERO_ENTREZ_ID;
  }
  
-// the method is not used at the momment
-void CreatePubPMID(CMLAClient& mlaClient, CPub_equiv::Tdata& arr, int id)
+
+// True if mlaErrorVal is one of the two MLA "cannot connect" error values.
+static bool s_IsConnectionFailure(EError_val mlaErrorVal)
+{
+    const bool noPmdb =
+        (mlaErrorVal == eError_val_cannot_connect_pmdb);
+    const bool noSearchBackend =
+        (mlaErrorVal == eError_val_cannot_connect_searchbackend_pmdb);
+
+    return noPmdb || noSearchBackend;
+}
+
+
+CRef<CPub> s_GetPubFrompmid(CMLAClient& mlaClient, TEntrezId id, int maxAttempts, IObjtoolsListener* pMessageListener)
  {
-    try {
-        CPubMedId req(id);
-        CRef<CPub> new_pub = mlaClient.AskGetpubpmid(req);
-        if (new_pub.NotEmpty())
-        {
-            // authors come back in a weird format that we need
-            // to convert to ISO
-            if (new_pub->IsSetAuthors())
-               CRemoteUpdater::ConvertToStandardAuthors((CAuth_list&)new_pub->GetAuthors());
+    // Fetch the pub for PMID id; only connection failures are retried.
+    CRef<CPub> result;
+    CPubMedId  request(id);
+    CMLAClient::TReply reply;
+    int maxCount = max(1, maxAttempts); // always at least one attempt
+    for (int count=0; count<maxCount; ++count) {
+        try {
+            result = mlaClient.AskGetpubpmid(request, &reply);
+            return result;
+        }
+        catch(const CException&) {
+            // GetError() is only valid on an error reply; otherwise rethrow.
+            if (!reply.IsError()) { throw; }
+            auto errorVal = reply.GetError();
+            auto isConnectionError = s_IsConnectionFailure(errorVal);
+            if (isConnectionError && count<maxCount-1) {
+                continue;
+            }
  
-            arr.clear();
-            CRef<CPub> new_pmid(new CPub);
-            new_pmid->SetPmid().Set(id);
-            arr.push_back(new_pmid);
-            arr.push_back(new_pub);
+            CNcbiOstrstream oss;
+            oss << "Failed to retrieve publication for PMID " << id << ". ";
+            if (isConnectionError) {
+                oss << count+1 << " attempts made. ";
+            }
+            oss << "CMLAClient : " << errorVal;
+            string msg = CNcbiOstrstreamToString(oss);
+            // With a listener: report and return an empty CRef; else throw.
+            if (pMessageListener) {
+                pMessageListener->PutMessage(CObjEditMessage(msg, eDiag_Error));
+                break;
+            }
+            else {
+                NCBI_THROW(CException, eUnknown, msg);
+            }
          }
-    } catch(...) {
-        // don't worry if we can't look it up
      }
-
+    return result;
  }
  
  }// end anonymous namespace
  
+bool CRemoteUpdater::xUpdatePubPMID(list<CRef<CPub>>& arr, TEntrezId id)
+{
+    // Look up the pub for this PMID; an empty result leaves arr untouched.
+    auto new_pub = 
+        s_GetPubFrompmid(*m_mlaClient, id, m_MaxMlaAttempts, m_pMessageListener);
+    if (!new_pub) {
+        return false;
+    }
+
+    // authors come back in a weird format that we need
+    // to convert to ISO
+    if (new_pub->IsSetAuthors())
+        CRemoteUpdater::ConvertToStandardAuthors((CAuth_list&)new_pub->GetAuthors());
+
+    arr.clear(); // the list is rebuilt as exactly {pmid, retrieved pub}
+    CRef<CPub> new_pmid(new CPub);
+    new_pmid->SetPmid().Set(id);
+    arr.push_back(new_pmid);
+    arr.push_back(new_pub);
+    return true;
+}
+
+
+void CRemoteUpdater::SetMaxMlaAttempts(int maxAttempts) 
+{
+    m_MaxMlaAttempts = maxAttempts; // handed to s_GetPubFrompmid by xUpdatePubPMID
+}
+
  
  class CCachedTaxon3_impl
  {
@@ -131,6 +190,30 @@ public:
          }
      }
  
+    CRef<COrg_ref> GetOrg(const COrg_ref& org, IObjtoolsListener* pMessageListener=nullptr)
+    {
+        // An error reply is reported to pMessageListener when one is given;
+        // the result is non-empty only for a data reply that has an org.
+        CRef<CT3Reply> reply = GetOrgReply(org);
+        if (reply->IsError() && pMessageListener) {
+            const string label = org.IsSetTaxname()
+                ? org.GetTaxname()
+                : NStr::NumericToString(org.GetTaxId());
+            const string error_message = "Taxon update: " + label + ": " +
+                reply->GetError().GetMessage();
+            pMessageListener->PutMessage(
+                    CObjEditMessage(error_message, eDiag_Error));
+            return CRef<COrg_ref>();
+        }
+
+        CRef<COrg_ref> result;
+        if (reply->IsData() && reply->SetData().IsSetOrg()) {
+            result.Reset(&reply->SetData().SetOrg());
+        }
+        return result;
+    }
+
+
      CRef<COrg_ref> GetOrg(const COrg_ref& org, CRemoteUpdater::FLogger f_logger)
      {
          CRef<COrg_ref> result;
@@ -141,14 +224,8 @@ public:
                  "Taxon update: " +
                  (org.IsSetTaxname() ? org.GetTaxname() : NStr::IntToString(org.GetTaxId())) + ": " +
                  reply->GetError().GetMessage();
-
-/*
-            logger->PutError(*auto_ptr<CLineError>(
-                CLineError::Create(ILineError::eProblem_Unset, eDiag_Warning, "", 0,
-                string("Taxon update: ") + 
-                (org.IsSetTaxname() ? org.GetTaxname() : NStr::IntToString(org.GetTaxId())) + ": " +
-                reply->GetError().GetMessage())));
-            */
+        
+            f_logger(error_message);
          }
          else
          if (reply->IsData() && reply->SetData().IsSetOrg())
@@ -218,7 +295,7 @@ protected:
      auto_ptr<CCachedReplyMap> m_cache;
  };
  
-void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeqdesc& obj)
+void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, CSeqdesc& obj)
  {
      if (obj.IsOrg())
      {
@@ -232,7 +309,8 @@ void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeqdesc& obj)
  }
  
  void CRemoteUpdater::xUpdateOrgTaxname(FLogger logger, COrg_ref& org)
-{
+{ // remove after the deprecated UpdateOrgFromTaxon(FLogger, CSeqdesc&) 
+  // has been removed.
      CMutexGuard guard(m_Mutex);
  
      int taxid = org.GetTaxId();
@@ -252,6 +330,42 @@ void CRemoteUpdater::xUpdateOrgTaxname(FLogger logger, COrg_ref& org)
      }
  }
  
+void CRemoteUpdater::UpdateOrgFromTaxon(CSeqdesc& desc)
+{
+    // Refresh the org-ref held directly by the descriptor, or the one
+    // inside its source; any other descriptor is left alone.
+    if (desc.IsOrg()) {
+        xUpdateOrgTaxname(desc.SetOrg());
+        return;
+    }
+    if (desc.IsSource() && desc.GetSource().IsSetOrg()) {
+        xUpdateOrgTaxname(desc.SetSource().SetOrg());
+    }
+}
+
+
+void CRemoteUpdater::xUpdateOrgTaxname(COrg_ref& org)
+{
+    CMutexGuard guard(m_Mutex);
+
+    // Nothing to look up without either a tax id or a taxname.
+    const TTaxId taxid = org.GetTaxId();
+    const bool nothingToLookUp = (taxid == ZERO_TAX_ID) && !org.IsSetTaxname();
+    if (nothingToLookUp) {
+        return;
+    }
+    // The taxonomy client is created and initialized on first use.
+    if (!m_taxClient.get()) {
+        m_taxClient.reset(new CCachedTaxon3_impl);
+        m_taxClient->Init();
+    }
+    CRef<COrg_ref> new_org = m_taxClient->GetOrg(org, m_pMessageListener);
+    if (!new_org.Empty()) {
+        org.Assign(*new_org);
+    }
+}
+
+
  CRemoteUpdater& CRemoteUpdater::GetInstance()
  {
      CMutexGuard guard(m_static_mutex);
@@ -261,6 +375,12 @@ CRemoteUpdater& CRemoteUpdater::GetInstance()
      return instance;
  }
  
+CRemoteUpdater::CRemoteUpdater(IObjtoolsListener* pMessageListener) :
+    m_pMessageListener(pMessageListener) // forwarded to the taxon and MLA lookups; may be null
+{
+}
+
+
  CRemoteUpdater::CRemoteUpdater(bool enable_caching)
      :m_enable_caching(enable_caching)
  {
@@ -280,7 +400,7 @@ void CRemoteUpdater::ClearCache()
      }
  }
  
-void CRemoteUpdater::UpdatePubReferences(objects::CSeq_entry_EditHandle& obj)
+void CRemoteUpdater::UpdatePubReferences(CSeq_entry_EditHandle& obj)
  {
      for (CBioseq_CI it(obj); it; ++it)
      {
@@ -336,61 +456,59 @@ void CRemoteUpdater::xUpdatePubReferences(CSeq_entry& entry)
      xUpdatePubReferences(entry.SetDescr());
  }
  
-void CRemoteUpdater::xUpdatePubReferences(objects::CSeq_descr& seq_descr)
+
+
+void CRemoteUpdater::xUpdatePubReferences(CSeq_descr& seq_descr)
  {
      CMutexGuard guard(m_Mutex);
  
-    CSeq_descr::Tdata& descr = seq_descr.Set();
-    size_t count = descr.size();
-    CSeq_descr::Tdata::iterator it = descr.begin();
-
-    for (size_t i=0; i<count; ++it,  ++i)
-    {
-        if (! ( (**it).IsPub() && (**it).GetPub().IsSetPub() ) )
+    for (auto pDesc : seq_descr.Set()) { // only pub descriptors with a pub set are touched
+        if (!pDesc->IsPub() || !pDesc->GetPub().IsSetPub()) {
              continue;
+        }
  
-        CPub_equiv::Tdata& arr = (**it).SetPub().SetPub().Set();
-        if (m_mlaClient.Empty())
-            m_mlaClient.Reset(new CMLAClient);
+        auto& arr = pDesc->SetPub().SetPub().Set();
+        if (m_mlaClient.Empty()) // create the MLA client on first use
+            m_mlaClient.Reset(new CMLAClient());
  
-        int id = FindPMID(*m_mlaClient, arr);
-        if (id>0)
-        {
-            CreatePubPMID(*m_mlaClient, arr, id);
+        auto id = FindPMID(arr);
+        if (id>ZERO_ENTREZ_ID) { // a PMID is already present: refresh from it
+            xUpdatePubPMID(arr, id);
+            continue;
          }
-        else
-        // nothing was found
-        NON_CONST_ITERATE(CPub_equiv::Tdata, item_it, arr)
-        {
-            if ((**item_it).IsArticle())
-            try
-            {
-                id = m_mlaClient->AskCitmatchpmid(**item_it);
-                if (id>0)
+
+        for (auto pPubEquiv : arr) { // no PMID: try a citation match on each article
+            if (pPubEquiv->IsArticle()) {
+                CMLAClient::TReply reply;
+                try {
+                    id = ENTREZ_ID_FROM(int, m_mlaClient->AskCitmatchpmid(*pPubEquiv, &reply));
+                }
+                catch(const CException&) // a failed match just moves on to the next pub
                  {
-                    CreatePubPMID(*m_mlaClient, arr, id);
+                    continue;
+                }
+                if (id>ZERO_ENTREZ_ID &&
+                    xUpdatePubPMID(arr,id)) { // arr was replaced: stop iterating it
                      break;
                  }
              }
-            catch(CException& /*ex*/)
-            {
-            }
          }
      }
  }
  
+
  namespace
  {
      typedef set<CRef< CSeqdesc >* > TOwnerSet;
      typedef struct { TOwnerSet owner; CRef<COrg_ref> org_ref; } TOwner;
      typedef map<string, TOwner > TOrgMap;
-    void _UpdateOrgFromTaxon(CRemoteUpdater::FLogger logger, objects::CSeq_entry& entry, TOrgMap& m)
+    void _UpdateOrgFromTaxon(CSeq_entry& entry, TOrgMap& m)
      {
          if (entry.IsSet())
          {
              NON_CONST_ITERATE(CSeq_entry::TSet::TSeq_set, it, entry.SetSet().SetSeq_set())
              {
-                _UpdateOrgFromTaxon(logger, **it, m);
+                _UpdateOrgFromTaxon(**it, m);
              }
          }
  
@@ -404,13 +522,11 @@ namespace
              CRef<COrg_ref> org_ref;
              if (desc.IsOrg())
              {
-                //xUpdateOrgTaxname(logger, desc.SetOrg());
                  org_ref.Reset(&desc.SetOrg());
              }
              else
              if (desc.IsSource() && desc.GetSource().IsSetOrg())
              {
-                //xUpdateOrgTaxname(logger, desc.SetSource().SetOrg());
                  org_ref.Reset(&desc.SetSource().SetOrg());
              }
              if (org_ref)
@@ -442,11 +558,12 @@ namespace
          }
      }
  }
-void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeq_entry& entry)
+
+void CRemoteUpdater::UpdateOrgFromTaxon(CSeq_entry& entry)
  {   
      TOrgMap org_to_update;
  
-    _UpdateOrgFromTaxon(logger, entry, org_to_update); 
+    _UpdateOrgFromTaxon(entry, org_to_update); 
      if (org_to_update.empty())
          return;
  
@@ -475,7 +592,14 @@ void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeq_entry& ent
      }
  }
  
-void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeq_entry_EditHandle& obj)
+void CRemoteUpdater::UpdateOrgFromTaxon(FLogger /*logger*/, CSeq_entry& entry)
+{
+    // Deprecated overload kept for existing callers: the logger argument
+    // is ignored and the work is done by the single-argument overload.
+    this->UpdateOrgFromTaxon(entry);
+}
+
+void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, CSeq_entry_EditHandle& obj)
  {
      for (CBioseq_CI bioseq_it(obj); bioseq_it; ++bioseq_it)
      {
@@ -486,6 +610,8 @@ void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeq_entry_Edit
      }
  }
  
+
+
  namespace
  {
  bool s_IsAllCaps(const string& str)
@@ -663,7 +789,10 @@ void CRemoteUpdater::PostProcessPubs(CSeq_entry_EditHandle& obj)
              PostProcessPubs((CPubdesc&)desc_it->GetPub());
          }
      }
-   
+}
+
+void CRemoteUpdater::SetMLAClient(CMLAClient& mlaClient) {
+    m_mlaClient.Reset(&mlaClient); // xUpdatePubReferences only creates a client when this is empty
  }
  
  END_SCOPE(edit)
diff --git a/c++/src/objtools/format/context.cpp b/c++/src/objtools/format/context.cpp

index d1346af11ac832e53cc188ee95bd9bd1723b941c..ac96ff35a60b0e72aa1ecc65f349f4971f0ab451 100644 (file)
--- a/c++/src/objtools/format/context.cpp
+++ b/c++/src/objtools/format/context.cpp
@@ -1,4 +1,4 @@
-/*  $Id: context.cpp 602293 2020-02-20 18:24:39Z kans $
+/*  $Id: context.cpp 608545 2020-05-18 19:35:41Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -688,7 +688,8 @@ void CBioseqContext::x_CheckForShowComments() const
      }
  
      // JIRA SQD-4444 : copy annot selector from the one saved in this context structure
-    SAnnotSelector sel = m_FFCtx.SetAnnotSelector();
+    // SAnnotSelector sel = m_FFCtx.SetAnnotSelector();
+    SAnnotSelector sel;
      sel.SetAnnotType(CSeq_annot::TData::e_Ftable);
      CAnnot_CI annot_ci(m_Handle, sel);
      for( ; annot_ci; ++annot_ci ) {
diff --git a/c++/src/objtools/format/dbsource_item.cpp b/c++/src/objtools/format/dbsource_item.cpp

index 2fa2903dc893bf84865061942aa8f77a97aee98b..59be61e4194c54a33398d520038b4bdecd836a68 100644 (file)
--- a/c++/src/objtools/format/dbsource_item.cpp
+++ b/c++/src/objtools/format/dbsource_item.cpp
@@ -1,4 +1,4 @@
-/*  $Id: dbsource_item.cpp 577454 2019-01-03 22:58:25Z kans $
+/*  $Id: dbsource_item.cpp 614611 2020-08-20 12:59:34Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -628,7 +628,7 @@ string CDBSourceItem::x_FormatDBSourceID(const CSeq_id_Handle& idh)
                      GetContext()->Config().GetHTMLFormatter().FormatUniProtId(ht, acc);
                  } else {
                      GetContext()->Config().GetHTMLFormatter().FormatNucId(ht, *idh.GetSeqId(),
-                        GetContext()->GetScope().GetGi(idh), acc);
+                        GI_TO(TIntId, GetContext()->GetScope().GetGi(idh)), acc);
                  }
  #endif
                  s += comma + sep + "accession " + ht;
diff --git a/c++/src/objtools/format/defline_item.cpp b/c++/src/objtools/format/defline_item.cpp

index 2ed653f03a69f8c404d5ac4b4810230ac0b98541..4504eab31e6e8ebdeb412bf78e78773f95605a8b 100644 (file)
--- a/c++/src/objtools/format/defline_item.cpp
+++ b/c++/src/objtools/format/defline_item.cpp
@@ -1,4 +1,4 @@
-/*  $Id: defline_item.cpp 577454 2019-01-03 22:58:25Z kans $
+/*  $Id: defline_item.cpp 613774 2020-08-12 16:32:22Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -91,6 +91,9 @@ void CDeflineItem::x_GatherInfo(CBioseqContext& ctx)
      if ( ctx.Config().IgnoreExistingTitle() ) {
          flags |= sequence::CDeflineGenerator::fIgnoreExisting;
      }
+    if ( ctx.Config().ShowDeflineModifiers() ) {
+        flags |= sequence::CDeflineGenerator::fShowModifiers;
+    }
      if ( ctx.UsingSeqEntryIndex() ) {
          CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
          CBioseq_Handle bsh = scope.GetBioseqHandle(*bioseq);
@@ -101,8 +104,11 @@ void CDeflineItem::x_GatherInfo(CBioseqContext& ctx)
      if (! Defliner.UsePDBCompoundForDefline()) {
          ctx.SetPDBCompoundForComment(true);
      }
-    // CompressSpaces( m_Defline );
-    CleanAndCompress (m_Defline, m_Defline.c_str());
+    if ( ctx.Config().ShowDeflineModifiers() ) {
+        CompressSpaces( m_Defline );
+    } else {
+        CleanAndCompress (m_Defline, m_Defline.c_str());
+    }
      ConvertQuotes(m_Defline);
      AddPeriod(m_Defline);
      CSeqdesc_CI di(ctx.GetHandle(), CSeqdesc::e_Title);
diff --git a/c++/src/objtools/format/feature_item.cpp b/c++/src/objtools/format/feature_item.cpp

index 8b989cbd784b7dab99ee79d80664d36c3ad92b21..97a3e31410348a50f46c6beca5e6f9b887c4eec4 100644 (file)
--- a/c++/src/objtools/format/feature_item.cpp
+++ b/c++/src/objtools/format/feature_item.cpp
@@ -1,4 +1,4 @@
-/*  $Id: feature_item.cpp 606747 2020-04-27 11:07:41Z ivanov $
+/*  $Id: feature_item.cpp 615038 2020-08-26 13:39:07Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -489,7 +489,7 @@ static bool s_SkipFeature(const CMappedFeat& feat,
      CSeqFeatData::ESubtype subtype = feat.GetData().GetSubtype();        
  
      if ( subtype == CSeqFeatData::eSubtype_pub              ||
-         subtype == CSeqFeatData::eSubtype_non_std_residue  ||
+      /* subtype == CSeqFeatData::eSubtype_non_std_residue  || */
           subtype == CSeqFeatData::eSubtype_biosrc           ||
           subtype == CSeqFeatData::eSubtype_rsite            ||
           subtype == CSeqFeatData::eSubtype_seq ) {
@@ -1698,16 +1698,16 @@ void CFeatureItem::x_AddQualsIdx(
                          gf = &(mf.GetMappedFeature());
                          gr = &(mf.GetData().GetGene());
                          if (gr) {
-                            if (feat_gene_xref->IsSetLocus() && gr->IsSetLocus()) {
-                                if (feat_gene_xref->GetLocus() == gr->GetLocus()) {
+                            if (feat_gene_xref->IsSetLocus_tag() && gr->IsSetLocus_tag()) {
+                                if (feat_gene_xref->GetLocus_tag() == gr->GetLocus_tag()) {
                                      gene_feat = &(mf.GetMappedFeature());
                                      gene_ref = &(mf.GetData().GetGene());
                                  } else {
                                      // RW-985
                                      gene_ref = feat_gene_xref;
                                  }
-                            } else if (feat_gene_xref->IsSetLocus_tag() && gr->IsSetLocus_tag()) {
-                                if (feat_gene_xref->GetLocus_tag() == gr->GetLocus_tag()) {
+                            } else if (feat_gene_xref->IsSetLocus() && gr->IsSetLocus()) {
+                                if (feat_gene_xref->GetLocus() == gr->GetLocus()) {
                                      gene_feat = &(mf.GetMappedFeature());
                                      gene_ref = &(mf.GetData().GetGene());
                                  } else {
@@ -1817,6 +1817,9 @@ void CFeatureItem::x_AddQualsIdx(
      case CSeqFeatData::e_Psec_str:
          x_AddQualsPsecStr( ctx );
          break;
+    case CSeqFeatData::e_Non_std_residue:
+        x_AddQualsNonStd( ctx );
+        break;
      case CSeqFeatData::e_Het:
          x_AddQualsHet( ctx );
          break;
@@ -2017,6 +2020,9 @@ void CFeatureItem::x_AddQuals(
      case CSeqFeatData::e_Psec_str:
          x_AddQualsPsecStr( ctx );
          break;
+    case CSeqFeatData::e_Non_std_residue:
+        x_AddQualsNonStd( ctx );
+        break;
      case CSeqFeatData::e_Het:
          x_AddQualsHet( ctx );
          break;
@@ -2162,7 +2168,7 @@ void CFeatureItem::x_AddQualsRna(
                                  x_AddQual(slot, new CFlatSeqIdQVal(*acc_id));
                              }
                              /*
-                            if (! cfg.HideGI()) {
+                            if (! (cfg.HideGI() || cfg.IsPolicyFtp())) {
                                  x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(*sip, true));
                              }
                              */
@@ -2593,7 +2599,7 @@ void CFeatureItem::x_GetAssociatedProtInfoIdx(
      if ( protId ) {
          if ( !cfg.AlwaysTranslateCDS() ) {
              CScope::EGetBioseqFlag get_flag = CScope::eGetBioseq_Loaded;
-            if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() ) {
+            if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() || cfg.IsPolicyFtp() ) {
                  get_flag = CScope::eGetBioseq_All;
              }
              protHandle =  scope.GetBioseqHandle(*protId, get_flag);
@@ -2642,7 +2648,7 @@ void CFeatureItem::x_GetAssociatedProtInfo(
      if ( protId ) {
          if ( !cfg.AlwaysTranslateCDS() ) {
              CScope::EGetBioseqFlag get_flag = CScope::eGetBioseq_Loaded;
-            if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() ) {
+            if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() || cfg.IsPolicyFtp() ) {
                  get_flag = CScope::eGetBioseq_All;
              }
              protHandle =  scope.GetBioseqHandle(*protId, get_flag);
@@ -2753,7 +2759,7 @@ void CFeatureItem::x_AddQualProteinId(
              case CSeq_id::e_Gi:
                  if( seqid.GetGi() > ZERO_GI ) {
                      const CFlatFileConfig& cfg = GetContext()->Config();
-                    if (! cfg.HideGI()) {
+                    if (! (cfg.HideGI() || cfg.IsPolicyFtp())) {
                          if ( eLastRegularChoice == CSeq_id::e_not_set ) {
                              // use as protein_id if it's the first usable one
                              x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( seqid ) );
@@ -3134,7 +3140,7 @@ void CFeatureItem::x_AddProductIdQuals(
          const CFlatFileConfig& cfg = GetContext()->Config();
          ITERATE( CBioseq_Handle::TId, id_iter, ids ) {
              if( id_iter->IsGi() ) {
-                if (! cfg.HideGI()) {
+                if (! (cfg.HideGI() || cfg.IsPolicyFtp())) {
                      x_AddQual( eFQ_db_xref,
                          new CFlatStringQVal("GI:" + NStr::NumericToString(id_iter->GetGi()) ));
                  }
@@ -3263,6 +3269,20 @@ void CFeatureItem::x_AddQualsPsecStr(
      x_AddQual( eFQ_sec_str_type, new CFlatStringQVal( sec_str_as_str ) );
  }
  
+//  ----------------------------------------------------------------------------
+void CFeatureItem::x_AddQualsNonStd(
+    CBioseqContext& ctx )
+//  ----------------------------------------------------------------------------
+{
+    // Adds the feature's non-standard-residue string as the
+    // eFQ_non_std_residue qualifier; ctx is not used.
+    const CSeqFeatData& data = m_Feat.GetData();
+    _ASSERT( data.IsNon_std_residue() );
+
+    const CSeqFeatData_Base::TNon_std_residue& n_s_res = data.GetNon_std_residue();
+    x_AddQual( eFQ_non_std_residue, new CFlatStringQVal( n_s_res ) );
+}
+
  //  ----------------------------------------------------------------------------
  void CFeatureItem::x_AddQualsHet(
      CBioseqContext& ctx )
@@ -4503,6 +4523,7 @@ void CFeatureItem::x_FormatQuals(CFlatFeature& ff) const
      DO_QUAL(site_type);
      DO_QUAL(sec_str_type);
      DO_QUAL(heterogen);
+    DO_QUAL(non_std_residue);
  
      DO_QUAL(tag_peptide);
  
@@ -5186,6 +5207,7 @@ static const TQualPair sc_GbToFeatQualMap[] = {
      { eFQ_mol_wt, CSeqFeatData::eQual_calculated_mol_wt },
      { eFQ_ncRNA_class, CSeqFeatData::eQual_ncRNA_class },
      { eFQ_nomenclature, CSeqFeatData::eQual_nomenclature },
+    { eFQ_non_std_residue, CSeqFeatData::eQual_non_std_residue },
      { eFQ_number, CSeqFeatData::eQual_number },
      { eFQ_old_locus_tag, CSeqFeatData::eQual_old_locus_tag },
      { eFQ_operon, CSeqFeatData::eQual_operon },
@@ -5311,6 +5333,9 @@ void CFeatureItem::x_AddFTableQuals(
      case CSeqFeatData::e_Psec_str:
          x_AddFTablePsecStrQuals(data.GetPsec_str());
          break;
+    case CSeqFeatData::e_Non_std_residue:
+        x_AddFTableNonStdQuals(data.GetNon_std_residue());
+        break;
      case CSeqFeatData::e_Het:
          x_AddFTablePsecStrQuals(data.GetHet());
          break;
@@ -5521,6 +5546,8 @@ void CFeatureItem::x_AddFTableAnticodon(
      case CTrna_ext::C_Aa::e_Ncbistdaa:
          aa = GetAAName(trna_ext.GetAa().GetNcbistdaa(), false);
          break;
+    default:
+        break;
      }
  
      string seq("---");
@@ -5585,7 +5612,7 @@ void CFeatureItem::x_AddFTableRnaQuals(
          CBioseq_Handle prod = 
              ctx.GetScope().GetBioseqHandle(m_Feat.GetProductId());
          if ( prod ) {
-            string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !ctx.Config().HideGI());
+            string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !(ctx.Config().HideGI() || ctx.Config().IsPolicyFtp()));
              if (!NStr::IsBlank(id_str)) {
                  x_AddFTableQual("transcript_id", id_str);
              }
@@ -5719,7 +5746,7 @@ void CFeatureItem::x_AddFTableCdregionQuals(
      }
  
      if (prod && !cfg.HideProteinID()) {
-        string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !ctx.Config().HideGI());
+        string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !(ctx.Config().HideGI() || ctx.Config().IsPolicyFtp()));
          if (!NStr::IsBlank(id_str)) {
              x_AddFTableQual("protein_id", id_str);
          }
@@ -5812,6 +5839,16 @@ void CFeatureItem::x_AddFTablePsecStrQuals(
      }
  }
  
+//  ----------------------------------------------------------------------------
+void CFeatureItem::x_AddFTableNonStdQuals(
+    const CSeqFeatData::TNon_std_residue& res )
+//  ----------------------------------------------------------------------------
+{
+    // An empty residue string produces no qualifier at all.
+    if ( res.empty() ) { return; }
+    x_AddFTableQual("non_std_residue", res);
+}
+
  
  static const string s_GetSubtypeString(const COrgMod::TSubtype& subtype)
  {
@@ -6067,6 +6104,13 @@ static string s_GetSpecimenVoucherText(
          if( voucher_info_ref->m_Prefix != NULL ) {
              text << *voucher_info_ref->m_Prefix;
          }
+        if( voucher_info_ref->m_Trim != NULL ) {
+            const string& trim = *voucher_info_ref->m_Trim;
+            if (NStr::StartsWith(id, trim)) {
+                NStr::TrimPrefixInPlace(id, trim);
+                NStr::TruncateSpacesInPlace(id);
+            }
+        }
          if( voucher_info_ref->m_PadTo > 0 && voucher_info_ref->m_PadWith != NULL) {
              int len_id = id.length();
              int len_pad = voucher_info_ref->m_PadWith->length();
@@ -6448,6 +6492,7 @@ void CSourceFeatureItem::x_FormatGBNoteQuals(CFlatFeature& ff) const
  }
  
  
+/*
  static bool s_IsExactAndNonExactMatchOnNoteQuals(CFlatFeature::TQuals& qvec, const string& str)
  {
      if (qvec.empty()) {
@@ -6473,6 +6518,7 @@ static bool s_IsExactAndNonExactMatchOnNoteQuals(CFlatFeature::TQuals& qvec, con
      if (has_exact == 1 && non_exact > 0) return true;
      return false;
  }
+*/
  
  
  
diff --git a/c++/src/objtools/format/flat_file_config.cpp b/c++/src/objtools/format/flat_file_config.cpp

index 287b248b72a51874c3cbed10e24ac89100e761d6..a509b9fddf711bdd61f1110ed29542a39405e583 100644 (file)
--- a/c++/src/objtools/format/flat_file_config.cpp
+++ b/c++/src/objtools/format/flat_file_config.cpp
@@ -1,4 +1,4 @@
-/*  $Id: flat_file_config.cpp 606754 2020-04-27 11:09:46Z ivanov $
+/*  $Id: flat_file_config.cpp 614736 2020-08-21 13:43:48Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -314,15 +314,17 @@ CFlatFileConfig::CFlatFileConfig(
      TStyle style,
      TFlags flags,
      TView view,
-    TPolicy policy) :
-    m_Format(format), m_Mode(mode), m_Style(style), m_Flags(flags), m_View(view), m_Policy(policy)
+    TPolicy policy,
+    TCustom custom) :
+    m_Format(format), m_Mode(mode), m_Style(style), m_Flags(flags), m_View(view), m_Policy(policy), m_Custom(custom)
  {
      m_RefSeqConventions = false;
+    m_FeatDepth = 0;
+    m_GapDepth = 0;
      SetGenbankBlocks(fGenbankBlocks_All);
      SetGenbankBlockCallback(NULL);
      SetCanceledCallback(NULL);
      BasicCleanup(false);
-    SetCustom(0);
  
      // FTable always requires master style
      if (m_Format == eFormat_FTable) {
@@ -535,7 +537,7 @@ void CFlatFileConfig::AddArgumentDescriptions(CArgDescriptions& args)
                                   "Far fetch policy",
                                   CArgDescriptions::eString, "adaptive");
           arg_desc->SetConstraint("policy",
-                                 &(*new CArgAllow_Strings, "adaptive", "internal", "external", "exhaustive"));
+                                 &(*new CArgAllow_Strings, "adaptive", "internal", "external", "exhaustive", "ftp", "web"));
  
           // flags (default: 0)
           arg_desc->AddDefaultKey("flags", "Flags",
@@ -615,6 +617,9 @@ void CFlatFileConfig::AddArgumentDescriptions(CArgDescriptions& args)
           arg_desc->AddOptionalKey("depth", "Depth",
                                    "Exploration depth", CArgDescriptions::eInteger);
  
+         arg_desc->AddOptionalKey("gap-depth", "GapDepth",
+                                  "Gap exploration depth", CArgDescriptions::eInteger);
+
           arg_desc->AddOptionalKey("max_search_segments", "MaxSearchSegments",
                                    "Max number of empty segments to search", CArgDescriptions::eInteger);
  
@@ -746,6 +751,10 @@ CFlatFileConfig::EPolicy x_GetPolicy(const CArgs& args)
          return CFlatFileConfig::ePolicy_External;
      } else if ( Policy == "exhaustive" ) {
          return CFlatFileConfig::ePolicy_Exhaustive;
+    } else if ( Policy == "ftp" ) {
+        return CFlatFileConfig::ePolicy_Ftp;
+    } else if ( Policy == "web" ) {
+        return CFlatFileConfig::ePolicy_Web;
      }
  
      // default
@@ -847,11 +856,6 @@ CFlatFileConfig::ECustom x_GetCustom(const CArgs& args)
  {
      int custom = args["custom"].AsInteger();
  
-    // ID-5865 : Set the "show SNP" and "show CDD" bits based on the value of the
-    // "enable-external" flag.
-    if (args["enable-external"] || args["policy"].AsString() == "external")
-        custom |= (CFlatFileConfig::fShowSNPFeatures | CFlatFileConfig::fShowCDDFeatures);
-
      return (CFlatFileConfig::ECustom)custom;
  }
  
@@ -916,6 +920,24 @@ void CFlatFileConfig::FromArguments(const CArgs& args)
      CFlatFileConfig::TGenbankBlocks genbank_blocks = x_GetGenbankBlocks(args);
      CFlatFileConfig::ECustom        custom         = x_GetCustom(args);
  
+    // ID-5865 : "no-external" forces the hide-SNP/hide-CDD flags; otherwise
+    // "enable-external" (or policy "external") sets each show bit not hidden.
+    if (args["no-external"]) {
+        int flg = (int) flags;
+        flg |= CFlatFileConfig::fHideCDDFeatures;
+        flg |= CFlatFileConfig::fHideSNPFeatures;
+        flags = (CFlatFileConfig::EFlags) flg;
+    } else if (args["enable-external"] || args["policy"].AsString() == "external") {
+        int cust = (int) custom;
+        if ((flags & CFlatFileConfig::fHideCDDFeatures) == 0) {
+            cust |= CFlatFileConfig::fShowCDDFeatures;
+        }
+        if ((flags & CFlatFileConfig::fHideSNPFeatures) == 0) {
+            cust |= CFlatFileConfig::fShowSNPFeatures;
+        }
+        custom = (CFlatFileConfig::ECustom) cust;
+    }
+
      SetFormat(format);
      SetMode(mode);
      SetStyle(style);
@@ -925,6 +947,15 @@ void CFlatFileConfig::FromArguments(const CArgs& args)
      m_fGenbankBlocks = genbank_blocks;
      m_BasicCleanup = args["cleanup"];
      SetCustom(custom);
+
+    if( args["depth"] ) {
+        int featDepth = args["depth"].AsInteger();
+        SetFeatDepth(featDepth);
+    }
+    if( args["gap-depth"] ) {
+        int gapDepth = args["gap-depth"].AsInteger();
+        SetGapDepth(gapDepth);
+    }
  }
  
  #ifdef NEW_HTML_FMT
@@ -958,7 +989,7 @@ void CHTMLEmptyFormatter::FormatNucSearch(CNcbiOstream& os, const string& id) co
      os << id;
  }
  
-void CHTMLEmptyFormatter::FormatTaxid(string& str, const int taxid, const string& taxname) const
+void CHTMLEmptyFormatter::FormatTaxid(string& str, const TTaxId taxid, const string& taxname) const
  {
      str = taxname;
  }
diff --git a/c++/src/objtools/format/flat_file_generator.cpp b/c++/src/objtools/format/flat_file_generator.cpp

index 2337cf73ee85d1b6ba346463c1aab0044c436a70..f767951584e5e1fa2b97a77332bf4e34ed84881a 100644 (file)
--- a/c++/src/objtools/format/flat_file_generator.cpp
+++ b/c++/src/objtools/format/flat_file_generator.cpp
@@ -1,4 +1,4 @@
-/*  $Id: flat_file_generator.cpp 606748 2020-04-27 11:07:58Z ivanov $
+/*  $Id: flat_file_generator.cpp 615047 2020-08-26 13:40:19Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -84,14 +84,14 @@ CFlatFileGenerator::CFlatFileGenerator
   CFlatFileConfig::TStyle  style,
   CFlatFileConfig::TFlags  flags,
   CFlatFileConfig::TView   view,
- CFlatFileConfig::TCustom custom) :
-    m_Ctx(new CFlatFileContext(CFlatFileConfig(format, mode, style, flags, view)))
+ CFlatFileConfig::TCustom custom,
+ CFlatFileConfig::TPolicy policy) :
+    m_Ctx(new CFlatFileContext(CFlatFileConfig(format, mode, style, flags, view, policy, custom)))
  {
      m_Failed = false;
      if ( !m_Ctx ) {
         NCBI_THROW(CFlatException, eInternal, "Unable to initialize context");
      }
-    m_Ctx->SetConfig().SetCustom(custom);
  }
  
  
@@ -319,6 +319,12 @@ void CFlatFileGenerator::Generate
                  if ( m_Ctx->GetConfig().IsPolicyExhaustive() ) {
                      policy = CSeqEntryIndex::eExhaustive;
                  }
+                if ( m_Ctx->GetConfig().IsPolicyFtp() ) {
+                    policy = CSeqEntryIndex::eFtp;
+                }
+                if ( m_Ctx->GetConfig().IsPolicyWeb() ) {
+                    policy = CSeqEntryIndex::eWeb;
+                }
                  CRef<CSeqEntryIndex> idx(new CSeqEntryIndex( topseh, policy, flags ));
                  m_Ctx->SetSeqEntryIndex(idx);
                  if (idx->IsIndexFailure()) {
@@ -337,9 +343,11 @@ void CFlatFileGenerator::Generate
      // bool nearFeatsSuppress = false;
  
      bool isNc = false;
+    /*
      bool isNgNtNwNz = false;
      bool isGED = false;
      bool isTPA = false;
+    */
  
      bool hasLocalFeat = false;
      bool forceOnlyNear = false;
@@ -355,12 +363,12 @@ void CFlatFileGenerator::Generate
                  case CSeq_id::e_Genbank:
                  case CSeq_id::e_Embl:
                  case CSeq_id::e_Ddbj:
-                    isGED = true;
+                    // isGED = true;
                      break;
                  case CSeq_id::e_Tpg:
                  case CSeq_id::e_Tpe:
                  case CSeq_id::e_Tpd:
-                    isTPA = true;
+                    // isTPA = true;
                      break;
                  case CSeq_id::e_Other:
                      {
@@ -370,7 +378,7 @@ void CFlatFileGenerator::Generate
                              if (acc == "NC_") {
                                  isNc = true;
                              } else if (acc == "NG_" || acc == "NT_" || acc == "NW_" || acc == "NZ_") {
-                                isNgNtNwNz = true;
+                                // isNgNtNwNz = true;
                              }
                          }
                      }
@@ -718,31 +726,39 @@ void CFlatFileGenerator::Generate
                  }
                  if ( cfg.HideSNPFeatures() ) {
                      flags |= CSeqEntryIndex::fHideSNPFeats;
+                } else if ( cfg.ShowSNPFeatures() ) {
+                    flags |= CSeqEntryIndex::fShowSNPFeats;
                  }
                  if ( cfg.HideCDDFeatures() ) {
                      flags |= CSeqEntryIndex::fHideCDDFeats;
-                }
-                if ( cfg.ShowSNPFeatures() ) {
-                    flags |= CSeqEntryIndex::fShowSNPFeats;
-                }
-                if ( cfg.ShowCDDFeatures() ) {
+                } else if ( cfg.ShowCDDFeatures() ) {
                      flags |= CSeqEntryIndex::fShowCDDFeats;
                  }
-                if ( m_Ctx->GetConfig().IsPolicyInternal() ) {
+                if ( cfg.IsPolicyInternal() ) {
                      policy = CSeqEntryIndex::eInternal;
                  }
-                if ( m_Ctx->GetConfig().IsPolicyExternal() ) {
+                if ( cfg.IsPolicyExternal() ) {
                      policy = CSeqEntryIndex::eExternal;
                  }
-                if ( m_Ctx->GetConfig().IsPolicyExhaustive() ) {
+                if ( cfg.IsPolicyExhaustive() ) {
                      policy = CSeqEntryIndex::eExhaustive;
                  }
-                CRef<CSeqEntryIndex> idx(new CSeqEntryIndex( topseh, policy, flags ));
+                if ( cfg.IsPolicyFtp() ) {
+                    policy = CSeqEntryIndex::eFtp;
+                }
+                if ( cfg.IsPolicyWeb() ) {
+                    policy = CSeqEntryIndex::eWeb;
+                }
+                CRef<CSeqEntryIndex> idx(new CSeqEntryIndex( topseh, policy, flags));
                  m_Ctx->SetSeqEntryIndex(idx);
                  if (idx->IsIndexFailure()) {
                      m_Failed = true;
                      return;
                  }
+                int featDepth = cfg.GetFeatDepth();
+                idx->SetFeatDepth(featDepth);
+                int gapDepth = cfg.GetGapDepth();
+                idx->SetGapDepth(gapDepth);
              } catch(CException &) {
                  m_Failed = true;
                  return;
@@ -781,6 +797,20 @@ void CFlatFileGenerator::Generate
  }
  
  
+void CFlatFileGenerator::Generate
+(const CBioseq_Handle& bsh,
+ CNcbiOstream& os,
+ bool useSeqEntryIndexing)
+{   // Bioseq-handle overload: wraps `os` in a formatting item stream and delegates.
+    CRef<CFlatItemOStream>
+        item_os(new CFormatItemOStream(new COStreamTextOStream(os)));
+    // Pass the Seq-entry handle taken from `bsh`, forwarding useSeqEntryIndexing as is.
+    const CSeq_entry_Handle entry = bsh.GetSeq_entry_Handle();
+    Generate(entry, *item_os, useSeqEntryIndexing);
+
+}
+
+
  void CFlatFileGenerator::Generate
  (const CSeq_submit& submit,
   CScope& scope,
diff --git a/c++/src/objtools/format/flat_qual_slots.cpp b/c++/src/objtools/format/flat_qual_slots.cpp

index f26530e2b7edcbbbe7bcf6cbf96ab7f26ef06da2..074303cf3428920392810168cbe87e34d82d1da9 100644 (file)
--- a/c++/src/objtools/format/flat_qual_slots.cpp
+++ b/c++/src/objtools/format/flat_qual_slots.cpp
@@ -1,4 +1,4 @@
-/*  $Id: flat_qual_slots.cpp 564513 2018-05-29 17:40:10Z kans $
+/*  $Id: flat_qual_slots.cpp 613781 2020-08-12 16:42:43Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -114,6 +114,7 @@ GetStringOfFeatQual(EFeatureQualifier eFeatureQualifier)
          TYPICAL_FQ(mol_wt),
          TYPICAL_FQ(ncRNA_class),
          TYPICAL_FQ(nomenclature),
+        TYPICAL_FQ(non_std_residue),
          TYPICAL_FQ(number),
          TYPICAL_FQ(old_locus_tag),
          TYPICAL_FQ(operon),
diff --git a/c++/src/objtools/format/gather_items.cpp b/c++/src/objtools/format/gather_items.cpp

index 9848b033a7e122b8f660bbe5cdd486dffe9e707c..5f08094322f500975bdd3b887ef0060cc38ed155 100644 (file)
--- a/c++/src/objtools/format/gather_items.cpp
+++ b/c++/src/objtools/format/gather_items.cpp
@@ -1,4 +1,4 @@
-/*  $Id: gather_items.cpp 607405 2020-05-04 14:19:32Z ivanov $
+/*  $Id: gather_items.cpp 615788 2020-09-03 18:19:11Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -751,7 +751,7 @@ void CFlatGatherer::x_GatherReferencesIdx(const CSeq_loc& loc, TReferences& refs
      if (! bsx) return;
  
      // gather references from descriptors
-    bsx->IterateDescriptors([this, &ctx, &scope, &refs, &idx, bsx](CDescriptorIndex& sdx) {
+    bsx->IterateDescriptors([this, &refs, &idx, bsx](CDescriptorIndex& sdx) {
          try {
              CSeqdesc::E_Choice chs = sdx.GetType();
              if (chs == CSeqdesc::e_Pub) {
@@ -1247,8 +1247,7 @@ void CFlatGatherer::x_IdComments(CBioseqContext& ctx,
      string genome_build_number =
          CGenomeAnnotComment::GetGenomeBuildNumber(ctx.GetHandle());
      bool has_ref_track_status = s_HasRefTrackStatus(ctx.GetHandle());
-    CCommentItem::ECommentFormat format = ctx.Config().DoHTML() ?
-        CCommentItem::eFormat_Html : CCommentItem::eFormat_Text;
+    // CCommentItem::ECommentFormat format = ctx.Config().DoHTML() ? CCommentItem::eFormat_Html : CCommentItem::eFormat_Text;
  
      ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) {
          const CSeq_id& id = **id_iter;
@@ -2006,7 +2005,7 @@ void CFlatGatherer::x_CollectBioSourcesOnBioseq
      // if protein, get sources applicable to DNA location of CDS
      if ( ctx.IsProt() ) {
          // collect biosources features on bioseq
-        if ( !ctx.DoContigStyle()  ||  cfg.ShowContigSources() ) {
+        if ( !ctx.DoContigStyle()  ||  cfg.ShowContigSources() || cfg.IsPolicyFtp() ) {
              CConstRef<CSeq_feat> src_feat = x_GetSourceFeatFromCDS (bh);
              if (src_feat.NotEmpty()) {
                  // CMappedFeat mapped_feat(bh.GetScope().GetSeq_featHandle(*src_feat));
@@ -2028,7 +2027,7 @@ void CFlatGatherer::x_CollectBioSourcesOnBioseq
  
      if ( ! ctx.IsProt() ) {
          // collect biosources features on bioseq
-        if ( !ctx.DoContigStyle()  ||  cfg.ShowContigSources() ) {
+        if ( !ctx.DoContigStyle()  ||  cfg.ShowContigSources() || cfg.IsPolicyFtp() ) {
              x_CollectSourceFeatures(bh, range, ctx, srcs);
          }
      }
@@ -2038,7 +2037,7 @@ void CFlatGatherer::x_CollectBioSourcesOnBioseq
  void CFlatGatherer::x_CollectBioSources(TSourceFeatSet& srcs) const
  {
      CBioseqContext& ctx = *m_Current;
-    CScope* scope = &ctx.GetScope();
+    // CScope* scope = &ctx.GetScope();
      const CFlatFileConfig& cfg = ctx.Config();
  
      x_CollectBioSourcesOnBioseq(ctx.GetHandle(),
@@ -2462,6 +2461,7 @@ bool CFlatGatherer::x_BiosourcesEqualForMergingPurposes(
      return true;
  }
  
+// for the non-indexed, non-faster, older version of the flatfile generator
  void s_SetSelection(SAnnotSelector& sel, CBioseqContext& ctx)
  {
      const CFlatFileConfig& cfg = ctx.Config();
@@ -2966,7 +2966,7 @@ static bool s_IsCDD(const CSeq_feat_Handle& feat)
  {
      if (feat.GetAnnot().IsNamed()) {
          const string& name = feat.GetAnnot().GetName();
-        return (name == "Annot:CDD" || name == "CDDSearch");
+        return (name == "Annot:CDD" || name == "CDDSearch" || name == "CDD");
      }
      return false;
  }
@@ -3006,7 +3006,7 @@ void CFlatGatherer::x_GatherFeaturesOnWholeLocationIdx
   SAnnotSelector& sel,
   CBioseqContext& ctx) const
  {
-    CScope& scope = ctx.GetScope();
+    // CScope& scope = ctx.GetScope();
      CFlatItemOStream& out = *m_ItemOS;
  
      CSeqMap_CI gap_it = s_CreateGapMapIter(loc, ctx);
@@ -3045,7 +3045,7 @@ void CFlatGatherer::x_GatherFeaturesOnWholeLocationIdx
          s_SetGapIdxData (gap_data, gaps);
      }
  
-    bsx->IterateFeatures([this, &ctx, &scope, &prev_feat, &gap_it, &loc_len, &item, &out, &slice_mapper,
+    bsx->IterateFeatures([this, &ctx, &prev_feat, &loc_len, &item, &out, &slice_mapper,
                            gaps, &gap_data, showGapsOfSizeZero, bsx](CFeatureIndex& sfx) {
          try {
              CMappedFeat mf = sfx.GetMappedFeat();
@@ -3104,7 +3104,7 @@ void CFlatGatherer::x_GatherFeaturesOnWholeLocationIdx
                  // may need to map sig_peptide on a different segment
                  if (feat.GetData().IsCdregion()) {
                      if (( !ctx.Config().IsFormatFTable()  ||  ctx.Config().ShowFtablePeptides() )) {
-                        x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx, slice_mapper);
+                        x_GetFeatsOnCdsProductIdx(original_feat, ctx, slice_mapper);
                      }
                  }
                  return; // continue;
@@ -3120,7 +3120,7 @@ void CFlatGatherer::x_GatherFeaturesOnWholeLocationIdx
              bool has_gap = gap_data.has_gap;
              int gap_start = gap_data.gap_start;
              int gap_end = gap_data.gap_end;
-            while (has_gap && gap_start < feat_start) {
+            while (has_gap && gap_start <= feat_start) {
                  const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_start <= gap_end) );
                  const bool gapMatch = ( subtype == CSeqFeatData::eSubtype_gap && feat_start == gap_start && feat_end == gap_end - 1 );
                  if ( noGapSizeProblem && ! gapMatch ) {
@@ -3170,7 +3170,7 @@ void CFlatGatherer::x_GatherFeaturesOnWholeLocationIdx
                      {{  
                          // map features from protein
                          if (( !ctx.Config().IsFormatFTable()  ||  ctx.Config().ShowFtablePeptides() )) {
-                            x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx, 
+                            x_GetFeatsOnCdsProductIdx(original_feat, ctx, 
                                  slice_mapper,
                                  CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
                          }
@@ -3206,7 +3206,7 @@ void CFlatGatherer::x_GatherFeaturesOnWholeLocationIdx
  
      // when all features are done, output remaining gaps
      while (gap_data.has_gap) {
-        const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_data.gap_start < gap_data.gap_end) );
+        const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_data.gap_start <= gap_data.gap_end) );
          if( noGapSizeProblem /* && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) {
              item.Reset( s_NewGapItem(gap_data.gap_start, gap_data.gap_end, gap_data.gap_length, gap_data.gap_type,
                          gap_data.gap_evidence, gap_data.is_unknown_length, gap_data.is_assembly_gap, ctx) );
@@ -3491,17 +3491,18 @@ void CFlatGatherer::x_GatherFeaturesOnRangeIdx
      CRef<CSeq_loc_Mapper> slice_mapper = s_MakeSliceMapper(loc, ctx);
  
      // Gaps of length zero are only shown for SwissProt Genpept records
-    const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
+    // const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
  
      // cache to avoid repeated calculations
-    const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ;
+    // const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ;
  
      CSeq_feat_Handle prev_feat;
      CConstRef<IFlatItem> item;
  
      CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
      if (! idx) return;
-    CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (loc);
+    // CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (loc);
+    CRef<CBioseqIndex> bsx = idx->GetBioseqIndex ();
      if (! bsx) return;
  
      const vector<CRef<CGapIndex>>& gaps = bsx->GetGapIndices();
@@ -3515,8 +3516,10 @@ void CFlatGatherer::x_GatherFeaturesOnRangeIdx
          s_SetGapIdxData (gap_data, gaps);
      }
  
-    bsx->IterateFeatures([this, &ctx, &scope, &prev_feat, &gap_it, &loc_len, &item, &out, &slice_mapper,
-                          gaps, &gap_data, showGapsOfSizeZero, bsx](CFeatureIndex& sfx) {
+    CSeq_loc slp;
+    slp.Assign(loc);
+    bsx->IterateFeatures(slp, [this, &ctx, &scope, &prev_feat, &item, &out, &slice_mapper,
+                          gaps, bsx](CFeatureIndex& sfx) {
          try {
              CMappedFeat mf = sfx.GetMappedFeat();
              CSeq_feat_Handle feat = sfx.GetSeqFeatHandle(); // it->GetSeq_feat_Handle();
@@ -3582,7 +3585,7 @@ void CFlatGatherer::x_GatherFeaturesOnRangeIdx
                  // may need to map sig_peptide on a different segment
                  if (feat.GetData().IsCdregion()) {
                      if (( !ctx.Config().IsFormatFTable()  ||  ctx.Config().ShowFtablePeptides() )) {
-                        x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx, slice_mapper);
+                        x_GetFeatsOnCdsProductIdx(original_feat, ctx, slice_mapper);
                      }
                  }
                  return;
@@ -3597,135 +3600,11 @@ void CFlatGatherer::x_GatherFeaturesOnRangeIdx
              const CSeq_loc& loc = original_feat.GetLocation();
              CRef<CSeq_loc> loc2(new CSeq_loc);
              loc2->Assign(*feat_loc);
-            loc2->SetId(*loc.GetId());
- 
-            item.Reset( x_NewFeatureItem(mf, ctx, loc2, m_Feat_Tree, CFeatureItem::eMapped_not_mapped, true) );
-            out << item;
-
-            // Add more features depending on user preferences
-
-            switch (feat.GetFeatSubtype()) {
-                case CSeqFeatData::eSubtype_mRNA:
-                {{
-                    // optionally map CDS from cDNA onto genomic
-                    if (s_CopyCDSFromCDNA(ctx)   &&  feat.IsSetProduct()) {
-                        x_CopyCDSFromCDNA(original_feat, ctx);
-                    }
-                    break;
-                }}
-                case CSeqFeatData::eSubtype_cdregion:
-                    {{  
-                        // map features from protein
-                        if (( !ctx.Config().IsFormatFTable()  ||  ctx.Config().ShowFtablePeptides() )) {
-                            x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx, 
-                                slice_mapper,
-                                CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
-                        }
-                        break;
-                    }}
-                default:
-                    break;
+            const CSeq_id* id2 = loc.GetId();
+            // null check needed for gene in X55766 to prevent a seg fault; it still does not produce the correct mixed location
+            if (id2) {
+                loc2->SetId(*id2);
              }
-        } catch (CException& e) {
-            // special case: Job cancellation exceptions make us stop
-            // generating features.
-            CMappedFeat mf = sfx.GetMappedFeat();
-            if( NStr::EqualNocase(e.what(), "job cancelled") ||
-                NStr::EqualNocase(e.what(), "job canceled") )
-            {
-                LOG_POST_X(2, Error << "Job canceled while processing feature "
-                                << s_GetFeatDesc(mf.GetSeq_feat_Handle())
-                                << " [" << e << "]; flatfile may be truncated");
-                return;
-            }
-
-            // for cases where a halt is requested, just rethrow the exception
-            if( e.GetErrCodeString() == string("eHaltRequested") ) {
-                throw e;
-            }
-
-            // post to log, go on to next feature
-            LOG_POST_X(2, Error << "Error processing feature "
-                                << s_GetFeatDesc(mf.GetSeq_feat_Handle())
-                                << " [" << e << "]");
-        }
-    });  //  end of for loop
-}
-
-size_t CFlatGatherer::x_GatherFeaturesOnSegmentIdx
-(const CSeq_loc& loc,
- SAnnotSelector& sel,
- CBioseqContext& ctx) const
-{
-    size_t count = 0;
-
-    CScope& scope = ctx.GetScope();
-    CFlatItemOStream& out = *m_ItemOS;
-
-    // logic to handle offsets that occur when user sets
-    // the -from and -to command-line parameters
-    // build slice_mapper for mapping locations
-    CRef<CSeq_loc_Mapper> slice_mapper = s_MakeSliceMapper(loc, ctx);
-
-    // Gaps of length zero are only shown for SwissProt Genpept records
-    const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
-
-    // cache to avoid repeated calculations
-    const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ;
-
-    CSeq_feat_Handle prev_feat;
-    CConstRef<IFlatItem> item;
-
-    CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
-    if (! idx) return count;
-    CRef<CBioseqIndex> bsx = idx->GetBioseqIndex ();
-    if (! bsx) return count;
-
-    count = bsx->IterateFeaturesByLoc(loc, [this, &ctx, &scope, &prev_feat, &loc_len,
-                              &item, &out, &slice_mapper, showGapsOfSizeZero, bsx](CFeatureIndex& sfx) {
-        try {
-            CMappedFeat mf = sfx.GetMappedFeat();
-            CSeq_feat_Handle feat = sfx.GetSeqFeatHandle(); // it->GetSeq_feat_Handle();
-            const CSeq_feat& original_feat = sfx.GetMappedFeat().GetOriginalFeature(); // it->GetOriginalFeature();
-
-            /// we need to cleanse CDD features
-
-            s_CleanCDDFeature(original_feat);
-
-            const CFlatFileConfig& cfg = ctx.Config();
-            CSeqFeatData::ESubtype subtype = feat.GetFeatSubtype();
-            if (cfg.HideCDDFeatures()  &&
-                (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site)  &&
-                s_IsCDD(feat)) {
-                return;
-            }
-
-            if( (feat.GetFeatSubtype() == CSeqFeatData::eSubtype_gap) && ! feat.IsPlainFeat() ) {
-                // skip gaps when we take slices (i.e. "-from" and "-to" command-line args),
-                // unless they're a plain feature.
-                // (compare NW_001468136 (100 to 200000) and AC185591 (100 to 100000) )
-                return;
-            }
-
-            // supress duplicate features
-            if (prev_feat  &&  s_IsDuplicateFeatures(prev_feat, feat)) {
-                return;
-            }
-            prev_feat = feat;
-
-            CConstRef<CSeq_loc> feat_loc( sfx.GetMappedLocation()); // &it->GetLocation()); 
-
-            feat_loc = s_NormalizeNullsBetween( feat_loc );
-
-            feat_loc = Seq_loc_Merge(*feat_loc, CSeq_loc::fMerge_Abutting, &scope);
-
-            // HANDLE GAPS SECTION GOES HERE
-
-
-            const CSeq_loc& loc = original_feat.GetLocation();
-            CRef<CSeq_loc> loc2(new CSeq_loc);
-            loc2->Assign(*feat_loc);
-            loc2->SetId(*loc.GetId());
   
              item.Reset( x_NewFeatureItem(mf, ctx, loc2, m_Feat_Tree, CFeatureItem::eMapped_not_mapped, true) );
              out << item;
@@ -3745,7 +3624,7 @@ size_t CFlatGatherer::x_GatherFeaturesOnSegmentIdx
                      {{  
                          // map features from protein
                          if (( !ctx.Config().IsFormatFTable()  ||  ctx.Config().ShowFtablePeptides() )) {
-                            x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx, 
+                            x_GetFeatsOnCdsProductIdx(original_feat, ctx, 
                                  slice_mapper,
                                  CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
                          }
@@ -3778,8 +3657,6 @@ size_t CFlatGatherer::x_GatherFeaturesOnSegmentIdx
                                  << " [" << e << "]");
          }
      });  //  end of for loop
-
-    return count;
  }
  
  void CFlatGatherer::x_GatherFeaturesOnRange
@@ -4020,10 +3897,35 @@ s_ContainsGaps( const CSeq_loc &loc )
      return false;
  }
  
-void CFlatGatherer::x_GatherFeatures(void) const
+/*
+static bool s_NotForceNearFeats(CBioseqContext& ctx)
+{
+    // asn2flat -id NW_003127872  -flags 2 -faster -custom 2048
+    CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
+    if (idx) {
+        CBioseq_Handle hdl = ctx.GetHandle();
+        CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
+        if (bsx) {
+            if (bsx->IsForceOnlyNearFeats()) return false;
+        }
+    }
+
+    return true;
+}
+*/
+
+void CFlatGatherer::x_GatherFeaturesIdx(void) const
  {
      CBioseqContext& ctx = *m_Current;
      const CFlatFileConfig& cfg = ctx.Config();
+    if ( ! cfg.UseSeqEntryIndexer()) return;
+
+    CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
+    if (! idx) return;
+    CBioseq_Handle hdl = ctx.GetHandle();
+    CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
+    if (! bsx) return;
+
      CFlatItemOStream& out = *m_ItemOS;
      CConstRef<IFlatItem> item;
  
@@ -4059,96 +3961,144 @@ void CFlatGatherer::x_GatherFeatures(void) const
      }
  
      // collect features
-    // if ( ctx.IsSegmented()  &&  cfg.IsStyleMaster()  &&  cfg.OldFeaturesOrder() ) {
-    if ( cfg.UseSeqEntryIndexer() && ctx.IsDelta() && ! ctx.IsDeltaLitOnly() && cfg.IsStyleMaster() && ctx.GetLocation().IsWhole() ) {
+    if (ctx.GetLocation().IsWhole()) {
+        x_GatherFeaturesOnWholeLocationIdx(loc, sel, ctx);
+    } else {
+        x_GatherFeaturesOnRangeIdx(loc, sel, ctx);
+    }
+
+    if ( ctx.IsProt() ) {
+        // Also collect features that have this protein as their product.
+        // Currently there are only two possible candidates: Coding regions
+        // and Prot features (rare).
          
-        CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
-        if (! idx) return;
-        CBioseq_Handle hdl = ctx.GetHandle();
-        CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
-        if (! bsx) return;
+        // look for the Cdregion feature for this protein
+        CBioseq_Handle handle = ( ctx.CanGetMaster() ? ctx.GetMaster().GetHandle() : ctx.GetHandle() );
+        SAnnotSelector sel(CSeqFeatData::e_Cdregion);
+        sel.SetByProduct().SetResolveDepth(0);
+        // try first in-TSE CDS
+        sel.SetLimitTSE(handle.GetTSE_Handle());
+        CFeat_CI feat_it(handle, sel);
+        if ( !feat_it ) {
+            // then any other CDS
+            sel.SetLimitNone().ExcludeTSE(handle.GetTSE_Handle());
+            feat_it = CFeat_CI(handle, sel);
+        }
+        if (feat_it) {
+            try {
+                CMappedFeat cds = *feat_it;
  
-        // Gaps of length zero are only shown for SwissProt Genpept records
-        const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
+                // map CDS location to its location on the product
+                CSeq_loc_Mapper mapper(*cds.GetOriginalSeq_feat(),
+                    CSeq_loc_Mapper::eLocationToProduct,
+                    &ctx.GetScope());
+                mapper.SetFuzzOption( CSeq_loc_Mapper::fFuzzOption_CStyle | CSeq_loc_Mapper::fFuzzOption_RemoveLimTlOrTr );
+                CRef<CSeq_loc> cds_prod = mapper.Map(cds.GetLocation());
+                cds_prod = cds_prod->Merge( ( s_IsCircularTopology(ctx) ? CSeq_loc::fMerge_All : CSeq_loc::fSortAndMerge_All ), NULL );
  
-        const vector<CRef<CGapIndex>>& gaps = bsx->GetGapIndices();
+                // it's a common case that we map one residue past the edge of the protein (e.g. NM_131089).
+                // In that case, we shrink the cds's location back one residue.
+                if( cds_prod->IsInt() && cds.GetProduct().IsWhole() ) {
+                    const CSeq_id *cds_prod_seq_id = cds.GetProduct().GetId();
+                    if( cds_prod_seq_id != NULL ) {
+                        CBioseq_Handle prod_bioseq_handle = ctx.GetScope().GetBioseqHandle( *cds_prod_seq_id );
+                        if( prod_bioseq_handle ) {
+                            const TSeqPos bioseq_len = prod_bioseq_handle.GetBioseqLength();
+                            if( cds_prod->GetInt().GetTo() >= bioseq_len ) {
+                                cds_prod->SetInt().SetTo( bioseq_len - 1 );
+                            }
+                        }
+                    }
+                }
  
-        SGapIdxData gap_data{};
+                // if there are any gaps in the location, we know that there was an issue with the mapping, so
+                // we fall back on the product.
+                if( s_ContainsGaps(*cds_prod) ) {
+                    cds_prod->Assign( cds.GetProduct() );
+                }
  
-        gap_data.num_gaps = gaps.size();
-        gap_data.next_gap = 0;
+                // remove fuzz
+                cds_prod->SetPartialStart( false, eExtreme_Positional );
+                cds_prod->SetPartialStop ( false, eExtreme_Positional );
  
-        if (gap_data.num_gaps > 0 && ! ctx.Config().HideGapFeatures()) {
-            s_SetGapIdxData (gap_data, gaps);
+                item.Reset(
+                    x_NewFeatureItem(cds, ctx, &*cds_prod, m_Feat_Tree,
+                        CFeatureItem::eMapped_from_cdna) );
+
+                out << item;
+            } catch (CAnnotMapperException& e) {
+                LOG_POST_X(2, Error << e );
+            }
          }
  
-        SSeqMapSelector msel;
-        msel.SetFlags(CSeqMap::fFindAny);
-        CBioseq_Handle bsh = ctx.GetHandle();
+        // look for Prot features (only for RefSeq records or
+        // GenBank not release_mode).
+        if ( ctx.IsRefSeq()  ||  !cfg.ForGBRelease() ) {
+            SAnnotSelector prod_sel(CSeqFeatData::e_Prot, true);
+            prod_sel.SetLimitTSE(ctx.GetHandle().GetTopLevelEntry());
+            prod_sel.SetResolveMethod(SAnnotSelector::eResolve_TSE);
+            prod_sel.SetOverlapType(SAnnotSelector::eOverlap_Intervals);
+            CFeat_CI it(ctx.GetHandle(), prod_sel);
+            ctx.GetFeatTree().AddFeatures(it);
+            for ( ;  it;  ++it) {  
+                item.Reset(x_NewFeatureItem(*it,
+                                            ctx,
+                                            &it->GetProduct(),
+                                            m_Feat_Tree,
+                                            CFeatureItem::eMapped_from_prot) );
+                out << item;
+            }
+        }
+    }
+}
  
-        bool keepGoing = true;
-        bool noFeatsSeen = true;
-        int withoutFeats = 0;
+void CFlatGatherer::x_GatherFeatures(void) const
+{
+    CBioseqContext& ctx = *m_Current;
+    const CFlatFileConfig& cfg = ctx.Config();
  
-        SetDiagFilter(eDiagFilter_All, "!(1305.28,31)");
+    if (cfg.UseSeqEntryIndexer()) {
+        x_GatherFeaturesIdx();
+        return;
+    }
  
-        CConstRef<CSeqMap> seqmap;
-        if (ctx.GetLocation().IsWhole()) {
-            seqmap = &bsh.GetSeqMap();
-        } else {
-            seqmap = CSeqMap::CreateSeqMapForSeq_loc(loc, &ctx.GetScope());
-        }
+    CFlatItemOStream& out = *m_ItemOS;
+    CConstRef<IFlatItem> item;
  
-        for ( CSeqMap_CI seg(seqmap, &ctx.GetScope(), msel); seg; ++seg ) {
-            if (seg.GetType() != CSeqMap::eSeqGap) {
-                if (keepGoing) {
-                    // go over each of the segments
-                    ENa_strand strand = eNa_strand_unknown;
-                    if (seg.GetRefMinusStrand()) {
-                        strand = eNa_strand_minus;
-                    }
-                    // cout << "SEG " << seg.GetType() << " @ " << seg.GetPosition() << " - " << seg.GetEndPosition() << " " << seg.GetLength() << endl;
-                    CRef<CSeq_loc> sl = bsh.GetRangeSeq_loc(seg.GetPosition(), seg.GetEndPosition() - 1, strand);
-                    if (sl) {
-                        size_t count = x_GatherFeaturesOnSegmentIdx(*sl, *selp, ctx);
-                        if (count > 0) {
-                            noFeatsSeen = false;
-                        } else if (ctx.IsEMBL() || ctx.IsDDBJ()) {
-                            withoutFeats++;
-                            if (withoutFeats > 20 && noFeatsSeen) {
-                                keepGoing = false;
-                            }
-                        }
-                    }
-                }
-            } else {
-                // cout << "GAP " << seg.GetType() << " @ " << seg.GetPosition() << " - " << seg.GetEndPosition() << " " << seg.GetLength() << endl;
-                const bool noGapSizeProblem = ( false || (seg.GetPosition() < seg.GetEndPosition()) );
-                if( noGapSizeProblem /* && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) {
-                    CConstRef<IFlatItem> item;
-                    if (gap_data.has_gap) {
-                        const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_data.gap_start < gap_data.gap_end) );
-                        if( noGapSizeProblem /* && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) {
-                            item.Reset( s_NewGapItem(gap_data.gap_start, gap_data.gap_end, gap_data.gap_length, gap_data.gap_type,
-                                        gap_data.gap_evidence, gap_data.is_unknown_length, gap_data.is_assembly_gap, ctx) );
-                            out << item;
-                        }
-                        if (gap_data.next_gap < gap_data.num_gaps) {
-                            s_SetGapIdxData (gap_data, gaps);
-                        } else {
-                            gap_data.has_gap = false;
-                        }
-                    }
-                }
+    SAnnotSelector sel;
+    SAnnotSelector* selp = &sel;
+    if (ctx.GetAnnotSelector() != NULL) {
+        selp = &ctx.SetAnnotSelector();
+    }
+    s_SetSelection(*selp, ctx);
+
+    // optionally map gene from genomic onto cDNA
+    if ( ctx.IsInGPS()  &&  cfg.CopyGeneToCDNA()  &&
+         ctx.GetBiomol() == CMolInfo::eBiomol_mRNA ) {
+        CMappedFeat mrna = GetMappedmRNAForProduct(ctx.GetHandle());
+        if (mrna) {
+            CMappedFeat gene = GetBestGeneForMrna(mrna, &ctx.GetFeatTree());
+            if (gene) {
+                CRef<CSeq_loc> loc(new CSeq_loc);
+                loc->SetWhole(*ctx.GetPrimaryId());
+                item.Reset( 
+                    x_NewFeatureItem(gene, ctx, loc, m_Feat_Tree,
+                                     CFeatureItem::eMapped_from_genomic) );
+                out << item;
              }
          }
+    }
  
-        SetDiagFilter(eDiagFilter_All, "");
-
+    CSeq_loc loc;
+    if ( ctx.GetMasterLocation() != 0 ) {
+        loc.Assign(*ctx.GetMasterLocation());
      } else {
-        x_GatherFeaturesOnLocation(loc, *selp, ctx);
+        loc.Assign(*ctx.GetHandle().GetRangeSeq_loc(0, 0));
      }
  
+    // collect features
+    x_GatherFeaturesOnLocation(loc, *selp, ctx);
+
      if ( ctx.IsProt() ) {
          // Also collect features which this protein is their product.
          // Currently there are only two possible candidates: Coding regions
@@ -4334,7 +4284,6 @@ void s_FixIntervalProtToCds(
  
  //  ============================================================================
  void CFlatGatherer::x_GetFeatsOnCdsProductIdx(
-    CMappedFeat mf,
      const CSeq_feat& feat,
      CBioseqContext& ctx,
      CRef<CSeq_loc_Mapper> slice_mapper,
@@ -4359,42 +4308,25 @@ void CFlatGatherer::x_GetFeatsOnCdsProductIdx(
  
      CBioseq_Handle  prot;
  
-    prot = scope.GetBioseqHandleFromTSE(*prot_id, ctx.GetHandle());
+    // prot = scope.GetBioseqHandleFromTSE(*prot_id, ctx.GetHandle());
+    prot = scope.GetBioseqHandle(*prot_id);
      // !!! need a flag for fetching far proteins
      if (!prot) {
          return;
      }
-
-    CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
-    if (! idx) {
-        return;
-    }
-    
-    CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (prot);
-    if (! bsx) return;
-
-    /*
      CFeat_CI it(prot, s_GetCdsProductSel(ctx));
      if (!it) {
          return;
      }
      ctx.GetFeatTree().AddFeatures( it ); // !!!
-    */
  
      // map from cds product to nucleotide
      CSeq_loc_Mapper prot_to_cds(feat, CSeq_loc_Mapper::eProductToLocation, &scope);
      prot_to_cds.SetFuzzOption( CSeq_loc_Mapper::fFuzzOption_CStyle );
      
      CSeq_feat_Handle prev;  // keep track of the previous feature
-    /*
-    for ( ; it; ++it )
-    */
-    bsx->IterateFeatures([this, &ctx, &scope, &prev, &cfg, &prot_to_cds, &slice_mapper, &cdsFeatureItem, bsx](CFeatureIndex& sfx) {
-
-        CMappedFeat mf = sfx.GetMappedFeat();
-        CSeq_feat_Handle curr = sfx.GetSeqFeatHandle(); // it->GetSeq_feat_Handle();
-        const CSeq_feat& original_feat = sfx.GetMappedFeat().GetOriginalFeature(); // it->GetOriginalFeature();
-
+    for ( ; it; ++it ) {
+        CSeq_feat_Handle curr = it->GetSeq_feat_Handle();
          const CSeq_loc& curr_loc = curr.GetLocation();
          CSeqFeatData::ESubtype subtype = curr.GetFeatSubtype();
  
@@ -4406,24 +4338,24 @@ void CFlatGatherer::x_GetFeatsOnCdsProductIdx(
              subtype != CSeqFeatData::eSubtype_transit_peptide_aa &&
              subtype != CSeqFeatData::eSubtype_preprotein &&
              subtype != CSeqFeatData::eSubtype_propeptide_aa) {
-            return;
+            continue;
          }
  
-        if ( cfg.HideCDDFeatures()  &&
+        if ( ( cfg.HideCDDFeatures() || ! cfg.ShowCDDFeatures() )  &&
               (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site)  &&
               s_IsCDD(curr) ) {
              // passing this test prevents mapping of COG CDD region features
-            return;
+            continue;
          }
  
          // suppress duplicate features (on protein)
          if (prev  &&  s_IsDuplicateFeatures(curr, prev)) {
-            return;
+            continue;
          }
  
          /// we need to cleanse CDD features
  
-        s_CleanCDDFeature(original_feat);
+        s_CleanCDDFeature(it->GetOriginalFeature());
  
          // map prot location to nuc location
          CRef<CSeq_loc> loc(prot_to_cds.Map(curr_loc));
@@ -4438,20 +4370,20 @@ void CFlatGatherer::x_GetFeatsOnCdsProductIdx(
              }
          }
          if (!loc  ||  loc->IsNull()) {
-            return;
+            continue;
          }
          if ( !s_SeqLocEndsOnBioseq(*loc, ctx, eEndsOnBioseqOpt_AnyPartOfSeqLoc, CSeqFeatData::e_Cdregion) ) {
-            return;
+            continue;
          }
  
          CConstRef<IFlatItem> item;
          // for command-line args "-from" and "-to"
-        CMappedFeat mapped_feat = mf;
+        CMappedFeat mapped_feat = *it;
          if( slice_mapper && loc ) {
              CRange<TSeqPos> range = ctx.GetLocation().GetTotalRange();
              CRef<CSeq_loc> mapped_loc = slice_mapper->Map(*CFeatTrim::Apply(*loc, range));
              if( mapped_loc->IsNull() ) {
-                return;
+                continue;
              }
              CRef<CSeq_feat> feat(new CSeq_feat());
              feat->Assign(mapped_feat.GetMappedFeature());
@@ -4461,7 +4393,7 @@ void CFlatGatherer::x_GetFeatsOnCdsProductIdx(
              loc = mapped_loc;
          }
  
-        item = ConstRef( x_NewFeatureItem(mapped_feat, ctx, 
+        item = ConstRef( x_NewFeatureItem(*it, ctx, 
              s_NormalizeNullsBetween(loc), m_Feat_Tree,
              CFeatureItem::eMapped_from_prot, true,
              cdsFeatureItem ) );
@@ -4469,7 +4401,7 @@ void CFlatGatherer::x_GetFeatsOnCdsProductIdx(
          *m_ItemOS << item;
  
          prev = curr;
-    });  //  end of iterate loop
+    }    
  }
  
  //  ============================================================================
diff --git a/c++/src/objtools/format/gbseq_formatter.cpp b/c++/src/objtools/format/gbseq_formatter.cpp

index 17e698aad87207ec3dbee44fbf032d52939b1ab7..94b008373eb87fc4b337ec4f316c5dcf7cf92cb6 100644 (file)
--- a/c++/src/objtools/format/gbseq_formatter.cpp
+++ b/c++/src/objtools/format/gbseq_formatter.cpp
@@ -1,4 +1,4 @@
-/*  $Id: gbseq_formatter.cpp 601813 2020-02-13 18:41:46Z kans $
+/*  $Id: gbseq_formatter.cpp 614619 2020-08-20 13:00:42Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -806,8 +806,8 @@ void CGBSeqFormatter::FormatReference
          str.append( s_CloseTag("          ", "GBXref"));
          str.append( s_CloseTag("        ", "GBReference_xref"));
      }
-    if ( ref.GetPMID() != 0 ) {
-        str.append( s_CombineStrings("        ", "GBReference_pubmed", ref.GetPMID()));
+    if ( ref.GetPMID() != ZERO_ENTREZ_ID ) {
+        str.append( s_CombineStrings("        ", "GBReference_pubmed", ENTREZ_ID_TO(int, ref.GetPMID())));
      }
      if ( !ref.GetRemark().empty() ) {
          str.append( s_CombineStrings("        ", "GBReference_remark", ref.GetRemark()));
diff --git a/c++/src/objtools/format/genbank_formatter.cpp b/c++/src/objtools/format/genbank_formatter.cpp

index e75d730787c7bfb57752fe1653065c6baf30c557..54474e0c3a4abc23ea5fcde69bba989c34016f89 100644 (file)
--- a/c++/src/objtools/format/genbank_formatter.cpp
+++ b/c++/src/objtools/format/genbank_formatter.cpp
@@ -1,4 +1,4 @@
-/*  $Id: genbank_formatter.cpp 602692 2020-02-28 22:11:47Z kans $
+/*  $Id: genbank_formatter.cpp 615046 2020-08-26 13:40:11Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -523,7 +523,7 @@ void CGenbankFormatter::FormatVersion
          version_line << version.GetAccession();
          if ( version.GetGi() > ZERO_GI ) {
              const CFlatFileConfig& cfg = GetContext().GetConfig();
-            if (! cfg.HideGI()) {
+            if (! (cfg.HideGI() || cfg.IsPolicyFtp())) {
                  version_line << "  GI:" << version.GetGi();
              }
          }
@@ -852,7 +852,7 @@ void CGenbankFormatter::FormatReference
      x_Consortium(l, ref, ctx);
      x_Title(l, ref, ctx);
      x_Journal(l, ref, ctx);
-    if (ref.GetPMID() == 0) {  // suppress MEDLINE if has PUBMED
+    if (ref.GetPMID() == ZERO_ENTREZ_ID) {  // suppress MEDLINE if has PUBMED
          x_Medline(l, ref, ctx);
      }
      x_Pubmed(l, ref, ctx);
@@ -1054,10 +1054,10 @@ void CGenbankFormatter::x_Medline
      bool bHtml = ctx.Config().DoHTML();
  
      string strDummy( "[PUBMED-ID]" );
-    if ( ref.GetMUID() != 0 ) {
+    if ( ref.GetMUID() != ZERO_ENTREZ_ID) {
          Wrap(l, GetWidth(), "MEDLINE", strDummy, eSubp);
      }
-    string strPubmed( NStr::IntToString( ref.GetMUID() ) );
+    string strPubmed( NStr::NumericToString( ref.GetMUID() ) );
      if ( bHtml ) {
          string strLink = "<a href=\"";
          strLink += strLinkBasePubmed;
@@ -1079,10 +1079,10 @@ void CGenbankFormatter::x_Pubmed
   CBioseqContext& ctx) const
  {
      
-    if ( ref.GetPMID() == 0 ) {
+    if ( ref.GetPMID() == ZERO_ENTREZ_ID) {
          return;
      }
-    string strPubmed = NStr::IntToString( ref.GetPMID() );
+    string strPubmed = NStr::NumericToString( ref.GetPMID() );
      if ( ctx.Config().DoHTML() ) {
          string strRaw = strPubmed;
          strPubmed = "<a href=\"https://www.ncbi.nlm.nih.gov/pubmed/";
@@ -1498,7 +1498,7 @@ bool s_GetLinkFeatureKey(
      // assembly of the actual string:
         strLink.reserve(100); // euristical URL length
  #ifdef NEW_HTML_FMT
-    item.GetContext()->Config().GetHTMLFormatter().FormatLocation(strLink, item.GetFeat().GetLocation(), iGi, strRawKey);
+    item.GetContext()->Config().GetHTMLFormatter().FormatLocation(strLink, item.GetFeat().GetLocation(), GI_TO(TIntId, iGi), strRawKey);
  #else
      // check if this is a protein or nucleotide link
      bool is_prot = false;
@@ -1913,7 +1913,7 @@ s_FormatRegularSequencePiece
                  fill(line, line+kLineBufferSize, ' ');
  
                  // add the span stuff
-                TSeqPos length_of_span_before_base_count = 0;
+                length_of_span_before_base_count = 0;
                  if( bHtml ) {
                      string kSpan = " <span class=\"ff_line\" id=\"";
                      kSpan += accn;
diff --git a/c++/src/objtools/format/genbank_gather.cpp b/c++/src/objtools/format/genbank_gather.cpp

index 6ae41e51da43275f1f6d68773fa93675a106641d..374ded8866d69dcc23ab820207e4699249257973 100644 (file)
--- a/c++/src/objtools/format/genbank_gather.cpp
+++ b/c++/src/objtools/format/genbank_gather.cpp
@@ -1,4 +1,4 @@
-/*  $Id: genbank_gather.cpp 602636 2020-02-27 20:27:11Z kans $
+/*  $Id: genbank_gather.cpp 612549 2020-07-23 15:33:36Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -214,7 +214,7 @@ void CGenbankGatherer::x_DoSingleSection(CBioseqContext& ctx) const
      {
          GATHER_VIA_FUNC(Tsa, x_GatherTLS);
      } else if ( ctx.DoContigStyle() ) {
-        if ( cfg.ShowContigFeatures() ) {
+        if ( cfg.ShowContigFeatures() || cfg.IsPolicyFtp() ) {
              GATHER_VIA_FUNC(FeatAndGap, x_GatherFeatures);
          }
          else if ( cfg.IsModeEntrez() && m_Current->GetLocation().IsWhole()) {
diff --git a/c++/src/objtools/format/genome_project_item.cpp b/c++/src/objtools/format/genome_project_item.cpp

index 13a25fdc3713e005b75a5cd185b465696663bdac..734804fdab4a5b103bea3a50778a592e55536a46 100644 (file)
--- a/c++/src/objtools/format/genome_project_item.cpp
+++ b/c++/src/objtools/format/genome_project_item.cpp
@@ -1,4 +1,4 @@
-/*  $Id: genome_project_item.cpp 577454 2019-01-03 22:58:25Z kans $
+/*  $Id: genome_project_item.cpp 615791 2020-09-03 18:19:35Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -205,12 +205,16 @@ void CGenomeProjectItem::x_GatherInfo(CBioseqContext& ctx)
          }
          string strHeader = uo.GetType().GetStr();
          if ( NStr::EqualNocase(strHeader, "GenomeProjectsDB")) {
-            genome_projects_user_obje = &uo;
-                       x_SetObject(*desc);
-               } else if( NStr::EqualNocase( strHeader, "DBLink" ) ) {
-            dblink_user_obj = &uo;
-                       x_SetObject(*desc);
-               }
+            if (! genome_projects_user_obje) {
+                genome_projects_user_obje = &uo;
+                x_SetObject(*desc);
+            }
+        } else if( NStr::EqualNocase( strHeader, "DBLink" ) ) {
+            if (! dblink_user_obj) {
+                dblink_user_obj = &uo;
+                x_SetObject(*desc);
+            }
+        }
      }
  
      // process GenomeProjectsDB
diff --git a/c++/src/objtools/format/inst_info_map.cpp b/c++/src/objtools/format/inst_info_map.cpp

index a6c1df8417ed1b9faa84897b7dc37701706d2843..78e7b30c556f1f3cceeb7748be65b5a8e9d91a2a 100644 (file)
--- a/c++/src/objtools/format/inst_info_map.cpp
+++ b/c++/src/objtools/format/inst_info_map.cpp
@@ -1,4 +1,4 @@
-/* $Id: inst_info_map.cpp 601754 2020-02-12 23:10:12Z kans $
+/* $Id: inst_info_map.cpp 611903 2020-07-13 15:51:00Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -49,7 +49,7 @@ CInstInfoMap::GetInstitutionVoucherInfo(
  
      static const string  s_acbr_base("http://www.acbr-database.at/BioloMICS.aspx?Link=T&DB=0&Table=0&Descr=");
      static const string  s_atcc_base("http://www.atcc.org/Products/All/");
-    static const string  s_bccm_base("http://bccm.belspo.be/catalogues/ulc-strain-details?ACCESSION_NUMBER=ULC");
+    static const string  s_bccm_base("https://bccm.belspo.be/catalogues/bm-details?accession_number=ULC%20");
      static const string  s_bcrc_base("https://catalog.bcrc.firdi.org.tw/BSAS_cart/controller?event=SEARCH&bcrc_no=");
      static const string  s_cas_base("http://collections.calacademy.org/herp/specimen/");
      static const string  s_cbs_base("http://www.cbs.knaw.nl/collections/BioloMICS.aspx?Fields=All&ExactMatch=T&Table=CBS+strain+database&Name=CBS+");
@@ -90,9 +90,11 @@ CInstInfoMap::GetInstitutionVoucherInfo(
  
      static const string yp0("0");
  
+    static const string s_bccm_trim("ULC");
+
      static const string s_colon_pfx(":");
      static const string s_uscr_pfx("_");
-    
+
      static const string s_kui_pfx("KUI/");
      static const string s_kuit_pfx("KUIT/");
      static const string s_psu_pfx("PSU:Mamm:");
@@ -113,114 +115,114 @@ CInstInfoMap::GetInstitutionVoucherInfo(
  
      typedef SStaticPair<const char*, TVoucherInfoRef> TVoucherInfoElem;
      static const TVoucherInfoElem sc_voucher_info_map[] = {
-        { "ACBR",             TVoucherInfoRef(new SVoucherInfo(&s_acbr_base,  false, false, 0, NULL,   NULL,          &s_acbr_sfx, "Austrian Center of Biological Resources and Applied Mycology") ) },
-        { "ATCC",             TVoucherInfoRef(new SVoucherInfo(&s_atcc_base,  false, false, 0, NULL,   NULL,          &s_atcc_sfx, "American Type Culture Collection") ) },
-        { "BCCM",             TVoucherInfoRef(new SVoucherInfo(&s_bccm_base,  false, false, 0, NULL,   NULL,          NULL,        "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) },
-        { "BCCM/ULC",         TVoucherInfoRef(new SVoucherInfo(&s_bccm_base,  false, false, 0, NULL,   NULL,          NULL,        "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) },
-        { "BCRC",             TVoucherInfoRef(new SVoucherInfo(&s_bcrc_base,  false, false, 0, NULL,   NULL,          &s_bcrc_sfx, "Bioresource Collection and Research Center") ) },
-        { "CAS:HERP",         TVoucherInfoRef(new SVoucherInfo(&s_cas_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "California Academy of Sciences, Herpetology collection") ) },
-        { "CBS",              TVoucherInfoRef(new SVoucherInfo(&s_cbs_base,   false, false, 0, NULL,   NULL,          NULL,        "Westerdijk Fungal Biodiversity Institute") ) },
-        { "CCAP",             TVoucherInfoRef(new SVoucherInfo(&s_ccap_base,  false, false, 0, NULL,   NULL,          NULL,        "Culture Collection of Algae and Protozoa") ) },
-        { "CCMP",             TVoucherInfoRef(new SVoucherInfo(&s_ccmp_base,  false, false, 0, NULL,   NULL,          NULL,        "Provasoli-Guillard National Center for Culture of Marine Phytoplankton") ) },
-        { "CCUG",             TVoucherInfoRef(new SVoucherInfo(&s_ccug_base,  false, false, 0, NULL,   NULL,          NULL,        "Culture Collection, University of Goteborg, Department of Clinical Bacteriology") ) },
-        { "CFMR",             TVoucherInfoRef(new SVoucherInfo(&s_cfmr_base,  false, false, 0, NULL,   NULL,          NULL,        "USDA Forest Service, Center for Forest Mycology Research") ) },
-        { "CHR",              TVoucherInfoRef(new SVoucherInfo(&s_lcr_base,   true,  false, 0, NULL,   &s_uscr_pfx,   NULL,        "Allan Herbarium, Landcare Research New Zealand Limited") ) },
-        { "CRCM:Bird",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Washington State University, Charles R. Conner Museum, bird collection") ) },
-        { "CUMV:Fish",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Cornell University Museum of Vertebrates, Fish Collection") ) },
-        { "Coriell",          TVoucherInfoRef(new SVoucherInfo(&s_cori_base,  false, false, 0, NULL,   NULL,          NULL,        "Coriell Institute for Medical Research") ) },
-        { "DGR:Bird",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Division of Genomic Resources, University of New Mexico, bird tissue collection") ) },
-        { "DGR:Ento",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Division of Genomic Resources, University of New Mexico, entomology tissue collection") ) },
-        { "DGR:Fish",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Division of Genomic Resources, University of New Mexico, fish tissue collection") ) },
-        { "DGR:Herp",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Division of Genomic Resources, University of New Mexico, herpetology tissue collection") ) },
-        { "DGR:Mamm",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Division of Genomic Resources, University of New Mexico, mammal tissue collection") ) },
-        { "DMNS:Bird",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Denver Museum of Nature and Science, Ornithology Collections") ) },
-        { "DMNS:Mamm",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Denver Museum of Nature and Science, Mammology Collection") ) },
-        { "DMNS:Para",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Denver Museum of Nature and Science, Parasite Collection") ) },
-        { "DSM",              TVoucherInfoRef(new SVoucherInfo(&s_dsm_base,   false, false, 0, NULL,   NULL,          NULL,        "Deutsche Sammlung von Mikroorganismen und Zellkulturen GmbH") ) },
-        { "DSMZ",             TVoucherInfoRef(new SVoucherInfo(&s_dsmz_base,  false, false, 0, NULL,   NULL,          NULL,        "Deutsche Sammlung von Mikroorganismen und Zellkulturen") ) },
-        { "EMEC",             TVoucherInfoRef(new SVoucherInfo(&s_emec_base,  true,  false, 0, NULL,   NULL,          NULL,        "Essig Museum") ) },
-        { "EMEC:EMEC",        TVoucherInfoRef(new SVoucherInfo(&s_emec_base,  false, true,  0, NULL,   NULL,          NULL,        "Essig Museum") ) },
-        { "EMEC:UCIS",        TVoucherInfoRef(new SVoucherInfo(&s_emec_base,  false, true,  0, NULL,   NULL,          NULL,        "Essig Museum") ) },
-        { "FRR",              TVoucherInfoRef(new SVoucherInfo(&s_frr_base,   false, false, 0, NULL,   NULL,          NULL,        "Food Science Australia, Ryde") ) },
-        { "FSU<DEU>",         TVoucherInfoRef(new SVoucherInfo(&s_fsu_base,   false, false, 0, NULL,   NULL,          NULL,        "Jena Microbial Resource Collection") ) },
-        { "ICMP",             TVoucherInfoRef(new SVoucherInfo(&s_lcr_base,   true,  false, 0, NULL,   &s_uscr_pfx,   NULL,        "International Collection of Microorganisms from Plants") ) },
-        { "JCM",              TVoucherInfoRef(new SVoucherInfo(&s_jcm_base,   false, false, 0, NULL,   NULL,          NULL,        "Japan Collection of Microorganisms") ) },
-        { "KCTC",             TVoucherInfoRef(new SVoucherInfo(&s_kctc_base,  false, false, 0, NULL,   NULL,          NULL,        "Korean Collection for Type Cultures") ) },
-        { "KNWR:Ento",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Kenai National Wildlife Refuge, Entomology Collection") ) },
-        { "KU:I",             TVoucherInfoRef(new SVoucherInfo(&s_ku_base,    false, false, 0, NULL,   &s_kui_pfx,    &s_ku_sfx,   "University of Kansas, Museum of Natural History, Ichthyology collection") ) },
-        { "KU:IT",            TVoucherInfoRef(new SVoucherInfo(&s_ku_base,    false, false, 0, NULL,   &s_kuit_pfx,   &s_ku_sfx,   "University of Kansas, Museum of Natural History, Ichthyology tissue collection") ) },
-        { "KWP:Ento",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Kenelm W. Philip Collection, University of Alaska Museum of the North, Lepidoptera collection") ) },
-        { "MAFF",             TVoucherInfoRef(new SVoucherInfo(&s_maff_base,  false, false, 0, NULL,   NULL,          NULL,        "Genebank, Ministry of Agriculture Forestry and Fisheries") ) },
-        { "MCZ:Bird",         TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Ornithology Collection") ) },
-        { "MCZ:Cryo",         TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Cryogenic Collection") ) },
-        { "MCZ:Ent",          TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Entomology Collection") ) },
-        { "MCZ:Fish",         TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Icthyology Collection") ) },
-        { "MCZ:Herp",         TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Herpetology Collection") ) },
-        { "MCZ:IP",           TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Invertebrate Paleontology Collection") ) },
-        { "MCZ:IZ",           TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Invertebrate Zoology Collection") ) },
-        { "MCZ:Ich",          TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Icthyology Collection") ) },
-        { "MCZ:Mala",         TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Malacology Collection") ) },
-        { "MCZ:Mamm",         TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Mammalogy Collection") ) },
-        { "MCZ:Orn",          TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Ornithology Collection") ) },
-        { "MLZ:Bird",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Moore Laboratory of Zoology, Occidental College, Bird Collection" ) ) },
-        { "MLZ:Mamm",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Moore Laboratory of Zoology, Occidental College, Mammal Collection" ) ) },
-        { "MSB:Bird",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Museum of Southwestern Biology, Bird Collection") ) },
-        { "MSB:Mamm",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Museum of Southwestern Biology, Mammal Collection") ) },
-        { "MSB:Para",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Museum of Southwestern Biology, Parasitology Collection") ) },
-        { "MTCC",             TVoucherInfoRef(new SVoucherInfo(&s_mtcc_base,  false, false, 0, NULL,   NULL,          NULL,        "Microbial Type Culture Collection & Gene Bank") ) },
-        { "MUCL",             TVoucherInfoRef(new SVoucherInfo(&s_mucl_base,  false, false, 0, NULL,   NULL,          &s_mucl_sfx, "Mycotheque de l'Universite Catholique de Louvain") ) },
-        { "MVZ:Bird",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Bird Collection") ) },
-        { "MVZ:Egg",          TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Egg Collection") ) },
-        { "MVZ:Herp",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Herpetology Collection") ) },
-        { "MVZ:Hild",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Milton Hildebrand collection") ) },
-        { "MVZ:Img",          TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Image Collection") ) },
-        { "MVZ:Mamm",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Mammal Collection") ) },
-        { "MVZ:Page",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Notebook Page Collection") ) },
-        { "MVZObs:Herp",      TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Herpetology Collection") ) },
-        { "NBRC",             TVoucherInfoRef(new SVoucherInfo(&s_nbrc_base,  false, false, 8, &yp0,   NULL,          NULL,        "NITE Biological Resource Center") ) },
-        { "NBSB:Bird",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "National Biomonitoring Specimen Bank, U.S. Geological Survey, bird collection") ) },
-        { "NCIMB",            TVoucherInfoRef(new SVoucherInfo(&s_ncimb_base, false, false, 0, NULL,   NULL,          NULL,        "National Collections of Industrial Food and Marine Bacteria (incorporating the NCFB)") ) },
-        { "NCTC",             TVoucherInfoRef(new SVoucherInfo(&s_nctc_base,  false, false, 0, NULL,   NULL,          NULL,        "National Collection of Type Cultures") ) },
-        { "NRRL",             TVoucherInfoRef(new SVoucherInfo(&s_nrrl_base,  false, false, 0, NULL,   NULL,          NULL,        "Agricultural Research Service Culture Collection") ) },
-        { "NRRL:MOLD",        TVoucherInfoRef(new SVoucherInfo(&s_nrrl_mold,  false, false, 0, NULL,   NULL,          NULL,        "Agricultural Research Service Culture Collection, Mold collection") ) },
-        { "NRRL:PROK",        TVoucherInfoRef(new SVoucherInfo(&s_nrrl_prok,  false, false, 0, NULL,   NULL,          NULL,        "Agricultural Research Service Culture Collection, Prokaryotic collection") ) },
-        { "NRRL:YEAST",       TVoucherInfoRef(new SVoucherInfo(&s_nrrl_yest,  false, false, 0, NULL,   NULL,          NULL,        "Agricultural Research Service Culture Collection, Yeast Collection") ) },
-        { "NZAC",             TVoucherInfoRef(new SVoucherInfo(&s_lcr_base,   true,  false, 0, NULL,   &s_uscr_pfx,   NULL,        "New Zealand Arthropod Collection") ) },
-        { "PCC",              TVoucherInfoRef(new SVoucherInfo(&s_pcc_base,   false, false, 0, NULL,   NULL,          NULL,        "Pasteur Culture Collection of Cyanobacteria") ) },
-        { "PCMB",             TVoucherInfoRef(new SVoucherInfo(&s_pcmb_base,  false, false, 0, NULL,   NULL,          NULL,        "The Pacific Center for Molecular Biodiversity") ) },
-        { "PDD",              TVoucherInfoRef(new SVoucherInfo(&s_lcr_base,   true,  false, 0, NULL,   &s_uscr_pfx,   NULL,        "New Zealand Fungarium") ) },
-        { "PSU<USA-OR>:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   false, false, 0, NULL,   &s_psu_pfx,    NULL,        "Portland State University, Vertebrate Biology Museum, Mammal Collection") ) },
-        { "PYCC",             TVoucherInfoRef(new SVoucherInfo(&s_pycc_base,  false, false, 0, NULL,   NULL,          &s_pycc_sfx, "Portuguese Yeast Culture Collection") ) },
-        { "SAG",              TVoucherInfoRef(new SVoucherInfo(&s_sag_base,   false, false, 0, NULL,   NULL,          NULL,        "Sammlung von Algenkulturen at Universitat Gottingen") ) },
-        { "TGRC",             TVoucherInfoRef(new SVoucherInfo(&s_tgrc_base,  false, false, 0, NULL,   NULL,          NULL,        "C.M. Rick Tomato Genetics Resource Center") ) },
-        { "UAM:Bird",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Bird Collection") ) },
-        { "UAM:Bryo",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Bryozoan Collection") ) },
-        { "UAM:Crus",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Marine Arthropod Collection") ) },
-        { "UAM:Ento",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Insect Collection") ) },
-        { "UAM:Fish",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Fish Collection") ) },
-        { "UAM:Herb",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, UAM Herbarium") ) },
-        { "UAM:Herp",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Amphibian and Reptile Collection") ) },
-        { "UAM:Mamm",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Mammal Collection") ) },
-        { "UAM:Moll",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Mollusc Collection") ) },
-        { "UAM:Paleo",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, paleontology collection") ) },
-        { "UAMH",             TVoucherInfoRef(new SVoucherInfo(&s_uamh_base,  false, false, 0, NULL,   NULL,          NULL,        "Centre for Global Microfungal Biodiversity") ) },
-        { "UAMObs:Mamm",      TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Mammal Collection") ) },
-        { "ULC",              TVoucherInfoRef(new SVoucherInfo(&s_bccm_base,  false, false, 0, NULL,   NULL,          NULL,        "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) },
-        { "USNM:Birds",       TVoucherInfoRef(new SVoucherInfo(&s_usnm_base,  false, true,  0, NULL,   &s_colon_pfx,  NULL,        "National Museum of Natural History, Smithsonian Institution, Division of Birds") ) },
-        { "USNM:ENT",         TVoucherInfoRef(new SVoucherInfo(&s_usnm_base,  false, true,  0, NULL,   &s_colon_pfx,  NULL,        "National Museum of Natural History, Smithsonian Institution, Entomology Collection") ) },
-        { "USNM:Fish",        TVoucherInfoRef(new SVoucherInfo(&s_usnm_base,  false, true,  0, NULL,   &s_colon_pfx,  NULL,        "National Museum of Natural History, Smithsonian Institution, National Fish Collection") ) },
-        { "USNM:Herp",        TVoucherInfoRef(new SVoucherInfo(&s_usnm_base,  false, true,  0, NULL,   &s_colon_pfx,  NULL,        "National Museum of Natural History, Smithsonian Institution, Division of Amphibians and Reptiles") ) },
-        { "USNM:IZ",          TVoucherInfoRef(new SVoucherInfo(&s_usnm_base,  false, true,  0, NULL,   &s_colon_pfx,  NULL,        "National Museum of Natural History, Smithsonian Institution, Department of Invertebrate Zoology") ) },
-        { "USNM:MAMM",        TVoucherInfoRef(new SVoucherInfo(&s_usnm_base,  false, true,  0, NULL,   &s_colon_pfx,  NULL,        "National Museum of Natural History, Smithsonian Institution, Division of Mammals") ) },
-        { "WNMU:Bird",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Western New Mexico University Museum, bird collection") ) },
-        { "WNMU:Fish",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Western New Mexico University Museum, fish collection") ) },
-        { "WNMU:Mamm",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   &s_colon_pfx,  NULL,        "Western New Mexico University Museum, mammal collection") ) },
-        { "YPM:ENT",          TVoucherInfoRef(new SVoucherInfo(&s_ypm_base,   false, false, 6, &yp0,   &s_ypment_pfx, NULL,        "Yale Peabody Museum of Natural History, Entomology Collection") ) },
-        { "YPM:HER",          TVoucherInfoRef(new SVoucherInfo(&s_ypm_base,   false, false, 6, &yp0,   &s_ypmher_pfx, NULL,        "Yale Peabody Museum of Natural History, Herpetology Collection") ) },
-        { "YPM:ICH",          TVoucherInfoRef(new SVoucherInfo(&s_ypm_base,   false, false, 6, &yp0,   &s_ypmich_pfx, NULL,        "Yale Peabody Museum of Natural History, Ichthyology Collection") ) },
-        { "YPM:IZ",           TVoucherInfoRef(new SVoucherInfo(&s_ypm_base,   false, false, 6, &yp0,   &s_ypmiz_pfx,  NULL,        "Yale Peabody Museum of Natural History, Invertebrate Zoology Collection") ) },
-        { "YPM:MAM",          TVoucherInfoRef(new SVoucherInfo(&s_ypm_base,   false, false, 6, &yp0,   &s_ypmmam_pfx, NULL,        "Yale Peabody Museum of Natural History, Mammology Collection") ) },
-        { "YPM:ORN",          TVoucherInfoRef(new SVoucherInfo(&s_ypm_base,   false, false, 6, &yp0,   &s_ypmorn_pfx, NULL,        "Yale Peabody Museum of Natural History, Ornithology Collection") ) }
+        { "ACBR",             TVoucherInfoRef(new SVoucherInfo(&s_acbr_base,  false, false, 0, NULL,   NULL,          NULL,          &s_acbr_sfx, "Austrian Center of Biological Resources and Applied Mycology") ) },
+        { "ATCC",             TVoucherInfoRef(new SVoucherInfo(&s_atcc_base,  false, false, 0, NULL,   NULL,          NULL,          &s_atcc_sfx, "American Type Culture Collection") ) },
+        { "BCCM",             TVoucherInfoRef(new SVoucherInfo(&s_bccm_base,  false, false, 4, &yp0,   NULL,          NULL,          NULL,        "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) },
+        { "BCCM/ULC",         TVoucherInfoRef(new SVoucherInfo(&s_bccm_base,  false, false, 4, &yp0,   &s_bccm_trim,   NULL,         NULL,        "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) },
+        { "BCRC",             TVoucherInfoRef(new SVoucherInfo(&s_bcrc_base,  false, false, 0, NULL,   &s_bccm_trim,   NULL,         &s_bcrc_sfx, "Bioresource Collection and Research Center") ) },
+        { "CAS:HERP",         TVoucherInfoRef(new SVoucherInfo(&s_cas_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "California Academy of Sciences, Herpetology collection") ) },
+        { "CBS",              TVoucherInfoRef(new SVoucherInfo(&s_cbs_base,   false, false, 0, NULL,   NULL,          NULL,          NULL,        "Westerdijk Fungal Biodiversity Institute") ) },
+        { "CCAP",             TVoucherInfoRef(new SVoucherInfo(&s_ccap_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Culture Collection of Algae and Protozoa") ) },
+        { "CCMP",             TVoucherInfoRef(new SVoucherInfo(&s_ccmp_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Provasoli-Guillard National Center for Culture of Marine Phytoplankton") ) },
+        { "CCUG",             TVoucherInfoRef(new SVoucherInfo(&s_ccug_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Culture Collection, University of Goteborg, Department of Clinical Bacteriology") ) },
+        { "CFMR",             TVoucherInfoRef(new SVoucherInfo(&s_cfmr_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "USDA Forest Service, Center for Forest Mycology Research") ) },
+        { "CHR",              TVoucherInfoRef(new SVoucherInfo(&s_lcr_base,   true,  false, 0, NULL,   NULL,          &s_uscr_pfx,   NULL,        "Allan Herbarium, Landcare Research New Zealand Limited") ) },
+        { "CRCM:Bird",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Washington State University, Charles R. Conner Museum, bird collection") ) },
+        { "CUMV:Fish",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Cornell University Museum of Vertebrates, Fish Collection") ) },
+        { "Coriell",          TVoucherInfoRef(new SVoucherInfo(&s_cori_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Coriell Institute for Medical Research") ) },
+        { "DGR:Bird",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Division of Genomic Resources, University of New Mexico, bird tissue collection") ) },
+        { "DGR:Ento",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Division of Genomic Resources, University of New Mexico, entomology tissue collection") ) },
+        { "DGR:Fish",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Division of Genomic Resources, University of New Mexico, fish tissue collection") ) },
+        { "DGR:Herp",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Division of Genomic Resources, University of New Mexico, herpetology tissue collection") ) },
+        { "DGR:Mamm",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Division of Genomic Resources, University of New Mexico, mammal tissue collection") ) },
+        { "DMNS:Bird",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Denver Museum of Nature and Science, Ornithology Collections") ) },
+        { "DMNS:Mamm",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Denver Museum of Nature and Science, Mammology Collection") ) },
+        { "DMNS:Para",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Denver Museum of Nature and Science, Parasite Collection") ) },
+        { "DSM",              TVoucherInfoRef(new SVoucherInfo(&s_dsm_base,   false, false, 0, NULL,   NULL,          NULL,          NULL,        "Deutsche Sammlung von Mikroorganismen und Zellkulturen GmbH") ) },
+        { "DSMZ",             TVoucherInfoRef(new SVoucherInfo(&s_dsmz_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Deutsche Sammlung von Mikroorganismen und Zellkulturen") ) },
+        { "EMEC",             TVoucherInfoRef(new SVoucherInfo(&s_emec_base,  true,  false, 0, NULL,   NULL,          NULL,          NULL,        "Essig Museum") ) },
+        { "EMEC:EMEC",        TVoucherInfoRef(new SVoucherInfo(&s_emec_base,  false, true,  0, NULL,   NULL,          NULL,          NULL,        "Essig Museum") ) },
+        { "EMEC:UCIS",        TVoucherInfoRef(new SVoucherInfo(&s_emec_base,  false, true,  0, NULL,   NULL,          NULL,          NULL,        "Essig Museum") ) },
+        { "FRR",              TVoucherInfoRef(new SVoucherInfo(&s_frr_base,   false, false, 0, NULL,   NULL,          NULL,          NULL,        "Food Science Australia, Ryde") ) },
+        { "FSU<DEU>",         TVoucherInfoRef(new SVoucherInfo(&s_fsu_base,   false, false, 0, NULL,   NULL,          NULL,          NULL,        "Jena Microbial Resource Collection") ) },
+        { "ICMP",             TVoucherInfoRef(new SVoucherInfo(&s_lcr_base,   true,  false, 0, NULL,   NULL,          &s_uscr_pfx,   NULL,        "International Collection of Microorganisms from Plants") ) },
+        { "JCM",              TVoucherInfoRef(new SVoucherInfo(&s_jcm_base,   false, false, 0, NULL,   NULL,          NULL,          NULL,        "Japan Collection of Microorganisms") ) },
+        { "KCTC",             TVoucherInfoRef(new SVoucherInfo(&s_kctc_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Korean Collection for Type Cultures") ) },
+        { "KNWR:Ento",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Kenai National Wildlife Refuge, Entomology Collection") ) },
+        { "KU:I",             TVoucherInfoRef(new SVoucherInfo(&s_ku_base,    false, false, 0, NULL,   NULL,          &s_kui_pfx,    &s_ku_sfx,   "University of Kansas, Museum of Natural History, Ichthyology collection") ) },
+        { "KU:IT",            TVoucherInfoRef(new SVoucherInfo(&s_ku_base,    false, false, 0, NULL,   NULL,          &s_kuit_pfx,   &s_ku_sfx,   "University of Kansas, Museum of Natural History, Ichthyology tissue collection") ) },
+        { "KWP:Ento",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Kenelm W. Philip Collection, University of Alaska Museum of the North, Lepidoptera collection") ) },
+        { "MAFF",             TVoucherInfoRef(new SVoucherInfo(&s_maff_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Genebank, Ministry of Agriculture Forestry and Fisheries") ) },
+        { "MCZ:Bird",         TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Ornithology Collection") ) },
+        { "MCZ:Cryo",         TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Cryogenic Collection") ) },
+        { "MCZ:Ent",          TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Entomology Collection") ) },
+        { "MCZ:Fish",         TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Icthyology Collection") ) },
+        { "MCZ:Herp",         TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Herpetology Collection") ) },
+        { "MCZ:IP",           TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Invertebrate Paleontology Collection") ) },
+        { "MCZ:IZ",           TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Invertebrate Zoology Collection") ) },
+        { "MCZ:Ich",          TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Icthyology Collection") ) },
+        { "MCZ:Mala",         TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Malacology Collection") ) },
+        { "MCZ:Mamm",         TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Mammalogy Collection") ) },
+        { "MCZ:Orn",          TVoucherInfoRef(new SVoucherInfo(&s_mcz_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Harvard Museum of Comparative Zoology, Ornithology Collection") ) },
+        { "MLZ:Bird",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Moore Laboratory of Zoology, Occidental College, Bird Collection" ) ) },
+        { "MLZ:Mamm",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Moore Laboratory of Zoology, Occidental College, Mammal Collection" ) ) },
+        { "MSB:Bird",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Museum of Southwestern Biology, Bird Collection") ) },
+        { "MSB:Mamm",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Museum of Southwestern Biology, Mammal Collection") ) },
+        { "MSB:Para",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Museum of Southwestern Biology, Parasitology Collection") ) },
+        { "MTCC",             TVoucherInfoRef(new SVoucherInfo(&s_mtcc_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Microbial Type Culture Collection & Gene Bank") ) },
+        { "MUCL",             TVoucherInfoRef(new SVoucherInfo(&s_mucl_base,  false, false, 0, NULL,   NULL,          NULL,          &s_mucl_sfx, "Mycotheque de l'Universite Catholique de Louvain") ) },
+        { "MVZ:Bird",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Bird Collection") ) },
+        { "MVZ:Egg",          TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Egg Collection") ) },
+        { "MVZ:Herp",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Herpetology Collection") ) },
+        { "MVZ:Hild",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Milton Hildebrand collection") ) },
+        { "MVZ:Img",          TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Image Collection") ) },
+        { "MVZ:Mamm",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Mammal Collection") ) },
+        { "MVZ:Page",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Notebook Page Collection") ) },
+        { "MVZObs:Herp",      TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Museum of Vertebrate Zoology, University of California at Berkeley, Herpetology Collection") ) },
+        { "NBRC",             TVoucherInfoRef(new SVoucherInfo(&s_nbrc_base,  false, false, 8, &yp0,   &yp0,          NULL,          NULL,        "NITE Biological Resource Center") ) },
+        { "NBSB:Bird",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "National Biomonitoring Specimen Bank, U.S. Geological Survey, bird collection") ) },
+        { "NCIMB",            TVoucherInfoRef(new SVoucherInfo(&s_ncimb_base, false, false, 0, NULL,   NULL,          NULL,          NULL,        "National Collections of Industrial Food and Marine Bacteria (incorporating the NCFB)") ) },
+        { "NCTC",             TVoucherInfoRef(new SVoucherInfo(&s_nctc_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "National Collection of Type Cultures") ) },
+        { "NRRL",             TVoucherInfoRef(new SVoucherInfo(&s_nrrl_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Agricultural Research Service Culture Collection") ) },
+        { "NRRL:MOLD",        TVoucherInfoRef(new SVoucherInfo(&s_nrrl_mold,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Agricultural Research Service Culture Collection, Mold collection") ) },
+        { "NRRL:PROK",        TVoucherInfoRef(new SVoucherInfo(&s_nrrl_prok,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Agricultural Research Service Culture Collection, Prokaryotic collection") ) },
+        { "NRRL:YEAST",       TVoucherInfoRef(new SVoucherInfo(&s_nrrl_yest,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Agricultural Research Service Culture Collection, Yeast Collection") ) },
+        { "NZAC",             TVoucherInfoRef(new SVoucherInfo(&s_lcr_base,   true,  false, 0, NULL,   NULL,          &s_uscr_pfx,   NULL,        "New Zealand Arthropod Collection") ) },
+        { "PCC",              TVoucherInfoRef(new SVoucherInfo(&s_pcc_base,   false, false, 0, NULL,   NULL,          NULL,          NULL,        "Pasteur Culture Collection of Cyanobacteria") ) },
+        { "PCMB",             TVoucherInfoRef(new SVoucherInfo(&s_pcmb_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "The Pacific Center for Molecular Biodiversity") ) },
+        { "PDD",              TVoucherInfoRef(new SVoucherInfo(&s_lcr_base,   true,  false, 0, NULL,   NULL,          &s_uscr_pfx,   NULL,        "New Zealand Fungarium") ) },
+        { "PSU<USA-OR>:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   false, false, 0, NULL,   NULL,          &s_psu_pfx,    NULL,        "Portland State University, Vertebrate Biology Museum, Mammal Collection") ) },
+        { "PYCC",             TVoucherInfoRef(new SVoucherInfo(&s_pycc_base,  false, false, 0, NULL,   NULL,          NULL,          &s_pycc_sfx, "Portuguese Yeast Culture Collection") ) },
+        { "SAG",              TVoucherInfoRef(new SVoucherInfo(&s_sag_base,   false, false, 0, NULL,   NULL,          NULL,          NULL,        "Sammlung von Algenkulturen at Universitat Gottingen") ) },
+        { "TGRC",             TVoucherInfoRef(new SVoucherInfo(&s_tgrc_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "C.M. Rick Tomato Genetics Resource Center") ) },
+        { "UAM:Bird",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Bird Collection") ) },
+        { "UAM:Bryo",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Bryozoan Collection") ) },
+        { "UAM:Crus",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Marine Arthropod Collection") ) },
+        { "UAM:Ento",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Insect Collection") ) },
+        { "UAM:Fish",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Fish Collection") ) },
+        { "UAM:Herb",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, UAM Herbarium") ) },
+        { "UAM:Herp",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Amphibian and Reptile Collection") ) },
+        { "UAM:Mamm",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Mammal Collection") ) },
+        { "UAM:Moll",         TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Mollusc Collection") ) },
+        { "UAM:Paleo",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, paleontology collection") ) },
+        { "UAMH",             TVoucherInfoRef(new SVoucherInfo(&s_uamh_base,  false, false, 0, NULL,   NULL,          NULL,          NULL,        "Centre for Global Microfungal Biodiversity") ) },
+        { "UAMObs:Mamm",      TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "University of Alaska, Museum of the North, Mammal Collection") ) },
+        { "ULC",              TVoucherInfoRef(new SVoucherInfo(&s_bccm_base,  false, false, 4, &yp0,   &s_bccm_trim,  NULL,          NULL,        "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) },
+        { "USNM:Birds",       TVoucherInfoRef(new SVoucherInfo(&s_usnm_base,  false, true,  0, NULL,   NULL,          &s_colon_pfx,  NULL,        "National Museum of Natural History, Smithsonian Institution, Division of Birds") ) },
+        { "USNM:ENT",         TVoucherInfoRef(new SVoucherInfo(&s_usnm_base,  false, true,  0, NULL,   NULL,          &s_colon_pfx,  NULL,        "National Museum of Natural History, Smithsonian Institution, Entomology Collection") ) },
+        { "USNM:Fish",        TVoucherInfoRef(new SVoucherInfo(&s_usnm_base,  false, true,  0, NULL,   NULL,          &s_colon_pfx,  NULL,        "National Museum of Natural History, Smithsonian Institution, National Fish Collection") ) },
+        { "USNM:Herp",        TVoucherInfoRef(new SVoucherInfo(&s_usnm_base,  false, true,  0, NULL,   NULL,          &s_colon_pfx,  NULL,        "National Museum of Natural History, Smithsonian Institution, Division of Amphibians and Reptiles") ) },
+        { "USNM:IZ",          TVoucherInfoRef(new SVoucherInfo(&s_usnm_base,  false, true,  0, NULL,   NULL,          &s_colon_pfx,  NULL,        "National Museum of Natural History, Smithsonian Institution, Department of Invertebrate Zoology") ) },
+        { "USNM:MAMM",        TVoucherInfoRef(new SVoucherInfo(&s_usnm_base,  false, true,  0, NULL,   NULL,          &s_colon_pfx,  NULL,        "National Museum of Natural History, Smithsonian Institution, Division of Mammals") ) },
+        { "WNMU:Bird",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Western New Mexico University Museum, bird collection") ) },
+        { "WNMU:Fish",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Western New Mexico University Museum, fish collection") ) },
+        { "WNMU:Mamm",        TVoucherInfoRef(new SVoucherInfo(&s_uam_base,   true,  false, 0, NULL,   NULL,          &s_colon_pfx,  NULL,        "Western New Mexico University Museum, mammal collection") ) },
+        { "YPM:ENT",          TVoucherInfoRef(new SVoucherInfo(&s_ypm_base,   false, false, 6, &yp0,   NULL,          &s_ypment_pfx, NULL,        "Yale Peabody Museum of Natural History, Entomology Collection") ) },
+        { "YPM:HER",          TVoucherInfoRef(new SVoucherInfo(&s_ypm_base,   false, false, 6, &yp0,   NULL,          &s_ypmher_pfx, NULL,        "Yale Peabody Museum of Natural History, Herpetology Collection") ) },
+        { "YPM:ICH",          TVoucherInfoRef(new SVoucherInfo(&s_ypm_base,   false, false, 6, &yp0,   NULL,          &s_ypmich_pfx, NULL,        "Yale Peabody Museum of Natural History, Ichthyology Collection") ) },
+        { "YPM:IZ",           TVoucherInfoRef(new SVoucherInfo(&s_ypm_base,   false, false, 6, &yp0,   NULL,          &s_ypmiz_pfx,  NULL,        "Yale Peabody Museum of Natural History, Invertebrate Zoology Collection") ) },
+        { "YPM:MAM",          TVoucherInfoRef(new SVoucherInfo(&s_ypm_base,   false, false, 6, &yp0,   NULL,          &s_ypmmam_pfx, NULL,        "Yale Peabody Museum of Natural History, Mammology Collection") ) },
+        { "YPM:ORN",          TVoucherInfoRef(new SVoucherInfo(&s_ypm_base,   false, false, 6, &yp0,   NULL,          &s_ypmorn_pfx, NULL,        "Yale Peabody Museum of Natural History, Ornithology Collection") ) }
      };
      typedef CStaticArrayMap<const char*, TVoucherInfoRef, PCase_CStr> TVoucherInfoMap;
      DEFINE_STATIC_ARRAY_MAP(TVoucherInfoMap, sc_VoucherInfoMap, sc_voucher_info_map);
diff --git a/c++/src/objtools/format/inst_info_map.hpp b/c++/src/objtools/format/inst_info_map.hpp

index e8a6ab5243bcd9e64e154f531f30da0c1c03ba86..861b3fdaef7d48f318c3a3e31cf0b3df68724cdc 100644 (file)
--- a/c++/src/objtools/format/inst_info_map.hpp
+++ b/c++/src/objtools/format/inst_info_map.hpp
@@ -1,4 +1,4 @@
-/* $Id: inst_info_map.hpp 567275 2018-07-16 20:27:34Z kans $
+/* $Id: inst_info_map.hpp 611903 2020-07-13 15:51:00Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -49,6 +49,7 @@ public:
              bool          prependCollection,
              int           pad_to,
              const string *pad_with,
+            const string *trim,
              const string *prefix,
              const string *suffix,
              const char   *inst_full_name ): 
@@ -57,6 +58,7 @@ public:
              m_PrependCollection(prependCollection),
              m_PadTo(pad_to),
              m_PadWith(pad_with),
+            m_Trim(trim),
              m_Prefix(prefix),
              m_Suffix(suffix),
              m_InstFullName(inst_full_name) { }
@@ -66,6 +68,7 @@ public:
          bool          m_PrependCollection;
          int           m_PadTo;
          const string *m_PadWith;
+        const string *m_Trim;
          const string *m_Prefix;
          const string *m_Suffix;
          const char   *m_InstFullName;
diff --git a/c++/src/objtools/format/primary_item.cpp b/c++/src/objtools/format/primary_item.cpp

index 1d0937d22654a878cde5da183b3efe5e4b260f96..7d3c5aa7912618ae5d1fd95b969060321531ad54 100644 (file)
--- a/c++/src/objtools/format/primary_item.cpp
+++ b/c++/src/objtools/format/primary_item.cpp
@@ -1,4 +1,4 @@
-/*  $Id: primary_item.cpp 577454 2019-01-03 22:58:25Z kans $
+/*  $Id: primary_item.cpp 610065 2020-06-10 17:10:26Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -203,13 +203,15 @@ void CPrimaryItem::x_GetStrForPrimary(CBioseqContext& ctx)
      
      string str;
      string s;
-    s.reserve(80);
+    string r;
+    s.reserve(82);
      CConstRef<CSeq_id> other_id;
  
      TSignedSeqPos last_stop = -1;
  
      ITERATE( TAlnConstList, it, seglist ) {
          s.erase();
+        r.erase();
          const CSeq_align& align = **it;
  
          TSeqPos this_start = align.GetSeqStart(0);
@@ -287,13 +289,18 @@ void CPrimaryItem::x_GetStrForPrimary(CBioseqContext& ctx)
          }
          s += tid;
          s.resize(39, ' ');
-        s += NStr::IntToString(align.GetSeqStart(1) + 1) + '-' +
+        r = NStr::IntToString(align.GetSeqStart(1) + 1) + '-' +
              NStr::IntToString(align.GetSeqStop(1) + 1);
+        s += r;
  
          ENa_strand s0 = align.GetSeqStrand(0);
          ENa_strand s1 = align.GetSeqStrand(1);
          if (s0 != s1) {
-            s.resize(59, ' ');
+            if (r.length() > 20) {
+                s.resize(61, ' ');
+            } else {
+                s.resize(59, ' ');
+            }
              s += 'c';
          }
  
diff --git a/c++/src/objtools/format/qualifiers.cpp b/c++/src/objtools/format/qualifiers.cpp

index ea9d96019aeb4a7e2d0ec6d59f08e7d040a72d00..737aabfb16a5a6730ec3d89032312985d5b9ed27 100644 (file)
--- a/c++/src/objtools/format/qualifiers.cpp
+++ b/c++/src/objtools/format/qualifiers.cpp
@@ -1,4 +1,4 @@
-/*  $Id: qualifiers.cpp 578574 2019-01-22 18:30:29Z kans $
+/*  $Id: qualifiers.cpp 615036 2020-08-26 13:38:52Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -144,33 +144,26 @@ static string s_GetGOText(
          }
      } else { 
          bool add_dash = false;
+        // RW-922 only make one link from GO:id - text.
+        go_text.clear();
          if (go_id != NULL) {
-            go_text = string( "GO:" );
              if( is_html ) {
                  go_text += "<a href=\"";
                  go_text += strLinkBaseGeneOntology + *go_id + "\">";
              }
+            go_text += string( "GO:" );
              go_text += *go_id;
-            if( is_html ) {
-                go_text += "</a>";
-            }
              add_dash = true;
-        } else {
-            go_text.clear();
          }
          if ( text_string != 0 && text_string->length() > 0 ) {
              if (add_dash) {
                go_text += string( " - " );
              }
-            if( is_html && go_id != NULL ) {
-                go_text += "<a href=\"";
-                go_text += strLinkBaseGeneOntology + *go_id + "\">";
-            }
              // NO, we NO LONGER have the dash here even if there's no go_id (RETAIN compatibility with CHANGE in C)
              go_text += *text_string;
-            if( is_html && go_id != NULL ) {
-                go_text += "</a>";
-            }
+        }
+        if( is_html && go_id != NULL ) {
+            go_text += "</a>";
          }
          if ( evidence != 0 ) {
              go_text += string( " [Evidence " ) + *evidence + string( "]" );
@@ -1035,11 +1028,11 @@ void CFlatPubSetQVal::Format(TFlatQuals& q, const CTempString& name,
                  string value;
                  string pub_id_str;
                  int serial = (*ref_iter)->GetSerial();
-                int pmid = (*ref_iter)->GetPMID();
+                TEntrezId pmid = (*ref_iter)->GetPMID();
                  if (serial) {
                      pub_id_str = NStr::IntToString(serial);
-                } else if (pmid) {
-                    pub_id_str = NStr::IntToString(pmid);
+                } else if (pmid != ZERO_ENTREZ_ID) {
+                    pub_id_str = NStr::NumericToString(pmid);
                  }
                  /*
                  string pub_id_str =
@@ -1047,10 +1040,10 @@ void CFlatPubSetQVal::Format(TFlatQuals& q, const CTempString& name,
                       NStr::IntToString((*ref_iter)->GetSerial()));
                  */
  
-                if(bHtml && pmid) {
+                if(bHtml && pmid != ZERO_ENTREZ_ID) {
                      // create a link
                      value  = "[<a href=\"";
-                    value += strLinkBasePubmed + NStr::IntToString(pmid) + "\">" + pub_id_str + "</a>]";
+                    value += strLinkBasePubmed + NStr::NumericToString(pmid) + "\">" + pub_id_str + "</a>]";
                  } else {
                      value = '[' + pub_id_str + ']';
                  }
@@ -1069,7 +1062,7 @@ void CFlatPubSetQVal::Format(TFlatQuals& q, const CTempString& name,
          CPub_set_Base::TPub::iterator pub_iter = unusedPubs.begin();
          for (; pub_iter != unusedPubs.end(); ++pub_iter) {
              if ((*pub_iter)->IsPmid()) {
-                const int pmid = (*pub_iter)->GetPmid().Get();
+                const TEntrezId pmid = (*pub_iter)->GetPmid().Get();
                  string pmid_str = NStr::NumericToString(pmid);
                  pubmed = "[PUBMED ";
                  if (bHtml) {
@@ -1118,7 +1111,7 @@ void CFlatSeqIdQVal::Format(TFlatQuals& q, const CTempString& name,
      if ( m_Value->IsGi() ) {
          if ( m_GiPrefix ) {
              id_str = "GI:";
-            if (ctx.Config().HideGI() && name == "db_xref") return;
+            if ((ctx.Config().HideGI() || ctx.Config().IsPolicyFtp()) && name == "db_xref") return;
          }
          m_Value->GetLabel(&id_str, CSeq_id::eContent);
      } else {
diff --git a/c++/src/objtools/format/reference_item.cpp b/c++/src/objtools/format/reference_item.cpp

index 50cef023112aa812259765e970286f068cb678f8..0f5856376f8ef086c9b2adc735fb2bb7064bc651 100644 (file)
--- a/c++/src/objtools/format/reference_item.cpp
+++ b/c++/src/objtools/format/reference_item.cpp
@@ -1,4 +1,4 @@
-/*  $Id: reference_item.cpp 604101 2020-03-23 12:20:44Z ivanov $
+/*  $Id: reference_item.cpp 615039 2020-08-26 13:39:14Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -97,10 +97,10 @@ private:
  /////////////////////////////////////////////////////////////////////////////
  
  CCacheItem::CCacheItem(CBioseqContext& ctx, TCache csh, int length, bool is_prot) :
+    CFlatItem(&ctx),
      m_Cache(csh),
      m_Length(length),
-    m_IsProt(is_prot),
-    CFlatItem(&ctx)
+    m_IsProt(is_prot)
  {
  }
  
@@ -192,7 +192,7 @@ void CReferenceItem::FormatAffil(const CAffil& affil, string& result, bool gen_s
  
  CReferenceItem::CReferenceItem(const CSeqdesc& desc, CBioseqContext& ctx) :
      CFlatItem(&ctx), m_PubType(ePub_not_set), m_Category(eUnknown),
-    m_PatentId(0), m_PMID(0), m_MUID(0), m_Serial(kMax_Int),
+    m_PatentId(0), m_PMID(ZERO_ENTREZ_ID), m_MUID(ZERO_ENTREZ_ID), m_Serial(kMax_Int),
      m_JustUids(true), m_Elect(false)
  {
      _ASSERT(desc.IsPub());
@@ -215,7 +215,7 @@ CReferenceItem::CReferenceItem
   CBioseqContext& ctx,
   const CSeq_loc* loc) :
      CFlatItem(&ctx), m_PubType(ePub_not_set), m_Category(eUnknown),
-    m_PatentId(0), m_PMID(0), m_MUID(0), m_Serial(kMax_Int),
+    m_PatentId(0), m_PMID(ZERO_ENTREZ_ID), m_MUID(ZERO_ENTREZ_ID), m_Serial(kMax_Int),
      m_JustUids(true), m_Elect(false)
  {
      _ASSERT(feat.GetData().IsPub());
@@ -242,7 +242,7 @@ CReferenceItem::CReferenceItem
  
  CReferenceItem::CReferenceItem(const CSubmit_block& sub, CBioseqContext& ctx) :
      CFlatItem(&ctx), m_PubType(ePub_sub), m_Category(eSubmission),
-    m_PatentId(0), m_PMID(0), m_MUID(0), m_Serial(kMax_Int),
+    m_PatentId(0), m_PMID(ZERO_ENTREZ_ID), m_MUID(ZERO_ENTREZ_ID), m_Serial(kMax_Int),
      m_JustUids(false), m_Elect(false)
  {
      x_SetObject(sub);
@@ -322,12 +322,12 @@ static bool s_ShouldRemoveRef
      }}
  
      // same PMID ( and overlap )
-    if( curr_ref.GetPMID() != 0 && prev_ref.GetPMID() != 0 ) {
+    if( curr_ref.GetPMID() != ZERO_ENTREZ_ID && prev_ref.GetPMID() != ZERO_ENTREZ_ID) {
          return ( curr_ref.GetPMID() == prev_ref.GetPMID() );
      }
          
      // same MUID ( and overlap )
-    if( curr_ref.GetMUID() != 0 && prev_ref.GetMUID() != 0 ) {
+    if( curr_ref.GetMUID() != ZERO_ENTREZ_ID && prev_ref.GetMUID() != ZERO_ENTREZ_ID) {
          return ( curr_ref.GetMUID() == prev_ref.GetMUID() );
      }
  
@@ -383,8 +383,8 @@ static void s_CombineRefs
      }}
  
      // most merging ops are only done if muid or pmid match
-    const bool same_muid = ( curr_ref.GetMUID() != 0 && (prev_ref.GetMUID() == curr_ref.GetMUID()) );
-    const bool same_pmid = ( curr_ref.GetPMID() != 0 && (prev_ref.GetPMID() == curr_ref.GetPMID()) );
+    const bool same_muid = ( curr_ref.GetMUID() != ZERO_ENTREZ_ID && (prev_ref.GetMUID() == curr_ref.GetMUID()) );
+    const bool same_pmid = ( curr_ref.GetPMID() != ZERO_ENTREZ_ID && (prev_ref.GetPMID() == curr_ref.GetPMID()) );
      if( (same_muid || same_pmid) &&
          ( prev_ref.GetRemark() != curr_ref.GetRemark() )  ) 
      {
@@ -612,7 +612,7 @@ bool CReferenceItem::Matches(const CPub& pub) const
          {{
              // you can only compare on unique string if the reference
              // does not have a pmid or muid (example accession: L40362.1)
-            if( GetMUID() == 0 && GetPMID() == 0 ) {
+            if( GetMUID() == ZERO_ENTREZ_ID && GetPMID() == ZERO_ENTREZ_ID) {
                  x_CreateUniqueStr();
                  const string& uniquestr = m_UniqueStr;
  
@@ -681,7 +681,7 @@ void CReferenceItem::x_GatherInfo(CBioseqContext& ctx)
                  switch(pub.Which()) {
                  case CPub::e_Pmid:
                      {
-                        const int pmid = pub.GetPmid().Get();
+                        const TEntrezId pmid = pub.GetPmid().Get();
  
                          CPubMedId req(pmid);
                          CMLAClient::TReply reply;
@@ -690,7 +690,7 @@ void CReferenceItem::x_GatherInfo(CBioseqContext& ctx)
                      break;
                  case CPub::e_Muid:
                      {
-                        const int muid = pub.GetMuid();
+                        const TEntrezId muid = pub.GetMuid();
                          // RW-1040: removed mlaClient.AskUidtopmid and AskGetpubpmid
                      }
                      break;
@@ -755,7 +755,7 @@ void CReferenceItem::x_Init(const CPub& pub, CBioseqContext& ctx)
          break;
  
      case CPub::e_Muid:
-        if (m_MUID == 0) {
+        if (m_MUID == ZERO_ENTREZ_ID) {
              m_MUID = pub.GetMuid();
              m_Category = ePublished;
          }
@@ -800,8 +800,8 @@ void CReferenceItem::x_Init(const CPub& pub, CBioseqContext& ctx)
          break;
  
      case CPub::e_Pmid:
-        if (m_PMID == 0) {
-            m_PMID = pub.GetPmid();
+        if (m_PMID == ZERO_ENTREZ_ID) {
+            m_PMID = pub.GetPmid().Get();
              m_Category = ePublished;
          }
          break;
@@ -882,13 +882,13 @@ void CReferenceItem::x_Init(const CCit_gen& gen, CBioseqContext& ctx)
      }
  
      // MUID
-    if (gen.CanGetMuid()  &&  m_MUID == 0) {
+    if (gen.CanGetMuid()  &&  m_MUID == ZERO_ENTREZ_ID) {
          m_MUID = gen.GetMuid();
      }
      
      // PMID
-    if (gen.CanGetPmid()  &&  m_PMID == 0) {
-        m_PMID = gen.GetPmid();
+    if (gen.CanGetPmid()  &&  m_PMID == ZERO_ENTREZ_ID) {
+        m_PMID = gen.GetPmid().Get();
      }
  }
  
@@ -922,12 +922,12 @@ void CReferenceItem::x_Init(const CMedline_entry& mle, CBioseqContext& ctx)
  {
      m_Category = ePublished;
  
-    if (mle.CanGetUid()  &&  m_MUID == 0) {
+    if (mle.CanGetUid()  &&  m_MUID == ZERO_ENTREZ_ID) {
          m_MUID = mle.GetUid();
      }
  
-    if (mle.CanGetPmid()  &&  m_PMID == 0) {
-        m_PMID = mle.GetPmid();
+    if (mle.CanGetPmid()  &&  m_PMID == ZERO_ENTREZ_ID) {
+        m_PMID = mle.GetPmid().Get();
      }
  
      if (mle.CanGetCit()) {
@@ -1033,13 +1033,13 @@ void CReferenceItem::x_Init(const CCit_art& art, CBioseqContext& ctx)
          ITERATE (CArticleIdSet::Tdata, it, art.GetIds().Get()) {
              switch ((*it)->Which()) {
              case CArticleId::e_Pubmed:
-                if (m_PMID == 0) {
-                    m_PMID = (*it)->GetPubmed();
+                if (m_PMID == ZERO_ENTREZ_ID) {
+                    m_PMID = (*it)->GetPubmed().Get();
                  }
                  break;
              case CArticleId::e_Medline:
-                if (m_MUID == 0) {
-                    m_MUID = (*it)->GetMedline();
+                if (m_MUID == ZERO_ENTREZ_ID) {
+                    m_MUID = (*it)->GetMedline().Get();
                  }
                  break;
              case CArticleId::e_Doi:
@@ -1662,10 +1662,10 @@ void CReferenceItem::x_GatherRemark(CBioseqContext& ctx)
                              // no DOIs pritned if there's a pmid or muid
                              bool hasPmidOrMuid = false;
                              ITERATE( CArticleIdSet_Base::Tdata, it, ids.Get() ) {
-                                if( (*it)->IsPubmed() && (*it)->GetPubmed().Get() != 0 ) {
+                                if( (*it)->IsPubmed() && (*it)->GetPubmed().Get() != ZERO_ENTREZ_ID ) {
                                      hasPmidOrMuid = true;
                                      break;
-                                } else if(  (*it)->IsMedline() && (*it)->GetMedline().Get() != 0 ) {
+                                } else if(  (*it)->IsMedline() && (*it)->GetMedline().Get() != ZERO_ENTREZ_ID ) {
                                      hasPmidOrMuid = true;
                                      break;
                                  }
@@ -1822,20 +1822,20 @@ bool LessThan::operator()
      // after: dates are the same, or both missing.
      
      // distinguish by uids (swap order for RefSeq)
-    if ( ref1->GetPMID() != 0  &&  ref2->GetPMID() != 0  &&
+    if ( ref1->GetPMID() != ZERO_ENTREZ_ID &&  ref2->GetPMID() != ZERO_ENTREZ_ID &&
           !(ref1->GetPMID() == ref2->GetPMID()) ) {
          return m_IsRefSeq ? (ref1->GetPMID() > ref2->GetPMID()) :
              (ref1->GetPMID() < ref2->GetPMID());
      }
-    if ( ref1->GetMUID() != 0  &&  ref2->GetMUID() != 0  &&
+    if ( ref1->GetMUID() != ZERO_ENTREZ_ID &&  ref2->GetMUID() != ZERO_ENTREZ_ID &&
           !(ref1->GetMUID() == ref2->GetMUID()) ) {
          return m_IsRefSeq ? (ref1->GetMUID() > ref2->GetMUID()) :
              (ref1->GetMUID() < ref2->GetMUID());
      }
  
      // just uids goes last
-    if ( (ref1->GetPMID() != 0  &&  ref2->GetPMID() != 0)  ||
-         (ref1->GetMUID() != 0  &&  ref2->GetMUID() != 0) ) {
+    if ( (ref1->GetPMID() != ZERO_ENTREZ_ID &&  ref2->GetPMID() != ZERO_ENTREZ_ID)  ||
+         (ref1->GetMUID() != ZERO_ENTREZ_ID &&  ref2->GetMUID() != ZERO_ENTREZ_ID) ) {
          if ( ref1->IsJustUids()  &&  !ref2->IsJustUids() ) {
              return true;
          } else if ( !ref1->IsJustUids()  &&  ref2->IsJustUids() ) {
diff --git a/c++/src/objtools/format/source_item.cpp b/c++/src/objtools/format/source_item.cpp

index 854d7f96fcac8115fd9d01f04a579950d52f244b..ce6101e2ae7fb26629c2428ed8a8e7a0897b1b1c 100644 (file)
--- a/c++/src/objtools/format/source_item.cpp
+++ b/c++/src/objtools/format/source_item.cpp
@@ -1,4 +1,4 @@
-/*  $Id: source_item.cpp 577454 2019-01-03 22:58:25Z kans $
+/*  $Id: source_item.cpp 614736 2020-08-21 13:43:48Z fukanchi $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -52,7 +52,7 @@ BEGIN_NCBI_SCOPE
  BEGIN_SCOPE(objects)
  
  
-const CSourceItem::TTaxid CSourceItem::kInvalidTaxid = -1;
+const CSourceItem::TTaxid CSourceItem::kInvalidTaxid = INVALID_TAX_ID;
  
  
  ///////////////////////////////////////////////////////////////////////////
@@ -521,7 +521,7 @@ void CSourceItem::x_SetSource
      // Taxid
      {{
          TTaxid taxid = org.GetTaxId();
-        if (taxid != 0) {
+        if (taxid != ZERO_TAX_ID) {
              m_Taxid = taxid;
          }
      }}
diff --git a/c++/src/objtools/logging/listener.cpp b/c++/src/objtools/logging/listener.cpp

index 5b235b8a1031fe7451106631eb6c89a974ae06b7..c059b36cdf27c9bc0632b7ad63824cd9baf0b207 100644 (file)
--- a/c++/src/objtools/logging/listener.cpp
+++ b/c++/src/objtools/logging/listener.cpp
@@ -1,5 +1,5 @@
  
-/*  $Id: listener.cpp 600608 2020-01-23 17:32:17Z foleyjp $
+/*  $Id: listener.cpp 608330 2020-05-14 16:03:45Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -52,7 +52,7 @@ CObjtoolsListener::~CObjtoolsListener() = default;
  bool 
  CObjtoolsListener::PutMessage(const IObjtoolsMessage& message)
  {
-    m_Messages.emplace_back(dynamic_cast<IObjtoolsMessage*>(message.Clone()));
+    m_Messages.emplace_back(message.Clone());
      return true;
  }
  
diff --git a/c++/src/objtools/pubseq_gateway/client/psg_client.cpp b/c++/src/objtools/pubseq_gateway/client/psg_client.cpp

index 3e14dd1e11b15d1cd937006b06c1bc5e4a27cff5..8fd28d603764d4b92f196b1a9870cc664dbfcc82 100644 (file)
--- a/c++/src/objtools/pubseq_gateway/client/psg_client.cpp
+++ b/c++/src/objtools/pubseq_gateway/client/psg_client.cpp
@@ -1,4 +1,4 @@
-/*  $Id: psg_client.cpp 605160 2020-04-07 18:06:40Z ivanov $
+/*  $Id: psg_client.cpp 612393 2020-07-21 13:51:24Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -308,6 +308,7 @@ string CPSG_Queue::SImpl::x_GetAbsPathRef(shared_ptr<const CPSG_Request> user_re
      }
  
      os << ioc.GetClientId();
+    if (const auto hops = user_request->m_Hops) os << "&hops=" << hops;
      return os.str();
  }
  
@@ -1060,6 +1061,11 @@ bool CPSG_Queue::IsEmpty() const
      return m_Impl->Empty();
  }
  
+CPSG_Queue::TApiLock CPSG_Queue::GetApiLock()
+{
+    return SImpl::GetApiLock();
+}
+
  
  END_NCBI_SCOPE
  
diff --git a/c++/src/objtools/pubseq_gateway/client/psg_client_impl.hpp b/c++/src/objtools/pubseq_gateway/client/psg_client_impl.hpp

index e345f7af7203ab9fdc405457636f21f2d4e964a5..37ee6008952762d103f8909175b9247708f96094 100644 (file)
--- a/c++/src/objtools/pubseq_gateway/client/psg_client_impl.hpp
+++ b/c++/src/objtools/pubseq_gateway/client/psg_client_impl.hpp
@@ -1,7 +1,7 @@
  #ifndef OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_IMPL_HPP
  #define OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_IMPL_HPP
  
-/*  $Id: psg_client_impl.hpp 598004 2019-12-02 22:13:17Z sadyrovr $
+/*  $Id: psg_client_impl.hpp 612393 2020-07-21 13:51:24Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -93,6 +93,8 @@ struct CPSG_Queue::SImpl : CPSG_WaitingStack<shared_ptr<CPSG_Reply>>
  
      bool SendRequest(shared_ptr<const CPSG_Request> request, const CDeadline& deadline);
  
+    static TApiLock GetApiLock() { return CService::GetMap(); }
+
  private:
      class CService
      {
@@ -100,7 +102,6 @@ private:
          using TMap = unordered_map<string, unique_ptr<SPSG_IoCoordinator>>;
  
          SPSG_IoCoordinator& GetIoC(const string& service);
-        static shared_ptr<TMap> GetMap();
  
          shared_ptr<TMap> m_Map;
          static pair<mutex, weak_ptr<TMap>> sm_Instance;
@@ -109,6 +110,8 @@ private:
          SPSG_IoCoordinator& ioc;
  
          CService(const string& service) : m_Map(GetMap()), ioc(GetIoC(service)) {}
+
+        static shared_ptr<TMap> GetMap();
      };
  
      string x_GetAbsPathRef(shared_ptr<const CPSG_Request> user_request);
diff --git a/c++/src/objtools/pubseq_gateway/client/psg_client_transport.cpp b/c++/src/objtools/pubseq_gateway/client/psg_client_transport.cpp

index 00e3f977c9a1598786932cc20e30af21e0adad06..6b6634cb22cdd604236babb484e5483f95786ef1 100644 (file)
--- a/c++/src/objtools/pubseq_gateway/client/psg_client_transport.cpp
+++ b/c++/src/objtools/pubseq_gateway/client/psg_client_transport.cpp
@@ -1,4 +1,4 @@
-/*  $Id: psg_client_transport.cpp 608076 2020-05-11 17:59:21Z ivanov $
+/*  $Id: psg_client_transport.cpp 609548 2020-06-03 17:22:06Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -53,6 +53,7 @@
  #define __STDC_FORMAT_MACROS
  #include <nghttp2/nghttp2.h>
  
+#include <corelib/version.hpp>
  #include <corelib/request_status.hpp>
  
  #include "psg_client_transport.hpp"
diff --git a/c++/src/objtools/readers/aln_reader.cpp b/c++/src/objtools/readers/aln_reader.cpp

index f6dd312aceb7ab482b4a6d43625cc4ee2e78e1a5..f9691deab1ebe617b431b3a8d7b15ab96b0c0c0e 100644 (file)
--- a/c++/src/objtools/readers/aln_reader.cpp
+++ b/c++/src/objtools/readers/aln_reader.cpp
@@ -1,4 +1,4 @@
-/*  $Id: aln_reader.cpp 602230 2020-02-19 15:48:48Z foleyjp $
+/*  $Id: aln_reader.cpp 610753 2020-06-23 18:10:35Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -170,8 +170,8 @@ void CDefaultIdValidate::operator()(
  
  CAlnReader::CAlnReader(CNcbiIstream& is, FValidateIds fValidateIds) : 
      m_fValidateIds(fValidateIds),
-    m_IS(is), m_ReadDone(false), m_ReadSucceeded(false), 
      m_AlignFormat(EAlignFormat::UNKNOWN),
+    m_IS(is), m_ReadDone(false), m_ReadSucceeded(false), 
      m_UseNexusInfo(true)
  {
      m_Errors.clear();
@@ -290,18 +290,6 @@ sReportError(
  }
  
  
-static void 
-sReportError(
-    ILineErrorListener* pEC,
-    EDiagSev severity,
-    const string& seqId,
-    int lineNumber,
-    const string& message,
-    ILineError::EProblem problemType=ILineError::eProblem_GeneralParsingError)
-{
-    sReportError(pEC, severity, eReader_Alignment, 0, seqId, lineNumber, message, problemType);
-}
-
  void CAlnReader::Read(
      TReadFlags readFlags,
      ncbi::objects::ILineErrorListener* pErrorListener)
@@ -403,7 +391,6 @@ void CAlnReader::x_VerifyAlignmentInfo(
          "Only one sequence was detected in the alignment file. An alignment file must contain more than one sequence.");
      }
  
-    const auto numSequences = alignmentInfo.NumSequences();
  
      m_Seqs.assign(alignmentInfo.mSequences.begin(), alignmentInfo.mSequences.end());
  
@@ -750,9 +737,6 @@ CRef<CSeq_entry> CAlnReader::GetSeqEntry(const TFastaFlags fasta_flags,
      m_Entry = new CSeq_entry();
      CRef<CSeq_align> seq_align = GetSeqAlign(fasta_flags, pErrorListener);
  
-    const CDense_seg& denseg = seq_align->GetSegs().GetDenseg();
-    _ASSERT(denseg.GetIds().size() == m_Dim);
-
      CRef<CSeq_annot> seq_annot (new CSeq_annot);
      seq_annot->SetData().SetAlign().push_back(seq_align);
  
@@ -768,7 +752,6 @@ CRef<CSeq_entry> CAlnReader::GetSeqEntry(const TFastaFlags fasta_flags,
  
          // seq-id(s)
          auto& ids = pSubEntry->SetSeq().SetId();
-        //ids.push_back(denseg.GetIds()[row_i]);
          ids = m_Ids[row_i];
  
          // mol
diff --git a/c++/src/objtools/readers/aln_scanner_clustal.cpp b/c++/src/objtools/readers/aln_scanner_clustal.cpp

index 4e091404c62c0eac99221019bfb82a1e1b96e42a..0b90b69fbfe61f1a4ee1f9a696e1eac3af874486 100644 (file)
--- a/c++/src/objtools/readers/aln_scanner_clustal.cpp
+++ b/c++/src/objtools/readers/aln_scanner_clustal.cpp
@@ -1,5 +1,5 @@
  /*
- * $Id: aln_scanner_clustal.cpp 589468 2019-07-11 14:51:16Z kornbluh $
+ * $Id: aln_scanner_clustal.cpp 610753 2020-06-23 18:10:35Z ivanov $
   *
   * ===========================================================================
   *
@@ -110,10 +110,8 @@ CAlnScannerClustal::xImportAlignmentData(
      bool inBlock = false;
      int  blockLineLength = 0;
      int  blockCount = 0;
-    bool firstBlock = true;
      int  numSeqs = 0;
      int  seqCount = 0;
-    int  maxSeqCount = 0;
  
      string line;
      int lineCount = 0;
diff --git a/c++/src/objtools/readers/aln_scanner_nexus.cpp b/c++/src/objtools/readers/aln_scanner_nexus.cpp

index c657d943bd07ac324f95d142df050c4fcb635ca9..2687b6608a75f6bc81630a0273981a6a9ae03f15 100644 (file)
--- a/c++/src/objtools/readers/aln_scanner_nexus.cpp
+++ b/c++/src/objtools/readers/aln_scanner_nexus.cpp
@@ -1,5 +1,5 @@
  /*
- * $Id: aln_scanner_nexus.cpp 599135 2019-12-19 16:40:05Z foleyjp $
+ * $Id: aln_scanner_nexus.cpp 610753 2020-06-23 18:10:35Z ivanov $
   *
   * ===========================================================================
   *
@@ -421,7 +421,7 @@ CAlnScannerNexus::xProcessMatrix(
          }
          
          string seqData = NStr::Join(tokens.begin()+1, tokens.end(), "");
-        auto dataSize = seqData.size();
+        const int dataSize = seqData.size();
  
  
          
@@ -666,7 +666,7 @@ CAlnScannerNexus::xGetArgPos(const TCommandArgs& args,
  
   
  //  ----------------------------------------------------------------------------
-int 
+size_t 
  CAlnScannerNexus::sFindCharOutsideComment(
          char c,
          const string& line,
@@ -674,7 +674,7 @@ CAlnScannerNexus::sFindCharOutsideComment(
          size_t startPos)
  //  ----------------------------------------------------------------------------
  {
-    for (int index=startPos; index<line.size(); ++index) {
+    for (auto index=startPos; index<line.size(); ++index) {
          if (line[index] == '[') {
              ++numUnmatchedLeftBrackets;
          }
@@ -794,7 +794,6 @@ CAlnScannerNexus::xImportAlignmentData(
      }
  
      if (!commandTokens.empty()) {
-        auto commandStartLine =  commandTokens.front().mNumLine;
          string description = 
              "Terminating semicolon missing from command. Commands in a Nexus file must end with a semicolon.";
          throw SShowStopper(
@@ -837,10 +836,10 @@ sStripNexusComments(
          return;
      }
  
-    list<pair<int, int>> commentLimits;
-    int index=0;
-    int start=0;
-    int stop;
+    list<pair<size_t, size_t>> commentLimits;
+    size_t index=0;
+    size_t start=0;
+    size_t stop;
      while (index < line.size()) {
          const auto& c = line[index];
          if (c == '[') {
@@ -884,9 +883,9 @@ CAlnScannerNexus::sStripCommentsOutsideCommand(
          return;
      }
  
-    list<pair<int, int>> commentLimits;
-    int start=0;
-    int stop;
+    list<pair<size_t,size_t>> commentLimits;
+    size_t start=0;
+    size_t stop;
  
      if (!inCommand &&
          (numUnmatchedLeftBrackets == 0) &&
@@ -896,7 +895,7 @@ CAlnScannerNexus::sStripCommentsOutsideCommand(
  
      const auto len = line.size();
  
-    for (int index=0; index<len; ++index) {
+    for (size_t index=0; index<len; ++index) {
          const auto& c = line[index];
  
          if (inCommand) {
diff --git a/c++/src/objtools/readers/aln_scanner_nexus.hpp b/c++/src/objtools/readers/aln_scanner_nexus.hpp

index ce3f33d887ca8cacb0f8b830102ed46b390c72a5..82eb3ed4b418ceb264328e0a6062386ba11884f6 100644 (file)
--- a/c++/src/objtools/readers/aln_scanner_nexus.hpp
+++ b/c++/src/objtools/readers/aln_scanner_nexus.hpp
@@ -2,7 +2,7 @@
  #define _ALN_SCANNER_NEXUS_HPP_
  
  /*
- * $Id: aln_scanner_nexus.hpp 585192 2019-04-24 19:38:23Z foleyjp $
+ * $Id: aln_scanner_nexus.hpp 610753 2020-06-23 18:10:35Z ivanov $
   *
   * ===========================================================================
   *
@@ -56,8 +56,8 @@ class CAlnScannerNexus:
  //  ============================================================================
  {
  public:
-    CAlnScannerNexus(): 
-        mGapChar(0), mMissingChar(0), mMatchChar(0) {};
+//    CAlnScannerNexus(): 
+//        mGapChar(0), mMissingChar(0), mMatchChar(0) {};
      ~CAlnScannerNexus() {};
  
      TDeflines& SetDeflines(void) { return mDeflines; }
@@ -136,7 +136,7 @@ protected:
          int &numUnmatchedLeftBrackets,
          bool &inCommand);
  
-    static int sFindCharOutsideComment(
+    static size_t sFindCharOutsideComment(
          char c,
          const string& line,
          int &numUnmatchedLeftBrackets,
@@ -148,9 +148,9 @@ protected:
  
      int mNumSequences = 0;
      int mSequenceSize = 0;
-    char mMatchChar;
-    char mMissingChar;
-    char mGapChar;
+    char mMatchChar=0;
+    char mMissingChar=0;
+    char mGapChar=0;
      bool mInBlock=false;
      string mCurrentBlock;
      int mBlockStartLine;
diff --git a/c++/src/objtools/readers/descr_mod_apply.cpp b/c++/src/objtools/readers/descr_mod_apply.cpp

index e0e8736cb2bf55647309d77463b6896491067179..3cb70118e25f16eeb601668c6294fb635c10a5ae 100644 (file)
--- a/c++/src/objtools/readers/descr_mod_apply.cpp
+++ b/c++/src/objtools/readers/descr_mod_apply.cpp
@@ -1,4 +1,4 @@
-/*  $Id: descr_mod_apply.cpp 601793 2020-02-13 16:02:42Z foleyjp $
+/*  $Id: descr_mod_apply.cpp 610757 2020-06-23 18:10:59Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -530,18 +530,18 @@ bool CDescrModApply::x_TryOrgRefMod(const TModEntry& mod_entry, bool& preserve_t
          const auto& value = x_GetModValue(mod_entry);
          m_pDescrCache->SetBioSource().SetOrg().SetTaxname(value);
          if (!preserve_taxid &&
-             m_pDescrCache->SetBioSource().GetOrg().GetTaxId()) { 
+             m_pDescrCache->SetBioSource().GetOrg().GetTaxId() != ZERO_ENTREZ_ID) { 
              // clear taxid if it does not occur in this modifier set
-            m_pDescrCache->SetBioSource().SetOrg().SetTaxId(0);
+            m_pDescrCache->SetBioSource().SetOrg().SetTaxId(ZERO_ENTREZ_ID);
          }
          return true;
      }
  
      if (name == "taxid") {
          const auto& value = x_GetModValue(mod_entry);
-        int taxid;
+        TTaxId taxid;
          try {
-            taxid = NStr::StringToInt(value);
+            taxid = NStr::StringToNumeric<TTaxId>(value);
          }
          catch (...) {
              x_ReportInvalidValue(mod_entry.second.front(), "Integer value expected.");
@@ -925,9 +925,9 @@ void CDescrModApply::x_SetPMID(const TModEntry& mod_entry)
      for (const auto& mod : mod_entry.second)
      {
          const auto& value = mod.GetValue();
-        int pmid;
+        TEntrezId pmid;
          try {
-            pmid = NStr::StringToInt(value);
+            pmid = NStr::StringToNumeric<TEntrezId>(value);
          }
          catch(...) {
              x_ReportInvalidValue(mod_entry.second.front(), "Expected integer value.");
@@ -1097,7 +1097,7 @@ CUser_object& CDescrCache::SetDBLink()
  CUser_object& CDescrCache::SetFileTrack()
  {
      return x_SetDescriptor(eFileTrack,
-        [this](const CSeqdesc& desc) {
+        [](const CSeqdesc& desc) {
              return (desc.IsUser() && s_IsUserType(desc.GetUser(), "FileTrack"));
          },
          [this]() {
@@ -1112,7 +1112,7 @@ CUser_object& CDescrCache::SetFileTrack()
  CUser_object& CDescrCache::SetTpaAssembly()
  {
      return x_SetDescriptor(eTpa,
-        [this](const CSeqdesc& desc) {
+        [](const CSeqdesc& desc) {
              return (desc.IsUser() && s_IsUserType(desc.GetUser(), "TpaAssembly"));
          },
          [this]() {
@@ -1127,7 +1127,7 @@ CUser_object& CDescrCache::SetTpaAssembly()
  CUser_object& CDescrCache::SetGenomeProjects()
  {
          return x_SetDescriptor(eGenomeProjects,
-        [this](const CSeqdesc& desc) {
+        [](const CSeqdesc& desc) {
              return (desc.IsUser() && s_IsUserType(desc.GetUser(), "GenomeProjectsDB"));
          },
          [this]() {
diff --git a/c++/src/objtools/readers/fasta.cpp b/c++/src/objtools/readers/fasta.cpp

index 30044d9ae9bf7b754d117899b59ee9c8ae0328e3..6d01a7085d5062afe3943e6bc3057aaaa4854528 100644 (file)
--- a/c++/src/objtools/readers/fasta.cpp
+++ b/c++/src/objtools/readers/fasta.cpp
@@ -1,4 +1,4 @@
-/*  $Id: fasta.cpp 600608 2020-01-23 17:32:17Z foleyjp $
+/*  $Id: fasta.cpp 612524 2020-07-23 11:37:59Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -220,7 +220,6 @@ inline static bool s_ASCII_IsUnAmbigNuc(unsigned char c)
  
  CFastaReader::CFastaReader(ILineReader& reader, TFlags flags, FIdCheck f_idcheck)
      : m_LineReader(&reader), m_MaskVec(0), 
-      m_IDGenerator(new CSeqIdGenerator()), 
        m_gapNmin(0), m_gap_Unknown_length(0),
        m_MaxIDLength(kMax_UI4),
        m_fIdCheck(f_idcheck)
@@ -237,7 +236,6 @@ CFastaReader::CFastaReader(const string& path, TFlags flags, FIdCheck f_idcheck)
  
  CFastaReader::CFastaReader(CReaderBase::TReaderFlags fBaseFlags, TFlags flags, FIdCheck f_idcheck)
      : CReaderBase(fBaseFlags), m_MaskVec(0), 
-      m_IDGenerator(new CSeqIdGenerator), 
        m_gapNmin(0), m_gap_Unknown_length(0),
        m_MaxIDLength(kMax_UI4),
        m_fIdCheck(f_idcheck)
@@ -352,11 +350,12 @@ CRef<CSeq_entry> CFastaReader::ReadOneSeq(ILineErrorListener * pMessageListener)
                  ParseDefLine(">", pMessageListener);
                  need_defline = false;
              } else {
+                const auto lineNum = LineNumber();
                  GetLineReader().UngetLine();
                  NCBI_THROW2(CObjReaderParseException, eNoDefline, 
                              "CFastaReader: Input doesn't start with"
-                            " a defline or comment around line " + NStr::NumericToString(LineNumber()),
-                             LineNumber() );
+                            " a defline or comment around line " + NStr::NumericToString(lineNum),
+                             lineNum);
              }
          }
  
@@ -506,6 +505,7 @@ void CFastaReader::SetMaxIDLength(Uint4 max_len)
      CFastaDeflineReader::s_MaxLocalIDLength =
      CFastaDeflineReader::s_MaxGeneralTagLength =
      CFastaDeflineReader::s_MaxAccessionLength = m_MaxIDLength = max_len;
+    m_bModifiedMaxIdLength=true;
  }
  
  
@@ -584,10 +584,7 @@ bool CFastaReader::xSetSeqMol(const list<CRef<CSeq_id>>& ids, CSeq_inst_Base::EM
  void CFastaReader::ParseDefLine(const TStr& s, ILineErrorListener * pMessageListener)
  {
      SDefLineParseInfo parseInfo;
-    parseInfo.fBaseFlags = m_iFlags;
-    parseInfo.fFastaFlags = GetFlags();
-    parseInfo.maxIdLength = m_MaxIDLength;
-    parseInfo.lineNumber = LineNumber();
+    x_SetDeflineParseInfo(parseInfo);
  
      CFastaDeflineReader::SDeflineData data;
      CFastaDeflineReader::ParseDefline(s, parseInfo, data, pMessageListener, m_fIdCheck);
@@ -679,14 +676,21 @@ bool CFastaReader::ParseIDs(
  bool CFastaReader::ParseIDs(
      const TStr& s, ILineErrorListener * pMessageListener)
  {
-
      SDefLineParseInfo info;
+    x_SetDeflineParseInfo(info);
+    
+    return CFastaDeflineReader::ParseIDs(s, info, m_ignorable, SetIDs(), pMessageListener);
+}
+
+
+void CFastaReader::x_SetDeflineParseInfo(SDefLineParseInfo& info)
+{
      info.fBaseFlags = m_iFlags;
      info.fFastaFlags = GetFlags();
-    info.maxIdLength = m_MaxIDLength;
+    info.maxIdLength = m_bModifiedMaxIdLength ?
+                       m_MaxIDLength :
+                       0;
      info.lineNumber = LineNumber();
-
-    return CFastaDeflineReader::ParseIDs(s, info, m_ignorable, SetIDs(), pMessageListener);
  }
  
  
@@ -2235,12 +2239,8 @@ void CFastaReader::SetGapLinkageEvidence(
  
  void CFastaReader::SetGapLinkageEvidences(CSeq_gap::EType type, const set<int>& evidences)
  {
-    if (type == -1)
-        m_gap_type.Release();
-    else 
-         m_gap_type.Reset(new SGap::TGapTypeObj(type));
-
-
+    m_gap_type.Reset(new SGap::TGapTypeObj(type));
+    
      m_DefaultLinkageEvidence.clear();
      for (const auto& evidence : evidences) {
          m_DefaultLinkageEvidence.insert(static_cast<CLinkage_evidence::EType>(evidence));
diff --git a/c++/src/objtools/readers/fasta_exception.cpp b/c++/src/objtools/readers/fasta_exception.cpp

index 88bce36d3a06ef01f1ab37fb9a472262acd1dd5e..9e8038120c180c97e40d409eef3588ffbce2c8f6 100644 (file)
--- a/c++/src/objtools/readers/fasta_exception.cpp
+++ b/c++/src/objtools/readers/fasta_exception.cpp
@@ -1,4 +1,4 @@
-/*  $Id: fasta_exception.cpp 407174 2013-07-18 16:27:25Z gouriano $
+/*  $Id: fasta_exception.cpp 610176 2020-06-11 19:24:49Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -141,7 +141,7 @@ void CBadResiduesException::SBadResiduePositions::ConvertBadIndexesToString(
  
              pos_prefix = ", ";
          }
-        if( rangesFound.size() > maxRanges ) {
+        if (iRangesFound > maxRanges) {
              out << ", and more";
              return;
          }
diff --git a/c++/src/objtools/readers/fasta_reader_utils.cpp b/c++/src/objtools/readers/fasta_reader_utils.cpp

index 594fbdefc6f148eea17779438322496921c7acee..131a8689e49289933945af79fc840c01b82b7122 100644 (file)
--- a/c++/src/objtools/readers/fasta_reader_utils.cpp
+++ b/c++/src/objtools/readers/fasta_reader_utils.cpp
@@ -1,4 +1,4 @@
-/*  $Id: fasta_reader_utils.cpp 599582 2020-01-02 20:02:39Z foleyjp $
+/*  $Id: fasta_reader_utils.cpp 612524 2020-07-23 11:37:59Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -350,23 +350,6 @@ TSeqPos CFastaDeflineReader::ParseRange(
      return TSeqPos(s.length() - pos);
  }
  
-static bool s_ASCII_IsUnAmbigNuc(unsigned char c)
-{
-    switch( c ) {
-    case 'A':
-    case 'C':
-    case 'G':
-    case 'T':
-    case 'a':
-    case 'c':
-    case 'g':
-    case 't':
-        return true;
-    default:
-        return false;
-    }
-}
-
  
  class CIdErrorReporter
  {
@@ -507,7 +490,6 @@ bool CFastaDeflineReader::ParseIDs(
          return true;
      }
  
-    TSeqPos num_ids = 0;
      // be generous overall, and give raw local IDs the benefit of the
      // doubt for now
      CSeq_id::TParseFlags flags
@@ -527,7 +509,8 @@ bool CFastaDeflineReader::ParseIDs(
              for (auto& ch : local_copy)
                  if (ch == ',')
                      ch = '_';
-            num_ids = CSeq_id::ParseIDs(ids, local_copy, flags);
+
+            CSeq_id::ParseIDs(ids, local_copy, flags);
  
              const string errMessage = 
                  "Near line " + NStr::NumericToString(info.lineNumber) 
@@ -545,7 +528,7 @@ bool CFastaDeflineReader::ParseIDs(
          }
          else
          {
-            num_ids = CSeq_id::ParseIDs(ids, s, flags);
+            CSeq_id::ParseIDs(ids, s, flags);
          }
      } catch (CSeqIdException&) {
          // swap(ids, old_ids);
@@ -557,9 +540,11 @@ bool CFastaDeflineReader::ParseIDs(
  
  
      CFastaIdValidate idValidate(info.fFastaFlags);
-    idValidate.SetMaxLocalIDLength(info.maxIdLength);
-    idValidate.SetMaxGeneralTagLength(info.maxIdLength);
-    idValidate.SetMaxAccessionLength(info.maxIdLength);
+    if (info.maxIdLength) {
+        idValidate.SetMaxLocalIDLength(info.maxIdLength);
+        idValidate.SetMaxGeneralTagLength(info.maxIdLength);
+        idValidate.SetMaxAccessionLength(info.maxIdLength);
+    }
      idValidate(ids, info.lineNumber, CIdErrorReporter(pMessageListener, ignoreGeneralParsingError));
  
      return true;
@@ -587,6 +572,11 @@ void CSeqIdCheck::operator()(const TIds& ids,
      }
  
      CFastaIdValidate s_IdValidate(info.fFastaFlags);
+    if (info.maxIdLength) {
+        s_IdValidate.SetMaxLocalIDLength(info.maxIdLength);
+        s_IdValidate.SetMaxGeneralTagLength(info.maxIdLength);
+        s_IdValidate.SetMaxAccessionLength(info.maxIdLength);
+    }
      s_IdValidate(ids, info.lineNumber, CIdErrorReporter(listener));
  }
  
diff --git a/c++/src/objtools/readers/gff2_data.cpp b/c++/src/objtools/readers/gff2_data.cpp

index 0c69d6e80fe35eed1ef6166259328ae07c45a06f..5b6cff7684db75e1afd79dc66a1d0d4f84d6ebad 100644 (file)
--- a/c++/src/objtools/readers/gff2_data.cpp
+++ b/c++/src/objtools/readers/gff2_data.cpp
@@ -1,4 +1,4 @@
-/*  $Id: gff2_data.cpp 607807 2020-05-07 18:58:43Z ivanov $
+/*  $Id: gff2_data.cpp 610645 2020-06-22 11:31:02Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -821,7 +821,9 @@ bool CGff2Record::xMigrateAttributes(
  
      it = attrs_left.find("partial");
      if (it != attrs_left.end()) {
-        pFeature->SetPartial(true);
+        if (!(flags & CGff2Reader::fGenbankMode)) {
+            pFeature->AddQualifier("partial", it->second);
+        }
          attrs_left.erase(it);
      }
  
diff --git a/c++/src/objtools/readers/gff2_reader.cpp b/c++/src/objtools/readers/gff2_reader.cpp

index 5dfa0c978755d2fd09f42f506ebcc9ff87f4cd93..4bb0d19c34612bf7a43b1ab0f62626640883497d 100644 (file)
--- a/c++/src/objtools/readers/gff2_reader.cpp
+++ b/c++/src/objtools/readers/gff2_reader.cpp
@@ -1,4 +1,4 @@
-/*  $Id: gff2_reader.cpp 603569 2020-03-12 18:23:57Z ivanov $
+/*  $Id: gff2_reader.cpp 610837 2020-06-24 15:29:29Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -182,6 +182,44 @@ void CGff2Reader::xPostProcessAnnot(
      }
  }
  
+//  ----------------------------------------------------------------------------
+void
+CGff2Reader::xGetData(
+    ILineReader& lr,
+    TReaderData& readerData)
+//  ----------------------------------------------------------------------------
+{
+    readerData.clear();
+    string line;
+    if (xGetLine(lr, line)) {
+        if (xNeedsNewSeqAnnot(line)) {
+            return;
+        }
+        if (xIsTrackLine(line)) {
+            if (!mCurrentFeatureCount) {
+                xParseTrackLine(line);
+                xGetData(lr, readerData);
+                return;
+            }
+            m_PendingLine = line;
+            return;
+        }
+        if (xIsTrackTerminator(line)) {
+            if (!mCurrentFeatureCount) {
+                xParseTrackLine("track");
+                xGetData(lr, readerData);
+            }
+            return;
+        }
+        if (!xIsCurrentDataType(line)) {
+            xUngetLine(lr);
+            return;
+        }
+    readerData.push_back(TReaderLine{m_uLineNumber, line});
+    }
+    ++m_uDataCount;
+}
+
  //  ----------------------------------------------------------------------------
  void CGff2Reader::xAssignAnnotId(
      CSeq_annot& annot,
@@ -1347,5 +1385,37 @@ bool CGff2Reader::xIsIgnoredFeatureId(
      return false;
  }
  
+//  ---------------------------------------------------------------------------
+bool
+CGff2Reader::xNeedsNewSeqAnnot(
+    const string& line)
+//  ---------------------------------------------------------------------------
+{
+    if (IsInGenbankMode()) {
+        vector<string> columns;
+        NStr::Split(line, "\t ", columns, NStr::eMergeDelims);
+        string seqId = columns[0];
+        if (m_CurrentSeqId == seqId) {
+            return false;
+        }
+        m_CurrentSeqId = seqId;
+        if (mCurrentFeatureCount == 0) {
+            return false;
+        }
+        m_PendingLine = line;
+        return true;
+    }
+    return false;
+}
+
+//  ----------------------------------------------------------------------------
+bool CGff2Reader::IsInGenbankMode() const
+//  ----------------------------------------------------------------------------
+{
+    return (m_iFlags & CGff2Reader::fGenbankMode);
+}
+
+
+
  END_objects_SCOPE
  END_NCBI_SCOPE
diff --git a/c++/src/objtools/readers/gff3_reader.cpp b/c++/src/objtools/readers/gff3_reader.cpp

index 1e0e52068cf3eeafa23758462dc5728908f3a311..aca1edcf3c7e9e2d5e0e2443d042a9f46c31b517 100644 (file)
--- a/c++/src/objtools/readers/gff3_reader.cpp
+++ b/c++/src/objtools/readers/gff3_reader.cpp
@@ -1,4 +1,4 @@
-/*  $Id: gff3_reader.cpp 607807 2020-05-07 18:58:43Z ivanov $
+/*  $Id: gff3_reader.cpp 610837 2020-06-24 15:29:29Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -224,52 +224,6 @@ CGff3Reader::xProcessData(
      }
  }
  
-//  ----------------------------------------------------------------------------
-bool CGff3Reader::IsInGenbankMode() const
-//  ----------------------------------------------------------------------------
-{
-    return (m_iFlags & CGff3Reader::fGenbankMode);
-}
-
-//  ----------------------------------------------------------------------------
-void
-CGff3Reader::xGetData(
-    ILineReader& lr,
-    TReaderData& readerData)
-//  ----------------------------------------------------------------------------
-{
-    readerData.clear();
-    string line;
-    if (xGetLine(lr, line)) {
-        if (xNeedsNewSeqAnnot(line)) {
-            lr.UngetLine();
-            return;
-        }
-        if (xIsTrackLine(line)) {
-            if (!mCurrentFeatureCount) {
-                xParseTrackLine(line);
-                xGetData(lr, readerData);
-                return;
-            }
-            m_PendingLine = line;
-            return;
-        }
-        if (xIsTrackTerminator(line)) {
-            if (!mCurrentFeatureCount) {
-                xParseTrackLine("track");
-                xGetData(lr, readerData);
-            }
-            return;
-        }
-        if (!xIsCurrentDataType(line)) {
-            xUngetLine(lr);
-            return;
-        }
-    readerData.push_back(TReaderLine{m_uLineNumber, line});
-    }
-    ++m_uDataCount;
-}
-
  //  ----------------------------------------------------------------------------
  void CGff3Reader::xProcessAlignmentData(
      CSeq_annot& annot) 
@@ -1080,29 +1034,5 @@ void CGff3Reader::xPostProcessAnnot(
      return CGff2Reader::xPostProcessAnnot(annot);
  }
  
-//  ---------------------------------------------------------------------------
-bool
-CGff3Reader::xNeedsNewSeqAnnot(
-    const string& line)
-//  ---------------------------------------------------------------------------
-{
-    if (IsInGenbankMode()) {
-        vector<string> columns;
-        NStr::Split(line, "\t ", columns, NStr::eMergeDelims);
-        string seqId = columns[0];
-        if (m_CurrentSeqId == seqId) {
-            return false;
-        }
-        m_CurrentSeqId = seqId;
-        if (mCurrentFeatureCount == 0) {
-            return false;
-        }
-        m_PendingLine = line;
-        return true;
-    }
-    return false;
-}
-
-
  END_objects_SCOPE
  END_NCBI_SCOPE
diff --git a/c++/src/objtools/readers/gtf_reader.cpp b/c++/src/objtools/readers/gtf_reader.cpp

index e08d008de44ffd691fae7869754fba76e0e87105..64a4a1a4b930844aa22cab0c22f9b527e10c8bbc 100644 (file)
--- a/c++/src/objtools/readers/gtf_reader.cpp
+++ b/c++/src/objtools/readers/gtf_reader.cpp
@@ -1,4 +1,4 @@
-/*  $Id: gtf_reader.cpp 603569 2020-03-12 18:23:57Z ivanov $
+/*  $Id: gtf_reader.cpp 610936 2020-06-25 16:26:53Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -169,6 +169,7 @@ CGtfReader::ReadSeqAnnot(
      ILineErrorListener* pEC ) 
  //  ----------------------------------------------------------------------------                
  {
+    mCurrentFeatureCount = 0;
      return CReaderBase::ReadSeqAnnot(lineReader, pEC);
  }
  
@@ -181,6 +182,9 @@ CGtfReader::xProcessData(
  {
      for (const auto& lineData: readerData) {
          const auto& line = lineData.mData;
+        if (xIsTrackTerminator(line)) {
+            continue;
+        }
          if (xParseStructuredComment(line)) {
              continue;
          }
@@ -292,24 +296,7 @@ bool CGtfReader::xUpdateAnnotCds(
              return false;
          }
      }
-        
-    if ( xCdsIsPartial( gff ) ) {
-        CRef<CSeq_feat> pParent = xFindParentMrna(gff);
-        if (pParent) {
-            CSeq_loc& loc = pCds->SetLocation();
-            size_t uCdsStart = gff.SeqStart();
-            size_t uMrnaStart = pParent->GetLocation().GetStart( eExtreme_Positional );
-            if ( uCdsStart == uMrnaStart ) {
-                loc.SetPartialStart( true, eExtreme_Positional );
-            }
  
-            size_t uCdsStop =  gff.SeqStop();
-            size_t uMrnaStop = pParent->GetLocation().GetStop( eExtreme_Positional );
-            if ( uCdsStop == uMrnaStop  && gff.Type() != "stop_codon" ) {
-                loc.SetPartialStop( true, eExtreme_Positional );
-            }
-        }
-    }
      return true;
  }
  
@@ -950,8 +937,10 @@ bool CGtfReader::xProcessQualifierSpecialCase(
          return true;
      }
      if ( 0 == NStr::CompareNocase(key, "partial")) {
-        feature.SetPartial( true );
-        return true;
+        // RW-1108 - ignore partial attribute in Genbank mode
+        if (m_iFlags & CGtfReader::fGenbankMode) {
+            return true;
+        }
      }
      return false;
  }  
diff --git a/c++/src/objtools/readers/line_error.cpp b/c++/src/objtools/readers/line_error.cpp

index ba897d6528175d3b76916e5b81b9af74df8904d7..d29e610862e2d904dd735ec2aac4087c070250e7 100644 (file)
--- a/c++/src/objtools/readers/line_error.cpp
+++ b/c++/src/objtools/readers/line_error.cpp
@@ -1,4 +1,4 @@
-/*  $Id: line_error.cpp 580916 2019-02-22 16:30:37Z foleyjp $
+/*  $Id: line_error.cpp 610758 2020-06-23 18:11:06Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -153,9 +153,9 @@ CLineErrorEx::CLineErrorEx(
      : m_eProblem(eProblem), m_eSeverity( eSeverity ), 
        m_Code(code), m_Subcode(subcode),
      m_strSeqId(strSeqId), m_uLine( uLine ), 
-    m_strErrorMessage(strErrorMessage),
      m_strFeatureName(strFeatureName), m_strQualifierName(strQualifierName), 
      m_strQualifierValue(strQualifierValue), 
+    m_strErrorMessage(strErrorMessage),
      m_vecOfOtherLines(vecOfOtherLines)
  { }
  
diff --git a/c++/src/objtools/readers/mod_reader.cpp b/c++/src/objtools/readers/mod_reader.cpp

index 783ed768ee131f0783f1215af0a5c40fea2b586b..b8e8ec629adb1ade3a9ab01a24c9e7ceec24b9ed 100644 (file)
--- a/c++/src/objtools/readers/mod_reader.cpp
+++ b/c++/src/objtools/readers/mod_reader.cpp
@@ -1,4 +1,4 @@
-/*  $Id: mod_reader.cpp 600608 2020-01-23 17:32:17Z foleyjp $
+/*  $Id: mod_reader.cpp 610749 2020-06-23 18:10:01Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -798,7 +798,6 @@ bool CTitleParser::HasMods(const CTempString& title)
  bool CTitleParser::x_FindBrackets(const CTempString& line, size_t& start, size_t& stop, size_t& eq_pos)
  { // Copied from CSourceModParser
      size_t i = start;
-    bool found = false;
  
      eq_pos = CTempString::npos;
      const char* s = line.data() + start;
diff --git a/c++/src/objtools/readers/readfeat.cpp b/c++/src/objtools/readers/readfeat.cpp

index c75de67676bcc28c026563cbbc4b895dab23a39c..01a76cd17907cc8794015380dbec3e53f629b61d 100644 (file)
--- a/c++/src/objtools/readers/readfeat.cpp
+++ b/c++/src/objtools/readers/readfeat.cpp
@@ -769,10 +769,8 @@ static const TOrgModKey orgmod_key_to_subtype [] = {
  typedef CStaticPairArrayMap <const char*, COrgMod::ESubtype, PCase_CStr> TOrgModMap;
  DEFINE_STATIC_ARRAY_MAP(TOrgModMap, sm_OrgModKeys, orgmod_key_to_subtype);
  
-
-typedef SStaticPair<const char *, int> TTrnaKey;
-
-static const TTrnaKey trna_key_to_subtype [] = {
+static const map<const char*, int, PNocase_CStr> sm_TrnaKeys 
+{
      {  "Ala",            'A'  },
      {  "Alanine",        'A'  },
      {  "Arg",            'R'  },
@@ -831,13 +829,11 @@ static const TTrnaKey trna_key_to_subtype [] = {
      {  "Valine",         'V'  },
      {  "Xle",            'J'  },
      {  "Xxx",            'X'  },
+    {  "Undet",          'X'  },
      {  "fMet",           'M'  },
      {  "iMet",           'M'  }
  };
  
-typedef CStaticPairArrayMap <const char*, int, PCase_CStr> TTrnaMap;
-DEFINE_STATIC_ARRAY_MAP(TTrnaMap, sm_TrnaKeys, trna_key_to_subtype);
-
  
  static 
  set<const char*, PCase_CStr> 
@@ -1550,7 +1546,8 @@ CFeatureTableReader_Imp::x_ParseTrnaExtString(CTrna_ext & ext_trna, const string
                                  seq_start - (aa_start+3);   
  
                  string abbrev = pos_str.substr (aa_start + 3, aa_length);
-                TTrnaMap::const_iterator t_iter = sm_TrnaKeys.find (abbrev.c_str ());
+                //TTrnaMap::const_iterator 
+                auto t_iter = sm_TrnaKeys.find (abbrev.c_str ());
                  if (t_iter == sm_TrnaKeys.end ()) {
                      // unable to parse
                      return false;
@@ -2441,7 +2438,7 @@ bool CFeatureTableReader_Imp::x_AddQualifierToFeature (
              case CSeqFeatData::e_Pub:
                  if( qtype == eQual_PubMed ) {
                      CRef<CPub> new_pub( new CPub );
-                    new_pub->SetPmid( CPubMedId( x_StringToLongNoThrow(val, feat_name, qual) ) );
+                    new_pub->SetPmid( CPubMedId( ENTREZ_ID_FROM(long, x_StringToLongNoThrow(val, feat_name, qual)) ) );
                      sfdata.SetPub().SetPub().Set().push_back( new_pub );
                      return true;
                  }
@@ -2649,25 +2646,31 @@ bool CFeatureTableReader_Imp::x_AddQualifierToFeature (
                  {
                      if (featType == CSeqFeatData::e_Rna &&
                          sfdata.GetRna().GetType() == CRNA_ref::eType_mRNA) {
+                        CBioseq::TId ids;
                          try {
-                            CBioseq::TId ids;
                              CSeq_id::ParseIDs(ids, val, 
-                                    CSeq_id::fParse_ValidLocal 
-                                    | CSeq_id::fParse_PartialOK);
-                            for (const auto& id : ids) {
-                                auto id_string = id->GetSeqIdString(true);
-                                auto res = m_ProcessedTranscriptIds.insert(id_string);
-                                if (res.second == false) { // Insertion failed because Seq-id already encountered
-                                    x_ProcessMsg(
-                                        ILineError::eProblem_DuplicateIDs, eDiag_Error, 
-                                        feat_name, qual, val, 
-                                        "Transcript ID " + id_string + " appears on multiple mRNA features"
-                                        );
-                                }
-                            }
+                                CSeq_id::fParse_ValidLocal 
+                            |   CSeq_id::fParse_PartialOK);
                          }
-                        catch (CException&) {
-                            return false;
+                        catch (CSeqIdException& e) 
+                        {
+                            x_ProcessMsg(
+                                ILineError::eProblem_QualifierBadValue, eDiag_Error,
+                                feat_name, qual, val,
+                                "Invalid transcript_id  : " + val);
+                            return true;
+                        }
+
+                        for (const auto& id : ids) {
+                            auto id_string = id->GetSeqIdString(true);
+                            auto res = m_ProcessedTranscriptIds.insert(id_string);
+                            if (res.second == false) { // Insertion failed because Seq-id already encountered
+                                x_ProcessMsg(
+                                    ILineError::eProblem_DuplicateIDs, eDiag_Error, 
+                                    feat_name, qual, val, 
+                                    "Transcript ID " + id_string + " appears on multiple mRNA features"
+                                );
+                            }
                          }
                      }
                      x_AddGBQualToFeature(sfp, qual, val);
@@ -2681,41 +2684,48 @@ bool CFeatureTableReader_Imp::x_AddQualifierToFeature (
                      (featType == CSeqFeatData::e_Prot &&
                       sfdata.GetProt().IsSetProcessed() &&
                       sfdata.GetProt().GetProcessed() == CProt_ref::eProcessed_mature))
-                try {
+                {
                      CBioseq::TId ids;
-                    CSeq_id::ParseIDs(ids, val,                                
-                            CSeq_id::fParse_ValidLocal |
-                            CSeq_id::fParse_PartialOK);
-                    if (!ids.empty()) { 
-                        if (featType == CSeqFeatData::e_Cdregion) {
-                            for (const auto& id : ids) {
-                                auto id_string = id->GetSeqIdString(true);
-                                auto res = m_ProcessedProteinIds.insert(id_string);
-                                if (res.second == false) { // Insertion failed because Seq-id already encountered
-                                    x_ProcessMsg(
-                                        ILineError::eProblem_DuplicateIDs, eDiag_Error, 
-                                        feat_name, qual, val, 
-                                        "Protein ID " + id_string + " appears on multiple CDS features"
-                                        );
-                                }
+                    try {
+                        CSeq_id::ParseIDs(ids, val,                                
+                                CSeq_id::fParse_ValidLocal |
+                                CSeq_id::fParse_PartialOK);
+                    }
+                    catch (CSeqIdException& e) 
+                    {
+                        x_ProcessMsg(
+                                ILineError::eProblem_QualifierBadValue, eDiag_Error,
+                                feat_name, qual, val,
+                                "Invalid protein_id  : " + val);
+                        return true;
+                    }
+                    
+                    if (featType == CSeqFeatData::e_Cdregion) {
+                        for (const auto& id : ids) {
+                            auto id_string = id->GetSeqIdString(true);
+                            auto res = m_ProcessedProteinIds.insert(id_string);
+                            if (res.second == false) { // Insertion failed because Seq-id already encountered
+                                x_ProcessMsg(
+                                    ILineError::eProblem_DuplicateIDs, eDiag_Error, 
+                                    feat_name, qual, val, 
+                                    "Protein ID " + id_string + " appears on multiple CDS features"
+                                );
                              }
                          }
+                    }
                          
-                        if (featType != CSeqFeatData::e_Rna) { 
-                            auto pBestId = GetBestId(ids);
-                            if (pBestId) {
-                                sfp->SetProduct().SetWhole(*pBestId);
-                            }
-                        }
-
-                        if (featType != CSeqFeatData::e_Prot) { 
-                            x_AddGBQualToFeature(sfp, qual, val);
+                    if (featType != CSeqFeatData::e_Rna) { // mRNA only has a protein_id qualifier
+                        auto pBestId = GetBestId(ids);
+                        if (pBestId) {
+                            sfp->SetProduct().SetWhole(*pBestId);
                          }
                      }
-                    return true;
-                } catch( CSeqIdException & ) {
-                    return false;
                  }
+
+                if (featType != CSeqFeatData::e_Prot) { // Mat-peptide has an instantiated product, but no qualifier
+                    x_AddGBQualToFeature(sfp, qual, val);
+                }
+                return true;
              case eQual_regulatory_class:
                  // This should've been handled up in x_AddQualifierToImp
                  // so it's always a bad value to be here
@@ -3298,14 +3308,6 @@ CRef<CSeq_annot> CFeatureTableReader_Imp::ReadSequinFeatureTable (
  
      while ( !m_reader->AtEOF() ) {
  
-        // since reader's UngetLine doesn't actually push back
-        // into the reader's underlying stream, we try to
-        // be careful to detect the most common case of
-        // "there's another feature next"
-        if( m_reader->PeekChar() == '>' ) {
-            break;
-        }
-
          CTempString line = *++(*m_reader);
  
          if( m_reader->GetLineNumber() % 10000 == 0 &&
@@ -3349,17 +3351,6 @@ CRef<CSeq_annot> CFeatureTableReader_Imp::ReadSequinFeatureTable (
              }
  
          } else if (x_ParseFeatureTableLine (line, loc_info, feat, qual, qual_value, offset)) {
-      //  } else if (x_ParseFeatureTableLine (line, &start, &stop, &partial5, &partial3,
-       //                                     &ispoint, &isminus, feat, qual, qual_value, offset)) {
-/*
-            SFeatLocInfo loc_info;
-            loc_info.start_pos = start;
-            loc_info.stop_pos = stop;
-            loc_info.is_5p_partial = partial5;
-            loc_info.is_3p_partial = partial3;
-            loc_info.is_point = ispoint;
-            loc_info.is_minus_strand = isminus;
-            */
              // process line in feature table
  
              replace( qual_value.begin(), qual_value.end(), '\"', '\'' );
@@ -3391,9 +3382,6 @@ CRef<CSeq_annot> CFeatureTableReader_Imp::ReadSequinFeatureTable (
                      // and add first interval
                      x_AddIntervalToFeature (curr_feat_name, sfp, loc_info);
  
-                  //  x_AddIntervalToFeature (curr_feat_name, sfp,
-                   //     start, stop, partial5, partial3, ispoint, isminus);
-
                      ignore_until_next_feature_key = false;
  
                      curr_feat_name = feat;
@@ -3465,7 +3453,6 @@ CRef<CSeq_annot> CFeatureTableReader_Imp::ReadSequinFeatureTable (
      {
          x_CreateGenesFromCDSs(sap, choiceToFeatMap, flags);
      }
-
      return sap;
  }
  
diff --git a/c++/src/objtools/readers/rm_reader.cpp b/c++/src/objtools/readers/rm_reader.cpp

index 442e6dd6720a0da48c1ce7b2f554c3544536badf..bce2ba8e8b504b70ebad850b1058495610ffe716 100644 (file)
--- a/c++/src/objtools/readers/rm_reader.cpp
+++ b/c++/src/objtools/readers/rm_reader.cpp
@@ -1,4 +1,4 @@
-/*  $Id: rm_reader.cpp 601856 2020-02-14 14:44:09Z mozese2 $
+/*  $Id: rm_reader.cpp 610834 2020-06-24 15:29:06Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -789,6 +789,9 @@ CRepeatMaskerReader::ReadSeqAnnot(ILineReader& lr, ILineErrorListener* pMessageL
              continue;
          }
          ++record_counter;
+        //if (record_counter == 91555) {
+        //    cerr << "";
+        //}
  
          SRepeatRegion mask_data;
          if ( ! ParseRecord( line, mask_data ) ) {
@@ -973,15 +976,15 @@ bool CRepeatMaskerReader::ParseRecord(const string& record, SRepeatRegion& mask_
          // fields position 12 and 14 flip depending on the strand value.
          string rpt_left;
          if (mask_data.IsReverseStrand()) {
-            mask_data.rpt_pos_begin = NStr::StringToUInt( field14 );
+            mask_data.rpt_pos_begin = NStr::StringToInt( field14 );
              rpt_left = field12;
          } else {
-            mask_data.rpt_pos_begin = NStr::StringToUInt( field12 );
+            mask_data.rpt_pos_begin = NStr::StringToInt( field12 );
              rpt_left = field14;
          }
  
          StripParens(rpt_left);
-        mask_data.rpt_left = NStr::StringToUInt(rpt_left);
+        mask_data.rpt_left = NStr::StringToInt(rpt_left);
  
          // 15: "ID"
          ++it;
diff --git a/c++/src/objtools/readers/source_mod_parser.cpp b/c++/src/objtools/readers/source_mod_parser.cpp

index bbe7d8731d3d93abf5aff1c37d843d6403d3727e..4a308400f97212662ddbf63e711c8b9418567db9 100644 (file)
--- a/c++/src/objtools/readers/source_mod_parser.cpp
+++ b/c++/src/objtools/readers/source_mod_parser.cpp
@@ -1,4 +1,4 @@
-/*  $Id: source_mod_parser.cpp 571491 2018-09-27 16:13:08Z foleyjp $
+/*  $Id: source_mod_parser.cpp 610750 2020-06-23 18:10:12Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -1158,11 +1158,11 @@ void CSourceModParser::x_ApplyMods(CAutoInitDesc<CBioSource>& bsrc,
  
  
      if ((mod = FindMod(s_Mod_taxid)) != NULL) {
-        bsrc->SetOrg().SetTaxId( NStr::StringToInt(mod->value, NStr::fConvErr_NoThrow) );
+        bsrc->SetOrg().SetTaxId( NStr::StringToNumeric<TEntrezId>(mod->value, NStr::fConvErr_NoThrow) );
      }
      else 
-    if (reset_taxid && bsrc->IsSetOrgname() && bsrc->GetOrg().GetTaxId() != 0) {
-       bsrc->SetOrg().SetTaxId(0);
+    if (reset_taxid && bsrc->IsSetOrgname() && bsrc->GetOrg().GetTaxId() != ZERO_ENTREZ_ID) {
+       bsrc->SetOrg().SetTaxId(ZERO_ENTREZ_ID);
      }
  }
  
@@ -1582,7 +1582,7 @@ void s_ApplyPubMods(CBioseq& bioseq, const CSourceModParser::TModsRange& range)
  {
      for (CSourceModParser::TModsCI it = range.first;
           it != range.second;  ++it) {
-        TIntId pmid = NStr::StringToNumeric<TIntId>(it->value, NStr::fConvErr_NoThrow);
+        TEntrezId pmid = NStr::StringToNumeric<TEntrezId>(it->value, NStr::fConvErr_NoThrow);
          CRef<CPub> pub(new CPub);
          pub->SetPmid().Set(pmid);
          CRef<CSeqdesc> pubdesc(new CSeqdesc);
diff --git a/c++/src/serial/grpc_integration/grpc_integration.cpp b/c++/src/serial/grpc_integration/grpc_integration.cpp

index eb172a628f15f36938c16690a66d7b54636b9058..bebbb1d4afdde6d71067d7224009529c91cb8414 100644 (file)
--- a/c++/src/serial/grpc_integration/grpc_integration.cpp
+++ b/c++/src/serial/grpc_integration/grpc_integration.cpp
@@ -1,4 +1,4 @@
-/*  $Id: grpc_integration.cpp 606576 2020-04-23 17:12:06Z ivanov $
+/*  $Id: grpc_integration.cpp 608310 2020-05-14 12:35:38Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -458,15 +458,16 @@ bool CGRPCServerCallbacks::x_IsRealRequest(const TGRPCServerContext* sctx)
  /// (in order of priority):
  /// - Config file entry "[section] variable"
  /// - Environment variables: env_var_name (if not empty/NULL);
-///   then "NCBI_CONFIG__<section>__<name>"; then "grpc_proxy"
+///   then "NCBI_CONFIG__<section>__<name>"; then "GRPC_PROXY"
  /// - The hard-coded NCBI default "linkerd:4142"
  string g_NCBI_GRPC_GetAddress(const char* section,
                                const char* variable,
-                              const char* env_var_name)
+                              const char* env_var_name,
+                              int* value_source)
  {
-    auto addr = g_GetConfigString(section, variable, env_var_name, nullptr);
+    auto addr = g_GetConfigString(section, variable, env_var_name, nullptr, value_source);
      if ( addr.empty() ) {
-        addr = g_GetConfigString(nullptr, nullptr, "grpc_proxy", "linkerd:4142");
+        addr = g_GetConfigString(nullptr, nullptr, "GRPC_PROXY", "linkerd:4142", value_source);
      }
      return addr;
  }
diff --git a/c++/src/serial/rpcbase.cpp b/c++/src/serial/rpcbase.cpp

index 4ea37a55e97f7e6603a82838afb65d85e9c6abae..23157cd86dc5f4d66d528c51c6d12c51bbf8bdc4 100644 (file)
--- a/c++/src/serial/rpcbase.cpp
+++ b/c++/src/serial/rpcbase.cpp
@@ -1,4 +1,4 @@
-/*  $Id: rpcbase.cpp 604211 2020-03-24 16:03:08Z ivanov $
+/*  $Id: rpcbase.cpp 615799 2020-09-03 18:56:59Z ivanov $
  * ===========================================================================
  *
  *                            PUBLIC DOMAIN NOTICE
@@ -37,14 +37,81 @@
  BEGIN_NCBI_SCOPE
  
  
+static string s_GetConfigString(const string& service,
+                                const string& variable)
+{
+    if (service.empty() || variable.empty()) return kEmptyStr;
+
+    string env_var = service + "__RPC_CLIENT__" + variable;
+    NStr::ToUpper(env_var);
+    const TXChar* str = NcbiSys_getenv(_T_XCSTRING(env_var.c_str()));
+
+    if (str && *str) {
+        return _T_CSTRING(str);
+    }
+
+    CNcbiApplicationGuard app = CNcbiApplication::InstanceGuard();
+    if (app  &&  app->HasLoadedConfig()) {
+        return app->GetConfig().Get(service + ".rpc_client", variable);
+    }
+    return kEmptyStr;
+}
+
+
+static unsigned int s_GetTryLimit(const string& service)
+{
+    string str = s_GetConfigString(service, "max_try");
+    if (!str.empty()) {
+        try {
+            unsigned int ret = NStr::StringToNumeric<unsigned int>(str);
+            return ret > 0 ? ret : 3;
+        }
+        catch (...) {
+            ERR_POST(Warning << "Bad " << service << "/max_try value: " << str);
+        }
+    }
+    return 3;
+}
+
+
+static CTimeSpan s_GetRetryDelay(const string& service)
+{
+    CTimeSpan ret;
+    string str = s_GetConfigString(service, "retry_delay");
+    if (!str.empty()) {
+        try {
+            double sec = NStr::StringToNumeric<double>(str);
+            return CTimeSpan(sec > 0 ? sec : 0);
+        }
+        catch (...) {
+            ERR_POST(Warning << "Bad " << service << "/retry_delay value: " << str);
+        }
+    }
+    return ret;
+}
+
+
+CRPCClient_Base::CRPCClient_Base(const string&     service,
+                                 ESerialDataFormat format)
+    : m_Format(format),
+      m_RetryDelay(s_GetRetryDelay(service)),
+      m_TryCount(0),
+      m_RecursionCount(0),
+      m_Service(service),
+      m_TryLimit(s_GetTryLimit(service))
+{
+}
+
+
  CRPCClient_Base::CRPCClient_Base(const string&     service,
-                                 ESerialDataFormat format,
-                                 unsigned int      retry_limit)
+    ESerialDataFormat format,
+    unsigned int      try_limit)
      : m_Format(format),
-      m_RetryCount(0),
+      m_RetryDelay(s_GetRetryDelay(service)),
+      m_TryCount(0),
        m_RecursionCount(0),
        m_Service(service),
-      m_RetryLimit(retry_limit)
+      m_TryLimit(try_limit > 0 ? try_limit : 3)
  {
  }
  
@@ -102,7 +169,7 @@ void CRPCClient_Base::SetAffinity(const string& affinity)
  {
      if (m_Affinity != affinity) {
          if (m_RecursionCount > 1) {
-            ERR_POST("Affinity can not be changed on a recursive request");
+            ERR_POST("Affinity cannot be changed on a recursive request");
              return;
          }
          Disconnect();
@@ -151,24 +218,26 @@ void CRPCClient_Base::x_Ask(const CSerialObject& request, CSerialObject& reply)
  {
      CMutexGuard LOCK(m_Mutex);
      if (m_RecursionCount == 0) {
-        m_RetryCount = 0;
+        m_TryCount = 0;
      }
      // Recursion counter needs to be decremented on both success and failure.
      CCounterGuard recursion_guard(&m_RecursionCount);
  
-    const string& request_name  = ( request.GetThisTypeInfo() != NULL 
-                                ? ("("+request.GetThisTypeInfo()->GetName()+")") : "(no_request_type)");
+    const string& request_name = request.GetThisTypeInfo() != NULL 
+        ? ("("+request.GetThisTypeInfo()->GetName()+")")
+        : "(no_request_type)";
  
      // Reset headers from previous requests if any.
      m_RetryCtx.Reset();
-    double max_span = m_RetryDelay.GetAsDouble()*m_RetryLimit;
+    double max_span = m_RetryDelay.GetAsDouble()*m_TryLimit;
      double span = max_span;
      bool limit_by_time = !m_RetryDelay.IsEmpty();
      // Retry context can be either the default one (m_RetryCtx), or provided
      // through an exception.
      for (;;) {
          if ( IsCanceled() ) {
-            NCBI_THROW(CRPCClientException, eFailed, "Request canceled "+request_name);
+            NCBI_THROW(CRPCClientException, eFailed,
+                       "Request canceled " + request_name);
          }
          try {
              SetAffinity(x_GetAffinity(request));
@@ -185,7 +254,8 @@ void CRPCClient_Base::x_Ask(const CSerialObject& request, CSerialObject& reply)
              }
              m_Stream->peek(); // send data, read response headers
              if (!m_Stream->good()  &&  !m_Stream->eof()) {
-                NCBI_THROW(CRPCClientException, eFailed, "Connection stream is in bad state "+request_name);
+                NCBI_THROW(CRPCClientException, eFailed,
+                           "Connection stream is in bad state " + request_name);
              }
              if (m_RetryCtx.IsSetContentOverride()  &&
                  m_RetryCtx.GetContentOverride() == CHttpRetryContext::eFromResponse) {
@@ -214,7 +284,7 @@ void CRPCClient_Base::x_Ask(const CSerialObject& request, CSerialObject& reply)
                  // proceed to retry
              }
              else if ( !dynamic_cast<CSerialException*>(&e)
-                &&  !dynamic_cast<CIOException*>(&e) ) {
+                      &&  !dynamic_cast<CIOException*>(&e) ) {
                  // Not a retry related exception, abort.
                  throw;
              }
@@ -228,18 +298,18 @@ void CRPCClient_Base::x_Ask(const CSerialObject& request, CSerialObject& reply)
  
          // If using time limit, allow to make more than m_RetryLimit attempts
          // if the server has set shorter delay.
-        if ((!limit_by_time  &&  ++m_RetryCount >= m_RetryLimit)  ||
-            !x_ShouldRetry(m_RetryCount)) {
+        if ((!limit_by_time  &&  ++m_TryCount >= m_TryLimit)  ||
+            !x_ShouldRetry(m_TryCount)) {
              NCBI_THROW(CRPCClientException, eFailed,
-                       "Failed to receive reply after " +
-                       NStr::NumericToString(m_RetryCount) +
-                       (m_RetryCount == 1 ? " try" : " tries") + 
-                       " " + request_name );
+                       "Failed to receive reply after "
+                       + NStr::NumericToString(m_TryCount)
+                       + (m_TryCount == 1 ? " try " : " tries ")
+                       + request_name );
          }
          if ( m_RetryCtx.IsSetStop() ) {
              NCBI_THROW(CRPCClientException, eFailed,
-                "Retrying request stopped by the server: " +
-                m_RetryCtx.GetStopReason() + " " + request_name);
+                       "Retrying request stopped by the server: "
+                       + m_RetryCtx.GetStopReason() + ' ' + request_name);
          }
          CTimeSpan delay = x_GetRetryDelay(span);
          if ( !delay.IsEmpty() ) {
@@ -248,24 +318,26 @@ void CRPCClient_Base::x_Ask(const CSerialObject& request, CSerialObject& reply)
              span -= delay.GetAsDouble();
              if (limit_by_time  &&  span <= 0) {
                  NCBI_THROW(CRPCClientException, eFailed,
-                    "Failed to receive reply in " +
-                    CTimeSpan(max_span).AsSmartString() +
-                    " " + request_name);
+                           "Failed to receive reply in "
+                           + CTimeSpan(max_span).AsSmartString()
+                           + ' ' + request_name);
              }
          }
          // Always reconnect on retry.
          if ( IsCanceled() ) {
-            NCBI_THROW(CRPCClientException, eFailed, "Request canceled "+request_name);
+            NCBI_THROW(CRPCClientException, eFailed,
+                       "Request canceled " + request_name);
          }
          try {
              Reset();
-        } STD_CATCH_ALL_XX(Serial_RPCClient, 1 ,"CRPCClient_Base::Reset()"+request_name);
+        } STD_CATCH_ALL_XX(Serial_RPCClient, 1,
+                           "CRPCClient_Base::Reset() " + request_name);
      }
      // Reset retry context when done.
      m_RetryCtx.Reset();
      // If there were any retries, force disconnect to prevent using old
      // retry url, args etc. with the next request.
-    if ( m_RetryCount > 0  &&  m_RecursionCount <= 1 ) {
+    if ( m_TryCount > 0  &&  m_RecursionCount <= 1 ) {
          Disconnect();
      }
  }
@@ -274,7 +346,7 @@ void CRPCClient_Base::x_Ask(const CSerialObject& request, CSerialObject& reply)
  bool CRPCClient_Base::x_ShouldRetry(unsigned int tries) /* NCBI_FAKE_WARNING */
  {
      _TRACE("CRPCClient_Base::x_ShouldRetry: retrying after " << tries
-           << " failures");
+           << " failure(s)");
      return true;
  }
  
diff --git a/c++/src/util/format_guess.cpp b/c++/src/util/format_guess.cpp

index 854518add2dd61187a9fc435a71428f3b8b46331..9f64e4e492fa26e30e394befae5f5eb581ba4f47 100644 (file)
--- a/c++/src/util/format_guess.cpp
+++ b/c++/src/util/format_guess.cpp
@@ -1,4 +1,4 @@
-/*  $Id: format_guess.cpp 600741 2020-01-27 15:56:56Z foleyjp $
+/*  $Id: format_guess.cpp 612523 2020-07-23 11:23:30Z ivanov $
   * ===========================================================================
   *
   *                            PUBLIC DOMAIN NOTICE
@@ -152,9 +152,9 @@ static void init_symbol_type_table(void)
  }
  
  
-// Must list all EFormats except eUnknown and eFormat_max. 
+// Must list all *supported* EFormats except eUnknown and eFormat_max. 
  // Will cause assertion if violated!
-int CFormatGuess::s_CheckOrder[] =
+vector<int> CFormatGuess::sm_CheckOrder =
  {
      eBam, // must precede eGZip!
      eZip,
@@ -180,74 +180,107 @@ int CFormatGuess::s_CheckOrder[] =
      eHgvs,
      eDistanceMatrix,
      eFlatFileSequence,
+    eFlatFileUniProt,
+    eFlatFileEna,
+    eFlatFileGenbank,
      eFiveColFeatureTable,
      eSnpMarkers,
      eFasta,
      eTextASN,
      eAlignment,    
      eTaxplot,
-    ePhrapAce,
      eTable,
      eBinaryASN,
+    ePhrapAce,
      eUCSCRegion,
-    eJSON
+    eJSON,
  };
  
  
  // This array must stay in sync with enum EFormat, but that's not
  // supposed to change in the middle anyway, so the explicit size
  // should suffice to avoid accidental skew.
-const char* const CFormatGuess::sm_FormatNames[CFormatGuess::eFormat_max] =
-{
-    "unknown",
-    "binary ASN.1",
-    "RepeatMasker",
-    "GFF/GTF Poisoned",
-    "Glimmer3",
-    "AGP",
-    "XML",
-    "WIGGLE",
-    "BED",
-    "BED15",
-    "Newick",
-    "alignment",
-    "distance matrix",
-    "flat-file sequence",
-    "five-column feature table",
-    "SNP Markers",
-    "FASTA",
-    "text ASN.1",
-    "Taxplot",
-    "Phrap ACE",
-    "table",
-    "GTF",
-    "GFF3",
-    "GFF2",
-    "HGVS",
-    "GVF",
-    "zip",
-    "gzip",
-    "bzip2",
-    "lzo",
-    "SRA",
-    "BAM",
-    "VCF",
-    "UCSC Region",
-    "GFF Augustus",
-    "JSON",
-    "PSL",
+const CFormatGuess::NAME_MAP CFormatGuess::sm_FormatNames = {
+    {eUnknown, "unknown"},
+    {eBinaryASN, "binary ASN.1"},
+    {eRmo, "RepeatMasker"},
+    {eGtf_POISENED, "GFF/GTF Poisoned"},
+    {eGlimmer3, "Glimmer3"},
+    {eAgp, "AGP"},
+    {eXml, "XML"},
+    {eWiggle, "WIGGLE"},
+    {eBed, "BED"},
+    {eBed15, "BED15"},
+    {eNewick, "Newick"},
+    {eAlignment, "alignment"},
+    {eDistanceMatrix, "distance matrix"},
+    {eFlatFileSequence, "flat-file sequence"},
+    {eFiveColFeatureTable, "five-column feature table"},
+    {eSnpMarkers, "SNP Markers"},
+    {eFasta, "FASTA"},
+    {eTextASN, "text ASN.1"},
+    {eTaxplot, "Taxplot"},
+    {ePhrapAce, "Phrap ACE"},
+    {eTable, "table"},
+    {eGtf, "GTF"},
+    {eGff3, "GFF3"},
+    {eGff2, "GFF2"},
+    {eHgvs, "HGVS"},
+    {eGvf, "GVF"},
+    {eZip, "zip"},
+    {eGZip, "gzip"},
+    {eBZip2, "bzip2"},
+    {eLzo, "lzo"},
+    {eSra, "SRA"},
+    {eBam, "BAM"},
+    {eVcf, "VCF"},
+    {eUCSCRegion, "UCSC Region"},
+    {eGffAugustus, "GFF Augustus"},
+    {eJSON, "JSON"},
+    {ePsl, "PSL"},
+    {eAltGraphX, "altGraphX"},
+    {eBed5FloatScore, "BED5 float score"},
+    {eBedGraph, "BED graph"},
+    {eBedRnaElements, "BED Rna elements"},
+    {eBigBarChart, "bigBarChart"},
+    {eBigBed, "BigBED"},
+    {eBigPsl, "BigPSL"},
+    {eBigChain, "BigChain"},
+    {eBigMaf, "BigMaf"},
+    {eBigWig, "BigWig"},
+    {eBroadPeak, "BroadPeak"},
+    {eChain, "Chain"},
+    {eClonePos, "ClonePos"},
+    {eColoredExon, "ColoredExon"},
+    {eCtgPos, "CtgPos"},
+    {eDownloadsOnly, "DowloadsOnly"},
+    {eEncodeFiveC, "EncodeFiveC"},
+    {eExpRatio, "ExpRatio"},
+    {eFactorSource, "FactorSource"},
+    {eGenePred, "GenePred"},
+    {eLd2, "Ld2"},
+    {eNarrowPeak, "NarrowPeak"},
+    {eNetAlign, "NetAlign"},
+    {ePeptideMapping, "PeptideMapping"},
+    {eRmsk, "Rmsk"},
+    {eSnake, "Snake"},
+    {eVcfTabix, "VcfTabix"},
+    {eWigMaf, "WigMaf"},
+    {eFlatFileGenbank, "Genbank FlatFile"},
+    {eFlatFileEna, "ENA FlatFile"},
+    {eFlatFileUniProt, "UniProt FlatFile"},
  };
  
  const char*
  CFormatGuess::GetFormatName(EFormat format)
  {
-    unsigned int i = static_cast<unsigned int>(format);
-    if (i >= static_cast <unsigned int>(eFormat_max)) {
+    auto formatIt = sm_FormatNames.find(format);
+    if (formatIt == sm_FormatNames.end()) {
          NCBI_THROW(CUtilException, eWrongData,
                     "CFormatGuess::GetFormatName: out-of-range format value "
-                   + NStr::IntToString(i));
+                   + NStr::IntToString(format));
      }
-    return sm_FormatNames[i];
+    return formatIt->second;
  }
  
  
@@ -381,6 +414,14 @@ CFormatGuess::~CFormatGuess()
      }
  }
  
+//  ----------------------------------------------------------------------------
+/// Report whether `format` appears in sm_CheckOrder, the list that
+/// GuessFormat() walks when probing the input.
+///
+/// @param format  format identifier to look up
+/// @return true if sm_CheckOrder contains `format`, false otherwise
+bool
+CFormatGuess::IsSupportedFormat(EFormat format)
+{
+    const auto orderEnd = sm_CheckOrder.end();
+    return std::find(sm_CheckOrder.begin(), orderEnd, format) != orderEnd;
+}
+
  //  ----------------------------------------------------------------------------
  CFormatGuess::EFormat
  CFormatGuess::GuessFormat( EMode )
@@ -411,12 +452,12 @@ CFormatGuess::GuessFormat(
      }
  
      EMode mode = eQuick;
-    size_t uFormatCount = ArraySize(s_CheckOrder);
+    size_t uFormatCount = sm_CheckOrder.size();
  
      // First, try to use hints
      if ( !m_Hints.IsEmpty() ) {
          for (size_t f = 0; f < uFormatCount; ++f) {
-            EFormat fmt = EFormat( s_CheckOrder[f] );
+            EFormat fmt = EFormat( sm_CheckOrder[f] );
              if (m_Hints.IsPreferred(fmt)  &&  x_TestFormat(fmt, mode)) {
                  return fmt;
              }
@@ -425,7 +466,7 @@ CFormatGuess::GuessFormat(
  
      // Check other formats, skip the ones that are disabled through hints
      for (size_t f = 0; f < uFormatCount; ++f) {
-        EFormat fmt = EFormat( s_CheckOrder[f] );
+        EFormat fmt = EFormat( sm_CheckOrder[f] );
          if ( ! m_Hints.IsDisabled(fmt)  &&  x_TestFormat(fmt, mode) ) {
              return fmt;
          }
@@ -532,6 +573,12 @@ bool CFormatGuess::x_TestFormat(EFormat format, EMode mode)
          return TestFormatAugustus( mode );
      case eJSON:
          return TestFormatJson( mode );
+    case eFlatFileGenbank:
+        return TestFormatFlatFileGenbank( mode );
+    case eFlatFileEna:
+        return TestFormatFlatFileEna( mode );
+    case eFlatFileUniProt:
+        return TestFormatFlatFileUniProt( mode );
      default:
          NCBI_THROW( CCoreException, eInvalidArg,
              "CFormatGuess::x_TestFormat(): Unsupported format ID (" +
@@ -543,13 +590,8 @@ bool CFormatGuess::x_TestFormat(EFormat format, EMode mode)
  void
  CFormatGuess::Initialize()
  {
-    NCBI_ASSERT(eFormat_max-2 == sizeof( s_CheckOrder ) / sizeof( int ),
-        "Indices in s_CheckOrder do not match format count ---"
-        "update s_CheckOrder to list all formats" 
-    );
-    NCBI_ASSERT(eFormat_max == sizeof(sm_FormatNames) / sizeof(const char*)
-                &&  sm_FormatNames[eFormat_max - 1] != NULL,
-                "sm_FormatNames does not list all possible formats");
+    NCBI_ASSERT(eFormat_max == sm_FormatNames.size(),
+        "sm_FormatNames does not list all possible formats");
      m_pTestBuffer = 0;
  
      m_bStatsAreValid = false;
@@ -700,6 +742,33 @@ bool CFormatGuess::TestFormatRepeatMasker(
          IsInputRepeatMaskerWithoutHeader();
  }
  
+
+//  ----------------------------------------------------------------------------
+
+/// Heuristic: does `line` look like a row of nucleotide sequence data?
+///
+/// @param line       text line to inspect
+/// @param minLength  lines shorter than this are rejected outright
+/// @return true when the line holds only letters and whitespace and more
+///         than 90% of its characters are flagged fDNA_Main_Alphabet in
+///         symbol_type_table; false otherwise
+static bool s_LooksLikeNucSeqData(const string& line, size_t minLength=10) {
+    if (line.size()<minLength) {
+        return false;
+    }
+
+    size_t nucCount=0;
+    for (const char ch : line) {
+        // The <cctype> classifiers are only defined for values representable
+        // as unsigned char (or EOF); a negative plain char is undefined
+        // behaviour and would also yield a negative table index.
+        // NOTE(review): assumes symbol_type_table has at least 256 entries.
+        const auto uch = static_cast<unsigned char>(ch);
+        if (isalpha(uch)) {
+            if (symbol_type_table[uch] & fDNA_Main_Alphabet) {
+                ++nucCount;
+            }
+            continue;
+        }
+
+        // Anything that is neither a letter nor whitespace rules the line out.
+        if (!isspace(uch)) {
+            return false;
+        }
+    }
+
+    // Compare in floating point: the integer quotient nucCount/line.size()
+    // can only be 0 or 1, which silently turned the 90% threshold into 100%
+    // (and divided by zero for an empty line with minLength == 0).
+    return (nucCount > 0.9 * line.size());
+}
+
+
  //  ----------------------------------------------------------------------------
  bool
  CFormatGuess::TestFormatPhrapAce(
@@ -709,9 +778,19 @@ CFormatGuess::TestFormatPhrapAce(
          return false;
      }
  
-    ITERATE( list<string>, it, m_TestLines ) {
-        if ( IsLinePhrapId( *it ) ) {
-            return true;
+    if (memchr(m_pTestBuffer, 0, m_iTestDataSize)) { // Cannot contain NuLL bytes
+        return false;                                // RW-1102
+    }
+
+    bool foundId = false;
+    for (const auto& line : m_TestLines) {
+        if (foundId) {
+            if (s_LooksLikeNucSeqData(line)) {
+                return true;
+            }
+        }
+        else if (IsLinePhrapId(line)) {
+            foundId = true;
          }
      }
      return false;
@@ -1994,6 +2073,381 @@ bool CFormatGuess::TestFormatPsl(EMode mode)
      return (uPslLineCount != 0);
  }
  
+//  ----------------------------------------------------------------------------
+/// Advance to the next GenBank keyword line and split it.
+///
+/// Starting at lineIt, lines whose first character is a blank are stepped
+/// over. Such a line is tolerated only if its indentation is one of
+/// 2, 3, 5, 12 or 21 blanks, or if its first character that is neither a
+/// blank nor a digit sits at offset 10 (presumably a numbered sequence row).
+/// The first line that starts in column 0 is split on blanks with
+/// NStr::SplitInTwo, and lineIt is left just past it.
+///
+/// @param lineIt   current position; advanced as lines are consumed
+/// @param endIt    end of the line list
+/// @param keyword  receives the first part of the split line
+/// @param data     receives the second part of the split line
+/// @return false if there is no line at lineIt, that line is longer than 79
+///         characters, an indented line has an unexpected layout, the input
+///         runs out before a keyword line is found, or the split throws;
+///         true otherwise
+bool
+GenbankGetKeywordLine(
+    list<string>::iterator& lineIt,
+    list<string>::iterator endIt,
+    string& keyword,
+    string& data)
+{
+    if (lineIt == endIt  ||  lineIt->size() > 79) {
+        return false;
+    }
+
+    // Indentation widths accepted for lines that do not start a keyword.
+    static const vector<size_t> kAcceptedIndents = {0, 2, 3, 5, 12, 21};
+
+    for (;;) {
+        const auto indent = lineIt->find_first_not_of(" ");
+        if (indent == 0) {
+            break;  // found a line that starts in the first column
+        }
+        const bool indentOk = std::find(
+            kAcceptedIndents.begin(), kAcceptedIndents.end(), indent) !=
+                kAcceptedIndents.end();
+        if (!indentOk  &&  lineIt->find_first_not_of(" 1234567890") != 10) {
+            return false;
+        }
+        if (++lineIt == endIt) {
+            return false;
+        }
+    }
+
+    try {
+        NStr::SplitInTwo(
+            *lineIt, " ", keyword, data, NStr::fSplit_MergeDelimiters);
+    }
+    catch (CException&) {
+        return false;
+    }
+    ++lineIt;
+    return true;
+}
+
+//  ----------------------------------------------------------------------------
+/// Decide whether the sampled input looks like a GenBank flat file by
+/// checking that its leading keywords appear in the expected order:
+/// LOCUS, DEFINITION, ACCESSION, [NID], VERSION, [NID], PROJECT*, DBLINK*,
+/// KEYWORDS.
+///
+/// @return true once a KEYWORDS line is reached in sequence; false if the
+///         sample is unavailable, too short, or deviates from that order
+bool CFormatGuess::TestFormatFlatFileGenbank(
+    EMode /*unused*/)
+{
+    // see ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
+
+    if ( ! (EnsureStats()  &&  EnsureSplitLines()) ) {
+        return false;
+    }
+
+    // Quick rejection: fewer lines than the number of required records.
+    if (m_TestLines.size() < 9) {
+        return false;
+    }
+
+    string keyword, data;
+    auto recordIt = m_TestLines.begin();
+    const auto endIt = m_TestLines.end();
+
+    // Fetch the next keyword line; false when none can be obtained.
+    auto nextKeyword = [&]() -> bool {
+        return GenbankGetKeywordLine(recordIt, endIt, keyword, data);
+    };
+    // Consume every consecutive keyword line carrying `tag`.
+    auto skipRun = [&](const char* tag) -> bool {
+        while (keyword == tag) {
+            if (!nextKeyword()) {
+                return false;
+            }
+        }
+        return true;
+    };
+
+    // LOCUS: exactly one, and it must be the very first line.
+    NStr::SplitInTwo(
+        *recordIt, " ", keyword, data, NStr::fSplit_MergeDelimiters);
+    if (keyword != "LOCUS") {
+        return false;
+    }
+    ++recordIt;
+    if (!nextKeyword()) {
+        return false;
+    }
+
+    // DEFINITION: one or more.
+    if (keyword != "DEFINITION"  ||  !skipRun("DEFINITION")) {
+        return false;
+    }
+
+    // ACCESSION: one or more.
+    if (keyword != "ACCESSION"  ||  !skipRun("ACCESSION")) {
+        return false;
+    }
+
+    // NID: zero or one, accepted either before or after VERSION.
+    const bool nidBeforeVersion = (keyword == "NID");
+    if (nidBeforeVersion  &&  !nextKeyword()) {
+        return false;
+    }
+
+    // VERSION: exactly one.
+    if (keyword != "VERSION"  ||  !nextKeyword()) {
+        return false;
+    }
+
+    if (!nidBeforeVersion  &&  keyword == "NID"  &&  !nextKeyword()) {
+        return false;
+    }
+
+    // PROJECT and DBLINK: zero or more of each, in that order.
+    if (!skipRun("PROJECT")  ||  !skipRun("DBLINK")) {
+        return false;
+    }
+
+    // Reaching KEYWORDS is taken as sufficient evidence; anything wrong
+    // farther down is left for the actual reader to report.
+    return keyword == "KEYWORDS";
+}
+
+//  ----------------------------------------------------------------------------
+/// Fetch the next ENA line that does not start with "XX" and split it into
+/// its line code and payload.
+///
+/// @param lineIt    current position; moved past any "XX" lines and past the
+///                  line that is returned
+/// @param endIt     end of the line list
+/// @param lineCode  receives the first part of the split line (the whole
+///                  line if the split throws)
+/// @param lineData  receives the second part of the split line (empty if the
+///                  split throws)
+/// @return false if only "XX" lines (or nothing) remain, true otherwise
+bool
+EnaGetLineData(
+    list<string>::iterator& lineIt,
+    list<string>::iterator endIt,
+    string& lineCode,
+    string& lineData)
+{
+    for ( ; lineIt != endIt; ++lineIt) {
+        if (!NStr::StartsWith(*lineIt, "XX")) {
+            break;
+        }
+    }
+    if (lineIt == endIt) {
+        return false;
+    }
+
+    try {
+        NStr::SplitInTwo(
+            *lineIt, " ", lineCode, lineData, NStr::fSplit_MergeDelimiters);
+    }
+    catch(CException&) {
+        // Fall back to treating the whole line as the code.
+        lineCode = *lineIt;
+        lineData.clear();
+    }
+    ++lineIt;
+    return true;
+}
+    
+//  ----------------------------------------------------------------------------
+/// Decide whether the sampled input looks like an ENA flat file by checking
+/// the order of its leading line codes: ID, AC+, PR*, DT DT, DE+, KW+, OS+,
+/// OC+ ("XX" lines are skipped by EnaGetLineData).
+///
+/// @return true if the codes follow that order, or if the sample runs out
+///         after the DE block has started; false otherwise
+bool CFormatGuess::TestFormatFlatFileEna(
+    EMode /*unused*/)
+{
+    // see: ftp://ftp.ebi.ac.uk/pub/databases/ena/sequence/release/doc/usrman.txt
+
+    if ( ! (EnsureStats()  &&  EnsureSplitLines()) ) {
+        return false;
+    }
+
+    // Quick rejection: fewer lines than the number of required records.
+    if (m_TestLines.size() < 19) {
+        return false;
+    }
+
+    string lineCode, lineData;
+    auto recordIt = m_TestLines.begin();
+    const auto endIt = m_TestLines.end();
+
+    // Fetch the next line; false when the sample is exhausted.
+    auto nextLine = [&]() -> bool {
+        return EnaGetLineData(recordIt, endIt, lineCode, lineData);
+    };
+
+    // ID: exactly one, and it must be the very first line.
+    NStr::SplitInTwo(
+        *recordIt, " ", lineCode, lineData, NStr::fSplit_MergeDelimiters);
+    if (lineCode != "ID") {
+        return false;
+    }
+    ++recordIt;
+
+    // AC: one or more.
+    if (!nextLine()  ||  lineCode != "AC") {
+        return false;
+    }
+    while (lineCode == "AC") {
+        if (!nextLine()) {
+            return false;
+        }
+    }
+
+    // PR: zero or more.
+    while (lineCode == "PR") {
+        if (!nextLine()) {
+            return false;
+        }
+    }
+
+    // DT: two lines are required here (UniProt is checked for three).
+    for (int remaining = 2; remaining > 0; --remaining) {
+        if (lineCode != "DT"  ||  !nextLine()) {
+            return false;
+        }
+    }
+
+    // DE, KW, OS, OC: one or more of each, in this order. Running out of
+    // sample lines inside any of these blocks still counts as a match.
+    static const char* const kTrailingCodes[] = {"DE", "KW", "OS", "OC"};
+    for (const char* code : kTrailingCodes) {
+        if (lineCode != code) {
+            return false;
+        }
+        while (lineCode == code) {
+            if (!nextLine()) {
+                return true;
+            }
+        }
+    }
+
+    // Everything lined up: treat the input as ENA.
+    return true;
+}
+
+//  ----------------------------------------------------------------------------
+/// Fetch the line at lineIt and split it into its line code and payload.
+///
+/// @param lineIt    current position; moved past the line that is returned
+/// @param endIt     end of the line list
+/// @param lineCode  receives the first part of the split line (the whole
+///                  line if the split throws)
+/// @param lineData  receives the second part of the split line (empty if the
+///                  split throws)
+/// @return false if lineIt is already at endIt, true otherwise
+bool
+UniProtGetLineData(
+    list<string>::iterator& lineIt,
+    list<string>::iterator endIt,
+    string& lineCode,
+    string& lineData)
+{
+    const bool haveLine = (lineIt != endIt);
+    if (haveLine) {
+        try {
+            NStr::SplitInTwo(
+                *lineIt, " ", lineCode, lineData,
+                NStr::fSplit_MergeDelimiters);
+        }
+        catch(CException&) {
+            // Fall back to treating the whole line as the code.
+            lineCode = *lineIt;
+            lineData.clear();
+        }
+        ++lineIt;
+    }
+    return haveLine;
+}
+    
+//  ----------------------------------------------------------------------------
+/// Decide whether the sampled input looks like a UniProt flat file by
+/// checking the order of its leading line codes: ID, AC+, DT DT DT, DE+,
+/// then GN or OS.
+///
+/// This only establishes that the input is *meant* to be UniProt; real
+/// validation is left to downstream readers, which can give better messages.
+///
+/// @return true if the codes follow that order, or if the sample runs out
+///         inside the DE block; false otherwise
+bool CFormatGuess::TestFormatFlatFileUniProt(
+    EMode /*unused*/)
+{
+    // see: https://web.expasy.org/docs/userman.html#genstruc
+
+    if ( ! (EnsureStats()  &&  EnsureSplitLines()) ) {
+        return false;
+    }
+
+    // Quick rejection: fewer lines than the number of required records.
+    if (m_TestLines.size() < 15) {
+        return false;
+    }
+
+    string lineCode, lineData;
+    auto recordIt = m_TestLines.begin();
+    const auto endIt = m_TestLines.end();
+
+    // Fetch the next line; false when the sample is exhausted.
+    auto nextLine = [&]() -> bool {
+        return UniProtGetLineData(recordIt, endIt, lineCode, lineData);
+    };
+
+    // ID: exactly one, and it must be the very first line.
+    NStr::SplitInTwo(
+        *recordIt, " ", lineCode, lineData, NStr::fSplit_MergeDelimiters);
+    if (lineCode != "ID") {
+        return false;
+    }
+    ++recordIt;
+
+    // AC: one or more.
+    if (!nextLine()  ||  lineCode != "AC") {
+        return false;
+    }
+    while (lineCode == "AC") {
+        if (!nextLine()) {
+            return false;
+        }
+    }
+
+    // DT: three lines are required here (first hard difference from ENA,
+    // which is checked for two).
+    for (int remaining = 3; remaining > 0; --remaining) {
+        if (lineCode != "DT"  ||  !nextLine()) {
+            return false;
+        }
+    }
+
+    // DE: one or more; running out of sample lines here counts as a match.
+    if (lineCode != "DE") {
+        return false;
+    }
+    while (lineCode == "DE") {
+        if (!nextLine()) {
+            return true;
+        }
+    }
+
+    // Next must come the optional "GN" line or the first "OS" line.
+    return lineCode == "GN"  ||  lineCode == "OS";
+}
+
  //  ----------------------------------------------------------------------------
  bool CFormatGuess::TestFormatVcf(
      EMode)
author	Aaron M. Ucko <ucko@debian.org>
	Wed, 27 Jan 2021 01:57:37 +0000 (20:57 -0500)
committer	Aaron M. Ucko <ucko@debian.org>
	Wed, 27 Jan 2021 01:57:37 +0000 (20:57 -0500)
c++/include/algo/blast/api/blast_aux.hpp		patch \| blob \| history
c++/include/algo/blast/api/blast_node.hpp	[new file with mode: 0644]	patch \| blob
c++/include/algo/blast/api/blast_usage_report.hpp	[new file with mode: 0644]	patch \| blob
c++/include/algo/blast/blastinput/blast_input.hpp		patch \| blob \| history
c++/include/algo/blast/blastinput/cmdline_flags.hpp		patch \| blob \| history
c++/include/algo/blast/blastinput/rpsblast_args.hpp		patch \| blob \| history
c++/include/algo/blast/blastinput/rpstblastn_args.hpp		patch \| blob \| history
c++/include/algo/blast/format/blast_format.hpp		patch \| blob \| history
c++/include/common/config/ncbiconf_msvc.h		patch \| blob \| history
c++/include/common/ncbi_export.h		patch \| blob \| history
c++/include/common/ncbi_package_ver.h		patch \| blob \| history
c++/include/common/ncbiconf_impl.h		patch \| blob \| history
c++/include/connect/ncbi_usage_report.hpp		patch \| blob \| history
c++/include/corelib/impl/ncbi_dbsvcmapper.hpp		patch \| blob \| history
c++/include/corelib/mswin_no_popup.h		patch \| blob \| history
c++/include/corelib/ncbi_system.hpp		patch \| blob \| history
c++/include/corelib/ncbiapp_api.hpp		patch \| blob \| history
c++/include/corelib/ncbidiag.hpp		patch \| blob \| history
c++/include/corelib/ncbimisc.hpp		patch \| blob \| history
c++/include/corelib/ncbistre.hpp		patch \| blob \| history
c++/include/corelib/request_ctx.hpp		patch \| blob \| history
c++/include/corelib/test_boost.hpp		patch \| blob \| history
c++/include/corelib/version.hpp		patch \| blob \| history
c++/include/corelib/version_api.hpp		patch \| blob \| history
c++/include/dbapi/driver/impl/dbapi_pool_balancer.hpp	[new file with mode: 0644]	patch \| blob
c++/include/ncbi_pch.hpp		patch \| blob \| history
c++/include/ncbi_source_ver.h		patch \| blob \| history
c++/include/ncbiconf.h		patch \| blob \| history
c++/include/objects/dbsnp/primary_track/impl/snpptis_impl.hpp		patch \| blob \| history
c++/include/objects/general/Dbtag.hpp		patch \| blob \| history
c++/include/objects/seqfeat/SeqFeatData.hpp		patch \| blob \| history
c++/include/objects/taxon1/local_taxon.hpp		patch \| blob \| history
c++/include/objects/valerr/ValidErrItem.hpp		patch \| blob \| history
c++/include/objmgr/impl/scope_impl.hpp		patch \| blob \| history
c++/include/objmgr/scope.hpp		patch \| blob \| history
c++/include/objmgr/util/autodef_options.hpp		patch \| blob \| history
c++/include/objmgr/util/indexer.hpp		patch \| blob \| history
c++/include/objtools/blast/blastdb_format/blastdb_dataextract.hpp		patch \| blob \| history
c++/include/objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp		patch \| blob \| history
c++/include/objtools/blast/seqdb_reader/impl/seqdbtax.hpp		patch \| blob \| history
c++/include/objtools/blast/seqdb_reader/seqdb.hpp		patch \| blob \| history
c++/include/objtools/blast/seqdb_reader/seqdbcommon.hpp		patch \| blob \| history
c++/include/objtools/blast/seqdb_writer/taxid_set.hpp		patch \| blob \| history
c++/include/objtools/blast/seqdb_writer/writedb_lmdb.hpp		patch \| blob \| history
c++/include/objtools/cleanup/cleanup.hpp		patch \| blob \| history
c++/include/objtools/cleanup/cleanup_message.hpp	[new file with mode: 0644]	patch \| blob
c++/include/objtools/data_loaders/blastdb/bdbloader.hpp		patch \| blob \| history
c++/include/objtools/edit/feattable_edit.hpp		patch \| blob \| history
c++/include/objtools/edit/remote_updater.hpp		patch \| blob \| history
c++/include/objtools/format/flat_file_config.hpp		patch \| blob \| history
c++/include/objtools/format/flat_file_generator.hpp		patch \| blob \| history
c++/include/objtools/format/gather_items.hpp		patch \| blob \| history
c++/include/objtools/format/items/feature_item.hpp		patch \| blob \| history
c++/include/objtools/format/items/flat_qual_slots.hpp		patch \| blob \| history
c++/include/objtools/format/items/reference_item.hpp		patch \| blob \| history
c++/include/objtools/logging/listener.hpp		patch \| blob \| history
c++/include/objtools/logging/message.hpp		patch \| blob \| history
c++/include/objtools/pubseq_gateway/client/psg_client.hpp		patch \| blob \| history
c++/include/objtools/readers/fasta.hpp		patch \| blob \| history
c++/include/objtools/readers/fasta_reader_utils.hpp		patch \| blob \| history
c++/include/objtools/readers/gff2_reader.hpp		patch \| blob \| history
c++/include/objtools/readers/gff3_reader.hpp		patch \| blob \| history
c++/include/objtools/readers/gtf_reader.hpp		patch \| blob \| history
c++/include/objtools/readers/message_listener.hpp		patch \| blob \| history
c++/include/serial/grpc_integration/grpc_integration.hpp		patch \| blob \| history
c++/include/serial/grpc_integration/impl/grpc_support.hpp		patch \| blob \| history
c++/include/serial/rpcbase.hpp		patch \| blob \| history
c++/include/serial/rpcbase_impl.hpp		patch \| blob \| history
c++/include/serial/streamiter.hpp		patch \| blob \| history
c++/include/util/bitset/bm.h		patch \| blob \| history
c++/include/util/bitset/bmaggregator.h		patch \| blob \| history
c++/include/util/bitset/bmalgo.h		patch \| blob \| history
c++/include/util/bitset/bmalgo_impl.h		patch \| blob \| history
c++/include/util/bitset/bmalloc.h		patch \| blob \| history
c++/include/util/bitset/bmavx2.h		patch \| blob \| history
c++/include/util/bitset/bmblocks.h		patch \| blob \| history
c++/include/util/bitset/bmbmatrix.h		patch \| blob \| history
c++/include/util/bitset/bmbuffer.h		patch \| blob \| history
c++/include/util/bitset/bmconst.h		patch \| blob \| history
c++/include/util/bitset/bmdbg.h		patch \| blob \| history
c++/include/util/bitset/bmdef.h		patch \| blob \| history
c++/include/util/bitset/bmfunc.h		patch \| blob \| history
c++/include/util/bitset/bmgamma.h		patch \| blob \| history
c++/include/util/bitset/bmintervals.h	[new file with mode: 0644]	patch \| blob
c++/include/util/bitset/bmrandom.h		patch \| blob \| history
c++/include/util/bitset/bmrs.h		patch \| blob \| history
c++/include/util/bitset/bmserial.h		patch \| blob \| history
c++/include/util/bitset/bmsparsevec.h		patch \| blob \| history
c++/include/util/bitset/bmsparsevec_algo.h		patch \| blob \| history
c++/include/util/bitset/bmsparsevec_compr.h		patch \| blob \| history
c++/include/util/bitset/bmsparsevec_serial.h		patch \| blob \| history
c++/include/util/bitset/bmsparsevec_util.h		patch \| blob \| history
c++/include/util/bitset/bmsse2.h		patch \| blob \| history
c++/include/util/bitset/bmsse4.h		patch \| blob \| history
c++/include/util/bitset/bmsse_util.h		patch \| blob \| history
c++/include/util/bitset/bmstrsparsevec.h		patch \| blob \| history
c++/include/util/bitset/bmtimer.h		patch \| blob \| history
c++/include/util/bitset/bmundef.h		patch \| blob \| history
c++/include/util/bitset/bmutil.h		patch \| blob \| history
c++/include/util/bitset/bmxor.h		patch \| blob \| history
c++/include/util/bitset/encoding.h		patch \| blob \| history
c++/include/util/format_guess.hpp		patch \| blob \| history
c++/include/util/limited_size_map.hpp		patch \| blob \| history
c++/scripts/common/check/inspxe.sh		patch \| blob \| history
c++/scripts/common/check/tsan.supp		patch \| blob \| history
c++/scripts/common/impl/install.sh		patch \| blob \| history
c++/scripts/projects/blast/Manifest		patch \| blob \| history
c++/scripts/projects/blast/components.link		patch \| blob \| history
c++/scripts/projects/igblast/edit_imgt_file.pl		patch \| blob \| history
c++/scripts/projects/ncbi_gbench.lst		patch \| blob \| history
c++/scripts/projects/netschedule/ChangeLog		patch \| blob \| history
c++/scripts/projects/pubseq_gateway/ChangeLog		patch \| blob \| history
c++/scripts/projects/pubseq_gateway/project.lst		patch \| blob \| history
c++/src/CMakeLists.txt		patch \| blob \| history
c++/src/Makefile.in		patch \| blob \| history
c++/src/algo/blast/api/CMakeLists.xblast.lib.txt		patch \| blob \| history
c++/src/algo/blast/api/Makefile.xblast.lib		patch \| blob \| history
c++/src/algo/blast/api/blast_aux.cpp		patch \| blob \| history
c++/src/algo/blast/api/blast_node.cpp	[new file with mode: 0644]	patch \| blob
c++/src/algo/blast/api/blast_usage_report.cpp	[new file with mode: 0644]	patch \| blob
c++/src/algo/blast/blastinput/blast_input.cpp		patch \| blob \| history
c++/src/algo/blast/blastinput/cmdline_flags.cpp		patch \| blob \| history
c++/src/algo/blast/blastinput/rpsblast_args.cpp		patch \| blob \| history
c++/src/algo/blast/blastinput/rpstblastn_args.cpp		patch \| blob \| history
c++/src/algo/blast/core/blast_engine.c		patch \| blob \| history
c++/src/algo/blast/core/blast_kappa.c		patch \| blob \| history
c++/src/algo/blast/format/blast_format.cpp		patch \| blob \| history
c++/src/algo/blast/unit_tests/api/version_reference_unit_test.cpp		patch \| blob \| history
c++/src/app/CMakeLists.txt		patch \| blob \| history
c++/src/app/Makefile.in		patch \| blob \| history
c++/src/app/blast/CMakeLists.rpsblast.app.txt		patch \| blob \| history
c++/src/app/blast/CMakeLists.rpstblastn.app.txt		patch \| blob \| history
c++/src/app/blast/Makefile.rpsblast.app		patch \| blob \| history
c++/src/app/blast/Makefile.rpstblastn.app		patch \| blob \| history
c++/src/app/blast/blast_app_util.cpp		patch \| blob \| history
c++/src/app/blast/blast_app_util.hpp		patch \| blob \| history
c++/src/app/blast/blast_formatter.cpp		patch \| blob \| history
c++/src/app/blast/blastn_app.cpp		patch \| blob \| history
c++/src/app/blast/blastp_app.cpp		patch \| blob \| history
c++/src/app/blast/blastx_app.cpp		patch \| blob \| history
c++/src/app/blast/deltablast_app.cpp		patch \| blob \| history
c++/src/app/blast/get_species_taxids.sh		patch \| blob \| history
c++/src/app/blast/legacy_blast.pl		patch \| blob \| history
c++/src/app/blast/psiblast_app.cpp		patch \| blob \| history
c++/src/app/blast/rpsblast_app.cpp		patch \| blob \| history
c++/src/app/blast/rpsblast_node.cpp	[new file with mode: 0644]	patch \| blob
c++/src/app/blast/rpsblast_node.hpp	[new file with mode: 0644]	patch \| blob
c++/src/app/blast/rpstblastn_app.cpp		patch \| blob \| history
c++/src/app/blast/rpstblastn_node.cpp	[new file with mode: 0644]	patch \| blob
c++/src/app/blast/rpstblastn_node.hpp	[new file with mode: 0644]	patch \| blob
c++/src/app/blast/tblastn_app.cpp		patch \| blob \| history
c++/src/app/blast/tblastx_app.cpp		patch \| blob \| history
c++/src/app/blast/update_blastdb.pl		patch \| blob \| history
c++/src/app/blastdb/CMakeLists.convert2blastmask.app.txt		patch \| blob \| history
c++/src/app/blastdb/blastdb_aliastool.cpp		patch \| blob \| history
c++/src/app/blastdb/blastdb_convert.cpp		patch \| blob \| history
c++/src/app/blastdb/blastdbcheck.cpp		patch \| blob \| history
c++/src/app/blastdb/blastdbcmd.cpp		patch \| blob \| history
c++/src/app/blastdb/blastdbcp.cpp		patch \| blob \| history
c++/src/app/blastdb/convert2blastmask.cpp		patch \| blob \| history
c++/src/app/blastdb/makeblastdb.cpp		patch \| blob \| history
c++/src/app/blastdb/makeprofiledb.cpp		patch \| blob \| history
c++/src/build-system/Makefile.mk.in		patch \| blob \| history
c++/src/build-system/Makefile.xcode.tmpl		patch \| blob \| history
c++/src/build-system/cmake/CMake.NCBIComponents.cmake		patch \| blob \| history
c++/src/build-system/cmake/CMake.NCBIComponentsMSVC.cmake		patch \| blob \| history
c++/src/build-system/cmake/CMake.NCBIComponentsUNIX.cmake		patch \| blob \| history
c++/src/build-system/cmake/CMake.NCBIComponentsUNIXex.cmake		patch \| blob \| history
c++/src/build-system/cmake/CMake.NCBIComponentsXCODE.cmake		patch \| blob \| history
c++/src/build-system/cmake/CMake.NCBIptb.cmake		patch \| blob \| history
c++/src/build-system/cmake/CMake.NCBIptb.ntest.cmake		patch \| blob \| history
c++/src/build-system/cmake/CMake.NCBItoolkit.cmake		patch \| blob \| history
c++/src/build-system/cmake/CMakeChecks.boost.cmake		patch \| blob \| history
c++/src/build-system/cmake/CMakeChecks.cmake		patch \| blob \| history
c++/src/build-system/cmake/CMakeChecks.compiler.cmake		patch \| blob \| history
c++/src/build-system/cmake/CMakeLists.top_builddir.txt		patch \| blob \| history
c++/src/build-system/cmake/cmake-cfg-unix.sh		patch \| blob \| history
c++/src/build-system/cmake/cmake-cfg-vs.bat		patch \| blob \| history
c++/src/build-system/cmake/cmake-cfg-xcode.sh		patch \| blob \| history
c++/src/build-system/cmake/cmake_configure_ext_gpipe.sh		patch \| blob \| history
c++/src/build-system/config.h.in		patch \| blob \| history
c++/src/build-system/configure		patch \| blob \| history
c++/src/build-system/configure.ac		patch \| blob \| history
c++/src/build-system/install.sh.in		patch \| blob \| history
c++/src/build-system/ncbi_package_version		patch \| blob \| history
c++/src/build-system/project_tree_builder.ini		patch \| blob \| history
c++/src/build-system/relocate.sh.in		patch \| blob \| history
c++/src/corelib/ncbi_param.cpp		patch \| blob \| history
c++/src/corelib/ncbi_stack.cpp		patch \| blob \| history
c++/src/corelib/ncbi_system.cpp		patch \| blob \| history
c++/src/corelib/ncbiapp.cpp		patch \| blob \| history
c++/src/corelib/ncbiargs.cpp		patch \| blob \| history
c++/src/corelib/ncbidiag.cpp		patch \| blob \| history
c++/src/corelib/ncbidiag_p.cpp		patch \| blob \| history
c++/src/corelib/ncbidiag_p.hpp		patch \| blob \| history
c++/src/corelib/ncbifile.cpp		patch \| blob \| history
c++/src/corelib/version.cpp		patch \| blob \| history
c++/src/dbapi/driver/dbapi_conn_factory.cpp		patch \| blob \| history
c++/src/dbapi/driver/dbapi_impl_context.cpp		patch \| blob \| history
c++/src/dbapi/driver/dbapi_pool_balancer.cpp		patch \| blob \| history
c++/src/dbapi/driver/dbapi_pool_balancer.hpp	[deleted file]	patch \| blob \| history
c++/src/objects/dbsnp/primary_track/snpptis.cpp		patch \| blob \| history
c++/src/objects/general/Dbtag.cpp		patch \| blob \| history
c++/src/objects/genomecoll/genomic_collections_cli.cpp		patch \| blob \| history
c++/src/objects/seq/Bioseq.cpp		patch \| blob \| history
c++/src/objects/seq/so_map.cpp		patch \| blob \| history
c++/src/objects/seqfeat/OrgMod.cpp		patch \| blob \| history
c++/src/objects/seqfeat/SeqFeatData.cpp		patch \| blob \| history
c++/src/objects/seqfeat/SubSource.cpp		patch \| blob \| history
c++/src/objects/seqfeat/ecnum_ambiguous.inc		patch \| blob \| history
c++/src/objects/seqfeat/ecnum_ambiguous.txt		patch \| blob \| history
c++/src/objects/seqfeat/ecnum_replaced.inc		patch \| blob \| history
c++/src/objects/seqfeat/ecnum_replaced.txt		patch \| blob \| history
c++/src/objects/seqfeat/ecnum_specific.inc		patch \| blob \| history
c++/src/objects/seqfeat/ecnum_specific.txt		patch \| blob \| history
c++/src/objects/seqfeat/gc.inc		patch \| blob \| history
c++/src/objects/seqfeat/gc.prt		patch \| blob \| history
c++/src/objects/seqfeat/institution_codes.inc		patch \| blob \| history
c++/src/objects/seqfeat/institution_codes.txt		patch \| blob \| history
c++/src/objects/seqfeat/lat_lon_country.inc		patch \| blob \| history
c++/src/objects/seqloc/Seq_id.cpp		patch \| blob \| history
c++/src/objects/seqloc/accguide.inc		patch \| blob \| history
c++/src/objects/seqloc/accguide.txt		patch \| blob \| history
c++/src/objects/valerr/ValidErrItem.cpp		patch \| blob \| history
c++/src/objmgr/scope.cpp		patch \| blob \| history
c++/src/objmgr/scope_impl.cpp		patch \| blob \| history
c++/src/objmgr/tse_info.cpp		patch \| blob \| history
c++/src/objmgr/util/autodef.cpp		patch \| blob \| history
c++/src/objmgr/util/autodef_options.cpp		patch \| blob \| history
c++/src/objmgr/util/create_defline.cpp		patch \| blob \| history
c++/src/objmgr/util/feature_edit.cpp		patch \| blob \| history
c++/src/objmgr/util/indexer.cpp		patch \| blob \| history
c++/src/objtools/CMakeLists.txt		patch \| blob \| history
c++/src/objtools/Makefile.in		patch \| blob \| history
c++/src/objtools/alnmgr/alnvec.cpp		patch \| blob \| history
c++/src/objtools/blast/blastdb_format/blastdb_dataextract.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_reader/seqdb.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_reader/seqdb_lmdb.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_reader/seqdbcommon.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_reader/seqdbgilistset.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_reader/seqdbimpl.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_reader/seqdbimpl.hpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_reader/seqdblmdbset.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_reader/seqdblmdbset.hpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_reader/seqdboidlist.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_reader/seqdbtax.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_reader/seqdbvol.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_writer/build-alias-index		patch \| blob \| history
c++/src/objtools/blast/seqdb_writer/taxid_set.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_writer/unit_test/writedb_lmdb_unit_test.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_writer/unit_test/writedb_unit_test.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_writer/writedb_impl.cpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_writer/writedb_impl.hpp		patch \| blob \| history
c++/src/objtools/blast/seqdb_writer/writedb_lmdb.cpp		patch \| blob \| history
c++/src/objtools/cleanup/CMakeLists.cleanup.lib.txt		patch \| blob \| history
c++/src/objtools/cleanup/Makefile.cleanup.lib		patch \| blob \| history
c++/src/objtools/cleanup/cleanup.cpp		patch \| blob \| history
c++/src/objtools/cleanup/cleanup_message.cpp	[new file with mode: 0644]	patch \| blob
c++/src/objtools/cleanup/cleanup_pub.cpp		patch \| blob \| history
c++/src/objtools/cleanup/cleanup_utils.hpp		patch \| blob \| history
c++/src/objtools/cleanup/newcleanupp.cpp		patch \| blob \| history
c++/src/objtools/data_loaders/blastdb/bdbloader.cpp		patch \| blob \| history
c++/src/objtools/data_loaders/genbank/reader.cpp		patch \| blob \| history
c++/src/objtools/edit/CMakeLists.edit.lib.txt		patch \| blob \| history
c++/src/objtools/edit/feattable_edit.cpp		patch \| blob \| history
c++/src/objtools/edit/loc_edit.cpp		patch \| blob \| history
c++/src/objtools/edit/remote_updater.cpp		patch \| blob \| history
c++/src/objtools/format/context.cpp		patch \| blob \| history
c++/src/objtools/format/dbsource_item.cpp		patch \| blob \| history
c++/src/objtools/format/defline_item.cpp		patch \| blob \| history
c++/src/objtools/format/feature_item.cpp		patch \| blob \| history
c++/src/objtools/format/flat_file_config.cpp		patch \| blob \| history
c++/src/objtools/format/flat_file_generator.cpp		patch \| blob \| history
c++/src/objtools/format/flat_qual_slots.cpp		patch \| blob \| history
c++/src/objtools/format/gather_items.cpp		patch \| blob \| history
c++/src/objtools/format/gbseq_formatter.cpp		patch \| blob \| history
c++/src/objtools/format/genbank_formatter.cpp		patch \| blob \| history
c++/src/objtools/format/genbank_gather.cpp		patch \| blob \| history
c++/src/objtools/format/genome_project_item.cpp		patch \| blob \| history
c++/src/objtools/format/inst_info_map.cpp		patch \| blob \| history
c++/src/objtools/format/inst_info_map.hpp		patch \| blob \| history
c++/src/objtools/format/primary_item.cpp		patch \| blob \| history
c++/src/objtools/format/qualifiers.cpp		patch \| blob \| history
c++/src/objtools/format/reference_item.cpp		patch \| blob \| history
c++/src/objtools/format/source_item.cpp		patch \| blob \| history
c++/src/objtools/logging/listener.cpp		patch \| blob \| history
c++/src/objtools/pubseq_gateway/client/psg_client.cpp		patch \| blob \| history
c++/src/objtools/pubseq_gateway/client/psg_client_impl.hpp		patch \| blob \| history
c++/src/objtools/pubseq_gateway/client/psg_client_transport.cpp		patch \| blob \| history
c++/src/objtools/readers/aln_reader.cpp		patch \| blob \| history
c++/src/objtools/readers/aln_scanner_clustal.cpp		patch \| blob \| history
c++/src/objtools/readers/aln_scanner_nexus.cpp		patch \| blob \| history
c++/src/objtools/readers/aln_scanner_nexus.hpp		patch \| blob \| history
c++/src/objtools/readers/descr_mod_apply.cpp		patch \| blob \| history
c++/src/objtools/readers/fasta.cpp		patch \| blob \| history
c++/src/objtools/readers/fasta_exception.cpp		patch \| blob \| history
c++/src/objtools/readers/fasta_reader_utils.cpp		patch \| blob \| history
c++/src/objtools/readers/gff2_data.cpp		patch \| blob \| history
c++/src/objtools/readers/gff2_reader.cpp		patch \| blob \| history
c++/src/objtools/readers/gff3_reader.cpp		patch \| blob \| history
c++/src/objtools/readers/gtf_reader.cpp		patch \| blob \| history
c++/src/objtools/readers/line_error.cpp		patch \| blob \| history
c++/src/objtools/readers/mod_reader.cpp		patch \| blob \| history
c++/src/objtools/readers/readfeat.cpp		patch \| blob \| history
c++/src/objtools/readers/rm_reader.cpp		patch \| blob \| history
c++/src/objtools/readers/source_mod_parser.cpp		patch \| blob \| history
c++/src/serial/grpc_integration/grpc_integration.cpp		patch \| blob \| history
c++/src/serial/rpcbase.cpp		patch \| blob \| history
c++/src/util/format_guess.cpp		patch \| blob \| history