-/* $Id: blast_aux.hpp 507721 2016-07-21 14:07:53Z fongah2 $
+/* $Id: blast_aux.hpp 615182 2020-08-28 04:28:48Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <objects/seqloc/Seq_interval.hpp>
#include <util/range.hpp> // For TSeqRange
#include <objects/seq/seqlocinfo.hpp>
+#include <objects/blast/Blast4_error.hpp>
#include <objmgr/scope.hpp>
// BLAST includes
#include <algo/blast/core/blast_psi.h>
#include <algo/blast/core/blast_hspstream.h>
+
BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)
static Uint4 m_RefCounter;
};
+/// Diagnostic handler that captures the messages passed to it
+class NCBI_XBLAST_EXPORT CBlastAppDiagHandler : public CDiagHandler
+{
+public:
+    /// Default constructor: stores the handler returned by
+    /// GetDiagHandler(true) and starts with message saving switched on.
+    CBlastAppDiagHandler()
+        : m_handler(GetDiagHandler(true)),
+          m_save(true)
+    {}
+
+    /// Destructor (defined out of line)
+    ~CBlastAppDiagHandler();
+
+    /// Save and post a diagnostic message
+    /// @param mess Diagnostic message to handle [in]
+    virtual void Post(const SDiagMessage & mess);
+
+    /// Reset the message buffer, erasing all saved messages
+    void ResetMessages(void);
+
+    /// Turn off saving of diagnostic messages and discard all saved ones
+    void DoNotSaveMessages(void);
+
+    /// @return mutable reference to the list of saved diagnostic messages
+    list<CRef<objects::CBlast4_error> > & GetMessages(void)
+    {
+        return m_messages;
+    }
+
+private:
+    /// Handler obtained from GetDiagHandler(true) at construction time
+    CDiagHandler * m_handler;
+    /// Saved diagnostic messages
+    list<CRef<objects::CBlast4_error> > m_messages;
+    /// Set to true by the constructor; presumably cleared by
+    /// DoNotSaveMessages() -- confirm against the implementation
+    bool m_save;
+};
+
/** Declares class to handle deallocating of the structure using the appropriate
* function
--- /dev/null
+/* $Id: blast_node.hpp 615348 2020-08-31 15:38:28Z fukanchi $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file blast_node.hpp
+ * BLAST node api
+ */
+
+#ifndef ALGO_BLAST_API___BLAST_NODE__HPP
+#define ALGO_BLAST_API___BLAST_NODE__HPP
+
+#include <algo/blast/core/blast_export.h>
+#include <algo/blast/api/blast_aux.hpp>
+
+BEGIN_NCBI_SCOPE
+BEGIN_SCOPE(blast)
+
+/// Message object: a type tag plus an untyped payload pointer.
+/// The class declares no destructor, so the payload pointer is stored and
+/// handed back as is; it is never dereferenced or released here.
+class NCBI_XBLAST_EXPORT CBlastNodeMsg : public CObject
+{
+public:
+    /// Message kinds
+    enum EMsgType {
+        eRunRequest,
+        ePostResult,
+        eErrorExit,
+        ePostLog
+    };
+
+    /// @param type     Message kind [in]
+    /// @param obj_ptr  Opaque payload pointer, stored unchanged [in]
+    CBlastNodeMsg(EMsgType type, void * obj_ptr)
+        : m_MsgType(type), m_Obj(obj_ptr)
+    {}
+
+    /// @return the message kind supplied at construction.
+    /// const-qualified so it can be called through const references/CConstRef.
+    EMsgType GetMsgType() const { return m_MsgType; }
+
+    /// @return the payload pointer supplied at construction.
+    /// const-qualified: reading the pointer does not modify the message.
+    void * GetMsgBody() const { return m_Obj; }
+
+private:
+    EMsgType m_MsgType;  ///< Message kind
+    void *   m_Obj;      ///< Opaque payload pointer
+};
+
+/// Mutex-protected queue of CBlastNodeMsg objects belonging to one node
+class NCBI_XBLAST_EXPORT CBlastNodeMailbox : public CObject
+{
+public:
+    /// @param node_num  Number of the node this mailbox belongs to [in]
+    /// @param notify    Condition variable kept by reference; it must
+    ///                  outlive this mailbox [in]
+    CBlastNodeMailbox(int node_num, CConditionVariable & notify)
+        : m_NodeNum(node_num),
+          m_Notify(notify)
+    {}
+
+    /// Send a message (defined out of line)
+    void SendMsg(CRef<CBlastNodeMsg> msg);
+
+    /// Remove and return the message at the front of the queue.
+    /// @return the front message, or an empty CRef if the queue is empty
+    CRef<CBlastNodeMsg> ReadMsg()
+    {
+        CFastMutexGuard guard(m_Mutex);
+        if (m_MsgQueue.empty()) {
+            return CRef<CBlastNodeMsg>();
+        }
+        CRef<CBlastNodeMsg> front_msg(m_MsgQueue.front());
+        m_MsgQueue.pop_front();
+        return front_msg;
+    }
+
+    /// Put a message back at the front of the queue
+    void UnreadMsg(CRef<CBlastNodeMsg> msg)
+    {
+        CFastMutexGuard guard(m_Mutex);
+        m_MsgQueue.push_front(msg);
+    }
+
+    /// @return number of messages currently queued
+    int GetNumMsgs ()
+    {
+        CFastMutexGuard guard(m_Mutex);
+        return static_cast<int>(m_MsgQueue.size());
+    }
+
+    /// @return the node number given at construction
+    int GetNodeNum()
+    {
+        return m_NodeNum;
+    }
+
+    /// Drops every message still queued
+    ~CBlastNodeMailbox()
+    {
+        m_MsgQueue.clear();
+    }
+
+private:
+    int m_NodeNum;                          ///< Node number
+    CConditionVariable & m_Notify;          ///< Condition variable supplied by the creator
+    list <CRef<CBlastNodeMsg> > m_MsgQueue; ///< Pending messages, front is read first
+    CFastMutex m_Mutex;                     ///< Guards m_MsgQueue
+};
+
+/// Abstract thread-based BLAST node. Subclasses supply Main() and
+/// GetBlastResults().
+/// NOTE(review): no synchronization of m_State / m_Status is visible in this
+/// header -- confirm how readers on other threads are expected to use them.
+class NCBI_XBLAST_EXPORT CBlastNode : public CThread
+{
+public:
+    /// Node states
+    enum EState {
+        eInitialized,
+        eRunning,
+        eError,
+        eDone
+    };
+
+    /// Constructor (defined out of line)
+    CBlastNode(int node_num,
+               const CNcbiArguments & ncbi_args,
+               const CArgs& args,
+               CBlastAppDiagHandler & bah,
+               EProgram blast_program,
+               int query_index,
+               int num_queries,
+               CBlastNodeMailbox * mailbox);
+
+    /// Implemented by subclasses.
+    /// NOTE(review): presumably places the formatted results in `results`
+    /// and returns a status/size -- confirm against the implementations.
+    virtual int GetBlastResults(string & results) = 0;
+
+    /// @return node number
+    int GetNodeNum()
+    {
+        return m_NodeNum;
+    }
+
+    /// @return current state as last set through SetState()
+    EState GetState()
+    {
+        return m_State;
+    }
+
+    /// @return status value as last set through SetStatus()
+    int GetStatus()
+    {
+        return m_Status;
+    }
+
+    /// @return the CArgs reference held by this node
+    const CArgs & GetArgs()
+    {
+        return m_Args;
+    }
+
+    /// @return the diag handler reference held by this node
+    CBlastAppDiagHandler & GetDiagHandler()
+    {
+        return m_Bah;
+    }
+
+    /// @return the CNcbiArguments reference held by this node
+    const CNcbiArguments & GetArguments()
+    {
+        return m_NcbiArgs;
+    }
+
+    /// Send a message of the given type with an optional payload pointer
+    /// (defined out of line)
+    void SendMsg(CBlastNodeMsg::EMsgType msg_type, void* ptr = NULL);
+
+    /// @return mutable reference to the node id string
+    string & GetNodeIdStr()
+    {
+        return m_NodeIdStr;
+    }
+
+    /// @return value of m_NumOfQueries
+    int GetNumOfQueries()
+    {
+        return m_NumOfQueries;
+    }
+
+    /// @return value last stored through SetQueriesLength()
+    int GetQueriesLength()
+    {
+        return m_QueriesLength;
+    }
+
+protected:
+    /// Destructor is protected and defined out of line
+    virtual ~CBlastNode(void);
+
+    /// Thread entry point, implemented by subclasses
+    virtual void* Main(void) = 0;
+
+    /// Store a new state
+    void SetState(EState state)
+    {
+        m_State = state;
+    }
+
+    /// Store a new status value
+    void SetStatus(int status)
+    {
+        m_Status = status;
+    }
+
+    /// Store the queries length
+    void SetQueriesLength(int l)
+    {
+        m_QueriesLength = l;
+    }
+
+    int m_NodeNum;                      ///< Node number (visible to subclasses)
+
+private:
+    const CNcbiArguments & m_NcbiArgs;  ///< Returned by GetArguments()
+    const CArgs & m_Args;               ///< Returned by GetArgs()
+    CBlastAppDiagHandler & m_Bah;       ///< Returned by GetDiagHandler()
+    EProgram m_BlastProgram;
+    int m_QueryIndex;
+    int m_NumOfQueries;                 ///< Returned by GetNumOfQueries()
+    string m_NodeIdStr;                 ///< Returned by GetNodeIdStr()
+    CRef<CBlastNodeMailbox> m_Mailbox;
+    EState m_State;                     ///< See SetState()/GetState()
+    int m_Status;                       ///< See SetStatus()/GetStatus()
+    int m_QueriesLength;                ///< See SetQueriesLength()/GetQueriesLength()
+};
+
+
+/// Master node: keeps the registered CBlastNode objects and their mailboxes
+class NCBI_XBLAST_EXPORT CBlastMasterNode
+{
+public:
+    /// Constructor (defined out of line)
+    CBlastMasterNode(CNcbiOstream & out_stream, int num_threads);
+
+    typedef map<int, CRef<CBlastNodeMailbox> > TPostOffice;
+    typedef map<int, CRef<CBlastNode> > TRegisteredNodes;
+    typedef map<int, double> TActiveNodes;
+    typedef map<int, CRef<CBlastNodeMsg> > TFormatQueue;
+
+    /// Register a node together with its mailbox (defined out of line)
+    void RegisterNode(CBlastNode * node, CBlastNodeMailbox * mailbox);
+
+    /// @return number of currently registered nodes
+    int GetNumNodes()
+    {
+        return static_cast<int>(m_RegisteredNodes.size());
+    }
+
+    /// Defined out of line
+    int IsFull();
+
+    /// Mark the master as shut down by setting m_MaxNumNodes to -1
+    void Shutdown()
+    {
+        m_MaxNumNodes = -1;
+    }
+
+    /// Defined out of line
+    bool Processing();
+
+    /// @return zero only when m_MaxNumNodes is negative (see Shutdown())
+    /// and no node is registered any more; non-zero otherwise
+    int IsActive()
+    {
+        const bool shut_down_and_empty =
+            (m_MaxNumNodes < 0) && m_RegisteredNodes.empty();
+        return !shut_down_and_empty;
+    }
+
+    /// Defined out of line
+    void FormatResults();
+
+    /// @return the condition variable owned by this master node
+    CConditionVariable & GetBuzzer()
+    {
+        return m_NewEvent;
+    }
+
+    ~CBlastMasterNode() {}
+
+    /// @return value of m_NumQueries
+    int GetNumOfQueries()
+    {
+        return m_NumQueries;
+    }
+
+    /// @return value of m_QueriesLength
+    Int8 GetQueriesLength()
+    {
+        return m_QueriesLength;
+    }
+
+    /// @return value of m_NumErrStatus
+    int GetNumErrStatus()
+    {
+        return m_NumErrStatus;
+    }
+
+private:
+    /// Defined out of line
+    void x_WaitForNewEvent();
+
+    CNcbiOstream & m_OutputStream;
+    int m_MaxNumThreads;
+    int m_MaxNumNodes;                  ///< Negative once Shutdown() was called
+    CFastMutex m_Mutex;
+    CStopWatch m_StopWatch;
+    TPostOffice m_PostOffice;
+    TRegisteredNodes m_RegisteredNodes; ///< Counted by GetNumNodes()
+    TActiveNodes m_ActiveNodes;
+    TFormatQueue m_FormatQueue;
+    CConditionVariable m_NewEvent;      ///< Returned by GetBuzzer()
+    int m_NumErrStatus;
+    int m_NumQueries;
+    Int8 m_QueriesLength;
+};
+
+
+/// Stream line reader used to pull batches of queries (see GetQueryBatch())
+class NCBI_XBLAST_EXPORT CBlastNodeInputReader : public CStreamLineReader
+{
+public:
+    /// @param is           Input stream passed to the CStreamLineReader base [in]
+    /// @param batch_size   Stored as the query batch size [in]
+    /// @param est_avg_len  Stored as the estimated average query length [in]
+    CBlastNodeInputReader(CNcbiIstream& is, int batch_size, int est_avg_len)
+        : CStreamLineReader(is),
+          m_QueryBatchSize(batch_size),
+          m_EstAvgQueryLength(est_avg_len),
+          m_QueryCount(0)
+    {}
+
+    /// Defined out of line.
+    /// NOTE(review): presumably fills `queries` with the next batch and
+    /// reports a query number through `query_no` -- confirm in the .cpp.
+    int GetQueryBatch(string & queries, int & query_no);
+
+private:
+    const int m_QueryBatchSize;     ///< Fixed at construction
+    const int m_EstAvgQueryLength;  ///< Fixed at construction
+    int m_QueryCount;               ///< Starts at 0
+};
+
+END_SCOPE(blast)
+END_NCBI_SCOPE
+
+#endif /* ALGO_BLAST_API___BLAST_NODE__HPP */
--- /dev/null
+/* $Id: blast_usage_report.hpp 617231 2020-09-28 18:27:17Z ivanov $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file blast_usage_report.hpp
+ * BLAST usage report api
+ */
+
+#ifndef ALGO_BLAST_API___BLAST_USAGE_REPORT__HPP
+#define ALGO_BLAST_API___BLAST_USAGE_REPORT__HPP
+
+#include <connect/ncbi_usage_report.hpp>
+#include <algo/blast/core/blast_export.h>
+
+BEGIN_NCBI_SCOPE
+BEGIN_SCOPE(blast)
+
+/// BLAST-specific usage report built on CUsageReport; parameters are
+/// identified by EUsageParams and collected through the AddParam() overloads.
+class NCBI_XBLAST_EXPORT CBlastUsageReport : public CUsageReport
+{
+
+public:
+    /// Identifiers of the parameters that can be added to the report
+    enum EUsageParams {
+        eApp,
+        eVersion,
+        eProgram,
+        eTask,
+        eExitStatus,
+        eRunTime,
+        eDBName,
+        eDBLength,
+        eDBNumSeqs,
+        eDBDate,
+        eBl2seq,
+        eNumSubjects,
+        eSubjectsLength,
+        eNumQueries,
+        eTotalQueryLength,
+        eEvalueThreshold,
+        eNumThreads,
+        eHitListSize,
+        eOutputFmt,
+        eTaxIdList,
+        eNegTaxIdList,
+        eGIList,
+        eNegGIList,
+        eSeqIdList,
+        eNegSeqIdList,
+        eIPGList,
+        eNegIPGList,
+        eMaskAlgo,
+        eCompBasedStats,
+        eRange,
+        eMTMode,
+        eNumQueryBatches,
+        eNumErrStatus,
+        ePSSMInput,
+        eConverged,
+        eArchiveInput,
+        eRIDInput,
+        eDBInfo,
+        eDBTaxInfo,
+        eDBEntry,
+        eDBDumpAll,
+        eDBType,
+        eInputType,
+        eParseSeqIDs,
+        eSeqType,
+        eDBTest,
+        eDBAliasMode,
+        eDocker,
+        eGCP,
+        eAWS,
+        eELBJobId,
+        eELBBatchNum
+    };
+
+    CBlastUsageReport();
+    ~CBlastUsageReport();
+
+    /// Add an integer-valued parameter
+    void AddParam(EUsageParams p, int val);
+    /// Add a string-valued parameter
+    void AddParam(EUsageParams p, const string & val);
+    /// Add a floating-point parameter
+    void AddParam(EUsageParams p, const double & val);
+    /// Add a 64-bit integer parameter
+    void AddParam(EUsageParams p, Int8 val);
+    /// Add a boolean parameter
+    void AddParam(EUsageParams p, bool val);
+    /// Add a C-string parameter.
+    /// Without this overload a string literal argument selects the bool
+    /// overload: pointer-to-bool is a standard conversion and outranks the
+    /// user-defined conversion to string, so the text would be lost.
+    /// A NULL pointer is forwarded as an empty string.
+    void AddParam(EUsageParams p, const char * val)
+    {
+        AddParam(p, string(val ? val : ""));
+    }
+
+private:
+    void x_CheckBlastUsageEnv();
+    /// Map a parameter identifier to its string form
+    string x_EUsageParmsToString(EUsageParams p);
+    void x_CheckRunEnv();
+    CUsageReportParameters m_Params;  ///< Parameters collected for the report
+};
+
+END_SCOPE(blast)
+END_NCBI_SCOPE
+
+#endif /* ALGO_BLAST_API___BLAST_USAGE_REPORT__HPP */
-/* $Id: blast_input.hpp 575325 2018-11-27 18:22:00Z ucko $
+/* $Id: blast_input.hpp 615335 2020-08-31 15:36:38Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
/// be in a batch of converted sequences
///
CBlastInput(CBlastInputSource* source, int batch_size = kMax_Int)
- : m_Source(source), m_BatchSize(batch_size) {}
+ : m_Source(source), m_BatchSize(batch_size), m_NumSeqs(0), m_TotalLength(0) {}
/// Destructor
///
- ~CBlastInput() {}
+ ~CBlastInput(){}
/// Read and convert all the sequences from the source
/// @param scope CScope object to use in return value [in]
/// Determine if we have reached the end of the BLAST input
bool End() { return m_Source->End(); }
+ int GetNumSeqsProcessed() const { return m_NumSeqs; } ///< @return current value of the m_NumSeqs counter
+ int GetTotalLengthProcessed() const { return m_TotalLength; } ///< @return current value of the m_TotalLength counter
private:
CRef<CBlastInputSource> m_Source; ///< pointer to source of sequences
TSeqPos m_BatchSize; ///< total size of one block of sequences
/// Perform the actual copy for assignment operator and copy constructor
void do_copy(const CBlastInput& input);
+
+ /// Number of sequences processed (initialised to 0 by the constructor)
+ int m_NumSeqs;
+
+ /// Total length processed (initialised to 0 by the constructor)
+ int m_TotalLength;
};
/// Auxiliary class for creating Bioseqs given SeqIds
-/* $Id: cmdline_flags.hpp 605536 2020-04-13 11:07:50Z ivanov $
+/* $Id: cmdline_flags.hpp 615184 2020-08-28 04:29:55Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
NCBI_BLASTINPUT_EXPORT extern const string kArgUnalignedOutput;
/// Argument to specify format for reporting unaligned reads
NCBI_BLASTINPUT_EXPORT extern const string kArgUnalignedFormat;
+/// Argument to specify mt mode (split by db or split by queries)
+NCBI_BLASTINPUT_EXPORT extern const string kArgMTMode;
END_SCOPE(blast)
END_NCBI_SCOPE
-/* $Id: rpsblast_args.hpp 544441 2017-08-23 11:55:51Z camacho $
+/* $Id: rpsblast_args.hpp 615185 2020-08-28 04:30:03Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
/// @inheritDoc
virtual int GetQueryBatchSize() const;
+ /// Get the input stream
+ virtual CNcbiIstream& GetInputStream();
+
+ /// Get the output stream
+ virtual CNcbiOstream& GetOutputStream();
+
+ virtual ~CRPSBlastAppArgs() {}
+
protected:
/// @inheritDoc
virtual CRef<CBlastOptionsHandle>
const CArgs& args);
};
+/// CRPSBlastAppArgs variant constructed from an input string; it carries its
+/// own string-stream members for input and output.
+class NCBI_BLASTINPUT_EXPORT CRPSBlastNodeArgs : public CRPSBlastAppArgs
+{
+public:
+    /// Constructor
+    /// @param input Input text (presumably the queries for this node --
+    ///              confirm against the implementation) [in]
+    CRPSBlastNodeArgs(const string & input);
+
+    /// @inheritDoc
+    virtual int GetQueryBatchSize() const;
+
+    /// @return the input stream (defined out of line)
+    virtual CNcbiIstream& GetInputStream();
+
+    /// @return the output stream (defined out of line)
+    virtual CNcbiOstream& GetOutputStream();
+
+    /// @return the in-memory output stream member with its concrete type
+    CNcbiOstrstream & GetOutputStrStream()
+    {
+        return m_OutputStream;
+    }
+
+    /// Destructor (defined out of line)
+    virtual ~CRPSBlastNodeArgs();
+
+protected:
+    /// @inheritDoc
+    virtual CRef<CBlastOptionsHandle>
+    x_CreateOptionsHandle(CBlastOptions::EAPILocality locality,
+                          const CArgs& args);
+
+private:
+    CNcbiOstrstream m_OutputStream;    ///< Returned by GetOutputStrStream()
+    CNcbiIstrstream * m_InputStream;   ///< NOTE(review): ownership is not visible in this header
+};
+
END_SCOPE(blast)
END_NCBI_SCOPE
-/* $Id: rpstblastn_args.hpp 161402 2009-05-27 17:35:47Z camacho $
+/* $Id: rpstblastn_args.hpp 615188 2020-08-28 04:30:31Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
/// @inheritDoc
virtual int GetQueryBatchSize() const;
+ /// Get the input stream
+ virtual CNcbiIstream& GetInputStream();
+
+ /// Get the output stream
+ virtual CNcbiOstream& GetOutputStream();
+
+ virtual ~CRPSTBlastnAppArgs() {}
+
protected:
/// @inheritDoc
virtual CRef<CBlastOptionsHandle>
const CArgs& args);
};
+/// CRPSTBlastnAppArgs variant constructed from an input string; it carries
+/// its own string-stream members for input and output.
+class NCBI_BLASTINPUT_EXPORT CRPSTBlastnNodeArgs : public CRPSTBlastnAppArgs
+{
+public:
+    /// Constructor
+    /// @param input Input text (presumably the queries for this node --
+    ///              confirm against the implementation) [in]
+    CRPSTBlastnNodeArgs(const string & input);
+
+    /// @inheritDoc
+    virtual int GetQueryBatchSize() const;
+
+    /// @return the input stream (defined out of line)
+    virtual CNcbiIstream& GetInputStream();
+
+    /// @return the output stream (defined out of line)
+    virtual CNcbiOstream& GetOutputStream();
+
+    /// @return the in-memory output stream member with its concrete type
+    CNcbiOstrstream & GetOutputStrStream()
+    {
+        return m_OutputStream;
+    }
+
+    /// Destructor (defined out of line)
+    virtual ~CRPSTBlastnNodeArgs();
+
+protected:
+    /// @inheritDoc
+    virtual CRef<CBlastOptionsHandle>
+    x_CreateOptionsHandle(CBlastOptions::EAPILocality locality,
+                          const CArgs& args);
+
+private:
+    CNcbiOstrstream m_OutputStream;    ///< Returned by GetOutputStrStream()
+    CNcbiIstrstream * m_InputStream;   ///< NOTE(review): ownership is not visible in this header
+};
+
+
END_SCOPE(blast)
END_NCBI_SCOPE
-/* $Id: blast_format.hpp 591152 2019-08-12 11:18:21Z fongah2 $
+/* $Id: blast_format.hpp 615337 2020-08-31 15:36:55Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <algo/blast/api/blast_seqinfosrc.hpp>
#include <algo/blast/format/sam.hpp>
#include <objects/blast/blast__.hpp>
+#include <algo/blast/api/blast_usage_report.hpp>
BEGIN_NCBI_SCOPE
static void PrintArchive(CRef<objects::CBlast4_archive> archive,
CNcbiOstream& out);
+
+ // Extract search info in CBlastFormat and add to blast report usage
+ void LogBlastSearchInfo(blast::CBlastUsageReport & report);
+
private:
/// Format type
blast::CFormattingArgs::EOutputFormat m_FormatType;
-/* $Id: ncbiconf_msvc.h 602172 2020-02-18 15:13:29Z ucko $
+/* $Id: ncbiconf_msvc.h 608266 2020-05-13 18:56:44Z ivanov $
* By Denis Vakatov, NCBI (vakatov@ncbi.nlm.nih.gov)
*
* MS-Win 32/64, MSVC++ 6.0/.NET
#define NETDB_REENTRANT 1
#if _MSC_VER >= 1400
-
+// need to include some standard header to get all debugging macros
+# ifdef __cplusplus
+# include <cstdint>
+# endif
/* Suppress 'deprecated' warning for STD functions */
#if !defined(_CRT_NONSTDC_DEPRECATE)
#define _CRT_NONSTDC_DEPRECATE(x)
#ifndef COMMON___NCBI_EXPORT__H
#define COMMON___NCBI_EXPORT__H
-/* $Id: ncbi_export.h 605871 2020-04-16 11:23:58Z ivanov $
+/* $Id: ncbi_export.h 617033 2020-09-24 18:56:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#define NCBI_PACKAGE 1
#define NCBI_PACKAGE_NAME "blast"
#define NCBI_PACKAGE_VERSION_MAJOR 2
-#define NCBI_PACKAGE_VERSION_MINOR 10
-#define NCBI_PACKAGE_VERSION_PATCH 1
+#define NCBI_PACKAGE_VERSION_MINOR 11
+#define NCBI_PACKAGE_VERSION_PATCH 0
#define NCBI_PACKAGE_CONFIG ""
#define NCBI_PACKAGE_VERSION_STRINGIFY(x) #x
#ifndef COMMON___NCBICONF_IMPL__H
#define COMMON___NCBICONF_IMPL__H
-/* $Id: ncbiconf_impl.h 606329 2020-04-20 16:28:09Z ivanov $
+/* $Id: ncbiconf_impl.h 609547 2020-06-03 17:21:47Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
# error "The header can be used from <ncbiconf.h> only."
#endif /*!FORWARDING_NCBICONF_H*/
-#include <common/ncbi_build_info.h>
-
/** @addtogroup Portability
*
#ifndef CONNECT___NCBI_USAGE_REPORT__HPP
#define CONNECT___NCBI_USAGE_REPORT__HPP
-/* $Id: ncbi_usage_report.hpp 602851 2020-03-03 18:47:23Z ivanov $
+/* $Id: ncbi_usage_report.hpp 617219 2020-09-28 17:23:04Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* Authors: Vladislav Evgeniev, Vladimir Ivanov
*
* File Description:
- * Log usage information to NCBI \93pinger\94.
+ * Log usage information to NCBI "pinger".
*
*/
fOS = 1 << 3, ///< OS name ("os")
fHost = 1 << 4, ///< Host name ("host")
//
- fDefault = fAppName | fAppVersion | fOS | fHost
+ fDefault = fAppName | fAppVersion | fOS
};
typedef int TWhat; ///< Binary OR of "EWhat"
};
/// to allow checking reporting progress or failures, see EState for a list of states.
/// @sa
/// EState, CUsageReport::Send()
- virtual void OnStateChange(EState state) {};
+ virtual void OnStateChange(EState /*state*/) {};
/// Copy constructor.
- CUsageReportJob(const CUsageReportJob& other) { x_CopyFrom(other); };
+ CUsageReportJob(const CUsageReportJob& other) : CUsageReportParameters(other) { m_State = other.m_State; };
/// Copy assignment operator.
CUsageReportJob& operator=(const CUsageReportJob& other) { x_CopyFrom(other); return *this; };
#ifndef CORELIB___DB_SERVICE_MAPPER__HPP
#define CORELIB___DB_SERVICE_MAPPER__HPP
-/* $Id: ncbi_dbsvcmapper.hpp 586267 2019-05-13 18:15:06Z ucko $
+/* $Id: ncbi_dbsvcmapper.hpp 610944 2020-06-25 18:30:27Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
*/
-#include <corelib/ncbistd.hpp>
-#include <corelib/ncbiobj.hpp>
+#include <corelib/ncbimtx.hpp>
#ifdef NCBI_OS_MSWIN
# include <winsock2.h>
#ifndef CORELIB___MSWIN_NO_POPUP__H
#define CORELIB___MSWIN_NO_POPUP__H
-/* $Id: mswin_no_popup.h 171076 2009-09-21 16:22:34Z ivanov $
+/* $Id: mswin_no_popup.h 617213 2020-09-28 17:22:30Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#define NCBI_MSWIN_NO_POPUP
/* In case anyone needs to always disable the popup messages (regardless of DIAG_SILENT_ABDORT)
- another pre-processor macro can be defined before #include\92ing either
+ another pre-processor macro can be defined before #include'ing either
<corelib/mswin_no_popup.h> (or <common/test_assert.h>).
*/
/* #define NCBI_MSWIN_NO_POPUP_EVER */
#ifndef NCBI_SYSTEM__HPP
#define NCBI_SYSTEM__HPP
-/* $Id: ncbi_system.hpp 603334 2020-03-10 17:10:33Z ivanov $
+/* $Id: ncbi_system.hpp 613789 2020-08-12 18:02:48Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
enum ELimitsExitCode {
eLEC_None, ///< Normal exit.
eLEC_Memory, ///< Memory limit.
- eLEC_Cpu ///< CPU usage limit.
+ eLEC_Cpu ///< CPU time usage limit.
};
/// Type of parameter for print handler.
TLimitsPrintParameter parameter = NULL);
/// [UNIX only] Set soft memory limit.
-/// @sa SetMemoryLimit
+/// @note
+/// The soft limit is the value that the kernel enforces for the corresponding resource.
+/// An unprivileged process may only set its soft limit to a value in the range
+/// from 0 up to the hard limit, and (irreversibly) lower its hard limit.
+/// A privileged process may make arbitrary changes to either limit value.
+/// @sa SetMemoryLimit, SetMemoryLimitHard
NCBI_XNCBI_EXPORT
extern bool SetMemoryLimitSoft(size_t max_size,
TLimitsPrintHandler handler = NULL,
/// [UNIX only] Set hard memory limit.
/// @note
+/// The hard limit acts as a ceiling for the soft limit:
/// Current soft memory limit will be automatically decreased,
/// if it exceed new value for the hard memory limit.
/// @note
/// Only privileged process can increase current hard level limit.
-/// @sa SetMemoryLimit
+/// @sa SetMemoryLimit, SetMemoryLimitSoft
NCBI_XNCBI_EXPORT
extern bool SetMemoryLimitHard(size_t max_size,
TLimitsPrintHandler handler = NULL,
TLimitsPrintParameter parameter = NULL);
-/// [UNIX only] Set CPU usage limit.
+/// [UNIX only] Get the "soft" limit of the virtual memory (address space), in bytes, for the current process.
+/// @return
+/// Returns the "soft" value set by setrlimit(), SetMemoryLimit() or the ulimit command
+/// line utility for the virtual memory address space.
+/// 0 - if an error occurs (CNcbiError is set), or if the memory limit is set to "unlimited".
+/// @note
+/// The implementation of malloc() differs between flavors of UNIX, and we
+/// usually don't know exactly how it is implemented on the current system.
+/// Some systems use an sbrk()-based implementation (heap), others use the mmap() system call
+/// and virtual memory (address space) to allocate memory, and some use a hybrid approach
+/// and may allocate memory in two different ways depending on the requested memory size
+/// and certain parameters.
+/// Almost all modern Unix versions use an mmap()-based approach for all memory allocations,
+/// or at least for big chunks of memory, so virtual memory limits are probably more
+/// important nowadays.
+/// @sa SetMemoryLimit, GetVirtualMemoryLimitHard
+NCBI_XNCBI_EXPORT
+extern size_t GetVirtualMemoryLimitSoft(void);
+
+/// [UNIX only] Get the "hard" limit of the virtual memory (address space), in bytes, for the current process.
+/// @return
+/// Returns the "hard" value set by setrlimit(), SetMemoryLimit() or the ulimit command
+/// line utility for the virtual memory address space.
+/// 0 - if an error occurs (CNcbiError is set), or if the memory limit is set to "unlimited".
+/// @note
+/// The implementation of malloc() differs between flavors of UNIX, and we
+/// usually don't know exactly how it is implemented on the current system.
+/// Some systems use an sbrk()-based implementation (heap), others use the mmap() system call
+/// and virtual memory (address space) to allocate memory, and some use a hybrid approach
+/// and may allocate memory in two different ways depending on the requested memory size
+/// and certain parameters.
+/// Almost all modern Unix versions use an mmap()-based approach for all memory allocations,
+/// or at least for big chunks of memory, so virtual memory limits are probably more
+/// important nowadays.
+/// @sa SetMemoryLimit, GetVirtualMemoryLimitSoft
+NCBI_XNCBI_EXPORT
+extern size_t GetVirtualMemoryLimitHard(void);
+
+
+/// [UNIX only] Set CPU time usage limit.
///
/// Set the limit for the CPU time that can be consumed by current process.
///
/// Process owner user name, or empty string if it cannot be determined.
static string GetUserName(void);
- /// Return number of active CPUs (never less than 1).
+ /// Return number of active CPUs/cores (never less than 1).
static unsigned int GetCpuCount(void);
+ /// Return the number of CPUs/cores that the current thread is allowed to use.
+ /// Returns 0 if this information is not available on the current OS, or if an error occurs.
+ static unsigned int GetCpuCountAllowed(void);
+
/// Get system uptime in seconds.
/// @return
/// Seconds since last boot, or negative number if cannot determine it
#ifndef CORELIB___NCBIAPP_API__HPP
#define CORELIB___NCBIAPP_API__HPP
-/* $Id: ncbiapp_api.hpp 593047 2019-09-11 15:29:02Z grichenk $
+/* $Id: ncbiapp_api.hpp 610397 2020-06-16 18:45:55Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
NCBI_DEPRECATED virtual bool SetupDiag_AppSpecific(void);
/// Add callback to be executed from CNcbiApplicationAPI destructor.
- /// @sa CNcbiActionGuard
+ /// @note It is executed earlier, at CNcbiApplication destructor; and, it
+ /// may be executed even earlier from destructors of other
+ /// CNcbiApplicationAPI-derived classes.
+ /// @sa CNcbiActionGuard, ExecuteOnExitActions()
template<class TFunc> void AddOnExitAction(TFunc func)
{
m_OnExitActions.AddAction(func);
typedef int TAppFlags;
void SetAppFlags(TAppFlags flags) { m_AppFlags = flags; }
+ /// Should only be called from the destructors of classes derived from
+ /// CNcbiApplicationAPI - if it is necessary to access their data members
+ /// and virtual methods; or to dynamic_cast<> from the base app class.
+ /// @sa AddOnExitAction()
+ void ExecuteOnExitActions();
+
private:
/// Read standard NCBI application configuration settings.
///
#ifndef CORELIB___NCBIDIAG__HPP
#define CORELIB___NCBIDIAG__HPP
-/* $Id: ncbidiag.hpp 606470 2020-04-22 14:14:58Z ivanov $
+/* $Id: ncbidiag.hpp 611708 2020-07-09 17:56:10Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
NCBI_XNCBI_EXPORT
extern void SetDiagFilter(EDiagFilter what, const char* filter_str);
+/// Get current diagnostic filter
+///
+/// @param what
+/// Which filter to return. Only eDiagFilter_Trace and eDiagFilter_Post are allowed;
+/// for any other value the function returns an empty string.
+/// @sa SetDiagFilter
+NCBI_XNCBI_EXPORT
+extern string GetDiagFilter(EDiagFilter what);
+
+/// Append diagnostic filter
+///
+/// @param what
+/// Filter is set for
+/// @param filter_str
+/// Filter string
+/// @sa SetDiagFilter
+NCBI_XNCBI_EXPORT
+extern void AppendDiagFilter(EDiagFilter what, const char* filter_str);
+
/////////////////////////////////////////////////////////////////////////////
///
#ifndef CORELIB___NCBIMISC__HPP
#define CORELIB___NCBIMISC__HPP
-/* $Id: ncbimisc.hpp 607883 2020-05-08 15:09:10Z grichenk $
+/* $Id: ncbimisc.hpp 609927 2020-06-08 16:52:43Z grichenk $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
/// Use this typedef rather than its expansion, which may change.
typedef int TSignedSeqPos;
-/// Type for Taxon1-name.taxid
+/// Taxon id type
typedef int TTaxId;
+/// Helper macros for TTaxId, parallel to the GI_* / ENTREZ_ID_* macros.
+/// TAX_ID_TO casts a tax id to type T; TAX_ID_FROM casts a value to TTaxId
+/// (its T argument is unused); TAX_ID_CONST wraps a constant.
+/// The TAX_ID_CONST expansion is parenthesized so that operator arguments
+/// and the negative INVALID_TAX_ID constant keep their grouping inside a
+/// larger expression.
+# define TAX_ID_TO(T, tax_id) (static_cast<T>(tax_id))
+# define TAX_ID_FROM(T, value) (static_cast<ncbi::TTaxId>(value))
+# define TAX_ID_CONST(id) (id)
+#define ZERO_TAX_ID TAX_ID_CONST(0)
+#define INVALID_TAX_ID TAX_ID_CONST(-1)
/// Type for sequence GI.
///
public:
static const TIntId value = gi;
};
-#define GI_CONST(gi) (TGi(CConstGIChecker<gi>::value))
+#define GI_CONST(gi) (ncbi::TGi(CConstGIChecker<gi>::value))
#define ZERO_GI GI_CONST(0)
#define INVALID_GI GI_CONST(-1)
/// Temporary macros to convert TGi to other types (int, unsigned etc.).
-#define GI_TO(T, gi) (static_cast<T>(TIntId(gi)))
-#define GI_FROM(T, value) (TGi(static_cast<TIntId>(value)))
+#define GI_TO(T, gi) (static_cast<T>(ncbi::TIntId(gi)))
+#define GI_FROM(T, value) (ncbi::TGi(static_cast<ncbi::TIntId>(value)))
-#define ENTREZ_ID_CONST(id) (TEntrezId(CConstGIChecker<id>::value))
+#define ENTREZ_ID_CONST(id) (ncbi::TEntrezId(CConstGIChecker<id>::value))
#define ZERO_ENTREZ_ID ENTREZ_ID_CONST(0)
#define INVALID_ENTREZ_ID ENTREZ_ID_CONST(-1)
/// Temporary macros to convert TEntrezId to other types (int, unsigned etc.).
-#define ENTREZ_ID_TO(T, entrez_id) (static_cast<T>(TIntId(entrez_id)))
-#define ENTREZ_ID_FROM(T, value) (TEntrezId(static_cast<TIntId>(value)))
+#define ENTREZ_ID_TO(T, entrez_id) (static_cast<T>(ncbi::TIntId(entrez_id)))
+#define ENTREZ_ID_FROM(T, value) (ncbi::TEntrezId(static_cast<ncbi::TIntId>(value)))
/// Convert gi-compatible int to/from other types.
#define INT_ID_TO(T, id) (static_cast<T>(id))
-#define INT_ID_FROM(T, value) (static_cast<TIntId>(value))
+#define INT_ID_FROM(T, value) (static_cast<ncbi::TIntId>(value))
/// Helper address class
#ifndef CORELIB___NCBISTRE__HPP
#define CORELIB___NCBISTRE__HPP
-/* $Id: ncbistre.hpp 606328 2020-04-20 16:27:53Z ivanov $
+/* $Id: ncbistre.hpp 617212 2020-09-28 17:22:22Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CNcbiIfstream::open(_Filename.c_str(), _Mode, _Prot);
}
};
-#elif defined(NCBI_COMPILER_MSVC)
-# if _MSC_VER >= 1200 && _MSC_VER < 1300
-class CNcbiIfstream : public IO_PREFIX::ifstream
-{
-public:
- CNcbiIfstream() : m_Fp(0)
- {
- }
-
- explicit CNcbiIfstream(const char* s,
- IOS_BASE::openmode mode = IOS_BASE::in)
- {
- fastopen(s, mode);
- }
-
- void fastopen(const char* s, IOS_BASE::openmode mode = IOS_BASE::in)
- {
- if (is_open() || !(m_Fp = __Fiopen(s, mode | in)))
- setstate(failbit);
- else
- (void) new (rdbuf()) basic_filebuf<char, char_traits<char> >(m_Fp);
- }
-
- virtual ~CNcbiIfstream(void)
- {
- if (m_Fp)
- fclose(m_Fp);
- }
-private:
- FILE* m_Fp;
-};
-# else
-/// Portable alias for ifstream.
-typedef IO_PREFIX::ifstream CNcbiIfstream;
-# endif
#else
/// Portable alias for ifstream.
typedef IO_PREFIX::ifstream CNcbiIfstream;
CNcbiOfstream::open(_Filename.c_str(), _Mode, _Prot);
}
};
-#elif defined(NCBI_COMPILER_MSVC)
-# if _MSC_VER >= 1200 && _MSC_VER < 1300
-class CNcbiOfstream : public IO_PREFIX::ofstream
-{
-public:
- CNcbiOfstream() : m_Fp(0)
- {
- }
-
- explicit CNcbiOfstream(const char* s,
- IOS_BASE::openmode mode = IOS_BASE::out)
- {
- fastopen(s, mode);
- }
-
- void fastopen(const char* s, IOS_BASE::openmode mode = IOS_BASE::out)
- {
- if (is_open() || !(m_Fp = __Fiopen(s, mode | out)))
- setstate(failbit);
- else
- (void) new (rdbuf()) basic_filebuf<char, char_traits<char> >(m_Fp);
- }
-
- virtual ~CNcbiOfstream(void)
- {
- if (m_Fp)
- fclose(m_Fp);
- }
-private:
- FILE* m_Fp;
-};
-# else
-/// Portable alias for ofstream.
-typedef IO_PREFIX::ofstream CNcbiOfstream;
-# endif
#else
/// Portable alias for ofstream.
typedef IO_PREFIX::ofstream CNcbiOfstream;
IO_PREFIX::fstream::open(_Filename,_Mode,_Prot);
}
};
-#elif defined(NCBI_COMPILER_MSVC)
-# if _MSC_VER >= 1200 && _MSC_VER < 1300
-class CNcbiFstream : public IO_PREFIX::fstream
-{
-public:
- CNcbiFstream() : m_Fp(0)
- {
- }
-
- explicit CNcbiFstream(const char* s,
- IOS_BASE::openmode
- mode = IOS_BASE::in | IOS_BASE::out)
- {
- fastopen(s, mode);
- }
-
- void fastopen(const char* s, IOS_BASE::openmode
- mode = IOS_BASE::in | IOS_BASE::out)
- {
- if (is_open() || !(m_Fp = __Fiopen(s, mode)))
- setstate(failbit);
- else
- (void) new (rdbuf()) basic_filebuf<char, char_traits<char> >(m_Fp);
- }
-
- virtual ~CNcbiFstream(void)
- {
- if (m_Fp)
- fclose(m_Fp);
- }
-private:
- FILE* m_Fp;
-};
-# else
-/// Portable alias for fstream.
-typedef IO_PREFIX::fstream CNcbiFstream;
-# endif
#else
/// Portable alias for fstream.
typedef IO_PREFIX::fstream CNcbiFstream;
CNcbiOstream& operator<<(CNcbiOstream& out, const CNcbiOstrstreamToString& s);
inline
-Int8
-GetOssSize(CNcbiOstrstream& oss)
+Int8 GetOssSize(CNcbiOstrstream& oss)
{
#ifdef NCBI_SHUN_OSTRSTREAM
return NcbiStreamposToInt8(oss.tellp());
}
inline
-bool
-IsOssEmpty(CNcbiOstrstream& oss)
+bool IsOssEmpty(CNcbiOstrstream& oss)
{
return GetOssSize(oss) == 0;
}
NCBI_XNCBI_EXPORT
CNcbiOstream& operator<<(CNcbiOstream& out, CPrintableCharPtrConverter s);
-#ifdef NCBI_COMPILER_MSVC
-# if _MSC_VER >= 1200 && _MSC_VER < 1300
-NCBI_XNCBI_EXPORT
-CNcbiOstream& operator<<(CNcbiOstream& out, __int64 val);
-# endif
-#endif
-
/////////////////////////////////////////////////////////////////////////////
///
/// CStreamUtils::Pushback().
/// @sa CStreamUtils::Pushback()
NCBI_XNCBI_EXPORT
-EEncodingForm GetTextEncodingForm(CNcbiIstream& input,
- EBOMDiscard discard_bom);
+EEncodingForm GetTextEncodingForm(CNcbiIstream& input, EBOMDiscard discard_bom);
+
/// Byte Order Mark helper class to use in serialization
///
EEncodingForm m_EncodingForm;
};
+
/// Write Byte Order Mark into output stream
NCBI_XNCBI_EXPORT CNcbiOstream& operator<< (CNcbiOstream& str, const CByteOrderMark& bom);
+
/// Read Byte Order Mark, if present, from input stream
///
/// @note
// NOTE: these must have been inside the _NCBI_SCOPE and without the
// "ncbi::" and "std::" prefixes, but there is some bug in SunPro 5.0...
#if defined(NCBI_USE_OLD_IOSTREAM)
-extern NCBI_NS_NCBI::CNcbiOstream& operator<<(NCBI_NS_NCBI::CNcbiOstream& os,
- const NCBI_NS_STD::string& str);
-extern NCBI_NS_NCBI::CNcbiIstream& operator>>(NCBI_NS_NCBI::CNcbiIstream& is,
- NCBI_NS_STD::string& str);
+extern NCBI_NS_NCBI::CNcbiOstream&
+ operator<<(NCBI_NS_NCBI::CNcbiOstream& os, const NCBI_NS_STD::string& str);
+extern NCBI_NS_NCBI::CNcbiIstream&
+ operator>>(NCBI_NS_NCBI::CNcbiIstream& is, NCBI_NS_STD::string& str);
#endif // NCBI_USE_OLD_IOSTREAM
#ifndef CORELIB___REQUEST_CTX__HPP
#define CORELIB___REQUEST_CTX__HPP
-/* $Id: request_ctx.hpp 574926 2018-11-20 20:23:54Z ucko $
+/* $Id: request_ctx.hpp 617468 2020-10-01 17:54:00Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
class NCBI_XNCBI_EXPORT CSharedHitId
{
public:
- explicit CSharedHitId(const string& hit) : m_HitId(hit), m_SubHitId(0) {}
+ explicit CSharedHitId(const string& hit)
+ : m_HitId(hit), m_SubHitId(0), m_AppState(GetDiagContext().GetAppState()) {}
CSharedHitId(void) : m_SubHitId(0) {}
~CSharedHitId(void) {}
m_SharedSubHitId.Reset();
m_SubHitId = 0;
m_HitId = hit_id;
+ m_AppState = GetDiagContext().GetAppState();
}
typedef unsigned int TSubHitId;
return IsShared() ? (TSubHitId)m_SharedSubHitId->GetData().Add(1) : ++m_SubHitId;
}
+    /// Tell whether the application state recorded with this hit ID is one
+    /// of the request states (RequestBegin, Request or RequestEnd).
+    /// @return true for those three states, false for any other value.
+    bool IsRequestLevel(void) const
+    {
+        switch (m_AppState) {
+        case eDiagAppState_RequestBegin:
+        case eDiagAppState_Request:
+        case eDiagAppState_RequestEnd:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+
private:
typedef CObjectFor<CAtomicCounter> TSharedCounter;
string m_HitId;
TSubHitId m_SubHitId;
mutable CRef<TSharedCounter> m_SharedSubHitId;
+ EDiagAppState m_AppState;
};
return true;
}
if ((src & eHitID_Request) && x_IsSetProp(eProp_HitID)) {
- return true;
+ return m_HitID.IsRequestLevel();
}
if ((src & eHitID_Default) && GetDiagContext().x_IsSetDefaultHitID()) {
return true;
#ifndef CORELIB___TEST_BOOST__HPP
#define CORELIB___TEST_BOOST__HPP
-/* $Id: test_boost.hpp 604629 2020-03-31 13:43:18Z ivanov $
+/* $Id: test_boost.hpp 617213 2020-09-28 17:22:30Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#endif
#include <boost/version.hpp>
-#include <boost/test/auto_unit_test.hpp>
+#if BOOST_VERSION >= 107000
+# include <boost/test/unit_test.hpp>
+#else
+# include <boost/test/auto_unit_test.hpp>
+#endif
#include <boost/test/floating_point_comparison.hpp>
#include <boost/test/framework.hpp>
#include <boost/test/execution_monitor.hpp>
#ifndef CORELIB___VERSION__HPP
#define CORELIB___VERSION__HPP
-/* $Id: version.hpp 593438 2019-09-18 18:13:51Z lavr $
+/* $Id: version.hpp 609546 2020-06-03 17:21:38Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <corelib/version_api.hpp>
+#include <common/ncbi_build_info.h>
# define NCBI_BUILD_TIME __DATE__ " " __TIME__
#endif
+#ifdef NCBI_BUILD_TAG
+# define NCBI_BUILD_TAG_PROXY NCBI_AS_STRING(NCBI_BUILD_TAG)
+#else
+# define NCBI_BUILD_TAG_PROXY ""
+#endif
+
+// Cope with potentially having an older ncbi_build_info.h
+#ifndef NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO
+# define NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO \
+ .Extra(SBuildInfo::eTeamCityProjectName, NCBI_TEAMCITY_PROJECT_NAME_PROXY)
+# define NCBI_TEAMCITY_BUILDCONF_NAME_SBUILDINFO \
+ .Extra(SBuildInfo::eTeamCityBuildConf, NCBI_TEAMCITY_BUILDCONF_NAME_PROXY)
+# define NCBI_TEAMCITY_BUILD_NUMBER_SBUILDINFO \
+ .Extra(SBuildInfo::eTeamCityBuildNumber, NCBI_TEAMCITY_BUILD_NUMBER_PROXY)
+# define NCBI_SUBVERSION_REVISION_SBUILDINFO \
+ .Extra(SBuildInfo::eSubversionRevision, NCBI_SUBVERSION_REVISION_PROXY)
+# define NCBI_SC_VERSION_SBUILDINFO \
+ .Extra(SBuildInfo::eStableComponentsVersion, NCBI_SC_VERSION_PROXY)
+#endif
+
+// Cope with potentially having an older ncbi_source_ver.h
+#ifndef NCBI_SRCTREE_VER_SBUILDINFO
+# ifdef NCBI_SRCTREE_NAME_PROXY
+# define NCBI_SRCTREE_VER_SBUILDINFO \
+ .Extra(NCBI_SRCTREE_NAME_PROXY, NCBI_SRCTREE_VER_PROXY)
+# else
+# define NCBI_SRCTREE_VER_SBUILDINFO /* empty */
+# endif
+#endif
+
+#ifdef NCBI_APP_BUILT_AS
+# define NCBI_BUILT_AS_SBUILDINFO \
+ .Extra(SBuildInfo::eBuiltAs, NCBI_AS_STRING(NCBI_APP_BUILT_AS))
+#else
+# define NCBI_BUILT_AS_SBUILDINFO /* empty */
+#endif
+
+#ifdef NCBI_TEAMCITY_BUILD_ID
+# define NCBI_BUILD_ID NCBI_TEAMCITY_BUILD_ID
+#elif defined(NCBI_BUILD_SESSION_ID)
+# define NCBI_BUILD_ID NCBI_AS_STRING(NCBI_BUILD_SESSION_ID)
+#endif
+#ifdef NCBI_BUILD_ID
+# define NCBI_BUILD_ID_SBUILDINFO .Extra(SBuildInfo::eBuildID, NCBI_BUILD_ID)
+#else
+# define NCBI_BUILD_ID_SBUILDINFO /* empty */
+#endif
+
+#define NCBI_SBUILDINFO_DEFAULT_IMPL() \
+ NCBI_SBUILDINFO_DEFAULT_INSTANCE() \
+ NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO \
+ NCBI_TEAMCITY_BUILDCONF_NAME_SBUILDINFO \
+ NCBI_TEAMCITY_BUILD_NUMBER_SBUILDINFO \
+ NCBI_BUILD_ID_SBUILDINFO \
+ NCBI_SUBVERSION_REVISION_SBUILDINFO \
+ NCBI_SC_VERSION_SBUILDINFO \
+ NCBI_SRCTREE_VER_SBUILDINFO \
+ NCBI_BUILT_AS_SBUILDINFO
+
+#if defined(NCBI_USE_PCH) && !defined(NCBI_TEAMCITY_BUILD_NUMBER)
+#define NCBI_SBUILDINFO_DEFAULT() SBuildInfo()
+#else
+#define NCBI_SBUILDINFO_DEFAULT() NCBI_SBUILDINFO_DEFAULT_IMPL()
+#endif
+#define NCBI_APP_SBUILDINFO_DEFAULT() NCBI_SBUILDINFO_DEFAULT_IMPL()
+
#ifdef NCBI_SBUILDINFO_DEFAULT_INSTANCE
# undef NCBI_SBUILDINFO_DEFAULT_INSTANCE
#endif
#ifndef CORELIB___VERSION_API__HPP
#define CORELIB___VERSION_API__HPP
-/* $Id: version_api.hpp 591729 2019-08-19 20:52:06Z vasilche $
+/* $Id: version_api.hpp 609546 2020-06-03 17:21:38Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
string PrintJson(void) const;
};
-#ifdef NCBI_BUILD_TAG
-# define NCBI_BUILD_TAG_PROXY NCBI_AS_STRING(NCBI_BUILD_TAG)
-#else
-# define NCBI_BUILD_TAG_PROXY ""
-#endif
-
-// Cope with potentially having an older ncbi_build_info.h
-#ifndef NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO
-# define NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO \
- .Extra(SBuildInfo::eTeamCityProjectName, NCBI_TEAMCITY_PROJECT_NAME_PROXY)
-# define NCBI_TEAMCITY_BUILDCONF_NAME_SBUILDINFO \
- .Extra(SBuildInfo::eTeamCityBuildConf, NCBI_TEAMCITY_BUILDCONF_NAME_PROXY)
-# define NCBI_TEAMCITY_BUILD_NUMBER_SBUILDINFO \
- .Extra(SBuildInfo::eTeamCityBuildNumber, NCBI_TEAMCITY_BUILD_NUMBER_PROXY)
-# define NCBI_SUBVERSION_REVISION_SBUILDINFO \
- .Extra(SBuildInfo::eSubversionRevision, NCBI_SUBVERSION_REVISION_PROXY)
-# define NCBI_SC_VERSION_SBUILDINFO \
- .Extra(SBuildInfo::eStableComponentsVersion, NCBI_SC_VERSION_PROXY)
-#endif
-
-// Cope with potentially having an older ncbi_source_ver.h
-#ifndef NCBI_SRCTREE_VER_SBUILDINFO
-# ifdef NCBI_SRCTREE_NAME_PROXY
-# define NCBI_SRCTREE_VER_SBUILDINFO \
- .Extra(NCBI_SRCTREE_NAME_PROXY, NCBI_SRCTREE_VER_PROXY)
-# else
-# define NCBI_SRCTREE_VER_SBUILDINFO /* empty */
-# endif
-#endif
-
-#ifdef NCBI_APP_BUILT_AS
-# define NCBI_BUILT_AS_SBUILDINFO \
- .Extra(SBuildInfo::eBuiltAs, NCBI_AS_STRING(NCBI_APP_BUILT_AS))
-#else
-# define NCBI_BUILT_AS_SBUILDINFO /* empty */
-#endif
-
-#ifdef NCBI_TEAMCITY_BUILD_ID
-# define NCBI_BUILD_ID NCBI_TEAMCITY_BUILD_ID
-#elif defined(NCBI_BUILD_SESSION_ID)
-# define NCBI_BUILD_ID NCBI_AS_STRING(NCBI_BUILD_SESSION_ID)
-#endif
-#ifdef NCBI_BUILD_ID
-# define NCBI_BUILD_ID_SBUILDINFO .Extra(SBuildInfo::eBuildID, NCBI_BUILD_ID)
-#else
-# define NCBI_BUILD_ID_SBUILDINFO /* empty */
-#endif
-
#define NCBI_SBUILDINFO_DEFAULT_INSTANCE() SBuildInfo()
-#define NCBI_SBUILDINFO_DEFAULT_IMPL() \
- NCBI_SBUILDINFO_DEFAULT_INSTANCE() \
- NCBI_TEAMCITY_PROJECT_NAME_SBUILDINFO \
- NCBI_TEAMCITY_BUILDCONF_NAME_SBUILDINFO \
- NCBI_TEAMCITY_BUILD_NUMBER_SBUILDINFO \
- NCBI_BUILD_ID_SBUILDINFO \
- NCBI_SUBVERSION_REVISION_SBUILDINFO \
- NCBI_SC_VERSION_SBUILDINFO \
- NCBI_SRCTREE_VER_SBUILDINFO \
- NCBI_BUILT_AS_SBUILDINFO
-
-#if defined(NCBI_USE_PCH) && !defined(NCBI_TEAMCITY_BUILD_NUMBER)
-#define NCBI_SBUILDINFO_DEFAULT() SBuildInfo()
-#else
-#define NCBI_SBUILDINFO_DEFAULT() NCBI_SBUILDINFO_DEFAULT_IMPL()
-#endif
-#define NCBI_APP_SBUILDINFO_DEFAULT() NCBI_SBUILDINFO_DEFAULT_IMPL()
/////////////////////////////////////////////////////////////////////////////
///
--- /dev/null
+#ifndef DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP
+#define DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP
+
+/* $Id: dbapi_pool_balancer.hpp 610945 2020-06-25 18:31:37Z ivanov $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Aaron Ucko
+ *
+ */
+
+/// @file dbapi_pool_balancer.hpp
+/// Help distribute connections within a pool across servers.
+
+#include <dbapi/driver/impl/dbapi_driver_utils.hpp>
+
+/** @addtogroup DBAPI
+ *
+ * @{
+ */
+
+BEGIN_NCBI_SCOPE
+
+/// Helps distribute the connections of a pool across servers.
+///
+/// NOTE(review): only the declaration is visible in this header; the actual
+/// balancing policy lives in the implementation file.
+class CDBPoolBalancer : public CObject
+{
+public:
+    /// @param service_name  Name of the service.
+    /// @param pool_name     Name of the connection pool.
+    /// @param options       Server options, typed IDBServiceMapper::TOptions.
+    /// @param driver_ctx    Driver context; optional, nullptr by default.
+    CDBPoolBalancer(const string&                     service_name,
+                    const string&                     pool_name,
+                    const IDBServiceMapper::TOptions& options,
+                    I_DriverContext*                  driver_ctx = nullptr);
+
+    /// Select a server.
+    /// @param conn    Pointer to a connection pointer.
+    ///                NOTE(review): presumably an in/out slot for a reusable
+    ///                connection -- confirm against the implementation.
+    /// @param params  Connection parameters.
+    /// @return Reference (TSvrRef) to the chosen server.
+    TSvrRef GetServer(CDB_Connection** conn, const CDBConnParams* params);
+
+private:
+    /// Bookkeeping kept per endpoint; all numeric fields start at zero.
+    struct SEndpointInfo {
+        SEndpointInfo() = default;
+
+        CRef<CDBServerOption> ref;
+        double                effective_ranking = 0.0;
+        double                ideal_count       = 0.0;
+        unsigned int          actual_count      = 0U;
+        unsigned int          penalty_level     = 0U;
+    };
+    /// Endpoint records indexed by endpoint key.
+    using TEndpoints = map<impl::TEndpointKey, SEndpointInfo>;
+
+    /// Convert a server name to an endpoint key. The name is taken by
+    /// non-const reference.
+    /// NOTE(review): presumably the callee may adjust it -- confirm.
+    impl::TEndpointKey x_NameToKey(CTempString& name) const;
+
+    TEndpoints       m_Endpoints;   ///< Known endpoints and their counters
+    multiset<double> m_Rankings;    ///< Collection of ranking values
+    I_DriverContext* m_DriverCtx;   ///< Driver context passed to the constructor (may be null)
+    unsigned int     m_TotalCount;  ///< NOTE(review): presumably the sum of actual_count -- confirm
+};
+
+END_NCBI_SCOPE
+
+/* @} */
+
+#endif /* DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP */
#if defined(NCBI_USE_PCH) && !defined(NCBI_PCH__HPP)
-/* $Id: ncbi_pch.hpp 608162 2020-05-12 16:01:31Z blastadm $
+/* $Id: ncbi_pch.hpp 617723 2020-10-06 07:10:56Z blastadm $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
-/* $Id: ncbi_source_ver.h 608162 2020-05-12 16:01:31Z blastadm $
+/* $Id: ncbi_source_ver.h 617723 2020-10-06 07:10:56Z blastadm $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#ifndef FORWARDING_NCBICONF_H
#define FORWARDING_NCBICONF_H
-/* $Id: ncbiconf.h 608162 2020-05-12 16:01:31Z blastadm $
+/* $Id: ncbiconf.h 617723 2020-10-06 07:10:56Z blastadm $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#ifndef SRA__READER__SRA__IMPL__SNPPTIS__HPP
#define SRA__READER__SRA__IMPL__SNPPTIS__HPP
-/* $Id: snpptis_impl.hpp 597185 2019-11-18 19:46:30Z vasilche $
+/* $Id: snpptis_impl.hpp 615550 2020-09-01 13:13:11Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
typedef ncbi::grpcapi::dbsnp::primary_track::SeqIdRequestStringAccverUnion TRequest;
string x_GetPrimarySnpTrack(const TRequest& request);
-
+
+ int max_retries;
+ float timeout;
+ float timeout_mul;
+ float timeout_inc;
+ float timeout_max;
+ float wait_time;
+ float wait_time_mul;
+ float wait_time_inc;
+ float wait_time_max;
shared_ptr<grpc::Channel> channel;
unique_ptr<ncbi::grpcapi::dbsnp::primary_track::DbSnpPrimaryTrack::Stub> stub;
};
-/* $Id: Dbtag.hpp 591286 2019-08-13 18:04:06Z kans $
+/* $Id: Dbtag.hpp 615789 2020-09-03 18:19:18Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
eDbtagType_EPDnew,
eDbtagType_Ensembl,
eDbtagType_PseudoCAP,
- eDbtagType_MarpolBase
+ eDbtagType_MarpolBase,
+ eDbtagType_dbVar
};
enum EDbtagGroup {
-/* $Id: SeqFeatData.hpp 597755 2019-11-26 19:03:13Z gotvyans $
+/* $Id: SeqFeatData.hpp 613780 2020-08-12 16:42:40Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
eQual_mol_type,
eQual_name,
eQual_nomenclature,
+ eQual_non_std_residue,
eQual_ncRNA_class,
eQual_note,
eQual_number,
-/* $Id: local_taxon.hpp 598592 2019-12-11 15:20:21Z badrazat $
+/* $Id: local_taxon.hpp 615586 2020-09-01 17:59:29Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
{ return GetAncestorByRank(taxid, "species"); }
TTaxid GetGenus(TTaxid taxid)
{ return GetAncestorByRank(taxid, "genus"); }
+ TTaxid GetOrder(TTaxid taxid)
+ { return GetAncestorByRank(taxid, "order"); }
TLineage GetLineage(TTaxid taxid);
TTaxid Join(TTaxid taxid1, TTaxid taxid2);
-/* $Id: ValidErrItem.hpp 597158 2019-11-18 17:58:02Z kans $
+/* $Id: ValidErrItem.hpp 611904 2020-07-13 15:51:08Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
eErr_SEQ_FEAT_TRNAinsideTMRNA,
eErr_SEQ_FEAT_IncorrectQualifierCapitalization,
eErr_SEQ_FEAT_CDSdoesNotMatchVDJC,
+ eErr_SEQ_FEAT_GeneOnNucPositionOfPeptide,
ERR_CODE_END(SEQ_FEAT),
ERR_CODE_BEGIN(SEQ_ALIGN) = 5000,
#ifndef OBJMGR_IMPL_SCOPE_IMPL__HPP
#define OBJMGR_IMPL_SCOPE_IMPL__HPP
-/* $Id: scope_impl.hpp 606922 2020-04-28 18:58:25Z ivanov $
+/* $Id: scope_impl.hpp 610058 2020-06-10 16:19:48Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
void ResetHistory(int action); // CScope::EActionIfLocked
void ResetDataAndHistory(void);
void RemoveFromHistory(const CTSE_Handle& tse, int action);
+ void RemoveFromHistory(const CSeq_id_Handle& seq_id);
// Revoke data sources from the scope. Throw exception if the
// operation fails (e.g. data source is in use or not found).
#ifndef OBJMGR_SCOPE__HPP
#define OBJMGR_SCOPE__HPP
-/* $Id: scope.hpp 575832 2018-12-04 21:08:18Z vasilche $
+/* $Id: scope.hpp 610058 2020-06-10 16:19:48Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
/// Bioseq, which TSE is to be removed from the cache.
void RemoveFromHistory(const CBioseq_Handle& bioseq,
EActionIfLocked action = eKeepIfLocked);
+ /// Remove the record of a failed Seq-id resolution from the scope's history.
+ /// @param seq_id
+ /// Seq-id that failed resolution
+ void RemoveFromHistory(const CSeq_id_Handle& seq_id);
+ /// Remove the record of a failed Seq-id resolution from the scope's history.
+ /// @param seq_id
+ /// Seq-id that failed resolution
+ void RemoveFromHistory(const CSeq_id& seq_id);
/// Revoke data loader from the scope. Throw exception if the
/// operation fails (e.g. data source is in use or not found).
#ifndef OBJMGR_UTIL___AUTODEF_OPTIONS__HPP
#define OBJMGR_UTIL___AUTODEF_OPTIONS__HPP
-/* $Id: autodef_options.hpp 530276 2017-03-13 18:20:08Z bollin $
+/* $Id: autodef_options.hpp 611612 2020-07-08 17:43:23Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
eCompleteGenome,
ePartialSequence,
ePartialGenome,
- eSequence
+ eSequence,
+ eWholeGenomeShotgunSequence
};
typedef unsigned int TFeatureListType;
class CDescriptorIndex;
class CFeatureIndex;
+/// Pointer to a function that takes a Bioseq handle by value and a mutable
+/// string @a na_acc and returns nothing.
+/// NOTE(review): presumably a hook for adding SNP annotation -- confirm
+/// against the code that installs it via SetSnpFunc().
+using FAddSnpFunc = void (*)(CBioseq_Handle bsh, string& na_acc);
// CSeqEntryIndex
//
eInternal = 1,
eExternal = 2,
eExhaustive = 3,
- eIncremental = 4
+ eFtp = 4,
+ eWeb = 5
};
enum EFlags {
// Constructors take the top-level sequence object
// The primary constructor uses an existing CScope created by the application
- CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
- CSeqEntryIndex (CBioseq_Handle& bsh, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
+ CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy = eAdaptive, TFlags flags = fDefault);
+ CSeqEntryIndex (CBioseq_Handle& bsh, EPolicy policy = eAdaptive, TFlags flags = fDefault);
// Alternative constructors take an object and create a new local default CScope
- CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
- CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
- CSeqEntryIndex (CBioseq& bioseq, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
- CSeqEntryIndex (CSeq_submit& submit, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
+ CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy = eAdaptive, TFlags flags = fDefault);
+ CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy = eAdaptive, TFlags flags = fDefault);
+ CSeqEntryIndex (CBioseq& bioseq, EPolicy policy = eAdaptive, TFlags flags = fDefault);
+ CSeqEntryIndex (CSeq_submit& submit, EPolicy policy = eAdaptive, TFlags flags = fDefault);
// Specialized constructors are for streaming through release files, one component at a time
// Submit-block obtained from top of Seq-submit release file
- CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
+ CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy = eAdaptive, TFlags flags = fDefault);
// Seq-descr chain obtained from top of Bioseq-set release file
- CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy = eAdaptive, TFlags flags = fDefault, int depth = -1);
+ CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy = eAdaptive, TFlags flags = fDefault);
private:
// Prohibit copy constructor & assignment operator
CRef<CBioseqIndex> GetBioseqIndex (CBioseq_Handle bsh);
// Get Bioseq index by mapped feature
CRef<CBioseqIndex> GetBioseqIndex (const CMappedFeat& mf);
-
- // Subrange processing creates a new CBioseqIndex around a temporary delta Bioseq
-
// Get Bioseq index by sublocation
CRef<CBioseqIndex> GetBioseqIndex (const CSeq_loc& loc);
- // Get Bioseq index by subrange
- CRef<CBioseqIndex> GetBioseqIndex (const string& accn, int from, int to, bool rev_comp);
- CRef<CBioseqIndex> GetBioseqIndex (int from, int to, bool rev_comp);
// Seqset exploration iterator
template<typename Fnc> size_t IterateSeqsets (Fnc m);
bool DistributedReferences(void);
+ void SetSnpFunc(FAddSnpFunc* snp);
+
+ FAddSnpFunc* GetSnpFunc(void);
+
+ void SetFeatDepth(int featDepth);
+
+ int GetFeatDepth(void);
+
+ void SetGapDepth(int gapDepth);
+
+ int GetGapDepth(void);
+
// Check all Bioseqs for failure to fetch remote sequence components or feature annotation
bool IsFetchFailure(void);
public:
// Initializers take the top-level sequence object
- void x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
- void x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
+ void x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
+ void x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
- void x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
- void x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
- void x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
- void x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
+ void x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
+ void x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
+ void x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
+ void x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
- void x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
- void x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth);
+ void x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
+ void x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
private:
// Prohibit copy constructor & assignment operator
CRef<CBioseqIndex> GetBioseqIndex (string& str);
// Get Bioseq index by feature
CRef<CBioseqIndex> GetBioseqIndex (const CMappedFeat& mf);
-
- // Subrange processing creates a new CBioseqIndex around a temporary delta Bioseq
// Get Bioseq index by sublocation
CRef<CBioseqIndex> GetBioseqIndex (const CSeq_loc& loc);
- // Get Bioseq index by subrange
- CRef<CBioseqIndex> GetBioseqIndex (const string& accn, int from, int to, bool rev_comp);
- CRef<CBioseqIndex> GetBioseqIndex (int from, int to, bool rev_comp);
// Seqset exploration iterator
template<typename Fnc> size_t IterateSeqsets (Fnc m);
bool DistributedReferences (void) const { return m_DistributedReferences; }
+ void SetSnpFunc(FAddSnpFunc* snp);
+
+ FAddSnpFunc* GetSnpFunc(void);
+
+ void SetFeatDepth(int featDepth);
+
+ int GetFeatDepth(void);
+
+ void SetGapDepth(int gapDepth);
+
+ int GetGapDepth(void);
+
// Check all Bioseqs for failure to fetch remote sequence components or remote feature annotation
bool IsFetchFailure(void);
// Recursive exploration to populate vector of index objects for Bioseqs in Seq-entry
void x_InitSeqs (const CSeq_entry& sep, CRef<CSeqsetIndex> prnt, int level = 0);
- CRef<CSeq_id> x_MakeUniqueId(void);
-
- // Create delta sequence referring to location, using temporary local ID
- CRef<CBioseqIndex> x_DeltaIndex(const CSeq_loc& loc);
-
- // Create location from range, to use in x_DeltaIndex
- CConstRef<CSeq_loc> x_SubRangeLoc(const string& accn, int from, int to, bool rev_comp);
-
private:
CRef<CObjectManager> m_Objmgr;
CRef<CScope> m_Scope;
CSeqEntryIndex::EPolicy m_Policy;
CSeqEntryIndex::TFlags m_Flags;
- int m_Depth;
vector<CRef<CBioseqIndex>> m_BsxList;
bool m_DistributedReferences;
+ FAddSnpFunc* m_SnpFunc;
+
+ int m_FeatDepth;
+ int m_GapDepth;
+
mutable CAtomicCounter m_Counter;
bool m_IndexFailure;
CRef<CScope> scope,
CSeqMasterIndex& idx,
CSeqEntryIndex::EPolicy policy,
- CSeqEntryIndex::TFlags flags,
- int depth,
- bool surrogate);
+ CSeqEntryIndex::TFlags flags);
// Destructor
~CBioseqIndex (void);
// Feature exploration iterator
template<typename Fnc> size_t IterateFeatures (Fnc m);
-
- template<typename Fnc> size_t IterateFeaturesByLoc (const CSeq_loc& slp, Fnc m);
+ template<typename Fnc> size_t IterateFeatures (CSeq_loc& slp, Fnc m);
// Getters
CBioseq_Handle GetBioseqHandle (void) const { return m_Bsh; }
CSeq_inst::TLength GetLength (void) const { return m_Length; }
bool IsDelta (void) const { return m_IsDelta; }
+ bool IsDeltaLitOnly (void) const { return m_IsDeltaLitOnly; }
bool IsVirtual (void) const { return m_IsVirtual; }
bool IsMap (void) const { return m_IsMap; }
// Seq-id fields
const string& GetAccession (void) const { return m_Accession; }
+ bool IsRefSeq (void) const { return m_IsRefSeq; }
bool IsNC (void) const { return m_IsNC; }
bool IsNM (void) const { return m_IsNM; }
bool IsNR (void) const { return m_IsNR; }
const string& GetCommon (void);
const string& GetLineage (void);
- int GetTaxid (void);
+ TTaxId GetTaxid (void);
bool IsUsingAnamorph (void);
CTempString GetGenus (void);
// Common feature collection, delayed until actually needed
void x_InitFeats (void);
- // Collect features by location
- void x_InitFeatsByLoc (const CSeq_loc& slp);
+ void x_InitFeats (CSeq_loc& slp);
+
+ void x_DefaultSelector(SAnnotSelector& sel, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, bool onlyNear, CScope& scope);
+
+ // common implementation method
+ void x_InitFeats (CSeq_loc* slpp);
// Set BioSource flags
void x_InitSource (void);
CSeqEntryIndex::EPolicy m_Policy;
CSeqEntryIndex::TFlags m_Flags;
- int m_Depth;
bool m_FetchFailure;
CSeq_inst::TLength m_Length;
bool m_IsDelta;
+ bool m_IsDeltaLitOnly;
bool m_IsVirtual;
bool m_IsMap;
// Seq-id fields
string m_Accession;
+ bool m_IsRefSeq;
bool m_IsNC;
bool m_IsNM;
bool m_IsNR;
string m_Common;
string m_Lineage;
- int m_Taxid;
+ TTaxId m_Taxid;
bool m_UsingAnamorph;
CTempString m_Genus;
// Map fields
string m_rEnzyme;
-
- // true if this index is for a temporary subrange delta Bioseq
- bool m_Surrogate;
};
// Constructor
CFeatureIndex (CSeq_feat_Handle sfh,
const CMappedFeat mf,
+ CConstRef<CSeq_loc> feat_loc,
CBioseqIndex& bsx);
private:
return count;
}
-// Visit CFeatureIndex objects for range of features
template<typename Fnc>
inline
-size_t CBioseqIndex::IterateFeaturesByLoc (const CSeq_loc& slp, Fnc m)
+size_t CBioseqIndex::IterateFeatures (CSeq_loc& slp, Fnc m)
{
int count = 0;
try {
// Delay feature collection until first request, but do not bail on m_FeatsInitialized flag
- x_InitFeatsByLoc(slp);
+ x_InitFeats(slp);
for (auto& sfx : m_SfxList) {
count++;
}
}
catch (CException& e) {
- LOG_POST(Error << "Error in CBioseqIndex::IterateFeaturesByLoc: " << e.what());
+ LOG_POST(Error << "Error in CBioseqIndex::IterateFeatures: " << e.what());
}
return count;
}
-/* $Id: blastdb_dataextract.hpp 591546 2019-08-16 16:59:06Z vasilche $
+/* $Id: blastdb_dataextract.hpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
/// Cache the defline (for membership bits)
CRef<CBlast_def_line_set> m_Defline;
/// Pair with a gi2taxid map for one Oid
- pair<TOID, map<TGi, int> > m_Gi2TaxidMap;
+ pair<TOID, map<TGi, TTaxId> > m_Gi2TaxidMap;
/// Pair with a gi2taxid-set map for one Oid
- pair<TOID, map<TGi, set<int> > > m_Gi2TaxidSetMap;
+ pair<TOID, map<TGi, set<TTaxId> > > m_Gi2TaxidSetMap;
/// Pair with a gi2accesion map for one Oid
pair<TOID, map<TGi, string> > m_Gi2AccMap;
/// Pair with a gi2title map for one Oid
bool m_UseLongSeqIds;
private:
void x_ExtractMaskingData(CSeqDB::TSequenceRanges &ranges, int algo_id);
- int x_ExtractTaxId();
- void x_ExtractLeafTaxIds(set<int>& taxids);
+ TTaxId x_ExtractTaxId();
+ void x_ExtractLeafTaxIds(set<TTaxId>& taxids);
/// Sets the map
void x_SetGi2AccMap();
/// Sets the map
#ifndef OBJTOOLS_READERS_BLAST__SEQDB__SEQDB_LMDB_HPP
#define OBJTOOLS_READERS_BLAST__SEQDB__SEQDB_LMDB_HPP
-/* $Id: seqdb_lmdb.hpp 585739 2019-05-03 11:01:28Z fongah2 $
+/* $Id: seqdb_lmdb.hpp 616351 2020-09-15 12:19:15Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
{
public:
CSeqDBLMDB(const string & fname);
+ virtual ~CSeqDBLMDB();
CSeqDBLMDB& operator=(const CSeqDBLMDB&) = delete;
CSeqDBLMDB(const CSeqDBLMDB&) = delete;
- /// Get Oids for Tax Ids list, idenitcal Oids are merged.
+ /// Get Oids for Tax Ids list, identical Oids are merged.
/// @param tax_ids Input tax ids /Output tax ids found
/// @param oids Oids found for input tax ids
- void GetOidsForTaxIds(const set<Int4> & tax_ids, vector<blastdb::TOid>& oids, vector<Int4> & tax_ids_found) const;
+ void GetOidsForTaxIds(const set<TTaxId> & tax_ids, vector<blastdb::TOid>& oids, vector<TTaxId> & tax_ids_found) const;
/// Get Oids to exclude for Tax ids
- /// @parm ids Input tax ids to exclude /Output tax ids found
+ /// @param ids Input tax ids to exclude /Output tax ids found
/// @param rv Oids to exclude based on input tax id list
- void NegativeTaxIdsToOids(const set<Int4>& ids, vector<blastdb::TOid>& rv, vector<Int4> & tax_ids_found) const;
+ void NegativeTaxIdsToOids(const set<TTaxId>& ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const;
/// Get All Unique Tax Ids for db
- /// @parma tax_ids Return all unique tax ids found in db
+ /// @param tax_ids Return all unique tax ids found in db
- void GetDBTaxIds(vector<Int4> & tax_ids) const;
+ void GetDBTaxIds(vector<TTaxId> & tax_ids) const;
/// Get Tax Ids for oid list
/// @param oids Input oid list
/// @param tax_ids Output tax id list
- void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int4> & tax_ids) const;
+ void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const;
private:
string m_LMDBFile;
string m_Oid2TaxIdsFile;
string m_TaxId2OidsFile;
string m_TaxId2OffsetsFile;
+ mutable bool m_LMDBFileOpened;
};
/// Build the canonical LMDB file name for BLAST databases
public:
static CBlastLMDBManager & GetInstance();
lmdb::env & GetReadEnvVol(const string & fname, MDB_dbi & db_volname, MDB_dbi & db_volinfo);
- lmdb::env & GetReadEnvAcc(const string & fname, MDB_dbi & db_acc);
- lmdb::env & GetReadEnvTax(const string & fname, MDB_dbi & db_tax);
+    /// Return the read environment for @a fname and fill in @a db_acc.
+    /// @param opened  Optional flag pointer, nullptr by default.
+    ///                NOTE(review): presumably reports whether this call
+    ///                opened the environment -- confirm in the implementation.
+    lmdb::env & GetReadEnvAcc(const string & fname, MDB_dbi & db_acc, bool* opened = nullptr);
+    /// Return the read environment for @a fname and fill in @a db_tax.
+    /// @param opened  Optional flag pointer, nullptr by default.
+    ///                NOTE(review): presumably reports whether this call
+    ///                opened the environment -- confirm in the implementation.
+    lmdb::env & GetReadEnvTax(const string & fname, MDB_dbi & db_tax, bool* opened = nullptr);
lmdb::env & GetWriteEnv(const string & fname, Uint8 map_size);
void CloseEnv(const string & fname);
public:
CBlastEnv(const string & fname, ELMDBFileType file_type, bool read_only = true, Uint8 map_size =0);
lmdb::env & GetEnv() { return m_Env; }
- const string & GetFilename () { return m_Filename; }
+ const string & GetFilename () const { return m_Filename; }
~CBlastEnv();
unsigned int AddReference(){ m_Count++; return m_Count;}
unsigned int RemoveReference(){ m_Count--; return m_Count;}
};
MDB_dbi GetDbi(EDbiType dbi_type);
void InitDbi(lmdb::env & env, ELMDBFileType file_type);
+ void SetMapSize(Uint8 map_size);
+    /// Return the stored read-only flag (m_ReadOnly). Declared const so it
+    /// can be called through const references, like GetFilename().
+    bool IsReadOnly() const { return m_ReadOnly; }
+
private:
string m_Filename;
ELMDBFileType m_FileType;
lmdb::env m_Env;
unsigned int m_Count;
bool m_ReadOnly;
- Uint8 m_MapSize;
vector<MDB_dbi> m_dbis;
};
- CBlastEnv* GetBlastEnv(const string & fname, ELMDBFileType file_type);
+    /// Return the CBlastEnv for @a fname and @a file_type.
+    /// @param opened  Optional flag pointer, nullptr by default.
+    ///                NOTE(review): presumably reports whether a new
+    ///                environment had to be opened -- confirm in the implementation.
+    CBlastEnv* GetBlastEnv(const string & fname, ELMDBFileType file_type, bool* opened = nullptr);
CBlastLMDBManager(){}
~CBlastLMDBManager();
friend class CSafeStatic_Allocator<CBlastLMDBManager>;
#ifndef OBJTOOLS_READERS_SEQDB__SEQDBTAX_HPP
#define OBJTOOLS_READERS_SEQDB__SEQDBTAX_HPP
-/* $Id: seqdbtax.hpp 553714 2017-12-20 18:36:44Z vakatov $
+/* $Id: seqdbtax.hpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
/// @param locked
/// The lock holder object for this thread.
/// @return true if the taxonomic id was found
- static bool GetTaxNames(Int4 tax_id, SSeqDBTaxInfo & info);
+ static bool GetTaxNames(TTaxId tax_id, SSeqDBTaxInfo & info);
};
#ifndef OBJTOOLS_BLAST_SEQDB_READER___SEQDB__HPP
#define OBJTOOLS_BLAST_SEQDB_READER___SEQDB__HPP
-/* $Id: seqdb.hpp 605340 2020-04-09 16:06:43Z ivanov $
+/* $Id: seqdb.hpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
///
/// This finds the leaf-node TAXIDS associated with a given OID and
/// computes a mapping from GI to taxid. This mapping is added to the
- /// map<int,set<int>> provided by the user. If the "persist" flag is
+ /// map<TGi,set<TTaxId>> provided by the user. If the "persist" flag is
/// set to true, the new associations will simply be added to the
/// map. If it is false (the default), the map will be cleared
/// first.
/// If false, the map will be cleared before adding new entries.
void GetLeafTaxIDs(
int oid,
- map<TGi, set<int> >& gi_to_taxid_set,
+ map<TGi, set<TTaxId> >& gi_to_taxid_set,
bool persist = false
) const;
/// If false, the map will be cleared before adding new entries.
void GetLeafTaxIDs(
int oid,
- vector<int> & taxids,
+ vector<TTaxId> & taxids,
bool persist = false
) const;
/// @param persist
/// If false, the map will be cleared before adding new entries.
void GetTaxIDs(int oid,
- map<TGi, int> & gi_to_taxid,
+ map<TGi, TTaxId> & gi_to_taxid,
bool persist = false) const;
/// Get taxids for an OID.
/// @param persist
/// If false, the map will be cleared before adding new entries.
void GetTaxIDs(int oid,
- vector<int> & taxids,
+ vector<TTaxId> & taxids,
bool persist = false) const;
/// Get all tax ids for an oid
/// @param taxids
/// The returned set of taxids.
void GetAllTaxIDs(int oid,
- set<int> & taxids) const;
+ set<TTaxId> & taxids) const;
/// Get a CBioseq for a sequence.
///
/// An integer identifying the taxid to fetch.
/// @param info
/// A structure containing taxonomic description strings.
- static void GetTaxInfo(int taxid, SSeqDBTaxInfo & info);
+ static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo & info);
/// Fetch data as a CSeq_data object.
///
/// Get Oid list for input tax ids
/// @param tax_ids taxonomy ids, return only tax ids found in db
/// @param rv oids corresponding to the tax ids
- void TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv) const;
+ void TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const;
/// Get all unique tax ids from db
/// @param tax_ids return taxonomy ids in db
- void GetDBTaxIds(set<Int4> & tax_ids) const;
+ void GetDBTaxIds(set<TTaxId> & tax_ids) const;
protected:
/// Implementation details are hidden. (See seqdbimpl.hpp).
#ifndef OBJTOOLS_BLAST_SEQDB_READER___SEQDBCOMMON__HPP
#define OBJTOOLS_BLAST_SEQDB_READER___SEQDBCOMMON__HPP
-/* $Id: seqdbcommon.hpp 605335 2020-04-09 16:04:38Z ivanov $
+/* $Id: seqdbcommon.hpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
};
struct STaxIdsOids {
- set<int> tax_ids;
+ set<TTaxId> tax_ids;
vector<blastdb::TOid> oids;
};
void GetPigList(vector<TPig>& pigs) const;
- set<Int4> & GetTaxIdsList()
+ set<TTaxId> & GetTaxIdsList()
{
return m_TaxIdsOids.tax_ids;
}
m_SisOids.push_back(si);
}
- void AddTaxIds(const set<int> & tax_ids)
+ void AddTaxIds(const set<TTaxId> & tax_ids)
{
- set<int> & tids = m_TaxIdsOids.tax_ids;
+ set<TTaxId> & tids = m_TaxIdsOids.tax_ids;
tids.insert(tax_ids.begin(), tax_ids.end());
}
return m_ListInfo;
}
- void AddTaxIds(const set<int> & tax_ids)
+ void AddTaxIds(const set<TTaxId> & tax_ids)
{
m_TaxIds.insert(tax_ids.begin(), tax_ids.end());
}
- set<Int4> & GetTaxIdsList()
+ set<TTaxId> & GetTaxIdsList()
{
return m_TaxIds;
}
/// SeqIds to exclude from the SeqDB instance.
vector<string> m_Sis;
- set<Int4> m_TaxIds;
+ set<TTaxId> m_TaxIds;
private:
/// Prevent copy constructor.
struct SSeqDBTaxInfo {
/// Default constructor
/// @param t the taxonomy ID to set for this structure
- SSeqDBTaxInfo(int t = 0)
+ SSeqDBTaxInfo(TTaxId t = ZERO_TAX_ID)
: taxid(t)
{
}
/// An identifier for this species or taxonomic group.
- int taxid;
+ TTaxId taxid;
/// Scientific name, such as "Aotus vociferans".
string scientific_name;
-/* $Id: taxid_set.hpp 208050 2010-10-13 15:48:11Z maning $
+/* $Id: taxid_set.hpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
class NCBI_XOBJWRITE_EXPORT CTaxIdSet : public CObject {
public:
- static const int kTaxIdNotSet = 0;
+ static const TTaxId kTaxIdNotSet;
- CTaxIdSet(int global_taxid = kTaxIdNotSet)
+ CTaxIdSet(TTaxId global_taxid = kTaxIdNotSet)
: m_GlobalTaxId(global_taxid),
m_Matched(true) {}
bool HasEverFixedId() const { return m_Matched; };
private:
- int m_GlobalTaxId;
- map< string, int > m_TaxIdMap;
- bool m_Matched;
+ TTaxId m_GlobalTaxId;
+ map< string, TTaxId > m_TaxIdMap;
+ bool m_Matched;
/// Selects the most suitable tax id for the input passed in, checking the
/// global taxid first, then the mapping provided by an input file, and
/// finally what's found in the defline argument
- int x_SelectBestTaxid(const objects::CBlast_def_line & defline);
+ TTaxId x_SelectBestTaxid(const objects::CBlast_def_line & defline);
};
BEGIN_NCBI_SCOPE
+#ifdef NCBI_OS_MSWIN  // Windows: both default map sizes are 500000
+#define DEFAULT_LMDB_MAP_SIZE 500000
+#define DEFAULT_TAXID_MAP_SIZE 500000
+#else  // other platforms use far larger defaults; NOTE(review): units presumably bytes - confirm
+#define DEFAULT_LMDB_MAP_SIZE 300000000000
+#define DEFAULT_TAXID_MAP_SIZE 100000000000
+#endif  // NCBI_OS_MSWIN
/// This class supports creation of a string accession to integer OID
/// lmdb database
/// Constructor for LMDB write access
/// @param dbname Database name
- CWriteDB_LMDB(const string& dbname, Uint8 map_size = 300000000000, Uint8 capacity = 500000);
+ CWriteDB_LMDB(const string& dbname, Uint8 map_size = DEFAULT_LMDB_MAP_SIZE, Uint8 capacity = 500000);
// Destructor
~CWriteDB_LMDB();
void x_InsertEntry(const CRef<CSeq_id> &seqid, const blastdb::TOid oid);
void x_CreateOidToSeqidsLookupFile();
void x_Resize();
+ void x_IncreaseEnvMapSize();
+ void x_IncreaseEnvMapSize(const vector<string> & vol_names, const vector<blastdb::TOid> & vol_num_oids);
string m_Db;
lmdb::env &m_Env;
Uint8 m_ListCapacity;
unsigned int m_MaxEntryPerTxn;
+ size_t m_TotalIdsLength;
struct SKeyValuePair {
string id;
blastdb::TOid oid;
/// Constructor for LMDB write access
/// @param dbname Database name
- CWriteDB_TaxID(const string& dbname, Uint8 map_size = 300000000000, Uint8 capacity = 500000);
+ CWriteDB_TaxID(const string& dbname, Uint8 map_size = DEFAULT_TAXID_MAP_SIZE, Uint8 capacity = 500000);
// Destructor
~CWriteDB_TaxID();
/// @param tax_ids list for oid
/// @return number of rows added to database
/// @see InsertEntry
- int InsertEntries(const set<Int4> & tax_ids, const blastdb::TOid oid);
+ int InsertEntries(const set<TTaxId> & tax_ids, const blastdb::TOid oid);
private:
void x_CommitTransaction();
void x_CreateOidToTaxIdsLookupFile();
void x_CreateTaxIdToOidsLookupFile();
void x_Resize();
+ void x_IncreaseEnvMapSize();
+
string m_Db;
lmdb::env &m_Env;
unsigned int m_MaxEntryPerTxn;
template <class valueType>
struct SKeyValuePair {
- Int4 tax_id;
+ TTaxId tax_id;
valueType value;
- SKeyValuePair(int t, valueType v) : tax_id(t), value(v) {}
+ SKeyValuePair(TTaxId t, valueType v) : tax_id(t), value(v) {}
static bool cmp_key(const SKeyValuePair & v, const SKeyValuePair & k) {
if(v.tax_id == k.tax_id) {
return v.value < k.value;
#ifndef CLEANUP___CLEANUP__HPP
#define CLEANUP___CLEANUP__HPP
-/* $Id: cleanup.hpp 605251 2020-04-08 14:24:56Z ivanov $
+/* $Id: cleanup.hpp 614966 2020-08-25 16:46:33Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
class CSeq_feat_Handle;
class CCleanupChange;
+class IObjtoolsListener;
class NCBI_CLEANUP_EXPORT CCleanup : public CObject
{
/// Get labels for a pubdesc. To be used in citations.
static void GetPubdescLabels
(const CPubdesc& pd,
- vector<int>& pmids, vector<int>& muids, vector<int>& serials,
+ vector<TEntrezId>& pmids, vector<TEntrezId>& muids, vector<int>& serials,
vector<string>& published_labels, vector<string>& unpublished_labels);
/// Get list of pubs that can be used for citations for Seq-feat on a Bioseq-handle
/// @param str string from which to parse code break
/// @param scope scope in which to find sequences referenced (used for location comparisons)
/// @return bool indicates string was successfully parsed and code break was added
- static bool ParseCodeBreak(const CSeq_feat& feat, CCdregion& cds, const string& str, CScope& scope);
+ static bool ParseCodeBreak(const CSeq_feat& feat,
+ CCdregion& cds,
+ const CTempString& str,
+ CScope& scope,
+ IObjtoolsListener* pMessageListener=nullptr);
/// Parses all valid transl_except Gb-quals into code-breaks for cdregion,
/// then removes the transl_except Gb-quals that were successfully parsed
--- /dev/null
+#ifndef _CLEANUP_MESSAGE_HPP_
+#define _CLEANUP_MESSAGE_HPP_
+
+/* $Id: cleanup_message.hpp 608332 2020-05-14 16:04:14Z ivanov $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Justin Foley
+ *
+ * File Description:
+ * Declares CCleanupMessage, a CObjtoolsMessage subclass carrying cleanup-specific code and subcode values.
+ *
+ */
+
+#include <objtools/logging/message.hpp>
+
+BEGIN_NCBI_SCOPE
+BEGIN_SCOPE(objects)
+/// CObjtoolsMessage subclass that stores an ECode and an ESubcode and reports them through GetCode()/GetSubCode().
+class NCBI_CLEANUP_EXPORT CCleanupMessage : public CObjtoolsMessage
+{
+public:
+ enum class ECode {
+ eCodeBreak
+ };
+/// Subcode values; the stored one is returned (as int) by GetSubCode().
+ enum class ESubcode {
+ eBadLocation,
+ eParseError
+ };
+/// Constructor taking the message text, its severity, and the code/subcode pair.
+ CCleanupMessage(string text, EDiagSev sev, ECode code, ESubcode subcode);
+/// Clone override returning CCleanupMessage*; presumably yields a copy (definition not in this header - confirm).
+ CCleanupMessage *Clone(void) const override;
+/// GetCode() / GetSubCode(): return m_Code / m_Subcode converted to int via static_cast.
+ int GetCode(void) const override {
+ return static_cast<int>(m_Code);
+ }
+ int GetSubCode(void) const override {
+ return static_cast<int>(m_Subcode);
+ }
+private:
+ ECode m_Code;
+ ESubcode m_Subcode;
+};
+
+END_SCOPE(objects)
+END_NCBI_SCOPE
+
+#endif // _CLEANUP_MESSAGE_HPP_
#ifndef OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP
#define OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP
-/* $Id: bdbloader.hpp 368048 2012-07-02 13:25:25Z camacho $
+/* $Id: bdbloader.hpp 612733 2020-07-27 11:38:27Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <objmgr/data_loader.hpp>
#include <objtools/data_loaders/blastdb/blastdb_adapter.hpp>
#include <objects/seqset/Seq_entry.hpp>
+#include <util/limited_size_map.hpp>
BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)
virtual TTSE_Lock GetBlobById(const TBlobId& blob_id);
/// A mapping from sequence identifier to blob ids.
- typedef map< CSeq_id_Handle, int > TIdMap;
+ typedef limited_size_map<CSeq_id_Handle, int> TIdMap;
/// @note this is added to temporarily comply with the toolkit's stable
/// components rule of having backwards compatible APIs
-/* $Id: feattable_edit.hpp 593415 2019-09-18 14:52:52Z ludwigf $
+/* $Id: feattable_edit.hpp 612521 2020-07-23 11:23:16Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
bool forEukaryote);
void GenerateMissingParentFeaturesForEukaryote();
void GenerateMissingParentFeaturesForProkaryote();
+ void ProcessCodonRecognized();
/// Accessor: returns the current value of mLocusTagNumber.
unsigned int PendingLocusTagNumber() const { return mLocusTagNumber; }
const CMappedFeat& mrna);
void xPutErrorDifferingProteinIds(
const CMappedFeat& mrna);
+ void xPutErrorBadCodonRecognized(
+ const string codonRecognized);
void xFeatureAddQualifier(
+/* $Id: remote_updater.hpp 614636 2020-08-20 13:02:57Z fukanchi $
+* ===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+* Authors: Sergiy Gotvyanskyy, NCBI
+* Colleen Bolin, NCBI
+*
+* File Description:
+* Front-end class for making remote request to MLA and taxon
+*
+* ===========================================================================
+*/
+
#ifndef __REMOTE_UPDATER_HPP_INCLUDED__
#define __REMOTE_UPDATER_HPP_INCLUDED__
+#include <corelib/ncbimisc.hpp>
#include<functional>
BEGIN_NCBI_SCOPE
class COrg_ref;
class CMLAClient;
class CAuth_list;
+class IObjtoolsListener;
+class CPub;
BEGIN_SCOPE(edit)
using FLogger = function<void(const string&)>;
+ // With this constructor, an exception is thrown
+ // if the updater cannot retrieve a publication for a PMID.
CRemoteUpdater(bool enable_caching = true);
+ // With this constructor, failure to retrieve
+ // a publication for a PMID is logged with the supplied message listener.
+ // If no message listener is supplied, an exception is thrown.
+ CRemoteUpdater(IObjtoolsListener* pMessageListener);
~CRemoteUpdater();
void UpdatePubReferences(CSerialObject& obj);
void UpdatePubReferences(CSeq_entry_EditHandle& obj);
+ void SetMaxMlaAttempts(int max);
- void UpdateOrgFromTaxon(FLogger f_logger, CSeq_entry& entry);
+ NCBI_DEPRECATED void UpdateOrgFromTaxon(FLogger /*f_logger*/, CSeq_entry& entry);
void UpdateOrgFromTaxon(FLogger f_logger, CSeq_entry_EditHandle& obj);
- void UpdateOrgFromTaxon(FLogger f_logger, CSeqdesc& obj);
+ NCBI_DEPRECATED void UpdateOrgFromTaxon(FLogger f_logger, CSeqdesc& obj);
+
+ void UpdateOrgFromTaxon(CSeq_entry& entry);
+ void UpdateOrgFromTaxon(CSeqdesc& desc);
+
+
void ClearCache();
static void ConvertToStandardAuthors(CAuth_list& auth_list);
static void PostProcessPubs(CSeq_entry_EditHandle& obj);
static void PostProcessPubs(CSeq_entry& obj);
static void PostProcessPubs(CPubdesc& pubdesc);
+ void SetMLAClient(CMLAClient& mlaClient);
// Use either shared singleton or individual instances
static CRemoteUpdater& GetInstance();
private:
void xUpdatePubReferences(CSeq_entry& entry);
void xUpdatePubReferences(CSeq_descr& descr);
- void xUpdateOrgTaxname(FLogger f_logger, COrg_ref& org);
-
+ void xUpdateOrgTaxname(FLogger f_logger, COrg_ref& org);
+ void xUpdateOrgTaxname(COrg_ref& org);
+ bool xUpdatePubPMID(list<CRef<CPub>>& pubs, TEntrezId id);
+ IObjtoolsListener* m_pMessageListener=nullptr;
CRef<CMLAClient> m_mlaClient;
auto_ptr<CCachedTaxon3_impl> m_taxClient;
- bool m_enable_caching;
+ bool m_enable_caching=true;
CMutex m_Mutex;
DECLARE_CLASS_STATIC_MUTEX(m_static_mutex);
+ int m_MaxMlaAttempts=3;
};
END_SCOPE(edit)
#ifndef OBJTOOLS_FORMAT___FLAT_FILE_CONFIG__HPP
#define OBJTOOLS_FORMAT___FLAT_FILE_CONFIG__HPP
-/* $Id: flat_file_config.hpp 607400 2020-05-04 14:18:10Z ivanov $
+/* $Id: flat_file_config.hpp 614736 2020-08-21 13:43:48Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
virtual void FormatTranscriptId(string& str, const CSeq_id& seq_id, const string& nuc_id) const = 0;
virtual void FormatNucSearch(CNcbiOstream& os, const string& id) const = 0;
virtual void FormatNucId(string& str, const CSeq_id& seq_id, TIntId gi, const string& acc_id) const = 0;
- virtual void FormatTaxid(string& str, const int taxid, const string& taxname) const = 0;
+ virtual void FormatTaxid(string& str, const TTaxId taxid, const string& taxname) const = 0;
virtual void FormatLocation(string& str, const CSeq_loc& loc, TIntId gi, const string& visible_text) const = 0;
virtual void FormatModelEvidence(string& str, const SModelEvidance& me) const = 0;
virtual void FormatTranscript(string& str, const string& name) const = 0;
void FormatTranscriptId(string& str, const CSeq_id& seq_id, const string& nuc_id) const;
void FormatNucSearch(CNcbiOstream& os, const string& id) const;
void FormatNucId(string& str, const CSeq_id& seq_id, TIntId gi, const string& acc_id) const;
- void FormatTaxid(string& str, const int taxid, const string& taxname) const;
+ void FormatTaxid(string& str, const TTaxId taxid, const string& taxname) const;
void FormatLocation(string& str, const CSeq_loc& loc, TIntId gi, const string& visible_text) const;
void FormatModelEvidence(string& str, const SModelEvidance& me) const;
void FormatTranscript(string& str, const string& name) const;
fIgnoreExistingTitle = 1 << 13,
fGeneRNACDSFeatures = 1 << 14,
fShowFtablePeptides = 1 << 15,
- fDisableReferenceCache = 1 << 16
+ fDisableReferenceCache = 1 << 16,
+ fShowDeflineModifiers = 1 << 17
};
enum EView {
ePolicy_Adaptive = 0,
ePolicy_Internal,
ePolicy_External,
- ePolicy_Exhaustive
+ ePolicy_Exhaustive,
+ ePolicy_Ftp,
+ ePolicy_Web
};
// These flags are used to select the GenBank sections to print or skip.
TStyle style = eStyle_Normal,
TFlags flags = 0,
TView view = fViewNucleotides,
- TPolicy policy = ePolicy_Adaptive);
+ TPolicy policy = ePolicy_Adaptive,
+ TCustom custom = 0);
// destructor
~CFlatFileConfig(void);
bool IsPolicyInternal(void) const { return m_Policy == ePolicy_Internal; }
bool IsPolicyExternal (void) const { return m_Policy == ePolicy_External; }
bool IsPolicyExhaustive (void) const { return m_Policy == ePolicy_Exhaustive; }
+ bool IsPolicyFtp (void) const { return m_Policy == ePolicy_Ftp; }
+ bool IsPolicyWeb (void) const { return m_Policy == ePolicy_Web; }
// setters
void SetPolicy(const TPolicy& Policy) { m_Policy = Policy; }
void SetPolicyAdaptive (void) { m_Policy = ePolicy_Adaptive; }
void SetPolicyInternal(void) { m_Policy = ePolicy_Internal; }
void SetPolicyExternal (void) { m_Policy = ePolicy_External; }
void SetPolicyExhaustive (void) { m_Policy = ePolicy_Exhaustive; }
+ void SetPolicyFtp (void) { m_Policy = ePolicy_Ftp; }
+ void SetPolicyWeb (void) { m_Policy = ePolicy_Web; }
// -- Flags
// getters
bool GeneRNACDSFeatures (void) const;
bool ShowFtablePeptides (void) const;
bool DisableReferenceCache (void) const;
+ bool ShowDeflineModifiers (void) const;
// setters
void SetCustom(const TCustom& custom) { m_Custom = custom; }
CFlatFileConfig& SetGeneRNACDSFeatures (bool val = true);
CFlatFileConfig& SetShowFtablePeptides (bool val = true);
CFlatFileConfig& SetDisableReferenceCache (bool val = true);
+ CFlatFileConfig& SetShowDeflineModifiers (bool val = true);
// adjust mode-dependent flags for RefSeq
void SetRefSeqConventions(void);
+ int GetFeatDepth(void) const { return m_FeatDepth; }
+ void SetFeatDepth(const int featDepth) { m_FeatDepth = featDepth; }
+
+ int GetGapDepth(void) const { return m_GapDepth; }
+ void SetGapDepth(const int gapDepth) { m_GapDepth = gapDepth; }
+
void SetGenbankBlocks(const TGenbankBlocks& genbank_blocks)
{
const ICanceled * m_pCanceledCallback; // instance does NOT own it
bool m_BasicCleanup;
TCustom m_Custom;
+ int m_FeatDepth;
+ int m_GapDepth;
#ifdef NEW_HTML_FMT
CRef<IHTMLFormatter> m_html_formatter;
#endif
CUSTOM_ARG_IMP(GeneRNACDSFeatures)
CUSTOM_ARG_IMP(ShowFtablePeptides)
CUSTOM_ARG_IMP(DisableReferenceCache)
+CUSTOM_ARG_IMP(ShowDeflineModifiers)
#undef FLAG_ARG_IMP
#undef FLAG_ARG_GET
#ifndef OBJTOOLS_FORMAT___FLAT_FILE_GENERATOR__HPP
#define OBJTOOLS_FORMAT___FLAT_FILE_GENERATOR__HPP
-/* $Id: flat_file_generator.hpp 604097 2020-03-23 12:19:07Z ivanov $
+/* $Id: flat_file_generator.hpp 613149 2020-08-03 15:02:23Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CFlatFileConfig::TStyle style = CFlatFileConfig::eStyle_Normal,
CFlatFileConfig::TFlags flags = 0,
CFlatFileConfig::TView view = CFlatFileConfig::fViewNucleotides,
- CFlatFileConfig::TCustom custom = 0);
+ CFlatFileConfig::TCustom custom = 0,
+ CFlatFileConfig::TPolicy policy = CFlatFileConfig::ePolicy_Adaptive);
// destructor
~CFlatFileGenerator(void);
// Versions that loop through Bioseq components
void Generate(const CSeq_entry_Handle& entry, CNcbiOstream& os, bool useSeqEntryIndexing);
+ void Generate(const CBioseq_Handle& bsh, CNcbiOstream& os, bool useSeqEntryIndexing);
void Generate(const CSeq_entry_Handle& entry, CFlatItemOStream& item_os, bool useSeqEntryIndexing);
void Generate(const CSeq_loc& loc, CScope& scope, CNcbiOstream& os, bool useSeqEntryIndexing);
#ifndef OBJTOOLS_FORMAT___GATHER_ITEMS__HPP
#define OBJTOOLS_FORMAT___GATHER_ITEMS__HPP
-/* $Id: gather_items.hpp 607397 2020-05-04 14:17:25Z ivanov $
+/* $Id: gather_items.hpp 610489 2020-06-18 14:52:27Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
void x_GatherCDSReferences(TReferences& refs) const;
// features
- void x_GatherFeatures (void) const;
+ void x_GatherFeatures (void) const;
+ void x_GatherFeaturesIdx(void) const;
void x_GetFeatsOnCdsProduct(const CSeq_feat& feat, CBioseqContext& ctx,
CRef<CSeq_loc_Mapper> slice_mapper,
CConstRef<CFeatureItem> cdsFeatureItem = CConstRef<CFeatureItem>() ) const;
- void x_GetFeatsOnCdsProductIdx(CMappedFeat mf,const CSeq_feat& feat, CBioseqContext& ctx,
+ void x_GetFeatsOnCdsProductIdx(const CSeq_feat& feat, CBioseqContext& ctx,
CRef<CSeq_loc_Mapper> slice_mapper,
CConstRef<CFeatureItem> cdsFeatureItem = CConstRef<CFeatureItem>() ) const;
static void x_GiveOneResidueIntervalsBogusFuzz(CSeq_loc & loc);
CBioseqContext& ctx) const;
void x_GatherFeaturesOnRangeIdx(const CSeq_loc& loc, SAnnotSelector& sel,
CBioseqContext& ctx) const;
- size_t x_GatherFeaturesOnSegmentIdx(const CSeq_loc& loc, SAnnotSelector& sel,
- CBioseqContext& ctx) const;
void x_GatherFeaturesOnRange(const CSeq_loc& loc, SAnnotSelector& sel,
CBioseqContext& ctx) const;
#ifndef OBJTOOLS_FORMAT_ITEMS___FLAT_FEATURE__HPP
#define OBJTOOLS_FORMAT_ITEMS___FLAT_FEATURE__HPP
-/* $Id: feature_item.hpp 604098 2020-03-23 12:19:42Z ivanov $
+/* $Id: feature_item.hpp 615031 2020-08-26 13:38:14Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
void x_AddQualsRegion( CBioseqContext& );
void x_AddQualsProt( CBioseqContext&, bool );
void x_AddQualsPsecStr( CBioseqContext& );
+ void x_AddQualsNonStd(CBioseqContext& ctx );
+
void x_AddQualsHet( CBioseqContext& ctx );
void x_AddQualsVariation( CBioseqContext& ctx );
void x_AddFTableSiteQuals(const CSeqFeatData::TSite& site);
void x_AddFTablePsecStrQuals(const CSeqFeatData::TPsec_str& psec_str);
void x_AddFTablePsecStrQuals(const CSeqFeatData::THet& het);
+ void x_AddFTableNonStdQuals(const CSeqFeatData::TNon_std_residue& res);
void x_AddFTableBiosrcQuals(const CBioSource& src);
void x_AddFTableDbxref(const CSeq_feat::TDbxref& dbxref);
void x_AddFTableExtQuals(const CSeq_feat::TExt& ext);
#ifndef OBJTOOLS_FLAT___FLAT_QUAL_SLOTS__HPP
#define OBJTOOLS_FLAT___FLAT_QUAL_SLOTS__HPP
-/* $Id: flat_qual_slots.hpp 564513 2018-05-29 17:40:10Z kans $
+/* $Id: flat_qual_slots.hpp 613781 2020-08-12 16:42:43Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
eFQ_mol_wt,
eFQ_ncRNA_class,
eFQ_nomenclature,
+ eFQ_non_std_residue,
eFQ_number,
eFQ_old_locus_tag,
eFQ_operon,
#ifndef OBJTOOLS_FORMAT_ITEMS___REFERENCE_ITEM__HPP
#define OBJTOOLS_FORMAT_ITEMS___REFERENCE_ITEM__HPP
-/* $Id: reference_item.hpp 602636 2020-02-27 20:27:11Z kans $
+/* $Id: reference_item.hpp 614619 2020-08-20 13:00:42Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
bool IsSetJournal (void) const { return m_Journal.NotEmpty(); }
const CCit_jour& GetJournal (void) const { return *m_Journal; }
- int GetPMID (void) const { return m_PMID; }
- int GetMUID (void) const { return m_MUID; }
+ TEntrezId GetPMID (void) const { return m_PMID; }
+ TEntrezId GetMUID (void) const { return m_MUID; }
const string& GetDOI (void) const { return m_DOI; }
const string& GetPII (void) const { return m_ELocationPII; }
const string& GetOldPII (void) const { return m_OldPII; }
CConstRef<CCit_jour> m_Journal;
CConstRef<CSeq_loc> m_Loc;
CConstRef<CDate> m_Date;
- int m_PMID;
- int m_MUID;
+ TEntrezId m_PMID;
+ TEntrezId m_MUID;
string m_DOI;
string m_ELocationPII;
string m_OldPII;
const string& CReferenceItem::GetUniqueStr(void) const
{
// suppress creation if other identifiers exist.
- if (m_MUID == 0 && m_PMID == 0) {
+ if (m_MUID == ZERO_ENTREZ_ID && m_PMID == ZERO_ENTREZ_ID) {
x_CreateUniqueStr();
}
return m_UniqueStr;
-/* $Id: listener.hpp 600659 2020-01-24 15:26:41Z foleyjp $
+/* $Id: listener.hpp 608330 2020-05-14 16:03:45Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#ifndef _OBJTOOLS_LISTENER_HPP_
#define _OBJTOOLS_LISTENER_HPP_
-#include <corelib/ncbi_message.hpp>
+#include <corelib/ncbistd.hpp>
#include <objtools/logging/message.hpp>
BEGIN_NCBI_SCOPE
-/* $Id: message.hpp 599046 2019-12-18 18:34:26Z ludwigf $
+/* $Id: message.hpp 608330 2020-05-14 16:03:45Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#ifndef _OBJTOOLS_MESSAGE_HPP_
#define _OBJTOOLS_MESSAGE_HPP_
-#include <corelib/ncbi_message.hpp>
+#include <corelib/ncbistd.hpp>
BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)
// ============================================================================
-class NCBI_XOBJUTIL_EXPORT IObjtoolsMessage : public IMessage
+class NCBI_XOBJUTIL_EXPORT IObjtoolsMessage
// ============================================================================
{
public:
virtual ~IObjtoolsMessage(void) = default;
+ virtual IObjtoolsMessage *Clone(void) const = 0;
+
+ virtual void Write(CNcbiOstream& out) const = 0;
+
virtual void Dump(CNcbiOstream& out) const = 0;
virtual void WriteAsXML(CNcbiOstream& out) const = 0;
virtual void DumpAsXML(CNcbiOstream& out) const = 0;
+
+ virtual string GetText(void) const = 0;
+ virtual EDiagSev GetSeverity(void) const = 0;
+ virtual int GetCode(void) const = 0;
+ virtual int GetSubCode(void) const = 0;
};
virtual CObjtoolsMessage *Clone(void) const;
- virtual string Compose(void) const;
+ NCBI_DEPRECATED virtual string Compose(void) const;
virtual void Write(CNcbiOstream& out) const;
#ifndef OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_HPP
#define OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_HPP
-/* $Id: psg_client.hpp 599706 2020-01-06 18:04:04Z sadyrovr $
+/* $Id: psg_client.hpp 612393 2020-07-21 13:51:24Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
// Get request ID
string GetId() const { return x_GetId(); }
+ /// Set hops
+ void SetHops(unsigned hops) { m_Hops = hops; }
+
protected:
CPSG_Request(shared_ptr<void> user_context = {},
CRef<CRequestContext> request_context = {})
shared_ptr<void> m_UserContext;
CRef<CRequestContext> m_RequestContext;
+ unsigned m_Hops = 0;
friend class CPSG_Queue;
};
/// Check whether the queue was stopped/reset and is now empty.
bool IsEmpty() const;
+
+ /// Check whether the queue has been initialized.
+ bool IsInitialized() const { return static_cast<bool>(m_Impl); }
+
+
+ /// Get an API lock.
+ /// Holding this API lock is essential if numerous short-lived queue instances are used.
+ /// It prevents an internal I/O implementation (threads, TCP connections, HTTP sessions, etc)
+ /// from being destroyed (on destroying last remaining queue instance)
+ /// and then re-created (with new queue instance).
+ using TApiLock = shared_ptr<void>;
+ static TApiLock GetApiLock();
+
+
CPSG_Queue(CPSG_Queue&&);
CPSG_Queue& operator=(CPSG_Queue&&);
#ifndef OBJTOOLS_READERS___FASTA__HPP
#define OBJTOOLS_READERS___FASTA__HPP
-/* $Id: fasta.hpp 598690 2019-12-12 14:55:16Z foleyjp $
+/* $Id: fasta.hpp 612524 2020-07-23 11:37:59Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
TSeqPos line_number,
CBioseq& bioseq,
ILineErrorListener* pMessageListener);
+
+ void x_SetDeflineParseInfo(SDefLineParseInfo& info);
+
+ bool m_bModifiedMaxIdLength=false;
+
protected:
struct SGap : public CObject {
enum EKnownSize {
#ifndef FASTA_READER_UTILS_HPP
#define FASTA_READER_UTILS_HPP
-/* $Id: fasta_reader_utils.hpp 599727 2020-01-06 20:18:10Z foleyjp $
+/* $Id: fasta_reader_utils.hpp 612524 2020-07-23 11:37:59Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
struct SDeflineParseInfo {
TBaseFlags fBaseFlags;
TFastaFlags fFastaFlags;
- TSeqPos maxIdLength;
+ TSeqPos maxIdLength=0; // If maxIdLength is zero, the code uses the
+ // default values specified in CSeq_id
TSeqPos lineNumber;
};
- /* $Id: gff2_reader.hpp 603569 2020-03-12 18:23:57Z ivanov $
+ /* $Id: gff2_reader.hpp 610837 2020-06-24 15:29:29Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
//
// new stuff:
//
+ virtual void xGetData(
+ ILineReader&,
+ TReaderData&);
+
+ bool IsInGenbankMode() const;
+
virtual bool xParseStructuredComment(
const string&);
CSeq_feat&,
CSeq_feat&);
+ bool xNeedsNewSeqAnnot(
+ const string&);
+
// data:
//
protected:
- /* $Id: gff3_reader.hpp 607807 2020-05-07 18:58:43Z ivanov $
+ /* $Id: gff3_reader.hpp 610837 2020-06-24 15:29:29Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
ILineReader& lr,
ILineErrorListener* pErrors=nullptr);
- bool IsInGenbankMode() const;
-
protected:
- virtual void xGetData(
- ILineReader&,
- TReaderData&);
-
virtual void xProcessData(
const TReaderData&,
CSeq_annot&);
virtual bool xParseAlignment(
const string& strLine);
- bool xNeedsNewSeqAnnot(
- const string&);
-
// Data:
map<string, string> mCdsParentMap;
map<string, CRef<CSeq_interval> > mMrnaLocs;
- /* $Id: gtf_reader.hpp 598212 2019-12-05 12:32:15Z ludwigf $
+ /* $Id: gtf_reader.hpp 610647 2020-06-22 11:31:17Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
const CGtfAttributes::MultiValue&,
CSeq_feat&);
- bool xCdsIsPartial(
+ NCBI_DEPRECATED bool xCdsIsPartial(
const CGtfReadRecord& );
typedef map< string, CRef< CSeq_feat > > TIdToFeature;
-/* $Id: message_listener.hpp 600664 2020-01-24 15:57:16Z foleyjp $
+/* $Id: message_listener.hpp 608381 2020-05-15 12:43:35Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
virtual ~ILineErrorListener() {}
// IListener::Post() implementation
- virtual void Post(const IMessage& message)
+ NCBI_STD_DEPRECATED("This method is no longer functional and will be removed in SC-25.")
+ virtual void Post(const IMessage& /*message*/)
{
- const ILineError* le = dynamic_cast<const ILineError*>(&message);
- if (!le) return;
- PutError(*le);
+ // Remove in SC-25
+ return;
}
+ NCBI_STD_DEPRECATED("This method is redundant and will be removed in SC-25.")
+ virtual void Push(const IObjtoolsMessage& message)
+ {
+ // Remove in SC-25
+ PutMessage(message);
+ }
/// Store error in the container, and
/// return true if error was stored fine, and
/// return false if the caller should terminate all further processing.
}
// IListener::Get() implementation
- virtual const IMessage& Get(size_t index) const
- { return const_cast<ILineErrorListener*>(this)->GetError(index); }
+ virtual const ILineError& Get(size_t index) const
+ { return this->GetError(index); }
/// 0-based error retrieval.
virtual const ILineError&
- GetError(
- size_t ) =0;
+ GetError(size_t ) const =0;
virtual size_t Count(void) const = 0;
const Uint8 iNumDone = 0,
const Uint8 iNumTotal = 0 ) = 0;
- virtual const IMessage& GetMessage(size_t index) const
+ virtual const ILineError& GetMessage(size_t index) const
{ return Get(index); }
virtual void Clear(void)
const ILineError&
GetError(
- size_t uPos ) {
+ size_t uPos ) const {
return *dynamic_cast<ILineError*>(m_Errors[ uPos ].get()); }
virtual void Dump()
#ifndef SERIAL_GRPC_INTEGRATION___GRPC_INTEGRATION__HPP
#define SERIAL_GRPC_INTEGRATION___GRPC_INTEGRATION__HPP
-/* $Id: grpc_integration.hpp 606576 2020-04-23 17:12:06Z ivanov $
+/* $Id: grpc_integration.hpp 608310 2020-05-14 12:35:38Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
/// (in order of priority):
/// - Config file entry "[section] variable"
/// - Environment variables: env_var_name (if not empty/NULL);
-/// then "NCBI_CONFIG__<section>__<name>"; then "grpc_proxy"
+/// then "NCBI_CONFIG__<section>__<name>"; then "GRPC_PROXY"
/// - The hard-coded NCBI default "linkerd:4142"
+/// The value_source (if not null) will get CParamBase::EParamSource value
string g_NCBI_GRPC_GetAddress(const char* section,
const char* variable,
- const char* env_var_name = nullptr);
+ const char* env_var_name = nullptr,
+ int* value_source = nullptr);
END_NCBI_SCOPE
#ifndef SERIAL_GRPC_INTEGRATION_IMPL___GRPC_SUPPORT__HPP
#define SERIAL_GRPC_INTEGRATION_IMPL___GRPC_SUPPORT__HPP
-/* $Id: grpc_support.hpp 607417 2020-05-04 15:40:44Z ivanov $
+/* $Id: grpc_support.hpp 608345 2020-05-14 18:36:54Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <corelib/ncbimtx.hpp>
#include <corelib/request_ctx.hpp>
#include <corelib/request_status.hpp>
-#ifdef HAVE_LIBPROTOBUF
+#ifdef HAVE_LIBGRPC // HAVE_LIBPROTOBUF
# include <google/protobuf/message.h>
# if GOOGLE_PROTOBUF_VERSION >= 3002000
# define NCBI_GRPC_GET_BYTE_SIZE(msg) ((msg).ByteSizeLong())
private:
CDiagContext& m_DiagContext;
CRequestContext& m_RequestContext;
-#ifdef HAVE_LIBPROTOBUF
+#ifdef HAVE_LIBGRPC // HAVE_LIBPROTOBUF
const TMessage& m_Reply;
#endif
bool m_ManagingRequest;
const TMessage& reply)
: m_DiagContext(GetDiagContext()),
m_RequestContext(m_DiagContext.GetRequestContext()),
-#ifdef HAVE_LIBPROTOBUF
+#ifdef HAVE_LIBGRPC // HAVE_LIBPROTOBUF
m_Reply(reply),
#endif
m_ManagingRequest(false)
#ifndef SERIAL___RPCBASE__HPP
#define SERIAL___RPCBASE__HPP
-/* $Id: rpcbase.hpp 588592 2019-06-26 18:52:32Z ucko $
+/* $Id: rpcbase.hpp 615694 2020-09-02 18:14:03Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
protected CConnIniter
{
public:
- CRPCClient(const string& service = kEmptyStr,
- ESerialDataFormat format = eSerial_AsnBinary,
- unsigned int retry_limit = 3)
- : CRPCClient_Base(service, format, retry_limit),
+ CRPCClient(const string& service = kEmptyStr)
+ : CRPCClient_Base(service, eSerial_AsnBinary),
m_Timeout(kDefaultTimeout)
{}
+ CRPCClient(const string& service,
+ ESerialDataFormat format)
+ : CRPCClient_Base(service, format),
+ m_Timeout(kDefaultTimeout)
+ {}
+ CRPCClient(const string& service,
+ ESerialDataFormat format,
+ unsigned int try_limit)
+ : CRPCClient_Base(service, format, try_limit),
+ m_Timeout(kDefaultTimeout)
+ {}
virtual ~CRPCClient(void)
{
if ( !sx_IsSpecial(m_Timeout) ) {
#ifndef SERIAL___RPCBASE_IMPL__HPP
#define SERIAL___RPCBASE_IMPL__HPP
-/* $Id: rpcbase_impl.hpp 588592 2019-06-26 18:52:32Z ucko $
+/* $Id: rpcbase_impl.hpp 615694 2020-09-02 18:14:03Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
{
public:
CRPCClient_Base(const string& service,
- ESerialDataFormat format,
- unsigned int retry_limit);
+ ESerialDataFormat format);
+ CRPCClient_Base(const string& service,
+ ESerialDataFormat format,
+ unsigned int try_limit);
virtual ~CRPCClient_Base(void);
void Connect(void);
ESerialDataFormat GetFormat(void) const { return m_Format; }
void SetFormat(ESerialDataFormat fmt) { m_Format = fmt; }
- unsigned int GetRetryLimit(void) const { return m_RetryLimit; }
- void SetRetryLimit(unsigned int n) { m_RetryLimit = n; }
-
+ /// Get number of request attempts. If not set explicitly through SetTryLimit
+ /// or constructor argument, the following values are used:
+ /// - <upcase_service_name>__RPC_CLIENT__MAX_TRY environment variable
+ /// - [service_name.rpc_client] section, max_try value in the INI file
+ /// - 3 (global default)
+ unsigned int GetTryLimit(void) const { return m_TryLimit; }
+ void SetTryLimit(unsigned int n) { m_TryLimit = n > 0 ? n : 3; }
+ /// @deprecated Use GetTryLimit()
+ NCBI_DEPRECATED
+ unsigned int GetRetryLimit(void) const { return GetTryLimit(); }
+ /// @deprecated Use SetTryLimit()
+ NCBI_DEPRECATED
+ void SetRetryLimit(unsigned int n) { SetTryLimit(n); }
+
+ /// Get retry delay. If not set explicitly through SetRetryDelay, the following values are used:
+ /// - <upcase_service_name>__RPC_CLIENT__RETRY_DELAY environment variable
+ /// - [service_name.rpc_client] section, retry_delay value in the INI file
+ /// - 0 (global default)
const CTimeSpan GetRetryDelay(void) const { return m_RetryDelay; }
void SetRetryDelay(const CTimeSpan& ts) { m_RetryDelay = ts; }
ESerialDataFormat m_Format;
CMutex m_Mutex; ///< To allow sharing across threads.
CTimeSpan m_RetryDelay;
- unsigned int m_RetryCount;
+ unsigned int m_TryCount;
int m_RecursionCount;
protected:
unique_ptr<CObjectIStream> m_In;
unique_ptr<CObjectOStream> m_Out;
string m_Affinity;
- unsigned int m_RetryLimit;
+ unsigned int m_TryLimit;
CHttpRetryContext m_RetryCtx;
CConstIRef<ICanceled> m_Canceler;
// true. May reset the connection (or do anything else, really),
// but note that Ask() will always automatically reconnect if the
// stream is explicitly bad. (Ask() also takes care of enforcing
- // m_RetryLimit.)
+ // m_TryLimit.)
virtual bool x_ShouldRetry(unsigned int tries);
// Calculate effective retry delay. Returns value from CRetryContext
#ifndef STREAMITER__HPP
#define STREAMITER__HPP
-/* $Id: streamiter.hpp 583376 2019-03-27 18:06:15Z dicuccio $
+/* $Id: streamiter.hpp 609566 2020-06-03 19:29:58Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
case eAllRandom:
done.insert(mi_now);
// no break
- /* FALLTHROUGH */
+ NCBI_FALLTHROUGH;
case eAllSeq:
{
CObjectInfo oi = minfo.GetMember().GetTypeFamily() == eTypeFamilyPointer ?
class reference
{
public:
- reference(bvector<Alloc>& bv, size_type position)
+ reference(bvector<Alloc>& bv, size_type position) BMNOEXCEPT
: bv_(bv),
position_(position)
{}
- reference(const reference& ref)
+ reference(const reference& ref) BMNOEXCEPT
: bv_(ref.bv_),
position_(ref.position_)
{
bv_.set(position_, ref.bv_.get_bit(position_));
}
- operator bool() const
+ operator bool() const BMNOEXCEPT
{
return bv_.get_bit(position_);
}
return *this;
}
- const reference& operator=(bool value) const
+ const reference& operator=(bool value) const BMNOEXCEPT
{
bv_.set(position_, value);
return *this;
}
- bool operator==(const reference& ref) const
+ bool operator==(const reference& ref) const BMNOEXCEPT
{
return bool(*this) == bool(ref);
}
}
/*! Logical Not operator */
- bool operator!() const
+ bool operator!() const BMNOEXCEPT
{
return !bv_.get_bit(position_);
}
/*! Bit Not operator */
- bool operator~() const
+ bool operator~() const BMNOEXCEPT
{
return !bv_.get_bit(position_);
}
{
friend class bvector;
public:
- iterator_base() : bv_(0), position_(bm::id_max), block_(0) {}
+ iterator_base() BMNOEXCEPT
+ : bv_(0), position_(bm::id_max), block_(0), block_type_(0),
+ block_idx_(0)
+ {}
- bool operator==(const iterator_base& it) const
+ bool operator==(const iterator_base& it) const BMNOEXCEPT
{
return (position_ == it.position_) && (bv_ == it.bv_);
}
- bool operator!=(const iterator_base& it) const
+ bool operator!=(const iterator_base& it) const BMNOEXCEPT
{
return ! operator==(it);
}
- bool operator < (const iterator_base& it) const
+ bool operator < (const iterator_base& it) const BMNOEXCEPT
{
return position_ < it.position_;
}
- bool operator <= (const iterator_base& it) const
+ bool operator <= (const iterator_base& it) const BMNOEXCEPT
{
return position_ <= it.position_;
}
- bool operator > (const iterator_base& it) const
+ bool operator > (const iterator_base& it) const BMNOEXCEPT
{
return position_ > it.position_;
}
- bool operator >= (const iterator_base& it) const
+ bool operator >= (const iterator_base& it) const BMNOEXCEPT
{
return position_ >= it.position_;
}
\brief Checks if iterator is still valid. Analog of != 0 comparison for pointers.
\returns true if iterator is valid.
*/
- bool valid() const { return position_ != bm::id_max; }
+ bool valid() const BMNOEXCEPT { return position_ != bm::id_max; }
/**
\fn bool bm::bvector::iterator_base::invalidate()
\brief Turns iterator into an invalid state.
*/
- void invalidate() { position_ = bm::id_max; }
+ void invalidate() BMNOEXCEPT
+ { position_ = bm::id_max; block_type_ = ~0u;}
/** \brief Compare FSMs for testing purposes
\internal
*/
- bool compare_state(const iterator_base& ib) const
+ bool compare_state(const iterator_base& ib) const BMNOEXCEPT
{
if (this->bv_ != ib.bv_) return false;
if (this->position_ != ib.position_) return false;
public:
- /** Information about current bitblock. */
+ /** Bit-block descriptor
+ @internal
+ */
struct bitblock_descr
{
const bm::word_t* ptr; //!< Word pointer.
size_type pos; //!< Last bit position decode before
};
- /** Information about current DGAP block. */
+ /** Information about current DGAP block.
+ @internal
+ */
struct dgap_descr
{
const gap_word_t* ptr; //!< Word pointer.
typedef void pointer;
typedef void reference;
- insert_iterator() : bvect_(0), max_bit_(0) {}
+ insert_iterator() BMNOEXCEPT : bvect_(0), max_bit_(0) {}
- insert_iterator(bvector<Alloc>& bvect)
+ insert_iterator(bvector<Alloc>& bvect) BMNOEXCEPT
: bvect_(&bvect),
max_bit_(bvect.size())
{
typedef void pointer;
typedef void reference;
- bulk_insert_iterator()
+ bulk_insert_iterator() BMNOEXCEPT
: bvect_(0), buf_(0), buf_size_(0), sorted_(BM_UNKNOWN) {}
~bulk_insert_iterator()
bvect_->blockman_.get_allocator().free_bit_block((bm::word_t*)buf_);
}
- bulk_insert_iterator(bvector<Alloc>& bvect, bm::sort_order so = BM_UNKNOWN)
+ bulk_insert_iterator(bvector<Alloc>& bvect,
+ bm::sort_order so = BM_UNKNOWN) BMNOEXCEPT
: bvect_(&bvect), sorted_(so)
{
bvect_->init();
sorted_ = BM_UNKNOWN;
}
- bulk_insert_iterator(bulk_insert_iterator&& iit) BMNOEXEPT
+ bulk_insert_iterator(bulk_insert_iterator&& iit) BMNOEXCEPT
: bvect_(iit.bvect_)
{
buf_ = iit.buf_; iit.buf_ = 0;
return *this;
}
- bulk_insert_iterator& operator=(bulk_insert_iterator&& ii) BMNOEXEPT
+ bulk_insert_iterator& operator=(bulk_insert_iterator&& ii) BMNOEXCEPT
{
bvect_ = ii.bvect_;
if (buf_)
bvect_->sync_size();
}
- bvector_type* get_bvector() const { return bvect_; }
+ bvector_type* get_bvector() const BMNOEXCEPT { return bvect_; }
protected:
static
- size_type buf_size_max()
+ size_type buf_size_max() BMNOEXCEPT
{
#ifdef BM64ADDR
return bm::set_block_size / 2;
typedef unsigned& reference;
public:
- enumerator() : iterator_base()
+ enumerator() BMNOEXCEPT : iterator_base()
{}
/*! @brief Construct enumerator associated with a vector.
This construction creates unpositioned iterator with status
valid() == false. It can be re-positioned using go_first() or go_to()
*/
- enumerator(const bvector<Alloc>* bv)
+ enumerator(const bvector<Alloc>* bv) BMNOEXCEPT
: iterator_base()
{
this->bv_ = const_cast<bvector<Alloc>*>(bv);
}
+ /*! @brief Construct enumerator for bit vector
+ @param bv bit-vector reference
+ @param pos bit position in the vector
+ if position is 0, it finds the next 1 or becomes not valid
+ (en.valid() == false)
+ */
+ enumerator(const bvector<Alloc>& bv, size_type pos = 0) BMNOEXCEPT
+ : iterator_base()
+ {
+ this->bv_ = const_cast<bvector<Alloc>*>(&bv);
+ go_to(pos);
+ }
+
+
/*! @brief Construct enumerator for bit vector
@param bv bit-vector pointer
@param pos bit position in the vector
if position is 0, it finds the next 1 or becomes not valid
(en.valid() == false)
*/
- enumerator(const bvector<Alloc>* bv, size_type pos)
+ enumerator(const bvector<Alloc>* bv, size_type pos) BMNOEXCEPT
: iterator_base()
{
this->bv_ = const_cast<bvector<Alloc>*>(bv);
}
/*! \brief Get current position (value) */
- size_type operator*() const { return this->position_; }
+ size_type operator*() const BMNOEXCEPT { return this->position_; }
/*! \brief Get current position (value) */
- size_type value() const { return this->position_; }
+ size_type value() const BMNOEXCEPT { return this->position_; }
/*! \brief Advance enumerator forward to the next available bit */
- enumerator& operator++() { return this->go_up(); }
+ enumerator& operator++() BMNOEXCEPT { this->go_up(); return *this; }
/*! \brief Advance enumerator forward to the next available bit.
Possibly do NOT use this operator it is slower than the pre-fix increment.
*/
- enumerator operator++(int)
+ enumerator operator++(int) BMNOEXCEPT
{
enumerator tmp = *this;
this->go_up();
return tmp;
}
-
/*! \brief Position enumerator to the first available bit */
- void go_first()
- {
- BM_ASSERT(this->bv_);
-
- blocks_manager_type* bman = &(this->bv_->blockman_);
- if (!bman->is_init())
- {
- this->invalidate();
- return;
- }
-
- bm::word_t*** blk_root = bman->top_blocks_root();
-
- this->block_idx_ = this->position_= 0;
- unsigned i, j;
-
- for (i = 0; i < bman->top_block_size(); ++i)
- {
- bm::word_t** blk_blk = blk_root[i];
-
- if (blk_blk == 0) // not allocated
- {
- this->block_idx_ += bm::set_sub_array_size;
- this->position_ += bm::bits_in_array;
- continue;
- }
-
- if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
- blk_blk = FULL_SUB_BLOCK_REAL_ADDR;
-
- for (j = 0; j < bm::set_sub_array_size; ++j,++(this->block_idx_))
- {
- this->block_ = blk_blk[j];
-
- if (this->block_ == 0)
- {
- this->position_ += bits_in_block;
- continue;
- }
-
- if (BM_IS_GAP(this->block_))
- {
- this->block_type_ = 1;
- if (search_in_gapblock())
- {
- return;
- }
- }
- else
- {
- if (this->block_ == FULL_BLOCK_FAKE_ADDR)
- this->block_ = FULL_BLOCK_REAL_ADDR;
-
- this->block_type_ = 0;
- if (search_in_bitblock())
- {
- return;
- }
- }
-
- } // for j
-
- } // for i
-
- this->invalidate();
- }
-
- /// advance iterator forward by one
- void advance() { this->go_up(); }
+ void go_first() BMNOEXCEPT;
+ /*! advance iterator forward by one
+ @return true if advance was successful and the enumerator is valid
+ */
+ bool advance() BMNOEXCEPT { return this->go_up(); }
/*! \brief Advance enumerator to the next available bit */
- enumerator& go_up()
- {
- BM_ASSERT(this->valid());
- BM_ASSERT_THROW(this->valid(), BM_ERR_RANGE);
+ bool go_up() BMNOEXCEPT;
- // Current block search.
- //
-
- block_descr_type* bdescr = &(this->bdescr_);
- switch (this->block_type_)
- {
- case 0: // BitBlock
- {
- // check if we can get the value from the bits traversal cache
- unsigned short idx = ++(bdescr->bit_.idx);
- if (idx < bdescr->bit_.cnt)
- {
- this->position_ = bdescr->bit_.pos + bdescr->bit_.bits[idx];
- return *this;
- }
- this->position_ +=
- (bm::set_bitscan_wave_size * 32) - bdescr->bit_.bits[--idx];
-
- bdescr->bit_.ptr += bm::set_bitscan_wave_size;
- if (decode_bit_group(bdescr))
- {
- return *this;
- }
- }
- break;
- case 1: // DGAP Block
- {
- ++this->position_;
- if (--(bdescr->gap_.gap_len))
- {
- return *this;
- }
-
- // next gap is "OFF" by definition.
- if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1)
- {
- break;
- }
- gap_word_t prev = *(bdescr->gap_.ptr);
- unsigned int val = *(++(bdescr->gap_.ptr));
-
- this->position_ += val - prev;
- // next gap is now "ON"
- if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1)
- {
- break;
- }
- prev = *(bdescr->gap_.ptr);
- val = *(++(bdescr->gap_.ptr));
- bdescr->gap_.gap_len = (gap_word_t)(val - prev);
- return *this; // next "ON" found;
- }
- default:
- BM_ASSERT(0);
-
- } // switch
-
- if (search_in_blocks())
- return *this;
-
- this->invalidate();
- return *this;
- }
-
/*!
@brief Skip to specified relative rank
- @param rank - number of ON bits to go for
+ @param rank - number of ON bits to go for (must be: > 0)
+ @return true if skip was successful and enumerator is valid
*/
- enumerator& skip_to_rank(size_type rank)
+ bool skip_to_rank(size_type rank) BMNOEXCEPT
{
+ BM_ASSERT(rank);
--rank;
if (!rank)
- return *this;
+ return this->valid();
return skip(rank);
}
/*!
@brief Skip specified number of bits from enumeration
@param rank - number of ON bits to skip
+ @return true if skip was successful and enumerator is valid
*/
- enumerator& skip(size_type rank)
- {
- if (!this->valid() || !rank)
- return *this;
- for (; rank; --rank)
- {
- block_descr_type* bdescr = &(this->bdescr_);
- switch (this->block_type_)
- {
- case 0: // BitBlock
- for (; rank; --rank)
- {
- unsigned short idx = ++(bdescr->bit_.idx);
- if (idx < bdescr->bit_.cnt)
- {
- this->position_ = bdescr->bit_.pos + bdescr->bit_.bits[idx];
- continue;
- }
- this->position_ +=
- (bm::set_bitscan_wave_size * 32) - bdescr->bit_.bits[--idx];
- bdescr->bit_.ptr += bm::set_bitscan_wave_size;
-
- if (!decode_bit_group(bdescr, rank))
- break;
- } // for rank
- break;
- case 1: // DGAP Block
- for (; rank; --rank) // TODO: better skip logic
- {
- ++this->position_;
- if (--(bdescr->gap_.gap_len))
- {
- continue;
- }
-
- // next gap is "OFF" by definition.
- if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1)
- {
- break;
- }
- gap_word_t prev = *(bdescr->gap_.ptr);
- unsigned int val = *(++(bdescr->gap_.ptr));
-
- this->position_ += val - prev;
- // next gap is now "ON"
- if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1)
- {
- break;
- }
- prev = *(bdescr->gap_.ptr);
- val = *(++(bdescr->gap_.ptr));
- bdescr->gap_.gap_len = (gap_word_t)(val - prev);
- } // for rank
- break;
- default:
- BM_ASSERT(0);
- } // switch
-
- if (!rank)
- return *this;
+ bool skip(size_type rank) BMNOEXCEPT;
- if (!search_in_blocks())
- {
- this->invalidate();
- return *this;
- }
- } // for rank
- return *this;
- }
-
/*!
@brief go to a specific position in the bit-vector (or next)
*/
- enumerator& go_to(size_type pos)
- {
- if (pos == 0)
- {
- go_first();
- return *this;
- }
-
- size_type new_pos = this->bv_->check_or_next(pos); // find the true pos
- if (new_pos == 0) // no bits available
- {
- this->invalidate();
- return *this;
- }
- BM_ASSERT(new_pos >= pos);
- pos = new_pos;
-
-
- this->position_ = pos;
- size_type nb = this->block_idx_ = (pos >> bm::set_block_shift);
- bm::bvector<Alloc>::blocks_manager_type& bman =
- this->bv_->get_blocks_manager();
- unsigned i0, j0;
- bm::get_block_coord(nb, i0, j0);
- this->block_ = bman.get_block(i0, j0);
-
- BM_ASSERT(this->block_);
-
- this->block_type_ = (bool)BM_IS_GAP(this->block_);
-
- block_descr_type* bdescr = &(this->bdescr_);
- unsigned nbit = unsigned(pos & bm::set_block_mask);
-
- if (this->block_type_) // gap
- {
- this->position_ = nb * bm::set_block_size * 32;
- search_in_gapblock();
-
- if (this->position_ == pos)
- return *this;
- this->position_ = pos;
-
- gap_word_t* gptr = BMGAP_PTR(this->block_);
- unsigned is_set;
- unsigned gpos = bm::gap_bfind(gptr, nbit, &is_set);
- BM_ASSERT(is_set);
-
- bdescr->gap_.ptr = gptr + gpos;
- if (gpos == 1)
- {
- bdescr->gap_.gap_len = bm::gap_word_t(gptr[gpos] - (nbit - 1));
- }
- else
- {
- bm::gap_word_t interval = bm::gap_word_t(gptr[gpos] - gptr[gpos - 1]);
- bm::gap_word_t interval2 = bm::gap_word_t(nbit - gptr[gpos - 1]);
- bdescr->gap_.gap_len = bm::gap_word_t(interval - interval2 + 1);
- }
- }
- else // bit
- {
- if (nbit == 0)
- {
- search_in_bitblock();
- return *this;
- }
-
- unsigned nword = unsigned(nbit >> bm::set_word_shift);
-
- // check if we need to step back to match the wave
- unsigned parity = nword % bm::set_bitscan_wave_size;
- bdescr->bit_.ptr = this->block_ + (nword - parity);
- bdescr->bit_.cnt = bm::bitscan_wave(bdescr->bit_.ptr, bdescr->bit_.bits);
- BM_ASSERT(bdescr->bit_.cnt);
- bdescr->bit_.pos = (nb * bm::set_block_size * 32) + ((nword - parity) * 32);
- bdescr->bit_.idx = 0;
- nbit &= bm::set_word_mask;
- nbit += 32 * parity;
- for (unsigned i = 0; i < bdescr->bit_.cnt; ++i)
- {
- if (bdescr->bit_.bits[i] == nbit)
- return *this;
- bdescr->bit_.idx++;
- } // for
- BM_ASSERT(0);
- }
- return *this;
- }
-
+ bool go_to(size_type pos) BMNOEXCEPT;
private:
typedef typename iterator_base::block_descr block_descr_type;
- bool decode_wave(block_descr_type* bdescr)
- {
- bdescr->bit_.cnt = bm::bitscan_wave(bdescr->bit_.ptr, bdescr->bit_.bits);
- if (bdescr->bit_.cnt) // found
- {
- bdescr->bit_.idx ^= bdescr->bit_.idx; // = 0;
- bdescr->bit_.pos = this->position_;
- this->position_ += bdescr->bit_.bits[0];
- return true;
- }
- return false;
- }
-
- bool decode_bit_group(block_descr_type* bdescr)
- {
- const word_t* block_end = this->block_ + bm::set_block_size;
- for (; bdescr->bit_.ptr < block_end;)
- {
- if (decode_wave(bdescr))
- return true;
- this->position_ += bm::set_bitscan_wave_size * 32; // wave size
- bdescr->bit_.ptr += bm::set_bitscan_wave_size;
- } // for
- return false;
- }
-
- bool decode_bit_group(block_descr_type* bdescr, size_type& rank)
- {
- const word_t* block_end = this->block_ + bm::set_block_size;
-
- for (; bdescr->bit_.ptr < block_end;)
- {
- const bm::id64_t* w64_p = (bm::id64_t*)bdescr->bit_.ptr;
- bm::id64_t w64 = *w64_p;
- unsigned cnt = bm::word_bitcount64(w64);
- if (rank > cnt)
- {
- rank -= cnt;
- }
- else
- {
- if (decode_wave(bdescr))
- return true;
- }
- this->position_ += bm::set_bitscan_wave_size * 32; // wave size
- bdescr->bit_.ptr += bm::set_bitscan_wave_size;
- } // for
- return false;
- }
-
- bool search_in_bitblock()
- {
- BM_ASSERT(this->block_type_ == 0);
-
- block_descr_type* bdescr = &(this->bdescr_);
- bdescr->bit_.ptr = this->block_;
-
- return decode_bit_group(bdescr);
- }
-
- bool search_in_gapblock()
- {
- BM_ASSERT(this->block_type_ == 1);
-
- block_descr_type* bdescr = &(this->bdescr_);
- bdescr->gap_.ptr = BMGAP_PTR(this->block_);
- unsigned bitval = *(bdescr->gap_.ptr) & 1;
-
- ++(bdescr->gap_.ptr);
-
- for (;true;)
- {
- unsigned val = *(bdescr->gap_.ptr);
- if (bitval)
- {
- gap_word_t* first = BMGAP_PTR(this->block_) + 1;
- if (bdescr->gap_.ptr == first)
- {
- bdescr->gap_.gap_len = (gap_word_t)(val + 1);
- }
- else
- {
- bdescr->gap_.gap_len =
- (gap_word_t)(val - *(bdescr->gap_.ptr-1));
- }
- return true;
- }
- this->position_ += val + 1;
- if (val == bm::gap_max_bits - 1)
- break;
- bitval ^= 1;
- ++(bdescr->gap_.ptr);
- }
- return false;
- }
-
- bool search_in_blocks()
- {
- ++(this->block_idx_);
- const blocks_manager_type& bman = this->bv_->blockman_;
- block_idx_type i = this->block_idx_ >> bm::set_array_shift;
- block_idx_type top_block_size = bman.top_block_size();
- bm::word_t*** blk_root = bman.top_blocks_root();
- for (; i < top_block_size; ++i)
- {
- bm::word_t** blk_blk = blk_root[i];
- if (blk_blk == 0)
- {
- // fast scan fwd in top level
- size_type bn = this->block_idx_ + bm::set_sub_array_size;
- size_type pos = this->position_ + bm::bits_in_array;
- for (++i; i < top_block_size; ++i)
- {
- if (blk_root[i])
- break;
- bn += bm::set_sub_array_size;
- pos += bm::bits_in_array;
- } // for i
- this->block_idx_ = bn;
- this->position_ = pos;
- if ((i < top_block_size) && blk_root[i])
- --i;
- continue;
- }
- if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
- blk_blk = FULL_SUB_BLOCK_REAL_ADDR;
-
- block_idx_type j = this->block_idx_ & bm::set_array_mask;
-
- for(; j < bm::set_sub_array_size; ++j, ++(this->block_idx_))
- {
- this->block_ = blk_blk[j];
-
- if (this->block_ == 0)
- {
- this->position_ += bm::bits_in_block;
- continue;
- }
+ static bool decode_wave(block_descr_type* bdescr) BMNOEXCEPT;
+ bool decode_bit_group(block_descr_type* bdescr) BMNOEXCEPT;
+ bool decode_bit_group(block_descr_type* bdescr,
+ size_type& rank) BMNOEXCEPT;
+ bool search_in_bitblock() BMNOEXCEPT;
+ bool search_in_gapblock() BMNOEXCEPT;
+ bool search_in_blocks() BMNOEXCEPT;
- this->block_type_ = BM_IS_GAP(this->block_);
- if (this->block_type_)
- {
- if (search_in_gapblock())
- return true;
- }
- else
- {
- if (this->block_ == FULL_BLOCK_FAKE_ADDR)
- this->block_ = FULL_BLOCK_REAL_ADDR;
- if (search_in_bitblock())
- return true;
- }
- } // for j
- } // for i
- return false;
- }
};
/*!
#ifndef BM_NO_STL
typedef std::input_iterator_tag iterator_category;
#endif
- counted_enumerator() : bit_count_(0){}
+ counted_enumerator() BMNOEXCEPT : bit_count_(0){}
- counted_enumerator(const enumerator& en) : enumerator(en)
+ counted_enumerator(const enumerator& en) BMNOEXCEPT : enumerator(en)
{
- if (this->valid())
- bit_count_ = 1;
+ bit_count_ = this->valid(); // 0 || 1
}
- counted_enumerator& operator=(const enumerator& en)
+ counted_enumerator& operator=(const enumerator& en) BMNOEXCEPT
{
enumerator* me = this;
*me = en;
return *this;
}
- counted_enumerator& operator++()
+ counted_enumerator& operator++() BMNOEXCEPT
{
this->go_up();
- if (this->valid())
- ++(this->bit_count_);
+ this->bit_count_ += this->valid();
return *this;
}
{
counted_enumerator tmp(*this);
this->go_up();
- if (this->valid())
- ++bit_count_;
+ this->bit_count_ += this->valid();
return tmp;
}
Method returns number of ON bits fromn the bit 0 to the current bit
For the first bit in bitvector it is 1, for the second 2
*/
- size_type count() const { return bit_count_; }
+ size_type count() const BMNOEXCEPT { return bit_count_; }
private:
/*! Function closed for usage */
counted_enumerator& go_to(size_type pos);
class mem_pool_guard
{
public:
- mem_pool_guard() : bv_(0)
+ mem_pool_guard() BMNOEXCEPT : bv_(0)
{}
- mem_pool_guard(allocator_pool_type& pool, bvector<Alloc>& bv)
+ mem_pool_guard(allocator_pool_type& pool, bvector<Alloc>& bv) BMNOEXCEPT
: bv_(&bv)
{
bv.set_allocator_pool(&pool);
}
/// check if vector has no assigned allocator and set one
- void assign_if_not_set(allocator_pool_type& pool, bvector<Alloc>& bv)
+ void assign_if_not_set(allocator_pool_type& pool,
+ bvector<Alloc>& bv) BMNOEXCEPT
{
- if (bv.get_allocator_pool() == 0) // alloc pool not set yet
+ if (!bv.get_allocator_pool()) // alloc pool not set yet
{
BM_ASSERT(!bv_);
bv_ = &bv;
- bv.set_allocator_pool(&pool);
+ bv_->set_allocator_pool(&pool);
}
}
const gap_word_t* glevel_len;
allocation_policy(bm::strategy s=BM_BIT,
- const gap_word_t* glevels = bm::gap_len_table<true>::_len)
+ const gap_word_t* glevels = bm::gap_len_table<true>::_len) BMNOEXCEPT
: strat(s), glevel_len(glevels)
{}
};
}
- ~bvector() BMNOEXEPT {}
+ ~bvector() BMNOEXCEPT {}
/*!
\brief Explicit post-construction initialization
*/
/*!
\brief Move constructor
*/
- bvector(bvector<Alloc>&& bvect) BMNOEXEPT
+ bvector(bvector<Alloc>&& bvect) BMNOEXCEPT
{
blockman_.move_from(bvect.blockman_);
size_ = bvect.size_;
/*!
\brief Move assignment operator
*/
- bvector& operator=(bvector<Alloc>&& bvect) BMNOEXEPT
+ bvector& operator=(bvector<Alloc>&& bvect) BMNOEXCEPT
{
this->move_from(bvect);
return *this;
/*!
\brief Move bvector content from another bvector
*/
- void move_from(bvector<Alloc>& bvect) BMNOEXEPT;
+ void move_from(bvector<Alloc>& bvect) BMNOEXCEPT;
/*! \brief Exchanges content of bv and this bvector.
*/
- void swap(bvector<Alloc>& bvect) BMNOEXEPT;
+ void swap(bvector<Alloc>& bvect) BMNOEXCEPT;
/*! \brief Merge/move content from another vector
return reference(*this, n);
}
- bool operator[](size_type n) const
+ bool operator[](size_type n) const BMNOEXCEPT
{
BM_ASSERT(n < size_);
return get_bit(n);
bool operator <= (const bvector<Alloc>& bv) const { return compare(bv)<=0; }
bool operator > (const bvector<Alloc>& bv) const { return compare(bv)>0; }
bool operator >= (const bvector<Alloc>& bv) const { return compare(bv) >= 0; }
- bool operator == (const bvector<Alloc>& bv) const { return equal(bv); }
- bool operator != (const bvector<Alloc>& bv) const { return !equal(bv); }
+ bool operator == (const bvector<Alloc>& bv) const BMNOEXCEPT { return equal(bv); }
+ bool operator != (const bvector<Alloc>& bv) const BMNOEXCEPT { return !equal(bv); }
bvector<Alloc> operator~() const { return bvector<Alloc>(*this).invert(); }
Alloc get_allocator() const
- {
- return blockman_.get_allocator();
- }
+ { return blockman_.get_allocator(); }
- /// Set allocator pool for local (non-threaded)
+ /// Set allocator pool for local (non-threaded)
/// memory cyclic(lots of alloc-free ops) opertations
///
- void set_allocator_pool(allocator_pool_type* pool_ptr)
+ void set_allocator_pool(allocator_pool_type* pool_ptr) BMNOEXCEPT
{ blockman_.get_allocator().set_pool(pool_ptr); }
/// Get curent allocator pool (if set)
/// @return pointer to the current pool or NULL
- allocator_pool_type* get_allocator_pool()
+ allocator_pool_type* get_allocator_pool() BMNOEXCEPT
{ return blockman_.get_allocator().get_pool(); }
// --------------------------------------------------------------------
*/
void set_bit_no_check(size_type n);
+ /**
+ \brief Set specified bit without checking preconditions (size, etc)
+ */
+ bool set_bit_no_check(size_type n, bool val);
/*!
\brief Sets all bits in the specified closed interval [left,right]
@sa set_range
*/
void clear_range(size_type left, size_type right)
- {
- set_range(left, right, false);
- }
+ { set_range(left, right, false); }
/*!
\param free_mem if "true" (default) bvector frees the memory,
otherwise sets blocks to 0.
*/
- void clear(bool free_mem = false)
- {
- blockman_.set_all_zero(free_mem);
- }
+ void clear(bool free_mem = false) { blockman_.set_all_zero(free_mem); }
/*!
\brief Clears every bit in the bitvector.
\return *this;
*/
- bvector<Alloc>& reset()
- {
- clear(true);
- return *this;
- }
+ bvector<Alloc>& reset() { clear(true); return *this; }
/*!
\brief Flips bit n
//size_type capacity() const { return blockman_.capacity(); }
/*! \brief return current size of the vector (bits) */
- size_type size() const { return size_; }
+ size_type size() const BMNOEXCEPT { return size_; }
/*!
\brief Change size of the bvector
//@}
// --------------------------------------------------------------------
- /*! @name Population counting and ranking methods
+ /*! @name Population counting, ranks, ranges and intervals
*/
//@{
/*!
\brief population cout (count of ON bits)
- \return Total number of bits ON.
+ \sa count_range
+ \return Total number of bits ON
*/
- size_type count() const;
+ size_type count() const BMNOEXCEPT;
/*! \brief Computes bitcount values for all bvector blocks
\param arr - pointer on array of block bit counts
This number +1 gives you number of arr elements initialized during the
function call.
*/
- block_idx_type count_blocks(unsigned* arr) const;
-
+ block_idx_type count_blocks(unsigned* arr) const BMNOEXCEPT;
+
+
/*!
\brief Returns count of 1 bits in the given range [left..right]
Uses rank-select index to accelerate the search
*/
size_type count_range(size_type left,
size_type right,
- const rs_index_type& rs_idx) const;
+ const rs_index_type& rs_idx) const BMNOEXCEPT;
/*!
\brief Returns count of 1 bits in the given range [left..right]
\return population count in the diapason
*/
- size_type count_range(size_type left,
- size_type right) const;
+ size_type count_range(size_type left, size_type right) const BMNOEXCEPT;
+
+ /*!
+ \brief Returns true if all bits in the range are 1s (saturated interval)
+ Function uses closed interval [left, right]
+
+ \param left - index of the first bit to check
+ \param right - index of last bit
+
+ \return true if all bits are 1, false otherwise
+ @sa any_range, count_range
+ */
+ bool is_all_one_range(size_type left, size_type right) const BMNOEXCEPT;
+
+ /*!
+ \brief Returns true if any bits in the range are 1s (non-empty interval)
+ Function uses closed interval [left, right]
+
+ \param left - index of the first bit to check
+ \param right - index of last bit
+
+ \return true if at least one bit is set
+ @sa is_all_one_range, count_range
+ */
+ bool any_range(size_type left, size_type right) const BMNOEXCEPT;
-
/*! \brief compute running total of all blocks in bit vector (rank-select index)
\param rs_idx - [out] pointer to index / count structure
should be prepared using build_rs_index
\return population count in the range [0..n]
\sa build_rs_index
- \sa count_to_test, select, rank
+ \sa count_to_test, select, rank, rank_corrected
*/
- size_type count_to(size_type n, const rs_index_type& rs_idx) const;
+ size_type count_to(size_type n,
+ const rs_index_type& rs_idx) const BMNOEXCEPT;
/*!
- \brief Returns rank of specified bit position
+ \brief Returns rank of specified bit position (same as count_to())
\param n - index of bit to rank
\param rs_idx - rank-select index
\return population count in the range [0..n]
\sa build_rs_index
- \sa count_to_test, select, rank
+ \sa count_to_test, select, rank, rank_corrected
*/
- size_type rank(size_type n, const rs_index_type& rs_idx) const
- { return count_to(n, rs_idx); }
+ size_type rank(size_type n,
+ const rs_index_type& rs_idx) const BMNOEXCEPT
+ { return count_to(n, rs_idx); }
+ /*!
+ \brief Returns rank corrected by the requested border value (as -1)
+
+ This is the rank function (bit-count) minus the value of bit 'n':
+ if bit n is set the function returns rank()-1, otherwise it returns rank().
+ Faster than calling rank() and test() separately.
+
+
+ \param n - index of bit to rank
+ \param rs_idx - rank-select index
+ \return population count in the range [0..n] corrected as -1 by the value of n
+ \sa build_rs_index
+ \sa count_to_test, select, rank
+ */
+ size_type rank_corrected(size_type n,
+ const rs_index_type& rs_idx) const BMNOEXCEPT;
/*!
\brief popcount in [0..right] range if test(right) == true
plus count_to()
\param n - index of bit to test and rank
- \param blocks_cnt - block count structure to accelerate search
- should be prepared using running_count_blocks
+ \param rs_idx - rank-select index
+ (block count structure to accelerate search)
+ should be prepared using build_rs_index()
\return population count in the diapason or 0 if right bit test failed
\sa build_rs_index
\sa count_to
*/
- size_type count_to_test(size_type n, const rs_index_type& blocks_cnt) const;
+ size_type
+ count_to_test(size_type n,
+ const rs_index_type& rs_idx) const BMNOEXCEPT;
/*! Recalculate bitcount (deprecated)
*/
- size_type recalc_count() { return count(); }
+ size_type recalc_count() BMNOEXCEPT { return count(); }
/*!
Disables count cache. (deprecated).
*/
- void forget_count() {}
+ void forget_count() BMNOEXCEPT {}
//@}
\param n - Index of the bit to check.
\return Bit value (1 or 0)
*/
- bool get_bit(size_type n) const;
+ bool get_bit(size_type n) const BMNOEXCEPT;
/*!
\brief returns true if bit n is set and false is bit n is 0.
\param n - Index of the bit to check.
\return Bit value (1 or 0)
*/
- bool test(size_type n) const { return get_bit(n); }
+ bool test(size_type n) const BMNOEXCEPT { return get_bit(n); }
//@}
\brief Returns true if any bits in this bitset are set, and otherwise returns false.
\return true if any bit is set
*/
- bool any() const;
+ bool any() const BMNOEXCEPT;
/*!
\brief Returns true if no bits are set, otherwise returns false.
*/
- bool none() const { return !any(); }
+ bool none() const BMNOEXCEPT { return !any(); }
//@}
// --------------------------------------------------------------------
/*!
\fn bool bvector::find(bm::id_t& pos) const
\brief Finds index of first 1 bit
- \param pos - index of the found 1 bit
+ \param pos - [out] index of the found 1 bit
\return true if search returned result
\sa get_first, get_next, extract_next, find_reverse, find_first_mismatch
*/
- bool find(size_type& pos) const;
+ bool find(size_type& pos) const BMNOEXCEPT;
/*!
\fn bool bvector::find(bm::id_t from, bm::id_t& pos) const
- \brief Finds index of 1 bit starting from position
+ \brief Find index of 1 bit starting from position
\param from - position to start search from
- \param pos - index of the found 1 bit
+ \param pos - [out] index of the found 1 bit
\return true if search returned result
\sa get_first, get_next, extract_next, find_reverse, find_first_mismatch
*/
- bool find(size_type from, size_type& pos) const;
+ bool find(size_type from, size_type& pos) const BMNOEXCEPT;
+
/*!
\fn bm::id_t bvector::get_first() const
\return Index of the first 1 bit, may return 0
\sa get_next, find, extract_next, find_reverse
*/
- size_type get_first() const { return check_or_next(0); }
+ size_type get_first() const BMNOEXCEPT { return check_or_next(0); }
/*!
\fn bm::id_t bvector::get_next(bm::id_t prev) const
\return Index of the next bit which is ON or 0 if not found.
\sa get_first, find, extract_next, find_reverse
*/
- size_type get_next(size_type prev) const
+ size_type get_next(size_type prev) const BMNOEXCEPT
{ return (++prev == bm::id_max) ? 0 : check_or_next(prev); }
/*!
\return true if search returned result
\sa get_first, get_next, extract_next, find, find_first_mismatch
*/
- bool find_reverse(size_type& pos) const;
+ bool find_reverse(size_type& pos) const BMNOEXCEPT;
/*!
\brief Finds dynamic range of bit-vector [first, last]
\return true if search returned result
\sa get_first, get_next, extract_next, find, find_reverse
*/
- bool find_range(size_type& first, size_type& last) const;
+ bool find_range(size_type& first, size_type& last) const BMNOEXCEPT;
/*!
\brief Find bit-vector position for the specified rank(bitcount)
\return true if requested rank was found
*/
- bool find_rank(size_type rank, size_type from, size_type& pos) const;
+ bool find_rank(size_type rank, size_type from,
+ size_type& pos) const BMNOEXCEPT;
/*!
\brief Find bit-vector position for the specified rank(bitcount)
\return true if requested rank was found
*/
bool find_rank(size_type rank, size_type from, size_type& pos,
- const rs_index_type& rs_idx) const;
+ const rs_index_type& rs_idx) const BMNOEXCEPT;
/*!
\brief select bit-vector position for the specified rank(bitcount)
\return true if requested rank was found
*/
- bool select(size_type rank, size_type& pos, const rs_index_type& rs_idx) const;
+ bool select(size_type rank, size_type& pos,
+ const rs_index_type& rs_idx) const BMNOEXCEPT;
//@}
@sa statistics
*/
- void calc_stat(struct bm::bvector<Alloc>::statistics* st) const;
+ void calc_stat(struct bm::bvector<Alloc>::statistics* st) const BMNOEXCEPT;
/*!
\brief Sets new blocks allocation strategy.
1 - Blocks mutation mode (adaptive algorithm)
\sa set_new_blocks_strat
*/
- strategy get_new_blocks_strat() const { return new_blocks_strat_; }
+ strategy get_new_blocks_strat() const BMNOEXCEPT
+ { return new_blocks_strat_; }
/*!
\brief Optimize memory bitvector's memory allocation.
Return true if bvector is initialized at all
@internal
*/
- bool is_init() const { return blockman_.is_init(); }
+ bool is_init() const BMNOEXCEPT { return blockman_.is_init(); }
//@}
@return 0 if this == arg, -1 if this < arg, 1 if this > arg
@sa find_first_mismatch
*/
- int compare(const bvector<Alloc>& bvect) const;
+ int compare(const bvector<Alloc>& bvect) const BMNOEXCEPT;
/*!
\brief Equal comparison with an agr bit-vector
@return true if vectors are identical
*/
- bool equal(const bvector<Alloc>& bvect) const
+ bool equal(const bvector<Alloc>& bvect) const BMNOEXCEPT
{
size_type pos;
bool found = find_first_mismatch(bvect, pos);
bool find_first_mismatch(const bvector<Alloc>& bvect,
size_type& pos,
size_type search_to = bm::id_max
- ) const;
+ ) const BMNOEXCEPT;
//@}
Use only if you are BitMagic library
@internal
*/
- const blocks_manager_type& get_blocks_manager() const { return blockman_; }
+ const blocks_manager_type& get_blocks_manager() const BMNOEXCEPT
+ { return blockman_; }
/**
\brief get access to memory manager (internal)
Use only if you are BitMagic library
@internal
*/
- blocks_manager_type& get_blocks_manager() { return blockman_; }
+ blocks_manager_type& get_blocks_manager() BMNOEXCEPT
+ { return blockman_; }
//@}
private:
- size_type check_or_next(size_type prev) const;
+ size_type check_or_next(size_type prev) const BMNOEXCEPT;
- /// set bit in GAP block withlength extension control
+ /// set bit in GAP block with GAP block length control
bool gap_block_set(bm::gap_word_t* gap_blk,
bool val, block_idx_type nblock, unsigned nbit);
-
+
+ /// set bit in GAP block with GAP block length control (no return value)
+ void gap_block_set_no_ret(bm::gap_word_t* gap_blk,
+ bool val, block_idx_type nblock,
+ unsigned nbit);
+
/// check if specified bit is 1, and set it to 0
/// if specified bit is 0, scan for the next 1 and returns it
/// if no 1 found returns 0
size_type check_or_next_extract(size_type prev);
- /**
- \brief Set specified bit without checking preconditions (size, etc)
- */
- bool set_bit_no_check(size_type n, bool val);
/**
\brief AND specified bit without checking preconditions (size, etc)
size_type block_count_to(const bm::word_t* block,
block_idx_type nb,
unsigned nbit_right,
- const rs_index_type& blocks_cnt);
+ const rs_index_type& blocks_cnt) BMNOEXCEPT;
/**
Return value of first bit in the block
*/
- bool test_first_block_bit(block_idx_type nb) const;
+ bool test_first_block_bit(block_idx_type nb) const BMNOEXCEPT;
private:
blocks_manager_type blockman_; //!< bitblocks manager
// -----------------------------------------------------------------------
template<typename Alloc>
-void bvector<Alloc>::move_from(bvector<Alloc>& bvect) BMNOEXEPT
+void bvector<Alloc>::move_from(bvector<Alloc>& bvect) BMNOEXCEPT
{
if (this != &bvect)
{
// -----------------------------------------------------------------------
template<typename Alloc>
-typename bvector<Alloc>::size_type bvector<Alloc>::count() const
+typename bvector<Alloc>::size_type bvector<Alloc>::count() const BMNOEXCEPT
{
if (!blockman_.is_init())
return 0;
if (!found)
break;
blk_blk = blk_root[i];
+ BM_ASSERT(blk_blk);
+ if (!blk_blk)
+ break;
}
if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
{
// -----------------------------------------------------------------------
template<typename Alloc>
-bool bvector<Alloc>::any() const
+bool bvector<Alloc>::any() const BMNOEXCEPT
{
word_t*** blk_root = blockman_.top_blocks_root();
if (!blk_root)
template<typename Alloc>
typename bvector<Alloc>::block_idx_type
-bvector<Alloc>::count_blocks(unsigned* arr) const
+bvector<Alloc>::count_blocks(unsigned* arr) const BMNOEXCEPT
{
bm::word_t*** blk_root = blockman_.top_blocks_root();
if (blk_root == 0)
bvector<Alloc>::block_count_to(const bm::word_t* block,
block_idx_type nb,
unsigned nbit_right,
- const rs_index_type& rs_idx)
+ const rs_index_type& rs_idx) BMNOEXCEPT
{
size_type c;
unsigned sub_range = rs_idx.find_sub_range(nbit_right);
template<typename Alloc>
typename bvector<Alloc>::size_type
bvector<Alloc>::count_to(size_type right,
- const rs_index_type& rs_idx) const
+ const rs_index_type& rs_idx) const BMNOEXCEPT
{
BM_ASSERT(right < bm::id_max);
if (!blockman_.is_init())
template<typename Alloc>
typename bvector<Alloc>::size_type
bvector<Alloc>::count_to_test(size_type right,
- const rs_index_type& blocks_cnt) const
+ const rs_index_type& rs_idx) const BMNOEXCEPT
{
BM_ASSERT(right < bm::id_max);
if (!blockman_.is_init())
unsigned nblock_right = unsigned(right >> bm::set_block_shift);
unsigned nbit_right = unsigned(right & bm::set_block_mask);
- // running count of all blocks before target
- //
- size_type cnt = 0;
unsigned i, j;
bm::get_block_coord(nblock_right, i, j);
const bm::word_t* block = blockman_.get_block_ptr(i, j);
+ size_type cnt = 0;
if (!block)
- return 0;
+ return cnt;
bool gap = BM_IS_GAP(block);
if (gap)
if (bm::gap_test_unr(gap_blk, (gap_word_t)nbit_right))
cnt = bm::gap_bit_count_to(gap_blk, (gap_word_t)nbit_right);
else
- return 0;
+ return cnt;
}
else
{
w &= (1u << (nbit_right & bm::set_word_mask));
if (w)
{
- cnt = block_count_to(block, nblock_right, nbit_right, blocks_cnt);
+ cnt = block_count_to(block, nblock_right, nbit_right, rs_idx);
BM_ASSERT(cnt == bm::bit_block_calc_count_to(block, nbit_right));
}
else
- return 0;
+ {
+ return cnt;
+ }
}
}
- cnt += nblock_right ? blocks_cnt.rcount(nblock_right - 1) : 0;
+ cnt += nblock_right ? rs_idx.rcount(nblock_right - 1) : 0;
return cnt;
}
template<typename Alloc>
typename bvector<Alloc>::size_type
-bvector<Alloc>::count_range(size_type left, size_type right) const
+bvector<Alloc>::rank_corrected(size_type right,
+                               const rs_index_type& rs_idx) const BMNOEXCEPT
+{
+    // Rank of bit 'right' (count of ON bits in [0..right]) minus the value
+    // of bit 'right' itself.
+    //   right  - index of the bit to rank (must be < bm::id_max)
+    //   rs_idx - rank-select index used for the per-block running totals
+    // Returns 0 when the vector is not initialized.
+    BM_ASSERT(right < bm::id_max);
+    if (!blockman_.is_init())
+        return 0;
+
+    // Keep the block number in block_idx_type: narrowing it to 'unsigned'
+    // truncates the value when size_type is wider than 32 bits.
+    block_idx_type nblock_right = (right >> bm::set_block_shift);
+    unsigned nbit_right = unsigned(right & bm::set_block_mask);
+
+    // running count of all blocks before the target block
+    size_type cnt = nblock_right ? rs_idx.rcount(nblock_right - 1) : 0;
+
+    unsigned i, j;
+    bm::get_block_coord(nblock_right, i, j);
+    const bm::word_t* block = blockman_.get_block_ptr(i, j);
+
+    if (!block) // no target block: only the preceding blocks count
+        return cnt;
+
+    bool gap = BM_IS_GAP(block);
+    if (gap)
+    {
+        cnt += bm::gap_bit_count_to(BMGAP_PTR(block), (gap_word_t)nbit_right,
+                                    true /* rank corrected */);
+    }
+    else
+    {
+        if (block == FULL_BLOCK_FAKE_ADDR)
+            cnt += nbit_right; // full block: (nbit_right + 1) bits minus 1
+        else
+        {
+            cnt += block_count_to(block, nblock_right, nbit_right, rs_idx);
+            unsigned w = block[nbit_right >> bm::set_word_shift] &
+                         (1u << (nbit_right & bm::set_word_mask));
+            cnt -= bool(w); // rank correction
+        }
+    }
+    return cnt;
+}
+
+
+// -----------------------------------------------------------------------
+
+template<typename Alloc>
+typename bvector<Alloc>::size_type
+bvector<Alloc>::count_range(size_type left, size_type right) const BMNOEXCEPT
{
BM_ASSERT(left < bm::id_max && right < bm::id_max);
- BM_ASSERT(left <= right);
+ if (left > right)
+ bm::xor_swap(left, right);
+ if (right == bm::id_max)
+ --right;
- BM_ASSERT_THROW(right < bm::id_max, BM_ERR_RANGE);
- BM_ASSERT_THROW(left <= right, BM_ERR_RANGE);
-
if (!blockman_.is_init())
return 0;
size_type cnt = 0;
// calculate logical number of start and destination blocks
- unsigned nblock_left = unsigned(left >> bm::set_block_shift);
- unsigned nblock_right = unsigned(right >> bm::set_block_shift);
+ block_idx_type nblock_left = (left >> bm::set_block_shift);
+ block_idx_type nblock_right = (right >> bm::set_block_shift);
unsigned i0, j0;
bm::get_block_coord(nblock_left, i0, j0);
{
return cnt;
}
-
+
+ // process all full mid-blocks
{
func.reset();
word_t*** blk_root = blockman_.top_blocks_root();
- unsigned top_blocks_size = blockman_.top_block_size();
+ block_idx_type top_blocks_size = blockman_.top_block_size();
- bm::for_each_nzblock_range(blk_root, top_blocks_size, nblock_left+1, nblock_right-1, func);
+ bm::for_each_nzblock_range(blk_root, top_blocks_size,
+ nblock_left+1, nblock_right-1, func);
cnt += func.count();
}
(gap_word_t)0,
(gap_word_t)nbit_right);
}
- else
+ else
+ {
+ cnt += bm::bit_block_calc_count_range(block, 0, nbit_right);
+ }
+ }
+ return cnt;
+}
+
+// -----------------------------------------------------------------------
+
+template<typename Alloc>
+bool bvector<Alloc>::is_all_one_range(size_type left,
+                                      size_type right) const BMNOEXCEPT
+{
+    // Checks the closed interval [left, right]: true only if every bit is 1.
+    // The bounds may come in any order; right == bm::id_max is clamped down.
+    if (!blockman_.is_init())
+        return false; // nothing to check
+
+    if (left > right)
+        bm::xor_swap(left, right);
+    if (right == bm::id_max)
+        --right;
+    if (left == right)
+        return test(left);
+
+    BM_ASSERT(left < bm::id_max && right < bm::id_max);
+
+    block_idx_type nb_first = (left >> bm::set_block_shift);
+    block_idx_type nb_last = (right >> bm::set_block_shift);
+    const unsigned bit_first = unsigned(left & bm::set_block_mask);
+    const unsigned bit_last = unsigned(right & bm::set_block_mask);
+
+    unsigned i0, j0;
+    bm::get_block_coord(nb_first, i0, j0);
+    const bm::word_t* blk = blockman_.get_block(i0, j0);
+
+    // the whole interval sits inside one block
+    if (nb_first == nb_last)
+        return bm::block_is_all_one_range(blk, bit_first, bit_last);
+
+    // head block: from the first bit to the end of the block
+    if (!bm::block_is_all_one_range(blk, bit_first, (bm::gap_max_bits-1)))
+        return false;
+
+    // tail block: from the start of the block to the last bit
+    bm::get_block_coord(nb_last, i0, j0);
+    blk = blockman_.get_block(i0, j0);
+    if (!bm::block_is_all_one_range(blk, 0, bit_last))
+        return false;
+
+    // what remains are the blocks strictly between head and tail
+    ++nb_first;
+    --nb_last;
+    if (nb_first > nb_last)
+        return true;
+
+    unsigned i_from, j_from, i_to, j_to;
+    bm::get_block_coord(nb_first, i_from, j_from);
+    bm::get_block_coord(nb_last, i_to, j_to);
+
+    bm::word_t*** top_root = blockman_.top_blocks_root();
+    for (unsigned i = i_from; i <= i_to; ++i)
+    {
+        bm::word_t** sub_arr = top_root[i];
+        if (!sub_arr)
+            return false; // missing sub-array cannot be all 1s
+        if ((bm::word_t*)sub_arr == FULL_BLOCK_FAKE_ADDR)
+            continue;     // sub-array marked as full
+
+        unsigned j = (i == i_from) ? j_from : 0;
+        const unsigned j_end = (i == i_to) ? j_to+1 : bm::set_sub_array_size;
+        do
+        {
+            if (!bm::check_block_one(sub_arr[j], true))
+                return false;
+        } while (++j < j_end);
+    }
+    return true;
+}
+
+// -----------------------------------------------------------------------
+
+template<typename Alloc>
+bool bvector<Alloc>::any_range(size_type left, size_type right) const BMNOEXCEPT
+{
+    // Checks the closed interval [left, right]: true if at least one bit is 1.
+    // The bounds may come in any order; right == bm::id_max is clamped down.
+    if (!blockman_.is_init())
+        return false; // nothing to do
+
+    if (right < left)
+        bm::xor_swap(left, right);
+    if (right == bm::id_max)
+        --right;
+    if (left == right)
+        return test(left);
+
+    // Validate only after normalization: asserting first would reject
+    // right == bm::id_max, an input the clamp above handles explicitly.
+    BM_ASSERT(left < bm::id_max && right < bm::id_max);
+
+    block_idx_type nblock_left = (left >> bm::set_block_shift);
+    block_idx_type nblock_right = (right >> bm::set_block_shift);
+
+    unsigned i0, j0;
+    bm::get_block_coord(nblock_left, i0, j0);
+    const bm::word_t* block = blockman_.get_block(i0, j0);
+
+    if (nblock_left == nblock_right) // hit in the same block
+    {
+        unsigned nbit_left = unsigned(left & bm::set_block_mask);
+        unsigned nbit_right = unsigned(right & bm::set_block_mask);
+        return bm::block_any_range(block, nbit_left, nbit_right);
+    }
+
+    // process entry point block
+    {
+        unsigned nbit_left = unsigned(left & bm::set_block_mask);
+        bool any_one = bm::block_any_range(block,
+                                           nbit_left, (bm::gap_max_bits-1));
+        if (any_one)
+            return any_one;
+        ++nblock_left;
+    }
+
+    // process tail block
+    {
+        bm::get_block_coord(nblock_right, i0, j0);
+        block = blockman_.get_block(i0, j0);
+        unsigned nbit_right = unsigned(right & bm::set_block_mask);
+        bool any_one = bm::block_any_range(block, 0, nbit_right);
+        if (any_one)
+            return any_one;
+        --nblock_right;
+    }
+
+    // check all blocks in the middle
+    //
+    if (nblock_left <= nblock_right)
+    {
+        unsigned i_from, j_from, i_to, j_to;
+        bm::get_block_coord(nblock_left, i_from, j_from);
+        bm::get_block_coord(nblock_right, i_to, j_to);
+
+        bm::word_t*** blk_root = blockman_.top_blocks_root();
+        {
+            // limit the scan to the top-level entries that exist
+            block_idx_type top_size = blockman_.top_block_size();
+            if (i_from >= top_size)
+                return false;
+            if (i_to >= top_size)
+            {
+                i_to = unsigned(top_size-1);
+                j_to = bm::set_sub_array_size-1;
+            }
+        }
+
+        for (unsigned i = i_from; i <= i_to; ++i)
         {
-            cnt += bm::bit_block_calc_count_range(block, 0, nbit_right);
-        }
+            bm::word_t** blk_blk = blk_root[i];
+            if (!blk_blk)
+                continue;
+            if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+                return true;
+
+            unsigned j = (i == i_from) ? j_from : 0;
+            unsigned j_limit = (i == i_to) ? j_to+1 : bm::set_sub_array_size;
+            do
+            {
+                bool any_one = bm::block_any(blk_blk[j]);
+                if (any_one)
+                    return any_one;
+            } while (++j < j_limit);
+        } // for i
     }
-    return cnt;
+    return false;
 }
-
// -----------------------------------------------------------------------
template<typename Alloc>
typename bvector<Alloc>::size_type
bvector<Alloc>::count_range(size_type left,
size_type right,
- const rs_index_type& rs_idx) const
+ const rs_index_type& rs_idx) const BMNOEXCEPT
{
BM_ASSERT(left <= right);
+ if (left > right)
+ bm::xor_swap(left, right);
+
BM_ASSERT_THROW(right < bm::id_max, BM_ERR_RANGE);
- BM_ASSERT_THROW(left <= right, BM_ERR_RANGE);
if (left == right)
return this->test(left);
// -----------------------------------------------------------------------
template<typename Alloc>
-bool bvector<Alloc>::get_bit(size_type n) const
+bool bvector<Alloc>::get_bit(size_type n) const BMNOEXCEPT
{
BM_ASSERT(n < size_);
BM_ASSERT_THROW((n < size_), BM_ERR_RANGE);
// -----------------------------------------------------------------------
template<typename Alloc>
-int bvector<Alloc>::compare(const bvector<Alloc>& bv) const
+int bvector<Alloc>::compare(const bvector<Alloc>& bv) const BMNOEXCEPT
{
int res;
unsigned top_blocks = blockman_.top_block_size();
template<typename Alloc>
bool bvector<Alloc>::find_first_mismatch(
const bvector<Alloc>& bvect, size_type& pos,
- size_type search_to) const
+ size_type search_to) const BMNOEXCEPT
{
unsigned top_blocks = blockman_.top_block_size();
bm::word_t*** top_root = blockman_.top_blocks_root();
// -----------------------------------------------------------------------
template<typename Alloc>
-void bvector<Alloc>::swap(bvector<Alloc>& bvect) BMNOEXEPT
+void bvector<Alloc>::swap(bvector<Alloc>& bvect) BMNOEXCEPT
{
if (this != &bvect)
{
// -----------------------------------------------------------------------
template<typename Alloc>
-void bvector<Alloc>::calc_stat(struct bvector<Alloc>::statistics* st) const
+void bvector<Alloc>::calc_stat(
+ struct bvector<Alloc>::statistics* st) const BMNOEXCEPT
{
BM_ASSERT(st);
if (!found)
break;
blk_blk = blk_root[i];
+ BM_ASSERT(blk_blk);
+ if (!blk_blk)
+ break;
}
if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
continue;
if (block_type) // gap block
{
- bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
- gap_block_set(gap_blk, val, nblock, nbit);
+ this->gap_block_set_no_ret(BMGAP_PTR(blk), val, nblock, nbit);
}
else // bit block
{
block_idx_type nblock_end = (ids[size_in-1] >> bm::set_block_shift);
if (nblock == nblock_end) // special case: one block import
{
- import_block(ids, nblock, 0, stop);
+ if (stop == 1)
+ set_bit_no_check(ids[0]);
+ else
+ import_block(ids, nblock, 0, stop);
return;
}
}
stop = bm::idx_arr_block_lookup_u32(ids, size_in, nblock, start);
#endif
BM_ASSERT(start < stop);
- import_block(ids, nblock, start, stop);
+
+ if (stop - start == 1 && n < bm::id_max) // just one bit to set
+ set_bit_no_check(n);
+ else
+ import_block(ids, nblock, start, stop);
start = stop;
} while (start < size_in);
}
template<class Alloc>
void bvector<Alloc>::import_block(const size_type* ids,
- block_idx_type nblock,
- size_type start, size_type stop)
+ block_idx_type nblock,
+ size_type start,
+ size_type stop)
{
+ BM_ASSERT(stop > start);
int block_type;
bm::word_t* blk =
- blockman_.check_allocate_block(nblock, 1, 0, &block_type, true/*allow NULL ret*/);
+ blockman_.check_allocate_block(nblock, 1, 0, &block_type,
+ true/*allow NULL ret*/);
if (!IS_FULL_BLOCK(blk))
{
+ // TODO: add a special case when we import just a few bits per block
if (BM_IS_GAP(blk))
+ {
blk = blockman_.deoptimize_block(nblock); // TODO: try to avoid
-
+ }
#ifdef BM64ADDR
bm::set_block_bits_u64(blk, ids, start, stop);
#else
return false;
// calculate word number in block and bit
- unsigned nbit = unsigned(n & bm::set_block_mask);
-
+ unsigned nbit = unsigned(n & bm::set_block_mask);
if (block_type) // gap
{
- bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
- unsigned is_set = gap_block_set(gap_blk, val, nblock, nbit);
- return is_set;
+ return gap_block_set(BMGAP_PTR(blk), val, nblock, nbit);
}
else // bit block
{
unsigned nword = unsigned(nbit >> bm::set_word_shift);
nbit &= bm::set_word_mask;
-
bm::word_t* word = blk + nword;
bm::word_t mask = (((bm::word_t)1) << nbit);
if (val)
{
- if ( ((*word) & mask) == 0 )
- {
- *word |= mask; // set bit
- return true;
- }
+            // Report whether the bit actually changed. '~' applied to the
+            // masked word is non-zero for both bit states, so using it here
+            // would make the function return true unconditionally.
+            val = ((*word & mask) == 0); // changed only if the bit was clear
+            *word |= mask; // set bit
+            return val;
         }
         else
         {
-            if ((*word) & mask)
-            {
-                *word &= ~mask; // clear bit
-                return true;
-            }
+            val = ((*word & mask) != 0); // changed only if the bit was set
+            *word &= ~mask; // clear bit
+            return val;
}
}
- return false;
+ //return false;
}
// -----------------------------------------------------------------------
template<class Alloc>
bool bvector<Alloc>::gap_block_set(bm::gap_word_t* gap_blk,
- bool val, block_idx_type nblock, unsigned nbit)
+ bool val, block_idx_type nblock,
+ unsigned nbit)
{
- unsigned is_set, new_block_len;
- new_block_len =
- bm::gap_set_value(val, gap_blk, nbit, &is_set);
- if (is_set)
+ // Assigns 'val' to bit 'nbit' of the GAP block via bm::gap_set_value() and
+ // returns the is_set flag that call produces
+ // (presumably non-zero when the bit value changed - confirm in gap_set_value).
+ unsigned is_set, new_len, old_len;
+ old_len = bm::gap_length(gap_blk)-1; // length before the update
+ new_len = bm::gap_set_value(val, gap_blk, nbit, &is_set);
+ if (old_len < new_len) // block got longer: check it against the limit
{
unsigned threshold = bm::gap_limit(gap_blk, blockman_.glen());
- if (new_block_len > threshold)
- {
+ if (new_len > threshold) // over the limit: let the block manager extend block 'nblock'
blockman_.extend_gap_block(nblock, gap_blk);
- }
}
return is_set;
}
+// -----------------------------------------------------------------------
+
+template<class Alloc>
+void bvector<Alloc>::gap_block_set_no_ret(bm::gap_word_t* gap_blk,
+                                          bool val,
+                                          block_idx_type nblock,
+                                          unsigned nbit)
+{
+    // Assign 'val' to bit 'nbit' of the GAP block; nothing is reported back.
+    const unsigned len_before = bm::gap_length(gap_blk)-1;
+    const unsigned len_after = bm::gap_set_value(val, gap_blk, nbit);
+
+    // the limit only needs checking when the update made the block longer
+    if (len_after <= len_before)
+        return;
+
+    const unsigned limit = bm::gap_limit(gap_blk, blockman_.glen());
+    if (len_after > limit)
+        blockman_.extend_gap_block(nblock, gap_blk);
+}
+
+
// -----------------------------------------------------------------------
template<class Alloc>
//---------------------------------------------------------------------
template<class Alloc>
-bool bvector<Alloc>::find(size_type from, size_type& pos) const
+bool bvector<Alloc>::find(size_type from, size_type& pos) const BMNOEXCEPT
{
- BM_ASSERT_THROW(from < bm::id_max, BM_ERR_RANGE);
-
- if (from == 0)
+ if (from == bm::id_max)
+ return false;
+ if (!from)
{
return find(pos);
}
//---------------------------------------------------------------------
template<class Alloc>
-bool bvector<Alloc>::find_reverse(size_type& pos) const
+bool bvector<Alloc>::find_reverse(size_type& pos) const BMNOEXCEPT
{
bool found;
}
if (found)
{
- block_idx_type base_idx = block_idx_type(i) * bm::set_sub_array_size * bm::gap_max_bits;
+ block_idx_type base_idx =
+ block_idx_type(i) * bm::set_sub_array_size *
+ bm::gap_max_bits;
base_idx += j * bm::gap_max_bits;
pos = base_idx + block_pos;
return found;
//---------------------------------------------------------------------
template<class Alloc>
-bool bvector<Alloc>::find(size_type& pos) const
+bool bvector<Alloc>::find(size_type& pos) const BMNOEXCEPT
{
bool found;
//---------------------------------------------------------------------
template<class Alloc>
-bool bvector<Alloc>::find_range(size_type& in_first, size_type& in_last) const
+bool bvector<Alloc>::find_range(size_type& in_first,
+ size_type& in_last) const BMNOEXCEPT
{
bool found = find(in_first);
if (found)
template<class Alloc>
bool bvector<Alloc>::find_rank(size_type rank_in,
size_type from,
- size_type& pos) const
+ size_type& pos) const BMNOEXCEPT
{
BM_ASSERT_THROW(from < bm::id_max, BM_ERR_RANGE);
bool bvector<Alloc>::find_rank(size_type rank_in,
size_type from,
size_type& pos,
- const rs_index_type& rs_idx) const
+ const rs_index_type& rs_idx) const BMNOEXCEPT
{
BM_ASSERT_THROW(from < bm::id_max, BM_ERR_RANGE);
template<class Alloc>
bool bvector<Alloc>::select(size_type rank_in, size_type& pos,
- const rs_index_type& rs_idx) const
+ const rs_index_type& rs_idx) const BMNOEXCEPT
{
bool ret = false;
template<class Alloc>
typename bvector<Alloc>::size_type
-bvector<Alloc>::check_or_next(size_type prev) const
+bvector<Alloc>::check_or_next(size_type prev) const BMNOEXCEPT
{
if (!blockman_.is_init())
return 0;
//---------------------------------------------------------------------
template<class Alloc>
-bool bvector<Alloc>::test_first_block_bit(block_idx_type nb) const
+bool bvector<Alloc>::test_first_block_bit(block_idx_type nb) const BMNOEXCEPT
{
if (nb >= bm::set_total_blocks) // last possible block
return false;
BM_ASSERT(gfunc);
(*gfunc)(blk, BMGAP_PTR(arg_blk));
- blockman_.optimize_bit_block(nb);
+ // TODO: commented out optimization, because it can be very slow
+ // need to take into account previous operation not to make
+ // fruitless attempts here
+ //blockman_.optimize_bit_block(nb);
return;
}
}
//---------------------------------------------------------------------
+//
+//---------------------------------------------------------------------
+/*!
+   \brief Advance the enumerator to the next set bit.
+
+   Continues inside the current block (GAP run or decoded bit wave) and,
+   when the block has no more set bits, resumes with search_in_blocks().
+
+   \return true when positioned on the next set bit; false when nothing is
+           left (invalidate() is called before returning false)
+*/
+template<class Alloc>
+bool bvector<Alloc>::enumerator::go_up() BMNOEXCEPT
+{
+ BM_ASSERT(this->valid());
+
+ block_descr_type* bdescr = &(this->bdescr_);
+ if (this->block_type_) // GAP
+ {
+ BM_ASSERT(this->block_type_ == 1);
+ ++this->position_;
+ if (--(bdescr->gap_.gap_len)) // bits still left in the current ON run
+ return true;
+ // next gap is "OFF" by definition.
+ if (*(bdescr->gap_.ptr) != bm::gap_max_bits - 1) // finished run does not end on the last bit of the block
+ {
+ gap_word_t prev = *(bdescr->gap_.ptr);
+ unsigned val = *(++(bdescr->gap_.ptr)); // step to the OFF run; val is its end
+ this->position_ += val - prev; // jump over the whole OFF run
+ // next gap is now "ON"
+ if (*(bdescr->gap_.ptr) != bm::gap_max_bits - 1) // OFF run is not the tail of the block
+ {
+ prev = *(bdescr->gap_.ptr);
+ val = *(++(bdescr->gap_.ptr));
+ bdescr->gap_.gap_len = (gap_word_t)(val - prev); // length of the new ON run
+ return true; // next "ON" found;
+ }
+ }
+ }
+ else // BIT
+ {
+ unsigned short idx = ++(bdescr->bit_.idx);
+ if (idx < bdescr->bit_.cnt) // more decoded bits left in the current wave
+ {
+ this->position_ = bdescr->bit_.pos + bdescr->bit_.bits[idx];
+ return true;
+ }
+ this->position_ += // wave exhausted: move position_ to the start of the following wave
+ (bm::set_bitscan_wave_size * 32) - bdescr->bit_.bits[--idx];
+ bdescr->bit_.ptr += bm::set_bitscan_wave_size;
+ if (decode_bit_group(bdescr)) // scan the remaining waves of this block
+ return true;
+ }
+
+ if (search_in_blocks()) // current block is exhausted, try the following blocks
+ return true;
+
+ this->invalidate();
+ return false;
+}
+
+//---------------------------------------------------------------------
+
+/*!
+   \brief Move the enumerator forward by the given number of set bits.
+
+   Steps inside the current block first (bit wave or GAP run) and calls
+   search_in_blocks() whenever the block runs out before rank is consumed.
+
+   \param rank - number of set bits to skip; 0 leaves the enumerator as is
+   \return false if the enumerator is (or becomes) invalid, true otherwise
+*/
+template<class Alloc>
+bool bvector<Alloc>::enumerator::skip(size_type rank) BMNOEXCEPT
+{
+ if (!this->valid())
+ return false;
+ if (!rank)
+ return this->valid(); // nothing to do
+
+ for (; rank; --rank)
+ {
+ block_descr_type* bdescr = &(this->bdescr_);
+ switch (this->block_type_)
+ {
+ case 0: // BitBlock
+ for (; rank; --rank)
+ {
+ unsigned short idx = ++(bdescr->bit_.idx);
+ if (idx < bdescr->bit_.cnt) // next bit is in the already decoded wave
+ {
+ this->position_ = bdescr->bit_.pos + bdescr->bit_.bits[idx];
+ continue;
+ }
+ this->position_ += // wave exhausted: move position_ to the start of the following wave
+ (bm::set_bitscan_wave_size * 32) - bdescr->bit_.bits[--idx];
+ bdescr->bit_.ptr += bm::set_bitscan_wave_size;
+
+ if (!decode_bit_group(bdescr, rank)) // rank-aware overload: may reduce rank while passing over waves
+ break; // no more set bits in this block
+ } // for rank
+ break;
+ case 1: // DGAP Block
+ for (; rank; --rank) // TODO: better skip logic
+ {
+ ++this->position_;
+ if (--(bdescr->gap_.gap_len)) // bits still left in the current ON run
+ continue;
+
+ // next gap is "OFF" by definition.
+ if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1) // finished run ends on the last bit of the block
+ break;
+ gap_word_t prev = *(bdescr->gap_.ptr);
+ unsigned int val = *(++(bdescr->gap_.ptr));
+
+ this->position_ += val - prev; // jump over the whole OFF run
+ // next gap is now "ON"
+ if (*(bdescr->gap_.ptr) == bm::gap_max_bits - 1) // OFF run is the tail of the block
+ break;
+ prev = *(bdescr->gap_.ptr);
+ val = *(++(bdescr->gap_.ptr));
+ bdescr->gap_.gap_len = (gap_word_t)(val - prev); // length of the new ON run
+ } // for rank
+ break;
+ default:
+ BM_ASSERT(0);
+ } // switch
+
+ if (!rank) // requested number of bits consumed inside the block
+ return true;
+
+ if (!search_in_blocks()) // block exhausted: continue with the following blocks
+ {
+ this->invalidate();
+ return false;
+ }
+ } // for rank
+
+ return this->valid();
+}
+
+
+//---------------------------------------------------------------------
+
+/*!
+   \brief Position the enumerator on the first set bit found at or after pos.
+
+   pos == 0 is delegated to go_first(). Otherwise the actual target is taken
+   from bvector::check_or_next(pos) and the block descriptor (GAP run length
+   or decoded bit wave) is rebuilt for that target.
+
+   \param pos - requested bit index
+   \return enumerator validity after the move; false when check_or_next()
+           reports no bit (the enumerator is invalidated)
+*/
+template<class Alloc>
+bool bvector<Alloc>::enumerator::go_to(size_type pos) BMNOEXCEPT
+{
+ if (pos == 0)
+ {
+ go_first();
+ return this->valid();
+ }
+
+ size_type new_pos = this->bv_->check_or_next(pos); // find the true pos
+ if (!new_pos) // no bits available
+ {
+ this->invalidate();
+ return false;
+ }
+ BM_ASSERT(new_pos >= pos);
+ pos = new_pos;
+
+
+ this->position_ = pos;
+ size_type nb = this->block_idx_ = (pos >> bm::set_block_shift); // index of the block holding pos
+ bm::bvector<Alloc>::blocks_manager_type& bman =
+ this->bv_->get_blocks_manager();
+ unsigned i0, j0;
+ bm::get_block_coord(nb, i0, j0);
+ this->block_ = bman.get_block(i0, j0);
+
+ BM_ASSERT(this->block_);
+
+ this->block_type_ = (bool)BM_IS_GAP(this->block_);
+
+ block_descr_type* bdescr = &(this->bdescr_);
+ unsigned nbit = unsigned(pos & bm::set_block_mask); // bit offset of pos inside its block
+
+ if (this->block_type_) // gap
+ {
+ this->position_ = nb * bm::set_block_size * 32; // index of the first bit of block nb
+ search_in_gapblock(); // locate the first ON run of the block
+
+ if (this->position_ == pos) // target is the first set bit of the block: descriptor is ready
+ return this->valid();
+ this->position_ = pos;
+
+ gap_word_t* gptr = BMGAP_PTR(this->block_);
+ unsigned is_set;
+ unsigned gpos = bm::gap_bfind(gptr, nbit, &is_set); // NOTE(review): presumably the index of the run containing nbit - confirm
+ BM_ASSERT(is_set);
+
+ bdescr->gap_.ptr = gptr + gpos;
+ if (gpos == 1)
+ {
+ bdescr->gap_.gap_len = bm::gap_word_t(gptr[gpos] - (nbit - 1)); // == gptr[gpos] - nbit + 1
+ }
+ else
+ {
+ bm::gap_word_t interval = bm::gap_word_t(gptr[gpos] - gptr[gpos - 1]);
+ bm::gap_word_t interval2 = bm::gap_word_t(nbit - gptr[gpos - 1]);
+ bdescr->gap_.gap_len = bm::gap_word_t(interval - interval2 + 1); // == gptr[gpos] - nbit + 1
+ }
+ }
+ else // bit
+ {
+ if (nbit == 0) // target is the first bit of the block: a plain scan from the block start finds it
+ {
+ search_in_bitblock();
+ return this->valid();
+ }
+
+ unsigned nword = unsigned(nbit >> bm::set_word_shift);
+
+ // check if we need to step back to match the wave
+ unsigned parity = nword % bm::set_bitscan_wave_size; // word offset of nword inside its wave
+ bdescr->bit_.ptr = this->block_ + (nword - parity); // first word of the wave
+ bdescr->bit_.cnt = bm::bitscan_wave(bdescr->bit_.ptr, bdescr->bit_.bits);
+ BM_ASSERT(bdescr->bit_.cnt);
+ bdescr->bit_.pos = (nb * bm::set_block_size * 32) + ((nword - parity) * 32); // bit index of the wave start
+ bdescr->bit_.idx = 0;
+ nbit &= bm::set_word_mask;
+ nbit += 32 * parity; // nbit is now the offset of the target relative to the wave start
+ for (unsigned i = 0; i < bdescr->bit_.cnt; ++i) // find the target among the decoded bits, idx follows i
+ {
+ if (bdescr->bit_.bits[i] == nbit)
+ return this->valid();
+ bdescr->bit_.idx++;
+ } // for
+ BM_ASSERT(0); // the target bit is expected to be in the decoded list
+ }
+ return this->valid();
+}
+
+//---------------------------------------------------------------------
+/*!
+   \brief Position the enumerator on the first set bit of the bit-vector.
+
+   Walks the two-level block tree from its beginning; position_ and
+   block_idx_ are advanced over every unallocated entry that is passed.
+   The enumerator is invalidated when the blocks manager is not initialized
+   or when no block yields a set bit.
+*/
+template<class Alloc>
+void bvector<Alloc>::enumerator::go_first() BMNOEXCEPT
+{
+ BM_ASSERT(this->bv_);
+
+ blocks_manager_type* bman = &(this->bv_->blockman_);
+ if (!bman->is_init())
+ {
+ this->invalidate();
+ return;
+ }
+
+ bm::word_t*** blk_root = bman->top_blocks_root();
+ this->block_idx_ = this->position_= 0;
+ unsigned i, j;
+
+ for (i = 0; i < bman->top_block_size(); ++i)
+ {
+ bm::word_t** blk_blk = blk_root[i];
+ if (blk_blk == 0) // not allocated
+ {
+ this->block_idx_ += bm::set_sub_array_size; // pass over a whole sub-array of blocks
+ this->position_ += bm::bits_in_array;
+ continue;
+ }
+
+ if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) // marker value: substitute the real "full" sub-array
+ blk_blk = FULL_SUB_BLOCK_REAL_ADDR;
+
+ for (j = 0; j < bm::set_sub_array_size; ++j,++(this->block_idx_))
+ {
+ this->block_ = blk_blk[j];
+ if (this->block_ == 0) // empty block: pass over its bits
+ {
+ this->position_ += bits_in_block;
+ continue;
+ }
+ if (BM_IS_GAP(this->block_))
+ {
+ this->block_type_ = 1;
+ if (search_in_gapblock()) // found the first ON run
+ return;
+ }
+ else
+ {
+ if (this->block_ == FULL_BLOCK_FAKE_ADDR) // marker value: substitute the real "full" block
+ this->block_ = FULL_BLOCK_REAL_ADDR;
+ this->block_type_ = 0;
+ if (search_in_bitblock()) // found the first non-empty wave
+ return;
+ }
+ } // for j
+ } // for i
+
+ this->invalidate(); // no set bit anywhere
+}
+
+//---------------------------------------------------------------------
+
+/// Decode the wave of words addressed by bdescr->bit_.ptr.
+///
+/// The result of bm::bitscan_wave() is stored in bit_.cnt (the decoded
+/// entries go to bit_.bits); on a non-zero count bit_.idx is reset to 0.
+///
+/// @return true if the wave produced at least one entry
+template<class Alloc>
+bool
+bvector<Alloc>::enumerator::decode_wave(block_descr_type* bdescr) BMNOEXCEPT
+{
+    bdescr->bit_.cnt = bm::bitscan_wave(bdescr->bit_.ptr, bdescr->bit_.bits);
+    if (!bdescr->bit_.cnt) // nothing decoded, bit_.idx is left untouched
+        return false;
+
+    bdescr->bit_.idx = 0;
+    return true;
+}
+
+//---------------------------------------------------------------------
+
+/// Scan the current bit block wave by wave, starting at bdescr->bit_.ptr,
+/// until a wave with decoded bits is found.
+///
+/// Every wave without bits advances position_ by the wave size in bits.
+/// On success bit_.pos is set to the position of the wave start and
+/// position_ to the first decoded bit of that wave.
+///
+/// @return true if a non-empty wave was found before the end of the block
+template<class Alloc>
+bool
+bvector<Alloc>::enumerator::decode_bit_group(block_descr_type* bdescr) BMNOEXCEPT
+{
+    const word_t* block_end = this->block_ + bm::set_block_size;
+    while (bdescr->bit_.ptr < block_end)
+    {
+        if (!decode_wave(bdescr))
+        {
+            // empty wave: step over it
+            this->position_ += bm::set_bitscan_wave_size * 32;
+            bdescr->bit_.ptr += bm::set_bitscan_wave_size;
+            continue;
+        }
+        bdescr->bit_.pos = this->position_;
+        this->position_ += bdescr->bit_.bits[0];
+        return true;
+    } // while
+    return false;
+}
+
+//---------------------------------------------------------------------
+
+/// Rank-aware variant of decode_bit_group().
+///
+/// For every wave the population count of its two 64-bit halves is
+/// computed. While rank is greater than that count, the count is
+/// subtracted from rank and the wave is stepped over without decoding;
+/// otherwise the wave is decoded and, if it holds bits, becomes current.
+///
+/// @param bdescr - block descriptor to update
+/// @param rank   - [in/out] remaining rank, reduced for every wave passed
+/// @return true if a wave was decoded, false if the block ended first
+template<class Alloc>
+bool
+bvector<Alloc>::enumerator::decode_bit_group(block_descr_type* bdescr,
+                                             size_type& rank) BMNOEXCEPT
+{
+    const word_t* block_end = this->block_ + bm::set_block_size;
+    while (bdescr->bit_.ptr < block_end)
+    {
+        const bm::id64_t* w64_p = (bm::id64_t*)bdescr->bit_.ptr;
+        BM_ASSERT(bm::set_bitscan_wave_size == 4); // TODO: better handle this
+
+        unsigned cnt = bm::word_bitcount64(w64_p[0]);
+        cnt += bm::word_bitcount64(w64_p[1]);
+
+        if (rank > cnt)
+            rank -= cnt; // the whole wave is skipped
+        else if (decode_wave(bdescr))
+        {
+            bdescr->bit_.pos = this->position_;
+            this->position_ += bdescr->bit_.bits[0];
+            return true;
+        }
+
+        this->position_ += bm::set_bitscan_wave_size * 32; // wave size
+        bdescr->bit_.ptr += bm::set_bitscan_wave_size;
+    } // while
+    return false;
+}
+
+//---------------------------------------------------------------------
+
+/// Start scanning the current bit block from its first word.
+///
+/// @return result of decode_bit_group(): true if the block has a wave
+///         with decoded bits
+template<class Alloc>
+bool bvector<Alloc>::enumerator::search_in_bitblock() BMNOEXCEPT
+{
+    BM_ASSERT(this->block_type_ == 0);
+
+    block_descr_type& descr = this->bdescr_;
+    descr.bit_.ptr = this->block_; // rewind to the beginning of the block
+    return decode_bit_group(&descr);
+}
+
+//---------------------------------------------------------------------
+/*!
+   \brief Find the first ON run of the current GAP block.
+
+   Starts from the block header (bit 0 of the header word is taken as the
+   value of the first run) and walks the run ends. position_ is advanced
+   past an OFF run; on success gap_.ptr addresses the end of the ON run and
+   gap_.gap_len holds its length.
+
+   \return true if an ON run was found, false if the OFF run reaches the
+           last bit of the block
+*/
+template<class Alloc>
+bool bvector<Alloc>::enumerator::search_in_gapblock() BMNOEXCEPT
+{
+ BM_ASSERT(this->block_type_ == 1);
+
+ block_descr_type* bdescr = &(this->bdescr_);
+ bdescr->gap_.ptr = BMGAP_PTR(this->block_);
+ unsigned bitval = *(bdescr->gap_.ptr) & 1; // value of the first run, read from the header word
+
+ ++(bdescr->gap_.ptr); // step from the header to the end of the first run
+
+ for (;true;)
+ {
+ unsigned val = *(bdescr->gap_.ptr); // end of the run being examined
+ if (bitval)
+ {
+ gap_word_t* first = BMGAP_PTR(this->block_) + 1;
+ if (bdescr->gap_.ptr == first) // ON run starts at bit 0 of the block
+ {
+ bdescr->gap_.gap_len = (gap_word_t)(val + 1);
+ }
+ else
+ {
+ bdescr->gap_.gap_len = // run length = this end minus the previous run end
+ (gap_word_t)(val - *(bdescr->gap_.ptr-1));
+ }
+ return true;
+ }
+ this->position_ += val + 1; // pass over the OFF run
+ if (val == bm::gap_max_bits - 1) // OFF run extends to the end of the block
+ break;
+ bitval ^= 1; // runs alternate: the next one is ON
+ ++(bdescr->gap_.ptr);
+ }
+ return false;
+}
+
+//---------------------------------------------------------------------
+/*!
+   \brief Continue the search for a set bit in the blocks that follow the
+          current one.
+
+   block_idx_ is incremented first, then the top-level array and its
+   sub-arrays are scanned; block_idx_ and position_ are advanced over every
+   unallocated entry that is passed.
+
+   \return true if search_in_gapblock() or search_in_bitblock() succeeded
+           for some block, false when the top-level array is exhausted
+*/
+template<class Alloc>
+bool bvector<Alloc>::enumerator::search_in_blocks() BMNOEXCEPT
+{
+ ++(this->block_idx_); // resume from the block after the current one
+ const blocks_manager_type& bman = this->bv_->blockman_;
+ block_idx_type i = this->block_idx_ >> bm::set_array_shift; // top-level index of that block
+ block_idx_type top_block_size = bman.top_block_size();
+ bm::word_t*** blk_root = bman.top_blocks_root();
+ for (; i < top_block_size; ++i)
+ {
+ bm::word_t** blk_blk = blk_root[i];
+ if (blk_blk == 0)
+ {
+ // fast scan fwd in top level
+ size_type bn = this->block_idx_ + bm::set_sub_array_size;
+ size_type pos = this->position_ + bm::bits_in_array;
+ for (++i; i < top_block_size; ++i)
+ {
+ if (blk_root[i]) // first allocated top-level entry ends the run of empty ones
+ break;
+ bn += bm::set_sub_array_size;
+ pos += bm::bits_in_array;
+ } // for i
+ this->block_idx_ = bn;
+ this->position_ = pos;
+ if ((i < top_block_size) && blk_root[i])
+ --i; // compensate the ++i of the outer loop so entry i is processed next
+ continue;
+ }
+ if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR) // marker value: substitute the real "full" sub-array
+ blk_blk = FULL_SUB_BLOCK_REAL_ADDR;
+
+ block_idx_type j = this->block_idx_ & bm::set_array_mask; // index inside the sub-array
+
+ for(; j < bm::set_sub_array_size; ++j, ++(this->block_idx_))
+ {
+ this->block_ = blk_blk[j];
+ if (this->block_ == 0) // empty block: pass over its bits
+ {
+ this->position_ += bm::bits_in_block;
+ continue;
+ }
+ this->block_type_ = BM_IS_GAP(this->block_);
+ if (this->block_type_)
+ {
+ if (search_in_gapblock())
+ return true;
+ }
+ else
+ {
+ if (this->block_ == FULL_BLOCK_FAKE_ADDR) // marker value: substitute the real "full" block
+ this->block_ = FULL_BLOCK_REAL_ADDR;
+ if (search_in_bitblock())
+ return true;
+ }
+ } // for j
+ } // for i
+ return false;
+}
+//---------------------------------------------------------------------
} // namespace
public:
+ // -----------------------------------------------------------------------
/*! @name Construction and setup */
//@{
aggregator();
void set_optimization(
typename bvector_type::optmode opt = bvector_type::opt_compress)
{ opt_mode_ = opt; }
+
+    /**
+        Switch the count-only mode on or off.
+        The accumulated result count is reset to 0 on every call.
+        @param count_mode - new value of the compute-count flag
+        @sa count
+    */
+    void set_compute_count(bool count_mode)
+    {
+        count_ = 0;
+        compute_count_ = count_mode;
+    }
+
//@}
@return current arg group size (0 if vector was not added (empty))
@sa reset
*/
- unsigned add(const bvector_type* bv, unsigned agr_group = 0);
+ unsigned add(const bvector_type* bv, unsigned agr_group = 0) BMNOEXCEPT;
/**
Reset aggregate groups, forget all attached vectors
*/
- void reset();
+ void reset() BMNOEXCEPT;
/**
Aggregate added group of vectors using logical OR
Set search hint for the range, where results needs to be searched
(experimental for internal use).
*/
- void set_range_hint(size_type from, size_type to);
+ void set_range_hint(size_type from, size_type to) BMNOEXCEPT;
+
+    /// @return search result count accumulated so far
+    ///         (see set_compute_count())
+    size_type count() const
+    { return count_; }
//@}
//@{
/** Get current operation code */
- int get_operation() const { return operation_; }
+ int get_operation() const BMNOEXCEPT { return operation_; }
/** Set operation code for the aggregator */
- void set_operation(int op_code) { operation_ = op_code; }
+ void set_operation(int op_code) BMNOEXCEPT { operation_ = op_code; }
/**
Prepare operation, create internal resources, analyse dependencies.
bool init_clear = true);
static
- unsigned max_top_blocks(const bvector_type_const_ptr* bv_src, unsigned src_size);
+ unsigned max_top_blocks(const bvector_type_const_ptr* bv_src,
+ unsigned src_size) BMNOEXCEPT;
bm::word_t* sort_input_blocks_or(const bvector_type_const_ptr* bv_src,
unsigned src_size,
unsigned i, unsigned j,
unsigned* arg_blk_count,
- unsigned* arg_blk_gap_count);
+ unsigned* arg_blk_gap_count) BMNOEXCEPT;
bm::word_t* sort_input_blocks_and(const bvector_type_const_ptr* bv_src,
unsigned src_size,
unsigned i, unsigned j,
unsigned* arg_blk_count,
- unsigned* arg_blk_gap_count);
+ unsigned* arg_blk_gap_count) BMNOEXCEPT;
bool process_bit_blocks_or(blocks_manager_type& bman_target,
unsigned find_effective_sub_block_size(unsigned i,
const bvector_type_const_ptr* bv_src,
unsigned src_size,
- bool top_null_as_zero);
-
- bool any_carry_overs(unsigned co_size) const;
+ bool top_null_as_zero) BMNOEXCEPT;
+
+ static
+ bool any_carry_overs(const unsigned char* carry_overs,
+ unsigned co_size) BMNOEXCEPT;
/**
@return carry over
*/
- bool process_shift_right_and(const bm::word_t* arg_blk,
- digest_type& digest,
- unsigned carry_over);
-
+ static
+ unsigned process_shift_right_and(bm::word_t* BMRESTRICT blk,
+ const bm::word_t* BMRESTRICT arg_blk,
+ digest_type& BMRESTRICT digest,
+ unsigned carry_over) BMNOEXCEPT;
+
+ static
const bm::word_t* get_arg_block(const bvector_type_const_ptr* bv_src,
- unsigned k, unsigned i, unsigned j);
+ unsigned k, unsigned i, unsigned j) BMNOEXCEPT;
bvector_type* check_create_target();
/// @internal
struct arena
{
- BM_DECLARE_TEMP_BLOCK(tb1);
- BM_DECLARE_TEMP_BLOCK(tb_opt); ///< temp block for results optimization
+ BM_DECLARE_TEMP_BLOCK(tb1)
+ BM_DECLARE_TEMP_BLOCK(tb_opt) ///< temp block for results optimization
const bm::word_t* v_arg_or_blk[max_aggregator_cap]; ///< source blocks list (OR)
const bm::gap_word_t* v_arg_or_blk_gap[max_aggregator_cap]; ///< source GAP blocks list (OR)
const bm::word_t* v_arg_and_blk[max_aggregator_cap]; ///< source blocks list (AND)
size_type range_from_ = bm::id_max; ///< search from
size_type range_to_ = bm::id_max; ///< search to
- typename bvector_type::optmode opt_mode_;
-
+ typename bvector_type::optmode opt_mode_; ///< perform search result optimization
+ bool compute_count_; ///< compute search result count
+ size_type count_; ///< search result count
};
template<typename BV>
aggregator<BV>::aggregator()
-: opt_mode_(bvector_type::opt_none)
+: opt_mode_(bvector_type::opt_none),
+ compute_count_(false),
+ count_(0)
{
ar_ = (arena*) bm::aligned_new_malloc(sizeof(arena));
}
// ------------------------------------------------------------------------
template<typename BV>
-void aggregator<BV>::reset()
+void aggregator<BV>::reset() BMNOEXCEPT
{
arg_group0_size = arg_group1_size = operation_ = top_block_size_ = 0;
operation_status_ = op_undefined;
range_set_ = false;
range_from_ = range_to_ = bm::id_max;
+ count_ = 0;
}
// ------------------------------------------------------------------------
template<typename BV>
-void aggregator<BV>::set_range_hint(size_type from, size_type to)
+void aggregator<BV>::set_range_hint(size_type from, size_type to) BMNOEXCEPT
{
range_from_ = from; range_to_ = to;
range_set_ = true;
// ------------------------------------------------------------------------
template<typename BV>
-typename aggregator<BV>::bvector_type* aggregator<BV>::check_create_target()
+typename aggregator<BV>::bvector_type*
+aggregator<BV>::check_create_target()
{
if (!bv_target_)
{
- bv_target_ = new bvector_type();
+ bv_target_ = new bvector_type(); //TODO: get rid of "new"
bv_target_->init();
}
return bv_target_;
// ------------------------------------------------------------------------
template<typename BV>
-unsigned aggregator<BV>::add(const bvector_type* bv, unsigned agr_group)
+unsigned aggregator<BV>::add(const bvector_type* bv,
+ unsigned agr_group) BMNOEXCEPT
{
BM_ASSERT_THROW(agr_group <= 1, BM_ERR_RANGE);
BM_ASSERT(agr_group <= 1);
template<typename BV>
void aggregator<BV>::combine_shift_right_and(bvector_type& bv_target)
{
+ count_ = 0;
combine_shift_right_and(bv_target, ar_->arg_bv0, arg_group0_size, false);
}
template<typename BV>
unsigned
-aggregator<BV>::find_effective_sub_block_size(unsigned i,
- const bvector_type_const_ptr* bv_src,
- unsigned src_size,
- bool top_null_as_zero)
+aggregator<BV>::find_effective_sub_block_size(
+ unsigned i,
+ const bvector_type_const_ptr* bv_src,
+ unsigned src_size,
+ bool top_null_as_zero) BMNOEXCEPT
{
// quick hack to avoid scanning large, arrays, where such scan can be quite
// expensive by itself (this makes this function approximate)
max_size = j;
break;
}
- }
+ } // for j
if (max_size == bm::set_sub_array_size - 1)
break;
} // for k
{
BM_ASSERT(src_size);
- typename bvector_type::blocks_manager_type& bman_target = bv_target.get_blocks_manager();
-
unsigned arg_blk_count = 0;
unsigned arg_blk_gap_count = 0;
bm::word_t* blk =
if (ar_->v_arg_and_blk[0] == FULL_BLOCK_REAL_ADDR)
{
// another nothing to do: one FULL block
+ blocks_manager_type& bman_target = bv_target.get_blocks_manager();
bman_target.check_alloc_top_subblock(i);
bman_target.set_block_ptr(i, j, blk);
if (++j == bm::set_sub_array_size)
- {
bman_target.validate_top_full(i);
- }
return;
}
}
//
if (arg_blk_gap_count)
{
- digest =
- process_gap_blocks_and(arg_blk_gap_count, digest);
+ digest = process_gap_blocks_and(arg_blk_gap_count, digest);
}
- if (digest) // some results
+ if (digest) // we have results , allocate block and copy from temp
{
- // we have some results, allocate block and copy from temp
+ blocks_manager_type& bman_target = bv_target.get_blocks_manager();
bman_target.opt_copy_bit_block(i, j, ar_->tb1,
- opt_mode_, ar_->tb_opt);
+ opt_mode_, ar_->tb_opt);
}
}
}
bool b = bm::gap_test_unr(ar_->v_arg_and_blk_gap[k], single_bit_idx);
if (!b)
return 0; // AND 0 causes result to turn 0
- }
+ } // for k
break;
}
}
template<typename BV>
unsigned
-aggregator<BV>::max_top_blocks(const bvector_type_const_ptr* bv_src, unsigned src_size)
+aggregator<BV>::max_top_blocks(const bvector_type_const_ptr* bv_src,
+ unsigned src_size) BMNOEXCEPT
{
unsigned top_blocks = 1;
// ------------------------------------------------------------------------
template<typename BV>
-bm::word_t* aggregator<BV>::sort_input_blocks_or(const bvector_type_const_ptr* bv_src,
- unsigned src_size,
- unsigned i, unsigned j,
- unsigned* arg_blk_count,
- unsigned* arg_blk_gap_count)
+bm::word_t* aggregator<BV>::sort_input_blocks_or(
+ const bvector_type_const_ptr* bv_src,
+ unsigned src_size,
+ unsigned i, unsigned j,
+ unsigned* arg_blk_count,
+ unsigned* arg_blk_gap_count) BMNOEXCEPT
{
bm::word_t* blk = 0;
for (unsigned k = 0; k < src_size; ++k)
// ------------------------------------------------------------------------
template<typename BV>
-bm::word_t* aggregator<BV>::sort_input_blocks_and(const bvector_type_const_ptr* bv_src,
- unsigned src_size,
- unsigned i, unsigned j,
- unsigned* arg_blk_count,
- unsigned* arg_blk_gap_count)
+bm::word_t* aggregator<BV>::sort_input_blocks_and(
+ const bvector_type_const_ptr* bv_src,
+ unsigned src_size,
+ unsigned i, unsigned j,
+ unsigned* arg_blk_count,
+ unsigned* arg_blk_gap_count) BMNOEXCEPT
{
unsigned full_blk_cnt = 0;
bm::word_t* blk = FULL_BLOCK_FAKE_ADDR;
{
if (i > top_block_size_)
{
- if (!this->any_carry_overs(src_and_size))
+ if (!any_carry_overs(&ar_->carry_overs_[0], src_and_size))
break; // quit early if there is nothing to carry on
}
unsigned j = 0;
do
{
- bool found = combine_shift_right_and(i, j, bv_target, bv_src_and, src_and_size);
+ bool found =
+ combine_shift_right_and(i, j, bv_target, bv_src_and, src_and_size);
if (found && any)
return found;
} while (++j < bm::set_sub_array_size);
} // for i
+ if (compute_count_)
+ return bool(count_);
+
return bv_target.any();
}
const bvector_type_const_ptr* bv_src,
unsigned src_size)
{
- blocks_manager_type& bman_target = bv_target.get_blocks_manager();
bm::word_t* blk = temp_blk_ ? temp_blk_ : ar_->tb1;
unsigned char* carry_overs = &(ar_->carry_overs_[0]);
if (blk_zero) // delayed temp block 0-init requested
{
bm::bit_block_set(blk, 0);
- blk_zero = false;
+ blk_zero = !blk_zero; // = false
}
const bm::word_t* arg_blk = get_arg_block(bv_src, k, i, j);
- carry_overs[k] = process_shift_right_and(arg_blk, digest, carry_over);
+ carry_overs[k] = (unsigned char)
+ process_shift_right_and(blk, arg_blk, digest, carry_over);
+ BM_ASSERT(carry_overs[k] == 0 || carry_overs[k] == 1);
} // for k
-
+
+ if (blk_zero) // delayed temp block 0-init
+ {
+ bm::bit_block_set(blk, 0);
+ }
// block now gets emitted into the target bit-vector
if (digest)
{
BM_ASSERT(!bm::bit_is_all_zero(blk));
- bman_target.opt_copy_bit_block(i, j, blk,
- opt_mode_, ar_->tb_opt);
+
+ if (compute_count_)
+ {
+ unsigned cnt = bm::bit_block_count(blk, digest);
+ count_ += cnt;
+ }
+ else
+ {
+ blocks_manager_type& bman_target = bv_target.get_blocks_manager();
+ bman_target.opt_copy_bit_block(i, j, blk, opt_mode_, ar_->tb_opt);
+ }
return true;
}
return false;
// ------------------------------------------------------------------------
template<typename BV>
-bool aggregator<BV>::process_shift_right_and(const bm::word_t* arg_blk,
- digest_type& digest,
- unsigned carry_over)
+unsigned aggregator<BV>::process_shift_right_and(
+ bm::word_t* BMRESTRICT blk,
+ const bm::word_t* BMRESTRICT arg_blk,
+ digest_type& BMRESTRICT digest,
+ unsigned carry_over) BMNOEXCEPT
{
- bm::word_t* blk = temp_blk_ ? temp_blk_ : ar_->tb1;
+ BM_ASSERT(carry_over == 1 || carry_over == 0);
if (BM_IS_GAP(arg_blk)) // GAP argument
{
if (digest)
{
carry_over =
- bm::bit_block_shift_r1_and_unr(blk, carry_over, arg_blk,
- &digest);
+ bm::bit_block_shift_r1_and_unr(blk, carry_over, arg_blk,
+ &digest);
}
else // digest == 0
{
}
else // arg is zero - target block => zero
{
- unsigned co = blk[bm::set_block_size-1] >> 31; // carry out
+ carry_over = blk[bm::set_block_size-1] >> 31; // carry out
if (digest)
{
bm::bit_block_set(blk, 0); // TODO: digest based set
- digest ^= digest;
+ digest = 0;
}
- carry_over = co;
}
}
return carry_over;
template<typename BV>
const bm::word_t* aggregator<BV>::get_arg_block(
- const bvector_type_const_ptr* bv_src,
- unsigned k, unsigned i, unsigned j)
+ const bvector_type_const_ptr* bv_src,
+ unsigned k, unsigned i, unsigned j) BMNOEXCEPT
{
- const blocks_manager_type& bman_arg = bv_src[k]->get_blocks_manager();
- return bman_arg.get_block(i, j);
+ return bv_src[k]->get_blocks_manager().get_block(i, j);
}
// ------------------------------------------------------------------------
template<typename BV>
-bool aggregator<BV>::any_carry_overs(unsigned co_size) const
+bool aggregator<BV>::any_carry_overs(const unsigned char* carry_overs,
+ unsigned co_size) BMNOEXCEPT
{
- for (unsigned i = 0; i < co_size; ++i)
- if (ar_->carry_overs_[i])
- return true;
- return false;
+ // TODO: loop unroll?
+ unsigned acc = carry_overs[0];
+ for (unsigned i = 1; i < co_size; ++i)
+ acc |= carry_overs[i];
+// if (ar_->carry_overs_[i])
+// return true;
+// return false;
+ return acc;
}
// ------------------------------------------------------------------------
{
if (i > top_block_size_)
{
- if (!this->any_carry_overs(arg_group0_size))
+ if (!this->any_carry_overs(&ar_->carry_overs_[0], arg_group0_size))
{
operation_status_ = op_done;
return operation_status_;
\ingroup setalgo
*/
template<class BV>
-typename BV::size_type count_and(const BV& bv1, const BV& bv2)
+typename BV::size_type count_and(const BV& bv1, const BV& bv2) BMNOEXCEPT
{
return bm::distance_and_operation(bv1, bv2);
}
\ingroup setalgo
*/
template<class BV>
-typename BV::size_type any_and(const BV& bv1, const BV& bv2)
+typename BV::size_type any_and(const BV& bv1, const BV& bv2) BMNOEXCEPT
{
distance_metric_descriptor dmd(bm::COUNT_AND);
*/
template<class BV>
bm::distance_metric_descriptor::size_type
-count_xor(const BV& bv1, const BV& bv2)
+count_xor(const BV& bv1, const BV& bv2) BMNOEXCEPT
{
distance_metric_descriptor dmd(bm::COUNT_XOR);
\ingroup setalgo
*/
template<class BV>
-typename BV::size_type any_xor(const BV& bv1, const BV& bv2)
+typename BV::size_type any_xor(const BV& bv1, const BV& bv2) BMNOEXCEPT
{
distance_metric_descriptor dmd(bm::COUNT_XOR);
\ingroup setalgo
*/
template<class BV>
-typename BV::size_type count_sub(const BV& bv1, const BV& bv2)
+typename BV::size_type count_sub(const BV& bv1, const BV& bv2) BMNOEXCEPT
{
distance_metric_descriptor dmd(bm::COUNT_SUB_AB);
\ingroup setalgo
*/
template<class BV>
-typename BV::size_type any_sub(const BV& bv1, const BV& bv2)
+typename BV::size_type any_sub(const BV& bv1, const BV& bv2) BMNOEXCEPT
{
distance_metric_descriptor dmd(bm::COUNT_SUB_AB);
\ingroup setalgo
*/
template<class BV>
-typename BV::size_type count_or(const BV& bv1, const BV& bv2)
+typename BV::size_type count_or(const BV& bv1, const BV& bv2) BMNOEXCEPT
{
distance_metric_descriptor dmd(bm::COUNT_OR);
\ingroup setalgo
*/
template<class BV>
-typename BV::size_type any_or(const BV& bv1, const BV& bv2)
+typename BV::size_type any_or(const BV& bv1, const BV& bv2) BMNOEXCEPT
{
distance_metric_descriptor dmd(bm::COUNT_OR);
#define BM_SCANNER_OP(x) \
- if (0 != (block = blk_blk[j+x])) \
+if (0 != (block = blk_blk[j+x])) \
+{ \
+ if (BM_IS_GAP(block)) \
{ \
- if (BM_IS_GAP(block)) \
- { \
- bm::for_each_gap_blk(BMGAP_PTR(block), (r+j+x)*bm::bits_in_block,\
- bit_functor); \
- } \
- else \
- { \
- bm::for_each_bit_blk(block, (r+j+x)*bm::bits_in_block,bit_functor); \
- } \
- }
+ bm::for_each_gap_blk(BMGAP_PTR(block), (r+j+x)*bm::bits_in_block,\
+ bit_functor); \
+ } \
+ else \
+ { \
+ bm::for_each_bit_blk(block, (r+j+x)*bm::bits_in_block,bit_functor); \
+ } \
+}
/**
@brief bit-vector visitor scanner to traverse each 1 bit using C++ visitor
@param bv - bit vector to scan
- @param bit_functor (should support add_bits() and add_range() methods
+ @param bit_functor - visitor: should support add_bits(), add_range()
\ingroup setalgo
+ @sa for_each_bit_range visit_each_bit
*/
template<class BV, class Func>
void for_each_bit(const BV& bv,
} // for i
}
+/**
+    @brief bit-vector range visitor to traverse each 1 bit in [left..right]
+
+    The bounds may be given in any order (they are swapped when
+    left > right). A right bound equal to bm::id_max is reduced by one
+    before the scan.
+
+    @param bv - bit vector to scan
+    @param left - start of closed interval [left..right]
+    @param right - end of closed interval [left..right]
+    @param bit_functor - visitor: should support add_bits(), add_range()
+
+    \ingroup setalgo
+    @sa for_each_bit
+*/
+template<class BV, class Func>
+void for_each_bit_range(const BV& bv,
+                        typename BV::size_type left,
+                        typename BV::size_type right,
+                        Func& bit_functor)
+{
+    if (right < left) // normalize the order of the bounds
+        bm::xor_swap(left, right);
+    if (bm::id_max == right) // bm::id_max itself is not passed on as a bound
+        --right;
+    BM_ASSERT(left < bm::id_max && right < bm::id_max);
+
+    bm::for_each_bit_range_no_check(bv, left, right, bit_functor);
+}
+
+
#undef BM_SCANNER_OP
+
+/// Private adaptor turning a C-style callback into a visitor functor
+/// with add_bits() / add_range() methods.
+///
+/// The callback is invoked once per reported bit with the stored handle
+/// and the bit index; anything the callback returns is discarded.
+///
+/// @internal
+///
+template <class VCBT, class size_type>
+struct bit_vitor_callback_adaptor
+{
+    typedef VCBT bit_visitor_callback_type;
+
+    bit_vitor_callback_adaptor(void* h, bit_visitor_callback_type cb_func)
+        : handle_(h), func_(cb_func)
+    {}
+
+    /// Report bits given as an array of offsets relative to offset
+    void add_bits(size_type offset, const unsigned char* bits, unsigned size)
+    {
+        const unsigned char* const bits_end = bits + size;
+        for (const unsigned char* p = bits; p != bits_end; ++p)
+            func_(handle_, offset + *p);
+    }
+
+    /// Report size consecutive bits starting at offset
+    void add_range(size_type offset, size_type size)
+    {
+        for (size_type k = 0; k != size; ++k)
+            func_(handle_, offset + k);
+    }
+
+    void* handle_;                   ///< opaque pointer handed to the callback
+    bit_visitor_callback_type func_; ///< callback to invoke for each bit
+};
+
+
+/// Functor for bit-copy (for testing)
+///
+/// Visitor that reproduces every reported bit in a target bit-vector.
+/// The target is init()-ed by the constructor; add_bits() sets single bits
+/// with set_bit_no_check(), add_range() sets a closed range with
+/// set_range().
+///
+/// @internal
+///
+template <class BV>
+struct bit_vistor_copy_functor
+{
+    typedef typename BV::size_type size_type;
+
+    /// @param bv - target bit-vector, kept by reference
+    bit_vistor_copy_functor(BV& bv)
+        : bv_(bv),
+          func_() // not used by this functor; value-initialized so that
+                  // copying the functor never reads an indeterminate value
+    {
+        bv_.init();
+    }
+
+    /// Set bits given as an array of offsets relative to offset
+    void add_bits(size_type offset, const unsigned char* bits, unsigned size)
+    {
+        BM_ASSERT(size);
+        for (unsigned i = 0; i < size; ++i)
+            bv_.set_bit_no_check(offset + bits[i]);
+    }
+
+    /// Set size consecutive bits: [offset .. offset + size - 1]
+    void add_range(size_type offset, size_type size)
+    {
+        BM_ASSERT(size);
+        bv_.set_range(offset, offset + size - 1);
+    }
+
+    BV& bv_;                         ///< target bit-vector
+    bit_visitor_callback_type func_; ///< unused, kept for source compatibility
+};
+
+
+
/**
- @brief bit-vector visitor scanner to traverse each 1 bit using C callback
+ @brief bvector visitor scanner to traverse each 1 bit using C callback
@param bv - bit vector to scan
@param handle_ptr - handle to private memory used by callback
bit_visitor_callback_type callback_ptr)
{
typedef typename BV::size_type size_type;
- // private adaptor for C-style callbacks
- struct callback_adaptor
+ bm::bit_vitor_callback_adaptor<bit_visitor_callback_type, size_type>
+ func(handle_ptr, callback_ptr);
+ bm::for_each_bit(bv, func);
+}
+
+/**
+    @brief bvector visitor scanner to traverse each bit in range (C callback)
+
+    Wraps the callback into bm::bit_vitor_callback_adaptor and forwards to
+    bm::for_each_bit_range().
+
+    @param bv - bit vector to scan
+    @param left - from [left..right]
+    @param right - to [left..right]
+    @param handle_ptr - handle to private memory used by callback
+    @param callback_ptr - callback function
+
+    \ingroup setalgo
+
+    @sa bit_visitor_callback_type for_each_bit
+*/
+template<class BV>
+void visit_each_bit_range(const BV& bv,
+                          typename BV::size_type left,
+                          typename BV::size_type right,
+                          void* handle_ptr,
+                          bit_visitor_callback_type callback_ptr)
+{
+    bm::bit_vitor_callback_adaptor<bit_visitor_callback_type,
+                                   typename BV::size_type>
+        adaptor(handle_ptr, callback_ptr);
+    bm::for_each_bit_range(bv, left, right, adaptor);
+}
+
+/**
+ @brief Algorithm to identify bit-vector ranges (splits) for the rank
+
+ Rank range split algorithm walks the bit-vector to create list of
+ non-overlapping ranges [s1..e1],[s2..e2]...[sN...eN] with requested
+ (rank) number of 1 bits. All ranges should be the same popcount weight,
+ except the last one, which may have less.
+ Scan is progressing from left to right so result ranges will be
+ naturally sorted.
+
+ @param bv - bit vector to perform the range split scan
+ @param rank - requested number of bits in each range
+ if 0 it will create single range [first..last]
+ to cover the whole bv
+ @param target_v - [out] STL(or STL-like) vector of pairs to keep pairs results
+
+ \ingroup setalgo
+ */
+template<typename BV, typename PairVect>
+void rank_range_split(const BV& bv,
+ typename BV::size_type rank,
+ PairVect& target_v)
+{
+ target_v.resize(0); // drop any previous content of the output vector
+ typename BV::size_type first, last, pos;
+ bool found = bv.find_range(first, last); // first and last set bits of bv
+ if (!found) // empty bit-vector
+ return;
+
+ if (!rank) // if rank is not defined, include the whole vector [first..last]
{
- callback_adaptor(void* h, bit_visitor_callback_type cb_func)
- : handle_(h), func_(cb_func)
- {}
-
- void add_bits(size_type offset, const unsigned char* bits, unsigned size)
+ typename PairVect::value_type pv;
+ pv.first = first; pv.second = last;
+ target_v.push_back(pv);
+ return;
+ }
+
+ while (1)
+ {
+ typename PairVect::value_type pv;
+ found = bv.find_rank(rank, first, pos); // NOTE(review): presumably pos = position reaching rank set bits counted from first - confirm
+ if (found)
{
- for (unsigned i = 0; i < size; ++i)
- func_(handle_, offset + bits[i]);
+ pv.first = first; pv.second = pos;
+ target_v.push_back(pv);
+ if (pos >= last) // the range just emitted reaches the last set bit
+ break;
+ first = pos + 1; // next range starts right after this one
+ continue;
}
- void add_range(size_type offset, unsigned size)
+ // insufficient rank (last range)
+ found = bv.any_range(first, last);
+ if (found) // some bits remain in [first..last]: emit them as the final range
{
- for (unsigned i = 0; i < size; ++i)
- func_(handle_, offset + i);
+ pv.first = first; pv.second = last;
+ target_v.push_back(pv);
}
-
- void* handle_;
- bit_visitor_callback_type func_;
- };
-
- callback_adaptor func(handle_ptr, callback_ptr);
- bm::for_each_bit(bv, func);
+ break;
+ } // while
+
}
+
/**
Algorithms for rank compression of bit-vector
+
} // bm
#include "bmundef.h"
\ingroup distance
*/
inline
-distance_metric operation2metric(set_operation op)
+distance_metric operation2metric(set_operation op) BMNOEXCEPT
{
BM_ASSERT(is_const_set_operation(op));
if (op == set_COUNT) op = set_COUNT_B;
distance_metric metric;
size_type result;
- distance_metric_descriptor(distance_metric m)
+ distance_metric_descriptor(distance_metric m) BMNOEXCEPT
: metric(m),
result(0)
{}
- distance_metric_descriptor()
+ distance_metric_descriptor() BMNOEXCEPT
: metric(bm::COUNT_XOR),
result(0)
{}
/*!
\brief Sets metric result to 0
*/
- void reset()
+ void reset() BMNOEXCEPT
{
result = 0;
}
void combine_count_operation_with_block(const bm::word_t* blk,
const bm::word_t* arg_blk,
distance_metric_descriptor* dmit,
- distance_metric_descriptor* dmit_end)
+ distance_metric_descriptor* dmit_end) BMNOEXCEPT
{
gap_word_t* g1 = BMGAP_PTR(blk);
*/
inline
unsigned combine_count_and_operation_with_block(const bm::word_t* blk,
- const bm::word_t* arg_blk)
+ const bm::word_t* arg_blk) BMNOEXCEPT
{
unsigned gap = BM_IS_GAP(blk);
unsigned arg_gap = BM_IS_GAP(arg_blk);
const bm::word_t* arg_blk,
unsigned arg_gap,
distance_metric_descriptor* dmit,
- distance_metric_descriptor* dmit_end)
+ distance_metric_descriptor* dmit_end) BMNOEXCEPT
{
gap_word_t* res=0;
unsigned
combine_count_operation_with_block(const bm::word_t* blk,
const bm::word_t* arg_blk,
- distance_metric metric)
+ distance_metric metric) BMNOEXCEPT
{
distance_metric_descriptor dmd(metric);
combine_count_operation_with_block(blk, //gap,
unsigned gap,
const bm::word_t* arg_blk,
unsigned arg_gap,
- distance_metric metric)
+ distance_metric metric) BMNOEXCEPT
{
distance_metric_descriptor dmd(metric);
combine_any_operation_with_block(blk, gap,
inline
void distance_stage(const distance_metric_descriptor* dmit,
const distance_metric_descriptor* dmit_end,
- bool* is_all_and)
+ bool* is_all_and) BMNOEXCEPT
{
for (const distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
{
void distance_operation(const BV& bv1,
const BV& bv2,
distance_metric_descriptor* dmit,
- distance_metric_descriptor* dmit_end)
+ distance_metric_descriptor* dmit_end) BMNOEXCEPT
{
const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
*/
template<class BV>
typename BV::size_type distance_and_operation(const BV& bv1,
- const BV& bv2)
+ const BV& bv2) BMNOEXCEPT
{
const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
void distance_operation_any(const BV& bv1,
const BV& bv2,
distance_metric_descriptor* dmit,
- distance_metric_descriptor* dmit_end)
+ distance_metric_descriptor* dmit_end) BMNOEXCEPT
{
const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
\internal
*/
template<typename It, typename SIZE_TYPE>
-It block_range_scan(It first, It last, SIZE_TYPE nblock, SIZE_TYPE* max_id)
+It block_range_scan(It first, It last,
+ SIZE_TYPE nblock, SIZE_TYPE* max_id) BMNOEXCEPT
{
SIZE_TYPE m = *max_id;
It right;
typename BV::blocks_manager_type::block_idx_type st = 0;
bm::for_each_block(blk_root, bman.top_block_size(), func, st);
- return func.count();
+ typename BV::size_type intervals = func.count();
+ bool last_bit_set = bv.test(bm::id_max-1);
+
+ intervals -= last_bit_set; // correct last (out of range) interval
+ return intervals;
}
/*!
/*!
- \brief for-each visitor, calls a special visitor functor for each 1 bit group
+ \brief for-each visitor, calls a visitor functor for each 1 bit group
\param block - bit block buffer pointer
\param offset - global block offset (number of bits)
void for_each_bit_blk(const bm::word_t* block, SIZE_TYPE offset,
Func& bit_functor)
{
+ BM_ASSERT(block);
if (IS_FULL_BLOCK(block))
{
bit_functor.add_range(offset, bm::gap_max_bits);
} while (block < block_end);
}
+/*!
+ \brief for-each range visitor, calls a visitor functor for each 1 bit group
+
+ \param block - bit block buffer pointer
+ \param offset - global block offset (number of bits)
+ \param left - bit address in block, start of the closed range [left..right]
+ \param right - bit address in block, end of the closed range [left..right]
+ \param bit_functor - functor must support .add_bits(offset, bits_ptr, size)
+ and .add_range(offset, size) (called when block is a FULL block)
+
+ @ingroup bitfunc
+ @internal
+*/
+template<typename Func, typename SIZE_TYPE>
+void for_each_bit_blk(const bm::word_t* block, SIZE_TYPE offset,
+ unsigned left, unsigned right,
+ Func& bit_functor)
+{
+ BM_ASSERT(block);
+ BM_ASSERT(left <= right);
+ BM_ASSERT(right < bm::bits_in_block);
+
+ if (IS_FULL_BLOCK(block))
+ {
+ unsigned sz = right - left + 1; // every bit of [left..right] is reported as one range
+ bit_functor.add_range(offset + left, sz);
+ return;
+ }
+ unsigned char bits[bm::set_bitscan_wave_size*32]; // scratch buffer filled by the bitscan calls below
+
+ unsigned cnt, nword, nbit, bitcount, temp;
+ nbit = left & bm::set_word_mask; // bit position of 'left' inside its word
+ const bm::word_t* word =
+ block + (nword = unsigned(left >> bm::set_word_shift));
+ if (left == right) // special case (only 1 bit to check)
+ {
+ if ((*word >> nbit) & 1u)
+ {
+ bits[0] = (unsigned char)nbit;
+ bit_functor.add_bits(offset + (nword * 32), bits, 1);
+ }
+ return;
+ }
+
+ bitcount = right - left + 1u;
+ if (nbit) // starting position is not aligned
+ {
+ unsigned right_margin = nbit + right - left;
+ if (right_margin < 32) // the whole range sits inside this single word
+ {
+ unsigned mask =
+ block_set_table<true>::_right[nbit] &
+ block_set_table<true>::_left[right_margin];
+ temp = (*word & mask);
+ cnt = bm::bitscan_popcnt(temp, bits);
+ if (cnt)
+ bit_functor.add_bits(offset + (nword * 32), bits, cnt);
+
+ return;
+ }
+ temp = *word & block_set_table<true>::_right[nbit];
+ cnt = bm::bitscan_popcnt(temp, bits);
+ if (cnt)
+ bit_functor.add_bits(offset + (nword * 32), bits, cnt);
+ bitcount -= 32 - nbit; // bits already consumed from the partial first word
+ ++word; ++nword;
+ }
+ else
+ {
+ bitcount = right - left + 1u; // same value as assigned before the if
+ }
+ BM_ASSERT(bm::set_bitscan_wave_size == 4);
+ // now when we are word aligned, we can scan the bit-stream
+ // loop unrolled to evaluate 4 words at a time
+ for ( ;bitcount >= 128;
+ bitcount-=128, word+=bm::set_bitscan_wave_size,
+ nword += bm::set_bitscan_wave_size)
+ {
+ cnt = bm::bitscan_wave(word, bits);
+ if (cnt)
+ bit_functor.add_bits(offset + (nword * 32), bits, cnt);
+ } // for
+
+ for ( ;bitcount >= 32; bitcount-=32, ++word) // remaining whole words, one at a time
+ {
+ temp = *word;
+ cnt = bm::bitscan_popcnt(temp, bits);
+ if (cnt)
+ bit_functor.add_bits(offset + (nword * 32), bits, cnt);
+ ++nword;
+ } // for
+
+ BM_ASSERT(bitcount < 32);
+
+ if (bitcount) // we have a tail to count
+ {
+ temp = *word & block_set_table<true>::_left[bitcount-1];
+ cnt = bm::bitscan_popcnt(temp, bits);
+ if (cnt)
+ bit_functor.add_bits(offset + (nword * 32), bits, cnt);
+ }
+
+}
+
+
/*!
\brief for-each visitor, calls a special visitor functor for each 1 bit range
}
}
+/*!
+ \brief for-each visitor, calls a special visitor functor for each 1 bit range
+
+ \param buf - bit block buffer pointer
+ \param offset - global block offset (number of bits)
+ \param left - interval start [left..right]
+ \param right - interval end [left..right]
+ \param bit_functor - functor must support .add_range(offset, size)
+
+ @ingroup gapfunc
+ @internal
+*/
+template<typename T, typename Func, typename SIZE_TYPE>
+void for_each_gap_blk_range(const T* BMRESTRICT buf,
+ SIZE_TYPE offset,
+ unsigned left, unsigned right,
+ Func& bit_functor)
+{
+ BM_ASSERT(left <= right);
+ BM_ASSERT(right < bm::bits_in_block);
+
+ unsigned is_set;
+ unsigned start_pos = bm::gap_bfind(buf, left, &is_set); // index of the GAP element for 'left'; is_set receives its bit value
+ const T* BMRESTRICT pcurr = buf + start_pos;
+
+ if (is_set)
+ {
+ if (right <= *pcurr) // the whole [left..right] lies inside this run of 1s
+ {
+ bit_functor.add_range(offset + left, (right + 1)-left);
+ return;
+ }
+ bit_functor.add_range(offset + left, (*pcurr + 1)-left); // report from 'left' to the end of this run
+ ++pcurr;
+ }
+
+ const T* BMRESTRICT pend = buf + (*buf >> 3); // end pointer derived from the header word buf[0]
+ for (++pcurr; pcurr <= pend; pcurr += 2) // step by 2: visit every other element
+ {
+ T prev = *(pcurr-1);
+ if (right <= *pcurr)
+ {
+ int sz = int(right) - int(prev); // bits (prev..right]; not positive when 'right' falls at or before prev
+ if (sz > 0)
+ bit_functor.add_range(offset + prev + 1, unsigned(sz));
+ return;
+ }
+ bit_functor.add_range(offset + prev + 1, *pcurr - prev);
+ } // for
+}
+
+
+
+/*! For each non-zero block in [from, to] executes supplied functor
+ \param root - top level array of the block tree (indexed by i)
+ \param top_size - number of elements in root; indexes past it are not visited
+ \param nb_from - first block index to visit (inclusive)
+ \param nb_to - last block index to visit (inclusive)
+ \param f - functor: gets add_range() calls for FULL sub-blocks and is
+ passed on to for_each_gap_blk / for_each_bit_blk for other blocks
+ \internal
+*/
+template<typename T, typename N, typename F>
+void for_each_bit_block_range(T*** root,
+ N top_size, N nb_from, N nb_to, F& f)
+{
+ BM_ASSERT(top_size);
+ if (nb_from > nb_to) // empty range, nothing to visit
+ return;
+ unsigned i_from = unsigned(nb_from >> bm::set_array_shift);
+ unsigned j_from = unsigned(nb_from & bm::set_array_mask);
+ unsigned i_to = unsigned(nb_to >> bm::set_array_shift);
+ unsigned j_to = unsigned(nb_to & bm::set_array_mask);
+
+ if (i_from >= top_size)
+ return;
+ if (i_to >= top_size) // clamp the end of the range to the last element of root
+ {
+ i_to = unsigned(top_size-1);
+ j_to = bm::set_sub_array_size-1;
+ }
+
+ for (unsigned i = i_from; i <= i_to; ++i)
+ {
+ T** blk_blk = root[i];
+ if (!blk_blk)
+ continue;
+ if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+ {
+ unsigned j = (i == i_from) ? j_from : 0;
+ if (!j && (i != i_to)) // full sub-block
+ {
+ N base_idx = bm::get_super_block_start<N>(i);
+ f.add_range(base_idx, bm::set_sub_total_bits);
+ }
+ else
+ {
+ do // partially covered FULL sub-block: report it block by block
+ {
+ N base_idx = bm::get_block_start<N>(i, j);
+ f.add_range(base_idx, bm::gap_max_bits);
+ if ((i == i_to) && (j == j_to))
+ return;
+ } while (++j < bm::set_sub_array_size);
+ }
+ }
+ else
+ {
+ unsigned j = (i == i_from) ? j_from : 0;
+ do
+ {
+ const T* block;
+ if (blk_blk[j])
+ {
+ N base_idx = bm::get_block_start<N>(i, j);
+ if (0 != (block = blk_blk[j])) // always true here: blk_blk[j] was tested just above
+ {
+ if (BM_IS_GAP(block))
+ {
+ bm::for_each_gap_blk(BMGAP_PTR(block), base_idx, f);
+ }
+ else
+ {
+ bm::for_each_bit_blk(block, base_idx, f);
+ }
+ }
+ }
+
+ if ((i == i_to) && (j == j_to)) // last requested block has been handled
+ return;
+ } while (++j < bm::set_sub_array_size);
+ }
+ } // for i
+}
+
+
+/**
+ Implementation of for_each_bit_range without boilerplate checks
+ \param bv - bit-vector to scan
+ \param left - start of the closed range [left..right]
+ \param right - end of the closed range [left..right]
+ \param bit_functor - visitor functor handed to the block-level scanners
+ @internal
+*/
+template<class BV, class Func>
+void for_each_bit_range_no_check(const BV& bv,
+ typename BV::size_type left,
+ typename BV::size_type right,
+ Func& bit_functor)
+{
+ typedef typename BV::size_type size_type;
+ typedef typename BV::block_idx_type block_idx_type;
+
+ const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
+ bm::word_t*** blk_root = bman.top_blocks_root();
+ if (!blk_root) // no block tree, nothing to visit
+ return;
+
+ block_idx_type nblock_left = (left >> bm::set_block_shift);
+ block_idx_type nblock_right = (right >> bm::set_block_shift);
+
+ unsigned i0, j0;
+ bm::get_block_coord(nblock_left, i0, j0);
+ const bm::word_t* block = bman.get_block_ptr(i0, j0);
+ unsigned nbit_left = unsigned(left & bm::set_block_mask);
+ size_type offset = nblock_left * bm::bits_in_block;
+
+ if (nblock_left == nblock_right) // hit in the same block
+ {
+ if (!block)
+ return;
+ unsigned nbit_right = unsigned(right & bm::set_block_mask);
+ if (BM_IS_GAP(block))
+ {
+ bm::for_each_gap_blk_range(BMGAP_PTR(block), offset,
+ nbit_left, nbit_right, bit_functor);
+ }
+ else
+ {
+ bm::for_each_bit_blk(block, offset, nbit_left, nbit_right,
+ bit_functor);
+ }
+ return;
+ }
+ // process left block
+ if (nbit_left && block) // when nbit_left is 0 the first block is left to the middle pass below
+ {
+ if (BM_IS_GAP(block))
+ {
+ bm::for_each_gap_blk_range(BMGAP_PTR(block), offset,
+ nbit_left, bm::bits_in_block-1, bit_functor);
+ }
+ else
+ {
+ bm::for_each_bit_blk(block, offset, nbit_left, bm::bits_in_block-1,
+ bit_functor);
+ }
+ ++nblock_left; // first block is done, keep it out of the middle pass
+ }
+
+ // process all complete blocks in the middle
+ {
+ block_idx_type top_blocks_size = bman.top_block_size();
+ bm::for_each_bit_block_range(blk_root, top_blocks_size,
+ nblock_left, nblock_right-1, bit_functor);
+ }
+
+ unsigned nbit_right = unsigned(right & bm::set_block_mask);
+ bm::get_block_coord(nblock_right, i0, j0);
+ block = bman.get_block_ptr(i0, j0);
+
+ if (block) // right-most block: scan bits [0..nbit_right]
+ {
+ offset = nblock_right * bm::bits_in_block;
+ if (BM_IS_GAP(block))
+ {
+ bm::for_each_gap_blk_range(BMGAP_PTR(block), offset,
+ 0, nbit_right, bit_functor);
+ }
+ else
+ {
+ bm::for_each_bit_blk(block, offset, 0, nbit_right, bit_functor);
+ }
+ }
+}
+
+
} // namespace bm
ptr = (bm::word_t*) ::_mm_malloc(n * sizeof(bm::word_t), BM_ALLOC_ALIGN);
#endif
#else
- ptr = (bm::word_t*) ::malloc(n * sizeof(bm::word_t));
+ ptr = (bm::word_t*) ::malloc(n * sizeof(bm::word_t));
#endif
-
if (!ptr)
- {
throw std::bad_alloc();
- }
return ptr;
}
The member function frees storage for an array of n bm::word_t
elements, by calling free.
*/
- static void deallocate(bm::word_t* p, size_t)
+ static void deallocate(bm::word_t* p, size_t) BMNOEXCEPT
{
#ifdef BM_ALLOC_ALIGN
# ifdef _MSC_VER
{
void* ptr = ::malloc(n * sizeof(void*));
if (!ptr)
- {
throw std::bad_alloc();
- }
return ptr;
}
The member function frees storage for an array of n bm::word_t
elements, by calling free.
*/
- static void deallocate(void* p, size_t)
+ static void deallocate(void* p, size_t) BMNOEXCEPT
{
::free(p);
}
n_pool_max_size = BM_DEFAULT_POOL_SIZE
};
- pointer_pool_array() : size_(0)
+ pointer_pool_array() : pool_ptr_(0), size_(0)
{
allocate_pool(n_pool_max_size);
}
/// Push pointer to the pool (if it is not full)
///
/// @return 0 if pointer is not accepted (pool is full)
- unsigned push(void* ptr)
+ unsigned push(void* ptr) BMNOEXCEPT
{
if (size_ == n_pool_max_size - 1)
return 0;
/// Get a pointer if there are any vacant
///
- void* pop()
+ void* pop() BMNOEXCEPT
{
- if (size_ == 0)
+ if (!size_)
return 0;
return pool_ptr_[--size_];
}
private:
void allocate_pool(size_t pool_size)
{
+ BM_ASSERT(!pool_ptr_);
pool_ptr_ = (void**)::malloc(sizeof(void*) * pool_size);
if (!pool_ptr_)
throw std::bad_alloc();
}
- void free_pool()
+ void free_pool() BMNOEXCEPT
{
::free(pool_ptr_);
}
bm::word_t* alloc_bit_block()
{
bm::word_t* ptr = (bm::word_t*)block_pool_.pop();
- if (ptr == 0)
+ if (!ptr)
ptr = block_alloc_.allocate(bm::set_block_size, 0);
return ptr;
}
- void free_bit_block(bm::word_t* block)
+ void free_bit_block(bm::word_t* block) BMNOEXCEPT
{
BM_ASSERT(IS_VALID_ADDR(block));
if (!block_pool_.push(block))
- {
block_alloc_.deallocate(block, bm::set_block_size);
- }
}
- void free_pools()
+ void free_pools() BMNOEXCEPT
{
bm::word_t* block;
do
public:
- mem_alloc(const BA& block_alloc = BA(), const PA& ptr_alloc = PA())
+ mem_alloc(const BA& block_alloc = BA(), const PA& ptr_alloc = PA()) BMNOEXCEPT
: block_alloc_(block_alloc),
ptr_alloc_(ptr_alloc),
alloc_pool_p_(0)
{}
- mem_alloc(const mem_alloc& ma)
+ mem_alloc(const mem_alloc& ma) BMNOEXCEPT
: block_alloc_(ma.block_alloc_),
ptr_alloc_(ma.ptr_alloc_),
alloc_pool_p_(0) // do not inherit pool (has to be explicitly defined)
{}
- mem_alloc& operator=(const mem_alloc& ma)
+ mem_alloc& operator=(const mem_alloc& ma) BMNOEXCEPT
{
block_alloc_ = ma.block_alloc_;
ptr_alloc_ = ma.ptr_alloc_;
/*! @brief Returns copy of the block allocator object
*/
- block_allocator_type get_block_allocator() const
+ block_allocator_type get_block_allocator() const BMNOEXCEPT
{
return BA(block_alloc_);
}
/*! @brief Returns copy of the ptr allocator object
*/
- ptr_allocator_type get_ptr_allocator() const
+ ptr_allocator_type get_ptr_allocator() const BMNOEXCEPT
{
return PA(block_alloc_);
}
/*! @brief set pointer to external pool */
- void set_pool(allocator_pool_type* pool)
+ void set_pool(allocator_pool_type* pool) BMNOEXCEPT
{
alloc_pool_p_ = pool;
}
/*! @brief get pointer to allocation pool (if set) */
- allocator_pool_type* get_pool()
+ allocator_pool_type* get_pool() BMNOEXCEPT
{
return alloc_pool_p_;
}
/*! @brief Frees bit block allocated by alloc_bit_block.
*/
- void free_bit_block(bm::word_t* block, unsigned alloc_factor = 1)
+ void free_bit_block(bm::word_t* block, unsigned alloc_factor = 1) BMNOEXCEPT
{
BM_ASSERT(IS_VALID_ADDR(block));
if (alloc_pool_p_ && alloc_factor == 1)
/*! @brief Frees block of pointers.
*/
- void free_ptr(void* p, size_t size)
+ void free_ptr(void* p, size_t size) BMNOEXCEPT
{
if (p)
ptr_alloc_.deallocate(p, size);
///
/// @internal
inline
-void aligned_free(void* ptr)
+void aligned_free(void* ptr) BMNOEXCEPT
{
if (!ptr)
return;
return (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]);
}
+/*!
+ @brief Calculate population count based on digest
+
+ @param block - bit block pointer, read with aligned 256-bit loads
+ @param digest - 64-bit mask; each set bit selects one wave of the block
+ to be counted, waves with a 0 digest bit are not read
+ @return popcnt
+ @ingroup AVX2
+*/
+inline
+bm::id_t avx2_bit_block_count(const bm::word_t* const block,
+ bm::id64_t digest)
+{
+ bm::id_t count = 0;
+ bm::id64_t* cnt64;
+ BM_AVX2_POPCNT_PROLOG;
+ __m256i cnt = _mm256_setzero_si256(); // four 64-bit partial sums
+ while (digest)
+ {
+ bm::id64_t t = bm::bmi_blsi_u64(digest); // d & -d;
+
+ unsigned wave = _mm_popcnt_u64(t - 1); // index of the lowest set digest bit
+ unsigned off = wave * bm::set_block_digest_wave_size;
+
+ const __m256i* BMRESTRICT wave_src = (__m256i*)&block[off];
+
+ __m256i m1A, m1B, m1C, m1D;
+ m1A = _mm256_load_si256(wave_src);
+ m1B = _mm256_load_si256(wave_src+1);
+ if (!_mm256_testz_si256(m1A, m1A)) // skip the count for an all-zero vector
+ {
+ BM_AVX2_BIT_COUNT(bc, m1A)
+ cnt = _mm256_add_epi64(cnt, bc);
+ }
+ if (!_mm256_testz_si256(m1B, m1B))
+ {
+ BM_AVX2_BIT_COUNT(bc, m1B)
+ cnt = _mm256_add_epi64(cnt, bc);
+ }
+
+ m1C = _mm256_load_si256(wave_src+2);
+ m1D = _mm256_load_si256(wave_src+3);
+ if (!_mm256_testz_si256(m1C, m1C))
+ {
+ BM_AVX2_BIT_COUNT(bc, m1C)
+ cnt = _mm256_add_epi64(cnt, bc);
+ }
+ if (!_mm256_testz_si256(m1D, m1D))
+ {
+ BM_AVX2_BIT_COUNT(bc, m1D)
+ cnt = _mm256_add_epi64(cnt, bc);
+ }
+
+ digest = bm::bmi_bslr_u64(digest); // d &= d - 1;
+ } // while
+ cnt64 = (bm::id64_t*)&cnt;
+ count = (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]); // fold the four lanes into one total
+ return count;
+
+}
+
+
+
/*!
@brief AND bit count for two aligned bit-blocks
@ingroup AVX2
return true;
}
+/*!
+ @brief check if wave of pointers is all 0xFFF
+ @param ptr - start of the 32 bytes to test (unaligned load is used)
+ @return true if every one of the 32 bytes is 0xFF
+ @ingroup AVX2
+*/
+BMFORCEINLINE
+bool avx2_test_all_one_wave(const void* ptr)
+{
+ __m256i maskF = _mm256_set1_epi32(~0u); // broadcast 0xFF to all bytes
+ __m256i wcmpA = _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)ptr), maskF); // (w0 == maskF)
+ unsigned maskA = unsigned(_mm256_movemask_epi8(wcmpA)); // one result bit per compared byte
+ return (maskA == ~0u);
+}
+
+
/*!
@brief check if wave of pointers is all NULL
@ingroup AVX2
}
/**
- hybrid binary search, starts as binary, then switches to scan
-
+ Hybrid binary search, starts as binary, then switches to scan
+
NOTE: AVX code uses _mm256_subs_epu16 - saturated substraction
which gives 0 if A-B=0 if A < B (not negative a value).
-
+
+ \param buf - GAP buffer pointer.
+ \param pos - index of the element.
+ \param is_set - output. GAP value (0 or 1).
+ \return GAP index.
+
@ingroup AVX2
*/
inline
-unsigned avx2_gap_test(const unsigned short* buf, unsigned pos)
+unsigned avx2_gap_bfind(const unsigned short* BMRESTRICT buf,
+ unsigned pos, unsigned* BMRESTRICT is_set)
{
+ BM_ASSERT(is_set);
+
const unsigned linear_cutoff = 48;
const unsigned unroll_factor = 16;
{
if (buf[start] >= pos)
{
- res = ((*buf) & 1) ^ ((--start) & 1);
- return res;
+ res = ((*buf) & 1) ^ ((start-1) & 1);
+ *is_set = res;
+ return start;
}
} // for
BM_ASSERT(0);
// but stay within allocated block memory
//
dsize = arr_end - start;
-
+
__m256i mZ = _mm256_setzero_si256();
__m256i mPos = _mm256_set1_epi16((unsigned short)pos);
__m256i vect16, mSub, mge_mask;
{
int lz = _tzcnt_u32(mask) / 2;
start += lz;
- res = ((*buf) & 1) ^ ((--start) & 1);
- return res;
+ res = ((*buf) & 1) ^ ((start-1) & 1);
+ *is_set = res;
+ return start;
}
} // for k
unsigned tail = unroll_factor - (end - start);
mSub = _mm256_subs_epu16(mPos, vect16);
mge_mask = _mm256_cmpeq_epi16(mSub, mZ);
int mask = _mm256_movemask_epi8(mge_mask);
- BM_ASSERT(mask);
- // TODO: if should be not needed, cleanup
- if (mask)
- {
- int lz = _tzcnt_u32(mask) / 2;
- start += lz;
- res = ((*buf) & 1) ^ ((--start) & 1);
- return res;
- }
- start += unroll_factor; // remove with if when sure
+ BM_ASSERT(mask); // the result MUST be here at this point
+
+ int lz = _tzcnt_u32(mask) / 2;
+ start += lz;
+ res = ((*buf) & 1) ^ ((start-1) & 1);
+ *is_set = res;
+ return start;
}
for (; start < end; ++start)
{
if (buf[start] >= pos)
break;
- }
+ } // for
break;
}
unsigned curr = (start + end) >> 1;
else
end = curr;
} // while
- res = ((*buf) & 1) ^ ((--start) & 1);
- return res;
+ res = ((*buf) & 1) ^ ((start-1) & 1);
+ *is_set = res;
+ return start;
+}
+
+
+/**
+ Hybrid binary search, starts as binary, then switches to scan
+ Thin wrapper over avx2_gap_bfind: the found index is discarded, only
+ the is_set output is returned.
+ \param buf - GAP buffer pointer.
+ \param pos - index of the element.
+ \return GAP value (0 or 1) reported by avx2_gap_bfind for pos.
+ @ingroup AVX2
+*/
+inline
+unsigned avx2_gap_test(const unsigned short* BMRESTRICT buf, unsigned pos)
+{
+ unsigned is_set;
+ bm::avx2_gap_bfind(buf, pos, &is_set); // return value (GAP index) is not needed here
+ return is_set;
+}
/**
#define VECT_BIT_BLOCK_XOR(t, src, src_xor, d) \
avx2_bit_block_xor(t, src, src_xor, d)
+#define VECT_GAP_BFIND(buf, pos, is_set) \
+ avx2_gap_bfind(buf, pos, is_set)
+
+#define VECT_BIT_COUNT_DIGEST(blk, d) \
+ avx2_bit_block_count(blk, d)
+
+
} // namespace
public:
typedef id_type size_type;
- bm_func_base(blocks_manager& bman) : bm_(bman) {}
+ bm_func_base(blocks_manager& bman) BMNOEXCEPT : bm_(bman) {}
- void on_empty_top(unsigned /* top_block_idx*/ ) {}
- void on_empty_block(block_idx_type /* block_idx*/ ) {}
+ void on_empty_top(unsigned /* top_block_idx*/ ) BMNOEXCEPT {}
+ void on_empty_block(block_idx_type /* block_idx*/ )BMNOEXCEPT {}
private:
bm_func_base(const bm_func_base&);
bm_func_base& operator=(const bm_func_base&);
{
public:
typedef id_type size_type;
- bm_func_base_const(const blocks_manager& bman) : bm_(bman) {}
+ bm_func_base_const(const blocks_manager& bman) BMNOEXCEPT : bm_(bman) {}
- void on_empty_top(unsigned /* top_block_idx*/ ) {}
- void on_empty_block(block_idx_type /* block_idx*/ ) {}
+ void on_empty_top(unsigned /* top_block_idx*/ ) BMNOEXCEPT {}
+ void on_empty_block(block_idx_type /* block_idx*/ ) BMNOEXCEPT {}
private:
- bm_func_base_const(const bm_func_base_const&);
- bm_func_base_const& operator=(const bm_func_base_const&);
+ bm_func_base_const(const bm_func_base_const&) BMNOEXCEPT;
+ bm_func_base_const& operator=(const bm_func_base_const&) BMNOEXCEPT;
protected:
const blocks_manager& bm_;
};
class block_count_base : public bm_func_base_const
{
protected:
- block_count_base(const blocks_manager& bm)
+ block_count_base(const blocks_manager& bm) BMNOEXCEPT
: bm_func_base_const(bm) {}
- bm::id_t block_count(const bm::word_t* block) const
+ bm::id_t block_count(const bm::word_t* block) const BMNOEXCEPT
{
return this->bm_.block_bitcount(block);
}
public:
typedef id_type size_type;
- block_count_func(const blocks_manager& bm)
+ block_count_func(const blocks_manager& bm) BMNOEXCEPT
: block_count_base(bm), count_(0) {}
- id_type count() const { return count_; }
+ id_type count() const BMNOEXCEPT { return count_; }
- void operator()(const bm::word_t* block)
+ void operator()(const bm::word_t* block) BMNOEXCEPT
{
count_ += this->block_count(block);
}
- void add_full(id_type c) { count_ += c; }
- void reset() { count_ = 0; }
+ void add_full(id_type c) BMNOEXCEPT { count_ += c; }
+ void reset() BMNOEXCEPT { count_ = 0; }
private:
id_type count_;
public:
typedef id_type size_type;
- block_count_arr_func(const blocks_manager& bm, unsigned* arr)
+ block_count_arr_func(const blocks_manager& bm, unsigned* arr) BMNOEXCEPT
: block_count_base(bm), arr_(arr), last_idx_(0)
{
arr_[0] = 0;
}
- void operator()(const bm::word_t* block, id_type idx)
+ void operator()(const bm::word_t* block, id_type idx) BMNOEXCEPT
{
while (++last_idx_ < idx)
- {
arr_[last_idx_] = 0;
- }
arr_[idx] = this->block_count(block);
last_idx_ = idx;
}
- id_type last_block() const { return last_idx_; }
- void on_non_empty_top(unsigned) {}
+ id_type last_block() const BMNOEXCEPT { return last_idx_; }
+ void on_non_empty_top(unsigned) BMNOEXCEPT {}
private:
unsigned* arr_;
public:
typedef id_type size_type;
- block_count_change_func(const blocks_manager& bm)
+ block_count_change_func(const blocks_manager& bm) BMNOEXCEPT
: bm_func_base_const(bm),
count_(0),
prev_block_border_bit_(0)
{}
- block_idx_type block_count(const bm::word_t* block, block_idx_type idx)
+ block_idx_type block_count(const bm::word_t* block,
+ block_idx_type idx) BMNOEXCEPT
{
block_idx_type cnt = 0;
id_type first_bit;
if (BM_IS_GAP(block))
{
gap_word_t* gap_block = BMGAP_PTR(block);
- cnt = gap_length(gap_block) - 1;
+ cnt = bm::gap_length(gap_block) - 1;
if (idx)
{
first_bit = bm::gap_test_unr(gap_block, 0);
return cnt;
}
- id_type count() const { return count_; }
+ id_type count() const BMNOEXCEPT { return count_; }
- void operator()(const bm::word_t* block, block_idx_type idx)
+ void operator()(const bm::word_t* block, block_idx_type idx) BMNOEXCEPT
{
count_ += block_count(block, idx);
}
public:
typedef id_type size_type;
- block_any_func(const blocks_manager& bm)
+ block_any_func(const blocks_manager& bm) BMNOEXCEPT
: bm_func_base_const(bm)
{}
- bool operator()(const bm::word_t* block, block_idx_type /*idx*/)
+ bool operator()
+ (const bm::word_t* block, block_idx_type /*idx*/) BMNOEXCEPT
{
if (BM_IS_GAP(block)) // gap block
return (!gap_is_all_zero(BMGAP_PTR(block)));
class gap_level_func : public bm_func_base
{
public:
- gap_level_func(blocks_manager& bm, const gap_word_t* glevel_len)
- : bm_func_base(bm),
- glevel_len_(glevel_len)
+ gap_level_func(blocks_manager& bm,
+ const gap_word_t* glevel_len) BMNOEXCEPT
+ : bm_func_base(bm), glevel_len_(glevel_len)
{
BM_ASSERT(glevel_len);
}
return;
}
- unsigned len = gap_length(gap_blk);
- int level = gap_calc_level(len, glevel_len_);
+ unsigned len = bm::gap_length(gap_blk);
+ int level = bm::gap_calc_level(len, glevel_len_);
if (level == -1)
{
- bm::word_t* blk =
- bman.get_allocator().alloc_bit_block();
+ bm::word_t* blk = bman.get_allocator().alloc_bit_block();
bman.set_block_ptr(idx, blk);
bm::gap_convert_to_bitset(blk, gap_blk);
}
else
{
gap_word_t* gap_blk_new =
- bman.allocate_gap_block(unsigned(level), gap_blk, glevel_len_);
+ bman.allocate_gap_block(unsigned(level), gap_blk, glevel_len_);
bm::word_t* p = (bm::word_t*) gap_blk_new;
BMSET_PTRGAP(p);
class block_one_func : public bm_func_base
{
public:
- block_one_func(blocks_manager& bm) : bm_func_base(bm) {}
+ block_one_func(blocks_manager& bm) BMNOEXCEPT : bm_func_base(bm) {}
void operator()(bm::word_t* block, block_idx_type idx)
{
}
#ifndef BM_NO_CXX11
- blocks_manager(blocks_manager&& blockman) BMNOEXEPT
+ blocks_manager(blocks_manager&& blockman) BMNOEXCEPT
: max_bits_(blockman.max_bits_),
top_blocks_(0),
top_block_size_(blockman.top_block_size_),
}
#endif
- ~blocks_manager() BMNOEXEPT
+ ~blocks_manager() BMNOEXCEPT
{
if (temp_block_)
alloc_.free_bit_block(temp_block_);
/*! \brief Swaps content
\param bm another blocks manager
*/
- void swap(blocks_manager& bm) BMNOEXEPT
+ void swap(blocks_manager& bm) BMNOEXCEPT
{
BM_ASSERT(this != &bm);
/*! \brief implementation of moving semantics
*/
- void move_from(blocks_manager& bm) BMNOEXEPT
+ void move_from(blocks_manager& bm) BMNOEXCEPT
{
deinit_tree();
swap(bm);
}
- void free_ptr(bm::word_t** ptr)
+ void free_ptr(bm::word_t** ptr) BMNOEXCEPT
{
- if (ptr) alloc_.free_ptr(ptr);
+ alloc_.free_ptr(ptr);
}
/**
\param bits_to_store - supposed capacity (number of bits)
\return size of the top level block
*/
- unsigned compute_top_block_size(id_type bits_to_store)
+ unsigned compute_top_block_size(id_type bits_to_store) const BMNOEXCEPT
{
if (bits_to_store >= bm::id_max) // working in full-range mode
return bm::set_top_array_size;
\param no_more_blocks - 1 if there are no more blocks at all
\return block adress or NULL if not yet allocated
*/
- bm::word_t* get_block(block_idx_type nb, int* no_more_blocks) const
+ const bm::word_t*
+ get_block(block_idx_type nb, int* no_more_blocks) const BMNOEXCEPT
{
BM_ASSERT(top_blocks_);
unsigned i = unsigned(nb >> bm::set_array_shift);
@return bm::set_total_blocks - no more blocks
*/
block_idx_type
- find_next_nz_block(block_idx_type nb, bool deep_scan = true) const
+ find_next_nz_block(block_idx_type nb, bool deep_scan=true) const BMNOEXCEPT
{
if (is_init())
{
\param j - second level block index
\return block adress or NULL if not yet allocated
*/
- const bm::word_t* get_block(unsigned i, unsigned j) const
+ const bm::word_t* get_block(unsigned i, unsigned j) const BMNOEXCEPT
{
if (!top_blocks_ || i >= top_block_size_) return 0;
const bm::word_t* const* blk_blk = top_blocks_[i];
\param j - second level block index
\return block adress or NULL if not yet allocated
*/
- const bm::word_t* get_block_ptr(unsigned i, unsigned j) const
+ const bm::word_t* get_block_ptr(unsigned i, unsigned j) const BMNOEXCEPT
{
if (!top_blocks_ || i >= top_block_size_) return 0;
\param j - second level block index
\return block adress or NULL if not yet allocated
*/
- bm::word_t* get_block_ptr(unsigned i, unsigned j)
+ bm::word_t* get_block_ptr(unsigned i, unsigned j) BMNOEXCEPT
{
- if (!top_blocks_ || i >= top_block_size_) return 0;
+ if (!top_blocks_ || i >= top_block_size_)
+ return 0;
bm::word_t* const* blk_blk = top_blocks_[i];
if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
return FULL_BLOCK_FAKE_ADDR;
\param i - top level block index
\return block adress or NULL if not yet allocated
*/
- const bm::word_t* const * get_topblock(unsigned i) const
+ const bm::word_t* const * get_topblock(unsigned i) const BMNOEXCEPT
{
return (!top_blocks_ || i >= top_block_size_) ? 0 : top_blocks_[i];
}
/**
\brief Returns root block in the tree.
*/
- bm::word_t*** top_blocks_root() const
+ bm::word_t*** top_blocks_root() const BMNOEXCEPT
{
blocks_manager* bm =
const_cast<blocks_manager*>(this);
{
gap_res = true;
new_block = (bm::word_t*)
- get_allocator().alloc_gap_block(unsigned(new_level), glen());
+ get_allocator().alloc_gap_block(unsigned(new_level), glen());
::memcpy(new_block, gap_block, len * sizeof(bm::gap_word_t));
bm::set_gap_level(new_block, new_level);
}
/*! @brief Fills all blocks with 0.
@param free_mem - if true function frees the resources (obsolete)
*/
- void set_all_zero(bool /*free_mem*/)
+ void set_all_zero(bool /*free_mem*/) BMNOEXCEPT
{
if (!is_init()) return;
deinit_tree(); // TODO: optimization of top-level realloc
bm::set_sub_array_size, func);
}
- void free_top_subblock(unsigned nblk_blk)
+ void free_top_subblock(unsigned nblk_blk) BMNOEXCEPT
{
BM_ASSERT(top_blocks_[nblk_blk]);
if ((bm::word_t*)top_blocks_[nblk_blk] != FULL_BLOCK_FAKE_ADDR)
Places new block into blocks table.
*/
BMFORCEINLINE
- void set_block_ptr(unsigned i, unsigned j, bm::word_t* block)
+ void set_block_ptr(unsigned i, unsigned j, bm::word_t* block) BMNOEXCEPT
{
BM_ASSERT(is_init());
BM_ASSERT(i < top_block_size_);
/**
Free block, make it zero pointer in the tree
*/
- void zero_gap_block_ptr(unsigned i, unsigned j)
+ void zero_gap_block_ptr(unsigned i, unsigned j) BMNOEXCEPT
{
BM_ASSERT(top_blocks_ && i < top_block_size_);
Count number of bits ON in the block
*/
static
- bm::id_t block_bitcount(const bm::word_t* block)
+ bm::id_t block_bitcount(const bm::word_t* block) BMNOEXCEPT
{
BM_ASSERT(block);
id_t count;
if (BM_IS_GAP(block))
- {
count = bm::gap_bit_count_unr(BMGAP_PTR(block));
- }
else // bitset
- {
count = (IS_FULL_BLOCK(block)) ? bm::bits_in_block
: bm::bit_block_count(block);
- }
return count;
}
}
/*! deallocate temp block */
- void free_temp_block()
+ void free_temp_block() BMNOEXCEPT
{
if (temp_block_)
{
temp_block_ = 0;
}
}
+
/*! Detach and return temp block.
if temp block is NULL allocates a bit-block
caller is responsible for returning
/*! Return temp block
if temp block already exists - block gets deallocated
*/
- void return_tempblock(bm::word_t* block)
+ void return_tempblock(bm::word_t* block) BMNOEXCEPT
{
BM_ASSERT(block != temp_block_);
BM_ASSERT(IS_VALID_ADDR(block));
}
/*! Assigns new GAP lengths vector */
- void set_glen(const gap_word_t* glevel_len)
+ void set_glen(const gap_word_t* glevel_len) BMNOEXCEPT
{
::memcpy(glevel_len_, glevel_len, sizeof(glevel_len_));
}
/** Returns true if second level block pointer is 0.
*/
- bool is_subblock_null(unsigned nsub) const
+ bool is_subblock_null(unsigned nsub) const BMNOEXCEPT
{
BM_ASSERT(top_blocks_);
if (nsub >= top_block_size_)
return top_blocks_[nsub] == NULL;
}
- bm::word_t*** top_blocks_root()
+ bm::word_t*** top_blocks_root() BMNOEXCEPT
{
return top_blocks_;
}
/*! \brief Returns current GAP level vector
*/
- const gap_word_t* glen() const
+ const gap_word_t* glen() const BMNOEXCEPT
{
return glevel_len_;
}
/*! \brief Returns GAP level length for specified level
\param level - level number
*/
- unsigned glen(unsigned level) const
+ unsigned glen(unsigned level) const BMNOEXCEPT
{
return glevel_len_[level];
}
/*! \brief Returns size of the top block array in the tree
*/
- unsigned top_block_size() const
+ unsigned top_block_size() const BMNOEXCEPT
{
return top_block_size_;
}
/** \brief Returns reference on the allocator
*/
- allocator_type& get_allocator() { return alloc_; }
+ allocator_type& get_allocator() BMNOEXCEPT { return alloc_; }
/** \brief Returns allocator
*/
- allocator_type get_allocator() const { return alloc_; }
+ allocator_type get_allocator() const BMNOEXCEPT { return alloc_; }
/// if tree of blocks already up
- bool is_init() const { return top_blocks_ != 0; }
+ bool is_init() const BMNOEXCEPT { return top_blocks_ != 0; }
/// allocate first level of descr. of blocks
void init_tree()
{
BM_ASSERT(top_blocks_ == 0);
-
if (top_block_size_)
{
top_blocks_ = (bm::word_t***) alloc_.alloc_ptr(top_block_size_);
alloc_.free_bit_block(blk); \
}
- void deallocate_top_subblock(unsigned nblk_blk)
+ void deallocate_top_subblock(unsigned nblk_blk) BMNOEXCEPT
{
if (!top_blocks_[nblk_blk])
return;
/** destroy tree, free memory in all blocks and control structures
Note: pointers are NOT assigned to zero(!)
*/
- void destroy_tree() BMNOEXEPT
+ void destroy_tree() BMNOEXCEPT
{
if (!top_blocks_)
return;
}
#undef BM_FREE_OP
- void deinit_tree() BMNOEXEPT
+ void deinit_tree() BMNOEXCEPT
{
destroy_tree();
top_blocks_ = 0; top_block_size_ = 0;
// ----------------------------------------------------------------
/// calculate top blocks which are not NULL and not FULL
- unsigned find_real_top_blocks() const
+ unsigned find_real_top_blocks() const BMNOEXCEPT
{
unsigned cnt = 0;
unsigned top_blocks = top_block_size();
// ----------------------------------------------------------------
/// calculate max top blocks size whithout NULL-tail
- unsigned find_max_top_blocks() const
+ unsigned find_max_top_blocks() const BMNOEXCEPT
{
unsigned top_blocks = top_block_size();
if (!top_blocks)
// ----------------------------------------------------------------
- void validate_top_zero(unsigned i)
+ void validate_top_zero(unsigned i) BMNOEXCEPT
{
BM_ASSERT(i < top_block_size());
bm::word_t** blk_blk = top_blocks_[i];
- // TODO: SIMD
+ // TODO: SIMD or unroll
for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
{
if (blk_blk[j])
// ----------------------------------------------------------------
- void validate_top_full(unsigned i)
+ void validate_top_full(unsigned i) BMNOEXCEPT
{
BM_ASSERT(i < top_block_size());
bm::word_t** blk_blk = top_blocks_[i];
Calculate approximate memory needed to serialize big runs
of 0000s and 111s (as blocks)
*/
- size_t calc_serialization_null_full() const
+ size_t calc_serialization_null_full() const BMNOEXCEPT
{
size_t s_size = sizeof(unsigned);
if (!top_blocks_)
}
nb_empty += (i - nb_prev) * bm::set_sub_array_size;
blk_blk = top_blocks_[i];
+ BM_ASSERT(blk_blk);
+ if (!blk_blk)
+ break;
}
if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
{
class bit_block_guard
{
public:
- bit_block_guard(BlocksManager& bman, bm::word_t* blk=0)
+ bit_block_guard(BlocksManager& bman, bm::word_t* blk=0) BMNOEXCEPT
: bman_(bman),
block_(blk)
{}
if (IS_VALID_ADDR(block_))
bman_.get_allocator().free_bit_block(block_, 3);
}
- void attach(bm::word_t* blk)
+
+ void attach(bm::word_t* blk) BMNOEXCEPT
{
if (IS_VALID_ADDR(block_))
bman_.get_allocator().free_bit_block(block_);
block_ = blk;
}
+
bm::word_t* allocate()
{
attach(bman_.get_allocator().alloc_bit_block(3));
return block_;
}
- bm::word_t* get() { return block_; }
+ bm::word_t* get() BMNOEXCEPT { return block_; }
private:
bit_block_guard(const bit_block_guard&);
allocation_policy_type ap = allocation_policy_type(),
size_type bv_max_size = bm::id_max,
const allocator_type& alloc = allocator_type());
- ~basic_bmatrix() BMNOEXEPT;
+ ~basic_bmatrix() BMNOEXCEPT;
/*! copy-ctor */
basic_bmatrix(const basic_bmatrix<BV>& bbm);
#ifndef BM_NO_CXX11
/*! move-ctor */
- basic_bmatrix(basic_bmatrix<BV>&& bbm) BMNOEXEPT;
+ basic_bmatrix(basic_bmatrix<BV>&& bbm) BMNOEXCEPT;
/*! move assignmment operator */
- basic_bmatrix<BV>& operator = (basic_bmatrix<BV>&& bbm) BMNOEXEPT
+ basic_bmatrix<BV>& operator = (basic_bmatrix<BV>&& bbm) BMNOEXCEPT
{
if (this != &bbm)
{
}
#endif
- void set_allocator_pool(allocator_pool_type* pool_ptr) { pool_ = pool_ptr; }
+ void set_allocator_pool(allocator_pool_type* pool_ptr) BMNOEXCEPT
+ { pool_ = pool_ptr; }
///@}
///@{
/*! Swap content */
- void swap(basic_bmatrix<BV>& bbm) BMNOEXEPT;
+ void swap(basic_bmatrix<BV>& bbm) BMNOEXCEPT;
/*! Copy content */
void copy_from(const basic_bmatrix<BV>& bbm);
/*! @name row access */
///@{
- /*! Get row bit-vector */
- const bvector_type* row(size_type i) const;
+ /*! Get row bit-vector. Can return NULL */
+ const bvector_type* row(size_type i) const BMNOEXCEPT;
- /*! Get row bit-vector */
- bvector_type_const_ptr get_row(size_type i) const;
+ /*! Get row bit-vector. Can return NULL */
+ bvector_type_const_ptr get_row(size_type i) const BMNOEXCEPT;
- /*! Get row bit-vector */
- bvector_type* get_row(size_type i);
+ /*! Get row bit-vector. Can return NULL */
+ bvector_type* get_row(size_type i) BMNOEXCEPT;
/*! get number of value rows */
- size_type rows() const { return rsize_; }
+ size_type rows() const BMNOEXCEPT { return rsize_; }
/*! Make sure row is constructed, return bit-vector */
bvector_type_ptr construct_row(size_type row);
@param pos - column position in the matrix
@param octet_idx - octet based row position (1 octet - 8 rows)
*/
- unsigned char get_octet(size_type pos, size_type octet_idx) const;
+ unsigned char get_octet(size_type pos, size_type octet_idx) const BMNOEXCEPT;
/*!
Compare vector[pos] with octet
@return 0 - equal, -1 - less(vect[pos] < octet), 1 - greater
*/
int compare_octet(size_type pos,
- size_type octet_idx, char octet) const;
+ size_type octet_idx, char octet) const BMNOEXCEPT;
///@}
///@{
/// Test if 4 rows from i are not NULL
- bool test_4rows(unsigned i) const;
+ bool test_4rows(unsigned i) const BMNOEXCEPT;
/// Get low level internal access to
- const bm::word_t* get_block(size_type p, unsigned i, unsigned j) const;
+ const bm::word_t* get_block(size_type p,
+ unsigned i, unsigned j) const BMNOEXCEPT;
- unsigned get_half_octet(size_type pos, size_type row_idx) const;
+ unsigned get_half_octet(size_type pos, size_type row_idx) const BMNOEXCEPT;
/*!
\brief run memory optimization for all bit-vector rows
protected:
void allocate_rows(size_type rsize);
- void free_rows() BMNOEXEPT;
+ void free_rows() BMNOEXCEPT;
bvector_type* construct_bvector(const bvector_type* bv) const;
void destruct_bvector(bvector_type* bv) const;
#ifndef BM_NO_CXX11
/*! move-ctor */
- base_sparse_vector(base_sparse_vector<Val, BV, MAX_SIZE>&& bsv) BMNOEXEPT
+ base_sparse_vector(base_sparse_vector<Val, BV, MAX_SIZE>&& bsv) BMNOEXCEPT
{
bmatr_.swap(bsv.bmatr_);
size_ = bsv.size_;
}
#endif
- void swap(base_sparse_vector<Val, BV, MAX_SIZE>& bsv) BMNOEXEPT;
+ void swap(base_sparse_vector<Val, BV, MAX_SIZE>& bsv) BMNOEXCEPT;
- size_type size() const { return size_; }
+ size_type size() const BMNOEXCEPT { return size_; }
void resize(size_type new_size);
void clear_range(size_type left, size_type right, bool set_null);
/*! \brief resize to zero, free memory */
- void clear() BMNOEXEPT;
+ void clear() BMNOEXCEPT;
/*! return true if empty */
- bool empty() const { return size() == 0; }
+ bool empty() const BMNOEXCEPT { return size() == 0; }
public:
/**
\brief check if container supports NULL(unassigned) values
*/
- bool is_nullable() const { return bmatr_.get_row(this->null_plain()) != 0; }
+ bool is_nullable() const BMNOEXCEPT
+ { return bmatr_.get_row(this->null_plain()) != 0; }
/**
\brief Get bit-vector of assigned values or NULL
(if not constructed that way)
*/
- const bvector_type* get_null_bvector() const
+ const bvector_type* get_null_bvector() const BMNOEXCEPT
{ return bmatr_.get_row(this->null_plain()); }
/** \brief test if specified element is NULL
\return true if it is NULL false if it was assigned or container
is not configured to support assignment flags
*/
- bool is_null(size_type idx) const;
+ bool is_null(size_type idx) const BMNOEXCEPT;
///@}
\return bit-vector for the bit plain or NULL
*/
bvector_type_const_ptr
- get_plain(unsigned i) const { return bmatr_.row(i); }
+ get_plain(unsigned i) const BMNOEXCEPT { return bmatr_.row(i); }
/*!
\brief get total number of bit-plains in the vector
*/
- static unsigned plains() { return value_bits(); }
+ static unsigned plains() BMNOEXCEPT { return value_bits(); }
/** Number of stored bit-plains (value plains + extra */
- static unsigned stored_plains() { return value_bits()+1; }
+ static unsigned stored_plains() BMNOEXCEPT { return value_bits()+1; }
/** Number of effective bit-plains in the value type */
- unsigned effective_plains() const { return effective_plains_ + 1; }
+ unsigned effective_plains() const BMNOEXCEPT
+ { return effective_plains_ + 1; }
/*!
\brief get access to bit-plain as is (can return NULL)
*/
- bvector_type_ptr plain(unsigned i) { return bmatr_.get_row(i); }
- const bvector_type_ptr plain(unsigned i) const { return bmatr_.get_row(i); }
+ bvector_type_ptr plain(unsigned i) BMNOEXCEPT { return bmatr_.get_row(i); }
+ bvector_type_const_ptr plain(unsigned i) const BMNOEXCEPT
+ { return bmatr_.get_row(i); }
bvector_type* get_null_bvect() { return bmatr_.get_row(this->null_plain());}
@return 64-bit mask
@internal
*/
- bm::id64_t get_plains_mask(unsigned element_idx) const;
+ bm::id64_t get_plains_mask(unsigned element_idx) const BMNOEXCEPT;
/*!
get read-only access to inetrnal bit-matrix
*/
- const bmatrix_type& get_bmatrix() const { return bmatr_; }
+ const bmatrix_type& get_bmatrix() const BMNOEXCEPT { return bmatr_; }
///@}
/*!
@sa statistics
*/
- void calc_stat(typename bvector_type::statistics* st) const;
+ void calc_stat(typename bvector_type::statistics* st) const BMNOEXCEPT;
/*!
\brief check if another sparse vector has the same content and size
\return true, if it is the same
*/
bool equal(const base_sparse_vector<Val, BV, MAX_SIZE>& sv,
- bm::null_support null_able = bm::use_null) const;
+ bm::null_support null_able = bm::use_null) const BMNOEXCEPT;
protected:
void copy_from(const base_sparse_vector<Val, BV, MAX_SIZE>& bsv);
typedef typename bvector_type::block_idx_type block_idx_type;
/** Number of total bit-plains in the value type*/
- static unsigned value_bits()
+ static unsigned value_bits() BMNOEXCEPT
{
return base_sparse_vector<Val, BV, MAX_SIZE>::sv_value_plains;
}
/** plain index for the "NOT NULL" flags plain */
- static unsigned null_plain() { return value_bits(); }
+ static unsigned null_plain() BMNOEXCEPT { return value_bits(); }
/** optimize block in all matrix plains */
void optimize_block(block_idx_type nb)
//---------------------------------------------------------------------
template<typename BV>
-basic_bmatrix<BV>::~basic_bmatrix() BMNOEXEPT
+basic_bmatrix<BV>::~basic_bmatrix() BMNOEXCEPT
{
free_rows();
}
//---------------------------------------------------------------------
template<typename BV>
-basic_bmatrix<BV>::basic_bmatrix(basic_bmatrix<BV>&& bbm) BMNOEXEPT
+basic_bmatrix<BV>::basic_bmatrix(basic_bmatrix<BV>&& bbm) BMNOEXCEPT
: bv_size_(bbm.bv_size_),
alloc_(bbm.alloc_),
ap_(bbm.ap_),
template<typename BV>
const typename basic_bmatrix<BV>::bvector_type*
-basic_bmatrix<BV>::row(size_type i) const
+basic_bmatrix<BV>::row(size_type i) const BMNOEXCEPT
{
BM_ASSERT(i < rsize_);
return bv_rows_[i];
template<typename BV>
const typename basic_bmatrix<BV>::bvector_type*
-basic_bmatrix<BV>::get_row(size_type i) const
+basic_bmatrix<BV>::get_row(size_type i) const BMNOEXCEPT
{
BM_ASSERT(i < rsize_);
return bv_rows_[i];
template<typename BV>
typename basic_bmatrix<BV>::bvector_type*
-basic_bmatrix<BV>::get_row(size_type i)
+basic_bmatrix<BV>::get_row(size_type i) BMNOEXCEPT
{
BM_ASSERT(i < rsize_);
return bv_rows_[i];
//---------------------------------------------------------------------
template<typename BV>
-bool basic_bmatrix<BV>::test_4rows(unsigned j) const
+bool basic_bmatrix<BV>::test_4rows(unsigned j) const BMNOEXCEPT
{
BM_ASSERT((j + 4) <= rsize_);
#if defined(BM64_SSE4)
__m256i w0 = _mm256_loadu_si256((__m256i*)(bv_rows_ + j));
return !_mm256_testz_si256(w0, w0);
#else
- bool b = bv_rows_[j + 0] || bv_rows_[j + 1] || bv_rows_[j + 2] || bv_rows_[j + 3];
+ bool b = bv_rows_[j + 0] || bv_rows_[j + 1] ||
+ bv_rows_[j + 2] || bv_rows_[j + 3];
return b;
#endif
}
//---------------------------------------------------------------------
template<typename BV>
-void basic_bmatrix<BV>::free_rows() BMNOEXEPT
+void basic_bmatrix<BV>::free_rows() BMNOEXCEPT
{
for (size_type i = 0; i < rsize_; ++i)
{
//---------------------------------------------------------------------
template<typename BV>
-void basic_bmatrix<BV>::swap(basic_bmatrix<BV>& bbm) BMNOEXEPT
+void basic_bmatrix<BV>::swap(basic_bmatrix<BV>& bbm) BMNOEXCEPT
{
if (this == &bbm)
return;
template<typename BV>
const bm::word_t*
-basic_bmatrix<BV>::get_block(size_type p, unsigned i, unsigned j) const
+basic_bmatrix<BV>::get_block(size_type p,
+ unsigned i, unsigned j) const BMNOEXCEPT
{
bvector_type_const_ptr bv = this->row(p);
if (bv)
{
- const typename bvector_type::blocks_manager_type& bman = bv->get_blocks_manager();
+ const typename bvector_type::blocks_manager_type& bman =
+ bv->get_blocks_manager();
return bman.get_block_ptr(i, j);
}
return 0;
template<typename BV>
unsigned char
-basic_bmatrix<BV>::get_octet(size_type pos, size_type octet_idx) const
+basic_bmatrix<BV>::get_octet(size_type pos, size_type octet_idx) const BMNOEXCEPT
{
unsigned v = 0;
template<typename BV>
int basic_bmatrix<BV>::compare_octet(size_type pos,
size_type octet_idx,
- char octet) const
+ char octet) const BMNOEXCEPT
{
char value = char(get_octet(pos, octet_idx));
return (value > octet) - (value < octet);
template<typename BV>
unsigned
-basic_bmatrix<BV>::get_half_octet(size_type pos, size_type row_idx) const
+basic_bmatrix<BV>::get_half_octet(size_type pos, size_type row_idx) const BMNOEXCEPT
{
unsigned v = 0;
template<class Val, class BV, unsigned MAX_SIZE>
void base_sparse_vector<Val, BV, MAX_SIZE>::swap(
- base_sparse_vector<Val, BV, MAX_SIZE>& bsv) BMNOEXEPT
+ base_sparse_vector<Val, BV, MAX_SIZE>& bsv) BMNOEXCEPT
{
if (this != &bsv)
{
//---------------------------------------------------------------------
template<class Val, class BV, unsigned MAX_SIZE>
-void base_sparse_vector<Val, BV, MAX_SIZE>::clear() BMNOEXEPT
+void base_sparse_vector<Val, BV, MAX_SIZE>::clear() BMNOEXCEPT
{
unsigned plains = value_bits();
for (size_type i = 0; i < plains; ++i)
//---------------------------------------------------------------------
template<class Val, class BV, unsigned MAX_SIZE>
-bool base_sparse_vector<Val, BV, MAX_SIZE>::is_null(size_type idx) const
+bool base_sparse_vector<Val, BV, MAX_SIZE>::is_null(
+ size_type idx) const BMNOEXCEPT
{
const bvector_type* bv_null = get_null_bvector();
return (bv_null) ? (!bv_null->test(idx)) : false;
template<class Val, class BV, unsigned MAX_SIZE>
bm::id64_t base_sparse_vector<Val, BV, MAX_SIZE>::get_plains_mask(
- unsigned element_idx) const
+ unsigned element_idx) const BMNOEXCEPT
{
BM_ASSERT(element_idx < MAX_SIZE);
bm::id64_t mask = 0;
template<class Val, class BV, unsigned MAX_SIZE>
void base_sparse_vector<Val, BV, MAX_SIZE>::calc_stat(
- typename bvector_type::statistics* st) const
+ typename bvector_type::statistics* st) const BMNOEXCEPT
{
BM_ASSERT(st);
template<class Val, class BV, unsigned MAX_SIZE>
void base_sparse_vector<Val, BV, MAX_SIZE>::insert_clear_value_plains_from(
- unsigned plain_idx, size_type idx)
+ unsigned plain_idx, size_type idx)
{
for (unsigned i = plain_idx; i < sv_value_plains; ++i)
{
template<class Val, class BV, unsigned MAX_SIZE>
bool base_sparse_vector<Val, BV, MAX_SIZE>::equal(
const base_sparse_vector<Val, BV, MAX_SIZE>& sv,
- bm::null_support null_able) const
+ bm::null_support null_able) const BMNOEXCEPT
{
size_type arg_size = sv.size();
if (this->size_ != arg_size)
bool eq = bv->equal(*arg_bv);
if (!eq)
return false;
- /*
- int cmp = bv->compare(*arg_bv);
- if (cmp != 0)
- return false;
- */
} // for j
if (null_able == bm::use_null)
bool eq = bv_null->equal(*bv_null_arg);
if (!eq)
return false;
- /*
- int cmp = bv_null->compare(*bv_null);
- if (cmp != 0)
- return false;
- */
}
return true;
}
class byte_buffer_ptr
{
public:
- byte_buffer_ptr()
+ byte_buffer_ptr() BMNOEXCEPT
: byte_buf_(0), size_(0)
{}
/// construct byte buffer pointer
///
- byte_buffer_ptr(unsigned char* in_buf, size_t in_size)
+ byte_buffer_ptr(unsigned char* in_buf, size_t in_size) BMNOEXCEPT
: byte_buf_(in_buf), size_(in_size)
{}
/// Set buffer pointer
- void set_buf(unsigned char* in_buf, size_t in_size)
+ void set_buf(unsigned char* in_buf, size_t in_size) BMNOEXCEPT
{
byte_buf_ = in_buf; size_= in_size;
}
/// Get buffer size
- size_t size() const { return size_; }
+ size_t size() const BMNOEXCEPT { return size_; }
/// Get read access to buffer memory
- const unsigned char* buf() const { return byte_buf_; }
+ const unsigned char* buf() const BMNOEXCEPT { return byte_buf_; }
/// Get write access to buffer memory
- unsigned char* data() { return byte_buf_; }
+ unsigned char* data() BMNOEXCEPT { return byte_buf_; }
- bool operator==(const byte_buffer_ptr& lhs) const { return equal(lhs); }
+ bool operator==(const byte_buffer_ptr& lhs) const BMNOEXCEPT { return equal(lhs); }
/// return true if content and size is the same
- bool equal(const byte_buffer_ptr& lhs) const
+ bool equal(const byte_buffer_ptr& lhs) const BMNOEXCEPT
{
if (this == &lhs)
return true;
typedef size_t size_type;
public:
- byte_buffer() : capacity_(0), alloc_factor_(0)
+ byte_buffer() BMNOEXCEPT : capacity_(0), alloc_factor_(0)
{}
byte_buffer(size_t in_capacity)
allocate(in_capacity);
}
- byte_buffer(const byte_buffer& lhs)
+ byte_buffer(const byte_buffer& lhs) BMNOEXCEPT
{
byte_buf_ = 0;
size_ = capacity_ = alloc_factor_ = 0;
#ifndef BM_NO_CXX11
/// Move constructor
- byte_buffer(byte_buffer&& in_buf) BMNOEXEPT
+ byte_buffer(byte_buffer&& in_buf) BMNOEXCEPT
{
byte_buf_ = in_buf.byte_buf_;
in_buf.byte_buf_ = 0;
}
/// Move assignment operator
- byte_buffer& operator=(byte_buffer&& lhs) BMNOEXEPT
+ byte_buffer& operator=(byte_buffer&& lhs) BMNOEXCEPT
{
move_from(lhs);
return *this;
}
#endif
- byte_buffer& operator=(const byte_buffer& lhs)
+ byte_buffer& operator=(const byte_buffer& lhs) BMNOEXCEPT
{
if (this == &lhs)
return *this;
}
/// swap content with another buffer
- void swap(byte_buffer& other) BMNOEXEPT
+ void swap(byte_buffer& other) BMNOEXCEPT
{
if (this == &other)
return;
}
/// take/move content from another buffer
- void move_from(byte_buffer& other) BMNOEXEPT
+ void move_from(byte_buffer& other) BMNOEXCEPT
{
if (this == &other)
return;
/// Get buffer capacity
- size_t capacity() const { return capacity_; }
+ size_t capacity() const BMNOEXCEPT { return capacity_; }
/// adjust current size (buffer content preserved)
void resize(size_t new_size, bool copy_content = true)
{
if (new_capacity <= capacity_)
return;
+ if (!capacity_)
+ {
+ allocate(new_capacity);
+ return;
+ }
byte_buffer tmp_buffer(new_capacity);
tmp_buffer = *this;
}
/// return memory consumtion
- size_t mem_usage() const
+ size_t mem_usage() const BMNOEXCEPT
{
return sizeof(capacity_) + sizeof(alloc_factor_) +
capacity();
void set_buf(unsigned char* buf, size_t size);
/// compute number of words for the desired capacity
- static size_t compute_words(size_t capacity)
+ static size_t compute_words(size_t capacity) BMNOEXCEPT
{
size_t words = (capacity / sizeof(bm::word_t))+1;
return words;
typedef Val value_type;
typedef typename buffer_type::size_type size_type;
- heap_vector()
- : buffer_()
- {
- }
+ heap_vector() BMNOEXCEPT : buffer_()
+ {}
heap_vector(const heap_vector<Val, BVAlloc, trivial_type>& hv)
: buffer_()
}
}
- value_type* data() { return (value_type*) buffer_.data(); }
+ value_type* data() BMNOEXCEPT { return (value_type*) buffer_.data(); }
- void swap(heap_vector<Val, BVAlloc, trivial_type>& other) BMNOEXEPT
+ void swap(heap_vector<Val, BVAlloc, trivial_type>& other) BMNOEXCEPT
{
buffer_.swap(other.buffer_);
}
- const value_type& operator[](size_type pos) const
+ const value_type& operator[](size_type pos) const BMNOEXCEPT
{
BM_ASSERT(pos < size());
size_type v_size = value_size();
return *reinterpret_cast<const value_type*>(p);
}
- value_type& operator[](size_type pos)
+ value_type& operator[](size_type pos) BMNOEXCEPT
{
BM_ASSERT(pos < size());
size_type v_size = value_size();
return *reinterpret_cast<value_type*>(p);
}
- const value_type* begin() const
+ const value_type* begin() const BMNOEXCEPT
{
return (const value_type*) buffer_.buf();
}
- size_type size() const
+ size_type size() const BMNOEXCEPT
{
return buffer_.size() / value_size();
}
- size_type capacity() const
+ size_type capacity() const BMNOEXCEPT
{
return buffer_.capacity() / value_size();
}
- bool empty() const
+ bool empty() const BMNOEXCEPT
{
return (buffer_.size() == 0);
}
buffer_.resize(new_size * v_size);
}
- static size_type value_size()
+ static size_type value_size() BMNOEXCEPT
{
size_type size_of = sizeof(value_type);
return size_of;
row_size_in_bytes = sizeof(value_type) * COLS
};
- static size_t rows() { return ROWS; }
- static size_t cols() { return COLS; }
+ static size_t rows() BMNOEXCEPT { return ROWS; }
+ static size_t cols() BMNOEXCEPT { return COLS; }
/**
By default object is constructed NOT allocated.
*/
- heap_matrix()
+ heap_matrix() BMNOEXCEPT
: buffer_()
{}
buffer_.resize(size_in_bytes);
}
- bool is_init() const
+ bool is_init() const BMNOEXCEPT
{
return buffer_.size();
}
- value_type get(size_type row_idx, size_type col_idx) const
+ value_type get(size_type row_idx, size_type col_idx) const BMNOEXCEPT
{
BM_ASSERT(row_idx < ROWS);
BM_ASSERT(col_idx < COLS);
return ((const value_type*)buf)[col_idx];
}
- const value_type* row(size_type row_idx) const
+ const value_type* row(size_type row_idx) const BMNOEXCEPT
{
BM_ASSERT(row_idx < ROWS);
BM_ASSERT(buffer_.size());
return (const value_type*) buf;
}
- value_type* row(size_type row_idx)
+ value_type* row(size_type row_idx) BMNOEXCEPT
{
BM_ASSERT(row_idx < ROWS);
BM_ASSERT(buffer_.size());
}
/** memset all buffer to all zeroes */
- void set_zero()
+ void set_zero() BMNOEXCEPT
{
::memset(buffer_.data(), 0, size_in_bytes);
}
/*! swap content
*/
- void swap(heap_matrix& other) BMNOEXEPT
+ void swap(heap_matrix& other) BMNOEXCEPT
{
buffer_.swap(other.buffer_);
}
/*! move content from another matrix
*/
- void move_from(heap_matrix& other) BMNOEXEPT
+ void move_from(heap_matrix& other) BMNOEXCEPT
{
buffer_.move_from(other.buffer_);
}
/*! remapping: vect[idx] = matrix[idx, vect[idx] ]
*/
template<typename VECT_TYPE>
- void remap(VECT_TYPE* vect, size_type size) const
+ void remap(VECT_TYPE* vect, size_type size) const BMNOEXCEPT
{
BM_ASSERT(size <= ROWS);
const unsigned char* buf = buffer_.buf();
/*! zero-terminated remap: vect[idx] = matrix[idx, vect[idx] ]
*/
template<typename VECT_TYPE>
- void remapz(VECT_TYPE* vect) const
+ void remapz(VECT_TYPE* vect) const BMNOEXCEPT
{
const unsigned char* buf = buffer_.buf();
for (size_type i = 0; i < ROWS; ++i)
buffer_.resize(size_in_bytes());
}
- bool is_init() const
+ bool is_init() const BMNOEXCEPT
{
return buffer_.size();
}
- const value_type* row(size_type row_idx) const
+ const value_type* row(size_type row_idx) const BMNOEXCEPT
{
BM_ASSERT(row_idx < rows_);
BM_ASSERT(buffer_.size());
return (const value_type*) buf;
}
- value_type* row(size_type row_idx)
+ value_type* row(size_type row_idx) BMNOEXCEPT
{
BM_ASSERT(row_idx < rows_);
BM_ASSERT(buffer_.size());
return (value_type*)buf;
}
+ /**
+     Read one matrix element.
+     Declared const: the element is returned by value through the
+     read-only row() accessor, so the getter is usable on const matrices
+     and from other const members.
+     @param row_idx - row index (checked with BM_ASSERT against rows_)
+     @param col_idx - column index (checked with BM_ASSERT against cols_)
+     @return copy of the element stored at [row_idx, col_idx]
+ */
+ value_type get(size_type row_idx, size_type col_idx) const BMNOEXCEPT
+ {
+     BM_ASSERT(row_idx < rows_);
+     BM_ASSERT(col_idx < cols_);
+     const value_type* r = row(row_idx);
+     return r[col_idx];
+ }
+
+ /**
+     Store value v into the matrix cell [row_idx, col_idx].
+     Both indexes are range-checked with BM_ASSERT only.
+ */
+ void set(size_type row_idx, size_type col_idx, value_type v) BMNOEXCEPT
+ {
+     BM_ASSERT(row_idx < rows_);
+     BM_ASSERT(col_idx < cols_);
+     row(row_idx)[col_idx] = v;
+ }
+
/** memset all buffer to all zeroes */
- void set_zero()
+ void set_zero() BMNOEXCEPT
{
::memset(buffer_.data(), 0, size_in_bytes());
}
/*! swap content
*/
- void swap(dynamic_heap_matrix& other) BMNOEXEPT
+ void swap(dynamic_heap_matrix& other) BMNOEXCEPT
{
bm::xor_swap(rows_, other.rows_);
bm::xor_swap(cols_, other.cols_);
/*! move content from another matrix
*/
- void move_from(dynamic_heap_matrix& other) BMNOEXEPT
+ void move_from(dynamic_heap_matrix& other) BMNOEXCEPT
{
rows_ = other.rows_;
cols_ = other.cols_;
}
/** Get low-level buffer access */
- buffer_type& get_buffer() { return buffer_; }
+ buffer_type& get_buffer() BMNOEXCEPT { return buffer_; }
/** Get low-level buffer access */
- const buffer_type& get_buffer() const { return buffer_; }
+ const buffer_type& get_buffer() const BMNOEXCEPT { return buffer_; }
+
+ /**
+     Mirror the matrix across its main diagonal: every cell [i, j]
+     with j > i receives the value currently stored in cell [j, i].
+     (operation specific to matrices with symmetric distances;
+     the matrix is expected to be square, checked with BM_ASSERT)
+ */
+ void replicate_triange() BMNOEXCEPT
+ {
+     BM_ASSERT(rows_ == cols_);
+     for (size_type dst_row = 0; dst_row < rows_; ++dst_row)
+     {
+         size_type dst_col = dst_row + 1;
+         while (dst_col < cols_)
+         {
+             // source cell is the transposed position of the target cell
+             const value_type mirrored = get(dst_col, dst_row);
+             set(dst_row, dst_col, mirrored);
+             ++dst_col;
+         }
+     }
+ }
+ /**
+     Add up all elements of one row.
+     @param acc - [out] receives the total (previous content is overwritten)
+     @param row_idx - index of the row to add up (checked with BM_ASSERT)
+ */
+ template<typename ACC>
+ void sum(ACC& acc, size_type row_idx) const BMNOEXCEPT
+ {
+     BM_ASSERT(row_idx < rows_);
+     const value_type* cell = row(row_idx);
+     const value_type* const row_end = cell + cols_;
+     ACC total = 0;
+     for (; cell != row_end; ++cell)
+         total += *cell;
+     acc = total;
+ }
protected:
- size_type size_in_bytes() const
+
+ size_type size_in_bytes() const BMNOEXCEPT
{
return sizeof(value_type) * cols_ * rows_;
}
- size_type row_size_in_bytes() const
+ size_type row_size_in_bytes() const BMNOEXCEPT
{
return sizeof(value_type) * cols_;
}
const unsigned set_array_mask = 0xFFu;
const unsigned set_total_blocks32 = (bm::set_array_size32 * bm::set_array_size32);
+const unsigned set_sub_total_bits = bm::set_sub_array_size * bm::gap_max_bits;
#ifdef BM64ADDR
const unsigned set_total_blocks48 = bm::id_max48 / bm::gap_max_bits;
const unsigned long long id_max = bm::id_max48;
-const unsigned long long set_array_size48 = 1 + (bm::id_max48 / (bm::set_sub_array_size * bm::gap_max_bits));
+const unsigned long long set_array_size48 = 1 + (bm::id_max48 / set_sub_total_bits);
const unsigned set_top_array_size = bm::set_array_size48;
const id64_t set_total_blocks = id64_t(bm::set_top_array_size) * set_sub_array_size;
-//bm::id_max / (bm::gap_max_bits * bm::set_sub_array_size);
#else
const unsigned id_max = bm::id_max32;
const unsigned set_top_array_size = bm::set_array_size32;
};
template<bool T> const char _copyright<T>::_p[] =
- "BitMagic C++ Library. v.6.0.0 (c) 2002-2020 Anatoliy Kuznetsov.";
-template<bool T> const unsigned _copyright<T>::_v[3] = {6, 0, 0};
+ "BitMagic C++ Library. v.6.4.0 (c) 2002-2020 Anatoliy Kuznetsov.";
+template<bool T> const unsigned _copyright<T>::_v[3] = {6, 4, 0};
}
template<class BV>
-unsigned compute_serialization_size(const BV& bv)
+size_t compute_serialization_size(const BV& bv)
{
BM_DECLARE_TEMP_BLOCK(tb)
unsigned char* buf = 0;
- unsigned blob_size = 0;
+ typename BV::size_type blob_size = 0;
try
{
bm::serializer<BV> bvs(typename BV::allocator_type(), tb);
- bvs.set_compression_level(4);
+ //bvs.set_compression_level(4);
typename BV::statistics st;
bv.calc_stat(&st);
const typename SV::bvector_type* bv1 = sim_vec[k].get_first();
const typename SV::bvector_type* bv2 = sim_vec[k].get_second();
- unsigned bv_size2 = compute_serialization_size(*bv2);
+ auto bv_size2 = compute_serialization_size(*bv2);
typename SV::bvector_type bvx(*bv2);
bvx ^= *bv1;
- unsigned bv_size_x = compute_serialization_size(bvx);
+ auto bv_size_x = compute_serialization_size(bvx);
if (bv_size_x < bv_size2) // true savings
{
size_t diff = bv_size2 - bv_size_x;
BM_ASSERT(!fname.empty());
bm::sparse_vector_serial_layout<SV> sv_lay;
-
+
+ bm::sparse_vector_serializer<SV> sv_serializer;
+ sv_serializer.set_xor_ref(true);
+
+ sv_serializer.serialize(sv, sv_lay);
+/*
BM_DECLARE_TEMP_BLOCK(tb)
bm::sparse_vector_serialize(sv, sv_lay, tb);
-
+*/
std::ofstream fout(fname.c_str(), std::ios::binary);
if (!fout.good())
{
}
-// comapre-check if sparse vector is excatly coresponds to vector
+// compare-check: verify that sparse vector exactly corresponds to vector
//
// returns 0 - if equal
// 1 - no size match
}
+/**
+    Feed every value reported by the enumerator of bv into the back
+    inserter of sv, in enumeration order, then flush the inserter.
+    @param sv - [out] target vector, written through get_back_inserter()
+    @param bv - source bit-vector, traversed from bv.first()
+*/
+template<class SV, class BV>
+void convert_bv2sv(SV& sv, const BV& bv)
+{
+    typename SV::back_insert_iterator inserter = sv.get_back_inserter();
+    for (typename BV::enumerator pos = bv.first(); pos.valid(); ++pos)
+        inserter = pos.value();
+    inserter.flush();
+}
+
+
} // namespace
#include "bmundef.h"
// cxx11 features
//
#if defined(BM_NO_CXX11) || (defined(_MSC_VER) && _MSC_VER < 1900)
-# define BMNOEXEPT
+# define BMNOEXCEPT
+# define BMNOEXCEPT2
#else
-# ifndef BMNOEXEPT
-# define BMNOEXEPT noexcept
+# ifndef BMNOEXCEPT
+# define BMNOEXCEPT noexcept
+#if defined(__EMSCRIPTEN__)
+#else
+# define BMNOEXCEPT2
+#endif
# endif
#endif
// detects use of EMSCRIPTEN engine and tweaks settings
// WebAssemply compiles into 32-bit ptr yet 64-bit wordsize use GCC extensions
//
+// BMNOEXCEPT2 is to declare "noexcept" for WebAsm only where needed
+// and silence GCC warnings where not.
+// Both definitions are guarded: the cxx11 feature section may already
+// have defined BMNOEXCEPT2 (empty), and redefining a macro with a
+// different replacement list is ill-formed.
#if defined(__EMSCRIPTEN__)
# define BM64OPT
# define BM_USE_GCC_BUILD
-//# define BM_FORBID_UNALIGNED_ACCESS
-#endif
-
-// disable 'register' keyword, which is obsolete in C++11
-//
-#ifndef BMREGISTER
-# define BMREGISTER
+# ifndef BMNOEXCEPT2
+#  define BMNOEXCEPT2 noexcept
+# endif
+#else
+# ifndef BMNOEXCEPT2
+#  define BMNOEXCEPT2
+# endif
#endif
inline
bm::id_t bit_block_calc_count_range(const bm::word_t* block,
bm::word_t left,
- bm::word_t right);
+ bm::word_t right) BMNOEXCEPT;
inline
bm::id_t bit_block_any_range(const bm::word_t* block,
bm::word_t left,
- bm::word_t right);
+ bm::word_t right) BMNOEXCEPT;
/*!
@brief Structure with statistical information about memory
unsigned long long gaps_by_level[bm::gap_levels]; ///< number of GAP blocks at each level
/// cound bit block
- void add_bit_block()
+ void add_bit_block() BMNOEXCEPT
{
++bit_blocks;
size_t mem_used = sizeof(bm::word_t) * bm::set_block_size;
}
/// count gap block
- void add_gap_block(unsigned capacity, unsigned length)
+ void add_gap_block(unsigned capacity, unsigned length) BMNOEXCEPT
{
++gap_blocks;
size_t mem_used = (capacity * sizeof(gap_word_t));
}
/// Reset statisctics
- void reset()
+ void reset() BMNOEXCEPT
{
bit_blocks = gap_blocks = ptr_sub_blocks = bv_count = 0;
max_serialize_mem = memory_used = gap_cap_overhead = 0;
}
/// Sum data from another sttructure
- void add(const bv_statistics& st)
+ void add(const bv_statistics& st) BMNOEXCEPT
{
bit_blocks += st.bit_blocks;
gap_blocks += st.gap_blocks;
+ ptr_sub_blocks += st.ptr_sub_blocks;
bv_count += st.bv_count;
max_serialize_mem += st.max_serialize_mem + 8;
memory_used += st.memory_used;
{
First first;
Second second;
+
+ pair(First f, Second s) : first(f), second(s) {}
};
/**
\internal
*/
template<typename BI_TYPE>
-void get_block_coord(BI_TYPE nb, unsigned& i, unsigned& j)
+BMFORCEINLINE
+void get_block_coord(BI_TYPE nb, unsigned& i, unsigned& j) BMNOEXCEPT
{
i = unsigned(nb >> bm::set_array_shift); // top block address
j = unsigned(nb & bm::set_array_mask); // address in sub-block
}
/**
- \brief ad-hoc conditional expressions
+ Compute bit address of the first bit in a superblock
\internal
*/
-template <bool b> struct conditional
+template<typename RTYPE>
+BMFORCEINLINE RTYPE get_super_block_start(unsigned i) BMNOEXCEPT
{
- static bool test() { return true; }
-};
-template <> struct conditional<false>
+ return RTYPE(i) * bm::set_sub_total_bits;
+}
+
+/**
+ Compute bit address of the first bit in a block
+ \internal
+*/
+template<typename RTYPE>
+BMFORCEINLINE RTYPE get_block_start(unsigned i, unsigned j) BMNOEXCEPT
{
- static bool test() { return false; }
-};
+ RTYPE idx = bm::get_super_block_start<RTYPE>(i);
+ idx += (j) * bm::gap_max_bits;
+ return idx;
+}
+
/*!
@defgroup gapfunc GAP functions
*/
-
-
-/*!
- Returns BSR value
- @ingroup bitfunc
-*/
-template <class T>
-unsigned bit_scan_reverse(T value)
-{
- BM_ASSERT(value);
-
- if (bm::conditional<sizeof(T)==8>::test())
- {
- #if defined(BM_USE_GCC_BUILD)
- return (unsigned) (63 - __builtin_clzll(value));
- #else
- bm::id64_t v8 = value;
- v8 >>= 32;
- unsigned v = (unsigned)v8;
- if (v)
- {
- v = bm::bit_scan_reverse32(v);
- return v + 32;
- }
- #endif
- }
- return bit_scan_reverse32((unsigned)value);
-}
-
-
/*!
Returns bit count
@ingroup bitfunc
*/
BMFORCEINLINE
-bm::id_t word_bitcount(bm::id_t w)
+bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
{
#if defined(BMSSE42OPT) || defined(BMAVX2OPT)
return bm::id_t(_mm_popcnt_u32(w));
}
inline
-int parallel_popcnt_32(unsigned int n)
+int parallel_popcnt_32(unsigned int n) BMNOEXCEPT
{
unsigned int tmp;
@ingroup bitfunc
*/
BMFORCEINLINE
-unsigned word_bitcount64(bm::id64_t x)
+unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT
{
#if defined(BMSSE42OPT) || defined(BMAVX2OPT)
#if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
inline
unsigned bitcount64_4way(bm::id64_t x, bm::id64_t y,
- bm::id64_t u, bm::id64_t v)
+ bm::id64_t u, bm::id64_t v) BMNOEXCEPT
{
const bm::id64_t m1 = 0x5555555555555555U;
const bm::id64_t m2 = 0x3333333333333333U;
/*! @brief Adaptor to copy 1 bits to array
@internal
*/
-template<typename B> class copy_to_array_functor
+template<typename B>
+class copy_to_array_functor
{
public:
copy_to_array_functor(B* bits): bp_(bits)
B* ptr() { return bp_; }
- void operator()(unsigned bit_idx) { *bp_++ = (B)bit_idx; }
+ void operator()(unsigned bit_idx) BMNOEXCEPT { *bp_++ = (B)bit_idx; }
void operator()(unsigned bit_idx0,
- unsigned bit_idx1)
+ unsigned bit_idx1) BMNOEXCEPT
{
bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
bp_+=2;
void operator()(unsigned bit_idx0,
unsigned bit_idx1,
- unsigned bit_idx2)
+ unsigned bit_idx2) BMNOEXCEPT
{
bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2;
bp_+=3;
void operator()(unsigned bit_idx0,
unsigned bit_idx1,
unsigned bit_idx2,
- unsigned bit_idx3)
+ unsigned bit_idx3) BMNOEXCEPT
{
bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
bp_[2] = (B)bit_idx2; bp_[3] = (B)bit_idx3;
@ingroup bitfunc
*/
-template<typename T,typename B> unsigned bit_list(T w, B* bits)
+template<typename T,typename B>
+unsigned bit_list(T w, B* bits) BMNOEXCEPT
{
copy_to_array_functor<B> func(bits);
bit_for_each(w, func);
@ingroup bitfunc
*/
-template<typename T,typename B> unsigned bit_list_4(T w, B* bits)
+template<typename T,typename B>
+unsigned bit_list_4(T w, B* bits) BMNOEXCEPT
{
copy_to_array_functor<B> func(bits);
bit_for_each_4(w, func);
@internal
*/
template<typename B>
-unsigned short bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs)
+unsigned short
+bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs) BMNOEXCEPT
{
unsigned pos = 0;
while (w)
@internal
*/
template<typename B>
-unsigned short bitscan_popcnt(bm::id_t w, B* bits)
+unsigned short bitscan_popcnt(bm::id_t w, B* bits) BMNOEXCEPT
{
unsigned pos = 0;
while (w)
@ingroup bitfunc
*/
template<typename B>
-unsigned short bitscan_popcnt64(bm::id64_t w, B* bits)
+unsigned short bitscan_popcnt64(bm::id64_t w, B* bits) BMNOEXCEPT
{
unsigned short pos = 0;
while (w)
{
- bm::id64_t t = w & -w;
+ bm::id64_t t = bmi_blsi_u64(w); // w & -w;
bits[pos++] = (B) bm::word_bitcount64(t - 1);
- w &= w - 1;
+ w = bmi_bslr_u64(w); // w &= w - 1;
+ }
+ return pos;
+}
+
+/*!
+ \brief Unpacks 64-bit word into list of ON bit indexes using popcnt method
+
+ Set bits are visited from the lowest to the highest; for each one the
+ value (bit index + offs), converted to B, is appended to the output array.
+
+ \param w - value
+ \param bits - pointer to the result array (the caller provides room for
+ one entry per set bit of w)
+ \param offs - value to add to bit position (programmed shift)
+ \return number of bits in the list
+ @ingroup bitfunc
+*/
+template<typename B>
+unsigned short
+bitscan_popcnt64(bm::id64_t w, B* bits, unsigned short offs) BMNOEXCEPT
+{
+ unsigned short pos = 0;
+ while (w)
+ {
+ bm::id64_t t = bmi_blsi_u64(w); // isolate the lowest set bit: w & -w
+ bits[pos++] = B(bm::word_bitcount64(t - 1) + offs); // popcount(t-1) is the index of that bit
+ w = bmi_bslr_u64(w); // clear the lowest set bit: w &= w - 1
}
return pos;
}
+
template<typename V, typename B>
-unsigned short bitscan(V w, B* bits)
+unsigned short bitscan(V w, B* bits) BMNOEXCEPT
{
if (bm::conditional<sizeof(V) == 8>::test())
- {
return bm::bitscan_popcnt64(w, bits);
- }
else
- {
return bm::bitscan_popcnt((bm::word_t)w, bits);
- }
}
// --------------------------------------------------------------
\return selected value (inxed of bit set)
*/
inline
-unsigned word_select64_linear(bm::id64_t w, unsigned rank)
+unsigned word_select64_linear(bm::id64_t w, unsigned rank) BMNOEXCEPT
{
BM_ASSERT(w);
BM_ASSERT(rank);
\return selected value (inxed of bit set)
*/
inline
-unsigned word_select64_bitscan(bm::id64_t w, unsigned rank)
+unsigned word_select64_bitscan(bm::id64_t w, unsigned rank) BMNOEXCEPT
{
BM_ASSERT(w);
BM_ASSERT(rank);
\return selected value (inxed of bit set)
*/
inline
-unsigned word_select64(bm::id64_t w, unsigned rank)
+unsigned word_select64(bm::id64_t w, unsigned rank) BMNOEXCEPT
{
#if defined(BMI2_SELECT64)
return BMI2_SELECT64(w, rank);
@internal
*/
BMFORCEINLINE
-bm::id64_t widx_to_digest_mask(unsigned w_idx)
+bm::id64_t widx_to_digest_mask(unsigned w_idx) BMNOEXCEPT
{
bm::id64_t mask(1ull);
return mask << (w_idx / bm::set_block_digest_wave_size);
@internal
*/
BMFORCEINLINE
-bm::id64_t digest_mask(unsigned from, unsigned to)
+bm::id64_t digest_mask(unsigned from, unsigned to) BMNOEXCEPT
{
BM_ASSERT(from <= to);
@internal
*/
inline
-bool check_zero_digest(bm::id64_t digest, unsigned bitpos_from, unsigned bitpos_to)
+bool check_zero_digest(bm::id64_t digest,
+ unsigned bitpos_from, unsigned bitpos_to) BMNOEXCEPT
{
bm::id64_t mask = bm::digest_mask(bitpos_from, bitpos_to);
return !(digest & mask);
@internal
*/
inline
-void block_init_digest0(bm::word_t* const block, bm::id64_t digest)
+void block_init_digest0(bm::word_t* const block, bm::id64_t digest) BMNOEXCEPT
{
unsigned off;
for (unsigned i = 0; i < 64; ++i)
@internal
*/
inline
-bm::id64_t calc_block_digest0(const bm::word_t* const block)
+bm::id64_t calc_block_digest0(const bm::word_t* const block) BMNOEXCEPT
{
bm::id64_t digest0 = 0;
unsigned off;
@internal
*/
inline
-bm::id64_t update_block_digest0(const bm::word_t* const block, bm::id64_t digest)
+bm::id64_t
+update_block_digest0(const bm::word_t* const block, bm::id64_t digest) BMNOEXCEPT
{
const bm::id64_t mask(1ull);
bm::id64_t d = digest;
/// Returns true if set operation is constant (bitcount)
inline
-bool is_const_set_operation(set_operation op)
+bool is_const_set_operation(set_operation op) BMNOEXCEPT
{
return (int(op) >= int(set_COUNT));
}
Convert set operation to operation
*/
inline
-bm::operation setop2op(bm::set_operation op)
+bm::operation setop2op(bm::set_operation op) BMNOEXCEPT
{
BM_ASSERT(op == set_AND ||
op == set_OR ||
// version with minimal branching, super-scalar friendly
//
inline
- static bm::id64_t block_type(const bm::word_t* bp)
+ static bm::id64_t block_type(const bm::word_t* bp) BMNOEXCEPT
{
bm::id64_t type;
if (bm::conditional<sizeof(void*) == 8>::test())
}
BMFORCEINLINE
- static bool is_full_block(const bm::word_t* bp)
+ static bool is_full_block(const bm::word_t* bp) BMNOEXCEPT
{ return (bp == _block._p || bp == _block._p_fullp); }
BMFORCEINLINE
- static bool is_valid_block_addr(const bm::word_t* bp)
+ static bool is_valid_block_addr(const bm::word_t* bp) BMNOEXCEPT
{ return (bp && !(bp == _block._p || bp == _block._p_fullp)); }
static all_set_block _block;
/// XOR swap two scalar variables
template<typename W>
-void xor_swap(W& x, W& y)
+void xor_swap(W& x, W& y) BMNOEXCEPT
{
BM_ASSERT(&x != &y);
x ^= y;
@internal
*/
template<typename N>
-bool find_not_null_ptr(bm::word_t*** arr, N start, N size, N* pos)
+bool find_not_null_ptr(bm::word_t*** arr, N start, N size, N* pos) BMNOEXCEPT
{
BM_ASSERT(pos);
// BM_ASSERT(start < size);
@ingroup bitfunc
*/
inline
-bool bit_is_all_zero(const bm::word_t* BMRESTRICT start)
+bool bit_is_all_zero(const bm::word_t* BMRESTRICT start) BMNOEXCEPT
{
#if defined(VECT_IS_ZERO_BLOCK)
return VECT_IS_ZERO_BLOCK(start);
@ingroup gapfunc
*/
BMFORCEINLINE
-bool gap_is_all_zero(const bm::gap_word_t* buf)
+bool gap_is_all_zero(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT
{
// (almost) branchless variant:
return (!(*buf & 1u)) & (!(bm::gap_max_bits - 1 - buf[1]));
@ingroup gapfunc
*/
BMFORCEINLINE
-bool gap_is_all_one(const bm::gap_word_t* buf)
+bool gap_is_all_one(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT
{
return ((*buf & 1u) && (buf[1] == bm::gap_max_bits - 1));
}
@ingroup gapfunc
*/
BMFORCEINLINE
-bm::gap_word_t gap_length(const bm::gap_word_t* buf)
+bm::gap_word_t gap_length(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT
{
return (bm::gap_word_t)((*buf >> 3) + 1);
}
@ingroup gapfunc
*/
template<typename T>
-unsigned gap_capacity(const T* buf, const T* glevel_len)
+unsigned
+gap_capacity(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT
{
return glevel_len[(*buf >> 1) & 3];
}
@ingroup gapfunc
*/
template<typename T>
-unsigned gap_limit(const T* buf, const T* glevel_len)
+unsigned
+gap_limit(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT
{
return glevel_len[(*buf >> 1) & 3]-4;
}
@ingroup gapfunc
*/
template<typename T>
-T gap_level(const T* buf)
+T gap_level(const T* BMRESTRICT buf) BMNOEXCEPT
{
return T((*buf >> 1) & 3u);
}
@ingroup gapfunc
*/
template<typename T>
-unsigned gap_find_last(const T* buf, unsigned* last)
+unsigned
+gap_find_last(const T* BMRESTRICT buf, unsigned* BMRESTRICT last) BMNOEXCEPT
{
BM_ASSERT(last);
@ingroup gapfunc
*/
template<typename T>
-unsigned gap_find_first(const T* buf, unsigned* first)
+unsigned
+gap_find_first(const T* BMRESTRICT buf, unsigned* BMRESTRICT first) BMNOEXCEPT
{
BM_ASSERT(first);
@ingroup gapfunc
*/
template<typename T>
-unsigned gap_bfind(const T* buf, unsigned pos, unsigned* is_set)
+unsigned gap_bfind(const T* BMRESTRICT buf,
+ unsigned pos, unsigned* BMRESTRICT is_set) BMNOEXCEPT
{
BM_ASSERT(pos < bm::gap_max_bits);
- *is_set = (*buf) & 1;
+ #undef VECT_GAP_BFIND // TODO: VECTOR bfind causes performance degradation
+ #ifdef VECT_GAP_BFIND
+ return VECT_GAP_BFIND(buf, pos, is_set);
+ #else
+ *is_set = (*buf) & 1;
- unsigned start = 1;
- unsigned end = 1 + ((*buf) >> 3);
+ unsigned start = 1;
+ unsigned end = 1 + ((*buf) >> 3);
- while ( start != end )
- {
- unsigned curr = (start + end) >> 1;
- if ( buf[curr] < pos )
- start = curr + 1;
- else
- end = curr;
- }
- *is_set ^= ((start-1) & 1);
- return start;
+ while ( start != end )
+ {
+ unsigned curr = (start + end) >> 1;
+ if ( buf[curr] < pos )
+ start = curr + 1;
+ else
+ end = curr;
+ }
+ *is_set ^= ((start-1) & 1);
+ return start;
+ #endif
}
\return true if position is in "1" gap
@ingroup gapfunc
*/
-template<typename T> unsigned gap_test(const T* buf, unsigned pos)
+template<typename T>
+unsigned gap_test(const T* BMRESTRICT buf, unsigned pos) BMNOEXCEPT
{
BM_ASSERT(pos < bm::gap_max_bits);
@ingroup gapfunc
*/
template<typename T>
-unsigned gap_test_unr(const T* buf, const unsigned pos)
+unsigned gap_test_unr(const T* BMRESTRICT buf, const unsigned pos) BMNOEXCEPT
{
BM_ASSERT(pos < bm::gap_max_bits);
return (*buf) & 1;
}
#if defined(BMSSE2OPT)
- unsigned start = 1;
- unsigned end = 1 + ((*buf) >> 3);
- unsigned dsize = end - start;
-
- if (dsize < 17)
- {
- start = bm::sse2_gap_find(buf + 1, (bm::gap_word_t)pos, dsize);
- unsigned res = ((*buf) & 1) ^ ((start) & 1);
- BM_ASSERT(buf[start + 1] >= pos);
- BM_ASSERT(buf[start] < pos || (start == 0));
- BM_ASSERT(res == bm::gap_test(buf, pos));
- return res;
- }
- unsigned arr_end = end;
- while (start != end)
- {
- unsigned curr = (start + end) >> 1;
- if (buf[curr] < pos)
- start = curr + 1;
- else
- end = curr;
-
- unsigned size = end - start;
- if (size < 16)
- {
- size += (end != arr_end);
- unsigned idx = bm::sse2_gap_find(buf + start, (bm::gap_word_t)pos, size);
- start += idx;
-
- BM_ASSERT(buf[start] >= pos);
- BM_ASSERT(buf[start - 1] < pos || (start == 1));
- break;
- }
- }
-
- unsigned res = ((*buf) & 1) ^ ((--start) & 1);
-
+ unsigned res = bm::sse2_gap_test(buf, pos);
BM_ASSERT(res == bm::gap_test(buf, pos));
- return res;
-//#endif
#elif defined(BMSSE42OPT)
- unsigned start = 1;
- unsigned end = 1 + ((*buf) >> 3);
- unsigned dsize = end - start;
-
- if (dsize < 17)
- {
- start = bm::sse4_gap_find(buf+1, (bm::gap_word_t)pos, dsize);
- unsigned res = ((*buf) & 1) ^ ((start) & 1);
- BM_ASSERT(buf[start+1] >= pos);
- BM_ASSERT(buf[start] < pos || (start==0));
- BM_ASSERT(res == bm::gap_test(buf, pos));
- return res;
- }
- unsigned arr_end = end;
- while (start != end)
- {
- unsigned curr = (start + end) >> 1;
- if (buf[curr] < pos)
- start = curr + 1;
- else
- end = curr;
-
- unsigned size = end - start;
- if (size < 16)
- {
- size += (end != arr_end);
- unsigned idx = bm::sse4_gap_find(buf + start, (bm::gap_word_t)pos, size);
- start += idx;
-
- BM_ASSERT(buf[start] >= pos);
- BM_ASSERT(buf[start - 1] < pos || (start == 1));
- break;
- }
- }
-
- unsigned res = ((*buf) & 1) ^ ((--start) & 1);
-
+ unsigned res = bm::sse42_gap_test(buf, pos);
BM_ASSERT(res == bm::gap_test(buf, pos));
#elif defined(BMAVX2OPT)
unsigned res = bm::avx2_gap_test(buf, pos);
\internal
*/
template<typename T, typename N, typename F>
-void for_each_nzblock_range(T*** root, N top_size, N nb_from, N nb_to, F& f)
+void for_each_nzblock_range(T*** root,
+ N top_size, N nb_from, N nb_to, F& f) BMNOEXCEPT
{
BM_ASSERT(top_size);
if (nb_from > nb_to)
return;
- unsigned i_from = nb_from >> bm::set_array_shift;
- unsigned j_from = nb_from & bm::set_array_mask;
- unsigned i_to = nb_to >> bm::set_array_shift;
- unsigned j_to = nb_to & bm::set_array_mask;
+ unsigned i_from = unsigned(nb_from >> bm::set_array_shift);
+ unsigned j_from = unsigned(nb_from & bm::set_array_mask);
+ unsigned i_to = unsigned(nb_to >> bm::set_array_shift);
+ unsigned j_to = unsigned(nb_to & bm::set_array_mask);
if (i_from >= top_size)
return;
if (i_to >= top_size)
{
- i_to = top_size-1;
+ i_to = unsigned(top_size-1);
j_to = bm::set_sub_array_size-1;
}
{
T** blk_blk = root[i];
if (!blk_blk)
- {
continue;
- }
if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
{
unsigned j = (i == i_from) ? j_from : 0;
if (!j && (i != i_to)) // full sub-block
- {
- f.add_full(bm::set_sub_array_size * bm::gap_max_bits);
- }
+ f.add_full(bm::set_sub_total_bits);
else
{
do
do
{
if (blk_blk[j])
- {
f(blk_blk[j]);
- }
if ((i == i_to) && (j == j_to))
return;
- ++j;
- } while (j < bm::set_sub_array_size);
+ } while (++j < bm::set_sub_array_size);
}
} // for i
}
Function returns if function-predicate returns true
*/
template<typename T, typename BI, typename F>
-bool for_each_nzblock_if(T*** root, BI size1, F& f)
+bool for_each_nzblock_if(T*** root, BI size1, F& f) BMNOEXCEPT
{
BI block_idx = 0;
for (BI i = 0; i < size1; ++i)
/*! Computes SUM of all elements of the sequence
*/
template<typename T>
-bm::id64_t sum_arr(T* first, T* last)
+bm::id64_t sum_arr(const T* first, const T* last) BMNOEXCEPT
{
bm::id64_t sum = 0;
- while (first < last)
- {
+ for (;first < last; ++first)
sum += *first;
- ++first;
- }
return sum;
}
@ingroup gapfunc
*/
template<typename T>
-void gap_split(const T* buf, T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt)
+void gap_split(const T* buf,
+ T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt) BMNOEXCEPT
{
const T* pcurr = buf;
unsigned len = (*pcurr >> 3);
@ingroup gapfunc
*/
template<typename T>
-unsigned gap_bit_count(const T* buf, unsigned dsize=0)
+unsigned gap_bit_count(const T* buf, unsigned dsize=0) BMNOEXCEPT
{
const T* pcurr = buf;
if (dsize == 0)
bits_counter += *pcurr + 1;
++pcurr;
}
- ++pcurr; // set GAP to 1
-
- while (pcurr <= pend)
- {
+ for (++pcurr; pcurr <= pend; pcurr += 2)
bits_counter += *pcurr - *(pcurr-1);
- pcurr += 2; // jump to the next positive GAP
- }
-
return bits_counter;
}
\return Number of non-zero bits.
@ingroup gapfunc
*/
-template<typename T> unsigned gap_bit_count_unr(const T* buf)
+template<typename T>
+unsigned gap_bit_count_unr(const T* buf) BMNOEXCEPT
{
const T* pcurr = buf;
unsigned dsize = (*pcurr >> 3);
{
cnt += *pcurr - *(pcurr - 1);
}
- BM_ASSERT(cnt == gap_bit_count(buf));
+ BM_ASSERT(cnt == bm::gap_bit_count(buf));
return cnt;
}
@ingroup gapfunc
*/
template<typename T>
-unsigned gap_bit_count_range(const T* const buf, unsigned left, unsigned right)
+unsigned gap_bit_count_range(const T* const buf,
+ unsigned left, unsigned right) BMNOEXCEPT
{
BM_ASSERT(left <= right);
+ BM_ASSERT(right < bm::gap_max_bits);
const T* pcurr = buf;
const T* pend = pcurr + (*pcurr >> 3);
return bits_counter;
}
+/*!
+ \brief Test if all bits are 1 in GAP buffer in the closed [left, right] range.
+
+ Locates the GAP run that contains \p left. The range is all ones only
+ when that run is a 1-run and its end border is at or beyond \p right,
+ i.e. the whole range lies inside one run.
+
+ \param buf - GAP buffer pointer.
+ \param left - leftmost bit index to start from (asserted <= right)
+ \param right - rightmost bit index (asserted < bm::gap_max_bits)
+ \return true if every bit in the range is 1 ("11111")
+ @ingroup gapfunc
+*/
+template<typename T>
+bool gap_is_all_one_range(const T* const BMRESTRICT buf,
+ unsigned left, unsigned right) BMNOEXCEPT
+{
+ BM_ASSERT(left <= right);
+ BM_ASSERT(right < bm::gap_max_bits);
+
+ unsigned is_set;
+ unsigned start_pos = bm::gap_bfind(buf, left, &is_set); // index of the run holding 'left', is_set = its value
+ if (!is_set) // the run holding 'left' is a 0-run
+ return false;
+ const T* const pcurr = buf + start_pos;
+ return (right <= *pcurr); // *pcurr is the end border of that 1-run
+}
+
+/*!
+ \brief Test if any bits are 1 in GAP buffer in the closed [left, right] range.
+
+ Locates the GAP run that contains \p left. If it is a 1-run the answer
+ is immediately true; if it is a 0-run the answer is true only when the
+ range extends past the end border of that run.
+
+ \param buf - GAP buffer pointer.
+ \param left - leftmost bit index to start from (asserted <= right)
+ \param right - rightmost bit index (asserted < bm::gap_max_bits)
+ \return true if at least one bit in the range is 1 (e.g. "00010")
+ @ingroup gapfunc
+*/
+template<typename T>
+bool gap_any_range(const T* const BMRESTRICT buf,
+ unsigned left, unsigned right) BMNOEXCEPT
+{
+ BM_ASSERT(left <= right);
+ BM_ASSERT(right < bm::gap_max_bits);
+
+ unsigned is_set;
+ unsigned start_pos = bm::gap_bfind(buf, left, &is_set); // index of the run holding 'left', is_set = its value
+ const T* const pcurr = buf + start_pos;
+
+ if (!is_set) // start GAP is 0 ...
+ {
+ if (right <= *pcurr) // ... but the whole range stays inside this 0-run
+ return false; // .. so no bit is set
+ return true; // range crosses the run's end border into the run that follows
+ }
+ return true;
+}
+
+/*!
+ \brief Test if all bits are 1 in GAP buffer in the closed [left, right]
+ range and the range is flanked with 0s on both sides
+
+ The check succeeds only when a single 1-run starts exactly at \p left
+ and ends exactly at \p right.
+
+ \param buf - GAP buffer pointer.
+ \param left - leftmost bit index to start from (asserted > 0, so that
+ left-1 exists)
+ \param right - rightmost bit index (asserted < bm::gap_max_bits-1, so
+ that right+1 exists)
+ \return true if the bits around and inside the range look like "011110"
+ @ingroup gapfunc
+*/
+template<typename T>
+bool gap_is_interval(const T* const BMRESTRICT buf,
+ unsigned left, unsigned right) BMNOEXCEPT
+{
+ BM_ASSERT(left <= right);
+ BM_ASSERT(left > 0); // cannot check left-1 otherwise
+ BM_ASSERT(right < bm::gap_max_bits-1); // cannot check right+1 otherwise
+
+ unsigned is_set;
+ unsigned start_pos = bm::gap_bfind(buf, left, &is_set); // index of the run holding 'left', is_set = its value
+
+ const T* pcurr = buf + start_pos;
+ if (!is_set || (right != *pcurr) || (start_pos <= 1)) // need a 1-run ending exactly at 'right' that is not the first run
+ return false;
+ --pcurr; // end border of the preceding run
+ if (*pcurr != left-1) // the 1-run must begin exactly at 'left'
+ return false;
+ return true;
+}
+
+/**
+ \brief Searches for the last 1 bit in the 111 interval of a GAP block
+
+ If bit \p nbit is set, reports the end border of the 1-run that
+ contains it.
+
+ \param buf - GAP block buffer
+ \param nbit - bit index to start checking from (asserted < bm::gap_max_bits)
+ \param pos - [out] found value; written only when the function returns true
+
+ \return false if not found (bit nbit is 0)
+ @ingroup gapfunc
+*/
+template<typename T>
+bool gap_find_interval_end(const T* const BMRESTRICT buf,
+ unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
+{
+ BM_ASSERT(pos);
+ BM_ASSERT(nbit < bm::gap_max_bits);
+
+ unsigned is_set;
+ unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set); // index of the run holding 'nbit', is_set = its value
+ if (!is_set)
+ return false;
+ *pos = buf[start_pos]; // end border of that 1-run
+ return true;
+}
+
+
+/**
+ \brief Searches for the first 1 bit in the 111 interval of a GAP block
+
+ If bit \p nbit is set, reports the first bit of the 1-run that contains
+ it: 0 when that run is the first run of the buffer, otherwise the end
+ border of the preceding run plus one.
+
+ \param buf - GAP block buffer
+ \param nbit - bit index to start checking from (asserted < bm::gap_max_bits)
+ \param pos - [out] found value; written only when the function returns true
+
+ \return false if not found (bit nbit is 0)
+ @ingroup gapfunc
+*/
+template<typename T>
+bool gap_find_interval_start(const T* const BMRESTRICT buf,
+ unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
+{
+ BM_ASSERT(pos);
+ BM_ASSERT(nbit < bm::gap_max_bits);
+
+ unsigned is_set;
+ unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set); // index of the run holding 'nbit', is_set = its value
+ if (!is_set)
+ return false;
+ --start_pos; // step back to the preceding run (index 0 is the header word)
+ if (!start_pos) // no preceding run: the 1-run begins at bit 0
+ *pos = 0;
+ else
+ *pos = buf[start_pos]+1; // first bit after the preceding run's end border
+ return true;
+}
+
+
/*!
\brief GAP block find position for the rank
SIZE_TYPE gap_find_rank(const T* const block,
SIZE_TYPE rank,
unsigned nbit_from,
- unsigned& nbit_pos)
+ unsigned& nbit_pos) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(rank);
\brief Counts 1 bits in GAP buffer in the closed [0, right] range.
\param buf - GAP buffer pointer.
\param right- rightmost bit index
- \return Number of non-zero bits.
+ \param is_corrected - if true, the result is rank-corrected: when the
+ bit at position 'right' is set, the returned count is decremented by 1
+ \return Number of non-zero bits
@ingroup gapfunc
*/
template<typename T>
-unsigned gap_bit_count_to(const T* const buf, T right)
+unsigned gap_bit_count_to(const T* const buf, T right,
+ bool is_corrected=false) BMNOEXCEPT
{
const T* pcurr = buf;
const T* pend = pcurr + (*pcurr >> 3);
if (right <= *pcurr) // we are in the target block right now
{
bits_counter = (right + 1u) & is_set; // & is_set == if (is_set)
+ bits_counter -= (is_set & unsigned(is_corrected));
return bits_counter;
}
bits_counter += (*pcurr + 1u) & is_set;
{
bits_counter += (*pcurr - prev_gap) & is_set;
if (pcurr == pend)
+ {
+ bits_counter -= (is_set & unsigned(is_corrected));
return bits_counter;
+ }
prev_gap = *pcurr++;
}
bits_counter += (right - prev_gap) & is_set;
+ bits_counter -= (is_set & unsigned(is_corrected));
return bits_counter;
}
@ingroup gapfunc
*/
template<typename T>
-T* gap_2_dgap(const T* gap_buf, T* dgap_buf, bool copy_head=true)
+T* gap_2_dgap(const T* BMRESTRICT gap_buf,
+ T* BMRESTRICT dgap_buf, bool copy_head=true) BMNOEXCEPT
{
if (copy_head) // copy GAP header
{
@ingroup gapfunc
*/
template<typename T>
-void dgap_2_gap(const T* dgap_buf, T* gap_buf, T gap_header=0)
+void dgap_2_gap(const T* BMRESTRICT dgap_buf,
+ T* BMRESTRICT gap_buf, T gap_header=0) BMNOEXCEPT
{
const T* pcurr = dgap_buf;
unsigned len;
@ingroup gapfunc
*/
-template<typename T> int gapcmp(const T* buf1, const T* buf2)
+template<typename T>
+int gapcmp(const T* buf1, const T* buf2) BMNOEXCEPT
{
const T* pcurr1 = buf1;
const T* pend1 = pcurr1 + (*pcurr1 >> 3);
return (bitval1) ? 1 : -1;
}
}
-
++pcurr1; ++pcurr2;
-
bitval1 ^= 1;
bitval2 ^= 1;
}
template<typename T>
bool gap_find_first_diff(const T* BMRESTRICT buf1,
const T* BMRESTRICT buf2,
- unsigned* BMRESTRICT pos)
+ unsigned* BMRESTRICT pos) BMNOEXCEPT
{
BM_ASSERT(buf1 && buf2 && pos);
return false;
}
-
+// -------------------------------------------------------------------------
+//
/*!
\brief Abstract operation for GAP buffers.
can be 0 or 1 (1 inverts the vector)
\param vect2 - operand 2 GAP encoded buffer.
\param vect2_mask - same as vect1_mask
- \param f - operation functor.
\param dlen - destination length after the operation
\note Internal function.
unsigned vect1_mask,
const T* BMRESTRICT vect2,
unsigned vect2_mask,
- F& f,
- unsigned& dlen)
+ unsigned& dlen) BMNOEXCEPT2
{
const T* cur1 = vect1;
const T* cur2 = vect2;
T bitval1 = (T)((*cur1++ & 1) ^ vect1_mask);
T bitval2 = (T)((*cur2++ & 1) ^ vect2_mask);
- T bitval = (T) f(bitval1, bitval2);
+ T bitval = (T) F::op(bitval1, bitval2);
T bitval_prev = bitval;
T* res = dest;
T c1 = *cur1; T c2 = *cur2;
while (1)
{
- bitval = (T) f(bitval1, bitval2);
+ bitval = (T) F::op(bitval1, bitval2);
// Check if GAP value changes and we need to
// start the next one
}
++cur2; c2 = *cur2;
}
-
} // while
dlen = (unsigned)(res - dest);
*dest = (T)((*dest & 7) + (dlen << 3));
}
+
/*!
\brief Abstract operation for GAP buffers (predicts legth)
Receives functor F as a template argument
\param vect1 - operand 1 GAP encoded buffer.
\param vect2 - operand 2 GAP encoded buffer.
- \param f - operation functor.
\param dlen - destination length after the operation
\param limit - maximum target length limit,
returns false if limit is reached
template<typename T, class F>
bool gap_buff_dry_op(const T* BMRESTRICT vect1,
const T* BMRESTRICT vect2,
- F& f,
unsigned& dlen,
- unsigned limit)
+ unsigned limit) BMNOEXCEPT2
{
const T* cur1 = vect1;
const T* cur2 = vect2;
T bitval1 = (T)((*cur1++ & 1));
T bitval2 = (T)((*cur2++ & 1));
- T bitval = (T) f(bitval1, bitval2);
+ T bitval = (T) F::op(bitval1, bitval2);
T bitval_prev = bitval;
unsigned len = 1;
T c1 = *cur1; T c2 = *cur2;
while (1)
{
- bitval = (T) f(bitval1, bitval2);
+ bitval = (T) F::op(bitval1, bitval2);
// Check if GAP value changes and we need to
// start the next one
can be 0 or 1 (1 inverts the vector)
\param vect2 - operand 2 GAP encoded buffer.
\param vect2_mask - same as vect1_mask
- \param f - operation functor.
\note Internal function.
\return non zero value if operation result returns any 1 bit
unsigned gap_buff_any_op(const T* BMRESTRICT vect1,
unsigned vect1_mask,
const T* BMRESTRICT vect2,
- unsigned vect2_mask,
- F f)
+ unsigned vect2_mask) BMNOEXCEPT2
{
const T* cur1 = vect1;
const T* cur2 = vect2;
unsigned bitval1 = (*cur1++ & 1) ^ vect1_mask;
unsigned bitval2 = (*cur2++ & 1) ^ vect2_mask;
- unsigned bitval = f(bitval1, bitval2);
+ unsigned bitval = F::op(bitval1, bitval2);
if (bitval)
return bitval;
unsigned bitval_prev = bitval;
while (1)
{
- bitval = f(bitval1, bitval2);
+ bitval = F::op(bitval1, bitval2);
if (bitval)
return bitval;
{
break;
}
-
++cur1;
- bitval1 ^= 1;
- bitval2 ^= 1;
+ bitval1 ^= 1; bitval2 ^= 1;
}
++cur2;
}
Receives functor F as a template argument
\param vect1 - operand 1 GAP encoded buffer.
\param vect2 - operand 2 GAP encoded buffer.
- \param f - operation functor.
\note Internal function.
@ingroup gapfunc
*/
template<typename T, class F>
-unsigned gap_buff_count_op(const T* vect1, const T* vect2, F f)
+unsigned gap_buff_count_op(const T* vect1, const T* vect2) BMNOEXCEPT2
{
unsigned count;// = 0;
const T* cur1 = vect1;
unsigned bitval1 = (*cur1++ & 1);
unsigned bitval2 = (*cur2++ & 1);
- unsigned bitval = count = f(bitval1, bitval2);
+ unsigned bitval = count = F::op(bitval1, bitval2);
unsigned bitval_prev = bitval;
- //if (bitval) ++count;
-
T res, res_prev;
res = res_prev = 0;
while (1)
{
- bitval = f(bitval1, bitval2);
-
+ bitval = F::op(bitval1, bitval2);
// Check if GAP value changes and we need to
// start the next one.
if (bitval != bitval_prev)
count += res - res_prev;
res_prev = res;
}
- ++cur1;
- bitval1 ^= 1;
+ ++cur1; bitval1 ^= 1;
}
else // >=
{
else // equal
{
if (*cur2 == (bm::gap_max_bits - 1))
- {
break;
- }
++cur1;
- bitval1 ^= 1;
- bitval2 ^= 1;
+ bitval1 ^= 1; bitval2 ^= 1;
}
++cur2;
}
}
+#ifdef __GNUG__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
/*!
\brief Sets or clears bit in the GAP buffer.
unsigned gap_set_value(unsigned val,
T* BMRESTRICT buf,
unsigned pos,
- unsigned* BMRESTRICT is_set)
+ unsigned* BMRESTRICT is_set) BMNOEXCEPT
{
BM_ASSERT(pos < bm::gap_max_bits);
- unsigned curr = gap_bfind(buf, pos, is_set);
+ unsigned curr = bm::gap_bfind(buf, pos, is_set);
T end = (T)(*buf >> 3);
if (*is_set == val)
{
// Special case, first bit GAP operation. There is no platform beside it.
// initial flag must be inverted.
- if (pos == 0)
+ if (!pos)
{
*buf ^= 1;
- if ( buf[1] ) // We need to insert a 1 bit platform here.
+ if (buf[1]) // We need to insert a 1 bit GAP here
{
::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
buf[1] = 0;
}
else // Only 1 bit in the GAP. We need to delete the first GAP.
{
- pprev = buf + 1;
- pcurr = pprev + 1;
- do
- {
- *pprev++ = *pcurr++;
- } while (pcurr < pend);
- --end;
+ pprev = buf + 1; pcurr = pprev + 1;
+ goto copy_gaps;
}
}
- else if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
+ else
+ if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
{
++(*pprev);
if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
{
--end;
- if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
+ if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
{
- --end;
++pcurr;
- do
- {
- *pprev++ = *pcurr++;
- } while (pcurr < pend);
+ copy_gaps:
+ --end;
+ do { *pprev++ = *pcurr++; } while (pcurr < pend);
}
}
}
- else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
+ else
+ if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
{
- --(*pcurr);
- if (pcurr == pend)
+ --(*pcurr);
+ end += (pcurr == pend);
+ }
+ else // Worst case: split current GAP
+ {
+ if (*pcurr != bm::gap_max_bits-1) // last gap does not need memmove
+ ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T)));
+ end += 2;
+ pcurr[0] = (T)(pos-1);
+ pcurr[1] = (T)pos;
+ }
+
+ // Set correct length word and last border word
+ *buf = (T)((*buf & 7) + (end << 3));
+ buf[end] = bm::gap_max_bits-1;
+ return end;
+}
+
+/*!
+ \brief Sets or clears bit in the GAP buffer.
+
+ \param val - new bit value
+ \param buf - GAP buffer.
+ \param pos - Index of bit to set.
+
+ \return New GAP buffer length.
+
+ @ingroup gapfunc
+*/
+template<typename T>
+unsigned gap_set_value(unsigned val,
+ T* BMRESTRICT buf,
+ unsigned pos) BMNOEXCEPT
+{
+ BM_ASSERT(pos < bm::gap_max_bits);
+ unsigned is_set;
+ unsigned curr = bm::gap_bfind(buf, pos, &is_set);
+ T end = (T)(*buf >> 3);
+ if (is_set == val)
+ return end;
+
+ T* pcurr = buf + curr;
+ T* pprev = pcurr - 1;
+ T* pend = buf + end;
+
+ // Special case, first bit GAP operation. There is no platform beside it.
+ // initial flag must be inverted.
+ if (!pos)
+ {
+ *buf ^= 1;
+ if (buf[1]) // We need to insert a 1 bit GAP here
+ {
+ ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
+ buf[1] = 0;
+ ++end;
+ }
+ else // Only 1 bit in the GAP. We need to delete the first GAP.
{
- ++end;
+ pprev = buf + 1; pcurr = pprev + 1;
+ goto copy_gaps;
}
}
- else // Worst case we need to split current block.
+ else
+ if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
+ {
+ ++(*pprev);
+ if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
+ {
+ --end;
+ if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
+ {
+ ++pcurr;
+ copy_gaps:
+ --end;
+ do { *pprev++ = *pcurr++; } while (pcurr < pend);
+ }
+ }
+ }
+ else
+ if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
{
- ::memmove(pcurr+2, pcurr,(end - curr + 1)*sizeof(T));
- *pcurr++ = (T)(pos - 1);
- *pcurr = (T)pos;
- end = (T)(end + 2);
+ --(*pcurr);
+ end += (pcurr == pend);
+ }
+ else // Worst case: split current GAP
+ {
+ if (*pcurr != bm::gap_max_bits-1) // last gap does not need memmove
+ ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T)));
+ end += 2;
+ pcurr[0] = (T)(pos-1);
+ pcurr[1] = (T)pos;
}
- // Set correct length word.
+ // Set correct length word and last border word
*buf = (T)((*buf & 7) + (end << 3));
-
- buf[end] = bm::gap_max_bits - 1;
+ buf[end] = bm::gap_max_bits-1;
return end;
}
@ingroup gapfunc
*/
template<typename T>
-unsigned gap_add_value(T* buf, unsigned pos)
+unsigned gap_add_value(T* buf, unsigned pos) BMNOEXCEPT
{
BM_ASSERT(pos < bm::gap_max_bits);
// Special case, first bit GAP operation. There is no platform beside it.
// initial flag must be inverted.
- if (pos == 0)
+ if (!pos)
{
*buf ^= 1;
if ( buf[1] ) // We need to insert a 1 bit platform here.
}
else // Only 1 bit in the GAP. We need to delete the first GAP.
{
- pprev = buf + 1;
- pcurr = pprev + 1;
- do
- {
- *pprev++ = *pcurr++;
- } while (pcurr < pend);
+ pprev = buf + 1; pcurr = pprev + 1;
--end;
+ do { *pprev++ = *pcurr++; } while (pcurr < pend);
}
}
else if (((unsigned)(*pprev))+1 == pos && (curr > 1) ) // Left border bit
if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
{
--end;
- if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
- {
- // TODO: should never get here...
- --end;
- ++pcurr;
- do
- {
- *pprev++ = *pcurr++;
- } while (pcurr < pend);
- }
- }
+ BM_ASSERT(pcurr == pend);
+ }
}
else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
{
--(*pcurr);
- if (pcurr == pend)
- {
- ++end;
- }
+ end += (pcurr == pend);
}
else // Worst case we need to split current block.
{
- *pcurr++ = (T)(pos - 1);
- *pcurr = (T)pos;
+ pcurr[0] = (T)(pos-1);
+ pcurr[1] = (T)pos;
end = (T)(end+2);
}
// Set correct length word.
*buf = (T)((*buf & 7) + (end << 3));
-
buf[end] = bm::gap_max_bits - 1;
return end;
}
+#ifdef __GNUG__
+#pragma GCC diagnostic pop
+#endif
+
+
/*!
@brief Right shift GAP block by 1 bit
@param buf - block pointer
@ingroup gapfunc
*/
template<typename T>
-bool gap_shift_r1(T* buf, unsigned co_flag, unsigned* new_len)
+bool gap_shift_r1(T* BMRESTRICT buf,
+ unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT
{
BM_ASSERT(new_len);
bool co;
@ingroup gapfunc
*/
template<typename T>
-bool gap_shift_l1(T* buf, unsigned co_flag, unsigned* new_len)
+bool gap_shift_l1(T* BMRESTRICT buf,
+ unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT
{
BM_ASSERT(new_len);
unsigned is_set;
*/
template<typename T>
-unsigned gap_set_array(T* buf, const T* arr, unsigned len)
+unsigned gap_set_array(T* buf, const T* arr, unsigned len) BMNOEXCEPT
{
*buf = (T)((*buf & 6u) + (1u << 3)); // gap header setup
@ingroup gapfunc
*/
template<typename T>
-unsigned bit_array_compute_gaps(const T* arr,
- unsigned len)
+unsigned bit_array_compute_gaps(const T* arr, unsigned len) BMNOEXCEPT
{
unsigned gap_count = 1;
T prev = arr[0];
@ingroup gapfunc
*/
template<typename T>
-unsigned gap_block_find(const T* buf,
+unsigned gap_block_find(const T* BMRESTRICT buf,
unsigned nbit,
- bm::id_t* prev)
+ bm::id_t* BMRESTRICT prev) BMNOEXCEPT
{
BM_ASSERT(nbit < bm::gap_max_bits);
*prev = nbit;
return 1u;
}
-
unsigned val = buf[gap_idx] + 1;
*prev = val;
-
return (val != bm::gap_max_bits); // no bug here.
}
+//------------------------------------------------------------------------
+
+
/*!
\brief Set 1 bit in a block
@ingroup bitfunc
*/
BMFORCEINLINE
-void set_bit(unsigned* dest, unsigned bitpos)
+void set_bit(unsigned* dest, unsigned bitpos) BMNOEXCEPT
{
unsigned nbit = unsigned(bitpos & bm::set_block_mask);
unsigned nword = unsigned(nbit >> bm::set_word_shift);
@ingroup bitfunc
*/
BMFORCEINLINE
-void clear_bit(unsigned* dest, unsigned bitpos)
+void clear_bit(unsigned* dest, unsigned bitpos) BMNOEXCEPT
{
unsigned nbit = unsigned(bitpos & bm::set_block_mask);
unsigned nword = unsigned(nbit >> bm::set_word_shift);
@ingroup bitfunc
*/
BMFORCEINLINE
-unsigned test_bit(const unsigned* block, unsigned bitpos)
+unsigned test_bit(const unsigned* block, unsigned bitpos) BMNOEXCEPT
{
unsigned nbit = unsigned(bitpos & bm::set_block_mask);
unsigned nword = unsigned(nbit >> bm::set_word_shift);
@ingroup bitfunc
*/
inline
-void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount)
+void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT
{
const unsigned maskFF = ~0u;
@ingroup bitfunc
*/
inline
-void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount)
+void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT
{
const unsigned maskFF = ~0u;
*/
inline void xor_bit_block(unsigned* dest,
unsigned bitpos,
- unsigned bitcount)
+ unsigned bitcount) BMNOEXCEPT
{
unsigned nbit = unsigned(bitpos & bm::set_block_mask);
unsigned nword = unsigned(nbit >> bm::set_word_shift);
@ingroup gapfunc
*/
template<typename T>
-void gap_sub_to_bitset(unsigned* dest, const T* pcurr)
+void gap_sub_to_bitset(unsigned* BMRESTRICT dest,
+ const T* BMRESTRICT pcurr) BMNOEXCEPT
{
BM_ASSERT(dest && pcurr);
@ingroup gapfunc
*/
template<typename T>
-void gap_sub_to_bitset(unsigned* dest, const T* pcurr, bm::id64_t digest0)
+void gap_sub_to_bitset(unsigned* BMRESTRICT dest,
+ const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT
{
BM_ASSERT(dest && pcurr);
@ingroup gapfunc
*/
template<typename T>
-void gap_xor_to_bitset(unsigned* dest, const T* pcurr)
+void gap_xor_to_bitset(unsigned* BMRESTRICT dest,
+ const T* BMRESTRICT pcurr) BMNOEXCEPT
{
BM_ASSERT(dest && pcurr);
@ingroup gapfunc
*/
template<typename T>
-void gap_add_to_bitset(unsigned* dest, const T* pcurr, unsigned len)
+void gap_add_to_bitset(unsigned* BMRESTRICT dest,
+ const T* BMRESTRICT pcurr, unsigned len) BMNOEXCEPT
{
BM_ASSERT(dest && pcurr);
@ingroup gapfunc
*/
template<typename T>
-void gap_add_to_bitset(unsigned* dest, const T* pcurr)
+void gap_add_to_bitset(unsigned* BMRESTRICT dest,
+ const T* BMRESTRICT pcurr) BMNOEXCEPT
{
unsigned len = (*pcurr >> 3);
gap_add_to_bitset(dest, pcurr, len);
@ingroup gapfunc
*/
template<typename T>
-void gap_and_to_bitset(unsigned* dest, const T* pcurr)
+void gap_and_to_bitset(unsigned* BMRESTRICT dest,
+ const T* BMRESTRICT pcurr) BMNOEXCEPT
{
BM_ASSERT(dest && pcurr);
@ingroup gapfunc
*/
template<typename T>
-void gap_and_to_bitset(unsigned* dest, const T* pcurr, bm::id64_t digest0)
+void gap_and_to_bitset(unsigned* BMRESTRICT dest,
+ const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT
{
BM_ASSERT(dest && pcurr);
if (!digest0)
@ingroup gapfunc
*/
template<typename T>
-bm::id_t gap_bitset_and_count(const unsigned* block, const T* pcurr)
+bm::id_t gap_bitset_and_count(const unsigned* BMRESTRICT block,
+ const T* BMRESTRICT pcurr) BMNOEXCEPT
{
BM_ASSERT(block);
const T* pend = pcurr + (*pcurr >> 3);
@ingroup gapfunc
*/
template<typename T>
-bm::id_t gap_bitset_and_any(const unsigned* block, const T* pcurr)
+bm::id_t gap_bitset_and_any(const unsigned* BMRESTRICT block,
+ const T* BMRESTRICT pcurr) BMNOEXCEPT
{
BM_ASSERT(block);
@ingroup gapfunc
*/
template<typename T>
-bm::id_t gap_bitset_sub_count(const unsigned* block, const T* buf)
+bm::id_t gap_bitset_sub_count(const unsigned* BMRESTRICT block,
+ const T* BMRESTRICT buf) BMNOEXCEPT
{
BM_ASSERT(block);
@ingroup gapfunc
*/
template<typename T>
-bm::id_t gap_bitset_sub_any(const unsigned* block, const T* buf)
+bm::id_t gap_bitset_sub_any(const unsigned* BMRESTRICT block,
+ const T* BMRESTRICT buf) BMNOEXCEPT
{
BM_ASSERT(block);
@ingroup gapfunc
*/
template<typename T>
-bm::id_t gap_bitset_xor_count(const unsigned* block, const T* buf)
+bm::id_t gap_bitset_xor_count(const unsigned* BMRESTRICT block,
+ const T* BMRESTRICT buf) BMNOEXCEPT
{
BM_ASSERT(block);
@ingroup gapfunc
*/
template<typename T>
-bm::id_t gap_bitset_xor_any(const unsigned* block, const T* buf)
+bm::id_t gap_bitset_xor_any(const unsigned* BMRESTRICT block,
+ const T* BMRESTRICT buf) BMNOEXCEPT
{
BM_ASSERT(block);
@ingroup gapfunc
*/
template<typename T>
-bm::id_t gap_bitset_or_count(const unsigned* block, const T* buf)
+bm::id_t gap_bitset_or_count(const unsigned* BMRESTRICT block,
+ const T* BMRESTRICT buf) BMNOEXCEPT
{
BM_ASSERT(block);
-
const T* pcurr = buf;
const T* pend = pcurr + (*pcurr >> 3);
++pcurr;
@ingroup gapfunc
*/
template<typename T>
-bm::id_t gap_bitset_or_any(const unsigned* block, const T* buf)
+bm::id_t gap_bitset_or_any(const unsigned* BMRESTRICT block,
+ const T* BMRESTRICT buf) BMNOEXCEPT
{
bool b = !bm::gap_is_all_zero(buf) ||
!bm::bit_is_all_zero(block);
@ingroup bitfunc
*/
inline
-void bit_block_set(bm::word_t* BMRESTRICT dst, bm::word_t value)
+void bit_block_set(bm::word_t* BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
{
#ifdef BMVECTOPT
VECT_SET_BLOCK(dst, value);
@ingroup gapfunc
*/
template<typename T>
-void gap_convert_to_bitset(unsigned* dest, const T* buf)
+void gap_convert_to_bitset(unsigned* BMRESTRICT dest,
+ const T* BMRESTRICT buf) BMNOEXCEPT
{
bm::bit_block_set(dest, 0);
bm::gap_add_to_bitset(dest, buf);
@ingroup gapfunc
*/
template<typename T>
-unsigned* gap_convert_to_bitset_smart(unsigned* dest,
- const T* buf,
- id_t set_max)
+unsigned* gap_convert_to_bitset_smart(unsigned* BMRESTRICT dest,
+ const T* BMRESTRICT buf,
+ id_t set_max) BMNOEXCEPT
{
if (buf[1] == set_max - 1)
return (buf[0] & 1) ? FULL_BLOCK_REAL_ADDR : 0;
-
bm::gap_convert_to_bitset(dest, buf);
return dest;
}
@ingroup gapfunc
@internal
*/
-template<typename T> unsigned gap_control_sum(const T* buf)
+template<typename T>
+unsigned gap_control_sum(const T* buf) BMNOEXCEPT
{
unsigned end = *buf >> 3;
++pcurr;
}
++pcurr; // now we are in GAP "1" again
-
while (pcurr <= pend)
{
BM_ASSERT(*pcurr > *(pcurr-1));
@ingroup gapfunc
*/
-template<class T> void gap_set_all(T* buf,
- unsigned set_max,
- unsigned value)
+template<class T>
+void gap_set_all(T* buf, unsigned set_max, unsigned value) BMNOEXCEPT
{
BM_ASSERT(value == 0 || value == 1);
*buf = (T)((*buf & 6u) + (1u << 3) + value);
void gap_init_range_block(T* buf,
T from,
T to,
- T value)
- //unsigned set_max)
+ T value) BMNOEXCEPT
{
BM_ASSERT(value == 0 || value == 1);
const unsigned set_max = bm::bits_in_block;
@ingroup gapfunc
*/
-template<typename T> void gap_invert(T* buf)
+template<typename T> void gap_invert(T* buf) BMNOEXCEPT
{
*buf ^= 1;
}
@ingroup gapfunc
*/
template<typename T>
-void set_gap_level(T* buf, int level)
+void set_gap_level(T* buf, int level) BMNOEXCEPT
{
BM_ASSERT(level >= 0);
BM_ASSERT(unsigned(level) < bm::gap_levels);
@ingroup gapfunc
*/
template<typename T>
-inline int gap_calc_level(unsigned len, const T* glevel_len)
+int gap_calc_level(unsigned len, const T* glevel_len) BMNOEXCEPT
{
if (len <= unsigned(glevel_len[0]-4)) return 0;
if (len <= unsigned(glevel_len[1]-4)) return 1;
@ingroup gapfunc
*/
template<typename T>
-inline unsigned gap_free_elements(const T* buf, const T* glevel_len)
+inline unsigned gap_free_elements(const T* BMRESTRICT buf,
+ const T* BMRESTRICT glevel_len) BMNOEXCEPT
{
- unsigned len = gap_length(buf);
- unsigned capacity = gap_capacity(buf, glevel_len);
+ unsigned len = bm::gap_length(buf);
+ unsigned capacity = bm::gap_capacity(buf, glevel_len);
return capacity - len;
}
@ingroup bitfunc
*/
template<typename T>
-int bitcmp(const T* buf1, const T* buf2, unsigned len)
+int bitcmp(const T* buf1, const T* buf2, unsigned len) BMNOEXCEPT
{
BM_ASSERT(len);
const T* pend1 = buf1 + len;
@ingroup bitfunc
*/
inline
-bool bit_find_first_diff(const bm::word_t* blk1, const bm::word_t* blk2,
- unsigned* pos)
+bool bit_find_first_diff(const bm::word_t* BMRESTRICT blk1,
+ const bm::word_t* BMRESTRICT blk2,
+ unsigned* BMRESTRICT pos) BMNOEXCEPT
{
BM_ASSERT(blk1 && blk2 && pos);
#ifdef VECT_BIT_FIND_DIFF
if (diff)
{
unsigned idx = bm::count_trailing_zeros_u64(diff);
- *pos = unsigned(idx + (i * 8u * sizeof(bm::wordop_t)));
+ *pos = unsigned(idx + (i * 8u * unsigned(sizeof(bm::wordop_t))));
return true;
}
} // for
\brief Converts bit block to GAP.
\param dest - Destinatio GAP buffer.
\param block - Source bitblock buffer.
- \param dest_len - length of the dest. buffer.
+ \param dest_len - length of the destination buffer.
\return New length of GAP block or 0 if conversion failed
(insufficicent space).
inline
unsigned bit_block_to_gap(gap_word_t* BMRESTRICT dest,
const unsigned* BMRESTRICT block,
- unsigned dest_len)
+ unsigned dest_len) BMNOEXCEPT
{
const unsigned* BMRESTRICT block_end = block + bm::set_block_size;
gap_word_t* BMRESTRICT pcurr = dest;
}
#endif
+/**
+ Convert bit block to GAP representation
+ @internal
+ @ingroup bitfunc
+*/
inline
unsigned bit_to_gap(gap_word_t* BMRESTRICT dest,
const unsigned* BMRESTRICT block,
- unsigned dest_len)
+ unsigned dest_len) BMNOEXCEPT
{
#if defined(VECT_BIT_TO_GAP)
return VECT_BIT_TO_GAP(dest, block, dest_len);
D gap_convert_to_arr(D* BMRESTRICT dest,
const T* BMRESTRICT buf,
unsigned dest_len,
- bool invert = false)
+ bool invert = false) BMNOEXCEPT
{
- BMREGISTER const T* BMRESTRICT pcurr = buf;
- BMREGISTER const T* pend = pcurr + (*pcurr >> 3);
+ const T* BMRESTRICT pcurr = buf;
+ const T* pend = pcurr + (*pcurr >> 3);
D* BMRESTRICT dest_curr = dest;
++pcurr;
@ingroup bitfunc
*/
inline
-bm::id_t bit_block_count(const bm::word_t* block)
+bm::id_t bit_block_count(const bm::word_t* block) BMNOEXCEPT
{
const bm::word_t* block_end = block + bm::set_block_size;
bm::id_t count = 0;
@ingroup bitfunc
*/
inline
-bm::id_t bit_block_count(const bm::word_t* const block, bm::id64_t digest)
+bm::id_t bit_block_count(const bm::word_t* const block,
+ bm::id64_t digest) BMNOEXCEPT
{
+#ifdef VECT_BIT_COUNT_DIGEST
+ return VECT_BIT_COUNT_DIGEST(block, digest);
+#else
bm::id_t count = 0;
bm::id64_t d = digest;
while (d)
d = bm::bmi_bslr_u64(d); // d &= d - 1;
} // while
return count;
+#endif
}
*/
inline
bm::id_t bit_block_calc_count(const bm::word_t* block,
- const bm::word_t* block_end)
+ const bm::word_t* block_end) BMNOEXCEPT
{
bm::id_t count = 0;
bm::word_t acc = *block++;
@ingroup bitfunc
*/
inline
-bm::id_t bit_count_change(bm::word_t w)
+bm::id_t bit_count_change(bm::word_t w) BMNOEXCEPT
{
unsigned count = 1;
w ^= (w >> 1);
@internal
*/
inline
-unsigned bit_block_change32(const bm::word_t* block, unsigned size)
+unsigned bit_block_change32(const bm::word_t* block, unsigned size) BMNOEXCEPT
{
unsigned gap_count = 1;
@internal
*/
inline
-void bit_block_change_bc(const bm::word_t* block, unsigned* gc, unsigned* bc)
+void bit_block_change_bc(const bm::word_t* BMRESTRICT block,
+ unsigned* BMRESTRICT gc, unsigned* BMRESTRICT bc) BMNOEXCEPT
{
BM_ASSERT(gc);
BM_ASSERT(bc);
@ingroup bitfunc
*/
inline
-unsigned bit_block_calc_change(const bm::word_t* block)
+unsigned bit_block_calc_change(const bm::word_t* block) BMNOEXCEPT
{
#if defined(VECT_BLOCK_CHANGE)
return VECT_BLOCK_CHANGE(block, bm::set_block_size);
#endif
}
+/*!
+ Check if all bits are 1 in [left, right] range
+
+ @param block - bit-block to test; dereferenced without a NULL check
+ @param left - index of the first bit of the closed range
+ @param right - index of the last bit of the closed range
+ (asserted: left <= right <= bm::gap_max_bits-1)
+ @return true when every bit in [left, right] is set, false otherwise
+
+ The scan runs in up to three stages: an unaligned head word,
+ whole words (four at a time, then one at a time) and a partial
+ tail word. The code works in units of 32 bits per word.
+ @ingroup bitfunc
+*/
+inline
+bool bit_block_is_all_one_range(const bm::word_t* const BMRESTRICT block,
+ bm::word_t left,
+ bm::word_t right) BMNOEXCEPT
+{
+ BM_ASSERT(left <= right);
+ BM_ASSERT(right <= bm::gap_max_bits-1);
+
+ unsigned nword, nbit, bitcount, temp;
+ nbit = left & bm::set_word_mask; // bit offset of left inside its word
+ const bm::word_t* word =
+ block + (nword = unsigned(left >> bm::set_word_shift));
+ if (left == right) // special case (only 1 bit to check)
+ return (*word >> nbit) & 1u;
+
+ if (nbit) // starting position is not aligned
+ {
+ unsigned right_margin = nbit + right - left; // offset of right relative to the first word
+ if (right_margin < 32) // whole range lies inside the first word
+ {
+ unsigned mask =
+ block_set_table<true>::_right[nbit] &
+ block_set_table<true>::_left[right_margin];
+ return mask == (*word & mask);
+ }
+ temp = *word & block_set_table<true>::_right[nbit];
+ if (temp != block_set_table<true>::_right[nbit])
+ return false;
+ bitcount = (right - left + 1u) - (32 - nbit); // bits left after the head word
+ ++word;
+ }
+ else
+ {
+ bitcount = right - left + 1u; // aligned start: the full range length
+ }
+
+ // now when we are word aligned, we can scan the bit-stream
+ const bm::id64_t maskFF64 = ~0ull;
+ const bm::word_t maskFF = ~0u;
+ // loop unrolled to evaluate 4 words at a time
+ // SIMD showed no advantage, unless evaluate sub-wave intervals
+ //
+ for ( ;bitcount >= 128; bitcount-=128, word+=4)
+ {
+ bm::id64_t w64_0 = bm::id64_t(word[0]) + (bm::id64_t(word[1]) << 32);
+ bm::id64_t w64_1 = bm::id64_t(word[2]) + (bm::id64_t(word[3]) << 32);
+ if ((w64_0 ^ maskFF64) | (w64_1 ^ maskFF64)) // non-zero if any of the 128 bits is 0
+ return false;
+ } // for
+
+ for ( ;bitcount >= 32; bitcount-=32, ++word)
+ {
+ if (*word != maskFF)
+ return false;
+ } // for
+ BM_ASSERT(bitcount < 32);
+
+ if (bitcount) // we have a tail to count
+ {
+ temp = *word & block_set_table<true>::_left[bitcount-1];
+ if (temp != block_set_table<true>::_left[bitcount-1])
+ return false;
+ }
+
+ return true;
+}
+
+
/*!
inline
bm::id_t bit_block_calc_count_range(const bm::word_t* block,
bm::word_t left,
- bm::word_t right)
+ bm::word_t right) BMNOEXCEPT
{
BM_ASSERT(left <= right);
BM_ASSERT(right <= bm::gap_max_bits-1);
*/
inline
bm::id_t bit_block_calc_count_to(const bm::word_t* block,
- bm::word_t right)
+ bm::word_t right) BMNOEXCEPT
{
BM_ASSERT(block);
if (!right) // special case, first bit check
@ingroup bitfunc
*/
inline
-void bit_block_rotate_left_1(bm::word_t* block)
+void bit_block_rotate_left_1(bm::word_t* block) BMNOEXCEPT
{
bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
for (unsigned i = 0; i < bm::set_block_size-1; ++i)
@ingroup bitfunc
*/
inline
-void bit_block_rotate_left_1_unr(bm::word_t* block)
+void bit_block_rotate_left_1_unr(bm::word_t* block) BMNOEXCEPT
{
bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
const unsigned unroll_factor = 4;
@ingroup bitfunc
*/
inline
-bm::word_t bit_block_insert(bm::word_t* block, unsigned bitpos, bool value)
+bm::word_t bit_block_insert(bm::word_t* BMRESTRICT block,
+ unsigned bitpos, bool value) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(bitpos < 65536);
@ingroup bitfunc
*/
inline
-bool bit_block_shift_r1(bm::word_t* block,
- bm::word_t* empty_acc, bm::word_t co_flag)
+bool bit_block_shift_r1(bm::word_t* BMRESTRICT block,
+ bm::word_t* BMRESTRICT empty_acc,
+ bm::word_t co_flag) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(empty_acc);
@ingroup bitfunc
*/
inline
-bool bit_block_shift_r1_unr(bm::word_t* block,
- bm::word_t* empty_acc, bm::word_t co_flag)
+bool bit_block_shift_r1_unr(bm::word_t* BMRESTRICT block,
+ bm::word_t* BMRESTRICT empty_acc,
+ bm::word_t co_flag) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(empty_acc);
*/
inline
bool bit_block_shift_l1(bm::word_t* block,
- bm::word_t* empty_acc, bm::word_t co_flag)
+ bm::word_t* empty_acc, bm::word_t co_flag) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(empty_acc);
*/
inline
bool bit_block_shift_l1_unr(bm::word_t* block,
- bm::word_t* empty_acc, bm::word_t co_flag)
+ bm::word_t* empty_acc,
+ bm::word_t co_flag) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(empty_acc);
@ingroup bitfunc
*/
inline
-void bit_block_erase(bm::word_t* block, unsigned bitpos, bool carry_over)
+void bit_block_erase(bm::word_t* block,
+ unsigned bitpos,
+ bool carry_over) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(bitpos < 65536);
if (!bitpos)
{
bm::word_t acc;
- bit_block_shift_l1_unr(block, &acc, carry_over);
+ bm::bit_block_shift_l1_unr(block, &acc, carry_over);
return;
}
bool bit_block_shift_r1_and(bm::word_t* BMRESTRICT block,
bm::word_t co_flag,
const bm::word_t* BMRESTRICT mask_block,
- bm::id64_t* BMRESTRICT digest)
+ bm::id64_t* BMRESTRICT digest) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(mask_block);
block[d_base] = co_flag & mask_block[d_base];
if (block[d_base])
- d |= dmask; // update d
+ d |= dmask; // update digest
co_flag = 0;
}
}
bool bit_block_shift_r1_and_unr(bm::word_t* BMRESTRICT block,
bm::word_t co_flag,
const bm::word_t* BMRESTRICT mask_block,
- bm::id64_t* BMRESTRICT digest)
+ bm::id64_t* BMRESTRICT digest) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(mask_block);
@ingroup bitfunc
*/
inline
-bm::id_t bit_block_any_range(const bm::word_t* block,
+bm::id_t bit_block_any_range(const bm::word_t* const BMRESTRICT block,
bm::word_t left,
- bm::word_t right)
+ bm::word_t right) BMNOEXCEPT
{
BM_ASSERT(left <= right);
unsigned mask =
block_set_table<true>::_right[nbit] &
block_set_table<true>::_left[right_margin];
- acc = *word & mask;
- return acc;
+ return *word & mask;
}
else
{
++word;
}
- // now when we are word aligned, we can check bits the usual way
- for ( ;bitcount >= 32; bitcount -= 32)
+ // loop unrolled to evaluate 4 words at a time
+ // SIMD showed no advantage, unless evaluate sub-wave intervals
+ //
+ for ( ;bitcount >= 128; bitcount-=128, word+=4)
{
- acc = *word++;
- if (acc)
+ acc = word[0] | word[1] | word[2] | word[3];
+ if (acc)
return acc;
- }
+ } // for
- if (bitcount) // we have a tail to count
+ acc = 0;
+ for ( ;bitcount >= 32; bitcount -= 32)
{
- acc = (*word) & block_set_table<true>::_left[bitcount-1];
- if (acc)
- return acc;
- }
+ acc |= *word++;
+ } // for
- return 0;
+ if (bitcount) // we have a tail to count
+ acc |= (*word) & block_set_table<true>::_left[bitcount-1];
+
+ return acc;
}
// ----------------------------------------------------------------------
/*! Function inverts block of bits
@ingroup bitfunc
*/
-template<typename T> void bit_invert(T* start)
+template<typename T>
+void bit_invert(T* start) BMNOEXCEPT
{
BM_ASSERT(IS_VALID_ADDR((bm::word_t*)start));
#ifdef BMVECTOPT
@ingroup bitfunc
*/
inline
-bool is_bits_one(const bm::wordop_t* start)
+bool is_bits_one(const bm::wordop_t* start) BMNOEXCEPT
{
#if defined(BMSSE42OPT) || defined(BMAVX2OPT)
return VECT_IS_ONE_BLOCK(start);
// ----------------------------------------------------------------------
-// GAP blocks manipulation functions:
+/*! @brief Returns "true" if all bits are 1 in the block [left, right]
+ Function check for block varieties
+
+ Dispatch order as coded: a NULL block yields false; a GAP block is
+ forwarded to bm::gap_is_all_one_range(); FULL_BLOCK_FAKE_ADDR yields
+ true; any other pointer is scanned as a bit-block by
+ bm::bit_block_is_all_one_range().
+
+ @param block - block pointer (may be NULL, GAP-tagged or the full-block address)
+ @param left - first bit of the closed range
+ @param right - last bit of the closed range
+ (asserted: left <= right < bm::gap_max_bits)
+ @return true when every bit in [left, right] is 1
+ @internal
+*/
+inline
+bool block_is_all_one_range(const bm::word_t* const BMRESTRICT block,
+ unsigned left, unsigned right) BMNOEXCEPT
+{
+ BM_ASSERT(left <= right);
+ BM_ASSERT(right < bm::gap_max_bits);
+ if (block)
+ {
+ if (BM_IS_GAP(block))
+ return bm::gap_is_all_one_range(BMGAP_PTR(block), left, right);
+ if (block == FULL_BLOCK_FAKE_ADDR)
+ return true;
+ return bm::bit_block_is_all_one_range(block, left, right);
+ }
+ return false; // NULL block: no bits set
+}
+
+/*! @brief Returns "true" if all bits are 1 in the block [left, right]
+ and border bits are 0
+
+ For a bit-block the border bits tested are left-1 and right+1, both
+ inside the same block. A GAP block is forwarded to
+ bm::gap_is_interval(). A NULL block and FULL_BLOCK_FAKE_ADDR both
+ yield false.
+
+ @param block - block pointer (may be NULL, GAP-tagged or the full-block address)
+ @param left - first bit of the candidate interval
+ @param right - last bit of the candidate interval
+ (asserted: left <= right < bm::gap_max_bits-1, so right+1
+ stays inside the block)
+ @return true when [left, right] is all 1s bounded by 0 bits
+
+ NOTE(review): the bit-block branch computes left-1 on an unsigned
+ value, so left == 0 would wrap around; no assert here enforces
+ left > 0. Presumably callers guarantee it - confirm.
+ @internal
+*/
+inline
+bool block_is_interval(const bm::word_t* const BMRESTRICT block,
+ unsigned left, unsigned right) BMNOEXCEPT
+{
+ BM_ASSERT(left <= right);
+ BM_ASSERT(right < bm::gap_max_bits-1);
+
+ if (block)
+ {
+ bool is_left, is_right, all_one;
+ if (BM_IS_GAP(block))
+ {
+ const bm::gap_word_t* gap = BMGAP_PTR(block);
+ all_one = bm::gap_is_interval(gap, left, right);
+ return all_one;
+ }
+ else // bit-block
+ {
+ if (block == FULL_BLOCK_FAKE_ADDR)
+ return false;
+ unsigned nword = ((left-1) >> bm::set_word_shift);
+ is_left = block[nword] & (1u << ((left-1) & bm::set_word_mask));
+ if (is_left == false) // bit left-1 is 0
+ {
+ nword = ((right + 1) >> bm::set_word_shift);
+ is_right = block[nword] & (1u << ((right + 1) & bm::set_word_mask));
+ if (is_right == false) // bit right+1 is 0
+ {
+ all_one = bm::bit_block_is_all_one_range(block, left, right);
+ return all_one;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+// ----------------------------------------------------------------------
+
+/**
+ \brief Searches for the last 1 bit in the 111 interval of a BIT block
+ \param block - BIT buffer
+ \param nbit - bit index to start checking from
+ \param pos - [out] found value
+
+ \return false if not found
+
+ As coded, "not found" means bit nbit itself is 0; in that case
+ *pos is not written. Every other path returns true. When no 0 bit
+ exists above nbit, *pos is set to bm::gap_max_bits-1.
+ @ingroup bitfunc
+*/
+inline
+bool bit_block_find_interval_end(const bm::word_t* BMRESTRICT block,
+ unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
+{
+ BM_ASSERT(block);
+ BM_ASSERT(pos);
+
+ unsigned nword = unsigned(nbit >> bm::set_word_shift);
+ unsigned bit_pos = (nbit & bm::set_word_mask);
+ bm::word_t w = block[nword];
+ w &= (1u << bit_pos);
+ if (!w) // starting bit is 0: nbit is not inside an interval
+ return false;
+
+ if (nbit == bm::gap_max_bits-1) // last bit of the block: nothing above to scan
+ {
+ *pos = bm::gap_max_bits-1;
+ return true;
+ }
+ *pos = nbit;
+
+ ++nbit;
+ nword = unsigned(nbit >> bm::set_word_shift);
+ bit_pos = (nbit & bm::set_word_mask);
+
+ w = (~block[nword]) >> bit_pos; // inverted word: a set bit now marks a 0 in the block
+ w <<= bit_pos; // clear the trailing bits
+ if (w)
+ {
+ bit_pos = bm::bit_scan_forward32(w); // trailing zeros
+ *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1);
+ return true;
+ }
+
+ for (++nword; nword < bm::set_block_size; ++nword)
+ {
+ w = ~block[nword];
+ if (w)
+ {
+ bit_pos = bm::bit_scan_forward32(w); // trailing zeros
+ *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1);
+ return true;
+ }
+ } // for nword
+
+ // 0 not found, all block is 1s...
+ *pos = bm::gap_max_bits-1;
+ return true;
+}
+
+
+/*! @brief Find end of the current 111 interval
+
+ A GAP block is forwarded to bm::gap_find_interval_end(), any other
+ non-full block to bm::bit_block_find_interval_end(). A full block
+ (IS_FULL_BLOCK) reports bm::gap_max_bits-1 with code 2 directly.
+
+ @param block - block pointer (asserted non-NULL)
+ @param nbit_from - bit index to start from (asserted < bm::gap_max_bits)
+ @param found_nbit - [out] end of the interval
+ @return search result code 0 - not found, 1 found, 2 - found at the end
+ (code 2 means *found_nbit == bm::gap_max_bits-1)
+ @internal
+*/
+inline
+unsigned block_find_interval_end(const bm::word_t* BMRESTRICT block,
+ unsigned nbit_from,
+ unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
+{
+ BM_ASSERT(block && found_nbit);
+ BM_ASSERT(nbit_from < bm::gap_max_bits);
+
+ bool b;
+ if (BM_IS_GAP(block))
+ {
+ const bm::gap_word_t* gap = BMGAP_PTR(block);
+ b = bm::gap_find_interval_end(gap, nbit_from, found_nbit);
+ if (b && *found_nbit == bm::gap_max_bits-1)
+ return 2; // end of block, keep searching
+ }
+ else // bit-block
+ {
+ if (IS_FULL_BLOCK(block))
+ {
+ *found_nbit = bm::gap_max_bits-1;
+ return 2;
+ }
+ b = bm::bit_block_find_interval_end(block, nbit_from, found_nbit);
+ if (b && *found_nbit == bm::gap_max_bits-1)
+ return 2; // end of block, keep searching
+ }
+ return b; // bool converts to 0 (not found) or 1 (found)
+}
-/*! \brief GAP and functor */
-BMFORCEINLINE unsigned and_op(unsigned v1, unsigned v2)
+// ----------------------------------------------------------------------
+
+/**
+ \brief Searches for the first 1 bit in the 111 interval of a BIT block
+ \param block - BIT buffer
+ \param nbit - bit index to start checking from
+ \param pos - [out] found value
+
+ \return false if not found
+
+ As coded, "not found" means bit nbit itself is 0; in that case
+ *pos is not written. Every other path returns true. When no 0 bit
+ exists below nbit, *pos is set to 0.
+ @ingroup bitfunc
+*/
+inline
+bool bit_block_find_interval_start(const bm::word_t* BMRESTRICT block,
+ unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
{
- return v1 & v2;
+ BM_ASSERT(block);
+ BM_ASSERT(pos);
+
+ unsigned nword = unsigned(nbit >> bm::set_word_shift);
+ unsigned bit_pos = (nbit & bm::set_word_mask);
+ bm::word_t w = block[nword];
+ w &= (1u << bit_pos);
+ if (!w) // starting bit is 0: nbit is not inside an interval
+ return false;
+
+ if (nbit == 0) // first bit of the block: nothing below to scan
+ {
+ *pos = 0;
+ return true;
+ }
+ *pos = nbit;
+
+ --nbit;
+ nword = unsigned(nbit >> bm::set_word_shift);
+ bit_pos = (nbit & bm::set_word_mask);
+
+ w = (~block[nword]) & block_set_table<true>::_left[bit_pos]; // inverted word masked by _left[bit_pos]
+ if (w)
+ {
+ bit_pos = bm::bit_scan_reverse32(w); // presumably the highest set bit of w - verify
+ *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1);
+ return true;
+ }
+
+ if (nword)
+ {
+ for (--nword; true; --nword) // walks down to word 0; exit is the explicit break below
+ {
+ w = ~block[nword];
+ if (w)
+ {
+ bit_pos = bm::bit_scan_reverse32(w); // presumably the highest set bit of w - verify
+ *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1);
+ return true;
+ }
+ if (!nword)
+ break;
+ } // for nword
+ }
+
+ // 0 not found, all block is 1s...
+ *pos = 0;
+ return true;
}
-/*! \brief GAP xor functor */
-BMFORCEINLINE unsigned xor_op(unsigned v1, unsigned v2)
+/*! @brief Find start of the current 111 interval
+
+ A GAP block is forwarded to bm::gap_find_interval_start(), any other
+ non-full block to bm::bit_block_find_interval_start(). A full block
+ (IS_FULL_BLOCK) reports 0 with code 2 directly.
+
+ @param block - block pointer (asserted non-NULL)
+ @param nbit_from - bit index to start from (asserted < bm::gap_max_bits)
+ @param found_nbit - [out] start of the interval
+ @return search result code 0 - not found, 1 found, 2 - found at the start
+ (code 2 means *found_nbit == 0)
+ @internal
+*/
+inline
+unsigned block_find_interval_start(const bm::word_t* BMRESTRICT block,
+ unsigned nbit_from,
+ unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
{
- return v1 ^ v2;
+ BM_ASSERT(block && found_nbit);
+ BM_ASSERT(nbit_from < bm::gap_max_bits);
+ bool b;
+ if (BM_IS_GAP(block))
+ {
+ const bm::gap_word_t* gap = BMGAP_PTR(block);
+ b = bm::gap_find_interval_start(gap, nbit_from, found_nbit);
+ if (b && *found_nbit == 0)
+ return 2; // start of block, keep searching
+ }
+ else // bit-block
+ {
+ if (IS_FULL_BLOCK(block))
+ {
+ *found_nbit = 0;
+ return 2;
+ }
+ b = bm::bit_block_find_interval_start(block, nbit_from, found_nbit);
+ if (b && *found_nbit == 0)
+ return 2; // start of block, keep searching
+ }
+ return b; // bool converts to 0 (not found) or 1 (found)
}
+// ----------------------------------------------------------------------
-/*! \brief GAP or functor */
-BMFORCEINLINE unsigned or_op(unsigned v1, unsigned v2)
+/*! @brief Returns "true" if one bit is set in the block [left, right]
+ Function check for block varieties
+
+ Dispatch order as coded: a NULL block yields false; a GAP block is
+ forwarded to bm::gap_any_range(); a full block (IS_FULL_BLOCK)
+ yields true; any other pointer is scanned by
+ bm::bit_block_any_range().
+
+ @param block - block pointer (may be NULL, GAP-tagged or full-block)
+ @param left - first bit of the closed range
+ @param right - last bit of the closed range
+ (asserted: left <= right < bm::gap_max_bits)
+ @return true when at least one bit in [left, right] is 1
+ @internal
+*/
+inline
+bool block_any_range(const bm::word_t* const BMRESTRICT block,
+ unsigned left, unsigned right) BMNOEXCEPT
{
- return v1 | v2;
+ BM_ASSERT(left <= right);
+ BM_ASSERT(right < bm::gap_max_bits);
+ if (!block)
+ return false;
+ if (BM_IS_GAP(block))
+ return bm::gap_any_range(BMGAP_PTR(block), left, right);
+ if (IS_FULL_BLOCK(block))
+ return true;
+ return bm::bit_block_any_range(block, left, right); // non-zero result converts to true
}
-/*! \brief GAP or functor */
-BMFORCEINLINE unsigned sub_op(unsigned v1, unsigned v2)
+// ----------------------------------------------------------------------
+
+/*! @brief Returns "true" if one bit is set in the block
+ Function check for block varieties
+
+ Dispatch order as coded: a NULL block yields false; a full block
+ (IS_FULL_BLOCK) yields true; otherwise the result is the negation
+ of bm::gap_is_all_zero() for a GAP block or bm::bit_is_all_zero()
+ for a bit-block.
+
+ @param block - block pointer (may be NULL, GAP-tagged or full-block)
+ @return true when at least one bit in the block is 1
+ @internal
+*/
+inline
+bool block_any(const bm::word_t* const BMRESTRICT block) BMNOEXCEPT
{
- return v1 & ~v2;
+ if (!block)
+ return false;
+ if (IS_FULL_BLOCK(block))
+ return true;
+ bool all_zero = (BM_IS_GAP(block)) ?
+ bm::gap_is_all_zero(BMGAP_PTR(block))
+ : bm::bit_is_all_zero(block);
+ return !all_zero;
}
+
+// ----------------------------------------------------------------------
+
+// GAP blocks manipulation functions:
+
+
/*!
\brief GAP AND operation.
@ingroup gapfunc
*/
-BMFORCEINLINE
+inline
gap_word_t* gap_operation_and(const gap_word_t* BMRESTRICT vect1,
const gap_word_t* BMRESTRICT vect2,
gap_word_t* BMRESTRICT tmp_buf,
- unsigned& dsize)
+ unsigned& dsize) BMNOEXCEPT
{
- bm::gap_buff_op(tmp_buf, vect1, 0, vect2, 0, bm::and_op, dsize);
+ bm::gap_buff_op<bm::gap_word_t, bm::and_func>(
+ tmp_buf, vect1, 0, vect2, 0, dsize);
return tmp_buf;
}
@ingroup gapfunc
*/
-BMFORCEINLINE
+inline
unsigned gap_operation_any_and(const gap_word_t* BMRESTRICT vect1,
- const gap_word_t* BMRESTRICT vect2)
+ const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
- return gap_buff_any_op(vect1, 0, vect2, 0, and_op);
+ return gap_buff_any_op<bm::gap_word_t, bm::and_func>(vect1, 0, vect2, 0);
}
*/
inline
unsigned gap_count_and(const gap_word_t* BMRESTRICT vect1,
- const gap_word_t* BMRESTRICT vect2)
+ const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
- return gap_buff_count_op(vect1, vect2, and_op);
+ return bm::gap_buff_count_op<bm::gap_word_t, bm::and_func>(vect1, vect2);
}
gap_word_t* gap_operation_xor(const gap_word_t* BMRESTRICT vect1,
const gap_word_t* BMRESTRICT vect2,
gap_word_t* BMRESTRICT tmp_buf,
- unsigned& dsize)
+ unsigned& dsize) BMNOEXCEPT
{
- gap_buff_op(tmp_buf, vect1, 0, vect2, 0, bm::xor_op, dsize);
+ bm::gap_buff_op<bm::gap_word_t, bm::xor_func>(
+ tmp_buf, vect1, 0, vect2, 0, dsize);
return tmp_buf;
}
bool gap_operation_dry_xor(const gap_word_t* BMRESTRICT vect1,
const gap_word_t* BMRESTRICT vect2,
unsigned& dsize,
- unsigned limit)
+ unsigned limit) BMNOEXCEPT
{
- return gap_buff_dry_op(vect1, vect2, bm::xor_op, dsize, limit);
+ return
+ bm::gap_buff_dry_op<bm::gap_word_t, bm::xor_func>(vect1, vect2, dsize, limit);
}
*/
BMFORCEINLINE
unsigned gap_operation_any_xor(const gap_word_t* BMRESTRICT vect1,
- const gap_word_t* BMRESTRICT vect2)
+ const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
- return gap_buff_any_op(vect1, 0, vect2, 0, bm::xor_op);
+ return gap_buff_any_op<bm::gap_word_t, bm::xor_func>(vect1, 0, vect2, 0);
}
/*!
@ingroup gapfunc
*/
-BMFORCEINLINE
+BMFORCEINLINE
unsigned gap_count_xor(const gap_word_t* BMRESTRICT vect1,
- const gap_word_t* BMRESTRICT vect2)
+ const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
- return gap_buff_count_op(vect1, vect2, bm::xor_op);
+ return bm::gap_buff_count_op<bm::gap_word_t, bm::xor_func>(vect1, vect2);
}
gap_word_t* gap_operation_or(const gap_word_t* BMRESTRICT vect1,
const gap_word_t* BMRESTRICT vect2,
gap_word_t* BMRESTRICT tmp_buf,
- unsigned& dsize)
+ unsigned& dsize) BMNOEXCEPT
{
- gap_buff_op(tmp_buf, vect1, 1, vect2, 1, bm::and_op, dsize);
- gap_invert(tmp_buf);
+ bm::gap_buff_op<bm::gap_word_t, bm::and_func>(tmp_buf, vect1, 1, vect2, 1, dsize);
+ bm::gap_invert(tmp_buf);
return tmp_buf;
}
*/
BMFORCEINLINE
unsigned gap_count_or(const gap_word_t* BMRESTRICT vect1,
- const gap_word_t* BMRESTRICT vect2)
+ const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
- return gap_buff_count_op(vect1, vect2, bm::or_op);
+ return gap_buff_count_op<bm::gap_word_t, bm::or_func>(vect1, vect2);
}
@ingroup gapfunc
*/
-inline gap_word_t* gap_operation_sub(const gap_word_t* BMRESTRICT vect1,
- const gap_word_t* BMRESTRICT vect2,
- gap_word_t* BMRESTRICT tmp_buf,
- unsigned& dsize)
+inline
+gap_word_t* gap_operation_sub(const gap_word_t* BMRESTRICT vect1,
+ const gap_word_t* BMRESTRICT vect2,
+ gap_word_t* BMRESTRICT tmp_buf,
+ unsigned& dsize) BMNOEXCEPT
{
- gap_buff_op(tmp_buf, vect1, 0, vect2, 1, and_op, dsize);
+ bm::gap_buff_op<bm::gap_word_t, bm::and_func>( // no bug here
+ tmp_buf, vect1, 0, vect2, 1, dsize);
return tmp_buf;
}
@ingroup gapfunc
*/
-BMFORCEINLINE
+inline
unsigned gap_operation_any_sub(const gap_word_t* BMRESTRICT vect1,
- const gap_word_t* BMRESTRICT vect2)
+ const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
- return gap_buff_any_op(vect1, 0, vect2, 1, bm::and_op);
+ return
+ bm::gap_buff_any_op<bm::gap_word_t, bm::and_func>( // no bug here
+ vect1, 0, vect2, 1);
}
*/
BMFORCEINLINE
unsigned gap_count_sub(const gap_word_t* BMRESTRICT vect1,
- const gap_word_t* BMRESTRICT vect2)
+ const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
- return gap_buff_count_op(vect1, vect2, bm::sub_op);
+ return bm::gap_buff_count_op<bm::gap_word_t, bm::sub_func>(vect1, vect2);
}
@ingroup bitfunc
*/
inline
-void bit_block_copy(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src)
+void bit_block_copy(bm::word_t* BMRESTRICT dst,
+ const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
#ifdef BMVECTOPT
VECT_COPY_BLOCK(dst, src);
@ingroup bitfunc
*/
inline
-void bit_block_stream(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src)
+void bit_block_stream(bm::word_t* BMRESTRICT dst,
+ const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
#ifdef VECT_STREAM_BLOCK
VECT_STREAM_BLOCK(dst, src);
@ingroup bitfunc
*/
inline
-bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src)
+bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst,
+ const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
BM_ASSERT(dst);
BM_ASSERT(src);
inline
bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src,
- bm::id64_t digest)
+ bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(dst);
BM_ASSERT(src);
const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2,
const bm::word_t* BMRESTRICT src3,
- bm::id64_t digest)
+ bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(dst);
BM_ASSERT(src0 && src1 && src2 && src3);
bm::id64_t bit_block_and_2way(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2,
- bm::id64_t digest)
+ bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(dst);
BM_ASSERT(src1 && src2);
*/
inline
unsigned bit_block_and_count(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
unsigned count;
const bm::word_t* src1_end = src1 + bm::set_block_size;
*/
inline
unsigned bit_block_and_any(const bm::word_t* src1,
- const bm::word_t* src2)
+ const bm::word_t* src2) BMNOEXCEPT
{
unsigned count = 0;
const bm::word_t* src1_end = src1 + bm::set_block_size;
*/
inline
unsigned bit_block_xor_count(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
unsigned count;
const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
*/
inline
unsigned bit_block_xor_any(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
unsigned count = 0;
const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
*/
inline
unsigned bit_block_sub_count(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
unsigned count;
const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
*/
inline
unsigned bit_block_sub_any(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
unsigned count = 0;
const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
*/
inline
unsigned bit_block_or_count(const bm::word_t* src1,
- const bm::word_t* src2)
+ const bm::word_t* src2) BMNOEXCEPT
{
unsigned count;
const bm::word_t* src1_end = src1 + bm::set_block_size;
*/
inline
unsigned bit_block_or_any(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
unsigned count = 0;
const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
@ingroup bitfunc
*/
inline bm::word_t* bit_operation_and(bm::word_t* BMRESTRICT dst,
- const bm::word_t* BMRESTRICT src)
+ const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
BM_ASSERT(dst || src);
*/
inline
bm::id_t bit_operation_and_count(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
return 0;
*/
inline
bm::id_t bit_operation_and_any(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
return 0;
*/
inline
bm::id_t bit_operation_sub_count(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
if (src1 == src2)
return 0;
*/
inline
bm::id_t bit_operation_sub_count_inv(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
return bit_operation_sub_count(src2, src1);
}
*/
inline
bm::id_t bit_operation_sub_any(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
if (IS_EMPTY_BLOCK(src1))
return 0;
*/
inline
bm::id_t bit_operation_or_count(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
if (IS_FULL_BLOCK(src1) || IS_FULL_BLOCK(src2))
return bm::gap_max_bits;
*/
inline
bm::id_t bit_operation_or_any(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
if (IS_EMPTY_BLOCK(src1))
{
*/
inline
bool bit_block_or(bm::word_t* BMRESTRICT dst,
- const bm::word_t* BMRESTRICT src)
+ const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
#ifdef BMVECTOPT
return VECT_OR_BLOCK(dst, src);
inline
bool bit_block_or_2way(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
#ifdef BMVECTOPT
return VECT_OR_BLOCK_2WAY(dst, src1, src2);
inline
bm::id64_t bit_block_xor_2way(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
#ifdef BMVECTOPT
return VECT_XOR_BLOCK_2WAY(dst, src1, src2);
inline
bool bit_block_or_3way(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
#ifdef BMVECTOPT
return VECT_OR_BLOCK_3WAY(dst, src1, src2);
const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2,
const bm::word_t* BMRESTRICT src3,
- const bm::word_t* BMRESTRICT src4)
+ const bm::word_t* BMRESTRICT src4) BMNOEXCEPT
{
#ifdef BMVECTOPT
return VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4);
*/
inline
bm::word_t* bit_operation_or(bm::word_t* BMRESTRICT dst,
- const bm::word_t* BMRESTRICT src)
+ const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
BM_ASSERT(dst || src);
*/
inline
bm::id64_t bit_block_sub(bm::word_t* BMRESTRICT dst,
- const bm::word_t* BMRESTRICT src)
+ const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
#ifdef BMVECTOPT
bm::id64_t acc = VECT_SUB_BLOCK(dst, src);
inline
bm::id64_t bit_block_sub(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src,
- bm::id64_t digest)
+ bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(dst);
BM_ASSERT(src);
bm::id64_t bit_block_sub_2way(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2,
- bm::id64_t digest)
+ bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(dst);
BM_ASSERT(src1 && src2);
*/
inline
bm::word_t* bit_operation_sub(bm::word_t* BMRESTRICT dst,
- const bm::word_t* BMRESTRICT src)
+ const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
BM_ASSERT(dst || src);
*/
inline
bm::id64_t bit_block_xor(bm::word_t* BMRESTRICT dst,
- const bm::word_t* BMRESTRICT src)
+ const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
BM_ASSERT(dst);
BM_ASSERT(src);
*/
inline
void bit_andnot_arr_ffmask(bm::word_t* BMRESTRICT dst,
- const bm::word_t* BMRESTRICT src)
+ const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
const bm::word_t* BMRESTRICT src_end = src + bm::set_block_size;
#ifdef BMVECTOPT
*/
inline
bm::word_t* bit_operation_xor(bm::word_t* BMRESTRICT dst,
- const bm::word_t* BMRESTRICT src)
+ const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
BM_ASSERT(dst || src);
if (src == dst) return 0; // XOR rule
*/
inline
bm::id_t bit_operation_xor_count(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
if (src1 == src2)
return 0;
*/
inline
bm::id_t bit_operation_xor_any(const bm::word_t* BMRESTRICT src1,
- const bm::word_t* BMRESTRICT src2)
+ const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
if (src1 == src2)
return 0;
@ingroup bitfunc
*/
template<class T>
-unsigned bit_count_nonzero_size(const T* blk, unsigned data_size)
+unsigned bit_count_nonzero_size(const T* blk, unsigned data_size) BMNOEXCEPT
{
BM_ASSERT(blk && data_size);
unsigned count = 0;
@ingroup bitfunc
*/
inline
-unsigned bit_block_find(const bm::word_t* block, unsigned nbit, unsigned* pos)
+unsigned bit_block_find(const bm::word_t* BMRESTRICT block,
+ unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(pos);
if (w)
{
bit_pos = bm::bit_scan_forward32(w); // trailing zeros
- *pos = unsigned(bit_pos + (nword * 8u * sizeof(bm::word_t)));
+ *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t))));
return 1;
}
if (w)
{
bit_pos = bm::bit_scan_forward32(w); // trailing zeros
- *pos = unsigned(bit_pos + (i * 8u * sizeof(bm::word_t)));
+ *pos = unsigned(bit_pos + (i * 8u * unsigned(sizeof(bm::word_t))));
return w;
}
} // for i
}
+
+
/*!
\brief BIT block find the last set bit (backward search)
@ingroup bitfunc
*/
inline
-unsigned bit_find_last(const bm::word_t* block, unsigned* last)
+unsigned bit_find_last(const bm::word_t* BMRESTRICT block,
+ unsigned* BMRESTRICT last) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(last);
if (w)
{
unsigned idx = bm::bit_scan_reverse(w);
- *last = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
+ *last = unsigned(idx + (i * 8u * unsigned(sizeof(bm::word_t))));
return w;
}
if (i == 0)
@internal
*/
inline
-bool bit_find_first(const bm::word_t* block, unsigned* pos)
+bool bit_find_first(const bm::word_t* BMRESTRICT block,
+ unsigned* BMRESTRICT pos) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(pos);
if (w)
{
unsigned idx = bm::bit_scan_forward32(w); // trailing zeros
- *pos = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
+ *pos = unsigned(idx + (i * 8u * unsigned(sizeof(bm::word_t))));
return w;
}
} // for i
@ingroup bitfunc
*/
inline
-unsigned bit_find_first(const bm::word_t* block,
- unsigned* first,
- bm::id64_t digest)
+unsigned bit_find_first(const bm::word_t* BMRESTRICT block,
+ unsigned* BMRESTRICT first,
+ bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(first);
if (w)
{
unsigned idx = bit_scan_forward32(w); // trailing zeros
- *first = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
+ *first = unsigned(idx + (i * 8u * unsigned(sizeof(bm::word_t))));
return w;
}
} // for i
@ingroup bitfunc
*/
inline
-bool bit_find_first_if_1(const bm::word_t* block,
- unsigned* first,
- bm::id64_t digest)
+bool bit_find_first_if_1(const bm::word_t* BMRESTRICT block,
+ unsigned* BMRESTRICT first,
+ bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(first);
SIZE_TYPE bit_find_rank(const bm::word_t* const block,
SIZE_TYPE rank,
unsigned nbit_from,
- unsigned& nbit_pos)
+ unsigned& nbit_pos) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(rank);
SIZE_TYPE block_find_rank(const bm::word_t* const block,
SIZE_TYPE rank,
unsigned nbit_from,
- unsigned& nbit_pos)
+ unsigned& nbit_pos) BMNOEXCEPT
{
if (BM_IS_GAP(block))
{
bm::set_representation best_representation(unsigned bit_count,
unsigned total_possible_bitcount,
unsigned gap_count,
- unsigned block_size)
+ unsigned block_size) BMNOEXCEPT
{
unsigned arr_size = unsigned(sizeof(bm::gap_word_t) * bit_count + sizeof(bm::gap_word_t));
unsigned gap_size = unsigned(sizeof(bm::gap_word_t) * gap_count + sizeof(bm::gap_word_t));
const unsigned* BMRESTRICT src,
bm::id_t bits,
unsigned dest_len,
- unsigned mask = 0)
+ unsigned mask = 0) BMNOEXCEPT
{
T* BMRESTRICT pcurr = dest;
- for (unsigned bit_idx=0; bit_idx < bits; ++src,bit_idx += unsigned(sizeof(*src) * 8))
+ for (unsigned bit_idx=0; bit_idx < bits;
+ ++src,bit_idx += unsigned(sizeof(*src) * 8))
{
unsigned val = *src ^ mask; // invert value by XOR 0xFF..
if (val == 0)
continue;
- if (pcurr + sizeof(val)*8 >= dest + dest_len) // insufficient space
+ if (pcurr + unsigned(sizeof(val)*8) >= dest + dest_len) // insufficient space
return 0;
// popscan loop to decode bits in a word
while (val)
@internal
*/
inline
-bool check_block_zero(const bm::word_t* blk, bool deep_scan)
+bool check_block_zero(const bm::word_t* blk, bool deep_scan) BMNOEXCEPT
{
if (!blk) return true;
if (IS_FULL_BLOCK(blk)) return false;
@internal
*/
inline
-bool check_block_one(const bm::word_t* blk, bool deep_scan)
+bool check_block_one(const bm::word_t* blk, bool deep_scan) BMNOEXCEPT
{
if (blk == 0) return false;
template<typename T>
unsigned gap_overhead(const T* length,
const T* length_end,
- const T* glevel_len)
+ const T* glevel_len) BMNOEXCEPT
{
BM_ASSERT(length && length_end && glevel_len);
template<typename T>
bool improve_gap_levels(const T* length,
const T* length_end,
- T* glevel_len)
+ T* glevel_len) BMNOEXCEPT
{
BM_ASSERT(length && length_end && glevel_len);
inline
bool block_find_first_diff(const bm::word_t* BMRESTRICT blk,
const bm::word_t* BMRESTRICT arg_blk,
- unsigned* BMRESTRICT pos)
+ unsigned* BMRESTRICT pos) BMNOEXCEPT
{
// If one block is zero we check if the other one has at least
// one bit ON
bitblock_get_adapter(const bm::word_t* bit_block) : b_(bit_block) {}
BMFORCEINLINE
- bm::word_t get_32() { return *b_++; }
+ bm::word_t get_32() BMNOEXCEPT { return *b_++; }
private:
const bm::word_t* b_;
};
public:
bitblock_sum_adapter() : sum_(0) {}
BMFORCEINLINE
- void push_back(bm::word_t w) { this->sum_+= w; }
+ void push_back(bm::word_t w) BMNOEXCEPT { this->sum_+= w; }
/// Get accumulated sum
- bm::word_t sum() const { return this->sum_; }
+ bm::word_t sum() const BMNOEXCEPT { return this->sum_; }
private:
bm::word_t sum_;
};
cnt_(0)
{}
- bm::word_t get_32()
+ bm::word_t get_32() BMNOEXCEPT
{
if (cnt_ < from_ || cnt_ > to_)
{
void bit_recomb(It1& it1, It2& it2,
BinaryOp& op,
Encoder& enc,
- unsigned block_size = bm::set_block_size)
+ unsigned block_size = bm::set_block_size) BMNOEXCEPT
{
for (unsigned i = 0; i < block_size; ++i)
{
/// Bit AND functor
template<typename W> struct bit_AND
{
- W operator()(W w1, W w2) { return w1 & w2; }
+ W operator()(W w1, W w2) BMNOEXCEPT { return w1 & w2; }
};
/// Bit OR functor
template<typename W> struct bit_OR
{
- W operator()(W w1, W w2) { return w1 | w2; }
+ W operator()(W w1, W w2) BMNOEXCEPT { return w1 | w2; }
};
/// Bit SUB functor
template<typename W> struct bit_SUB
{
- W operator()(W w1, W w2) { return w1 & ~w2; }
+ W operator()(W w1, W w2) BMNOEXCEPT { return w1 & ~w2; }
};
/// Bit XOR functor
template<typename W> struct bit_XOR
{
- W operator()(W w1, W w2) { return w1 ^ w2; }
+ W operator()(W w1, W w2) BMNOEXCEPT { return w1 ^ w2; }
};
/// Bit ASSIGN functor
template<typename W> struct bit_ASSIGN
{
- W operator()(W, W w2) { return w2; }
+ W operator()(W, W w2) BMNOEXCEPT { return w2; }
};
/// Bit COUNT functor
template<typename W> struct bit_COUNT
{
- W operator()(W w1, W w2)
+ W operator()(W w1, W w2) BMNOEXCEPT
{
w1 = 0;
BM_INCWORD_BITCOUNT(w1, w2);
/// Bit COUNT AND functor
template<typename W> struct bit_COUNT_AND
{
- W operator()(W w1, W w2)
+ W operator()(W w1, W w2) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w1 & w2);
/// Bit COUNT XOR functor
template<typename W> struct bit_COUNT_XOR
{
- W operator()(W w1, W w2)
+ W operator()(W w1, W w2) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w1 ^ w2);
/// Bit COUNT OR functor
template<typename W> struct bit_COUNT_OR
{
- W operator()(W w1, W w2)
+ W operator()(W w1, W w2) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w1 | w2);
/// Bit COUNT SUB AB functor
template<typename W> struct bit_COUNT_SUB_AB
{
- W operator()(W w1, W w2)
+ W operator()(W w1, W w2) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w1 & (~w2));
/// Bit SUB BA functor
template<typename W> struct bit_COUNT_SUB_BA
{
- W operator()(W w1, W w2)
+ W operator()(W w1, W w2) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w2 & (~w1));
/// Bit COUNT A functor
template<typename W> struct bit_COUNT_A
{
- W operator()(W w1, W )
+ W operator()(W w1, W ) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w1);
/// Bit COUNT B functor
template<typename W> struct bit_COUNT_B
{
- W operator()(W, W w2)
+ W operator()(W, W w2) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w2);
0, // set_COUNT_B
};
-
-const unsigned short set_bitscan_wave_size = 2;
+/**
+ Size of bit decode wave in words
+ @internal
+ */
+const unsigned short set_bitscan_wave_size = 4;
/*!
\brief Unpacks word wave (Nx 32-bit words)
\param w_ptr - pointer on wave start
@internal
*/
inline
-unsigned short bitscan_wave(const bm::word_t* w_ptr, unsigned char* bits)
+unsigned short
+bitscan_wave(const bm::word_t* BMRESTRICT w_ptr,
+ unsigned char* BMRESTRICT bits) BMNOEXCEPT
{
bm::word_t w0, w1;
- unsigned short cnt0;
+ unsigned int cnt0;
w0 = w_ptr[0];
w1 = w_ptr[1];
#if defined(BMAVX512OPT) || defined(BMAVX2OPT) || defined(BMSSE42OPT)
// combine into 64-bit word and scan (when HW popcnt64 is available)
bm::id64_t w = (bm::id64_t(w1) << 32) | w0;
- cnt0 = (unsigned short) bm::bitscan_popcnt64(w, bits);
+ cnt0 = bm::bitscan_popcnt64(w, bits);
+
+ w0 = w_ptr[2];
+ w1 = w_ptr[3];
+ w = (bm::id64_t(w1) << 32) | w0;
+ cnt0 += bm::bitscan_popcnt64(w, bits + cnt0, 64);
#else
- unsigned short cnt1;
// decode wave as two 32-bit bitscan decodes
- cnt0 = w0 ? bm::bitscan_popcnt(w0, bits) : 0;
- cnt1 = w1 ? bm::bitscan_popcnt(w1, bits + cnt0, 32) : 0;
- cnt0 = (unsigned short)(cnt0 + cnt1);
+ cnt0 = bm::bitscan_popcnt(w0, bits);
+ cnt0 += bm::bitscan_popcnt(w1, bits + cnt0, 32);
+
+ w0 = w_ptr[2];
+ w1 = w_ptr[3];
+ cnt0 += bm::bitscan_popcnt(w0, bits + cnt0, 64);
+ cnt0 += bm::bitscan_popcnt(w1, bits + cnt0, 64+32);
#endif
- return cnt0;
+ return static_cast<unsigned short>(cnt0);
}
#if defined (BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
@internal
*/
inline
-void bit_block_gather_scatter(unsigned* arr, const bm::word_t* blk,
- const unsigned* idx, unsigned size, unsigned start,
- unsigned bit_idx)
+void bit_block_gather_scatter(unsigned* BMRESTRICT arr,
+ const bm::word_t* BMRESTRICT blk,
+ const unsigned* BMRESTRICT idx,
+ unsigned size, unsigned start,
+ unsigned bit_idx) BMNOEXCEPT
{
typedef unsigned TRGW;
typedef unsigned IDX;
@internal
*/
template<typename TRGW, typename IDX, typename SZ>
-void bit_block_gather_scatter(TRGW* arr, const bm::word_t* blk,
- const IDX* idx, SZ size, SZ start, unsigned bit_idx)
+void bit_block_gather_scatter(TRGW* BMRESTRICT arr,
+ const bm::word_t* BMRESTRICT blk,
+ const IDX* BMRESTRICT idx,
+ SZ size, SZ start, unsigned bit_idx) BMNOEXCEPT
{
// TODO: SIMD for 64-bit index sizes and 64-bit target value size
//
{
const SZ base = start + k;
const unsigned nbitA = unsigned(idx[base] & bm::set_block_mask);
- arr[base] |= (TRGW(bool(blk[nbitA >> bm::set_word_shift] & (mask1 << (nbitA & bm::set_word_mask)))) << bit_idx);
+ arr[base] |= (TRGW(bool(blk[nbitA >> bm::set_word_shift] &
+ (mask1 << (nbitA & bm::set_word_mask)))) << bit_idx);
const unsigned nbitB = unsigned(idx[base + 1] & bm::set_block_mask);
- arr[base+1] |= (TRGW(bool(blk[nbitB >> bm::set_word_shift] & (mask1 << (nbitB & bm::set_word_mask)))) << bit_idx);
+ arr[base+1] |= (TRGW(bool(blk[nbitB >> bm::set_word_shift] &
+ (mask1 << (nbitB & bm::set_word_mask)))) << bit_idx);
} // for k
-
for (; k < len; ++k)
{
unsigned nbit = unsigned(idx[start + k] & bm::set_block_mask);
- arr[start + k] |= (TRGW(bool(blk[nbit >> bm::set_word_shift] & (mask1 << (nbit & bm::set_word_mask)))) << bit_idx);
+ arr[start + k] |= (TRGW(bool(blk[nbit >> bm::set_word_shift] &
+ (mask1 << (nbit & bm::set_word_mask)))) << bit_idx);
} // for k
}
@internal
*/
inline
-bm::id64_t idx_arr_block_lookup_u64(const bm::id64_t* idx, bm::id64_t size, bm::id64_t nb, bm::id64_t start)
+bm::id64_t idx_arr_block_lookup_u64(const bm::id64_t* idx,
+ bm::id64_t size, bm::id64_t nb, bm::id64_t start) BMNOEXCEPT
{
BM_ASSERT(idx);
BM_ASSERT(start < size);
@internal
*/
inline
-unsigned idx_arr_block_lookup_u32(const unsigned* idx, unsigned size, unsigned nb, unsigned start)
+unsigned idx_arr_block_lookup_u32(const unsigned* idx,
+ unsigned size, unsigned nb, unsigned start) BMNOEXCEPT
{
BM_ASSERT(idx);
BM_ASSERT(start < size);
inline
void set_block_bits_u64(bm::word_t* BMRESTRICT block,
const bm::id64_t* BMRESTRICT idx,
- bm::id64_t start, bm::id64_t stop)
+ bm::id64_t start, bm::id64_t stop) BMNOEXCEPT
{
// TODO: SIMD for 64-bit mode
for (bm::id64_t i = start; i < stop; ++i)
unsigned nbit = unsigned(n & bm::set_block_mask);
unsigned nword = nbit >> bm::set_word_shift;
nbit &= bm::set_word_mask;
- bm::word_t mask = (1u << nbit);
- block[nword] |= mask;
+ block[nword] |= (1u << nbit);
} // for i
}
inline
void set_block_bits_u32(bm::word_t* BMRESTRICT block,
const unsigned* BMRESTRICT idx,
- unsigned start, unsigned stop )
+ unsigned start, unsigned stop ) BMNOEXCEPT
{
#if defined(VECT_SET_BLOCK_BITS)
VECT_SET_BLOCK_BITS(block, idx, start, stop);
unsigned nbit = unsigned(n & bm::set_block_mask);
unsigned nword = nbit >> bm::set_word_shift;
nbit &= bm::set_word_mask;
- bm::word_t mask = (1u << nbit);
- block[nword] |= mask;
+ block[nword] |= (1u << nbit);
} // for i
#endif
}
@internal
*/
inline
-bool block_ptr_array_range(bm::word_t** arr, unsigned& left, unsigned& right)
+bool block_ptr_array_range(bm::word_t** arr,
+ unsigned& left, unsigned& right) BMNOEXCEPT
{
BM_ASSERT(arr);
*/
inline
unsigned lower_bound_linear_u32(const unsigned* arr, unsigned target,
- unsigned from, unsigned to)
+ unsigned from, unsigned to) BMNOEXCEPT
{
BM_ASSERT(arr);
BM_ASSERT(from <= to);
@internal
*/
inline
-unsigned lower_bound_linear_u64(const unsigned long long* arr, unsigned long long target,
- unsigned from, unsigned to)
+unsigned lower_bound_linear_u64(const unsigned long long* arr,
+ unsigned long long target,
+ unsigned from, unsigned to) BMNOEXCEPT
{
BM_ASSERT(arr);
BM_ASSERT(from <= to);
*/
inline
unsigned lower_bound_u32(const unsigned* arr, unsigned target,
- unsigned from, unsigned to)
+ unsigned from, unsigned to) BMNOEXCEPT
{
BM_ASSERT(arr);
BM_ASSERT(from <= to);
@internal
*/
inline
-unsigned lower_bound_u64(const unsigned long long* arr, unsigned long long target,
- unsigned from, unsigned to)
+unsigned lower_bound_u64(const unsigned long long* arr,
+ unsigned long long target,
+ unsigned from, unsigned to) BMNOEXCEPT
{
BM_ASSERT(arr);
BM_ASSERT(from <= to);
*/
#ifdef BM64ADDR
inline
-bm::id64_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx)
+bm::id64_t block_to_global_index(unsigned i, unsigned j,
+ unsigned block_idx) BMNOEXCEPT
{
bm::id64_t base_idx = bm::id64_t(i) * bm::set_sub_array_size * bm::gap_max_bits;
base_idx += j * bm::gap_max_bits;
}
#else
inline
-bm::id_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx)
+bm::id_t block_to_global_index(unsigned i, unsigned j,
+ unsigned block_idx) BMNOEXCEPT
{
unsigned base_idx = i * bm::set_sub_array_size * bm::gap_max_bits;
base_idx += j * bm::gap_max_bits;
@internal
*/
inline
-bm::id64_t ptrp_test(ptr_payload_t ptr, bm::gap_word_t v)
+bm::id64_t ptrp_test(ptr_payload_t ptr, bm::gap_word_t v) BMNOEXCEPT
{
if (v == 0)
{
class gamma_decoder
{
public:
- gamma_decoder(TBitIO& bin) : bin_(bin)
+ gamma_decoder(TBitIO& bin) BMNOEXEPT : bin_(bin)
{}
/**
Start encoding sequence
*/
- void start()
+ void start() BMNOEXEPT
{}
/**
Stop decoding sequence
*/
- void stop()
+ void stop() BMNOEXEPT
{}
/**
Decode word
*/
- T operator()(void)
+ T operator()(void) BMNOEXEPT
{
unsigned l = bin_.eat_zero_bits();
bin_.get_bit(); // get border bit
--- /dev/null
+#ifndef BMINTERVALS__H__INCLUDED__
+#define BMINTERVALS__H__INCLUDED__
+
+/*
+Copyright(c) 2002-2020 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+For more information please visit: http://bitmagic.io
+*/
+/*! \file bmintervals.h
+ \brief Algorithms for bit ranges and intervals
+*/
+
+#ifndef BM__H__INCLUDED__
+// BitMagic utility headers do not include main "bm.h" declaration
+// #include "bm.h" or "bm64.h" explicitly
+# error missing include (bm.h or bm64.h)
+#endif
+
+#include "bmdef.h"
+
+/** \defgroup bvintervals Algorithms for bit intervals
+ Algorithms and iterators for bit ranges and intervals
+ @ingroup bvector
+ */
+
+
+namespace bm
+{
+
+/*!
+ \brief forward iterator class to traverse bit-vector as ranges
+
+ Traverse enumerator for forward walking bit-vector as intervals:
+ series of consecutive 1111s flanked with zeroes.
+ Enumerator can traverse the whole bit-vector or jump(go_to) to position.
+
+ \ingroup bvintervals
+*/
+template<typename BV>
+class interval_enumerator
+{
+public:
+#ifndef BM_NO_STL
+    typedef std::input_iterator_tag iterator_category;
+#endif
+    typedef BV bvector_type;
+    typedef typename bvector_type::size_type size_type;
+    typedef typename bvector_type::allocator_type allocator_type;
+    typedef bm::byte_buffer<allocator_type> buffer_type;
+    typedef bm::pair<size_type, size_type> pair_type;
+
+public:
+    /*! @name Construction and assignment */
+    //@{
+
+    /**
+        Default constructor: enumerator is not attached to any bit-vector,
+        current interval is set to [bm::id_max, bm::id_max]
+    */
+    interval_enumerator()
+        : bv_(0), interval_(bm::id_max, bm::id_max), gap_ptr_(0)
+    {}
+
+    /**
+        Construct enumerator for the bit-vector
+        (positioning starts from bit 0)
+    */
+    interval_enumerator(const BV& bv)
+        : bv_(&bv), interval_(bm::id_max, bm::id_max), gap_ptr_(0)
+    {
+        go_to_impl(0, false);
+    }
+
+    /**
+        Construct enumerator for the specified position
+        @param bv - source bit-vector
+        @param start_pos - position on bit-vector to search for interval
+        @param extend_start - flag to extend interval start to the start if
+                              true start happens to be less than start_pos
+        @sa go_to
+    */
+    interval_enumerator(const BV& bv, size_type start_pos, bool extend_start)
+        : bv_(&bv), interval_(bm::id_max, bm::id_max), gap_ptr_(0)
+    {
+        go_to_impl(start_pos, extend_start);
+    }
+
+    /**
+        Copy constructor
+        Attaches to the same bit-vector and re-positions at the start of
+        the source enumerator's current interval (internal buffers are not
+        copied, they are rebuilt by go_to_impl)
+    */
+    interval_enumerator(const interval_enumerator<BV>& ien)
+        : bv_(ien.bv_), interval_(bm::id_max, bm::id_max), gap_ptr_(0)
+    {
+        go_to_impl(ien.start(), false);
+    }
+
+    /**
+        Assignment operator
+        Attaches to the bit-vector of the argument and re-positions at the
+        start of its current interval
+        @return reference to this enumerator
+    */
+    interval_enumerator& operator=(const interval_enumerator<BV>& ien)
+    {
+        bv_ = ien.bv_; gap_ptr_ = 0;
+        go_to_impl(ien.start(), false);
+        // a non-void function must return: flowing off the end is
+        // undefined behavior
+        return *this;
+    }
+
+#ifndef BM_NO_CXX11
+    /** move-ctor (constructs an empty enumerator, then swaps with the source) */
+    interval_enumerator(interval_enumerator<BV>&& ien) BMNOEXCEPT
+        : bv_(0), interval_(bm::id_max, bm::id_max), gap_ptr_(0)
+    {
+        this->swap(ien);
+    }
+
+    /** move assignment operator (implemented as swap, self-move is a no-op) */
+    interval_enumerator<BV>& operator=(interval_enumerator<BV>&& ien) BMNOEXCEPT
+    {
+        if (this != &ien)
+            this->swap(ien);
+        return *this;
+    }
+#endif
+
+    //@}
+
+
+    // -----------------------------------------------------------------
+
+    /*! @name Comparison methods all use start position to compare */
+    //@{
+
+    bool operator==(const interval_enumerator<BV>& ien) const BMNOEXCEPT
+        { return (start() == ien.start()); }
+    bool operator!=(const interval_enumerator<BV>& ien) const BMNOEXCEPT
+        { return (start() != ien.start()); }
+    bool operator < (const interval_enumerator<BV>& ien) const BMNOEXCEPT
+        { return (start() < ien.start()); }
+    bool operator <= (const interval_enumerator<BV>& ien) const BMNOEXCEPT
+        { return (start() <= ien.start()); }
+    bool operator > (const interval_enumerator<BV>& ien) const BMNOEXCEPT
+        { return (start() > ien.start()); }
+    bool operator >= (const interval_enumerator<BV>& ien) const BMNOEXCEPT
+        { return (start() >= ien.start()); }
+    //@}
+
+
+    /// Return interval start/left as bit-vector coordinate 011110 [left..right]
+    size_type start() const BMNOEXCEPT;
+    /// Return interval end/right as bit-vector coordinate 011110 [left..right]
+    size_type end() const BMNOEXCEPT;
+
+    /// Dereference: get current interval pair (same as get())
+    const pair_type& operator*() const BMNOEXCEPT { return interval_; }
+
+    /// Get interval pair
+    const pair_type& get() const BMNOEXCEPT { return interval_; }
+
+    /// Returns true if enumerator is valid (false if traversal is done)
+    bool valid() const BMNOEXCEPT;
+
+    // -----------------------------------------------------------------
+
+    /*! @name enumerator positioning */
+    //@{
+
+    /*!
+        @brief Go to interval at specified position
+        Jump to position with interval. If interval is not available at
+        the specified position (0 bit) enumerator will find the next interval.
+        If interval is present we have an option to find interval start [left..]
+        and set enumerator from the effective start coordinate
+
+        @param pos - position on bit-vector
+        @param extend_start - find effective start if it is less than the
+                              go to position
+        @return true if enumerator remains valid after the jump
+    */
+    bool go_to(size_type pos, bool extend_start = true);
+
+    /*! Advance to the next interval
+        @return true if interval is available
+        @sa valid
+    */
+    bool advance();
+
+    /*! \brief Advance enumerator forward to the next interval (prefix form)
+        NOTE(review): declared BMNOEXCEPT while advance() is not; confirm
+        that advance() cannot throw.
+    */
+    interval_enumerator<BV>& operator++() BMNOEXCEPT
+        { advance(); return *this; }
+
+    /*! \brief Advance enumerator forward to the next interval (postfix form)
+        Returns a copy of the enumerator taken before the advance.
+        NOTE(review): declared BMNOEXCEPT while the copy constructor and
+        advance() are not; confirm that neither can throw.
+    */
+    interval_enumerator<BV> operator++(int) BMNOEXCEPT
+    {
+        interval_enumerator<BV> tmp = *this;
+        advance();
+        return tmp;
+    }
+    //@}
+
+    /**
+        swap enumerator with another one
+    */
+    void swap(interval_enumerator<BV>& ien) BMNOEXCEPT;
+
+protected:
+    typedef typename bvector_type::block_idx_type block_idx_type;
+    typedef typename bvector_type::allocator_type bv_allocator_type;
+    typedef bm::heap_vector<unsigned short, bv_allocator_type, true>
+                                                       gap_vector_type;
+
+
+    /// Positioning implementation shared by constructors, assignment and go_to
+    bool go_to_impl(size_type pos, bool extend_start);
+
+    /// Turn FSM into invalid state (out of range)
+    void invalidate() BMNOEXCEPT;
+
+private:
+    const BV* bv_; ///!< bit-vector for traversal
+    gap_vector_type gap_buf_; ///!< GAP buf.vector for bit-block
+    pair_type interval_; ///!< current interval
+    const bm::gap_word_t* gap_ptr_; ///!< current pointer in GAP block
+};
+
+//----------------------------------------------------------------------------
+
+/*!
+ \brief Returns true if range is all 1s flanked with 0s
+ Function performs the test on a closed range [left, right]
+ true interval is all 1s AND test(left-1)==false AND test(right+1)==false
+ Examples:
+ 01110 [1,3] - true
+ 11110 [0,3] - true
+ 11110 [1,3] - false
+ \param bv - bit-vector for check
+ \param left - index of first bit start checking
+ \param right - index of last bit
+ \return true/false
+
+ \ingroup bvintervals
+
+ @sa is_all_one_range
+*/
+template<class BV>
+bool is_interval(const BV& bv,
+                 typename BV::size_type left,
+                 typename BV::size_type right) BMNOEXCEPT
+{
+    typedef typename BV::block_idx_type block_idx_type;
+
+    const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
+    if (!bman.is_init())
+        return false; // nothing to do
+
+    // normalize arguments into an ordered, in-range closed range
+    if (right < left)
+        bm::xor_swap(left, right);
+    if (left == bm::id_max) // out of range
+        return false;
+    if (right == bm::id_max)
+        --right;
+
+    block_idx_type nb_from = (left >> bm::set_block_shift);
+    block_idx_type nb_to = (right >> bm::set_block_shift);
+
+    // fast case: the range and both flanking bits belong to one block
+    if (nb_from == nb_to)
+    {
+        unsigned bit_from = unsigned(left & bm::set_block_mask);
+        unsigned bit_to = unsigned(right & bm::set_block_mask);
+        if ((bit_from > 0) && (bit_to < bm::gap_max_bits-1))
+        {
+            unsigned i0, j0;
+            bm::get_block_coord(nb_from, i0, j0);
+            const bm::word_t* blk = bman.get_block_ptr(i0, j0);
+            return bm::block_is_interval(blk, bit_from, bit_to);
+        }
+    }
+
+    // general case: test the flanks first, then the body of the range
+    if (left > 0 && bv.test(left-1))
+        return false; // left flank is set
+    if ((right < (bm::id_max - 1)) && bv.test(right + 1))
+        return false; // right flank is set
+    return bv.is_all_one_range(left, right);
+}
+
+
+//----------------------------------------------------------------------------
+
+/*!
+
+ \brief Reverse find index of first 1 bit gap (01110) starting from position
+ Reverse scan for the first 1 in a block of continuous 1s.
+ Method employs closed interval semantics: 0[pos..from]
+
+ \param bv - bit-vector for search
+ \param from - position to start reverse search from
+ \param pos - [out] index of the found first 1 bit in a gap of bits
+ \return true if search returned result, false if not found
+ (start point is zero)
+
+ \sa is_interval, find_interval_end
+ \ingroup bvintervals
+*/
+template<class BV>
+bool find_interval_start(const BV& bv,
+ typename BV::size_type from,
+ typename BV::size_type& pos) BMNOEXCEPT
+{
+ // Scans backwards from `from`, block by block, and stores in `pos` the
+ // lowest index reached while the per-block helper keeps reporting that
+ // the run of 1s continues.
+ typedef typename BV::size_type size_type;
+ typedef typename BV::block_idx_type block_idx_type;
+
+ const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
+
+ if (!bman.is_init())
+ return false; // nothing to do
+ if (!from)
+ {
+ // position 0 has nothing to its left: the answer is just bit 0 itself
+ pos = from;
+ return bv.test(from);
+ }
+
+ block_idx_type nb = (from >> bm::set_block_shift);
+ unsigned i0, j0;
+ bm::get_block_coord(nb, i0, j0);
+
+ size_type base_idx;
+ unsigned found_nbit;
+
+ const bm::word_t* block = bman.get_block_ptr(i0, j0);
+ if (!block)
+ return false;
+ unsigned nbit = unsigned(from & bm::set_block_mask);
+ // first probe: search inside the block that contains `from`
+ unsigned res = bm::block_find_interval_start(block, nbit, &found_nbit);
+
+ switch (res)
+ {
+ case 0: // not interval
+ return false;
+ case 1: // interval found
+ pos = found_nbit + (nb * bm::gap_max_bits);
+ return true;
+ case 2: // keep scanning
+ // record the candidate start, then look into the preceding blocks
+ base_idx = bm::get_block_start<size_type>(i0, j0);
+ pos = base_idx + found_nbit;
+ if (!nb)
+ return true; // block 0: there is nothing before it
+ break;
+ default:
+ BM_ASSERT(0);
+ } // switch
+
+ // continue from the block immediately before the starting one
+ --nb;
+ bm::get_block_coord(nb, i0, j0);
+ bm::word_t*** blk_root = bman.top_blocks_root();
+
+ // descending walk over top-level entries; exits via return or the
+ // explicit `if (!i) break;` checks because `i` is unsigned
+ for (unsigned i = i0; true; --i)
+ {
+ bm::word_t** blk_blk = blk_root[i];
+ if (!blk_blk)
+ return true; // null entry: keep the last recorded pos
+ if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+ {
+ // whole top-level entry marked full: move pos to its start
+ pos = bm::get_super_block_start<size_type>(i);
+ if (!i)
+ break;
+ continue;
+ }
+ // resume at j0 in the first entry visited, otherwise start from 255
+ unsigned j = (i == i0) ? j0 : 255;
+ for (; true; --j)
+ {
+ // NOTE(review): blk_blk was already compared with
+ // FULL_BLOCK_FAKE_ADDR above (that path does `continue`), so this
+ // branch looks unreachable here - possibly blk_blk[j] was
+ // intended; confirm against block_find_interval_start().
+ if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+ {
+ pos = bm::get_block_start<size_type>(i, j);
+ goto loop_j_end; // continue
+ }
+
+ block = blk_blk[j];
+ if (!block)
+ return true; // null block: keep the last recorded pos
+
+ // probe from the last bit of this block
+ res = bm::block_find_interval_start(block,
+ bm::gap_max_bits-1, &found_nbit);
+ switch (res)
+ {
+ case 0: // not an interval here (one was already open, so keep the last result)
+ return true;
+ case 1: // interval found
+ base_idx = bm::get_block_start<size_type>(i, j);
+ pos = base_idx + found_nbit;
+ return true;
+ case 2: // keep scanning
+ pos = bm::get_block_start<size_type>(i, j);
+ break;
+ default:
+ BM_ASSERT(0);
+ } // switch
+
+ loop_j_end: // continue point
+ if (!j)
+ break;
+ } // for j
+
+ if (!i)
+ break;
+ } // for i
+
+ return true;
+}
+
+
+//----------------------------------------------------------------------------
+
+/*!
+ \brief Forward find index of the last 1 bit of a gap (01110) starting from position
+ Forward scan for the last 1 in a block of continuous 1s.
+ Method employs closed interval semantics: [from..pos]0
+
+ \param bv - bit-vector for search
+ \param from - position to start forward search from
+ \param pos - [out] index of the found last 1 bit in a gap of bits
+ \return true if search returned result, false if not found
+ (start point is zero)
+
+ \sa is_interval, find_interval_start
+ \ingroup bvintervals
+*/
+template <typename BV>
+bool find_interval_end(const BV& bv,
+ typename BV::size_type from,
+ typename BV::size_type & pos) BMNOEXCEPT
+{
+ // Scans forward from `from`, block by block, and stores in `pos` the
+ // highest index reached while the per-block helper keeps reporting that
+ // the run of 1s continues.
+ typedef typename BV::block_idx_type block_idx_type;
+
+ if (from == bm::id_max)
+ return false;
+ const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
+
+ if (!bman.is_init())
+ return false; // nothing to do
+ if (from == bm::id_max-1)
+ {
+ // no forward scan is attempted from id_max-1: just test that bit
+ pos = from;
+ return bv.test(from);
+ }
+
+ block_idx_type nb = (from >> bm::set_block_shift);
+ unsigned i0, j0;
+ bm::get_block_coord(nb, i0, j0);
+
+ unsigned found_nbit;
+
+ const bm::word_t* block = bman.get_block_ptr(i0, j0);
+ if (!block)
+ return false;
+ unsigned nbit = unsigned(from & bm::set_block_mask);
+ // first probe: search inside the block that contains `from`
+ unsigned res = bm::block_find_interval_end(block, nbit, &found_nbit);
+ switch (res)
+ {
+ case 0: // not interval
+ return false;
+ case 1: // interval found
+ pos = found_nbit + (nb * bm::gap_max_bits);
+ return true;
+ case 2: // keep scanning
+ // record the candidate end, then look into the following blocks
+ pos = found_nbit + (nb * bm::gap_max_bits);
+ break;
+ default:
+ BM_ASSERT(0);
+ } // switch
+
+ // upper bound for the top-level walk, clamped to the allocated top size
+ block_idx_type nblock_right = (bm::id_max >> bm::set_block_shift);
+ unsigned i_from, j_from, i_to, j_to;
+ bm::get_block_coord(nblock_right, i_to, j_to);
+ block_idx_type top_size = bman.top_block_size();
+ if (i_to >= top_size)
+ i_to = unsigned(top_size-1);
+
+ // continue from the block immediately after the starting one
+ ++nb;
+ bm::word_t*** blk_root = bman.top_blocks_root();
+ bm::get_block_coord(nb, i_from, j_from);
+
+ for (unsigned i = i_from; i <= i_to; ++i)
+ {
+ bm::word_t** blk_blk = blk_root[i];
+ if (!blk_blk)
+ return true; // null entry: keep the last recorded pos
+ if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+ {
+ if (i > i_from)
+ {
+ // entry marked full and entered from its first block:
+ // advance pos by the whole entry in one step
+ pos += bm::gap_max_bits * bm::set_sub_array_size;
+ continue;
+ }
+ else
+ {
+ // TODO: optimization to avoid scanning rest of the super block
+ }
+ }
+
+ // resume at j_from in the first entry visited, otherwise start from 0
+ unsigned j = (i == i_from) ? j_from : 0;
+ do
+ {
+ if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
+ {
+ // reached only for i == i_from (see above): advance one block
+ // per iteration; `continue` jumps to the ++j loop condition
+ pos += bm::gap_max_bits;
+ continue;
+ }
+
+ block = blk_blk[j];
+ if (!block)
+ return true; // null block: keep the last recorded pos
+
+ // probe from bit 0 of this block
+ res = bm::block_find_interval_end(block, 0, &found_nbit);
+ switch (res)
+ {
+ case 0: // not an interval here (one was already open, so keep the last result)
+ return true;
+ case 1: // interval found
+ // presumably pos is the last bit of the previous block here, so
+ // +1 steps to bit 0 of this block - TODO confirm
+ pos += found_nbit+1;
+ return true;
+ case 2: // keep scanning
+ pos += bm::gap_max_bits;
+ break;
+ default:
+ BM_ASSERT(0);
+ } // switch
+ } while (++j < bm::set_sub_array_size);
+ } // for i
+
+ return true;
+}
+
+
+
+//----------------------------------------------------------------------------
+//
+//----------------------------------------------------------------------------
+
+// Returns interval_.first, the stored start of the current interval.
+template<typename BV>
+typename interval_enumerator<BV>::size_type
+interval_enumerator<BV>::start() const BMNOEXCEPT
+{
+ return interval_.first;
+}
+
+//----------------------------------------------------------------------------
+
+// Returns interval_.second, the stored end of the current interval.
+template<typename BV>
+typename interval_enumerator<BV>::size_type
+interval_enumerator<BV>::end() const BMNOEXCEPT
+{
+ return interval_.second;
+}
+
+//----------------------------------------------------------------------------
+
+// True unless interval_.first holds bm::id_max (the value invalidate() stores).
+template<typename BV>
+bool interval_enumerator<BV>::valid() const BMNOEXCEPT
+{
+ return (interval_.first != bm::id_max);
+}
+
+//----------------------------------------------------------------------------
+
+// Marks the enumerator as not valid by setting both interval bounds to
+// bm::id_max; valid() tests interval_.first against that value.
+template<typename BV>
+void interval_enumerator<BV>::invalidate() BMNOEXCEPT
+{
+ interval_.first = interval_.second = bm::id_max;
+}
+
+//----------------------------------------------------------------------------
+
+// Public entry point: forwards both arguments unchanged to go_to_impl()
+// and returns its result.
+template<typename BV>
+bool interval_enumerator<BV>::go_to(size_type pos, bool extend_start)
+{
+ return go_to_impl(pos, extend_start);
+}
+
+//----------------------------------------------------------------------------
+
+// Positions the enumerator on the interval located at or after `pos`.
+//
+// pos          - position to start from
+// extend_start - when true, first try find_interval_start() so that an
+//                interval already open at `pos` is reported from its true
+//                start; otherwise (or if that fails) the first set bit at or
+//                after `pos` is used as the interval start
+// Returns true when an interval was found (interval_ is filled in), false
+// when the enumerator was invalidated.
+template<typename BV>
+bool interval_enumerator<BV>::go_to_impl(size_type pos, bool extend_start)
+{
+    if (!bv_ || !bv_->is_init() || (pos >= bm::id_max))
+    {
+        invalidate();
+        return false;
+    }
+
+    size_type start_pos;
+
+    // go to prolog: identify the true interval start position
+    // (fall back to a plain forward search when no extension is requested
+    // or `pos` is not inside an interval)
+    //
+    bool found = extend_start &&
+                 bm::find_interval_start(*bv_, pos, start_pos);
+    if (!found)
+        found = bv_->find(pos, start_pos);
+    if (!found)
+    {
+        invalidate();
+        return false;
+    }
+
+    // start position established, start decoding from it
+    interval_.first = pos = start_pos;
+
+    block_idx_type nb = (pos >> bm::set_block_shift);
+    const typename BV::blocks_manager_type& bman = bv_->get_blocks_manager();
+    unsigned i0, j0;
+    bm::get_block_coord(nb, i0, j0);
+    const bm::word_t* block = bman.get_block_ptr(i0, j0);
+    BM_ASSERT(block);
+
+    if (block == FULL_BLOCK_FAKE_ADDR)
+    {
+        // super-long interval, find the end of it
+        found = bm::find_interval_end(*bv_, pos, interval_.second);
+        BM_ASSERT(found); (void)found; // only read by the assert
+        gap_ptr_ = 0;
+        return true;
+    }
+
+    if (BM_IS_GAP(block))
+    {
+        const bm::gap_word_t* BMRESTRICT gap_block = BMGAP_PTR(block);
+        unsigned nbit = unsigned(pos & bm::set_block_mask);
+
+        unsigned is_set;
+        unsigned gap_pos = bm::gap_bfind(gap_block, nbit, &is_set);
+        BM_ASSERT(is_set); (void)is_set; // only read by the assert
+
+        interval_.second = (nb * bm::gap_max_bits) + gap_block[gap_pos];
+        if (gap_block[gap_pos] == bm::gap_max_bits-1)
+        {
+            // it is the end of the GAP block - run search
+            //
+            if (interval_.second == bm::id_max-1)
+            {
+                gap_ptr_ = 0;
+                return true;
+            }
+            // the run may continue into the next block; keep the current
+            // end when it does not
+            found = bm::find_interval_end(*bv_, interval_.second + 1, start_pos);
+            if (found)
+                interval_.second = start_pos;
+            gap_ptr_ = 0;
+            return true;
+        }
+        gap_ptr_ = gap_block + gap_pos;
+        return true;
+    }
+
+    // bit-block: turn to GAP and position there
+    //
+    if (gap_buf_.size() == 0)
+    {
+        gap_buf_.resize(bm::gap_max_bits+64);
+    }
+    bm::gap_word_t* gap_tmp = gap_buf_.data();
+    unsigned len = bm::bit_to_gap(gap_tmp, block, bm::gap_max_bits+64);
+    BM_ASSERT(len);
+
+    // pick the first converted GAP entry whose end is at or after pos
+    size_type base_idx = (nb * bm::gap_max_bits);
+    for (unsigned i = 1; i <= len; ++i)
+    {
+        size_type gap_pos = base_idx + gap_tmp[i];
+        if (gap_pos >= pos)
+        {
+            if (gap_tmp[i] == bm::gap_max_bits - 1)
+            {
+                // run touches the block end: it may extend further
+                found = bm::find_interval_end(*bv_, gap_pos, interval_.second);
+                BM_ASSERT(found); (void)found; // only read by the assert
+                gap_ptr_ = 0;
+                return true;
+            }
+
+            gap_ptr_ = &gap_tmp[i];
+            interval_.second = gap_pos;
+            return true;
+        }
+        if (gap_tmp[i] == bm::gap_max_bits - 1)
+            break;
+    } // for
+
+    BM_ASSERT(0);
+
+    return false;
+}
+
+//----------------------------------------------------------------------------
+
+// Moves the enumerator to the next interval.
+//
+// Returns true when a next interval exists (interval_ is updated), false
+// when the enumeration is over (the enumerator is invalidated).
+template<typename BV>
+bool interval_enumerator<BV>::advance()
+{
+    BM_ASSERT(valid());
+
+    if (interval_.second == bm::id_max-1)
+    {
+        invalidate();
+        return false;
+    }
+    block_idx_type nb = (interval_.first >> bm::set_block_shift);
+
+    if (gap_ptr_) // in GAP block
+    {
+        ++gap_ptr_; // 0 - GAP
+        if (*gap_ptr_ == bm::gap_max_bits-1) // GAP block end
+        {
+            // Last addressable block: there is no next block to visit.
+            // Without this guard (nb+1) * gap_max_bits can wrap around to 0
+            // when block_idx_type is 32-bit, which would restart the
+            // enumeration from the beginning instead of finishing it.
+            if (nb >= block_idx_type(bm::id_max >> bm::set_block_shift))
+            {
+                invalidate();
+                return false;
+            }
+            return go_to_impl(((nb+1) * bm::gap_max_bits), false);
+        }
+        unsigned prev = *gap_ptr_;
+
+        ++gap_ptr_; // 1 - GAP
+        BM_ASSERT(*gap_ptr_ > prev);
+        interval_.first = (nb * bm::gap_max_bits) + prev + 1;
+        if (*gap_ptr_ == bm::gap_max_bits-1) // GAP block end
+        {
+            // run touches the block end: it may extend into later blocks
+            bool found =
+                bm::find_interval_end(*bv_, interval_.first, interval_.second);
+            BM_ASSERT(found); (void)found;
+            gap_ptr_ = 0;
+            return true;
+        }
+        interval_.second = (nb * bm::gap_max_bits) + *gap_ptr_;
+        return true;
+    }
+    // no GAP cursor: search again right after the current interval end
+    return go_to_impl(interval_.second + 1, false);
+}
+
+//----------------------------------------------------------------------------
+
+// Exchanges the complete state of two enumerators: bit-vector pointer,
+// scratch GAP buffer, GAP cursor and the current interval bounds.
+template<typename BV>
+void interval_enumerator<BV>::swap(interval_enumerator<BV>& ien) BMNOEXCEPT
+{
+    // scratch buffers trade places
+    gap_buf_.swap(ien.gap_buf_);
+
+    // bit-vector back-pointers
+    const BV* const other_bv = ien.bv_;
+    ien.bv_ = bv_;
+    bv_ = other_bv;
+
+    // GAP cursors
+    const bm::gap_word_t* const other_cursor = ien.gap_ptr_;
+    ien.gap_ptr_ = gap_ptr_;
+    gap_ptr_ = other_cursor;
+
+    // interval bounds
+    bm::xor_swap(interval_.second, ien.interval_.second);
+    bm::xor_swap(interval_.first, ien.interval_.first);
+}
+
+//----------------------------------------------------------------------------
+//
+//----------------------------------------------------------------------------
+
+
+} // namespace bm
+
+#include "bmundef.h"
+
+#endif
unsigned process_word(bm::word_t* blk_out,
const bm::word_t* blk_src,
unsigned nword,
- unsigned take_count);
+ unsigned take_count) BMNOEXCEPT;
static
void get_random_array(bm::word_t* blk_out,
unsigned count);
static
unsigned compute_take_count(unsigned bc,
- size_type in_count, size_type sample_count);
+ size_type in_count, size_type sample_count) BMNOEXCEPT;
private:
}
template<class BV>
-unsigned random_subset<BV>::compute_take_count(unsigned bc,
- size_type in_count,
- size_type sample_count)
+unsigned random_subset<BV>::compute_take_count(
+ unsigned bc,
+ size_type in_count,
+ size_type sample_count) BMNOEXCEPT
{
float block_percent = float(bc) / float(in_count);
float bits_to_take = float(sample_count) * block_percent;
}
// now transform vacant bits to array, then pick random elements
//
- unsigned arr_len = bit_convert_to_arr(bit_list_,
+ unsigned arr_len = bm::bit_convert_to_arr(bit_list_,
sub_block_,
bm::gap_max_bits,
bm::gap_max_bits,
unsigned random_subset<BV>::process_word(bm::word_t* blk_out,
const bm::word_t* blk_src,
unsigned nword,
- unsigned take_count)
+ unsigned take_count) BMNOEXCEPT
{
unsigned new_bits, mask;
do
rs_index(const rs_index& rsi);
/// init arrays to zeros
- void init() BMNOEXEPT;
+ void init() BMNOEXCEPT;
/// copy rs index
void copy_from(const rs_index& rsi);
template<typename BVAlloc>
-void rs_index<BVAlloc>::init() BMNOEXEPT
+void rs_index<BVAlloc>::init() BMNOEXCEPT
{
sblock_count_.resize(0);
sblock_row_idx_.resize(0);
class serializer
{
public:
- typedef BV bvector_type;
- typedef typename bvector_type::allocator_type allocator_type;
- typedef typename bvector_type::blocks_manager_type blocks_manager_type;
- typedef typename bvector_type::statistics statistics_type;
- typedef typename bvector_type::block_idx_type block_idx_type;
- typedef typename bvector_type::size_type size_type;
+ typedef BV bvector_type;
+ typedef typename bvector_type::allocator_type allocator_type;
+ typedef typename bvector_type::blocks_manager_type blocks_manager_type;
+ typedef typename bvector_type::statistics statistics_type;
+ typedef typename bvector_type::block_idx_type block_idx_type;
+ typedef typename bvector_type::size_type size_type;
typedef byte_buffer<allocator_type> buffer;
typedef bm::bv_ref_vector<BV> bv_ref_vector_type;
@param clevel - compression level (0-5)
@sa get_compression_level
*/
- void set_compression_level(unsigned clevel);
+ void set_compression_level(unsigned clevel) BMNOEXCEPT;
/**
Get compression level (0-5), Default 5 (recommended)
Recommended: use 3 or 5
*/
- unsigned get_compression_level() const { return compression_level_; }
+ unsigned get_compression_level() const BMNOEXCEPT
+ { return compression_level_; }
//@}
Return serialization counter vector
@internal
*/
- const size_type* get_compression_stat() const { return compression_stat_; }
+ const size_type* get_compression_stat() const BMNOEXCEPT
+ { return compression_stat_; }
/**
Set GAP length serialization (serializes GAP levels of the original vector)
@param value - when TRUE serialized vector includes GAP levels parameters
*/
- void gap_length_serialization(bool value);
+ void gap_length_serialization(bool value) BMNOEXCEPT;
/**
Set byte-order serialization (for cross platform compatibility)
@param value - TRUE serialization format includes byte-order marker
*/
- void byte_order_serialization(bool value);
+ void byte_order_serialization(bool value) BMNOEXCEPT;
/**
Add skip-markers to serialization BLOB for faster range decode
smaller interval means more bookmarks added to the skip list thus
more increasing the BLOB size
*/
- void set_bookmarks(bool enable, unsigned bm_interval = 256);
+ void set_bookmarks(bool enable, unsigned bm_interval = 256) BMNOEXCEPT;
/**
Attach collection of reference vectors for XOR serialization
 Set current index in ref.vector collection
(not a row idx or plain idx)
*/
- void set_curr_ref_idx(size_type ref_idx);
+ void set_curr_ref_idx(size_type ref_idx) BMNOEXCEPT;
protected:
/**
Encode serialization header information
*/
- void encode_header(const BV& bv, bm::encoder& enc);
+ void encode_header(const BV& bv, bm::encoder& enc) BMNOEXCEPT;
/*! Encode GAP block */
void encode_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc);
/*! Encode GAP block with Elias Gamma coder */
- void gamma_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc);
+ void gamma_gap_block(const bm::gap_word_t* gap_block,
+ bm::encoder& enc) BMNOEXCEPT;
/**
Encode GAP block as delta-array with Elias Gamma coder
void gamma_gap_array(const bm::gap_word_t* gap_block,
unsigned arr_len,
bm::encoder& enc,
- bool inverted = false);
+ bool inverted = false) BMNOEXCEPT;
/// Encode bit-block as an array of bits
void encode_bit_array(const bm::word_t* block,
- bm::encoder& enc, bool inverted);
+ bm::encoder& enc, bool inverted) BMNOEXCEPT;
void gamma_gap_bit_block(const bm::word_t* block,
- bm::encoder& enc);
+ bm::encoder& enc) BMNOEXCEPT;
void gamma_arr_bit_block(const bm::word_t* block,
- bm::encoder& enc, bool inverted);
+ bm::encoder& enc, bool inverted) BMNOEXCEPT;
void bienc_arr_bit_block(const bm::word_t* block,
- bm::encoder& enc, bool inverted);
+ bm::encoder& enc, bool inverted) BMNOEXCEPT;
/// encode bit-block as interpolated bit block of gaps
- void bienc_gap_bit_block(const bm::word_t* block, bm::encoder& enc);
+ void bienc_gap_bit_block(const bm::word_t* block,
+ bm::encoder& enc) BMNOEXCEPT;
void interpolated_arr_bit_block(const bm::word_t* block,
- bm::encoder& enc, bool inverted);
+ bm::encoder& enc, bool inverted) BMNOEXCEPT;
/// encode bit-block as interpolated gap block
void interpolated_gap_bit_block(const bm::word_t* block,
- bm::encoder& enc);
+ bm::encoder& enc) BMNOEXCEPT;
/**
Encode GAP block as an array with binary interpolated coder
void interpolated_gap_array(const bm::gap_word_t* gap_block,
unsigned arr_len,
bm::encoder& enc,
- bool inverted);
+ bool inverted) BMNOEXCEPT;
void interpolated_gap_array_v0(const bm::gap_word_t* gap_block,
unsigned arr_len,
bm::encoder& enc,
- bool inverted);
+ bool inverted) BMNOEXCEPT;
/*! Encode GAP block with using binary interpolated encoder */
void interpolated_encode_gap_block(
- const bm::gap_word_t* gap_block, bm::encoder& enc);
+ const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT;
/**
Encode BIT block with repeatable runs of zeroes
*/
void encode_bit_interval(const bm::word_t* blk,
bm::encoder& enc,
- unsigned size_control);
+ unsigned size_control) BMNOEXCEPT;
/**
Encode bit-block using digest (hierarchical compression)
*/
void encode_bit_digest(const bm::word_t* blk,
- bm::encoder& enc,
- bm::id64_t d0);
+ bm::encoder& enc,
+ bm::id64_t d0) BMNOEXCEPT;
/**
Determine best representation for GAP block based
@internal
*/
- unsigned char find_gap_best_encoding(const bm::gap_word_t* gap_block);
+ unsigned char
+ find_gap_best_encoding(const bm::gap_word_t* gap_block) BMNOEXCEPT;
/// Determine best representation for a bit-block
- unsigned char find_bit_best_encoding(const bm::word_t* block);
+ unsigned char find_bit_best_encoding(const bm::word_t* block) BMNOEXCEPT;
/// Determine best representation for a bit-block (level 5)
- unsigned char find_bit_best_encoding_l5(const bm::word_t* block);
+ unsigned char find_bit_best_encoding_l5(const bm::word_t* block) BMNOEXCEPT;
/// Reset all accumulated compression statistics
- void reset_compression_stats();
+ void reset_compression_stats() BMNOEXCEPT;
- void reset_models() { mod_size_ = 0; }
- void add_model(unsigned char mod, unsigned score);
+ void reset_models() BMNOEXCEPT { mod_size_ = 0; }
+ void add_model(unsigned char mod, unsigned score) BMNOEXCEPT;
protected:
/// Bookmark state structure
struct bookmark_state
{
- bookmark_state(block_idx_type nb_range)
+ bookmark_state(block_idx_type nb_range) BMNOEXCEPT
: ptr_(0), nb_(0),
nb_range_(nb_range), bm_type_(0)
{
*/
static
void process_bookmark(block_idx_type nb, bookmark_state& bookm,
- bm::encoder& enc);
+ bm::encoder& enc) BMNOEXCEPT;
private:
serializer(const serializer&);
protected:
typedef DEC decoder_type;
typedef BLOCK_IDX block_idx_type;
+ typedef bm::bit_in<DEC> bit_in_type;
+
protected:
deseriaizer_base()
: id_array_(0), bookmark_idx_(0), skip_offset_(0), skip_pos_(0)
bm::gap_word_t* dst_arr);
/// Read binary interpolated list into a bit-set
- void read_bic_arr(decoder_type& decoder, bm::word_t* blk);
+ void read_bic_arr(decoder_type& decoder, bm::word_t* blk) BMNOEXCEPT;
/// Read binary interpolated gap blocks into a bitset
- void read_bic_gap(decoder_type& decoder, bm::word_t* blk);
+ void read_bic_gap(decoder_type& decoder, bm::word_t* blk) BMNOEXCEPT;
/// Read inverted binary interpolated list into a bit-set
- void read_bic_arr_inv(decoder_type& decoder, bm::word_t* blk);
+ void read_bic_arr_inv(decoder_type& decoder, bm::word_t* blk) BMNOEXCEPT;
/// Read digest0-type bit-block
- void read_digest0_block(decoder_type& decoder, bm::word_t* blk);
+ void read_digest0_block(decoder_type& decoder, bm::word_t* blk) BMNOEXCEPT;
/// read bit-block encoded as runs
static
- void read_0runs_block(decoder_type& decoder, bm::word_t* blk);
+ void read_0runs_block(decoder_type& decoder, bm::word_t* blk) BMNOEXCEPT;
static
- const char* err_msg() { return "BM::Invalid serialization format"; }
+ const char* err_msg() BMNOEXCEPT { return "BM::Invalid serialization format"; }
/// Try to skip if skip bookmark is available within reach
/// @return new block idx if skip went well
///
- block_idx_type try_skip(decoder_type& decoder, block_idx_type nb, block_idx_type expect_nb);
+ block_idx_type try_skip(decoder_type& decoder,
+ block_idx_type nb,
+ block_idx_type expect_nb) BMNOEXCEPT;
protected:
bm::gap_word_t* id_array_; ///< ptr to idx array for temp decode use
is not guaranteed to be absent
@sa unset_range()
*/
- void set_range(size_type from, size_type to)
+ void set_range(size_type from, size_type to) BMNOEXCEPT
{
is_range_set_ = 1; idx_from_ = from; idx_to_ = to;
}
Disable range deserialization
@sa set_range()
*/
- void unset_range() { is_range_set_ = 0; }
+ void unset_range() BMNOEXCEPT { is_range_set_ = 0; }
protected:
typedef typename BV::blocks_manager_type blocks_manager_type;
void set_range(size_type from, size_type to);
/// disable range filtration
- void unset_range() { is_range_set_ = false; }
+ void unset_range() BMNOEXCEPT { is_range_set_ = false; }
size_type deserialize(bvector_type& bv,
serial_iterator_type& sit,
serial_iterator_type& sit,
set_operation op);
static
- const char* err_msg() { return "BM::de-serialization format error"; }
+ const char* err_msg() BMNOEXCEPT
+ { return "BM::de-serialization format error"; }
private:
bool is_range_set_ = false;
size_type nb_range_from_ = 0;
void next();
/// skip all zero or all-one blocks
- block_idx_type skip_mono_blocks();
+ block_idx_type skip_mono_blocks() BMNOEXCEPT;
/// read bit block, using logical operation
unsigned get_bit_block(bm::word_t* dst_block,
};
/// Returns iterator internal state
- iterator_state state() const { return this->state_; }
+ iterator_state state() const BMNOEXCEPT { return this->state_; }
- iterator_state get_state() const { return this->state_; }
+ iterator_state get_state() const BMNOEXCEPT { return this->state_; }
/// Number of ids in the inverted list (valid for e_list_ids)
- unsigned get_id_count() const { return this->id_cnt_; }
+ unsigned get_id_count() const BMNOEXCEPT { return this->id_cnt_; }
/// Get last id from the id list
- bm::id_t get_id() const { return this->last_id_; }
+ bm::id_t get_id() const BMNOEXCEPT { return this->last_id_; }
/// Get current block index
- block_idx_type block_idx() const { return this->block_idx_; }
+ block_idx_type block_idx() const BMNOEXCEPT { return this->block_idx_; }
public:
/// member function pointer for bitset-bitset get operations
/// (Converts inverted list into bits)
/// Returns number of words (bits) being read
unsigned get_arr_bit(bm::word_t* dst_block,
- bool clear_target=true);
+ bool clear_target=true) BMNOEXCEPT;
/// Get current block type
- unsigned get_block_type() const { return block_type_; }
+ unsigned get_block_type() const BMNOEXCEPT { return block_type_; }
- unsigned get_bit();
+ unsigned get_bit() BMNOEXCEPT;
- void get_inv_arr(bm::word_t* block);
+ void get_inv_arr(bm::word_t* block) BMNOEXCEPT;
/// Try to skip if skip bookmark is available within reach
/// @return true if skip went well
///
- bool try_skip(block_idx_type nb, block_idx_type expect_nb)
+ bool try_skip(block_idx_type nb, block_idx_type expect_nb) BMNOEXCEPT
{
block_idx_type new_nb = parent_type::try_skip(decoder_, nb, expect_nb);
if (new_nb)
gap_serial_(false),
byte_order_serial_(true),
sb_bookmarks_(false),
+ sb_range_(0),
compression_level_(bm::set_compression_default),
ref_vect_(0),
ref_idx_(0),
template<class BV>
-void serializer<BV>::reset_compression_stats()
+void serializer<BV>::reset_compression_stats() BMNOEXCEPT
{
for (unsigned i = 0; i < 256; ++i)
compression_stat_[i] = 0;
template<class BV>
-void serializer<BV>::set_compression_level(unsigned clevel)
+void serializer<BV>::set_compression_level(unsigned clevel) BMNOEXCEPT
{
if (clevel <= bm::set_compression_max)
compression_level_ = clevel;
}
template<class BV>
-void serializer<BV>::gap_length_serialization(bool value)
+void serializer<BV>::gap_length_serialization(bool value) BMNOEXCEPT
{
gap_serial_ = value;
}
template<class BV>
-void serializer<BV>::byte_order_serialization(bool value)
+void serializer<BV>::byte_order_serialization(bool value) BMNOEXCEPT
{
byte_order_serial_ = value;
}
template<class BV>
-void serializer<BV>::set_bookmarks(bool enable, unsigned bm_interval)
+void serializer<BV>::set_bookmarks(bool enable, unsigned bm_interval) BMNOEXCEPT
{
sb_bookmarks_ = enable;
if (enable)
- {
+ {
if (bm_interval > 512)
bm_interval = 512;
else
}
template<class BV>
-void serializer<BV>::set_curr_ref_idx(size_type ref_idx)
+void serializer<BV>::set_curr_ref_idx(size_type ref_idx) BMNOEXCEPT
{
ref_idx_ = ref_idx;
}
template<class BV>
-void serializer<BV>::encode_header(const BV& bv, bm::encoder& enc)
+void serializer<BV>::encode_header(const BV& bv, bm::encoder& enc) BMNOEXCEPT
{
const blocks_manager_type& bman = bv.get_blocks_manager();
template<class BV>
void serializer<BV>::interpolated_encode_gap_block(
- const bm::gap_word_t* gap_block, bm::encoder& enc)
+ const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT
{
unsigned len = bm::gap_length(gap_block);
if (len > 4) // BIC encoding
template<class BV>
-void serializer<BV>::gamma_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc)
+void serializer<BV>::gamma_gap_block(const bm::gap_word_t* gap_block,
+ bm::encoder& enc) BMNOEXCEPT
{
unsigned len = gap_length(gap_block);
if (len > 3 && (compression_level_ > 3)) // Use Elias Gamma encoding
void serializer<BV>::gamma_gap_array(const bm::gap_word_t* gap_array,
unsigned arr_len,
bm::encoder& enc,
- bool inverted)
+ bool inverted) BMNOEXCEPT
{
unsigned char scode = inverted ? bm::set_block_arrgap_egamma_inv
: bm::set_block_arrgap_egamma;
template<class BV>
-void serializer<BV>::interpolated_gap_array_v0(const bm::gap_word_t* gap_block,
- unsigned arr_len,
- bm::encoder& enc,
- bool inverted)
+void serializer<BV>::interpolated_gap_array_v0(
+ const bm::gap_word_t* gap_block,
+ unsigned arr_len,
+ bm::encoder& enc,
+ bool inverted) BMNOEXCEPT
{
BM_ASSERT(arr_len <= 65535);
unsigned char scode = inverted ? bm::set_block_arrgap_bienc_inv
void serializer<BV>::interpolated_gap_array(const bm::gap_word_t* gap_block,
unsigned arr_len,
bm::encoder& enc,
- bool inverted)
+ bool inverted) BMNOEXCEPT
{
BM_ASSERT(arr_len <= 65535);
template<class BV>
-void serializer<BV>::add_model(unsigned char mod, unsigned score)
+void serializer<BV>::add_model(unsigned char mod, unsigned score) BMNOEXCEPT
{
BM_ASSERT(mod_size_ < 64); // too many models (memory corruption?)
scores_[mod_size_] = score; models_[mod_size_] = mod;
}
template<class BV>
-unsigned char serializer<BV>::find_bit_best_encoding_l5(const bm::word_t* block)
+unsigned char
+serializer<BV>::find_bit_best_encoding_l5(const bm::word_t* block) BMNOEXCEPT
{
unsigned bc, bit_gaps;
}
template<class BV>
-unsigned char serializer<BV>::find_bit_best_encoding(const bm::word_t* block)
+unsigned char
+serializer<BV>::find_bit_best_encoding(const bm::word_t* block) BMNOEXCEPT
{
reset_models();
template<class BV>
unsigned char
-serializer<BV>::find_gap_best_encoding(const bm::gap_word_t* gap_block)
+serializer<BV>::find_gap_best_encoding(const bm::gap_word_t* gap_block)BMNOEXCEPT
{
// heuristics and hard-coded rules to determine
// the best representation for d-GAP block
break;
case bm::set_block_bit_1bit:
- arr_len = gap_convert_to_arr(gap_temp_block,
- gap_block,
- bm::gap_equiv_len-10);
+ arr_len = bm::gap_convert_to_arr(gap_temp_block,
+ gap_block,
+ bm::gap_equiv_len-10);
BM_ASSERT(arr_len == 1);
enc.put_8(bm::set_block_bit_1bit);
enc.put_16(gap_temp_block[0]);
void serializer<BV>::encode_bit_interval(const bm::word_t* blk,
bm::encoder& enc,
unsigned //size_control
- )
+ ) BMNOEXCEPT
{
enc.put_8(bm::set_block_bit_0runs);
enc.put_8((blk[0]==0) ? 0 : 1); // encode start
template<class BV>
void serializer<BV>::encode_bit_digest(const bm::word_t* block,
bm::encoder& enc,
- bm::id64_t d0)
+ bm::id64_t d0) BMNOEXCEPT
{
// evaluate a few "sure" models here and pick the best
//
template<class BV>
void serializer<BV>::encode_bit_array(const bm::word_t* block,
bm::encoder& enc,
- bool inverted)
+ bool inverted) BMNOEXCEPT
{
unsigned arr_len;
unsigned mask = inverted ? ~0u : 0u;
// TODO: get rid of max bits
- arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
- block,
- bm::gap_max_bits,
- bm::gap_max_bits_cmrz,
- mask);
+ arr_len = bm::bit_convert_to_arr(bit_idx_arr_.data(),
+ block,
+ bm::gap_max_bits,
+ bm::gap_max_bits_cmrz,
+ mask);
if (arr_len)
{
unsigned char scode =
template<class BV>
void serializer<BV>::gamma_gap_bit_block(const bm::word_t* block,
- bm::encoder& enc)
+ bm::encoder& enc) BMNOEXCEPT
{
unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_equiv_len);
BM_ASSERT(len); (void)len;
template<class BV>
void serializer<BV>::gamma_arr_bit_block(const bm::word_t* block,
- bm::encoder& enc, bool inverted)
+ bm::encoder& enc,
+ bool inverted) BMNOEXCEPT
{
unsigned mask = inverted ? ~0u : 0u;
unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
template<class BV>
void serializer<BV>::bienc_arr_bit_block(const bm::word_t* block,
- bm::encoder& enc, bool inverted)
+ bm::encoder& enc,
+ bool inverted) BMNOEXCEPT
{
unsigned mask = inverted ? ~0u : 0u;
unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
template<class BV>
void serializer<BV>::interpolated_gap_bit_block(const bm::word_t* block,
- bm::encoder& enc)
+ bm::encoder& enc) BMNOEXCEPT
{
unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
BM_ASSERT(len); (void)len;
template<class BV>
void serializer<BV>::bienc_gap_bit_block(const bm::word_t* block,
- bm::encoder& enc)
+ bm::encoder& enc) BMNOEXCEPT
{
unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
BM_ASSERT(len); (void)len;
template<class BV>
-void serializer<BV>::interpolated_arr_bit_block(const bm::word_t* block,
- bm::encoder& enc, bool inverted)
+void
+serializer<BV>::interpolated_arr_bit_block(const bm::word_t* block,
+ bm::encoder& enc,
+ bool inverted) BMNOEXCEPT
{
unsigned mask = inverted ? ~0u : 0u;
unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
template<class BV>
void serializer<BV>::process_bookmark(block_idx_type nb,
bookmark_state& bookm,
- bm::encoder& enc)
+ bm::encoder& enc) BMNOEXCEPT
{
BM_ASSERT(bookm.nb_range_);
unsigned block_type,
bm::gap_word_t* dst_arr)
{
- typedef bit_in<DEC> bit_in_type;
-
bm::gap_word_t len = 0;
switch (block_type)
}
template<typename DEC, typename BLOCK_IDX>
-void deseriaizer_base<DEC, BLOCK_IDX>::read_bic_arr(decoder_type& dec,
- bm::word_t* blk)
+void
+deseriaizer_base<DEC, BLOCK_IDX>::read_bic_arr(decoder_type& dec,
+ bm::word_t* blk) BMNOEXCEPT
{
BM_ASSERT(!BM_IS_GAP(blk));
- typedef bit_in<DEC> bit_in_type;
bm::gap_word_t min_v = dec.get_16();
bm::gap_word_t max_v = dec.get_16();
unsigned arr_len = dec.get_16();
}
template<typename DEC, typename BLOCK_IDX>
-void deseriaizer_base<DEC, BLOCK_IDX>::read_bic_arr_inv(decoder_type& decoder, bm::word_t* blk)
+void
+deseriaizer_base<DEC, BLOCK_IDX>::read_bic_arr_inv(decoder_type& decoder,
+ bm::word_t* blk) BMNOEXCEPT
{
// TODO: optimization
bm::bit_block_set(blk, 0);
}
template<typename DEC, typename BLOCK_IDX>
-void deseriaizer_base<DEC, BLOCK_IDX>::read_bic_gap(decoder_type& dec, bm::word_t* blk)
+void deseriaizer_base<DEC, BLOCK_IDX>::read_bic_gap(decoder_type& dec,
+ bm::word_t* blk) BMNOEXCEPT
{
BM_ASSERT(!BM_IS_GAP(blk));
- typedef bit_in<DEC> bit_in_type;
-
bm::gap_word_t head = dec.get_8();
unsigned arr_len = dec.get_16();
bm::gap_word_t min_v = dec.get_16();
BM_ASSERT(arr_len <= bie_cut_off);
-
id_array_[0] = head;
id_array_[1] = min_v;
bin.bic_decode_u16(&id_array_[2], arr_len-2, min_v, 65535);
if (!IS_VALID_ADDR(blk))
- {
return;
- }
bm::gap_add_to_bitset(blk, id_array_, arr_len);
}
template<typename DEC, typename BLOCK_IDX>
-void deseriaizer_base<DEC, BLOCK_IDX>::read_digest0_block(decoder_type& dec,
- bm::word_t* block)
+void deseriaizer_base<DEC, BLOCK_IDX>::read_digest0_block(
+ decoder_type& dec,
+ bm::word_t* block) BMNOEXCEPT
{
bm::id64_t d0 = dec.get_64();
while (d0)
}
template<typename DEC, typename BLOCK_IDX>
-void deseriaizer_base<DEC, BLOCK_IDX>::read_0runs_block(decoder_type& dec,
- bm::word_t* blk)
+void deseriaizer_base<DEC, BLOCK_IDX>::read_0runs_block(
+ decoder_type& dec,
+ bm::word_t* blk) BMNOEXCEPT
{
//TODO: optimization if block exists and it is OR-ed read
bm::bit_block_set(blk, 0);
template<typename DEC, typename BLOCK_IDX>
-void deseriaizer_base<DEC, BLOCK_IDX>::read_gap_block(decoder_type& decoder,
+void
+deseriaizer_base<DEC, BLOCK_IDX>::read_gap_block(decoder_type& decoder,
unsigned block_type,
bm::gap_word_t* dst_block,
bm::gap_word_t& gap_head)
{
- typedef bit_in<DEC> bit_in_type;
-
+// typedef bit_in<DEC> bit_in_type;
switch (block_type)
{
case set_block_gap:
for (gap_word_t k = 0; k < len; ++k)
{
gap_word_t bit_idx = decoder.get_16();
- gap_add_value(dst_block, bit_idx);
+ bm::gap_add_value(dst_block, bit_idx);
} // for
}
break;
deseriaizer_base<DEC, BLOCK_IDX>::try_skip(
decoder_type& decoder,
block_idx_type nb,
- block_idx_type expect_nb)
+ block_idx_type expect_nb) BMNOEXCEPT
{
if (skip_offset_) // skip bookmark is available
{
nb_sync = decoder.get_32();
break;
case set_nb_sync_mark48:
+ nb_sync = block_idx_type(decoder.get_48());
#ifndef BM64ADDR
BM_ASSERT(0);
- #ifndef BM_NO_STL
- throw std::logic_error(this->err_msg());
- #else
- BM_THROW(BM_ERR_SERIALFORMAT);
- #endif
+ decoder.set_pos(save_pos);
+ skip_offset_ = 0;
+ return 0; // invalid bookmark from 64-bit serialization
#endif
- nb_sync = block_idx_type(decoder.get_48());
break;
case set_nb_sync_mark64:
+ nb_sync = block_idx_type(decoder.get_64());
#ifndef BM64ADDR
BM_ASSERT(0);
- #ifndef BM_NO_STL
- throw std::logic_error(this->err_msg());
- #else
- BM_THROW(BM_ERR_SERIALFORMAT);
- #endif
+ decoder.set_pos(save_pos);
+ skip_offset_ = 0;
+ return 0; // invalid bookmark from 64-bit serialization
#endif
- nb_sync = block_idx_type(decoder.get_64());
break;
default:
BM_ASSERT(0);
nb_sync += nb;
if (nb_sync <= expect_nb) // within reach
{
- //block_idx_ = nb_sync;
- //state_ = e_blocks;
skip_offset_ = 0;
return nb_sync;
}
{
// 64-bit vector cannot be deserialized into 32-bit
BM_ASSERT(sizeof(block_idx_type)==8);
+ bv_size = (block_idx_type)dec.get_64();
#ifndef BM64ADDR
#ifndef BM_NO_STL
throw std::logic_error(this->err_msg());
BM_THROW(BM_ERR_SERIALFORMAT);
#endif
#endif
- bv_size = (block_idx_type)dec.get_64();
}
else
bv_size = dec.get_32();
goto process_full_blocks;
#else
BM_ASSERT(0); // 32-bit vector cannot read 64-bit
+ dec.get_64();
#ifndef BM_NO_STL
throw std::logic_error(this->err_msg());
#else
BM_THROW(BM_ERR_SERIALFORMAT);
#endif
- dec.get_64();
#endif
process_full_blocks:
{
template<class BV, class DEC>
void deserializer<BV, DEC>::xor_decode(size_type x_ref_idx, bm::id64_t x_ref_d64,
blocks_manager_type& bman,
- block_idx_type nb)
+ block_idx_type nb)
{
BM_ASSERT(ref_vect_);
}
state_ = e_blocks;
}
- block_idx_arr_ = (gap_word_t*) ::malloc(sizeof(gap_word_t) * bm::gap_max_bits);
+ block_idx_arr_=(gap_word_t*)::malloc(sizeof(gap_word_t) * bm::gap_max_bits);
+ if (!block_idx_arr_)
+ {
+ #ifndef BM_NO_STL
+ throw std::bad_alloc();
+ #else
+ BM_THROW(BM_ERR_BADALLOC);
+ #endif
+ }
this->id_array_ = block_idx_arr_;
}
template<typename DEC, typename BLOCK_IDX>
typename serial_stream_iterator<DEC, BLOCK_IDX>::block_idx_type
-serial_stream_iterator<DEC, BLOCK_IDX>::skip_mono_blocks()
+serial_stream_iterator<DEC, BLOCK_IDX>::skip_mono_blocks() BMNOEXCEPT
{
BM_ASSERT(state_ == e_zero_blocks || state_ == e_one_blocks);
if (!mono_block_cnt_)
}
template<typename DEC, typename BLOCK_IDX>
-void serial_stream_iterator<DEC, BLOCK_IDX>::get_inv_arr(bm::word_t* block)
+void
+serial_stream_iterator<DEC, BLOCK_IDX>::get_inv_arr(bm::word_t* block) BMNOEXCEPT
{
gap_word_t len = decoder_.get_16();
if (block)
bm::bit_block_set(block, ~0u);
for (unsigned k = 0; k < len; ++k)
{
- gap_word_t bit_idx = decoder_.get_16();
+ bm::gap_word_t bit_idx = decoder_.get_16();
bm::clear_bit(block, bit_idx);
}
}
template<typename DEC, typename BLOCK_IDX>
unsigned serial_stream_iterator<DEC, BLOCK_IDX>::get_arr_bit(
- bm::word_t* dst_block,
- bool clear_target)
+ bm::word_t* dst_block,
+ bool clear_target) BMNOEXCEPT
{
BM_ASSERT(this->block_type_ == set_block_arrbit ||
this->block_type_ == set_block_bit_1bit);
else
{
if (this->block_type_ == set_block_bit_1bit)
- {
- return 1; // nothing to do: len var already consumed 16bits
- }
- // fwd the decocing stream
+ return 1; // nothing to do: len var already consumed 16 bits
+
+ // fwd the decode stream
decoder_.seek(len * 2);
}
return len;
}
template<typename DEC, typename BLOCK_IDX>
-unsigned serial_stream_iterator<DEC, BLOCK_IDX>::get_bit()
+unsigned serial_stream_iterator<DEC, BLOCK_IDX>::get_bit() BMNOEXCEPT
{
BM_ASSERT(this->block_type_ == set_block_bit_1bit);
++(this->block_idx_);
#include "bmtrans.h"
-#include "bmalgo.h"
+#include "bmalgo_impl.h"
#include "bmbuffer.h"
#include "bmbmatrix.h"
#include "bmdef.h"
class reference
{
public:
- reference(sparse_vector<Val, BV>& sv, size_type idx) BMNOEXEPT
+ reference(sparse_vector<Val, BV>& sv, size_type idx) BMNOEXCEPT
: sv_(sv), idx_(idx)
{}
- operator value_type() const { return sv_.get(idx_); }
+ operator value_type() const BMNOEXCEPT { return sv_.get(idx_); }
reference& operator=(const reference& ref)
{
sv_.set(idx_, (value_type)ref);
sv_.set(idx_, val);
return *this;
}
- bool operator==(const reference& ref) const
+ bool operator==(const reference& ref) const BMNOEXCEPT
{ return bool(*this) == bool(ref); }
- bool is_null() const { return sv_.is_null(idx_); }
+ bool is_null() const BMNOEXCEPT { return sv_.is_null(idx_); }
private:
sparse_vector<Val, BV>& sv_;
size_type idx_;
typedef value_type& reference;
public:
- const_iterator();
- const_iterator(const sparse_vector_type* sv);
- const_iterator(const sparse_vector_type* sv, size_type pos);
- const_iterator(const const_iterator& it);
+ const_iterator() BMNOEXCEPT;
+ const_iterator(const sparse_vector_type* sv) BMNOEXCEPT;
+ const_iterator(const sparse_vector_type* sv, size_type pos) BMNOEXCEPT;
+ const_iterator(const const_iterator& it) BMNOEXCEPT;
- bool operator==(const const_iterator& it) const
+ bool operator==(const const_iterator& it) const BMNOEXCEPT
{ return (pos_ == it.pos_) && (sv_ == it.sv_); }
- bool operator!=(const const_iterator& it) const
+ bool operator!=(const const_iterator& it) const BMNOEXCEPT
{ return ! operator==(it); }
- bool operator < (const const_iterator& it) const
+ bool operator < (const const_iterator& it) const BMNOEXCEPT
{ return pos_ < it.pos_; }
- bool operator <= (const const_iterator& it) const
+ bool operator <= (const const_iterator& it) const BMNOEXCEPT
{ return pos_ <= it.pos_; }
- bool operator > (const const_iterator& it) const
+ bool operator > (const const_iterator& it) const BMNOEXCEPT
{ return pos_ > it.pos_; }
- bool operator >= (const const_iterator& it) const
+ bool operator >= (const const_iterator& it) const BMNOEXCEPT
{ return pos_ >= it.pos_; }
/// \brief Get current position (value)
- value_type operator*() const { return this->value(); }
+ value_type operator*() const { return this->value(); }
/// \brief Advance to the next available value
- const_iterator& operator++() { this->advance(); return *this; }
+ const_iterator& operator++() BMNOEXCEPT { this->advance(); return *this; }
/// \brief Advance to the next available value
const_iterator& operator++(int)
value_type value() const;
/// \brief Get NULL status
- bool is_null() const;
+ bool is_null() const BMNOEXCEPT;
/// Returns true if iterator is at a valid position
- bool valid() const { return pos_ != bm::id_max; }
+ bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
/// Invalidate current iterator
- void invalidate() { pos_ = bm::id_max; }
+ void invalidate() BMNOEXCEPT { pos_ = bm::id_max; }
/// Current position (index) in the vector
- size_type pos() const { return pos_; }
+ size_type pos() const BMNOEXCEPT{ return pos_; }
/// re-position to a specified position
- void go_to(size_type pos);
+ void go_to(size_type pos) BMNOEXCEPT;
/// advance iterator forward by one
- void advance();
+ /// @return true if it is still valid
+ bool advance() BMNOEXCEPT;
- void skip_zero_values();
+ void skip_zero_values() BMNOEXCEPT;
private:
enum buf_size_e
{
size_type pos_; ///!< Position
mutable buffer_type buffer_; ///!< value buffer
mutable value_type* buf_ptr_; ///!< position in the buffer
- mutable allocator_pool_type pool_;
};
/**
Get access to not-null vector
@internal
*/
- bvector_type* get_null_bvect() const { return bv_null_; }
+ bvector_type* get_null_bvect() const BMNOEXCEPT { return bv_null_; }
/** add value to the buffer without changing the NULL vector
@param v - value to push back
size_type add_value_no_null(value_type v);
/**
- Reconf back inserter not to touch the NULL vector
+ Reconfigure back inserter not to touch the NULL vector
*/
- void disable_set_null() { set_not_null_ = false; }
+ void disable_set_null() BMNOEXCEPT { set_not_null_ = false; }
// ---------------------------------------------------------------
protected:
#ifndef BM_NO_CXX11
/*! move-ctor */
- sparse_vector(sparse_vector<Val, BV>&& sv) BMNOEXEPT;
+ sparse_vector(sparse_vector<Val, BV>&& sv) BMNOEXCEPT;
/*! move assignmment operator */
- sparse_vector<Val,BV>& operator = (sparse_vector<Val, BV>&& sv) BMNOEXEPT
+ sparse_vector<Val,BV>& operator = (sparse_vector<Val, BV>&& sv) BMNOEXCEPT
{
if (this != &sv)
{
}
#endif
- ~sparse_vector() BMNOEXEPT;
+ ~sparse_vector() BMNOEXCEPT;
///@}
///@{
/** \brief Operator to get write access to an element */
- reference operator[](size_type idx) { return reference(*this, idx); }
+ reference operator[](size_type idx) BMNOEXCEPT
+ { return reference(*this, idx); }
/*!
\brief get specified element without bounds checking
\param idx - element index
\return value of the element
*/
- value_type operator[](size_type idx) const { return this->get(idx); }
+ value_type operator[](size_type idx) const BMNOEXCEPT
+ { return this->get(idx); }
/*!
\brief access specified element with bounds checking
\param idx - element index
\return value of the element
*/
- value_type get(size_type idx) const;
+ value_type get(size_type idx) const BMNOEXCEPT;
/*!
\brief set specified element with bounds checking and automatic resize
//@{
/** Provide const iterator access to container content */
- const_iterator begin() const;
+ const_iterator begin() const BMNOEXCEPT;
/** Provide const iterator access to the end */
- const_iterator end() const { return const_iterator(this, bm::id_max); }
+ const_iterator end() const BMNOEXCEPT
+ { return const_iterator(this, bm::id_max); }
/** Get const_itertor re-positioned to specific element
@param idx - position in the sparse vector
*/
- const_iterator get_const_iterator(size_type idx) const { return const_iterator(this, idx); }
+ const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
+ { return const_iterator(this, idx); }
/** Provide back insert iterator
Back insert iterator implements buffered insertion,
which is faster, than random access or push_back
*/
- back_insert_iterator get_back_inserter() { return back_insert_iterator(this); }
+ back_insert_iterator get_back_inserter()
+ { return back_insert_iterator(this); }
///@}
/** \brief trait if sparse vector is "compressed" (false)
*/
static
- bool is_compressed() { return false; }
+ bool is_compressed() BMNOEXCEPT { return false; }
///@}
/*! \brief content exchange
*/
- void swap(sparse_vector<Val, BV>& sv) BMNOEXEPT;
+ void swap(sparse_vector<Val, BV>& sv) BMNOEXCEPT;
// ------------------------------------------------------------
/*! @name Clear */
///@{
/*! \brief resize to zero, free memory */
- void clear() BMNOEXEPT;
+ void clear() BMNOEXCEPT;
/*!
\brief clear range (assign bit 0 for all plains)
/*! \brief return size of the vector
\return size of sparse vector
*/
- size_type size() const { return this->size_; }
+ size_type size() const BMNOEXCEPT { return this->size_; }
/*! \brief return true if vector is empty
\return true if empty
*/
- bool empty() const { return (size() == 0); }
+ bool empty() const BMNOEXCEPT { return (size() == 0); }
/*! \brief resize vector
\param sz - new size
\return true, if it is the same
*/
bool equal(const sparse_vector<Val, BV>& sv,
- bm::null_support null_able = bm::use_null) const;
+ bm::null_support null_able = bm::use_null) const BMNOEXCEPT;
///@}
\return 0 - equal, < 0 - vect[i] < str, >0 otherwise
*/
- int compare(size_type idx, const value_type val) const;
+ int compare(size_type idx, const value_type val) const BMNOEXCEPT;
///@}
\param stat - memory allocation statistics after optimization
*/
void optimize(bm::word_t* temp_block = 0,
- typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
- typename sparse_vector<Val, BV>::statistics* stat = 0);
+ typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
+ typename sparse_vector<Val, BV>::statistics* stat = 0);
+
/*!
\brief Optimize sizes of GAP blocks
@sa statistics
*/
- void calc_stat(struct sparse_vector<Val, BV>::statistics* st) const;
+ void calc_stat(
+ struct sparse_vector<Val, BV>::statistics* st) const BMNOEXCEPT;
///@}
// ------------------------------------------------------------
\param offset - target index in the sparse vector to export from
\param zero_mem - set to false if target array is pre-initialized
with 0s to avoid performance penalty
- \param pool_ptr - optional pointer to block allocation pool
\return number of exported elements
\sa decode
size_type extract(value_type* arr,
size_type size,
size_type offset = 0,
- bool zero_mem = true,
- allocator_pool_type* pool_ptr = 0) const;
+ bool zero_mem = true) const BMNOEXCEPT2;
/** \brief extract small window without use of masking vector
\sa decode
\internal
*/
static
- size_type translate_address(size_type i) { return i; }
+ size_type translate_address(size_type i) BMNOEXCEPT { return i; }
/**
\brief throw range error
\brief find position of compressed element by its rank
*/
static
- bool find_rank(size_type rank, size_type& pos);
+ bool find_rank(size_type rank, size_type& pos) BMNOEXCEPT;
/**
\brief size of sparse vector (may be different for RSC)
*/
- size_type effective_size() const { return size(); }
+ size_type effective_size() const BMNOEXCEPT { return size(); }
/**
\brief Always 1 (non-matrix type)
*/
- size_type effective_vector_max() const { return 1; }
+ size_type effective_vector_max() const BMNOEXCEPT { return 1; }
///@}
/// Set allocator pool for local (non-threaded)
/// memory cyclic(lots of alloc-free ops) opertations
///
- void set_allocator_pool(allocator_pool_type* pool_ptr);
+ void set_allocator_pool(allocator_pool_type* pool_ptr) BMNOEXCEPT;
protected:
enum octet_plains
void insert_value_no_null(size_type idx, value_type v);
void resize_internal(size_type sz) { resize(sz); }
- size_type size_internal() const { return size(); }
+ size_type size_internal() const BMNOEXCEPT { return size(); }
- bool is_remap() const { return false; }
- size_t remap_size() const { return 0; }
- const unsigned char* get_remap_buffer() const { return 0; }
- unsigned char* init_remap_buffer() { return 0; }
- void set_remap() { }
+ bool is_remap() const BMNOEXCEPT { return false; }
+ size_t remap_size() const BMNOEXCEPT { return 0; }
+ const unsigned char* get_remap_buffer() const BMNOEXCEPT { return 0; }
+ unsigned char* init_remap_buffer() BMNOEXCEPT { return 0; }
+ void set_remap() BMNOEXCEPT { }
bool resolve_range(size_type from, size_type to,
- size_type* idx_from, size_type* idx_to) const
+ size_type* idx_from, size_type* idx_to) const BMNOEXCEPT
{
*idx_from = from; *idx_to = to; return true;
}
+ /// Increment element by 1 without changing NULL vector or size
+ void inc_no_null(size_type idx);
+
+ /// increment by v without changing NULL vector or size
+ void inc_no_null(size_type idx, value_type v);
+
protected:
template<class V, class SV> friend class rsc_sparse_vector;
template<class SVect> friend class sparse_vector_scanner;
#ifndef BM_NO_CXX11
template<class Val, class BV>
-sparse_vector<Val, BV>::sparse_vector(sparse_vector<Val, BV>&& sv) BMNOEXEPT
+sparse_vector<Val, BV>::sparse_vector(sparse_vector<Val, BV>&& sv) BMNOEXCEPT
{
parent_type::swap(sv);
}
//---------------------------------------------------------------------
template<class Val, class BV>
-sparse_vector<Val, BV>::~sparse_vector() BMNOEXEPT
+sparse_vector<Val, BV>::~sparse_vector() BMNOEXCEPT
{}
//---------------------------------------------------------------------
template<class Val, class BV>
-void sparse_vector<Val, BV>::swap(sparse_vector<Val, BV>& sv) BMNOEXEPT
+void sparse_vector<Val, BV>::swap(sparse_vector<Val, BV>& sv) BMNOEXCEPT
{
parent_type::swap(sv);
}
size_type dec_size,
bool zero_mem) const
{
- if (dec_size < 32)
- {
- return extract_range(arr, dec_size, idx_from, zero_mem);
- }
- return extract_plains(arr, dec_size, idx_from, zero_mem);
- // TODO: write proper extract() based on for_each_range() and a visitor
- /*
- if (dec_size < 1024)
- {
- return extract_plains(arr, dec_size, idx_from, zero_mem);
- }
return extract(arr, dec_size, idx_from, zero_mem);
- */
}
//---------------------------------------------------------------------
template<class Val, class BV>
typename sparse_vector<Val, BV>::size_type
-sparse_vector<Val, BV>::extract(value_type* arr,
+sparse_vector<Val, BV>::extract(value_type* BMRESTRICT arr,
size_type size,
size_type offset,
- bool zero_mem,
- allocator_pool_type* pool_ptr) const
+ bool zero_mem) const BMNOEXCEPT2
{
/// Decoder functor
/// @internal
///
struct sv_decode_visitor_func
{
- sv_decode_visitor_func(value_type* varr,
+ sv_decode_visitor_func(value_type* BMRESTRICT varr,
value_type mask,
- size_type off)
- : arr_(varr), mask_(mask), off_(off)
+ size_type off) BMNOEXCEPT2
+ : arr_(varr), mask_(mask), sv_off_(off)
{}
-
- void add_bits(size_type arr_offset, const unsigned char* bits, unsigned bits_size)
+
+ void add_bits(size_type bv_offset,
+ const unsigned char* bits, unsigned bits_size) BMNOEXCEPT
{
- size_type idx_base = arr_offset - off_;
- const value_type m = mask_;
- unsigned i = 0;
- for (; i < bits_size; ++i)
- arr_[idx_base + bits[i]] |= m;
+ // can be negative (-1) when bv base offset = 0 and sv = 1,2..
+ size_type base = bv_offset - sv_off_;
+ value_type m = mask_;
+ for (unsigned i = 0; i < bits_size; ++i)
+ arr_[bits[i] + base] |= m;
}
-
- void add_range(size_type arr_offset, unsigned sz)
+ void add_range(size_type bv_offset, size_type sz) BMNOEXCEPT
{
- size_type idx_base = arr_offset - off_;
- const value_type m = mask_;
- for (unsigned i = 0; i < sz; ++i)
- arr_[i + idx_base] |= m;
+ auto base = bv_offset - sv_off_;
+ value_type m = mask_;
+ for (size_type i = 0; i < sz; ++i)
+ arr_[i + base] |= m;
}
- value_type* arr_;
- value_type mask_;
- size_type off_;
- };
+ value_type* BMRESTRICT arr_; ///< target array for reverse transpose
+ value_type mask_; ///< bit-plane mask
+ size_type sv_off_; ///< SV read offset
+ };
- if (size == 0)
+ if (!size)
return 0;
if (zero_mem)
::memset(arr, 0, sizeof(value_type)*size);
- size_type start = offset;
- size_type end = start + size;
+ size_type end = offset + size;
if (end > this->size_)
- {
end = this->size_;
- }
-
- bool masked_scan = !(offset == 0 && size == this->size());
- if (masked_scan) // use temp vector to decompress the area
- {
- bvector_type bv_mask;
- bv_mask.set_allocator_pool(pool_ptr);
-
- for (size_type i = 0; i < parent_type::value_bits(); ++i)
- {
- const bvector_type* bv = this->bmatr_.get_row(i);
- if (bv)
- {
- bv_mask.copy_range(*bv, offset, end - 1);
- sv_decode_visitor_func func(arr, (value_type(1) << i), offset);
- bm::for_each_bit(bv_mask, func);
- }
- } // for i
- }
- else
- {
- for (size_type i = 0; i < parent_type::value_bits(); ++i)
- {
- const bvector_type* bv = this->bmatr_.get_row(i);
- if (bv)
- {
- sv_decode_visitor_func func(arr, (value_type(1) << i), 0);
- bm::for_each_bit(*bv, func);
- }
- } // for i
- }
- return end - start;
+ sv_decode_visitor_func func(arr, 0, offset);
+
+ for (size_type i = 0; i < parent_type::value_bits(); ++i)
+ {
+ const bvector_type* bv = this->bmatr_.get_row(i);
+ if (!bv)
+ continue;
+ func.mask_ = (value_type(1) << i); // set target plane OR mask
+ bm::for_each_bit_range_no_check(*bv, offset, end-1, func);
+ } // for i
+ return end - offset;
}
//---------------------------------------------------------------------
template<class Val, class BV>
typename sparse_vector<Val, BV>::value_type
-sparse_vector<Val, BV>::get(typename sparse_vector<Val, BV>::size_type i) const
+sparse_vector<Val, BV>::get(
+ typename sparse_vector<Val, BV>::size_type i) const BMNOEXCEPT
{
BM_ASSERT(i < bm::id_max);
BM_ASSERT(i < size());
bool b = this->bmatr_.test_4rows(j);
if (b)
{
- value_type vm = this->bmatr_.get_half_octet(i, j);
+ value_type vm = (value_type)this->bmatr_.get_half_octet(i, j);
v |= vm << j;
}
} // for j
{
if (idx >= this->size_)
this->size_ = idx+1;
+ inc_no_null(idx);
+ bvector_type* bv_null = this->get_null_bvect();
+ if (bv_null)
+ bv_null->set_bit_no_check(idx);
+}
+
+//---------------------------------------------------------------------
+template<class Val, class BV>
+void sparse_vector<Val, BV>::inc_no_null(size_type idx)
+{
for (unsigned i = 0; i < parent_type::sv_value_plains; ++i)
{
bvector_type* bv = this->get_plain(i);
if (!carry_over)
break;
}
- bvector_type* bv_null = this->get_null_bvect();
- if (bv_null)
- bv_null->set_bit_no_check(idx);
}
//---------------------------------------------------------------------
template<class Val, class BV>
-void sparse_vector<Val, BV>::clear() BMNOEXEPT
+void sparse_vector<Val, BV>::inc_no_null(size_type idx, value_type v)
+{
+ value_type v_prev = get(idx);
+ set_value_no_null(idx, v + v_prev);
+}
+
+//---------------------------------------------------------------------
+
+template<class Val, class BV>
+void sparse_vector<Val, BV>::clear() BMNOEXCEPT
{
parent_type::clear();
}
//---------------------------------------------------------------------
template<class Val, class BV>
-bool sparse_vector<Val, BV>::find_rank(size_type rank, size_type& pos)
+bool sparse_vector<Val, BV>::find_rank(size_type rank, size_type& pos) BMNOEXCEPT
{
BM_ASSERT(rank);
pos = rank - 1;
template<class Val, class BV>
void sparse_vector<Val, BV>::calc_stat(
- struct sparse_vector<Val, BV>::statistics* st) const
+ struct sparse_vector<Val, BV>::statistics* st) const BMNOEXCEPT
{
BM_ASSERT(st);
typename bvector_type::statistics stbv;
//---------------------------------------------------------------------
template<class Val, class BV>
-int sparse_vector<Val, BV>::compare(size_type idx, const value_type val) const
+int sparse_vector<Val, BV>::compare(size_type idx,
+ const value_type val) const BMNOEXCEPT
{
// TODO: consider bit-by-bit comparison to minimize CPU hit miss in plans get()
value_type sv_value = get(idx);
template<class Val, class BV>
bool sparse_vector<Val, BV>::equal(const sparse_vector<Val, BV>& sv,
- bm::null_support null_able) const
+ bm::null_support null_able) const BMNOEXCEPT
{
return parent_type::equal(sv, null_able);
}
template<class Val, class BV>
typename sparse_vector<Val, BV>::const_iterator
-sparse_vector<Val, BV>::begin() const
+sparse_vector<Val, BV>::begin() const BMNOEXCEPT
{
typedef typename sparse_vector<Val, BV>::const_iterator it_type;
return it_type(this);
template<class Val, class BV>
void sparse_vector<Val, BV>::set_allocator_pool(
- typename sparse_vector<Val, BV>::allocator_pool_type* pool_ptr)
+ typename sparse_vector<Val, BV>::allocator_pool_type* pool_ptr) BMNOEXCEPT
{
this->bmatr_.set_allocator_pool(pool_ptr);
}
template<class Val, class BV>
-sparse_vector<Val, BV>::const_iterator::const_iterator()
+sparse_vector<Val, BV>::const_iterator::const_iterator() BMNOEXCEPT
: sv_(0), pos_(bm::id_max), buf_ptr_(0)
{}
template<class Val, class BV>
sparse_vector<Val, BV>::const_iterator::const_iterator(
- const typename sparse_vector<Val, BV>::const_iterator& it)
+ const typename sparse_vector<Val, BV>::const_iterator& it) BMNOEXCEPT
: sv_(it.sv_), pos_(it.pos_), buf_ptr_(0)
{}
template<class Val, class BV>
sparse_vector<Val, BV>::const_iterator::const_iterator(
- const typename sparse_vector<Val, BV>::const_iterator::sparse_vector_type* sv)
+ const typename sparse_vector<Val, BV>::const_iterator::sparse_vector_type* sv
+ ) BMNOEXCEPT
: sv_(sv), buf_ptr_(0)
{
BM_ASSERT(sv_);
template<class Val, class BV>
sparse_vector<Val, BV>::const_iterator::const_iterator(
const typename sparse_vector<Val, BV>::const_iterator::sparse_vector_type* sv,
- typename sparse_vector<Val, BV>::size_type pos)
+ typename sparse_vector<Val, BV>::size_type pos) BMNOEXCEPT
: sv_(sv), buf_ptr_(0)
{
BM_ASSERT(sv_);
//---------------------------------------------------------------------
template<class Val, class BV>
-void sparse_vector<Val, BV>::const_iterator::go_to(size_type pos)
+void sparse_vector<Val, BV>::const_iterator::go_to(size_type pos) BMNOEXCEPT
{
pos_ = (!sv_ || pos >= sv_->size()) ? bm::id_max : pos;
buf_ptr_ = 0;
//---------------------------------------------------------------------
template<class Val, class BV>
-void sparse_vector<Val, BV>::const_iterator::advance()
+bool sparse_vector<Val, BV>::const_iterator::advance() BMNOEXCEPT
{
if (pos_ == bm::id_max) // nothing to do, we are at the end
- return;
+ return false;
++pos_;
if (pos_ >= sv_->size())
+ {
this->invalidate();
- else
+ return false;
+ }
+ if (buf_ptr_)
{
- if (buf_ptr_)
- {
- ++buf_ptr_;
- if (buf_ptr_ - ((value_type*)buffer_.data()) >= n_buf_size)
- buf_ptr_ = 0;
- }
+ ++buf_ptr_;
+ if (buf_ptr_ - ((value_type*)buffer_.data()) >= n_buf_size)
+ buf_ptr_ = 0;
}
+ return true;
}
//---------------------------------------------------------------------
{
buffer_.reserve(n_buf_size * sizeof(value_type));
buf_ptr_ = (value_type*)(buffer_.data());
- sv_->extract(buf_ptr_, n_buf_size, pos_, true, &pool_);
+ sv_->extract(buf_ptr_, n_buf_size, pos_, true);
}
v = *buf_ptr_;
return v;
//---------------------------------------------------------------------
template<class Val, class BV>
-void sparse_vector<Val, BV>::const_iterator::skip_zero_values()
+void sparse_vector<Val, BV>::const_iterator::skip_zero_values() BMNOEXCEPT
{
value_type v = value();
if (buf_ptr_)
//---------------------------------------------------------------------
template<class Val, class BV>
-bool sparse_vector<Val, BV>::const_iterator::is_null() const
+bool sparse_vector<Val, BV>::const_iterator::is_null() const BMNOEXCEPT
{
return sv_->is_null(pos_);
}
For more information please visit: http://bitmagic.io
*/
/*! \file bmsparsevec_algo.h
- \brief Algorithms for sparse_vector<>
+ \brief Algorithms for bm::sparse_vector
*/
#ifndef BM__H__INCLUDED__
#include "bmsparsevec.h"
#include "bmaggregator.h"
#include "bmbuffer.h"
+#include "bmalgo.h"
#include "bmdef.h"
#ifdef _MSC_VER
} // null_proc
}
- for (unsigned i = 0; mismatch & (i < plains1); ++i)
+ for (unsigned i = 0; mismatch && (i < plains1); ++i)
{
typename SV::bvector_type_const_ptr bv1 = sv1.get_plain(i);
typename SV::bvector_type_const_ptr bv2 = sv2.get_plain(i);
/**
\brief reset sparse vector binding
*/
- void reset_binding();
+ void reset_binding() BMNOEXCEPT;
/**
\brief find all sparse vector elements EQ to search value
int compare_str(const SV& sv, size_type idx, const value_type* str);
/// compare sv[idx] with input value
- int compare(const SV& sv, size_type idx, const value_type val);
+ int compare(const SV& sv, size_type idx, const value_type val) BMNOEXCEPT;
protected:
sparse_vector_scanner(const sparse_vector_scanner&) = delete;
{
sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
bv_out.set(&gb_->buffer_[0], buf_cnt, BM_SORTED);
- buf_cnt ^= buf_cnt;
+ buf_cnt = 0;
}
nb_old = nb;
gb_->gather_idx_[buf_cnt++] = idx;
{
sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED);
- buf_cnt ^= buf_cnt;
+ buf_cnt = 0;
}
} // for en
if (buf_cnt)
//----------------------------------------------------------------------------
template<typename SV>
-void sparse_vector_scanner<SV>::reset_binding()
+void sparse_vector_scanner<SV>::reset_binding() BMNOEXCEPT
{
bound_sv_ = 0;
effective_str_max_ = 0;
template<typename SV>
int sparse_vector_scanner<SV>::compare(const SV& sv,
size_type idx,
- const value_type val)
+ const value_type val) BMNOEXCEPT
{
// TODO: implement sentinel elements cache (similar to compare_str())
return sv.compare(idx, val);
class reference
{
public:
- reference(rsc_sparse_vector<Val, SV>& csv, size_type idx) BMNOEXEPT
+ reference(rsc_sparse_vector<Val, SV>& csv, size_type idx) BMNOEXCEPT
: csv_(csv), idx_(idx)
{}
- operator value_type() const { return csv_.get(idx_); }
- bool operator==(const reference& ref) const
+ operator value_type() const BMNOEXCEPT { return csv_.get(idx_); }
+ bool operator==(const reference& ref) const BMNOEXCEPT
{ return bool(*this) == bool(ref); }
- bool is_null() const { return csv_.is_null(idx_); }
+ bool is_null() const BMNOEXCEPT { return csv_.is_null(idx_); }
private:
rsc_sparse_vector<Val, SV>& csv_;
size_type idx_;
};
+ /**
+ Const iterator to traverse the rsc sparse vector.
+
+ Implementation uses buffer for decoding so, competing changes
+ to the original vector may not match the iterator returned values.
+
+ This iterator keeps an operational buffer, memory footprint is not
+ negligible
+
+ @ingroup sv
+ */
+ class const_iterator
+ {
+ public:
+ friend class rsc_sparse_vector;
+
+#ifndef BM_NO_STL
+ typedef std::input_iterator_tag iterator_category;
+#endif
+ typedef rsc_sparse_vector<Val, SV> rsc_sparse_vector_type;
+ typedef rsc_sparse_vector_type* rsc_sparse_vector_type_ptr;
+ typedef typename rsc_sparse_vector_type::value_type value_type;
+ typedef typename rsc_sparse_vector_type::size_type size_type;
+ typedef typename rsc_sparse_vector_type::bvector_type bvector_type;
+ typedef typename bvector_type::allocator_type allocator_type;
+ typedef typename
+ bvector_type::allocator_type::allocator_pool_type allocator_pool_type;
+ typedef bm::byte_buffer<allocator_type> buffer_type;
+
+ typedef unsigned difference_type;
+ typedef unsigned* pointer;
+ typedef value_type& reference;
+
+ public:
+ const_iterator() BMNOEXCEPT;
+ const_iterator(const rsc_sparse_vector_type* csv) BMNOEXCEPT;
+ const_iterator(const rsc_sparse_vector_type* csv, size_type pos) BMNOEXCEPT;
+ const_iterator(const const_iterator& it) BMNOEXCEPT;
+
+ bool operator==(const const_iterator& it) const BMNOEXCEPT
+ { return (pos_ == it.pos_) && (csv_ == it.csv_); }
+ bool operator!=(const const_iterator& it) const BMNOEXCEPT
+ { return ! operator==(it); }
+ bool operator < (const const_iterator& it) const BMNOEXCEPT
+ { return pos_ < it.pos_; }
+ bool operator <= (const const_iterator& it) const BMNOEXCEPT
+ { return pos_ <= it.pos_; }
+ bool operator > (const const_iterator& it) const BMNOEXCEPT
+ { return pos_ > it.pos_; }
+ bool operator >= (const const_iterator& it) const BMNOEXCEPT
+ { return pos_ >= it.pos_; }
+
+ /// \brief Get current position (value)
+ value_type operator*() const { return this->value(); }
+
+
+ /// \brief Advance to the next available value
+ const_iterator& operator++() BMNOEXCEPT { this->advance(); return *this; }
+
+ /// \brief Post-increment: advance to the next available value
+ /// \return copy of the iterator as it was before the advance.
+ ///         Returned by value: the copy is a local object, so returning
+ ///         it by reference would hand the caller a dangling reference.
+ const_iterator operator++(int)
+ { const_iterator tmp(*this); this->advance(); return tmp; }
+
+
+ /// \brief Get current position (value)
+ value_type value() const;
+
+ /// \brief Get NULL status
+ bool is_null() const BMNOEXCEPT;
+
+ /// Returns true if iterator is at a valid position
+ bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
+
+ /// Invalidate current iterator
+ void invalidate() BMNOEXCEPT { pos_ = bm::id_max; }
+
+ /// Current position (index) in the vector
+ size_type pos() const BMNOEXCEPT{ return pos_; }
+
+ /// re-position to a specified position
+ void go_to(size_type pos) BMNOEXCEPT;
+
+ /// advance iterator forward by one
+ /// @return true if it is still valid
+ bool advance() BMNOEXCEPT;
+
+ void skip_zero_values() BMNOEXCEPT;
+ private:
+ enum buf_size_e
+ {
+ n_buf_size = 1024 * 8
+ };
+
+ private:
+ const rsc_sparse_vector_type* csv_; ///!< ptr to parent
+ size_type pos_; ///!< Position
+ mutable buffer_type vbuffer_; ///!< value buffer
+ mutable buffer_type tbuffer_; ///!< temp buffer
+ mutable value_type* buf_ptr_; ///!< position in the buffer
+ };
+
+
/**
Back insert iterator implements buffered insert, faster than generic
typedef void reference;
public:
- back_insert_iterator();
- back_insert_iterator(rsc_sparse_vector_type* csv);
+ back_insert_iterator() BMNOEXCEPT;
+ back_insert_iterator(rsc_sparse_vector_type* csv) BMNOEXCEPT;
back_insert_iterator& operator=(const back_insert_iterator& bi)
{
~back_insert_iterator();
/** push value to the vector */
- back_insert_iterator& operator=(value_type v) { this->add(v); return *this; }
+ back_insert_iterator& operator=(value_type v)
+ { this->add(v); return *this; }
/** noop */
back_insert_iterator& operator*() { return *this; }
/** noop */
void add(value_type v);
/** add NULL (no-value) to the container */
- void add_null();
+ void add_null() BMNOEXCEPT;
/** add a series of consequitve NULLs (no-value) to the container */
- void add_null(size_type count);
+ void add_null(size_type count) BMNOEXCEPT;
/** flush the accumulated buffer */
void flush();
///size_type add_value(value_type v);
typedef rsc_sparse_vector_type::sparse_vector_type sparse_vector_type;
- typedef typename sparse_vector_type::back_insert_iterator sparse_vector_bi;
+ typedef
+ typename sparse_vector_type::back_insert_iterator sparse_vector_bi;
private:
rsc_sparse_vector_type* csv_; ///!< pointer on the parent vector
sparse_vector_bi sv_bi_;
public:
// ------------------------------------------------------------
/*! @name Construction and assignment */
+
//@{
rsc_sparse_vector(bm::null_support null_able = bm::use_null,
allocation_policy_type ap = allocation_policy_type(),
size_type bv_max_size = bm::id_max,
const allocator_type& alloc = allocator_type());
+
+ /**
+ Constructor to pre-initialize the list of assigned (not NULL) elements.
+
+ If the list of not NULL elements is known upfront it can help to
+ pre-declare it, enable rank-select index and then use set function.
+ This scenario gives a significant speed boost compared to random assignment.
+
+ @param bv_null - not NULL vector for the container
+ */
+ rsc_sparse_vector(const bvector_type& bv_null);
+
~rsc_sparse_vector();
/*! copy-ctor */
/*! copy assignmment operator */
- rsc_sparse_vector<Val,SV>& operator = (const rsc_sparse_vector<Val, SV>& csv)
+ rsc_sparse_vector<Val,SV>& operator=(const rsc_sparse_vector<Val, SV>& csv)
{
if (this != &csv)
{
}
return *this;
}
-
+
#ifndef BM_NO_CXX11
/*! move-ctor */
- rsc_sparse_vector(rsc_sparse_vector<Val,SV>&& csv) BMNOEXEPT;
+ rsc_sparse_vector(rsc_sparse_vector<Val,SV>&& csv) BMNOEXCEPT;
/*! move assignmment operator */
- rsc_sparse_vector<Val,SV>& operator=(rsc_sparse_vector<Val,SV>&& csv) BMNOEXEPT
+ rsc_sparse_vector<Val,SV>& operator=(rsc_sparse_vector<Val,SV>&& csv) BMNOEXCEPT
{
if (this != &csv)
{
/*! \brief return size of the vector
\return size of sparse vector
*/
- size_type size() const;
+ size_type size() const BMNOEXCEPT;
/*! \brief return true if vector is empty
\return true if empty
\param idx - element index
\return value of the element
*/
- value_type get(size_type idx) const;
+ value_type get(size_type idx) const BMNOEXCEPT;
/*!
\brief set specified element with bounds checking and automatic resize
*/
void set(size_type idx, value_type v);
+
+ /*!
+ \brief increment specified element by one
+ (an element that was NULL becomes assigned with value 1)
+ \param idx - element index
+ */
+ void inc(size_type idx);
+
+ /*!
+ \brief increment specified element by the given value
+ (an element that was NULL becomes assigned with value v)
+ \param idx - element index
+ \param v - increment value
+ */
+ void inc(size_type idx, value_type v);
+
+ /*!
+ \brief increment specified element by the given value, element MUST be NOT NULL.
+ Faster than plain inc(); if the element is NULL the behavior is undefined.
+ \param idx - element index
+ \param v - increment value
+ @sa inc
+ */
+ void inc_not_null(size_type idx, value_type v);
+
/*!
\brief set specified element to NULL
RSC vector actually erases element when it is set to NULL (expensive).
void set_null(size_type idx);
-
/** \brief test if specified element is NULL
\param idx - element index
\return true if it is NULL false if it was assigned or container
is not configured to support assignment flags
*/
- bool is_null(size_type idx) const;
+ bool is_null(size_type idx) const BMNOEXCEPT;
/**
\brief Get bit-vector of assigned values (or NULL)
*/
- const bvector_type* get_null_bvector() const;
+ const bvector_type* get_null_bvector() const BMNOEXCEPT;
/**
\brief find position of compressed element by its rank
\param rank - rank (virtual index in sparse vector)
\param idx - index (true position)
*/
- bool find_rank(size_type rank, size_type& idx) const;
+ bool find_rank(size_type rank, size_type& idx) const BMNOEXCEPT;
//@}
// ------------------------------------------------------------
/*! @name Export content to C-stype array */
///@{
-
+
+ /**
+ \brief C-style decode
+ \param arr - decode target array (must be properly sized)
+ \param idx_from - start address to decode
+ \param size - number of elements to decode
+ \param zero_mem - flag if array needs to be set to zeros first
+
+ @return actual decoded size
+ @sa decode_buf
+ */
size_type decode(value_type* arr,
size_type idx_from,
size_type size,
bool zero_mem = true) const;
+
+ /**
+ \brief C-style decode (variant with external memory)
+ Analog of decode, but requires two arrays.
+ Faster than decode in many cases.
+
+ \param arr - decode target array (must be properly sized)
+ \param arr_buf_tmp - decode temp buffer (must be the same size as arr)
+ \param idx_from - start address to decode
+ \param size - number of elements to decode
+ \param zero_mem - flag if array needs to be set to zeros first
+
+ @return actual decoded size
+ @sa decode
+ */
+ size_type decode_buf(value_type* arr,
+ value_type* arr_buf_tmp,
+ size_type idx_from,
+ size_type size,
+ bool zero_mem = true) const BMNOEXCEPT;
+
///@}
\brief check if another vector has the same content
\return true, if it is the same
*/
- bool equal(const rsc_sparse_vector<Val, SV>& csv) const;
+ bool equal(const rsc_sparse_vector<Val, SV>& csv) const BMNOEXCEPT;
//@}
/*! @name Iterator access */
//@{
+ /** Provide const iterator access to container content (iterator for the start of this vector) */
+ const_iterator begin() const BMNOEXCEPT
+ { return const_iterator(this); }
+
+ /** Provide const iterator access to the end (iterator constructed at position bm::id_max) */
+ const_iterator end() const BMNOEXCEPT
+ { return const_iterator(this, bm::id_max); }
+
+ /** Get const_iterator re-positioned to a specific element
+ @param idx - position in the sparse vector
+ */
+ const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
+ { return const_iterator(this, idx); }
+
back_insert_iterator get_back_inserter() { return back_insert_iterator(this); }
///@}
\param opt_mode - requested compression depth
\param stat - memory allocation statistics after optimization
*/
- void optimize(bm::word_t* temp_block = 0,
- typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
- statistics* stat = 0);
+ void optimize(
+ bm::word_t* temp_block = 0,
+ typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
+ statistics* stat = 0);
/*! \brief resize to zero, free memory
*/
- void clear() BMNOEXEPT;
+ void clear() BMNOEXCEPT;
/*!
@brief Calculates memory statistics.
@sa statistics
*/
- void calc_stat(struct rsc_sparse_vector<Val, SV>::statistics* st) const;
+ void calc_stat(
+ struct rsc_sparse_vector<Val, SV>::statistics* st) const BMNOEXCEPT;
///@}
void copy_range(const rsc_sparse_vector<Val, SV>& csv,
size_type left, size_type right);
+ /**
+ @brief merge two vectors (argument gets destroyed)
+ It is important that both vectors have the same NULL vectors
+ @param csv - [in,out] argument vector to merge
+ (works like move so arg should not be used after the merge)
+ */
+ void merge_not_null(rsc_sparse_vector<Val, SV>& csv);
+
///@}
// ------------------------------------------------------------
/*!
\brief returns true if prefix sum table is in sync with the vector
*/
- bool in_sync() const { return in_sync_; }
+ bool in_sync() const BMNOEXCEPT { return in_sync_; }
/*!
\brief Unsync the prefix sum table
*/
- void unsync() { in_sync_ = false; }
+ void unsync() BMNOEXCEPT { in_sync_ = false; }
///@}
// ------------------------------------------------------------
\brief get access to bit-plain, function checks and creates a plain
\return bit-vector for the bit plain
*/
- bvector_type_const_ptr get_plain(unsigned i) const { return sv_.get_plain(i); }
+ bvector_type_const_ptr get_plain(unsigned i) const BMNOEXCEPT
+ { return sv_.get_plain(i); }
- bvector_type_ptr get_plain(unsigned i) { return sv_.get_plain(i); }
+ bvector_type_ptr get_plain(unsigned i) BMNOEXCEPT
+ { return sv_.get_plain(i); }
/*!
Number of effective bit-plains in the value type
*/
- unsigned effective_plains() const { return sv_.effective_plains(); }
+ unsigned effective_plains() const BMNOEXCEPT
+ { return sv_.effective_plains(); }
/*!
\brief get total number of bit-plains in the vector
*/
- static unsigned plains() { return sparse_vector_type::plains(); }
+ static unsigned plains() BMNOEXCEPT
+ { return sparse_vector_type::plains(); }
/** Number of stored bit-plains (value plains + extra */
static unsigned stored_plains()
/*!
\brief access dense vector
*/
- const sparse_vector_type& get_sv() const { return sv_; }
+ const sparse_vector_type& get_sv() const BMNOEXCEPT { return sv_; }
/*!
\brief size of internal dense vector
*/
- size_type effective_size() const { return sv_.size(); }
+ size_type effective_size() const BMNOEXCEPT { return sv_.size(); }
/**
\brief Always 1 (non-matrix type)
*/
- size_type effective_vector_max() const { return 1; }
+ size_type effective_vector_max() const BMNOEXCEPT { return 1; }
/*!
get read-only access to inetrnal bit-matrix
*/
- const bmatrix_type& get_bmatrix() const { return sv_.get_bmatrix(); }
+ const bmatrix_type& get_bmatrix() const BMNOEXCEPT
+ { return sv_.get_bmatrix(); }
///@}
\return true if id is known and resolved successfully
*/
- bool resolve(size_type idx, size_type* idx_to) const;
+ bool resolve(size_type idx, size_type* idx_to) const BMNOEXCEPT;
bool resolve_range(size_type from, size_type to,
- size_type* idx_from, size_type* idx_to) const;
+ size_type* idx_from, size_type* idx_to) const BMNOEXCEPT;
void resize_internal(size_type sz) { sv_.resize_internal(sz); }
- size_type size_internal() const { return sv_.size(); }
+ size_type size_internal() const BMNOEXCEPT { return sv_.size(); }
- bool is_remap() const { return false; }
- size_t remap_size() const { return 0; }
- const unsigned char* get_remap_buffer() const { return 0; }
- unsigned char* init_remap_buffer() { return 0; }
- void set_remap() { }
+ bool is_remap() const BMNOEXCEPT { return false; }
+ size_t remap_size() const BMNOEXCEPT { return 0; }
+ const unsigned char* get_remap_buffer() const BMNOEXCEPT { return 0; }
+ unsigned char* init_remap_buffer() BMNOEXCEPT { return 0; }
+ void set_remap() BMNOEXCEPT { }
void push_back_no_check(size_type idx, value_type v);
private:
- void construct_bv_blocks();
- void free_bv_blocks();
+
+ /// Allocate memory for RS index (returns immediately when it is already allocated)
+ void construct_rs_index();
+ /// Free rs-index
+ void free_rs_index();
protected:
template<class SVect> friend class sparse_vector_scanner;
allocation_policy_type ap,
size_type bv_max_size,
const allocator_type& alloc)
-: sv_(null_able, ap, bv_max_size, alloc),
- in_sync_(false)
+: sv_(null_able, ap, bv_max_size, alloc), in_sync_(false)
{
BM_ASSERT(null_able == bm::use_null);
BM_ASSERT(int(sv_value_plains) == int(SV::sv_value_plains));
size_ = max_id_ = 0;
- construct_bv_blocks();
+ construct_rs_index();
+}
+
+//---------------------------------------------------------------------
+
+// Construct from a known not-NULL bit-vector: the bit-vector is copied in,
+// the logical size is derived from its last set bit and the dense vector
+// is sized to the number of set bits.
+template<class Val, class SV>
+rsc_sparse_vector<Val, SV>::rsc_sparse_vector(const bvector_type& bv_null)
+: sv_(bm::use_null), in_sync_(false)
+{
+    construct_rs_index();
+
+    bvector_type* not_null_bv = sv_.get_null_bvect();
+    BM_ASSERT(not_null_bv);
+    *not_null_bv = bv_null;
+
+    if (!not_null_bv->find_reverse(max_id_))
+    {
+        // no set bits at all: the container starts empty
+        BM_ASSERT(!not_null_bv->any());
+        size_ = max_id_ = 0;
+        return;
+    }
+
+    // max_id_ now holds the last assigned index
+    size_ = max_id_ + 1;
+    const size_type assigned_cnt = not_null_bv->count();
+    sv_.resize(assigned_cnt);
}
//---------------------------------------------------------------------
template<class Val, class SV>
rsc_sparse_vector<Val, SV>::~rsc_sparse_vector()
{
- free_bv_blocks();
+ free_rs_index();
}
//---------------------------------------------------------------------
template<class Val, class SV>
rsc_sparse_vector<Val, SV>::rsc_sparse_vector(
const rsc_sparse_vector<Val, SV>& csv)
-: sv_(csv.sv_),
- size_(csv.size_),
- max_id_(csv.max_id_),
- in_sync_(csv.in_sync_)
+: sv_(csv.sv_), size_(csv.size_), max_id_(csv.max_id_), in_sync_(csv.in_sync_)
{
BM_ASSERT(int(sv_value_plains) == int(SV::sv_value_plains));
- construct_bv_blocks();
+ construct_rs_index();
if (in_sync_)
- {
bv_blocks_ptr_->copy_from(*(csv.bv_blocks_ptr_));
- }
}
//---------------------------------------------------------------------
template<class Val, class SV>
-rsc_sparse_vector<Val, SV>::rsc_sparse_vector(rsc_sparse_vector<Val,SV>&& csv) BMNOEXEPT
+rsc_sparse_vector<Val, SV>::rsc_sparse_vector(
+ rsc_sparse_vector<Val,SV>&& csv) BMNOEXCEPT
: sv_(bm::use_null),
size_(0),
max_id_(0), in_sync_(false)
template<class Val, class SV>
typename rsc_sparse_vector<Val, SV>::size_type
-rsc_sparse_vector<Val, SV>::size() const
+rsc_sparse_vector<Val, SV>::size() const BMNOEXCEPT
{
return size_;
}
size_type sv_idx = bv_null->count_range(0, idx);
bv_null->clear_bit_no_check(idx);
sv_.erase(--sv_idx);
+ in_sync_ = false;
}
}
+//---------------------------------------------------------------------
+
+// Increment element idx by one; an element that was NULL is inserted
+// with value 1 and the rank-select index is marked out of sync.
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::inc(size_type idx)
+{
+    bvector_type* not_null_bv = sv_.get_null_bvect();
+    BM_ASSERT(not_null_bv);
+
+    const bool assigned = not_null_bv->test(idx);
+
+    // rank of idx in the not-NULL vector (RS index is used when in sync)
+    size_type rank;
+    if (in_sync_)
+        rank = not_null_bv->count_to(idx, *bv_blocks_ptr_);
+    else
+        rank = not_null_bv->count_range(0, idx); // TODO: make test'n'count
+
+    if (assigned)
+    {
+        sv_.inc_no_null(rank - 1);
+        return;
+    }
+
+    // previously NULL: add a new dense element and flag it as assigned
+    sv_.insert_value_no_null(rank, 1);
+    not_null_bv->set_bit_no_check(idx);
+    if (idx > max_id_)
+    {
+        max_id_ = idx;
+        size_ = max_id_ + 1;
+    }
+    in_sync_ = false;
+}
+
+//---------------------------------------------------------------------
+
+// Increment element idx by v; an element that was NULL is inserted
+// with value v and the rank-select index is marked out of sync.
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::inc(size_type idx, value_type v)
+{
+    bvector_type* not_null_bv = sv_.get_null_bvect();
+    BM_ASSERT(not_null_bv);
+
+    const bool assigned = not_null_bv->test(idx);
+
+    // rank of idx in the not-NULL vector (RS index is used when in sync)
+    size_type rank;
+    if (in_sync_)
+        rank = not_null_bv->count_to(idx, *bv_blocks_ptr_);
+    else
+        rank = not_null_bv->count_range(0, idx); // TODO: make test'n'count
+
+    if (assigned)
+    {
+        sv_.inc_no_null(rank - 1, v);
+        return;
+    }
+
+    // previously NULL: add a new dense element and flag it as assigned
+    sv_.insert_value_no_null(rank, v);
+    not_null_bv->set_bit_no_check(idx);
+    if (idx > max_id_)
+    {
+        max_id_ = idx;
+        size_ = max_id_ + 1;
+    }
+    in_sync_ = false;
+}
+
+//---------------------------------------------------------------------
+
+// Increment an element that is already assigned (NOT NULL) by v.
+// No NULL handling is done here; the precondition is only asserted.
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::inc_not_null(size_type idx, value_type v)
+{
+    bvector_type* not_null_bv = sv_.get_null_bvect();
+    BM_ASSERT(not_null_bv->test(idx)); // idx must be NOT NULL
+
+    // rank of idx in the not-NULL vector (RS index is used when in sync)
+    const size_type rank =
+        in_sync_ ? not_null_bv->count_to(idx, *bv_blocks_ptr_)
+                 : not_null_bv->count_range(0, idx); // TODO: make test'n'count
+    const size_type dense_idx = rank - 1;
+
+    if (v == 1)
+    {
+        sv_.inc_no_null(dense_idx);
+    }
+    else
+    {
+        sv_.inc_no_null(dense_idx, v);
+    }
+}
+
+
//---------------------------------------------------------------------
template<class Val, class SV>
{
bvector_type* bv_null = sv_.get_null_bvect();
BM_ASSERT(bv_null);
-
+
+ size_type sv_idx;
bool found = bv_null->test(idx);
- size_type sv_idx = bv_null->count_range(0, idx); // TODO: make test'n'count
-// size_type sv_idx;
-// bool found = resolve(idx, &sv_idx);
+
+ sv_idx = in_sync_ ? bv_null->count_to(idx, *bv_blocks_ptr_)
+ : bv_null->count_range(0, idx); // TODO: make test'n'count
if (found)
{
- //sv_.set(--sv_idx, v);
sv_.set_value_no_null(--sv_idx, v);
}
else
template<class Val, class SV>
bool rsc_sparse_vector<Val, SV>::equal(
- const rsc_sparse_vector<Val, SV>& csv) const
+ const rsc_sparse_vector<Val, SV>& csv) const BMNOEXCEPT
{
if (this == &csv)
return true;
template<class Val, class SV>
void rsc_sparse_vector<Val, SV>::load_from(
- const sparse_vector_type& sv_src)
+ const sparse_vector_type& sv_src)
{
max_id_ = size_ = 0;
//---------------------------------------------------------------------
template<class Val, class SV>
-bool rsc_sparse_vector<Val, SV>::resolve(size_type idx, size_type* idx_to) const
+bool rsc_sparse_vector<Val, SV>::resolve(size_type idx,
+ size_type* idx_to) const BMNOEXCEPT
{
BM_ASSERT(idx_to);
-
const bvector_type* bv_null = sv_.get_null_bvector();
if (in_sync_)
{
else // slow access
{
bool found = bv_null->test(idx);
- if (!found)
- {
- *idx_to = 0;
- }
- else
- {
- *idx_to = bv_null->count_range(0, idx);
- }
+ *idx_to = found ? bv_null->count_range(0, idx) : 0;
}
return bool(*idx_to);
}
+
//---------------------------------------------------------------------
template<class Val, class SV>
bool rsc_sparse_vector<Val, SV>::resolve_range(
size_type from, size_type to,
- size_type* idx_from, size_type* idx_to) const
+ size_type* idx_from, size_type* idx_to) const BMNOEXCEPT
{
BM_ASSERT(idx_to && idx_from);
const bvector_type* bv_null = sv_.get_null_bvector();
copy_sz = bv_null->count_range(from, to);
if (!copy_sz)
return false;
+
if (in_sync_)
- sv_left = bv_null->count_range(0, from, *bv_blocks_ptr_);
+ sv_left = bv_null->rank_corrected(from, *bv_blocks_ptr_);
else
+ {
sv_left = bv_null->count_range(0, from);
- bool tl = bv_null->test(from); // TODO: add count and test
- sv_left -= tl; // rank correction
+ bool tl = bv_null->test(from); // TODO: add count and test
+ sv_left -= tl; // rank correction
+ }
*idx_from = sv_left; *idx_to = sv_left + copy_sz - 1;
return true;
template<class Val, class SV>
typename rsc_sparse_vector<Val, SV>::value_type
-rsc_sparse_vector<Val, SV>::get(size_type idx) const
+rsc_sparse_vector<Val, SV>::get(size_type idx) const BMNOEXCEPT
{
size_type sv_idx;
bool found = resolve(idx, &sv_idx);
//---------------------------------------------------------------------
template<class Val, class SV>
-bool rsc_sparse_vector<Val, SV>::is_null(size_type idx) const
+bool rsc_sparse_vector<Val, SV>::is_null(size_type idx) const BMNOEXCEPT
{
const bvector_type* bv_null = sv_.get_null_bvector();
BM_ASSERT(bv_null);
//---------------------------------------------------------------------
template<class Val, class SV>
-void rsc_sparse_vector<Val, SV>::clear() BMNOEXEPT
+void rsc_sparse_vector<Val, SV>::clear() BMNOEXCEPT
{
sv_.clear();
in_sync_ = false; max_id_ = size_ = 0;
template<class Val, class SV>
void rsc_sparse_vector<Val, SV>::calc_stat(
- struct rsc_sparse_vector<Val, SV>::statistics* st) const
+ struct rsc_sparse_vector<Val, SV>::statistics* st) const BMNOEXCEPT
{
BM_ASSERT(st);
sv_.calc_stat((typename sparse_vector_type::statistics*)st);
template<class Val, class SV>
const typename rsc_sparse_vector<Val, SV>::bvector_type*
-rsc_sparse_vector<Val, SV>::get_null_bvector() const
+rsc_sparse_vector<Val, SV>::get_null_bvector() const BMNOEXCEPT
{
return sv_.get_null_bvector();
}
template<class Val, class SV>
bool
-rsc_sparse_vector<Val, SV>::find_rank(size_type rank, size_type& idx) const
+rsc_sparse_vector<Val, SV>::find_rank(size_type rank,
+ size_type& idx) const BMNOEXCEPT
{
BM_ASSERT(rank);
bool b;
rsc_sparse_vector<Val, SV>::decode(value_type* arr,
size_type idx_from,
size_type size,
- bool /*zero_mem*/) const
+ bool zero_mem) const
{
if (size == 0)
return 0;
if ((bm::id_max - size) <= idx_from)
size = bm::id_max - idx_from;
+ if ((idx_from + size) > this->size())
+ size = this->size() - idx_from;
const bvector_type* bv_null = sv_.get_null_bvector();
+ size_type rank = bv_null->rank_corrected(idx_from, *bv_blocks_ptr_);
+
+ BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from));
- size_type rank = bv_null->count_to(idx_from, *bv_blocks_ptr_);
- bool b = bv_null->test(idx_from);
-
bvector_enumerator_type en_i = bv_null->get_enumerator(idx_from);
- size_type i = *en_i;
- if (idx_from + size <= i) // empty space (all zeros)
+ BM_ASSERT(en_i.valid());
+
+ if (zero_mem)
+ ::memset(arr, 0, sizeof(value_type)*size);
+
+ sparse_vector_const_iterator it = sv_.get_const_iterator(rank);
+ size_type i = 0;
+ if (it.valid())
{
+ do
+ {
+ size_type en_idx = *en_i;
+ size_type delta = en_idx - idx_from;
+ idx_from += delta;
+ i += delta;
+ if (i >= size)
+ return size;
+ arr[i++] = it.value();
+ if (!en_i.advance())
+ break;
+ if (!it.advance())
+ break;
+ ++idx_from;
+ } while (i < size);
+ }
+ return i;
+}
+
+
+template<class Val, class SV> // decode_buf: decode [idx_from, idx_from+size) into arr using arr_buf_tmp as scratch
+typename rsc_sparse_vector<Val, SV>::size_type
+rsc_sparse_vector<Val, SV>::decode_buf(value_type* arr,
+ value_type* arr_buf_tmp,
+ size_type idx_from,
+ size_type size,
+ bool zero_mem) const BMNOEXCEPT
+{
+ if (!size || (idx_from >= this->size())) // nothing requested or start is past the end
+ return 0;
+
+ BM_ASSERT(arr && arr_buf_tmp);
+ BM_ASSERT(arr != arr_buf_tmp);
+ BM_ASSERT(in_sync_); // call sync() before decoding
+ BM_ASSERT(bv_blocks_ptr_);
+
+ if ((bm::id_max - size) <= idx_from) // keep idx_from + size within bm::id_max
+ size = bm::id_max - idx_from;
+ if ((idx_from + size) > this->size()) // clamp the request to the vector size
+ size = this->size() - idx_from;
+
+ if (zero_mem) // NOTE(review): when zero_mem is false the early returns below leave arr untouched
 ::memset(arr, 0, sizeof(value_type)*size);
+
+ const bvector_type* bv_null = sv_.get_null_bvector();
+ size_type rank = bv_null->rank_corrected(idx_from, *bv_blocks_ptr_); // start index in the dense vector (see assert below)
+
+ BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from));
+
+ bvector_enumerator_type en_i = bv_null->get_enumerator(idx_from);
+ if (!en_i.valid()) // presumably: no assigned elements at or after idx_from
 return size;
- }
- rank -= b;
- sparse_vector_const_iterator it = sv_.get_const_iterator(rank);
- i = 0;
- while (it.valid())
+
+ size_type i = en_i.value();
+ if (idx_from + size <= i) // empty space (all zeros)
+ return size;
+
+ size_type extract_cnt =
+ bv_null->count_range(idx_from, idx_from + size - 1, *bv_blocks_ptr_); // assigned elements inside the requested window
+
+ BM_ASSERT(extract_cnt <= this->size());
+ auto ex_sz = sv_.decode(arr_buf_tmp, rank, extract_cnt, true); // dense values go to the temp buffer first
+ BM_ASSERT(ex_sz == extract_cnt); (void) ex_sz;
+
+ for (i = 0; i < extract_cnt; ++i)
 {
- if (!en_i.valid())
- break;
+ BM_ASSERT(en_i.valid());
 size_type en_idx = *en_i;
- while (idx_from < en_idx) // zero the empty prefix
- {
- arr[i] ^= arr[i];
- ++i; ++idx_from;
- if (i == size)
- return i;
- }
- BM_ASSERT(idx_from == en_idx);
- arr[i] = *it;
- ++i; ++idx_from;
- if (i == size)
- return i;
-
+ arr[en_idx-idx_from] = arr_buf_tmp[i]; // scatter the i-th dense value to its sparse position
 en_i.advance();
- it.advance();
- } // while
-
- return i;
+ } // for i
+
+ return size;
 }
+
//---------------------------------------------------------------------
template<class Val, class SV>
-void rsc_sparse_vector<Val, SV>::construct_bv_blocks()
+void rsc_sparse_vector<Val, SV>::construct_rs_index()
{
if (bv_blocks_ptr_)
return;
//---------------------------------------------------------------------
template<class Val, class SV>
-void rsc_sparse_vector<Val, SV>::free_bv_blocks()
+void rsc_sparse_vector<Val, SV>::free_rs_index()
{
if (bv_blocks_ptr_)
{
}
}
+//---------------------------------------------------------------------
+
+// Copy the [left, right] range of csv into this vector.
+// Size bookkeeping is taken from csv; the not-NULL vector is copied for
+// [0, right]; dense values are copied only when the range resolves to
+// at least one assigned element.
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::copy_range(
+                            const rsc_sparse_vector<Val, SV>& csv,
+                            size_type left, size_type right)
+{
+    if (left > right)
+        bm::xor_swap(left, right);
+    if (left >= csv.size())
+        return;
+
+    size_ = csv.size_;
+    max_id_ = csv.max_id_;
+    in_sync_ = false;
+
+    const bvector_type* src_not_null = csv.sv_.get_null_bvector();
+
+    size_type dense_from, dense_to;
+    if (csv.resolve_range(left, right, &dense_from, &dense_to))
+    {
+        bvector_type* dst_not_null = sv_.get_null_bvect();
+        dst_not_null->copy_range(*src_not_null, 0, right);
+        // dense values only, the NULL vector was handled above
+        sv_.copy_range(csv.sv_, dense_from, dense_to, bm::no_null);
+    }
+    else
+    {
+        // nothing assigned in the range: reset the dense part
+        sv_.clear();
+        sv_.resize(size_);
+        bvector_type* dst_not_null = sv_.get_null_bvect();
+        dst_not_null->copy_range(*src_not_null, 0, right);
+    }
+}
+
+
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::merge_not_null(rsc_sparse_vector<Val, SV>& csv)
+{
+ // Both vectors MUST have the same NULL vector for this to work (only asserted here)
+ BM_ASSERT(sv_.get_null_bvector()->equal(*csv.sv_.get_null_bvector()));
+
+ sv_.merge(csv.sv_); // merge the underlying dense vectors
+}
+
+
//---------------------------------------------------------------------
//
//---------------------------------------------------------------------
template<class Val, class SV>
-rsc_sparse_vector<Val, SV>::back_insert_iterator::back_insert_iterator()
+rsc_sparse_vector<Val, SV>::back_insert_iterator::back_insert_iterator() BMNOEXCEPT
: csv_(0)
{}
template<class Val, class SV>
rsc_sparse_vector<Val, SV>::back_insert_iterator::back_insert_iterator
- (rsc_sparse_vector_type* csv)
+ (rsc_sparse_vector_type* csv) BMNOEXCEPT
{
csv_ = csv;
sv_bi_ = csv->sv_.get_back_inserter();
//---------------------------------------------------------------------
template<class Val, class SV>
-void rsc_sparse_vector<Val, SV>::back_insert_iterator::add_null()
+void rsc_sparse_vector<Val, SV>::back_insert_iterator::add_null() BMNOEXCEPT
{
BM_ASSERT(csv_);
csv_->max_id_++;
template<class Val, class SV>
void rsc_sparse_vector<Val, SV>::back_insert_iterator::add_null(
- rsc_sparse_vector<Val, SV>::back_insert_iterator::size_type count)
+ rsc_sparse_vector<Val, SV>::back_insert_iterator::size_type count) BMNOEXCEPT
{
BM_ASSERT(csv_);
csv_->max_id_+=count;
csv_->in_sync_ = false;
}
+//---------------------------------------------------------------------
+//
+//---------------------------------------------------------------------
+
+// Default constructor: detached iterator (no parent, invalid position, no buffer)
+template<class Val, class SV>
+rsc_sparse_vector<Val, SV>::const_iterator::const_iterator() BMNOEXCEPT
+    : csv_(0),
+      pos_(bm::id_max),
+      buf_ptr_(0)
+{
+}
+
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+rsc_sparse_vector<Val, SV>::const_iterator::const_iterator(
+ const typename rsc_sparse_vector<Val, SV>::const_iterator& it) BMNOEXCEPT
+: csv_(it.csv_), pos_(it.pos_), buf_ptr_(0) // parent and position are copied; buf_ptr_ restarts at 0 (buffers are not copied)
+{}
+
+//---------------------------------------------------------------------
+
+// Construct an iterator for the start of csv (invalid when csv is empty)
+template<class Val, class SV>
+rsc_sparse_vector<Val, SV>::const_iterator::const_iterator(
+  const typename rsc_sparse_vector<Val, SV>::const_iterator::rsc_sparse_vector_type* csv
+  ) BMNOEXCEPT
+: csv_(csv), buf_ptr_(0)
+{
+    BM_ASSERT(csv_);
+    if (csv_->empty())
+        pos_ = bm::id_max;
+    else
+        pos_ = 0u;
+}
+
//---------------------------------------------------------------------
template<class Val, class SV>
-void rsc_sparse_vector<Val, SV>::copy_range(
- const rsc_sparse_vector<Val, SV>& csv,
- size_type left, size_type right)
+rsc_sparse_vector<Val, SV>::const_iterator::const_iterator(
+ const typename rsc_sparse_vector<Val, SV>::const_iterator::rsc_sparse_vector_type* csv,
+ typename rsc_sparse_vector<Val, SV>::size_type pos) BMNOEXCEPT
+: csv_(csv), buf_ptr_(0)
{
- if (left > right)
- bm::xor_swap(left, right);
+ BM_ASSERT(csv_);
+ this->go_to(pos); // initializes pos_ (bm::id_max when pos is out of range)
+}
- if (left >= csv.size())
- return;
-
- size_ = csv.size_; max_id_ = csv.max_id_;
- in_sync_ = false;
+//---------------------------------------------------------------------
- const bvector_type* arg_bv_null = csv.sv_.get_null_bvector();
- size_type sv_left, sv_right;
- bool range_valid = csv.resolve_range(left, right, &sv_left, &sv_right);
+// Re-position the iterator; a detached iterator or an out-of-range
+// position makes it invalid. The decode buffer is always dropped.
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::const_iterator::go_to(size_type pos) BMNOEXCEPT
+{
+    if (!csv_ || pos >= csv_->size())
+        pos_ = bm::id_max;
+    else
+        pos_ = pos;
+    buf_ptr_ = 0;
+}
- if (!range_valid)
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+bool rsc_sparse_vector<Val, SV>::const_iterator::advance() BMNOEXCEPT
+{
+ if (pos_ == bm::id_max) // nothing to do, we are at the end
+ return false;
+ ++pos_;
+ if (pos_ >= csv_->size()) // stepped past the last element
 {
- sv_.clear();
- sv_.resize(size_);
- bvector_type* bv_null = sv_.get_null_bvect();
- bv_null->copy_range(*arg_bv_null, 0, right);
- return;
+ this->invalidate();
+ return false;
+ }
+ if (buf_ptr_) // a decoded buffer is active: step inside it
+ {
+ ++buf_ptr_;
+ if (buf_ptr_ - ((value_type*)vbuffer_.data()) >= n_buf_size) // ran off the decoded chunk
+ buf_ptr_ = 0; // value() decodes again when buf_ptr_ is 0
+ }
+ return true;
+}
+
+//---------------------------------------------------------------------
+
+// Return the value at the current position. When no decoded buffer is
+// active, a chunk of n_buf_size elements starting at pos_ is decoded first.
+template<class Val, class SV>
+typename rsc_sparse_vector<Val, SV>::const_iterator::value_type
+rsc_sparse_vector<Val, SV>::const_iterator::value() const
+{
+    BM_ASSERT(this->valid());
+
+    if (buf_ptr_)
+        return *buf_ptr_;
+
+    // (re)fill the value buffer starting at the current position
+    vbuffer_.reserve(n_buf_size * sizeof(value_type));
+    tbuffer_.reserve(n_buf_size * sizeof(value_type));
+    buf_ptr_ = (value_type*)(vbuffer_.data());
+    value_type* scratch = (value_type*) (tbuffer_.data());
+
+    csv_->decode_buf(buf_ptr_, scratch, pos_, n_buf_size, true);
+    return *buf_ptr_;
+}
+
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+void rsc_sparse_vector<Val, SV>::const_iterator::skip_zero_values() BMNOEXCEPT
+{
+ value_type v = value(); // makes sure a decoded buffer is active; NOTE(review): value() is not declared BMNOEXCEPT - confirm it cannot throw here
+ if (buf_ptr_)
+ {
+ v = *buf_ptr_;
+ value_type* buf_end = ((value_type*)vbuffer_.data()) + n_buf_size; // one past the decoded chunk
+ while(!v) // step over zero values inside the current chunk
+ {
+ ++pos_;
+ if (++buf_ptr_ < buf_end)
+ v = *buf_ptr_;
+ else
+ break; // chunk exhausted: scanning stops here, the next chunk is not examined by this call
+ }
+ if (pos_ >= csv_->size()) // walked past the end of the vector
+ {
+ pos_ = bm::id_max;
+ return;
+ }
+ if (buf_ptr_ >= buf_end)
+ buf_ptr_ = 0; // value() decodes again when buf_ptr_ is 0
 }
- bvector_type* bv_null = sv_.get_null_bvect();
- bv_null->copy_range(*arg_bv_null, 0, right); // not NULL vector gets a full copy
- sv_.copy_range(csv.sv_, sv_left, sv_right, bm::no_null); // don't copy NULL
 }
+//---------------------------------------------------------------------
+
+template<class Val, class SV>
+bool rsc_sparse_vector<Val, SV>::const_iterator::is_null() const BMNOEXCEPT
+{
+ return csv_->is_null(pos_); // NULL status of the current position, asked from the parent vector
+}
+
+
+//---------------------------------------------------------------------
+
+
} // namespace bm
BM_ASSERT(h1 == 'B' && (h2 == 'M' || h2 == 'C'));
- if (h1 != 'B' && (h2 != 'M' || h2 != 'C')) // no magic header?
+ bool sig2_ok = (h2 == 'M' || h2 == 'C');
+ if (h1 != 'B' || !sig2_ok) //&& (h2 != 'M' || h2 != 'C')) // no magic header?
raise_invalid_header();
unsigned char bv_bo = dec.get_8(); (void) bv_bo;
/*!
\brief Move content from the argument address resolver
*/
- void move_from(bvps_addr_resolver& addr_res) BMNOEXEPT;
+ void move_from(bvps_addr_resolver& addr_res) BMNOEXCEPT;
/*!
\brief Resolve id to integer id (address)
\return true if id is known and resolved successfully
*/
- bool resolve(size_type id_from, size_type* id_to) const;
+ bool resolve(size_type id_from, size_type* id_to) const BMNOEXCEPT;
/*!
\brief Resolve id to integer id (address) without sync check
\return true if id is known and resolved successfully
*/
- bool get(size_type id_from, size_type* id_to) const;
+ bool get(size_type id_from, size_type* id_to) const BMNOEXCEPT;
/*!
\brief Set id (bit) to address resolver
/*!
\brief equality comparison
*/
- bool equal(const bvps_addr_resolver& addr_res) const;
+ bool equal(const bvps_addr_resolver& addr_res) const BMNOEXCEPT;
protected:
void construct_rs_index();
template<class BV>
-void bvps_addr_resolver<BV>::move_from(bvps_addr_resolver& addr_res) BMNOEXEPT
+void bvps_addr_resolver<BV>::move_from(bvps_addr_resolver& addr_res) BMNOEXCEPT
{
if (this != &addr_res)
{
//---------------------------------------------------------------------
template<class BV>
-bool bvps_addr_resolver<BV>::resolve(size_type id_from, size_type* id_to) const
+bool bvps_addr_resolver<BV>::resolve(size_type id_from,
+ size_type* id_to) const BMNOEXCEPT
{
BM_ASSERT(id_to);
if (in_sync_)
//---------------------------------------------------------------------
template<class BV>
-bool bvps_addr_resolver<BV>::get(size_type id_from, size_type* id_to) const
+bool bvps_addr_resolver<BV>::get(size_type id_from,
+ size_type* id_to) const BMNOEXCEPT
{
BM_ASSERT(id_to);
BM_ASSERT(in_sync_);
//---------------------------------------------------------------------
template<class BV>
-bool bvps_addr_resolver<BV>::equal(const bvps_addr_resolver& addr_res) const
+bool bvps_addr_resolver<BV>::equal(
+ const bvps_addr_resolver& addr_res) const BMNOEXCEPT
{
- int cmp = addr_bv_.compare(addr_res.addr_bv_);
- return (cmp == 0);
+ return addr_bv_.equal(addr_res.addr_bv_); // resolvers are equal when their address bit-vectors are equal
}
//---------------------------------------------------------------------
}
return size;
}
+
+/**
+ Hybrid binary search, starts as binary, then switches to linear scan
+
+ Buffers with fewer than 17 data elements skip the binary phase and are
+ searched directly with bm::sse2_gap_find(). Longer buffers are bisected
+ until the remaining window is under 16 elements, then the window is
+ finished with bm::sse2_gap_find().
+
+ \param buf - GAP buffer pointer. Word 0 is a header: (header >> 3) is used
+ as the number of data elements and header bit 0 feeds is_set.
+ \param pos - index of the element (cast to bm::gap_word_t for the scan).
+ \param is_set - output. GAP value (0 or 1), computed as header bit 0 XOR
+ the parity of (returned index - 1).
+ \return GAP index: position in buf of the element found for pos. The debug
+ assertions check that buf[index] >= pos and that the previous
+ element is < pos (or that index is the first data element).
+
+ @ingroup SSE2
+*/
+inline
+unsigned sse2_gap_bfind(const unsigned short* BMRESTRICT buf,
+ unsigned pos, unsigned* BMRESTRICT is_set)
+{
+ unsigned start = 1; // data elements begin right after the header word
+ unsigned end = 1 + ((*buf) >> 3); // one past the last data element
+ unsigned dsize = end - start;
+
+ if (dsize < 17) // short buffer: no bisection, scan it in one call
+ {
+ start = bm::sse2_gap_find(buf+1, (bm::gap_word_t)pos, dsize);
+ *is_set = ((*buf) & 1) ^ (start & 1); // start is (index - 1) here
+ BM_ASSERT(buf[start+1] >= pos);
+ BM_ASSERT(buf[start] < pos || (start==0));
+
+ return start+1; // convert offset within buf+1 into an index in buf
+ }
+ unsigned arr_end = end; // true end of data; 'end' shrinks while bisecting
+ while (start != end)
+ {
+ unsigned curr = (start + end) >> 1;
+ if (buf[curr] < pos)
+ start = curr + 1;
+ else
+ end = curr;
+
+ unsigned size = end - start;
+ if (size < 16) // window is small enough to finish with the SIMD scan
+ {
+ size += (end != arr_end); // also scan buf[end] unless end is past the data
+ unsigned idx =
+ bm::sse2_gap_find(buf + start, (bm::gap_word_t)pos, size);
+ start += idx; // idx is relative to the window start
+
+ BM_ASSERT(buf[start] >= pos);
+ BM_ASSERT(buf[start - 1] < pos || (start == 1));
+ break;
+ }
+ }
+
+ *is_set = ((*buf) & 1) ^ ((start-1) & 1);
+ return start;
+}
+
+/**
+ Hybrid binary search, starts as binary, then switches to scan
+
+ Thin wrapper over bm::sse2_gap_bfind(): runs the search and discards the
+ returned index.
+
+ \param buf - GAP buffer pointer.
+ \param pos - index of the element.
+ \return the value sse2_gap_bfind() reports through its is_set output.
+
+ @ingroup SSE2
+*/
+inline
+unsigned sse2_gap_test(const unsigned short* BMRESTRICT buf, unsigned pos)
+{
+ unsigned is_set; // written by sse2_gap_bfind before it returns
+ bm::sse2_gap_bfind(buf, pos, &is_set);
+ return is_set;
+}
+
+
#ifdef __GNUG__
#pragma GCC diagnostic pop
#endif
#define VECT_SET_BLOCK(dst, value) \
sse2_set_block((__m128i*) dst, value)
+#define VECT_GAP_BFIND(buf, pos, is_set) \
+ sse2_gap_bfind(buf, pos, is_set)
} // namespace
return true;
}
+/*!
+    @brief check if SSE wave is all 0xFFFF...FFF
+
+    Reads 128 bits starting at ptr with an unaligned load and tests whether
+    every bit is set.
+
+    @param ptr - start of the 128-bit wave to test (read only)
+    @return true if all 128 bits are ones
+    @ingroup SSE4
+*/
+BMFORCEINLINE
+bool sse42_test_all_one_wave(const void* ptr)
+{
+    // keep the const qualifier in the cast: the load only reads through ptr
+    return _mm_test_all_ones(_mm_loadu_si128((const __m128i*)ptr));
+}
+
+
/*!
@brief check if wave of pointers is all NULL
@ingroup SSE4
#endif
/*!
- SSE4.2 check for one to two (variable len) 128 bit SSE lines for gap search results (8 elements)
+ SSE4.2 check for one to two (variable len) 128 bit SSE lines
+ for gap search results (8 elements)
@ingroup SSE4
\internal
*/
inline
-unsigned sse4_gap_find(const bm::gap_word_t* BMRESTRICT pbuf, const bm::gap_word_t pos, const unsigned size)
+unsigned sse4_gap_find(const bm::gap_word_t* BMRESTRICT pbuf,
+ const bm::gap_word_t pos, const unsigned size)
{
BM_ASSERT(size <= 16);
BM_ASSERT(size);
return size - bc;
}
+/**
+ Hybrid binary search, starts as binary, then switches to linear scan
+
+ Buffers with fewer than 17 data elements skip the binary phase and are
+ searched directly with bm::sse4_gap_find(). Longer buffers are bisected
+ until the remaining window is under 16 elements, then the window is
+ finished with bm::sse4_gap_find().
+
+ \param buf - GAP buffer pointer. Word 0 is a header: (header >> 3) is used
+ as the number of data elements and header bit 0 feeds is_set.
+ \param pos - index of the element (cast to bm::gap_word_t for the scan).
+ \param is_set - output. GAP value (0 or 1), computed as header bit 0 XOR
+ the parity of (returned index - 1).
+ \return GAP index: position in buf of the element found for pos. The debug
+ assertions check that buf[index] >= pos and that the previous
+ element is < pos (or that index is the first data element).
+
+ @ingroup SSE4
+*/
+inline
+unsigned sse42_gap_bfind(const unsigned short* BMRESTRICT buf,
+ unsigned pos, unsigned* BMRESTRICT is_set)
+{
+ unsigned start = 1; // data elements begin right after the header word
+ unsigned end = 1 + ((*buf) >> 3); // one past the last data element
+ unsigned dsize = end - start;
+
+ if (dsize < 17) // short buffer: no bisection, scan it in one call
+ {
+ start = bm::sse4_gap_find(buf+1, (bm::gap_word_t)pos, dsize);
+ *is_set = ((*buf) & 1) ^ (start & 1); // start is (index - 1) here
+ BM_ASSERT(buf[start+1] >= pos);
+ BM_ASSERT(buf[start] < pos || (start==0));
+
+ return start+1; // convert offset within buf+1 into an index in buf
+ }
+ unsigned arr_end = end; // true end of data; 'end' shrinks while bisecting
+ while (start != end)
+ {
+ unsigned curr = (start + end) >> 1;
+ if (buf[curr] < pos)
+ start = curr + 1;
+ else
+ end = curr;
+
+ unsigned size = end - start;
+ if (size < 16) // window is small enough to finish with the SIMD scan
+ {
+ size += (end != arr_end); // also scan buf[end] unless end is past the data
+ unsigned idx =
+ bm::sse4_gap_find(buf + start, (bm::gap_word_t)pos, size);
+ start += idx; // idx is relative to the window start
+
+ BM_ASSERT(buf[start] >= pos);
+ BM_ASSERT(buf[start - 1] < pos || (start == 1));
+ break;
+ }
+ }
+
+ *is_set = ((*buf) & 1) ^ ((start-1) & 1);
+ return start;
+}
+
+/**
+ Hybrid binary search, starts as binary, then switches to scan
+
+ Thin wrapper over bm::sse42_gap_bfind(): runs the search and discards the
+ returned index.
+
+ \param buf - GAP buffer pointer.
+ \param pos - index of the element.
+ \return the value sse42_gap_bfind() reports through its is_set output.
+
+ @ingroup SSE4
+*/
+inline
+unsigned sse42_gap_test(const unsigned short* BMRESTRICT buf, unsigned pos)
+{
+ unsigned is_set; // written by sse42_gap_bfind before it returns
+ bm::sse42_gap_bfind(buf, pos, &is_set);
+ return is_set;
+}
+
+
+
/**
Experimental (test) function to do SIMD vector search (lower bound)
in sorted, growing array
#define VECT_BIT_BLOCK_XOR(t, src, src_xor, d) \
sse42_bit_block_xor(t, src, src_xor, d)
+#define VECT_GAP_BFIND(buf, pos, is_set) \
+ sse42_gap_bfind(buf, pos, is_set)
#ifdef __GNUG__
#pragma GCC diagnostic pop
inline
void sse2_invert_block(__m128i* dst)
{
- //__m128i mZ = _mm_setzero_si128();
- //__m128i maskF = _mm_cmpeq_epi8(mZ, mZ); // 0xFF..
-
__m128i maskF = _mm_set1_epi32(~0u);
__m128i* BMRESTRICT dst_end =
(__m128i*)((bm::word_t*)(dst) + bm::set_block_size);
{
public:
const_reference(const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv,
- size_type idx) BMNOEXEPT
+ size_type idx) BMNOEXCEPT
: str_sv_(str_sv), idx_(idx)
{}
- operator const value_type*() const
+ operator const value_type*() const BMNOEXCEPT
{
str_sv_.get(idx_, buf_, MAX_STR_SIZE);
return &(buf_[0]);
}
- bool operator==(const const_reference& ref) const
+ bool operator==(const const_reference& ref) const BMNOEXCEPT
{ return bool(*this) == bool(ref); }
- bool is_null() const { return str_sv_.is_null(idx_); }
+ bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
private:
const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv_;
size_type idx_;
{
public:
reference(str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv,
- size_type idx) BMNOEXEPT
+ size_type idx) BMNOEXCEPT
: str_sv_(str_sv), idx_(idx)
{}
- operator const value_type*() const
+ operator const value_type*() const BMNOEXCEPT
{
str_sv_.get(idx_, buf_, MAX_STR_SIZE);
return &(buf_[0]);
str_sv_.set(idx_, str);
return *this;
}
- bool operator==(const reference& ref) const
+ bool operator==(const reference& ref) const BMNOEXCEPT
{ return bool(*this) == bool(ref); }
- bool is_null() const { return str_sv_.is_null(idx_); }
+ bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
private:
str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv_;
size_type idx_;
typedef CharType* pointer;
typedef CharType*& reference;
public:
- const_iterator();
- const_iterator(const str_sparse_vector_type* sv);
- const_iterator(const str_sparse_vector_type* sv, size_type pos);
- const_iterator(const const_iterator& it);
+ const_iterator() BMNOEXCEPT;
+ const_iterator(const str_sparse_vector_type* sv) BMNOEXCEPT;
+ const_iterator(const str_sparse_vector_type* sv, size_type pos) BMNOEXCEPT;
+ const_iterator(const const_iterator& it) BMNOEXCEPT;
- bool operator==(const const_iterator& it) const
+ bool operator==(const const_iterator& it) const BMNOEXCEPT
{ return (pos_ == it.pos_) && (sv_ == it.sv_); }
- bool operator!=(const const_iterator& it) const
+ bool operator!=(const const_iterator& it) const BMNOEXCEPT
{ return ! operator==(it); }
- bool operator < (const const_iterator& it) const
+ bool operator < (const const_iterator& it) const BMNOEXCEPT
{ return pos_ < it.pos_; }
- bool operator <= (const const_iterator& it) const
+ bool operator <= (const const_iterator& it) const BMNOEXCEPT
{ return pos_ <= it.pos_; }
- bool operator > (const const_iterator& it) const
+ bool operator > (const const_iterator& it) const BMNOEXCEPT
{ return pos_ > it.pos_; }
- bool operator >= (const const_iterator& it) const
+ bool operator >= (const const_iterator& it) const BMNOEXCEPT
{ return pos_ >= it.pos_; }
/// \brief Get current position (value)
- const value_type* operator*() const { return this->value(); }
+ const value_type* operator*() const BMNOEXCEPT { return this->value(); }
/// \brief Advance to the next available value
- const_iterator& operator++() { this->advance(); return *this; }
+ const_iterator& operator++() BMNOEXCEPT
+ { this->advance(); return *this; }
/// \brief Advance to the next available value
- const_iterator& operator++(int)
+ /// Post-increment: returns the pre-advance copy BY VALUE. Returning a
+ /// reference here would bind to the local copy and dangle after return.
+ const_iterator operator++(int) BMNOEXCEPT
{ const_iterator tmp(*this);this->advance(); return tmp; }
/// \brief Get current position (value)
- const value_type* value() const;
+ const value_type* value() const BMNOEXCEPT;
/// \brief Get NULL status
- bool is_null() const { return sv_->is_null(this->pos_); }
+ bool is_null() const BMNOEXCEPT { return sv_->is_null(this->pos_); }
/// Returns true if iterator is at a valid position
- bool valid() const { return pos_ != bm::id_max; }
+ bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
/// Invalidate current iterator
- void invalidate() { pos_ = bm::id_max; }
+ void invalidate() BMNOEXCEPT { pos_ = bm::id_max; }
/// Current position (index) in the vector
- size_type pos() const { return pos_; }
+ size_type pos() const BMNOEXCEPT { return pos_; }
/// re-position to a specified position
- void go_to(size_type pos);
+ void go_to(size_type pos) BMNOEXCEPT;
/// advance iterator forward by one
- void advance();
+ void advance() BMNOEXCEPT;
protected:
typedef bm::heap_matrix<CharType,
typedef void reference;
public:
- back_insert_iterator();
- back_insert_iterator(str_sparse_vector_type* sv);
- back_insert_iterator(const back_insert_iterator& bi);
+ back_insert_iterator() BMNOEXCEPT;
+ back_insert_iterator(str_sparse_vector_type* sv) BMNOEXCEPT;
+ back_insert_iterator(const back_insert_iterator& bi) BMNOEXCEPT;
back_insert_iterator& operator=(const back_insert_iterator& bi)
{
void add_null(size_type count);
/** return true if insertion buffer is empty */
- bool empty() const;
+ bool empty() const BMNOEXCEPT;
/** flush the accumulated buffer */
void flush();
}
#ifndef BM_NO_CXX11
/*! move-ctor */
- str_sparse_vector(str_sparse_vector<CharType, BV, MAX_STR_SIZE>&& str_sv) BMNOEXEPT
+ str_sparse_vector(str_sparse_vector<CharType, BV, MAX_STR_SIZE>&& str_sv) BMNOEXCEPT
{
parent_type::swap(str_sv);
remap_flags_ = str_sv.remap_flags_;
/*! move assignmment operator */
str_sparse_vector<CharType, BV, MAX_STR_SIZE>& operator =
- (str_sparse_vector<CharType, BV, MAX_STR_SIZE>&& str_sv) BMNOEXEPT
+ (str_sparse_vector<CharType, BV, MAX_STR_SIZE>&& str_sv) BMNOEXCEPT
{
if (this != &str_sv)
{
@return string length
*/
- size_type get(size_type idx, value_type* str, size_type buf_size) const;
+ size_type get(size_type idx,
+ value_type* str, size_type buf_size) const BMNOEXCEPT;
/*!
\brief set specified element with bounds checking and automatic resize
}
/*! Swap content */
- void swap(str_sparse_vector& str_sv) BMNOEXEPT;
+ void swap(str_sparse_vector& str_sv) BMNOEXCEPT;
///@}
\return 0 - equal, < 0 - vect[i] < str, >0 otherwise
*/
- int compare(size_type idx, const value_type* str) const;
+ int compare(size_type idx, const value_type* str) const BMNOEXCEPT;
/**
\brief Find size of common prefix between two vector elements in octets
\return size of common prefix
*/
- unsigned common_prefix_length(size_type idx1, size_type idx2) const;
+ unsigned common_prefix_length(size_type idx1, size_type idx2) const BMNOEXCEPT;
///@}
///@{
/*! \brief resize to zero, free memory */
- void clear() BMNOEXEPT;
+ void clear() BMNOEXCEPT;
/*!
\brief clear range (assign bit 0 for all plains)
static size_type max_str() { return sv_octet_plains; }
/*! \brief get effective string length used in vector
-
- Method returns efficiency, how close are we
- to reserved maximum.
-
+ Calculates and returns efficiency: how close we are
+ to the reserved maximum.
\return current string length maximum
*/
- size_type effective_max_str() const;
+ size_type effective_max_str() const BMNOEXCEPT;
/*! \brief get effective string length used in vector
\return current string length maximum
@sa statistics
*/
- void calc_stat(struct str_sparse_vector<CharType, BV, MAX_STR_SIZE>::statistics* st) const;
+ void calc_stat(
+ struct str_sparse_vector<CharType, BV, MAX_STR_SIZE>::statistics* st
+ ) const BMNOEXCEPT;
///@}
//@{
/** Provide const iterator access to container content */
- const_iterator begin() const;
+ const_iterator begin() const BMNOEXCEPT;
/** Provide const iterator access to the end */
- const_iterator end() const { return const_iterator(this, bm::id_max); }
+ const_iterator end() const BMNOEXCEPT { return const_iterator(this, bm::id_max); }
/** Get const_itertor re-positioned to specific element
@param idx - position in the sparse vector
*/
- const_iterator get_const_iterator(size_type idx) const
+ const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
{ return const_iterator(this, idx); }
/** Provide back insert iterator
/** \brief trait if sparse vector is "compressed" (false)
*/
static
- bool is_compressed() { return false; }
+ bool is_compressed() BMNOEXCEPT { return false; }
///@}
/**
Get remapping status (true|false)
*/
- bool is_remap() const { return remap_flags_ != 0; }
+ bool is_remap() const BMNOEXCEPT { return remap_flags_ != 0; }
/**
Build remapping profile and load content from another sparse vector
Calculate flags which octets are present on each byte-plain.
@internal
*/
- void calc_octet_stat(plain_octet_matrix_type& octet_matrix) const;
+ void calc_octet_stat(plain_octet_matrix_type& octet_matrix) const BMNOEXCEPT;
static
void build_octet_remap(
@internal
*/
static
- bool remap_tosv(value_type* sv_str,
- size_type buf_size,
- const value_type* str,
- const plain_octet_matrix_type& octet_remap_matrix2);
+ bool remap_tosv(value_type* BMRESTRICT sv_str,
+ size_type buf_size,
+ const value_type* BMRESTRICT str,
+ const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix2
+ ) BMNOEXCEPT;
/*!
remap string from external (ASCII) system to matrix internal code
*/
bool remap_tosv(value_type* sv_str,
size_type buf_size,
- const value_type* str) const
+ const value_type* str) const BMNOEXCEPT
{
return remap_tosv(sv_str, buf_size, str, remap_matrix2_);
}
@internal
*/
static
- bool remap_fromsv(value_type* str,
- size_type buf_size,
- const value_type* sv_str,
- const plain_octet_matrix_type& octet_remap_matrix1);
+ bool remap_fromsv(
+ value_type* BMRESTRICT str,
+ size_type buf_size,
+ const value_type* BMRESTRICT sv_str,
+ const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix1
+ ) BMNOEXCEPT;
/*!
re-calculate remap matrix2 based on matrix1
@internal
\return true, if it is the same
*/
bool equal(const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& sv,
- bm::null_support null_able = bm::use_null) const;
+ bm::null_support null_able = bm::use_null) const BMNOEXCEPT;
/**
\brief find position of compressed element by its rank
*/
static
- bool find_rank(size_type rank, size_type& pos);
+ bool find_rank(size_type rank, size_type& pos) BMNOEXCEPT;
/**
\brief size of sparse vector (may be different for RSC)
*/
- size_type effective_size() const { return size(); }
+ size_type effective_size() const BMNOEXCEPT { return size(); }
protected:
//---------------------------------------------------------------------
template<class CharType, class BV, unsigned MAX_STR_SIZE>
-void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::swap(str_sparse_vector& str_sv) BMNOEXEPT
+void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::swap(
+ str_sparse_vector& str_sv) BMNOEXCEPT
{
parent_type::swap(str_sv);
bm::xor_swap(remap_flags_, str_sv.remap_flags_);
template<class CharType, class BV, unsigned MAX_STR_SIZE>
typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type
str_sparse_vector<CharType, BV, MAX_STR_SIZE>::get(
- size_type idx, value_type* str, size_type buf_size) const
+ size_type idx, value_type* str, size_type buf_size) const BMNOEXCEPT
{
size_type i = 0;
for (; i < MAX_STR_SIZE; ++i)
template<class CharType, class BV, unsigned MAX_STR_SIZE>
void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::calc_stat(
- struct str_sparse_vector<CharType, BV, MAX_STR_SIZE>::statistics* st) const
+ struct str_sparse_vector<CharType, BV, MAX_STR_SIZE>::statistics* st
+ ) const BMNOEXCEPT
{
BM_ASSERT(st);
typename bvector_type::statistics stbv;
template<class CharType, class BV, unsigned MAX_STR_SIZE>
int str_sparse_vector<CharType, BV, MAX_STR_SIZE>::compare(
size_type idx,
- const value_type* str) const
+ const value_type* str) const BMNOEXCEPT
{
BM_ASSERT(str);
int res = 0;
template<class CharType, class BV, unsigned MAX_STR_SIZE>
unsigned str_sparse_vector<CharType, BV, MAX_STR_SIZE>::common_prefix_length(
- size_type idx1, size_type idx2) const
+ size_type idx1, size_type idx2) const BMNOEXCEPT
{
unsigned i = 0;
for (; i < MAX_STR_SIZE; ++i)
template<class CharType, class BV, unsigned MAX_STR_SIZE>
bool
-str_sparse_vector<CharType, BV, MAX_STR_SIZE>::find_rank(size_type rank,
- size_type& pos)
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::find_rank(
+ size_type rank,
+ size_type& pos) BMNOEXCEPT
{
BM_ASSERT(rank);
pos = rank - 1;
template<class CharType, class BV, unsigned MAX_STR_SIZE>
typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type
-str_sparse_vector<CharType, BV, MAX_STR_SIZE>::effective_max_str() const
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::effective_max_str()
+ const BMNOEXCEPT
{
for (int i = MAX_STR_SIZE-1; i >= 0; --i)
{
template<class CharType, class BV, unsigned MAX_STR_SIZE>
void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::calc_octet_stat(
- plain_octet_matrix_type& octet_matrix) const
+ plain_octet_matrix_type& octet_matrix) const BMNOEXCEPT
{
octet_matrix.init();
octet_matrix.set_zero();
template<class CharType, class BV, unsigned MAX_STR_SIZE>
bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_tosv(
- value_type* sv_str,
- size_type buf_size,
- const value_type* str,
- const plain_octet_matrix_type& octet_remap_matrix2)
+ value_type* BMRESTRICT sv_str,
+ size_type buf_size,
+ const value_type* BMRESTRICT str,
+ const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
{
for (unsigned i = 0; i < buf_size; ++i)
{
template<class CharType, class BV, unsigned MAX_STR_SIZE>
bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_fromsv(
- value_type* str,
- size_type buf_size,
- const value_type* sv_str,
- const plain_octet_matrix_type& octet_remap_matrix1)
+ value_type* BMRESTRICT str,
+ size_type buf_size,
+ const value_type* BMRESTRICT sv_str,
+ const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix1
+ ) BMNOEXCEPT
{
for (unsigned i = 0; i < buf_size; ++i)
{
//---------------------------------------------------------------------
template<class CharType, class BV, unsigned MAX_STR_SIZE>
-void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_from(const str_sparse_vector& str_sv)
+void
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_from(const str_sparse_vector& str_sv)
{
if (str_sv.is_remap())
{
template<class CharType, class BV, unsigned MAX_STR_SIZE>
bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::equal(
const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& sv,
- bm::null_support null_able) const
+ bm::null_support null_able) const BMNOEXCEPT
{
// at this point both vectors should have the same remap settings
// to be considered "equal".
template<class CharType, class BV, unsigned MAX_STR_SIZE>
typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator
-str_sparse_vector<CharType, BV, MAX_STR_SIZE>::begin() const
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::begin() const BMNOEXCEPT
{
typedef typename
str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator it_type;
//---------------------------------------------------------------------
template<class CharType, class BV, unsigned MAX_STR_SIZE>
-void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::clear() BMNOEXEPT
+void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::clear() BMNOEXCEPT
{
parent_type::clear();
}
template<class CharType, class BV, unsigned MAX_STR_SIZE>
-str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator()
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator() BMNOEXCEPT
: sv_(0), pos_(bm::id_max), pos_in_buf_(~size_type(0))
{}
template<class CharType, class BV, unsigned MAX_STR_SIZE>
str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator(
- const str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator& it)
+ const str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator& it) BMNOEXCEPT
: sv_(it.sv_), pos_(it.pos_), pos_in_buf_(~size_type(0))
{}
template<class CharType, class BV, unsigned MAX_STR_SIZE>
str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator(
- const str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv)
+ const str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv) BMNOEXCEPT
: sv_(sv), pos_(sv->empty() ? bm::id_max : 0), pos_in_buf_(~size_type(0))
{}
template<class CharType, class BV, unsigned MAX_STR_SIZE>
str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator(
const str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv,
- typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type pos)
+ typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type pos) BMNOEXCEPT
: sv_(sv), pos_(pos >= sv->size() ? bm::id_max : pos), pos_in_buf_(~size_type(0))
{}
template<class CharType, class BV, unsigned MAX_STR_SIZE>
const typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::value_type*
-str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::value() const
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::value() const BMNOEXCEPT
{
BM_ASSERT(sv_);
BM_ASSERT(this->valid());
//---------------------------------------------------------------------
template<class CharType, class BV, unsigned MAX_STR_SIZE>
-void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::go_to(
- typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type pos)
+void
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::go_to(
+ typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type pos
+ ) BMNOEXCEPT
{
pos_ = (!sv_ || pos >= sv_->size()) ? bm::id_max : pos;
pos_in_buf_ = ~size_type(0);
//---------------------------------------------------------------------
template<class CharType, class BV, unsigned MAX_STR_SIZE>
-void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::advance()
+void
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::advance() BMNOEXCEPT
{
if (pos_ == bm::id_max) // nothing to do, we are at the end
return;
//---------------------------------------------------------------------
template<class CharType, class BV, unsigned MAX_STR_SIZE>
-str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert_iterator()
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert_iterator() BMNOEXCEPT
: sv_(0), bv_null_(0), pos_in_buf_(~size_type(0)), prev_nb_(0)
{}
template<class CharType, class BV, unsigned MAX_STR_SIZE>
str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert_iterator(
- str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv)
+ str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv) BMNOEXCEPT
: sv_(sv), pos_in_buf_(~size_type(0))
{
if (sv)
template<class CharType, class BV, unsigned MAX_STR_SIZE>
str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert_iterator(
-const str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator& bi)
+const str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator& bi) BMNOEXCEPT
: sv_(bi.sv_), bv_null_(bi.bv_null_), pos_in_buf_(~size_type(0)), prev_nb_(bi.prev_nb_)
{
BM_ASSERT(bi.empty());
//---------------------------------------------------------------------
template<class CharType, class BV, unsigned MAX_STR_SIZE>
-bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::empty() const
+bool
+str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::empty()
+ const BMNOEXCEPT
{
return (pos_in_buf_ == ~size_type(0) || !sv_);
}
std::chrono::duration<double, std::milli> duration;
unsigned repeats;
- statistics() : repeats(1) {}
+ statistics() : duration(0), repeats(1) {}
statistics(std::chrono::duration<double, std::milli> d, unsigned r)
: duration(d), repeats(r)
if (ms > 1000)
{
double sec = ms / 1000;
- std::cout << it->first << "; " << std::setprecision(4) << sec << " sec" << std::endl;
+ if (sec > 60)
+ {
+ double min = sec / 60;
+ std::cout << it->first << "; " << std::setprecision(4) << min << " min" << std::endl;
+ }
+ else
+ std::cout << it->first << "; " << std::setprecision(4) << sec << " sec" << std::endl;
}
else
std::cout << it->first << "; " << it->second.duration.count() << " ms" << std::endl;
#undef VECT_ARR_BLOCK_LOOKUP
#undef VECT_SET_BLOCK_BITS
+
#undef VECT_BLOCK_CHANGE
+#undef VECT_BLOCK_CHANGE_BC
+
#undef VECT_BIT_TO_GAP
#undef VECT_AND_DIGEST
#undef VECT_AND_DIGEST_5WAY
#undef VECT_BLOCK_SET_DIGEST
+#undef VECT_BLOCK_XOR_CHANGE
+#undef VECT_BIT_BLOCK_XOR
+
+#undef VECT_BIT_FIND_FIRST
#undef VECT_BIT_FIND_DIFF
+#undef VECT_GAP_BFIND
#undef BMI1_SELECT64
#undef BMI2_SELECT64
bm::word_t* end() { return (b_.w32 + bm::set_block_size); }
};
-
/**
Get minimum of 2 values
*/
template<typename T>
-T min_value(T v1, T v2)
+T min_value(T v1, T v2) BMNOEXCEPT
{
return v1 < v2 ? v1 : v2;
}
+/**
+ \brief ad-hoc conditional expressions
+
+ conditional<b>::test() returns true from the primary template and false
+ from the conditional<false> specialization, i.e. it yields the value of
+ the template argument b.
+ \internal
+*/
+template <bool b> struct conditional
+{
+ static bool test() { return true; } // primary template: used when b is true
+};
+template <> struct conditional<false>
+{
+ static bool test() { return false; } // explicit specialization for b == false
+};
+
/**
Fast loop-less function to find LOG2
*/
template<typename T>
-T ilog2(T x)
+T ilog2(T x) BMNOEXCEPT
{
unsigned int l = 0;
}
template<>
-inline bm::gap_word_t ilog2(gap_word_t x)
+inline bm::gap_word_t ilog2(gap_word_t x) BMNOEXCEPT
{
unsigned int l = 0;
if (x >= 1<<8) { x = (bm::gap_word_t)(x >> 8); l |= 8; }
class ptr_guard
{
public:
- ptr_guard(T* p) : ptr_(p) {}
+ ptr_guard(T* p) BMNOEXCEPT : ptr_(p) {}
~ptr_guard() { delete ptr_; }
private:
ptr_guard(const ptr_guard<T>& p);
@ingroup bitfunc
@internal
*/
-inline
-unsigned count_leading_zeros(unsigned x)
+inline unsigned count_leading_zeros(unsigned x) BMNOEXCEPT
{
unsigned n =
(x >= (1U << 16)) ?
@internal
*/
inline
-unsigned count_trailing_zeros(unsigned v)
+unsigned count_trailing_zeros(unsigned v) BMNOEXCEPT
{
// (v & -v) isolates the last set bit
return unsigned(bm::tzcnt_table<true>::_lut[(-v & v) % 37]);
Lookup table based integer LOG2
*/
template<typename T>
-T ilog2_LUT(T x)
+T ilog2_LUT(T x) BMNOEXCEPT
{
unsigned l = 0;
if (x & 0xffff0000)
Lookup table based short integer LOG2
*/
template<>
-inline bm::gap_word_t ilog2_LUT<bm::gap_word_t>(bm::gap_word_t x)
+inline bm::gap_word_t ilog2_LUT<bm::gap_word_t>(bm::gap_word_t x) BMNOEXCEPT
{
bm::gap_word_t l = 0;
if (x & 0xff00)
#ifdef __GNUG__
BMFORCEINLINE
-unsigned bsf_asm32(unsigned int v)
+unsigned bsf_asm32(unsigned int v) BMNOEXCEPT
{
unsigned r;
asm volatile(" bsfl %1, %0": "=r"(r): "rm"(v) );
}
BMFORCEINLINE
-unsigned bsr_asm32(unsigned int v)
+unsigned bsr_asm32(unsigned int v) BMNOEXCEPT
{
unsigned r;
asm volatile(" bsrl %1, %0": "=r"(r): "rm"(v) );
#if defined(_M_AMD64) || defined(_M_X64) // inline assembly not supported
BMFORCEINLINE
-unsigned int bsr_asm32(unsigned int value)
+unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
{
unsigned long r;
_BitScanReverse(&r, value);
}
BMFORCEINLINE
-unsigned int bsf_asm32(unsigned int value)
+unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
{
unsigned long r;
_BitScanForward(&r, value);
#else
BMFORCEINLINE
-unsigned int bsr_asm32(unsigned int value)
+unsigned int bsr_asm32(unsigned int value) BMNOEXCEPT
{
__asm bsr eax, value
}
BMFORCEINLINE
-unsigned int bsf_asm32(unsigned int value)
+unsigned int bsf_asm32(unsigned int value) BMNOEXCEPT
{
__asm bsf eax, value
}
// http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.8562
//
template<typename T>
-T bit_scan_fwd(T v)
+T bit_scan_fwd(T v) BMNOEXCEPT
{
return
DeBruijn_bit_position<true>::_multiply[(((v & -v) * 0x077CB531U)) >> 27];
}
inline
-unsigned bit_scan_reverse32(unsigned value)
+unsigned bit_scan_reverse32(unsigned value) BMNOEXCEPT
{
BM_ASSERT(value);
#if defined(BM_USE_GCC_BUILD)
}
inline
-unsigned bit_scan_forward32(unsigned value)
+unsigned bit_scan_forward32(unsigned value) BMNOEXCEPT
{
BM_ASSERT(value);
#if defined(BM_USE_GCC_BUILD)
BMFORCEINLINE
-unsigned long long bmi_bslr_u64(unsigned long long w)
+unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
{
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
return _blsr_u64(w);
/// 64-bit bit-scan reverse
inline
-unsigned count_leading_zeros_u64(bm::id64_t w)
+unsigned count_leading_zeros_u64(bm::id64_t w) BMNOEXCEPT
{
BM_ASSERT(w);
#if defined(BMAVX2OPT) || defined (BMAVX512OPT)
/// 64-bit bit-scan fwd
inline
-unsigned count_trailing_zeros_u64(bm::id64_t w)
+unsigned count_trailing_zeros_u64(bm::id64_t w) BMNOEXCEPT
{
BM_ASSERT(w);
+/*!
+ Returns BSR value
+
+ For 8-byte T the BM_USE_GCC_BUILD path computes 63 - __builtin_clzll(value).
+ Otherwise the upper 32 bits are scanned first with bm::bit_scan_reverse32()
+ and, when they are zero, the low 32 bits are scanned instead. Every other
+ size of T goes straight to bm::bit_scan_reverse32() on the value cast to
+ unsigned.
+
+ \param value - value to scan; asserted to be non-zero
+ \return bit-scan-reverse result for value
+ @ingroup bitfunc
+*/
+template <class T>
+unsigned bit_scan_reverse(T value) BMNOEXCEPT
+{
+ BM_ASSERT(value);
+
+ if (bm::conditional<sizeof(T)==8>::test()) // true only for 8-byte T
+ {
+ #if defined(BM_USE_GCC_BUILD)
+ return (unsigned) (63 - __builtin_clzll(value));
+ #else
+ bm::id64_t v8 = value;
+ v8 >>= 32; // keep only the upper 32 bits
+ unsigned v = (unsigned)v8;
+ if (v)
+ {
+ v = bm::bit_scan_reverse32(v);
+ return v + 32; // offset the result by the 32 low bits skipped above
+ }
+ #endif
+ }
+ return bm::bit_scan_reverse32((unsigned)value); // non-8-byte T, or upper half was zero
+}
+
+/*! \brief and functor
+ op(v1, v2) returns the bitwise AND of its two arguments (v1 & v2).
+ \internal
+ */
+struct and_func
+{
+ static
+ BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
+ { return v1 & v2; }
+};
+/*! \brief xor functor
+ op(v1, v2) returns the bitwise exclusive OR of its two arguments (v1 ^ v2).
+ \internal
+ */
+struct xor_func
+{
+ static
+ BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
+ { return v1 ^ v2; }
+};
+/*! \brief or functor
+ op(v1, v2) returns the bitwise OR of its two arguments (v1 | v2).
+ \internal
+ */
+struct or_func
+{
+ static
+ BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
+ { return v1 | v2; }
+};
+/*! \brief sub functor
+ op(v1, v2) returns v1 with every bit that is set in v2 cleared (v1 & ~v2).
+ \internal
+ */
+struct sub_func
+{
+ static
+ BMFORCEINLINE unsigned op(unsigned v1, unsigned v2) BMNOEXCEPT2
+ { return v1 & ~v2; }
+};
+
+
+
#ifdef __GNUG__
#pragma GCC diagnostic pop
#endif
inline
unsigned bit_block_xor_change32(const bm::word_t* BMRESTRICT block,
const bm::word_t* BMRESTRICT xor_block,
- unsigned size)
+ unsigned size) BMNOEXCEPT
{
unsigned gap_count = 1;
inline
unsigned bit_block_xor_change(const bm::word_t* BMRESTRICT block,
const bm::word_t* BMRESTRICT xor_block,
- unsigned size)
+ unsigned size) BMNOEXCEPT
{
#ifdef VECT_BLOCK_XOR_CHANGE
return VECT_BLOCK_XOR_CHANGE(block, xor_block, size);
inline
void compute_complexity_descr(
const bm::word_t* BMRESTRICT block,
- block_waves_xor_descr& BMRESTRICT x_descr)
+ block_waves_xor_descr& BMRESTRICT x_descr) BMNOEXCEPT
{
for (unsigned i = 0; i < bm::block_waves; ++i)
{
const bm::word_t* BMRESTRICT block,
const bm::word_t* BMRESTRICT xor_block,
block_waves_xor_descr& BMRESTRICT x_descr,
- unsigned& block_gain)
+ unsigned& BMRESTRICT block_gain) BMNOEXCEPT
{
block_gain = 0; // approximate block gain (sum of sub-waves)
bm::id64_t digest = 0;
inline
void bit_block_xor(bm::word_t* target_block,
const bm::word_t* block, const bm::word_t* xor_block,
- bm::id64_t digest)
+ bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(target_block);
BM_ASSERT(block);
}
/// Get reference list size
- size_type size() const { return (size_type)ref_bvects_.size(); }
+ size_type size() const BMNOEXCEPT { return (size_type)ref_bvects_.size(); }
/// Get reference vector by the index in this ref-vector
- const bvector_type* get_bv(size_type idx) const { return ref_bvects_[idx]; }
+ const bvector_type* get_bv(size_type idx) const BMNOEXCEPT
+ { return ref_bvects_[idx]; }
/// Get reference row index by the index in this ref-vector
- size_type get_row_idx(size_type idx) const { return (size_type)ref_bvects_rows_[idx]; }
+ size_type get_row_idx(size_type idx) const BMNOEXCEPT
+ { return (size_type)ref_bvects_rows_[idx]; }
/// not-found value for find methods
static
- size_type not_found() { return ~(size_type(0)); }
+ size_type not_found() BMNOEXCEPT { return ~(size_type(0)); }
/// Find vector index by the reference index
/// @return ~0 if not found
- size_type find(std::size_t ref_idx) const
+ size_type find(std::size_t ref_idx) const BMNOEXCEPT
{
size_type sz = size();
for (size_type i = 0; i < sz; ++i)
typedef typename bvector_type::size_type size_type;
public:
- void set_ref_vector(const bv_ref_vector_type* ref_vect) { ref_vect_ = ref_vect; }
- const bv_ref_vector_type& get_ref_vector() const { return *ref_vect_; }
+ void set_ref_vector(const bv_ref_vector_type* ref_vect) BMNOEXCEPT
+ { ref_vect_ = ref_vect; }
+
+ const bv_ref_vector_type& get_ref_vector() const BMNOEXCEPT
+ { return *ref_vect_; }
/** Compute statistics for the anchor search vector
@param block - bit-block target
*/
- void compute_x_block_stats(const bm::word_t* block);
+ void compute_x_block_stats(const bm::word_t* block) BMNOEXCEPT;
/** Scan for all candidate bit-blocks to find mask or match
@return true if XOR complement or matching vector found
/**
Validate serialization target
*/
- bool validate_found(bm::word_t* xor_block, const bm::word_t* block) const;
+ bool validate_found(bm::word_t* xor_block,
+ const bm::word_t* block) const BMNOEXCEPT;
- size_type found_ridx() const { return found_ridx_; }
- const bm::word_t* get_found_block() const { return found_block_xor_; }
- unsigned get_x_best_metric() const { return x_best_metric_; }
- bm::id64_t get_xor_digest() const { return x_d64_; }
+ size_type found_ridx() const BMNOEXCEPT { return found_ridx_; }
+ const bm::word_t* get_found_block() const BMNOEXCEPT
+ { return found_block_xor_; }
+ unsigned get_x_best_metric() const BMNOEXCEPT { return x_best_metric_; }
+ bm::id64_t get_xor_digest() const BMNOEXCEPT { return x_d64_; }
/// true if completely identical vector found
- bool is_eq_found() const { return !x_best_metric_; }
+ bool is_eq_found() const BMNOEXCEPT { return !x_best_metric_; }
- unsigned get_x_bc() const { return x_bc_; }
- unsigned get_x_gc() const { return x_gc_; }
- unsigned get_x_block_best() const { return x_block_best_metric_; }
+ unsigned get_x_bc() const BMNOEXCEPT { return x_bc_; }
+ unsigned get_x_gc() const BMNOEXCEPT { return x_gc_; }
+ unsigned get_x_block_best() const BMNOEXCEPT
+ { return x_block_best_metric_; }
- bm::block_waves_xor_descr& get_descr() { return x_descr_; }
+ bm::block_waves_xor_descr& get_descr() BMNOEXCEPT { return x_descr_; }
private:
const bv_ref_vector_type* ref_vect_ = 0; ///< ref.vect for XOR filter
// --------------------------------------------------------------------------
template<typename BV>
-void xor_scanner<BV>::compute_x_block_stats(const bm::word_t* block)
+void xor_scanner<BV>::compute_x_block_stats(const bm::word_t* block) BMNOEXCEPT
{
BM_ASSERT(IS_VALID_ADDR(block));
BM_ASSERT(!BM_IS_GAP(block));
{
const bvector_type* bv = ref_vect_->get_bv(ri);
BM_ASSERT(bv);
- const typename bvector_type::blocks_manager_type& bman = bv->get_blocks_manager();
+ const typename bvector_type::blocks_manager_type& bman =
+ bv->get_blocks_manager();
const bm::word_t* block_xor = bman.get_block_ptr(i, j);
if (!IS_VALID_ADDR(block_xor) || BM_IS_GAP(block_xor))
continue;
if (!xor_bc) // completely identical block?
{
unsigned pos;
- bool f = bit_find_first_diff(block, block_xor, &pos);
+ bool f = bm::bit_find_first_diff(block, block_xor, &pos);
x_best_metric_ += f;
}
}
template<typename BV>
bool xor_scanner<BV>::validate_found(bm::word_t* xor_block,
- const bm::word_t* block) const
+ const bm::word_t* block) const BMNOEXCEPT
{
bm::id64_t d64 = get_xor_digest();
BM_ASSERT(d64);
public:
typedef unsigned char* position_type;
public:
- encoder(unsigned char* buf, size_t size);
- void put_8(unsigned char c);
- void put_16(bm::short_t s);
- void put_16(const bm::short_t* s, unsigned count);
- void put_24(bm::word_t w);
- void put_32(bm::word_t w);
- void put_32(const bm::word_t* w, unsigned count);
- void put_48(bm::id64_t w);
- void put_64(bm::id64_t w);
+ encoder(unsigned char* buf, size_t size) BMNOEXCEPT;
+ void put_8(unsigned char c) BMNOEXCEPT;
+ void put_16(bm::short_t s) BMNOEXCEPT;
+ void put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT;
+ void put_24(bm::word_t w) BMNOEXCEPT;
+ void put_32(bm::word_t w) BMNOEXCEPT;
+ void put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT;
+ void put_48(bm::id64_t w) BMNOEXCEPT;
+ void put_64(bm::id64_t w) BMNOEXCEPT;
void put_prefixed_array_32(unsigned char c,
- const bm::word_t* w, unsigned count);
+ const bm::word_t* w, unsigned count) BMNOEXCEPT;
void put_prefixed_array_16(unsigned char c,
const bm::short_t* s, unsigned count,
- bool encode_count);
- void memcpy(const unsigned char* src, size_t count);
- size_t size() const;
- unsigned char* get_pos() const;
- void set_pos(unsigned char* buf_pos);
+ bool encode_count) BMNOEXCEPT;
+ void memcpy(const unsigned char* src, size_t count) BMNOEXCEPT;
+ size_t size() const BMNOEXCEPT;
+ unsigned char* get_pos() const BMNOEXCEPT;
+ void set_pos(unsigned char* buf_pos) BMNOEXCEPT;
private:
unsigned char* buf_;
unsigned char* start_;
class decoder_base
{
public:
- decoder_base(const unsigned char* buf) { buf_ = start_ = buf; }
+ decoder_base(const unsigned char* buf) BMNOEXCEPT { buf_ = start_ = buf; }
/// Reads character from the decoding buffer.
- unsigned char get_8() { return *buf_++; }
+ unsigned char get_8() BMNOEXCEPT { return *buf_++; }
/// Returns size of the current decoding stream.
- size_t size() const { return size_t(buf_ - start_); }
+ size_t size() const BMNOEXCEPT { return size_t(buf_ - start_); }
/// change current position
- void seek(int delta) { buf_ += delta; }
+ void seek(int delta) BMNOEXCEPT { buf_ += delta; }
/// read bytes from the decode buffer
- void memcpy(unsigned char* dst, size_t count);
+ void memcpy(unsigned char* dst, size_t count) BMNOEXCEPT;
/// Return current buffer pointer
- const unsigned char* get_pos() const { return buf_; }
+ const unsigned char* get_pos() const BMNOEXCEPT { return buf_; }
/// Set current buffer pointer
- void set_pos(const unsigned char* pos) { buf_ = pos; }
+ void set_pos(const unsigned char* pos) BMNOEXCEPT { buf_ = pos; }
protected:
const unsigned char* buf_;
const unsigned char* start_;
class decoder : public decoder_base
{
public:
- decoder(const unsigned char* buf);
- bm::short_t get_16();
- bm::word_t get_24();
- bm::word_t get_32();
- bm::id64_t get_48();
- bm::id64_t get_64();
- void get_32(bm::word_t* w, unsigned count);
- bool get_32_OR(bm::word_t* w, unsigned count);
- void get_32_AND(bm::word_t* w, unsigned count);
- void get_16(bm::short_t* s, unsigned count);
+ decoder(const unsigned char* buf) BMNOEXCEPT;
+ bm::short_t get_16() BMNOEXCEPT;
+ bm::word_t get_24() BMNOEXCEPT;
+ bm::word_t get_32() BMNOEXCEPT;
+ bm::id64_t get_48() BMNOEXCEPT;
+ bm::id64_t get_64() BMNOEXCEPT;
+ void get_32(bm::word_t* w, unsigned count) BMNOEXCEPT;
+ bool get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT;
+ void get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT;
+ void get_16(bm::short_t* s, unsigned count) BMNOEXCEPT;
};
// ----------------------------------------------------------------
~bit_out() { flush(); }
/// issue single bit into encode bit-stream
- void put_bit(unsigned value);
+ void put_bit(unsigned value) BMNOEXCEPT;
/// issue count bits out of value
- void put_bits(unsigned value, unsigned count);
+ void put_bits(unsigned value, unsigned count) BMNOEXCEPT;
/// issue 0 into output stream
- void put_zero_bit();
+ void put_zero_bit() BMNOEXCEPT;
/// issue specified number of 0s
- void put_zero_bits(unsigned count);
+ void put_zero_bits(unsigned count) BMNOEXCEPT;
/// Elias Gamma encode the specified value
- void gamma(unsigned value);
+ void gamma(unsigned value) BMNOEXCEPT;
/// Binary Interpolative array decode
void bic_encode_u16(const bm::gap_word_t* arr, unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi)
+ bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
{
bic_encode_u16_cm(arr, sz, lo, hi);
}
/// Binary Interpolative encoding (array of 16-bit ints)
void bic_encode_u16_rg(const bm::gap_word_t* arr, unsigned sz,
bm::gap_word_t lo,
- bm::gap_word_t hi);
+ bm::gap_word_t hi) BMNOEXCEPT;
/// Binary Interpolative encoding (array of 16-bit ints)
/// cm - "center-minimal"
void bic_encode_u16_cm(const bm::gap_word_t* arr, unsigned sz,
bm::gap_word_t lo,
- bm::gap_word_t hi);
+ bm::gap_word_t hi) BMNOEXCEPT;
/// Binary Interpolative encoding (array of 32-bit ints)
/// cm - "center-minimal"
void bic_encode_u32_cm(const bm::word_t* arr, unsigned sz,
- bm::word_t lo, bm::word_t hi);
+ bm::word_t lo, bm::word_t hi) BMNOEXCEPT;
/// Flush the incomplete 32-bit accumulator word
- void flush() { if (used_bits_) flush_accum(); }
+ void flush() BMNOEXCEPT { if (used_bits_) flush_accum(); }
private:
- void flush_accum()
+ void flush_accum() BMNOEXCEPT
{
dest_.put_32(accum_);
used_bits_ = accum_ = 0;
class bit_in
{
public:
- bit_in(TDecoder& decoder)
+ bit_in(TDecoder& decoder) BMNOEXCEPT
: src_(decoder),
used_bits_(unsigned(sizeof(accum_) * 8)),
- accum_(0)
+ accum_(0)
{}
/// decode unsigned value using Elias Gamma coding
- unsigned gamma();
+ unsigned gamma() BMNOEXCEPT;
/// read number of bits out of the stream
- unsigned get_bits(unsigned count);
+ unsigned get_bits(unsigned count) BMNOEXCEPT;
/// Binary Interpolative array decode
void bic_decode_u16(bm::gap_word_t* arr, unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi)
+ bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
{
bic_decode_u16_cm(arr, sz, lo, hi);
}
void bic_decode_u16_bitset(bm::word_t* block, unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi)
+ bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
{
bic_decode_u16_cm_bitset(block, sz, lo, hi);
}
- void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
+ void bic_decode_u16_dry(unsigned sz,
+ bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
{
bic_decode_u16_cm_dry(sz, lo, hi);
}
/// Binary Interpolative array decode
void bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi);
+ bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT;
/// Binary Interpolative array decode
void bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi);
+ bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT;
/// Binary Interpolative array decode (32-bit)
void bic_decode_u32_cm(bm::word_t* arr, unsigned sz,
- bm::word_t lo, bm::word_t hi);
+ bm::word_t lo, bm::word_t hi) BMNOEXCEPT;
/// Binary Interpolative array decode into bitset (32-bit based)
void bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi);
+ bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT;
/// Binary Interpolative array decode into /dev/null
- void bic_decode_u16_rg_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi);
+ void bic_decode_u16_rg_dry(unsigned sz,
+ bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT;
/// Binary Interpolative array decode into bitset (32-bit based)
void bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi);
+ bm::gap_word_t lo,
+ bm::gap_word_t hi) BMNOEXCEPT;
/// Binary Interpolative array decode into /dev/null
- void bic_decode_u16_cm_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi);
+ void bic_decode_u16_cm_dry(unsigned sz,
+ bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT;
private:
bit_in(const bit_in&);
\param buf - memory buffer pointer.
\param size - size of the buffer
*/
-inline encoder::encoder(unsigned char* buf, size_t a_size)
+inline encoder::encoder(unsigned char* buf, size_t a_size) BMNOEXCEPT
: buf_(buf), start_(buf)
{
size_ = a_size;
*/
inline void encoder::put_prefixed_array_32(unsigned char c,
const bm::word_t* w,
- unsigned count)
+ unsigned count) BMNOEXCEPT
{
put_8(c);
put_32(w, count);
inline void encoder::put_prefixed_array_16(unsigned char c,
const bm::short_t* s,
unsigned count,
- bool encode_count)
+ bool encode_count) BMNOEXCEPT
{
put_8(c);
if (encode_count)
\brief Puts one character into the encoding buffer.
\param c - character to encode
*/
-BMFORCEINLINE void encoder::put_8(unsigned char c)
+BMFORCEINLINE void encoder::put_8(unsigned char c) BMNOEXCEPT
{
*buf_++ = c;
}
\brief Puts short word (16 bits) into the encoding buffer.
\param s - short word to encode
*/
-BMFORCEINLINE void encoder::put_16(bm::short_t s)
+BMFORCEINLINE void encoder::put_16(bm::short_t s) BMNOEXCEPT
{
#if (BM_UNALIGNED_ACCESS_OK == 1)
::memcpy(buf_, &s, sizeof(bm::short_t)); // optimizer takes care of it
/*!
\brief Method puts array of short words (16 bits) into the encoding buffer.
*/
-inline void encoder::put_16(const bm::short_t* s, unsigned count)
+inline void encoder::put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT
{
#if (BM_UNALIGNED_ACCESS_OK == 1)
::memcpy(buf_, s, sizeof(bm::short_t)*count);
\brief copy bytes into target buffer or just rewind if src is NULL
*/
inline
-void encoder::memcpy(const unsigned char* src, size_t count)
+void encoder::memcpy(const unsigned char* src, size_t count) BMNOEXCEPT
{
BM_ASSERT((buf_ + count) < (start_ + size_));
if (src)
\fn unsigned encoder::size() const
\brief Returns size of the current encoding stream.
*/
-inline size_t encoder::size() const
+inline size_t encoder::size() const BMNOEXCEPT
{
return size_t(buf_ - start_);
}
/**
\brief Get current memory stream position
*/
-inline encoder::position_type encoder::get_pos() const
+inline encoder::position_type encoder::get_pos() const BMNOEXCEPT
{
return buf_;
}
/**
\brief Set current memory stream position
*/
-inline void encoder::set_pos(encoder::position_type buf_pos)
+inline void encoder::set_pos(encoder::position_type buf_pos) BMNOEXCEPT
{
buf_ = buf_pos;
}
\brief Puts 24 bits word into encoding buffer.
\param w - word to encode.
*/
-inline void encoder::put_24(bm::word_t w)
+inline void encoder::put_24(bm::word_t w) BMNOEXCEPT
{
BM_ASSERT((w & ~(0xFFFFFFU)) == 0);
\brief Puts 32 bits word into encoding buffer.
\param w - word to encode.
*/
-inline void encoder::put_32(bm::word_t w)
+inline void encoder::put_32(bm::word_t w) BMNOEXCEPT
{
#if (BM_UNALIGNED_ACCESS_OK == 1)
::memcpy(buf_, &w, sizeof(bm::word_t));
\brief Puts 48 bits word into encoding buffer.
\param w - word to encode.
*/
-inline void encoder::put_48(bm::id64_t w)
+inline void encoder::put_48(bm::id64_t w) BMNOEXCEPT
{
BM_ASSERT((w & ~(0xFFFFFFFFFFFFUL)) == 0);
*buf_++ = (unsigned char)w;
\brief Puts 64 bits word into encoding buffer.
\param w - word to encode.
*/
-inline void encoder::put_64(bm::id64_t w)
+inline void encoder::put_64(bm::id64_t w) BMNOEXCEPT
{
#if (BM_UNALIGNED_ACCESS_OK == 1)
::memcpy(buf_, &w, sizeof(bm::id64_t));
/*!
\brief Encodes array of 32-bit words
*/
-inline
-void encoder::put_32(const bm::word_t* w, unsigned count)
+inline void encoder::put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT
{
#if (BM_UNALIGNED_ACCESS_OK == 1)
+ // use memcpy() because compilers now recognize it as an idiom and inline it
::memcpy(buf_, w, sizeof(bm::word_t) * count);
buf_ += sizeof(bm::word_t) * count;
#else
Load bytes from the decode buffer
*/
inline
-void decoder_base::memcpy(unsigned char* dst, size_t count)
+void decoder_base::memcpy(unsigned char* dst, size_t count) BMNOEXCEPT
{
if (dst)
::memcpy(dst, buf_, count);
\brief Construction
\param buf - pointer to the decoding memory.
*/
-inline decoder::decoder(const unsigned char* buf)
+inline decoder::decoder(const unsigned char* buf) BMNOEXCEPT
: decoder_base(buf)
{
}
\fn bm::short_t decoder::get_16()
\brief Reads 16-bit word from the decoding buffer.
*/
-BMFORCEINLINE bm::short_t decoder::get_16()
+BMFORCEINLINE bm::short_t decoder::get_16() BMNOEXCEPT
{
#if (BM_UNALIGNED_ACCESS_OK == 1)
bm::short_t a;
\fn bm::word_t decoder::get_24()
\brief Reads 32-bit word from the decoding buffer.
*/
-inline bm::word_t decoder::get_24()
+inline bm::word_t decoder::get_24() BMNOEXCEPT
{
bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
((unsigned)buf_[2] << 16);
\fn bm::word_t decoder::get_32()
\brief Reads 32-bit word from the decoding buffer.
*/
-BMFORCEINLINE bm::word_t decoder::get_32()
+BMFORCEINLINE bm::word_t decoder::get_32() BMNOEXCEPT
{
#if (BM_UNALIGNED_ACCESS_OK == 1)
bm::word_t a;
\brief Reads 64-bit word from the decoding buffer.
*/
inline
-bm::id64_t decoder::get_48()
+bm::id64_t decoder::get_48() BMNOEXCEPT
{
bm::id64_t a = buf_[0] +
((bm::id64_t)buf_[1] << 8) +
\brief Reads 64-bit word from the decoding buffer.
*/
inline
-bm::id64_t decoder::get_64()
+bm::id64_t decoder::get_64() BMNOEXCEPT
{
#if (BM_UNALIGNED_ACCESS_OK == 1)
bm::id64_t a;
\param w - pointer on memory block to read into.
\param count - size of memory block in words.
*/
-inline void decoder::get_32(bm::word_t* w, unsigned count)
+inline void decoder::get_32(bm::word_t* w, unsigned count) BMNOEXCEPT
{
if (!w)
{
\param count - should match bm::set_block_size
*/
inline
-bool decoder::get_32_OR(bm::word_t* w, unsigned count)
+bool decoder::get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT
{
if (!w)
{
\param count - should match bm::set_block_size
*/
inline
-void decoder::get_32_AND(bm::word_t* w, unsigned count)
+void decoder::get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT
{
if (!w)
{
\param s - pointer on memory block to read into.
\param count - size of memory block in words.
*/
-inline void decoder::get_16(bm::short_t* s, unsigned count)
+inline void decoder::get_16(bm::short_t* s, unsigned count) BMNOEXCEPT
{
if (!s)
{
//
template<typename TEncoder>
-void bit_out<TEncoder>::put_bit(unsigned value)
+void bit_out<TEncoder>::put_bit(unsigned value) BMNOEXCEPT
{
BM_ASSERT(value <= 1);
accum_ |= (value << used_bits_);
// ----------------------------------------------------------------------
template<typename TEncoder>
-void bit_out<TEncoder>::put_bits(unsigned value, unsigned count)
+void bit_out<TEncoder>::put_bits(unsigned value, unsigned count) BMNOEXCEPT
{
unsigned used = used_bits_;
unsigned acc = accum_;
// ----------------------------------------------------------------------
template<typename TEncoder>
-void bit_out<TEncoder>::put_zero_bit()
+void bit_out<TEncoder>::put_zero_bit() BMNOEXCEPT
{
if (++used_bits_ == (sizeof(accum_) * 8))
flush_accum();
// ----------------------------------------------------------------------
template<typename TEncoder>
-void bit_out<TEncoder>::put_zero_bits(unsigned count)
+void bit_out<TEncoder>::put_zero_bits(unsigned count) BMNOEXCEPT
{
unsigned used = used_bits_;
unsigned free_bits = (sizeof(accum_) * 8) - used;
// ----------------------------------------------------------------------
template<typename TEncoder>
-void bit_out<TEncoder>::gamma(unsigned value)
+void bit_out<TEncoder>::gamma(unsigned value) BMNOEXCEPT
{
BM_ASSERT(value);
// ----------------------------------------------------------------------
template<typename TEncoder>
-void bit_out<TEncoder>::bic_encode_u16_rg(const bm::gap_word_t* arr,
- unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi)
+void bit_out<TEncoder>::bic_encode_u16_rg(
+ const bm::gap_word_t* arr,
+ unsigned sz,
+ bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
{
for (;sz;)
{
template<typename TEncoder>
void bit_out<TEncoder>::bic_encode_u32_cm(const bm::word_t* arr,
unsigned sz,
- bm::word_t lo, bm::word_t hi)
+ bm::word_t lo,
+ bm::word_t hi) BMNOEXCEPT
{
for (;sz;)
{
void bit_out<TEncoder>::bic_encode_u16_cm(const bm::gap_word_t* arr,
unsigned sz_i,
bm::gap_word_t lo_i,
- bm::gap_word_t hi_i)
+ bm::gap_word_t hi_i) BMNOEXCEPT
{
BM_ASSERT(sz_i <= 65535);
template<typename TEncoder>
void bit_out<TEncoder>::bic_encode_u16_cm(const bm::gap_word_t* arr,
unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi)
+ bm::gap_word_t lo,
+ bm::gap_word_t hi) BMNOEXCEPT
{
for (;sz;)
{
template<class TDecoder>
void bit_in<TDecoder>::bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi)
+ bm::gap_word_t lo,
+ bm::gap_word_t hi) BMNOEXCEPT
{
for (;sz;)
{
template<class TDecoder>
void bit_in<TDecoder>::bic_decode_u32_cm(bm::word_t* arr, unsigned sz,
- bm::word_t lo, bm::word_t hi)
+ bm::word_t lo,
+ bm::word_t hi) BMNOEXCEPT
{
for (;sz;)
{
template<class TDecoder>
void bit_in<TDecoder>::bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi)
+ bm::gap_word_t lo,
+ bm::gap_word_t hi) BMNOEXCEPT
{
for (;sz;)
{
template<class TDecoder>
void bit_in<TDecoder>::bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi)
+ bm::gap_word_t lo,
+ bm::gap_word_t hi) BMNOEXCEPT
{
for (;sz;)
{
template<class TDecoder>
void bit_in<TDecoder>::bic_decode_u16_cm_dry(unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi)
+ bm::gap_word_t lo,
+ bm::gap_word_t hi) BMNOEXCEPT
{
for (;sz;)
{
template<class TDecoder>
void bit_in<TDecoder>::bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi)
+ bm::gap_word_t lo,
+ bm::gap_word_t hi) BMNOEXCEPT
{
for (;sz;)
{
if (sz == 1)
return;
bic_decode_u16_rg_bitset(block, mid_idx, lo, bm::gap_word_t(val - 1));
- // tail recursion:
+ // tail recursion of:
//bic_decode_u16_bitset(block, sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
sz -= mid_idx + 1;
lo = bm::gap_word_t(val + 1);
template<class TDecoder>
void bit_in<TDecoder>::bic_decode_u16_rg_dry(unsigned sz,
- bm::gap_word_t lo, bm::gap_word_t hi)
+ bm::gap_word_t lo,
+ bm::gap_word_t hi) BMNOEXCEPT
{
for (;sz;)
{
if (sz == 1)
return;
bic_decode_u16_rg_dry(mid_idx, lo, bm::gap_word_t(val - 1));
- //bic_decode_u16_dry(sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
sz -= mid_idx + 1;
lo = bm::gap_word_t(val + 1);
} // for sz
// ----------------------------------------------------------------------
template<class TDecoder>
-unsigned bit_in<TDecoder>::gamma()
+unsigned bit_in<TDecoder>::gamma() BMNOEXCEPT
{
unsigned acc = accum_;
unsigned used = used_bits_;
// ----------------------------------------------------------------------
template<class TDecoder>
-unsigned bit_in<TDecoder>::get_bits(unsigned count)
+unsigned bit_in<TDecoder>::get_bits(unsigned count) BMNOEXCEPT
{
BM_ASSERT(count);
const unsigned maskFF = ~0u;
#ifndef FORMATGUESS__HPP
#define FORMATGUESS__HPP
-/* $Id: format_guess.hpp 596735 2019-11-12 16:36:21Z ludwigf $
+/* $Id: format_guess.hpp 612523 2020-07-23 11:23:30Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
eGffAugustus = 34, ///< GFFish output of Augustus Gene Prediction
eJSON = 35, ///< JSON
ePsl = 36, ///< PSL alignment format
+ // The following formats are not yet recognized by CFormatGuess - CXX-10039
+ eAltGraphX = 37,
+ eBed5FloatScore = 38,
+ eBedGraph = 39,
+ eBedRnaElements = 40,
+ eBigBarChart = 41,
+ eBigBed = 42,
+ eBigPsl = 43,
+ eBigChain = 44,
+ eBigMaf = 45,
+ eBigWig = 46,
+ eBroadPeak = 47,
+ eChain = 48,
+ eClonePos = 49,
+ eColoredExon = 50,
+ eCtgPos = 51,
+ eDownloadsOnly = 52,
+ eEncodeFiveC = 53,
+ eExpRatio = 54,
+ eFactorSource = 55,
+ eGenePred = 56,
+ eLd2 = 57,
+ eNarrowPeak = 58,
+ eNetAlign = 59,
+ ePeptideMapping = 60,
+ eRmsk = 61,
+ eSnake = 62,
+ eVcfTabix = 63,
+ eWigMaf = 64,
+
+ // The following formats *are* recognized by CFormatGuess:
+ eFlatFileGenbank = 65,
+ eFlatFileEna = 66,
+ eFlatFileUniProt = 67,
+
+ // *** Adding new format codes? ***
+ // (1) A sanity check in the implementation depends on the format codes being
+ // consecutive. Hence no gaps allowed!
+ // (2) Heed the warning above about never changing an already existing
+ // format code!
+ // (3) You must provide a display name for the new format. Do that in
+ // sm_FormatNames.
+ // (4) You must add your new format to sm_CheckOrder (unless you don't want your
+ // format to actually be checked and recognized).
+
/// Max value of EFormat
eFormat_max
};
eThrowOnBadSource, ///< Throw an exception if the data source (stream, file) can't be read
};
+ static bool IsSupportedFormat(EFormat format);
+
/// Hints for guessing formats. Two hint types can be used: preferred and
/// disabled. Preferred are checked before any other formats. Disabled
/// formats are not checked at all.
~CFormatGuess();
+
NCBI_DEPRECATED EFormat GuessFormat(EMode);
NCBI_DEPRECATED bool TestFormat(EFormat, EMode);
bool TestFormatJson(EMode);
bool TestFormatPsl(EMode);
+ bool TestFormatFlatFileGenbank(EMode);
+ bool TestFormatFlatFileEna(EMode);
+ bool TestFormatFlatFileUniProt(EMode);
+
bool IsInputRepeatMaskerWithoutHeader();
bool IsInputRepeatMaskerWithHeader();
bool x_IsBlankOrNumbers(const string& testString) const;
// data:
- static const char* const sm_FormatNames[eFormat_max];
+ using NAME_MAP = map<EFormat, const char*>;
+ static const NAME_MAP sm_FormatNames;
bool x_TryProcessCLUSTALSeqData(const string& line, string& id, size_t& seg_length) const;
bool x_LooksLikeCLUSTALConservedInfo(const string& line) const;
protected:
- static int s_CheckOrder[];
+ static vector<int> sm_CheckOrder;
static const streamsize s_iTestBufferGranularity = 8096;
+
CNcbiIstream& m_Stream;
bool m_bOwnsStream;
char* m_pTestBuffer;
#ifndef UTIL__LIMITED_SIZE_MAP__HPP
#define UTIL__LIMITED_SIZE_MAP__HPP
-/* $Id: limited_size_map.hpp 402322 2013-06-06 17:13:46Z vasilche $
+/* $Id: limited_size_map.hpp 612734 2020-07-27 11:38:33Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
return !(*this == a);
}
};
-
+
+ explicit
limited_size_map(size_t size_limit = 0)
: m_SizeLimit(size_limit)
{
shift
# Run test
-"$inspxe" -collect mi3 -knob detect-leaks-on-exit=false -knob enable-memory-growth-detection=false -knob enable-on-demand-leak-detection=false -knob still-allocated-memory=false -knob detect-resource-leaks=false -knob stack-depth=32 -result-dir $rd -return-app-exitcode -suppression-file "$suppress_dir" -- $exe "$@"
+"$inspxe" -collect mi3 -knob detect-leaks-on-exit=false -knob enable-memory-growth-detection=false -knob enable-on-demand-leak-detection=false -knob still-allocated-memory=false -knob detect-resource-leaks=false -knob stack-depth=16 -result-dir $rd -return-app-exitcode -suppression-file "$suppress_dir" -- $exe "$@"
app_result=$?
sleep 5
if test ! -d $rd; then
race:^ncbi::CDiagContext::ApproveMessage(ncbi::SDiagMessage&,
+# ncbi_url.cpp default encoder. Leave as is. CXX-10543
+race:^ncbi::CSafeStatic<ncbi::CDefaultUrlEncoder, ncbi::CSafeStatic_Callbacks<ncbi::CDefaultUrlEncoder> >::x_Init()$
+
################################################################
# ivanov
echo "[`date`]"
-svn_location=`echo '$URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.10.1/c++/scripts/common/impl/install.sh $' | sed "s%\\$[U]RL: *\\([^$][^$]*\\) \\$.*%\\1%"`
+svn_location=`echo '$URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.11.0/c++/scripts/common/impl/install.sh $' | sed "s%\\$[U]RL: *\\([^$][^$]*\\) \\$.*%\\1%"`
svn_revision=`echo '$Revision: 429376 $' | sed "s%\\$[R]evision: *\\([^$][^$]*\\) \\$.*%\\1%"`
script_name=`basename $0`
#
# Filename: Manifest
#
-# $Id: Manifest 598028 2019-12-03 15:46:49Z camacho $
+# $Id: Manifest 615066 2020-08-26 16:41:29Z fongah2 $
#
# Author: Christiam Camacho
#
# that. The build-root is needed so that rpmbuild can find the proper directories
# to copy the binaries from
Linux64-Centos : icc : ICC.sh 1900 --with-static --without-dll --with-bin-release --with-strip --without-debug --without-pcre --with-mt --with-openmp --with-flat-makefile --with-experimental=Int8GI --without-vdb --without-gnutls --without-gcrypt <ENV>OPENMP_FLAGS='-qopenmp -qopenmp-link=static';LDFLAGS=-Wl,--as-needed</ENV>
+
#Linux64-Centos : gcc : GCC.sh --with-static --without-dll --with-bin-release --with-strip --without-debug --without-pcre --with-mt --with-openmp --with-flat-makefile --with-experimental=Int8GI --without-vdb --without-gnutls --without-gcrypt
#Linux64-Centos : gcc-debug : GCC.sh --with-strip --with-debug --without-dll --without-pcre --with-mt --with-openmp --with-flat-makefile --with-experimental=Int8GI --without-vdb --without-gnutls --without-gcrypt
dbase 24.0
web 24.0
objects 24.0
-objtools 24.0
-algo 24.0
-app 24.0
+objtools 24.1
+algo 24.1
+app 24.1
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
use strict;
+use warnings;
my $inputfile=shift (@ARGV);
open(in_handle, $inputfile);
-[^gi].*/app
-[^g].*/unit_test
-algo/ms
+-internal/gbench/app/msaviewer
+-internal/gbench/app/sviewer
+-internal/gbench/app/treeview
+-internal/gbench/app/uud
+Release 4.42.1 cloned from 4.41.0 (2020-09-21)
+
+ * NetSchedule: cannot start with string to unsigned int conversion
+ (CXX-11350)
+ * NetSchedule: update program name when a client changes its session
+ (CXX-11283)
+
Release 4.42.0 cloned from 4.41.0 (2020-02-11)
* NetSchedule: extend DUMP command (CXX-10344)
+Release 1.10.0 (2020-07-10)
+
+ * PSG: add anti recursion flag (CXX-11438)
+ * PSG: create and use a high level Reply object at the very beginning
+ (CXX-11425)
+ * PSG server MaxDebug configuration segfault (CXX-11402)
+ * PSG server get_na processor filter (CXX-11401)
+ * PSG: incorrect handling of the CHttpReply<> instance (CXX-11397)
+ * PSG - add API to retrieve biodata from other (non-Cassandra/LMDB) sources
+ (CXX-11312)
+
Release 1.9.0 (2020-04-09)
* Updated libuv 1.35.0, datastax 2.15.1, lmdb 0.9.24 (CXX-11268)
app$
app/pubseq_gateway$
app/pubseq_gateway/server$
+app/pubseq_gateway/server/test$
+app/pubseq_gateway/server/test/input$
+app/pubseq_gateway/server/integrationsmoketest$
+app/pubseq_gateway/server/integrationsmoketest/input$
+app/pubseq_gateway/server/integrationsmoketest/baseline$
objtools$
objtools/pubseq_gateway$
#############################################################################
-# $Id: CMakeLists.txt 608162 2020-05-12 16:01:31Z blastadm $
+# $Id: CMakeLists.txt 617723 2020-10-06 07:10:56Z blastadm $
#############################################################################
-cmake_minimum_required(VERSION 3.3)
-include(build-system/cmake/CMakeLists.top_builddir.txt)
+cmake_minimum_required(VERSION 3.7)
+if ("${PROJECT_NAME}" STREQUAL "")
+ project(ncbi_cpp)
+endif()
+include(${CMAKE_CURRENT_LIST_DIR}/build-system/cmake/CMakeLists.top_builddir.txt)
-# $Id: Makefile.in 608162 2020-05-12 16:01:31Z blastadm $
+# $Id: Makefile.in 617723 2020-10-06 07:10:56Z blastadm $
# Master (top-level) makefile for all NCBI C++ projects
##################################################################
#############################################################################
-# $Id: CMakeLists.xblast.lib.txt 594157 2019-09-30 18:28:48Z gouriano $
+# $Id: CMakeLists.xblast.lib.txt 615334 2020-08-31 15:35:33Z fukanchi $
#############################################################################
set(SRC_BLAST_CXX_CORE
deltablast
magicblast_options
magicblast
+ blast_node
+ blast_usage_report
)
-# $Id: Makefile.xblast.lib 553565 2017-12-18 22:23:38Z fongah2 $
+# $Id: Makefile.xblast.lib 615334 2020-08-31 15:35:33Z fukanchi $
include $(srcdir)/../core/Makefile.blast.lib
deltablast_options \
deltablast \
magicblast_options \
-magicblast
+magicblast \
+blast_node \
+blast_usage_report
SRC = $(SRC_C:%=.core_%) $(SRC_CXX)
-/* $Id: blast_aux.cpp 519527 2016-11-16 14:19:45Z camacho $
+/* $Id: blast_aux.cpp 615182 2020-08-28 04:28:48Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
top_bh.GetSeqMap().CanResolveRange(&*scope, sel);
}
+/// Single lock guarding CBlastAppDiagHandler::m_messages.
+/// Post() and ResetMessages() have to serialize on the SAME mutex: a separate
+/// function-local static mutex in each of them yields two unrelated locks and
+/// leaves the list open to a concurrent push_back() / clear().
+DEFINE_STATIC_MUTEX(s_DiagMessagesMutex);
+
+/// Forward a diagnostic message to the handler captured at construction and,
+/// while saving is enabled, keep a sanitized copy of it.
+/// @param mess
+///   Diagnostic message being posted
+void CBlastAppDiagHandler::Post(const SDiagMessage & mess)
+{
+    if (m_handler != NULL) {
+        m_handler->Post(mess);
+    }
+    if ( !m_save ) {
+        return;
+    }
+
+    // Build the record before taking the lock to keep the critical section
+    // down to the list insertion itself.
+    string text;
+    mess.Write(text);
+    CRef<CBlast4_error> saved(new CBlast4_error);
+    saved->SetMessage(NStr::Sanitize(text));
+    saved->SetCode(static_cast<int>(mess.m_Severity));
+
+    CMutexGuard guard(s_DiagMessagesMutex);
+    m_messages.push_back(saved);
+}
+
+/// Discard every message saved so far.
+void CBlastAppDiagHandler::ResetMessages()
+{
+    CMutexGuard guard(s_DiagMessagesMutex);
+    m_messages.clear();
+}
+
+/// Hand the diagnostics back to the handler that was captured when this
+/// object was constructed (if any) and forget the pointer.
+CBlastAppDiagHandler::~CBlastAppDiagHandler()
+{
+    if (m_handler == NULL) {
+        return;
+    }
+    SetDiagHandler(m_handler);
+    m_handler = NULL;
+}
+
+/// Turn off message saving and drop everything saved so far.
+/// The flag is cleared before the buffer is emptied, so Post() calls made
+/// after this point are forwarded to the captured handler but not stored.
+void CBlastAppDiagHandler::DoNotSaveMessages(void)
+{
+ m_save = false;
+ ResetMessages();
+}
+
+
END_SCOPE(blast)
END_NCBI_SCOPE
--- /dev/null
+/* $Id:
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file blast_node.cpp
+ * BLAST node api
+ */
+
+#include <ncbi_pch.hpp>
+#include <corelib/ncbiapp.hpp>
+#include <algo/blast/api/remote_blast.hpp>
+#include <algo/blast/blastinput/blast_fasta_input.hpp>
+#include <algo/blast/api/blast_node.hpp>
+
+#if defined(NCBI_OS_UNIX)
+#include <unistd.h>
+#endif
+
+#ifndef SKIP_DOXYGEN_PROCESSING
+USING_NCBI_SCOPE;
+USING_SCOPE(blast);
+USING_SCOPE(objects);
+#endif
+
+/// Append a message to this mailbox and signal the notification condition.
+/// @param msg
+///   Message to enqueue
+void CBlastNodeMailbox::SendMsg(CRef<CBlastNodeMsg> msg)
+{
+    CFastMutexGuard queue_lock(m_Mutex);
+
+    m_MsgQueue.push_back(msg);
+    m_Notify.SignalSome();
+}
+
+/// Set up a node that handles queries
+/// [query_index, query_index + num_queries - 1].
+/// @param node_num       Number identifying this node
+/// @param ncbi_args      Program arguments
+/// @param args           Parsed command-line arguments
+/// @param bah            Diagnostic handler used to capture messages
+/// @param blast_program  BLAST program to run
+/// @param query_index    Index of the first query handled by this node
+/// @param num_queries    Number of queries handled by this node
+/// @param mailbox        Optional mailbox for outgoing messages (may be NULL)
+CBlastNode::CBlastNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args,
+                        CBlastAppDiagHandler & bah, EProgram blast_program,
+                        int query_index, int num_queries, CBlastNodeMailbox * mailbox)
+    : m_NodeNum(node_num),
+      m_NcbiArgs(ncbi_args),
+      m_Args(args),
+      m_Bah(bah),
+      m_BlastProgram(blast_program),
+      m_QueryIndex(query_index),
+      m_NumOfQueries(num_queries),
+      m_QueriesLength(0)
+{
+    if (mailbox != NULL) {
+        m_Mailbox.Reset(mailbox);
+    }
+
+    // Human-readable label of the form "Query <first>-<last>".
+    const int last_query = query_index + num_queries - 1;
+    m_NodeIdStr = "Query " + NStr::IntToString(query_index) + "-" +
+                  NStr::IntToString(last_query);
+}
+
+/// Release this node's reference to its mailbox, if it holds one.
+CBlastNode::~CBlastNode ()
+{
+    if ( !m_Mailbox.Empty() ) {
+        m_Mailbox.Reset();
+    }
+}
+
+/// Wrap (msg_type, ptr) in a CBlastNodeMsg and put it in this node's mailbox.
+/// Does nothing when the node has no mailbox.
+/// @param msg_type  Kind of message
+/// @param ptr       Opaque message body
+void CBlastNode::SendMsg(CBlastNodeMsg::EMsgType msg_type, void* ptr)
+{
+    if (m_Mailbox.Empty()) {
+        return;
+    }
+    CRef<CBlastNodeMsg> outgoing(new CBlastNodeMsg(msg_type, ptr));
+    m_Mailbox->SendMsg(outgoing);
+}
+
+/// Create the master node and start its stopwatch.
+/// @param out_stream   Stream that formatted results are written to
+/// @param num_threads  Maximum number of nodes run at once; the node limit
+///                     is set to this value plus two
+CBlastMasterNode::CBlastMasterNode(CNcbiOstream & out_stream, int num_threads)
+    : m_OutputStream(out_stream),
+      m_MaxNumThreads(num_threads),
+      m_MaxNumNodes(num_threads + 2),
+      m_NumErrStatus(0),
+      m_NumQueries(0),
+      m_QueriesLength(0)
+{
+    m_StopWatch.Start();
+}
+
+/// Block on m_NewEvent, holding m_Mutex as the associated lock.
+void
+CBlastMasterNode::x_WaitForNewEvent()
+{
+    CFastMutexGuard event_lock(m_Mutex);
+
+    m_NewEvent.WaitForSignal(m_Mutex);
+}
+
+/// Record a node and its mailbox under the node's number.
+/// @param node     Node to register; must not be NULL
+/// @param mailbox  Mailbox of that node; must not be NULL and must carry the
+///                 same node number as @a node
+/// @throws CBlastException
+///   eInvalidArgument for a NULL argument or an already registered node
+///   number, eCoreBlastError when the node numbers disagree
+void
+CBlastMasterNode::RegisterNode(CBlastNode * node, CBlastNodeMailbox * mailbox)
+{
+    if (node == NULL) {
+        NCBI_THROW(CBlastException, eInvalidArgument, "Empty Node" );
+    }
+    if (mailbox == NULL) {
+        NCBI_THROW(CBlastException, eInvalidArgument, "Empty mailbox" );
+    }
+    if (mailbox->GetNodeNum() != node->GetNodeNum()) {
+        NCBI_THROW(CBlastException, eCoreBlastError, "Invalid mailbox node number" );
+    }
+
+    CFastMutexGuard registry_lock(m_Mutex);
+    const int id = node->GetNodeNum();
+    const bool has_mailbox = (m_PostOffice.find(id) != m_PostOffice.end());
+    const bool has_node    = (m_RegisteredNodes.find(id) != m_RegisteredNodes.end());
+    if (has_mailbox || has_node) {
+        NCBI_THROW(CBlastException, eInvalidArgument, "Duplicate chunk num" );
+    }
+    m_PostOffice[id] = mailbox;
+    m_RegisteredNodes[id] = node;
+}
+
+/// Make one pass over all registered mailboxes, handling at most one message
+/// from each, then format whatever results are ready.
+/// @return
+///   true as soon as a run request had to be deferred because the thread
+///   limit was reached; otherwise the value of IsActive()
+/// @throws CBlastException
+///   eCoreBlastError for a run request with no node attached or for an
+///   unknown message type
+bool CBlastMasterNode::Processing()
+{
+ NON_CONST_ITERATE(TPostOffice, itr, m_PostOffice) {
+ if(itr->second->GetNumMsgs() > 0) {
+ CRef<CBlastNodeMsg> msg = itr->second->ReadMsg();
+ int chunk_num = itr->first;
+ if (msg.NotEmpty()) {
+ switch (msg->GetMsgType()) {
+ case CBlastNodeMsg::eRunRequest:
+ {
+ if ((int) m_ActiveNodes.size() < m_MaxNumThreads) {
+ // There is room for another active node: start it and record
+ // its start time.
+ CBlastNode * n = (CBlastNode *) msg->GetMsgBody();
+ if(n != NULL) {
+ double start_time = m_StopWatch.Elapsed();
+ n->Run();
+ pair< int, double > p(chunk_num, start_time);
+ m_ActiveNodes.insert(p);
+ // Reserve this chunk's slot in the format queue with an empty
+ // message; FormatResults() stops at the first empty slot.
+ CRef<CBlastNodeMsg> empty_msg;
+ pair<int,CRef<CBlastNodeMsg> > m(chunk_num, empty_msg);
+ m_FormatQueue.insert(m);
+ _TRACE("Starting Chunk # " << chunk_num) ;
+ }
+ else {
+ NCBI_THROW(CBlastException, eCoreBlastError, "Invalid mailbox node number" );
+ }
+ }
+ else {
+ // Thread limit reached: put the request back, flush finished
+ // results and leave the loop. FormatResults() may erase entries
+ // of m_PostOffice, so itr is not used after this point.
+ itr->second->UnreadMsg(msg);
+ FormatResults();
+ if (IsFull()) {
+ x_WaitForNewEvent();
+ }
+ return true;
+ }
+ break;
+ }
+ case CBlastNodeMsg::ePostResult:
+ case CBlastNodeMsg::eErrorExit:
+ {
+ // The node is done (successfully or not): fill its format-queue
+ // slot and remove it from the active set.
+ m_FormatQueue[itr->first] = msg;
+ double diff = m_StopWatch.Elapsed() - m_ActiveNodes[itr->first];
+ m_ActiveNodes.erase(chunk_num);
+ CTimeSpan s(diff);
+ _TRACE("Chunk #" << chunk_num << " completed in " << s.AsSmartString());
+ break;
+ }
+ case CBlastNodeMsg::ePostLog:
+ {
+ // Log messages are read from the mailbox and ignored here.
+ break;
+ }
+ default:
+ {
+ NCBI_THROW(CBlastException, eCoreBlastError, "Invalid node message type");
+ break;
+ }
+ }
+ }
+ }
+ }
+ FormatResults();
+ return IsActive();
+}
+
+/// Consume the leading run of filled entries in the format queue.
+/// Results are written to the output stream in queue order. Processing
+/// stops at the first entry whose message has not arrived yet. Each consumed
+/// node is detached and unregistered, and its query statistics are added to
+/// the running totals.
+/// @throws CBlastException
+///   eCoreBlastError when a message carries no node or has an unexpected type
+void CBlastMasterNode::FormatResults()
+{
+    TFormatQueue::iterator pos = m_FormatQueue.begin();
+
+    for ( ;  pos != m_FormatQueue.end();  ++pos) {
+        CRef<CBlastNodeMsg> msg(pos->second);
+        if (msg.Empty()) {
+            // This chunk has not reported back yet; later ones must wait.
+            break;
+        }
+        CBlastNode * node = (CBlastNode *) msg->GetMsgBody();
+        if (node == NULL) {
+            string err_msg = "Empty formatting msg for chunk num # " + NStr::IntToString(pos->first);
+            NCBI_THROW(CBlastException, eCoreBlastError, err_msg);
+        }
+        // Read the number up front: it is still needed after Detach().
+        const int node_num = node->GetNodeNum();
+
+        switch (msg->GetMsgType()) {
+        case CBlastNodeMsg::ePostResult:
+            {
+                string results;
+                node->GetBlastResults(results);
+                if (results != kEmptyStr) {
+                    m_OutputStream << results;
+                }
+            }
+            break;
+        case CBlastNodeMsg::eErrorExit:
+            m_NumErrStatus++;
+            ERR_POST("Chunk # " << node_num << " exit with error (" << node->GetStatus() << ")");
+            break;
+        default:
+            NCBI_THROW(CBlastException, eCoreBlastError, "Invalid msg type");
+        }
+
+        m_NumQueries += node->GetNumOfQueries();
+        m_QueriesLength += node->GetQueriesLength();
+        node->Detach();
+        m_PostOffice.erase(node_num);
+        m_RegisteredNodes.erase(node_num);
+    }
+
+    if (pos != m_FormatQueue.begin()) {
+        m_FormatQueue.erase(m_FormatQueue.begin(), pos);
+    }
+}
+
+/// Tell whether the number of active nodes plus the "in buffer" count has
+/// reached m_MaxNumNodes.
+/// The "in buffer" count is the difference between the last key of
+/// m_RegisteredNodes and the last key of m_ActiveNodes, or m_MaxNumThreads
+/// when either container is empty.
+/// NOTE(review): presumably both containers are ordered by node number, so
+/// the last keys are the highest registered / active node numbers - confirm.
+/// @return non-zero when the limit is reached, zero otherwise
+int CBlastMasterNode::IsFull()
+{
+    unsigned int in_buffer = m_MaxNumThreads;
+    if ( !(m_RegisteredNodes.empty()  ||  m_ActiveNodes.empty()) ) {
+        TRegisteredNodes::reverse_iterator last_registered = m_RegisteredNodes.rbegin();
+        TActiveNodes::reverse_iterator last_active = m_ActiveNodes.rbegin();
+        in_buffer = last_registered->first - last_active->first;
+    }
+    const int occupied = (int) (m_ActiveNodes.size() + in_buffer);
+    return (occupied >= m_MaxNumNodes);
+}
+
+
+/// Heuristic test for "this input line is a sequence identifier".
+/// A line qualifies when a decimal digit or '|' occurs at any of the
+/// positions 0..kMainAccSize (find_last_of searches backwards from there).
+/// @param line  Input line to inspect (not modified)
+/// @return true if such a character was found
+bool s_IsSeqID(string & line)
+{
+    static const int kMainAccSize = 32;
+    return line.find_last_of("0123456789|", kMainAccSize) != NPOS;
+}
+
+/// Read the next batch of queries from the input as text.
+/// Lines are accumulated until a new query would start after the running
+/// size estimate has reached m_QueryBatchSize, or until end of input.
+/// @param queries
+///   Receives the text of the batch; left empty if no query was read
+/// @param query_no
+///   Receives m_QueryCount + 1 as it was before this batch, or -1 if no
+///   query was read
+/// @return
+///   Number of queries counted in this batch
+int
+CBlastNodeInputReader::GetQueryBatch(string & queries, int & query_no)
+{
+ CNcbiOstrstream ss;
+ int q_size = 0;
+ int q_count = 0;
+ queries.clear();
+ query_no = -1;
+
+ while ( !AtEOF()) {
+ // Advance to the next line and drop its leading whitespace.
+ string line = NStr::TruncateSpaces_Unsafe(*++(*this), NStr::eTrunc_Begin);
+ if (line.empty()) {
+ continue;
+ }
+ char c =line[0];
+ // Lines starting with '!', '#' or ';' are skipped.
+ if (c == '!' || c == '#' || c == ';') {
+ continue;
+ }
+ bool isId = s_IsSeqID(line);
+ // A defline ('>') or an identifier-looking line starts a new query.
+ if ( isId || ( c == '>' )) {
+ if (q_size >= m_QueryBatchSize) {
+ // Batch is full: push the line back so the next call starts with it.
+ UngetLine();
+ break;
+ }
+ q_count ++;
+ }
+ // Deflines do not add to the size estimate; identifier lines add the
+ // estimated average query length, any other line adds its own length.
+ if (c != '>') {
+ q_size += isId? m_EstAvgQueryLength : line.size();
+ }
+ ss << line << endl;
+ }
+ // NOTE(review): std::ends appends a '\0' to the stream; whether that
+ // character ends up inside `queries` depends on what CNcbiOstrstream and
+ // its str() are in this build - confirm.
+ ss << std::ends;
+ ss.flush();
+ if (q_count > 0){
+ queries = ss.str();
+ query_no = m_QueryCount +1;
+ m_QueryCount +=q_count;
+ }
+ return q_count;
+}
--- /dev/null
+/* $Id:
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file blast_usage_report.cpp
+ * BLAST usage report api
+ */
+
+#include <ncbi_pch.hpp>
+#include <algo/blast/api/blast_usage_report.hpp>
+#include <corelib/ncbifile.hpp>
+
+#ifndef SKIP_DOXYGEN_PROCESSING
+USING_NCBI_SCOPE;
+USING_SCOPE(blast);
+#endif
+
+/// Value reported for the eApp ("ncbi_app") usage parameter.
+static const string kNcbiAppName="standalone-blast";
+/// File whose first line is checked for "GOOGLE" / "AMAZON" in x_CheckRunEnv().
+static const string kIdFile="/sys/class/dmi/id/sys_vendor";
+
+/// Add usage parameters describing the run environment:
+///  - eDocker when BLAST_DOCKER is set (its value is not inspected);
+///  - eGCP or eAWS when the first line of kIdFile, upper-cased, contains
+///    "GOOGLE" or "AMAZON" respectively;
+///  - eELBJobId / eELBBatchNum from BLAST_ELB_JOB_ID / BLAST_ELB_BATCH_NUM.
+void CBlastUsageReport::x_CheckRunEnv()
+{
+    if (getenv("BLAST_DOCKER") != NULL) {
+        AddParam(eDocker, true);
+    }
+
+    CFile vendor_file(kIdFile);
+    if (vendor_file.Exists()) {
+        CNcbiIfstream vendor_stream(vendor_file.GetPath().c_str(), IOS_BASE::in);
+        string vendor;
+        NcbiGetlineEOL(vendor_stream, vendor);
+        NStr::ToUpper(vendor);
+        if (vendor.find("GOOGLE") != NPOS) {
+            AddParam(eGCP, true);
+        }
+        else if (vendor.find("AMAZON") != NPOS) {
+            AddParam(eAWS, true);
+        }
+    }
+
+    const char* job_id = getenv("BLAST_ELB_JOB_ID");
+    if (job_id != NULL) {
+        AddParam(eELBJobId, string(job_id));
+    }
+
+    const char* batch_num_text = getenv("BLAST_ELB_BATCH_NUM");
+    if (batch_num_text != NULL) {
+        // fConvErr_NoThrow: conversion problems do not raise an exception.
+        const int batch_num = NStr::StringToInt(CTempString(batch_num_text), NStr::fConvErr_NoThrow);
+        AddParam(eELBBatchNum, batch_num);
+    }
+}
+
+/// Decide whether usage reporting is enabled, then record the application
+/// name and the run-environment parameters.
+CBlastUsageReport::CBlastUsageReport()
+{
+ // Must come first: AddParam() records nothing unless IsEnabled() is true.
+ x_CheckBlastUsageEnv();
+ AddParam(eApp, kNcbiAppName);
+ x_CheckRunEnv();
+}
+
+/// Send the collected parameters, then call Wait() and Finish().
+/// Nothing is done when reporting is disabled.
+CBlastUsageReport::~CBlastUsageReport()
+{
+    if ( !IsEnabled() ) {
+        return;
+    }
+    Send(m_Params);
+    Wait();
+    Finish();
+}
+
+/// Map a usage parameter id to the key string used in the report.
+/// An id that is not listed is logged as a warning and the process is
+/// aborted.
+/// @param p  Usage parameter id
+/// @return Key string for @a p
+string CBlastUsageReport::x_EUsageParmsToString(EUsageParams p)
+{
+    switch (p) {
+    case eApp:              return "ncbi_app";
+    case eVersion:          return "version";
+    case eProgram:          return "program";
+    case eTask:             return "task";
+    case eExitStatus:       return "exit_status";
+    case eRunTime:          return "run_time";
+    case eDBName:           return "db_name";
+    case eDBLength:         return "db_length";
+    case eDBNumSeqs:        return "db_num_seqs";
+    case eDBDate:           return "db_date";
+    case eBl2seq:           return "bl2seq";
+    case eNumSubjects:      return "num_subjects";
+    case eSubjectsLength:   return "subjects_length";
+    case eNumQueries:       return "num_queries";
+    case eTotalQueryLength: return "queries_length";
+    case eEvalueThreshold:  return "evalue_threshold";
+    case eNumThreads:       return "num_threads";
+    case eHitListSize:      return "hitlist_size";
+    case eOutputFmt:        return "output_fmt";
+    case eTaxIdList:        return "taxidlist";
+    case eNegTaxIdList:     return "negative_taxidlist";
+    case eGIList:           return "gilist";
+    case eNegGIList:        return "negative_gilist";
+    case eSeqIdList:        return "seqidlist";
+    case eNegSeqIdList:     return "negative_seqidlist";
+    case eIPGList:          return "ipglist";
+    case eNegIPGList:       return "negative_ipglist";
+    case eMaskAlgo:         return "mask_algo";
+    case eCompBasedStats:   return "comp_based_stats";
+    case eRange:            return "range";
+    case eMTMode:           return "mt_mode";
+    case eNumQueryBatches:  return "num_query_batches";
+    case eNumErrStatus:     return "num_error_status";
+    case ePSSMInput:        return "pssm_input";
+    case eConverged:        return "converged";
+    case eArchiveInput:     return "archive";
+    case eRIDInput:         return "rid";
+    case eDBInfo:           return "db_info";
+    case eDBTaxInfo:        return "db_tax_info";
+    case eDBEntry:          return "db_entry";
+    case eDBDumpAll:        return "db_entry_all";
+    case eDBType:           return "db_type";
+    case eInputType:        return "input_type";
+    case eParseSeqIDs:      return "parse_seqids";
+    case eSeqType:          return "seq_type";
+    case eDBTest:           return "db_test";
+    case eDBAliasMode:      return "db_alias_mode";
+    case eDocker:           return "docker";
+    case eGCP:              return "gcp";
+    case eAWS:              return "aws";
+    case eELBJobId:         return "elb_job_id";
+    case eELBBatchNum:      return "elb_batch_num";
+    default:
+        break;
+    }
+    // Unknown id: report it and stop the process.
+    LOG_POST(Warning <<"Invalid usage params: " << (int)p);
+    abort();
+    return string();
+}
+
+/// Record an int-valued usage parameter; no-op while reporting is disabled.
+/// @param p    Parameter id
+/// @param val  Value, stored in its decimal string form
+void CBlastUsageReport::AddParam(EUsageParams p, int val)
+{
+    if ( !IsEnabled() ) {
+        return;
+    }
+    const string key = x_EUsageParmsToString(p);
+    m_Params.Add(key, NStr::IntToString(val));
+}
+
+/// Record a string-valued usage parameter; no-op while reporting is disabled.
+/// @param p    Parameter id
+/// @param val  Value to record
+void CBlastUsageReport::AddParam(EUsageParams p, const string & val)
+{
+    if ( !IsEnabled() ) {
+        return;
+    }
+    const string key = x_EUsageParmsToString(p);
+    m_Params.Add(key, val);
+}
+
+/// Record a double-valued usage parameter; no-op while reporting is disabled.
+/// @param p    Parameter id
+/// @param val  Value to record
+void CBlastUsageReport::AddParam(EUsageParams p, const double & val)
+{
+    if ( !IsEnabled() ) {
+        return;
+    }
+    const string key = x_EUsageParmsToString(p);
+    m_Params.Add(key, val);
+}
+
+/// Interpret a textual on/off switch without letting a malformed value
+/// escape as an exception.
+/// @param value   Text to interpret
+/// @param origin  Where the text came from (used in the warning only)
+/// @param enable  Receives the parsed value on success, untouched otherwise
+/// @return true if @a value was a valid boolean string
+static bool s_ParseUsageSwitch(const string & value, const char * origin, bool & enable)
+{
+    try {
+        enable = NStr::StringToBool(value);
+        return true;
+    }
+    catch (const CStringException &) {
+        LOG_POST(Warning << "Ignoring invalid BLAST_USAGE_REPORT value '"
+                         << value << "' from " << origin);
+        return false;
+    }
+}
+
+/// Decide whether usage reporting is enabled.
+/// Reporting is switched off when the BLAST_USAGE_REPORT environment
+/// variable, or failing that the [BLAST] BLAST_USAGE_REPORT registry entry,
+/// holds a false value; otherwise it is switched on. A value that is not a
+/// valid boolean string is reported with a warning and ignored. This
+/// function runs from the constructor, where the conversion exception used
+/// to propagate to the caller.
+void CBlastUsageReport::x_CheckBlastUsageEnv()
+{
+    bool enable = true;
+
+    const char * blast_usage_env = getenv("BLAST_USAGE_REPORT");
+    if (blast_usage_env != NULL  &&
+        s_ParseUsageSwitch(blast_usage_env, "the environment", enable)  &&
+        !enable) {
+        SetEnabled(false);
+        CUsageReportAPI::SetEnabled(false);
+        LOG_POST(Info <<"Phone home disabled");
+        return;
+    }
+
+    // An empty stream combined with fWithNcbirc: the registry content comes
+    // from the .ncbirc configuration only.
+    CNcbiIstrstream empty_stream(kEmptyCStr);
+    CRef<CNcbiRegistry> registry(new CNcbiRegistry(empty_stream, IRegistry::fWithNcbirc));
+    if (registry->HasEntry("BLAST", "BLAST_USAGE_REPORT")  &&
+        s_ParseUsageSwitch(registry->Get("BLAST", "BLAST_USAGE_REPORT"),
+                           "the configuration file", enable)  &&
+        !enable) {
+        SetEnabled(false);
+        CUsageReportAPI::SetEnabled(false);
+        LOG_POST(Info <<"Phone home disabled by config setting");
+        return;
+    }
+
+    CUsageReportAPI::SetEnabled(true);
+    SetEnabled(true);
+    LOG_POST(Info <<"Phone home enabled");
+}
+
+/// Record an Int8-valued usage parameter; no-op while reporting is disabled.
+/// @param p    Parameter id
+/// @param val  Value to record
+void CBlastUsageReport::AddParam(EUsageParams p, Int8 val)
+{
+    if ( !IsEnabled() ) {
+        return;
+    }
+    const string key = x_EUsageParmsToString(p);
+    m_Params.Add(key, val);
+}
+
+/// Record a bool-valued usage parameter; no-op while reporting is disabled.
+/// @param p    Parameter id
+/// @param val  Value to record
+void CBlastUsageReport::AddParam(EUsageParams p, bool val)
+{
+    if ( !IsEnabled() ) {
+        return;
+    }
+    const string key = x_EUsageParmsToString(p);
+    m_Params.Add(key, val);
+}
-/* $Id: blast_input.cpp 550028 2017-10-30 16:49:00Z rackerst $
+/* $Id: blast_input.cpp 615335 2020-08-31 15:36:38Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
retval->AddQuery(q);
}
+ m_NumSeqs +=retval->Size();
+ m_TotalLength += size_read;
_TRACE("Read " << retval->Size() << " queries");
return retval;
}
-/* $Id: cmdline_flags.cpp 605536 2020-04-13 11:07:50Z ivanov $
+/* $Id: cmdline_flags.cpp 615184 2020-08-28 04:29:55Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
const string kArgUnalignedOutput("out_unaligned");
const string kArgUnalignedFormat("unaligned_fmt");
+const string kArgMTMode("mt_mode");
+
END_SCOPE(blast)
END_NCBI_SCOPE
-/* $Id: rpsblast_args.cpp 544441 2017-08-23 11:55:51Z camacho $
+/* $Id: rpsblast_args.cpp 615193 2020-08-28 04:31:11Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
NStr::IntToString(kDfltRpsThreadingMode));
arg_desc.SetConstraint(kArgNumThreads,
new CArgAllowValuesGreaterThanOrEqual(0));
+ arg_desc.AddDefaultKey(kArgMTMode, "int_value",
+ "Multi-thread mode to use in RPS BLAST search:\n "
+ "0 (auto) split by database vols\n "
+ "1 split by queries",
+ CArgDescriptions::eInteger,
+ NStr::IntToString(0));
+ arg_desc.SetConstraint(kArgMTMode,
+ new CArgAllowValuesBetween(0, 1, true));
#endif
arg_desc.SetCurrentGroup("");
}
return blast::GetQueryBatchSize(eRPSBlast, m_IsUngapped, is_remote);
}
+/// Get the input stream.
+/// Delegates to CBlastAppArgs::GetInputStream().
+CNcbiIstream&
+CRPSBlastAppArgs::GetInputStream()
+{
+ return CBlastAppArgs::GetInputStream();
+}
+/// Get the output stream.
+/// Delegates to CBlastAppArgs::GetOutputStream().
+CNcbiOstream&
+CRPSBlastAppArgs::GetOutputStream()
+{
+ return CBlastAppArgs::GetOutputStream();
+}
+
+/// Return the query stream created by the constructor.
+/// The process is aborted if no stream is present.
+CNcbiIstream&
+CRPSBlastNodeArgs::GetInputStream()
+{
+    if (m_InputStream == NULL) {
+        abort();
+    }
+    return *m_InputStream;
+}
+/// Get the output stream.
+/// Returns this object's own m_OutputStream member.
+CNcbiOstream&
+CRPSBlastNodeArgs::GetOutputStream()
+{
+ return m_OutputStream;
+}
+
+/// Create node arguments whose input stream reads from @a input.
+/// The stream is allocated here and released in the destructor.
+/// NOTE(review): depending on what CNcbiIstrstream is in this build, the
+/// stream may refer to the caller's buffer rather than copy it - confirm
+/// that @a input outlives this object.
+/// @param input  Query text to read from
+CRPSBlastNodeArgs::CRPSBlastNodeArgs(const string & input)
+{
+ m_InputStream = new CNcbiIstrstream(input.c_str(), input.length());
+}
+
+/// Release the input stream allocated by the constructor.
+CRPSBlastNodeArgs::~CRPSBlastNodeArgs()
+{
+    // Deleting a null pointer is a no-op, so no separate check is needed.
+    delete m_InputStream;
+    m_InputStream = NULL;
+}
+
+/// Query batch size for RPS-BLAST, taking the ungapped and remote-execution
+/// settings into account.
+/// @return Value of blast::GetQueryBatchSize() for eRPSBlast
+int
+CRPSBlastNodeArgs::GetQueryBatchSize() const
+{
+    const bool run_remotely =
+        m_RemoteArgs.NotEmpty()  &&  m_RemoteArgs->ExecuteRemotely();
+    return blast::GetQueryBatchSize(eRPSBlast, m_IsUngapped, run_remotely);
+}
+
+/// Create the options handle for an RPS-BLAST search.
+/// @param locality  Locality passed on to the options handle
+/// @return Newly created CBlastRPSOptionsHandle
+CRef<CBlastOptionsHandle>
+CRPSBlastNodeArgs::x_CreateOptionsHandle(CBlastOptions::EAPILocality locality,
+                                         const CArgs& /*args*/)
+{
+    return CRef<CBlastOptionsHandle>(new CBlastRPSOptionsHandle(locality));
+}
END_SCOPE(blast)
END_NCBI_SCOPE
-/* $Id: rpstblastn_args.cpp 505234 2016-06-23 13:16:57Z fongah2 $
+/* $Id: rpstblastn_args.cpp 615193 2020-08-28 04:31:11Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <algo/blast/blastinput/rpstblastn_args.hpp>
#include <algo/blast/api/rpstblastn_options.hpp>
#include <algo/blast/blastinput/blast_input_aux.hpp>
+#include <algo/blast/blastinput/rpsblast_args.hpp>
#include <algo/blast/api/version.hpp>
BEGIN_NCBI_SCOPE
arg.Reset(m_FormattingArgs);
m_Args.push_back(arg);
- m_MTArgs.Reset(new CMTArgs(true));
+ m_MTArgs.Reset(new CRPSBlastMTArgs());
arg.Reset(m_MTArgs);
m_Args.push_back(arg);
return blast::GetQueryBatchSize(eRPSTblastn, m_IsUngapped, is_remote);
}
+/// Get the input stream.
+/// Delegates to CBlastAppArgs::GetInputStream().
+CNcbiIstream&
+CRPSTBlastnAppArgs::GetInputStream()
+{
+ return CBlastAppArgs::GetInputStream();
+}
+/// Get the output stream.
+/// Delegates to CBlastAppArgs::GetOutputStream().
+CNcbiOstream&
+CRPSTBlastnAppArgs::GetOutputStream()
+{
+ return CBlastAppArgs::GetOutputStream();
+}
+
+/// Return the query stream created by the constructor.
+/// The process is aborted if no stream is present.
+CNcbiIstream&
+CRPSTBlastnNodeArgs::GetInputStream()
+{
+    if (m_InputStream == NULL) {
+        abort();
+    }
+    return *m_InputStream;
+}
+/// Get the output stream.
+/// Returns this object's own m_OutputStream member.
+CNcbiOstream&
+CRPSTBlastnNodeArgs::GetOutputStream()
+{
+ return m_OutputStream;
+}
+
+/// Create node arguments whose input stream reads from @a input.
+/// The stream is allocated here and released in the destructor.
+/// NOTE(review): depending on what CNcbiIstrstream is in this build, the
+/// stream may refer to the caller's buffer rather than copy it - confirm
+/// that @a input outlives this object.
+/// @param input  Query text to read from
+CRPSTBlastnNodeArgs::CRPSTBlastnNodeArgs(const string & input)
+{
+ m_InputStream = new CNcbiIstrstream(input.c_str(), input.length());
+}
+
+/// Release the input stream allocated by the constructor.
+CRPSTBlastnNodeArgs::~CRPSTBlastnNodeArgs()
+{
+    // Deleting a null pointer is a no-op, so no separate check is needed.
+    delete m_InputStream;
+    m_InputStream = NULL;
+}
+
+/// Query batch size for RPS-TBLASTN, taking the ungapped and
+/// remote-execution settings into account.
+/// @return Value of blast::GetQueryBatchSize() for eRPSTblastn
+int
+CRPSTBlastnNodeArgs::GetQueryBatchSize() const
+{
+    const bool run_remotely =
+        m_RemoteArgs.NotEmpty()  &&  m_RemoteArgs->ExecuteRemotely();
+    return blast::GetQueryBatchSize(eRPSTblastn, m_IsUngapped, run_remotely);
+}
+
+/// Create the options handle for an RPS-TBLASTN search.
+/// @param locality  Locality passed on to the options handle
+/// @return Newly created CRPSTBlastnOptionsHandle
+CRef<CBlastOptionsHandle>
+CRPSTBlastnNodeArgs::x_CreateOptionsHandle(CBlastOptions::EAPILocality locality,
+                                           const CArgs& /*args*/)
+{
+    return CRef<CBlastOptionsHandle>(new CRPSTBlastnOptionsHandle(locality));
+}
+
END_SCOPE(blast)
END_NCBI_SCOPE
-/* $Id: blast_engine.c 604741 2020-04-01 15:15:25Z ivanov $
+/* $Id: blast_engine.c 617226 2020-09-28 18:25:19Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#define CONV_NUCL2PROT_COORDINATES(length) (length) / CODON_LENGTH
NCBI_XBLAST_EXPORT const int kBlastMajorVersion = 2;
-NCBI_XBLAST_EXPORT const int kBlastMinorVersion = 10;
-NCBI_XBLAST_EXPORT const int kBlastPatchVersion = 1;
-NCBI_XBLAST_EXPORT const char* kBlastReleaseDate = "June-01-2020";
+NCBI_XBLAST_EXPORT const int kBlastMinorVersion = 11;
+NCBI_XBLAST_EXPORT const int kBlastPatchVersion = 0;
+NCBI_XBLAST_EXPORT const char* kBlastReleaseDate = "Oct-15-2020";
/** Structure to be passed to s_BlastSearchEngineCore, containing pointers
to various preallocated structures and arrays. */
Boolean jumper = (ext_options->ePrelimGapExt == eJumperWithTraceback);
Int4 offset_array_size = GetOffsetArraySize(lookup_wrap);
+ if(phi_lookup) {
+ offset_array_size = PHI_MAX_HIT;
+ }
ASSERT(seq_src);
*aux_struct_ptr = aux_struct = (BlastCoreAuxStruct*)
-/* $Id: blast_kappa.c 605341 2020-04-09 16:06:51Z ivanov $
+/* $Id: blast_kappa.c 616357 2020-09-15 12:19:52Z ivanov $
* ==========================================================================
*
* PUBLIC DOMAIN NOTICE
int tid = 0;
#ifdef _OPENMP
- tid = omp_get_thread_num();
+ if(actual_num_threads > 1) {
+ tid = omp_get_thread_num();
+ }
#endif
seqSrc = seqsrc_tld[tid];
scoringParams = score_params_tld[tid];
if (seqSrc) {
continue;
}
+ if(actual_num_threads > 1) {
#pragma omp critical(intrpt)
- interrupt = TRUE;
+ interrupt = TRUE;
#pragma omp flush(interrupt)
- continue;
+ continue;
+ }
}
if (BlastCompo_EarlyTermination(
if (seqSrc) {
continue;
}
+ if(actual_num_threads > 1) {
#pragma omp critical(intrpt)
- interrupt = TRUE;
+ interrupt = TRUE;
#pragma omp flush(interrupt)
- continue;
+ continue;
+ }
}
query_index = localMatch->query_index;
}
s_MatchingSequenceRelease(&matchingSeq);
BlastCompo_AlignmentsFree(&incoming_aligns, NULL);
- if (*pStatusCode != 0 || !seqSrc) {
+ if ((actual_num_threads > 1) &&
+ (*pStatusCode != 0 || !seqSrc)) {
#pragma omp critical(intrpt)
interrupt = TRUE;
#pragma omp flush(interrupt)
m_SamFormatter.reset(new CBlast_SAM_Formatter(m_Outfile, *m_Scope,
m_CustomOutputFormatSpec, pg));
}
+
+/// Tell whether the composition-based statistics setting is reported for
+/// the given program.
+/// @param program  BLAST program
+/// @return true for blastp, tblastn, psiblast, psitblastn, rpsblast,
+///         rpstblastn, blastx and deltablast; false for anything else
+bool s_SetCompBasedStats(EProgram program)
+{
+    switch (program) {
+    case eBlastp:
+    case eTblastn:
+    case ePSIBlast:
+    case ePSITblastn:
+    case eRPSBlast:
+    case eRPSTblastn:
+    case eBlastx:
+    case eDeltaBlast:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/// Add the parameters describing this search (program, task, thresholds,
+/// output format, subject or database details and ID-list filters) to a
+/// usage report. Does nothing when the report is disabled.
+/// @param report  Usage report to fill in
+void CBlastFormat::LogBlastSearchInfo(CBlastUsageReport & report)
+{
+    if ( !report.IsEnabled() ) {
+        return;
+    }
+
+    report.AddParam(CBlastUsageReport::eProgram, m_Program);
+    EProgram task = m_Options->GetProgram();
+    string task_str = EProgramToTaskName(task);
+    report.AddParam(CBlastUsageReport::eTask, task_str);
+    report.AddParam(CBlastUsageReport::eEvalueThreshold, m_Options->GetEvalueThreshold());
+    report.AddParam(CBlastUsageReport::eHitListSize, m_Options->GetHitlistSize());
+    report.AddParam(CBlastUsageReport::eOutputFmt, m_FormatType);
+
+    if (s_SetCompBasedStats(task)) {
+        report.AddParam(CBlastUsageReport::eCompBasedStats, m_Options->GetCompositionBasedStats());
+    }
+
+    // Accumulate in 64 bits: the totals over all databases / subjects can
+    // exceed the range of int.
+    Int8 num_seqs = 0;
+    for (size_t i = 0; i < m_DbInfo.size(); i++) {
+        num_seqs += m_DbInfo[i].number_seqs;
+    }
+
+    if (m_IsBl2Seq) {
+        // Pass a real bool: a "true" string literal would be converted to
+        // bool (pointer-to-bool) rather than to string by overload
+        // resolution, so the recorded value is the same.
+        report.AddParam(CBlastUsageReport::eBl2seq, true);
+        if (m_IsDbScan) {
+            report.AddParam(CBlastUsageReport::eNumSubjects, num_seqs);
+            report.AddParam(CBlastUsageReport::eSubjectsLength, GetDbTotalLength());
+        }
+        else if (m_SeqInfoSrc.NotEmpty()) {
+            report.AddParam(CBlastUsageReport::eNumSubjects, (int) m_SeqInfoSrc->Size());
+            Int8 total_subj_length = 0;
+            for (size_t i = 0; i < m_SeqInfoSrc->Size(); i++) {
+                total_subj_length += m_SeqInfoSrc->GetLength(i);
+            }
+            report.AddParam(CBlastUsageReport::eSubjectsLength, total_subj_length);
+        }
+        return;
+    }
+
+    // Database search: report the database name without its directory part.
+    string dir = kEmptyStr;
+    CFile::SplitPath(m_DbName, &dir);
+    string db_name = m_DbName;
+    if (dir != kEmptyStr) {
+        db_name = m_DbName.substr(dir.length());
+    }
+    report.AddParam(CBlastUsageReport::eDBName, db_name);
+    report.AddParam(CBlastUsageReport::eDBLength, GetDbTotalLength());
+    report.AddParam(CBlastUsageReport::eDBNumSeqs, num_seqs);
+    // The date is taken from the first database entry; skip it when no
+    // database information is available instead of indexing an empty vector.
+    if ( !m_DbInfo.empty() ) {
+        report.AddParam(CBlastUsageReport::eDBDate, m_DbInfo[0].date);
+    }
+
+    if (m_SearchDb.Empty()) {
+        return;
+    }
+    if (m_SearchDb->GetGiList().NotEmpty()) {
+        CRef<CSeqDBGiList> l = m_SearchDb->GetGiList();
+        if (l->GetNumGis()) {
+            report.AddParam(CBlastUsageReport::eGIList, true);
+        }
+        if (l->GetNumSis()) {
+            report.AddParam(CBlastUsageReport::eSeqIdList, true);
+        }
+        if (l->GetNumTaxIds()) {
+            report.AddParam(CBlastUsageReport::eTaxIdList, true);
+        }
+        if (l->GetNumPigs()) {
+            report.AddParam(CBlastUsageReport::eIPGList, true);
+        }
+    }
+    if (m_SearchDb->GetNegativeGiList().NotEmpty()) {
+        CRef<CSeqDBGiList> l = m_SearchDb->GetNegativeGiList();
+        if (l->GetNumGis()) {
+            report.AddParam(CBlastUsageReport::eNegGIList, true);
+        }
+        if (l->GetNumSis()) {
+            report.AddParam(CBlastUsageReport::eNegSeqIdList, true);
+        }
+        if (l->GetNumTaxIds()) {
+            report.AddParam(CBlastUsageReport::eNegTaxIdList, true);
+        }
+        if (l->GetNumPigs()) {
+            report.AddParam(CBlastUsageReport::eNegIPGList, true);
+        }
+    }
+}
-/* $Id: version_reference_unit_test.cpp 604741 2020-04-01 15:15:25Z ivanov $
+/* $Id: version_reference_unit_test.cpp 617227 2020-09-28 18:26:44Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
BOOST_AUTO_TEST_CASE(testVersion) {
const int kMajor = 2;
- const int kMinor = 10;
- const int kPatch = 1;
+ const int kMinor = 11;
+ const int kPatch = 0;
blast::CBlastVersion v;
BOOST_REQUIRE_EQUAL(kMajor, v.GetMajor());
BOOST_REQUIRE_EQUAL(kMinor, v.GetMinor());
#############################################################################
-# $Id: CMakeLists.txt 594373 2019-10-03 13:30:50Z gouriano $
+# $Id: CMakeLists.txt 612980 2020-07-30 19:13:50Z ivanov $
#############################################################################
# Include projects from this directory
convert_seq
discrepancy_report
dustmasker
+ flat2asn
formatguess
gap_stats
gi2taxid
-# $Id: Makefile.in 591515 2019-08-16 14:37:05Z ludwigf $
+# $Id: Makefile.in 612980 2020-07-30 19:13:50Z ivanov $
# Miscellaneous applications
#################################
-SUB_PROJ = asn2asn asn2fasta asn2flat asnval asn_cleanup \
+SUB_PROJ = asn2asn asn2fasta asn2flat flat2asn asnval asn_cleanup \
id1_fetch blast convert_seq \
nmer_repeats objmgr gi2taxid netschedule grid netstorage igblast \
winmasker dustmasker segmasker blastdb vecscreen \
srcchk tableval ncbi_encrypt ssub_fork asn_cache magicblast \
multipattern prt2fsm \
pub_report gff_deconcat sub_fuse \
- feat_import
+ feat_import
EXPENDABLE_SUB_PROJ = split_cache wig2table netcache rmblastn dblb tls idfetch pubseq_gateway
#############################################################################
-# $Id: CMakeLists.rpsblast.app.txt 593591 2019-09-20 14:53:34Z gouriano $
+# $Id: CMakeLists.rpsblast.app.txt 615197 2020-08-28 04:31:45Z fukanchi $
#############################################################################
NCBI_begin_app(rpsblast)
- NCBI_sources(rpsblast_app)
+ NCBI_sources(rpsblast_node rpsblast_app)
NCBI_add_definitions(NCBI_MODULE=BLAST)
NCBI_uses_toolkit_libraries(blast_app_util)
NCBI_requires(-Cygwin)
#############################################################################
-# $Id: CMakeLists.rpstblastn.app.txt 593591 2019-09-20 14:53:34Z gouriano $
+# $Id: CMakeLists.rpstblastn.app.txt 615200 2020-08-28 04:32:09Z fukanchi $
#############################################################################
NCBI_begin_app(rpstblastn)
- NCBI_sources(rpstblastn_app)
+ NCBI_sources(rpstblastn_node rpstblastn_app)
NCBI_add_definitions(NCBI_MODULE=BLAST)
NCBI_uses_toolkit_libraries(blast_app_util)
NCBI_requires(-Cygwin)
WATCHERS = camacho madden fongah2
APP = rpsblast
-SRC = rpsblast_app
+SRC = rpsblast_node rpsblast_app
LIB_ = $(BLAST_INPUT_LIBS) $(BLAST_LIBS) xregexp $(PCRE_LIB) $(OBJMGR_LIBS)
LIB = blast_app_util $(LIB_:%=%$(STATIC))
WATCHERS = camacho madden fongah2
APP = rpstblastn
-SRC = rpstblastn_app
+SRC = rpstblastn_node rpstblastn_app
LIB_ = $(BLAST_INPUT_LIBS) $(BLAST_LIBS) xregexp $(PCRE_LIB) $(OBJMGR_LIBS)
LIB = blast_app_util $(LIB_:%=%$(STATIC))
-/* $Id: blast_app_util.cpp 592833 2019-09-09 13:01:28Z fongah2 $
+/* $Id: blast_app_util.cpp 615351 2020-08-31 15:38:53Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
return filename;
}
-
-void CBlastAppDiagHandler::Post(const SDiagMessage & mess)
-{
- if(m_handler != NULL) {
- m_handler->Post(mess);
- }
- if(m_save) {
- CRef<CBlast4_error> d(new CBlast4_error);
- string m;
- mess.Write(m);
- d->SetMessage(NStr::Sanitize(m));
- d->SetCode((int)mess.m_Severity);
- {
- DEFINE_STATIC_MUTEX(mx);
- CMutexGuard guard(mx);
- m_messages.push_back(d);
- }
- }
-}
-
-void CBlastAppDiagHandler::ResetMessages()
-{
- DEFINE_STATIC_MUTEX(mx);
- CMutexGuard guard(mx);
- m_messages.clear();
-}
-
-CBlastAppDiagHandler::~CBlastAppDiagHandler()
-{
- if(m_handler) {
- SetDiagHandler(m_handler);
- m_handler = NULL;
- }
-}
-
-void CBlastAppDiagHandler::DoNotSaveMessages(void)
-{
- m_save = false;
- ResetMessages();
-}
-
void PrintErrorArchive(const CArgs & a, const list<CRef<CBlast4_error> > & msg)
{
try {
}
+void LogQueryInfo(CBlastUsageReport & report, const CBlastInput & q_info) // Adds the query totals reported by q_info to the usage report.
+{
+ report.AddParam(CBlastUsageReport::eTotalQueryLength, q_info.GetTotalLengthProcessed()); // value of GetTotalLengthProcessed()
+ report.AddParam(CBlastUsageReport::eNumQueries, q_info.GetNumSeqsProcessed()); // value of GetNumSeqsProcessed()
+}
+
+
+void LogRPSBlastOptions(blast::CBlastUsageReport & report, const CBlastOptions & opt) // Adds four search options read from opt to the usage report.
+{
+ report.AddParam(CBlastUsageReport::eProgram, Blast_ProgramNameFromType(opt.GetProgramType())); // program name derived from the program type
+ report.AddParam(CBlastUsageReport::eEvalueThreshold, opt.GetEvalueThreshold());
+ report.AddParam(CBlastUsageReport::eHitListSize, opt.GetHitlistSize());
+ report.AddParam(CBlastUsageReport::eCompBasedStats, opt.GetCompositionBasedStats());
+}
+
+/// Adds database details and the output format taken from the command line
+/// arguments to the usage report.
+/// @param report Usage report that receives the parameters [in|out]
+/// @param args Application arguments to read from [in]
+void LogRPSCmdOptions(blast::CBlastUsageReport & report, const CBlastAppArgs & args)
+{
+    // Database details are logged only when the whole chain
+    // database args -> search database -> CSeqDB handle is available.
+    if (args.GetBlastDatabaseArgs().NotEmpty() &&
+        args.GetBlastDatabaseArgs()->GetSearchDatabase().NotEmpty() &&
+        args.GetBlastDatabaseArgs()->GetSearchDatabase()->GetSeqDb().NotEmpty()) {
+
+        CRef<CSeqDB> db = args.GetBlastDatabaseArgs()->GetSearchDatabase()->GetSeqDb();
+        string db_name = db->GetDBNameList();
+        // Keep only the text after the last path separator. The position is
+        // held in the string's own size type and compared against npos, so
+        // the "not found" value is never narrowed into an int.
+        const string::size_type sep_pos = db_name.find_last_of(CFile::GetPathSeparator());
+        if (sep_pos != string::npos) {
+            db_name.erase(0, sep_pos + 1);
+        }
+        report.AddParam(CBlastUsageReport::eDBName, db_name);
+        report.AddParam(CBlastUsageReport::eDBLength, (Int8) db->GetTotalLength());
+        report.AddParam(CBlastUsageReport::eDBNumSeqs, db->GetNumSeqs());
+        report.AddParam(CBlastUsageReport::eDBDate, db->GetDate());
+    }
+
+    // The output format is logged whenever formatting arguments exist.
+    if(args.GetFormattingArgs().NotEmpty()){
+        report.AddParam(CBlastUsageReport::eOutputFmt, args.GetFormattingArgs()->GetFormattedOutputChoice());
+    }
+}
END_NCBI_SCOPE
-/* $Id: blast_app_util.hpp 570350 2018-09-07 12:47:53Z fongah2 $
+/* $Id: blast_app_util.hpp 615351 2020-08-31 15:38:53Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <objtools/readers/reader_exception.hpp>
#include <objtools/blast/seqdb_reader/seqdb.hpp>
#include <algo/blast/blastinput/blast_args.hpp>
+#include <algo/blast/blastinput/blast_input.hpp>
#include <objects/blast/Blast4_request.hpp>
#include <algo/blast/api/uniform_search.hpp>
#include <objtools/blast/seqdb_writer/writedb_error.hpp>
#include <algo/blast/format/blastfmtutil.hpp> // for CBlastFormatUtil
#include <algo/blast/blastinput/blast_scope_src.hpp> // for SDataLoaderConfig
+#include <algo/blast/api/blast_usage_report.hpp>
BEGIN_NCBI_SCOPE
string
GetSubjectFile(const CArgs& args);
-/// Class to capture message from diag handler
-class CBlastAppDiagHandler : public CDiagHandler
-{
-public:
- /// Constructor
- CBlastAppDiagHandler():m_handler(GetDiagHandler(true)), m_save (true) {}
- /// Destructor
- ~CBlastAppDiagHandler();
- /// Save and post diag message
- virtual void Post (const SDiagMessage & mess);
- /// Reset messgae buffer, erase all saved message
- void ResetMessages(void);
- /// Call to turn off saving diag message, discard all saved message
- void DoNotSaveMessages(void);
- /// Return list of saved diag messages
- list<CRef<CBlast4_error> > & GetMessages(void) { return m_messages;}
-private :
- CDiagHandler * m_handler;
- list<CRef<CBlast4_error> > m_messages;
- bool m_save;
-};
-
/// Function to print blast archive with only error messages (search failed)
/// to output stream
/// @param a cmdline args [in]
/// Clean up formatter scope and release
void QueryBatchCleanup();
+void LogQueryInfo(blast::CBlastUsageReport & report, const blast::CBlastInput & q_info); // Log query totals (total length, number of queries) from q_info into the usage report
+
+/// Log blast usage opts for rpsblast apps
+void LogRPSBlastOptions(blast::CBlastUsageReport & report, const blast::CBlastOptions & opt);
+void LogRPSCmdOptions(blast::CBlastUsageReport & report, const blast::CBlastAppArgs & args);
+
END_NCBI_SCOPE
#endif /* APP__BLAST_APP_UTIL__HPP */
-/* $Id: blast_formatter.cpp 591152 2019-08-12 11:18:21Z fongah2 $
+/* $Id: blast_formatter.cpp 616875 2020-09-22 13:14:55Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <algo/blast/blastinput/blast_input_aux.hpp>
#include <algo/blast/format/blast_format.hpp>
#include <algo/blast/api/objmgr_query_data.hpp>
+#include <objtools/data_loaders/blastdb/bdbloader_rmt.hpp>
+#include <objtools/data_loaders/genbank/gbloader.hpp>
+#include <objtools/data_loaders/genbank/id2/reader_id2.hpp>
#include "blast_app_util.hpp"
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
- m_LoadFromArchive = false;
+ m_LoadFromArchive = false;
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blast_formatter");
+ }
+ }
+
+ ~CBlastFormatterApp() { // Destructor: adds the stopwatch's elapsed time to the usage report as eRunTime.
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
+
private:
/** @inheritDoc */
virtual void Init();
/// @param scope Scope object to add the sequence data to [in|out]
SSeqLoc x_QueryBioseqToSSeqLoc(const CBioseq& bioseq, CRef<CScope> scope);
+ void x_AddCmdOptions();
+
/// Our link to the NCBI BLAST service
CRef<CRemoteBlast> m_RmtBlast;
/// Tracks whether results come from an archive file.
bool m_LoadFromArchive;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
void CBlastFormatterApp::Init()
return retval;
}
+/// Prepares the subject side (BLAST database or subject sequences) and the
+/// scope's data loaders for formatting.
+///
+/// When subject sequences are supplied they are wrapped in a CLocalDbAdapter.
+/// Otherwise the search database is opened locally; if that raises
+/// CSeqDBException, a remote BLAST database data loader is registered
+/// instead. A GenBank data loader is added last on a best-effort basis.
+///
+/// @param db_args Database/subject arguments; must not be empty [in]
+/// @param opts_hndl BLAST options handle passed to the subject adapter [in]
+/// @param db_adapter Reset on entry, then set to the adapter created [out]
+/// @param scope Scope to add loaders to; created here when empty [in|out]
+/// @return true when the remote BLAST database loader had to be used
+/// @throw CException (eUnknown) when neither the local nor the remote
+///        database could be initialized
+bool
+s_InitializeSubject(CRef<blast::CBlastDatabaseArgs> db_args,
+                    CRef<blast::CBlastOptionsHandle> opts_hndl,
+                    CRef<blast::CLocalDbAdapter>& db_adapter,
+                    CRef<objects::CScope>& scope)
+{
+    bool isRemote = false;
+    db_adapter.Reset();
+
+    _ASSERT(db_args.NotEmpty());
+    CRef<CSearchDatabase> search_db = db_args->GetSearchDatabase();
+
+    if (scope.Empty()) {
+        scope.Reset(new CScope(*CObjectManager::GetInstance()));
+    }
+
+    CRef<IQueryFactory> subjects;
+    if ( (subjects = db_args->GetSubjects(scope)) ) {
+        _ASSERT(search_db.Empty());
+        // The adapter's last argument is false only when the BL2SEQ_LEGACY
+        // environment variable is set, and true otherwise.
+        const char* bl2seq_legacy = getenv("BL2SEQ_LEGACY");
+        db_adapter.Reset(new CLocalDbAdapter(subjects, opts_hndl, bl2seq_legacy == NULL));
+    } else {
+        _ASSERT(search_db.NotEmpty());
+        try {
+            // Try to open the BLAST database even for remote searches, as if
+            // it is available locally, it will be better to fetch the
+            // sequence data for formatting from this (local) source
+            CRef<CSeqDB> seqdb = search_db->GetSeqDb();
+            db_adapter.Reset(new CLocalDbAdapter(*search_db));
+            scope->AddDataLoader(RegisterOMDataLoader(seqdb), CBlastDatabaseArgs::kSubjectsDataLoaderPriority);
+            LOG_POST(Info <<"Add local loader " << search_db->GetDatabaseName());
+        } catch (const CSeqDBException&) {
+            // Local database unavailable: fall back to the remote loader.
+            // The diag level is raised to Critical while registering and
+            // put back to Warning afterwards.
+            // NOTE(review): presumably to silence loader chatter - confirm.
+            SetDiagPostLevel(eDiag_Critical);
+            string remote_loader = kEmptyStr;
+            try {
+                db_adapter.Reset(new CLocalDbAdapter(*search_db));
+                remote_loader = CRemoteBlastDbDataLoader::RegisterInObjectManager
+                    (*( CObjectManager::GetInstance()),
+                     search_db->GetDatabaseName(),
+                     search_db->IsProtein() ? CBlastDbDataLoader::eProtein : CBlastDbDataLoader::eNucleotide,
+                     true, CObjectManager::eDefault, CBlastDatabaseArgs::kSubjectsDataLoaderPriority)
+                    .GetLoader()->GetName();
+                scope->AddDataLoader(remote_loader, CBlastDatabaseArgs::kSubjectsDataLoaderPriority);
+                SetDiagPostLevel(eDiag_Warning);
+                isRemote = true;
+                LOG_POST(Info <<"Remote " << search_db->GetDatabaseName());
+            }
+            catch (const CException&) {
+                SetDiagPostLevel(eDiag_Warning);
+                NCBI_THROW(CException, eUnknown, "Fail to initialize local or remote DB" );
+            }
+        }
+    }
+    try {
+        const int kGenbankLoaderPriority = 99;
+        CRef<CReader> reader(new CId2Reader);
+        reader->SetPreopenConnection(false);
+        string genbank_loader = CGBDataLoader::RegisterInObjectManager
+            (*( CObjectManager::GetInstance()), reader,CObjectManager::eNonDefault).GetLoader()->GetName();
+        scope->AddDataLoader(genbank_loader, kGenbankLoaderPriority);
+    } catch (const CException&) {
+        LOG_POST(Info << "Failed to add genbank dataloader");
+        // It's ok not to have genbank loader
+    }
+    return isRemote;
+}
+
int CBlastFormatterApp::PrintFormattedOutput(void)
{
int retval = 0;
}
CRef<CLocalDbAdapter> db_adapter;
- InitializeSubject(db_args, opts_handle, true, db_adapter, scope);
+ bool isRemoteLoader = s_InitializeSubject(db_args, opts_handle, db_adapter, scope);
const string kTask = m_RmtBlast->GetTask();
opts.GetQueryGeneticCode(),
opts.GetDbGeneticCode(),
opts.GetSumStatisticsMode(),
- !kRid.empty(),
+ (!kRid.empty() || isRemoteLoader),
filtering_algorithm,
fmt_args.GetCustomOutputFormatSpec(),
kTask == "megablast",
else {
scope->AddScope(*(queries->GetScope(0)));
}
- InitializeSubject(db_args, opts_handle, true, db_adapter, scope);
+ s_InitializeSubject(db_args, opts_handle, db_adapter, scope);
}
}
formatter.PrintEpilog(opts);
}
} CATCH_ALL(status)
+ x_AddCmdOptions();
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
+/// Adds the blast_formatter input source (RID or archive) and the requested
+/// output format, when given, to the usage report.
+void CBlastFormatterApp::x_AddCmdOptions()
+{
+    const CArgs & cmd_args = GetArgs();
+
+    // A RID takes precedence: the archive argument is only inspected
+    // when no RID value was supplied.
+    const bool has_rid = cmd_args[kArgRid].HasValue();
+    if (has_rid) {
+        m_UsageReport.AddParam(CBlastUsageReport::eRIDInput, cmd_args[kArgRid].AsString());
+    }
+    if (!has_rid && cmd_args[kArgArchive].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eArchiveInput, true);
+    }
+
+    // The output format is reported independently of the input source.
+    if (!cmd_args["outfmt"].HasValue()) {
+        return;
+    }
+    m_UsageReport.AddParam(CBlastUsageReport::eOutputFmt, cmd_args["outfmt"].AsString());
+}
+
#ifndef SKIP_DOXYGEN_PROCESSING
int main(int argc, const char* argv[] /*, const char* envp[]*/)
-/* $Id: blastn_app.cpp 574693 2018-11-16 17:46:37Z zaretska $
+/* $Id: blastn_app.cpp 615344 2020-08-31 15:37:55Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ }
+ }
+
+ ~CBlastnApp() { // Destructor: adds the stopwatch's elapsed time to the usage report as eRunTime.
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
/** @inheritDoc */
/// This application's command line args
CRef<CBlastnAppArgs> m_CmdLineArgs;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
void CBlastnApp::Init()
/*** Get the query sequence(s) ***/
CRef<CQueryOptionsArgs> query_opts =
m_CmdLineArgs->GetQueryOptionsArgs();
+
SDataLoaderConfig dlconfig =
InitializeQueryDataLoaderConfiguration(query_opts->QueryIsProtein(),
db_adapter);
opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
}
+ LogQueryInfo(m_UsageReport, input);
+ formatter.LogBlastSearchInfo(m_UsageReport);
} CATCH_ALL(status)
if(!bah.GetMessages().empty()) {
const CArgs & a = GetArgs();
PrintErrorArchive(a, bah.GetMessages());
}
+
+ m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
-/* $Id: blastp_app.cpp 574693 2018-11-16 17:46:37Z zaretska $
+/* $Id: blastp_app.cpp 616355 2020-09-15 12:19:36Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ }
+ }
+
+ ~CBlastpApp() { // Destructor: adds the stopwatch's elapsed time to the usage report as eRunTime.
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
/** @inheritDoc */
/// This application's command line args
CRef<CBlastpAppArgs> m_CmdLineArgs;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
void CBlastpApp::Init()
opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
}
+ LogQueryInfo(m_UsageReport, input);
+ formatter.LogBlastSearchInfo(m_UsageReport);
} CATCH_ALL(status)
if(!bah.GetMessages().empty()) {
const CArgs & a = GetArgs();
PrintErrorArchive(a, bah.GetMessages());
}
+
+ m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
-/* $Id: blastx_app.cpp 574693 2018-11-16 17:46:37Z zaretska $
+/* $Id: blastx_app.cpp 615342 2020-08-31 15:37:39Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ }
}
+
+ ~CBlastxApp() { // Destructor: adds the stopwatch's elapsed time to the usage report as eRunTime.
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
+ }
private:
/** @inheritDoc */
virtual void Init();
/// This application's command line args
CRef<CBlastxAppArgs> m_CmdLineArgs;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
void CBlastxApp::Init()
opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
}
+ LogQueryInfo(m_UsageReport, input);
+ formatter.LogBlastSearchInfo(m_UsageReport);
} CATCH_ALL(status)
if(!bah.GetMessages().empty()) {
const CArgs & a = GetArgs();
PrintErrorArchive(a, bah.GetMessages());
}
+ m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
-/* $Id: deltablast_app.cpp 570608 2018-09-12 12:17:57Z fongah2 $
+/* $Id: deltablast_app.cpp 615345 2020-08-31 15:38:03Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ }
+ }
+ ~CDeltaBlastApp() { // Destructor: adds the stopwatch's elapsed time to the usage report as eRunTime.
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
/** @inheritDoc */
CRef<CBlastAncillaryData> m_AncillaryData;
CBlastAppDiagHandler m_bah;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
void CDeltaBlastApp::Init()
opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
}
+ LogQueryInfo(m_UsageReport, input);
+ formatter.LogBlastSearchInfo(m_UsageReport);
} CATCH_ALL(status)
if(!m_bah.GetMessages().empty()) {
const CArgs & a = GetArgs();
PrintErrorArchive(a, m_bah.GetMessages());
}
+ m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
#!/bin/bash
-# $Id: get_species_taxids.sh 588462 2019-06-24 18:46:42Z camacho $
+# $Id: get_species_taxids.sh 617228 2020-09-28 18:26:52Z ivanov $
# ===========================================================================
#
# PUBLIC DOMAIN NOTICE
error_exit "esummary error" $?
fi
+ sed -i 's/,\|{/\n/g' $TMP
grep 'uid\|rank\|division\|scientificname\|commonname' $TMP | \
- grep -v "uids\|genbankdivision" | tr -d '"\|,' | tr -s ' ' | \
+ grep -v "uids\|genbankdivision" | tr '"\|,' " " | tr -s ' ' | \
sed 's/uid/\nTaxid/g;s/name/ name/g' > $OUTPUT
echo -e "\n$NUM_RESULTS matche(s) found.\n" >> $OUTPUT
-#! /usr/bin/perl -w
-# $Id: legacy_blast.pl 195935 2010-06-28 20:32:08Z camacho $
+#! /usr/bin/env perl
+# $Id: legacy_blast.pl 609147 2020-05-27 11:52:21Z ivanov $
# ===========================================================================
#
# PUBLIC DOMAIN NOTICE
} elsif ($application eq "seedtop") {
$cmd = &handle_seedtop(\$print_only);
} elsif ($application =~ /version/) {
- my $revision = '$Revision: 195935 $';
+ my $revision = '$Revision: 609147 $';
$revision =~ s/\$Revision: | \$//g;
print "$0 version $revision\n";
goto CLEAN_UP;
-/* $Id: psiblast_app.cpp 570608 2018-09-12 12:17:57Z fongah2 $
+/* $Id: psiblast_app.cpp 617621 2020-10-05 13:24:26Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ }
+ }
+ ~CPsiBlastApp() { // Destructor: adds the stopwatch's elapsed time to the usage report as eRunTime.
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
/** @inheritDoc */
CConstRef<CBlastAncillaryData> m_AncillaryData;
CBlastAppDiagHandler m_bah;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
void CPsiBlastApp::Init()
retval = x_RunLocalPsiBlastIterations(query, pssm, scope, db_adapter,
opts_hndl, formatter, kNumIterations);
}
+ m_UsageReport.AddParam(CBlastUsageReport::eConverged, retval);
return retval;
}
_TRACE("PSI-BLAST running with FASTA input");
} else {
_TRACE("PSI-BLAST running with PSSM input");
+ m_UsageReport.AddParam(CBlastUsageReport::ePSSMInput, true);
}
/*** Get the formatting options ***/
if (m_CmdLineArgs->ProduceDebugOutput())
opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
+ if(input) {
+ LogQueryInfo(m_UsageReport, *input);
+ }
+
+ formatter.LogBlastSearchInfo(m_UsageReport);
} CATCH_ALL(status)
if(!m_bah.GetMessages().empty()) {
const CArgs & a = GetArgs();
PrintErrorArchive(a, m_bah.GetMessages());
}
+ m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
-/* $Id: rpsblast_app.cpp 570608 2018-09-12 12:17:57Z fongah2 $
+/* $Id: rpsblast_app.cpp 615351 2020-08-31 15:38:53Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <algo/blast/api/objmgr_query_data.hpp>
#include <algo/blast/format/blast_format.hpp>
#include "blast_app_util.hpp"
+#include "rpsblast_node.hpp"
#include <algo/blast/api/rpsblast_local.hpp>
#include <algo/blast/api/rps_aux.hpp>
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ }
+ }
+ ~CRPSBlastApp() { // Destructor: adds the stopwatch's elapsed time to the usage report as eRunTime.
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
/** @inheritDoc */
/** @inheritDoc */
virtual int Run();
+ int x_RunMTBySplitDB();
+ int x_RunMTBySplitQuery();
+
/// This application's command line args
CRef<CRPSBlastAppArgs> m_CmdLineArgs;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
void CRPSBlastApp::Init()
}
int CRPSBlastApp::Run(void)
+{
+    // Dispatch on the multi-threading mode: mode 0, or at most one thread,
+    // takes the split-by-database path; anything else splits the queries.
+    const CArgs& cmd_args = GetArgs();
+    const int mt_mode = cmd_args[kArgMTMode].AsInteger();
+
+    const bool split_by_db = (mt_mode == 0) || (cmd_args[kArgNumThreads].AsInteger() <= 1);
+    if (split_by_db) {
+        return x_RunMTBySplitDB();
+    }
+
+    // Only the query-splitting path records the MT mode in the usage report.
+    m_UsageReport.AddParam(CBlastUsageReport::eMTMode, mt_mode);
+    return x_RunMTBySplitQuery();
+}
+
+int CRPSBlastApp::x_RunMTBySplitDB(void)
{
int status = BLAST_EXIT_SUCCESS;
CBlastAppDiagHandler bah;
opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
}
+ LogQueryInfo(m_UsageReport, input);
+ formatter.LogBlastSearchInfo(m_UsageReport);
} CATCH_ALL(status)
if(!bah.GetMessages().empty()) {
const CArgs & a = GetArgs();
PrintErrorArchive(a, bah.GetMessages());
}
+ m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
+
+/// Runs rpsblast by cutting the query input into batches and handing each
+/// batch to its own CRPSBlastNode, coordinated by a CBlastMasterNode.
+///
+/// The batch size defaults to 3600 and can be overridden through the
+/// BLAST_MT_QUERY_BATCH_SIZE environment variable. The variable is parsed
+/// inside the try block, so a malformed value goes through CATCH_ALL like
+/// every other failure and the exit status is still recorded in the usage
+/// report.
+///
+/// @return BLAST exit status, as set by CATCH_ALL on failure
+int CRPSBlastApp::x_RunMTBySplitQuery(void)
+{
+    int status = BLAST_EXIT_SUCCESS;
+    CBlastAppDiagHandler bah;
+
+    // Allow the fasta reader to complain on invalid sequence input
+    SetDiagPostLevel(eDiag_Warning);
+    SetDiagPostPrefix("rpsblast");
+    SetDiagHandler(&bah, false);
+
+    try {
+        int batch_size = 3600;
+        const char * mt_query_batch_env = getenv("BLAST_MT_QUERY_BATCH_SIZE");
+        if (mt_query_batch_env) {
+            // Throws on a non-numeric value; handled by CATCH_ALL below.
+            batch_size = NStr::StringToInt(mt_query_batch_env);
+        }
+        cerr << "Batch Size: " << batch_size << endl;
+
+        const CArgs& args = GetArgs();
+        const int kMaxNumOfThreads = args[kArgNumThreads].AsInteger();
+        CRef<CBlastOptionsHandle> opts_hndl;
+        if(RecoverSearchStrategy(args, m_CmdLineArgs)) {
+            opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
+        }
+        else {
+            opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
+        }
+        if(IsIStreamEmpty(m_CmdLineArgs->GetInputStream())){
+            ERR_POST(Warning << "Query is Empty!");
+            return BLAST_EXIT_SUCCESS;
+        }
+        CNcbiOstream & out_stream = m_CmdLineArgs->GetOutputStream();
+        CBlastMasterNode master_node(out_stream, kMaxNumOfThreads);
+        int chunk_num = 0;
+
+        LogRPSBlastOptions(m_UsageReport, opts_hndl->GetOptions());
+        LogRPSCmdOptions(m_UsageReport, *m_CmdLineArgs);
+        // NOTE(review): the meaning of the trailing 360 is not visible
+        // here - confirm against CBlastNodeInputReader.
+        CBlastNodeInputReader input(m_CmdLineArgs->GetInputStream(), batch_size, 360);
+        while (master_node.Processing()) {
+            if (!input.AtEOF()) {
+                // Read and register a new batch only while the master node
+                // has room for another worker.
+                if (!master_node.IsFull()) {
+                    string qb;
+                    int q_index = 0;
+                    int num_q = input.GetQueryBatch(qb, q_index);
+                    if (num_q > 0) {
+                        // NOTE(review): mailbox and node are presumably owned
+                        // by master_node after RegisterNode - confirm.
+                        CBlastNodeMailbox * mb(new CBlastNodeMailbox(chunk_num, master_node.GetBuzzer()));
+                        CRPSBlastNode * t(new CRPSBlastNode(chunk_num, GetArguments(), args, bah, qb, q_index, num_q, mb));
+                        master_node.RegisterNode(t, mb);
+                        chunk_num ++;
+                    }
+                }
+            }
+            else {
+                // All input consumed: request shutdown and record the totals.
+                master_node.Shutdown();
+                m_UsageReport.AddParam(CBlastUsageReport::eNumQueries, master_node.GetNumOfQueries());
+                m_UsageReport.AddParam(CBlastUsageReport::eTotalQueryLength, master_node.GetQueriesLength());
+                m_UsageReport.AddParam(CBlastUsageReport::eNumErrStatus, master_node.GetNumErrStatus());
+                m_UsageReport.AddParam(CBlastUsageReport::eNumQueryBatches, chunk_num);
+            }
+        }
+
+    } CATCH_ALL (status)
+
+    if(!bah.GetMessages().empty()) {
+        const CArgs & a = GetArgs();
+        PrintErrorArchive(a, bah.GetMessages());
+    }
+    m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
+    return status;
+
+}
+
#ifndef SKIP_DOXYGEN_PROCESSING
int main(int argc, const char* argv[] /*, const char* envp[]*/)
{
--- /dev/null
+/* $Id:
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file rpsblast_node.cpp
+ * RPSBLAST node api
+ */
+
+#include <ncbi_pch.hpp>
+#include <corelib/ncbiapp.hpp>
+#include <algo/blast/api/local_blast.hpp>
+#include <algo/blast/api/remote_blast.hpp>
+#include <algo/blast/blastinput/blast_fasta_input.hpp>
+#include <algo/blast/blastinput/rpsblast_args.hpp>
+#include <algo/blast/api/objmgr_query_data.hpp>
+#include <algo/blast/format/blast_format.hpp>
+#include "blast_app_util.hpp"
+#include "rpsblast_node.hpp"
+#include <algo/blast/api/rpsblast_local.hpp>
+#include <algo/blast/api/rps_aux.hpp>
+
+#ifndef SKIP_DOXYGEN_PROCESSING
+USING_NCBI_SCOPE;
+USING_SCOPE(blast);
+USING_SCOPE(objects);
+#endif
+
+/// Builds a node for one query batch: keeps a copy of the query text, creates
+/// the node's argument object over that copy, marks the node initialized and
+/// posts a run request carrying this node.
+CRPSBlastNode::CRPSBlastNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args,
+                              CBlastAppDiagHandler & bah, const string & input,
+                              int query_index, int num_queries, CBlastNodeMailbox * mailbox)
+    : CBlastNode(node_num, ncbi_args, args, bah, eRPSBlast, query_index, num_queries, mailbox),
+      m_Input(input)
+{
+    // The argument object is constructed from the member copy m_Input,
+    // not from the caller's string.
+    m_CmdLineArgs.Reset(new CRPSBlastNodeArgs(m_Input));
+    SetState(eInitialized);
+    SendMsg(CBlastNodeMsg::eRunRequest, static_cast<void*>(this));
+}
+
+/// Hands back the formatted output collected by this node.
+/// @param results Receives the contents of the node's output stream; left
+///                untouched unless the node is in the eDone state [out]
+/// @return The node's status when done, -1 otherwise
+int CRPSBlastNode::GetBlastResults(string & results)
+{
+    if (GetState() != eDone) {
+        return -1;
+    }
+    results = CNcbiOstrstreamToString(m_CmdLineArgs->GetOutputStrStream());
+    return GetStatus();
+}
+
+CRPSBlastNode::~CRPSBlastNode() // Destructor: explicitly resets the node's command line argument reference.
+{
+ m_CmdLineArgs.Reset();
+}
+
+void *
+CRPSBlastNode::Main() // Node entry point: runs the search over this node's query input, formats the results, then posts ePostResult or eErrorExit. Always returns NULL.
+{
+ int status = BLAST_EXIT_SUCCESS;
+ CBlastAppDiagHandler & bah = GetDiagHandler();
+ SetDiagPostPrefix(GetNodeIdStr().c_str()); // diagnostics are prefixed with this node's id string
+
+ SetState(eRunning);
+ try {
+ const CArgs& args = GetArgs();
+ CRef<CBlastOptionsHandle> opts_hndl;
+ if(RecoverSearchStrategy(args, m_CmdLineArgs)) { // options come from a saved strategy when one is recovered, else from the args
+ opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
+ }
+ else {
+ opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
+ }
+
+ CheckForFreqRatioFile(m_CmdLineArgs->GetBlastDatabaseArgs()->GetDatabaseName(),
+ opts_hndl, true);
+ const CBlastOptions& opt = opts_hndl->GetOptions();
+
+ /*** Initialize the database ***/
+ CRef<CBlastDatabaseArgs> db_args(m_CmdLineArgs->GetBlastDatabaseArgs());
+ CRef<CLocalDbAdapter> db_adapter;
+ CRef<CScope> scope;
+ InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(),
+ db_adapter, scope);
+ _ASSERT(db_adapter && scope);
+
+ /*** Get the query sequence(s) ***/
+ CRef<CQueryOptionsArgs> query_opts =
+ m_CmdLineArgs->GetQueryOptionsArgs();
+ SDataLoaderConfig dlconfig =
+ InitializeQueryDataLoaderConfiguration(query_opts->QueryIsProtein(),
+ db_adapter);
+ CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
+ query_opts->UseLowercaseMasks(),
+ query_opts->GetParseDeflines(),
+ query_opts->GetRange());
+ CBlastFastaInputSource fasta(m_CmdLineArgs->GetInputStream(), iconfig);
+ CBlastInput input(&fasta, m_CmdLineArgs->GetQueryBatchSize());
+
+ /*** Get the formatting options ***/
+ CRef<CFormattingArgs> fmt_args(m_CmdLineArgs->GetFormattingArgs());
+ bool isArchiveFormat = fmt_args->ArchiveFormatRequested(args);
+ if(!isArchiveFormat) {
+ bah.DoNotSaveMessages(); // saved diag messages are only consumed by WriteArchive below, so stop saving them for other formats
+ }
+ CBlastFormat formatter(opt, *db_adapter,
+ fmt_args->GetFormattedOutputChoice(),
+ query_opts->GetParseDeflines(),
+ m_CmdLineArgs->GetOutputStream(),
+ fmt_args->GetNumDescriptions(),
+ fmt_args->GetNumAlignments(),
+ *scope,
+ opt.GetMatrixName(),
+ fmt_args->ShowGis(),
+ fmt_args->DisplayHtmlOutput(),
+ opt.GetQueryGeneticCode(),
+ opt.GetDbGeneticCode(),
+ opt.GetSumStatisticsMode(),
+ m_CmdLineArgs->ExecuteRemotely(),
+ db_adapter->GetFilteringAlgorithm(),
+ fmt_args->GetCustomOutputFormatSpec(),
+ false, false, NULL, NULL,
+ GetCmdlineArgs(GetArguments()));
+
+ formatter.SetQueryRange(query_opts->GetRange());
+ formatter.SetLineLength(fmt_args->GetLineLength());
+ if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) {
+ formatter.SetBaseFile(args[kArgOutput].AsString());
+ }
+ formatter.PrintProlog();
+
+ /*** Process the input ***/
+ for (; !input.End(); formatter.ResetScopeHistory(), QueryBatchCleanup()) { // one iteration per query batch; scope history and batch state are cleaned after each
+
+ CRef<CBlastQueryVector> query_batch(input.GetNextSeqBatch(*scope));
+ CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(*query_batch));
+
+ SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl);
+
+ CRef<CSearchResultSet> results;
+
+ if (m_CmdLineArgs->ExecuteRemotely())
+ {
+ CRef<CRemoteBlast> rmt_blast =
+ InitializeRemoteBlast(queries, db_args, opts_hndl,
+ m_CmdLineArgs->ProduceDebugRemoteOutput(),
+ m_CmdLineArgs->GetClientId());
+ results = rmt_blast->GetResultSet();
+ }
+ else
+ {
+ CLocalRPSBlast local_search (query_batch, db_args->GetDatabaseName(), opts_hndl, 1); // NOTE(review): trailing 1 is presumably the thread count for this node - confirm
+ results = local_search.Run();
+ }
+
+ if (fmt_args->ArchiveFormatRequested(args)) {
+ formatter.WriteArchive(*queries, *opts_hndl, *results, 0, bah.GetMessages()); // archive output carries the saved diag messages
+ bah.ResetMessages(); // messages were written out; start the next batch with an empty list
+ } else {
+ BlastFormatter_PreFetchSequenceData(*results, scope,
+ fmt_args->GetFormattedOutputChoice());
+ ITERATE(CSearchResultSet, result, *results) {
+ formatter.PrintOneResultSet(**result, query_batch);
+ }
+ }
+ }
+
+ formatter.PrintEpilog(opt);
+
+ if (m_CmdLineArgs->ProduceDebugOutput()) {
+ opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
+ }
+ SetQueriesLength(input.GetTotalLengthProcessed()); // store the total query length processed by this node
+ } CATCH_ALL(status)
+
+ SetStatus(status); // status is BLAST_EXIT_SUCCESS unless CATCH_ALL changed it
+ if (status == BLAST_EXIT_SUCCESS) {
+ SetState(eDone);
+ SendMsg(CBlastNodeMsg::ePostResult, (void *) this);
+
+ }
+ else {
+ SetState(eError);
+ SendMsg(CBlastNodeMsg::eErrorExit, (void *) this);
+
+ }
+
+ return NULL;
+}
+
--- /dev/null
+/* $Id:
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file rpsblast_node.hpp
+ * RPSBLAST node api
+ */
+
+#ifndef APP__RPSBLAST_NODE__HPP
+#define APP__RPSBLAST_NODE__HPP
+
+#include <algo/blast/blastinput/rpsblast_args.hpp>
+#include <algo/blast/api/blast_node.hpp>
+
+BEGIN_NCBI_SCOPE
+BEGIN_SCOPE(blast)
+
+/// BLAST node that runs an RPS-BLAST search over one batch of query input
+/// and keeps the formatted output until it is collected.
+class CRPSBlastNode : public CBlastNode
+{
+public :
+
+    /// Constructor
+    /// @param node_num Number identifying this node [in]
+    /// @param ncbi_args Application arguments, forwarded to CBlastNode [in]
+    /// @param args Parsed command line arguments [in]
+    /// @param bah Diag handler used by this node [in]
+    /// @param input Query text for this node; a copy is stored [in]
+    /// @param query_index Forwarded to CBlastNode [in]
+    /// @param num_queries Forwarded to CBlastNode [in]
+    /// @param mailbox Mailbox forwarded to CBlastNode; may be NULL [in]
+    CRPSBlastNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args,
+                   CBlastAppDiagHandler & bah, const string & input,
+                   int query_index, int num_queries, CBlastNodeMailbox * mailbox = NULL);
+    /// Retrieve the formatted output once the node is done
+    /// @param results Receives the node's output [out]
+    /// @return Node status when done, -1 otherwise
+    virtual int GetBlastResults(string & results);
+protected:
+    /// Destructor
+    virtual ~CRPSBlastNode(void);
+    /// Node entry point: runs the search and formats the results
+    virtual void* Main(void);
+private:
+    /// Copy of the query text handed to the constructor
+    string m_Input;
+    /// Argument object created over m_Input
+    CRef<CRPSBlastNodeArgs> m_CmdLineArgs;
+};
+
+END_SCOPE(blast)
+END_NCBI_SCOPE
+
+#endif /* APP__RPSBLAST_NODE__HPP */
-/* $Id: rpstblastn_app.cpp 570608 2018-09-12 12:17:57Z fongah2 $
+/* $Id: rpstblastn_app.cpp 615352 2020-08-31 15:39:03Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <algo/blast/api/objmgr_query_data.hpp>
#include <algo/blast/format/blast_format.hpp>
#include "blast_app_util.hpp"
+#include "rpstblastn_node.hpp"
#include <objtools/blast/seqdb_reader/seqdb.hpp>
#include <algo/blast/api/rpsblast_local.hpp>
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ }
+ }
+ ~CRPSTBlastnApp() {
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
/** @inheritDoc */
/** @inheritDoc */
virtual int Run();
+ int x_RunMTBySplitDB();
+ int x_RunMTBySplitQuery();
+
/// This application's command line args
CRef<CRPSTBlastnAppArgs> m_CmdLineArgs;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
void CRPSTBlastnApp::Init()
SetupArgDescriptions(m_CmdLineArgs->SetCommandLine());
}
+
int CRPSTBlastnApp::Run(void)
+{
+    // Pick the execution strategy: queries are split across threads only when
+    // a non-zero MT mode is requested together with more than one thread;
+    // every other combination goes through the split-by-database path.
+    const CArgs& cmd_args = GetArgs();
+    const bool split_by_query =
+        (cmd_args[kArgMTMode].AsInteger() != 0) &&
+        (cmd_args[kArgNumThreads].AsInteger() > 1);
+
+    if (!split_by_query) {
+        return x_RunMTBySplitDB();
+    }
+
+    // The MT mode is only recorded in the usage report for the split-query path.
+    m_UsageReport.AddParam(CBlastUsageReport::eMTMode, cmd_args[kArgMTMode].AsInteger());
+    return x_RunMTBySplitQuery();
+}
+
+int CRPSTBlastnApp::x_RunMTBySplitDB(void)
{
int status = BLAST_EXIT_SUCCESS;
CBlastAppDiagHandler bah;
opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
}
+ LogQueryInfo(m_UsageReport, input);
+ formatter.LogBlastSearchInfo(m_UsageReport);
} CATCH_ALL(status)
if(!bah.GetMessages().empty()) {
const CArgs & a = GetArgs();
PrintErrorArchive(a, bah.GetMessages());
}
+ m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
+ return status;
+}
+
+/// Run the search by splitting the query stream into batches and handing
+/// each batch to its own CRPSTBlastnNode, coordinated by a CBlastMasterNode.
+///
+/// The batch size defaults to 8000 and can be overridden through the
+/// BLAST_MT_QUERY_BATCH_SIZE environment variable. A value that is not a
+/// positive integer is ignored (with a note on stderr) instead of throwing
+/// before the try block or feeding a zero/negative size to the input reader.
+///
+/// @return BLAST_EXIT_SUCCESS, or the status set by CATCH_ALL on failure.
+int CRPSTBlastnApp::x_RunMTBySplitQuery(void)
+{
+    int status = BLAST_EXIT_SUCCESS;
+    CBlastAppDiagHandler bah;
+    const int kDefaultBatchSize = 8000;
+    int batch_size = kDefaultBatchSize;
+
+    const char * mt_query_batch_env = getenv("BLAST_MT_QUERY_BATCH_SIZE");
+    if (mt_query_batch_env) {
+        // fConvErr_NoThrow: a malformed value yields 0 rather than an
+        // exception, which would otherwise escape because we are not yet
+        // inside the try/CATCH_ALL below.
+        const int requested = NStr::StringToInt(mt_query_batch_env, NStr::fConvErr_NoThrow);
+        if (requested > 0) {
+            batch_size = requested;
+        }
+        else {
+            cerr << "Warning: ignoring invalid BLAST_MT_QUERY_BATCH_SIZE value '"
+                 << mt_query_batch_env << "', using default" << endl;
+        }
+    }
+    cerr << "Batch Size: " << batch_size << endl;
+    // Allow the fasta reader to complain on invalid sequence input
+    SetDiagPostLevel(eDiag_Warning);
+    SetDiagPostPrefix("rpstblastn_mt");
+    SetDiagHandler(&bah, false);
+
+    try {
+        const CArgs& args = GetArgs();
+        const int kMaxNumOfThreads = args[kArgNumThreads].AsInteger();
+        CRef<CBlastOptionsHandle> opts_hndl;
+        if(RecoverSearchStrategy(args, m_CmdLineArgs)) {
+            opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
+        }
+        else {
+            opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
+        }
+        if(IsIStreamEmpty(m_CmdLineArgs->GetInputStream())){
+            ERR_POST(Warning << "Query is Empty!");
+            return BLAST_EXIT_SUCCESS;
+        }
+        CNcbiOstream & out_stream = m_CmdLineArgs->GetOutputStream();
+        CBlastMasterNode master_node(out_stream, kMaxNumOfThreads);
+        int chunk_num = 0;
+
+        LogRPSBlastOptions(m_UsageReport, opts_hndl->GetOptions());
+        LogRPSCmdOptions(m_UsageReport, *m_CmdLineArgs);
+        // NOTE(review): the meaning of the literal 4500 is not visible from
+        // here; confirm against CBlastNodeInputReader.
+        CBlastNodeInputReader input(m_CmdLineArgs->GetInputStream(), batch_size, 4500);
+        while (master_node.Processing()) {
+            if (!input.AtEOF()) {
+                // Only read another batch when the master node can take one.
+                if (!master_node.IsFull()) {
+                    int q_index = 0;
+                    string qb;
+                    int num_q = input.GetQueryBatch(qb, q_index);
+                    if (num_q > 0) {
+                        // Both objects are handed over to the master node
+                        // through RegisterNode().
+                        CBlastNodeMailbox * mb(new CBlastNodeMailbox(chunk_num, master_node.GetBuzzer()));
+                        CRPSTBlastnNode * t(new CRPSTBlastnNode(chunk_num, GetArguments(), args, bah, qb, q_index, num_q, mb));
+                        master_node.RegisterNode(t, mb);
+                        chunk_num ++;
+                    }
+                }
+            }
+            else {
+                // Input exhausted: ask the master node to wind down and
+                // record the aggregate counters.
+                master_node.Shutdown();
+                m_UsageReport.AddParam(CBlastUsageReport::eNumQueries, master_node.GetNumOfQueries());
+                m_UsageReport.AddParam(CBlastUsageReport::eTotalQueryLength, master_node.GetQueriesLength());
+                m_UsageReport.AddParam(CBlastUsageReport::eNumErrStatus, master_node.GetNumErrStatus());
+                m_UsageReport.AddParam(CBlastUsageReport::eNumQueryBatches, chunk_num);
+            }
+
+        }
+
+    } CATCH_ALL (status)
+
+    if(!bah.GetMessages().empty()) {
+        const CArgs & a = GetArgs();
+        PrintErrorArchive(a, bah.GetMessages());
+    }
+    m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+    m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
    return status;
}
+
#ifndef SKIP_DOXYGEN_PROCESSING
int main(int argc, const char* argv[] /*, const char* envp[]*/)
{
--- /dev/null
+/* $Id:
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file rpstblastn_node.cpp
+ * RPSTBLASTN node implementation used by the multi-threaded (split-query) mode
+ */
+
+#include <ncbi_pch.hpp>
+#include <corelib/ncbiapp.hpp>
+#include <algo/blast/api/local_blast.hpp>
+#include <algo/blast/api/remote_blast.hpp>
+#include <algo/blast/blastinput/blast_fasta_input.hpp>
+#include <algo/blast/blastinput/rpstblastn_args.hpp>
+#include <algo/blast/api/objmgr_query_data.hpp>
+#include <algo/blast/format/blast_format.hpp>
+#include "blast_app_util.hpp"
+#include "rpstblastn_node.hpp"
+#include <algo/blast/api/rpsblast_local.hpp>
+
+#ifndef SKIP_DOXYGEN_PROCESSING
+USING_NCBI_SCOPE;
+USING_SCOPE(blast);
+USING_SCOPE(objects);
+#endif
+
+/// Build a node for one batch of queries and immediately request that it be run.
+/// The query text is copied into m_Input, which then backs the node's own
+/// argument object.
+CRPSTBlastnNode::CRPSTBlastnNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args,
+                                  CBlastAppDiagHandler & bah, const string & input,
+                                  int query_index, int num_queries,CBlastNodeMailbox * mailbox)
+    : CBlastNode(node_num, ncbi_args, args, bah, eRPSTblastn, query_index, num_queries, mailbox)
+    , m_Input(input)
+    , m_CmdLineArgs(new CRPSTBlastnNodeArgs(m_Input))   // m_Input is declared first, so it is already initialised here
+{
+    SetState(eInitialized);
+    SendMsg(CBlastNodeMsg::eRunRequest, static_cast<void*>(this));
+}
+
+/// Copy the node's formatted output into @a results.
+/// @return the node status once the node has reached eDone; -1 (with
+///         @a results untouched) in any other state.
+int CRPSTBlastnNode::GetBlastResults(string & results)
+{
+    if (GetState() != eDone) {
+        return -1;
+    }
+    results = CNcbiOstrstreamToString(m_CmdLineArgs->GetOutputStrStream());
+    return GetStatus();
+}
+
+/// Destructor: explicitly drops this node's reference to its argument object.
+CRPSTBlastnNode::~CRPSTBlastnNode(void)
+{
+    m_CmdLineArgs.Reset();
+}
+
+void *
+CRPSTBlastnNode::Main()
+{ /* Runs one rpstblastn search over this node's query input and signals the
+   * outcome through SendMsg().
+   *
+   * Flow: build the options handle, initialise the subject database, read the
+   * queries from the node's input stream in batches, search each batch
+   * (remotely or with CLocalRPSBlast) and format it into the node's output
+   * stream. On BLAST_EXIT_SUCCESS the state becomes eDone and ePostResult is
+   * sent; otherwise the state becomes eError and eErrorExit is sent.
+   *
+   * Always returns NULL; the result is exposed through the status/state set
+   * at the end of the function.
+   */
+ int status = BLAST_EXIT_SUCCESS;
+ CBlastAppDiagHandler & bah = GetDiagHandler();
+ SetDiagPostPrefix(GetNodeIdStr().c_str()); // diagnostics are prefixed with this node's id string
+
+ SetState(eRunning);
+ try {
+ const CArgs& args = GetArgs();
+ CRef<CBlastOptionsHandle> opts_hndl;
+ if(RecoverSearchStrategy(args, m_CmdLineArgs)) { // options come either from a recovered search strategy...
+ opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
+ }
+ else { // ...or from the command-line arguments
+ opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
+ }
+
+ CheckForFreqRatioFile(m_CmdLineArgs->GetBlastDatabaseArgs()->GetDatabaseName(),
+ opts_hndl, true);
+ const CBlastOptions& opt = opts_hndl->GetOptions();
+
+ /*** Initialize the database ***/
+ CRef<CBlastDatabaseArgs> db_args(m_CmdLineArgs->GetBlastDatabaseArgs());
+ CRef<CLocalDbAdapter> db_adapter;
+ CRef<CScope> scope;
+ InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(),
+ db_adapter, scope); // db_adapter and scope are outputs, asserted non-null just below
+ _ASSERT(db_adapter && scope);
+
+ /*** Get the query sequence(s) ***/
+ CRef<CQueryOptionsArgs> query_opts =
+ m_CmdLineArgs->GetQueryOptionsArgs();
+ SDataLoaderConfig dlconfig =
+ InitializeQueryDataLoaderConfiguration(query_opts->QueryIsProtein(),
+ db_adapter);
+ CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
+ query_opts->UseLowercaseMasks(),
+ query_opts->GetParseDeflines(),
+ query_opts->GetRange());
+ CBlastFastaInputSource fasta(m_CmdLineArgs->GetInputStream(), iconfig);
+ CBlastInput input(&fasta, m_CmdLineArgs->GetQueryBatchSize());
+
+ /*** Get the formatting options ***/
+ CRef<CFormattingArgs> fmt_args(m_CmdLineArgs->GetFormattingArgs());
+ bool isArchiveFormat = fmt_args->ArchiveFormatRequested(args);
+ if(!isArchiveFormat) { // saved diag messages are only consumed by WriteArchive() below
+ bah.DoNotSaveMessages();
+ }
+ CBlastFormat formatter(opt, *db_adapter,
+ fmt_args->GetFormattedOutputChoice(),
+ query_opts->GetParseDeflines(),
+ m_CmdLineArgs->GetOutputStream(),
+ fmt_args->GetNumDescriptions(),
+ fmt_args->GetNumAlignments(),
+ *scope,
+ opt.GetMatrixName(),
+ fmt_args->ShowGis(),
+ fmt_args->DisplayHtmlOutput(),
+ opt.GetQueryGeneticCode(),
+ opt.GetDbGeneticCode(),
+ opt.GetSumStatisticsMode(),
+ m_CmdLineArgs->ExecuteRemotely(),
+ db_adapter->GetFilteringAlgorithm(),
+ fmt_args->GetCustomOutputFormatSpec(),
+ false, false, NULL, NULL,
+ GetCmdlineArgs(GetArguments()));
+
+ formatter.SetQueryRange(query_opts->GetRange());
+ formatter.SetLineLength(fmt_args->GetLineLength());
+ if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) {
+ formatter.SetBaseFile(args[kArgOutput].AsString());
+ }
+ formatter.PrintProlog();
+
+ /*** Process the input ***/
+ for (; !input.End(); formatter.ResetScopeHistory(), QueryBatchCleanup()) { // per-batch cleanup runs in the loop increment
+
+ CRef<CBlastQueryVector> query_batch(input.GetNextSeqBatch(*scope));
+ CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(*query_batch));
+
+ SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl);
+
+ CRef<CSearchResultSet> results;
+
+ if (m_CmdLineArgs->ExecuteRemotely())
+ {
+ CRef<CRemoteBlast> rmt_blast =
+ InitializeRemoteBlast(queries, db_args, opts_hndl,
+ m_CmdLineArgs->ProduceDebugRemoteOutput(),
+ m_CmdLineArgs->GetClientId());
+ results = rmt_blast->GetResultSet();
+ }
+ else
+ {
+ CLocalRPSBlast local_search (query_batch, db_args->GetDatabaseName(), opts_hndl, 1); // trailing 1: presumably the thread count for this search -- TODO confirm
+ results = local_search.Run();
+ }
+
+ if (fmt_args->ArchiveFormatRequested(args)) {
+ formatter.WriteArchive(*queries, *opts_hndl, *results, 0, bah.GetMessages());
+ bah.ResetMessages(); // messages were just written to the archive; clear them before the next batch
+ } else {
+ BlastFormatter_PreFetchSequenceData(*results, scope,
+ fmt_args->GetFormattedOutputChoice());
+ ITERATE(CSearchResultSet, result, *results) {
+ formatter.PrintOneResultSet(**result, query_batch);
+ }
+ }
+ }
+
+ formatter.PrintEpilog(opt);
+
+ if (m_CmdLineArgs->ProduceDebugOutput()) {
+ opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
+ }
+
+ SetQueriesLength(input.GetTotalLengthProcessed());
+ } CATCH_ALL(status) // presumably converts any exception into a non-success value in `status` -- see the macro definition
+
+ SetStatus(status);
+ if (status == BLAST_EXIT_SUCCESS) { // success: mark the node done and post the result message
+ SetState(eDone);
+ SendMsg(CBlastNodeMsg::ePostResult, (void *) this);
+
+ }
+ else { // failure: mark the node as errored and send the error-exit message
+ SetState(eError);
+ SendMsg(CBlastNodeMsg::eErrorExit, (void *) this);
+
+ }
+
+ return NULL;
+}
--- /dev/null
+/* $Id:
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Authors: Amelia Fong
+ *
+ */
+
+/** @file rpstblastn_node.hpp
+ * RPSTBLASTN node api
+ */
+
+#ifndef APP__RPSTBLASTN_NODE__HPP
+#define APP__RPSTBLASTN_NODE__HPP
+
+#include <algo/blast/blastinput/rpstblastn_args.hpp>
+#include <algo/blast/api/blast_node.hpp>
+
+BEGIN_NCBI_SCOPE
+BEGIN_SCOPE(blast)
+
+class CRPSTBlastnNode : public CBlastNode
+{ /* RPSTBLASTN specialisation of CBlastNode: one instance handles one batch
+   * of query text with its own argument object. */
+public :
+
+ CRPSTBlastnNode (int check_num, const CNcbiArguments & ncbi_args, const CArgs& args, // check_num is forwarded to CBlastNode as the node number
+ CBlastAppDiagHandler & bah, const string & input, // input: query text, copied into m_Input
+ int query_index, int num_queries, CBlastNodeMailbox * mailbox = NULL); // mailbox may be omitted; it then defaults to NULL
+ virtual int GetBlastResults(string & results); ///< Fills `results` and returns the node status once the state is eDone; returns -1 otherwise
+protected:
+ virtual ~CRPSTBlastnNode(void); ///< Protected: outside code cannot delete a node directly
+ virtual void* Main(void); ///< Runs the search for this node's input; always returns NULL
+private:
+ string m_Input; ///< Copy of the query text given to the constructor
+ CRef<CRPSTBlastnNodeArgs> m_CmdLineArgs; ///< Argument object built from m_Input in the constructor
+};
+
+END_SCOPE(blast)
+END_NCBI_SCOPE
+
+#endif /* APP__RPSTBLASTN_NODE__HPP */
-/* $Id: tblastn_app.cpp 574693 2018-11-16 17:46:37Z zaretska $
+/* $Id: tblastn_app.cpp 616358 2020-09-15 12:19:53Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ }
+ }
+
+ ~CTblastnApp() {
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
/** @inheritDoc */
virtual int Run();
/// This application's command line args
CRef<CTblastnAppArgs> m_CmdLineArgs;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
void CTblastnApp::Init()
if (m_CmdLineArgs->ProduceDebugOutput()) {
opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
}
+ if (input) {
+ LogQueryInfo(m_UsageReport, *input);
+ }
+ formatter.LogBlastSearchInfo(m_UsageReport);
} CATCH_ALL(status)
if(!bah.GetMessages().empty()) {
const CArgs & a = GetArgs();
PrintErrorArchive(a, bah.GetMessages());
}
+
+ m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
-/* $Id: tblastx_app.cpp 574693 2018-11-16 17:46:37Z zaretska $
+/* $Id: tblastx_app.cpp 615343 2020-08-31 15:37:47Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ }
+ }
+
+ ~CTblastxApp() {
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
/** @inheritDoc */
/// This application's command line args
CRef<CTblastxAppArgs> m_CmdLineArgs;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
void CTblastxApp::Init()
opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
}
+ LogQueryInfo(m_UsageReport, input);
+ formatter.LogBlastSearchInfo(m_UsageReport);
} CATCH_ALL(status)
if(!bah.GetMessages().empty()) {
const CArgs & a = GetArgs();
PrintErrorArchive(a, bah.GetMessages());
}
+ m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
#!/usr/bin/env perl
-# $Id: update_blastdb.pl 608134 2020-05-12 15:44:10Z ivanov $
+# $Id: update_blastdb.pl 608596 2020-05-19 10:56:17Z ivanov $
# ===========================================================================
#
# PUBLIC DOMAIN NOTICE
$|++;
if ($opt_show_version) {
- my $revision = '$Revision: 608134 $';
+ my $revision = '$Revision: 608596 $';
$revision =~ s/\$Revision: | \$//g;
print "$0 version $revision\n";
exit($exit_code);
# Try to auto-detect whether we're on the cloud
if (defined($curl)) {
my $tmpfile = File::Temp->new();
- my $gcp_cmd = "$curl --connect-timeout 1 -sfo $tmpfile -H 'Metadata-Flavor: Google' " . GCP_URL;
- my $aws_cmd = "$curl --connect-timeout 1 -sfo /dev/null " . AMI_URL;
+ my $gcp_cmd = "$curl --connect-timeout 3 --retry 3 --retry-max-time 30 -sfo $tmpfile -H 'Metadata-Flavor: Google' " . GCP_URL;
+ my $aws_cmd = "$curl --connect-timeout 3 --retry 3 --retry-max-time 30 -sfo /dev/null " . AMI_URL;
print "$gcp_cmd\n" if DEBUG;
if (system($gcp_cmd) == 0) {
- # status not always reliable. Chekc that return is all digits.
- my $tmpfile_content = do { local $/; <$tmpfile>};
- print "tempfile: $tmpfile_content\n" if DEBUG;
- if ($tmpfile_content =~ m/^(\d+)$/) {
- $location = "GCP";
- }
+ # status not always reliable. Check that curl output is all digits.
+ my $tmpfile_content = do { local $/; <$tmpfile>};
+ print "curl output $tmpfile_content\n" if DEBUG;
+ $location = "GCP" if ($tmpfile_content =~ m/^(\d+)$/);
+ } elsif (DEBUG) {
+ # Consult https://ec.haxx.se/usingcurl/usingcurl-returns
+ print "curl to GCP metadata server returned ", $?>>8, "\n";
}
+
print "$aws_cmd\n" if DEBUG;
- $location = "AWS" if (system($aws_cmd) == 0);
- print "Loation is $location\n" if DEBUG;
+ if (system($aws_cmd) == 0) {
+ $location = "AWS";
+ } elsif (DEBUG) {
+ # Consult https://ec.haxx.se/usingcurl/usingcurl-returns
+ print "curl to AWS metadata server returned ", $?>>8, "\n";
+ }
+ print "Location is $location\n" if DEBUG;
}
}
if ($location =~ /aws|gcp/i and not defined $curl) {
#############################################################################
-# $Id: CMakeLists.convert2blastmask.app.txt 593591 2019-09-20 14:53:34Z gouriano $
+# $Id: CMakeLists.convert2blastmask.app.txt 615546 2020-09-01 12:05:24Z ivanov $
#############################################################################
NCBI_begin_app(convert2blastmask)
NCBI_sources(convert2blastmask)
- NCBI_uses_toolkit_libraries(blast seqmasks_io)
+ NCBI_uses_toolkit_libraries(blast seqmasks_io xblast)
NCBI_add_definitions(NCBI_MODULE=BLASTDB)
NCBI_project_watchers(camacho fongah2)
NCBI_end_app()
-/* $Id: blastdb_aliastool.cpp 593112 2019-09-12 12:56:14Z fongah2 $
+/* $Id: blastdb_aliastool.cpp 615362 2020-08-31 15:39:55Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdb_aliastool");
+ }
+ }
+ ~CBlastDBAliasApp() {
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
/** @inheritDoc */
int x_ConvertSeqIDFile() const;
void x_SeqIDFileInfo() const;
+ void x_AddCmdOptions();
/// Documentation for this program
static const char * const DOCUMENTATION;
}
vector<string> x_GetDbsToAggregate(const string dbs, const string file) const;
void x_AddVDBsToAliasFile( string filename, bool append, string title = kEmptyStr) const;
+
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
const char * const CBlastDBAliasApp::DOCUMENTATION = "\n\n"
}
} CATCH_ALL(status)
+ x_AddCmdOptions();
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
+/// Record in the usage report which blastdb_aliastool mode(s) the command
+/// line selected. Three independent groups are examined in turn, so more
+/// than one eDBAliasMode value may be reported in a single call.
+void CBlastDBAliasApp::x_AddCmdOptions()
+{
+    const CArgs & cmd = GetArgs();
+
+    // Group 1: id-file conversion / inspection (first match wins).
+    string file_mode;
+    if (cmd["gi_file_in"].HasValue()) {
+        file_mode = "gi_file_conversion";
+    } else if (cmd["seqid_file_in"].HasValue()) {
+        file_mode = "seqid_file_conversion";
+    } else if (cmd["seqid_file_info"].HasValue()) {
+        file_mode = "get_seqid_file_info";
+    }
+    if (!file_mode.empty()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, file_mode);
+    }
+
+    // Group 2: alias database creation (first match wins).
+    const bool has_dblist = cmd["dblist"].HasValue() || cmd["dblist_file"].HasValue();
+    string alias_mode;
+    if (has_dblist || cmd["num_volumes"].HasValue()) {
+        alias_mode = "create_alias_db";
+    } else if (cmd[kArgDb].HasValue()) {
+        if (cmd[kArgGiList]) {
+            alias_mode = "create_gilist_alias_db";
+        } else if (cmd[kArgSeqIdList]) {
+            alias_mode = "create_seqidlist_alias_db";
+        } else if (cmd[kArgTaxIdListFile]) {
+            alias_mode = "create_taxidlist_alias_db";
+        }
+    }
+    if (!alias_mode.empty()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, alias_mode);
+    }
+
+    // Group 3: VDB lists, either added to a db list or used on their own.
+    if (cmd["vdblist"].HasValue() || cmd["vdblist_file"].HasValue()) {
+        const string vdb_mode = has_dblist ? "add_vdblist" : "create_vdb_alias_db";
+        m_UsageReport.AddParam(CBlastUsageReport::eDBAliasMode, vdb_mode);
+    }
+}
+
+
#ifndef SKIP_DOXYGEN_PROCESSING
int main(int argc, const char* argv[] /*, const char* envp[]*/)
-/* $Id: blastdb_convert.cpp 598221 2019-12-05 15:33:01Z fongah2 $
+/* $Id: blastdb_convert.cpp 615364 2020-08-31 15:40:14Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdb_convert");
+ }
+ }
+ ~CBlastdbConvertApp() {
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
virtual int Run();
CNcbiOstream * m_LogFile;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
void CBlastdbConvertApp::Init()
-/* $Id: blastdbcheck.cpp 538739 2017-06-13 18:26:55Z rackerst $
+/* $Id: blastdbcheck.cpp 615362 2020-08-31 15:39:55Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdbcheck");
+ }
+ }
+ ~CBlastDbCheckApplication() {
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
virtual int Run(void);
/** @inheritDoc */
virtual void Exit(void);
+
+ void x_AddCmdOptions();
+
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
status = okay ? 0 : 1;
} CATCH_ALL(status)
+
+ x_AddCmdOptions();
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
+/// Record in the usage report which test selection was requested on the
+/// command line; "default" is reported when none of the options has a value.
+void CBlastDbCheckApplication::x_AddCmdOptions()
+{
+    const CArgs & cmd = GetArgs();
+
+    // First matching option wins, in this fixed order.
+    string test_kind("default");
+    if (cmd["random"].HasValue()) {
+        test_kind = "random";
+    } else if (cmd["full"].HasValue()) {
+        test_kind = "full";
+    } else if (cmd["stride"].HasValue()) {
+        test_kind = "stride";
+    } else if (cmd["ends"].HasValue()) {
+        test_kind = "end";
+    }
+    m_UsageReport.AddParam(CBlastUsageReport::eDBTest, test_kind);
+}
+
/////////////////////////////////////////////////////////////////////////////
// Cleanup
-/* $Id: blastdbcmd.cpp 598336 2019-12-06 18:17:01Z merezhuk $
+/* $Id: blastdbcmd.cpp 616873 2020-09-22 13:14:39Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdbcmd");
+ }
+ }
+ ~CBlastDBCmdApp() {
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
/** @inheritDoc */
set<Int4> m_TaxIdList;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
+
/// Initializes Blast DB
void x_InitBlastDB();
void x_InitBlastDB_TaxIdList();
void x_PrintBlastDatabaseTaxInformation();
int x_ProcessBatchPig(CBlastDB_Formatter & fmt);
+
+ void x_AddCmdOptions();
};
+
string s_PreProcessAccessionsForDBv5(const string & id)
{
string rv = id;
}
+
bool
CBlastDBCmdApp::x_GetOids(const string & id, vector<int> & oids)
{
ids[i] = s_PreProcessAccessionsForDBv5(ids[i]);
}
}
+ try {
m_BlastDb->AccessionsToOids(ids, oids);
+ }
+ catch (CSeqDBException & e) {
+ if (e.GetMsg().find("DB contains no accession info") == NPOS){
+ NCBI_RETHROW_SAME(e, e.GetMsg());
+ }
+ }
for(unsigned i=0; i < ids.size(); i++) {
if(oids[i] == kSeqDBEntryNotFound) {
Int8 num_id = NStr::StringToNumeric<Int8>(ids[i], NStr::fConvErr_NoThrow);
x_InitBlastDB();
status = x_ProcessSearchRequest();
}
+ x_AddCmdOptions();
} CATCH_ALL(status)
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
+/// Record the selected blastdbcmd operation, the output format and basic
+/// facts about the opened database in the usage report.
+///
+/// Reads m_BlastDb, so the database must already be initialised when this is
+/// called.
+void CBlastDBCmdApp::x_AddCmdOptions()
+{
+    const CArgs & args = GetArgs();
+    // Exactly one operation kind is reported; the first match wins.
+    if (args["info"]) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBInfo, true);
+    }
+    else if (args["tax_info"]) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBTaxInfo, true);
+    }
+    else if(args[kArgTaxIdList].HasValue() || args[kArgTaxIdListFile].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eTaxIdList, true);
+    }
+    else if(args["ipg"].HasValue() || args["ipg_batch"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eIPGList, true);
+    }
+    else if(args["entry"].HasValue() || args["entry_batch"].HasValue()) {
+        // eDBEntry is reported once for every entry lookup; "-entry all"
+        // additionally flags a full dump.
+        m_UsageReport.AddParam(CBlastUsageReport::eDBEntry, true);
+        if (args["entry"].HasValue() && args["entry"].AsString() == "all") {
+            m_UsageReport.AddParam(CBlastUsageReport::eDBDumpAll, true);
+        }
+    }
+    if(args["outfmt"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eOutputFmt, args["outfmt"].AsString());
+    }
+
+    // Report the database name without any leading directory component.
+    string db_name = m_BlastDb->GetDBNameList();
+    const string::size_type sep_pos = db_name.find_last_of(CFile::GetPathSeparator());
+    if (sep_pos != string::npos) {
+        db_name.erase(0, sep_pos + 1);
+    }
+    m_UsageReport.AddParam(CBlastUsageReport::eDBName, db_name);
+    m_UsageReport.AddParam(CBlastUsageReport::eDBLength, (Int8) m_BlastDb->GetTotalLength());
+    m_UsageReport.AddParam(CBlastUsageReport::eDBNumSeqs, m_BlastDb->GetNumSeqs());
+    m_UsageReport.AddParam(CBlastUsageReport::eDBDate, m_BlastDb->GetDate());
+}
+
+
#ifndef SKIP_DOXYGEN_PROCESSING
int main(int argc, const char* argv[] /*, const char* envp[]*/)
-/* $Id: blastdbcp.cpp 605535 2020-04-13 11:07:03Z ivanov $
+/* $Id: blastdbcp.cpp 615363 2020-08-31 15:40:04Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <objtools/blast/seqdb_writer/build_db.hpp>
#include <objtools/blast/seqdb_writer/impl/criteria.hpp>
#include <objtools/blast/blastdb_format/invalid_data_exception.hpp>
+#include <algo/blast/api/blast_usage_report.hpp>
USING_NCBI_SCOPE;
USING_SCOPE(blast);
{
public:
BlastdbCopyApplication();
+ ~BlastdbCopyApplication() {
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
+ }
private: /* Private Methods */
virtual void Init(void);
const string kTargetOnly;
const string kMembershipBits;
const string kCopyOnly;
+
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
/////////////////////////////////////////////////////////////////////////////
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(1, 0);
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdbcp");
+ }
}
-/* # $Id: convert2blastmask.cpp 492284 2016-02-16 16:55:37Z camacho $
+/* # $Id: convert2blastmask.cpp 615362 2020-08-31 15:39:55Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "convert2blastmask");
+ }
+ }
+ ~CConvert2BlastMaskApplication() {
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
CMaskFromFasta* x_GetReader();
CMaskWriterBlastDbMaskInfo* x_GetWriter();
+ void x_AddCmdOptions();
+
/// Contains the description of this application
static const char * const USAGE_LINE;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
const char * const CConvert2BlastMaskApplication::USAGE_LINE
cerr << e.what() << endl;
retval = 1;
}
+ x_AddCmdOptions();
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, retval);
return retval;
}
SetDiagStream(0);
}
+/// Copy the convert2blastmask options that were given a value into the
+/// usage report.
+void CConvert2BlastMaskApplication::x_AddCmdOptions()
+{
+    const CArgs & cmd = GetArgs();
+
+    const CArgValue & mask_algo = cmd["masking_algorithm"];
+    if (mask_algo.HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eMaskAlgo, mask_algo.AsString());
+    }
+
+    const CArgValue & out_fmt = cmd["outfmt"];
+    if (out_fmt.HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eOutputFmt, out_fmt.AsString());
+    }
+
+    // Only the presence of parse_seqids is reported, not its value.
+    if (cmd["parse_seqids"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eParseSeqIDs, true);
+    }
+}
+
#ifndef SKIP_DOXYGEN_PROCESSING
int main(int argc, const char* argv[])
{
-/* $Id: makeblastdb.cpp 592321 2019-08-29 17:58:35Z fongah2 $
+/* $Id: makeblastdb.cpp 615359 2020-08-31 15:39:39Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "makeblastdb");
+ }
+ }
+ ~CMakeBlastDBApp() {
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
private:
void x_VerifyInputFilesType(const vector<CTempString>& filenames,
CMakeBlastDBApp::ESupportedInputFormats input_type);
+ void x_AddCmdOptions();
+
// Data
CNcbiOstream * m_LogFile;
bool m_IsModifyMode;
bool m_SkipUnver;
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
/// Reads an object defined in a NCBI ASN.1 spec from a stream in multiple
int status = 0;
try { x_BuildDatabase(); }
CATCH_ALL(status)
+ x_AddCmdOptions();
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
+/// Copy the makeblastdb options that were given a value into the usage report.
+void CMakeBlastDBApp::x_AddCmdOptions()
+{
+    const CArgs & cmd = GetArgs();
+
+    const CArgValue & input_type = cmd["input_type"];
+    if (input_type.HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eInputType, input_type.AsString());
+    }
+
+    const CArgValue & db_type = cmd[kArgDbType];
+    if (db_type.HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eSeqType, db_type.AsString());
+    }
+
+    // Either taxonomy option is reported under the same flag.
+    if (cmd["taxid"].HasValue() || cmd["taxid_map"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eTaxIdList, true);
+    }
+
+    const CArgValue & parse_ids = cmd["parse_seqids"];
+    if (parse_ids.HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eParseSeqIDs, parse_ids.AsBoolean());
+    }
+
+    // gi_mask takes precedence over mask_data; at most one of them is reported.
+    if (cmd["gi_mask"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eGIList, true);
+    } else if (cmd["mask_data"].HasValue()) {
+        m_UsageReport.AddParam(CBlastUsageReport::eMaskAlgo, true);
+    }
+}
+
#ifndef SKIP_DOXYGEN_PROCESSING
int main(int argc, const char* argv[] /*, const char* envp[]*/)
-/* $Id: makeprofiledb.cpp 596198 2019-11-04 15:01:48Z boratyng $
+/* $Id: makeprofiledb.cpp 615360 2020-08-31 15:39:46Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
int x_Run(void);
+ void x_AddCmdOptions();
+
// Data
CNcbiOstream * m_LogFile;
CNcbiIstream * m_InPssmList;
bool m_UpdateFreqRatios;
bool m_UseModelThreshold;
+
+ CBlastUsageReport m_UsageReport;
+ CStopWatch m_StopWatch;
};
CMakeProfileDBApp::CMakeProfileDBApp(void)
CRef<CVersion> version(new CVersion());
version->SetVersionInfo(new CBlastVersion());
SetFullVersion(version);
+ m_StopWatch.Start();
+ if (m_UsageReport.IsEnabled()) {
+ m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
+ m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "makeprofiledb");
+ }
}
CMakeProfileDBApp::~CMakeProfileDBApp()
string pog_str = m_OutDbName + ".pog";
CFile(pog_str).Remove();
}
+ m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
}
void CMakeProfileDBApp::x_SetupArgDescriptions(void)
LOG_POST(Error << "Error: Unknown exception");
status = BLAST_UNKNOWN_ERROR;
}
+
+ x_AddCmdOptions();
+ m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
return status;
}
+/// Copy the command-line options of interest into the usage report.
+///
+/// Reads the parsed arguments from GetArgs() and adds a parameter to
+/// m_UsageReport for the database type (when given) and a boolean marker
+/// when either taxonomy option has a value.
+///
+/// NOTE(review): the declarations of "taxid" / "taxid_map" for this
+/// application are not visible in this change -- confirm they are registered
+/// in the argument descriptions before relying on these lookups.
+void CMakeProfileDBApp::x_AddCmdOptions()
+{
+    const CArgs & args = GetArgs();
+
+    const bool has_db_type = args["dbtype"].HasValue();
+    if (has_db_type) {
+        m_UsageReport.AddParam(CBlastUsageReport::eDBType,
+                               args["dbtype"].AsString());
+    }
+
+    // "taxid_map" is only examined when "taxid" has no value.
+    const bool has_taxid_info =
+        args["taxid"].HasValue() || args["taxid_map"].HasValue();
+    if (has_taxid_info) {
+        m_UsageReport.AddParam(CBlastUsageReport::eTaxIdList, true);
+    }
+}
+
+
#ifndef SKIP_DOXYGEN_PROCESSING
int main(int argc, const char* argv[] /*, const char* envp[]*/)
{
#################################
-# $Id: Makefile.mk.in 606338 2020-04-20 16:30:59Z ivanov $
+# $Id: Makefile.mk.in 616396 2020-09-15 18:22:00Z ivanov $
# Author: Denis Vakatov (vakatov@ncbi.nlm.nih.gov)
#################################
#
BZ2_LIB = @BZ2_LIB@
LZO_INCLUDE = @LZO_INCLUDE@
LZO_LIBS = @LZO_LIBS@
+ZSTD_INCLUDE= @ZSTD_INCLUDE@
+ZSTD_LIBS = @ZSTD_LIBS@
CMPRS_INCLUDE = $(Z_INCLUDE) $(BZ2_INCLUDE) $(LZO_INCLUDE)
CMPRS_LIBS = $(Z_LIBS) $(BZ2_LIBS) $(LZO_LIBS)
HIREDIS_LIBS = @HIREDIS_LIBS@
HIREDIS_STATIC_LIBS = @HIREDIS_STATIC_LIBS@
+# Apache Arrow (specifically focusing on Parquet)
+APACHE_ARROW_INCLUDE = @APACHE_ARROW_INCLUDE@
+APACHE_ARROW_LIBS = @APACHE_ARROW_LIBS@
+APACHE_ARROW_STATIC_LIBS = @APACHE_ARROW_STATIC_LIBS@
+
+# Kafka
+LIBRDKAFKA_INCLUDE = @LIBRDKAFKA_INCLUDE@
+LIBRDKAFKA_LIBS = @LIBRDKAFKA_LIBS@
+LIBRDKAFKA_STATIC_LIBS = @LIBRDKAFKA_STATIC_LIBS@
+CPPKAFKA_INCLUDE = @CPPKAFKA_INCLUDE@
+CPPKAFKA_LIBS = @CPPKAFKA_LIBS@
+CPPKAFKA_STATIC_LIBS = @CPPKAFKA_STATIC_LIBS@
+
# Compress
COMPRESS_LDEP = $(CMPRS_LIB)
COMPRESS_LIBS = xcompress $(COMPRESS_LDEP)
OBJREAD_LIBS = xobjread variation submit xlogging
# formatting code
-XFORMAT_LIBS = xformat xcleanup gbseq mlacli mla medlars pubmed valid $(OBJEDIT_LIBS)
+XFORMAT_LIBS = xformat xcleanup gbseq $(OBJEDIT_LIBS)
# object editing library
-OBJEDIT_LIBS = xobjedit $(OBJREAD_LIBS) taxon3
+OBJEDIT_LIBS = xobjedit $(OBJREAD_LIBS) taxon3 mlacli mla medlars pubmed valid
# standard data loader configuration, plus supporting libraries
DATA_LOADERS_UTIL_LIB = data_loaders_util \
-# $Id: Makefile.xcode.tmpl 563416 2018-05-09 11:59:33Z ivanov $
+# $Id: Makefile.xcode.tmpl 608826 2020-05-21 18:14:06Z ivanov $
# Makefile template for Xcode
#######################################################################
include ./Makefile.mk
-DEVSDK = /Developer/SDKs
-SDKDIR = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform
-SDK = $(firstword $(wildcard $(DEVSDK)/*.sdk) $(wildcard $(SDKDIR)/*.sdk))
+# DEVSDK = /Developer/SDKs
+# SDKDIR = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform
+# SDK = $(firstword $(wildcard $(DEVSDK)/*.sdk) $(wildcard $(SDKDIR)/*.sdk))
ifneq "" "$(wildcard ${SDK})"
SDKFLAG = -sdk ${SDK}
else
#############################################################################
-# $Id: CMake.NCBIComponents.cmake 607658 2020-05-06 12:48:50Z ivanov $
+# $Id: CMake.NCBIComponents.cmake 609371 2020-06-01 14:13:18Z ivanov $
#############################################################################
##
if(WIN32)
set(NCBI_COMPONENT_local_lbsm_FOUND NO)
else()
- if (EXISTS ${NCBI_SRC_ROOT}/connect/ncbi_lbsm.c)
+ if (EXISTS ${NCBITK_SRC_ROOT}/connect/ncbi_lbsm.c)
# message("local_lbsm found at ${NCBI_SRC_ROOT}/connect")
set(NCBI_COMPONENT_local_lbsm_FOUND YES)
set(HAVE_LOCAL_LBSM 1)
#############################################################################
# LocalPCRE
-if (EXISTS ${includedir}/util/regexp)
+if (EXISTS ${NCBITK_INC_ROOT}/util/regexp)
set(NCBI_COMPONENT_LocalPCRE_FOUND YES)
- set(NCBI_COMPONENT_LocalPCRE_INCLUDE ${includedir}/util/regexp)
+ set(NCBI_COMPONENT_LocalPCRE_INCLUDE ${NCBITK_INC_ROOT}/util/regexp)
set(NCBI_COMPONENT_LocalPCRE_NCBILIB regexp)
else()
set(NCBI_COMPONENT_LocalPCRE_FOUND NO)
#############################################################################
# LocalZ
-if (EXISTS ${includedir}/util/compress/zlib)
+if (EXISTS ${NCBITK_INC_ROOT}/util/compress/zlib)
set(NCBI_COMPONENT_LocalZ_FOUND YES)
- set(NCBI_COMPONENT_LocalZ_INCLUDE ${includedir}/util/compress/zlib)
+ set(NCBI_COMPONENT_LocalZ_INCLUDE ${NCBITK_INC_ROOT}/util/compress/zlib)
set(NCBI_COMPONENT_LocalZ_NCBILIB z)
else()
set(NCBI_COMPONENT_LocalZ_FOUND NO)
#############################################################################
# LocalBZ2
-if (EXISTS ${includedir}/util/compress/bzip2)
+if (EXISTS ${NCBITK_INC_ROOT}/util/compress/bzip2)
set(NCBI_COMPONENT_LocalBZ2_FOUND YES)
- set(NCBI_COMPONENT_LocalBZ2_INCLUDE ${includedir}/util/compress/bzip2)
+ set(NCBI_COMPONENT_LocalBZ2_INCLUDE ${NCBITK_INC_ROOT}/util/compress/bzip2)
set(NCBI_COMPONENT_LocalBZ2_NCBILIB bz2)
else()
set(NCBI_COMPONENT_LocalBZ2_FOUND NO)
#############################################################################
#LocalLMDB
-if (EXISTS ${includedir}/util/lmdb)
+if (EXISTS ${NCBITK_INC_ROOT}/util/lmdb)
set(NCBI_COMPONENT_LocalLMDB_FOUND YES)
- set(NCBI_COMPONENT_LocalLMDB_INCLUDE ${includedir}/util/lmdb)
+ set(NCBI_COMPONENT_LocalLMDB_INCLUDE ${NCBITK_INC_ROOT}/util/lmdb)
set(NCBI_COMPONENT_LocalLMDB_NCBILIB lmdb)
else()
set(NCBI_COMPONENT_LocalLMDB_FOUND NO)
#############################################################################
# FreeTDS
-set(FTDS95_INCLUDE ${includedir}/dbapi/driver/ftds95 ${includedir}/dbapi/driver/ftds95/freetds)
-set(FTDS100_INCLUDE ${includedir}/dbapi/driver/ftds100 ${includedir}/dbapi/driver/ftds100/freetds)
+set(FTDS95_INCLUDE ${NCBITK_INC_ROOT}/dbapi/driver/ftds95 ${NCBITK_INC_ROOT}/dbapi/driver/ftds95/freetds)
+set(FTDS100_INCLUDE ${NCBITK_INC_ROOT}/dbapi/driver/ftds100 ${NCBITK_INC_ROOT}/dbapi/driver/ftds100/freetds)
set(NCBI_COMPONENT_FreeTDS_FOUND YES)
set(NCBI_COMPONENT_FreeTDS_INCLUDE ${FTDS100_INCLUDE})
if (MSVC)
include(${NCBI_TREE_CMAKECFG}/CMake.NCBIComponentsMSVC.cmake)
-elseif (XCODE)
+elseif (APPLE)
include(${NCBI_TREE_CMAKECFG}/CMake.NCBIComponentsXCODE.cmake)
else()
if(NCBI_EXPERIMENTAL_CFG)
#############################################################################
# FreeTDS
-set(FTDS95_INCLUDE ${includedir}/dbapi/driver/ftds95 ${includedir}/dbapi/driver/ftds95/freetds)
-set(FTDS100_INCLUDE ${includedir}/dbapi/driver/ftds100 ${includedir}/dbapi/driver/ftds100/freetds)
+set(FTDS95_INCLUDE ${NCBITK_INC_ROOT}/dbapi/driver/ftds95 ${NCBITK_INC_ROOT}/dbapi/driver/ftds95/freetds)
+set(FTDS100_INCLUDE ${NCBITK_INC_ROOT}/dbapi/driver/ftds100 ${NCBITK_INC_ROOT}/dbapi/driver/ftds100/freetds)
#############################################################################
list(SORT NCBI_ALL_COMPONENTS)
#############################################################################
-# $Id: CMake.NCBIComponentsMSVC.cmake 607786 2020-05-07 15:35:50Z ivanov $
+# $Id: CMake.NCBIComponentsMSVC.cmake 609371 2020-06-01 14:13:18Z ivanov $
#############################################################################
##
## HAVE_XXX
-set(NCBI_COMPONENT_MSWin_FOUND YES)
+set(NCBI_REQUIRE_MSWin_FOUND YES)
#to debug
#set(NCBI_TRACE_COMPONENT_GRPC ON)
#############################################################################
#############################################################################
-# $Id: CMake.NCBIComponentsUNIX.cmake 605517 2020-04-12 00:56:13Z ucko $
+# $Id: CMake.NCBIComponentsUNIX.cmake 611999 2020-07-14 15:30:59Z ivanov $
#############################################################################
##
find_external_library(VDB
INCLUDES sra/sradb.h
LIBS ncbi-vdb
- INCLUDE_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.10.5\\interfaces"
- LIBS_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.10.5\\win\\release\\x86_64\\lib")
+ INCLUDE_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.10.8\\interfaces"
+ LIBS_HINTS "\\\\snowman\\trace_software\\vdb\\vdb-versions\\2.10.8\\win\\release\\x86_64\\lib")
else (WIN32)
find_external_library(VDB
INCLUDES sra/sradb.h
#############################################################################
-# $Id: CMake.NCBIComponentsUNIXex.cmake 607786 2020-05-07 15:35:50Z ivanov $
+# $Id: CMake.NCBIComponentsUNIXex.cmake 609371 2020-06-01 14:13:18Z ivanov $
#############################################################################
##
## HAVE_LIBXXX
## HAVE_XXX
-set(NCBI_COMPONENT_unix_FOUND YES)
-set(NCBI_COMPONENT_Linux_FOUND YES)
+set(NCBI_REQUIRE_unix_FOUND YES)
+if(NOT APPLE)
+set(NCBI_REQUIRE_Linux_FOUND YES)
+endif()
option(USE_LOCAL_BZLIB "Use a local copy of libbz2")
option(USE_LOCAL_PCRE "Use a local copy of libpcre")
#to debug
if(EXISTS ${NCBI_ThirdParty_BACKWARD}/include)
set(LIBBACKWARD_INCLUDE ${NCBI_ThirdParty_BACKWARD}/include)
set(HAVE_LIBBACKWARD_CPP YES)
+ set(NCBI_COMPONENT_BACKWARD_FOUND YES)
+ set(NCBI_COMPONENT_BACKWARD_INCLUDE ${LIBBACKWARD_INCLUDE})
+ list(APPEND NCBI_ALL_COMPONENTS BACKWARD)
+ else()
+ message("NOT FOUND BACKWARD")
endif()
find_library(LIBBACKWARD_LIBS NAMES backward HINTS ${NCBI_ThirdParty_BACKWARD}/lib)
find_library(LIBDW_LIBS NAMES dw)
if (LIBDW_LIBS)
set(HAVE_LIBDW YES)
endif()
-
if(HAVE_LIBBACKWARD_CPP AND HAVE_LIBDW)
- set(NCBI_COMPONENT_BACKWARD_FOUND YES)
- set(NCBI_COMPONENT_BACKWARD_INCLUDE ${LIBBACKWARD_INCLUDE})
set(NCBI_COMPONENT_BACKWARD_LIBS ${LIBDW_LIBS})
# set(NCBI_COMPONENT_BACKWARD_LIBS ${LIBBACKWARD_LIBS} ${LIBDW_LIBS})
- list(APPEND NCBI_ALL_COMPONENTS BACKWARD)
- else()
- message("NOT FOUND BACKWARD")
endif()
else(NOT NCBI_COMPONENT_BACKWARD_DISABLED)
message("DISABLED BACKWARD")
#############################################################################
-# $Id: CMake.NCBIComponentsXCODE.cmake 607661 2020-05-06 12:49:33Z ivanov $
+# $Id: CMake.NCBIComponentsXCODE.cmake 611999 2020-07-14 15:30:59Z ivanov $
#############################################################################
##
## HAVE_XXX
-set(NCBI_COMPONENT_XCODE_FOUND YES)
-set(NCBI_COMPONENT_unix_FOUND YES)
+set(NCBI_REQUIRE_unix_FOUND YES)
+if(XCODE)
+set(NCBI_REQUIRE_XCODE_FOUND YES)
+endif()
+#to debug
+#set(NCBI_TRACE_COMPONENT_JPEG ON)
#############################################################################
# common settings
set(NCBI_TOOLS_ROOT $ENV{NCBI})
############################################################################
set(NCBI_ThirdPartyBasePath ${NCBI_TOOLS_ROOT})
+set(NCBI_ThirdParty_BACKWARD ${NCBI_ThirdPartyBasePath}/backward-cpp-1.3.20180206-44ae960)
set(NCBI_ThirdParty_TLS ${NCBI_ThirdPartyBasePath}/gnutls-3.4.0)
#set(NCBI_ThirdParty_FASTCGI
set(NCBI_ThirdParty_Boost ${NCBI_ThirdPartyBasePath}/boost-1.62.0-ncbi1)
set(NCBI_ThirdParty_XML ${NCBI_ThirdPartyBasePath}/libxml-2.7.8)
set(NCBI_ThirdParty_XSLT ${NCBI_ThirdPartyBasePath}/libxml-2.7.8)
set(NCBI_ThirdParty_EXSLT ${NCBI_ThirdParty_XSLT})
-set(NCBI_ThirdParty_SQLITE3 ${NCBI_ThirdPartyBasePath}/sqlite-3.8.10.1-ncbi1)
+set(NCBI_ThirdParty_SQLITE3 ${NCBI_ThirdPartyBasePath}/sqlite-3.26.0-ncbi1)
#set(NCBI_ThirdParty_Sybase
-set(NCBI_ThirdParty_VDB "/net/snowman/vol/projects/trace_software/vdb/vdb-versions/2.10.5")
+set(NCBI_ThirdParty_VDB "/net/snowman/vol/projects/trace_software/vdb/vdb-versions/2.10.8")
set(NCBI_ThirdParty_VDB_ARCH x86_64)
set(NCBI_ThirdParty_wxWidgets ${NCBI_ThirdPartyBasePath}/wxWidgets-3.1.3-ncbi1)
set(NCBI_ThirdParty_GLEW ${NCBI_ThirdPartyBasePath}/glew-1.5.8)
set(NCBI_ThirdParty_FTGL ${NCBI_ThirdPartyBasePath}/ftgl-2.1.3-rc5)
set(NCBI_ThirdParty_FreeType ${NCBI_OPT_ROOT})
+set(NCBI_ThirdParty_NGHTTP2 ${NCBI_ThirdPartyBasePath}/nghttp2-1.40.0)
+set(NCBI_ThirdParty_UV ${NCBI_ThirdPartyBasePath}/libuv-1.35.0)
+set(NCBI_ThirdParty_GL2PS ${NCBI_ThirdPartyBasePath}/gl2ps-1.4.0)
+set(NCBI_ThirdParty_Nettle ${NCBI_ThirdPartyBasePath}/nettle-3.1.1)
+set(NCBI_ThirdParty_GMP ${NCBI_ThirdPartyBasePath}/gmp-6.0.0a)
#############################################################################
#############################################################################
-set(_XCODE_EXTRA_LIBS)
function(NCBI_define_component _name)
if(NCBI_COMPONENT_${_name}_DISABLED)
set(_suffixes .a .dylib)
endif()
set(_roots ${_root})
-# set(_subdirs Release${NCBI_PlatformBits}/lib lib64 lib)
- set(_subdirs Release${NCBI_PlatformBits}/lib lib64 ${_XCODE_EXTRA_LIBS})
+ set(_subdirs Release${NCBI_PlatformBits}/lib lib64 lib)
+# set(_subdirs Release${NCBI_PlatformBits}/lib lib64 ${_XCODE_EXTRA_LIBS})
if (BUILD_SHARED_LIBS AND DEFINED NCBI_ThirdParty_${_name}_SHLIB)
set(_roots ${NCBI_ThirdParty_${_name}_SHLIB} ${_roots})
set(_subdirs shlib64 shlib lib64 lib)
set(_all_libs "")
foreach(_lib IN LISTS _args)
set(_this_found NO)
+ if(NCBI_TRACE_COMPONENT_${_name})
+ message("${_name}: checking ${_root}/${_libdir}/lib${_lib}")
+ endif()
foreach(_sfx IN LISTS _suffixes)
if(EXISTS ${_root}/${_libdir}/lib${_lib}${_sfx})
list(APPEND _all_libs ${_root}/${_libdir}/lib${_lib}${_sfx})
set(_this_found YES)
break()
+ else()
+ if(NCBI_TRACE_COMPONENT_${_name})
+ message("${_name}: ${_root}/${_libdir}/lib${_lib}${_sfx} not found")
+ endif()
endif()
endforeach()
if(NOT _this_found)
set(NCBI_COMPONENT_NCBI_C_FOUND NO)
#############################################################################
-# STACKTRACE
-set(NCBI_COMPONENT_STACKTRACE_FOUND NO)
+# BACKWARD, UNWIND
+# Detect the BACKWARD component from its third-party include directory.
+#   - disabled by configuration      -> report "DISABLED BACKWARD"
+#   - include directory is present   -> mark the component as found, publish
+#                                       its include path and register it in
+#                                       NCBI_ALL_COMPONENTS
+#   - otherwise                      -> report "NOT FOUND BACKWARD"
+if(NCBI_COMPONENT_BACKWARD_DISABLED)
+    message("DISABLED BACKWARD")
+elseif(EXISTS ${NCBI_ThirdParty_BACKWARD}/include)
+    set(LIBBACKWARD_INCLUDE ${NCBI_ThirdParty_BACKWARD}/include)
+    set(HAVE_LIBBACKWARD_CPP YES)
+    set(NCBI_COMPONENT_BACKWARD_FOUND YES)
+    set(NCBI_COMPONENT_BACKWARD_INCLUDE ${LIBBACKWARD_INCLUDE})
+    list(APPEND NCBI_ALL_COMPONENTS BACKWARD)
+else()
+    message("NOT FOUND BACKWARD")
+endif()
#############################################################################
#LMDB
set(NCBI_COMPONENT_Boost.Test.Included_FOUND NO)
endif()
-set(_XCODE_EXTRA_LIBS lib)
#############################################################################
# Boost.Test
NCBI_define_component(Boost.Test boost_unit_test_framework)
#############################################################################
# Boost.Spirit
NCBI_define_component(Boost.Spirit boost_thread-mt)
-set(_XCODE_EXTRA_LIBS "")
#############################################################################
# JPEG
#############################################################################
# FreeType
-set(_XCODE_EXTRA_LIBS lib)
NCBI_define_component(FreeType freetype)
if(NCBI_COMPONENT_FreeType_FOUND)
set(NCBI_COMPONENT_FreeType_INCLUDE ${NCBI_COMPONENT_FreeType_INCLUDE} ${NCBI_COMPONENT_FreeType_INCLUDE}/freetype2)
endif()
-set(_XCODE_EXTRA_LIBS "")
+#############################################################################
+# NGHTTP2
+NCBI_define_component(NGHTTP2 nghttp2)
+
+#############################################################################
+# UV
+NCBI_define_component(UV uv)
+
+#############################################################################
+# GL2PS
+NCBI_define_component(GL2PS gl2ps)
+
+#############################################################################
+# Nettle
+NCBI_define_component(Nettle nettle hogweed)
+
+#############################################################################
+# GMP
+#NCBI_define_component(GMP gmp)
#############################################################################
-# $Id: CMake.NCBIptb.cmake 607660 2020-05-06 12:49:19Z ivanov $
+# $Id: CMake.NCBIptb.cmake 609379 2020-06-01 14:15:14Z ivanov $
#############################################################################
#############################################################################
##
#############################################################################
# deprecated
macro(NCBI_add_root_subdirectory)
+ message(WARNING "NCBI_add_root_subdirectory is deprecated, use NCBI_add_subdirectory instead")
NCBI_add_subdirectory(${ARGV})
endmacro()
if(NCBI_PTBMODE_PARTS)
return()
endif()
- if(NCBI_PTBCFG_ENABLE_COLLECTOR AND NOT NCBI_PTB_HAS_ROOT)
+
+ if(NOT DEFINED NCBI_CURRENT_SOURCE_DIR)
set(NCBI_CURRENT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+ endif()
+ if("${NCBI_CURRENT_SOURCE_DIR}" STREQUAL "${NCBITK_SRC_ROOT}")
+ set(NCBI_TREE_ROOT ${NCBITK_TREE_ROOT})
+ set(NCBI_SRC_ROOT ${NCBITK_SRC_ROOT})
+ set(NCBI_INC_ROOT ${NCBITK_INC_ROOT})
+ elseif("${NCBI_CURRENT_SOURCE_DIR}" STREQUAL "${NCBITK_TREE_ROOT}")
+ set(NCBI_TREE_ROOT ${NCBITK_TREE_ROOT})
+ set(NCBI_SRC_ROOT ${NCBITK_TREE_ROOT})
+ set(NCBI_INC_ROOT ${NCBITK_TREE_ROOT})
+ endif()
+
+ if(NOT NCBI_PTB_HAS_ROOT)
NCBI_internal_analyze_tree()
- variable_watch(CMAKE_CURRENT_LIST_DIR NCBI_internal_end_of_config)
endif()
if(NCBI_PTBMODE_COLLECT_DEPS)
if(DEFINED NCBI_PTB_ALLOWED_DIRS)
set(_is_good FALSE)
foreach(_dir IN LISTS NCBI_PTB_ALLOWED_DIRS)
- NCBI_util_match_path(${_dir} ${NCBI_CURRENT_SOURCE_DIR} _is_good)
- if(_is_good)
+ string(FIND ${_dir} ${NCBI_CURRENT_SOURCE_DIR} _pos)
+ if(${_pos} EQUAL 0)
+ set(_is_good TRUE)
break()
endif()
endforeach()
else()
- set(_is_good TRUE)
+ NCBI_internal_process_project_filters( _is_good)
+ if(NOT _is_good)
+ if(NOT "${NCBI_PTBCFG_PROJECT_LIST}" STREQUAL "")
+ foreach(_dir IN LISTS NCBI_PTBCFG_PROJECT_LIST)
+ string(FIND "${NCBI_SRC_ROOT}/${_dir}" "${NCBI_CURRENT_SOURCE_DIR}" _pos)
+ if(${_pos} EQUAL 0)
+ set(_is_good TRUE)
+ break()
+ endif()
+ endforeach()
+ endif()
+ endif()
endif()
if (_is_good AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_sub}/CMakeLists.txt")
add_subdirectory(${_sub})
if (NCBI_PTBMODE_COLLECT_DEPS OR TARGET ${_name})
set(_appname ${_appname}-app)
endif()
+ elseif(NOT NCBI_PTBCFG_ENABLE_COLLECTOR)
+ if (TARGET ${_name})
+ set(_appname ${_appname}-app)
+ endif()
endif()
endif()
set(NCBI_PROJECT ${_appname})
endif()
endfunction()
+##############################################################################
+# NCBI_util_elapsed(<var>)
+#   Stores a printable time stamp in <var>.
+#     - NCBI_TIMESTAMP_START not defined: the current time as "HH:MM:SS".
+#     - NCBI_TIMESTAMP_START defined: "HH:MM:SS (<N>s)", where <N> is the
+#       current "%s" time stamp minus NCBI_TIMESTAMP_START.
+#   This is a macro, so the helper variables _curtime and _delta are set in
+#   the caller's scope when the second form is produced.
+macro(NCBI_util_elapsed _value)
+    if(NOT DEFINED NCBI_TIMESTAMP_START)
+        # No reference point available: report the wall-clock time only.
+        string(TIMESTAMP ${_value} "%H:%M:%S")
+    else()
+        # First read the "%s" stamp to compute the delta, then reuse
+        # _curtime for the formatted wall-clock time.
+        string(TIMESTAMP _curtime "%s")
+        math(EXPR _delta "${_curtime} - ${NCBI_TIMESTAMP_START}")
+        string(TIMESTAMP _curtime "%H:%M:%S")
+        set(${_value} "${_curtime} (${_delta}s)")
+    endif()
+endmacro()
+
##############################################################################
macro(NCBI_util_parse_sign _input _value _negative)
string(SUBSTRING ${_input} 0 1 _sign)
set_property(GLOBAL PROPERTY NCBI_PTBPROP_COUNT_${_type} 0)
endforeach()
- if(NOT DEFINED NCBI_PTBCFG_KNOWN_FOLDERS OR "${NCBI_PTBCFG_KNOWN_FOLDERS}" STREQUAL "")
- file(GLOB _files LIST_DIRECTORIES TRUE "${NCBI_CURRENT_SOURCE_DIR}/*")
- foreach(_file IN LISTS _files)
- if(IS_DIRECTORY ${_file} AND EXISTS ${_file}/CMakeLists.txt)
- get_filename_component(_basename ${_file} NAME)
- list(APPEND NCBI_PTBCFG_KNOWN_FOLDERS ${_basename})
- endif()
- endforeach()
+ if( "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "" AND
+ "${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "" AND
+ "${NCBI_PTBCFG_PROJECT_LIST}" STREQUAL "")
+ set(NCBI_PTB_NOFILTERS TRUE)
+ endif()
+ if (NCBI_PTBCFG_ENABLE_COLLECTOR AND NCBI_PTB_NOFILTERS AND NOT NCBI_PTBCFG_ALLOW_COMPOSITE)
+ set(NCBI_PTBCFG_ENABLE_COLLECTOR FALSE)
+ set(NCBI_PTBCFG_ENABLE_COLLECTOR FALSE PARENT_SCOPE)
endif()
- message("Analyzing source tree...")
- set_property(GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS "")
+ if(NCBI_PTBCFG_ENABLE_COLLECTOR)
+ if(NOT DEFINED NCBI_PTBCFG_KNOWN_FOLDERS OR "${NCBI_PTBCFG_KNOWN_FOLDERS}" STREQUAL "")
+ file(GLOB _files LIST_DIRECTORIES TRUE "${NCBI_CURRENT_SOURCE_DIR}/*")
+ foreach(_file IN LISTS _files)
+ if(IS_DIRECTORY ${_file} AND EXISTS ${_file}/CMakeLists.txt)
+ get_filename_component(_basename ${_file} NAME)
+ list(APPEND NCBI_PTBCFG_KNOWN_FOLDERS ${_basename})
+ endif()
+ endforeach()
+ endif()
+ list(LENGTH NCBI_PTBCFG_KNOWN_FOLDERS _count)
+ if(NOT ${_count} EQUAL 1)
+ set(NCBI_PTB_THIS_SRC_ROOT ${NCBI_SRC_ROOT} PARENT_SCOPE)
+ set(NCBI_PTB_THIS_SRC_ROOT ${NCBI_SRC_ROOT})
+ endif()
- set(NCBI_PTBMODE_COLLECT_DEPS ON)
- NCBI_add_subdirectory(${NCBI_PTBCFG_KNOWN_FOLDERS})
- set(NCBI_PTB_CALLBACK_ALL_PARSED TRUE)
- set(NCBI_PTBMODE_COLLECT_DEPS OFF)
+ NCBI_util_elapsed(_elapsed)
+ message("${_elapsed}: Analyzing source tree...")
+ set_property(GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS "")
- get_property(_allprojects GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS)
- get_property(_allowedprojects GLOBAL PROPERTY NCBI_PTBPROP_ALLOWED_PROJECTS)
+ set(NCBI_PTBMODE_COLLECT_DEPS ON)
+ set(_known ${NCBI_PTBCFG_KNOWN_FOLDERS})
+ unset(NCBI_PTBCFG_KNOWN_FOLDERS)
+ NCBI_add_subdirectory(${_known})
+ set(NCBI_PTB_CALLBACK_ALL_PARSED TRUE)
+ set(NCBI_PTBMODE_COLLECT_DEPS OFF)
-if(OFF)
-message("NCBI_PTBPROP_ALL_PROJECTS: ${_allprojects}")
-foreach(_prj IN LISTS _allprojects)
- get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
- message("NCBI_PTBPROP_DEPS_${_prj}: ${_prjdeps}")
-endforeach()
-message("NCBI_PTBPROP_ALLOWED_PROJECTS: ${_allowedprojects}")
-endif()
+ get_property(_allprojects GLOBAL PROPERTY NCBI_PTBPROP_ALL_PROJECTS)
+ get_property(_allowedprojects GLOBAL PROPERTY NCBI_PTBPROP_ALLOWED_PROJECTS)
- if("${_allowedprojects}" STREQUAL "")
- message(FATAL_ERROR "List of projects is empty")
- return()
- endif()
+ if(OFF)
+ message("NCBI_PTBPROP_ALL_PROJECTS: ${_allprojects}")
+ foreach(_prj IN LISTS _allprojects)
+ get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
+ message("NCBI_PTBPROP_DEPS_${_prj}: ${_prjdeps}")
+ endforeach()
+ message("NCBI_PTBPROP_ALLOWED_PROJECTS: ${_allowedprojects}")
+ endif()
- message("Collecting projects...")
- list(REMOVE_DUPLICATES _allowedprojects)
- foreach(_prj IN LISTS _allowedprojects)
- NCBI_internal_collect_dependencies(${_prj})
- get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
- get_property(_host GLOBAL PROPERTY NCBI_PTBPROP_HOST_${_prj})
- set(NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PTB_ALLOWED_PROJECTS} ${_host} ${_prj} ${_prjdeps})
- endforeach()
- list(SORT NCBI_PTB_ALLOWED_PROJECTS)
- list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_PROJECTS)
- if(NCBI_PTBCFG_ALLOW_COMPOSITE)
- set(_allowedprojects ${NCBI_PTB_ALLOWED_PROJECTS})
+ if("${_allowedprojects}" STREQUAL "")
+ message(FATAL_ERROR "List of projects is empty")
+ return()
+ endif()
+
+ NCBI_util_elapsed(_elapsed)
+ message("${_elapsed}: Collecting projects...")
+ list(REMOVE_DUPLICATES _allowedprojects)
foreach(_prj IN LISTS _allowedprojects)
NCBI_internal_collect_dependencies(${_prj})
get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
endforeach()
list(SORT NCBI_PTB_ALLOWED_PROJECTS)
list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_PROJECTS)
- endif()
- foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
- get_property(_dir GLOBAL PROPERTY NCBI_PTBPROP_DIR_${_prj})
- list(APPEND NCBI_PTB_ALLOWED_DIRS ${_dir})
- endforeach()
- list(SORT NCBI_PTB_ALLOWED_DIRS)
- list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_DIRS)
-if(OFF)
-message("NCBI_PTB_ALLOWED_PROJECTS: ${NCBI_PTB_ALLOWED_PROJECTS}")
-foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
- get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
- message("NCBI_PTBPROP_DEPS_${_prj}: ${_prjdeps}")
-endforeach()
-endif()
+ if(NCBI_PTBCFG_ALLOW_COMPOSITE)
+ set(_allowedprojects ${NCBI_PTB_ALLOWED_PROJECTS})
+ foreach(_prj IN LISTS _allowedprojects)
+ NCBI_internal_collect_dependencies(${_prj})
+ get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
+ get_property(_host GLOBAL PROPERTY NCBI_PTBPROP_HOST_${_prj})
+ set(NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PTB_ALLOWED_PROJECTS} ${_host} ${_prj} ${_prjdeps})
+ endforeach()
+ list(SORT NCBI_PTB_ALLOWED_PROJECTS)
+ list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_PROJECTS)
+ endif()
+ if(NOT NCBI_PTB_NOFILTERS)
+ foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
+ get_property(_dir GLOBAL PROPERTY NCBI_PTBPROP_DIR_${_prj})
+ list(APPEND NCBI_PTB_ALLOWED_DIRS ${_dir})
+ endforeach()
+ list(SORT NCBI_PTB_ALLOWED_DIRS)
+ list(REMOVE_DUPLICATES NCBI_PTB_ALLOWED_DIRS)
+ set(NCBI_PTB_ALLOWED_DIRS ${NCBI_PTB_ALLOWED_DIRS} PARENT_SCOPE)
+ endif()
- foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
- NCBI_internal_collect_requires(${_prj})
- endforeach()
- set(NCBI_PTB_CALLBACK_COLLECTED TRUE)
- foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
- if (NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${_prj})
- NCBI_internal_print_project_info(${_prj})
+ if(OFF)
+ message("NCBI_PTB_ALLOWED_PROJECTS: ${NCBI_PTB_ALLOWED_PROJECTS}")
+ foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
+ get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${_prj})
+ message("NCBI_PTBPROP_DEPS_${_prj}: ${_prjdeps}")
+ endforeach()
endif()
- endforeach()
- set(NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PTB_ALLOWED_PROJECTS} PARENT_SCOPE)
- set(NCBI_PTB_ALLOWED_DIRS ${NCBI_PTB_ALLOWED_DIRS} PARENT_SCOPE)
- message("Configuring projects...")
+ foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
+ NCBI_internal_collect_requires(${_prj})
+ endforeach()
+ set(NCBI_PTB_CALLBACK_COLLECTED TRUE)
+ foreach(_prj IN LISTS NCBI_PTB_ALLOWED_PROJECTS)
+ if (NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${_prj})
+ NCBI_internal_print_project_info(${_prj})
+ endif()
+ endforeach()
+
+ set(NCBI_PTB_ALLOWED_PROJECTS ${NCBI_PTB_ALLOWED_PROJECTS} PARENT_SCOPE)
+ else()
+ message("Source tree analysis skipped")
+ endif()
+ NCBI_util_elapsed(_elapsed)
+ message("${_elapsed}: Configuring projects...")
+ variable_watch(CMAKE_CURRENT_LIST_DIR NCBI_internal_end_of_config)
endmacro()
#############################################################################
set(NCBI_PTB_CALLBACK_ALL_ADDED TRUE)
NCBI_internal_print_report("Processed" TOTAL)
NCBI_internal_print_report("Added" COUNT)
+ NCBI_util_elapsed(_elapsed)
+ message("${_elapsed}: Done")
endfunction()
#############################################################################
NCBI_internal_collect_parts(_result)
if(_result)
- if (NCBI_PTBMODE_COLLECT_DEPS)
+ if (NCBI_PTBMODE_COLLECT_DEPS OR NOT NCBI_PTBCFG_ENABLE_COLLECTOR)
#set_property(GLOBAL PROPERTY NCBI_PTBPROP_PARTS_${NCBI_PROJECT_ID} ${NCBITMP_PROJECT_PART_IDS})
foreach(_part IN LISTS NCBITMP_PROJECT_PART_IDS)
set_property(GLOBAL PROPERTY NCBI_PTBPROP_HOSTID_${_part} ${NCBI_PROJECT_ID})
##############################################################################
function(NCBI_internal_verify_libs)
set(_optimize NO)
- if (WIN32 AND NOT NCBI_PTBMODE_COLLECT_DEPS AND NOT DEFINED NCBI_EXTERNAL_TREE_ROOT AND NOT DEFINED NCBI_PTBCFG_DOINSTALL)
+ if (WIN32 AND NCBI_PTBCFG_ENABLE_COLLECTOR
+ AND NOT NCBI_PTBMODE_COLLECT_DEPS
+ AND NOT DEFINED NCBI_EXTERNAL_TREE_ROOT
+ AND NOT DEFINED NCBI_PTBCFG_DOINSTALL)
if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "STATIC")
# set(_ncbilib ${NCBITMP_NCBILIB})
get_property(_ncbilib GLOBAL PROPERTY NCBI_PTBPROP_DEPS_${NCBI_PROJECT})
##############################################################################
function(NCBI_internal_process_project_filters _result)
- if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "")
+ if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "" AND NOT "${NCBI_PROJECT}" STREQUAL "")
foreach(_prj IN LISTS NCBI_PTBCFG_PROJECT_TARGETS)
if("${_prj}" STREQUAL "")
continue()
endforeach()
endif()
- if(NOT "${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "")
+ if(NOT "${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "" AND NOT "${NCBI_PROJECT}" STREQUAL "")
set(_alltags ${NCBI__PROJTAG} ${NCBI_${NCBI_PROJECT}_PROJTAG})
if("${NCBI_PTBCFG_PROJECT_TAGS}" STREQUAL "-")
if(NOT "${_alltags}" STREQUAL "")
if(NOT "${NCBI_PTBCFG_PROJECT_LIST}" STREQUAL "")
set(_is_good FALSE)
set(_hasp FALSE)
+ if(DEFINED NCBI_PTB_THIS_SRC_ROOT)
+ set(_src_root ${NCBI_PTB_THIS_SRC_ROOT})
+ else()
+ set(_src_root ${NCBI_SRC_ROOT})
+ endif()
foreach(_dir IN LISTS NCBI_PTBCFG_PROJECT_LIST)
if("${_dir}" STREQUAL "")
continue()
endif()
NCBI_util_parse_sign( ${_dir} _value _negate)
if(_negate)
- NCBI_util_match_path(${NCBI_CURRENT_SOURCE_DIR} ${NCBI_SRC_ROOT}/${_value} _match)
+ NCBI_util_match_path(${NCBI_CURRENT_SOURCE_DIR} ${_src_root}/${_value} _match)
if(_match)
set(${_result} FALSE PARENT_SCOPE)
return()
endif()
else()
set(_hasp TRUE)
- NCBI_util_match_path(${NCBI_CURRENT_SOURCE_DIR} ${NCBI_SRC_ROOT}/${_value} _match)
+ NCBI_util_match_path(${NCBI_CURRENT_SOURCE_DIR} ${_src_root}/${_value} _match)
if(_match)
set(_is_good TRUE)
endif()
endif()
endif()
- if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "")
+ if(NOT "${NCBI_PTBCFG_PROJECT_TARGETS}" STREQUAL "" AND NOT "${NCBI_PROJECT}" STREQUAL "")
set(_is_good FALSE)
set(_hasp FALSE)
foreach(_prj IN LISTS NCBI_PTBCFG_PROJECT_TARGETS)
set(_report "")
foreach( _type IN ITEMS CONSOLEAPP GUIAPP STATIC SHARED CUSTOM)
get_property(_cnt GLOBAL PROPERTY NCBI_PTBPROP_${_counter}_${_type})
- if( ${_cnt} GREATER 0)
+ if( NOT "${_cnt}" STREQUAL "" AND "${_cnt}" GREATER 0)
if( NOT "${_report}" STREQUAL "")
string(APPEND _report ",")
endif()
get_property(_hosted GLOBAL PROPERTY NCBI_PTBPROP_HOST_${NCBI_PROJECT})
endif()
- if (NOT NCBI_PTBMODE_PARTS AND NOT NCBI_PTBMODE_COLLECT_DEPS AND NCBI_PTBCFG_ENABLE_COLLECTOR)
+ if (NCBI_PTBCFG_ENABLE_COLLECTOR AND NOT NCBI_PTBMODE_PARTS AND NOT NCBI_PTBMODE_COLLECT_DEPS)
if(DEFINED NCBI_PTB_ALLOWED_PROJECTS)
if(NOT ${NCBI_PROJECT} IN_LIST NCBI_PTB_ALLOWED_PROJECTS)
if(NCBI_VERBOSE_ALLPROJECTS OR NCBI_VERBOSE_PROJECT_${NCBI_PROJECT})
endif()
endif()
+ if(NOT NCBI_PTBCFG_ENABLE_COLLECTOR AND NOT NCBI_PTBMODE_PARTS)
+ get_property(_count GLOBAL PROPERTY NCBI_PTBPROP_TOTAL_${NCBI_${NCBI_PROJECT}_TYPE})
+ math(EXPR _count "${_count} + 1")
+ set_property(GLOBAL PROPERTY NCBI_PTBPROP_TOTAL_${NCBI_${NCBI_PROJECT}_TYPE} ${_count})
+ NCBI_internal_process_project_filters(_allowed)
+ if (NOT _allowed)
+ if ("${ARGC}" GREATER "0")
+ set(${ARGV0} FALSE PARENT_SCOPE)
+ endif()
+ return()
+ endif()
+ endif()
+
if (NCBI_PTBMODE_COLLECT_DEPS)
get_property(_prjdeps GLOBAL PROPERTY NCBI_PTBPROP_DIRECT_DEPS_${NCBI_PROJECT} SET)
if (_prjdeps AND NOT DEFINED NCBI_${NCBI_PROJECT}_PARTS)
endif()
return()
endif()
+ elseif(NOT NCBI_PTBCFG_ENABLE_COLLECTOR AND NCBI_PTBMODE_PARTS)
+ set(NCBITMP_PROJECT_PART_IDS ${NCBITMP_PROJECT_PART_IDS} ${NCBI_PROJECT_ID} PARENT_SCOPE )
+ set(NCBITMP_PROJECT_PARTS ${NCBITMP_PROJECT_PARTS} ${NCBI_PROJECT_PARTNAME} PARENT_SCOPE )
endif()
#message("processing ${NCBI_PROJECT_ID}")
message("WARNING: App target ${NCBI_${NCBI_PROJECT}_OUTPUT} (${NCBI_CURRENT_SOURCE_DIR}) cannot be created")
message(" because there is already a target with the same name in ${_dir}")
message(" App target ${NCBI_${NCBI_PROJECT}_OUTPUT} will be renamed into ${NCBI_PROJECT}")
+ elseif(NOT NCBI_PTBCFG_ENABLE_COLLECTOR)
+ message("WARNING: App target ${NCBI_${NCBI_PROJECT}_OUTPUT} (${NCBI_CURRENT_SOURCE_DIR}) cannot be created")
+ message(" because there is already a target with the same name elsewhere")
+ message(" App target ${NCBI_${NCBI_PROJECT}_OUTPUT} will be renamed into ${NCBI_PROJECT}")
endif()
endif()
set_target_properties(${NCBI_PROJECT} PROPERTIES OUTPUT_NAME ${NCBI_${NCBI_PROJECT}_OUTPUT})
message(" NCBITMP_PROJECT_SOURCES ${NCBITMP_PROJECT_SOURCES}")
message(" NCBITMP_PROJECT_HEADERS ${NCBITMP_PROJECT_HEADERS}")
message(" NCBITMP_PROJECT_RESOURCES ${NCBITMP_PROJECT_RESOURCES}")
+#message(" NCBI_SRC_ROOT ${NCBI_SRC_ROOT}")
+#message(" NCBI_INC_ROOT ${NCBI_INC_ROOT}")
endif()
if (${NCBI_${NCBI_PROJECT}_TYPE} STREQUAL "CUSTOM")
#############################################################################
-# $Id: CMake.NCBIptb.ntest.cmake 607666 2020-05-06 12:51:46Z ivanov $
+# $Id: CMake.NCBIptb.ntest.cmake 609363 2020-06-01 14:11:57Z ivanov $
#############################################################################
#############################################################################
##
##############################################################################
function(NCBI_internal_add_ncbi_checktarget)
- if(DEFINED NCBI_EXTERNAL_TREE_ROOT)
- set(SCRIPT_NAME "${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_COMMON_SCRIPTS}/check/check_make_unix_cmake.sh")
- else()
- set(SCRIPT_NAME "${NCBI_TREE_ROOT}/${NCBI_DIRNAME_COMMON_SCRIPTS}/check/check_make_unix_cmake.sh")
- endif()
+ set(SCRIPT_NAME "${NCBITK_TREE_ROOT}/${NCBI_DIRNAME_COMMON_SCRIPTS}/check/check_make_unix_cmake.sh")
set(WORKDIR ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD})
set(_checkdir ../check)
set(_checkroot ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/${_checkdir})
#############################################################################
-# $Id: CMake.NCBItoolkit.cmake 603345 2020-03-10 17:24:45Z ivanov $
+# $Id: CMake.NCBItoolkit.cmake 609379 2020-06-01 14:15:14Z ivanov $
#############################################################################
if(NOT DEFINED NCBI_TOOLKIT_NCBIPTB_BUILD_SYSTEM_INCLUDED)
set(NCBI_EXPERIMENTAL_SUBDIRS ON)
set(NCBI_EXPERIMENTAL_DISABLE_HUNTER ON)
set(NCBI_VERBOSE_ALLPROJECTS OFF)
- set(NCBI_PTBCFG_ENABLE_COLLECTOR ON)
+ if(NCBI_PTBCFG_SKIP_ANALYSIS)
+ set(NCBI_PTBCFG_ENABLE_COLLECTOR OFF)
+ else()
+ set(NCBI_PTBCFG_ENABLE_COLLECTOR ON)
+ endif()
if(BUILD_SHARED_LIBS)
if(WIN32 OR XCODE)
endif()
endif()
-if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
- set(_prefix "${NCBI_EXTERNAL_TREE_ROOT}/src/")
-else()
- if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/build-system/cmake/CMake.NCBIptb.cmake")
- set(_prefix "")
- elseif (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/build-system/cmake/CMake.NCBIptb.cmake")
- set(_prefix "src/")
- else()
- message(FATAL_ERROR "Cannot find NCBIptb build system in ${CMAKE_SOURCE_DIR}")
- endif()
+set(_listdir "${CMAKE_CURRENT_LIST_DIR}")
+if (NOT EXISTS "${_listdir}/CMake.NCBIptb.cmake")
+ message(FATAL_ERROR "Cannot find NCBIptb build system in ${_listdir}")
endif()
-include(${_prefix}build-system/cmake/CMakeMacros.cmake)
-include(${_prefix}build-system/cmake/CMakeChecks.cmake)
-include(${_prefix}build-system/cmake/CMake.NCBIptb.cmake)
-include(${_prefix}build-system/cmake/CMake.NCBIptb.ncbi.cmake)
-include(${_prefix}build-system/cmake/CMake.NCBIptb.datatool.cmake)
-include(${_prefix}build-system/cmake/CMake.NCBIptb.grpc.cmake)
-include(${_prefix}build-system/cmake/CMake.NCBIptb.ctest.cmake)
+include(${_listdir}/CMakeMacros.cmake)
+include(${_listdir}/CMakeChecks.cmake)
+include(${_listdir}/CMake.NCBIptb.cmake)
+include(${_listdir}/CMake.NCBIptb.ncbi.cmake)
+include(${_listdir}/CMake.NCBIptb.datatool.cmake)
+include(${_listdir}/CMake.NCBIptb.grpc.cmake)
+include(${_listdir}/CMake.NCBIptb.ctest.cmake)
if(NCBI_PTBCFG_ADDCHECK)
- include(${_prefix}build-system/cmake/CMake.NCBIptb.ntest.cmake)
+ include(${_listdir}/CMake.NCBIptb.ntest.cmake)
endif()
if(NCBI_PTBCFG_DOINSTALL)
- include(${_prefix}build-system/cmake/CMake.NCBIptb.install.cmake)
+ include(${_listdir}/CMake.NCBIptb.install.cmake)
endif()
-include(${_prefix}build-system/cmake/CMake.NCBIptb.legacy.cmake)
+include(${_listdir}/CMake.NCBIptb.legacy.cmake)
if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
if (EXISTS ${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}/${NCBI_PTBCFG_INSTALL_EXPORT}.cmake)
NCBI_import_hostinfo(${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}/${NCBI_PTBCFG_INSTALL_EXPORT}.hostinfo)
endif()
-include(${_prefix}build-system/cmake/CMakeChecks.final-message.cmake)
+include(${_listdir}/CMakeChecks.final-message.cmake)
endif(NOT DEFINED NCBI_TOOLKIT_NCBIPTB_BUILD_SYSTEM_INCLUDED)
#set(Boost_DEBUG ON)
find_package(Boost
- COMPONENTS filesystem iostreams date_time regex system serialization
+ COMPONENTS filesystem iostreams date_time regex system serialization thread
REQUIRED)
set(CMAKE_PREFIX_PATH ${_foo_CMAKE_PREFIX_PATH})
#############################################################################
-# $Id: CMakeChecks.cmake 607666 2020-05-06 12:51:46Z ivanov $
+# $Id: CMakeChecks.cmake 609374 2020-06-01 14:13:44Z ivanov $
#############################################################################
#
# Note:
endif()
endif()
+string(TIMESTAMP NCBI_TIMESTAMP_START "%s")
+string(TIMESTAMP _start)
+message("Started: ${_start}")
+
#############################################################################
# Source tree description
#
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/build-system/cmake/CMake.NCBIptb.cmake")
- set(top_src_dir ${CMAKE_CURRENT_SOURCE_DIR}/..)
- set(abs_top_src_dir ${CMAKE_CURRENT_SOURCE_DIR}/..)
+ set(_this_root ${CMAKE_CURRENT_SOURCE_DIR}/..)
else()
- set(top_src_dir ${CMAKE_SOURCE_DIR})
- set(abs_top_src_dir ${CMAKE_SOURCE_DIR})
+ set(_this_root ${CMAKE_SOURCE_DIR})
endif()
-get_filename_component(top_src_dir "${top_src_dir}" ABSOLUTE)
-get_filename_component(abs_top_src_dir "${abs_top_src_dir}" ABSOLUTE)
-
-set(NCBI_TREE_ROOT ${top_src_dir})
-set(NCBI_SRC_ROOT ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_SRC})
-set(NCBI_INC_ROOT ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE})
+get_filename_component(_this_root "${_this_root}" ABSOLUTE)
+get_filename_component(top_src_dir "${CMAKE_CURRENT_LIST_DIR}/../../.." ABSOLUTE)
+
+set(NCBI_TREE_ROOT ${_this_root})
+set(NCBI_SRC_ROOT ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_SRC})
+set(NCBI_INC_ROOT ${NCBI_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE})
+set(NCBITK_TREE_ROOT ${top_src_dir})
+set(NCBITK_SRC_ROOT ${NCBITK_TREE_ROOT}/${NCBI_DIRNAME_SRC})
+set(NCBITK_INC_ROOT ${NCBITK_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE})
if (NOT EXISTS "${NCBI_SRC_ROOT}")
set(NCBI_SRC_ROOT ${NCBI_TREE_ROOT})
endif()
set(incdir ${CMAKE_BINARY_DIR}/${NCBI_DIRNAME_CFGINC})
set(incinternal ${NCBI_INC_ROOT}/${NCBI_DIRNAME_INTERNAL})
-
set(NCBI_DIRNAME_BUILD build)
-if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
+#if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
+if (OFF)
string(FIND ${CMAKE_BINARY_DIR} ${NCBI_TREE_ROOT} _pos_root)
string(FIND ${CMAKE_BINARY_DIR} ${NCBI_SRC_ROOT} _pos_src)
if(NOT "${_pos_root}" LESS "0" AND "${_pos_src}" LESS "0" AND NOT "${CMAKE_BINARY_DIR}" STREQUAL "${NCBI_TREE_ROOT}")
endif()
endif()
endif()
-if (NOT IS_DIRECTORY ${incinternal})
- set(incinternal "")
-endif()
if (NCBI_EXPERIMENTAL_CFG)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_RUNTIME}")
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${LIBRARY_OUTPUT_PATH}")
endif()
-if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
- set(NCBI_TREE_BUILDCFG "${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_BUILDCFG}")
- set(NCBI_TREE_CMAKECFG "${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_CMAKECFG}")
- set(NCBI_TREE_COMMON_INCLUDE ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}/common)
-else()
- set(NCBI_TREE_BUILDCFG "${NCBI_TREE_ROOT}/${NCBI_DIRNAME_BUILDCFG}")
- set(NCBI_TREE_CMAKECFG "${NCBI_TREE_ROOT}/${NCBI_DIRNAME_CMAKECFG}")
- set(NCBI_TREE_COMMON_INCLUDE ${NCBI_INC_ROOT}/common)
-endif()
-if(OFF)
-message("CMAKE_SOURCE_DIR = ${CMAKE_SOURCE_DIR}")
-message("NCBI_TREE_ROOT = ${NCBI_TREE_ROOT}")
-message("NCBI_SRC_ROOT = ${NCBI_SRC_ROOT}")
-message("NCBI_INC_ROOT = ${NCBI_INC_ROOT}")
-message("NCBI_BUILD_ROOT = ${NCBI_BUILD_ROOT}")
-message("NCBI_CFGINC_ROOT = ${NCBI_CFGINC_ROOT}")
-message("NCBI_TREE_BUILDCFG = ${NCBI_TREE_BUILDCFG}")
-message("NCBI_TREE_CMAKECFG = ${NCBI_TREE_CMAKECFG}")
-message("NCBI_TREE_COMMON_INCLUDE = ${NCBI_TREE_COMMON_INCLUDE}")
-endif()
+set(NCBI_TREE_CMAKECFG "${CMAKE_CURRENT_LIST_DIR}")
+get_filename_component(NCBI_TREE_BUILDCFG "${CMAKE_CURRENT_LIST_DIR}/.." ABSOLUTE)
+
if(EXISTS ${NCBI_TREE_ROOT}/CMake.CustomConfig.txt)
include(${NCBI_TREE_ROOT}/CMake.CustomConfig.txt)
endif()
endif()
set(NCBI_DIRNAME_PREBUILT ${_prebuilt_loc})
+set(_tk_includedir ${NCBITK_INC_ROOT})
+set(_tk_incinternal ${NCBITK_INC_ROOT}/${NCBI_DIRNAME_INTERNAL})
+set(_inc_dirs)
+foreach( _inc IN ITEMS ${includedir} ${incinternal} ${_tk_includedir} ${_tk_incinternal})
+ if (IS_DIRECTORY ${_inc})
+ list(APPEND _inc_dirs ${_inc})
+ endif()
+endforeach()
+list(REMOVE_DUPLICATES _inc_dirs)
+include_directories(${incdir} ${_inc_dirs})
+include_regular_expression("^.*[.](h|hpp|c|cpp|inl|inc)$")
+if(OFF)
+message("CMAKE_SOURCE_DIR = ${CMAKE_SOURCE_DIR}")
+message("NCBI_TREE_ROOT = ${NCBI_TREE_ROOT}")
+message("NCBI_SRC_ROOT = ${NCBI_SRC_ROOT}")
+message("NCBI_INC_ROOT = ${NCBI_INC_ROOT}")
+message("NCBITK_TREE_ROOT = ${NCBITK_TREE_ROOT}")
+message("NCBITK_SRC_ROOT = ${NCBITK_SRC_ROOT}")
+message("NCBITK_INC_ROOT = ${NCBITK_INC_ROOT}")
+message("NCBI_BUILD_ROOT = ${NCBI_BUILD_ROOT}")
+message("NCBI_CFGINC_ROOT = ${NCBI_CFGINC_ROOT}")
+message("NCBI_TREE_BUILDCFG = ${NCBI_TREE_BUILDCFG}")
+message("NCBI_TREE_CMAKECFG = ${NCBI_TREE_CMAKECFG}")
+message("include_directories(${incdir} ${_inc_dirs})")
+endif()
+
if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
set(NCBI_EXTERNAL_BUILD_ROOT ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_PREBUILT})
-
- if (IS_DIRECTORY ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE})
- set(_ext_includedir0 ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE})
- if (IS_DIRECTORY ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}/${NCBI_DIRNAME_INTERNAL})
- set(_ext_incinternal ${NCBI_EXTERNAL_TREE_ROOT}/${NCBI_DIRNAME_INCLUDE}/${NCBI_DIRNAME_INTERNAL})
- endif()
- endif()
if (NOT EXISTS ${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}/${NCBI_PTBCFG_INSTALL_EXPORT}.cmake)
message(FATAL_ERROR "${NCBI_PTBCFG_INSTALL_EXPORT} was not found in ${NCBI_EXTERNAL_BUILD_ROOT}/${NCBI_DIRNAME_EXPORT}")
endif()
- include_directories(${incdir} ${NCBI_INC_ROOT} ${incinternal} ${_ext_includedir0} ${_ext_incinternal})
-else()
- include_directories(${incdir} ${includedir0} ${incinternal})
endif()
-include_regular_expression("^.*[.](h|hpp|c|cpp|inl|inc)$")
#set(CMAKE_MODULE_PATH "${NCBI_SRC_ROOT}/build-system/cmake/" ${CMAKE_MODULE_PATH})
list(APPEND CMAKE_MODULE_PATH "${NCBI_TREE_CMAKECFG}")
# This sets a version to be used throughout our config process
# NOTE: Adjust as needed
#
-set(NCBI_CPP_TOOLKIT_VERSION_MAJOR 23)
+set(NCBI_CPP_TOOLKIT_VERSION_MAJOR 24)
set(NCBI_CPP_TOOLKIT_VERSION_MINOR 0)
set(NCBI_CPP_TOOLKIT_VERSION_PATCH 0)
set(NCBI_CPP_TOOLKIT_VERSION_EXTRA "")
if (NCBI_EXPERIMENTAL_CFG)
+ set(_tk_common_include "${NCBITK_INC_ROOT}/common")
if (WIN32 OR XCODE)
foreach(_cfg ${NCBI_CONFIGURATION_TYPES})
configure_file(${NCBI_TREE_CMAKECFG}/config.cmake.h.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/config/ncbiconf_xcode.h)
endif()
endif()
- if (EXISTS ${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in)
- configure_file(${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/config/ncbicfg.cfg.c)
+ if (EXISTS ${NCBITK_SRC_ROOT}/corelib/ncbicfg.c.in)
+ configure_file(${NCBITK_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/config/ncbicfg.cfg.c)
+ endif()
+ configure_file(${_tk_common_include}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/ncbi_build_ver.h)
+ if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
+ configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h)
+ else()
+ configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBITK_INC_ROOT}/common/ncbi_revision.h)
endif()
- configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/${_cfg}/common/ncbi_build_ver.h)
- configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h)
endforeach()
if(NOT EXISTS ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c)
file(WRITE ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c "#include <common/config/ncbicfg.cfg.c>\n")
set(NCBI_SIGNATURE "${NCBI_COMPILER}_${NCBI_COMPILER_VERSION}-${NCBI_BUILD_TYPE}--${HOST_CPU}-${HOST_OS_WITH_VERSION}-${_local_host_name}")
configure_file(${NCBI_TREE_CMAKECFG}/config.cmake.h.in ${NCBI_CFGINC_ROOT}/ncbiconf_unix.h)
- if (EXISTS ${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in)
- configure_file(${NCBI_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c)
+ if (EXISTS ${NCBITK_SRC_ROOT}/corelib/ncbicfg.c.in)
+ configure_file(${NCBITK_SRC_ROOT}/corelib/ncbicfg.c.in ${NCBI_BUILD_ROOT}/${NCBI_DIRNAME_BUILD}/corelib/ncbicfg.c)
endif()
- configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/common/ncbi_build_ver.h)
- configure_file(${NCBI_TREE_COMMON_INCLUDE}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h)
+ configure_file(${_tk_common_include}/ncbi_build_ver.h.in ${NCBI_CFGINC_ROOT}/common/ncbi_build_ver.h)
+ # Generate ncbi_revision.h exactly once: under NCBI_INC_ROOT when
+ # NCBI_EXTERNAL_TREE_ROOT is defined, under NCBITK_INC_ROOT otherwise.
+ # This mirrors the WIN32/XCODE branch; an extra unconditional call here
+ # would only repeat the first case or drop a second copy in the other one.
+ if (DEFINED NCBI_EXTERNAL_TREE_ROOT)
+ configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBI_INC_ROOT}/common/ncbi_revision.h)
+ else()
+ configure_file(${_tk_common_include}/ncbi_revision.h.in ${NCBITK_INC_ROOT}/common/ncbi_revision.h)
+ endif()
endif()
else (NCBI_EXPERIMENTAL_CFG)
#############################################################################
-# $Id: CMakeChecks.compiler.cmake 608131 2020-05-12 15:15:17Z ivanov $
+# $Id: CMakeChecks.compiler.cmake 609369 2020-06-01 14:12:55Z ivanov $
#############################################################################
#
# This config is designed to capture all compiler and linker definitions and search parameters
set(buildconf0 ${CMAKE_BUILD_TYPE})
set(NCBI_BUILD_TYPE "${CMAKE_BUILD_TYPE}MT64")
endif (NOT buildconf)
+set(NCBI_CONFIGURATION_TYPES "${CMAKE_BUILD_TYPE}")
if(MaxDebug IN_LIST NCBI_PTBCFG_PROJECT_FEATURES)
add_definitions(-D_GLIBCXX_DEBUG)
set(CMAKE_SHARED_LINKER_FLAGS_RDYNAMIC "${CMAKE_SHARED_LINKER_FLAGS}") # for smooth transition, please don't use
set(CMAKE_SHARED_LINKER_FLAGS_ALLOW_UNDEFINED "${CMAKE_SHARED_LINKER_FLAGS}")
-if ((NOT DEFINED ${APPLE}) OR (NOT ${APPLE}))
+if (NOT APPLE)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined")
endif ()
SET(CMAKE_INSTALL_RPATH "/$ORIGIN/../lib")
#this add RUNPATH to binaries (RPATH is already there anyway), which makes it more like binaries built by C++ Toolkit
-if (NOT WIN32)
+if (NOT WIN32 AND NOT APPLE)
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--enable-new-dtags")
endif()
#############################################################################
-# $Id: CMakeLists.top_builddir.txt 603341 2020-03-10 17:23:52Z ivanov $
+# $Id: CMakeLists.top_builddir.txt 609363 2020-06-01 14:11:57Z ivanov $
#############################################################################
##############################################################################
${NCBI_PTBCFG_KNOWN_FOLDERS}
)
-include(build-system/cmake/CMake.NCBItoolkit.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/CMake.NCBItoolkit.cmake)
NCBI_add_subdirectory( ${NCBI_PTBCFG_KNOWN_FOLDERS})
#!/bin/sh
#############################################################################
-# $Id: cmake-cfg-unix.sh 607664 2020-05-06 12:50:47Z ivanov $
+# $Id: cmake-cfg-unix.sh 609379 2020-06-01 14:15:14Z ivanov $
# Configure NCBI C++ toolkit using CMake build system.
# Author: Andrei Gourianov, gouriano@ncbi
#############################################################################
NCBI_EXPERIMENTAL="ON"
host_os=`uname`
-if test $host_os = "Darwin"; then
+if test -z "${CMAKE_CMD}" -a $host_os = "Darwin"; then
CMAKE_CMD=/Applications/CMake.app/Contents/bin/cmake
fi
if [ -z "${CMAKE_CMD}" ]; then
BUILD_SHARED_LIBS="OFF"
USE_CCACHE="ON"
USE_DISTCC="ON"
+SKIP_ANALYSIS="OFF"
#############################################################################
Check_function_exists() {
--with-build-root=name -- specify a non-default build directory name
--without-ccache -- do not use ccache
--without-distcc -- do not use distcc
+ --without-analysis -- skip source tree analysis
--with-generator="X" -- use generator X
EOF
--without-distcc)
USE_DISTCC="OFF"
;;
+ --without-analysis)
+ SKIP_ANALYSIS="ON"
+ ;;
--with-projects=*)
PROJECT_LIST=${1#*=}
- if [ -e "${tree_root}/$PROJECT_LIST" ]; then
+ if [ -f "${tree_root}/$PROJECT_LIST" ]; then
PROJECT_LIST="${tree_root}/$PROJECT_LIST"
fi
;;
--with-tags=*)
PROJECT_TAGS=${1#*=}
- if [ -e "${tree_root}/$PROJECT_TAGS" ]; then
+ if [ -f "${tree_root}/$PROJECT_TAGS" ]; then
PROJECT_TAGS="${tree_root}/$PROJECT_TAGS"
fi
;;
--with-targets=*)
PROJECT_TARGETS=${1#*=}
- if [ -e "${tree_root}/$PROJECT_TARGETS" ]; then
+ if [ -f "${tree_root}/$PROJECT_TARGETS" ]; then
PROJECT_TARGETS="${tree_root}/$PROJECT_TARGETS"
fi
;;
if test $host_os = "Darwin"; then
CC_NAME=`$CC --version 2>/dev/null | awk 'NR==1{print $2}'`
CC_VERSION=`$CC --version 2>/dev/null | awk 'NR==1{print $4}' | sed 's/[.]//g'`
+ # Map the lowercase name reported by the compiler to "Clang".
+ # CC_NAME is quoted so that an empty value (nothing printed by
+ # "$CC --version") does not turn the test into a syntax error.
+ if [ "$CC_NAME" = "clang" ]; then
+ CC_NAME="Clang"
+ fi
else
CC_NAME=`$CC --version | awk 'NR==1{print $1}' | tr '[:lower:]' '[:upper:]'`
ver=`$CC -dumpfullversion 2>/dev/null || $CC -dumpversion 2>/dev/null`
CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_TAGS=$(Quote "${PROJECT_TAGS}")"
CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_TARGETS=$(Quote "${PROJECT_TARGETS}")"
CMAKE_ARGS="$CMAKE_ARGS -DNCBI_VERBOSE_PROJECTS=$(Quote "${PROJECT_DETAILS}")"
+CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_SKIP_ANALYSIS=$(Quote "${SKIP_ANALYSIS}")"
if [ -n "$INSTALL_PATH" ]; then
CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_INSTALL_PATH=$(Quote "${INSTALL_PATH}")"
fi
@echo off
setlocal ENABLEDELAYEDEXPANSION
REM #########################################################################
-REM $Id: cmake-cfg-vs.bat 607666 2020-05-06 12:51:46Z ivanov $
+REM $Id: cmake-cfg-vs.bat 609379 2020-06-01 14:15:14Z ivanov $
REM Configure NCBI C++ toolkit for Visual Studio using CMake build system.
REM Author: Andrei Gourianov, gouriano@ncbi
REM #########################################################################
REM defaults
set BUILD_SHARED_LIBS=OFF
set VISUAL_STUDIO=2017
+set SKIP_ANALYSIS=OFF
goto :RUN
REM #########################################################################
echo --with-features="LIST" -- specify compilation features
echo examples: --with-features="StrictGI"
echo --with-build-root=name -- specify a non-default build directory name
+echo --without-analysis -- skip source tree analysis
echo --with-vs=N -- use Visual Studio N generator
echo examples: --with-vs=2017 (default)
echo --with-vs=2019
if "%1"=="--with-details" (set PROJECT_DETAILS=%~2& shift& goto :CONTINUEPARSEARGS)
if "%1"=="--with-vs" (set VISUAL_STUDIO=%~2& shift& goto :CONTINUEPARSEARGS)
if "%1"=="--with-install" (set INSTALL_PATH=%~2& shift& goto :CONTINUEPARSEARGS)
+if "%1"=="--without-analysis" (set SKIP_ANALYSIS=ON& goto :CONTINUEPARSEARGS)
if "%1"=="--with-generator" (set CMAKE_GENERATOR=%~2& shift& goto :CONTINUEPARSEARGS)
if "%1"=="--with-prebuilt" (set prebuilt_dir=%~dp2& set prebuilt_name=%~nx2& shift& goto :CONTINUEPARSEARGS)
set unknown=%unknown% %1
if not "%PROJECT_LIST%"=="" (
if exist "%tree_root%\%PROJECT_LIST%" (
- set PROJECT_LIST=%tree_root%\%PROJECT_LIST%
+ type "%tree_root%\%PROJECT_LIST%" >NUL 2>&1
+ if not errorlevel 1 (
+ set PROJECT_LIST=%tree_root%\%PROJECT_LIST%
+ )
)
)
if not "%PROJECT_TAGS%"=="" (
if exist "%tree_root%\%PROJECT_TAGS%" (
- set PROJECT_TAGS=%tree_root%\%PROJECT_TAGS%
+ type "%tree_root%\%PROJECT_TAGS%" >NUL 2>&1
+ if not errorlevel 1 (
+ set PROJECT_TAGS=%tree_root%\%PROJECT_TAGS%
+ )
)
)
if not "%PROJECT_TARGETS%"=="" (
if exist "%tree_root%\%PROJECT_TARGETS%" (
- set PROJECT_TARGETS=%tree_root%\%PROJECT_TARGETS%
+ type "%tree_root%\%PROJECT_TARGETS%" >NUL 2>&1
+ if not errorlevel 1 (
+ set PROJECT_TARGETS=%tree_root%\%PROJECT_TARGETS%
+ )
)
)
set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_PROJECT_TAGS="%PROJECT_TAGS%"
set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_PROJECT_TARGETS="%PROJECT_TARGETS%"
set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_VERBOSE_PROJECTS="%PROJECT_DETAILS%"
+set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_SKIP_ANALYSIS=%SKIP_ANALYSIS%
if not "%INSTALL_PATH%"=="" (
set CMAKE_ARGS=%CMAKE_ARGS% -DNCBI_PTBCFG_INSTALL_PATH="%INSTALL_PATH%"
)
#!/bin/sh
#############################################################################
-# $Id: cmake-cfg-xcode.sh 603557 2020-03-12 16:26:27Z ivanov $
+# $Id: cmake-cfg-xcode.sh 609379 2020-06-01 14:15:14Z ivanov $
# Configure NCBI C++ toolkit for XCode using CMake build system.
# Author: Andrei Gourianov, gouriano@ncbi
#############################################################################
#############################################################################
# defaults
BUILD_SHARED_LIBS="OFF"
+SKIP_ANALYSIS="OFF"
#############################################################################
Check_function_exists() {
--with-features="LIST" -- specify compilation features
examples: --with-features="StrictGI"
--with-build-root=name -- specify a non-default build directory name
+ --without-analysis -- skip source tree analysis
EOF
Check_function_exists configure_ext_Usage && configure_ext_Usage
;;
--with-projects=*)
PROJECT_LIST=${1#*=}
- if [ -e "${tree_root}/$PROJECT_LIST" ]; then
+ if [ -f "${tree_root}/$PROJECT_LIST" ]; then
PROJECT_LIST="${tree_root}/$PROJECT_LIST"
fi
;;
--with-tags=*)
PROJECT_TAGS=${1#*=}
- if [ -e "${tree_root}/$PROJECT_TAGS" ]; then
+ if [ -f "${tree_root}/$PROJECT_TAGS" ]; then
PROJECT_TAGS="${tree_root}/$PROJECT_TAGS"
fi
;;
--with-targets=*)
PROJECT_TARGETS=${1#*=}
- if [ -e "${tree_root}/$PROJECT_TARGETS" ]; then
+ if [ -f "${tree_root}/$PROJECT_TARGETS" ]; then
PROJECT_TARGETS="${tree_root}/$PROJECT_TARGETS"
fi
;;
prebuilt_dir=`dirname $prebuilt_path`
prebuilt_name=`basename $prebuilt_path`
;;
+ --without-analysis)
+ SKIP_ANALYSIS="ON"
+ ;;
*)
unknown="$unknown $1"
;;
CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_TAGS=$(Quote "${PROJECT_TAGS}")"
CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_PROJECT_TARGETS=$(Quote "${PROJECT_TARGETS}")"
CMAKE_ARGS="$CMAKE_ARGS -DNCBI_VERBOSE_PROJECTS=$(Quote "${PROJECT_DETAILS}")"
+CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_SKIP_ANALYSIS=$(Quote "${SKIP_ANALYSIS}")"
if [ -n "$INSTALL_PATH" ]; then
CMAKE_ARGS="$CMAKE_ARGS -DNCBI_PTBCFG_INSTALL_PATH=$(Quote "${INSTALL_PATH}")"
fi
#!/bin/sh
#############################################################################
-# $Id: cmake_configure_ext_gpipe.sh 600386 2020-01-16 17:00:37Z gouriano $
+# $Id: cmake_configure_ext_gpipe.sh 609574 2020-06-03 20:26:39Z whlavina $
#############################################################################
_ext_check=`type -t Check_function_exists`
BUILD_TYPE="Release"
BUILD_SHARED_LIBS="ON"
PROJECT_FEATURES="${PROJECT_FEATURES};Int8GI"
- BUILD_ROOT="Release"
+ : "${BUILD_ROOT:=../Release}"
add_gpipe_warnings
;;
"--gpipe-dev")
BUILD_TYPE="Debug"
BUILD_SHARED_LIBS="ON"
PROJECT_FEATURES="${PROJECT_FEATURES};StrictGI"
- BUILD_ROOT="Debug"
+ : "${BUILD_ROOT:=../Debug}"
add_gpipe_warnings
;;
"--gpipe-cgi")
BUILD_TYPE="Release"
BUILD_SHARED_LIBS="OFF"
PROJECT_FEATURES="${PROJECT_FEATURES};Int8GI"
- BUILD_ROOT="Static"
+ : "${BUILD_ROOT:=../Static}"
add_gpipe_warnings
;;
"--gpipe-distrib")
BUILD_TYPE="Release"
BUILD_SHARED_LIBS="OFF"
PROJECT_COMPONENTS="${PROJECT_COMPONENTS};-PCRE"
- BUILD_ROOT="Distrib"
+ : "${BUILD_ROOT:=../Distrib}"
add_gpipe_warnings
;;
*)
/* Define to 1 if you have the `lchown' function. */
#undef HAVE_LCHOWN
+/* Define to 1 if libparquet is available. */
+#undef HAVE_LIBAPACHE_ARROW
+
/* Define to 1 if libavrocpp is available. */
#undef HAVE_LIBAVRO
/* Define to 1 if non-public CONNECT extensions are available. */
#undef HAVE_LIBCONNEXT
+/* Define to 1 if libcppkafka is available. */
+#undef HAVE_LIBCPPKAFKA
+
/* Define to 1 if CRYPT is available, either in its own library or as part of
the standard libraries. */
#undef HAVE_LIBCRYPT
/* Define to 1 if libprotobuf$PROTOBUF_SFX is available. */
#undef HAVE_LIBPROTOBUF
+/* Define to 1 if librdkafka is available. */
+#undef HAVE_LIBRDKAFKA
+
/* Define to 1 if RPCSVC is available, either in its own library or as part of
the standard libraries. */
#undef HAVE_LIBRPCSVC
/* Define to 1 if libz is available. */
#undef HAVE_LIBZ
+/* Define to 1 if libzstd is available. */
+#undef HAVE_LIBZSTD
+
/* Define to 1 if you have the <limits> header file. */
#undef HAVE_LIMITS
UNLESS_PUBSEQOS
ncbi_xreader_pubseqos2
ncbi_xreader_pubseqos
+CPPKAFKA_STATIC_LIBS
+LIBRDKAFKA_STATIC_LIBS
+APACHE_ARROW_STATIC_LIBS
HIREDIS_STATIC_LIBS
AWS_SDK_STATIC_LIBS
MSGSL_INCLUDE
top_srcdir
build_root
signature
+CPPKAFKA_LIBS
+CPPKAFKA_INCLUDE
+LIBRDKAFKA_LIBS
+LIBRDKAFKA_INCLUDE
+APACHE_ARROW_LIBS
+APACHE_ARROW_INCLUDE
HIREDIS_LIBS
HIREDIS_INCLUDE
AWS_SDK_LIBS
MBEDTLS_INCLUDE
PCRE_LIBS
PCRE_INCLUDE
+ZSTD_LIBS
+ZSTD_INCLUDE
LZO_LIBS
LZO_INCLUDE
BZ2_LIBS
with_z
with_bz2
with_lzo
+with_zstd
with_pcre
with_mbedtls
with_gmp
with_msgsl
with_aws_sdk
with_hiredis
+with_apache_arrow
+with_librdkafka
+with_cppkafka
with_3psw
with_local_lbsm
with_ncbi_crypt
ncbi-c wxwidgets wxwidgets-ucs fastcgi sss sssdb sssutils included-sss \
geo included-geo vdb downloaded-vdb static-vdb ngs libunwind libdw \
backward-cpp backward-cpp-sig \
-z bz2 lzo pcre mbedtls gmp gcrypt nettle gnutls static-gnutls openssl krb5 \
+z bz2 lzo zstd pcre mbedtls \
+gmp gcrypt nettle gnutls static-gnutls openssl krb5 \
sybase sybase-local sybase-new ftds mysql \
orbacus freetype ftgl opengl mesa glut glew glew-mx gl2ps \
bdb python perl jni sqlite3 icu boost boost-tag \
mongodb mongodb3 leveldb gmock lapack lmdb \
libuv libssh2 cassandra nghttp2 h2o influxdb \
libxlsxwriter protobuf grpc msgsl aws-sdk hiredis \
+apache-arrow librdkafka cppkafka \
3psw local-lbsm ncbi-crypt connext \
serial objects dbapi app ctools gui algo internal gbench"
--srcdir=* | --x-includes=* | --x-libraries=* | --with-tcheck=* \
| --with-ncbi-c=* | --with-sss=* | --with-vdb=* | --with-ngs=* \
| --with-libunwind=* | --with-libdw=* | --with-backward-cpp=* \
- | --with-z=* | --with-bz2=* | --with-lzo=* \
+ | --with-z=* | --with-bz2=* | --with-lzo=* | --with-zstd=* \
| --with-pcre=* | --with-mbedtls=* \
| --with-gmp=* | --with-gcrypt=* | --with-nettle=* \
| --with-gnutls=* | --with-openssl=* | --with-krb5=* \
--without-bz2 use internal copy of bzlib
--with-lzo=DIR use LZO installation in DIR (requires 2.x or up)
--without-lzo do not use LZO
+ --with-zstd=DIR use Zstandard installation in DIR
+ --without-zstd do not use Zstandard
--with-pcre=DIR use PCRE installation in DIR
--without-pcre use internal copy of PCRE
--with-mbedtls(=DIR) use external mbedTLS installation (in DIR)
--without-aws-sdk do not use the Amazon Web Services SDK
--with-hiredis=DIR use Hiredis installation in DIR
--without-hiredis do not use Hiredis
+ --with-apache-arrow=DIR use Apache Arrow installation in DIR
+ --without-apache-arrow do not use Apache Arrow
+ --with-librdkafka=DIR use librdkafka installation in DIR
+ --without-librdkafka do not use librdkafka
+ --with-cppkafka=DIR use cppkafka installation in DIR
+ --without-cppkafka do not use cppkafka
--with-3psw=std:netopt favor standard (system) builds of the above pkgs.
--without-3psw do not use any of the above packages
--without-local-lbsm turn off support for IPC with locally running LBSMD
else
with_lzo=no
fi
+ if test "${with_zstd-no}" != "no"; then
+ as_fn_error $? "incompatible options: --with-zstd but --without-3psw"
+ else
+ with_zstd=no
+ fi
if test "${with_pcre-no}" != "no"; then
as_fn_error $? "incompatible options: --with-pcre but --without-3psw"
else
else
with_hiredis=no
fi
+ # Shell variable names cannot contain '-': the --with-apache-arrow option
+ # is held in with_apache_arrow. With a hyphen, the expansion would read
+ # $with_apache with default "arrow-no" (never "no"), and the assignment
+ # below would be executed as a command instead of setting the variable.
+ if test "${with_apache_arrow-no}" != "no"; then
+ as_fn_error $? "incompatible options: --with-apache-arrow but --without-3psw"
+ else
+ with_apache_arrow=no
+ fi
+ if test "${with_librdkafka-no}" != "no"; then
+ as_fn_error $? "incompatible options: --with-librdkafka but --without-3psw"
+ else
+ with_librdkafka=no
+ fi
+ if test "${with_cppkafka-no}" != "no"; then
+ as_fn_error $? "incompatible options: --with-cppkafka but --without-3psw"
+ else
+ with_cppkafka=no
+ fi
{ NCBI=; unset NCBI;}
;;
fi
+# Check whether --with-zstd was given.
+if test "${with_zstd+set}" = set; then :
+ withval=$with_zstd;
+fi
+
+
+# Check whether --with-zstd was given.
+if test "${with_zstd+set}" = set; then :
+ withval=$with_zstd;
+fi
+
+
# Check whether --with-pcre was given.
if test "${with_pcre+set}" = set; then :
withval=$with_pcre;
fi
-# Check whether --with-grpc was given.
-if test "${with_grpc+set}" = set; then :
- withval=$with_grpc;
+# Check whether --with-hiredis was given.
+if test "${with_hiredis+set}" = set; then :
+ withval=$with_hiredis;
+fi
+
+
+# Check whether --with-apache-arrow was given.
+if test "${with_apache_arrow+set}" = set; then :
+ withval=$with_apache_arrow;
+fi
+
+
+# Check whether --with-apache-arrow was given.
+if test "${with_apache_arrow+set}" = set; then :
+ withval=$with_apache_arrow;
+fi
+
+
+# Check whether --with-librdkafka was given.
+if test "${with_librdkafka+set}" = set; then :
+ withval=$with_librdkafka;
+fi
+
+
+# Check whether --with-librdkafka was given.
+if test "${with_librdkafka+set}" = set; then :
+ withval=$with_librdkafka;
+fi
+
+
+# Check whether --with-cppkafka was given.
+if test "${with_cppkafka+set}" = set; then :
+ withval=$with_cppkafka;
+fi
+
+
+# Check whether --with-cppkafka was given.
+if test "${with_cppkafka+set}" = set; then :
+ withval=$with_cppkafka;
fi
LZO_LIBS="$LZO_LIBPATH -llzo2-static"
fi
+if test -d "$ZSTD_PATH"; then
+ ncbi_fix_dir_tmp=`if cd $ZSTD_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi`
+ case "$ncbi_fix_dir_tmp" in
+ /.*) ncbi_fix_dir_tmp2=`cd $ZSTD_PATH && $smart_pwd 2>/dev/null`
+ if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+ ZSTD_PATH=$ncbi_fix_dir_tmp2
+ else
+ case "$ZSTD_PATH" in
+ /*) ;;
+ * ) ZSTD_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ fi
+ ;;
+ /*) ZSTD_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+fi
+if test "$with_zstd" != "no"; then # Probe for libzstd unless with_zstd is "no".
+ case "$ZSTD_PATH:$with_zstd" in
+ *:yes | *: | $with_zstd* ) ;; # "yes", empty, or a prefix of ZSTD_PATH: keep ZSTD_PATH
+ * ) ZSTD_PATH=$with_zstd ;; # otherwise use the with_zstd value as the directory
+ esac
+ if test "$ZSTD_PATH" != /usr -a -d "$ZSTD_PATH"; then # existing directory other than /usr: derive flags from it
+ in_path=" in $ZSTD_PATH"
+ if test -z "$ZSTD_INCLUDE" -a -d "$ZSTD_PATH/include"; then
+ ZSTD_INCLUDE="-I$ZSTD_PATH/include"
+ fi
+ if test -n "$ZSTD_LIBPATH"; then # a preset ZSTD_LIBPATH is left alone
+ :
+ elif test -d "$ZSTD_PATH/lib${bit64_sfx}"; then
+ ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then # one flag serves both purposes: emit library-path flags only
+ for x in $ZSTD_PATH/lib${bit64_sfx}; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue # these system directories get no explicit flag
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ done
+ ZSTD_LIBPATH="${ncbi_rp_L_flags}"
+ else # distinct flags: build a library-path part and a run-path part
+ ncbi_rp_R_flags=
+ ncbi_rp_R_sep=" $CONF_f_runpath"
+ for x in $ZSTD_PATH/lib${bit64_sfx}; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ x=`echo $x | sed -e "$ncbi_rpath_sed"` # rewrite the directory with $ncbi_rpath_sed for the run-path part
+ ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+ ncbi_rp_R_sep=:
+ done
+ ZSTD_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+ elif test -d "$ZSTD_PATH/lib"; then # same construction for the plain lib directory
+ ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
+ for x in $ZSTD_PATH/lib; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ done
+ ZSTD_LIBPATH="${ncbi_rp_L_flags}"
+ else
+ ncbi_rp_R_flags=
+ ncbi_rp_R_sep=" $CONF_f_runpath"
+ for x in $ZSTD_PATH/lib; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ x=`echo $x | sed -e "$ncbi_rpath_sed"`
+ ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+ ncbi_rp_R_sep=:
+ done
+ ZSTD_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+ fi
+ ZSTD_LIBS="$ZSTD_LIBPATH -lzstd "
+ else # /usr or not a directory: no extra search paths
+ ZSTD_INCLUDE=""
+ ZSTD_LIBS="-lzstd "
+ in_path=
+ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libzstd$in_path" >&5
+$as_echo_n "checking for libzstd$in_path... " >&6; }
+if ${ncbi_cv_lib_zstd+:} false; then : # ncbi_cv_lib_zstd already set: reuse it
+ $as_echo_n "(cached) " >&6
+else
+ CPPFLAGS=" $ZSTD_INCLUDE $orig_CPPFLAGS"
+ LIBS="$ZSTD_LIBS $orig_LIBS"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <zstd.h>
+int
+main ()
+{
+ZSTD_CCtx* cctx = ZSTD_createCCtx();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then : # link the test program above, which calls ZSTD_createCCtx()
+ ncbi_cv_lib_zstd=yes
+else
+ ncbi_cv_lib_zstd=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ncbi_cv_lib_zstd" >&5
+$as_echo "$ncbi_cv_lib_zstd" >&6; }
+ if test "$ncbi_cv_lib_zstd" = "no"; then
+ if test "${with_zstd:=no}" != no; then # sets with_zstd to "no" if it was unset or empty; otherwise the failure is fatal
+ as_fn_error $? "--with-zstd explicitly specified, but no usable version found." "$LINENO" 5
+ fi
+ fi
+ fi
+ if test "$with_zstd" = "no"; then # disabled or not found: blank out the settings
+ ZSTD_PATH="No_ZSTD"
+ ZSTD_INCLUDE=
+ ZSTD_LIBS=
+ else # usable: record the package and define HAVE_LIBZSTD
+ WithPackages="$WithPackages${WithPackagesSep}ZSTD"; WithPackagesSep=" "
+ ZSTD_INCLUDE=" $ZSTD_INCLUDE"
+
+$as_echo "#define HAVE_LIBZSTD 1" >>confdefs.h
+
+ fi
+
+
+
+
if test -z "$PCRE_PATH" && pcre-config --version >/dev/null 2>&1; then
p=`pcre-config --prefix`
test "x$p" = "x/usr" || PCRE_PATH=$p
## FreeType and FTGL
if test "$with_freetype" != "no" ; then
- : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin}
- # Extract the first word of "freetype-config", so it can be a program name with args.
+ ft2pc="env PKG_CONFIG_PATH=$FREETYPE_PATH/lib/pkgconfig pkg-config freetype2"
+ if $ft2pc --exists >/dev/null 2>&1; then
+ freetype_config=$ft2pc
+ FREETYPE_PATH=`$ft2pc --variable=exec_prefix`
+ else
+ : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin}
+ # Extract the first word of "freetype-config", so it can be a program name with args.
set dummy freetype-config; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
fi
+ fi
if test -n "$freetype_config" ; then
- : ${FREETYPE_BINPATH=`dirname $freetype_config`}
: ${FREETYPE_INCLUDE=`$freetype_config --cflags`}
if test -z "${FREETYPE_LIBS+set}"; then
if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
done
fi
if $grpc_pc grpc++ --exists 2>/dev/null; then
+ GRPC_SED=sed
if test -f "$GRPC_PATH/lib/libboringssl.a"; then
GRPC_SED="sed -e s/-lssl/-lboringssl/g -e s/-lcrypto/-lboringcrypto/g"
- elif test -f /usr/lib/libssl.dylib -a \
+ fi
+ if test -f /usr/lib/libssl.dylib -a \
x"`$grpc_pc grpc++ --variable=prefix`" != x/sw; then
- GRPC_SED="sed -e s,-L/sw/lib,,"
- else
+ GRPC_SED="$GRPC_SED -e s,-L/sw/lib,,"
+ fi
+ if test "$GRPC_SED" = sed; then
GRPC_SED=cat
fi
GRPC_CONFIG_LIBS="`$grpc_pc grpc++ grpc --libs | $GRPC_SED`"
if test -n "$GRPC_CONFIG_LIBS"; then
GRPC_LIBS="$GRPC_CONFIG_LIBS $PROTOBUF_LIBS $GRPC_LDEP"
GRPC_UNSECURE_LIBS="`$grpc_pc grpc++_unsecure grpc_unsecure --libs`"
- case " $GRPC_LIBS ::: $GRPC_UNSECURE_LIBS " in
- *:::*" -lupb "* ) ;;
- *" -lupb "* ) GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS -lupb" ;;
- esac
+ for x in address_sorting upb cares; do
+ case " $GRPC_LIBS ::: $GRPC_UNSECURE_LIBS " in
+ *:::*" -l$x "* ) ;;
+ *" -l$x "* ) GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS -l$x" ;;
+ esac
+ done
GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS $PROTOBUF_LIBS $GRPC_LDEP"
else
LDFLAGS="$orig_LDFLAGS $GRPC_LIBPATH"
fi
done
fi
+AWS_SDK_LDEP= # extra link dependencies for the AWS SDK; stay empty unless the probe below finds them
+AWS_SDK_STATIC_LDEP=
+for d in "$AWS_SDK_PATH/lib$bit64_sfx" "$AWS_SDK_PATH/lib" \
+ /usr/lib/$multiarch /usr/lib$bit64_sfx /usr/lib \
+ /usr/local/lib$bit64_sfx /usr/local/lib; do
+ if test -f "$d/libaws-cpp-sdk-s3.a"; then # first directory holding libaws-cpp-sdk-s3.a wins
+ AWS_SDK_LIBDIR=$d
+ if test -f "$AWS_SDK_LIBDIR/libaws-c-event-stream.a"; then # only this one archive is tested; all three helper libraries are then added
+ AWS_SDK_LDEP="-laws-c-event-stream -laws-checksums -laws-c-common"
+ AWS_SDK_STATIC_LDEP="-laws-c-event-stream-static -laws-checksums-static -laws-c-common-static"
+ fi
+ break
+ fi
+done
if test "$with_aws_sdk" != "no"; then
case "$AWS_SDK_PATH:$with_aws_sdk" in
*:yes | *: | $with_aws_sdk* ) ;;
AWS_SDK_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
fi
fi
- AWS_SDK_LIBS="$AWS_SDK_LIBPATH -laws-cpp-sdk-s3 -laws-cpp-sdk-ec2 -laws-cpp-sdk-core"
+ AWS_SDK_LIBS="$AWS_SDK_LIBPATH -laws-cpp-sdk-s3 -laws-cpp-sdk-ec2 -laws-cpp-sdk-core $AWS_SDK_LDEP"
else
AWS_SDK_INCLUDE=""
- AWS_SDK_LIBS="-laws-cpp-sdk-s3 -laws-cpp-sdk-ec2 -laws-cpp-sdk-core"
+ AWS_SDK_LIBS="-laws-cpp-sdk-s3 -laws-cpp-sdk-ec2 -laws-cpp-sdk-core $AWS_SDK_LDEP"
in_path=
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for libaws-cpp-sdk-s3$in_path" >&5
if test "$with_aws_sdk" != no -a \
- -f "$AWS_SDK_PATH/lib$bit64_sfx/libaws-cpp-sdk-s3-static.a"; then
- AWS_SDK_STATIC_LIBS="-L$AWS_SDK_PATH/lib -laws-cpp-sdk-s3-static -laws-cpp-sdk-ec2-static -laws-cpp-sdk-core-static"
+ -f "$AWS_SDK_LIBDIR/libaws-cpp-sdk-s3-static.a"; then
+ AWS_SDK_STATIC_LIBS="-L$AWS_SDK_LIBDIR -laws-cpp-sdk-s3-static -laws-cpp-sdk-ec2-static -laws-cpp-sdk-core-static $AWS_SDK_STATIC_LDEP"
else
AWS_SDK_STATIC_LIBS=$AWS_SDK_LIBS
fi
HIREDIS_STATIC_LIBS=$HIREDIS_LIBS
fi
+case "$with_apache_arrow" in # Resolve APACHE_ARROW_PATH before the library check.
+ yes | no | '' ) ;;
+ * ) APACHE_ARROW_PATH=$with_apache_arrow ;; # any other value is taken as the installation directory
+esac
+if test -d "$APACHE_ARROW_PATH"; then
+ ncbi_fix_dir_tmp=`if cd $APACHE_ARROW_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` # directory as printed by /bin/pwd with PWD cleared
+ case "$ncbi_fix_dir_tmp" in
+ /.*) ncbi_fix_dir_tmp2=`cd $APACHE_ARROW_PATH && $smart_pwd 2>/dev/null` # result begins with "/.": ask $smart_pwd for another spelling
+ if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+ APACHE_ARROW_PATH=$ncbi_fix_dir_tmp2
+ else
+ case "$APACHE_ARROW_PATH" in
+ /*) ;;
+ * ) APACHE_ARROW_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ fi
+ ;;
+ /*) APACHE_ARROW_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ for d in "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx$asan_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx$asan_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$asan_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$asan_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do
+ if test -d "$d"; then # first existing candidate subdirectory replaces APACHE_ARROW_PATH
+ APACHE_ARROW_PATH=$d
+ ncbi_fix_dir_tmp=`if cd $APACHE_ARROW_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` # canonicalize the chosen subdirectory the same way
+ case "$ncbi_fix_dir_tmp" in
+ /.*) ncbi_fix_dir_tmp2=`cd $APACHE_ARROW_PATH && $smart_pwd 2>/dev/null`
+ if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+ APACHE_ARROW_PATH=$ncbi_fix_dir_tmp2
+ else
+ case "$APACHE_ARROW_PATH" in
+ /*) ;;
+ * ) APACHE_ARROW_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ fi
+ ;;
+ /*) APACHE_ARROW_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ break
+ fi
+ done
+fi
+
+if test "$with_apache_arrow" != "no"; then # Probe for libparquet and libarrow unless with_apache_arrow is "no".
+ case "$APACHE_ARROW_PATH:$with_apache_arrow" in
+ *:yes | *: | $with_apache_arrow* ) ;; # "yes", empty, or a prefix of APACHE_ARROW_PATH: keep APACHE_ARROW_PATH
+ * ) APACHE_ARROW_PATH=$with_apache_arrow ;; # otherwise use the with_apache_arrow value as the directory
+ esac
+ if test "$APACHE_ARROW_PATH" != /usr -a -d "$APACHE_ARROW_PATH"; then # existing directory other than /usr: derive flags from it
+ in_path=" in $APACHE_ARROW_PATH"
+ if test -z "$APACHE_ARROW_INCLUDE" -a -d "$APACHE_ARROW_PATH/include"; then
+ APACHE_ARROW_INCLUDE="-I$APACHE_ARROW_PATH/include"
+ fi
+ if test -n "$APACHE_ARROW_LIBPATH"; then # a preset APACHE_ARROW_LIBPATH is left alone
+ :
+ elif test -d "$APACHE_ARROW_PATH/lib${bit64_sfx}"; then
+ ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then # one flag serves both purposes: emit library-path flags only
+ for x in $APACHE_ARROW_PATH/lib${bit64_sfx}; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue # these system directories get no explicit flag
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ done
+ APACHE_ARROW_LIBPATH="${ncbi_rp_L_flags}"
+ else # distinct flags: build a library-path part and a run-path part
+ ncbi_rp_R_flags=
+ ncbi_rp_R_sep=" $CONF_f_runpath"
+ for x in $APACHE_ARROW_PATH/lib${bit64_sfx}; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ x=`echo $x | sed -e "$ncbi_rpath_sed"` # rewrite the directory with $ncbi_rpath_sed for the run-path part
+ ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+ ncbi_rp_R_sep=:
+ done
+ APACHE_ARROW_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+ elif test -d "$APACHE_ARROW_PATH/lib"; then # same construction for the plain lib directory
+ ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
+ for x in $APACHE_ARROW_PATH/lib; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ done
+ APACHE_ARROW_LIBPATH="${ncbi_rp_L_flags}"
+ else
+ ncbi_rp_R_flags=
+ ncbi_rp_R_sep=" $CONF_f_runpath"
+ for x in $APACHE_ARROW_PATH/lib; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ x=`echo $x | sed -e "$ncbi_rpath_sed"`
+ ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+ ncbi_rp_R_sep=:
+ done
+ APACHE_ARROW_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+ fi
+ APACHE_ARROW_LIBS="$APACHE_ARROW_LIBPATH -lparquet -larrow"
+ else # /usr or not a directory: no extra search paths
+ APACHE_ARROW_INCLUDE=""
+ APACHE_ARROW_LIBS="-lparquet -larrow"
+ in_path=
+ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libparquet$in_path" >&5
+$as_echo_n "checking for libparquet$in_path... " >&6; }
+if ${ncbi_cv_lib_apache_arrow+:} false; then : # ncbi_cv_lib_apache_arrow already set: reuse it
+ $as_echo_n "(cached) " >&6
+else
+ CPPFLAGS=" $APACHE_ARROW_INCLUDE $orig_CPPFLAGS"
+ LIBS="$APACHE_ARROW_LIBS $orig_LIBS"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <parquet/api/reader.h>
+int
+main ()
+{
+parquet::ParquetFileReader pfr;
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then : # link the test program above, which constructs a parquet::ParquetFileReader
+ ncbi_cv_lib_apache_arrow=yes
+else
+ ncbi_cv_lib_apache_arrow=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ncbi_cv_lib_apache_arrow" >&5
+$as_echo "$ncbi_cv_lib_apache_arrow" >&6; }
+ if test "$ncbi_cv_lib_apache_arrow" = "no"; then
+ if test "${with_apache_arrow:=no}" != no; then # sets with_apache_arrow to "no" if it was unset or empty; otherwise the failure is fatal
+ as_fn_error $? "--with-apache_arrow explicitly specified, but no usable version found." "$LINENO" 5
+ fi
+ fi
+ fi
+ if test "$with_apache_arrow" = "no"; then # disabled or not found: blank out the settings
+ APACHE_ARROW_PATH="No_APACHE_ARROW"
+ APACHE_ARROW_INCLUDE=
+ APACHE_ARROW_LIBS=
+ else # usable: record the package and define HAVE_LIBAPACHE_ARROW
+ WithPackages="$WithPackages${WithPackagesSep}APACHE_ARROW"; WithPackagesSep=" "
+ APACHE_ARROW_INCLUDE=" $APACHE_ARROW_INCLUDE"
+
+$as_echo "#define HAVE_LIBAPACHE_ARROW 1" >>confdefs.h
+
+ fi
+
+
+
+if test -z "$APACHE_ARROW_LIBDIR"; then # Locate the directory holding libparquet-static.a unless APACHE_ARROW_LIBDIR was supplied from outside.
+ for d in "$APACHE_ARROW_PATH/lib$bit64_sfx" "$APACHE_ARROW_PATH/lib"; do # the same two directories the link-path logic considers
+ if test -f "$d/libparquet-static.a"; then
+ APACHE_ARROW_LIBDIR=$d
+ break
+ fi
+ done
+fi
+if test "$with_apache_arrow" != no -a \
+ -f "$APACHE_ARROW_LIBDIR/libparquet-static.a"; then # static archives present: name them explicitly
+ APACHE_ARROW_STATIC_LIBS="-L$APACHE_ARROW_LIBDIR -lparquet-static -larrow-static -larrow_bundled_dependencies-static $BZ2_LIBS $Z_LIBS -lzstd"
+else
+ APACHE_ARROW_STATIC_LIBS=$APACHE_ARROW_LIBS # otherwise reuse the regular link flags
+fi
+
+case "$with_librdkafka" in # Resolve LIBRDKAFKA_PATH before the library check.
+ yes | no | '' ) ;;
+ * ) LIBRDKAFKA_PATH=$with_librdkafka ;; # any other value is taken as the installation directory
+esac
+if test -d "$LIBRDKAFKA_PATH"; then
+ ncbi_fix_dir_tmp=`if cd $LIBRDKAFKA_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` # directory as printed by /bin/pwd with PWD cleared
+ case "$ncbi_fix_dir_tmp" in
+ /.*) ncbi_fix_dir_tmp2=`cd $LIBRDKAFKA_PATH && $smart_pwd 2>/dev/null` # result begins with "/.": ask $smart_pwd for another spelling
+ if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+ LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp2
+ else
+ case "$LIBRDKAFKA_PATH" in
+ /*) ;;
+ * ) LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ fi
+ ;;
+ /*) LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ for d in "$LIBRDKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+ "$LIBRDKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+ "$LIBRDKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \
+ "$LIBRDKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do
+ if test -d "$d"; then # first existing candidate subdirectory replaces LIBRDKAFKA_PATH
+ LIBRDKAFKA_PATH=$d
+ ncbi_fix_dir_tmp=`if cd $LIBRDKAFKA_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` # canonicalize the chosen subdirectory the same way
+ case "$ncbi_fix_dir_tmp" in
+ /.*) ncbi_fix_dir_tmp2=`cd $LIBRDKAFKA_PATH && $smart_pwd 2>/dev/null`
+ if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+ LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp2
+ else
+ case "$LIBRDKAFKA_PATH" in
+ /*) ;;
+ * ) LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ fi
+ ;;
+ /*) LIBRDKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ break
+ fi
+ done
+fi
+
+if test "$with_librdkafka" != "no"; then # Probe for librdkafka unless with_librdkafka is "no".
+ case "$LIBRDKAFKA_PATH:$with_librdkafka" in
+ *:yes | *: | $with_librdkafka* ) ;; # "yes", empty, or a prefix of LIBRDKAFKA_PATH: keep LIBRDKAFKA_PATH
+ * ) LIBRDKAFKA_PATH=$with_librdkafka ;; # otherwise use the with_librdkafka value as the directory
+ esac
+ if test "$LIBRDKAFKA_PATH" != /usr -a -d "$LIBRDKAFKA_PATH"; then # existing directory other than /usr: derive flags from it
+ in_path=" in $LIBRDKAFKA_PATH"
+ if test -z "$LIBRDKAFKA_INCLUDE" -a -d "$LIBRDKAFKA_PATH/include"; then
+ LIBRDKAFKA_INCLUDE="-I$LIBRDKAFKA_PATH/include"
+ fi
+ if test -n "$LIBRDKAFKA_LIBPATH"; then # a preset LIBRDKAFKA_LIBPATH is left alone
+ :
+ elif test -d "$LIBRDKAFKA_PATH/lib${bit64_sfx}"; then
+ ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then # one flag serves both purposes: emit library-path flags only
+ for x in $LIBRDKAFKA_PATH/lib${bit64_sfx}; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue # these system directories get no explicit flag
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ done
+ LIBRDKAFKA_LIBPATH="${ncbi_rp_L_flags}"
+ else # distinct flags: build a library-path part and a run-path part
+ ncbi_rp_R_flags=
+ ncbi_rp_R_sep=" $CONF_f_runpath"
+ for x in $LIBRDKAFKA_PATH/lib${bit64_sfx}; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ x=`echo $x | sed -e "$ncbi_rpath_sed"` # rewrite the directory with $ncbi_rpath_sed for the run-path part
+ ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+ ncbi_rp_R_sep=:
+ done
+ LIBRDKAFKA_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+ elif test -d "$LIBRDKAFKA_PATH/lib"; then # same construction for the plain lib directory
+ ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
+ for x in $LIBRDKAFKA_PATH/lib; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ done
+ LIBRDKAFKA_LIBPATH="${ncbi_rp_L_flags}"
+ else
+ ncbi_rp_R_flags=
+ ncbi_rp_R_sep=" $CONF_f_runpath"
+ for x in $LIBRDKAFKA_PATH/lib; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ x=`echo $x | sed -e "$ncbi_rpath_sed"`
+ ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+ ncbi_rp_R_sep=:
+ done
+ LIBRDKAFKA_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+ fi
+ LIBRDKAFKA_LIBS="$LIBRDKAFKA_LIBPATH -lrdkafka "
+ else # /usr or not a directory: no extra search paths
+ LIBRDKAFKA_INCLUDE=""
+ LIBRDKAFKA_LIBS="-lrdkafka "
+ in_path=
+ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for librdkafka$in_path" >&5
+$as_echo_n "checking for librdkafka$in_path... " >&6; }
+if ${ncbi_cv_lib_librdkafka+:} false; then : # ncbi_cv_lib_librdkafka already set: reuse it
+ $as_echo_n "(cached) " >&6
+else
+ CPPFLAGS=" $LIBRDKAFKA_INCLUDE $orig_CPPFLAGS"
+ LIBS="$LIBRDKAFKA_LIBS $orig_LIBS"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <librdkafka/rdkafka.h>
+int
+main ()
+{
+rd_kafka_conf_t *conf = rd_kafka_conf_new();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then : # link the test program above, which calls rd_kafka_conf_new()
+ ncbi_cv_lib_librdkafka=yes
+else
+ ncbi_cv_lib_librdkafka=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ncbi_cv_lib_librdkafka" >&5
+$as_echo "$ncbi_cv_lib_librdkafka" >&6; }
+ if test "$ncbi_cv_lib_librdkafka" = "no"; then
+ if test "${with_librdkafka:=no}" != no; then # sets with_librdkafka to "no" if it was unset or empty; otherwise the failure is fatal
+ as_fn_error $? "--with-librdkafka explicitly specified, but no usable version found." "$LINENO" 5
+ fi
+ fi
+ fi
+ if test "$with_librdkafka" = "no"; then # disabled or not found: blank out the settings
+ LIBRDKAFKA_PATH="No_LIBRDKAFKA"
+ LIBRDKAFKA_INCLUDE=
+ LIBRDKAFKA_LIBS=
+ else # usable: record the package and define HAVE_LIBRDKAFKA
+ WithPackages="$WithPackages${WithPackagesSep}LIBRDKAFKA"; WithPackagesSep=" "
+ LIBRDKAFKA_INCLUDE=" $LIBRDKAFKA_INCLUDE"
+
+$as_echo "#define HAVE_LIBRDKAFKA 1" >>confdefs.h
+
+ fi
+
+
+
+if test "$with_librdkafka" != no -a \
+ -f "$LIBRDKAFKA_PATH/lib$bit64_sfx/librdkafka-static.a"; then # Use librdkafka-static.a when the package is enabled and that archive exists.
+ LIBRDKAFKA_STATIC_LIBS="-L$LIBRDKAFKA_PATH/lib$bit64_sfx -lrdkafka-static"
+else
+ LIBRDKAFKA_STATIC_LIBS=$LIBRDKAFKA_LIBS # otherwise reuse the regular link flags
+fi
+
+case "$with_cppkafka" in # Resolve CPPKAFKA_PATH before the library check.
+ yes | no | '' ) ;;
+ * ) CPPKAFKA_PATH=$with_cppkafka ;; # any other value is taken as the installation directory
+esac
+if test -d "$CPPKAFKA_PATH"; then
+ ncbi_fix_dir_tmp=`if cd $CPPKAFKA_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` # directory as printed by /bin/pwd with PWD cleared
+ case "$ncbi_fix_dir_tmp" in
+ /.*) ncbi_fix_dir_tmp2=`cd $CPPKAFKA_PATH && $smart_pwd 2>/dev/null` # result begins with "/.": ask $smart_pwd for another spelling
+ if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+ CPPKAFKA_PATH=$ncbi_fix_dir_tmp2
+ else
+ case "$CPPKAFKA_PATH" in
+ /*) ;;
+ * ) CPPKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ fi
+ ;;
+ /*) CPPKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ for d in "$CPPKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+ "$CPPKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+ "$CPPKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \
+ "$CPPKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do
+ if test -d "$d"; then # first existing candidate subdirectory replaces CPPKAFKA_PATH
+ CPPKAFKA_PATH=$d
+ ncbi_fix_dir_tmp=`if cd $CPPKAFKA_PATH; then { PWD=; unset PWD;}; /bin/pwd; fi` # canonicalize the chosen subdirectory the same way
+ case "$ncbi_fix_dir_tmp" in
+ /.*) ncbi_fix_dir_tmp2=`cd $CPPKAFKA_PATH && $smart_pwd 2>/dev/null`
+ if test -n "$ncbi_fix_dir_tmp2" -a -d "$ncbi_fix_dir_tmp2"; then
+ CPPKAFKA_PATH=$ncbi_fix_dir_tmp2
+ else
+ case "$CPPKAFKA_PATH" in
+ /*) ;;
+ * ) CPPKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ fi
+ ;;
+ /*) CPPKAFKA_PATH=$ncbi_fix_dir_tmp ;;
+ esac
+ break
+ fi
+ done
+fi
+
+if test "$with_cppkafka" != "no"; then # Probe for libcppkafka unless with_cppkafka is "no".
+ case "$CPPKAFKA_PATH:$with_cppkafka" in
+ *:yes | *: | $with_cppkafka* ) ;; # "yes", empty, or a prefix of CPPKAFKA_PATH: keep CPPKAFKA_PATH
+ * ) CPPKAFKA_PATH=$with_cppkafka ;; # otherwise use the with_cppkafka value as the directory
+ esac
+ if test "$CPPKAFKA_PATH" != /usr -a -d "$CPPKAFKA_PATH"; then # existing directory other than /usr: derive flags from it
+ in_path=" in $CPPKAFKA_PATH"
+ if test -z "$CPPKAFKA_INCLUDE" -a -d "$CPPKAFKA_PATH/include"; then
+ CPPKAFKA_INCLUDE="-I$CPPKAFKA_PATH/include"
+ fi
+ if test -n "$CPPKAFKA_LIBPATH"; then # a preset CPPKAFKA_LIBPATH is left alone
+ :
+ elif test -d "$CPPKAFKA_PATH/lib${bit64_sfx}"; then
+ ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then # one flag serves both purposes: emit library-path flags only
+ for x in $CPPKAFKA_PATH/lib${bit64_sfx}; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue # these system directories get no explicit flag
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ done
+ CPPKAFKA_LIBPATH="${ncbi_rp_L_flags}"
+ else # distinct flags: build a library-path part and a run-path part
+ ncbi_rp_R_flags=
+ ncbi_rp_R_sep=" $CONF_f_runpath"
+ for x in $CPPKAFKA_PATH/lib${bit64_sfx}; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ x=`echo $x | sed -e "$ncbi_rpath_sed"` # rewrite the directory with $ncbi_rpath_sed for the run-path part
+ ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+ ncbi_rp_R_sep=:
+ done
+ CPPKAFKA_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+ elif test -d "$CPPKAFKA_PATH/lib"; then # same construction for the plain lib directory
+ ncbi_rp_L_flags=
+ ncbi_rp_L_sep=$CONF_f_libpath
+ if test "x${CONF_f_runpath}" = "x${CONF_f_libpath}"; then
+ for x in $CPPKAFKA_PATH/lib; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ done
+ CPPKAFKA_LIBPATH="${ncbi_rp_L_flags}"
+ else
+ ncbi_rp_R_flags=
+ ncbi_rp_R_sep=" $CONF_f_runpath"
+ for x in $CPPKAFKA_PATH/lib; do
+ case "$x" in
+ /lib | /usr/lib | /usr/lib32 | /usr/lib64 | /usr/lib/$multiarch )
+ continue
+ ;;
+ esac
+ ncbi_rp_L_flags="${ncbi_rp_L_flags}${ncbi_rp_L_sep}$x"
+ ncbi_rp_L_sep=" $CONF_f_libpath"
+ x=`echo $x | sed -e "$ncbi_rpath_sed"`
+ ncbi_rp_R_flags="${ncbi_rp_R_flags}${ncbi_rp_R_sep}$x"
+ ncbi_rp_R_sep=:
+ done
+ CPPKAFKA_LIBPATH="${ncbi_rp_L_flags}${ncbi_rp_R_flags}"
+ fi
+ fi
+ CPPKAFKA_LIBS="$CPPKAFKA_LIBPATH -lcppkafka $LIBRDKAFKA_LIBS" # the librdkafka link flags are always appended
+ else # /usr or not a directory: no extra search paths
+ CPPKAFKA_INCLUDE=""
+ CPPKAFKA_LIBS="-lcppkafka $LIBRDKAFKA_LIBS"
+ in_path=
+ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for libcppkafka$in_path" >&5
+$as_echo_n "checking for libcppkafka$in_path... " >&6; }
+if ${ncbi_cv_lib_cppkafka+:} false; then : # ncbi_cv_lib_cppkafka already set: reuse it
+ $as_echo_n "(cached) " >&6
+else
+ CPPFLAGS="$LIBRDKAFKA_INCLUDE $CPPKAFKA_INCLUDE $orig_CPPFLAGS"
+ LIBS="$CPPKAFKA_LIBS $orig_LIBS"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <cppkafka/configuration.h>
+int
+main ()
+{
+cppkafka::Configuration cfg; cfg.set("foo", "bar");
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then : # link the test program above, which sets a key on a cppkafka::Configuration
+ ncbi_cv_lib_cppkafka=yes
+else
+ ncbi_cv_lib_cppkafka=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ncbi_cv_lib_cppkafka" >&5
+$as_echo "$ncbi_cv_lib_cppkafka" >&6; }
+ if test "$ncbi_cv_lib_cppkafka" = "no"; then
+ if test "${with_cppkafka:=no}" != no; then # sets with_cppkafka to "no" if it was unset or empty; otherwise the failure is fatal
+ as_fn_error $? "--with-cppkafka explicitly specified, but no usable version found." "$LINENO" 5
+ fi
+ fi
+ fi
+ if test "$with_cppkafka" = "no"; then # disabled or not found: blank out the settings
+ CPPKAFKA_PATH="No_CPPKAFKA"
+ CPPKAFKA_INCLUDE=
+ CPPKAFKA_LIBS=
+ else # usable: record the package and define HAVE_LIBCPPKAFKA
+ WithPackages="$WithPackages${WithPackagesSep}CPPKAFKA"; WithPackagesSep=" "
+ CPPKAFKA_INCLUDE="$LIBRDKAFKA_INCLUDE $CPPKAFKA_INCLUDE"
+
+$as_echo "#define HAVE_LIBCPPKAFKA 1" >>confdefs.h
+
+ fi
+
+
+
+if test "$with_cppkafka" != no -a \
+ -f "$CPPKAFKA_PATH/lib$bit64_sfx/libcppkafka-static.a"; then # Use libcppkafka-static.a when the package is enabled and that archive exists.
+ CPPKAFKA_STATIC_LIBS="-L$CPPKAFKA_PATH/lib$bit64_sfx -lcppkafka-static $LIBRDKAFKA_STATIC_LIBS"
+else
+ CPPKAFKA_STATIC_LIBS=$CPPKAFKA_LIBS # otherwise reuse the regular link flags
+fi
+
### Restore original compiler/linker flags
LIBS="$orig_LIBS"
CPPFLAGS="$orig_CPPFLAGS"
;;
esac
done
- for x in UUID FUSE Iconv LIBUNWIND LIBDW BACKWARD_CPP Z LocalZ BZ2 LocalBZ2 LZO PCRE LocalPCRE MBEDTLS GMP GCRYPT NETTLE GNUTLS OPENSSL KRB5 CURL Sybase DBLib FreeTDS MySQL BerkeleyDB BerkeleyDB++ ODBC PYTHON PYTHON25 PYTHON26 PYTHON27 PYTHON3 PERL Boost.Chrono Boost.Filesystem Boost.Iostreams Boost.Program-Options Boost.Regex Boost.Serialization Boost.Spirit Boost.System Boost.Test Boost.Test.Included Boost.Thread C-Toolkit OpenGL MESA GLUT GLEW wxWidgets wx2.8 Fast-CGI LocalSSS LocalMSGMAIL2 SSSUTILS LocalNCBILS NCBILS2 SSSDB SP ORBacus ICU EXPAT SABLOT LIBXML LIBXSLT LIBEXSLT Xerces Xalan Zorba SQLITE3 SQLITE3ASYNC VDB NGS OECHEM SGE MUPARSER HDF5 JPEG PNG TIFF GIF UNGIF XPM GL2PS FreeType FTGL MAGIC MIMETIC GSOAP AVRO Cereal SASL2 MONGODB MONGODB3 LEVELDB GMOCK LAPACK LMDB LocalLMDB LIBUV LIBSSH2 CASSANDRA NGHTTP2 H2O INFLUXDB LIBXLSXWRITER PROTOBUF GRPC MSGSL AWS_SDK HIREDIS; do
+ for x in UUID FUSE Iconv LIBUNWIND LIBDW BACKWARD_CPP Z LocalZ BZ2 LocalBZ2 LZO ZSTD PCRE LocalPCRE MBEDTLS GMP GCRYPT NETTLE GNUTLS OPENSSL KRB5 CURL Sybase DBLib FreeTDS MySQL BerkeleyDB BerkeleyDB++ ODBC PYTHON PYTHON25 PYTHON26 PYTHON27 PYTHON3 PERL Boost.Chrono Boost.Filesystem Boost.Iostreams Boost.Program-Options Boost.Regex Boost.Serialization Boost.Spirit Boost.System Boost.Test Boost.Test.Included Boost.Thread C-Toolkit OpenGL MESA GLUT GLEW wxWidgets wx2.8 Fast-CGI LocalSSS LocalMSGMAIL2 SSSUTILS LocalNCBILS NCBILS2 SSSDB SP ORBacus ICU EXPAT SABLOT LIBXML LIBXSLT LIBEXSLT Xerces Xalan Zorba SQLITE3 SQLITE3ASYNC VDB NGS OECHEM SGE MUPARSER HDF5 JPEG PNG TIFF GIF UNGIF XPM GL2PS FreeType FTGL MAGIC MIMETIC GSOAP AVRO Cereal SASL2 MONGODB MONGODB3 LEVELDB GMOCK LAPACK LMDB LocalLMDB LIBUV LIBSSH2 CASSANDRA NGHTTP2 H2O INFLUXDB LIBXLSXWRITER PROTOBUF GRPC MSGSL AWS_SDK HIREDIS APACHE_ARROW LIBRDKAFKA CPPKAFKA; do
case " $WithPackages " in
*" $x "*) ;;
*) WithoutPackages="$WithoutPackages$WithoutPackagesSep$x"
+
+
+
#############################################################################
-# $Id: configure.ac 608058 2020-05-11 16:30:05Z ivanov $
+# $Id: configure.ac 616396 2020-09-15 18:22:00Z ivanov $
# Derived from configure.in version 1.173.
# ==========================================================================
#
with_ncbi_c=no
fi
m4_foreach(X, [sss, sssutils, sssdb, vdb, ngs, libunwind,
- z, bz2, lzo, pcre, mbedtls,
+ z, bz2, lzo, zstd, pcre, mbedtls,
gmp, gcrypt, nettle, gnutls, openssl, krb5, boost, lmdb,
sybase, ftds, mysql, opengl, mesa, glut, glew, gl2ps,
wxwidgets, freetype, ftgl, fastcgi, bdb, orbacus, odbc,
curl, gsoap, avro, cereal, sasl2,
mongodb, mongodb3, leveldb, gmock, lapack,
libuv, libssh2, cassandra, nghttp2, h2o, influxdb,
- libxlsxwriter, protobuf, grpc, msgsl, aws-sdk, hiredis],
+ libxlsxwriter, protobuf, grpc, msgsl, aws-sdk, hiredis,
+ apache-arrow, librdkafka, cppkafka],
[if test "${[with_]X-no}" != "no"; then
AC_MSG_ERROR([incompatible options: --with-]X[ but --without-3psw])
else
[ --with-lzo=DIR use LZO installation in DIR (requires 2.x or up)])
AC_ARG_WITH(lzo,
[ --without-lzo do not use LZO])
+AC_ARG_WITH(zstd,
+ [ --with-zstd=DIR use Zstandard installation in DIR])
+AC_ARG_WITH(zstd,
+ [ --without-zstd do not use Zstandard])
AC_ARG_WITH(pcre,
[ --with-pcre=DIR use PCRE installation in DIR])
AC_ARG_WITH(pcre,
[ --without-aws-sdk do not use the Amazon Web Services SDK])
AC_ARG_WITH(hiredis,
[ --with-hiredis=DIR use Hiredis installation in DIR])
-AC_ARG_WITH(grpc,
+AC_ARG_WITH(hiredis,
[ --without-hiredis do not use Hiredis])
+AC_ARG_WITH(apache-arrow,
+ [ --with-apache-arrow=DIR use Apache Arrow installation in DIR])
+AC_ARG_WITH(apache-arrow,
+ [ --without-apache-arrow do not use Apache Arrow])
+AC_ARG_WITH(librdkafka,
+ [ --with-librdkafka=DIR use librdkafka installation in DIR])
+AC_ARG_WITH(librdkafka,
+ [ --without-librdkafka do not use librdkafka])
+AC_ARG_WITH(cppkafka,
+ [ --with-cppkafka=DIR use cppkafka installation in DIR])
+AC_ARG_WITH(cppkafka,
+ [ --without-cppkafka do not use cppkafka])
AC_ARG_WITH(3psw,
[ --with-3psw=std:netopt favor standard (system) builds of the above pkgs.])
AC_ARG_WITH(3psw,
ncbi-c wxwidgets wxwidgets-ucs fastcgi sss sssdb sssutils included-sss \
geo included-geo vdb downloaded-vdb static-vdb ngs libunwind libdw \
backward-cpp backward-cpp-sig \
-z bz2 lzo pcre mbedtls gmp gcrypt nettle gnutls static-gnutls openssl krb5 \
+z bz2 lzo zstd pcre mbedtls \
+gmp gcrypt nettle gnutls static-gnutls openssl krb5 \
sybase sybase-local sybase-new ftds mysql \
orbacus freetype ftgl opengl mesa glut glew glew-mx gl2ps \
bdb python perl jni sqlite3 icu boost boost-tag \
mongodb mongodb3 leveldb gmock lapack lmdb \
libuv libssh2 cassandra nghttp2 h2o influxdb \
libxlsxwriter protobuf grpc msgsl aws-sdk hiredis \
+apache-arrow librdkafka cppkafka \
3psw local-lbsm ncbi-crypt connext \
serial objects dbapi app ctools gui algo internal gbench"
--srcdir=* | --x-includes=* | --x-libraries=* | --with-tcheck=* \
| --with-ncbi-c=* | --with-sss=* | --with-vdb=* | --with-ngs=* \
| --with-libunwind=* | --with-libdw=* | --with-backward-cpp=* \
- | --with-z=* | --with-bz2=* | --with-lzo=* \
+ | --with-z=* | --with-bz2=* | --with-lzo=* | --with-zstd=* \
| --with-pcre=* | --with-mbedtls=* \
| --with-gmp=* | --with-gcrypt=* | --with-nettle=* \
| --with-gnutls=* | --with-openssl=* | --with-krb5=* \
LZO_LIBS="$LZO_LIBPATH -llzo2-static"
fi
+if test -d "$ZSTD_PATH"; then # Canonicalize ZSTD_PATH first, but only when it names an existing directory.
+ NCBI_FIX_DIR(ZSTD_PATH)
+fi # the macro call that follows is given a test program calling ZSTD_createCCtx
+NCBI_CHECK_THIRD_PARTY_LIB(zstd,
+ [AC_LANG_PROGRAM([@%:@include <zstd.h>],
+ [[ZSTD_CCtx* cctx = ZSTD_createCCtx();]])])
+
if test -z "$PCRE_PATH" && pcre-config --version >/dev/null 2>&1; then
p=`pcre-config --prefix`
test "x$p" = "x/usr" || PCRE_PATH=$p
## FreeType and FTGL
if test "$with_freetype" != "no" ; then
- : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin}
- AC_PATH_PROG(freetype_config, freetype-config, [],
- [$FREETYPE_BINPATH:$PATH])
+ ft2pc="env PKG_CONFIG_PATH=$FREETYPE_PATH/lib/pkgconfig pkg-config freetype2"
+ if $ft2pc --exists >/dev/null 2>&1; then
+ freetype_config=$ft2pc
+ FREETYPE_PATH=`$ft2pc --variable=exec_prefix`
+ else
+ : ${FREETYPE_BINPATH=$FREETYPE_PATH/bin}
+ AC_PATH_PROG(freetype_config, freetype-config, [],
+ [$FREETYPE_BINPATH:$PATH])
+ fi
if test -n "$freetype_config" ; then
- : ${FREETYPE_BINPATH=`dirname $freetype_config`}
: ${FREETYPE_INCLUDE=`$freetype_config --cflags`}
NCBI_RPATHIFY_OUTPUT_COND(FREETYPE_LIBS, $freetype_config --libs,
[$no_usr_lib])
done
fi
if $grpc_pc grpc++ --exists 2>/dev/null; then
+ GRPC_SED=sed
if test -f "$GRPC_PATH/lib/libboringssl.a"; then
GRPC_SED="sed -e s/-lssl/-lboringssl/g -e s/-lcrypto/-lboringcrypto/g"
- elif test -f /usr/lib/libssl.dylib -a \
+ fi
+ if test -f /usr/lib/libssl.dylib -a \
x"`$grpc_pc grpc++ --variable=prefix`" != x/sw; then
- GRPC_SED="sed -e s,-L/sw/lib,,"
- else
+ GRPC_SED="$GRPC_SED -e s,-L/sw/lib,,"
+ fi
+ if test "$GRPC_SED" = sed; then
GRPC_SED=cat
fi
GRPC_CONFIG_LIBS="`$grpc_pc grpc++ grpc --libs | $GRPC_SED`"
if test -n "$GRPC_CONFIG_LIBS"; then
GRPC_LIBS="$GRPC_CONFIG_LIBS $PROTOBUF_LIBS $GRPC_LDEP"
GRPC_UNSECURE_LIBS="`$grpc_pc grpc++_unsecure grpc_unsecure --libs`"
- case " $GRPC_LIBS ::: $GRPC_UNSECURE_LIBS " in
- *:::*" -lupb "* ) ;;
- *" -lupb "* ) GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS -lupb" ;;
- esac
+ for x in address_sorting upb cares; do
+ case " $GRPC_LIBS ::: $GRPC_UNSECURE_LIBS " in
+ *:::*" -l$x "* ) ;;
+ *" -l$x "* ) GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS -l$x" ;;
+ esac
+ done
GRPC_UNSECURE_LIBS="$GRPC_UNSECURE_LIBS $PROTOBUF_LIBS $GRPC_LDEP"
else
LDFLAGS="$orig_LDFLAGS $GRPC_LIBPATH"
fi
done
fi
+AWS_SDK_LDEP= # extra link dependencies for the AWS SDK; stay empty unless the probe below finds them
+AWS_SDK_STATIC_LDEP=
+for d in "$AWS_SDK_PATH/lib$bit64_sfx" "$AWS_SDK_PATH/lib" \
+ /usr/lib/$multiarch /usr/lib$bit64_sfx /usr/lib \
+ /usr/local/lib$bit64_sfx /usr/local/lib; do
+ if test -f "$d/libaws-cpp-sdk-s3.a"; then # first directory holding libaws-cpp-sdk-s3.a wins
+ AWS_SDK_LIBDIR=$d
+ if test -f "$AWS_SDK_LIBDIR/libaws-c-event-stream.a"; then # only this one archive is tested; all three helper libraries are then added
+ AWS_SDK_LDEP="-laws-c-event-stream -laws-checksums -laws-c-common"
+ AWS_SDK_STATIC_LDEP="-laws-c-event-stream-static -laws-checksums-static -laws-c-common-static"
+ fi
+ break
+ fi
+done
NCBI_CHECK_THIRD_PARTY_LIB_EX(aws_sdk, AWS_SDK, aws-cpp-sdk-s3,
[AC_LANG_PROGRAM([[@%:@include <aws/s3/S3Client.h>
@%:@include <aws/ec2/EC2Client.h>]],
[[Aws::S3::S3Client s3cli;
Aws::EC2::EC2Client ec2cli;]])],
- [-laws-cpp-sdk-ec2 -laws-cpp-sdk-core], [$CURL_LIBS $OPENSSL_LIBS $Z_LIBS])
+ [-laws-cpp-sdk-ec2 -laws-cpp-sdk-core $AWS_SDK_LDEP],
+ [$CURL_LIBS $OPENSSL_LIBS $Z_LIBS])
if test "$with_aws_sdk" != no -a \
- -f "$AWS_SDK_PATH/lib$bit64_sfx/libaws-cpp-sdk-s3-static.a"; then
- AWS_SDK_STATIC_LIBS="-L$AWS_SDK_PATH/lib -laws-cpp-sdk-s3-static -laws-cpp-sdk-ec2-static -laws-cpp-sdk-core-static"
+ -f "$AWS_SDK_LIBDIR/libaws-cpp-sdk-s3-static.a"; then
+ AWS_SDK_STATIC_LIBS="-L$AWS_SDK_LIBDIR -laws-cpp-sdk-s3-static -laws-cpp-sdk-ec2-static -laws-cpp-sdk-core-static $AWS_SDK_STATIC_LDEP"
else
AWS_SDK_STATIC_LIBS=$AWS_SDK_LIBS
fi
HIREDIS_STATIC_LIBS=$HIREDIS_LIBS
fi
+case "$with_apache_arrow" in
+ yes | no | '' ) ;;
+ * ) APACHE_ARROW_PATH=$with_apache_arrow ;;
+esac
+if test -d "$APACHE_ARROW_PATH"; then
+ NCBI_FIX_DIR(APACHE_ARROW_PATH)
+ for d in "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx$asan_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx$asan_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$asan_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$asan_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \
+ "$APACHE_ARROW_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do
+ if test -d "$d"; then
+ APACHE_ARROW_PATH=$d
+ NCBI_FIX_DIR(APACHE_ARROW_PATH)
+ break
+ fi
+ done
+fi
+
+NCBI_CHECK_THIRD_PARTY_LIB_EX(apache_arrow, APACHE_ARROW, parquet,
+ [AC_LANG_PROGRAM([[@%:@include <parquet/api/reader.h>]],
+ [[parquet::ParquetFileReader pfr;]])],
+ [-larrow])
+if test "$with_apache_arrow" != no -a \
+ -f "$APACHE_ARROW_LIBDIR/libparquet-static.a"; then
+ APACHE_ARROW_STATIC_LIBS="-L$APACHE_ARROW_LIBDIR -lparquet-static -larrow-static -larrow_bundled_dependencies-static $BZ2_LIBS $Z_LIBS -lzstd"
+else
+ APACHE_ARROW_STATIC_LIBS=$APACHE_ARROW_LIBS
+fi
+
+case "$with_librdkafka" in
+ yes | no | '' ) ;;
+ * ) LIBRDKAFKA_PATH=$with_librdkafka ;;
+esac
+if test -d "$LIBRDKAFKA_PATH"; then
+ NCBI_FIX_DIR(LIBRDKAFKA_PATH)
+ for d in "$LIBRDKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+ "$LIBRDKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+ "$LIBRDKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \
+ "$LIBRDKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do
+ if test -d "$d"; then
+ LIBRDKAFKA_PATH=$d
+ NCBI_FIX_DIR(LIBRDKAFKA_PATH)
+ break
+ fi
+ done
+fi
+
+NCBI_CHECK_THIRD_PARTY_LIB_EX(librdkafka, LIBRDKAFKA, rdkafka,
+ [AC_LANG_PROGRAM([[@%:@include <librdkafka/rdkafka.h>]],
+ [[rd_kafka_conf_t *conf = rd_kafka_conf_new();]])])
+if test "$with_librdkafka" != no -a \
+ -f "$LIBRDKAFKA_PATH/lib$bit64_sfx/librdkafka-static.a"; then
+ LIBRDKAFKA_STATIC_LIBS="-L$LIBRDKAFKA_PATH/lib$bit64_sfx -lrdkafka-static"
+else
+ LIBRDKAFKA_STATIC_LIBS=$LIBRDKAFKA_LIBS
+fi
+
+case "$with_cppkafka" in
+ yes | no | '' ) ;;
+ * ) CPPKAFKA_PATH=$with_cppkafka ;;
+esac
+if test -d "$CPPKAFKA_PATH"; then
+ NCBI_FIX_DIR(CPPKAFKA_PATH)
+ for d in "$CPPKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+ "$CPPKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx$bit64_sfx" \
+ "$CPPKAFKA_PATH/$compiler_vpfx$DEBUG_SFX$mt_sfx" \
+ "$CPPKAFKA_PATH/$compiler_pfx$DEBUG_SFX$mt_sfx"; do
+ if test -d "$d"; then
+ CPPKAFKA_PATH=$d
+ NCBI_FIX_DIR(CPPKAFKA_PATH)
+ break
+ fi
+ done
+fi
+
+NCBI_CHECK_THIRD_PARTY_LIB(cppkafka,
+ [AC_LANG_PROGRAM([[@%:@include <cppkafka/configuration.h>]],
+ [[cppkafka::Configuration cfg; cfg.set("foo", "bar");]])],
+ [$LIBRDKAFKA_LIBS], [], [$LIBRDKAFKA_INCLUDE])
+if test "$with_cppkafka" != no -a \
+ -f "$CPPKAFKA_PATH/lib$bit64_sfx/libcppkafka-static.a"; then
+ CPPKAFKA_STATIC_LIBS="-L$CPPKAFKA_PATH/lib$bit64_sfx -lcppkafka-static $LIBRDKAFKA_STATIC_LIBS"
+else
+ CPPKAFKA_STATIC_LIBS=$CPPKAFKA_LIBS
+fi
+
### Restore original compiler/linker flags
LIBS="$orig_LIBS"
CPPFLAGS="$orig_CPPFLAGS"
AC_SUBST(MSGSL_INCLUDE)
AC_SUBST(AWS_SDK_STATIC_LIBS)
AC_SUBST(HIREDIS_STATIC_LIBS)
+AC_SUBST(APACHE_ARROW_STATIC_LIBS)
+AC_SUBST(LIBRDKAFKA_STATIC_LIBS)
+AC_SUBST(CPPKAFKA_STATIC_LIBS)
AC_SUBST(ncbi_xreader_pubseqos)
AC_SUBST(ncbi_xreader_pubseqos2)
AC_SUBST(UNLESS_PUBSEQOS)
echo "[`date`]"
-svn_location=`echo '$HeadURL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.10.1/c++/src/build-system/install.sh.in $' | sed "s%\\$[H]eadURL: *\\([^$][^$]*\\) \\$.*%\\1%"`
+svn_location=`echo '$HeadURL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.11.0/c++/src/build-system/install.sh.in $' | sed "s%\\$[H]eadURL: *\\([^$][^$]*\\) \\$.*%\\1%"`
svn_revision=`echo '$Revision: 541872 $' | sed "s%\\$[R]evision: *\\([^$][^$]*\\) \\$.*%\\1%"`
script_name=`basename $0`
-# $Id: project_tree_builder.ini 607715 2020-05-06 17:37:02Z ivanov $
+# $Id: project_tree_builder.ini 617210 2020-09-28 17:22:08Z ivanov $
###############################################################################
#----------------------------------------------------------------------------
# Location of custom code generators
-CustomCodeGenerator.proto = \\\\snowman\\win-coremake\\Lib\\ThirdParty\\grpc\\$(msvc_3rd)\\1.21.1-ncbi1\\bin\\ReleaseDLL
+CustomCodeGenerator.proto = \\\\snowman\\win-coremake\\Lib\\ThirdParty\\grpc\\$(msvc_3rd)\\1.28.1\\bin\\ReleaseDLL
XCode_CustomCodeGenerator.proto = /netopt/ncbi_tools/grpc-1.28.1-ncbi1/Release/bin
#----------------------------------------------------------------------------
ThirdParty_GLEW = $(ThirdPartyBasePath)\\glew\\$(msvc_3rd)\\1.5.8
ThirdParty_GL2PS = $(ThirdPartyBasePath)\\gl2ps\\$(msvc_3rd)\\1.4.0
ThirdParty_GNUTLS = $(ThirdPartyBasePath)\\gnutls\\$(msvc_3rd)\\3.4.9
-ThirdParty_GRPC = $(ThirdPartyBasePath)\\grpc\\$(msvc_3rd)\\1.21.1-ncbi1
+ThirdParty_GRPC = $(ThirdPartyBasePath)\\grpc\\$(msvc_3rd)\\1.28.1
ThirdParty_INFLUXDB = $(ThirdPartyBasePath)\\influxdb\\$(msvc_3rd)\\20190426
###ThirdParty_ICU = $(ThirdPartyBasePath)\\icu\\$(msvc_3rd)\\3.2
ThirdParty_JDK = $(ThirdPartyBasePath)\\jdk\\1.6.0_25
ThirdParty_XML = $(ThirdPartyBasePath)\\xml\\$(msvc_3rd)\\2.7.8
ThirdParty_XSLT = $(ThirdPartyBasePath)\\xslt\\$(msvc_3rd)\\1.1.26
ThirdParty_Z = $(ThirdPartyBasePath)\\z\\$(msvc_3rd)\\1.2.11
-ThirdParty_VDB = $(ThirdPartyVDBBasePath)\\vdb\\vdb-versions\\2.10.5
+ThirdParty_VDB = $(ThirdPartyVDBBasePath)\\vdb\\vdb-versions\\2.10.8
PYTHON_PATH = $(ThirdPartyAppsBasePath)\\Python252\\$(msvc_3rd)
ThirdParty_wxWidgets = $(XCode_ThirdPartyBasePath)/wxWidgets-3.1.3-ncbi1
ThirdParty_FreeType = /opt/X11
ThirdParty_FTGL = $(XCode_ThirdPartyBasePath)/ftgl-2.1.3-rc5
-ThirdParty_VDB = $(XCode_ThirdPartyVDBBasePath)/vdb/vdb-versions/2.10.5
+ThirdParty_VDB = $(XCode_ThirdPartyVDBBasePath)/vdb/vdb-versions/2.10.8
ThirdParty_GMP = $(Xcode_ThirdPartyBasePath)/gmp-6.0.0a
ThirdParty_Nettle = $(Xcode_ThirdPartyBasePath)/nettle-3.1.1
ThirdParty_GNUTLS = $(Xcode_ThirdPartyBasePath)/gnutls-3.4.0
DEFINES = _WIN32_WINNT=0x0600
[GRPC.debug]
LIBPATH = $(ThirdParty_GRPC)\\lib\\DebugDLL
-LIB = grpc++.lib grpc.lib gpr.lib address_sorting.lib cares.lib libprotobufd.lib boringssl.lib boringcrypto.lib
+LIB = grpc++.lib grpc.lib gpr.lib address_sorting.lib cares.lib libprotobufd.lib upb.lib crypto.lib ssl.lib absl_throw_delegate.lib absl_strings.lib absl_bad_optional_access.lib absl_str_format_internal.lib absl_raw_logging_internal.lib absl_int128.lib
[GRPC.release]
LIBPATH = $(ThirdParty_GRPC)\\lib\\ReleaseDLL
-LIB = grpc++.lib grpc.lib gpr.lib address_sorting.lib cares.lib libprotobuf.lib boringssl.lib boringcrypto.lib
+LIB = grpc++.lib grpc.lib gpr.lib address_sorting.lib cares.lib libprotobuf.lib upb.lib crypto.lib ssl.lib absl_throw_delegate.lib absl_strings.lib absl_bad_optional_access.lib absl_str_format_internal.lib absl_raw_logging_internal.lib absl_int128.lib
[GRPC.xcode]
INCLUDE = $(ThirdParty_GRPC)/include
[GRPC.xcode.debug]
INCLUDE = $(ThirdParty_GRPC)/Debug/include
LIBPATH = $(ThirdParty_GRPC)/Debug/lib
-LIB = -lgrpc++ -lgrpc -lgpr -laddress_sorting -lupb -lcares -lprotobufd -labsl_bad_optional_access -labsl_str_format_internal -labsl_strings -labsl_strings_internal -labsl_base -labsl_spinlock_wait -labsl_dynamic_annotations -labsl_int128 -labsl_throw_delegate -labsl_raw_logging_internal -labsl_log_severity -lssl -lcrypto
+LIB = -lgrpc++ -lgrpc -lgpr -laddress_sorting -lupb -lcares -lprotobufd -labsl_bad_optional_access -labsl_str_format_internal -labsl_strings -labsl_strings_internal -labsl_base -labsl_spinlock_wait -labsl_dynamic_annotations -labsl_int128 -labsl_throw_delegate -labsl_raw_logging_internal -labsl_log_severity -lboringssl -lboringcrypto
[GRPC.xcode.release]
INCLUDE = $(ThirdParty_GRPC)/Release/include
LIBPATH = $(ThirdParty_GRPC)/Release/lib
-LIB = -lgrpc++ -lgrpc -lgpr -laddress_sorting -lupb -lcares -lprotobuf -labsl_bad_optional_access -labsl_str_format_internal -labsl_strings -labsl_strings_internal -labsl_base -labsl_spinlock_wait -labsl_dynamic_annotations -labsl_int128 -labsl_throw_delegate -labsl_raw_logging_internal -labsl_log_severity -lssl -lcrypto
+LIB = -lgrpc++ -lgrpc -lgpr -laddress_sorting -lupb -lcares -lprotobuf -labsl_bad_optional_access -labsl_str_format_internal -labsl_strings -labsl_strings_internal -labsl_base -labsl_spinlock_wait -labsl_dynamic_annotations -labsl_int128 -labsl_throw_delegate -labsl_raw_logging_internal -labsl_log_severity -lboringssl -lboringcrypto
[HAVE_LIBGRPC]
Component = PROTOBUF GRPC
@script_shell@
-# $Id: relocate.sh.in 608163 2020-05-12 16:03:04Z blastadm $
+# $Id: relocate.sh.in 617724 2020-10-06 07:11:17Z blastadm $
# Author: Denis Vakatov, NCBI
#
# Adjust paths to this build tree and the relevant source tree
-/* $Id: ncbi_param.cpp 598497 2019-12-10 14:23:27Z grichenk $
+/* $Id: ncbi_param.cpp 608309 2020-05-14 12:35:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
}
const char* dvalue = default_value? default_value: "";
+ if ( src ) *src = default_value? CParamBase::eSource_Default: CParamBase::eSource_NotSet;
#ifdef NCBI_PARAM_ENABLE_CONFIG_DUMP
if ( s_CanDumpConfig() ) {
if ( section && *section ) {
-/* $Id: ncbi_stack.cpp 569055 2018-08-15 17:40:18Z vasilche $
+/* $Id: ncbi_stack.cpp 613683 2020-08-11 17:27:52Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
+/// Substrings that mark a stack frame as internal noise.
+/// CStackTrace::Write() omits every frame whose textual form contains
+/// any of these substrings.
+static const vector<string> s_StackFilters {
+    "ncbi::CStackTrace::",
+    "ncbi::CStackTraceImpl::",
+    "ncbi::CException::",
+    "backward::"
+};
+
void CStackTrace::Write(CNcbiOstream& os) const
{
x_ExpandStackTrace();
}
ITERATE(TStack, it, m_Stack) {
- os << m_Prefix << it->AsString() << endl;
+        const string s = it->AsString();
+        // Drop frames matching any entry of s_StackFilters.
+        // The filter is bound by const reference: iterating by value
+        // would copy a std::string for every filter on every frame.
+        bool skip = false;
+        for (const auto& filter : s_StackFilters) {
+            if (s.find(filter) != NPOS) {
+                skip = true;
+                break;
+            }
+        }
+        if (skip) continue;
+        os << m_Prefix << s << endl;
}
}
-/* $Id: ncbi_system.cpp 601275 2020-02-04 21:52:35Z vakatov $
+/* $Id: ncbi_system.cpp 613789 2020-08-12 18:02:48Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
# define HAVE_MADVISE 1
#endif //NCBI_OS_UNIX
+#if defined(NCBI_OS_LINUX)
+# include <sched.h>
+#endif
+
#ifdef NCBI_OS_DARWIN
extern "C" {
# include <mach/mach.h>
/////////////////////////////////////////////////////////////////////////////
//
-// SetHeapLimit
+// Memory limits
//
#ifdef USE_SETMEMLIMIT
rl.rlim_cur = rl.rlim_max = RLIM_INFINITY;
}
if (setrlimit(RLIMIT_DATA, &rl) != 0) {
+ CNcbiError::SetFromErrno();
return false;
}
# if !defined(NCBI_OS_SOLARIS)
if (setrlimit(RLIMIT_AS, &rl) != 0) {
+ CNcbiError::SetFromErrno();
return false;
}
# endif //NCBI_OS_SOLARIS
rlimit rl;
if (getrlimit(RLIMIT_DATA, &rl) != 0) {
+ CNcbiError::SetFromErrno();
return false;
}
if ( max_size ) {
rl.rlim_cur = RLIM_INFINITY;
}
if (setrlimit(RLIMIT_DATA, &rl) != 0) {
+ CNcbiError::SetFromErrno();
return false;
}
# if !defined(NCBI_OS_SOLARIS)
rlimit rlas;
if (getrlimit(RLIMIT_AS, &rlas) != 0) {
+ CNcbiError::SetFromErrno();
return false;
}
rl.rlim_max = rlas.rlim_max;
if (setrlimit(RLIMIT_AS, &rl) != 0) {
+ CNcbiError::SetFromErrno();
return false;
}
# endif //NCBI_OS_SOLARIS
size_t cur_soft_limit = 0;
rlimit rl;
if (getrlimit(RLIMIT_DATA, &rl) != 0) {
+ CNcbiError::SetFromErrno();
return false;
}
if ( max_size ) {
rl.rlim_max = RLIM_INFINITY;
}
if (setrlimit(RLIMIT_DATA, &rl) != 0) {
+ CNcbiError::SetFromErrno();
return false;
}
# if !defined(NCBI_OS_SOLARIS)
rlimit rlas;
if (getrlimit(RLIMIT_AS, &rlas) != 0) {
+ CNcbiError::SetFromErrno();
return false;
}
if ( max_size ) {
rlas.rlim_max = RLIM_INFINITY;
}
if (setrlimit(RLIMIT_AS, &rlas) != 0) {
+ CNcbiError::SetFromErrno();
return false;
}
# endif //NCBI_OS_SOLARIS
rl.rlim_cur = rl.rlim_max = RLIM_INFINITY;
}
if (setrlimit(RLIMIT_DATA, &rl) != 0) {
+ CNcbiError::SetFromErrno();
return false;
}
s_MemoryLimitSoft = max_size;
}
+/// Return the current soft limit on the address space (RLIMIT_AS).
+///
+/// The value is queried from the kernel on every call, because the cached
+/// s_MemoryLimit* values may not reflect the limits really in effect.
+///
+/// @return
+///   The soft limit; 0 if the limit is unlimited (RLIM_INFINITY), if the
+///   query failed (CNcbiError is set from errno), or on Solaris, where the
+///   query is not supported (CNcbiError is set to eNotSupported).
+size_t GetVirtualMemoryLimitSoft(void)
+{
+#if defined(NCBI_OS_SOLARIS)
+    CNcbiError::Set(CNcbiError::eNotSupported);
+    return 0;
+#else
+    rlimit as_limit = {0,0};
+    if (getrlimit(RLIMIT_AS, &as_limit) != 0) {
+        CNcbiError::SetFromErrno();
+        return 0;
+    }
+    return (as_limit.rlim_cur == RLIM_INFINITY) ? 0 : as_limit.rlim_cur;
+#endif
+}
+
+
+/// Return the current hard limit on the address space (RLIMIT_AS).
+///
+/// The value is queried from the kernel on every call, because the cached
+/// s_MemoryLimit* values may not reflect the limits really in effect.
+///
+/// @return
+///   The hard limit; 0 if the limit is unlimited (RLIM_INFINITY), if the
+///   query failed (CNcbiError is set from errno), or on Solaris, where the
+///   query is not supported (CNcbiError is set to eNotSupported).
+size_t GetVirtualMemoryLimitHard(void)
+{
+#if defined(NCBI_OS_SOLARIS)
+    CNcbiError::Set(CNcbiError::eNotSupported);
+    return 0;
+#else
+    rlimit as_limit = {0,0};
+    if (getrlimit(RLIMIT_AS, &as_limit) != 0) {
+        CNcbiError::SetFromErrno();
+        return 0;
+    }
+    return (as_limit.rlim_max == RLIM_INFINITY) ? 0 : as_limit.rlim_max;
+#endif
+}
+
+
#else
bool SetMemoryLimit(size_t max_size,
TLimitsPrintHandler handler,
TLimitsPrintParameter parameter)
{
- return false;
+ CNcbiError::Set(CNcbiError::eNotSupported);
+ return false;
}
bool SetMemoryLimitSoft(size_t max_size,
TLimitsPrintHandler handler,
TLimitsPrintParameter parameter)
{
- return false;
+ CNcbiError::Set(CNcbiError::eNotSupported);
+ return false;
}
bool SetMemoryLimitHard(size_t max_size,
TLimitsPrintHandler handler,
TLimitsPrintParameter parameter)
{
- return false;
+ CNcbiError::Set(CNcbiError::eNotSupported);
+ return false;
}
bool SetHeapLimit(size_t max_size,
TLimitsPrintHandler handler,
TLimitsPrintParameter parameter)
{
- return false;
+ CNcbiError::Set(CNcbiError::eNotSupported);
+ return false;
+}
+
+size_t GetVirtualMemoryLimitSoft(void)
+{
+ CNcbiError::Set(CNcbiError::eNotSupported);
+ return 0;
+}
+
+size_t GetVirtualMemoryLimitHard(void)
+{
+ CNcbiError::Set(CNcbiError::eNotSupported);
+ return 0;
}
#endif //USE_SETMEMLIMIT
}
+/// Return the number of CPUs the current process is allowed to run on,
+/// as given by its affinity mask.
+///
+/// @return
+///   Number of CPUs in the affinity mask; 0 if it cannot be determined or
+///   the platform is not supported.  On Linux, 1 is returned without
+///   consulting the mask when GetCpuCount() reports a single CPU.
+unsigned int CSystemInfo::GetCpuCountAllowed(void)
+{
+
+#if defined(NCBI_OS_MSWIN)
+
+    DWORD_PTR proc_mask = 0, sys_mask = 0;
+    if (!::GetProcessAffinityMask(::GetCurrentProcess(), &proc_mask, &sys_mask)) {
+        return 0;
+    }
+    unsigned int n = 0;  // number of bits set in proc_mask
+    for (; proc_mask; proc_mask >>= 1) {
+        n += proc_mask & 1;
+    }
+    return n;
+
+#elif defined(NCBI_OS_LINUX)
+
+    unsigned int total_cpus = CSystemInfo::GetCpuCount();
+    if (total_cpus == 1) {
+        // GetCpuCount() returns 1 if unable to get real number
+        return 1;
+    }
+    // Standard type cpu_set_t can be limited if used directly,
+    // so use dynamic allocation approach.
+    //
+    // sched_getaffinity() fails (EINVAL) when the supplied set is smaller
+    // than the kernel's own affinity mask, which can be wider than the
+    // CPU count reported by GetCpuCount().  So probe with growing set
+    // sizes instead of giving up after the first attempt.  The number of
+    // probes is bounded, so a failure for any other reason still ends
+    // with the usual "unknown" result.
+    const int    kMaxProbes = 16;
+    unsigned int capacity   = total_cpus ? total_cpus : 1;
+
+    for (int probe = 0;  probe < kMaxProbes;  ++probe, capacity *= 2) {
+        cpu_set_t* cpuset_ptr = CPU_ALLOC(capacity);
+        if (cpuset_ptr == NULL) {
+            return 0;
+        }
+        size_t cpuset_size = CPU_ALLOC_SIZE(capacity);
+        CPU_ZERO_S(cpuset_size, cpuset_ptr);
+
+        if (sched_getaffinity(getpid(), cpuset_size, cpuset_ptr) == 0) {
+            int n = CPU_COUNT_S(cpuset_size, cpuset_ptr);
+            CPU_FREE(cpuset_ptr);
+            return (n < 0) ? 0 : static_cast<unsigned int>(n);
+        }
+        CPU_FREE(cpuset_ptr);
+    }
+    return 0;
+
+#endif //NCBI_OS_...
+
+    // TODO: add support for other UNIX versions where possible
+
+    return 0;
+}
+
+
double CSystemInfo::GetUptime(void)
{
#if defined(NCBI_OS_MSWIN)
-/* $Id: ncbiapp.cpp 604618 2020-03-31 13:29:46Z ivanov $
+/* $Id: ncbiapp.cpp 610397 2020-06-16 18:45:55Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
// Constants
//
-static const char* s_ArgLogFile = "-logfile";
-static const char* s_ArgCfgFile = "-conffile";
-static const char* s_ArgVersion = "-version";
-static const char* s_ArgFullVersion = "-version-full";
-static const char* s_ArgDryRun = "-dryrun";
+static const char* s_ArgLogFile = "-logfile";
+static const char* s_ArgCfgFile = "-conffile";
+static const char* s_ArgVersion = "-version";
+static const char* s_ArgFullVersion = "-version-full";
+static const char* s_ArgFullVersionXml = "-version-full-xml";
+static const char* s_ArgFullVersionJson = "-version-full-json";
+static const char* s_ArgDryRun = "-dryrun";
/////////////////////////////////////////////////////////////////////////////
m_DryRun = false;
}
+void CNcbiApplicationAPI::ExecuteOnExitActions()
+{
+ m_OnExitActions.ExecuteActions();
+}
+
CNcbiApplicationAPI::~CNcbiApplicationAPI(void)
{
CThread::sm_IsExiting = true;
+
// Execute exit actions before waiting for all threads to stop.
- m_OnExitActions.ExecuteActions();
+ // NOTE: The exit actions may already be executed by higher-level
+ // destructors. This is a final fail-safe place for this.
+ ExecuteOnExitActions();
+
#if defined(NCBI_THREADS)
CThread::WaitForAllThreads();
#endif
CNcbiApplication::~CNcbiApplication()
{
+ // This earlier execution of the actions allows a safe use of
+ // CNcbiApplication::Instance() from the exit action functions. Instance()
+ // can return NULL pointer if called as part of CNcbiApplicationAPI dtor
+ // when the CNcbiApplication dtor already finished.
+ ExecuteOnExitActions();
}
} else if ( NStr::strcmp(argv[i], s_ArgVersion) == 0 ) {
delete[] v;
// Print VERSION
- cout << GetFullVersion().Print( appname,
- CVersionAPI::fVersionInfo | CVersionAPI::fPackageShort );
+ cout << GetFullVersion().Print( appname, CVersionAPI::fVersionInfo | CVersionAPI::fPackageShort );
diag_context.DiscardMessages();
return 0;
cout << GetFullVersion().Print( appname );
diag_context.DiscardMessages();
return 0;
+ } else if ( NStr::strcmp(argv[i], s_ArgFullVersionXml) == 0 ) {
+ delete[] v;
+ // Print full VERSION in XML format
+ cout << GetFullVersion().PrintXml( appname );
+ diag_context.DiscardMessages();
+ return 0;
+ } else if ( NStr::strcmp(argv[i], s_ArgFullVersionJson) == 0 ) {
+ delete[] v;
+ // Print full VERSION in JSON format
+ cout << GetFullVersion().PrintJson( appname );
+ diag_context.DiscardMessages();
+ return 0;
// Dry run
} else if ( NStr::strcmp(argv[i], s_ArgDryRun) == 0 ) {
void CNcbiApplicationAPI::x_AddDefaultArgs(void)
{
if ( !m_DisableArgDesc ) {
- for(CArgDescriptions* desc : m_ArgDesc->GetAllDescriptions()) {
- if (desc->IsAutoHelpEnabled()) {
- if ((m_HideArgs & fHideHelp) != 0) {
- if (desc->Exist("h")) {
- desc->Delete("h");
+ for(CArgDescriptions* desc : m_ArgDesc->GetAllDescriptions())
+ {
+ if (desc->IsAutoHelpEnabled()) {
+ if ((m_HideArgs & fHideHelp) != 0) {
+ if (desc->Exist("h")) {
+ desc->Delete("h");
+ }
}
}
- }
- if ((m_HideArgs & fHideFullHelp) != 0) {
- if (desc->Exist("help")) {
- desc->Delete("help");
- }
- }
- if ((m_HideArgs & fHideXmlHelp) != 0) {
- if (desc->Exist("xmlhelp")) {
- desc->Delete("xmlhelp");
- }
- }
- if ((m_HideArgs & fHideLogfile) != 0) {
- if (desc->Exist(s_ArgLogFile + 1)) {
- desc->Delete(s_ArgLogFile + 1);
- }
- } else {
- if (!desc->Exist(s_ArgLogFile + 1)) {
- desc->AddOptionalKey
- (s_ArgLogFile+1, "File_Name",
- "File to which the program log should be redirected",
- CArgDescriptions::eOutputFile);
- }
- }
- if ((m_HideArgs & fHideConffile) != 0) {
- if (desc->Exist(s_ArgCfgFile + 1)) {
- desc->Delete(s_ArgCfgFile + 1);
- }
- } else {
- if (!desc->Exist(s_ArgCfgFile + 1)) {
- desc->AddOptionalKey
- (s_ArgCfgFile + 1, "File_Name",
- "Program's configuration (registry) data file",
- CArgDescriptions::eInputFile);
+ if ((m_HideArgs & fHideFullHelp) != 0) {
+ if (desc->Exist("help")) {
+ desc->Delete("help");
+ }
}
- }
- if ((m_HideArgs & fHideVersion) != 0) {
- if (desc->Exist(s_ArgVersion + 1)) {
- desc->Delete(s_ArgVersion + 1);
+ if ((m_HideArgs & fHideXmlHelp) != 0) {
+ if (desc->Exist("xmlhelp")) {
+ desc->Delete("xmlhelp");
+ }
}
- } else {
- if (!desc->Exist(s_ArgVersion + 1)) {
- desc->AddFlag
- (s_ArgVersion + 1,
- "Print version number; ignore other arguments");
+ if ((m_HideArgs & fHideLogfile) != 0) {
+ if (desc->Exist(s_ArgLogFile + 1)) {
+ desc->Delete(s_ArgLogFile + 1);
+ }
+ } else {
+ if (!desc->Exist(s_ArgLogFile + 1)) {
+ desc->AddOptionalKey
+ (s_ArgLogFile+1, "File_Name",
+ "File to which the program log should be redirected",
+ CArgDescriptions::eOutputFile);
+ }
}
- }
- if ((m_HideArgs & fHideFullVersion) != 0) {
- if (desc->Exist(s_ArgFullVersion + 1)) {
- desc->Delete(s_ArgFullVersion + 1);
+ if ((m_HideArgs & fHideConffile) != 0) {
+ if (desc->Exist(s_ArgCfgFile + 1)) {
+ desc->Delete(s_ArgCfgFile + 1);
+ }
+ } else {
+ if (!desc->Exist(s_ArgCfgFile + 1)) {
+ desc->AddOptionalKey
+ (s_ArgCfgFile + 1, "File_Name",
+ "Program's configuration (registry) data file",
+ CArgDescriptions::eInputFile);
+ }
}
- } else {
- if (!desc->Exist(s_ArgFullVersion + 1)) {
- desc->AddFlag
- (s_ArgFullVersion + 1,
- "Print extended version data; ignore other arguments");
+ if ((m_HideArgs & fHideVersion) != 0) {
+ if (desc->Exist(s_ArgVersion + 1)) {
+ desc->Delete(s_ArgVersion + 1);
+ }
+ } else {
+ if (!desc->Exist(s_ArgVersion + 1)) {
+ desc->AddFlag
+ (s_ArgVersion + 1,
+ "Print version number; ignore other arguments");
+ }
}
- }
- if ((m_HideArgs & fHideDryRun) != 0) {
- if (desc->Exist(s_ArgDryRun + 1)) {
- desc->Delete(s_ArgDryRun + 1);
+ if ((m_HideArgs & fHideFullVersion) != 0) {
+ if (desc->Exist(s_ArgFullVersion + 1)) {
+ desc->Delete(s_ArgFullVersion + 1);
+ }
+ if (desc->Exist(s_ArgFullVersionXml+ 1)) {
+ desc->Delete(s_ArgFullVersionXml + 1);
+ }
+ if (desc->Exist(s_ArgFullVersionJson + 1)) {
+ desc->Delete(s_ArgFullVersionJson + 1);
+ }
+ } else {
+ if (!desc->Exist(s_ArgFullVersion + 1)) {
+ desc->AddFlag
+ (s_ArgFullVersion + 1,
+ "Print extended version data; ignore other arguments");
+ }
+ if (!desc->Exist(s_ArgFullVersionXml + 1)) {
+ desc->AddFlag
+ (s_ArgFullVersionXml + 1,
+ "Print extended version data in XML format; ignore other arguments");
+ }
+ if (!desc->Exist(s_ArgFullVersionJson + 1)) {
+ desc->AddFlag
+ (s_ArgFullVersionJson + 1,
+ "Print extended version data in JSON format; ignore other arguments");
+ }
}
- } else {
- if (!desc->Exist(s_ArgDryRun + 1)) {
- desc->AddFlag
- (s_ArgDryRun + 1,
- "Dry run the application: do nothing, only test all preconditions");
+ if ((m_HideArgs & fHideDryRun) != 0) {
+ if (desc->Exist(s_ArgDryRun + 1)) {
+ desc->Delete(s_ArgDryRun + 1);
+ }
+ } else {
+ if (!desc->Exist(s_ArgDryRun + 1)) {
+ desc->AddFlag
+ (s_ArgDryRun + 1,
+ "Dry run the application: do nothing, only test all preconditions");
+ }
}
}
- }
}
}
-/* $Id: ncbiargs.cpp 604618 2020-03-31 13:29:46Z ivanov $
+/* $Id: ncbiargs.cpp 609368 2020-06-01 14:12:44Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
{
if ((arg_desc.GetFlags() & CArgDescriptions::fIgnoreInvalidValue) == 0) {
// Re-process invalid value to throw the same exception
- arg_desc.ProcessArgument(value);
+ return arg_desc.ProcessArgument(value);
// Should never get past ProcessArgument()
}
- if ((arg_desc.GetFlags() & CArgDescriptions::fWarnOnInvalidValue) == 0) {
+ if ((arg_desc.GetFlags() & CArgDescriptions::fWarnOnInvalidValue) != 0) {
ERR_POST_X(22, Warning << "Invalid value " << value <<
" for argument " << arg_desc.GetName() <<
" - argument will be ignored.");
-/* $Id: ncbidiag.cpp 606469 2020-04-22 14:13:58Z ivanov $
+/* $Id: ncbidiag.cpp 615738 2020-09-03 11:26:10Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <common/ncbi_source_ver.h>
#include <common/ncbi_package_ver.h>
#include <corelib/ncbiexpt.hpp>
+#include <corelib/version.hpp>
#include <corelib/ncbi_process.hpp>
#include <corelib/ncbifile.hpp>
#include <corelib/syslog.hpp>
NCBI_PARAM_DECL(bool, Diag, Old_Post_Format);
NCBI_PARAM_DEF_EX(bool, Diag, Old_Post_Format, true, eParam_NoThread,
DIAG_OLD_POST_FORMAT);
-static CSafeStatic<NCBI_PARAM_TYPE(Diag, Old_Post_Format)> s_OldPostFormat;
+static CSafeStatic<NCBI_PARAM_TYPE(Diag, Old_Post_Format)> s_OldPostFormat(
+ CSafeStaticLifeSpan(CSafeStaticLifeSpan::eLifeSpan_Long, 2));
// Auto-print context properties on set/change.
NCBI_PARAM_DECL(bool, Diag, AutoWrite_Context);
CNcbiApplication* ins = CNcbiApplication::Instance();
if (ins) {
Print("ncbi_app_path", ins->GetProgramExecutablePath());
- const CVersion& ver = ins->GetFullVersion();
+ const CVersionAPI& ver = ins->GetFullVersion();
if (!ver.GetBuildInfo().date.empty()) {
Print("ncbi_app_build_date", ver.GetBuildInfo().date);
}
{
CNcbiApplication* ins = CNcbiApplication::Instance();
if (ins) {
- const CVersion& ver = ins->GetFullVersion();
+ const CVersionAPI& ver = ins->GetFullVersion();
const CVersionInfo& vi = ver.GetVersionInfo();
//#if defined (NCBI_SC_VERSION) && NCBI_SC_VERSION <= 21
#if 1
}
+/// Return the textual form of one of the diagnostic filters.
+///
+/// @param what
+///   eDiagFilter_Trace or eDiagFilter_Post selects the filter to report.
+/// @return
+///   The string kept by the selected filter; an empty string for any other
+///   value of 'what' (eDiagFilter_All included), as it does not identify
+///   a single filter.
+extern string GetDiagFilter(EDiagFilter what)
+{
+    CDiagLock lock(CDiagLock::eWrite);
+    switch (what) {
+    case eDiagFilter_Trace:
+        return s_TraceFilter->GetFilterStr();
+    case eDiagFilter_Post:
+        return s_PostFilter->GetFilterStr();
+    default:
+        return kEmptyStr;
+    }
+}
+
+
+/// Append 'filter_str' to the trace filter, the post filter, or both.
+///
+/// @param what
+///   eDiagFilter_Trace, eDiagFilter_Post, or eDiagFilter_All for both.
+/// @param filter_str
+///   Filter text handed over to CDiagFilter::Append().
+extern void AppendDiagFilter(EDiagFilter what, const char* filter_str)
+{
+    const bool to_trace = (what == eDiagFilter_All  ||  what == eDiagFilter_Trace);
+    const bool to_post  = (what == eDiagFilter_All  ||  what == eDiagFilter_Post);
+
+    CDiagLock lock(CDiagLock::eWrite);
+    if ( to_trace ) {
+        s_TraceFilter->Append(filter_str);
+    }
+    if ( to_post ) {
+        s_PostFilter->Append(filter_str);
+    }
+}
+
+
///////////////////////////////////////////////////////
// CNcbiDiag::
-/* $Id: ncbidiag_p.cpp 486111 2015-12-01 17:17:39Z grichenk $
+/* $Id: ncbidiag_p.cpp 611708 2020-07-09 17:56:10Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
void CDiagFilter::Fill(const char* filter_string)
{
try {
+ m_Filter.clear();
CDiagSyntaxParser parser;
CNcbiIstrstream in(filter_string);
parser.Parse(in, *this);
+ m_Filter = filter_string;
}
catch (const CDiagSyntaxParser::TErrorInfo& err_info) {
CNcbiOstrstream message;
}
}
+/// Extend the current filter with additional filter text.
+///
+/// The stored filter string and 'filter_string' are joined with a single
+/// blank and the result is passed to Fill().
+///
+/// @param filter_string
+///   Filter text to add.  A null pointer is ignored and leaves the filter
+///   unchanged.
+void CDiagFilter::Append(const char* filter_string)
+{
+    if ( !filter_string ) {
+        // Concatenating a null pointer to std::string is undefined behavior.
+        return;
+    }
+    // Compose the text in a separate string: Fill() clears m_Filter
+    // before it parses its argument.
+    string new_filter(m_Filter);
+    if ( !new_filter.empty() ) {
+        // Separator is only needed between two pieces of filter text.
+        new_filter += ' ';
+    }
+    new_filter += filter_string;
+    Fill(new_filter.c_str());
+}
+
+
EDiagFilterAction CDiagFilter::Check(const CNcbiDiag& msg,
const CException* ex) const
{
if ( !isspace((unsigned char) symbol) ) {
if ( symbol == '[' ||
symbol == '(' ||
- (symbol == '!' && CT_TO_CHAR_TYPE(in.peek()) == '(')) {
+ symbol == '/' ||
+ (symbol == '!' && CT_TO_CHAR_TYPE(in.peek()) == '(') ||
+ (symbol == '!' && CT_TO_CHAR_TYPE(in.peek()) == '/')) {
in.putback( symbol );
--m_Pos;
state = eStart;
#ifndef CORELIB___NCBIDIAG_P__HPP
#define CORELIB___NCBIDIAG_P__HPP
-/* $Id: ncbidiag_p.hpp 505891 2016-06-29 17:58:41Z gouriano $
+/* $Id: ncbidiag_p.hpp 611708 2020-07-09 17:56:10Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
/// Print state
void Print(ostream& out) const;
+ const string& GetFilterStr(void) const { return m_Filter; }
+
+ void Append(const char* filter_string);
+
private:
/// Check if the filter accepts errcode
EDiagFilterAction x_CheckErrCode(int code, int subcode, EDiagSev sev) const;
private:
typedef deque< AutoPtr<CDiagMatcher> > TMatchers;
+ string m_Filter;
TMatchers m_Matchers;
size_t m_NotMatchersNum;
};
-/* $Id: ncbifile.cpp 604618 2020-03-31 13:29:46Z ivanov $
+/* $Id: ncbifile.cpp 610319 2020-06-15 17:06:08Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
memset(&st, 0, sizeof(st)); \
if (statvfs(path.c_str(), &st) != 0) { \
CNcbiError::SetFromErrno(); \
- NCBI_THROW(CFileErrnoException, eFileSystemInfo, msg); \
+ NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path); \
} \
info->total_space = (Uint8)st.f_bsize * st.f_blocks; \
if (st.f_frsize) { \
memset(&st, 0, sizeof(st)); \
if (statfs(path.c_str(), &st) != 0) { \
CNcbiError::SetFromErrno(); \
- NCBI_THROW(CFileErrnoException, eFileSystemInfo, msg); \
+ NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path); \
} \
info->total_space = (Uint8)st.f_bsize * st.f_blocks; \
info->free_space = (Uint8)st.f_bsize * st.f_bavail; \
&fs_flags,
fs_name,
sizeof(fs_name)/sizeof(fs_name[0])) ) {
- NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + xpath);
+ NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path);
}
info->filename_max = filename_max;
ufs_name = _T_CSTRING(fs_name);
if ( !::GetDiskFreeSpaceEx(_T_XCSTRING(xpath),
(PULARGE_INTEGER)&info->free_space,
(PULARGE_INTEGER)&info->total_space, 0) ) {
- NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + xpath);
+ NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path);
}
}
if ( !::GetDiskFreeSpace(_T_XCSTRING(xpath),
&dwSectPerClust, &dwBytesPerSect,
NULL, NULL) ) {
- NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + xpath);
+ NCBI_THROW(CFileErrnoException, eFileSystemInfo, string(msg) + path);
}
info->block_size = dwBytesPerSect * dwSectPerClust;
}
# elif defined(NCBI_OS_DARWIN) && defined(HAVE_STATFS)
GET_STATFS_INFO;
- // Seems statfs structure on Darwin dont have any information
+ // Seems statfs structure on Darwin doesn't have any information
// about name length, so rely on pathconf() only.
//if (need_name_max) {
// info->filename_max = (unsigned long)st.f_namelen;
-/* $Id: version.cpp 591546 2019-08-16 16:59:06Z vasilche $
+/* $Id: version.cpp 612086 2020-07-15 11:49:39Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
*/
#include <ncbi_pch.hpp>
-#include <corelib/version_api.hpp>
+#include <corelib/version.hpp>
#include <common/ncbi_package_ver.h>
#include <common/ncbi_source_ver.h>
os << "{";
bool need_separator = false;
if (m_Major >= 0) {
- os << "\"major\": \"" << m_Major <<
- "\", \"minor\": \"" << (m_Minor >= 0 ? m_Minor : 0) << "\"";
+ os << "\"major\": " << m_Major <<
+ ", \"minor\": " << (m_Minor >= 0 ? m_Minor : 0);
if (m_PatchLevel >= 0) {
- os << ", \"patch_level\": \"" << m_PatchLevel << "\"";
+ os << ", \"patch_level\": " << m_PatchLevel;
}
need_separator = true;
}
os << "{ \"name\": \"" <<
NStr::JsonEncode(GetComponentName()) <<
"\", \"version_info\": " <<
- CVersionInfo::PrintJson() << endl <<
- m_BuildInfo.PrintJson() << "}" << endl;
+ CVersionInfo::PrintJson() << ",\n" <<
+ " \"build_info\": " <<
+ m_BuildInfo.PrintJson() << "}";
return CNcbiOstrstreamToString(os);
}
{
CNcbiOstrstream os;
bool need_separator = false;
- os << '{' << endl;
+ os << '{';
if ( !date.empty() ) {
os << "\"" << ExtraNameJson(eBuildDate) << "\": \"" << NStr::JsonEncode(date) << '\"';
need_separator = true;
}
if ( !tag.empty() ) {
- if ( need_separator ) os << ',' << endl;
+ if ( need_separator ) os << ", ";
os << '\"' << ExtraNameJson(eBuildTag) << "\": \"" << NStr::JsonEncode(tag) << '\"';
need_separator = true;
}
for( const auto& e : m_extra) {
- if ( need_separator ) os << "," << endl;
+ if ( need_separator ) os << ", ";
os << '\"' << ExtraNameJson(e.first) << "\": \"" << NStr::JsonEncode(e.second) << '\"';
need_separator = true;
}
- if ( need_separator ) os << endl;
os << '}';
return CNcbiOstrstreamToString(os);
}
if (flags & fComponents) {
if ( need_separator ) os << ",\n";
- os << " \"components\": [";
+ os << " \"component\": [";
need_separator = false;
for (const auto& c : m_Components) {
if ( need_separator ) os << ",";
-/* $Id: dbapi_conn_factory.cpp 600085 2020-01-11 15:56:54Z mcelhany $
+/* $Id: dbapi_conn_factory.cpp 610945 2020-06-25 18:31:37Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <dbapi/driver/impl/dbapi_driver_utils.hpp>
#include <dbapi/driver/impl/dbapi_impl_connection.hpp>
#include <dbapi/driver/impl/dbapi_impl_context.hpp>
+#include <dbapi/driver/impl/dbapi_pool_balancer.hpp>
#include <dbapi/driver/public.hpp>
#include <dbapi/error_codes.hpp>
-#include "dbapi_pool_balancer.hpp"
#include <corelib/ncbiapp.hpp>
#include <corelib/request_ctx.hpp>
&& !service_name.empty() ) {
balancer.Reset(new CDBPoolBalancer
(service_name, params.GetParam("pool_name"),
- ctx.driver_ctx,
- rt_data.GetServerOptions(service_name)));
+ rt_data.GetServerOptions(service_name),
+ &ctx.driver_ctx));
}
for ( ; !t_con && alternatives > 0; --alternatives ) {
TSvrRef dsp_srv;
// In this case we even won't try to map it.
else if (!service_name.empty()) {
if (balancer.NotEmpty()) {
- dsp_srv = balancer->GetServer(&t_con, params);
+ dsp_srv = balancer->GetServer(&t_con, ¶ms);
}
if (dsp_srv.Empty()) {
dsp_srv = rt_data.GetDispatchedServer(service_name);
balancer.Reset
(new CDBPoolBalancer
(service_name, params.GetParam("pool_name"),
- ctx.driver_ctx,
- rt_data.GetServerOptions(service_name, true)));
+ rt_data.GetServerOptions(service_name, true),
+ &ctx.driver_ctx));
}
full_retry_made = true;
continue;
-/* $Id: dbapi_impl_context.cpp 600087 2020-01-11 19:46:51Z mcelhany $
+/* $Id: dbapi_impl_context.cpp 610920 2020-06-25 13:37:30Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
++total_cnt;
}
}
+ mg.Release();
vector< AutoPtr<CDB_Connection> > conns(pool_min);
for (int i = total_cnt; i < pool_min; ++i) {
try {
-/* $Id: dbapi_pool_balancer.cpp 548289 2017-10-12 14:54:18Z ucko $
+/* $Id: dbapi_pool_balancer.cpp 610945 2020-06-25 18:31:37Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <ncbi_pch.hpp>
-#include "dbapi_pool_balancer.hpp"
+#include <dbapi/driver/impl/dbapi_pool_balancer.hpp>
#include <dbapi/driver/dbapi_conn_factory.hpp>
#include <dbapi/driver/impl/dbapi_impl_context.hpp>
#include <dbapi/error_codes.hpp>
CDBPoolBalancer::CDBPoolBalancer(const string& service_name,
const string& pool_name,
- I_DriverContext& driver_ctx,
- const IDBServiceMapper::TOptions& options)
+ const IDBServiceMapper::TOptions& options,
+ I_DriverContext* driver_ctx)
: m_DriverCtx(driver_ctx), m_TotalCount(0U)
{
- bool is_ftds = NStr::StartsWith(driver_ctx.GetDriverName(), "ftds");
+ bool is_ftds = (driver_ctx == nullptr
+ || NStr::StartsWith(driver_ctx->GetDriverName(), "ftds"));
for (auto it : options) {
CTempString name = it->GetName();
auto key = impl::MakeEndpointKey(it->GetHost(), it->GetPort());
}
const impl::CDriverContext* ctx_impl
- = dynamic_cast<const impl::CDriverContext*>(&driver_ctx);
+ = dynamic_cast<const impl::CDriverContext*>(driver_ctx);
impl::CDriverContext::TCounts counts;
if (ctx_impl == NULL) {
- ERR_POST_X(1, Warning << "Called with non-standard IDriverContext");
+ if (driver_ctx != nullptr) {
+ ERR_POST_X(1, Warning <<
+ "Called with non-standard IDriverContext");
+ }
} else if (pool_name.empty()) {
ctx_impl->GetCountsForService(service_name, &counts);
} else {
}
TSvrRef CDBPoolBalancer::GetServer(CDB_Connection** conn,
- const CDBConnParams& params)
+ const CDBConnParams* params)
{
TSvrRef result;
impl::TEndpointKey conn_key = 0;
return result;
}
- if (/* m_TotalCount > 1 && */ conn != NULL) {
- string pool_name = params.GetParam("pool_name");
- CDBConnParams_DNC dnc_params(params);
- *conn = IDBConnectionFactory::CtxMakeConnection(m_DriverCtx,
+ if (/* m_TotalCount > 1 && */ conn != nullptr && params != nullptr
+ && m_DriverCtx != nullptr) {
+ string pool_name = params->GetParam("pool_name");
+ CDBConnParams_DNC dnc_params(*params);
+ *conn = IDBConnectionFactory::CtxMakeConnection(*m_DriverCtx,
dnc_params);
if (*conn != NULL) {
const string& server_name = (*conn)->ServerName();
"Unrecognized endpoint for existing connection to "
<< impl::ConvertN2A(host) << ":" << port
<< " (" << server_name << ')');
- excess = m_DriverCtx.NofConnections(server_name, pool_name);
+ excess = m_DriverCtx->NofConnections(server_name, pool_name);
result.Reset(&*it->second.ref);
} else {
double scale_factor = m_TotalCount / total_ranking;
<< ":" << port << " (" << server_name
<< ") for turnover; projected excess count " << excess);
if (excess > 0.0) {
- string pool_max_str = params.GetParam("pool_maxsize");
+ string pool_max_str = params->GetParam("pool_maxsize");
unsigned int pool_max = 0u;
if ( !pool_max_str.empty() && pool_max_str != "default") {
NStr::StringToNumeric(pool_max_str, &pool_max,
// This call might not close the exact connection we
// considered, but closing any connection to the
// relevant server is sufficient here.
- m_DriverCtx.CloseUnusedConnections
- (server_name, params.GetParam("pool_name"), 1u);
+ m_DriverCtx->CloseUnusedConnections
+ (server_name, params->GetParam("pool_name"), 1u);
}
}
}
+++ /dev/null
-#ifndef DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP
-#define DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP
-
-/* $Id: dbapi_pool_balancer.hpp 548289 2017-10-12 14:54:18Z ucko $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Author: Aaron Ucko
- *
- */
-
-/// @file dbapi_pool_balancer.hpp
-/// Help distribute connections within a pool across servers.
-
-#include <dbapi/driver/impl/dbapi_driver_utils.hpp>
-
-/** @addtogroup DBAPI
- *
- * @{
- */
-
-BEGIN_NCBI_SCOPE
-
-class CDBPoolBalancer : public CObject
-{
-public:
- CDBPoolBalancer(const string& service_name,
- const string& pool_name,
- I_DriverContext& driver_ctx,
- const IDBServiceMapper::TOptions& options);
-
- TSvrRef GetServer(CDB_Connection** conn, const CDBConnParams& params);
-
-private:
- struct SEndpointInfo {
- SEndpointInfo()
- : effective_ranking(0.0), ideal_count(0.0), actual_count(0U),
- penalty_level(0U)
- { }
-
- CRef<CDBServerOption> ref;
- double effective_ranking;
- double ideal_count;
- unsigned int actual_count;
- unsigned int penalty_level;
- };
- typedef map<impl::TEndpointKey, SEndpointInfo> TEndpoints;
-
- impl::TEndpointKey x_NameToKey(CTempString& name) const;
-
- TEndpoints m_Endpoints;
- multiset<double> m_Rankings;
- I_DriverContext& m_DriverCtx;
- unsigned int m_TotalCount;
-};
-
-END_NCBI_SCOPE
-
-/* @} */
-
-#endif /* DBAPI_DRIVER___DBAPI_POOL_BALANCER__HPP */
-/* ===========================================================================
+/* $Id: snpptis.cpp 615550 2020-09-01 13:13:11Z fukanchi $
+ * ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
#ifdef HAVE_LIBGRPC
# include <objects/dbsnp/primary_track/impl/snpptis_impl.hpp>
# include <corelib/ncbi_param.hpp>
+# include <corelib/ncbi_system.hpp>
#endif
BEGIN_NCBI_NAMESPACE;
}
+#ifdef HAVE_LIBGRPC
+const char* const kSection = "ID2SNP"; // configuration section used for the address and all retry/timeout parameters below
+const char* const kParam_PTISName = "PTIS_NAME"; // parameter passed to g_NCBI_GRPC_GetAddress() to find the PTIS service
+const char* const kParam_Retry = "RETRY"; // read into max_retries
+const char* const kParam_Timeout = "TIMEOUT"; // read into timeout: deadline of the first attempt, in seconds
+const char* const kParam_TimeoutMul = "TIMEOUT_MULTIPLIER"; // read into timeout_mul: factor applied to the deadline after a failed attempt
+const char* const kParam_TimeoutInc = "TIMEOUT_INCREMENT"; // read into timeout_inc: amount added to the deadline after a failed attempt
+const char* const kParam_TimeoutMax = "TIMEOUT_MAX"; // read into timeout_max: upper bound for the deadline
+const char* const kParam_WaitTime = "WAIT_TIME"; // read into wait_time: pause before the first retry, in seconds
+const char* const kParam_WaitTimeMul = "WAIT_TIME_MULTIPLIER"; // read into wait_time_mul: factor applied to the pause after a failed attempt
+const char* const kParam_WaitTimeInc = "WAIT_TIME_INCREMENT"; // read into wait_time_inc: amount added to the pause after a failed attempt
+const char* const kParam_WaitTimeMax = "WAIT_TIME_MAX"; // read into wait_time_max: upper bound for the pause
+const int kDefault_Retry = 5; // the kDefault_* values are passed as the fallback argument of g_GetConfigInt()/g_GetConfigDouble()
+const float kDefault_Timeout = 1;
+const float kDefault_TimeoutMul = 1.5;
+const float kDefault_TimeoutInc = 0;
+const float kDefault_TimeoutMax = 10;
+const float kDefault_WaitTime = 0.5;
+const float kDefault_WaitTimeMul = 1.2;
+const float kDefault_WaitTimeInc = 0.2;
+const float kDefault_WaitTimeMax = 5;
+#endif
+
+
bool CSnpPtisClient::IsEnabled()
{
#ifdef HAVE_LIBGRPC
- return CGRPCClientContext::IsImplemented();
+ if ( !CGRPCClientContext::IsImplemented() ) {
+ return false;
+ }
+ // check if there's valid address
+ int source;
+ auto addr = g_NCBI_GRPC_GetAddress(kSection, kParam_PTISName, nullptr, &source);
+#ifndef NCBI_OS_LINUX
+ if ( source == CParamBase::eSource_Default ) {
+ // default grpc link to linkerd daemon works on Linux only
+ return false;
+ }
+#endif
+ return !addr.empty();
#else
return false;
#endif
#ifdef HAVE_LIBGRPC
CSnpPtisClient_Impl::CSnpPtisClient_Impl()
{
- channel = grpc::CreateChannel(g_NCBI_GRPC_GetAddress("ID2SNP", "PTIS_NAME"),
- grpc::InsecureChannelCredentials());
-
+ grpc::ChannelArguments args; // default-constructed; no channel options are set before it is used
+ string address = g_NCBI_GRPC_GetAddress(kSection, kParam_PTISName); // service address looked up under kSection / kParam_PTISName
+ //LOG_POST(Trace<<"CSnpPtisClient: connecting to "<<address);
+ channel = grpc::CreateCustomChannel(address, grpc::InsecureChannelCredentials(), args); // channel created with InsecureChannelCredentials
+ max_retries = g_GetConfigInt(kSection, kParam_Retry, nullptr, kDefault_Retry); // compared against the failed-attempt counter in x_GetPrimarySnpTrack()
+ timeout = g_GetConfigDouble(kSection, kParam_Timeout , nullptr, kDefault_Timeout ); // deadline of the first attempt, in seconds
+ timeout_mul = g_GetConfigDouble(kSection, kParam_TimeoutMul, nullptr, kDefault_TimeoutMul); // deadline growth: multiply...
+ timeout_inc = g_GetConfigDouble(kSection, kParam_TimeoutInc, nullptr, kDefault_TimeoutInc); // ...then add...
+ timeout_max = g_GetConfigDouble(kSection, kParam_TimeoutMax, nullptr, kDefault_TimeoutMax); // ...then cap
+ wait_time = g_GetConfigDouble(kSection, kParam_WaitTime , nullptr, kDefault_WaitTime ); // pause before the first retry, in seconds
+ wait_time_mul = g_GetConfigDouble(kSection, kParam_WaitTimeMul, nullptr, kDefault_WaitTimeMul); // pause growth: multiply...
+ wait_time_inc = g_GetConfigDouble(kSection, kParam_WaitTimeInc, nullptr, kDefault_WaitTimeInc); // ...then add...
+ wait_time_max = g_GetConfigDouble(kSection, kParam_WaitTimeMax, nullptr, kDefault_WaitTimeMax); // ...then cap
+
stub = ncbi::grpcapi::dbsnp::primary_track::DbSnpPrimaryTrack::NewStub(channel);
}
string CSnpPtisClient_Impl::x_GetPrimarySnpTrack(const TRequest& request)
{
- CGRPCClientContext context;
-
- ncbi::grpcapi::dbsnp::primary_track::PrimaryTrackReply reply;
-
- auto status = stub->ForSeqId(&context, request, &reply);
+ int cur_retry = 0; // number of failed attempts so far
+ float cur_timeout = timeout; // deadline for the current attempt, in seconds; recomputed after each failure
+ float cur_wait_time = wait_time; // pause before the next attempt, in seconds; recomputed after each failure
+ for ( ;; ) { // no loop condition: the loop is left only by a return or by NCBI_THROW
+ CGRPCClientContext context; // a new context is created for every attempt
+ std::chrono::system_clock::time_point deadline =
+ std::chrono::system_clock::now() + std::chrono::microseconds(Int8(cur_timeout*1e6)); // seconds -> microseconds
+ context.set_deadline(deadline); // absolute deadline = now + cur_timeout
- if ( !status.ok() ) {
+ ncbi::grpcapi::dbsnp::primary_track::PrimaryTrackReply reply;
+
+ auto status = stub->ForSeqId(&context, request, &reply);
+
+ if ( status.ok() ) { // success: return the reply's na_track_acc_with_filter value
+ return reply.na_track_acc_with_filter();
+ }
+
if ( status.error_code() == grpc::StatusCode::NOT_FOUND ) {
return string();
}
- NCBI_THROW(CException, eUnknown, status.error_message());
+ if ( ++cur_retry >= max_retries ) { // retry budget used up: report the last error message
+ NCBI_THROW(CException, eUnknown, status.error_message());
+ }
+ LOG_POST(Trace<<
+ "CSnpPtisClient: failed : "<<status.error_message()<<". "
+ "Waiting "<<cur_wait_time<<" seconds before retry...");
+ SleepMicroSec(Int8(cur_wait_time*1e6)); // seconds -> microseconds
+ cur_timeout = min(cur_timeout*timeout_mul + timeout_inc, timeout_max); // multiply, add increment, cap at timeout_max
+ cur_wait_time = min(cur_wait_time*wait_time_mul + wait_time_inc, wait_time_max); // multiply, add increment, cap at wait_time_max
}
-
- // cout << reply.na_track_acc_with_filter() << "\t" << reply.tms_track_id() << endl;
-
- return reply.na_track_acc_with_filter();
}
#endif
-/* $Id: Dbtag.cpp 600775 2020-01-27 19:10:07Z kans $
+/* $Id: Dbtag.cpp 617215 2020-09-28 17:22:41Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
{ "dbProbe", CDbtag::eDbtagType_dbProbe },
{ "dbSNP", CDbtag::eDbtagType_dbSNP },
{ "dbSTS", CDbtag::eDbtagType_dbSTS },
+ { "dbVar", CDbtag::eDbtagType_dbVar },
{ "dictyBase", CDbtag::eDbtagType_dictyBase },
{ "miRBase", CDbtag::eDbtagType_miRBase },
{ "niaEST", CDbtag::eDbtagType_niaEST },
//=========================================================================//
// special case URLs
-static const char kFBan[] = "http://www.fruitfly.org/cgi-bin/annot/fban?"; // url not found \93Internal Server Error\94 tested 7/13/2016
+static const char kFBan[] = "http://www.fruitfly.org/cgi-bin/annot/fban?"; // url not found "Internal Server Error" tested 7/13/2016
static const char kHInvDbHIT[] = "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=30&KEN_STR="; // access forbidden 7/13/2016
static const char kHInvDbHIX[] = "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=31&KEN_STR="; // "Internal Server Error" tested 7/13/2016
static const char kDictyPrim[] = "http://dictybase.org/db/cgi-bin/gene_page.pl?primary_id="; // url not found tested 7/13/2016
{ CDbtag::eDbtagType_EPDnew, "http://epd.vital-it.ch/cgi-bin/get_doc?format=genome&entry=" },
{ CDbtag::eDbtagType_Ensembl, "https://www.ensembl.org/id/" }, // url seems incorrect, includes msg user has been redirected and "Error 404 Page not found" tested 7/13/2016
{ CDbtag::eDbtagType_PseudoCAP, "http://www.pseudomonas.com/primarySequenceFeature/list?c1=name&e1=1&v1=" }, // url not found tested 7/13/2016
+ { CDbtag::eDbtagType_dbVar, "https://www.ncbi.nlm.nih.gov/dbvar/variants/" }
};
typedef CStaticPairArrayMap<CDbtag::EDbtagType, const char*> TUrlPrefixMap;
-/* $Id: genomic_collections_cli.cpp 603970 2020-03-19 15:32:22Z ivanov $
+/* $Id: genomic_collections_cli.cpp 617470 2020-10-01 17:56:09Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
void CGenomicCollectionsService::x_ConfigureConnection()
{
SetTimeout(&kTimeout);
- SetRetryLimit(20);
+ SetRetryLimit(40);
// it's a backward-compatibility fix for old versions of the server (not much harm in leaving it - only a little data overhead is expected)
// always send request and get response in ASN text format so that server can properly parse request
{
CGCClient_ValidateChrTypeLocRequest req;
CGCClientResponse reply;
-
req.SetType(chrType);
req.SetLocation(chrLoc);
LogRequest(req);
-
- try {
- return AskGet_chrtype_valid(req, &reply);
- } catch (CException& ) {
- if (reply.IsSrvr_error())
- throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
- throw;
- }
+
+    // Send the request up to RETRY_MAX times, sleeping 10 seconds between
+    // attempts; a successful call returns immediately.
+    const int RETRY_MAX = 3;
+    for (int retry_counter = 1; retry_counter <= RETRY_MAX; retry_counter++) {
+        try {
+            return AskGet_chrtype_valid(req, &reply);
+        } catch (const CException& e) {
+            if (retry_counter == RETRY_MAX) {
+                // Last attempt failed: prefer the server-side error if the
+                // reply carries one.
+                if (reply.IsSrvr_error())
+                    throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
+                // Re-throw the exception being handled. "throw e;" would
+                // copy it as a plain CException and slice off the derived
+                // type that callers may be catching.
+                throw;
+            }
+            ERR_POST(Warning <<"Try "<<retry_counter<<":"<<e.GetMsg());
+            SleepSec(10);
+        } // end catch
+    } // end retry for
+
+    // Every loop iteration either returns or throws on the final attempt, so
+    // this is only a safety net that keeps all paths ending in a throw.
+    if (reply.IsSrvr_error())
+        throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
+    NCBI_THROW(CException, eUnknown, "ValidateChrType ran out of retries.");
}
+
CRef<CGCClient_AssemblyInfo> CGenomicCollectionsService::FindOneAssemblyBySequences(const string& sequence_acc, int filter, CGCClient_GetAssemblyBySequenceRequest::ESort sort)
{
CRef<CGCClient_AssemblySequenceInfo> asmseq_info(FindOneAssemblyBySequences(list<string>(1, sequence_acc), filter, sort));
LogRequest(req);
- try {
- return AskGet_assembly_by_sequence(req, &reply);
- } catch (const CException& ) {
- if (reply.IsSrvr_error())
- throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
- throw;
- }
+    // Send the request up to RETRY_MAX times, sleeping 10 seconds between
+    // attempts; a successful call returns immediately.
+    const int RETRY_MAX = 3;
+    for (int retry_counter = 1; retry_counter <= RETRY_MAX; retry_counter++) {
+        try {
+            return AskGet_assembly_by_sequence(req, &reply);
+        } catch (const CException& e) {
+            if (retry_counter == RETRY_MAX) {
+                // Last attempt failed: prefer the server-side error if the
+                // reply carries one.
+                if (reply.IsSrvr_error())
+                    throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
+                // Re-throw the exception being handled. "throw e;" would
+                // copy it as a plain CException and slice off the derived
+                // type that callers may be catching.
+                throw;
+            }
+            ERR_POST(Warning <<"Try "<<retry_counter<<":"<<e.GetMsg());
+            SleepSec(10);
+        } // end catch
+    } // end retry for
+
+    // Every loop iteration either returns or throws on the final attempt, so
+    // this is only a safety net that keeps all paths ending in a throw.
+    if (reply.IsSrvr_error())
+        throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
+    NCBI_THROW(CException, eUnknown, "FindAssembliesBySequences ran out of retries.");
}
req.SetEquivalency(equivalency);
LogRequest(req);
-
- try {
- return AskGet_equivalent_assemblies(req, &reply);
- } catch (const CException& ) {
- if (reply.IsSrvr_error())
- throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
- throw;
- }
+
+    // Send the request up to RETRY_MAX times, sleeping 10 seconds between
+    // attempts; a successful call returns immediately.
+    const int RETRY_MAX = 3;
+    for (int retry_counter = 1; retry_counter <= RETRY_MAX; retry_counter++) {
+        try {
+            return AskGet_equivalent_assemblies(req, &reply);
+        } catch (const CException& e) {
+            if (retry_counter == RETRY_MAX) {
+                // Last attempt failed: prefer the server-side error if the
+                // reply carries one.
+                if (reply.IsSrvr_error())
+                    throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
+                // Re-throw the exception being handled. "throw e;" would
+                // copy it as a plain CException and slice off the derived
+                // type that callers may be catching.
+                throw;
+            }
+            ERR_POST(Warning <<"Try "<<retry_counter<<":"<<e.GetMsg());
+            SleepSec(10);
+        } // end catch
+    } // end retry for
+
+    // Every loop iteration either returns or throws on the final attempt, so
+    // this is only a safety net that keeps all paths ending in a throw.
+    if (reply.IsSrvr_error())
+        throw CGCServiceException(DIAG_COMPILE_INFO, reply.GetSrvr_error());
+    NCBI_THROW(CException, eUnknown, "GetEquivalentAssemblies ran out of retries.");
}
-/* $Id: Bioseq.cpp 502444 2016-05-24 18:46:25Z kans $
+/* $Id: Bioseq.cpp 614732 2020-08-21 13:43:27Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
{
/// A taxid can be found either in a source descriptor (the newer form) or in a
/// org descriptor. If both are there, the source descriptor should have precedence.
- int taxid_from_source = 0,
- taxid_from_org = 0;
+ TTaxId taxid_from_source = ZERO_TAX_ID,
+ taxid_from_org = ZERO_TAX_ID;
if (IsSetDescr()) {
ITERATE (TDescr::Tdata, it, GetDescr().Get()) {
} else if (desc.IsSource() && desc.GetSource().IsSetOrg()) {
taxid_from_source = desc.GetSource().GetOrg().GetTaxId();
}
- if (taxid_from_source) {
+ if (taxid_from_source != ZERO_TAX_ID) {
break;
}
}
}
- return taxid_from_source ? taxid_from_source : taxid_from_org;
+ return TAX_ID_TO(int, taxid_from_source != ZERO_TAX_ID ? taxid_from_source : taxid_from_org);
}
-/* $Id: so_map.cpp 607816 2020-05-07 19:01:26Z ivanov $
+/* $Id: so_map.cpp 617358 2020-09-30 12:55:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
return (tolower(*pLhs) < tolower(*pRhs));
}
+// ----------------------------------------------------------------------------
+// Return the value of the qualifier called qualName on the given feature,
+// provided all qualifiers of that name agree on it.
+//  - Qualifiers for which CanGetQual() or CanGetVal() is false are ignored.
+//  - If two matching qualifiers carry different values, "" is returned.
+//  - If no matching qualifier carries a value, "" is returned.
+string GetUnambiguousNamedQual(
+    const CSeq_feat& feature,
+    const string& qualName)
+// ----------------------------------------------------------------------------
+{
+    string agreedValue;
+    for (const auto& pQual : feature.GetQual()) {
+        const bool usable = pQual->CanGetQual() && pQual->CanGetVal();
+        if (!usable || pQual->GetQual() != qualName) {
+            continue;
+        }
+        const string& candidate = pQual->GetVal();
+        if (agreedValue.empty()) {
+            // nothing recorded yet: adopt this value
+            agreedValue = candidate;
+        } else if (agreedValue != candidate) {
+            // conflicting values: the qualifier is ambiguous
+            return "";
+        }
+    }
+    return agreedValue;
+}
+
// ----------------------------------------------------------------------------
CSoMap::TYPEMAP CSoMap::mMapSoTypeToId;
// ----------------------------------------------------------------------------
CSeq_feat& feature)
// ----------------------------------------------------------------------------
{
- static const map<string, string, CompareNoCase> mTypeToClass = {
+ static const TYPEMAP mTypeToClass = {
{"ncRNA", "other"},
};
feature.SetData().SetRna().SetType(CRNA_ref::eType_ncRNA);
CSeq_feat& feature)
// ----------------------------------------------------------------------------
{
- static const map<string, string, CompareNoCase> mapTypeToQual = {
+ static const TYPEMAP mapTypeToQual = {
{"TSS", "transcription_start_site"},
};
feature.SetData().SetImp().SetKey("misc_feature");
CSeq_feat& feature)
// ----------------------------------------------------------------------------
{
- static const map<string, string, CompareNoCase> mapTypeToQual = {
+ static const TYPEMAP mapTypeToQual = {
{"meiotic_recombination_region", "meiotic"},
{"mitotic_recombination_region", "mitotic"},
{"non_allelic_homologous_recombination", "non_allelic_homologous"},
CSeq_feat& feature)
// ----------------------------------------------------------------------------
{
- static const map<string, string, CompareNoCase> mapTypeToKey = {
+ static const TYPEMAP mapTypeToKey = {
{"C_gene_segment", "C_region"},
{"D_gene_segment", "D_segment"},
{"D_loop", "D-loop"},
CSeq_feat& feature)
// ----------------------------------------------------------------------------
{
- static const map<string, string, CompareNoCase> mapTypeToQual = {
+ static const TYPEMAP mapTypeToQual = {
{"DNAsel_hypersensitive_site", "DNase_I_hypersensitive_site"},
{"GC_rich_promoter_region", "GC_signal"},
{"boundary_element", "insulator"},
CSeq_feat& feature)
// ----------------------------------------------------------------------------
{
- static const map<string, string, CompareNoCase> mapTypeToSatellite = {
+ static const TYPEMAP mapTypeToSatellite = {
{"microsatellite", "microsatellite"},
{"minisatellite", "minisatellite"},
{"satellite_DNA", "satellite"},
};
- static const map<string, string, CompareNoCase> mapTypeToRptType = {
+ static const TYPEMAP mapTypeToRptType = {
{"tandem_repeat", "tandem"},
{"inverted_repeat", "inverted"},
{"direct_repeat", "direct"},
{CSeqFeatData::eSubtype_primer_bind, "primer_binding_site"},
{CSeqFeatData::eSubtype_promoter, "promoter"},
{CSeqFeatData::eSubtype_propeptide, "propeptide"},
- {CSeqFeatData::eSubtype_prot, "protein"},
+ {CSeqFeatData::eSubtype_prot, "polypeptide"},
{CSeqFeatData::eSubtype_protein_bind, "protein_binding_site"},
{CSeqFeatData::eSubtype_rep_origin, "origin_of_replication"},
{CSeqFeatData::eSubtype_S_region, "S_region"},
string& so_type)
// ----------------------------------------------------------------------------
{
- so_type = "region";
+ so_type = "biological_region";
return true;
}
string& so_type)
// ----------------------------------------------------------------------------
{
- map<string, string> mapFeatClassToSoType = {
+ static const TYPEMAP mapFeatClassToSoType = {
{"transcription_start_site", "TSS"},
{"other", "sequence_feature"},
};
- string feat_class = feature.GetNamedQual("feat_class");
+ string feat_class = GetUnambiguousNamedQual(feature, "feat_class");
if (feat_class.empty()) {
so_type = "sequence_feature";
return true;
string& so_type)
// ----------------------------------------------------------------------------
{
- map<string, string> mapRecombClassToSoType = {
+ static const TYPEMAP mapRecombClassToSoType = {
{"meiotic", "meiotic_recombination_region"},
{"mitotic", "mitotic_recombination_region"},
{"non_allelic_homologous", "non_allelic_homologous_recombination_region"},
{"non_allelic_homologous_recombination", "non_allelic_homologous_recombination_region"},
{"other", "recombination_feature"},
};
- string recomb_class = feature.GetNamedQual("recombination_class");
+ string recomb_class = GetUnambiguousNamedQual(feature, "recombination_class");
if (recomb_class.empty()) {
so_type = "recombination_feature";
return true;
string& so_type)
// ----------------------------------------------------------------------------
{
- map<string, string> mapNcRnaClassToSoType = {
+ static const TYPEMAP mapNcRnaClassToSoType = {
{"antisense_RNA", "antisense_RNA"},
{"autocatalytically_spliced_intron", "autocatalytically_spliced_intron"},
{"guide_RNA", "guide_RNA"},
{"vault_RNA", "vault_RNA"},
{"Y_RNA", "Y_RNA"},
};
- string ncrna_class = feature.GetNamedQual("ncRNA_class");
+ string ncrna_class = GetUnambiguousNamedQual(feature, "ncRNA_class");
if (ncrna_class.empty()) {
if (feature.IsSetData() &&
feature.GetData().IsRna() &&
string& so_type)
// ----------------------------------------------------------------------------
{
- map<string, string> mapRegulatoryClassToSoType = {
- {"DNase_I_hypersensitive_site", "DNAseI_hypersensitive_site"},
+ static const TYPEMAP mapRegulatoryClassToSoType = {
+ {"DNase_I_hypersensitive_site", "DNaseI_hypersensitive_site"},
{"GC_signal", "GC_rich_promoter_region"},
{"enhancer_blocking_element", "enhancer_blocking_element"},
{"epigenetically_modified_region", "epigenetically_modified_region"},
{"ribosome_binding_site", "ribosome_entry_site"},
};
- string regulatory_class = feature.GetNamedQual("regulatory_class");
+ string regulatory_class = GetUnambiguousNamedQual(feature, "regulatory_class");
if (regulatory_class.empty()) {
so_type = "regulatory_region";
return true;
string& so_type)
// ----------------------------------------------------------------------------
{
- map<string, string> mapBondTypeToSoType = {
+ static const TYPEMAP mapBondTypeToSoType = {
{"disulfide", "disulfide_bond"},
{"xlink", "cross_link"},
};
- string bond_type = feature.GetNamedQual("bond_type");
+ string bond_type = GetUnambiguousNamedQual(feature, "bond_type");
if (bond_type.empty()) {
return false;
}
return true;
}
+
// ----------------------------------------------------------------------------
bool CSoMap::xMapRepeatRegion(
const CSeq_feat& feature,
string& so_type)
// ----------------------------------------------------------------------------
{
- map<string, string> mapSatelliteToSoType = {
+ static const TYPEMAP mapSatelliteToSoType = {
{"satellite", "satellite_DNA"},
{"microsatellite", "microsatellite"},
{"minisatellite", "minisatellite"},
};
- string satellite = feature.GetNamedQual("satellite");
+ string satellite = GetUnambiguousNamedQual(feature, "satellite");
if (!satellite.empty()) {
auto cit = mapSatelliteToSoType.find(satellite);
if (cit == mapSatelliteToSoType.end()) {
return true;
}
- map<string, string> mapRptTypeToSoType = {
+ static const TYPEMAP mapRptTypeToSoType = {
{"tandem", "tandem_repeat"},
{"inverted", "inverted_repeat"},
{"flanking", "repeat_region"},
{"y_prime_element", "Y_prime_element"},
{"other", "repeat_region"},
};
- string rpt_type = feature.GetNamedQual("rpt_type");
+ string rpt_type = GetUnambiguousNamedQual(feature, "rpt_type");
if (rpt_type.empty()) {
so_type = "repeat_region";
return true;
-/* $Id: OrgMod.cpp 602802 2020-03-02 23:09:16Z kans $
+/* $Id: OrgMod.cpp 613887 2020-08-13 18:36:41Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
replace(name.begin(), name.end(), ' ', '-');
if (name == "note" ||
- NStr::EqualNocase(name, "orgmod-note")) {
+ NStr::EqualNocase(name, "orgmod-note") ||
+ NStr::EqualNocase(name, "note-orgmod")) {
return eSubtype_other;
} else if (vocabulary == eVocabulary_insdc) {
if (name == "host" || name == "specific-host") {
replace(name.begin(), name.end(), ' ', '-');
if (name == "note" ||
- name == "orgmod-note") {
+ name == "orgmod-note" ||
+ name == "note-orgmod") {
return true;
} else if (vocabulary == eVocabulary_insdc) {
if (name == "host" || name == "sub-strain") {
-/* $Id: SeqFeatData.cpp 599381 2019-12-26 23:31:18Z kans $
+/* $Id: SeqFeatData.cpp 613780 2020-08-12 16:42:40Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
FEAT_INFO_PAIR(Txinit, txinit, "TxInit", "promoter"),
FEAT_INFO_PAIR(Num, num, "Num", "misc_feature"),
FEAT_INFO_PAIR(Psec_str, psec_str, "SecStr", "SecStr"),
- FEAT_INFO_PAIR(Non_std_residue, non_std_residue, "NonStdRes", "misc_feature"),
+ FEAT_INFO_PAIR(Non_std_residue, non_std_residue, "NonStdRes", "NonStdRes"),
FEAT_INFO_PAIR(Het, het, "Het", "Het"),
FEAT_INFO_PAIR(Biosrc, biosrc, "Src", "source"),
FEAT_INFO_PAIR(Clone, clone, "CloneRef", "misc_feature"),
eQual_usedin,
} },
-//{ eSubtype_non_std_residue, {
-//},
+{ eSubtype_non_std_residue, {
+ eQual_allele,
+ eQual_citation,
+ eQual_db_xref,
+ eQual_exception,
+ eQual_experiment,
+ eQual_function,
+ eQual_gene,
+ eQual_gene_synonym,
+ eQual_inference,
+ eQual_label,
+ eQual_locus_tag,
+ eQual_map,
+ eQual_non_std_residue,
+ eQual_note,
+ eQual_number,
+ eQual_old_locus_tag,
+ eQual_phenotype,
+ eQual_product,
+ eQual_pseudo,
+ eQual_pseudogene,
+ eQual_standard_name,
+ eQual_usedin,
+} },
//sameasmisc_feature
{ eSubtype_het, {
{ CSeqFeatData::eQual_mol_type, "mol_type" },
{ CSeqFeatData::eQual_name, "name" },
{ CSeqFeatData::eQual_nomenclature, "nomenclature" },
+ { CSeqFeatData::eQual_non_std_residue, "non_std_residue" },
{ CSeqFeatData::eQual_ncRNA_class, "ncRNA_class" },
{ CSeqFeatData::eQual_note, "note" },
{ CSeqFeatData::eQual_number, "number" },
case eSubtype_propeptide_aa:
case eSubtype_bond:
case eSubtype_psec_str:
+ case eSubtype_non_std_residue:
rval = eFeatureLocationAllowed_ProtOnly;
break;
case eSubtype_region:
-/* $Id: SubSource.cpp 605788 2020-04-15 14:55:53Z ivanov $
+/* $Id: SubSource.cpp 615787 2020-09-03 18:18:36Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
if ( NStr::EqualNocase(name, "note") ||
NStr::EqualNocase(name, "subsource-note") ||
- NStr::EqualNocase(name, "subsrc-note")) {
+ NStr::EqualNocase(name, "subsrc-note") ||
+ NStr::EqualNocase(name, "note-subsource")) {
return eSubtype_other;
} else if (vocabulary == eVocabulary_insdc) {
// consider a table if more special cases arise.
if ( NStr::EqualNocase(name, "note") ||
NStr::EqualNocase(name, "subsource-note") ||
- NStr::EqualNocase(name, "subsrc-note")) {
+ NStr::EqualNocase(name, "subsrc-note") ||
+ NStr::EqualNocase(name, "note-subsource")) {
return true;
}
if (vocabulary == eVocabulary_insdc) {
return kEmptyStr;
}
-
- if (NStr::EqualNocase (country, "China") && NStr::EqualNocase (cguess, "Hong Kong")) {
- delete id;
- return kEmptyStr;
- }
- if (NStr::EqualNocase (country, "USA") && NStr::EqualNocase (cguess, "Puerto Rico")) {
- delete id;
- return kEmptyStr;
- }
if (NStr::EqualNocase (country, "State of Palestine") &&
(NStr::EqualNocase (cguess, "Gaza Strip") ||
NStr::EqualNocase (cguess, "West Bank"))) {
// 7. Spaces and other printable characters are permitted
// 8. Must not contain the word "plasmid" (ignoring case)
// 9. Must not contain the word "chromosome" (ignoring case)
-// 10. Must not contain the phrase "linkage group" (ignoring case)
-// 11. Must not contain the series of letters "chr" (ignoring case)
-// 12. Must not contain the taxname (ignoring case)
-// 14. Must not contain the genus (ignoring case)
+// 10. Must not contain the phrase "linkage group" (ignoring case)
+// 11. Must not contain the series of letters "chr" (ignoring case)
+// 12. Must not contain the taxname (ignoring case)
+// 14. Must not contain the genus (ignoring case)
// 15. Must not contain the species (ignoring case)
+// except allow the species to match the value after an initial 'p' (e.g., JX416328)
// 16. Must not contain the series of letters "chrm" (ignoring case)
// 17. Must not contain the series of letters "chrom" (ignoring case)
// 18. Must not contain the phrase "linkage-group" (ignoring case)
}
size_t pos = NStr::Find(taxname, " ");
if (pos != NPOS) {
- if (NStr::FindNoCase(value, taxname.substr(0, pos)) != NPOS) {
+ string genus = taxname.substr(0, pos);
+ if (NStr::FindNoCase(value, genus) != NPOS) {
// B.14
return false;
}
- if (NStr::FindNoCase(value, taxname.substr(pos + 1)) != NPOS) {
- // B.15
- return false;
+ string species = taxname.substr(pos + 1);
+ pos = NStr::FindNoCase(value, species);
+ if (pos != NPOS) {
+ if (pos != 1 || value[0] != 'p') {
+ // B.15
+ return false;
+ }
}
}
}
-/* $Id: ecnum_ambiguous.inc 578243 2019-01-15 21:20:22Z kans $
+/* $Id: ecnum_ambiguous.inc 615790 2020-09-03 18:19:26Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
"1.1.4.n\tWith a disulfide as acceptor",
"1.1.5.-\tWith a quinone or similar compound as acceptor",
"1.1.5.n\tWith a quinone or similar compound as acceptor",
+ "1.1.7.-\tWith an iron-sulfur protein as acceptor",
+ "1.1.7.n\tWith an iron-sulfur protein as acceptor",
"1.1.9.-\tWith a copper protein as acceptor",
"1.1.9.n\tWith a copper protein as acceptor",
"1.1.98.-\tWith other, known, acceptors",
"6.2.n.n\tForming carbon-sulfur bonds",
"6.2.1.-\tAcid--thiol ligases",
"6.2.1.n\tAcid--thiol ligases",
+ "6.2.2.-\tAmide--thiol ligases",
+ "6.2.2.n\tAmide--thiol ligases",
"6.3.-.-\tForming carbon-nitrogen bonds",
"6.3.n.n\tForming carbon-nitrogen bonds",
"6.3.1.-\tAcid--ammonia (or amine) ligases (amide synthases)",
1.1.4.n With a disulfide as acceptor
1.1.5.- With a quinone or similar compound as acceptor
1.1.5.n With a quinone or similar compound as acceptor
+1.1.7.- With an iron-sulfur protein as acceptor
+1.1.7.n With an iron-sulfur protein as acceptor
1.1.9.- With a copper protein as acceptor
1.1.9.n With a copper protein as acceptor
1.1.98.- With other, known, acceptors
6.2.n.n Forming carbon-sulfur bonds
6.2.1.- Acid--thiol ligases
6.2.1.n Acid--thiol ligases
+6.2.2.- Amide--thiol ligases
+6.2.2.n Amide--thiol ligases
6.3.-.- Forming carbon-nitrogen bonds
6.3.n.n Forming carbon-nitrogen bonds
6.3.1.- Acid--ammonia (or amine) ligases (amide synthases)
-/* $Id: ecnum_replaced.inc 604099 2020-03-23 12:20:07Z ivanov $
+/* $Id: ecnum_replaced.inc 612554 2020-07-23 15:34:08Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
"1.3.1.52\t1.3.8.5",
"1.3.1.63\t1.21.1.2",
"1.3.1.80\t1.3.7.12",
+ "1.3.1.99\t1.3.1.122",
"1.3.1.n1\t1.3.1.87",
"1.3.1.n2\t1.14.19.52",
"1.3.2.1\t1.3.8.1",
"1.8.6.1\t2.5.1.18",
"1.8.99.3\t1.8.99.5",
"1.8.99.4\t1.8.4.8",
+ "1.9.3.1\t7.1.1.9",
"1.9.3.2\t1.7.2.1",
"1.9.99.1\t1.9.98.1",
"1.10.2.2\t7.1.1.8",
"1.10.99.2\t1.10.5.1",
"1.10.99.3\t1.23.5.1",
"1.11.1.4\t1.13.11.11",
+ "1.11.1.15\t1.11.1.24",
"1.12.1.1\t1.12.7.2",
"1.12.7.1\t1.12.7.2",
"1.12.99.1\t1.12.98.1",
"2.7.7.17\t4.6.1.19",
"2.7.7.21\t2.7.7.72",
"2.7.7.25\t2.7.7.72",
- "2.7.7.26\t3.1.27.3",
+ "2.7.7.26\t4.6.1.24",
"2.7.7.29\t2.7.7.28",
"2.7.7.54\t6.3.2.40",
"2.7.7.55\t6.3.2.40",
"3.1.4.5\t3.1.21.1",
"3.1.4.6\t3.1.22.1",
"3.1.4.7\t3.1.31.1",
- "3.1.4.8\t3.1.27.3",
+ "3.1.4.8\t4.6.1.24",
"3.1.4.9\t3.1.30.2",
"3.1.4.10\t4.6.1.13",
"3.1.4.15\t2.7.7.89",
"3.1.4.n1\t3.1.4.53",
"3.1.7.4\t4.2.1.133\t4.2.3.141",
"3.1.7.7\t4.2.3.194",
+ "3.1.11.7\t3.6.1.71",
+ "3.1.11.8\t3.6.1.70",
+ "3.1.12.2\t3.6.1.72",
"3.1.22.3\t3.1.21.7",
"3.1.23.1\t3.1.21.4",
"3.1.23.2\t3.1.21.4",
"3.1.26.n1\t3.1.26.12",
"3.1.27.1\t4.6.1.19",
"3.1.27.2\t4.6.1.22",
+ "3.1.27.3\t4.6.1.24",
"3.1.27.4\t4.6.1.20",
"3.1.27.5\t4.6.1.18",
"3.1.27.6\t4.6.1.21",
"3.2.1.29\t3.2.1.52",
"3.2.1.30\t3.2.1.52",
"3.2.1.34\t3.2.1.35",
+ "3.2.1.44\t3.2.1.211",
"3.2.1.69\t3.2.1.41",
"3.2.1.79\t3.2.1.55",
"3.2.1.110\t3.2.1.97",
"4.1.2.31\t4.1.3.16",
"4.1.2.37\t4.1.2.46\t4.1.2.47",
"4.1.2.39\t4.1.2.46\t4.1.2.47",
+ "4.1.2.41\t4.1.2.61",
"4.1.2.n1\t4.1.2.44",
"4.1.2.n3\t4.1.2.53",
"4.1.2.n4\t4.1.2.52",
1.3.1.52 1.3.8.5
1.3.1.63 1.21.1.2
1.3.1.80 1.3.7.12
+1.3.1.99 1.3.1.122
1.3.1.n1 1.3.1.87
1.3.1.n2 1.14.19.52
1.3.2.1 1.3.8.1
1.8.6.1 2.5.1.18
1.8.99.3 1.8.99.5
1.8.99.4 1.8.4.8
+1.9.3.1 7.1.1.9
1.9.3.2 1.7.2.1
1.9.99.1 1.9.98.1
1.10.2.2 7.1.1.8
1.10.99.2 1.10.5.1
1.10.99.3 1.23.5.1
1.11.1.4 1.13.11.11
+1.11.1.15 1.11.1.24
1.12.1.1 1.12.7.2
1.12.7.1 1.12.7.2
1.12.99.1 1.12.98.1
2.7.7.17 4.6.1.19
2.7.7.21 2.7.7.72
2.7.7.25 2.7.7.72
-2.7.7.26 3.1.27.3
+2.7.7.26 4.6.1.24
2.7.7.29 2.7.7.28
2.7.7.54 6.3.2.40
2.7.7.55 6.3.2.40
3.1.4.5 3.1.21.1
3.1.4.6 3.1.22.1
3.1.4.7 3.1.31.1
-3.1.4.8 3.1.27.3
+3.1.4.8 4.6.1.24
3.1.4.9 3.1.30.2
3.1.4.10 4.6.1.13
3.1.4.15 2.7.7.89
3.1.4.n1 3.1.4.53
3.1.7.4 4.2.1.133 4.2.3.141
3.1.7.7 4.2.3.194
+3.1.11.7 3.6.1.71
+3.1.11.8 3.6.1.70
+3.1.12.2 3.6.1.72
3.1.22.3 3.1.21.7
3.1.23.1 3.1.21.4
3.1.23.2 3.1.21.4
3.1.26.n1 3.1.26.12
3.1.27.1 4.6.1.19
3.1.27.2 4.6.1.22
+3.1.27.3 4.6.1.24
3.1.27.4 4.6.1.20
3.1.27.5 4.6.1.18
3.1.27.6 4.6.1.21
3.2.1.29 3.2.1.52
3.2.1.30 3.2.1.52
3.2.1.34 3.2.1.35
+3.2.1.44 3.2.1.211
3.2.1.69 3.2.1.41
3.2.1.79 3.2.1.55
3.2.1.110 3.2.1.97
4.1.2.31 4.1.3.16
4.1.2.37 4.1.2.46 4.1.2.47
4.1.2.39 4.1.2.46 4.1.2.47
+4.1.2.41 4.1.2.61
4.1.2.n1 4.1.2.44
4.1.2.n3 4.1.2.53
4.1.2.n4 4.1.2.52
-/* $Id: ecnum_specific.inc 604099 2020-03-23 12:20:07Z ivanov $
+/* $Id: ecnum_specific.inc 615790 2020-09-03 18:19:26Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
"1.1.1.417\t3-beta-hydroxysteroid-4-beta-carboxylate 3-dehydrogenase (decarboxylating)",
"1.1.1.418\tPlant 3-beta-hydroxysteroid-4-alpha-carboxylate 3-dehydrogenase (decarboxylating)",
"1.1.1.419\tNepetalactol dehydrogenase",
+ "1.1.1.420\tD-apiose dehydrogenase",
+ "1.1.1.421\tD-apionate oxidoisomerase",
+ "1.1.1.422\tPseudoephedrine dehydrogenase",
+ "1.1.1.423\tEphedrine dehydrogenase",
"1.1.1.n4\t(-)-trans-carveol dehydrogenase",
"1.1.1.n5\t3-methylmalate dehydrogenase",
"1.1.1.n11\tSuccinic semialdehyde reductase",
"1.1.2.7\tMethanol dehydrogenase (cytochrome c)",
"1.1.2.8\tAlcohol dehydrogenase (cytochrome c)",
"1.1.2.9\t1-butanol dehydrogenase (cytochrome c)",
+ "1.1.2.10\tLanthanide-dependent methanol dehydrogenase",
"1.1.3.4\tGlucose oxidase",
"1.1.3.5\tHexose oxidase",
"1.1.3.6\tCholesterol oxidase",
"1.1.98.4\tF420H(2):quinone oxidoreductase",
"1.1.98.5\tSecondary-alcohol dehydrogenase (coenzyme-F420)",
"1.1.98.6\tRibonucleoside-triphosphate reductase (formate)",
+ "1.1.98.7\tSerine-type anaerobic sulfatase-maturating enzyme",
"1.1.99.1\tCholine dehydrogenase",
"1.1.99.2\tL-2-hydroxyglutarate dehydrogenase",
"1.1.99.3\tGluconate 2-dehydrogenase (acceptor)",
"1.2.1.100\t5-formyl-3-hydroxy-2-methylpyridine 4-carboxylate 5-dehydrogenase",
"1.2.1.101\tL-tyrosine reductase",
"1.2.1.102\tIsopyridoxal dehydrogenase (5-pyridoxate-forming)",
+ "1.2.1.103\t[Amino-group carrier protein]-6-phospho-L-2-aminoadipate reductase",
"1.2.1.n2\tFatty acyl-CoA reductase",
"1.2.2.1\tFormate dehydrogenase (cytochrome)",
"1.2.2.4\tCarbon-monoxide dehydrogenase (cytochrome b-561)",
"1.3.1.31\t2-enoate reductase",
"1.3.1.32\tMaleylacetate reductase",
"1.3.1.33\tProtochlorophyllide reductase",
- "1.3.1.34\t2,4-dienoyl-CoA reductase (NADPH)",
+ "1.3.1.34\t2,4-dienoyl-CoA reductase ((2E)-enoyl-CoA-producing)",
"1.3.1.36\tGeissoschizine dehydrogenase",
"1.3.1.37\tCis-2-enoyl-CoA reductase (NADPH)",
"1.3.1.38\tTrans-2-enoyl-CoA reductase (NADPH)",
"1.3.1.96\tBotryococcus squalene synthase",
"1.3.1.97\tBotryococcene synthase",
"1.3.1.98\tUDP-N-acetylmuramate dehydrogenase",
- "1.3.1.99\tIridoid synthase",
"1.3.1.100\tChanoclavine-I aldehyde reductase",
"1.3.1.101\t2,3-bis-O-geranylgeranyl-sn-glycerol 1-phosphate reductase (NAD(P)H)",
"1.3.1.102\t2-alkenal reductase (NADP(+))",
"1.3.1.118\tMeromycolic acid enoyl-[acyl-carrier-protein] reductase",
"1.3.1.119\tChlorobenzene dihydrodiol dehydrogenase",
"1.3.1.120\tCyclohexane-1-carbonyl-CoA reductase (NADP(+))",
+ "1.3.1.121\t4-amino-4-deoxyprephenate dehydrogenase",
+ "1.3.1.122\t(S)-8-oxocitronellyl enol synthase",
+ "1.3.1.123\t7-epi-iridoid synthase",
+ "1.3.1.124\t2,4-dienoyl-CoA reductase ((3E)-enoyl-CoA-producing)",
"1.3.1.n3\tCurcumin reductase",
"1.3.2.3\tL-galactonolactone dehydrogenase",
"1.3.3.3\tCoproporphyrinogen oxidase",
"1.3.8.12\t(2S)-methylsuccinyl-CoA dehydrogenase",
"1.3.8.13\tCrotonobetainyl-CoA reductase",
"1.3.8.14\tL-prolyl-[peptidyl-carrier protein] dehydrogenase",
+ "1.3.8.15\t3-(aryl)acrylate reductase",
"1.3.98.1\tDihydroorotate oxidase (fumarate)",
"1.3.98.3\tCoproporphyrinogen dehydrogenase",
"1.3.98.4\t5a,11a-dehydrotetracycline reductase",
"1.8.4.12\tPeptide-methionine (R)-S-oxide reductase",
"1.8.4.13\tL-methionine (S)-S-oxide reductase",
"1.8.4.14\tL-methionine (R)-S-oxide reductase",
+ "1.8.4.15\tProtein dithiol oxidoreductase (disulfide-forming)",
+ "1.8.4.16\tThioredoxin:protein disulfide reductase",
"1.8.5.1\tGlutathione dehydrogenase (ascorbate)",
"1.8.5.2\tThiosulfate dehydrogenase (quinone)",
"1.8.5.3\tRespiratory dimethylsulfoxide reductase",
"1.8.5.6\tSulfite dehydrogenase (quinone)",
"1.8.5.7\tGlutathionyl-hydroquinone reductase",
"1.8.5.8\tEukaryotic sulfide quinone oxidoreductase",
+ "1.8.5.9\tProtein dithiol:quinone oxidoreductase DsbB",
"1.8.7.1\tAssimilatory sulfite reductase (ferredoxin)",
"1.8.7.2\tFerredoxin:thioredoxin reductase",
"1.8.7.3\tFerredoxin:CoB-CoM heterodisulfide reductase",
"1.8.98.4\tCoenzyme F420:CoB-CoM heterodisulfide,ferredoxin reductase",
"1.8.98.5\tH(2):CoB-CoM heterodisulfide,ferredoxin reductase",
"1.8.98.6\tFormate:CoB-CoM heterodisulfide,ferredoxin reductase",
+ "1.8.98.7\tCysteine-type anaerobic sulfatase-maturating enzyme",
"1.8.99.2\tAdenylyl-sulfate reductase",
"1.8.99.5\tDissimilatory sulfite reductase",
- "1.9.3.1\tCytochrome-c oxidase",
"1.9.6.1\tNitrate reductase (cytochrome)",
"1.9.98.1\tIron--cytochrome-c reductase",
"1.10.1.1\tTrans-acenaphthene-1,2-diol dehydrogenase",
"1.10.3.11\tUbiquinol oxidase (non-electrogenic)",
"1.10.3.15\tGrixazone synthase",
"1.10.3.16\tDihydrophenazinedicarboxylate synthase",
+ "1.10.3.17\tSuperoxide oxidase",
"1.10.5.1\tRibosyldihydronicotinamide dehydrogenase (quinone)",
"1.11.1.1\tNADH peroxidase",
"1.11.1.2\tNADPH peroxidase",
"1.11.1.12\tPhospholipid-hydroperoxide glutathione peroxidase",
"1.11.1.13\tManganese peroxidase",
"1.11.1.14\tLignin peroxidase",
- "1.11.1.15\tPeroxiredoxin",
"1.11.1.16\tVersatile peroxidase",
"1.11.1.17\tGlutathione amide-dependent peroxidase",
"1.11.1.18\tBromide peroxidase",
"1.11.1.21\tCatalase peroxidase",
"1.11.1.22\tHydroperoxy fatty acid reductase",
"1.11.1.23\t(S)-2-hydroxypropylphosphonic acid epoxidase",
+ "1.11.1.24\tThioredoxin-dependent peroxiredoxin",
+ "1.11.1.25\tGlutaredoxin-dependent peroxiredoxin",
+ "1.11.1.26\tNADH-dependent peroxiredoxin",
+ "1.11.1.27\tGlutathione-dependent peroxiredoxin",
+ "1.11.1.28\tLipoyl-dependent peroxiredoxin",
+ "1.11.1.29\tMycoredoxin-dependent peroxiredoxin",
"1.11.2.1\tUnspecific peroxygenase",
"1.11.2.2\tMyeloperoxidase",
"1.11.2.3\tPlant seed peroxygenase",
"1.14.11.67\t[Histone H3]-trimethyl-L-lysine(4) demethylase",
"1.14.11.68\t[Histone H3]-trimethyl-L-lysine(27) demethylase",
"1.14.11.69\t[Histone H3]-trimethyl-L-lysine(36) demethylase",
+ "1.14.11.70\t7-deoxycylindrospermopsin hydroxylase",
+ "1.14.11.71\tMethylphosphonate hydroxylase",
"1.14.11.n2\tMethylcytosine dioxygenase",
"1.14.11.n4\tAnkyrin-repeat-histidine dioxagenase",
"1.14.12.1\tAnthranilate 1,2-dioxygenase (deaminating, decarboxylating)",
"1.14.13.244\tPhenol 2-monooxygenase (NADH)",
"1.14.13.245\tAssimilatory dimethylsulfide S-monooxygenase",
"1.14.13.246\t4-beta-methylsterol monooxygenase",
+ "1.14.13.247\tStachydrine N-demethylase",
"1.14.13.n6\tHexahomomethionine N-hydroxylase",
"1.14.13.n7\t4-nitrophenol 2-hydroxylase",
"1.14.14.1\tUnspecific monooxygenase",
"1.14.18.9\t4-alpha-methylsterol monooxygenase",
"1.14.18.10\tPlant 4,4-dimethylsterol C-4-alpha-methyl-monooxygenase",
"1.14.18.11\tPlant 4-alpha-monomethylsterol monooxygenase",
+ "1.14.18.12\t2-hydroxy fatty acid dioxygenase",
"1.14.19.1\tStearoyl-CoA 9-desaturase",
"1.14.19.2\tStearoyl-[acyl-carrier-protein] 9-desaturase",
"1.14.19.3\tAcyl-CoA 6-desaturase",
"1.17.99.4\tUracil/thymine dehydrogenase",
"1.17.99.6\tEpoxyqueuosine reductase",
"1.17.99.7\tFormate dehydrogenase (acceptor)",
+ "1.17.99.8\tLimonene dehydrogenase",
"1.18.1.1\tRubredoxin--NAD(+) reductase",
"1.18.1.2\tFerredoxin--NADP(+) reductase",
"1.18.1.3\tFerredoxin--NAD(+) reductase",
"1.19.6.1\tNitrogenase (flavodoxin)",
"1.20.1.1\tPhosphonate dehydrogenase",
"1.20.2.1\tArsenate reductase (cytochrome c)",
- "1.20.4.1\tArsenate reductase (glutaredoxin)",
+ "1.20.4.1\tArsenate reductase (glutathione/glutaredoxin)",
"1.20.4.2\tMethylarsonate reductase",
"1.20.4.3\tMycoredoxin",
"1.20.4.4\tArsenate reductase (thioredoxin)",
"2.1.1.360\t[Histone H3]-lysine(79) N-trimethyltransferase",
"2.1.1.361\t[Histone H4]-lysine(20) N-methyltransferase",
"2.1.1.362\t[Histone H4]-N-methyl-L-lysine(20) N-methyltransferase",
+ "2.1.1.363\tPre-sodorifen synthase",
"2.1.1.n1\tResorcinol O-methyltransferase",
"2.1.1.n4\tThiocyanate methyltransferase",
"2.1.1.n7\t5-pentadecatrienyl resorcinol O-methyltransferase",
"2.1.1.n8\tSmall RNA 2'-O-methyltransferase",
"2.1.1.n11\tMethylphosphotriester-DNA--[protein]-cysteine S-methyltransferase",
"2.1.2.1\tGlycine hydroxymethyltransferase",
- "2.1.2.2\tPhosphoribosylglycinamide formyltransferase",
+ "2.1.2.2\tPhosphoribosylglycinamide formyltransferase 1",
"2.1.2.3\tPhosphoribosylaminoimidazolecarboxamide formyltransferase",
"2.1.2.4\tGlycine formimidoyltransferase",
"2.1.2.5\tGlutamate formimidoyltransferase",
"2.3.1.242\tKdo(2)-lipid IV(A) palmitoleoyltransferase",
"2.3.1.243\tLauroyl-Kdo(2)-lipid IV(A) myristoyltransferase",
"2.3.1.244\t2-methylbutanoate polyketide synthase",
- "2.3.1.245\t3-hydroxy-5-phosphonooxypentane-2,4-dione thiolase",
+ "2.3.1.245\t3-hydroxy-5-phosphooxypentane-2,4-dione thiolase",
"2.3.1.246\t3,5-dihydroxyphenylacetyl-CoA synthase",
"2.3.1.247\t3-keto-5-aminohexanoate cleavage enzyme",
"2.3.1.248\tSpermidine disinapoyl transferase",
"2.3.1.291\tSphingoid base N-palmitoyltransferase",
"2.3.1.292\t(Phenol)carboxyphthiodiolenone synthase",
"2.3.1.293\tMeromycolic acid 3-oxoacyl-(acyl carrier protein) synthase I",
+ "2.3.1.294\tMeromycolic acid 3-oxoacyl-(acyl carrier protein) synthase II",
+ "2.3.1.295\tMycoketide-CoA synthase",
"2.3.1.296\tOmega-hydroxyceramide transacylase",
"2.3.1.297\tVery-long-chain ceramide synthase",
"2.3.1.298\tUltra-long-chain ceramide synthase",
"2.3.2.30\tL-ornithine N(alpha)-acyltransferase",
"2.3.2.31\tRBR-type E3 ubiquitin transferase",
"2.3.2.32\tCullin-RING-type E3 NEDD8 transferase",
+ "2.3.2.33\tRCR-type E3 ubiquitin transferase",
"2.3.3.1\tCitrate (Si)-synthase",
"2.3.3.2\tDecylcitrate synthase",
"2.3.3.3\tCitrate (Re)-synthase",
"2.4.1.368\tOleanolate 3-O-glucosyltransferase",
"2.4.1.369\tEnterobactin C-glucosyltransferase",
"2.4.1.370\tInositol phosphorylceramide mannosyltransferase",
+ "2.4.1.371\tPolymannosyl GlcNAc-diphospho-ditrans,octacis-undecaprenol 2,3-alpha-mannosylpolymerase",
+ "2.4.1.372\tMutansucrase",
+ "2.4.1.373\tAlpha-(1->2) branching sucrase",
+ "2.4.1.374\tBeta-1,2-mannooligosaccharide synthase",
"2.4.1.n2\tLoliose synthase",
"2.4.2.1\tPurine-nucleoside phosphorylase",
"2.4.2.2\tPyrimidine-nucleoside phosphorylase",
"2.4.2.60\tCysteine-dependent adenosine diphosphate thiazole synthase",
"2.4.2.61\tAlpha-dystroglycan beta-1,4-xylosyltransferase",
"2.4.2.n2\tGlucoside xylosyltransferase",
- "2.4.2.n3\tXyloside xylosyltransferase",
+ "2.4.2.n3\tXylosyl alpha-1,3-xylosyltransferase",
"2.4.99.1\tBeta-galactoside alpha-(2,6)-sialyltransferase",
"2.4.99.2\tBeta-D-galactosyl-(1->3)-N-acetyl-beta-D-galactosaminide alpha-2,3-sialyltransferase",
"2.4.99.3\tAlpha-N-acetylgalactosaminide alpha-2,6-sialyltransferase",
"2.6.1.115\t5-hydroxydodecatetraenal 1-aminotransferase",
"2.6.1.116\t6-aminohexanoate aminotransferase",
"2.6.1.117\tL-glutamine--4-(methylsulfanyl)-2-oxobutanoate aminotransferase",
+ "2.6.1.118\t[Amino group carrier protein]-gamma-(L-lysyl)-L-glutamate aminotransferase",
"2.6.3.1\tOximinotransferase",
"2.6.99.1\tdATP(dGTP)--DNA purinetransferase",
"2.6.99.2\tPyridoxine 5'-phosphate synthase",
"2.7.1.45\t2-dehydro-3-deoxygluconokinase",
"2.7.1.46\tL-arabinokinase",
"2.7.1.47\tD-ribulokinase",
- "2.7.1.48\tUridine kinase",
+ "2.7.1.48\tUridine/cytidine kinase",
"2.7.1.49\tHydroxymethylpyrimidine kinase",
"2.7.1.50\tHydroxyethylthiazole kinase",
"2.7.1.51\tL-fuculokinase",
"2.7.1.144\tTagatose-6-phosphate kinase",
"2.7.1.145\tDeoxynucleoside kinase",
"2.7.1.146\tADP-specific phosphofructokinase",
- "2.7.1.147\tADP-specific glucokinase",
+ "2.7.1.147\tADP-specific glucose/glucosamine kinase",
"2.7.1.148\t4-(cytidine 5'-diphospho)-2-C-methyl-D-erythritol kinase",
"2.7.1.149\t1-phosphatidylinositol-5-phosphate 4-kinase",
"2.7.1.150\t1-phosphatidylinositol-3-phosphate 5-kinase",
"2.7.1.227\tInositol phosphorylceramide synthase",
"2.7.1.228\tMannosyl-inositol-phosphoceramide inositolphosphotransferase",
"2.7.1.229\tDeoxyribokinase",
+ "2.7.1.230\tAmicoumacin kinase",
"2.7.2.1\tAcetate kinase",
"2.7.2.2\tCarbamate kinase",
"2.7.2.3\tPhosphoglycerate kinase",
"2.7.2.13\tGlutamate 1-kinase",
"2.7.2.14\tBranched-chain-fatty-acid kinase",
"2.7.2.15\tPropionate kinase",
+ "2.7.2.16\t2-phosphoglycerate kinase",
+ "2.7.2.17\t[Amino-group carrier protein]-L-2-aminoadipate 6-kinase",
"2.7.3.1\tGuanidinoacetate kinase",
"2.7.3.2\tCreatine kinase",
"2.7.3.3\tArginine kinase",
"2.8.3.23\tCaffeate CoA-transferase",
"2.8.3.24\t(R)-2-hydroxy-4-methylpentanoate CoA-transferase",
"2.8.3.25\tBile acid CoA-transferase",
+ "2.8.3.26\tSuccinyl-CoA:mesaconate CoA transferase",
"2.8.4.1\tCoenzyme-B sulfoethylthiotransferase",
"2.8.4.2\tArsenate-mycothiol transferase",
"2.8.4.3\ttRNA-2-methylthio-N(6)-dimethylallyladenosine synthase",
"2.8.5.2\tL-cysteine S-thiosulfotransferase",
"2.9.1.1\tL-seryl-tRNA(Sec) selenium transferase",
"2.9.1.2\tO-phospho-L-seryl-tRNA(Sec):L-selenocysteinyl-tRNA synthase",
+ "2.9.1.3\ttRNA 2-selenouridine synthase",
"2.10.1.1\tMolybdopterin molybdotransferase",
"3.1.1.1\tCarboxylesterase",
"3.1.1.2\tArylesterase",
"3.1.3.104\t5-amino-6-(5-phospho-D-ribitylamino)uracil phosphatase",
"3.1.3.105\tN-acetyl-D-muramate 6-phosphate phosphatase",
"3.1.3.106\t2-lysophosphatidate phosphatase",
+ "3.1.3.107\tAmicoumacin phosphatase",
+ "3.1.3.108\tNocturnin",
"3.1.4.1\tPhosphodiesterase I",
"3.1.4.2\tGlycerophosphocholine phosphodiesterase",
"3.1.4.3\tPhospholipase C",
"3.1.11.4\tExodeoxyribonuclease (phage SP3-induced)",
"3.1.11.5\tExodeoxyribonuclease V",
"3.1.11.6\tExodeoxyribonuclease VII",
- "3.1.11.7\tAdenosine-5'-diphospho-5'-(DNA) diphosphatase",
- "3.1.11.8\tGuaosine-5'-diphospho-5'-(DNA) diphosphatase",
"3.1.12.1\t5' to 3' exodeoxyribonuclease (nucleoside 3'-phosphate-forming)",
- "3.1.12.2\tDNA-3'-diphospho-5'-guanosine diphosphatase",
"3.1.13.1\tExoribonuclease II",
"3.1.13.2\tExoribonuclease H",
"3.1.13.3\tOligonucleotidase",
"3.1.26.12\tRibonuclease E",
"3.1.26.13\tRetroviral ribonuclease H",
"3.1.26.n2\tArgonaute-2",
- "3.1.27.3\tRibonuclease T(1)",
"3.1.27.7\tRibonuclease F",
"3.1.27.8\tRibonuclease V",
"3.1.30.1\tAspergillus nuclease S(1)",
"3.2.1.41\tPullulanase",
"3.2.1.42\tGDP-glucosidase",
"3.2.1.43\tBeta-L-rhamnosidase",
- "3.2.1.44\tFucoidanase",
"3.2.1.45\tGlucosylceramidase",
"3.2.1.46\tGalactosylceramidase",
"3.2.1.47\tGalactosylgalactosylglucosylceramidase",
"3.2.1.152\tMannosylglycoprotein endo-beta-mannosidase",
"3.2.1.153\tFructan beta-(2,1)-fructosidase",
"3.2.1.154\tFructan beta-(2,6)-fructosidase",
- "3.2.1.155\tXyloglucan-specific exo-beta-1,4-glucanase",
+ "3.2.1.155\tXyloglucan-specific endo-processive beta-1,4-glucanase",
"3.2.1.156\tOligosaccharide reducing-end xylanase",
"3.2.1.157\tIota-carrageenase",
"3.2.1.158\tAlpha-agarase",
"3.2.1.208\tGlucosylglycerate hydrolase",
"3.2.1.209\tEndoplasmic reticulum Man(9)GlcNAc(2) 1,2-alpha-mannosidase",
"3.2.1.210\tEndoplasmic reticulum Man(8)GlcNAc(2) 1,2-alpha-mannosidase",
+ "3.2.1.211\tEndo-(1->3)-fucoidanase",
+ "3.2.1.212\tEndo-(1->4)-fucoidanase",
+ "3.2.1.213\tGalactan exo-1,6-beta-galactobiohydrolase (non-reducing end)",
"3.2.1.n1\tBlood group B branched chain alpha-1,3-galactosidase",
"3.2.1.n2\tBlood group B linear chain alpha-1,3-galactosidase",
"3.2.1.n3\tDictyostelium lysozyme A",
"3.4.17.21\tGlutamate carboxypeptidase II",
"3.4.17.22\tMetallocarboxypeptidase D",
"3.4.17.23\tAngiotensin-converting enzyme 2",
+ "3.4.17.24\tTubulin-glutamate carboxypeptidase",
"3.4.18.1\tCathepsin X",
"3.4.19.1\tAcylaminoacyl-peptidase",
"3.4.19.2\tPeptidyl-glycinamidase",
"3.5.1.107\tMaleamate amidohydrolase",
"3.5.1.108\tUDP-3-O-acyl-N-acetylglucosamine deacetylase",
"3.5.1.109\tSphingomyelin deacylase",
- "3.5.1.110\tPeroxyureidoacrylate/ureidoacrylate amidohydrolase",
+ "3.5.1.110\tUreidoacrylate amidohydrolase",
"3.5.1.111\t2-oxoglutaramate amidase",
"3.5.1.112\t2'-N-acetylparomamine deacetylase",
"3.5.1.113\t2'''-acetyl-6'''-hydroxyneomycin C deacetylase",
"3.5.1.127\tJasmonoyl-L-amino acid hydrolase",
"3.5.1.128\tDeaminated glutathione amidase",
"3.5.1.129\tN(5)-(cytidine 5'-diphosphoramidyl)-L-glutamine hydrolase",
- "3.5.1.130\t[Lysine-biosynthesis-protein LysW]-lysine/ornithine hydrolase",
+ "3.5.1.130\t[Amino group carrier protein]-lysine hydrolase",
"3.5.1.131\t1-carboxybiuret hydrolase",
+ "3.5.1.132\t[Amino group carrier protein]-ornithine hydrolase",
"3.5.1.133\tN(alpha)-acyl-L-glutamine aminoacylase",
"3.5.1.134\t(Indol-3-yl)acetyl-L-aspartate hydrolase",
"3.5.1.n3\t4-deoxy-4-formamido-L-arabinose-phosphoundecaprenol deformylase",
"3.6.1.66\tXTP/dITP diphosphatase",
"3.6.1.67\tDihydroneopterin triphosphate diphosphatase",
"3.6.1.68\tGeranyl diphosphate phosphohydrolase",
+ "3.6.1.69\t8-oxo-(d)GTP phosphatase",
+ "3.6.1.70\tGuaosine-5'-diphospho-5'-(DNA) diphosphatase",
+ "3.6.1.71\tAdenosine-5'-diphospho-5'-(DNA) diphosphatase",
+ "3.6.1.72\tDNA-3'-diphospho-5'-guanosine diphosphatase",
+ "3.6.1.73\tInosine/xanthosine triphosphatase",
"3.6.1.n1\tD-tyrosyl-tRNA(Tyr) hydrolase",
"3.6.1.n2\tL-cysteinyl-tRNA(Pro)",
"3.6.1.n3\tL-cysteinyl-tRNA(Cys) hydrolase",
"3.7.1.23\tMaleylpyruvate hydrolase",
"3.7.1.24\t2,4-diacetylphloroglucinol hydrolase",
"3.7.1.25\t2-hydroxy-6-oxohepta-2,4-dienoate hydrolase",
+ "3.7.1.26\t2,4-didehydro-3-deoxy-L-rhamnonate hydrolase",
"3.8.1.1\tAlkylhalidase",
"3.8.1.2\t(S)-2-haloacid dehalogenase",
"3.8.1.3\tHaloacetate dehalogenase",
"3.13.1.6\t[CysO sulfur-carrier protein]-S-L-cysteine hydrolase",
"3.13.1.7\tCarbonyl sulfide hydrolase",
"3.13.1.8\tS-adenosyl-L-methionine hydrolase (adenosine-forming)",
+ "3.13.1.9\tS-inosyl-L-homocysteine hydrolase",
"4.1.1.1\tPyruvate decarboxylase",
"4.1.1.2\tOxalate decarboxylase",
"4.1.1.4\tAcetoacetate decarboxylase",
"4.1.1.116\tD-ornithine/D-lysine decarboxylase",
"4.1.1.117\t2-((L-alanin-3-ylcarbamoyl)methyl)-2-hydroxybutanedioate decarboxylase",
"4.1.1.118\tIsophthalyl-CoA decarboxylase",
+ "4.1.1.119\tPhenylacetate decarboxylase",
"4.1.2.2\tKetotetrose-phosphate aldolase",
"4.1.2.4\tDeoxyribose-phosphate aldolase",
"4.1.2.5\tL-threonine aldolase",
"4.1.2.36\tLactate aldolase",
"4.1.2.38\tBenzoin aldolase",
"4.1.2.40\tTagatose-bisphosphate aldolase",
- "4.1.2.41\tVanillin synthase",
"4.1.2.42\tD-threonine aldolase",
"4.1.2.43\t3-hexulose-6-phosphate synthase",
"4.1.2.44\t2,3-epoxybenzoyl-CoA dihydrolase",
"4.1.2.58\t2-dehydro-3,6-dideoxy-6-sulfogluconate aldolase",
"4.1.2.59\tDihydroneopterin phosphate aldolase",
"4.1.2.60\tDihydroneopterin triphosphate aldolase",
+ "4.1.2.61\tFeruloyl-CoA hydratase/lyase",
"4.1.2.n2\t2-hydroxyphytanoyl-CoA lyase",
"4.1.3.1\tIsocitrate lyase",
"4.1.3.3\tN-acetylneuraminate lyase",
"4.2.1.84\tNitrile hydratase",
"4.2.1.85\tDimethylmaleate hydratase",
"4.2.1.87\tOctopamine dehydratase",
- "4.2.1.88\t(R)-synephrine",
+ "4.2.1.88\tSynephrine dehydratase",
"4.2.1.90\tL-rhamnonate dehydratase",
"4.2.1.91\tArogenate dehydratase",
"4.2.1.92\tHydroperoxide dehydratase",
"4.2.2.24\tRhamnogalacturonan exolyase",
"4.2.2.25\tGellan lyase",
"4.2.2.26\tOligo-alginate lyase",
+ "4.2.2.27\tPectin monosaccharide-lyase",
"4.2.2.n1\tPeptidoglycan lytic exotransglycosylase",
"4.2.2.n2\tPeptidoglycan lytic endotransglycosylase",
"4.2.3.1\tThreonine synthase",
"4.2.3.131\tMiltiradiene synthase",
"4.2.3.132\tNeoabietadiene synthase",
"4.2.3.133\tAlpha-copaene synthase",
- "4.2.3.134\t5-phosphonooxy-L-lysine phospho-lyase",
+ "4.2.3.134\t5-phosphooxy-L-lysine phospho-lyase",
"4.2.3.135\tDelta(6)-protoilludene synthase",
"4.2.3.136\tAlpha-isocomene synthase",
"4.2.3.137\t(E)-2-epi-beta-caryophyllene synthase",
"4.6.1.21\tEnterobacter ribonuclease",
"4.6.1.22\tBacillus subtilis ribonuclease",
"4.6.1.23\tRibotoxin",
+ "4.6.1.24\tRibonuclease T(1)",
+ "4.6.1.25\tBacteriophage T(4) restriction endoribonuclease RegB",
"4.7.1.1\tAlpha-D-ribose 1-methylphosphonate 5-phosphate C-P-lyase",
"4.99.1.1\tProtoporphyrin ferrochelatase",
"4.99.1.2\tAlkylmercury lyase",
"5.3.1.29\tRibose 1,5-bisphosphate isomerase",
"5.3.1.30\t5-deoxy-glucuronate isomerase",
"5.3.1.31\tSulfoquinovose isomerase",
- "5.3.1.32\t(4S)-4-hydroxy-5-phosphonooxypentane-2,3-dione isomerase",
+ "5.3.1.32\t(4S)-4-hydroxy-5-phosphooxypentane-2,3-dione isomerase",
"5.3.1.33\tL-erythrulose 1-phosphate isomerase",
"5.3.1.34\tD-erythrulose 4-phosphate isomerase",
"5.3.1.35\t2-dehydrotetronate isomerase",
"5.5.1.31\tHapalindole H synthase",
"5.5.1.32\t12-epi-hapalindole U synthase",
"5.5.1.33\t12-epi-fischerindole U synthase",
+ "5.5.1.34\t(+)-cis,trans-nepetalactol synthase",
+ "5.5.1.35\t(+)-cis,cis-nepetalactol synthase",
"5.6.1.1\tMicrotubule-severing ATPase",
"5.6.1.2\tDynein ATPase",
"5.6.1.3\tPlus-end-directed kinesin ATPase",
"6.2.1.58\tIsophthalate--CoA ligase",
"6.2.1.59\tLong-chain fatty acid adenylase/transferase FadD26",
"6.2.1.60\tMarinolic acid--CoA ligase",
+ "6.2.1.61\tSalicylate--[aryl-carrier protein] ligase",
+ "6.2.1.62\t3,4-dihydroxybenzoate--[aryl-carrier protein] ligase",
+ "6.2.1.63\tL-arginine--[L-arginyl-carrier protein] ligase",
"6.2.1.n2\tAmino acid--[acyl-carrier-protein] ligase",
"6.2.1.n3\tMalonate--CoA ligase",
"6.3.1.1\tAspartate--ammonia ligase",
"6.3.2.40\tCyclopeptine synthase",
"6.3.2.41\tN-acetylaspartylglutamate synthase",
"6.3.2.42\tN-acetylaspartylglutamylglutamate synthase",
- "6.3.2.43\t[Amino group carrier protein]--L-2-aminoadipate ligase",
+ "6.3.2.43\t[Amino-group carrier protein]--L-2-aminoadipate ligase",
"6.3.2.44\tPantoate--beta-alanine ligase (ADP-forming)",
"6.3.2.45\tUDP-N-acetylmuramate L-alanyl-gamma-D-glutamyl-meso-2,6-diaminoheptanedioate ligase",
"6.3.2.46\tFumarate--(S)-2,3-diaminopropanoate ligase",
"6.3.2.49\tL-alanine--L-anticapsin ligase",
"6.3.2.50\tTenuazonic acid synthetase",
"6.3.2.51\tPhosphopantothenate--cysteine ligase (ATP)",
- "6.3.2.52\tJasmonoyl--L-amino acid synthetase",
+ "6.3.2.52\tJasmonoyl--L-amino acid ligase",
"6.3.2.53\tUDP-N-acetylmuramoyl-L-alanine--L-glutamate ligase",
"6.3.2.54\tL-2,3-diaminopropanoate--citrate ligase",
"6.3.2.55\t2-((L-alanin-3-ylcarbamoyl)methyl)-3-(2-aminoethylcarbamoyl)-2-hydroxypropanoate synthase",
"7.1.1.6\tPlastoquinol--plastocyanin reductase",
"7.1.1.7\tUbiquinol oxidase (electrogenic, proton-motive force generating)",
"7.1.1.8\tQuinol--cytochrome-c reductase",
+ "7.1.1.9\tCytochrome-c oxidase",
"7.1.2.1\tP-type H(+)-exporting transporter",
"7.1.2.2\tH(+)-transporting two-sector ATPase",
"7.1.3.1\tH(+)-exporting diphosphatase",
"7.4.2.10\tABC-type glutathione transporter",
"7.4.2.11\tABC-type methionine transporter",
"7.4.2.12\tABC-type cystine transporter",
+ "7.4.2.13\tABC-type tyrosine transporter",
"7.5.2.1\tABC-type maltose transporter",
"7.5.2.2\tABC-type oligosaccharide transporter",
"7.5.2.3\tABC-type beta-glucan transporter",
1.1.1.417 3-beta-hydroxysteroid-4-beta-carboxylate 3-dehydrogenase (decarboxylating)
1.1.1.418 Plant 3-beta-hydroxysteroid-4-alpha-carboxylate 3-dehydrogenase (decarboxylating)
1.1.1.419 Nepetalactol dehydrogenase
+1.1.1.420 D-apiose dehydrogenase
+1.1.1.421 D-apionate oxidoisomerase
+1.1.1.422 Pseudoephedrine dehydrogenase
+1.1.1.423 Ephedrine dehydrogenase
1.1.1.n4 (-)-trans-carveol dehydrogenase
1.1.1.n5 3-methylmalate dehydrogenase
1.1.1.n11 Succinic semialdehyde reductase
1.1.2.7 Methanol dehydrogenase (cytochrome c)
1.1.2.8 Alcohol dehydrogenase (cytochrome c)
1.1.2.9 1-butanol dehydrogenase (cytochrome c)
+1.1.2.10 Lanthanide-dependent methanol dehydrogenase
1.1.3.4 Glucose oxidase
1.1.3.5 Hexose oxidase
1.1.3.6 Cholesterol oxidase
1.1.98.4 F420H(2):quinone oxidoreductase
1.1.98.5 Secondary-alcohol dehydrogenase (coenzyme-F420)
1.1.98.6 Ribonucleoside-triphosphate reductase (formate)
+1.1.98.7 Serine-type anaerobic sulfatase-maturating enzyme
1.1.99.1 Choline dehydrogenase
1.1.99.2 L-2-hydroxyglutarate dehydrogenase
1.1.99.3 Gluconate 2-dehydrogenase (acceptor)
1.2.1.100 5-formyl-3-hydroxy-2-methylpyridine 4-carboxylate 5-dehydrogenase
1.2.1.101 L-tyrosine reductase
1.2.1.102 Isopyridoxal dehydrogenase (5-pyridoxate-forming)
+1.2.1.103 [Amino-group carrier protein]-6-phospho-L-2-aminoadipate reductase
1.2.1.n2 Fatty acyl-CoA reductase
1.2.2.1 Formate dehydrogenase (cytochrome)
1.2.2.4 Carbon-monoxide dehydrogenase (cytochrome b-561)
1.3.1.31 2-enoate reductase
1.3.1.32 Maleylacetate reductase
1.3.1.33 Protochlorophyllide reductase
-1.3.1.34 2,4-dienoyl-CoA reductase (NADPH)
+1.3.1.34 2,4-dienoyl-CoA reductase ((2E)-enoyl-CoA-producing)
1.3.1.36 Geissoschizine dehydrogenase
1.3.1.37 Cis-2-enoyl-CoA reductase (NADPH)
1.3.1.38 Trans-2-enoyl-CoA reductase (NADPH)
1.3.1.96 Botryococcus squalene synthase
1.3.1.97 Botryococcene synthase
1.3.1.98 UDP-N-acetylmuramate dehydrogenase
-1.3.1.99 Iridoid synthase
1.3.1.100 Chanoclavine-I aldehyde reductase
1.3.1.101 2,3-bis-O-geranylgeranyl-sn-glycerol 1-phosphate reductase (NAD(P)H)
1.3.1.102 2-alkenal reductase (NADP(+))
1.3.1.118 Meromycolic acid enoyl-[acyl-carrier-protein] reductase
1.3.1.119 Chlorobenzene dihydrodiol dehydrogenase
1.3.1.120 Cyclohexane-1-carbonyl-CoA reductase (NADP(+))
+1.3.1.121 4-amino-4-deoxyprephenate dehydrogenase
+1.3.1.122 (S)-8-oxocitronellyl enol synthase
+1.3.1.123 7-epi-iridoid synthase
+1.3.1.124 2,4-dienoyl-CoA reductase ((3E)-enoyl-CoA-producing)
1.3.1.n3 Curcumin reductase
1.3.2.3 L-galactonolactone dehydrogenase
1.3.3.3 Coproporphyrinogen oxidase
1.3.8.12 (2S)-methylsuccinyl-CoA dehydrogenase
1.3.8.13 Crotonobetainyl-CoA reductase
1.3.8.14 L-prolyl-[peptidyl-carrier protein] dehydrogenase
+1.3.8.15 3-(aryl)acrylate reductase
1.3.98.1 Dihydroorotate oxidase (fumarate)
1.3.98.3 Coproporphyrinogen dehydrogenase
1.3.98.4 5a,11a-dehydrotetracycline reductase
1.8.4.12 Peptide-methionine (R)-S-oxide reductase
1.8.4.13 L-methionine (S)-S-oxide reductase
1.8.4.14 L-methionine (R)-S-oxide reductase
+1.8.4.15 Protein dithiol oxidoreductase (disulfide-forming)
+1.8.4.16 Thioredoxin:protein disulfide reductase
1.8.5.1 Glutathione dehydrogenase (ascorbate)
1.8.5.2 Thiosulfate dehydrogenase (quinone)
1.8.5.3 Respiratory dimethylsulfoxide reductase
1.8.5.6 Sulfite dehydrogenase (quinone)
1.8.5.7 Glutathionyl-hydroquinone reductase
1.8.5.8 Eukaryotic sulfide quinone oxidoreductase
+1.8.5.9 Protein dithiol:quinone oxidoreductase DsbB
1.8.7.1 Assimilatory sulfite reductase (ferredoxin)
1.8.7.2 Ferredoxin:thioredoxin reductase
1.8.7.3 Ferredoxin:CoB-CoM heterodisulfide reductase
1.8.98.4 Coenzyme F420:CoB-CoM heterodisulfide,ferredoxin reductase
1.8.98.5 H(2):CoB-CoM heterodisulfide,ferredoxin reductase
1.8.98.6 Formate:CoB-CoM heterodisulfide,ferredoxin reductase
+1.8.98.7 Cysteine-type anaerobic sulfatase-maturating enzyme
1.8.99.2 Adenylyl-sulfate reductase
1.8.99.5 Dissimilatory sulfite reductase
-1.9.3.1 Cytochrome-c oxidase
1.9.6.1 Nitrate reductase (cytochrome)
1.9.98.1 Iron--cytochrome-c reductase
1.10.1.1 Trans-acenaphthene-1,2-diol dehydrogenase
1.10.3.11 Ubiquinol oxidase (non-electrogenic)
1.10.3.15 Grixazone synthase
1.10.3.16 Dihydrophenazinedicarboxylate synthase
+1.10.3.17 Superoxide oxidase
1.10.5.1 Ribosyldihydronicotinamide dehydrogenase (quinone)
1.11.1.1 NADH peroxidase
1.11.1.2 NADPH peroxidase
1.11.1.12 Phospholipid-hydroperoxide glutathione peroxidase
1.11.1.13 Manganese peroxidase
1.11.1.14 Lignin peroxidase
-1.11.1.15 Peroxiredoxin
1.11.1.16 Versatile peroxidase
1.11.1.17 Glutathione amide-dependent peroxidase
1.11.1.18 Bromide peroxidase
1.11.1.21 Catalase peroxidase
1.11.1.22 Hydroperoxy fatty acid reductase
1.11.1.23 (S)-2-hydroxypropylphosphonic acid epoxidase
+1.11.1.24 Thioredoxin-dependent peroxiredoxin
+1.11.1.25 Glutaredoxin-dependent peroxiredoxin
+1.11.1.26 NADH-dependent peroxiredoxin
+1.11.1.27 Glutathione-dependent peroxiredoxin
+1.11.1.28 Lipoyl-dependent peroxiredoxin
+1.11.1.29 Mycoredoxin-dependent peroxiredoxin
1.11.2.1 Unspecific peroxygenase
1.11.2.2 Myeloperoxidase
1.11.2.3 Plant seed peroxygenase
1.14.11.67 [Histone H3]-trimethyl-L-lysine(4) demethylase
1.14.11.68 [Histone H3]-trimethyl-L-lysine(27) demethylase
1.14.11.69 [Histone H3]-trimethyl-L-lysine(36) demethylase
+1.14.11.70 7-deoxycylindrospermopsin hydroxylase
+1.14.11.71 Methylphosphonate hydroxylase
1.14.11.n2 Methylcytosine dioxygenase
1.14.11.n4 Ankyrin-repeat-histidine dioxagenase
1.14.12.1 Anthranilate 1,2-dioxygenase (deaminating, decarboxylating)
1.14.13.244 Phenol 2-monooxygenase (NADH)
1.14.13.245 Assimilatory dimethylsulfide S-monooxygenase
1.14.13.246 4-beta-methylsterol monooxygenase
+1.14.13.247 Stachydrine N-demethylase
1.14.13.n6 Hexahomomethionine N-hydroxylase
1.14.13.n7 4-nitrophenol 2-hydroxylase
1.14.14.1 Unspecific monooxygenase
1.14.18.9 4-alpha-methylsterol monooxygenase
1.14.18.10 Plant 4,4-dimethylsterol C-4-alpha-methyl-monooxygenase
1.14.18.11 Plant 4-alpha-monomethylsterol monooxygenase
+1.14.18.12 2-hydroxy fatty acid dioxygenase
1.14.19.1 Stearoyl-CoA 9-desaturase
1.14.19.2 Stearoyl-[acyl-carrier-protein] 9-desaturase
1.14.19.3 Acyl-CoA 6-desaturase
1.17.99.4 Uracil/thymine dehydrogenase
1.17.99.6 Epoxyqueuosine reductase
1.17.99.7 Formate dehydrogenase (acceptor)
+1.17.99.8 Limonene dehydrogenase
1.18.1.1 Rubredoxin--NAD(+) reductase
1.18.1.2 Ferredoxin--NADP(+) reductase
1.18.1.3 Ferredoxin--NAD(+) reductase
1.19.6.1 Nitrogenase (flavodoxin)
1.20.1.1 Phosphonate dehydrogenase
1.20.2.1 Arsenate reductase (cytochrome c)
-1.20.4.1 Arsenate reductase (glutaredoxin)
+1.20.4.1 Arsenate reductase (glutathione/glutaredoxin)
1.20.4.2 Methylarsonate reductase
1.20.4.3 Mycoredoxin
1.20.4.4 Arsenate reductase (thioredoxin)
2.1.1.360 [Histone H3]-lysine(79) N-trimethyltransferase
2.1.1.361 [Histone H4]-lysine(20) N-methyltransferase
2.1.1.362 [Histone H4]-N-methyl-L-lysine(20) N-methyltransferase
+2.1.1.363 Pre-sodorifen synthase
2.1.1.n1 Resorcinol O-methyltransferase
2.1.1.n4 Thiocyanate methyltransferase
2.1.1.n7 5-pentadecatrienyl resorcinol O-methyltransferase
2.1.1.n8 Small RNA 2'-O-methyltransferase
2.1.1.n11 Methylphosphotriester-DNA--[protein]-cysteine S-methyltransferase
2.1.2.1 Glycine hydroxymethyltransferase
-2.1.2.2 Phosphoribosylglycinamide formyltransferase
+2.1.2.2 Phosphoribosylglycinamide formyltransferase 1
2.1.2.3 Phosphoribosylaminoimidazolecarboxamide formyltransferase
2.1.2.4 Glycine formimidoyltransferase
2.1.2.5 Glutamate formimidoyltransferase
2.3.1.242 Kdo(2)-lipid IV(A) palmitoleoyltransferase
2.3.1.243 Lauroyl-Kdo(2)-lipid IV(A) myristoyltransferase
2.3.1.244 2-methylbutanoate polyketide synthase
-2.3.1.245 3-hydroxy-5-phosphonooxypentane-2,4-dione thiolase
+2.3.1.245 3-hydroxy-5-phosphooxypentane-2,4-dione thiolase
2.3.1.246 3,5-dihydroxyphenylacetyl-CoA synthase
2.3.1.247 3-keto-5-aminohexanoate cleavage enzyme
2.3.1.248 Spermidine disinapoyl transferase
2.3.1.291 Sphingoid base N-palmitoyltransferase
2.3.1.292 (Phenol)carboxyphthiodiolenone synthase
2.3.1.293 Meromycolic acid 3-oxoacyl-(acyl carrier protein) synthase I
+2.3.1.294 Meromycolic acid 3-oxoacyl-(acyl carrier protein) synthase II
+2.3.1.295 Mycoketide-CoA synthase
2.3.1.296 Omega-hydroxyceramide transacylase
2.3.1.297 Very-long-chain ceramide synthase
2.3.1.298 Ultra-long-chain ceramide synthase
2.3.2.30 L-ornithine N(alpha)-acyltransferase
2.3.2.31 RBR-type E3 ubiquitin transferase
2.3.2.32 Cullin-RING-type E3 NEDD8 transferase
+2.3.2.33 RCR-type E3 ubiquitin transferase
2.3.3.1 Citrate (Si)-synthase
2.3.3.2 Decylcitrate synthase
2.3.3.3 Citrate (Re)-synthase
2.4.1.368 Oleanolate 3-O-glucosyltransferase
2.4.1.369 Enterobactin C-glucosyltransferase
2.4.1.370 Inositol phosphorylceramide mannosyltransferase
+2.4.1.371 Polymannosyl GlcNAc-diphospho-ditrans,octacis-undecaprenol 2,3-alpha-mannosylpolymerase
+2.4.1.372 Mutansucrase
+2.4.1.373 Alpha-(1->2) branching sucrase
+2.4.1.374 Beta-1,2-mannooligosaccharide synthase
2.4.1.n2 Loliose synthase
2.4.2.1 Purine-nucleoside phosphorylase
2.4.2.2 Pyrimidine-nucleoside phosphorylase
2.4.2.60 Cysteine-dependent adenosine diphosphate thiazole synthase
2.4.2.61 Alpha-dystroglycan beta-1,4-xylosyltransferase
2.4.2.n2 Glucoside xylosyltransferase
-2.4.2.n3 Xyloside xylosyltransferase
+2.4.2.n3 Xylosyl alpha-1,3-xylosyltransferase
2.4.99.1 Beta-galactoside alpha-(2,6)-sialyltransferase
2.4.99.2 Beta-D-galactosyl-(1->3)-N-acetyl-beta-D-galactosaminide alpha-2,3-sialyltransferase
2.4.99.3 Alpha-N-acetylgalactosaminide alpha-2,6-sialyltransferase
2.6.1.115 5-hydroxydodecatetraenal 1-aminotransferase
2.6.1.116 6-aminohexanoate aminotransferase
2.6.1.117 L-glutamine--4-(methylsulfanyl)-2-oxobutanoate aminotransferase
+2.6.1.118 [Amino group carrier protein]-gamma-(L-lysyl)-L-glutamate aminotransferase
2.6.3.1 Oximinotransferase
2.6.99.1 dATP(dGTP)--DNA purinetransferase
2.6.99.2 Pyridoxine 5'-phosphate synthase
2.7.1.45 2-dehydro-3-deoxygluconokinase
2.7.1.46 L-arabinokinase
2.7.1.47 D-ribulokinase
-2.7.1.48 Uridine kinase
+2.7.1.48 Uridine/cytidine kinase
2.7.1.49 Hydroxymethylpyrimidine kinase
2.7.1.50 Hydroxyethylthiazole kinase
2.7.1.51 L-fuculokinase
2.7.1.144 Tagatose-6-phosphate kinase
2.7.1.145 Deoxynucleoside kinase
2.7.1.146 ADP-specific phosphofructokinase
-2.7.1.147 ADP-specific glucokinase
+2.7.1.147 ADP-specific glucose/glucosamine kinase
2.7.1.148 4-(cytidine 5'-diphospho)-2-C-methyl-D-erythritol kinase
2.7.1.149 1-phosphatidylinositol-5-phosphate 4-kinase
2.7.1.150 1-phosphatidylinositol-3-phosphate 5-kinase
2.7.1.227 Inositol phosphorylceramide synthase
2.7.1.228 Mannosyl-inositol-phosphoceramide inositolphosphotransferase
2.7.1.229 Deoxyribokinase
+2.7.1.230 Amicoumacin kinase
2.7.2.1 Acetate kinase
2.7.2.2 Carbamate kinase
2.7.2.3 Phosphoglycerate kinase
2.7.2.13 Glutamate 1-kinase
2.7.2.14 Branched-chain-fatty-acid kinase
2.7.2.15 Propionate kinase
+2.7.2.16 2-phosphoglycerate kinase
+2.7.2.17 [Amino-group carrier protein]-L-2-aminoadipate 6-kinase
2.7.3.1 Guanidinoacetate kinase
2.7.3.2 Creatine kinase
2.7.3.3 Arginine kinase
2.8.3.23 Caffeate CoA-transferase
2.8.3.24 (R)-2-hydroxy-4-methylpentanoate CoA-transferase
2.8.3.25 Bile acid CoA-transferase
+2.8.3.26 Succinyl-CoA:mesaconate CoA transferase
2.8.4.1 Coenzyme-B sulfoethylthiotransferase
2.8.4.2 Arsenate-mycothiol transferase
2.8.4.3 tRNA-2-methylthio-N(6)-dimethylallyladenosine synthase
2.8.5.2 L-cysteine S-thiosulfotransferase
2.9.1.1 L-seryl-tRNA(Sec) selenium transferase
2.9.1.2 O-phospho-L-seryl-tRNA(Sec):L-selenocysteinyl-tRNA synthase
+2.9.1.3 tRNA 2-selenouridine synthase
2.10.1.1 Molybdopterin molybdotransferase
3.1.1.1 Carboxylesterase
3.1.1.2 Arylesterase
3.1.3.104 5-amino-6-(5-phospho-D-ribitylamino)uracil phosphatase
3.1.3.105 N-acetyl-D-muramate 6-phosphate phosphatase
3.1.3.106 2-lysophosphatidate phosphatase
+3.1.3.107 Amicoumacin phosphatase
+3.1.3.108 Nocturnin
3.1.4.1 Phosphodiesterase I
3.1.4.2 Glycerophosphocholine phosphodiesterase
3.1.4.3 Phospholipase C
3.1.11.4 Exodeoxyribonuclease (phage SP3-induced)
3.1.11.5 Exodeoxyribonuclease V
3.1.11.6 Exodeoxyribonuclease VII
-3.1.11.7 Adenosine-5'-diphospho-5'-(DNA) diphosphatase
-3.1.11.8 Guaosine-5'-diphospho-5'-(DNA) diphosphatase
3.1.12.1 5' to 3' exodeoxyribonuclease (nucleoside 3'-phosphate-forming)
-3.1.12.2 DNA-3'-diphospho-5'-guanosine diphosphatase
3.1.13.1 Exoribonuclease II
3.1.13.2 Exoribonuclease H
3.1.13.3 Oligonucleotidase
3.1.26.12 Ribonuclease E
3.1.26.13 Retroviral ribonuclease H
3.1.26.n2 Argonaute-2
-3.1.27.3 Ribonuclease T(1)
3.1.27.7 Ribonuclease F
3.1.27.8 Ribonuclease V
3.1.30.1 Aspergillus nuclease S(1)
3.2.1.41 Pullulanase
3.2.1.42 GDP-glucosidase
3.2.1.43 Beta-L-rhamnosidase
-3.2.1.44 Fucoidanase
3.2.1.45 Glucosylceramidase
3.2.1.46 Galactosylceramidase
3.2.1.47 Galactosylgalactosylglucosylceramidase
3.2.1.152 Mannosylglycoprotein endo-beta-mannosidase
3.2.1.153 Fructan beta-(2,1)-fructosidase
3.2.1.154 Fructan beta-(2,6)-fructosidase
-3.2.1.155 Xyloglucan-specific exo-beta-1,4-glucanase
+3.2.1.155 Xyloglucan-specific endo-processive beta-1,4-glucanase
3.2.1.156 Oligosaccharide reducing-end xylanase
3.2.1.157 Iota-carrageenase
3.2.1.158 Alpha-agarase
3.2.1.208 Glucosylglycerate hydrolase
3.2.1.209 Endoplasmic reticulum Man(9)GlcNAc(2) 1,2-alpha-mannosidase
3.2.1.210 Endoplasmic reticulum Man(8)GlcNAc(2) 1,2-alpha-mannosidase
+3.2.1.211 Endo-(1->3)-fucoidanase
+3.2.1.212 Endo-(1->4)-fucoidanase
+3.2.1.213 Galactan exo-1,6-beta-galactobiohydrolase (non-reducing end)
3.2.1.n1 Blood group B branched chain alpha-1,3-galactosidase
3.2.1.n2 Blood group B linear chain alpha-1,3-galactosidase
3.2.1.n3 Dictyostelium lysozyme A
3.4.17.21 Glutamate carboxypeptidase II
3.4.17.22 Metallocarboxypeptidase D
3.4.17.23 Angiotensin-converting enzyme 2
+3.4.17.24 Tubulin-glutamate carboxypeptidase
3.4.18.1 Cathepsin X
3.4.19.1 Acylaminoacyl-peptidase
3.4.19.2 Peptidyl-glycinamidase
3.5.1.107 Maleamate amidohydrolase
3.5.1.108 UDP-3-O-acyl-N-acetylglucosamine deacetylase
3.5.1.109 Sphingomyelin deacylase
-3.5.1.110 Peroxyureidoacrylate/ureidoacrylate amidohydrolase
+3.5.1.110 Ureidoacrylate amidohydrolase
3.5.1.111 2-oxoglutaramate amidase
3.5.1.112 2'-N-acetylparomamine deacetylase
3.5.1.113 2'''-acetyl-6'''-hydroxyneomycin C deacetylase
3.5.1.127 Jasmonoyl-L-amino acid hydrolase
3.5.1.128 Deaminated glutathione amidase
3.5.1.129 N(5)-(cytidine 5'-diphosphoramidyl)-L-glutamine hydrolase
-3.5.1.130 [Lysine-biosynthesis-protein LysW]-lysine/ornithine hydrolase
+3.5.1.130 [Amino group carrier protein]-lysine hydrolase
3.5.1.131 1-carboxybiuret hydrolase
+3.5.1.132 [Amino group carrier protein]-ornithine hydrolase
3.5.1.133 N(alpha)-acyl-L-glutamine aminoacylase
3.5.1.134 (Indol-3-yl)acetyl-L-aspartate hydrolase
3.5.1.n3 4-deoxy-4-formamido-L-arabinose-phosphoundecaprenol deformylase
3.6.1.66 XTP/dITP diphosphatase
3.6.1.67 Dihydroneopterin triphosphate diphosphatase
3.6.1.68 Geranyl diphosphate phosphohydrolase
+3.6.1.69 8-oxo-(d)GTP phosphatase
+3.6.1.70 Guaosine-5'-diphospho-5'-(DNA) diphosphatase
+3.6.1.71 Adenosine-5'-diphospho-5'-(DNA) diphosphatase
+3.6.1.72 DNA-3'-diphospho-5'-guanosine diphosphatase
+3.6.1.73 Inosine/xanthosine triphosphatase
3.6.1.n1 D-tyrosyl-tRNA(Tyr) hydrolase
3.6.1.n2 L-cysteinyl-tRNA(Pro)
3.6.1.n3 L-cysteinyl-tRNA(Cys) hydrolase
3.7.1.23 Maleylpyruvate hydrolase
3.7.1.24 2,4-diacetylphloroglucinol hydrolase
3.7.1.25 2-hydroxy-6-oxohepta-2,4-dienoate hydrolase
+3.7.1.26 2,4-didehydro-3-deoxy-L-rhamnonate hydrolase
3.8.1.1 Alkylhalidase
3.8.1.2 (S)-2-haloacid dehalogenase
3.8.1.3 Haloacetate dehalogenase
3.13.1.6 [CysO sulfur-carrier protein]-S-L-cysteine hydrolase
3.13.1.7 Carbonyl sulfide hydrolase
3.13.1.8 S-adenosyl-L-methionine hydrolase (adenosine-forming)
+3.13.1.9 S-inosyl-L-homocysteine hydrolase
4.1.1.1 Pyruvate decarboxylase
4.1.1.2 Oxalate decarboxylase
4.1.1.4 Acetoacetate decarboxylase
4.1.1.116 D-ornithine/D-lysine decarboxylase
4.1.1.117 2-((L-alanin-3-ylcarbamoyl)methyl)-2-hydroxybutanedioate decarboxylase
4.1.1.118 Isophthalyl-CoA decarboxylase
+4.1.1.119 Phenylacetate decarboxylase
4.1.2.2 Ketotetrose-phosphate aldolase
4.1.2.4 Deoxyribose-phosphate aldolase
4.1.2.5 L-threonine aldolase
4.1.2.36 Lactate aldolase
4.1.2.38 Benzoin aldolase
4.1.2.40 Tagatose-bisphosphate aldolase
-4.1.2.41 Vanillin synthase
4.1.2.42 D-threonine aldolase
4.1.2.43 3-hexulose-6-phosphate synthase
4.1.2.44 2,3-epoxybenzoyl-CoA dihydrolase
4.1.2.58 2-dehydro-3,6-dideoxy-6-sulfogluconate aldolase
4.1.2.59 Dihydroneopterin phosphate aldolase
4.1.2.60 Dihydroneopterin triphosphate aldolase
+4.1.2.61 Feruloyl-CoA hydratase/lyase
4.1.2.n2 2-hydroxyphytanoyl-CoA lyase
4.1.3.1 Isocitrate lyase
4.1.3.3 N-acetylneuraminate lyase
4.2.1.84 Nitrile hydratase
4.2.1.85 Dimethylmaleate hydratase
4.2.1.87 Octopamine dehydratase
-4.2.1.88 (R)-synephrine
+4.2.1.88 Synephrine dehydratase
4.2.1.90 L-rhamnonate dehydratase
4.2.1.91 Arogenate dehydratase
4.2.1.92 Hydroperoxide dehydratase
4.2.2.24 Rhamnogalacturonan exolyase
4.2.2.25 Gellan lyase
4.2.2.26 Oligo-alginate lyase
+4.2.2.27 Pectin monosaccharide-lyase
4.2.2.n1 Peptidoglycan lytic exotransglycosylase
4.2.2.n2 Peptidoglycan lytic endotransglycosylase
4.2.3.1 Threonine synthase
4.2.3.131 Miltiradiene synthase
4.2.3.132 Neoabietadiene synthase
4.2.3.133 Alpha-copaene synthase
-4.2.3.134 5-phosphonooxy-L-lysine phospho-lyase
+4.2.3.134 5-phosphooxy-L-lysine phospho-lyase
4.2.3.135 Delta(6)-protoilludene synthase
4.2.3.136 Alpha-isocomene synthase
4.2.3.137 (E)-2-epi-beta-caryophyllene synthase
4.6.1.21 Enterobacter ribonuclease
4.6.1.22 Bacillus subtilis ribonuclease
4.6.1.23 Ribotoxin
+4.6.1.24 Ribonuclease T(1)
+4.6.1.25 Bacteriophage T(4) restriction endoribonuclease RegB
4.7.1.1 Alpha-D-ribose 1-methylphosphonate 5-phosphate C-P-lyase
4.99.1.1 Protoporphyrin ferrochelatase
4.99.1.2 Alkylmercury lyase
5.3.1.29 Ribose 1,5-bisphosphate isomerase
5.3.1.30 5-deoxy-glucuronate isomerase
5.3.1.31 Sulfoquinovose isomerase
-5.3.1.32 (4S)-4-hydroxy-5-phosphonooxypentane-2,3-dione isomerase
+5.3.1.32 (4S)-4-hydroxy-5-phosphooxypentane-2,3-dione isomerase
5.3.1.33 L-erythrulose 1-phosphate isomerase
5.3.1.34 D-erythrulose 4-phosphate isomerase
5.3.1.35 2-dehydrotetronate isomerase
5.5.1.31 Hapalindole H synthase
5.5.1.32 12-epi-hapalindole U synthase
5.5.1.33 12-epi-fischerindole U synthase
+5.5.1.34 (+)-cis,trans-nepetalactol synthase
+5.5.1.35 (+)-cis,cis-nepetalactol synthase
5.6.1.1 Microtubule-severing ATPase
5.6.1.2 Dynein ATPase
5.6.1.3 Plus-end-directed kinesin ATPase
6.2.1.58 Isophthalate--CoA ligase
6.2.1.59 Long-chain fatty acid adenylase/transferase FadD26
6.2.1.60 Marinolic acid--CoA ligase
+6.2.1.61 Salicylate--[aryl-carrier protein] ligase
+6.2.1.62 3,4-dihydroxybenzoate--[aryl-carrier protein] ligase
+6.2.1.63 L-arginine--[L-arginyl-carrier protein] ligase
6.2.1.n2 Amino acid--[acyl-carrier-protein] ligase
6.2.1.n3 Malonate--CoA ligase
6.3.1.1 Aspartate--ammonia ligase
6.3.2.40 Cyclopeptine synthase
6.3.2.41 N-acetylaspartylglutamate synthase
6.3.2.42 N-acetylaspartylglutamylglutamate synthase
-6.3.2.43 [Amino group carrier protein]--L-2-aminoadipate ligase
+6.3.2.43 [Amino-group carrier protein]--L-2-aminoadipate ligase
6.3.2.44 Pantoate--beta-alanine ligase (ADP-forming)
6.3.2.45 UDP-N-acetylmuramate L-alanyl-gamma-D-glutamyl-meso-2,6-diaminoheptanedioate ligase
6.3.2.46 Fumarate--(S)-2,3-diaminopropanoate ligase
6.3.2.49 L-alanine--L-anticapsin ligase
6.3.2.50 Tenuazonic acid synthetase
6.3.2.51 Phosphopantothenate--cysteine ligase (ATP)
-6.3.2.52 Jasmonoyl--L-amino acid synthetase
+6.3.2.52 Jasmonoyl--L-amino acid ligase
6.3.2.53 UDP-N-acetylmuramoyl-L-alanine--L-glutamate ligase
6.3.2.54 L-2,3-diaminopropanoate--citrate ligase
6.3.2.55 2-((L-alanin-3-ylcarbamoyl)methyl)-3-(2-aminoethylcarbamoyl)-2-hydroxypropanoate synthase
7.1.1.6 Plastoquinol--plastocyanin reductase
7.1.1.7 Ubiquinol oxidase (electrogenic, proton-motive force generating)
7.1.1.8 Quinol--cytochrome-c reductase
+7.1.1.9 Cytochrome-c oxidase
7.1.2.1 P-type H(+)-exporting transporter
7.1.2.2 H(+)-transporting two-sector ATPase
7.1.3.1 H(+)-exporting diphosphatase
7.4.2.10 ABC-type glutathione transporter
7.4.2.11 ABC-type methionine transporter
7.4.2.12 ABC-type cystine transporter
+7.4.2.13 ABC-type tyrosine transporter
7.5.2.1 ABC-type maltose transporter
7.5.2.2 ABC-type oligosaccharide transporter
7.5.2.3 ABC-type beta-glucan transporter
-/* $Id: gc.inc 585639 2019-05-01 19:41:04Z fukanchi $
+/* $Id: gc.inc 610069 2020-06-10 17:10:47Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
" sncbieaa \"--*-------**--*-----------------M--M---------------M------------\"",
" } ,",
" {",
- " name \"Pterobranchia Mitochondrial\" ,",
+ " name \"Rhabdopleuridae Mitochondrial\" ,",
" id 24 ,",
" ncbieaa \"FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG\",",
" sncbieaa \"---M------**-------M---------------M---------------M------------\"",
-- readability at the suggestion of Peter Rice, EMBL
-- Later additions by Taxonomy Group staff at NCBI
--
+-- Version 4.6
+-- Renamed genetic code 24 to Rhabdopleuridae Mitochondrial
+--
-- Version 4.5
-- Added Cephalodiscidae mitochondrial genetic code 33
--
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
- name "Pterobranchia Mitochondrial" ,
+ name "Rhabdopleuridae Mitochondrial" ,
id 24 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
sncbieaa "---M------**-------M---------------M---------------M------------"
-/* $Id: institution_codes.inc 607542 2020-05-05 14:51:12Z ivanov $
+/* $Id: institution_codes.inc 616908 2020-09-22 18:24:46Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
*/
static const char* const kInstitutionCollectionCodeList[] = {
-"# $Id: institution_codes.inc 607542 2020-05-05 14:51:12Z ivanov $",
+"# $Id: institution_codes.inc 616908 2020-09-22 18:24:46Z ivanov $",
"A\ts\tArnold Arboretum, Harvard University\t\t\t",
"AA\ts\tMinistry of Science, Academy of Sciences\t\t\t",
"AAC\tc\tArignar Anna College\t\t\t",
"ABTRI\tc\tApex Biotechnology Training and Research Institute\t\t\t",
"ABU<NGA>\ts\tAhmadu Bello University Herbarium\t\t\t",
"AC\ts\tAmherst College\t\t\t",
-"ACA\ts\tAgricultural University of Athens\t\t\t",
"ACA-DC\tc\tGreek Coordinated Collections of Microorganisms\t\t\t",
"ACAD\ts\tAcadia University, K. C. Irving Environmental Science Centre & Harriet Irving Botanical Gardens\t\t\t",
"ACAD<AUS>\tsb\tAustralian Centre for Ancient DNA\t\t\t",
"ACAM\tc\tThe Australian Collection of Antarctic Microorganisms, Cooperative Research Center for the Antarctic and Southern Ocean Environment\t\t\t",
+"ACAM<GRC>\ts\tAgricultural University of Athens\tACA\t\t",
"ACAP\ts\tAquaculture Center of Aomori Prefecture\t\t\t",
"ACBC\ts\tAgriculture Canada Research Station\t\t\t",
"ACBR\tc\tAustrian Center of Biological Resources and Applied Mycology\t\t\t",
"BCCM/ITM\tc\tBelgian Coordinated Collections of Microorganisms / ITM Mycobacteria Collection\tITM\t\t",
"BCCM/LMG\tc\tBelgian Coordinated Collections of Microorganisms/ LMG Bacteria Collection\tLMG\thttp://bccm.belspo.be/catalogues/lmg-strain-details?NUM=\t",
"BCCM/MUCL\tc\tBelgian Coordinated Collections of Microorganisms / MUCL Agro-food & Environmental Fungal Collection\tMUCL\t\t",
-"BCCM/ULC\tc\tBelgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection\tULC\thttp://bccm.belspo.be/catalogues/ulc-strain-details?ACCESSION_NUMBER=\t",
+"BCCM/ULC\tc\tBelgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection\tULC\thttps://bccm.belspo.be/catalogues/bm-details?accession_number=\t",
"BCCN\tc\tBrucella Culture Collection\t\t\t",
"BCCUSP\tc\tBrazilian Cyanobacteria Collection - University of Sao Paulo\t\t\t",
"BCF\ts\tUniversitat de Barcelona, Laboratori de Botanica\t\t\t",
"BPBM\ts\tBernice P. Bishop Museum\tBISHOP\t\t",
"BPBM:Fish\ts\tBernice P. Bishop Museum, Fish Collection\t\t",
"BPBM:IZ\ts\tBernice P. Bishop Museum, Invertebrate Zoology\t\t",
-"BPI\tsc\tU.S. National Fungus Collections, Systematic Botany and Mycology Laboratory\t\thttp://nt.ars-grin.gov/fungaldatabases/specimens/new_rptSpecimenOneRec.cfm?thisrec=BPI+&spec;\t",
+"BPI\tsc\tU.S. National Fungus Collections, Systematic Botany and Mycology Laboratory\t\t\t",
"BPI<ZAF>\ts\tBernard Price Institute for Palaeontological Research\t\t\t",
"BPIC\tc\tBenaki Phytopathological Institute Collection\t\t\t",
"BPL\ts\tMuseum of Barnstaple & North Devon\t\t\t",
"CCM\tc\tCzech Collection of Microorganisms\tCCM<CZE>\t\t",
"CCM-A\tc\tColeccion de Cultivos Microbianos\t\t\t",
"CCM-CIBE\tc\tEscuela Superior Politecnica del Litoral\t\t\t",
+"CCM-UFV\tc\tCollection of Cyanobacteria and Microalgae at the Universidade Federal de Vicosa\t\t\t",
"CCM<CHN>\ts\tChangchun College of Traditional Chinese Medicine, Department of Chinese Materia Medica\t\t\t",
"CCM<USA-MT>\ts\tCarter County Museum\t\t\t",
"CCMA-UFSCar\tc\tCulture Collection of Freshwater Microalgae\t\t\t",
"CGEC\ts\tChina Entomological Research Institute\t\t\t",
"CGG\ts\tCambridge University Botanic Garden\t\t\t",
"CGH\ts\tNational Museum of Prague\t\t\t",
-"CGMCC\tc\tChina General Microbiological Culture Collection Center\tAS\t\t",
+"CGMCC\tc\tChina General Microbiological Culture Collection Center\tAS\thttp://www.cgmcc.net/english/cata.php?stn=CGMCC%20\t",
"CGMS\ts\tUniversidade Federal de Mato Grosso do Sul, Departamento de Biologia\t\t\t",
"CGN\ts\tCentre for Genetic Resources, The Netherlands\t\t\t",
"CGRIS\tb\tChinese Crop Germplasm Resources Information Network\t\t\t",
"CHELB\ts\tCheltenham College for Boys\t\t\t",
"CHEP\ts\tEscuela Superior Politecnica del Chimborazo\t\t\t",
"CHER\ts\tYu. Fedcovich Chernivtsi State University, Botany Department\t\t\t",
+"CHFC-EA\tsc\tChilean Fungal Collection\tCHFC,ChFC\t\t",
"CHFD\ts\tChelmsford and Essex Museum\t\t\t",
"CHI\ts\tUniversity of Illinois, Biological Sciences Department\t\t\t",
"CHIA\ts\tNational Chiayi Agricultural College, Forestry Department\t\t\t",
"CIBM\ts\tCentro Invest. Biol. Noroeste\t\t\t",
"CIC\ts\tAlbertson College of Idaho, Biology Department\t\t\t",
"CICC\tc\tChina Center for Industrial Culture Collection\t\t\t",
-"CICCM\tc\tCawthron Institute Culture Collection of Micro-algae\t\t\t",
+"CICCM\tc\tCawthron Institute Culture Collection of Micro-algae\tCAWD\t\t",
"CICESE\ts\tCentro de Investigacion Cientifica y de Educacion Superior de Ensenada\t\t\t",
"CICIM\tc\tCulture and Information Centre of Industrial Microorganisms of China's Univeristies\t\t\t",
"CICIMAR\ts\tCentro Interdisciplinario de Ciencias Marinas\t\t\t",
"CSAT\ts\tColegio de Postgraduados, Campus Tabasco\t\t\t",
"CSAU\ts\tNational Agrarian University, Southern Branch \"Crimean Agrotechnological University\", Department of Botany, Plant Physiology and Genetics\t\t\t",
"CSB\ts\tSt. John's University/College of Saint Benedict, Biology Department\t\t\t",
+"CSBD\ts\tCentre for Study of Biological Diversity\t\t\t",
"CSC\ts\tColegio del Sagrado Corazon\t\t\t",
"CSC-CLCH\tc\tCentro Substrati Cellulari, Cell Lines Collection and Hybridomas\t\t\t",
"CSCA\ts\tCalifornia State Collection of Arthropods\t\t\t",
"CTNRC\ts\tThai National Reference Collections\t\t\t",
"CTR\ts\tCharles T. Ramsden historical collection\t\t\t",
"CTS\ts\tChongqing Teachers College\t\t\t",
+"CTUA\ts\tColección Teriológica de la Universidad de Antioquia\t\t\t",
"CTY\ts\tCanterbury Literary and Philosophical Institution\t\t\t",
"CU\tsb\tCornell University\t\t\t",
"CUAC\ts\tClemson University\t\t\t",
"DKG\ts\tJuniper Hall Field Centre\t\t\t",
"DLF\ts\tStetson University, Biology Department\t\t\t",
"DLU\ts\tDa Lat University\t\t\t",
+"DLUCC\tc\tDali University Culture Collection\t\t\t",
"DLY\ts\tDudley and Midland Geological and Scientific Society and Field Club\t\t\t",
"DM<NZ>\ts\tDominion Museum\t\t\t",
"DM<USA-UT>\ts\tThe Dinosaur Museum\t\t\t",
"ECK\ts\tBuffalo State College\t\t\t",
"ECM\ts\tHubei College of Traditional Chinese Medicine, Department of Chinese Materia Medica\t\t\t",
"ECNB\ts\tEscuela Nacional Ciencias\t\t\t",
+"ECNU\ts\tMuseum of Biology, East China Normal University, School of Life Sciences\t\t\t",
"ECOCHM\ts\tColeccion de Mamiferos del Museo de Zoologia-ECOSUR\t\t\t",
"ECOL\ts\tCollection du Laborataire d'Ecologie\t\t\t",
"ECOMAR<FRA>\ts\tECOMAR lab University of Reunion\t\t\t",
"EELM\ts\tEstacion Experimental Agricola de la Molina\t\t\t",
"EERU\ts\tEconomic Entomology Research Unit\t\t\t",
"EFC\ts\tEscola de Florestas\t\t\t",
-"EFCC\ts\tEpping Forest Conservation Centre\t\t\t",
+"EFCC\tc\tEntomopathogenic Fungal Collection\t\t\t",
"EFH\ts\tForestry Commission\t\t\t",
"EFM\ts\tEpping Forest Museum, Corporation of London\t\t\t",
"EFWM\ts\tDepartment of Entomology\t\t\t",
"F\ts\tField Museum of Natural History, Botany Department\tFMNH:F\t\t",
"FAA\ts\tUniversidad Nacional del Centro de la Provincia de Buenos Aires\t\t\t",
"FABR\ts\tHarmas de J. H. Fabre\t\t\t",
-"FACHB\tc\tFreshwater Algae Culture Collection\t\t\t",
+"FACHB\tc\tFreshwater Algae Culture Collection\tCHAB<China> \t\t",
"FACS\ts\tFujian Agricultural College\t\t\t",
"FAK\ts\tDepartment of Fisheries, Faculty of Agriculture\t\t\t",
"FAKOU\ts\tFaculty of Agriculture, Kochi Univerisity\t\t\t",
"LECB\ts\tSaint Petersburg State University, Botany Department\t\t\t",
"LEDLIE\ts\tPatricia Ledlie Herbarium\t\t\t",
"LEF\ts\tEconomic Forestry Institute of Liaoning Province\t\t\t",
+"LEGEcc\tc\tBlue Biotechnology and Ecotoxicology Culture Collection\t\t\t",
"LEH\ts\tLehigh University\t\t\t",
"LEI\ts\tLeicester Literary and Philosophical Society\t\t\t",
"LEISHCRYOBANK\tc\tInternational Cryobank of Leishmania\t\t\t",
"MADM\ts\tMuseu Municipal do Funchal\t\t\t",
"MADS\ts\tMuseu de Historia Natural do Seminario do Funchal\t\t\t",
"MAF\ts\tUniversidad Complutense, Departamento de Biologia Vegetal II\t\t\t",
-"MAFF\tc\tMAFF Genebank, Ministry of Agriculture Forestry and Fisheries\t\t\t",
+"MAFF\tc\tMAFF Genebank, Ministry of Agriculture Forestry and Fisheries\t\thttps://www.gene.affrc.go.jp/databases-micro_search_detail_en.php?maff=\t",
"MAFF<FJI>\ts\tColo-i-Suva Silvicultural Station\t\t\t",
"MAFI\ts\tMagyar Allami Foeldtani Intezet, Budapest - Hungarian Geological Survey\t\t\t",
"MAFST\ts\tInstituto Forestal de la Moncloa\t\t\t",
"MDTN\ts\tMiddleton Botanical Society\t\t\t",
"MDUG\ts\tUniversidad Guanajuato, Museo Alfredo Duges\t\t\t",
"MDZAU\ts\tMuseum Deptartment of Zoology\t\t\t",
+"MEAN\tc\tMicoteca da Estacao Agronomica Nacional\t\t\t",
"MECB\ts\tUniversidade Federal de Pelotas, Museu Entomologico Ceslau Biezanko\t\t\t",
"MECG\ts\tMedical Entomology Collection Gallery\t\t\t",
"MECN\ts\tMuseo Ecuadoriano de Ciencias Naturales\tDHMECN\t\t",
"MU<TUR>\tc\tMugla Sitki Kocman University\t\t\t",
"MU<USA-OH>\ts\tMiami University, Botany Department, Willard Sherman Turrell Herbarium\t\thttp://herbarium.muohio.edu/herbariummu/\t",
"MU<USA-TX>\ts\tMidwestern University\t\t\t",
+"MUA-AVP\ts\tMuseo Universitario de la Universidad de Antioquia\t\t\t",
"MUACC\tc\tMurdoch University Algal Culture Collection\t\t\t",
"MUAF\tc\tCulture collection of Mendel University of Agriculture and Forestry in Brno\t\t\t",
"MUAP\ts\tMuseo del Mar Universidad Arturo Prat\t\t\t",
"MZCR\ts\tMuseo de Zoologia\t\t\t",
"MZFC\ts\tMuseo de Zoologia \"Alfonso L. Herrera\"\t\t\t",
"MZFN\ts\tMuseo Zoologico dell'Universita \"Federico II\"\t\t\t",
+"MZFS-DAR\ts\tMuseu de Zoologia da Universidade Estadual de Feira de Santana\t\t\t",
"MZGZ\ts\tMuseum Zoologia del Giardino Zoologico\t\t\t",
"MZH\ts\tZoolgical Museum, Finnish Museum of Natural History\t\t\t",
"MZKI\tc\tMicrobial Culture Collection of National Institute of Chemistry\t\t\t",
"NLU\ts\tUniversity of Louisiana at Monroe, Museum of Natural History\t\t\t",
"NLUH\ts\tUniversity of the Philippines College Baguio\t\t\t",
"NM\ts\tNorthern Michigan University, Biology Department\t\t\t",
+"NMA\ts\tNational Museum Australia -\t\t\t",
"NMAC\ts\tInner Mongolia Agricultural University, Department of Pratacultural Science\t\t\t",
"NMAG\ts\tNaturhistorisches Museum, Augsburg\t\t\t",
"NMB\tc\tNingbo Marine Biotechnology\t\t\t",
"NRC<EGY>\ts\tNational Research Centre\t\t\t",
"NRCC\ts\tNational Research Council of Canada\t\t\t",
"NRCS\tc\tNational Reference Center for Streptococci in Aachen\t\t\t",
+"NRI\ts\tTexas A&M Natural Resources Institute\t\t\t",
"NRIBAS\ts\tNational Research Institute of Biology, Academia Sinica\t\t\t",
"NRIC\tc\tNODAI Research Institute Culture Collection\t\t\t",
"NRL\tc\tNeisseria Reference Laboratory\t\t\t",
-"NRM\ts\tSwedish Museum of Natural History\t\t\t",
+"NRM\ts\tSwedish Museum of Natural History\tSMNH\t\t",
"NRN\ts\tNairn Literary Society Library, Public Library\t\t\t",
"NRNZ\ts\tNorthland Regional Museum\t\t\t",
"NRPSU\tc\tDepartment of Agro-industry, Faculty of Natural Resources\t\t\t",
"NTOU\tsc\tInstitute of Marine Biology, National Taiwan Ocean University\t\t\t",
"NTS\ts\tNevada Operations Office, U.S. Department of Energy\t\t\t",
"NTSC\ts\tUniversity of North Texas, Biological Sciences Department\t\t\t",
+"NTUCC\tc\tPlant Pathology and Microbiology, National Taiwan University Culture Collection\t\t\t",
"NTUF\ts\tNational Taiwan University, Forestry Department\t\t\t",
-"NTUM\ts\tNational Taiwan University\t\t\t",
-"NTUMA\ts\tNational Taiwan University\t\t\t",
+"NTUH\ts\tHerbarium of the Department of Plant Pathology and Microbiology, National Taiwan University\t\t\t",
+"NTUM\ts\tNational Taiwan University Museum\t\t\t",
"NU<THA>\tc\tDepartment of Microbiology, Faculty of Science\t\t\t",
"NU<ZAF>\ts\tUniversity of Natal, School of Botany and Zoology\t\t\t",
"NUA\tc\tDepartment of Microbiology, National University of Athens\t\t\t",
"UHCC\tc\tUniversity of Helsinki Cyanobacteria Culture Collection\t\t\t",
"UHI\ts\tUssishkin House, Botany Department\t\t\t",
"UHM\ts\tManoa, College of Tropical Agriculture, Department of Entomology\t\t\t",
-"UI<NGA>\ts\tUniversity of Ibadan\t\t\t",
+"UI<NGA>\tsc\tUniversity of Ibadan\t\t\t",
"UI<USA-UT>\ts\tBureau of Land Management (Uinta Herbarium)\t\t\t",
"UICC\tc\tUniversity of Indonesia Culture Collection\t\t\t",
"UIDA\ts\tUniversity of Idaho, Bird and Mammal Museum\t\t\t",
"UPEI\ts\tUniversity of Prince Edward Island, Biology Department\t\t\t",
"UPF\ts\tUniversite de Polynesie Francaise Herbarium\t\t\t",
"UPIE\tb\tUnidad de Patologia Infecciosa y Epidemiologia\t\t\t",
-"UPLB\ts\tMuseum of Natural History, University of the Philippines\t\t\t",
+"UPLB\ts\tUniversity of Philippines Los Banos\t\t\t",
"UPM<FRA>\ts\tDepartement des Siences de la Terre\t\t\t",
"UPM<MYS>\ts\tUniversiti Pertanian Malaysia, Biology Department\t\t\t",
"UPM<RUS>\ts\tUdory Paleontological Museum\t\t\t",
"VPCI\tc\tFungal Culture Collection\t\t\t",
"VPH\ts\tVan Pharmaceutical Herbarium, Yuzuncu Yil University\t\t\t",
"VPI\tsc\tVirginia Polytechnic Institute and State University\tVTMH\t\t",
-"VPI:F\ts\tVirginia Polytechnic Institute and State University, Fungal Collection\t",
+"VPI:F\ts\tVirginia Polytechnic Institute and State University, Fungal Collection\t\t",
"VPIC\ts\tVirginia Polytechnic Institute and State University\t\t\t",
"VPIMM\ts\tVirginia Polytechnic University, Mammal Museum\t\t\t",
"VPM\ts\tVolgograd Provincial Museum\t\t\t",
"WFPL\tc\tWestern Forest Products Laboratory\t\t\t",
"WFU\ts\tWake Forest University, Biology Department\t\t\t",
"WFUVC\ts\tWake Forest University, Vertebrate Collection\t\t\t",
+"WFVZ\ts\tWestern Foundation of Vertebrate Zoology\t\t\t",
"WGC\ts\tState University of West Georgia, Biology Department\t\t\t",
"WGCH\ts\tWilton Garden Club\t\t\t",
"WGD\ts\tWashington Game Department\t\t\t",
-# $Id: institution_codes.txt 607542 2020-05-05 14:51:12Z ivanov $
+# $Id: institution_codes.txt 616908 2020-09-22 18:24:46Z ivanov $
A s Arnold Arboretum, Harvard University
AA s Ministry of Science, Academy of Sciences
AAC c Arignar Anna College
ABTRI c Apex Biotechnology Training and Research Institute
ABU<NGA> s Ahmadu Bello University Herbarium
AC s Amherst College
-ACA s Agricultural University of Athens
ACA-DC c Greek Coordinated Collections of Microorganisms
ACAD s Acadia University, K. C. Irving Environmental Science Centre & Harriet Irving Botanical Gardens
ACAD<AUS> sb Australian Centre for Ancient DNA
ACAM c The Australian Collection of Antarctic Microorganisms, Cooperative Research Center for the Antarctic and Southern Ocean Environment
+ACAM<GRC> s Agricultural University of Athens ACA
ACAP s Aquaculture Center of Aomori Prefecture
ACBC s Agriculture Canada Research Station
ACBR c Austrian Center of Biological Resources and Applied Mycology
BCCM/ITM c Belgian Coordinated Collections of Microorganisms / ITM Mycobacteria Collection ITM
BCCM/LMG c Belgian Coordinated Collections of Microorganisms/ LMG Bacteria Collection LMG http://bccm.belspo.be/catalogues/lmg-strain-details?NUM=
BCCM/MUCL c Belgian Coordinated Collections of Microorganisms / MUCL Agro-food & Environmental Fungal Collection MUCL
-BCCM/ULC c Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection ULC http://bccm.belspo.be/catalogues/ulc-strain-details?ACCESSION_NUMBER=
+BCCM/ULC c Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection ULC https://bccm.belspo.be/catalogues/bm-details?accession_number=
BCCN c Brucella Culture Collection
BCCUSP c Brazilian Cyanobacteria Collection - University of Sao Paulo
BCF s Universitat de Barcelona, Laboratori de Botanica
BPBM s Bernice P. Bishop Museum BISHOP
BPBM:Fish s Bernice P. Bishop Museum, Fish Collection
BPBM:IZ s Bernice P. Bishop Museum, Invertebrate Zoology
-BPI sc U.S. National Fungus Collections, Systematic Botany and Mycology Laboratory http://nt.ars-grin.gov/fungaldatabases/specimens/new_rptSpecimenOneRec.cfm?thisrec=BPI+&spec;
+BPI sc U.S. National Fungus Collections, Systematic Botany and Mycology Laboratory
BPI<ZAF> s Bernard Price Institute for Palaeontological Research
BPIC c Benaki Phytopathological Institute Collection
BPL s Museum of Barnstaple & North Devon
CCM c Czech Collection of Microorganisms CCM<CZE>
CCM-A c Coleccion de Cultivos Microbianos
CCM-CIBE c Escuela Superior Politecnica del Litoral
+CCM-UFV c Collection of Cyanobacteria and Microalgae at the Universidade Federal de Vicosa
CCM<CHN> s Changchun College of Traditional Chinese Medicine, Department of Chinese Materia Medica
CCM<USA-MT> s Carter County Museum
CCMA-UFSCar c Culture Collection of Freshwater Microalgae
CGEC s China Entomological Research Institute
CGG s Cambridge University Botanic Garden
CGH s National Museum of Prague
-CGMCC c China General Microbiological Culture Collection Center AS
+CGMCC c China General Microbiological Culture Collection Center AS http://www.cgmcc.net/english/cata.php?stn=CGMCC%20
CGMS s Universidade Federal de Mato Grosso do Sul, Departamento de Biologia
CGN s Centre for Genetic Resources, The Netherlands
CGRIS b Chinese Crop Germplasm Resources Information Network
CHELB s Cheltenham College for Boys
CHEP s Escuela Superior Politecnica del Chimborazo
CHER s Yu. Fedcovich Chernivtsi State University, Botany Department
+CHFC-EA sc Chilean Fungal Collection CHFC,ChFC
CHFD s Chelmsford and Essex Museum
CHI s University of Illinois, Biological Sciences Department
CHIA s National Chiayi Agricultural College, Forestry Department
CIBM s Centro Invest. Biol. Noroeste
CIC s Albertson College of Idaho, Biology Department
CICC c China Center for Industrial Culture Collection
-CICCM c Cawthron Institute Culture Collection of Micro-algae
+CICCM c Cawthron Institute Culture Collection of Micro-algae CAWD
CICESE s Centro de Investigacion Cientifica y de Educacion Superior de Ensenada
CICIM c Culture and Information Centre of Industrial Microorganisms of China's Univeristies
CICIMAR s Centro Interdisciplinario de Ciencias Marinas
CSAT s Colegio de Postgraduados, Campus Tabasco
CSAU s National Agrarian University, Southern Branch "Crimean Agrotechnological University", Department of Botany, Plant Physiology and Genetics
CSB s St. John's University/College of Saint Benedict, Biology Department
+CSBD s Centre for Study of Biological Diversity
CSC s Colegio del Sagrado Corazon
CSC-CLCH c Centro Substrati Cellulari, Cell Lines Collection and Hybridomas
CSCA s California State Collection of Arthropods
CTNRC s Thai National Reference Collections
CTR s Charles T. Ramsden historical collection
CTS s Chongqing Teachers College
+CTUA s Colección Teriológica de la Universidad de Antioquia
CTY s Canterbury Literary and Philosophical Institution
CU sb Cornell University
CUAC s Clemson University
DKG s Juniper Hall Field Centre
DLF s Stetson University, Biology Department
DLU s Da Lat University
+DLUCC c Dali University Culture Collection
DLY s Dudley and Midland Geological and Scientific Society and Field Club
DM<NZ> s Dominion Museum
DM<USA-UT> s The Dinosaur Museum
ECK s Buffalo State College
ECM s Hubei College of Traditional Chinese Medicine, Department of Chinese Materia Medica
ECNB s Escuela Nacional Ciencias
+ECNU s Museum of Biology, East China Normal University, School of Life Sciences
ECOCHM s Coleccion de Mamiferos del Museo de Zoologia-ECOSUR
ECOL s Collection du Laborataire d'Ecologie
ECOMAR<FRA> s ECOMAR lab University of Reunion
EELM s Estacion Experimental Agricola de la Molina
EERU s Economic Entomology Research Unit
EFC s Escola de Florestas
-EFCC s Epping Forest Conservation Centre
+EFCC c Entomopathogenic Fungal Collection
EFH s Forestry Commission
EFM s Epping Forest Museum, Corporation of London
EFWM s Department of Entomology
F s Field Museum of Natural History, Botany Department FMNH:F
FAA s Universidad Nacional del Centro de la Provincia de Buenos Aires
FABR s Harmas de J. H. Fabre
-FACHB c Freshwater Algae Culture Collection
+FACHB c Freshwater Algae Culture Collection CHAB<China>
FACS s Fujian Agricultural College
FAK s Department of Fisheries, Faculty of Agriculture
FAKOU s Faculty of Agriculture, Kochi Univerisity
LECB s Saint Petersburg State University, Botany Department
LEDLIE s Patricia Ledlie Herbarium
LEF s Economic Forestry Institute of Liaoning Province
+LEGEcc c Blue Biotechnology and Ecotoxicology Culture Collection
LEH s Lehigh University
LEI s Leicester Literary and Philosophical Society
LEISHCRYOBANK c International Cryobank of Leishmania
MADM s Museu Municipal do Funchal
MADS s Museu de Historia Natural do Seminario do Funchal
MAF s Universidad Complutense, Departamento de Biologia Vegetal II
-MAFF c MAFF Genebank, Ministry of Agriculture Forestry and Fisheries
+MAFF c MAFF Genebank, Ministry of Agriculture Forestry and Fisheries https://www.gene.affrc.go.jp/databases-micro_search_detail_en.php?maff=
MAFF<FJI> s Colo-i-Suva Silvicultural Station
MAFI s Magyar Allami Foeldtani Intezet, Budapest - Hungarian Geological Survey
MAFST s Instituto Forestal de la Moncloa
MDTN s Middleton Botanical Society
MDUG s Universidad Guanajuato, Museo Alfredo Duges
MDZAU s Museum Deptartment of Zoology
+MEAN c Micoteca da Estacao Agronomica Nacional
MECB s Universidade Federal de Pelotas, Museu Entomologico Ceslau Biezanko
MECG s Medical Entomology Collection Gallery
MECN s Museo Ecuadoriano de Ciencias Naturales DHMECN
MU<TUR> c Mugla Sitki Kocman University
MU<USA-OH> s Miami University, Botany Department, Willard Sherman Turrell Herbarium http://herbarium.muohio.edu/herbariummu/
MU<USA-TX> s Midwestern University
+MUA-AVP s Museo Universitario de la Universidad de Antioquia
MUACC c Murdoch University Algal Culture Collection
MUAF c Culture collection of Mendel University of Agriculture and Forestry in Brno
MUAP s Museo del Mar Universidad Arturo Prat
MZCR s Museo de Zoologia
MZFC s Museo de Zoologia "Alfonso L. Herrera"
MZFN s Museo Zoologico dell'Universita "Federico II"
+MZFS-DAR s Museu de Zoologia da Universidade Estadual de Feira de Santana
MZGZ s Museum Zoologia del Giardino Zoologico
MZH s Zoolgical Museum, Finnish Museum of Natural History
MZKI c Microbial Culture Collection of National Institute of Chemistry
NLU s University of Louisiana at Monroe, Museum of Natural History
NLUH s University of the Philippines College Baguio
NM s Northern Michigan University, Biology Department
+NMA s National Museum Australia -
NMAC s Inner Mongolia Agricultural University, Department of Pratacultural Science
NMAG s Naturhistorisches Museum, Augsburg
NMB c Ningbo Marine Biotechnology
NRC<EGY> s National Research Centre
NRCC s National Research Council of Canada
NRCS c National Reference Center for Streptococci in Aachen
+NRI s Texas A&M Natural Resources Institute
NRIBAS s National Research Institute of Biology, Academia Sinica
NRIC c NODAI Research Institute Culture Collection
NRL c Neisseria Reference Laboratory
-NRM s Swedish Museum of Natural History
+NRM s Swedish Museum of Natural History SMNH
NRN s Nairn Literary Society Library, Public Library
NRNZ s Northland Regional Museum
NRPSU c Department of Agro-industry, Faculty of Natural Resources
NTOU sc Institute of Marine Biology, National Taiwan Ocean University
NTS s Nevada Operations Office, U.S. Department of Energy
NTSC s University of North Texas, Biological Sciences Department
+NTUCC c Plant Pathology and Microbiology, National Taiwan University Culture Collection
NTUF s National Taiwan University, Forestry Department
-NTUM s National Taiwan University
-NTUMA s National Taiwan University
+NTUH s Herbarium of the Department of Plant Pathology and Microbiology, National Taiwan University
+NTUM s National Taiwan University Museum
NU<THA> c Department of Microbiology, Faculty of Science
NU<ZAF> s University of Natal, School of Botany and Zoology
NUA c Department of Microbiology, National University of Athens
UHCC c University of Helsinki Cyanobacteria Culture Collection
UHI s Ussishkin House, Botany Department
UHM s Manoa, College of Tropical Agriculture, Department of Entomology
-UI<NGA> s University of Ibadan
+UI<NGA> sc University of Ibadan
UI<USA-UT> s Bureau of Land Management (Uinta Herbarium)
UICC c University of Indonesia Culture Collection
UIDA s University of Idaho, Bird and Mammal Museum
UPEI s University of Prince Edward Island, Biology Department
UPF s Universite de Polynesie Francaise Herbarium
UPIE b Unidad de Patologia Infecciosa y Epidemiologia
-UPLB s Museum of Natural History, University of the Philippines
+UPLB s University of Philippines Los Banos
UPM<FRA> s Departement des Siences de la Terre
UPM<MYS> s Universiti Pertanian Malaysia, Biology Department
UPM<RUS> s Udory Paleontological Museum
VPCI c Fungal Culture Collection
VPH s Van Pharmaceutical Herbarium, Yuzuncu Yil University
VPI sc Virginia Polytechnic Institute and State University VTMH
-VPI:F s Virginia Polytechnic Institute and State University, Fungal Collection
+VPI:F s Virginia Polytechnic Institute and State University, Fungal Collection
VPIC s Virginia Polytechnic Institute and State University
VPIMM s Virginia Polytechnic University, Mammal Museum
VPM s Volgograd Provincial Museum
WFPL c Western Forest Products Laboratory
WFU s Wake Forest University, Biology Department
WFUVC s Wake Forest University, Vertebrate Collection
+WFVZ s Western Foundation of Vertebrate Zoology
WGC s State University of West Georgia, Biology Department
WGCH s Wilton Garden Club
WGD s Washington Game Department
-/* $Id: lat_lon_country.inc 599818 2020-01-07 20:09:07Z kans $
+/* $Id: lat_lon_country.inc 612552 2020-07-23 15:34:00Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
-/* $Id: Seq_id.cpp 603822 2020-03-17 17:37:01Z ivanov $
+/* $Id: Seq_id.cpp 617367 2020-09-30 12:57:11Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
E_Choice type = WhichInverseSeqId(fasta_pieces.front());
ETypeVariant tv;
if (type == e_not_set) {
- // unknown database are reported as 'general'
- type = e_General;
+ if (fasta_pieces.size() == 2) {
+ // unknown database are reported as 'general'
+ type = e_General;
+ }
tv = eTV_plain;
} else {
tv = x_IdentifyTypeVariant(type, fasta_pieces.front());
ids.push_back(id);
++count;
} catch (std::exception& e) {
+ if (fasta_pieces.empty()) {
+ throw;
+ }
if ((flags & fParse_PartialOK) != 0) {
ERR_POST_X(7, Warning << e.what());
+ do {
+ auto l = fasta_pieces.front().size();
+ if (l != 2 && l != 3) {
+ fasta_pieces.pop_front();
+ } else {
+ break;
+ }
+ } while ( !fasta_pieces.empty() );
} else {
throw;
}
-/* $Id: accguide.inc 603797 2020-03-17 13:51:04Z ucko $
+/* $Id: accguide.inc 615212 2020-08-28 13:43:44Z ucko $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
*/
static const char* const kBuiltInGuide[] = {
- "# $Id: accguide.inc 603797 2020-03-17 13:51:04Z ucko $",
+ "# $Id: accguide.inc 615212 2020-08-28 13:43:44Z ucko $",
"version 1 # of file format",
"",
"# three-letter-prefix protein accessions (traditionally with five digits)",
"2+6 LQ embl_patent",
"2+8 LQ embl_other_nuc",
"2+10 LQ embl_other_nuc",
- "2+6 LR embl_dirsub # embl_patent?",
+ "2+6 LR embl_dirsub * # embl_patent?",
"2+8 LR embl_other_nuc",
"2+10 LR embl_other_nuc",
"2+6 LS embl_dirsub # embl_patent?",
"2+6 MD ddbj_patent",
"2+8 MD ddbj_other_nuc",
"2+10 MD ddbj_other_nuc",
- "2+6 ME ddbj_other_nuc",
+ "2+6 ME ddbj_patent",
"2+8 ME ddbj_other_nuc",
"2+10 ME ddbj_other_nuc",
"2+6 MF gb_dirsub",
"2+6 MU gb_con",
"2+8 MU gb_other_nuc",
"2+10 MU gb_other_nuc",
+ "2+6 MV gb_patent",
+ "2+8 MV gb_other_nuc",
+ "2+10 MV gb_other_nuc",
+ "2+6 MW gb_dirsub",
+ "2+8 MW gb_other_nuc",
+ "2+10 MW gb_other_nuc",
"2+6 M? gb_other_nuc",
"2+8 M? gb_other_nuc",
"2+10 M? gb_other_nuc",
"special LN901386-LN901412 embl_est",
"",
"# Nominally embl_dirsub.",
+ "special LR594708-LR594709 embl_tpa_nuc",
+ "",
+ "# Nominally embl_dirsub.",
"special LT159851-LT159865 embl_est",
"special LT548096-LT548244 embl_tpa_nuc",
"special LT556286-LT558089 embl_est",
"special OB000001-OB660024 embl_con",
"",
"# Some \"EMBL\" WGS nucleotide accessions are really third-party annotations.",
- "special CAADVW000000000-CAADVX999999999 embl_tpa_wgs_nuc # 6+9",
+ "special CAADSF000000000-CAADSF999999999 embl_tpa_wgs_nuc # 6+9",
+ "special CAADSM000000000-CAAGJX999999999 embl_tpa_wgs_nuc # 6+9",
+ "special CAAGKD000000000-CAAGKQ999999999 embl_tpa_wgs_nuc # 6+9",
+ "special CAAGKS000000000-CAAGRI999999999 embl_tpa_wgs_nuc # 6+9",
+ "special CAAGRK000000000-CAAGSH999999999 embl_tpa_wgs_nuc # 6+9",
+ "special CAAHDL000000000-CAAHDL999999999 embl_tpa_wgs_nuc # 6+9",
+ "special CAAHDO000000000-CAAHFA999999999 embl_tpa_wgs_nuc # 6+9",
+ "special CADEPO000000000-CADEVH999999999 embl_tpa_wgs_nuc # 6+9",
+ "special CADEVJ000000000-CADFGZ999999999 embl_tpa_wgs_nuc # 6+9",
"",
"# Some \"EMBL\" 8-character protein accessions are really third-party",
"# annotations.",
-# $Id: accguide.txt 603797 2020-03-17 13:51:04Z ucko $
+# $Id: accguide.txt 615212 2020-08-28 13:43:44Z ucko $
version 1 # of file format
# three-letter-prefix protein accessions (traditionally with five digits)
2+6 LQ embl_patent
2+8 LQ embl_other_nuc
2+10 LQ embl_other_nuc
-2+6 LR embl_dirsub # embl_patent?
+2+6 LR embl_dirsub * # embl_patent?
2+8 LR embl_other_nuc
2+10 LR embl_other_nuc
2+6 LS embl_dirsub # embl_patent?
2+6 MD ddbj_patent
2+8 MD ddbj_other_nuc
2+10 MD ddbj_other_nuc
-2+6 ME ddbj_other_nuc
+2+6 ME ddbj_patent
2+8 ME ddbj_other_nuc
2+10 ME ddbj_other_nuc
2+6 MF gb_dirsub
2+6 MU gb_con
2+8 MU gb_other_nuc
2+10 MU gb_other_nuc
+2+6 MV gb_patent
+2+8 MV gb_other_nuc
+2+10 MV gb_other_nuc
+2+6 MW gb_dirsub
+2+8 MW gb_other_nuc
+2+10 MW gb_other_nuc
2+6 M? gb_other_nuc
2+8 M? gb_other_nuc
2+10 M? gb_other_nuc
special LN901194-LN901210 embl_tpa_nuc
special LN901386-LN901412 embl_est
+# Nominally embl_dirsub.
+special LR594708-LR594709 embl_tpa_nuc
+
# Nominally embl_dirsub.
special LT159851-LT159865 embl_est
special LT548096-LT548244 embl_tpa_nuc
special OB000001-OB660024 embl_con
# Some "EMBL" WGS nucleotide accessions are really third-party annotations.
-special CAADVW000000000-CAADVX999999999 embl_tpa_wgs_nuc # 6+9
+special CAADSF000000000-CAADSF999999999 embl_tpa_wgs_nuc # 6+9
+special CAADSM000000000-CAAGJX999999999 embl_tpa_wgs_nuc # 6+9
+special CAAGKD000000000-CAAGKQ999999999 embl_tpa_wgs_nuc # 6+9
+special CAAGKS000000000-CAAGRI999999999 embl_tpa_wgs_nuc # 6+9
+special CAAGRK000000000-CAAGSH999999999 embl_tpa_wgs_nuc # 6+9
+special CAAHDL000000000-CAAHDL999999999 embl_tpa_wgs_nuc # 6+9
+special CAAHDO000000000-CAAHFA999999999 embl_tpa_wgs_nuc # 6+9
+special CADEPO000000000-CADEVH999999999 embl_tpa_wgs_nuc # 6+9
+special CADEVJ000000000-CADFGZ999999999 embl_tpa_wgs_nuc # 6+9
# Some "EMBL" 8-character protein accessions are really third-party
# annotations.
-/* $Id: ValidErrItem.cpp 597158 2019-11-18 17:58:02Z kans $
+/* $Id: ValidErrItem.cpp 611904 2020-07-13 15:51:08Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
{ eErr_SEQ_FEAT_CDSdoesNotMatchVDJC,
{ "CDSdoesNotMatchVDJC",
"The CDS does not have a parent VDJ or C segment" } },
+ { eErr_SEQ_FEAT_GeneOnNucPositionOfPeptide,
+ { "GeneOnNucPositionOfPeptide",
+ "Peptide under CDS matches small Gene" } },
/* SEQ_ALIGN */
-/* $Id: scope.cpp 603742 2020-03-16 17:25:41Z ivanov $
+/* $Id: scope.cpp 610058 2020-06-10 16:19:48Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
+// Forwards the request to m_Impl->RemoveFromHistory() for the same
+// Seq-id handle; no work is done in CScope itself.
+void CScope::RemoveFromHistory(const CSeq_id_Handle& seq_id)
+{
+    m_Impl->RemoveFromHistory(seq_id);
+}
+
+
+// Convenience overload: obtains a CSeq_id_Handle for the given CSeq_id via
+// CSeq_id_Handle::GetHandle() and calls the handle-based overload.
+void CScope::RemoveFromHistory(const CSeq_id& seq_id)
+{
+    RemoveFromHistory(CSeq_id_Handle::GetHandle(seq_id));
+}
+
+
void CScope::RemoveFromHistory(const CBioseq_Handle& bioseq,
EActionIfLocked action)
{
-/* $Id: scope_impl.cpp 602775 2020-03-02 19:52:55Z grichenk $
+/* $Id: scope_impl.cpp 610058 2020-06-10 16:19:48Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
+// Drop the information the scope keeps for a single Seq-id.
+// A null handle is ignored. With a TConfWriteLockGuard held on m_ConfLock,
+// the id's entry in m_Seq_idMap (if present) has its annot-ref info reset.
+// When that entry carries a bioseq info, its annot-ref info is reset too,
+// and if the bioseq info reports IsDetached() its m_SynCache is reset and
+// the entry is erased from the map.
+void CScope_Impl::RemoveFromHistory(const CSeq_id_Handle& seq_id)
+{
+    if ( !seq_id ) {
+        return;
+    }
+    TConfWriteLockGuard guard(m_ConfLock);
+    TSeq_idMap::iterator entry = m_Seq_idMap.find(seq_id);
+    if ( entry == m_Seq_idMap.end() ) {
+        // Nothing is recorded for this id.
+        return;
+    }
+    entry->second.x_ResetAnnotRef_Info();
+    if ( !entry->second.m_Bioseq_Info ) {
+        return;
+    }
+    CBioseq_ScopeInfo& bioseq_info = *entry->second.m_Bioseq_Info;
+    bioseq_info.x_ResetAnnotRef_Info();
+    if ( bioseq_info.IsDetached() ) {
+        // Reset m_SynCache first; bioseq_info is not touched after the erase.
+        bioseq_info.m_SynCache.Reset();
+        m_Seq_idMap.erase(entry);
+    }
+}
+
+
void CScope_Impl::ResetHistory(int action)
{
TConfWriteLockGuard guard(m_ConfLock);
-/* $Id: tse_info.cpp 606922 2020-04-28 18:58:25Z ivanov $
+/* $Id: tse_info.cpp 611227 2020-07-01 11:37:30Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
uniq_id = CBioObjectId(CBioObjectId::eUniqNumber,
- m_InternalBioObjNumber++);
+ ++m_InternalBioObjNumber);
m_BioObjects[uniq_id] = &info;
return uniq_id;
}
-/* $Id: autodef.cpp 607821 2020-05-07 19:13:41Z ivanov $
+/* $Id: autodef.cpp 611612 2020-07-08 17:43:23Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
case CAutoDefOptions::eListAllFeatures:
end = " sequence.";
break;
+ case CAutoDefOptions::eWholeGenomeShotgunSequence:
+ end = " whole genome shotgun sequence.";
+ break;
default:
break;
}
-/* $Id: autodef_options.cpp 530196 2017-03-13 12:59:43Z bollin $
+/* $Id: autodef_options.cpp 611612 2020-07-08 17:43:23Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
{ "List All Features", CAutoDefOptions::eListAllFeatures },
{ "Partial Genome", CAutoDefOptions::ePartialGenome },
{ "Partial Sequence", CAutoDefOptions::ePartialSequence },
- { "Sequence", CAutoDefOptions::eSequence }
+ { "Sequence", CAutoDefOptions::eSequence },
+ { "Whole Genome Shotgun Sequence", CAutoDefOptions::eWholeGenomeShotgunSequence }
};
DEFINE_STATIC_ARRAY_MAP_WITH_COPY(TNameValPairMap, sc_FeatureListTypeStrsMap, sc_FeatureListTypeStr);
return false;
}
- pos = NStr::FindNoCase (taxname, strain, 0, taxname.size() - 1, NStr::eLast);
+ pos = NStr::Find (taxname, strain, NStr::eNocase, NStr::eReverseSearch);
if (pos == taxname.size() - strain.size()) {
// check for space to avoid fortuitous match to end of taxname
char ch = taxname[pos - 1];
int len = str.length();
if (len < 5) return str;
if (str [len - 1] != ']') return str;
- SIZE_TYPE cp = NStr::Find(str, "[", 0, NPOS, NStr::eLast);
+ SIZE_TYPE cp = NStr::Find(str, "[", NStr::eNocase, NStr::eReverseSearch);
if (cp == NPOS) return str;
string suffix = str.substr(cp+1);
if (NStr::StartsWith(suffix, "NAD")) return str;
idx = len1 - len2 - 3;
if (len1 > len2 + 4 && title [idx] == ' ' && title [idx + 1] == '[' && title [len1 - 1] == ']') {
- pos = NStr::FindNoCase(title, taxname, 0, NPOS, NStr::eLast);
+ pos = NStr::Find(title, taxname, NStr::eNocase, NStr::eReverseSearch);
if (pos == idx + 2) {
return pos - 1;
}
tpos = s_TitleEndsInOrganism(m_MainTitle, binomial);
if (tpos == NPOS) {
if (m_IsCrossKingdom) {
- pos = NStr::FindNoCase(m_MainTitle, "][", 0, NPOS, NStr::eLast);
+ pos = NStr::Find(m_MainTitle, "][", NStr::eNocase, NStr::eReverseSearch);
if (pos != NPOS) {
m_MainTitle.erase (pos + 1);
s_TrimMainTitle (m_MainTitle);
tpos = s_TitleEndsInOrganism(m_MainTitle, binomial);
if (tpos == NPOS) {
if (m_IsCrossKingdom) {
- pos = NStr::FindNoCase(m_MainTitle, "][", 0, NPOS, NStr::eLast);
+ pos = NStr::Find(m_MainTitle, "][", NStr::eNocase, NStr::eReverseSearch);
if (pos != NPOS) {
m_MainTitle.erase (pos + 1);
s_TrimMainTitle (m_MainTitle);
}
}
}
+ if ( bios && bios->IsSetPcr_primers() ) {
+ const CBioSource_Base::TPcr_primers & primers = bios->GetPcr_primers();
+ if ( primers.CanGet() ) {
+ ITERATE( CBioSource_Base::TPcr_primers::Tdata, it, primers.Get() ) {
+
+ // bool has_fwd_seq = false;
+ // bool has_rev_seq = false;
+
+ if( (*it)->IsSetForward() ) {
+ const CPCRReaction_Base::TForward &forward = (*it)->GetForward();
+ if( forward.CanGet() ) {
+ ITERATE( CPCRReaction_Base::TForward::Tdata, it2, forward.Get() ) {
+ const string &fwd_name = ( (*it2)->CanGetName() ? (*it2)->GetName().Get() : kEmptyStr );
+ if( ! fwd_name.empty() ) {
+ joiner.Add("fwd-primer-name", fwd_name);
+ }
+ const string &fwd_seq = ( (*it2)->CanGetSeq() ? (*it2)->GetSeq().Get() : kEmptyStr );
+ // NStr::ToLower( fwd_seq );
+ if( ! fwd_seq.empty() ) {
+ joiner.Add("fwd-primer-seq", fwd_seq);
+ // has_fwd_seq = true;
+ }
+ }
+ }
+ }
+ if( (*it)->IsSetReverse() ) {
+ const CPCRReaction_Base::TReverse &reverse = (*it)->GetReverse();
+ if( reverse.CanGet() ) {
+ ITERATE( CPCRReaction_Base::TReverse::Tdata, it2, reverse.Get() ) {
+ const string &rev_name = ((*it2)->CanGetName() ? (*it2)->GetName().Get() : kEmptyStr );
+ if( ! rev_name.empty() ) {
+ joiner.Add("rev-primer-name", rev_name);
+ }
+ const string &rev_seq = ( (*it2)->CanGetSeq() ? (*it2)->GetSeq().Get() : kEmptyStr );
+ // NStr::ToLower( rev_seq ); // do we need this?
+ if( ! rev_seq.empty() ) {
+ joiner.Add("rev-primer-seq", rev_seq);
+ // has_rev_seq = true;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
}
catch (CException &) {
// ignore exception; it probably just means there's no org-ref
-/* $Id: feature_edit.cpp 599823 2020-01-07 21:35:24Z foleyjp $
+/* $Id: feature_edit.cpp 610146 2020-06-11 11:11:01Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
+// Return the number of sequence positions covered by a location.
+//   empty / null  -> 0
+//   point         -> 1
+//   interval      -> the interval's GetLength()
+//   packed-int    -> sum of GetLength() over the contained intervals
+//   packed-point  -> number of points stored
+//   mix           -> recursive sum over the sub-locations
+// Any other variant contributes 0.
+static TSeqPos s_GetTrimmedLength(const CSeq_loc& trimmed_loc)
+{
+    if (trimmed_loc.IsEmpty() || trimmed_loc.IsNull()) {
+        return 0;
+    }
+
+    if (trimmed_loc.IsPnt()) {
+        return 1;
+    }
+
+    if (trimmed_loc.IsInt()) {
+        return trimmed_loc.GetInt().GetLength();
+    }
+
+    if (trimmed_loc.IsPacked_int()) {
+        TSeqPos length = 0;
+        // Bind by const reference: copying the smart pointer per element
+        // would only add a reference-count increment and decrement.
+        for (const auto& pSubInt : trimmed_loc.GetPacked_int().Get()) {
+            length += pSubInt->GetLength();
+        }
+        return length;
+    }
+
+    if (trimmed_loc.IsPacked_pnt()) {
+        // size() yields size_t; make the narrowing to TSeqPos explicit.
+        return static_cast<TSeqPos>(
+            trimmed_loc.GetPacked_pnt().GetPoints().size());
+    }
+
+    if (trimmed_loc.IsMix()) {
+        TSeqPos length = 0;
+        for (const auto& pSubLoc : trimmed_loc.GetMix().Get()) {
+            length += s_GetTrimmedLength(*pSubLoc);
+        }
+        return length;
+    }
+
+    return 0;
+}
+
+// Return the number of positions of `loc` that fall inside [from, to].
+// An interval location spanning from..to is built on the Seq-id reported by
+// a CSeq_loc_CI positioned at the start of `loc`, intersected with `loc`
+// (strand ignored), and the length of the intersection is measured with the
+// single-argument s_GetTrimmedLength(). A null intersection result gives 0.
+static TSeqPos s_GetTrimmedLength(const CSeq_loc& loc, TSeqPos from, TSeqPos to)
+{
+    auto pWindow = Ref(new CSeq_loc());
+    CSeq_loc_CI first_piece(loc);
+
+    auto& window = pWindow->SetInt();
+    window.SetId().Assign(first_piece.GetSeq_id());
+    window.SetFrom(from);
+    window.SetTo(to);
+
+    auto pOverlap = loc.Intersect(*pWindow, CSeq_loc::fStrand_Ignore, nullptr);
+    return pOverlap ? s_GetTrimmedLength(*pOverlap) : 0;
+}
+
+
TSeqPos CFeatTrim::x_GetStartOffset(const CSeq_feat& feat,
TSeqPos from, TSeqPos to)
{
if (strand != eNa_strand_minus) {
TSeqPos feat_from = feat_range.GetFrom();
if (feat_from < from) {
- offset = from - feat_from;
+ if (feat.GetLocation().IsInt()) {
+ return (from - feat_from);
+ }
+ return s_GetTrimmedLength(feat.GetLocation(), feat_from, from-1);
}
}
else { // eNa_strand_minus
TSeqPos feat_to = feat_range.GetTo();
if (feat_to > to) {
- offset = feat_to - to;
+ if (feat.GetLocation().IsInt()) {
+ return (feat_to - to);
+ }
+ return s_GetTrimmedLength(feat.GetLocation(), to+1, feat_to);
}
}
return offset;
CCdregion::EFrame CFeatTrim::GetCdsFrame(const CSeq_feat& cds_feature, const CRange<TSeqPos>& range)
{
const TSeqPos offset = x_GetStartOffset(cds_feature, range.GetFrom(), range.GetTo());
-
return x_GetNewFrame(offset, cds_feature.GetData().GetCdregion());
}
}
const TSeqPos old_frame = x_GetFrame(cdregion);
- const TSeqPos new_frame = (old_frame + frame_change)%3;
+
+ // RW-1098
+ const TSeqPos new_frame = 3 - ((3 + offset - old_frame)%3);
+ // Note new_frame, thus defined, takes values 1,2,3,
+ // whereas old_frame takes values 0,1,2.
+ // However, 0 == 3 in modulo 3 arithmetic.
if (new_frame == 1) {
return CCdregion::eFrame_two;
}
#include <objmgr/util/indexer.hpp>
#include <objmgr/util/sequence.hpp>
+#include <objmgr/util/feature_edit.hpp>
#define NCBI_USE_ERRCODE_X ObjMgr_Indexer
// CSeqEntryIndex
// Constructors take top-level sequence object, create a CRef<CSeqMasterIndex>, and call its initializer
-CSeqEntryIndex::CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy, TFlags flags)
{
m_Idx.Reset(new CSeqMasterIndex);
- m_Idx->x_Initialize(topseh, policy, flags, depth);
+ m_Idx->x_Initialize(topseh, policy, flags);
}
-CSeqEntryIndex::CSeqEntryIndex (CBioseq_Handle& bsh, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CBioseq_Handle& bsh, EPolicy policy, TFlags flags)
{
m_Idx.Reset(new CSeqMasterIndex);
- m_Idx->x_Initialize(bsh, policy, flags, depth);
+ m_Idx->x_Initialize(bsh, policy, flags);
}
-CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy, TFlags flags)
{
m_Idx.Reset(new CSeqMasterIndex);
- m_Idx->x_Initialize(topsep, policy, flags, depth);
+ m_Idx->x_Initialize(topsep, policy, flags);
}
-CSeqEntryIndex::CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy, TFlags flags)
{
m_Idx.Reset(new CSeqMasterIndex);
- m_Idx->x_Initialize(seqset, policy, flags, depth);
+ m_Idx->x_Initialize(seqset, policy, flags);
}
-CSeqEntryIndex::CSeqEntryIndex (CBioseq& bioseq, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CBioseq& bioseq, EPolicy policy, TFlags flags)
{
m_Idx.Reset(new CSeqMasterIndex);
- m_Idx->x_Initialize(bioseq, policy, flags, depth);
+ m_Idx->x_Initialize(bioseq, policy, flags);
}
-CSeqEntryIndex::CSeqEntryIndex (CSeq_submit& submit, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CSeq_submit& submit, EPolicy policy, TFlags flags)
{
m_Idx.Reset(new CSeqMasterIndex);
- m_Idx->x_Initialize(submit, policy, flags, depth);
+ m_Idx->x_Initialize(submit, policy, flags);
}
-CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy, TFlags flags)
{
m_Idx.Reset(new CSeqMasterIndex);
- m_Idx->x_Initialize(topsep, sblock, policy, flags, depth);
+ m_Idx->x_Initialize(topsep, sblock, policy, flags);
}
-CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy, TFlags flags, int depth)
+CSeqEntryIndex::CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy, TFlags flags)
{
m_Idx.Reset(new CSeqMasterIndex);
- m_Idx->x_Initialize(topsep, descr, policy, flags, depth);
+ m_Idx->x_Initialize(topsep, descr, policy, flags);
}
// Get first Bioseq index
return m_Idx->GetBioseqIndex(loc);
}
-// Get Bioseq index by subrange
-CRef<CBioseqIndex> CSeqEntryIndex::GetBioseqIndex (const string& accn, int from, int to, bool rev_comp)
+const vector<CRef<CBioseqIndex>>& CSeqEntryIndex::GetBioseqIndices(void)
{
- return m_Idx->GetBioseqIndex(accn, from, to, rev_comp);
+ return m_Idx->GetBioseqIndices();
}
-CRef<CBioseqIndex> CSeqEntryIndex::GetBioseqIndex (int from, int to, bool rev_comp)
+const vector<CRef<CSeqsetIndex>>& CSeqEntryIndex::GetSeqsetIndices(void)
{
- return m_Idx->GetBioseqIndex("", from, to, rev_comp);
+ return m_Idx->GetSeqsetIndices();
}
-const vector<CRef<CBioseqIndex>>& CSeqEntryIndex::GetBioseqIndices(void)
+bool CSeqEntryIndex::DistributedReferences(void)
{
- return m_Idx->GetBioseqIndices();
+ return m_Idx->DistributedReferences();
}
-const vector<CRef<CSeqsetIndex>>& CSeqEntryIndex::GetSeqsetIndices(void)
+void CSeqEntryIndex::SetSnpFunc(FAddSnpFunc* snp)
{
- return m_Idx->GetSeqsetIndices();
+ m_Idx->SetSnpFunc (snp);
}
-bool CSeqEntryIndex::DistributedReferences(void)
+FAddSnpFunc* CSeqEntryIndex::GetSnpFunc(void)
{
- return m_Idx->DistributedReferences();
+ return m_Idx->GetSnpFunc();
+}
+
+// Pass the feature depth value through to the master index (m_Idx)
+void CSeqEntryIndex::SetFeatDepth(int featDepth)
+
+{
+    m_Idx->SetFeatDepth (featDepth);
+}
+
+// Return the feature depth value currently held by the master index (m_Idx)
+int CSeqEntryIndex::GetFeatDepth(void)
+
+{
+    return m_Idx->GetFeatDepth();
+}
+
+// Pass the gap depth value through to the master index (m_Idx).
+// The parameter is named gapDepth (not featDepth) so it cannot be confused
+// with the argument of SetFeatDepth().
+void CSeqEntryIndex::SetGapDepth(int gapDepth)
+
+{
+    m_Idx->SetGapDepth (gapDepth);
+}
+
+// Return the gap depth value currently held by the master index (m_Idx)
+int CSeqEntryIndex::GetGapDepth(void)
+
+{
+    return m_Idx->GetGapDepth();
}
bool CSeqEntryIndex::IsFetchFailure(void)
// CSeqMasterIndex
// Initializers take top-level sequence object, create Seq-entry wrapper if necessary
-void CSeqMasterIndex::x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
{
m_Policy = policy;
m_Flags = flags;
- m_Depth = depth;
m_Tseh = topseh.GetTopLevelEntry();
CConstRef<CSeq_entry> tcsep = m_Tseh.GetCompleteSeq_entry();
m_HasOperon = false;
m_IsSmallGenomeSet = false;
m_DistributedReferences = false;
+ m_SnpFunc = 0;
+ m_FeatDepth = 0;
+ m_GapDepth = 0;
m_IndexFailure = false;
try {
}
}
-void CSeqMasterIndex::x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
{
m_Policy = policy;
m_Flags = flags;
- m_Depth = depth;
m_Tseh = bsh.GetTopLevelEntry();
CConstRef<CSeq_entry> tcsep = m_Tseh.GetCompleteSeq_entry();
m_HasOperon = false;
m_IsSmallGenomeSet = false;
m_DistributedReferences = false;
+ m_SnpFunc = 0;
+ m_FeatDepth = 0;
+ m_GapDepth = 0;
m_IndexFailure = false;
try {
}
}
-void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
{
m_Policy = policy;
m_Flags = flags;
- m_Depth = depth;
topsep.Parentize();
m_Tsep.Reset(&topsep);
x_Init();
}
-void CSeqMasterIndex::x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
{
m_Policy = policy;
m_Flags = flags;
- m_Depth = depth;
CSeq_entry* parent = seqset.GetParentEntry();
if (parent) {
x_Init();
}
-void CSeqMasterIndex::x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
{
m_Policy = policy;
m_Flags = flags;
- m_Depth = depth;
CSeq_entry* parent = bioseq.GetParentEntry();
if (parent) {
x_Init();
}
-void CSeqMasterIndex::x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
{
m_Policy = policy;
m_Flags = flags;
- m_Depth = depth;
_ASSERT(submit.CanGetData());
_ASSERT(submit.CanGetSub());
x_Init();
}
-void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
{
m_Policy = policy;
m_Flags = flags;
- m_Depth = depth;
topsep.Parentize();
m_Tsep.Reset(&topsep);
x_Init();
}
-void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, int depth)
+void CSeqMasterIndex::x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags)
{
m_Policy = policy;
m_Flags = flags;
- m_Depth = depth;
topsep.Parentize();
m_Tsep.Reset(&topsep);
x_Init();
}
+void CSeqMasterIndex::SetSnpFunc (FAddSnpFunc* snp)
+// Stores the SNP callback; CBioseqIndex::x_DefaultSelector calls it, when non-null, to obtain the SNP named-annot accession.
+{
+ m_SnpFunc = snp;
+}
+
+FAddSnpFunc* CSeqMasterIndex::GetSnpFunc (void)
+// Returns the stored SNP callback; the initialization code visible in this file resets it to 0.
+{
+ return m_SnpFunc;
+}
+
+void CSeqMasterIndex::SetFeatDepth (int featDepth)
+// Stores an explicit feature depth; CBioseqIndex::x_DefaultSelector passes it to SetResolveDepth only when it is > 0.
+{
+ m_FeatDepth = featDepth;
+}
+
+int CSeqMasterIndex::GetFeatDepth (void)
+// Returns the stored feature depth; the initialization code visible in this file resets it to 0.
+{
+ return m_FeatDepth;
+}
+
+void CSeqMasterIndex::SetGapDepth (int gapDepth)
+// Stores the gap depth; the gap-collection code in CBioseqIndex uses it as the SSeqMapSelector resolve count.
+{
+ m_GapDepth = gapDepth;
+}
+
+int CSeqMasterIndex::GetGapDepth (void)
+// Returns the stored gap depth; the initialization code visible in this file resets it to 0.
+{
+ return m_GapDepth;
+}
+
+
// At end of program, poll all Bioseqs to check for far fetch failure flag
bool CSeqMasterIndex::IsFetchFailure (void)
CBioseq_Handle bsh = m_Scope->GetBioseqHandle(bsp);
if (bsh) {
// create CBioseqIndex object for current Bioseq
- CRef<CBioseqIndex> bsx(new CBioseqIndex(bsh, bsp, bsh, prnt, m_Tseh, m_Scope, *this, m_Policy, m_Flags, m_Depth, false));
+ CRef<CBioseqIndex> bsx(new CBioseqIndex(bsh, bsp, bsh, prnt, m_Tseh, m_Scope, *this, m_Policy, m_Flags));
// record CBioseqIndex in vector for IterateBioseqs or GetBioseqIndex
m_BsxList.push_back(bsx);
m_HasOperon = false;
m_IsSmallGenomeSet = false;
m_DistributedReferences = false;
+ m_SnpFunc = 0;
+ m_FeatDepth = 0;
+ m_GapDepth = 0;
m_IndexFailure = false;
try {
}
}
-// Support for temporary delta sequence referring to subrange of original sequence
-CRef<CSeq_id> CSeqMasterIndex::x_MakeUniqueId(void)
-{
- CRef<CSeq_id> id(new CSeq_id());
- bool good = false;
- while (!good) {
- id->SetLocal().SetStr("tmp_delta_subset_" + NStr::NumericToString(m_Counter.Add(1)));
- CBioseq_Handle bsh = m_Scope->GetBioseqHandle(*id);
- if (! bsh) {
- good = true;
- }
- }
- return id;
-}
-
-CRef<CBioseqIndex> CSeqMasterIndex::x_DeltaIndex(const CSeq_loc& loc)
-
-{
- try {
- // create delta sequence referring to location or range, using temporary local Seq-id
- CBioseq_Handle bsh = m_Scope->GetBioseqHandle(loc);
- CRef<CBioseq> delta(new CBioseq());
- delta->SetId().push_back(x_MakeUniqueId());
- delta->SetInst().Assign(bsh.GetInst());
- delta->SetInst().ResetSeq_data();
- delta->SetInst().ResetExt();
- delta->SetInst().SetRepr(CSeq_inst::eRepr_delta);
- CRef<CDelta_seq> element(new CDelta_seq());
- element->SetLoc().Assign(loc);
- delta->SetInst().SetExt().SetDelta().Set().push_back(element);
- delta->SetInst().SetLength(sequence::GetLength(loc, m_Scope));
-
- // add to scope
- CBioseq_Handle deltaBsh = m_Scope->AddBioseq(*delta);
-
- if (deltaBsh) {
- // create CBioseqIndex object for delta Bioseq
- CRef<CSeqsetIndex> noparent;
-
- CRef<CBioseqIndex> bsx(new CBioseqIndex(deltaBsh, *delta, bsh, noparent, m_Tseh, m_Scope, *this, m_Policy, m_Flags, m_Depth, true));
-
- return bsx;
- }
- }
- catch (CException& e) {
- LOG_POST_X(2, Error << "Error in CSeqMasterIndex::x_DeltaIndex: " << e.what());
- }
- return CRef<CBioseqIndex> ();
-}
-
-CConstRef<CSeq_loc> CSeqMasterIndex::x_SubRangeLoc(const string& accn, int from, int to, bool rev_comp)
-
-{
- TAccnIndexMap::iterator it = m_AccnIndexMap.find(accn);
- if (it != m_AccnIndexMap.end()) {
- CRef<CBioseqIndex> bsx = it->second;
- for (const CRef<CSeq_id>& id : bsx->GetBioseq().GetId()) {
- switch (id->Which()) {
- case CSeq_id::e_Other:
- case CSeq_id::e_Genbank:
- case CSeq_id::e_Embl:
- case CSeq_id::e_Ddbj:
- case CSeq_id::e_Tpg:
- case CSeq_id::e_Tpe:
- case CSeq_id::e_Tpd:
- {
- CSeq_loc::TStrand strand = eNa_strand_unknown;
- if (rev_comp) {
- strand = eNa_strand_minus;
- }
- CSeq_id& nc_id = const_cast<CSeq_id&>(*id);
- // create location from range
- CConstRef<CSeq_loc> loc(new CSeq_loc(nc_id, from, to, strand));
- if (loc) {
- return loc;
- }
- }
- break;
- default:
- break;
- }
- }
- }
- return CConstRef<CSeq_loc> ();
-}
-
// Get first Bioseq index
CRef<CBioseqIndex> CSeqMasterIndex::GetBioseqIndex (void)
CRef<CBioseqIndex> CSeqMasterIndex::GetBioseqIndex (const CSeq_loc& loc)
{ // NOTE(review): the removed x_DeltaIndex path caught CException around GetBioseqHandle(loc); nothing here does - confirm callers tolerate an exception
- CRef<CBioseqIndex> bsx = x_DeltaIndex(loc);
-
- if (bsx) {
- return bsx;
- }
- return CRef<CBioseqIndex> ();
-}
-
-// Get Bioseq index by subrange
-CRef<CBioseqIndex> CSeqMasterIndex::GetBioseqIndex (const string& accn, int from, int to, bool rev_comp)
-
-{
- string accession = accn;
- if (accession.empty()) {
- CRef<CBioseqIndex> bsx = GetBioseqIndex();
- if (bsx) {
- accession = bsx->GetAccession();
- }
- }
-
- if (! accession.empty()) {
- CConstRef<CSeq_loc> loc = x_SubRangeLoc(accession, from, to, rev_comp);
-
- if (loc) {
- return GetBioseqIndex(*loc);
- }
- }
- return CRef<CBioseqIndex> ();
-}
-
-CRef<CBioseqIndex> CSeqMasterIndex::GetBioseqIndex (int from, int to, bool rev_comp)
-
-{
- return GetBioseqIndex("", from, to, rev_comp);
+ CBioseq_Handle bsh = m_Scope->GetBioseqHandle(loc); // resolve the location to a Bioseq handle through the scope
+ return GetBioseqIndex(bsh); // reuse the handle-based lookup instead of building a temporary delta Bioseq
}
// Allow access to internal vectors for application to use in iterators
CRef<CScope> scope,
CSeqMasterIndex& idx,
CSeqEntryIndex::EPolicy policy,
- CSeqEntryIndex::TFlags flags,
- int depth,
- bool surrogate)
+ CSeqEntryIndex::TFlags flags)
: m_Bsh(bsh),
m_Bsp(bsp),
m_OrigBsh(obsh),
m_Scope(scope),
m_Idx(&idx),
m_Policy(policy),
- m_Flags(flags),
- m_Depth(depth),
- m_Surrogate(surrogate)
+ m_Flags(flags)
{
m_FetchFailure = false;
m_Topology = NCBI_SEQTOPOLOGY(not_set);
m_IsDelta = false;
+ m_IsDeltaLitOnly = false;
m_IsVirtual = false;
m_IsMap = false;
m_Accession.clear();
+ m_IsRefSeq = false;
m_IsNC = false;
m_IsNM = false;
m_IsNR = false;
m_Taxname.clear();
m_Common.clear();
m_Lineage.clear();
- m_Taxid = 0;
+ m_Taxid = ZERO_TAX_ID;
m_UsingAnamorph = false;
m_Genus.clear();
m_Species.clear();
m_IsVirtual = (repr == CSeq_inst::eRepr_virtual);
m_IsMap = (repr == CSeq_inst::eRepr_map);
}
+ if (m_IsDelta && m_Bsh.IsSetInst_Ext()) {
+ const CBioseq_Handle::TInst_Ext& ext = m_Bsh.GetInst_Ext();
+ bool hasLoc = false;
+ if ( ext.IsDelta() ) {
+ ITERATE (CDelta_ext::Tdata, it, ext.GetDelta().Get()) {
+ if ( (*it)->IsLoc() ) {
+ const CSeq_loc& loc = (*it)->GetLoc();
+ if (loc.IsNull()) continue;
+ hasLoc = true;
+ }
+ }
+ }
+ if (! hasLoc) {
+ m_IsDeltaLitOnly = true;
+ }
+ }
}
// process Seq-ids
for (CSeq_id_Handle sid : obsh.GetId()) {
+ // first switch to set RefSeq and ThirdParty flags
switch (sid.Which()) {
+ case NCBI_SEQID(Other):
+ m_IsRefSeq = true;
+ break;
case NCBI_SEQID(Tpg):
case NCBI_SEQID(Tpe):
case NCBI_SEQID(Tpd):
m_ThirdParty = true;
- // fall through
+ break;
+ default:
+ break;
+ }
+ // second switch now avoids complicated flag setting logic
+ switch (sid.Which()) {
+ case NCBI_SEQID(Tpg):
+ case NCBI_SEQID(Tpe):
+ case NCBI_SEQID(Tpd):
case NCBI_SEQID(Other):
case NCBI_SEQID(Genbank):
case NCBI_SEQID(Embl):
CBioseqIndex::~CBioseqIndex (void)
{ // no explicit cleanup remains; the surrogate-Bioseq removal that used to live here is deleted by this change
- if (m_Surrogate) {
- try {
- m_Scope->RemoveBioseq(m_Bsh);
- } catch (CException&) {
- // presumably still in use; let it be
- }
- }
}
// Gap collection (delayed until needed)
SSeqMapSelector sel;
size_t resolveCount = 0;
- /*
- if (m_Policy == CSeqEntryIndex::eInternal) {
- resolveCount = 0;
+
+ CWeakRef<CSeqMasterIndex> idx = GetSeqMasterIndex();
+ auto idxl = idx.Lock();
+ if (idxl) {
+ resolveCount = idxl->GetGapDepth();
}
- */
sel.SetFlags(CSeqMap::fFindGap)
.SetResolveCount(resolveCount);
}
}
-// Feature collection (delayed until needed)
-void CBioseqIndex::x_InitFeats (void)
+void CBioseqIndex::x_DefaultSelector(SAnnotSelector& sel, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, bool onlyNear, CScope& scope)
{ // Configures sel for feature collection from policy, flags and onlyNear. NOTE(review): the scope parameter is not referenced in this body
- try {
- if (m_FeatsInitialized) {
- return;
- }
+ bool snpOK = false; // set by the policy/flag logic below; decides whether SNP named annots are included or excluded
+ bool cddOK = false; // same, for the "CDD" named annot
- if (! m_DescsInitialized) {
- // initialize descriptors first to get m_ForceOnlyNearFeats flag
- x_InitDescs();
- }
+ if (policy == CSeqEntryIndex::eExhaustive) {
- m_FeatsInitialized = true;
+ // experimental policy forces collection of features from all sequence levels
+ sel.SetResolveAll();
+ sel.SetResolveDepth(kMax_Int);
+ // ignores RefSeq/INSD barrier, overrides far fetch policy user object
+ // for now, always excludes external annots, ignores custom enable bits
- SAnnotSelector sel;
+ } else if (policy == CSeqEntryIndex::eInternal || onlyNear) {
- if (m_Policy != CSeqEntryIndex::eExternal) {
- // unless explicitly desired, exclude external annots - need explicit show flags
- if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) != 0) {
- sel.ExcludeNamedAnnots("SNP");
- }
- if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) != 0) {
- sel.ExcludeNamedAnnots("CDD");
- }
- sel.ExcludeNamedAnnots("STS");
- }
+ // do not fetch features from underlying sequence component records
+ sel.SetResolveDepth(0);
+ sel.SetExcludeExternal(true);
+ // always excludes external annots, ignores custom enable bits
- if (m_Policy == CSeqEntryIndex::eInternal || m_ForceOnlyNearFeats) {
+ } else if (policy == CSeqEntryIndex::eAdaptive) {
- // do not fetch features from underlying sequence component records
- if (m_Surrogate) {
- // delta with sublocation needs to map features from original Bioseq
- sel.SetResolveAll();
- sel.SetResolveDepth(1);
- sel.SetExcludeExternal();
- } else {
- // otherwise limit collection to local records in top-level Seq-entry
- sel.SetResolveDepth(0);
- sel.SetExcludeExternal();
- }
+ sel.SetResolveAll();
+ // normal situation uses adaptive depth for feature collection,
+ // includes barrier between RefSeq and INSD accession types
+ sel.SetAdaptiveDepth(true);
- } else if (m_Policy == CSeqEntryIndex::eExhaustive) {
+ // conditionally allows external annots, based on custom enable bits
+ if ((flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
+ snpOK = true;
+ }
+ if ((flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
+ cddOK = true;
+ }
- sel.SetResolveAll();
- // experimental flag forces collection of features from all levels
- sel.SetResolveDepth(kMax_Int);
- // also ignores RefSeq/INSD barrier, far fetch policy user object
+ } else if (policy == CSeqEntryIndex::eExternal) {
- } else if (m_Policy == CSeqEntryIndex::eExternal) {
+ // same as eAdaptive
+ sel.SetResolveAll();
+ sel.SetAdaptiveDepth(true);
- // same as eAdaptive, except also allows external annots
- sel.SetResolveAll();
- sel.SetAdaptiveDepth(true);
- // needs to be here
- sel.AddUnnamedAnnots();
- // allow external SNPs
- if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
- sel.IncludeNamedAnnotAccession("SNP");
- sel.AddNamedAnnots("SNP");
- }
- if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
- sel.IncludeNamedAnnotAccession("CDD");
- sel.AddNamedAnnots("CDD");
- }
- m_Scope->SetKeepExternalAnnotsForEdit();
- // obey flag to hide CDD features by default in the web display
- if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) != 0) {
- sel.ExcludeNamedAnnots("CDD");
- }
+ // but always allows external annots without need for custom enable bits
+ snpOK = true;
+ cddOK = true;
- } else if (m_Depth > -1) {
+ } else if (policy == CSeqEntryIndex::eFtp) {
+ // for public ftp releases
+ if (m_IsRefSeq) {
 sel.SetResolveAll();
- // explicit depth setting overrides adaptive depth (probably only needed for debugging)
- sel.SetResolveDepth(m_Depth);
+ sel.SetAdaptiveDepth(true);
+ } else if (m_IsDeltaLitOnly) { // NOTE(review): applies the same settings as the final else branch below
+ sel.SetResolveDepth(0);
+ sel.SetExcludeExternal(true);
+ } else {
+ sel.SetResolveDepth(0);
+ sel.SetExcludeExternal(true);
+ }
- } else if (m_Policy == CSeqEntryIndex::eAdaptive) {
+ } else if (policy == CSeqEntryIndex::eWeb) {
+ // for public web pages
+ if (m_IsRefSeq) { // NOTE(review): all three branches of this chain currently apply identical settings
 sel.SetResolveAll();
- // normal situation uses adaptive depth for feature collection,
- // includes barrier between RefSeq and INSD accession types
 sel.SetAdaptiveDepth(true);
-
- // calling AddUnnamedAnnots once again suppresses tRNA features in a ("tRNAscan-SE") named annot
- // but commenting it out allows external variations in NG_008330 to override internal gene, mRNA, CDS, and exon features
- sel.AddUnnamedAnnots();
-
- // allow external SNPs - testing for now, probably needs to be in external policy
- if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
- sel.IncludeNamedAnnotAccession("SNP");
- sel.AddNamedAnnots("SNP");
- }
- if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
- sel.IncludeNamedAnnotAccession("CDD");
- sel.AddNamedAnnots("CDD");
- }
- m_Scope->SetKeepExternalAnnotsForEdit();
-
- } else if (m_Policy == CSeqEntryIndex::eIncremental) {
-
- // do not fetch features from underlying sequence component records
- if (m_Surrogate) {
- // delta with sublocation needs to map features from original Bioseq
- sel.SetResolveAll();
- sel.SetResolveDepth(1);
- sel.SetExcludeExternal();
- } else {
- // otherwise limit collection to local records in top-level Seq-entry
- sel.SetResolveAll();
- sel.SetResolveDepth(0);
- sel.SetExcludeExternal();
- }
-
- /*
+ } else if (m_IsDeltaLitOnly) {
 sel.SetResolveAll();
- // flatfile generator now needs to do its own exploration of far delta components
- // and needs to implement barrier between RefSeq and INSD accession types
- sel.SetResolveDepth(1);
-
- // calling AddUnnamedAnnots once again suppresses tRNA features in a ("tRNAscan-SE") named annot
- // sel.AddUnnamedAnnots();
-
- // allow external SNPs - testing for now, probably needs to be in external policy
- if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
- sel.IncludeNamedAnnotAccession("SNP");
- sel.AddNamedAnnots("SNP");
- }
- if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
- sel.IncludeNamedAnnotAccession("CDD");
- sel.AddNamedAnnots("CDD");
- }
- m_Scope->SetKeepExternalAnnotsForEdit();
- */
+ sel.SetAdaptiveDepth(true);
+ } else {
+ sel.SetResolveAll();
+ sel.SetAdaptiveDepth(true);
 }
- // bit flags exclude specific features
- if ((m_Flags & CSeqEntryIndex::fHideImpFeats) != 0) {
- sel.ExcludeFeatType(CSeqFeatData::e_Imp);
- }
- if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) != 0) {
- sel.ExcludeFeatType(CSeqFeatData::e_Variation);
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_variation);
+ // conditionally allows external annots, based on custom enable bits
+ if ((flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
+ snpOK = true;
 }
- if ((m_Flags & CSeqEntryIndex::fHideSTSFeats) != 0) {
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_STS);
- }
- if ((m_Flags & CSeqEntryIndex::fHideExonFeats) != 0) {
- sel.ExcludeNamedAnnots("Exon");
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon);
- }
- if ((m_Flags & CSeqEntryIndex::fHideIntronFeats) != 0) {
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron);
- }
- if ((m_Flags & CSeqEntryIndex::fHideMiscFeats) != 0) {
- sel.ExcludeFeatType(CSeqFeatData::e_Site);
- sel.ExcludeFeatType(CSeqFeatData::e_Bond);
- sel.ExcludeFeatType(CSeqFeatData::e_Region);
- sel.ExcludeFeatType(CSeqFeatData::e_Comment);
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature);
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_preprotein);
- }
- if ((m_Flags & CSeqEntryIndex::fHideGapFeats) != 0) {
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_gap);
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_assembly_gap);
- }
-
- // additional common settings
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue)
- .ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite)
- .ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq);
-
- sel.SetFeatComparator(new feature::CFeatComparatorByLabel);
-
- // request exception to capture fetch failure
- sel.SetFailUnresolved();
-
- bool onlyGeneRNACDS = false;
- if ((m_Flags & CSeqEntryIndex::fGeneRNACDSOnly) != 0) {
- onlyGeneRNACDS = true;
+ if ((flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
+ cddOK = true;
 }
+ }
- // variables for setting m_BestProteinFeature
- TSeqPos longest = 0;
- CProt_ref::EProcessed bestprocessed = CProt_ref::eProcessed_not_set;
- CProt_ref::EProcessed processed;
+ // fHideSNPFeats and fHideCDDFeats flags override any earlier settings
+ if ((flags & CSeqEntryIndex::fHideSNPFeats) != 0) {
+ snpOK = false;
+ }
+ if ((flags & CSeqEntryIndex::fHideCDDFeats) != 0) {
+ cddOK = false;
+ }
- // next gap
- CGapIndex* sgx = NULL;
- if (m_GapList.size() > 0) {
- sgx = m_GapList[0];
- }
+ // configure remote annot settings in selector
+ if ( snpOK ) {
 CWeakRef<CSeqMasterIndex> idx = GetSeqMasterIndex();
 auto idxl = idx.Lock();
 if (idxl) {
- /*
- if (! idxl->IsSmallGenomeSet()) {
- // limit feature collection to immediate Bioseq-set parent
- CRef<CSeqsetIndex> prnt = GetParent();
- if (prnt) {
- CBioseq_set_Handle bssh = prnt->GetSeqsetHandle();
- if (bssh) {
- CSeq_entry_Handle pseh = bssh.GetParentEntry();
- if (pseh) {
- sel.SetLimitSeqEntry(pseh);
- }
- }
+ FAddSnpFunc* func = idxl->GetSnpFunc();
+ if (func) {
+ // under PubSeq Gateway, need to get exact accession for SNP retrieval
+ CBioseq_Handle bsh = GetBioseqHandle();
+ string na_acc;
+ (*func) (bsh, na_acc); // callback fills na_acc; if it stays empty no SNP annot accession is added
+ if (na_acc.length() > 0) {
+ sel.IncludeNamedAnnotAccession(na_acc);
 }
+ } else {
+ // otherwise just give SNP name
+ sel.IncludeNamedAnnotAccession("SNP");
 }
- */
-
- CRef<feature::CFeatTree> ft = idxl->GetFeatTree();
-
- // iterate features on Bioseq
- for (CFeat_CI feat_it(m_Bsh, sel); feat_it; ++feat_it) {
- const CMappedFeat mf = *feat_it;
-
- if (onlyGeneRNACDS) {
- const CSeqFeatData& data = mf.GetData();
- CSeqFeatData::E_Choice type = data.Which();
- if (type != CSeqFeatData::e_Gene &&
- type != CSeqFeatData::e_Rna &&
- type != CSeqFeatData::e_Cdregion) {
- continue;
- }
- }
-
- CSeq_feat_Handle hdl = mf.GetSeq_feat_Handle();
-
- CRef<CFeatureIndex> sfx(new CFeatureIndex(hdl, mf, *this));
- m_SfxList.push_back(sfx);
-
- ft->AddFeature(mf);
-
- // CFeatureIndex from CMappedFeat for use with GetBestGene
- m_FeatIndexMap[mf] = sfx;
+ }
- // set specific flags for various feature types
- CSeqFeatData::E_Choice type = sfx->GetType();
- CSeqFeatData::ESubtype subtype = sfx->GetSubtype();
+ } else {
+ sel.ExcludeNamedAnnotAccession("SNP");
+ }
- if (type == CSeqFeatData::e_Biosrc) {
- m_HasSource = true;
- if (! m_BioSource) {
- if (! mf.IsSetData ()) continue;
- const CSeqFeatData& sfdata = mf.GetData();
- const CBioSource& biosrc = sfdata.GetBiosrc();
- m_BioSource.Reset (&biosrc);
- }
- continue;
- }
+ if ( cddOK ) {
+ sel.IncludeNamedAnnotAccession("CDD");
+ } else {
+ sel.ExcludeNamedAnnotAccession("CDD");
+ }
- if (type == CSeqFeatData::e_Gene) {
- m_HasGene = true;
- if (m_HasMultiIntervalGenes) {
- continue;
- }
- const CSeq_loc& loc = mf.GetLocation ();
- switch (loc.Which()) {
- case CSeq_loc::e_Packed_int:
- case CSeq_loc::e_Packed_pnt:
- case CSeq_loc::e_Mix:
- case CSeq_loc::e_Equiv:
- m_HasMultiIntervalGenes = true;
- break;
- default:
- break;
- }
- continue;
- }
+ CWeakRef<CSeqMasterIndex> idx = GetSeqMasterIndex();
+ auto idxl = idx.Lock();
+ if (idxl) {
+ int featDepth = idxl->GetFeatDepth();
+ if (featDepth > 0) { // an explicit depth replaces the policy-selected resolve depth; 0 leaves the policy choice alone
+ sel.SetResolveDepth(featDepth);
+ }
+ }
- if (subtype == CSeqFeatData::eSubtype_operon) {
- idxl->SetHasOperon(true);
- continue;
- }
+ // bit flags exclude specific features
+ // source features are collected elsewhere
+ sel.ExcludeFeatType(CSeqFeatData::e_Biosrc);
+ // pub features are used in the REFERENCES section
+ sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_pub);
+ // some feature types are always excluded (deprecated?)
+ // sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue)
+ sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite)
+ .ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq);
+ // exclude other types based on user flags
+ if ((flags & CSeqEntryIndex::fHideImpFeats) != 0) {
+ sel.ExcludeFeatType(CSeqFeatData::e_Imp);
+ }
+ if ((flags & CSeqEntryIndex::fHideSTSFeats) != 0) {
+ sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_STS);
+ }
+ if ((flags & CSeqEntryIndex::fHideExonFeats) != 0) {
+ sel.ExcludeNamedAnnots("Exon");
+ sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon);
+ }
+ if ((flags & CSeqEntryIndex::fHideIntronFeats) != 0) {
+ sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron);
+ }
+ if ((flags & CSeqEntryIndex::fHideMiscFeats) != 0) {
+ sel.ExcludeFeatType(CSeqFeatData::e_Site);
+ sel.ExcludeFeatType(CSeqFeatData::e_Bond);
+ sel.ExcludeFeatType(CSeqFeatData::e_Region);
+ sel.ExcludeFeatType(CSeqFeatData::e_Comment);
+ sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature);
+ sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_preprotein);
+ }
+ if ((flags & CSeqEntryIndex::fHideGapFeats) != 0) {
+ sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_gap);
+ sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_assembly_gap);
+ }
- if (type == CSeqFeatData::e_Prot && IsAA()) {
- if (! mf.IsSetData ()) continue;
- const CSeqFeatData& sfdata = mf.GetData();
- const CProt_ref& prp = sfdata.GetProt();
- processed = CProt_ref::eProcessed_not_set;
- if (prp.IsSetProcessed()) {
- processed = prp.GetProcessed();
- }
- const CSeq_loc& loc = mf.GetLocation ();
- TSeqPos prot_length = sequence::GetLength(loc, m_Scope);
- if (prot_length > longest) {
- m_BestProtFeatInitialized = true;
- m_BestProteinFeature = sfx;
- longest = prot_length;
- bestprocessed = processed;
- } else if (prot_length == longest) {
- // unprocessed 0 > preprotein 1 > mat peptide 2
- if (processed < bestprocessed) {
- m_BestProtFeatInitialized = true;
- m_BestProteinFeature = sfx;
- longest = prot_length;
- bestprocessed = processed;
- }
- }
- continue;
- }
+ // additional common settings
+ sel.SetFeatComparator(new feature::CFeatComparatorByLabel);
- if (type == CSeqFeatData::e_Cdregion && IsNA()) {
- } else if (type == CSeqFeatData::e_Rna && IsNA()) {
- } else if (type == CSeqFeatData::e_Prot && IsAA()) {
- } else {
- continue;
- }
+ // limit exploration of far deltas with no features to avoid timeout
+ sel.SetMaxSearchSegments(500);
+ sel.SetMaxSearchSegmentsAction(SAnnotSelector::eMaxSearchSegmentsSilent);
+ sel.SetMaxSearchTime(25);
- // index feature for (local) product Bioseq (CDS -> protein, mRNA -> cDNA, or Prot -> peptide)
- CSeq_id_Handle idh = mf.GetProductId();
- if (idh) {
- string str = idh.AsString();
- CRef<CBioseqIndex> bsxp = idxl->GetBioseqIndex(str);
- if (bsxp) {
- bsxp->m_FeatForProdInitialized = true;
- bsxp->m_FeatureForProduct = sfx;
- }
- }
- }
- }
- }
- catch (CException& e) {
- m_FetchFailure = true;
- LOG_POST_X(6, Error << "Error in CBioseqIndex::x_InitFeats: " << e.what());
- }
+ // request exception to capture fetch failure
+ sel.SetFailUnresolved();
}
-// Feature collection (delayed until needed)
-void CBioseqIndex::x_InitFeatsByLoc (const CSeq_loc& slp)
+// Feature collection common implementation method (delayed until needed)
+void CBioseqIndex::x_InitFeats (CSeq_loc* slpp)
{
try {
SAnnotSelector sel;
- if (m_Policy != CSeqEntryIndex::eExternal) {
- // unless explicitly desired, exclude external annots - need explicit show flags
- if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) != 0) {
- sel.ExcludeNamedAnnots("SNP");
- }
- if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) != 0) {
- sel.ExcludeNamedAnnots("CDD");
- }
- sel.ExcludeNamedAnnots("STS");
- }
-
- if (m_Policy == CSeqEntryIndex::eExhaustive) {
-
- sel.SetResolveAll();
- // experimental flag forces collection of features from all levels
- sel.SetResolveDepth(kMax_Int);
- // also ignores RefSeq/INSD barrier, far fetch policy user object
-
- } else if (m_Policy == CSeqEntryIndex::eExternal) {
-
- // same as eAdaptive, except also allows external annots
- sel.SetResolveAll();
- sel.SetAdaptiveDepth(true);
- // needs to be here
- sel.AddUnnamedAnnots();
- // allow external SNPs
- if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
- sel.IncludeNamedAnnotAccession("SNP");
- sel.AddNamedAnnots("SNP");
- }
- if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
- sel.IncludeNamedAnnotAccession("CDD");
- sel.AddNamedAnnots("CDD");
- }
- m_Scope->SetKeepExternalAnnotsForEdit();
- // obey flag to hide CDD features by default in the web display
- if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) != 0) {
- sel.ExcludeNamedAnnots("CDD");
- }
-
- } else if (m_Policy == CSeqEntryIndex::eInternal || m_ForceOnlyNearFeats) {
-
- // do not fetch features from underlying sequence component records
- if (m_Surrogate) {
- // delta with sublocation needs to map features from original Bioseq
- sel.SetResolveAll();
- sel.SetResolveDepth(1);
- sel.SetExcludeExternal();
- } else {
- // otherwise limit collection to local records in top-level Seq-entry
- sel.SetResolveDepth(0);
- sel.SetExcludeExternal();
- }
-
- } else if (m_Depth > -1) {
-
- sel.SetResolveAll();
- // explicit depth setting overrides adaptive depth (probably only needed for debugging)
- sel.SetResolveDepth(m_Depth);
-
- } else if (m_Policy == CSeqEntryIndex::eAdaptive) {
-
- sel.SetResolveAll();
- // normal situation uses adaptive depth for feature collection,
- // includes barrier between RefSeq and INSD accession types
- sel.SetAdaptiveDepth(true);
-
- // calling AddUnnamedAnnots once again suppresses tRNA features in a ("tRNAscan-SE") named annot
- // but commenting it out allows external variations in NG_008330 to override internal gene, mRNA, CDS, and exon features
- sel.AddUnnamedAnnots();
-
- // allow external SNPs - testing for now, probably needs to be in external policy
- if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
- sel.IncludeNamedAnnotAccession("SNP");
- sel.AddNamedAnnots("SNP");
- }
- if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
- sel.IncludeNamedAnnotAccession("CDD");
- sel.AddNamedAnnots("CDD");
- }
- m_Scope->SetKeepExternalAnnotsForEdit();
-
- } else if (m_Policy == CSeqEntryIndex::eIncremental) {
-
- // do not fetch features from underlying sequence component records
- if (m_Surrogate) {
- // delta with sublocation needs to map features from original Bioseq
- sel.SetResolveAll();
- sel.SetResolveDepth(1);
- sel.SetExcludeExternal();
- } else {
- // otherwise limit collection to local records in top-level Seq-entry
- sel.SetResolveAll();
- sel.SetResolveDepth(0);
- sel.SetExcludeExternal();
- }
-
- /*
- sel.SetResolveAll();
- // flatfile generator now needs to do its own exploration of far delta components
- // and needs to implement barrier between RefSeq and INSD accession types
- sel.SetResolveDepth(1);
-
- // calling AddUnnamedAnnots once again suppresses tRNA features in a ("tRNAscan-SE") named annot
- // sel.AddUnnamedAnnots();
-
- // allow external SNPs - testing for now, probably needs to be in external policy
- if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowSNPFeats) != 0) {
- sel.IncludeNamedAnnotAccession("SNP");
- sel.AddNamedAnnots("SNP");
- }
- if ((m_Flags & CSeqEntryIndex::fHideCDDFeats) == 0 && (m_Flags & CSeqEntryIndex::fShowCDDFeats) != 0) {
- sel.IncludeNamedAnnotAccession("CDD");
- sel.AddNamedAnnots("CDD");
- }
- m_Scope->SetKeepExternalAnnotsForEdit();
- */
- }
-
- // bit flags exclude specific features
- if ((m_Flags & CSeqEntryIndex::fHideImpFeats) != 0) {
- sel.ExcludeFeatType(CSeqFeatData::e_Imp);
- }
- if ((m_Flags & CSeqEntryIndex::fHideSNPFeats) != 0) {
- sel.ExcludeFeatType(CSeqFeatData::e_Variation);
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_variation);
- }
- if ((m_Flags & CSeqEntryIndex::fHideSTSFeats) != 0) {
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_STS);
- }
- if ((m_Flags & CSeqEntryIndex::fHideExonFeats) != 0) {
- sel.ExcludeNamedAnnots("Exon");
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon);
- }
- if ((m_Flags & CSeqEntryIndex::fHideIntronFeats) != 0) {
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron);
- }
- if ((m_Flags & CSeqEntryIndex::fHideMiscFeats) != 0) {
- sel.ExcludeFeatType(CSeqFeatData::e_Site);
- sel.ExcludeFeatType(CSeqFeatData::e_Bond);
- sel.ExcludeFeatType(CSeqFeatData::e_Region);
- sel.ExcludeFeatType(CSeqFeatData::e_Comment);
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature);
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_preprotein);
- }
- if ((m_Flags & CSeqEntryIndex::fHideGapFeats) != 0) {
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_gap);
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_assembly_gap);
- }
-
- // additional common settings
- sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue)
- .ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite)
- .ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq);
-
- sel.SetFeatComparator(new feature::CFeatComparatorByLabel);
-
- // request exception to capture fetch failure
- sel.SetFailUnresolved();
+ x_DefaultSelector(sel, m_Policy, m_Flags, m_ForceOnlyNearFeats, *m_Scope);
bool onlyGeneRNACDS = false;
if ((m_Flags & CSeqEntryIndex::fGeneRNACDSOnly) != 0) {
CProt_ref::EProcessed bestprocessed = CProt_ref::eProcessed_not_set;
CProt_ref::EProcessed processed;
- // next gap
- CGapIndex* sgx = NULL;
- if (m_GapList.size() > 0) {
- sgx = m_GapList[0];
- }
-
CWeakRef<CSeqMasterIndex> idx = GetSeqMasterIndex();
auto idxl = idx.Lock();
if (idxl) {
// start collection over on each segment
m_SfxList.clear();
+ // iterate features on Bioseq or sublocation
+ CFeat_CI feat_it;
+ CRef<CSeq_loc_Mapper> slice_mapper;
+ if (slpp == 0) {
+ feat_it = CFeat_CI(m_Bsh, sel);
+ } else {
+ SAnnotSelector sel_cpy = sel;
+ sel_cpy.SetIgnoreStrand();
+ /*
+ if (selp->IsSetStrand() && selp->GetStrand() == eNa_strand_minus) {
+ sel_cpy.SetSortOrder(SAnnotSelector::eSortOrder_Reverse);
+ }
+ */
+ CConstRef<CSeq_id> bsid = m_Bsh.GetSeqId();
+ if (bsid) {
+ SetDiagFilter(eDiagFilter_All, "!(1305.28,31)");
+ CSeq_id seq_id;
+ seq_id.Assign( *bsid );
+ CSeq_loc old_loc;
+ old_loc.SetInt().SetId( seq_id );
+ old_loc.SetInt().SetFrom( 0 );
+ old_loc.SetInt().SetTo( m_Length - 1 );
+ slice_mapper = new CSeq_loc_Mapper( *slpp, old_loc, m_Scope );
+ slice_mapper->SetFuzzOption( CSeq_loc_Mapper::fFuzzOption_RemoveLimTlOrTr );
+ slice_mapper->TruncateNonmappingRanges();
+ SetDiagFilter(eDiagFilter_All, "");
+ }
+ feat_it = CFeat_CI(*m_Scope, *slpp, sel_cpy);
+ }
+
// iterate features on Bioseq
- for (CFeat_CI feat_it(*m_Scope, slp, sel); feat_it; ++feat_it) {
+ for (; feat_it; ++feat_it) {
const CMappedFeat mf = *feat_it;
+ const CSeqFeatData& data = mf.GetData();
+ CSeqFeatData::E_Choice typ = data.Which();
if (onlyGeneRNACDS) {
- const CSeqFeatData& data = mf.GetData();
- CSeqFeatData::E_Choice type = data.Which();
- if (type != CSeqFeatData::e_Gene &&
- type != CSeqFeatData::e_Rna &&
- type != CSeqFeatData::e_Cdregion) {
+ if (typ != CSeqFeatData::e_Gene &&
+ typ != CSeqFeatData::e_Rna &&
+ typ != CSeqFeatData::e_Cdregion) {
continue;
}
}
CSeq_feat_Handle hdl = mf.GetSeq_feat_Handle();
- CRef<CFeatureIndex> sfx(new CFeatureIndex(hdl, mf, *this));
+ CConstRef<CSeq_loc> feat_loc(&mf.GetLocation());
+ if (slpp) {
+ feat_loc.Reset( slice_mapper->Map( mf.GetLocation() ) );
+ }
+
+ CRef<CFeatureIndex> sfx(new CFeatureIndex(hdl, mf, feat_loc, *this));
m_SfxList.push_back(sfx);
ft->AddFeature(mf);
}
catch (CException& e) {
m_FetchFailure = true;
- LOG_POST_X(6, Error << "Error in CBioseqIndex::x_InitFeatsByLoc: " << e.what());
+ LOG_POST_X(6, Error << "Error in CBioseqIndex::x_InitFeats: " << e.what());
}
}
+// Feature collection methods (delayed until needed)
+void CBioseqIndex::x_InitFeats (void) // No-argument overload: delegates feature collection with a null location.
+
+{
+ x_InitFeats(0); // 0 = no sublocation; NOTE(review): presumably resolves to the pointer-taking x_InitFeats worker -- confirm
+}
+
+void CBioseqIndex::x_InitFeats (CSeq_loc& slp) // Reference overload: delegates feature collection for the given location.
+
+{
+ x_InitFeats(&slp); // passes the address of slp; NOTE(review): presumably the pointer-taking x_InitFeats worker -- confirm
+}
+
// GetFeatureForProduct allows hypothetical protein defline generator to obtain gene locus tag
CRef<CFeatureIndex> CBioseqIndex::GetFeatureForProduct (void)
return m_Lineage;
}
-int CBioseqIndex::GetTaxid (void)
+TTaxId CBioseqIndex::GetTaxid (void)
{
if (! m_SourcesInitialized) {
bool isUnknownLength,
bool isAssemblyGap,
CBioseqIndex& bsx)
- : m_Start(start),
+ : m_Bsx(&bsx),
+ m_Start(start),
m_End(end),
m_Length(length),
m_GapType(type),
m_GapEvidence(evidence),
m_IsUnknownLength(isUnknownLength),
- m_IsAssemblyGap(isAssemblyGap),
- m_Bsx(&bsx)
+ m_IsAssemblyGap(isAssemblyGap)
{
}
// Constructor
CFeatureIndex::CFeatureIndex (CSeq_feat_Handle sfh,
const CMappedFeat mf,
+ CConstRef<CSeq_loc> feat_loc,
CBioseqIndex& bsx)
: m_Sfh(sfh),
m_Mf(mf),
const CSeqFeatData& data = m_Mf.GetData();
m_Type = data.Which();
m_Subtype = data.GetSubtype();
- const CSeq_feat& mpd = m_Mf.GetMappedFeature();
- CConstRef<CSeq_loc> fl(&mpd.GetLocation());
- m_Fl = fl;
- m_Start = fl->GetStart(eExtreme_Positional);
- m_End = fl->GetStop(eExtreme_Positional);
+ m_Fl = feat_loc;
+ m_Start = m_Fl->GetStart(eExtreme_Positional);
+ m_End = m_Fl->GetStop(eExtreme_Positional);
}
// Find CFeatureIndex object for best gene using internal CFeatTree
#############################################################################
-# $Id: CMakeLists.txt 593577 2019-09-20 12:22:42Z gouriano $
+# $Id: CMakeLists.txt 612973 2020-07-30 19:13:00Z ivanov $
#############################################################################
NCBI_add_subdirectory(
alnmgr cddalignview test manip cleanup format edit validator
asniotest align seqmasks_io eutils
align_format snputil uudutil variation writers pubseq_gateway
- logging import
+ logging import flatfile
)
-# $Id: Makefile.in 586035 2019-05-08 18:29:07Z vakatov $
+# $Id: Makefile.in 612973 2020-07-30 19:13:00Z ivanov $
# Meta-makefile("objtools" project)
#################################
alnmgr cddalignview test manip edit cleanup format validator \
asniotest align seqmasks_io eutils \
align_format snputil uudutil variation writers \
- import
+ import flatfile
srcdir = @srcdir@
include @builddir@/Makefile.meta
-/* $Id: alnvec.cpp 577167 2018-12-31 20:16:49Z dicuccio $
+/* $Id: alnvec.cpp 608806 2020-05-21 14:51:55Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
const bool record_coords = scrn_width && scrn_lefts && scrn_rights;
// allocate space for the row
- char* c_buff = new char[aln_len + 1];
- char* c_buff_ptr = c_buff;
+ buffer.clear();
+ buffer.reserve(aln_len);
string buff;
const TNumseg& left_seg = x_GetSeqLeftSeg(row);
// add regular sequence to buffer
GetSeqString(buff, row, start, stop);
TSeqPos buf_len = min<TSeqPos>(buff.size(), seg_len);
- memcpy(c_buff_ptr, buff.c_str(), buf_len);
- c_buff_ptr += buf_len;
+ buffer += buff;
if (buf_len < seg_len) {
// Not enough chars in the sequence, add gap
buf_len = seg_len - buf_len;
- char* ch_buff = new char[buf_len + 1];
char fill_ch;
if (seg < left_seg || seg > right_seg) {
fill_ch = GetGapChar(row);
}
- memset(ch_buff, fill_ch, buf_len);
- ch_buff[buf_len] = 0;
- memcpy(c_buff_ptr, ch_buff, buf_len);
- c_buff_ptr += buf_len;
- delete[] ch_buff;
+ for (size_t i = 0; i < buf_len; ++i) {
+ buffer += fill_ch;
+ }
}
// take care of coords if necessary
} else {
// add appropriate number of gap/end chars
- char* ch_buff = new char[seg_len + 1];
char fill_ch;
if (seg < left_seg || seg > right_seg) {
fill_ch = GetGapChar(row);
}
- memset(ch_buff, fill_ch, seg_len);
- ch_buff[seg_len] = 0;
- memcpy(c_buff_ptr, ch_buff, seg_len);
- c_buff_ptr += seg_len;
- delete[] ch_buff;
+ for (size_t i = 0; i < seg_len; ++i) {
+ buffer += fill_ch;
+ }
}
aln_pos += len;
}
}
}
}
- c_buff[aln_len] = '\0';
- buffer = c_buff;
- delete [] c_buff;
return buffer;
}
-/* $Id: blastdb_dataextract.cpp 591961 2019-08-23 13:08:25Z madden $
+/* $Id: blastdb_dataextract.cpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
if (seqid->IsGi()) {
if (seqid->GetGi() == m_Gi) {
ITERATE(CBlast_def_line::TLinks, links_int, (*itr)->GetLinks()) {
- retval += NStr::IntToString(*links_int) + SEPARATOR;
+ retval += NStr::NumericToString(*links_int) + SEPARATOR;
}
break;
}
} else {
ITERATE(CBlast_def_line::TLinks, links_int, (*itr)->GetLinks()) {
- retval += NStr::IntToString(*links_int) + SEPARATOR;
+ retval += NStr::NumericToString(*links_int) + SEPARATOR;
}
}
}
}
string CBlastDBExtractor::ExtractTaxId() {
- return NStr::IntToString(x_ExtractTaxId());
+ return NStr::NumericToString(x_ExtractTaxId()); // string form of the TTaxId returned by x_ExtractTaxId()
}
string CBlastDBExtractor::ExtractLeafTaxIds() {
- set<int> taxids;
+ set<TTaxId> taxids; // filled by x_ExtractLeafTaxIds() below
x_ExtractLeafTaxIds(taxids);
if (taxids.empty()) {
return ExtractTaxId(); // no leaf taxids: fall back to the single taxid string
}
string retval; // SEPARATOR-joined list of the taxids, in set iteration order
- ITERATE(set<int>, taxids_iter, taxids) {
+ ITERATE(set<TTaxId>, taxids_iter, taxids) {
if (retval.empty()) {
- retval = NStr::IntToString(*taxids_iter);
+ retval = NStr::NumericToString(*taxids_iter); // first element: no leading separator
} else {
- retval += SEPARATOR + NStr::IntToString(*taxids_iter);
+ retval += SEPARATOR + NStr::NumericToString(*taxids_iter); // subsequent elements are prefixed with SEPARATOR
}
}
return retval;
}
string CBlastDBExtractor::ExtractCommonTaxonomicName() {
- const int kTaxID = x_ExtractTaxId();
+ const TTaxId kTaxID = x_ExtractTaxId();
SSeqDBTaxInfo tax_info;
string retval(NOT_AVAILABLE);
try {
}
string CBlastDBExtractor::ExtractLeafCommonTaxonomicNames() {
- set<int> taxids;
+ set<TTaxId> taxids;
x_ExtractLeafTaxIds(taxids);
SSeqDBTaxInfo tax_info;
string retval;
- ITERATE(set<int>, taxid_iter, taxids) {
- const int kTaxID = *taxid_iter;
+ ITERATE(set<TTaxId>, taxid_iter, taxids) {
+ const TTaxId kTaxID = *taxid_iter;
try {
m_BlastDb.GetTaxInfo(kTaxID, tax_info);
_ASSERT(kTaxID == tax_info.taxid);
}
string CBlastDBExtractor::ExtractScientificName() {
- const int kTaxID = x_ExtractTaxId();
+ const TTaxId kTaxID = x_ExtractTaxId();
SSeqDBTaxInfo tax_info;
string retval(NOT_AVAILABLE);
try {
}
string CBlastDBExtractor::ExtractLeafScientificNames() {
- set<int> taxids;
+ set<TTaxId> taxids;
x_ExtractLeafTaxIds(taxids);
SSeqDBTaxInfo tax_info;
string retval;
- ITERATE(set<int>, taxid_iter, taxids) {
- const int kTaxID = *taxid_iter;
+ ITERATE(set<TTaxId>, taxid_iter, taxids) {
+ const TTaxId kTaxID = *taxid_iter;
try {
m_BlastDb.GetTaxInfo(kTaxID, tax_info);
_ASSERT(kTaxID == tax_info.taxid);
}
string CBlastDBExtractor::ExtractBlastName() {
- const int kTaxID = x_ExtractTaxId();
+ const TTaxId kTaxID = x_ExtractTaxId();
SSeqDBTaxInfo tax_info;
string retval(NOT_AVAILABLE);
try {
//}
string CBlastDBExtractor::ExtractSuperKingdom() {
- const int kTaxID = x_ExtractTaxId();
+ const TTaxId kTaxID = x_ExtractTaxId();
SSeqDBTaxInfo tax_info;
string retval(NOT_AVAILABLE);
try {
return out.str();
}
-int CBlastDBExtractor::x_ExtractTaxId()
+TTaxId CBlastDBExtractor::x_ExtractTaxId()
{
x_SetGi();
return m_Gi2TaxidMap.second[m_Gi];
}
// for database without Gi:
- vector<int> taxid;
+ vector<TTaxId> taxid;
m_BlastDb.GetTaxIDs(m_Oid, taxid);
- return taxid.size() ? taxid[0] : 0;
+ return taxid.size() ? taxid[0] : ZERO_TAX_ID;
}
-void CBlastDBExtractor::x_ExtractLeafTaxIds(set<int>& taxids)
+void CBlastDBExtractor::x_ExtractLeafTaxIds(set<TTaxId>& taxids)
{
x_SetGi();
m_BlastDb.GetLeafTaxIDs(m_Oid, m_Gi2TaxidSetMap.second);
}
taxids.clear();
- const set<int>& taxid_set = m_Gi2TaxidSetMap.second[m_Gi];
+ const set<TTaxId>& taxid_set = m_Gi2TaxidSetMap.second[m_Gi];
taxids.insert(taxid_set.begin(), taxid_set.end());
return;
}
// for database without Gi:
- vector<int> taxid;
+ vector<TTaxId> taxid;
m_BlastDb.GetLeafTaxIDs(m_Oid, taxid);
taxids.clear();
taxids.insert(taxid.begin(), taxid.end());
}
}
if ((fields.tax_id == 1) || (fields.tax_names == 1)) {
- unsigned int tax_id = 0;
+ TTaxId tax_id = ZERO_TAX_ID;
if (dl.IsSetTaxid()) {
tax_id = dl.GetTaxid();
}
}
if ((fields.leaf_node_tax_ids == 1) || (fields.leaf_node_tax_names == 1)) {
- set<int> tax_id_set = dl.GetLeafTaxIds();
+ set<TTaxId> tax_id_set = dl.GetLeafTaxIds();
if (tax_id_set.empty()) {
if (dl.IsSetTaxid()) {
tax_id_set.insert(dl.GetTaxid());
}
else {
- tax_id_set.insert(0);
+ tax_id_set.insert(ZERO_TAX_ID);
}
}
string separator = kEmptyStr;
- ITERATE(set<int>, itr, tax_id_set) {
+ ITERATE(set<TTaxId>, itr, tax_id_set) {
if (fields.leaf_node_tax_names == 1) {
try {
SSeqDBTaxInfo taxinfo;
if(fields.links == 1) {
if (dl.IsSetLinks()) {
ITERATE(CBlast_def_line::TLinks, links_int, dl.GetLinks()) {
- results[CBlastDeflineUtil::links] += NStr::IntToString(*links_int) + SEPARATOR;
+ results[CBlastDeflineUtil::links] += NStr::NumericToString(*links_int) + SEPARATOR;
}
}
else {
-/* $Id: seqdb.cpp 605340 2020-04-09 16:06:43Z ivanov $
+/* $Id: seqdb.cpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
m_Impl->AccessionsToOids(accs, oids);
}
-void CSeqDB::TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv) const
+void CSeqDB::TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const // Thin wrapper: forwards both arguments to the implementation object.
{
m_Impl->TaxIdsToOids(tax_ids, rv); // tax_ids is taken by non-const reference; NOTE(review): the implementation may modify it -- confirm
}
-void CSeqDB::GetDBTaxIds(set<Int4> & tax_ids) const
+void CSeqDB::GetDBTaxIds(set<TTaxId> & tax_ids) const // Thin wrapper: forwards the output set to the implementation object.
{
m_Impl->GetDBTaxIds(tax_ids);
}
}
void CSeqDB::GetTaxIDs(int oid,
- map<TGi, int> & gi_to_taxid,
+ map<TGi, TTaxId> & gi_to_taxid,
bool persist) const
{
////m_Impl->Verify();
- typedef map<TGi, int> TmpMap;
+ typedef map<TGi, TTaxId> TmpMap;
TmpMap gi_to_taxid_tmp;
m_Impl->GetTaxIDs(oid, gi_to_taxid_tmp, persist);
if ( !persist ) {
}
void CSeqDB::GetTaxIDs(int oid,
- vector<int> & taxids,
+ vector<TTaxId> & taxids,
bool persist) const
{
////m_Impl->Verify();
}
void CSeqDB::GetAllTaxIDs(int oid,
- set<int> & taxids) const
+ set<TTaxId> & taxids) const // Thin wrapper: forwards oid and the output set to the implementation object.
{
m_Impl->GetAllTaxIDs(oid, taxids);
}
void CSeqDB::GetLeafTaxIDs(
int oid,
- map<TGi, set<int> >& gi_to_taxid_set,
+ map<TGi, set<TTaxId> >& gi_to_taxid_set,
bool persist
) const
{
////m_Impl->Verify();
- typedef map<TGi, set<int> > TmpMap;
+ typedef map<TGi, set<TTaxId> > TmpMap;
TmpMap gi_to_taxid_set_tmp;
m_Impl->GetLeafTaxIDs(oid, gi_to_taxid_set_tmp, persist);
if ( !persist ) {
void CSeqDB::GetLeafTaxIDs(
int oid,
- vector<int>& taxids,
+ vector<TTaxId>& taxids,
bool persist
) const
{
////m_Impl->Verify();
}
-void CSeqDB::GetTaxInfo(int taxid, SSeqDBTaxInfo & info)
+void CSeqDB::GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo & info) // Thin wrapper around CSeqDBImpl::GetTaxInfo; info is the output structure.
{
CSeqDBImpl::GetTaxInfo(taxid, info); // class-qualified call, no m_Impl instance involved
}
-/* $Id: seqdb_lmdb.cpp 595902 2019-10-29 17:32:09Z fongah2 $
+/* $Id: seqdb_lmdb.cpp 616872 2020-09-22 13:14:27Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
CBlastLMDBManager::CBlastEnv::CBlastEnv(const string & fname, ELMDBFileType file_type, bool read_only, Uint8 map_size) :
- m_Filename(fname), m_FileType(file_type),m_Env(lmdb::env::create()), m_Count(1), m_ReadOnly(read_only), m_MapSize(map_size)
+ m_Filename(fname), m_FileType(file_type),m_Env(lmdb::env::create()), m_Count(1), m_ReadOnly(read_only)
{
const MDB_dbi num_db(3);
m_Env.set_max_dbs(num_db);
m_dbis.resize(eDbiMax, UINT_MAX);
if(m_ReadOnly) {
CFile tf(fname);
- m_MapSize = (tf.GetLength()/10000 + 1) *10000;
- m_Env.set_mapsize(m_MapSize);
+ Uint8 readMapSize = (tf.GetLength()/10000 + 1) *10000;
+ m_Env.set_mapsize(readMapSize);
m_Env.open(m_Filename.c_str(), MDB_NOSUBDIR|MDB_NOLOCK|MDB_RDONLY, 0664);
InitDbi(m_Env,file_type);
}
else {
+ LOG_POST(Info <<"Initial Map Size: " << map_size);
/// map_size 0 means use lmdb default
- if(m_MapSize != 0) {
- m_Env.set_mapsize(m_MapSize);
+ if(map_size != 0) {
+ m_Env.set_mapsize(map_size);
}
m_Env.open(m_Filename.c_str(), MDB_NOSUBDIR , 0664);
}
return m_dbis[dbi_type];
}
+void CBlastLMDBManager::CBlastEnv::SetMapSize(Uint8 map_size) // Applies map_size to the wrapped LMDB environment; no-op for read-only environments.
+{
+ if(!m_ReadOnly) { // read-only environments get their map size in the constructor, derived from the file length
+ m_Env.set_mapsize(map_size);
+ }
+}
+
CBlastLMDBManager & CBlastLMDBManager::GetInstance() {
static CSafeStatic<CBlastLMDBManager> lmdb_manager;
return lmdb_manager.Get();
db_volname = p->GetDbi(CBlastEnv::eDbiVolname);
return p->GetEnv();
}
-lmdb::env & CBlastLMDBManager::GetReadEnvAcc(const string & fname, MDB_dbi & db_acc)
+lmdb::env & CBlastLMDBManager::GetReadEnvAcc(const string & fname, MDB_dbi & db_acc, bool* opened) // Returns the env for fname and stores its accession-to-oid dbi handle in db_acc.
{
- CBlastEnv* p = GetBlastEnv(fname, eLMDB);
+ CBlastEnv* p = GetBlastEnv(fname, eLMDB, opened); // opened is passed through unchanged; GetBlastEnv decides whether to set it
db_acc = p->GetDbi(CBlastEnv::eDbiAcc2oid);
return p->GetEnv();
}
-lmdb::env & CBlastLMDBManager::GetReadEnvTax(const string & fname, MDB_dbi & db_tax)
+lmdb::env & CBlastLMDBManager::GetReadEnvTax(const string & fname, MDB_dbi & db_tax, bool* opened) // Returns the env for fname and stores its taxid-to-offset dbi handle in db_tax.
{
- CBlastEnv* p = GetBlastEnv(fname, eTaxId2Offsets);
+ CBlastEnv* p = GetBlastEnv(fname, eTaxId2Offsets, opened); // opened is passed through unchanged; GetBlastEnv decides whether to set it
db_tax = p->GetDbi(CBlastEnv::eDbiTaxid2offset);
return p->GetEnv();
}
-CBlastLMDBManager::CBlastEnv* CBlastLMDBManager::GetBlastEnv(const string & fname, ELMDBFileType file_type)
+CBlastLMDBManager::CBlastEnv* CBlastLMDBManager::GetBlastEnv(const string & fname, // Returns the CBlastEnv whose filename equals fname, creating and registering one if none is listed.
+ ELMDBFileType file_type, // only used when a new CBlastEnv has to be constructed
+ bool* opened) // optional in/out flag: if non-null and false, one extra reference is added and the flag becomes true
{
CFastMutexGuard guard(m_Mutex); // the list search and the insertion below both run under m_Mutex
NON_CONST_ITERATE(list <CBlastEnv* >, itr, m_EnvList) {
if((*itr)->GetFilename() == fname) {
(*itr)->AddReference(); // reference for this lookup
+ if ( opened && !*opened ) {
+ (*itr)->AddReference(); // additional reference, added only the first time the caller's flag is seen as false
+ *opened = true;
+ }
return (*itr);
}
}
CBlastEnv * p (new CBlastEnv(fname, file_type)); // constructor initialises m_Count to 1; NOTE(review): presumably that is this lookup's reference -- confirm
m_EnvList.push_back(p);
+ if ( opened && !*opened ) {
+ p->AddReference(); // same extra reference as in the found-in-list branch
+ *opened = true;
+ }
return p;
}
m_Oid2SeqIdsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eOid2SeqIds)),
m_Oid2TaxIdsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eOid2TaxIds)),
m_TaxId2OidsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eTaxId2Oids)),
- m_TaxId2OffsetsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eTaxId2Offsets))
+ m_TaxId2OffsetsFile(GetFileNameFromExistingLMDBFile(fname, ELMDBFileType::eTaxId2Offsets)),
+ m_LMDBFileOpened(false)
+{
+}
+CSeqDBLMDB::~CSeqDBLMDB() // Calls CloseEnv for m_LMDBFile when m_LMDBFileOpened is set.
{
+ if ( m_LMDBFileOpened ) { // this flag is handed by address to GetReadEnvAcc in this file, which may set it to true
+ CBlastLMDBManager::GetInstance().CloseEnv(m_LMDBFile);
+ m_LMDBFileOpened = false; // reset so the close is not repeated
+ }
}
void
oids.clear();
{
MDB_dbi dbi_handle;
- lmdb::env & env = CBlastLMDBManager::GetInstance().GetReadEnvAcc(m_LMDBFile, dbi_handle);
+ lmdb::env & env = CBlastLMDBManager::GetInstance().GetReadEnvAcc(m_LMDBFile, dbi_handle, &m_LMDBFileOpened);
lmdb::dbi dbi(dbi_handle);
auto txn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
auto cursor = lmdb::cursor::open(txn, dbi);
oids.resize(accessions.size(), kSeqDBEntryNotFound);
MDB_dbi dbi_handle;
- lmdb::env & env = CBlastLMDBManager::GetInstance().GetReadEnvAcc(m_LMDBFile, dbi_handle);
+ lmdb::env & env = CBlastLMDBManager::GetInstance().GetReadEnvAcc(m_LMDBFile, dbi_handle, &m_LMDBFileOpened);
{
lmdb::dbi dbi(dbi_handle);
auto txn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
}
-void CSeqDBLMDB::GetDBTaxIds(vector<Int4> & tax_ids) const
+void CSeqDBLMDB::GetDBTaxIds(vector<TTaxId> & tax_ids) const
{
tax_ids.clear();
auto cursor = lmdb::cursor::open(txn, dbi);
lmdb::val key;
while (cursor.get(key, MDB_NEXT)) {
- Int4 taxid = *((Int4 *)key.data());
+ TTaxId taxid = TAX_ID_FROM(Int4, *((Int4 *)key.data()));
tax_ids.push_back(taxid);
}
cursor.close();
NCBI_THROW( CSeqDBException, eArgErr, "Taxonomy Id to Oids lookup error in " + dbname);
}
}
+ CBlastLMDBManager::GetInstance().CloseEnv(m_TaxId2OffsetsFile);
}
-void CSeqDBLMDB::GetOidsForTaxIds(const set<Int4> & tax_ids, vector<blastdb::TOid>& oids, vector<Int4> & tax_ids_found) const
+void CSeqDBLMDB::GetOidsForTaxIds(const set<TTaxId> & tax_ids, vector<blastdb::TOid>& oids, vector<TTaxId> & tax_ids_found) const
{
try {
auto txn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
lmdb::dbi dbi(dbi_handle);
auto cursor = lmdb::cursor::open(txn, dbi);
- ITERATE(set<Int4>, itr, tax_ids) {
- Int4 tax_id = *itr;
+ ITERATE(set<TTaxId>, itr, tax_ids) {
+ Int4 tax_id = TAX_ID_TO(Int4, *itr);
lmdb::val data2find(tax_id);
if (cursor.get(data2find, MDB_SET)) {
m_DataStart += (2* (num_of_oids + 1));
}
- inline void GetTaxIdListForOid(blastdb::TOid oid, vector<Int4> & taxid_list);
+ inline void GetTaxIdListForOid(blastdb::TOid oid, vector<TTaxId> & taxid_list);
private:
Uint8 * m_IndexStart;
Int4 * m_DataStart;
};
-void CLookupTaxIds::GetTaxIdListForOid(blastdb::TOid oid, vector<Int4> & taxid_list)
+void CLookupTaxIds::GetTaxIdListForOid(blastdb::TOid oid, vector<TTaxId> & taxid_list)
{
taxid_list.clear();
Uint8 * index_ptr = m_IndexStart + oid;
index_ptr--;
Int4 * begin = (oid == 0) ? m_DataStart:m_DataStart + (*index_ptr);
while (begin < end) {
- taxid_list.push_back(*begin);
+ taxid_list.push_back(TAX_ID_FROM(Int4, *begin));
begin++;
}
}
void
-CSeqDBLMDB::NegativeTaxIdsToOids(const set<Int4>& tax_ids, vector<blastdb::TOid>& rv, vector<Int4> & tax_ids_found) const
+CSeqDBLMDB::NegativeTaxIdsToOids(const set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const
{
rv.clear();
vector<blastdb::TOid> oids;
GetOidsForTaxIds(tax_ids, oids, tax_ids_found);
CMemoryFile oid_file(m_Oid2TaxIdsFile);
- set<Int4> tax_id_list(tax_ids.begin(), tax_ids.end());
+ set<TTaxId> tax_id_list(tax_ids.begin(), tax_ids.end());
CLookupTaxIds lookup(oid_file);
for(unsigned int i=0; i < oids.size(); i++) {
- vector<Int4> file_list;
+ vector<TTaxId> file_list;
lookup.GetTaxIdListForOid(oids[i], file_list);
if(file_list.size() > tax_ids.size()) {
continue;
}
}
-void CSeqDBLMDB::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int4> & tax_ids) const
+void CSeqDBLMDB::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const
{
CMemoryFile oid_file(m_Oid2TaxIdsFile);
CLookupTaxIds lookup(oid_file);
for(unsigned int i=0; i < oids.size(); i++) {
- vector<Int4> taxid_list;
+ vector<TTaxId> taxid_list;
lookup.GetTaxIdListForOid(oids[i], taxid_list);
tax_ids.insert(taxid_list.begin(), taxid_list.end());
}
-/* $Id: seqdbcommon.cpp 605336 2020-04-09 16:04:52Z ivanov $
+/* $Id: seqdbcommon.cpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
for(Int4 * elem = (bbeginp + 2); elem < bendp; ++elem) {
- taxids.tax_ids.insert(SeqDB_GetStdOrd(elem));
+ taxids.tax_ids.insert(TAX_ID_FROM(Int4, SeqDB_GetStdOrd(elem)));
}
} else {
Int4 elem(0);
if (dig == -1) {
// Skip blank lines or comments by ignoring zero.
if (elem != 0) {
- taxids.tax_ids.insert(elem);
+ taxids.tax_ids.insert(TAX_ID_FROM(Int4, elem));
}
elem = 0;
continue;
-/* $Id: seqdbgilistset.cpp 597735 2019-11-26 17:53:47Z fongah2 $
+/* $Id: seqdbgilistset.cpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
if(user_list->GetNumTaxIds() > 0) {
vector<blastdb::TOid> & oids = user_list->SetOidsForTaxIdsList();
- set<Int4> & tax_ids = user_list->GetTaxIdsList();
+ set<TTaxId> & tax_ids = user_list->GetTaxIdsList();
lmdb_set.TaxIdsToOids(tax_ids, oids);
}
if((user_list->GetNumGis() == 0) && (user_list->GetNumTis() == 0) &&
}
if(m_NegativeList->GetNumTaxIds() > 0) {
vector<blastdb::TOid> & oids = m_NegativeList->SetExcludedOids();
- set<Int4> & tax_ids = m_NegativeList->GetTaxIdsList();
+ set<TTaxId> & tax_ids = m_NegativeList->GetTaxIdsList();
lmdb_set.NegativeTaxIdsToOids(tax_ids, oids);
}
-/* $Id: seqdbimpl.cpp 607218 2020-04-30 18:42:35Z ivanov $
+/* $Id: seqdbimpl.cpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
void CSeqDBImpl::GetTaxIDs(int oid,
- map<TGi, int> & gi_to_taxid,
+ map<TGi, TTaxId> & gi_to_taxid,
bool persist)
{
CSeqDBLockHold locked(m_Atlas);
}
void CSeqDBImpl::GetTaxIDs(int oid,
- vector<int> & taxids,
+ vector<TTaxId> & taxids,
bool persist)
{
CSeqDBLockHold locked(m_Atlas);
}
void CSeqDBImpl::GetAllTaxIDs(int oid,
- set<int> & taxids)
+ set<TTaxId> & taxids)
{
CSeqDBLockHold locked(m_Atlas);
void CSeqDBImpl::GetLeafTaxIDs(
int oid,
- map<TGi, set<int> >& gi_to_taxid_set,
+ map<TGi, set<TTaxId> >& gi_to_taxid_set,
bool persist
)
{
void CSeqDBImpl::GetLeafTaxIDs(
int oid,
- vector<int>& taxids,
+ vector<TTaxId>& taxids,
bool persist
)
{
}
-void CSeqDBImpl::TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv)
+void CSeqDBImpl::TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv)
{
CHECK_MARKER();
rv.clear();
return;
}
-void CSeqDBImpl::GetDBTaxIds(set<Int4> & tax_ids)
+void CSeqDBImpl::GetDBTaxIds(set<TTaxId> & tax_ids)
{
CHECK_MARKER();
CSeqDBLockHold locked(m_Atlas);
}
}
-void CSeqDBImpl::GetTaxInfo(int taxid, SSeqDBTaxInfo & info)
+void CSeqDBImpl::GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo & info)
{
if (! CSeqDBTaxInfo::GetTaxNames(taxid, info)) {
CNcbiOstrstream oss;
#ifndef OBJTOOLS_READERS_SEQDB__SEQDBIMPL_HPP
#define OBJTOOLS_READERS_SEQDB__SEQDBIMPL_HPP
-/* $Id: seqdbimpl.hpp 605340 2020-04-09 16:06:43Z ivanov $
+/* $Id: seqdbimpl.hpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
/// @param persist
/// If false, the map will be cleared before adding new entries.
void GetTaxIDs(int oid,
- map<TGi, int> & gi_to_taxid,
+ map<TGi, TTaxId> & gi_to_taxid,
bool persist);
/// Get taxids for an OID.
/// @param persist
/// If false, the map will be cleared before adding new entries.
void GetTaxIDs(int oid,
- vector<int> & taxids,
+ vector<TTaxId> & taxids,
bool persist);
/// Get gi to taxid map for an OID.
/// If false, the map will be cleared before adding new entries.
void GetLeafTaxIDs(
int oid,
- map<TGi, set<int> >& gi_to_taxid_set,
+ map<TGi, set<TTaxId> >& gi_to_taxid_set,
bool persist
);
/// Get all tax ids (leaf and non-leaf) for an oid
void GetAllTaxIDs(int oid,
- set<int> & taxids);
+ set<TTaxId> & taxids);
/// Get gi to taxid map for an OID.
///
/// If false, the map will be cleared before adding new entries.
void GetLeafTaxIDs(
int oid,
- vector<int>& gi_to_taxid_set,
+ vector<TTaxId>& gi_to_taxid_set,
bool persist
);
/// An integer identifying the taxid to fetch.
/// @param info
/// A structure containing taxonomic description strings.
- static void GetTaxInfo(int taxid, SSeqDBTaxInfo & info);
+ static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo & info);
/// Returns the sum of the sequence lengths.
///
/// Get Oid list for input tax ids
/// @param tax_ids taxonomy ids
/// @param rv oids corresponding to tax ids
- void TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv);
+ void TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv);
/// Get all unique tax ids from db
/// @param tax_ids return taxonomy ids in db
- void GetDBTaxIds(set<Int4> & tax_ids);
+ void GetDBTaxIds(set<TTaxId> & tax_ids);
private:
CLASS_MARKER_FIELD("IMPL")
x_AdjustOidsOffset(rv);
}
-void CSeqDBLMDBEntry::TaxIdsToOids(const set<Int4>& tax_ids, vector<blastdb::TOid>& rv, vector<Int4> & tax_ids_found) const
+void CSeqDBLMDBEntry::TaxIdsToOids(const set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const // Looks up oids for tax_ids via m_LMDB, then adjusts the oid offsets in rv.
{
m_LMDB->GetOidsForTaxIds(tax_ids, rv, tax_ids_found); // rv and tax_ids_found are outputs of the lookup
x_AdjustOidsOffset_TaxList(rv); // post-processes rv in place
}
-void CSeqDBLMDBEntry::NegativeTaxIdsToOids(const set<Int4>& tax_ids, vector<blastdb::TOid>& rv, vector<Int4> & tax_ids_found) const
+void CSeqDBLMDBEntry::NegativeTaxIdsToOids(const set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const // Negative-list counterpart: delegates to m_LMDB, then adjusts the oid offsets in rv.
{
m_LMDB->NegativeTaxIdsToOids(tax_ids, rv, tax_ids_found); // rv and tax_ids_found are outputs of the lookup
x_AdjustOidsOffset_TaxList(rv); // post-processes rv in place
}
-void CSeqDBLMDBEntry::GetDBTaxIds(vector<Int4> & tax_ids) const
+void CSeqDBLMDBEntry::GetDBTaxIds(vector<TTaxId> & tax_ids) const // Thin wrapper: forwards the output vector to m_LMDB.
{
m_LMDB->GetDBTaxIds(tax_ids);
}
-void CSeqDBLMDBEntry::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int4> & tax_ids) const
+void CSeqDBLMDBEntry::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const
{
if(m_isPartial) {
vector<TOid> tmp;
}
-void CSeqDBLMDBSet::TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv) const
+void CSeqDBLMDBSet::TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const
{
- vector<Int4> tax_ids_found;
- set<Int4> rv_tax_ids;
+ vector<TTaxId> tax_ids_found;
+ set<TTaxId> rv_tax_ids;
m_LMDBEntrySet[0]->TaxIdsToOids(tax_ids, rv, tax_ids_found);
rv_tax_ids.insert(tax_ids_found.begin(), tax_ids_found.end());
for(unsigned int i=1; i < m_LMDBEntrySet.size(); i++) {
tax_ids.swap(rv_tax_ids);
}
-void CSeqDBLMDBSet::NegativeTaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv) const
+void CSeqDBLMDBSet::NegativeTaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const
{
- vector<Int4> tax_ids_found;
- set<Int4> rv_tax_ids;
+ vector<TTaxId> tax_ids_found;
+ set<TTaxId> rv_tax_ids;
m_LMDBEntrySet[0]->NegativeTaxIdsToOids(tax_ids, rv, tax_ids_found);
rv_tax_ids.insert(tax_ids_found.begin(), tax_ids_found.end());
for(unsigned int i=1; i < m_LMDBEntrySet.size(); i++) {
tax_ids.swap(rv_tax_ids);
}
-void CSeqDBLMDBSet::GetDBTaxIds(set<Int4> & tax_ids) const
+void CSeqDBLMDBSet::GetDBTaxIds(set<TTaxId> & tax_ids) const
{
- vector<Int4> t;
+ vector<TTaxId> t;
m_LMDBEntrySet[0]->GetDBTaxIds(t);
tax_ids.insert(t.begin(), t.end());
for(unsigned int i=1; i < m_LMDBEntrySet.size(); i++) {
}
-void CSeqDBLMDBSet::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int4> & tax_ids) const
+void CSeqDBLMDBSet::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const
{
if (m_LMDBEntrySet.size() > 1) {
vector<TOid> t;
for(unsigned int i =0; i < oids.size(); i++){
if (oids[i] >= m_LMDBEntrySet[j]->GetOIDEnd()){
if (t.size() > 0){
- set<Int4> t_set;
+ set<TTaxId> t_set;
m_LMDBEntrySet[j]->GetTaxIdsForOids(t, t_set);
t.clear();
tax_ids.insert(t_set.begin(), t_set.end());
t.push_back(oids[i] - m_LMDBEntrySet[j]->GetOIDStart());
}
if (t.size() > 0){
- set<Int4> t_set;
+ set<TTaxId> t_set;
m_LMDBEntrySet[j]->GetTaxIdsForOids(t, t_set);
tax_ids.insert(t_set.begin(), t_set.end());
}
void NegativeSeqIdsToOids(const vector<string>& ids, vector<blastdb::TOid>& rv) const;
- void TaxIdsToOids(const set<Int4>& tax_ids, vector<blastdb::TOid>& rv, vector<Int4> & tax_ids_found) const;
+ void TaxIdsToOids(const set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const;
- void NegativeTaxIdsToOids(const set<Int4>& tax_ids, vector<blastdb::TOid>& rv, vector<Int4> & tax_ids_found) const;
+ void NegativeTaxIdsToOids(const set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const;
- void GetDBTaxIds(vector<Int4> & tax_ids) const;
+ void GetDBTaxIds(vector<TTaxId> & tax_ids) const;
- void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int4> & tax_ids) const;
+ void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const;
private:
void x_AdjustOidsOffset(vector<TOid> & oids) const;
void NegativeSeqIdsToOids(const vector<string>& ids, vector<blastdb::TOid>& rv) const;
- void TaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv) const;
+ void TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const;
- void NegativeTaxIdsToOids(set<Int4>& tax_ids, vector<blastdb::TOid>& rv) const;
+ void NegativeTaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const;
- void GetDBTaxIds(set<Int4> & tax_ids) const;
+ void GetDBTaxIds(set<TTaxId> & tax_ids) const;
- void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<Int4> & tax_ids) const;
+ void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const;
private:
vector<CRef<CSeqDBLMDBEntry> > m_LMDBEntrySet;
-/* $Id: seqdboidlist.cpp 579001 2019-01-29 13:54:57Z fongah2 $
+/* $Id: seqdboidlist.cpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
return;
}
- set<Int4> user_taxids;
+ set<TTaxId> user_taxids;
if(!user_list.Empty() && (user_list->GetNumTaxIds() > 0)) {
user_taxids = user_list->GetTaxIdsList();
}
- set<Int4> neg_user_taxids;
+ set<TTaxId> neg_user_taxids;
if(!neg_user_list.Empty() && (neg_user_list->GetNumTaxIds() > 0)) {
neg_user_taxids = neg_user_list->GetTaxIdsList();
}
vector<blastdb::TOid> oids;
CRef<CSeqDBGiList> list(new CSeqDBFileGiList(fnames[k], CSeqDBFileGiList::eTaxIdList));
s_GetFilteredOidRange(volset, fnames_vols[k], excluded_vols, list);
- set<Int4> taxids;
+ set<TTaxId> taxids;
taxids = list->GetTaxIdsList();
if(taxids.size() == 0){
continue;
}
if(user_taxids.size() > 0){
- vector<Int4> common;
+ vector<TTaxId> common;
common.resize(taxids.size());
- vector<Int4>::iterator itr = set_intersection(taxids.begin(), taxids.end(),
+ vector<TTaxId>::iterator itr = set_intersection(taxids.begin(), taxids.end(),
user_taxids.begin(), user_taxids.end(), common.begin());
common.resize(itr-common.begin());
if( common.size() == 0) {
taxids.insert(common.begin(), common.end());
}
if(neg_user_taxids.size() > 0) {
- vector<Int4> difference;
+ vector<TTaxId> difference;
difference.resize(taxids.size());
- vector<Int4>::iterator itr = set_difference(taxids.begin(), taxids.end(),
+ vector<TTaxId>::iterator itr = set_difference(taxids.begin(), taxids.end(),
neg_user_taxids.begin(), neg_user_taxids.end(), difference.begin());
difference.resize(itr-difference.begin());
if(difference.size() == 0){
-/* $Id: seqdbtax.cpp 530943 2017-03-20 12:53:37Z fongah2 $
+/* $Id: seqdbtax.cpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
/// Return the taxonomic identifier field (in host order)
- Int4 GetTaxId()const
+ TTaxId GetTaxId()const
{
- return SeqDB_GetStdOrd(& m_Taxid);
+ return TAX_ID_FROM(Int4, SeqDB_GetStdOrd(& m_Taxid));
}
/// Return the offset field (in host order)
}
-bool CSeqDBTaxInfo::GetTaxNames(Int4 tax_id,
+bool CSeqDBTaxInfo::GetTaxNames(TTaxId tax_id,
SSeqDBTaxInfo & info )
{
static CTaxDBFileInfo t;
const char * Data = t.GetDataPtr();
const CSeqDBTaxId* Index = t.GetIndexPtr();
- Int4 low_taxid = Index[low_index ].GetTaxId();
- Int4 high_taxid = Index[high_index].GetTaxId();
+ TTaxId low_taxid = Index[low_index ].GetTaxId();
+ TTaxId high_taxid = Index[high_index].GetTaxId();
if((tax_id < low_taxid) || (tax_id > high_taxid))
return false;
Int4 old_index = new_index;
while(1) {
- Int4 curr_taxid = Index[new_index].GetTaxId();
+ TTaxId curr_taxid = Index[new_index].GetTaxId();
if (tax_id < curr_taxid) {
high_index = new_index;
-/* $Id: seqdbvol.cpp 607218 2020-04-30 18:42:35Z ivanov $
+/* $Id: seqdbvol.cpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
//m_Atlas.Lock(locked);
for(TBDLLConstIter iter = dl.begin(); iter != dl.end(); iter ++) {
- int taxid = 0;
+ TTaxId taxid = ZERO_TAX_ID;
if ((*iter)->CanGetTaxid()) {
taxid = (*iter)->GetTaxid();
}
- if (taxid <= 0) {
+ if (taxid <= ZERO_TAX_ID) {
continue;
}
bool have_org_desc = false;
- if (use_taxinfo_cache && m_TaxCache.Lookup(taxid).NotEmpty()) {
+ if (use_taxinfo_cache && m_TaxCache.Lookup(TAX_ID_TO(int, taxid)).NotEmpty()) {
have_org_desc = true;
}
if (provide_new_taxonomy_info) {
if (have_org_desc) {
- taxonomy.push_back(m_TaxCache.Lookup(taxid));
+ taxonomy.push_back(m_TaxCache.Lookup(TAX_ID_TO(int, taxid)));
} else {
CRef<CDbtag> org_tag(new CDbtag);
org_tag->SetDb(TAX_ORGREF_DB_NAME);
- org_tag->SetTag().SetId(taxid);
+ org_tag->SetTag().SetId(TAX_ID_TO(int, taxid));
CRef<COrg_ref> org(new COrg_ref);
if (found_taxid_in_taxonomy_blastdb) {
taxonomy.push_back(desc);
if (use_taxinfo_cache) {
- m_TaxCache.Lookup(taxid) = desc;
+ m_TaxCache.Lookup(TAX_ID_TO(int, taxid)) = desc;
}
}
}
return x_GetFilteredHeader(oid, NULL);
}
-bool s_IncludeDefline_Taxid(const CBlast_def_line & def, const set<int> & user_tax_ids)
+bool s_IncludeDefline_Taxid(const CBlast_def_line & def, const set<TTaxId> & user_tax_ids)
{
CBlast_def_line::TTaxIds tax_ids;
if (def.IsSetTaxid()) {
}
if(def.IsSetLinks()) {
CBlast_def_line::TLinks leaf_ids = def.GetLinks();
- tax_ids.insert(leaf_ids.begin(), leaf_ids.end());
- }
+#ifdef NCBI_STRICT_TAX_ID
+ ITERATE(CBlast_def_line::TLinks, it, leaf_ids) tax_ids.insert(TAX_ID_FROM(int, *it));
+#else
+ tax_ids.insert(leaf_ids.begin(), leaf_ids.end());
+#endif
+ }
if(user_tax_ids.size() > tax_ids.size()) {
ITERATE(CBlast_def_line::TTaxIds, itr, tax_ids) {
}
else {
- ITERATE(set<int>, itr, user_tax_ids) {
+ ITERATE(set<TTaxId>, itr, user_tax_ids) {
if(tax_ids.find(*itr) != tax_ids.end()) {
return true;
}
return false;
}
-bool s_IncludeDefline_NegativeTaxid(const CBlast_def_line & def, const set<int> & user_tax_ids)
+bool s_IncludeDefline_NegativeTaxid(const CBlast_def_line & def, const set<TTaxId> & user_tax_ids)
{
CBlast_def_line::TTaxIds taxid_set = def.GetTaxIds();
if(taxid_set.size() > user_tax_ids.size()) {
# subdirectory
# Author: Kevin Bealer
# Original date: 10/21/2005
-# $URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.10.1/c++/src/objtools/blast/seqdb_writer/build-alias-index $
+# $URL: https://svn.ncbi.nlm.nih.gov/repos/toolkit/release/blast/2.11.0/c++/src/objtools/blast/seqdb_writer/build-alias-index $
INDEX_NAME=index.alx
OUTNAME=index.alx.new
-/* $Id: taxid_set.cpp 548810 2017-10-18 13:38:41Z ivanov $
+/* $Id: taxid_set.cpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
USING_SCOPE(objects);
#endif
+const TTaxId CTaxIdSet::kTaxIdNotSet = ZERO_TAX_ID;
+
void CTaxIdSet::SetMappingFromFile(CNcbiIstream & f)
{
while(f && (! f.eof())) {
}
if (gi_str.size() && tx_str.size()) {
- int taxid = NStr::StringToInt(tx_str, NStr::fAllowLeadingSpaces);
+ TTaxId taxid = NStr::StringToNumeric<TTaxId>(tx_str, NStr::fAllowLeadingSpaces);
string key = AccessionToKey(gi_str);
m_TaxIdMap[key] = taxid;
m_Matched = (m_GlobalTaxId != kTaxIdNotSet) || m_TaxIdMap.empty();
}
-int CTaxIdSet::x_SelectBestTaxid(const objects::CBlast_def_line & defline)
+TTaxId CTaxIdSet::x_SelectBestTaxid(const objects::CBlast_def_line & defline)
{
- int retval = m_GlobalTaxId;
+ TTaxId retval = m_GlobalTaxId;
if (retval != kTaxIdNotSet) {
return retval;
if (key->empty())
continue;
- map<string, int>::const_iterator item = m_TaxIdMap.find(*key);
+ map<string, TTaxId>::const_iterator item = m_TaxIdMap.find(*key);
if (item != m_TaxIdMap.end()) {
retval = item->second;
test_db.InsertVolumesInfo(vol_names, vol_num_oids);
CWriteDB_TaxID taxdb(tax_lmdb,100000);
- const int taxids[5] = { 9606, 562, 0, 2, 10239 };
+ const TTaxId taxids[5] = { TAX_ID_CONST(9606), TAX_ID_CONST(562), TAX_ID_CONST(0), TAX_ID_CONST(2), TAX_ID_CONST(10239) };
for (int i=0; i < source_db.GetNumOIDs(); i++) {
- set<int> t;
+ set<TTaxId> t;
for(int j=0; j < (i % 5 + 1); j++) {
t.insert(taxids[j]);
}
/* Test Tax Ids */
vector<blastdb::TOid> tax_oids;
- set<Int4> tax_ids;
- tax_ids.insert(10239);
- vector<Int4> rv_tax_ids;
+ set<TTaxId> tax_ids;
+ tax_ids.insert(TAX_ID_CONST(10239));
+ vector<TTaxId> rv_tax_ids;
test_db.GetOidsForTaxIds(tax_ids, tax_oids, rv_tax_ids);
for(unsigned int i=0; i < tax_ids.size(); i++) {
BOOST_REQUIRE_EQUAL(tax_oids[i] % 5, 4);
}
+/// Round-trip test for the LMDB writers when they are created with a very
+/// small initial map size (10): seq-id, volume-info and tax-id data are
+/// written and then read back through CSeqDBLMDB.
+BOOST_AUTO_TEST_CASE(TestLMDBMapSize)
+{
+    const string base_name = "tmp_lmdb";
+    DeleteLMDBFiles(true, base_name);
+    const string lmdb_name = BuildLMDBFileName(base_name, true);
+    const string tax_lmdb = GetFileNameFromExistingLMDBFile(lmdb_name, ELMDBFileType::eTaxId2Offsets);
+    const int kNumVols = 4;
+    CSeqDB source_db("data/writedb_prot",CSeqDB::eProtein);
+    vector<string> vol_names;
+    vector<blastdb::TOid> vol_num_oids;
+    for(unsigned int k=0; k < kNumVols; k++) {
+        vol_names.push_back("tmp_lmdb" + NStr::IntToString(k));
+        vol_num_oids.push_back(k*1234);
+    }
+
+    {
+        // Writer phase: the second constructor argument is the map size.
+        CWriteDB_LMDB test_db(lmdb_name, 10);
+        for (int i=0; i < source_db.GetNumOIDs(); i++) {
+            list< CRef<CSeq_id> > ids = source_db.GetSeqIDs(i);
+            test_db.InsertEntries(ids, i);
+        }
+        test_db.InsertVolumesInfo(vol_names, vol_num_oids);
+
+        CWriteDB_TaxID taxdb(tax_lmdb,10);
+        const TTaxId taxids[5] = { TAX_ID_CONST(9606), TAX_ID_CONST(562), TAX_ID_CONST(0), TAX_ID_CONST(2), TAX_ID_CONST(10239) };
+        // OID i receives the first (i % 5 + 1) entries of taxids.
+        for (int i=0; i < source_db.GetNumOIDs(); i++) {
+            set<TTaxId> t;
+            for(int j=0; j < (i % 5 + 1); j++) {
+                t.insert(taxids[j]);
+            }
+            taxdb.InsertEntries(t, i);
+        }
+    }
+
+    {
+        // Reader phase: the writers above have gone out of scope.
+        vector<string> test_neg_accs;
+        CSeqDBLMDB test_db(lmdb_name);
+
+        /* Test GetOids from Seq IDs */
+        for(int i=0; i < source_db.GetNumOIDs(); i++) {
+            vector<string> test_accs;
+            vector<blastdb::TOid> test_oids;
+            list< CRef<CSeq_id> > ids = source_db.GetSeqIDs(i);
+            CRef<CSeq_id> n_id = FindBestChoice(ids, CSeq_id::WorstRank);
+            test_neg_accs.push_back(n_id->GetSeqIdString(false));
+            ITERATE(list< CRef<CSeq_id> >, itr, ids) {
+                if((*itr)->IsGi()) {
+                    continue;
+                }
+                // Look each id up both with and without its version.
+                test_accs.push_back((*itr)->GetSeqIdString(true));
+                test_accs.push_back((*itr)->GetSeqIdString(false));
+            }
+            test_db.GetOids(test_accs, test_oids);
+            for(unsigned int j=0; j < test_accs.size(); j++) {
+                BOOST_REQUIRE_EQUAL(test_oids[j], i);
+            }
+        }
+
+        /* Test Negative Seq IDs to OIDs */
+        vector<blastdb::TOid> neg_oids;
+        test_db.NegativeSeqIdsToOids(test_neg_accs, neg_oids);
+        BOOST_REQUIRE_EQUAL(neg_oids.size(), 65);
+
+        /* Test Vol Info */
+        vector<string> test_vol_names;
+        vector<blastdb::TOid> test_vol_num_oids;
+        test_db.GetVolumesInfo(test_vol_names, test_vol_num_oids);
+        for(unsigned int k=0; k < kNumVols; k++) {
+            BOOST_REQUIRE_EQUAL(test_vol_num_oids[k], vol_num_oids[k]);
+            BOOST_REQUIRE_EQUAL(test_vol_names[k], vol_names[k]);
+        }
+
+        /* Test Tax Ids */
+        vector<blastdb::TOid> tax_oids;
+        set<TTaxId> tax_ids;
+        tax_ids.insert(TAX_ID_CONST(10239));
+        vector<TTaxId> rv_tax_ids;
+        test_db.GetOidsForTaxIds(tax_ids, tax_oids, rv_tax_ids);
+        // NOTE(review): the bound is tax_ids.size() but the index is into
+        // tax_oids, so only the first returned OID is checked - confirm intent.
+        for(unsigned int i=0; i < tax_ids.size(); i++) {
+            BOOST_REQUIRE_EQUAL(tax_oids[i] % 5, 4);
+        }
+
+        test_db.NegativeTaxIdsToOids(tax_ids, tax_oids, rv_tax_ids);
+        BOOST_REQUIRE_EQUAL(tax_oids.size(), 0);
+
+        tax_ids.clear();
+        // Use TAX_ID_CONST like every other tax id literal in this test so the
+        // code also builds when TTaxId is a strict (non-integer) type.
+        tax_ids.insert(TAX_ID_CONST(9606));
+        tax_ids.insert(TAX_ID_CONST(562));
+        test_db.NegativeTaxIdsToOids(tax_ids, tax_oids, rv_tax_ids);
+        // NOTE(review): bounded by rv_tax_ids.size() while indexing tax_oids -
+        // verify the two vectors are guaranteed to be compatible in size.
+        for(unsigned int i=0; i < rv_tax_ids.size(); i++) {
+            BOOST_REQUIRE((tax_oids[i] % 5 < 2));
+        }
+
+    }
+    DeleteLMDBFiles(true, base_name);
+}
+
BOOST_AUTO_TEST_SUITE_END()
-/* $Id: writedb_unit_test.cpp 588813 2019-07-01 12:29:54Z fongah2 $
+/* $Id: writedb_unit_test.cpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomy)
{
- const int kTaxId(9986);
+ const TTaxId kTaxId = TAX_ID_CONST(9986);
CTaxIdSet tis(kTaxId);
const string kDbName("foo");
CWriteDB blastdb(kDbName, CWriteDB::eNucleotide, kDbName);
int total=db.GetNumSeqs();
for (int oid=0; oid<total; oid++)
{
- vector<int> taxids;
+ vector<TTaxId> taxids;
db.GetTaxIDs(oid, taxids);
BOOST_REQUIRE(taxids.size() == 1);
BOOST_REQUIRE_EQUAL(kTaxId, taxids.front());
BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMap)
{
- const int kTaxId(9986);
+ const TTaxId kTaxId = TAX_ID_CONST(9986);
CRef<CTaxIdSet> tis(new CTaxIdSet());
const string kDbName("foo");
CWriteDB blastdb(kDbName, CWriteDB::eNucleotide, kDbName);
int total=db.GetNumSeqs();
for (int oid=0; oid<total; oid++)
{
- vector<int> taxids;
+ vector<TTaxId> taxids;
db.GetTaxIDs(oid, taxids);
BOOST_REQUIRE(taxids.size() == 1);
BOOST_REQUIRE_EQUAL(kTaxId, taxids.front());
BOOST_AUTO_TEST_CASE(CWriteDB_SetTaxonomyFromMapLclIds)
{
- const int kTaxId(382);
+ const TTaxId kTaxId = TAX_ID_CONST(382);
CRef<CTaxIdSet> tis(new CTaxIdSet());
const string kDbName("foo");
CWriteDB blastdb(kDbName, CWriteDB::eProtein, kDbName);
int total=db.GetNumSeqs();
for (int oid=0; oid<total; oid++)
{
- vector<int> taxids;
+ vector<TTaxId> taxids;
db.GetTaxIDs(oid, taxids);
BOOST_REQUIRE(taxids.size() == 1);
BOOST_REQUIRE_EQUAL(kTaxId, taxids.front());
CSeqDB readdb(dbname, CSeqDB::eProtein);
for(unsigned int i=0; i < kNumOfDeflines; i++){
CRef<CBlast_def_line_set> new_set = readdb.GetHdr(i);
- set<int> t;
+ set<TTaxId> t;
readdb.GetAllTaxIDs(i, t);
BOOST_REQUIRE_EQUAL(num_taxids[i], t.size());
BOOST_REQUIRE_EQUAL(num_deflines[i], new_set->Set().size());
-/* $Id: writedb_impl.cpp 588812 2019-07-01 12:29:10Z fongah2 $
+/* $Id: writedb_impl.cpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
return;
}
- vector<int> taxids;
+ vector<TTaxId> taxids;
string titles;
// Scan the CBioseq for taxids and the title string.
if (oi.IsId()) {
//defline->SetTaxid(oi.GetId());
- taxids.push_back(oi.GetId());
+ taxids.push_back(TAX_ID_FROM(CObject_id::TId, oi.GetId()));
}
}
}
const vector< vector<int> > & membbits,
const vector< vector<int> > & linkouts,
int pig,
- set<Int4> & tax_ids,
+ set<TTaxId> & tax_ids,
int OID,
bool parse_ids,
bool long_ids,
CConstRef<CBlast_def_line_set> deflines;
string binary_header;
vector< vector<int> > v1, v2;
- set<Int4> t;
+ set<TTaxId> t;
CConstRef<CBioseq> bsref(& bs);
x_ExtractDeflines(bsref, deflines, binary_header, v2, v2, 0, t, -1, parse_ids,
#ifndef OBJTOOLS_WRITERS_WRITEDB__WRITEDB_IMPL_HPP
#define OBJTOOLS_WRITERS_WRITEDB__WRITEDB_IMPL_HPP
-/* $Id: writedb_impl.hpp 588812 2019-07-01 12:29:10Z fongah2 $
+/* $Id: writedb_impl.hpp 616350 2020-09-15 12:19:05Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
const vector< vector<int> > & membbits,
const vector< vector<int> > & linkouts,
int pig,
- set<Int4> & tax_ids,
+ set<TTaxId> & tax_ids,
int OID=-1,
bool parse_ids=true,
bool long_seqid=false,
/// Binary header in format that will be written to disk.
string m_BinHdr;
- set<Int4> m_TaxIds;
+ set<TTaxId> m_TaxIds;
// Volumes
CWriteDB_LMDB::CWriteDB_LMDB(const string& dbname, Uint8 map_size, Uint8 capacity): m_Db(dbname),
m_Env(CBlastLMDBManager::GetInstance().GetWriteEnv(dbname, map_size)),
m_ListCapacity(capacity),
- m_MaxEntryPerTxn(DEFAULT_MAX_ENTRY_PER_TXN)
+ m_MaxEntryPerTxn(DEFAULT_MAX_ENTRY_PER_TXN),
+ m_TotalIdsLength(0)
{
m_list.reserve(m_ListCapacity);
char* max_entry_str = getenv("MAX_LMDB_TXN_ENTRY");
void CWriteDB_LMDB::InsertVolumesInfo(const vector<string> & vol_names, const vector<blastdb::TOid> & vol_num_oids)
{
+ x_IncreaseEnvMapSize(vol_names, vol_num_oids);
+
lmdb::txn txn = lmdb::txn::begin(m_Env);
lmdb::dbi volinfo = lmdb::dbi::open(txn, blastdb::volinfo_str.c_str(), MDB_CREATE | MDB_INTEGERKEY);
lmdb::dbi volname = lmdb::dbi::open(txn, blastdb::volname_str.c_str(), MDB_CREATE | MDB_INTEGERKEY);
txn.commit();
}
-
int CWriteDB_LMDB::InsertEntries(const list<CRef<CSeq_id>> & seqids, const blastdb::TOid oid)
{
int count = 0;
return;
}
+/// Grow the LMDB environment map, when the current one looks too small, before
+/// the volume name / volume info tables are written.
+///
+/// The number of pages required is an estimate derived from the length of the
+/// first volume name and the number of volumes; the map is only ever enlarged.
+///
+/// @param vol_names
+///   Volume names about to be stored. May be empty.
+/// @param vol_num_oids
+///   Not used by the estimate.
+void CWriteDB_LMDB::x_IncreaseEnvMapSize(const vector<string> & vol_names, const vector<blastdb::TOid> & vol_num_oids)
+{
+    // 2 meta pages
+    const size_t MIN_PAGES = 3;
+    const size_t BRANCH_PAGES = 2;
+    // front() on an empty vector is undefined behaviour; with no volumes the
+    // name table contributes no bytes to the estimate.
+    const size_t name_length = vol_names.empty() ? 0 : vol_names.front().size();
+    // Each entry has 8 byte overhead + size of (key + entry)
+    size_t vol_name_size = (name_length + 24)* vol_names.size();
+    size_t vol_info_size = 24* vol_names.size();
+
+    MDB_env *env = m_Env.handle();
+    MDB_stat stat;
+    MDB_envinfo info;
+    lmdb::env_stat(env, &stat);
+    lmdb::env_info(env, &info);
+    size_t page_size = stat.ms_psize;
+    // For each page 16 byte header
+    size_t page_max_size = page_size -16;
+    size_t last_page_num = info.me_last_pgno;
+    size_t max_num_pages = info.me_mapsize/page_size;
+    size_t leaf_pages_needed = vol_name_size/page_max_size + vol_info_size/page_max_size + 2;
+    size_t total_pages_needed = MIN_PAGES + BRANCH_PAGES + leaf_pages_needed;
+    // Enlarge the map only when the estimate does not fit after the last used page.
+    if( (total_pages_needed + last_page_num) > max_num_pages ) {
+        size_t newMapSize = (total_pages_needed + last_page_num) * page_size;
+        m_Env.set_mapsize(newMapSize);
+        LOG_POST(Info << "Increased lmdb mapsize to " << newMapSize);
+    }
+
+}
+
+/// Grow the LMDB environment map, when the current one looks too small, before
+/// the entries buffered in m_list are committed.
+///
+/// The page requirement is an estimate based on the accumulated id length
+/// (m_TotalIdsLength) and the number of buffered entries; the map is only
+/// ever enlarged. Does nothing when no entries are buffered.
+void CWriteDB_LMDB::x_IncreaseEnvMapSize()
+{
+    // Nothing is pending, so no extra pages are needed; returning here also
+    // avoids the division by m_list.size() below when the list is empty.
+    if (m_list.empty()) {
+        return;
+    }
+    size_t size = m_TotalIdsLength + m_list.size() * 16;
+    size_t avg_id_length = m_TotalIdsLength/m_list.size();
+    MDB_env *env = m_Env.handle();
+    MDB_stat stat;
+    MDB_envinfo info;
+    lmdb::env_stat(env, &stat);
+    lmdb::env_info(env, &info);
+    size_t page_size = stat.ms_psize;
+    // 16 byte header for each page
+    size_t page_max_size = page_size -16;
+    size_t last_page_num = info.me_last_pgno;
+    size_t max_num_pages = info.me_mapsize/page_size;
+    size_t leaf_pages_needed = size/page_max_size + 1;
+    // Fixed allowance that doubles once more than 200 leaf pages are expected.
+    size_t dup_pages = (leaf_pages_needed > 200) ? 14: 7;
+    size_t branch_pages_needed = (avg_id_length + 16)* leaf_pages_needed/page_max_size + 1;
+    size_t total_pages_needed = leaf_pages_needed + branch_pages_needed + dup_pages;
+    // Enlarge the map only when the estimate does not fit after the last used page.
+    if( (total_pages_needed + last_page_num) > max_num_pages) {
+        size_t newMapSize = (total_pages_needed + last_page_num) * page_size;
+        m_Env.set_mapsize(newMapSize);
+        LOG_POST(Info << "Increased lmdb mapsize to " << newMapSize);
+    }
+}
+
void CWriteDB_LMDB::x_Split(vector<SKeyValuePair>::iterator b, vector<SKeyValuePair>::iterator e, const unsigned int min_chunk_size)
{
#ifdef _OPENMP
#else
std::sort (m_list.begin(), m_list.end(), SKeyValuePair::cmp_key);
#endif
+
+ x_IncreaseEnvMapSize();
+
unsigned int j=0;
while (j < m_list.size()){
lmdb::txn txn = lmdb::txn::begin(m_Env);
count++;
tmp_ids.clear();
}
+ m_TotalIdsLength +=m_list[i].id.size();
if(!m_list[i].saveToOidList) {
continue;
}
CFile(m_Db+"-lock").Remove();
}
-int CWriteDB_TaxID::InsertEntries(const set<Int4> & tax_ids, const blastdb::TOid oid)
+int CWriteDB_TaxID::InsertEntries(const set<TTaxId> & tax_ids, const blastdb::TOid oid)
{
int count = 0;
if(tax_ids.size() == 0) {
x_Resize();
- SKeyValuePair<blastdb::TOid> kv(0, oid);
+ SKeyValuePair<blastdb::TOid> kv(ZERO_TAX_ID, oid);
m_TaxId2OidList.push_back(kv);
return 1;
}
- ITERATE(set<Int4>, itr, tax_ids) {
+ ITERATE(set<TTaxId>, itr, tax_ids) {
x_Resize();
SKeyValuePair<blastdb::TOid> kv(*itr, oid);
m_TaxId2OidList.push_back(kv);
return count;
}
+/// Enlarge the LMDB environment map when the estimated number of pages needed
+/// for the buffered tax-id-to-offset entries does not fit in the current map.
+///
+/// The estimate budgets 32 bytes per buffered entry and subtracts 16 bytes
+/// from each page's size; the map size is left untouched when the estimate
+/// already fits.
+void CWriteDB_TaxID::x_IncreaseEnvMapSize()
+{
+    // Fixed number of pages added on top of the leaf/branch estimate.
+    const size_t kExtraPages = 4;
+
+    // Ask the environment for its page size, map size and last used page.
+    MDB_env* const raw_env = m_Env.handle();
+    MDB_stat db_stat;
+    MDB_envinfo db_info;
+    lmdb::env_stat(raw_env, &db_stat);
+    lmdb::env_info(raw_env, &db_info);
+
+    const size_t bytes_per_page  = db_stat.ms_psize;
+    const size_t usable_per_page = db_stat.ms_psize - 16;
+    const size_t pages_in_use    = db_info.me_last_pgno;
+    const size_t page_capacity   = db_info.me_mapsize / bytes_per_page;
+
+    // Estimated payload and the leaf/branch pages it would occupy.
+    const size_t payload_bytes = m_TaxId2OffsetsList.size() * 32;
+    const size_t leaf_pages    = payload_bytes / usable_per_page + 1;
+    const size_t branch_pages  = (24 * leaf_pages) / usable_per_page + 1;
+
+    const size_t pages_after_write = leaf_pages + branch_pages + kExtraPages + pages_in_use;
+    if (pages_after_write <= page_capacity) {
+        return;
+    }
+
+    const size_t newMapSize = pages_after_write * bytes_per_page;
+    m_Env.set_mapsize(newMapSize);
+    LOG_POST(Info << "Increased lmdb mapsize to " << newMapSize);
+}
+
+
void CWriteDB_TaxID::x_CommitTransaction()
{
_ASSERT(m_TaxId2OffsetsList.size());
sort (m_TaxId2OffsetsList.begin(), m_TaxId2OffsetsList.end(), SKeyValuePair<Uint8>::cmp_key);
+ x_IncreaseEnvMapSize();
+
unsigned int j=0;
while (j < m_TaxId2OffsetsList.size()){
lmdb::txn txn = lmdb::txn::begin(m_Env);
}
for(; i < j; i++){
Uint8 & offset = m_TaxId2OffsetsList[i].value;
- Int4 & tax_id = m_TaxId2OffsetsList[i].tax_id;
+ TTaxId & tax_id = m_TaxId2OffsetsList[i].tax_id;
//cerr << m_list[i].id << endl;
lmdb::val value{&offset, sizeof(offset)};
lmdb::val key{&tax_id, sizeof(tax_id)};
bool rc = lmdb::dbi_put(txn, dbi.handle(), key, value, MDB_APPENDDUP);
if (!rc) {
- NCBI_THROW( CSeqDBException, eArgErr, "taxid2offset error for tax id " + tax_id);
+ NCBI_THROW( CSeqDBException, eArgErr, "taxid2offset error for tax id " + NStr::NumericToString(tax_id));
}
}
txn.commit();
}
return;
-
}
-Uint4 s_WirteTaxIds(CNcbiOfstream & os, vector<Int4> & tax_ids)
+Uint4 s_WirteTaxIds(CNcbiOfstream & os, vector<TTaxId> & tax_ids)
{
for(unsigned int j =0; j < tax_ids.size(); j++) {
- os.write((char *)&tax_ids[j], 4);
+ Int4 tid = TAX_ID_TO(Int4, tax_ids[j]);
+ os.write((char *)&tid, 4);
}
return tax_ids.size();
}
os.flush();
blastdb::TOid count = 0;
- vector<Int4> tmp_tax_ids;
+ vector<TTaxId> tmp_tax_ids;
for(unsigned int i = 0; i < m_TaxId2OidList.size(); i++) {
if(i > 0 && m_TaxId2OidList[i].value != m_TaxId2OidList[i-1].value ) {
if((m_TaxId2OidList[i].value - m_TaxId2OidList[i-1].value) != 1) {
#############################################################################
-# $Id: CMakeLists.cleanup.lib.txt 594157 2019-09-30 18:28:48Z gouriano $
+# $Id: CMakeLists.cleanup.lib.txt 608332 2020-05-14 16:04:14Z ivanov $
#############################################################################
NCBI_begin_lib(xcleanup)
autogenerated_cleanup autogenerated_extended_cleanup cleanup
cleanup_utils gene_qual_normalization cleanup_user_object cleanup_author
cleanup_pub newcleanupp capitalization_string fix_feature_id
+ cleanup_message
)
NCBI_uses_toolkit_libraries(xobjedit)
NCBI_project_watchers(bollin kans)
-# $Id: Makefile.cleanup.lib 581537 2019-03-01 21:27:51Z ucko $
+# $Id: Makefile.cleanup.lib 608332 2020-05-14 16:04:14Z ivanov $
# Build library "xcleanup"
###############################
ASN_DEP = submit taxon3 valid
SRC = autogenerated_cleanup autogenerated_extended_cleanup cleanup \
cleanup_utils gene_qual_normalization cleanup_user_object cleanup_author \
- cleanup_pub newcleanupp capitalization_string fix_feature_id
+ cleanup_pub newcleanupp capitalization_string fix_feature_id \
+ cleanup_message
DLL_LIB = $(OBJEDIT_LIBS) xregexp $(PCRE_LIB)
LIB = xcleanup
-/* $Id: cleanup.cpp 608035 2020-05-11 13:51:46Z ivanov $
+/* $Id: cleanup.cpp 614966 2020-08-25 16:46:33Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <objtools/edit/cds_fix.hpp>
#include <objtools/cleanup/cleanup.hpp>
#include "cleanup_utils.hpp"
+#include <objtools/cleanup/cleanup_message.hpp>
#include <util/strsearch.hpp>
#include "newcleanupp.hpp"
+#include <objtools/logging/listener.hpp>
+
BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)
}
bool is_bad = false;
size_t len = sequence::GetLength(f.GetLocation(), scope);
- string rrna_name = f.GetData().GetRna().GetRnaProductName();
+ const CRNA_ref& rrna = f.GetData().GetRna();
+ string rrna_name = rrna.GetRnaProductName();
+ if (rrna_name.empty()) {
+ // RNA name may still be in product GBQual
+ if (f.IsSetQual()) {
+ for (auto qit : f.GetQual()) {
+ const CGb_qual& gbq = *qit;
+ if ( gbq.IsSetQual() && gbq.GetQual() == "product" ) {
+ rrna_name = gbq.GetVal();
+ break;
+ }
+ }
+ }
+ }
ITERATE (TRNALengthMap, it, kTrnaLengthMap) {
SIZE_TYPE pos = NStr::FindNoCase(rrna_name, it->first);
if (pos != string::npos && len < it->second.first && !(it->second.second && f.IsSetPartial() && f.GetPartial()) ) {
CTSE_Handle tse = entry.GetTSE_Handle();
- for (CFeat_CI gene_it(entry, SAnnotSelector(CSeqFeatData::e_Gene)); gene_it; ++gene_it) {
- bool change_this_gene;
- CRef<CSeq_feat> new_gene(new CSeq_feat());
- new_gene->Assign(*(gene_it->GetSeq_feat()));
-
- change_this_gene = ExpandGeneToIncludeChildren(*new_gene, tse);
-
- change_this_gene |= SetGenePartialByLongestContainedFeature(*new_gene, entry.GetScope());
-
- if (change_this_gene) {
- CSeq_feat_EditHandle gene_h(*gene_it);
- gene_h.Replace(*new_gene);
- any_changes = true;
- }
- }
-
for (CFeat_CI rna_it(entry, SAnnotSelector(CSeqFeatData::e_Rna)); rna_it; ++rna_it) {
const CSeq_feat& rna_feat = *(rna_it->GetSeq_feat());
- if (rna_feat.IsSetData() && rna_feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_rRNA && !rna_feat.IsSetPartial() && s_CleanupIsShortrRNA(rna_feat, &(entry.GetScope()))) {
+ if (rna_feat.IsSetData() &&
+ rna_feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_rRNA &&
+ s_CleanupIsShortrRNA(rna_feat, &(entry.GetScope()))) {
bool change_this_rrna = false;
CRef<CSeq_feat> new_rrna(new CSeq_feat());
}
}
+ for (CFeat_CI gene_it(entry, SAnnotSelector(CSeqFeatData::e_Gene)); gene_it; ++gene_it) {
+ bool change_this_gene;
+ CRef<CSeq_feat> new_gene(new CSeq_feat());
+ new_gene->Assign(*(gene_it->GetSeq_feat()));
+
+ change_this_gene = ExpandGeneToIncludeChildren(*new_gene, tse);
+
+ change_this_gene |= SetGenePartialByLongestContainedFeature(*new_gene, entry.GetScope());
+
+ if (change_this_gene) {
+ CSeq_feat_EditHandle gene_h(*gene_it);
+ gene_h.Replace(*new_gene);
+ any_changes = true;
+ }
+ }
+
NormalizeDescriptorOrder(entry);
for (CBioseq_CI bi(entry, CSeq_inst::eMol_na); bi; ++bi) {
void CCleanup::GetPubdescLabels
(const CPubdesc& pd,
-vector<int>& pmids, vector<int>& muids, vector<int>& serials,
+vector<TEntrezId>& pmids, vector<TEntrezId>& muids, vector<int>& serials,
vector<string>& published_labels,
vector<string>& unpublished_labels)
{
// first get descriptor pubs
CSeqdesc_CI di(bsh, CSeqdesc::e_Pub);
while (di) {
- vector<int> pmids;
- vector<int> muids;
+ vector<TEntrezId> pmids;
+ vector<TEntrezId> muids;
vector<int> serials;
vector<string> published_labels;
vector<string> unpublished_labels;
// now get pub features
CFeat_CI fi(bsh, SAnnotSelector(CSeqFeatData::e_Pub));
while (fi) {
- vector<int> pmids;
- vector<int> muids;
+ vector<TEntrezId> pmids;
+ vector<TEntrezId> muids;
vector<int> serials;
vector<string> published_labels;
vector<string> unpublished_labels;
}
-bool CCleanup::ParseCodeBreak(const CSeq_feat& feat, CCdregion& cds, const string& str, CScope& scope)
+bool CCleanup::ParseCodeBreak(const CSeq_feat& feat,
+ CCdregion& cds,
+ const CTempString& str,
+ CScope& scope,
+ IObjtoolsListener* pMessageListener)
{
if (str.empty() || !feat.IsSetLocation()) {
return false;
}
loc_pos = NStr::Find(str, "(pos:");
+
+ using TSubcode = CCleanupMessage::ESubcode;
+ auto postMessage =
+ [pMessageListener](string msg, TSubcode subcode) {
+ pMessageListener->PutMessage(
+ CCleanupMessage(msg, eDiag_Error, CCleanupMessage::ECode::eCodeBreak, subcode));
+ };
+
if (loc_pos == string::npos) {
+ if (pMessageListener) {
+ string msg = "Unable to identify code-break location in '" + str + "'";
+ postMessage(msg, TSubcode::eParseError);
+ }
return false;
}
loc_pos += 5;
break_loc = ReadLocFromText(pos, feat_loc_seq_id, &scope);
if (break_loc == NULL) {
+ if (pMessageListener) {
+ string msg = "Unable to extract code-break location from '" + str + "'";
+ postMessage(msg, TSubcode::eParseError);
+ }
return false;
- } else if (break_loc->IsInt() && sequence::GetLength(*break_loc, &scope) > 3) {
+ }
+
+ if (break_loc->IsInt() && sequence::GetLength(*break_loc, &scope) > 3) {
+ if (pMessageListener) {
+ string msg = "code-break location exceeds 3 bases";
+ postMessage(msg, TSubcode::eBadLocation);
+ }
return false;
- } else if ((break_loc->IsInt() || break_loc->IsPnt()) &&
+ }
+ if ((break_loc->IsInt() || break_loc->IsPnt()) &&
sequence::Compare(*break_loc, feat.GetLocation(), &scope, sequence::fCompareOverlapping) != sequence::eContained) {
+ if (pMessageListener) {
+ string msg = "code-break location lies outside of coding region";
+ postMessage(msg, TSubcode::eBadLocation);
+ }
return false;
}
--- /dev/null
+/* $Id: cleanup_message.cpp 608332 2020-05-14 16:04:14Z ivanov $
+ * ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ * Author: Justin Foley
+ *
+ * File Description:
+ * .......
+ *
+ */
+#include <ncbi_pch.hpp>
+#include <objtools/cleanup/cleanup_message.hpp>
+
+BEGIN_NCBI_SCOPE
+BEGIN_SCOPE(objects)
+
+/// Construct a cleanup message.
+/// The text and severity are forwarded to the CObjtoolsMessage base; the
+/// code and subcode are stored in m_Code and m_Subcode.
+CCleanupMessage::CCleanupMessage(string text, EDiagSev sev, ECode code, ESubcode subcode)
+ : CObjtoolsMessage(text, sev), m_Code(code), m_Subcode(subcode) {}
+
+/// Return a newly allocated CCleanupMessage built from this message's text,
+/// severity, code and subcode.
+/// NOTE(review): the result comes from a plain `new`; presumably the caller
+/// takes ownership - confirm against the base-class contract.
+CCleanupMessage* CCleanupMessage::Clone(void) const
+{
+ return new CCleanupMessage(GetText(), GetSeverity(), m_Code, m_Subcode);
+}
+
+END_SCOPE(objects)
+END_NCBI_SCOPE
+
-/* $Id: cleanup_pub.cpp 591351 2019-08-14 14:26:28Z bollin $
+/* $Id: cleanup_pub.cpp 614966 2020-08-25 16:46:33Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
// we keep the last of these because we might transfer one
// to the other as necessary to fill in gaps.
- int last_pmid = 0;
- int last_article_pubmed_id = 0; // the last from a journal
+ TEntrezId last_pmid = ZERO_ENTREZ_ID;
+ TEntrezId last_article_pubmed_id = ZERO_ENTREZ_ID; // the last from a journal
CRef<CCit_art> last_article;
auto& pe_set = m_Equiv.Set();
}
// Now, we might have to transfer data to fill in missing information
- if (last_pmid == 0 && last_article_pubmed_id > 0) {
+ if (last_pmid == ZERO_ENTREZ_ID && last_article_pubmed_id > ZERO_ENTREZ_ID) {
CRef<CPub> new_pub(new CPub);
new_pub->SetPmid().Set(last_article_pubmed_id);
m_Equiv.Set().insert(m_Equiv.Set().begin(), new_pub);
change = true;
}
- else if (last_pmid > 0 && last_article_pubmed_id == 0 && last_article) {
+ else if (last_pmid > ZERO_ENTREZ_ID && last_article_pubmed_id == ZERO_ENTREZ_ID && last_article) {
CRef<CArticleId> new_article_id(new CArticleId);
new_article_id->SetPubmed().Set(last_pmid);
last_article->SetIds().Set().push_back(new_article_id);
{
return (!m_Gen.IsSetCit()) &&
!m_Gen.IsSetAuthors() &&
- (!m_Gen.IsSetMuid() || m_Gen.GetMuid() <= 0) &&
+ (!m_Gen.IsSetMuid() || m_Gen.GetMuid() <= ZERO_ENTREZ_ID) &&
!m_Gen.IsSetJournal() &&
(!m_Gen.IsSetVolume() || m_Gen.GetVolume().empty()) &&
(!m_Gen.IsSetIssue() || m_Gen.GetIssue().empty()) &&
!m_Gen.IsSetDate() &&
(!m_Gen.IsSetSerial_number() || m_Gen.GetSerial_number() <= 0) &&
(!m_Gen.IsSetTitle() || m_Gen.GetTitle().empty()) &&
- (!m_Gen.IsSetPmid() || m_Gen.GetPmid() <= 0);
+ (!m_Gen.IsSetPmid() || m_Gen.GetPmid().Get() <= ZERO_ENTREZ_ID);
}
#ifndef OBJECTS_GENERAL___CLEANUP_UTILS__HPP
#define OBJECTS_GENERAL___CLEANUP_UTILS__HPP
-/* $Id: cleanup_utils.hpp 581496 2019-03-01 16:42:04Z bollin $
+/* $Id: cleanup_utils.hpp 613129 2020-08-03 12:12:19Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CSeq_loc> ReadLocFromText(const string& text, const CSeq_id *id, CScope *scope);
// for finding the correct amino acid letter given an abbreviation
-char ValidAminoAcid (const string& abbrev);
+char NCBI_CLEANUP_EXPORT ValidAminoAcid (const string& abbrev);
// for sorting and uniquing dbtags
bool s_DbtagCompare (const CRef<CDbtag>& dbt1, const CRef<CDbtag>& dbt2);
{
if (NStr::Find (except_text, "ribosome slippage") == NPOS &&
+ NStr::Find (except_text, "ribosome-slippage") == NPOS &&
+ NStr::Find (except_text, "ribosome_slippage") == NPOS &&
+ NStr::Find (except_text, "ribosomal-slippage") == NPOS &&
+ NStr::Find (except_text, "ribosomal_slippage") == NPOS &&
NStr::Find (except_text, "trans splicing") == NPOS &&
NStr::Find (except_text, "trans_splicing") == NPOS &&
NStr::Find (except_text, "alternate processing") == NPOS &&
ChangeMade (CCleanupChange::eTrimSpaces);
}
if (! text.empty()) {
- if (text == "ribosome slippage") {
+ if (text == "ribosome slippage" || text == "ribosome-slippage" || text == "ribosome_slippage" ||
+ text == "ribosomal-slippage" || text == "ribosomal_slippage") {
text = "ribosomal slippage";
ChangeMade (CCleanupChange::eChangeException);
} else if (text == "trans splicing" || text == "trans_splicing") {
if( ! m_MuidPubContainer.empty() ) {
NON_CONST_ITERATE( TMuidPubContainer, pub_iter, m_MuidPubContainer ) {
CPub &pub = **pub_iter;
- const int muid = pub.GetMuid();
+ const TEntrezId muid = pub.GetMuid();
// attempt to find that muid in the muid-to-pmid mapping created earlier
- TMuidToPmidMap::const_iterator map_iter = m_MuidToPmidMap.find(muid);
+ TMuidToPmidMap::const_iterator map_iter = m_MuidToPmidMap.find(ENTREZ_ID_TO(int, muid));
if( map_iter != m_MuidToPmidMap.end() ) {
- const int pmid = map_iter->second;
+ const TEntrezId pmid = ENTREZ_ID_FROM(int, map_iter->second);
pub.SetPmid().Set(pmid);
ChangeMade(CCleanupChange::eChangePublication);
}
}
void CNewCleanup_imp::x_NotePubdescOrAnnotPubs_RecursionHelper(
- const CPub_equiv &pub_equiv, int &muid, int &pmid )
+ const CPub_equiv &pub_equiv, int &muid, int &pmid )
{
FOR_EACH_PUB_ON_PUBEQUIV(pub_iter, pub_equiv) {
const CPub &pub = **pub_iter;
switch( pub.Which() ) {
case NCBI_PUB(Muid):
- muid = pub.GetMuid();
+ muid = ENTREZ_ID_TO(int, pub.GetMuid());
break;
case NCBI_PUB(Pmid):
- pmid = pub.GetPmid().Get();
+ pmid = ENTREZ_ID_TO(int, pub.GetPmid().Get());
break;
case NCBI_PUB(Gen):
{
{
if (pub.IsGen() && IsMinimal(pub.GetGen())) {
return true;
- } else if (pub.IsMuid() && pub.GetMuid() == 0) {
+ } else if (pub.IsMuid() && pub.GetMuid() == ZERO_ENTREZ_ID) {
return true;
- } else if (pub.IsPmid() && pub.GetPmid() == 0) {
+ } else if (pub.IsPmid() && pub.GetPmid() == ZERO_ENTREZ_ID) {
return true;
} else if (pub.IsPat_id() && x_IsPubContentBad(pub.GetPat_id())) {
return true;
-/* $Id: bdbloader.cpp 500404 2016-05-04 14:59:01Z camacho $
+/* $Id: bdbloader.cpp 612733 2020-07-27 11:38:27Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
m_DBName (param.m_DbName),
m_DBType (param.m_DbType),
m_BlastDb (0),
+ m_Ids (1000),
m_UseFixedSizeSlices (param.m_UseFixedSizeSlices)
{
if (param.m_BlastDbHandle.NotEmpty()) {
-/* $Id: reader.cpp 578792 2019-01-25 16:39:00Z vasilche $
+/* $Id: reader.cpp 610682 2020-06-22 17:47:10Z ivanov $
* ===========================================================================
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
}
+#define DEFAULT_PREOPEN false
#define DEFAULT_RETRY_COUNT 5
#define DEFAULT_WAIT_TIME_ERRORS 2
#define DEFAULT_WAIT_TIME 1
CReader::CReader(void)
: m_Dispatcher(0),
m_MaxConnections(0),
- m_PreopenConnection(true),
+ m_PreopenConnection(DEFAULT_PREOPEN),
m_NextNewConnection(0),
m_NumFreeConnections(0, 1000),
m_MaximumRetryCount(3),
conf.GetBool(driver_name,
NCBI_GBLOADER_READER_PARAM_PREOPEN,
CConfig::eErr_NoThrow,
- true);
+ DEFAULT_PREOPEN);
SetPreopenConnection(open_initial_connection);
m_WaitTimeErrors =
conf.GetInt(driver_name,
#############################################################################
-# $Id: CMakeLists.edit.lib.txt 594157 2019-09-30 18:28:48Z gouriano $
+# $Id: CMakeLists.edit.lib.txt 615131 2020-08-27 17:51:01Z fukanchi $
#############################################################################
NCBI_begin_lib(xobjedit)
external_annots feature_propagate text_object_description
seq_edit
)
- NCBI_uses_toolkit_libraries(mlacli taxon3 valid xobjread xobjutil)
+ NCBI_uses_toolkit_libraries(mlacli taxon3 valid xobjread xobjutil xlogging)
NCBI_project_watchers(bollin gotvyans foleyjp)
NCBI_end_lib()
-/* $Id: feattable_edit.cpp 594944 2019-10-11 12:07:51Z ludwigf $
+/* $Id: feattable_edit.cpp 612522 2020-07-23 11:23:26Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <objects/seqfeat/Feat_id.hpp>
#include <objects/seqfeat/Gb_qual.hpp>
#include <objects/seqfeat/SeqFeatXref.hpp>
+#include <objects/seqfeat/Genetic_code_table.hpp>
+#include <objects/seqfeat/Trna_ext.hpp>
#include <objmgr/object_manager.hpp>
#include <objmgr/scope.hpp>
}
}
+// ----------------------------------------------------------------------------
+void CFeatTableEdit::ProcessCodonRecognized()
+// ----------------------------------------------------------------------------
+{
+    // Converts the "codon_recognized" qualifier of every tRNA feature into
+    // explicit codon indices on the feature's tRNA extension and then removes
+    // the qualifier. Ambiguous IUPAC letters are expanded into every concrete
+    // codon they stand for. Processing stops at the first malformed qualifier,
+    // after reporting it through xPutErrorBadCodonRecognized().
+
+    // Bases each accepted (upper-case) IUPAC nucleotide letter expands to.
+    static const map<char, list<char>> sIUPACmap {
+        {'A', list<char>({'A'})},
+        {'G', list<char>({'G'})},
+        {'C', list<char>({'C'})},
+        {'T', list<char>({'T'})},
+        {'U', list<char>({'U'})},
+        {'M', list<char>({'A', 'C'})},
+        {'R', list<char>({'A', 'G'})},
+        {'W', list<char>({'A', 'T'})},
+        {'S', list<char>({'C', 'G'})},
+        {'Y', list<char>({'C', 'T'})},
+        {'K', list<char>({'G', 'T'})},
+        {'V', list<char>({'A', 'C', 'G'})},
+        {'H', list<char>({'A', 'C', 'T'})},
+        {'D', list<char>({'A', 'G', 'T'})},
+        {'B', list<char>({'C', 'G', 'T'})},
+        {'N', list<char>({'A', 'C', 'G', 'T'})}
+    };
+
+    SAnnotSelector sel;
+    sel.IncludeFeatSubtype(CSeqFeatData::eSubtype_tRNA);
+    CFeat_CI it(mHandle, sel);
+    for (; it; ++it) {
+        CMappedFeat mf = *it;
+        // Work on a private copy: the value is upper-cased below.
+        auto codonRecognized = mf.GetNamedQual("codon_recognized");
+        if (codonRecognized.empty()) {
+            continue;
+        }
+        if (codonRecognized.size() != 3) {
+            xPutErrorBadCodonRecognized(codonRecognized);
+            return;
+        }
+        NStr::ToUpper(codonRecognized);
+
+        // Reject letters missing from the table explicitly: map::at() signals
+        // an unknown key with std::out_of_range, which the CException handler
+        // further down would not intercept.
+        for (char base : codonRecognized) {
+            if (sIUPACmap.find(base) == sIUPACmap.end()) {
+                xPutErrorBadCodonRecognized(codonRecognized);
+                return;
+            }
+        }
+
+        const CSeq_feat& origFeat = mf.GetOriginalFeature();
+
+        CRef<CSeq_feat> pEditedFeat(new CSeq_feat);
+        pEditedFeat->Assign(origFeat);
+        CRNA_ref::C_Ext::TTRNA & extTrna = pEditedFeat->SetData().SetRna().SetExt().SetTRNA();
+
+        // A set removes duplicates and yields the indices in ascending order.
+        set<int> codons;
+        try {
+            for (char char1 : sIUPACmap.at(codonRecognized[0])) {
+                for (char char2 : sIUPACmap.at(codonRecognized[1])) {
+                    for (char char3 : sIUPACmap.at(codonRecognized[2])) {
+                        const auto codonIndex = CGen_code_table::CodonToIndex(char1, char2, char3);
+                        codons.insert(codonIndex);
+                    }
+                }
+            }
+        }
+        catch(CException&) {
+            xPutErrorBadCodonRecognized(codonRecognized);
+            return;
+        }
+        if (!codons.empty()) {
+            for (const auto codonIndex : codons) {
+                extTrna.SetCodon().push_back(codonIndex);
+            }
+            // Swap the edited copy in for the original feature, then drop the
+            // qualifier that has just been converted.
+            CSeq_feat_EditHandle feh(mpScope->GetObjectHandle(origFeat));
+            feh.Replace(*pEditedFeat);
+            feh.RemoveQualifier("codon_recognized");
+        }
+    }
+}
// ---------------------------------------------------------------------------
void CFeatTableEdit::GenerateProteinAndTranscriptIds()
xPutError(message);
}
+// ----------------------------------------------------------------------------
+void
+CFeatTableEdit::xPutErrorBadCodonRecognized(
+    const string codonRecognized)
+// ----------------------------------------------------------------------------
+{
+    // Reports a tRNA whose "codon_recognized" value could not be used.
+    // Does nothing when no message listener is set.
+    if (mpMessageListener) {
+        string message("tRNA with bad codon recognized attribute \"");
+        message += codonRecognized;
+        message += "\".";
+        xPutError(message);
+    }
+}
+
// ----------------------------------------------------------------------------
void
CFeatTableEdit::xPutErrorMissingProteinId(
void
CFeatTableEdit::xPutErrorDifferingProteinIds(
const CMappedFeat& mrna)
- // ----------------------------------------------------------------------------
+// ----------------------------------------------------------------------------
{
if (!mpMessageListener) {
return;
void
CFeatTableEdit::xPutErrorDifferingTranscriptIds(
const CMappedFeat& mrna)
- // ----------------------------------------------------------------------------
+// ----------------------------------------------------------------------------
{
if (!mpMessageListener) {
return;
-/* $Id: loc_edit.cpp 601240 2020-02-04 16:06:49Z ludwigf $
+/* $Id: loc_edit.cpp 609624 2020-06-04 15:45:32Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CRef<CSeq_feat> new_cds(new CSeq_feat());
new_cds->Assign(*(f->GetOriginalSeq_feat()));
- if (AdjustFeatureEnd5(*new_cds, related_features, bsh.GetScope()) ||
- AdjustFeatureEnd3(*new_cds, related_features, bsh.GetScope())) {
+ const bool adjusted_5prime = AdjustFeatureEnd5(*new_cds, related_features, bsh.GetScope());
+ const bool adjusted_3prime = AdjustFeatureEnd3(*new_cds, related_features, bsh.GetScope());
+
+ if (adjusted_5prime || adjusted_3prime) {
feature::RetranslateCDS(*new_cds, bsh.GetScope());
CSeq_feat_EditHandle feh(*f);
feh.Replace(*new_cds);
-/* $Id: remote_updater.cpp 605109 2020-04-07 11:01:53Z ivanov $
+/* $Id: remote_updater.cpp 614634 2020-08-20 13:02:41Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#include <objects/general/Name_std.hpp>
#include <objtools/edit/remote_updater.hpp>
+#include <objtools/edit/edit_error.hpp>
+#include <objtools/logging/listener.hpp>
#include <common/test_assert.h> /* This header must go last */
namespace
{
-int FindPMID(CMLAClient& mlaClient, const CPub_equiv::Tdata& arr)
+TEntrezId FindPMID(const list<CRef<CPub>>& arr)
{
for (auto pPub : arr) {
if (pPub->IsPmid()) {
}
}
- return 0;
+ return ZERO_ENTREZ_ID;
}
-// the method is not used at the momment
-void CreatePubPMID(CMLAClient& mlaClient, CPub_equiv::Tdata& arr, int id)
+
+// Returns true when the MLA error value is one of the two
+// "cannot connect" codes, false for every other value.
+static bool s_IsConnectionFailure(EError_val mlaErrorVal) {
+    return mlaErrorVal == eError_val_cannot_connect_pmdb
+        || mlaErrorVal == eError_val_cannot_connect_searchbackend_pmdb;
+}
+
+
+// Asks the MLA service for the publication that belongs to PMID `id`.
+// The request is repeated while the service answers with a connection
+// failure, up to max(1, maxAttempts) attempts in total. On any other failure,
+// or once the attempts are used up, the problem is posted to pMessageListener
+// and a null reference is returned; without a listener a CException carrying
+// the same text is thrown instead.
+CRef<CPub> s_GetPubFrompmid(CMLAClient& mlaClient, TEntrezId id, int maxAttempts, IObjtoolsListener* pMessageListener)
{
- try {
- CPubMedId req(id);
- CRef<CPub> new_pub = mlaClient.AskGetpubpmid(req);
- if (new_pub.NotEmpty())
- {
- // authors come back in a weird format that we need
- // to convert to ISO
- if (new_pub->IsSetAuthors())
- CRemoteUpdater::ConvertToStandardAuthors((CAuth_list&)new_pub->GetAuthors());
+    CRef<CPub> result;
+    CPubMedId request(id);
+    CMLAClient::TReply reply;
+
+    const int maxCount = max(1, maxAttempts);
+    for (int count=0; count<maxCount; ++count) {
+        try {
+            result = mlaClient.AskGetpubpmid(request, &reply);
+            return result;
+        }
+        catch(CException& e) {
+            // The reply carries an MLA error code only when the service
+            // actually answered with an error. Reading the code from any
+            // other reply state would throw from inside this handler, so
+            // check the selected variant first.
+            const bool hasErrorVal = reply.IsError();
+            const bool isConnectionError =
+                hasErrorVal && s_IsConnectionFailure(reply.GetError());
+            if (isConnectionError && count<maxCount-1) {
+                continue;
+            }
- arr.clear();
- CRef<CPub> new_pmid(new CPub);
- new_pmid->SetPmid().Set(id);
- arr.push_back(new_pmid);
- arr.push_back(new_pub);
+            CNcbiOstrstream oss;
+            oss << "Failed to retrieve publication for PMID "
+                << id
+                << ". ";
+            if (isConnectionError) {
+                oss << count+1 << " attempts made. ";
+            }
+            oss << "CMLAClient : ";
+            if (hasErrorVal) {
+                oss << reply.GetError();
+            }
+            else {
+                // No MLA error code available: fall back to the text of
+                // the exception that brought us here.
+                oss << e.GetMsg();
+            }
+            string msg = CNcbiOstrstreamToString(oss);
+            if (pMessageListener) {
+                pMessageListener->PutMessage(CObjEditMessage(msg, eDiag_Error));
+                break;
+            }
+            else {
+                NCBI_THROW(CException, eUnknown, msg);
+            }
}
- } catch(...) {
- // don't worry if we can't look it up
}
-
+    return result;
}
}// end anonymous namespace
+// Replaces the contents of `arr` with two entries: a PMID pub holding `id`,
+// followed by the publication retrieved for that PMID.
+// Returns false, leaving `arr` untouched, when no publication was obtained.
+bool CRemoteUpdater::xUpdatePubPMID(list<CRef<CPub>>& arr, TEntrezId id)
+{
+    auto new_pub =
+        s_GetPubFrompmid(*m_mlaClient, id, m_MaxMlaAttempts, m_pMessageListener);
+    if (!new_pub) {
+        return false;
+    }
+
+    // authors come back in a weird format that we need
+    // to convert to ISO
+    if (new_pub->IsSetAuthors()) {
+        // GetAuthors() only hands out a const reference; the conversion
+        // edits the author list in place, hence the explicit const_cast.
+        CRemoteUpdater::ConvertToStandardAuthors(
+            const_cast<CAuth_list&>(new_pub->GetAuthors()));
+    }
+
+    arr.clear();
+    CRef<CPub> new_pmid(new CPub);
+    new_pmid->SetPmid().Set(id);
+    arr.push_back(new_pmid);
+    arr.push_back(new_pub);
+    return true;
+}
+
+
+// Stores the attempt limit that is later handed to the MLA publication lookup.
+void CRemoteUpdater::SetMaxMlaAttempts(int maxAttempts)
+{
+    this->m_MaxMlaAttempts = maxAttempts;
+}
+
class CCachedTaxon3_impl
{
}
}
+    // Looks up `org` through GetOrgReply() and returns the org-ref found in
+    // the reply's data, or a null reference when there is none. An error
+    // reply is posted to pMessageListener when a listener is supplied.
+    CRef<COrg_ref> GetOrg(const COrg_ref& org, IObjtoolsListener* pMessageListener=nullptr)
+    {
+        CRef<CT3Reply> reply = GetOrgReply(org);
+
+        if (reply->IsError() && pMessageListener) {
+            string error_message("Taxon update: ");
+            error_message += org.IsSetTaxname() ? org.GetTaxname() : NStr::NumericToString(org.GetTaxId());
+            error_message += ": ";
+            error_message += reply->GetError().GetMessage();
+
+            pMessageListener->PutMessage(
+                CObjEditMessage(error_message, eDiag_Error));
+            return CRef<COrg_ref>();
+        }
+
+        if (reply->IsData() && reply->SetData().IsSetOrg()) {
+            return CRef<COrg_ref>(&reply->SetData().SetOrg());
+        }
+        return CRef<COrg_ref>();
+    }
+
+
CRef<COrg_ref> GetOrg(const COrg_ref& org, CRemoteUpdater::FLogger f_logger)
{
CRef<COrg_ref> result;
"Taxon update: " +
(org.IsSetTaxname() ? org.GetTaxname() : NStr::IntToString(org.GetTaxId())) + ": " +
reply->GetError().GetMessage();
-
-/*
- logger->PutError(*auto_ptr<CLineError>(
- CLineError::Create(ILineError::eProblem_Unset, eDiag_Warning, "", 0,
- string("Taxon update: ") +
- (org.IsSetTaxname() ? org.GetTaxname() : NStr::IntToString(org.GetTaxId())) + ": " +
- reply->GetError().GetMessage())));
- */
+
+ f_logger(error_message);
}
else
if (reply->IsData() && reply->SetData().IsSetOrg())
auto_ptr<CCachedReplyMap> m_cache;
};
-void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeqdesc& obj)
+void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, CSeqdesc& obj)
{
if (obj.IsOrg())
{
}
void CRemoteUpdater::xUpdateOrgTaxname(FLogger logger, COrg_ref& org)
-{
+{ // remove after the deprecated UpdateOrgFromTaxon(FLogger, CSeqdesc&)
+ // has been removed.
CMutexGuard guard(m_Mutex);
int taxid = org.GetTaxId();
}
}
+// Updates the org-ref held by a descriptor: either the descriptor's own Org,
+// or the Org inside its Source when one is set. Other descriptors are left
+// alone.
+void CRemoteUpdater::UpdateOrgFromTaxon(CSeqdesc& desc)
+{
+    if (desc.IsOrg()) {
+        xUpdateOrgTaxname(desc.SetOrg());
+        return;
+    }
+    if (desc.IsSource() && desc.GetSource().IsSetOrg()) {
+        xUpdateOrgTaxname(desc.SetSource().SetOrg());
+    }
+}
+
+
+// Overwrites `org` with the org-ref returned by the taxonomy client, if any.
+// The taxonomy client is created and initialized on first use; the whole
+// call runs under m_Mutex.
+void CRemoteUpdater::xUpdateOrgTaxname(COrg_ref& org)
+{
+    CMutexGuard guard(m_Mutex);
+
+    // With neither a tax id nor a taxname there is nothing to look up.
+    if (org.GetTaxId() == ZERO_TAX_ID && !org.IsSetTaxname()) {
+        return;
+    }
+
+    if (m_taxClient.get() == nullptr) {
+        m_taxClient.reset(new CCachedTaxon3_impl);
+        m_taxClient->Init();
+    }
+
+    CRef<COrg_ref> updated = m_taxClient->GetOrg(org, m_pMessageListener);
+    if (updated) {
+        org.Assign(*updated);
+    }
+}
+
+
CRemoteUpdater& CRemoteUpdater::GetInstance()
{
CMutexGuard guard(m_static_mutex);
return instance;
}
+// Constructs an updater that keeps the given message listener pointer
+// (which may be null) in m_pMessageListener.
+// NOTE(review): no other member is initialized here; presumably
+// m_enable_caching and m_MaxMlaAttempts have in-class defaults - confirm
+// against the class declaration.
+CRemoteUpdater::CRemoteUpdater(IObjtoolsListener* pMessageListener) :
+ m_pMessageListener(pMessageListener)
+{
+}
+
+
CRemoteUpdater::CRemoteUpdater(bool enable_caching)
:m_enable_caching(enable_caching)
{
}
}
-void CRemoteUpdater::UpdatePubReferences(objects::CSeq_entry_EditHandle& obj)
+void CRemoteUpdater::UpdatePubReferences(CSeq_entry_EditHandle& obj)
{
for (CBioseq_CI it(obj); it; ++it)
{
xUpdatePubReferences(entry.SetDescr());
}
-void CRemoteUpdater::xUpdatePubReferences(objects::CSeq_descr& seq_descr)
+
+
+// Refreshes the publication data of every Pub descriptor in seq_descr.
+// When FindPMID() yields a non-zero id for the descriptor's pub list, the
+// list is updated from that PMID. Otherwise each article in the list is
+// matched against the MLA service (AskCitmatchpmid) until one match leads to
+// a successful update. The MLA client is created on demand.
+void CRemoteUpdater::xUpdatePubReferences(CSeq_descr& seq_descr)
{
CMutexGuard guard(m_Mutex);
- CSeq_descr::Tdata& descr = seq_descr.Set();
- size_t count = descr.size();
- CSeq_descr::Tdata::iterator it = descr.begin();
-
- for (size_t i=0; i<count; ++it, ++i)
- {
- if (! ( (**it).IsPub() && (**it).GetPub().IsSetPub() ) )
+    for (auto pDesc : seq_descr.Set()) {
+        if (!pDesc->IsPub() || !pDesc->GetPub().IsSetPub()) {
continue;
+        }
- CPub_equiv::Tdata& arr = (**it).SetPub().SetPub().Set();
- if (m_mlaClient.Empty())
- m_mlaClient.Reset(new CMLAClient);
+        auto& arr = pDesc->SetPub().SetPub().Set();
+        if (m_mlaClient.Empty())
+            m_mlaClient.Reset(new CMLAClient());
- int id = FindPMID(*m_mlaClient, arr);
- if (id>0)
- {
- CreatePubPMID(*m_mlaClient, arr, id);
+        auto id = FindPMID(arr);
+        if (id>ZERO_ENTREZ_ID) {
+            xUpdatePubPMID(arr, id);
+            continue;
}
- else
- // nothing was found
- NON_CONST_ITERATE(CPub_equiv::Tdata, item_it, arr)
- {
- if ((**item_it).IsArticle())
- try
- {
- id = m_mlaClient->AskCitmatchpmid(**item_it);
- if (id>0)
+
+        // No PMID in the list: try to match each article citation instead.
+        for (auto pPubEquiv : arr) {
+            if (pPubEquiv->IsArticle()) {
+                CMLAClient::TReply reply;
+                try {
+                    id = ENTREZ_ID_FROM(int, m_mlaClient->AskCitmatchpmid(*pPubEquiv, &reply));
+                }
+                // A failed citation match is not an error; move on to the
+                // next article.
+                catch(CException&)
{
- CreatePubPMID(*m_mlaClient, arr, id);
+                    continue;
+                }
+                // xUpdatePubPMID() rewrites arr on success, so the loop
+                // over arr must end right there.
+                if (id>ZERO_ENTREZ_ID &&
+                    xUpdatePubPMID(arr,id)) {
break;
}
}
- catch(CException& /*ex*/)
- {
- }
}
}
}
+
namespace
{
typedef set<CRef< CSeqdesc >* > TOwnerSet;
typedef struct { TOwnerSet owner; CRef<COrg_ref> org_ref; } TOwner;
typedef map<string, TOwner > TOrgMap;
- void _UpdateOrgFromTaxon(CRemoteUpdater::FLogger logger, objects::CSeq_entry& entry, TOrgMap& m)
+ void _UpdateOrgFromTaxon(CSeq_entry& entry, TOrgMap& m)
{
if (entry.IsSet())
{
NON_CONST_ITERATE(CSeq_entry::TSet::TSeq_set, it, entry.SetSet().SetSeq_set())
{
- _UpdateOrgFromTaxon(logger, **it, m);
+ _UpdateOrgFromTaxon(**it, m);
}
}
CRef<COrg_ref> org_ref;
if (desc.IsOrg())
{
- //xUpdateOrgTaxname(logger, desc.SetOrg());
org_ref.Reset(&desc.SetOrg());
}
else
if (desc.IsSource() && desc.GetSource().IsSetOrg())
{
- //xUpdateOrgTaxname(logger, desc.SetSource().SetOrg());
org_ref.Reset(&desc.SetSource().SetOrg());
}
if (org_ref)
}
}
}
-void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeq_entry& entry)
+
+void CRemoteUpdater::UpdateOrgFromTaxon(CSeq_entry& entry)
{
TOrgMap org_to_update;
- _UpdateOrgFromTaxon(logger, entry, org_to_update);
+ _UpdateOrgFromTaxon(entry, org_to_update);
if (org_to_update.empty())
return;
}
}
-void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, objects::CSeq_entry_EditHandle& obj)
+void CRemoteUpdater::UpdateOrgFromTaxon(FLogger /*logger*/, CSeq_entry& entry)
+{
+    // Deprecated overload, kept until it can be removed: the logger argument
+    // is ignored and the call is forwarded to UpdateOrgFromTaxon(CSeq_entry&).
+    this->UpdateOrgFromTaxon(entry);
+}
+
+void CRemoteUpdater::UpdateOrgFromTaxon(FLogger logger, CSeq_entry_EditHandle& obj)
{
for (CBioseq_CI bioseq_it(obj); bioseq_it; ++bioseq_it)
{
}
}
+
+
namespace
{
bool s_IsAllCaps(const string& str)
PostProcessPubs((CPubdesc&)desc_it->GetPub());
}
}
-
+}
+
+// Makes the updater use the supplied MLA client from now on.
+void CRemoteUpdater::SetMLAClient(CMLAClient& mlaClient)
+{
+    CMLAClient* const pClient = &mlaClient;
+    m_mlaClient.Reset(pClient);
END_SCOPE(edit)
-/* $Id: context.cpp 602293 2020-02-20 18:24:39Z kans $
+/* $Id: context.cpp 608545 2020-05-18 19:35:41Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
// JIRA SQD-4444 : copy annot selector from the one saved in this context structure
- SAnnotSelector sel = m_FFCtx.SetAnnotSelector();
+ // SAnnotSelector sel = m_FFCtx.SetAnnotSelector();
+ SAnnotSelector sel;
sel.SetAnnotType(CSeq_annot::TData::e_Ftable);
CAnnot_CI annot_ci(m_Handle, sel);
for( ; annot_ci; ++annot_ci ) {
-/* $Id: dbsource_item.cpp 577454 2019-01-03 22:58:25Z kans $
+/* $Id: dbsource_item.cpp 614611 2020-08-20 12:59:34Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
GetContext()->Config().GetHTMLFormatter().FormatUniProtId(ht, acc);
} else {
GetContext()->Config().GetHTMLFormatter().FormatNucId(ht, *idh.GetSeqId(),
- GetContext()->GetScope().GetGi(idh), acc);
+ GI_TO(TIntId, GetContext()->GetScope().GetGi(idh)), acc);
}
#endif
s += comma + sep + "accession " + ht;
-/* $Id: defline_item.cpp 577454 2019-01-03 22:58:25Z kans $
+/* $Id: defline_item.cpp 613774 2020-08-12 16:32:22Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
if ( ctx.Config().IgnoreExistingTitle() ) {
flags |= sequence::CDeflineGenerator::fIgnoreExisting;
}
+ if ( ctx.Config().ShowDeflineModifiers() ) {
+ flags |= sequence::CDeflineGenerator::fShowModifiers;
+ }
if ( ctx.UsingSeqEntryIndex() ) {
CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
CBioseq_Handle bsh = scope.GetBioseqHandle(*bioseq);
if (! Defliner.UsePDBCompoundForDefline()) {
ctx.SetPDBCompoundForComment(true);
}
- // CompressSpaces( m_Defline );
- CleanAndCompress (m_Defline, m_Defline.c_str());
+ if ( ctx.Config().ShowDeflineModifiers() ) {
+ CompressSpaces( m_Defline );
+ } else {
+ CleanAndCompress (m_Defline, m_Defline.c_str());
+ }
ConvertQuotes(m_Defline);
AddPeriod(m_Defline);
CSeqdesc_CI di(ctx.GetHandle(), CSeqdesc::e_Title);
-/* $Id: feature_item.cpp 606747 2020-04-27 11:07:41Z ivanov $
+/* $Id: feature_item.cpp 615038 2020-08-26 13:39:07Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CSeqFeatData::ESubtype subtype = feat.GetData().GetSubtype();
if ( subtype == CSeqFeatData::eSubtype_pub ||
- subtype == CSeqFeatData::eSubtype_non_std_residue ||
+ /* subtype == CSeqFeatData::eSubtype_non_std_residue || */
subtype == CSeqFeatData::eSubtype_biosrc ||
subtype == CSeqFeatData::eSubtype_rsite ||
subtype == CSeqFeatData::eSubtype_seq ) {
gf = &(mf.GetMappedFeature());
gr = &(mf.GetData().GetGene());
if (gr) {
- if (feat_gene_xref->IsSetLocus() && gr->IsSetLocus()) {
- if (feat_gene_xref->GetLocus() == gr->GetLocus()) {
+ if (feat_gene_xref->IsSetLocus_tag() && gr->IsSetLocus_tag()) {
+ if (feat_gene_xref->GetLocus_tag() == gr->GetLocus_tag()) {
gene_feat = &(mf.GetMappedFeature());
gene_ref = &(mf.GetData().GetGene());
} else {
// RW-985
gene_ref = feat_gene_xref;
}
- } else if (feat_gene_xref->IsSetLocus_tag() && gr->IsSetLocus_tag()) {
- if (feat_gene_xref->GetLocus_tag() == gr->GetLocus_tag()) {
+ } else if (feat_gene_xref->IsSetLocus() && gr->IsSetLocus()) {
+ if (feat_gene_xref->GetLocus() == gr->GetLocus()) {
gene_feat = &(mf.GetMappedFeature());
gene_ref = &(mf.GetData().GetGene());
} else {
case CSeqFeatData::e_Psec_str:
x_AddQualsPsecStr( ctx );
break;
+ case CSeqFeatData::e_Non_std_residue:
+ x_AddQualsNonStd( ctx );
+ break;
case CSeqFeatData::e_Het:
x_AddQualsHet( ctx );
break;
case CSeqFeatData::e_Psec_str:
x_AddQualsPsecStr( ctx );
break;
+ case CSeqFeatData::e_Non_std_residue:
+ x_AddQualsNonStd( ctx );
+ break;
case CSeqFeatData::e_Het:
x_AddQualsHet( ctx );
break;
x_AddQual(slot, new CFlatSeqIdQVal(*acc_id));
}
/*
- if (! cfg.HideGI()) {
+ if (! (cfg.HideGI() || cfg.IsPolicyFtp())) {
x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(*sip, true));
}
*/
if ( protId ) {
if ( !cfg.AlwaysTranslateCDS() ) {
CScope::EGetBioseqFlag get_flag = CScope::eGetBioseq_Loaded;
- if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() ) {
+ if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() || cfg.IsPolicyFtp() ) {
get_flag = CScope::eGetBioseq_All;
}
protHandle = scope.GetBioseqHandle(*protId, get_flag);
if ( protId ) {
if ( !cfg.AlwaysTranslateCDS() ) {
CScope::EGetBioseqFlag get_flag = CScope::eGetBioseq_Loaded;
- if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() ) {
+ if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() || cfg.IsPolicyFtp() ) {
get_flag = CScope::eGetBioseq_All;
}
protHandle = scope.GetBioseqHandle(*protId, get_flag);
case CSeq_id::e_Gi:
if( seqid.GetGi() > ZERO_GI ) {
const CFlatFileConfig& cfg = GetContext()->Config();
- if (! cfg.HideGI()) {
+ if (! (cfg.HideGI() || cfg.IsPolicyFtp())) {
if ( eLastRegularChoice == CSeq_id::e_not_set ) {
// use as protein_id if it's the first usable one
x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( seqid ) );
const CFlatFileConfig& cfg = GetContext()->Config();
ITERATE( CBioseq_Handle::TId, id_iter, ids ) {
if( id_iter->IsGi() ) {
- if (! cfg.HideGI()) {
+ if (! (cfg.HideGI() || cfg.IsPolicyFtp())) {
x_AddQual( eFQ_db_xref,
new CFlatStringQVal("GI:" + NStr::NumericToString(id_iter->GetGi()) ));
}
x_AddQual( eFQ_sec_str_type, new CFlatStringQVal( sec_str_as_str ) );
}
+// ----------------------------------------------------------------------------
+void CFeatureItem::x_AddQualsNonStd(
+    CBioseqContext& ctx )
+// ----------------------------------------------------------------------------
+{
+    // Adds the feature's non-standard-residue string as a non_std_residue
+    // qualifier. The feature data must be of the Non_std_residue kind.
+    _ASSERT( m_Feat.GetData().IsNon_std_residue() );
+
+    x_AddQual( eFQ_non_std_residue,
+               new CFlatStringQVal( m_Feat.GetData().GetNon_std_residue() ) );
+}
+
// ----------------------------------------------------------------------------
void CFeatureItem::x_AddQualsHet(
CBioseqContext& ctx )
DO_QUAL(site_type);
DO_QUAL(sec_str_type);
DO_QUAL(heterogen);
+ DO_QUAL(non_std_residue);
DO_QUAL(tag_peptide);
{ eFQ_mol_wt, CSeqFeatData::eQual_calculated_mol_wt },
{ eFQ_ncRNA_class, CSeqFeatData::eQual_ncRNA_class },
{ eFQ_nomenclature, CSeqFeatData::eQual_nomenclature },
+ { eFQ_non_std_residue, CSeqFeatData::eQual_non_std_residue },
{ eFQ_number, CSeqFeatData::eQual_number },
{ eFQ_old_locus_tag, CSeqFeatData::eQual_old_locus_tag },
{ eFQ_operon, CSeqFeatData::eQual_operon },
case CSeqFeatData::e_Psec_str:
x_AddFTablePsecStrQuals(data.GetPsec_str());
break;
+ case CSeqFeatData::e_Non_std_residue:
+ x_AddFTableNonStdQuals(data.GetNon_std_residue());
+ break;
case CSeqFeatData::e_Het:
x_AddFTablePsecStrQuals(data.GetHet());
break;
case CTrna_ext::C_Aa::e_Ncbistdaa:
aa = GetAAName(trna_ext.GetAa().GetNcbistdaa(), false);
break;
+ default:
+ break;
}
string seq("---");
CBioseq_Handle prod =
ctx.GetScope().GetBioseqHandle(m_Feat.GetProductId());
if ( prod ) {
- string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !ctx.Config().HideGI());
+ string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !(ctx.Config().HideGI() || ctx.Config().IsPolicyFtp()));
if (!NStr::IsBlank(id_str)) {
x_AddFTableQual("transcript_id", id_str);
}
}
if (prod && !cfg.HideProteinID()) {
- string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !ctx.Config().HideGI());
+ string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !(ctx.Config().HideGI() || ctx.Config().IsPolicyFtp()));
if (!NStr::IsBlank(id_str)) {
x_AddFTableQual("protein_id", id_str);
}
}
}
+// ----------------------------------------------------------------------------
+void CFeatureItem::x_AddFTableNonStdQuals(
+    const CSeqFeatData::TNon_std_residue& res )
+// ----------------------------------------------------------------------------
+{
+    // An empty residue string produces no feature-table qualifier.
+    if ( res.empty() ) {
+        return;
+    }
+    x_AddFTableQual("non_std_residue", res);
+}
+
static const string s_GetSubtypeString(const COrgMod::TSubtype& subtype)
{
if( voucher_info_ref->m_Prefix != NULL ) {
text << *voucher_info_ref->m_Prefix;
}
+ if( voucher_info_ref->m_Trim != NULL ) {
+ const string& trim = *voucher_info_ref->m_Trim;
+ if (NStr::StartsWith(id, trim)) {
+ NStr::TrimPrefixInPlace(id, trim);
+ NStr::TruncateSpacesInPlace(id);
+ }
+ }
if( voucher_info_ref->m_PadTo > 0 && voucher_info_ref->m_PadWith != NULL) {
int len_id = id.length();
int len_pad = voucher_info_ref->m_PadWith->length();
}
+/*
static bool s_IsExactAndNonExactMatchOnNoteQuals(CFlatFeature::TQuals& qvec, const string& str)
{
if (qvec.empty()) {
if (has_exact == 1 && non_exact > 0) return true;
return false;
}
+*/
-/* $Id: flat_file_config.cpp 606754 2020-04-27 11:09:46Z ivanov $
+/* $Id: flat_file_config.cpp 614736 2020-08-21 13:43:48Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
TStyle style,
TFlags flags,
TView view,
- TPolicy policy) :
- m_Format(format), m_Mode(mode), m_Style(style), m_Flags(flags), m_View(view), m_Policy(policy)
+ TPolicy policy,
+ TCustom custom) :
+ m_Format(format), m_Mode(mode), m_Style(style), m_Flags(flags), m_View(view), m_Policy(policy), m_Custom(custom)
{
m_RefSeqConventions = false;
+ m_FeatDepth = 0;
+ m_GapDepth = 0;
SetGenbankBlocks(fGenbankBlocks_All);
SetGenbankBlockCallback(NULL);
SetCanceledCallback(NULL);
BasicCleanup(false);
- SetCustom(0);
// FTable always requires master style
if (m_Format == eFormat_FTable) {
"Far fetch policy",
CArgDescriptions::eString, "adaptive");
arg_desc->SetConstraint("policy",
- &(*new CArgAllow_Strings, "adaptive", "internal", "external", "exhaustive"));
+ &(*new CArgAllow_Strings, "adaptive", "internal", "external", "exhaustive", "ftp", "web"));
// flags (default: 0)
arg_desc->AddDefaultKey("flags", "Flags",
arg_desc->AddOptionalKey("depth", "Depth",
"Exploration depth", CArgDescriptions::eInteger);
+ arg_desc->AddOptionalKey("gap-depth", "GapDepth",
+ "Gap exploration depth", CArgDescriptions::eInteger);
+
arg_desc->AddOptionalKey("max_search_segments", "MaxSearchSegments",
"Max number of empty segments to search", CArgDescriptions::eInteger);
return CFlatFileConfig::ePolicy_External;
} else if ( Policy == "exhaustive" ) {
return CFlatFileConfig::ePolicy_Exhaustive;
+ } else if ( Policy == "ftp" ) {
+ return CFlatFileConfig::ePolicy_Ftp;
+ } else if ( Policy == "web" ) {
+ return CFlatFileConfig::ePolicy_Web;
}
// default
{
int custom = args["custom"].AsInteger();
- // ID-5865 : Set the "show SNP" and "show CDD" bits based on the value of the
- // "enable-external" flag.
- if (args["enable-external"] || args["policy"].AsString() == "external")
- custom |= (CFlatFileConfig::fShowSNPFeatures | CFlatFileConfig::fShowCDDFeatures);
-
return (CFlatFileConfig::ECustom)custom;
}
CFlatFileConfig::TGenbankBlocks genbank_blocks = x_GetGenbankBlocks(args);
CFlatFileConfig::ECustom custom = x_GetCustom(args);
+ // ID-5865 : Set the "show SNP" and "show CDD" bits based on the value of the
+ // "enable-external" flag.
+ if (args["no-external"]) {
+ int flg = (int) flags;
+ flg |= CFlatFileConfig::fHideCDDFeatures;
+ flg |= CFlatFileConfig::fHideSNPFeatures;
+ flags = (CFlatFileConfig::EFlags) flg;
+ } else if (args["enable-external"] || args["policy"].AsString() == "external") {
+ int cust = (int) custom;
+ if ((flags & CFlatFileConfig::fHideCDDFeatures) == 0) {
+ cust |= CFlatFileConfig::fShowCDDFeatures;
+ }
+ if ((flags & CFlatFileConfig::fHideSNPFeatures) == 0) {
+ cust |= CFlatFileConfig::fShowSNPFeatures;
+ }
+ custom = (CFlatFileConfig::ECustom) cust;
+ }
+
SetFormat(format);
SetMode(mode);
SetStyle(style);
m_fGenbankBlocks = genbank_blocks;
m_BasicCleanup = args["cleanup"];
SetCustom(custom);
+
+ if( args["depth"] ) {
+ int featDepth = args["depth"].AsInteger();
+ SetFeatDepth(featDepth);
+ }
+ if( args["gap-depth"] ) {
+ int gapDepth = args["gap-depth"].AsInteger();
+ SetGapDepth(gapDepth);
+ }
}
#ifdef NEW_HTML_FMT
os << id;
}
-void CHTMLEmptyFormatter::FormatTaxid(string& str, const int taxid, const string& taxname) const
+void CHTMLEmptyFormatter::FormatTaxid(string& str, const TTaxId taxid, const string& taxname) const
{
str = taxname;
}
-/* $Id: flat_file_generator.cpp 606748 2020-04-27 11:07:58Z ivanov $
+/* $Id: flat_file_generator.cpp 615047 2020-08-26 13:40:19Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CFlatFileConfig::TStyle style,
CFlatFileConfig::TFlags flags,
CFlatFileConfig::TView view,
- CFlatFileConfig::TCustom custom) :
- m_Ctx(new CFlatFileContext(CFlatFileConfig(format, mode, style, flags, view)))
+ CFlatFileConfig::TCustom custom,
+ CFlatFileConfig::TPolicy policy) :
+ m_Ctx(new CFlatFileContext(CFlatFileConfig(format, mode, style, flags, view, policy, custom)))
{
m_Failed = false;
if ( !m_Ctx ) {
NCBI_THROW(CFlatException, eInternal, "Unable to initialize context");
}
- m_Ctx->SetConfig().SetCustom(custom);
}
if ( m_Ctx->GetConfig().IsPolicyExhaustive() ) {
policy = CSeqEntryIndex::eExhaustive;
}
+ if ( m_Ctx->GetConfig().IsPolicyFtp() ) {
+ policy = CSeqEntryIndex::eFtp;
+ }
+ if ( m_Ctx->GetConfig().IsPolicyWeb() ) {
+ policy = CSeqEntryIndex::eWeb;
+ }
CRef<CSeqEntryIndex> idx(new CSeqEntryIndex( topseh, policy, flags ));
m_Ctx->SetSeqEntryIndex(idx);
if (idx->IsIndexFailure()) {
// bool nearFeatsSuppress = false;
bool isNc = false;
+ /*
bool isNgNtNwNz = false;
bool isGED = false;
bool isTPA = false;
+ */
bool hasLocalFeat = false;
bool forceOnlyNear = false;
case CSeq_id::e_Genbank:
case CSeq_id::e_Embl:
case CSeq_id::e_Ddbj:
- isGED = true;
+ // isGED = true;
break;
case CSeq_id::e_Tpg:
case CSeq_id::e_Tpe:
case CSeq_id::e_Tpd:
- isTPA = true;
+ // isTPA = true;
break;
case CSeq_id::e_Other:
{
if (acc == "NC_") {
isNc = true;
} else if (acc == "NG_" || acc == "NT_" || acc == "NW_" || acc == "NZ_") {
- isNgNtNwNz = true;
+ // isNgNtNwNz = true;
}
}
}
}
if ( cfg.HideSNPFeatures() ) {
flags |= CSeqEntryIndex::fHideSNPFeats;
+ } else if ( cfg.ShowSNPFeatures() ) {
+ flags |= CSeqEntryIndex::fShowSNPFeats;
}
if ( cfg.HideCDDFeatures() ) {
flags |= CSeqEntryIndex::fHideCDDFeats;
- }
- if ( cfg.ShowSNPFeatures() ) {
- flags |= CSeqEntryIndex::fShowSNPFeats;
- }
- if ( cfg.ShowCDDFeatures() ) {
+ } else if ( cfg.ShowCDDFeatures() ) {
flags |= CSeqEntryIndex::fShowCDDFeats;
}
- if ( m_Ctx->GetConfig().IsPolicyInternal() ) {
+ if ( cfg.IsPolicyInternal() ) {
policy = CSeqEntryIndex::eInternal;
}
- if ( m_Ctx->GetConfig().IsPolicyExternal() ) {
+ if ( cfg.IsPolicyExternal() ) {
policy = CSeqEntryIndex::eExternal;
}
- if ( m_Ctx->GetConfig().IsPolicyExhaustive() ) {
+ if ( cfg.IsPolicyExhaustive() ) {
policy = CSeqEntryIndex::eExhaustive;
}
- CRef<CSeqEntryIndex> idx(new CSeqEntryIndex( topseh, policy, flags ));
+ if ( cfg.IsPolicyFtp() ) {
+ policy = CSeqEntryIndex::eFtp;
+ }
+ if ( cfg.IsPolicyWeb() ) {
+ policy = CSeqEntryIndex::eWeb;
+ }
+ CRef<CSeqEntryIndex> idx(new CSeqEntryIndex( topseh, policy, flags));
m_Ctx->SetSeqEntryIndex(idx);
if (idx->IsIndexFailure()) {
m_Failed = true;
return;
}
+ int featDepth = cfg.GetFeatDepth();
+ idx->SetFeatDepth(featDepth);
+ int gapDepth = cfg.GetGapDepth();
+ idx->SetGapDepth(gapDepth);
} catch(CException &) {
m_Failed = true;
return;
}
+// Formats the Seq-entry obtained from `bsh` as text on `os`, by delegating to
+// the Seq-entry-handle / item-stream overload of Generate().
+void CFlatFileGenerator::Generate
+(const CBioseq_Handle& bsh,
+ CNcbiOstream& os,
+ bool useSeqEntryIndexing)
+{
+    const CSeq_entry_Handle entry = bsh.GetSeq_entry_Handle();
+
+    // Wrap the plain output stream in the item stream the overload expects.
+    CRef<CFlatItemOStream> item_os(
+        new CFormatItemOStream(new COStreamTextOStream(os)));
+
+    Generate(entry, *item_os, useSeqEntryIndexing);
+}
+
+
void CFlatFileGenerator::Generate
(const CSeq_submit& submit,
CScope& scope,
-/* $Id: flat_qual_slots.cpp 564513 2018-05-29 17:40:10Z kans $
+/* $Id: flat_qual_slots.cpp 613781 2020-08-12 16:42:43Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
TYPICAL_FQ(mol_wt),
TYPICAL_FQ(ncRNA_class),
TYPICAL_FQ(nomenclature),
+ TYPICAL_FQ(non_std_residue),
TYPICAL_FQ(number),
TYPICAL_FQ(old_locus_tag),
TYPICAL_FQ(operon),
-/* $Id: gather_items.cpp 607405 2020-05-04 14:19:32Z ivanov $
+/* $Id: gather_items.cpp 615788 2020-09-03 18:19:11Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
if (! bsx) return;
// gather references from descriptors
- bsx->IterateDescriptors([this, &ctx, &scope, &refs, &idx, bsx](CDescriptorIndex& sdx) {
+ bsx->IterateDescriptors([this, &refs, &idx, bsx](CDescriptorIndex& sdx) {
try {
CSeqdesc::E_Choice chs = sdx.GetType();
if (chs == CSeqdesc::e_Pub) {
string genome_build_number =
CGenomeAnnotComment::GetGenomeBuildNumber(ctx.GetHandle());
bool has_ref_track_status = s_HasRefTrackStatus(ctx.GetHandle());
- CCommentItem::ECommentFormat format = ctx.Config().DoHTML() ?
- CCommentItem::eFormat_Html : CCommentItem::eFormat_Text;
+ // CCommentItem::ECommentFormat format = ctx.Config().DoHTML() ? CCommentItem::eFormat_Html : CCommentItem::eFormat_Text;
ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) {
const CSeq_id& id = **id_iter;
// if protein, get sources applicable to DNA location of CDS
if ( ctx.IsProt() ) {
// collect biosources features on bioseq
- if ( !ctx.DoContigStyle() || cfg.ShowContigSources() ) {
+ if ( !ctx.DoContigStyle() || cfg.ShowContigSources() || cfg.IsPolicyFtp() ) {
CConstRef<CSeq_feat> src_feat = x_GetSourceFeatFromCDS (bh);
if (src_feat.NotEmpty()) {
// CMappedFeat mapped_feat(bh.GetScope().GetSeq_featHandle(*src_feat));
if ( ! ctx.IsProt() ) {
// collect biosources features on bioseq
- if ( !ctx.DoContigStyle() || cfg.ShowContigSources() ) {
+ if ( !ctx.DoContigStyle() || cfg.ShowContigSources() || cfg.IsPolicyFtp() ) {
x_CollectSourceFeatures(bh, range, ctx, srcs);
}
}
void CFlatGatherer::x_CollectBioSources(TSourceFeatSet& srcs) const
{
CBioseqContext& ctx = *m_Current;
- CScope* scope = &ctx.GetScope();
+ // CScope* scope = &ctx.GetScope();
const CFlatFileConfig& cfg = ctx.Config();
x_CollectBioSourcesOnBioseq(ctx.GetHandle(),
return true;
}
+// for the non-indexed, non-faster, older version of the flatfile generator
void s_SetSelection(SAnnotSelector& sel, CBioseqContext& ctx)
{
const CFlatFileConfig& cfg = ctx.Config();
{
if (feat.GetAnnot().IsNamed()) {
const string& name = feat.GetAnnot().GetName();
- return (name == "Annot:CDD" || name == "CDDSearch");
+ return (name == "Annot:CDD" || name == "CDDSearch" || name == "CDD");
}
return false;
}
SAnnotSelector& sel,
CBioseqContext& ctx) const
{
- CScope& scope = ctx.GetScope();
+ // CScope& scope = ctx.GetScope();
CFlatItemOStream& out = *m_ItemOS;
CSeqMap_CI gap_it = s_CreateGapMapIter(loc, ctx);
s_SetGapIdxData (gap_data, gaps);
}
- bsx->IterateFeatures([this, &ctx, &scope, &prev_feat, &gap_it, &loc_len, &item, &out, &slice_mapper,
+ bsx->IterateFeatures([this, &ctx, &prev_feat, &loc_len, &item, &out, &slice_mapper,
gaps, &gap_data, showGapsOfSizeZero, bsx](CFeatureIndex& sfx) {
try {
CMappedFeat mf = sfx.GetMappedFeat();
// may need to map sig_peptide on a different segment
if (feat.GetData().IsCdregion()) {
if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
- x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx, slice_mapper);
+ x_GetFeatsOnCdsProductIdx(original_feat, ctx, slice_mapper);
}
}
return; // continue;
bool has_gap = gap_data.has_gap;
int gap_start = gap_data.gap_start;
int gap_end = gap_data.gap_end;
- while (has_gap && gap_start < feat_start) {
+ while (has_gap && gap_start <= feat_start) {
const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_start <= gap_end) );
const bool gapMatch = ( subtype == CSeqFeatData::eSubtype_gap && feat_start == gap_start && feat_end == gap_end - 1 );
if ( noGapSizeProblem && ! gapMatch ) {
{{
// map features from protein
if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
- x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx,
+ x_GetFeatsOnCdsProductIdx(original_feat, ctx,
slice_mapper,
CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
}
// when all features are done, output remaining gaps
while (gap_data.has_gap) {
- const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_data.gap_start < gap_data.gap_end) );
+ const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_data.gap_start <= gap_data.gap_end) );
if( noGapSizeProblem /* && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) {
item.Reset( s_NewGapItem(gap_data.gap_start, gap_data.gap_end, gap_data.gap_length, gap_data.gap_type,
gap_data.gap_evidence, gap_data.is_unknown_length, gap_data.is_assembly_gap, ctx) );
CRef<CSeq_loc_Mapper> slice_mapper = s_MakeSliceMapper(loc, ctx);
// Gaps of length zero are only shown for SwissProt Genpept records
- const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
+ // const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
// cache to avoid repeated calculations
- const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ;
+ // const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ;
CSeq_feat_Handle prev_feat;
CConstRef<IFlatItem> item;
CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
if (! idx) return;
- CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (loc);
+ // CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (loc);
+ CRef<CBioseqIndex> bsx = idx->GetBioseqIndex ();
if (! bsx) return;
const vector<CRef<CGapIndex>>& gaps = bsx->GetGapIndices();
s_SetGapIdxData (gap_data, gaps);
}
- bsx->IterateFeatures([this, &ctx, &scope, &prev_feat, &gap_it, &loc_len, &item, &out, &slice_mapper,
- gaps, &gap_data, showGapsOfSizeZero, bsx](CFeatureIndex& sfx) {
+ CSeq_loc slp;
+ slp.Assign(loc);
+ bsx->IterateFeatures(slp, [this, &ctx, &scope, &prev_feat, &item, &out, &slice_mapper,
+ gaps, bsx](CFeatureIndex& sfx) {
try {
CMappedFeat mf = sfx.GetMappedFeat();
CSeq_feat_Handle feat = sfx.GetSeqFeatHandle(); // it->GetSeq_feat_Handle();
// may need to map sig_peptide on a different segment
if (feat.GetData().IsCdregion()) {
if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
- x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx, slice_mapper);
+ x_GetFeatsOnCdsProductIdx(original_feat, ctx, slice_mapper);
}
}
return;
const CSeq_loc& loc = original_feat.GetLocation();
CRef<CSeq_loc> loc2(new CSeq_loc);
loc2->Assign(*feat_loc);
- loc2->SetId(*loc.GetId());
-
- item.Reset( x_NewFeatureItem(mf, ctx, loc2, m_Feat_Tree, CFeatureItem::eMapped_not_mapped, true) );
- out << item;
-
- // Add more features depending on user preferences
-
- switch (feat.GetFeatSubtype()) {
- case CSeqFeatData::eSubtype_mRNA:
- {{
- // optionally map CDS from cDNA onto genomic
- if (s_CopyCDSFromCDNA(ctx) && feat.IsSetProduct()) {
- x_CopyCDSFromCDNA(original_feat, ctx);
- }
- break;
- }}
- case CSeqFeatData::eSubtype_cdregion:
- {{
- // map features from protein
- if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
- x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx,
- slice_mapper,
- CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
- }
- break;
- }}
- default:
- break;
+ const CSeq_id* id2 = loc.GetId();
+ // test needed for gene in X55766, to prevent seg fault, but still does not produce correct mixed location
+ if (id2) {
+ loc2->SetId(*id2);
}
- } catch (CException& e) {
- // special case: Job cancellation exceptions make us stop
- // generating features.
- CMappedFeat mf = sfx.GetMappedFeat();
- if( NStr::EqualNocase(e.what(), "job cancelled") ||
- NStr::EqualNocase(e.what(), "job canceled") )
- {
- LOG_POST_X(2, Error << "Job canceled while processing feature "
- << s_GetFeatDesc(mf.GetSeq_feat_Handle())
- << " [" << e << "]; flatfile may be truncated");
- return;
- }
-
- // for cases where a halt is requested, just rethrow the exception
- if( e.GetErrCodeString() == string("eHaltRequested") ) {
- throw e;
- }
-
- // post to log, go on to next feature
- LOG_POST_X(2, Error << "Error processing feature "
- << s_GetFeatDesc(mf.GetSeq_feat_Handle())
- << " [" << e << "]");
- }
- }); // end of for loop
-}
-
-size_t CFlatGatherer::x_GatherFeaturesOnSegmentIdx
-(const CSeq_loc& loc,
- SAnnotSelector& sel,
- CBioseqContext& ctx) const
-{
- size_t count = 0;
-
- CScope& scope = ctx.GetScope();
- CFlatItemOStream& out = *m_ItemOS;
-
- // logic to handle offsets that occur when user sets
- // the -from and -to command-line parameters
- // build slice_mapper for mapping locations
- CRef<CSeq_loc_Mapper> slice_mapper = s_MakeSliceMapper(loc, ctx);
-
- // Gaps of length zero are only shown for SwissProt Genpept records
- const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
-
- // cache to avoid repeated calculations
- const int loc_len = sequence::GetLength(*loc.GetId(), &ctx.GetScope() ) ;
-
- CSeq_feat_Handle prev_feat;
- CConstRef<IFlatItem> item;
-
- CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
- if (! idx) return count;
- CRef<CBioseqIndex> bsx = idx->GetBioseqIndex ();
- if (! bsx) return count;
-
- count = bsx->IterateFeaturesByLoc(loc, [this, &ctx, &scope, &prev_feat, &loc_len,
- &item, &out, &slice_mapper, showGapsOfSizeZero, bsx](CFeatureIndex& sfx) {
- try {
- CMappedFeat mf = sfx.GetMappedFeat();
- CSeq_feat_Handle feat = sfx.GetSeqFeatHandle(); // it->GetSeq_feat_Handle();
- const CSeq_feat& original_feat = sfx.GetMappedFeat().GetOriginalFeature(); // it->GetOriginalFeature();
-
- /// we need to cleanse CDD features
-
- s_CleanCDDFeature(original_feat);
-
- const CFlatFileConfig& cfg = ctx.Config();
- CSeqFeatData::ESubtype subtype = feat.GetFeatSubtype();
- if (cfg.HideCDDFeatures() &&
- (subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) &&
- s_IsCDD(feat)) {
- return;
- }
-
- if( (feat.GetFeatSubtype() == CSeqFeatData::eSubtype_gap) && ! feat.IsPlainFeat() ) {
- // skip gaps when we take slices (i.e. "-from" and "-to" command-line args),
- // unless they're a plain feature.
- // (compare NW_001468136 (100 to 200000) and AC185591 (100 to 100000) )
- return;
- }
-
- // supress duplicate features
- if (prev_feat && s_IsDuplicateFeatures(prev_feat, feat)) {
- return;
- }
- prev_feat = feat;
-
- CConstRef<CSeq_loc> feat_loc( sfx.GetMappedLocation()); // &it->GetLocation());
-
- feat_loc = s_NormalizeNullsBetween( feat_loc );
-
- feat_loc = Seq_loc_Merge(*feat_loc, CSeq_loc::fMerge_Abutting, &scope);
-
- // HANDLE GAPS SECTION GOES HERE
-
-
- const CSeq_loc& loc = original_feat.GetLocation();
- CRef<CSeq_loc> loc2(new CSeq_loc);
- loc2->Assign(*feat_loc);
- loc2->SetId(*loc.GetId());
item.Reset( x_NewFeatureItem(mf, ctx, loc2, m_Feat_Tree, CFeatureItem::eMapped_not_mapped, true) );
out << item;
{{
// map features from protein
if (( !ctx.Config().IsFormatFTable() || ctx.Config().ShowFtablePeptides() )) {
- x_GetFeatsOnCdsProductIdx(mf, original_feat, ctx,
+ x_GetFeatsOnCdsProductIdx(original_feat, ctx,
slice_mapper,
CConstRef<CFeatureItem>(static_cast<const CFeatureItem*>(item.GetNonNullPointer())) );
}
<< " [" << e << "]");
}
}); // end of for loop
-
- return count;
}
void CFlatGatherer::x_GatherFeaturesOnRange
return false;
}
-void CFlatGatherer::x_GatherFeatures(void) const
+/*
+static bool s_NotForceNearFeats(CBioseqContext& ctx)
+{
+ // asn2flat -id NW_003127872 -flags 2 -faster -custom 2048
+ CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
+ if (idx) {
+ CBioseq_Handle hdl = ctx.GetHandle();
+ CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
+ if (bsx) {
+ if (bsx->IsForceOnlyNearFeats()) return false;
+ }
+ }
+
+ return true;
+}
+*/
+
+void CFlatGatherer::x_GatherFeaturesIdx(void) const
{
CBioseqContext& ctx = *m_Current;
const CFlatFileConfig& cfg = ctx.Config();
+ if ( ! cfg.UseSeqEntryIndexer()) return;
+
+ CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
+ if (! idx) return;
+ CBioseq_Handle hdl = ctx.GetHandle();
+ CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
+ if (! bsx) return;
+
CFlatItemOStream& out = *m_ItemOS;
CConstRef<IFlatItem> item;
}
// collect features
- // if ( ctx.IsSegmented() && cfg.IsStyleMaster() && cfg.OldFeaturesOrder() ) {
- if ( cfg.UseSeqEntryIndexer() && ctx.IsDelta() && ! ctx.IsDeltaLitOnly() && cfg.IsStyleMaster() && ctx.GetLocation().IsWhole() ) {
+ if (ctx.GetLocation().IsWhole()) {
+ x_GatherFeaturesOnWholeLocationIdx(loc, sel, ctx);
+ } else {
+ x_GatherFeaturesOnRangeIdx(loc, sel, ctx);
+ }
+
+ if ( ctx.IsProt() ) {
+ // Also collect features of which this protein is the product.
+ // Currently there are only two possible candidates: Coding regions
+ // and Prot features (rare).
- CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
- if (! idx) return;
- CBioseq_Handle hdl = ctx.GetHandle();
- CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
- if (! bsx) return;
+ // look for the Cdregion feature for this protein
+ CBioseq_Handle handle = ( ctx.CanGetMaster() ? ctx.GetMaster().GetHandle() : ctx.GetHandle() );
+ SAnnotSelector sel(CSeqFeatData::e_Cdregion);
+ sel.SetByProduct().SetResolveDepth(0);
+ // try first in-TSE CDS
+ sel.SetLimitTSE(handle.GetTSE_Handle());
+ CFeat_CI feat_it(handle, sel);
+ if ( !feat_it ) {
+ // then any other CDS
+ sel.SetLimitNone().ExcludeTSE(handle.GetTSE_Handle());
+ feat_it = CFeat_CI(handle, sel);
+ }
+ if (feat_it) {
+ try {
+ CMappedFeat cds = *feat_it;
- // Gaps of length zero are only shown for SwissProt Genpept records
- const bool showGapsOfSizeZero = ( ctx.IsProt() && ctx.GetPrimaryId()->Which() == CSeq_id_Base::e_Swissprot );
+ // map CDS location to its location on the product
+ CSeq_loc_Mapper mapper(*cds.GetOriginalSeq_feat(),
+ CSeq_loc_Mapper::eLocationToProduct,
+ &ctx.GetScope());
+ mapper.SetFuzzOption( CSeq_loc_Mapper::fFuzzOption_CStyle | CSeq_loc_Mapper::fFuzzOption_RemoveLimTlOrTr );
+ CRef<CSeq_loc> cds_prod = mapper.Map(cds.GetLocation());
+ cds_prod = cds_prod->Merge( ( s_IsCircularTopology(ctx) ? CSeq_loc::fMerge_All : CSeq_loc::fSortAndMerge_All ), NULL );
- const vector<CRef<CGapIndex>>& gaps = bsx->GetGapIndices();
+ // it's a common case that we map one residue past the edge of the protein (e.g. NM_131089).
+ // In that case, we shrink the cds's location back one residue.
+ if( cds_prod->IsInt() && cds.GetProduct().IsWhole() ) {
+ const CSeq_id *cds_prod_seq_id = cds.GetProduct().GetId();
+ if( cds_prod_seq_id != NULL ) {
+ CBioseq_Handle prod_bioseq_handle = ctx.GetScope().GetBioseqHandle( *cds_prod_seq_id );
+ if( prod_bioseq_handle ) {
+ const TSeqPos bioseq_len = prod_bioseq_handle.GetBioseqLength();
+ if( cds_prod->GetInt().GetTo() >= bioseq_len ) {
+ cds_prod->SetInt().SetTo( bioseq_len - 1 );
+ }
+ }
+ }
+ }
- SGapIdxData gap_data{};
+ // if there are any gaps in the location, we know that there was an issue with the mapping, so
+ // we fall back on the product.
+ if( s_ContainsGaps(*cds_prod) ) {
+ cds_prod->Assign( cds.GetProduct() );
+ }
- gap_data.num_gaps = gaps.size();
- gap_data.next_gap = 0;
+ // remove fuzz
+ cds_prod->SetPartialStart( false, eExtreme_Positional );
+ cds_prod->SetPartialStop ( false, eExtreme_Positional );
- if (gap_data.num_gaps > 0 && ! ctx.Config().HideGapFeatures()) {
- s_SetGapIdxData (gap_data, gaps);
+ item.Reset(
+ x_NewFeatureItem(cds, ctx, &*cds_prod, m_Feat_Tree,
+ CFeatureItem::eMapped_from_cdna) );
+
+ out << item;
+ } catch (CAnnotMapperException& e) {
+ LOG_POST_X(2, Error << e );
+ }
}
- SSeqMapSelector msel;
- msel.SetFlags(CSeqMap::fFindAny);
- CBioseq_Handle bsh = ctx.GetHandle();
+ // look for Prot features (only for RefSeq records or
+ // GenBank not release_mode).
+ if ( ctx.IsRefSeq() || !cfg.ForGBRelease() ) {
+ SAnnotSelector prod_sel(CSeqFeatData::e_Prot, true);
+ prod_sel.SetLimitTSE(ctx.GetHandle().GetTopLevelEntry());
+ prod_sel.SetResolveMethod(SAnnotSelector::eResolve_TSE);
+ prod_sel.SetOverlapType(SAnnotSelector::eOverlap_Intervals);
+ CFeat_CI it(ctx.GetHandle(), prod_sel);
+ ctx.GetFeatTree().AddFeatures(it);
+ for ( ; it; ++it) {
+ item.Reset(x_NewFeatureItem(*it,
+ ctx,
+ &it->GetProduct(),
+ m_Feat_Tree,
+ CFeatureItem::eMapped_from_prot) );
+ out << item;
+ }
+ }
+ }
+}
- bool keepGoing = true;
- bool noFeatsSeen = true;
- int withoutFeats = 0;
+void CFlatGatherer::x_GatherFeatures(void) const
+{
+ CBioseqContext& ctx = *m_Current;
+ const CFlatFileConfig& cfg = ctx.Config();
- SetDiagFilter(eDiagFilter_All, "!(1305.28,31)");
+ if (cfg.UseSeqEntryIndexer()) {
+ x_GatherFeaturesIdx();
+ return;
+ }
- CConstRef<CSeqMap> seqmap;
- if (ctx.GetLocation().IsWhole()) {
- seqmap = &bsh.GetSeqMap();
- } else {
- seqmap = CSeqMap::CreateSeqMapForSeq_loc(loc, &ctx.GetScope());
- }
+ CFlatItemOStream& out = *m_ItemOS;
+ CConstRef<IFlatItem> item;
- for ( CSeqMap_CI seg(seqmap, &ctx.GetScope(), msel); seg; ++seg ) {
- if (seg.GetType() != CSeqMap::eSeqGap) {
- if (keepGoing) {
- // go over each of the segments
- ENa_strand strand = eNa_strand_unknown;
- if (seg.GetRefMinusStrand()) {
- strand = eNa_strand_minus;
- }
- // cout << "SEG " << seg.GetType() << " @ " << seg.GetPosition() << " - " << seg.GetEndPosition() << " " << seg.GetLength() << endl;
- CRef<CSeq_loc> sl = bsh.GetRangeSeq_loc(seg.GetPosition(), seg.GetEndPosition() - 1, strand);
- if (sl) {
- size_t count = x_GatherFeaturesOnSegmentIdx(*sl, *selp, ctx);
- if (count > 0) {
- noFeatsSeen = false;
- } else if (ctx.IsEMBL() || ctx.IsDDBJ()) {
- withoutFeats++;
- if (withoutFeats > 20 && noFeatsSeen) {
- keepGoing = false;
- }
- }
- }
- }
- } else {
- // cout << "GAP " << seg.GetType() << " @ " << seg.GetPosition() << " - " << seg.GetEndPosition() << " " << seg.GetLength() << endl;
- const bool noGapSizeProblem = ( false || (seg.GetPosition() < seg.GetEndPosition()) );
- if( noGapSizeProblem /* && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) {
- CConstRef<IFlatItem> item;
- if (gap_data.has_gap) {
- const bool noGapSizeProblem = ( showGapsOfSizeZero || (gap_data.gap_start < gap_data.gap_end) );
- if( noGapSizeProblem /* && ! s_CoincidingGapFeatures( it, gap_start, gap_end ) */ ) {
- item.Reset( s_NewGapItem(gap_data.gap_start, gap_data.gap_end, gap_data.gap_length, gap_data.gap_type,
- gap_data.gap_evidence, gap_data.is_unknown_length, gap_data.is_assembly_gap, ctx) );
- out << item;
- }
- if (gap_data.next_gap < gap_data.num_gaps) {
- s_SetGapIdxData (gap_data, gaps);
- } else {
- gap_data.has_gap = false;
- }
- }
- }
+ SAnnotSelector sel;
+ SAnnotSelector* selp = &sel;
+ if (ctx.GetAnnotSelector() != NULL) {
+ selp = &ctx.SetAnnotSelector();
+ }
+ s_SetSelection(*selp, ctx);
+
+ // optionally map gene from genomic onto cDNA
+ if ( ctx.IsInGPS() && cfg.CopyGeneToCDNA() &&
+ ctx.GetBiomol() == CMolInfo::eBiomol_mRNA ) {
+ CMappedFeat mrna = GetMappedmRNAForProduct(ctx.GetHandle());
+ if (mrna) {
+ CMappedFeat gene = GetBestGeneForMrna(mrna, &ctx.GetFeatTree());
+ if (gene) {
+ CRef<CSeq_loc> loc(new CSeq_loc);
+ loc->SetWhole(*ctx.GetPrimaryId());
+ item.Reset(
+ x_NewFeatureItem(gene, ctx, loc, m_Feat_Tree,
+ CFeatureItem::eMapped_from_genomic) );
+ out << item;
}
}
+ }
- SetDiagFilter(eDiagFilter_All, "");
-
+ CSeq_loc loc;
+ if ( ctx.GetMasterLocation() != 0 ) {
+ loc.Assign(*ctx.GetMasterLocation());
} else {
- x_GatherFeaturesOnLocation(loc, *selp, ctx);
+ loc.Assign(*ctx.GetHandle().GetRangeSeq_loc(0, 0));
}
+ // collect features
+ x_GatherFeaturesOnLocation(loc, *selp, ctx);
+
if ( ctx.IsProt() ) {
// Also collect features which this protein is their product.
// Currently there are only two possible candidates: Coding regions
// ============================================================================
void CFlatGatherer::x_GetFeatsOnCdsProductIdx(
- CMappedFeat mf,
const CSeq_feat& feat,
CBioseqContext& ctx,
CRef<CSeq_loc_Mapper> slice_mapper,
CBioseq_Handle prot;
- prot = scope.GetBioseqHandleFromTSE(*prot_id, ctx.GetHandle());
+ // prot = scope.GetBioseqHandleFromTSE(*prot_id, ctx.GetHandle());
+ prot = scope.GetBioseqHandle(*prot_id);
// !!! need a flag for fetching far proteins
if (!prot) {
return;
}
-
- CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
- if (! idx) {
- return;
- }
-
- CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (prot);
- if (! bsx) return;
-
- /*
CFeat_CI it(prot, s_GetCdsProductSel(ctx));
if (!it) {
return;
}
ctx.GetFeatTree().AddFeatures( it ); // !!!
- */
// map from cds product to nucleotide
CSeq_loc_Mapper prot_to_cds(feat, CSeq_loc_Mapper::eProductToLocation, &scope);
prot_to_cds.SetFuzzOption( CSeq_loc_Mapper::fFuzzOption_CStyle );
CSeq_feat_Handle prev; // keep track of the previous feature
- /*
- for ( ; it; ++it )
- */
- bsx->IterateFeatures([this, &ctx, &scope, &prev, &cfg, &prot_to_cds, &slice_mapper, &cdsFeatureItem, bsx](CFeatureIndex& sfx) {
-
- CMappedFeat mf = sfx.GetMappedFeat();
- CSeq_feat_Handle curr = sfx.GetSeqFeatHandle(); // it->GetSeq_feat_Handle();
- const CSeq_feat& original_feat = sfx.GetMappedFeat().GetOriginalFeature(); // it->GetOriginalFeature();
-
+ for ( ; it; ++it ) {
+ CSeq_feat_Handle curr = it->GetSeq_feat_Handle();
const CSeq_loc& curr_loc = curr.GetLocation();
CSeqFeatData::ESubtype subtype = curr.GetFeatSubtype();
subtype != CSeqFeatData::eSubtype_transit_peptide_aa &&
subtype != CSeqFeatData::eSubtype_preprotein &&
subtype != CSeqFeatData::eSubtype_propeptide_aa) {
- return;
+ continue;
}
- if ( cfg.HideCDDFeatures() &&
+ if ( ( cfg.HideCDDFeatures() || ! cfg.ShowCDDFeatures() ) &&
(subtype == CSeqFeatData::eSubtype_region || subtype == CSeqFeatData::eSubtype_site) &&
s_IsCDD(curr) ) {
// passing this test prevents mapping of COG CDD region features
- return;
+ continue;
}
// suppress duplicate features (on protein)
if (prev && s_IsDuplicateFeatures(curr, prev)) {
- return;
+ continue;
}
/// we need to cleanse CDD features
- s_CleanCDDFeature(original_feat);
+ s_CleanCDDFeature(it->GetOriginalFeature());
// map prot location to nuc location
CRef<CSeq_loc> loc(prot_to_cds.Map(curr_loc));
}
}
if (!loc || loc->IsNull()) {
- return;
+ continue;
}
if ( !s_SeqLocEndsOnBioseq(*loc, ctx, eEndsOnBioseqOpt_AnyPartOfSeqLoc, CSeqFeatData::e_Cdregion) ) {
- return;
+ continue;
}
CConstRef<IFlatItem> item;
// for command-line args "-from" and "-to"
- CMappedFeat mapped_feat = mf;
+ CMappedFeat mapped_feat = *it;
if( slice_mapper && loc ) {
CRange<TSeqPos> range = ctx.GetLocation().GetTotalRange();
CRef<CSeq_loc> mapped_loc = slice_mapper->Map(*CFeatTrim::Apply(*loc, range));
if( mapped_loc->IsNull() ) {
- return;
+ continue;
}
CRef<CSeq_feat> feat(new CSeq_feat());
feat->Assign(mapped_feat.GetMappedFeature());
loc = mapped_loc;
}
- item = ConstRef( x_NewFeatureItem(mapped_feat, ctx,
+ item = ConstRef( x_NewFeatureItem(*it, ctx,
s_NormalizeNullsBetween(loc), m_Feat_Tree,
CFeatureItem::eMapped_from_prot, true,
cdsFeatureItem ) );
*m_ItemOS << item;
prev = curr;
- }); // end of iterate loop
+ }
}
// ============================================================================
-/* $Id: gbseq_formatter.cpp 601813 2020-02-13 18:41:46Z kans $
+/* $Id: gbseq_formatter.cpp 614619 2020-08-20 13:00:42Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
str.append( s_CloseTag(" ", "GBXref"));
str.append( s_CloseTag(" ", "GBReference_xref"));
}
- if ( ref.GetPMID() != 0 ) {
- str.append( s_CombineStrings(" ", "GBReference_pubmed", ref.GetPMID()));
+ if ( ref.GetPMID() != ZERO_ENTREZ_ID ) {
+ str.append( s_CombineStrings(" ", "GBReference_pubmed", ENTREZ_ID_TO(int, ref.GetPMID())));
}
if ( !ref.GetRemark().empty() ) {
str.append( s_CombineStrings(" ", "GBReference_remark", ref.GetRemark()));
-/* $Id: genbank_formatter.cpp 602692 2020-02-28 22:11:47Z kans $
+/* $Id: genbank_formatter.cpp 615046 2020-08-26 13:40:11Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
version_line << version.GetAccession();
if ( version.GetGi() > ZERO_GI ) {
const CFlatFileConfig& cfg = GetContext().GetConfig();
- if (! cfg.HideGI()) {
+ if (! (cfg.HideGI() || cfg.IsPolicyFtp())) {
version_line << " GI:" << version.GetGi();
}
}
x_Consortium(l, ref, ctx);
x_Title(l, ref, ctx);
x_Journal(l, ref, ctx);
- if (ref.GetPMID() == 0) { // suppress MEDLINE if has PUBMED
+ if (ref.GetPMID() == ZERO_ENTREZ_ID) { // suppress MEDLINE if has PUBMED
x_Medline(l, ref, ctx);
}
x_Pubmed(l, ref, ctx);
bool bHtml = ctx.Config().DoHTML();
string strDummy( "[PUBMED-ID]" );
- if ( ref.GetMUID() != 0 ) {
+ if ( ref.GetMUID() != ZERO_ENTREZ_ID) {
Wrap(l, GetWidth(), "MEDLINE", strDummy, eSubp);
}
- string strPubmed( NStr::IntToString( ref.GetMUID() ) );
+ string strPubmed( NStr::NumericToString( ref.GetMUID() ) );
if ( bHtml ) {
string strLink = "<a href=\"";
strLink += strLinkBasePubmed;
CBioseqContext& ctx) const
{
- if ( ref.GetPMID() == 0 ) {
+ if ( ref.GetPMID() == ZERO_ENTREZ_ID) {
return;
}
- string strPubmed = NStr::IntToString( ref.GetPMID() );
+ string strPubmed = NStr::NumericToString( ref.GetPMID() );
if ( ctx.Config().DoHTML() ) {
string strRaw = strPubmed;
strPubmed = "<a href=\"https://www.ncbi.nlm.nih.gov/pubmed/";
// assembly of the actual string:
strLink.reserve(100); // euristical URL length
#ifdef NEW_HTML_FMT
- item.GetContext()->Config().GetHTMLFormatter().FormatLocation(strLink, item.GetFeat().GetLocation(), iGi, strRawKey);
+ item.GetContext()->Config().GetHTMLFormatter().FormatLocation(strLink, item.GetFeat().GetLocation(), GI_TO(TIntId, iGi), strRawKey);
#else
// check if this is a protein or nucleotide link
bool is_prot = false;
fill(line, line+kLineBufferSize, ' ');
// add the span stuff
- TSeqPos length_of_span_before_base_count = 0;
+ length_of_span_before_base_count = 0;
if( bHtml ) {
string kSpan = " <span class=\"ff_line\" id=\"";
kSpan += accn;
-/* $Id: genbank_gather.cpp 602636 2020-02-27 20:27:11Z kans $
+/* $Id: genbank_gather.cpp 612549 2020-07-23 15:33:36Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
{
GATHER_VIA_FUNC(Tsa, x_GatherTLS);
} else if ( ctx.DoContigStyle() ) {
- if ( cfg.ShowContigFeatures() ) {
+ if ( cfg.ShowContigFeatures() || cfg.IsPolicyFtp() ) {
GATHER_VIA_FUNC(FeatAndGap, x_GatherFeatures);
}
else if ( cfg.IsModeEntrez() && m_Current->GetLocation().IsWhole()) {
-/* $Id: genome_project_item.cpp 577454 2019-01-03 22:58:25Z kans $
+/* $Id: genome_project_item.cpp 615791 2020-09-03 18:19:35Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
string strHeader = uo.GetType().GetStr();
if ( NStr::EqualNocase(strHeader, "GenomeProjectsDB")) {
- genome_projects_user_obje = &uo;
- x_SetObject(*desc);
- } else if( NStr::EqualNocase( strHeader, "DBLink" ) ) {
- dblink_user_obj = &uo;
- x_SetObject(*desc);
- }
+ if (! genome_projects_user_obje) {
+ genome_projects_user_obje = &uo;
+ x_SetObject(*desc);
+ }
+ } else if( NStr::EqualNocase( strHeader, "DBLink" ) ) {
+ if (! dblink_user_obj) {
+ dblink_user_obj = &uo;
+ x_SetObject(*desc);
+ }
+ }
}
// process GenomeProjectsDB
-/* $Id: inst_info_map.cpp 601754 2020-02-12 23:10:12Z kans $
+/* $Id: inst_info_map.cpp 611903 2020-07-13 15:51:00Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
static const string s_acbr_base("http://www.acbr-database.at/BioloMICS.aspx?Link=T&DB=0&Table=0&Descr=");
static const string s_atcc_base("http://www.atcc.org/Products/All/");
- static const string s_bccm_base("http://bccm.belspo.be/catalogues/ulc-strain-details?ACCESSION_NUMBER=ULC");
+ static const string s_bccm_base("https://bccm.belspo.be/catalogues/bm-details?accession_number=ULC%20");
static const string s_bcrc_base("https://catalog.bcrc.firdi.org.tw/BSAS_cart/controller?event=SEARCH&bcrc_no=");
static const string s_cas_base("http://collections.calacademy.org/herp/specimen/");
static const string s_cbs_base("http://www.cbs.knaw.nl/collections/BioloMICS.aspx?Fields=All&ExactMatch=T&Table=CBS+strain+database&Name=CBS+");
static const string yp0("0");
+ static const string s_bccm_trim("ULC");
+
static const string s_colon_pfx(":");
static const string s_uscr_pfx("_");
-
+
static const string s_kui_pfx("KUI/");
static const string s_kuit_pfx("KUIT/");
static const string s_psu_pfx("PSU:Mamm:");
typedef SStaticPair<const char*, TVoucherInfoRef> TVoucherInfoElem;
static const TVoucherInfoElem sc_voucher_info_map[] = {
- { "ACBR", TVoucherInfoRef(new SVoucherInfo(&s_acbr_base, false, false, 0, NULL, NULL, &s_acbr_sfx, "Austrian Center of Biological Resources and Applied Mycology") ) },
- { "ATCC", TVoucherInfoRef(new SVoucherInfo(&s_atcc_base, false, false, 0, NULL, NULL, &s_atcc_sfx, "American Type Culture Collection") ) },
- { "BCCM", TVoucherInfoRef(new SVoucherInfo(&s_bccm_base, false, false, 0, NULL, NULL, NULL, "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) },
- { "BCCM/ULC", TVoucherInfoRef(new SVoucherInfo(&s_bccm_base, false, false, 0, NULL, NULL, NULL, "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) },
- { "BCRC", TVoucherInfoRef(new SVoucherInfo(&s_bcrc_base, false, false, 0, NULL, NULL, &s_bcrc_sfx, "Bioresource Collection and Research Center") ) },
- { "CAS:HERP", TVoucherInfoRef(new SVoucherInfo(&s_cas_base, true, false, 0, NULL, &s_colon_pfx, NULL, "California Academy of Sciences, Herpetology collection") ) },
- { "CBS", TVoucherInfoRef(new SVoucherInfo(&s_cbs_base, false, false, 0, NULL, NULL, NULL, "Westerdijk Fungal Biodiversity Institute") ) },
- { "CCAP", TVoucherInfoRef(new SVoucherInfo(&s_ccap_base, false, false, 0, NULL, NULL, NULL, "Culture Collection of Algae and Protozoa") ) },
- { "CCMP", TVoucherInfoRef(new SVoucherInfo(&s_ccmp_base, false, false, 0, NULL, NULL, NULL, "Provasoli-Guillard National Center for Culture of Marine Phytoplankton") ) },
- { "CCUG", TVoucherInfoRef(new SVoucherInfo(&s_ccug_base, false, false, 0, NULL, NULL, NULL, "Culture Collection, University of Goteborg, Department of Clinical Bacteriology") ) },
- { "CFMR", TVoucherInfoRef(new SVoucherInfo(&s_cfmr_base, false, false, 0, NULL, NULL, NULL, "USDA Forest Service, Center for Forest Mycology Research") ) },
- { "CHR", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, &s_uscr_pfx, NULL, "Allan Herbarium, Landcare Research New Zealand Limited") ) },
- { "CRCM:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Washington State University, Charles R. Conner Museum, bird collection") ) },
- { "CUMV:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Cornell University Museum of Vertebrates, Fish Collection") ) },
- { "Coriell", TVoucherInfoRef(new SVoucherInfo(&s_cori_base, false, false, 0, NULL, NULL, NULL, "Coriell Institute for Medical Research") ) },
- { "DGR:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, bird tissue collection") ) },
- { "DGR:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, entomology tissue collection") ) },
- { "DGR:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, fish tissue collection") ) },
- { "DGR:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, herpetology tissue collection") ) },
- { "DGR:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, mammal tissue collection") ) },
- { "DMNS:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Denver Museum of Nature and Science, Ornithology Collections") ) },
- { "DMNS:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Denver Museum of Nature and Science, Mammology Collection") ) },
- { "DMNS:Para", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Denver Museum of Nature and Science, Parasite Collection") ) },
- { "DSM", TVoucherInfoRef(new SVoucherInfo(&s_dsm_base, false, false, 0, NULL, NULL, NULL, "Deutsche Sammlung von Mikroorganismen und Zellkulturen GmbH") ) },
- { "DSMZ", TVoucherInfoRef(new SVoucherInfo(&s_dsmz_base, false, false, 0, NULL, NULL, NULL, "Deutsche Sammlung von Mikroorganismen und Zellkulturen") ) },
- { "EMEC", TVoucherInfoRef(new SVoucherInfo(&s_emec_base, true, false, 0, NULL, NULL, NULL, "Essig Museum") ) },
- { "EMEC:EMEC", TVoucherInfoRef(new SVoucherInfo(&s_emec_base, false, true, 0, NULL, NULL, NULL, "Essig Museum") ) },
- { "EMEC:UCIS", TVoucherInfoRef(new SVoucherInfo(&s_emec_base, false, true, 0, NULL, NULL, NULL, "Essig Museum") ) },
- { "FRR", TVoucherInfoRef(new SVoucherInfo(&s_frr_base, false, false, 0, NULL, NULL, NULL, "Food Science Australia, Ryde") ) },
- { "FSU<DEU>", TVoucherInfoRef(new SVoucherInfo(&s_fsu_base, false, false, 0, NULL, NULL, NULL, "Jena Microbial Resource Collection") ) },
- { "ICMP", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, &s_uscr_pfx, NULL, "International Collection of Microorganisms from Plants") ) },
- { "JCM", TVoucherInfoRef(new SVoucherInfo(&s_jcm_base, false, false, 0, NULL, NULL, NULL, "Japan Collection of Microorganisms") ) },
- { "KCTC", TVoucherInfoRef(new SVoucherInfo(&s_kctc_base, false, false, 0, NULL, NULL, NULL, "Korean Collection for Type Cultures") ) },
- { "KNWR:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Kenai National Wildlife Refuge, Entomology Collection") ) },
- { "KU:I", TVoucherInfoRef(new SVoucherInfo(&s_ku_base, false, false, 0, NULL, &s_kui_pfx, &s_ku_sfx, "University of Kansas, Museum of Natural History, Ichthyology collection") ) },
- { "KU:IT", TVoucherInfoRef(new SVoucherInfo(&s_ku_base, false, false, 0, NULL, &s_kuit_pfx, &s_ku_sfx, "University of Kansas, Museum of Natural History, Ichthyology tissue collection") ) },
- { "KWP:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Kenelm W. Philip Collection, University of Alaska Museum of the North, Lepidoptera collection") ) },
- { "MAFF", TVoucherInfoRef(new SVoucherInfo(&s_maff_base, false, false, 0, NULL, NULL, NULL, "Genebank, Ministry of Agriculture Forestry and Fisheries") ) },
- { "MCZ:Bird", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Ornithology Collection") ) },
- { "MCZ:Cryo", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Cryogenic Collection") ) },
- { "MCZ:Ent", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Entomology Collection") ) },
- { "MCZ:Fish", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Icthyology Collection") ) },
- { "MCZ:Herp", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Herpetology Collection") ) },
- { "MCZ:IP", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Invertebrate Paleontology Collection") ) },
- { "MCZ:IZ", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Invertebrate Zoology Collection") ) },
- { "MCZ:Ich", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Icthyology Collection") ) },
- { "MCZ:Mala", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Malacology Collection") ) },
- { "MCZ:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Mammalogy Collection") ) },
- { "MCZ:Orn", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Ornithology Collection") ) },
- { "MLZ:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Moore Laboratory of Zoology, Occidental College, Bird Collection" ) ) },
- { "MLZ:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Moore Laboratory of Zoology, Occidental College, Mammal Collection" ) ) },
- { "MSB:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Southwestern Biology, Bird Collection") ) },
- { "MSB:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Southwestern Biology, Mammal Collection") ) },
- { "MSB:Para", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Southwestern Biology, Parasitology Collection") ) },
- { "MTCC", TVoucherInfoRef(new SVoucherInfo(&s_mtcc_base, false, false, 0, NULL, NULL, NULL, "Microbial Type Culture Collection & Gene Bank") ) },
- { "MUCL", TVoucherInfoRef(new SVoucherInfo(&s_mucl_base, false, false, 0, NULL, NULL, &s_mucl_sfx, "Mycotheque de l'Universite Catholique de Louvain") ) },
- { "MVZ:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Bird Collection") ) },
- { "MVZ:Egg", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Egg Collection") ) },
- { "MVZ:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Herpetology Collection") ) },
- { "MVZ:Hild", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Milton Hildebrand collection") ) },
- { "MVZ:Img", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Image Collection") ) },
- { "MVZ:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Mammal Collection") ) },
- { "MVZ:Page", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Notebook Page Collection") ) },
- { "MVZObs:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Herpetology Collection") ) },
- { "NBRC", TVoucherInfoRef(new SVoucherInfo(&s_nbrc_base, false, false, 8, &yp0, NULL, NULL, "NITE Biological Resource Center") ) },
- { "NBSB:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "National Biomonitoring Specimen Bank, U.S. Geological Survey, bird collection") ) },
- { "NCIMB", TVoucherInfoRef(new SVoucherInfo(&s_ncimb_base, false, false, 0, NULL, NULL, NULL, "National Collections of Industrial Food and Marine Bacteria (incorporating the NCFB)") ) },
- { "NCTC", TVoucherInfoRef(new SVoucherInfo(&s_nctc_base, false, false, 0, NULL, NULL, NULL, "National Collection of Type Cultures") ) },
- { "NRRL", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_base, false, false, 0, NULL, NULL, NULL, "Agricultural Research Service Culture Collection") ) },
- { "NRRL:MOLD", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_mold, false, false, 0, NULL, NULL, NULL, "Agricultural Research Service Culture Collection, Mold collection") ) },
- { "NRRL:PROK", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_prok, false, false, 0, NULL, NULL, NULL, "Agricultural Research Service Culture Collection, Prokaryotic collection") ) },
- { "NRRL:YEAST", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_yest, false, false, 0, NULL, NULL, NULL, "Agricultural Research Service Culture Collection, Yeast Collection") ) },
- { "NZAC", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, &s_uscr_pfx, NULL, "New Zealand Arthropod Collection") ) },
- { "PCC", TVoucherInfoRef(new SVoucherInfo(&s_pcc_base, false, false, 0, NULL, NULL, NULL, "Pasteur Culture Collection of Cyanobacteria") ) },
- { "PCMB", TVoucherInfoRef(new SVoucherInfo(&s_pcmb_base, false, false, 0, NULL, NULL, NULL, "The Pacific Center for Molecular Biodiversity") ) },
- { "PDD", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, &s_uscr_pfx, NULL, "New Zealand Fungarium") ) },
- { "PSU<USA-OR>:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, false, false, 0, NULL, &s_psu_pfx, NULL, "Portland State University, Vertebrate Biology Museum, Mammal Collection") ) },
- { "PYCC", TVoucherInfoRef(new SVoucherInfo(&s_pycc_base, false, false, 0, NULL, NULL, &s_pycc_sfx, "Portuguese Yeast Culture Collection") ) },
- { "SAG", TVoucherInfoRef(new SVoucherInfo(&s_sag_base, false, false, 0, NULL, NULL, NULL, "Sammlung von Algenkulturen at Universitat Gottingen") ) },
- { "TGRC", TVoucherInfoRef(new SVoucherInfo(&s_tgrc_base, false, false, 0, NULL, NULL, NULL, "C.M. Rick Tomato Genetics Resource Center") ) },
- { "UAM:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Bird Collection") ) },
- { "UAM:Bryo", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Bryozoan Collection") ) },
- { "UAM:Crus", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Marine Arthropod Collection") ) },
- { "UAM:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Insect Collection") ) },
- { "UAM:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Fish Collection") ) },
- { "UAM:Herb", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, UAM Herbarium") ) },
- { "UAM:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Amphibian and Reptile Collection") ) },
- { "UAM:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Mammal Collection") ) },
- { "UAM:Moll", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Mollusc Collection") ) },
- { "UAM:Paleo", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, paleontology collection") ) },
- { "UAMH", TVoucherInfoRef(new SVoucherInfo(&s_uamh_base, false, false, 0, NULL, NULL, NULL, "Centre for Global Microfungal Biodiversity") ) },
- { "UAMObs:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Mammal Collection") ) },
- { "ULC", TVoucherInfoRef(new SVoucherInfo(&s_bccm_base, false, false, 0, NULL, NULL, NULL, "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) },
- { "USNM:Birds", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Division of Birds") ) },
- { "USNM:ENT", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Entomology Collection") ) },
- { "USNM:Fish", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, National Fish Collection") ) },
- { "USNM:Herp", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Division of Amphibians and Reptiles") ) },
- { "USNM:IZ", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Department of Invertebrate Zoology") ) },
- { "USNM:MAMM", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Division of Mammals") ) },
- { "WNMU:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Western New Mexico University Museum, bird collection") ) },
- { "WNMU:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Western New Mexico University Museum, fish collection") ) },
- { "WNMU:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, &s_colon_pfx, NULL, "Western New Mexico University Museum, mammal collection") ) },
- { "YPM:ENT", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, &s_ypment_pfx, NULL, "Yale Peabody Museum of Natural History, Entomology Collection") ) },
- { "YPM:HER", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, &s_ypmher_pfx, NULL, "Yale Peabody Museum of Natural History, Herpetology Collection") ) },
- { "YPM:ICH", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, &s_ypmich_pfx, NULL, "Yale Peabody Museum of Natural History, Ichthyology Collection") ) },
- { "YPM:IZ", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, &s_ypmiz_pfx, NULL, "Yale Peabody Museum of Natural History, Invertebrate Zoology Collection") ) },
- { "YPM:MAM", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, &s_ypmmam_pfx, NULL, "Yale Peabody Museum of Natural History, Mammology Collection") ) },
- { "YPM:ORN", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, &s_ypmorn_pfx, NULL, "Yale Peabody Museum of Natural History, Ornithology Collection") ) }
+ { "ACBR", TVoucherInfoRef(new SVoucherInfo(&s_acbr_base, false, false, 0, NULL, NULL, NULL, &s_acbr_sfx, "Austrian Center of Biological Resources and Applied Mycology") ) },
+ { "ATCC", TVoucherInfoRef(new SVoucherInfo(&s_atcc_base, false, false, 0, NULL, NULL, NULL, &s_atcc_sfx, "American Type Culture Collection") ) },
+ { "BCCM", TVoucherInfoRef(new SVoucherInfo(&s_bccm_base, false, false, 4, &yp0, NULL, NULL, NULL, "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) },
+ { "BCCM/ULC", TVoucherInfoRef(new SVoucherInfo(&s_bccm_base, false, false, 4, &yp0, &s_bccm_trim, NULL, NULL, "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) },
+ // NOTE(review): the added 6th argument is NULL here, as for every entry other than BCCM/ULC; s_bccm_trim ("ULC") is the BCCM/ULC string and the previous BCRC entry had no counterpart for it.
+ { "BCRC", TVoucherInfoRef(new SVoucherInfo(&s_bcrc_base, false, false, 0, NULL, NULL, NULL, &s_bcrc_sfx, "Bioresource Collection and Research Center") ) },
+ { "CAS:HERP", TVoucherInfoRef(new SVoucherInfo(&s_cas_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "California Academy of Sciences, Herpetology collection") ) },
+ { "CBS", TVoucherInfoRef(new SVoucherInfo(&s_cbs_base, false, false, 0, NULL, NULL, NULL, NULL, "Westerdijk Fungal Biodiversity Institute") ) },
+ { "CCAP", TVoucherInfoRef(new SVoucherInfo(&s_ccap_base, false, false, 0, NULL, NULL, NULL, NULL, "Culture Collection of Algae and Protozoa") ) },
+ { "CCMP", TVoucherInfoRef(new SVoucherInfo(&s_ccmp_base, false, false, 0, NULL, NULL, NULL, NULL, "Provasoli-Guillard National Center for Culture of Marine Phytoplankton") ) },
+ { "CCUG", TVoucherInfoRef(new SVoucherInfo(&s_ccug_base, false, false, 0, NULL, NULL, NULL, NULL, "Culture Collection, University of Goteborg, Department of Clinical Bacteriology") ) },
+ { "CFMR", TVoucherInfoRef(new SVoucherInfo(&s_cfmr_base, false, false, 0, NULL, NULL, NULL, NULL, "USDA Forest Service, Center for Forest Mycology Research") ) },
+ { "CHR", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, NULL, &s_uscr_pfx, NULL, "Allan Herbarium, Landcare Research New Zealand Limited") ) },
+ { "CRCM:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Washington State University, Charles R. Conner Museum, bird collection") ) },
+ { "CUMV:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Cornell University Museum of Vertebrates, Fish Collection") ) },
+ { "Coriell", TVoucherInfoRef(new SVoucherInfo(&s_cori_base, false, false, 0, NULL, NULL, NULL, NULL, "Coriell Institute for Medical Research") ) },
+ { "DGR:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, bird tissue collection") ) },
+ { "DGR:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, entomology tissue collection") ) },
+ { "DGR:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, fish tissue collection") ) },
+ { "DGR:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, herpetology tissue collection") ) },
+ { "DGR:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Division of Genomic Resources, University of New Mexico, mammal tissue collection") ) },
+ { "DMNS:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Denver Museum of Nature and Science, Ornithology Collections") ) },
+ { "DMNS:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Denver Museum of Nature and Science, Mammology Collection") ) },
+ { "DMNS:Para", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Denver Museum of Nature and Science, Parasite Collection") ) },
+ { "DSM", TVoucherInfoRef(new SVoucherInfo(&s_dsm_base, false, false, 0, NULL, NULL, NULL, NULL, "Deutsche Sammlung von Mikroorganismen und Zellkulturen GmbH") ) },
+ { "DSMZ", TVoucherInfoRef(new SVoucherInfo(&s_dsmz_base, false, false, 0, NULL, NULL, NULL, NULL, "Deutsche Sammlung von Mikroorganismen und Zellkulturen") ) },
+ { "EMEC", TVoucherInfoRef(new SVoucherInfo(&s_emec_base, true, false, 0, NULL, NULL, NULL, NULL, "Essig Museum") ) },
+ { "EMEC:EMEC", TVoucherInfoRef(new SVoucherInfo(&s_emec_base, false, true, 0, NULL, NULL, NULL, NULL, "Essig Museum") ) },
+ { "EMEC:UCIS", TVoucherInfoRef(new SVoucherInfo(&s_emec_base, false, true, 0, NULL, NULL, NULL, NULL, "Essig Museum") ) },
+ { "FRR", TVoucherInfoRef(new SVoucherInfo(&s_frr_base, false, false, 0, NULL, NULL, NULL, NULL, "Food Science Australia, Ryde") ) },
+ { "FSU<DEU>", TVoucherInfoRef(new SVoucherInfo(&s_fsu_base, false, false, 0, NULL, NULL, NULL, NULL, "Jena Microbial Resource Collection") ) },
+ { "ICMP", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, NULL, &s_uscr_pfx, NULL, "International Collection of Microorganisms from Plants") ) },
+ { "JCM", TVoucherInfoRef(new SVoucherInfo(&s_jcm_base, false, false, 0, NULL, NULL, NULL, NULL, "Japan Collection of Microorganisms") ) },
+ { "KCTC", TVoucherInfoRef(new SVoucherInfo(&s_kctc_base, false, false, 0, NULL, NULL, NULL, NULL, "Korean Collection for Type Cultures") ) },
+ { "KNWR:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Kenai National Wildlife Refuge, Entomology Collection") ) },
+ { "KU:I", TVoucherInfoRef(new SVoucherInfo(&s_ku_base, false, false, 0, NULL, NULL, &s_kui_pfx, &s_ku_sfx, "University of Kansas, Museum of Natural History, Ichthyology collection") ) },
+ { "KU:IT", TVoucherInfoRef(new SVoucherInfo(&s_ku_base, false, false, 0, NULL, NULL, &s_kuit_pfx, &s_ku_sfx, "University of Kansas, Museum of Natural History, Ichthyology tissue collection") ) },
+ { "KWP:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Kenelm W. Philip Collection, University of Alaska Museum of the North, Lepidoptera collection") ) },
+ { "MAFF", TVoucherInfoRef(new SVoucherInfo(&s_maff_base, false, false, 0, NULL, NULL, NULL, NULL, "Genebank, Ministry of Agriculture Forestry and Fisheries") ) },
+ { "MCZ:Bird", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Ornithology Collection") ) },
+ { "MCZ:Cryo", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Cryogenic Collection") ) },
+ { "MCZ:Ent", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Entomology Collection") ) },
+ { "MCZ:Fish", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Icthyology Collection") ) },
+ { "MCZ:Herp", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Herpetology Collection") ) },
+ { "MCZ:IP", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Invertebrate Paleontology Collection") ) },
+ { "MCZ:IZ", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Invertebrate Zoology Collection") ) },
+ { "MCZ:Ich", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Icthyology Collection") ) },
+ { "MCZ:Mala", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Malacology Collection") ) },
+ { "MCZ:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Mammalogy Collection") ) },
+ { "MCZ:Orn", TVoucherInfoRef(new SVoucherInfo(&s_mcz_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Harvard Museum of Comparative Zoology, Ornithology Collection") ) },
+ { "MLZ:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Moore Laboratory of Zoology, Occidental College, Bird Collection" ) ) },
+ { "MLZ:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Moore Laboratory of Zoology, Occidental College, Mammal Collection" ) ) },
+ { "MSB:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Southwestern Biology, Bird Collection") ) },
+ { "MSB:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Southwestern Biology, Mammal Collection") ) },
+ { "MSB:Para", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Southwestern Biology, Parasitology Collection") ) },
+ { "MTCC", TVoucherInfoRef(new SVoucherInfo(&s_mtcc_base, false, false, 0, NULL, NULL, NULL, NULL, "Microbial Type Culture Collection & Gene Bank") ) },
+ { "MUCL", TVoucherInfoRef(new SVoucherInfo(&s_mucl_base, false, false, 0, NULL, NULL, NULL, &s_mucl_sfx, "Mycotheque de l'Universite Catholique de Louvain") ) },
+ { "MVZ:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Bird Collection") ) },
+ { "MVZ:Egg", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Egg Collection") ) },
+ { "MVZ:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Herpetology Collection") ) },
+ { "MVZ:Hild", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Milton Hildebrand collection") ) },
+ { "MVZ:Img", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Image Collection") ) },
+ { "MVZ:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Mammal Collection") ) },
+ { "MVZ:Page", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Notebook Page Collection") ) },
+ { "MVZObs:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Museum of Vertebrate Zoology, University of California at Berkeley, Herpetology Collection") ) },
+ // NOTE(review): yp0 ("0") is kept only in the 5th argument, as in the previous NBRC entry; the added 6th argument is presumably the trim string (see BCCM/ULC) and stays NULL - confirm against SVoucherInfo.
+ { "NBRC", TVoucherInfoRef(new SVoucherInfo(&s_nbrc_base, false, false, 8, &yp0, NULL, NULL, NULL, "NITE Biological Resource Center") ) },
+ { "NBSB:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "National Biomonitoring Specimen Bank, U.S. Geological Survey, bird collection") ) },
+ { "NCIMB", TVoucherInfoRef(new SVoucherInfo(&s_ncimb_base, false, false, 0, NULL, NULL, NULL, NULL, "National Collections of Industrial Food and Marine Bacteria (incorporating the NCFB)") ) },
+ { "NCTC", TVoucherInfoRef(new SVoucherInfo(&s_nctc_base, false, false, 0, NULL, NULL, NULL, NULL, "National Collection of Type Cultures") ) },
+ { "NRRL", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_base, false, false, 0, NULL, NULL, NULL, NULL, "Agricultural Research Service Culture Collection") ) },
+ { "NRRL:MOLD", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_mold, false, false, 0, NULL, NULL, NULL, NULL, "Agricultural Research Service Culture Collection, Mold collection") ) },
+ { "NRRL:PROK", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_prok, false, false, 0, NULL, NULL, NULL, NULL, "Agricultural Research Service Culture Collection, Prokaryotic collection") ) },
+ { "NRRL:YEAST", TVoucherInfoRef(new SVoucherInfo(&s_nrrl_yest, false, false, 0, NULL, NULL, NULL, NULL, "Agricultural Research Service Culture Collection, Yeast Collection") ) },
+ { "NZAC", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, NULL, &s_uscr_pfx, NULL, "New Zealand Arthropod Collection") ) },
+ { "PCC", TVoucherInfoRef(new SVoucherInfo(&s_pcc_base, false, false, 0, NULL, NULL, NULL, NULL, "Pasteur Culture Collection of Cyanobacteria") ) },
+ { "PCMB", TVoucherInfoRef(new SVoucherInfo(&s_pcmb_base, false, false, 0, NULL, NULL, NULL, NULL, "The Pacific Center for Molecular Biodiversity") ) },
+ { "PDD", TVoucherInfoRef(new SVoucherInfo(&s_lcr_base, true, false, 0, NULL, NULL, &s_uscr_pfx, NULL, "New Zealand Fungarium") ) },
+ { "PSU<USA-OR>:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, false, false, 0, NULL, NULL, &s_psu_pfx, NULL, "Portland State University, Vertebrate Biology Museum, Mammal Collection") ) },
+ { "PYCC", TVoucherInfoRef(new SVoucherInfo(&s_pycc_base, false, false, 0, NULL, NULL, NULL, &s_pycc_sfx, "Portuguese Yeast Culture Collection") ) },
+ { "SAG", TVoucherInfoRef(new SVoucherInfo(&s_sag_base, false, false, 0, NULL, NULL, NULL, NULL, "Sammlung von Algenkulturen at Universitat Gottingen") ) },
+ { "TGRC", TVoucherInfoRef(new SVoucherInfo(&s_tgrc_base, false, false, 0, NULL, NULL, NULL, NULL, "C.M. Rick Tomato Genetics Resource Center") ) },
+ { "UAM:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Bird Collection") ) },
+ { "UAM:Bryo", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Bryozoan Collection") ) },
+ { "UAM:Crus", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Marine Arthropod Collection") ) },
+ { "UAM:Ento", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Insect Collection") ) },
+ { "UAM:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Fish Collection") ) },
+ { "UAM:Herb", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, UAM Herbarium") ) },
+ { "UAM:Herp", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Amphibian and Reptile Collection") ) },
+ { "UAM:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Mammal Collection") ) },
+ { "UAM:Moll", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Mollusc Collection") ) },
+ { "UAM:Paleo", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, paleontology collection") ) },
+ { "UAMH", TVoucherInfoRef(new SVoucherInfo(&s_uamh_base, false, false, 0, NULL, NULL, NULL, NULL, "Centre for Global Microfungal Biodiversity") ) },
+ { "UAMObs:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "University of Alaska, Museum of the North, Mammal Collection") ) },
+ { "ULC", TVoucherInfoRef(new SVoucherInfo(&s_bccm_base, false, false, 4, &yp0, &s_bccm_trim, NULL, NULL, "Belgian Coordinated Collections of Microorganisms / ULC Cyanobacteria Collection") ) },
+ { "USNM:Birds", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Division of Birds") ) },
+ { "USNM:ENT", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Entomology Collection") ) },
+ { "USNM:Fish", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, National Fish Collection") ) },
+ { "USNM:Herp", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Division of Amphibians and Reptiles") ) },
+ { "USNM:IZ", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Department of Invertebrate Zoology") ) },
+ { "USNM:MAMM", TVoucherInfoRef(new SVoucherInfo(&s_usnm_base, false, true, 0, NULL, NULL, &s_colon_pfx, NULL, "National Museum of Natural History, Smithsonian Institution, Division of Mammals") ) },
+ { "WNMU:Bird", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Western New Mexico University Museum, bird collection") ) },
+ { "WNMU:Fish", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Western New Mexico University Museum, fish collection") ) },
+ { "WNMU:Mamm", TVoucherInfoRef(new SVoucherInfo(&s_uam_base, true, false, 0, NULL, NULL, &s_colon_pfx, NULL, "Western New Mexico University Museum, mammal collection") ) },
+ { "YPM:ENT", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, NULL, &s_ypment_pfx, NULL, "Yale Peabody Museum of Natural History, Entomology Collection") ) },
+ { "YPM:HER", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, NULL, &s_ypmher_pfx, NULL, "Yale Peabody Museum of Natural History, Herpetology Collection") ) },
+ { "YPM:ICH", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, NULL, &s_ypmich_pfx, NULL, "Yale Peabody Museum of Natural History, Ichthyology Collection") ) },
+ { "YPM:IZ", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, NULL, &s_ypmiz_pfx, NULL, "Yale Peabody Museum of Natural History, Invertebrate Zoology Collection") ) },
+ { "YPM:MAM", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, NULL, &s_ypmmam_pfx, NULL, "Yale Peabody Museum of Natural History, Mammology Collection") ) },
+ { "YPM:ORN", TVoucherInfoRef(new SVoucherInfo(&s_ypm_base, false, false, 6, &yp0, NULL, &s_ypmorn_pfx, NULL, "Yale Peabody Museum of Natural History, Ornithology Collection") ) }
};
typedef CStaticArrayMap<const char*, TVoucherInfoRef, PCase_CStr> TVoucherInfoMap;
DEFINE_STATIC_ARRAY_MAP(TVoucherInfoMap, sc_VoucherInfoMap, sc_voucher_info_map);
-/* $Id: inst_info_map.hpp 567275 2018-07-16 20:27:34Z kans $
+/* $Id: inst_info_map.hpp 611903 2020-07-13 15:51:00Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
bool prependCollection,
int pad_to,
const string *pad_with,
+ const string *trim,
const string *prefix,
const string *suffix,
const char *inst_full_name ):
m_PrependCollection(prependCollection),
m_PadTo(pad_to),
m_PadWith(pad_with),
+ m_Trim(trim),
m_Prefix(prefix),
m_Suffix(suffix),
m_InstFullName(inst_full_name) { }
bool m_PrependCollection;
int m_PadTo;
const string *m_PadWith;
+ const string *m_Trim;
const string *m_Prefix;
const string *m_Suffix;
const char *m_InstFullName;
-/* $Id: primary_item.cpp 577454 2019-01-03 22:58:25Z kans $
+/* $Id: primary_item.cpp 610065 2020-06-10 17:10:26Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
string str;
string s;
- s.reserve(80);
+ string r;
+ s.reserve(82);
CConstRef<CSeq_id> other_id;
TSignedSeqPos last_stop = -1;
ITERATE( TAlnConstList, it, seglist ) {
s.erase();
+ r.erase();
const CSeq_align& align = **it;
TSeqPos this_start = align.GetSeqStart(0);
}
s += tid;
s.resize(39, ' ');
- s += NStr::IntToString(align.GetSeqStart(1) + 1) + '-' +
+ r = NStr::IntToString(align.GetSeqStart(1) + 1) + '-' +
NStr::IntToString(align.GetSeqStop(1) + 1);
+ s += r;
ENa_strand s0 = align.GetSeqStrand(0);
ENa_strand s1 = align.GetSeqStrand(1);
if (s0 != s1) {
- s.resize(59, ' ');
+ if (r.length() > 20) {
+ s.resize(61, ' ');
+ } else {
+ s.resize(59, ' ');
+ }
s += 'c';
}
-/* $Id: qualifiers.cpp 578574 2019-01-22 18:30:29Z kans $
+/* $Id: qualifiers.cpp 615036 2020-08-26 13:38:52Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
} else {
bool add_dash = false;
+ // RW-922 only make one link from GO:id - text.
+ go_text.clear();
if (go_id != NULL) {
- go_text = string( "GO:" );
if( is_html ) {
go_text += "<a href=\"";
go_text += strLinkBaseGeneOntology + *go_id + "\">";
}
+ go_text += string( "GO:" );
go_text += *go_id;
- if( is_html ) {
- go_text += "</a>";
- }
add_dash = true;
- } else {
- go_text.clear();
}
if ( text_string != 0 && text_string->length() > 0 ) {
if (add_dash) {
go_text += string( " - " );
}
- if( is_html && go_id != NULL ) {
- go_text += "<a href=\"";
- go_text += strLinkBaseGeneOntology + *go_id + "\">";
- }
// NO, we NO LONGER have the dash here even if there's no go_id (RETAIN compatibility with CHANGE in C)
go_text += *text_string;
- if( is_html && go_id != NULL ) {
- go_text += "</a>";
- }
+ }
+ if( is_html && go_id != NULL ) {
+ go_text += "</a>";
}
if ( evidence != 0 ) {
go_text += string( " [Evidence " ) + *evidence + string( "]" );
string value;
string pub_id_str;
int serial = (*ref_iter)->GetSerial();
- int pmid = (*ref_iter)->GetPMID();
+ TEntrezId pmid = (*ref_iter)->GetPMID();
if (serial) {
pub_id_str = NStr::IntToString(serial);
- } else if (pmid) {
- pub_id_str = NStr::IntToString(pmid);
+ } else if (pmid != ZERO_ENTREZ_ID) {
+ pub_id_str = NStr::NumericToString(pmid);
}
/*
string pub_id_str =
NStr::IntToString((*ref_iter)->GetSerial()));
*/
- if(bHtml && pmid) {
+ if(bHtml && pmid != ZERO_ENTREZ_ID) {
// create a link
value = "[<a href=\"";
- value += strLinkBasePubmed + NStr::IntToString(pmid) + "\">" + pub_id_str + "</a>]";
+ value += strLinkBasePubmed + NStr::NumericToString(pmid) + "\">" + pub_id_str + "</a>]";
} else {
value = '[' + pub_id_str + ']';
}
CPub_set_Base::TPub::iterator pub_iter = unusedPubs.begin();
for (; pub_iter != unusedPubs.end(); ++pub_iter) {
if ((*pub_iter)->IsPmid()) {
- const int pmid = (*pub_iter)->GetPmid().Get();
+ const TEntrezId pmid = (*pub_iter)->GetPmid().Get();
string pmid_str = NStr::NumericToString(pmid);
pubmed = "[PUBMED ";
if (bHtml) {
if ( m_Value->IsGi() ) {
if ( m_GiPrefix ) {
id_str = "GI:";
- if (ctx.Config().HideGI() && name == "db_xref") return;
+ if ((ctx.Config().HideGI() || ctx.Config().IsPolicyFtp()) && name == "db_xref") return;
}
m_Value->GetLabel(&id_str, CSeq_id::eContent);
} else {
-/* $Id: reference_item.cpp 604101 2020-03-23 12:20:44Z ivanov $
+/* $Id: reference_item.cpp 615039 2020-08-26 13:39:14Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
/////////////////////////////////////////////////////////////////////////////
CCacheItem::CCacheItem(CBioseqContext& ctx, TCache csh, int length, bool is_prot) :
+ CFlatItem(&ctx),
m_Cache(csh),
m_Length(length),
- m_IsProt(is_prot),
- CFlatItem(&ctx)
+ m_IsProt(is_prot)
{
}
CReferenceItem::CReferenceItem(const CSeqdesc& desc, CBioseqContext& ctx) :
CFlatItem(&ctx), m_PubType(ePub_not_set), m_Category(eUnknown),
- m_PatentId(0), m_PMID(0), m_MUID(0), m_Serial(kMax_Int),
+ m_PatentId(0), m_PMID(ZERO_ENTREZ_ID), m_MUID(ZERO_ENTREZ_ID), m_Serial(kMax_Int),
m_JustUids(true), m_Elect(false)
{
_ASSERT(desc.IsPub());
CBioseqContext& ctx,
const CSeq_loc* loc) :
CFlatItem(&ctx), m_PubType(ePub_not_set), m_Category(eUnknown),
- m_PatentId(0), m_PMID(0), m_MUID(0), m_Serial(kMax_Int),
+ m_PatentId(0), m_PMID(ZERO_ENTREZ_ID), m_MUID(ZERO_ENTREZ_ID), m_Serial(kMax_Int),
m_JustUids(true), m_Elect(false)
{
_ASSERT(feat.GetData().IsPub());
CReferenceItem::CReferenceItem(const CSubmit_block& sub, CBioseqContext& ctx) :
CFlatItem(&ctx), m_PubType(ePub_sub), m_Category(eSubmission),
- m_PatentId(0), m_PMID(0), m_MUID(0), m_Serial(kMax_Int),
+ m_PatentId(0), m_PMID(ZERO_ENTREZ_ID), m_MUID(ZERO_ENTREZ_ID), m_Serial(kMax_Int),
m_JustUids(false), m_Elect(false)
{
x_SetObject(sub);
}}
// same PMID ( and overlap )
- if( curr_ref.GetPMID() != 0 && prev_ref.GetPMID() != 0 ) {
+ if( curr_ref.GetPMID() != ZERO_ENTREZ_ID && prev_ref.GetPMID() != ZERO_ENTREZ_ID) {
return ( curr_ref.GetPMID() == prev_ref.GetPMID() );
}
// same MUID ( and overlap )
- if( curr_ref.GetMUID() != 0 && prev_ref.GetMUID() != 0 ) {
+ if( curr_ref.GetMUID() != ZERO_ENTREZ_ID && prev_ref.GetMUID() != ZERO_ENTREZ_ID) {
return ( curr_ref.GetMUID() == prev_ref.GetMUID() );
}
}}
// most merging ops are only done if muid or pmid match
- const bool same_muid = ( curr_ref.GetMUID() != 0 && (prev_ref.GetMUID() == curr_ref.GetMUID()) );
- const bool same_pmid = ( curr_ref.GetPMID() != 0 && (prev_ref.GetPMID() == curr_ref.GetPMID()) );
+ const bool same_muid = ( curr_ref.GetMUID() != ZERO_ENTREZ_ID && (prev_ref.GetMUID() == curr_ref.GetMUID()) );
+ const bool same_pmid = ( curr_ref.GetPMID() != ZERO_ENTREZ_ID && (prev_ref.GetPMID() == curr_ref.GetPMID()) );
if( (same_muid || same_pmid) &&
( prev_ref.GetRemark() != curr_ref.GetRemark() ) )
{
{{
// you can only compare on unique string if the reference
// does not have a pmid or muid (example accession: L40362.1)
- if( GetMUID() == 0 && GetPMID() == 0 ) {
+ if( GetMUID() == ZERO_ENTREZ_ID && GetPMID() == ZERO_ENTREZ_ID) {
x_CreateUniqueStr();
const string& uniquestr = m_UniqueStr;
switch(pub.Which()) {
case CPub::e_Pmid:
{
- const int pmid = pub.GetPmid().Get();
+ const TEntrezId pmid = pub.GetPmid().Get();
CPubMedId req(pmid);
CMLAClient::TReply reply;
break;
case CPub::e_Muid:
{
- const int muid = pub.GetMuid();
+ const TEntrezId muid = pub.GetMuid();
// RW-1040: removed mlaClient.AskUidtopmid and AskGetpubpmid
}
break;
break;
case CPub::e_Muid:
- if (m_MUID == 0) {
+ if (m_MUID == ZERO_ENTREZ_ID) {
m_MUID = pub.GetMuid();
m_Category = ePublished;
}
break;
case CPub::e_Pmid:
- if (m_PMID == 0) {
- m_PMID = pub.GetPmid();
+ if (m_PMID == ZERO_ENTREZ_ID) {
+ m_PMID = pub.GetPmid().Get();
m_Category = ePublished;
}
break;
}
// MUID
- if (gen.CanGetMuid() && m_MUID == 0) {
+ if (gen.CanGetMuid() && m_MUID == ZERO_ENTREZ_ID) {
m_MUID = gen.GetMuid();
}
// PMID
- if (gen.CanGetPmid() && m_PMID == 0) {
- m_PMID = gen.GetPmid();
+ if (gen.CanGetPmid() && m_PMID == ZERO_ENTREZ_ID) {
+ m_PMID = gen.GetPmid().Get();
}
}
{
m_Category = ePublished;
- if (mle.CanGetUid() && m_MUID == 0) {
+ if (mle.CanGetUid() && m_MUID == ZERO_ENTREZ_ID) {
m_MUID = mle.GetUid();
}
- if (mle.CanGetPmid() && m_PMID == 0) {
- m_PMID = mle.GetPmid();
+ if (mle.CanGetPmid() && m_PMID == ZERO_ENTREZ_ID) {
+ m_PMID = mle.GetPmid().Get();
}
if (mle.CanGetCit()) {
ITERATE (CArticleIdSet::Tdata, it, art.GetIds().Get()) {
switch ((*it)->Which()) {
case CArticleId::e_Pubmed:
- if (m_PMID == 0) {
- m_PMID = (*it)->GetPubmed();
+ if (m_PMID == ZERO_ENTREZ_ID) {
+ m_PMID = (*it)->GetPubmed().Get();
}
break;
case CArticleId::e_Medline:
- if (m_MUID == 0) {
- m_MUID = (*it)->GetMedline();
+ if (m_MUID == ZERO_ENTREZ_ID) {
+ m_MUID = (*it)->GetMedline().Get();
}
break;
case CArticleId::e_Doi:
// no DOIs pritned if there's a pmid or muid
bool hasPmidOrMuid = false;
ITERATE( CArticleIdSet_Base::Tdata, it, ids.Get() ) {
- if( (*it)->IsPubmed() && (*it)->GetPubmed().Get() != 0 ) {
+ if( (*it)->IsPubmed() && (*it)->GetPubmed().Get() != ZERO_ENTREZ_ID ) {
hasPmidOrMuid = true;
break;
- } else if( (*it)->IsMedline() && (*it)->GetMedline().Get() != 0 ) {
+ } else if( (*it)->IsMedline() && (*it)->GetMedline().Get() != ZERO_ENTREZ_ID ) {
hasPmidOrMuid = true;
break;
}
// after: dates are the same, or both missing.
// distinguish by uids (swap order for RefSeq)
- if ( ref1->GetPMID() != 0 && ref2->GetPMID() != 0 &&
+ if ( ref1->GetPMID() != ZERO_ENTREZ_ID && ref2->GetPMID() != ZERO_ENTREZ_ID &&
!(ref1->GetPMID() == ref2->GetPMID()) ) {
return m_IsRefSeq ? (ref1->GetPMID() > ref2->GetPMID()) :
(ref1->GetPMID() < ref2->GetPMID());
}
- if ( ref1->GetMUID() != 0 && ref2->GetMUID() != 0 &&
+ if ( ref1->GetMUID() != ZERO_ENTREZ_ID && ref2->GetMUID() != ZERO_ENTREZ_ID &&
!(ref1->GetMUID() == ref2->GetMUID()) ) {
return m_IsRefSeq ? (ref1->GetMUID() > ref2->GetMUID()) :
(ref1->GetMUID() < ref2->GetMUID());
}
// just uids goes last
- if ( (ref1->GetPMID() != 0 && ref2->GetPMID() != 0) ||
- (ref1->GetMUID() != 0 && ref2->GetMUID() != 0) ) {
+ if ( (ref1->GetPMID() != ZERO_ENTREZ_ID && ref2->GetPMID() != ZERO_ENTREZ_ID) ||
+ (ref1->GetMUID() != ZERO_ENTREZ_ID && ref2->GetMUID() != ZERO_ENTREZ_ID) ) {
if ( ref1->IsJustUids() && !ref2->IsJustUids() ) {
return true;
} else if ( !ref1->IsJustUids() && ref2->IsJustUids() ) {
-/* $Id: source_item.cpp 577454 2019-01-03 22:58:25Z kans $
+/* $Id: source_item.cpp 614736 2020-08-21 13:43:48Z fukanchi $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
BEGIN_SCOPE(objects)
-const CSourceItem::TTaxid CSourceItem::kInvalidTaxid = -1;
+const CSourceItem::TTaxid CSourceItem::kInvalidTaxid = INVALID_TAX_ID;
///////////////////////////////////////////////////////////////////////////
// Taxid
{{
TTaxid taxid = org.GetTaxId();
- if (taxid != 0) {
+ if (taxid != ZERO_TAX_ID) {
m_Taxid = taxid;
}
}}
-/* $Id: listener.cpp 600608 2020-01-23 17:32:17Z foleyjp $
+/* $Id: listener.cpp 608330 2020-05-14 16:03:45Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
bool
CObjtoolsListener::PutMessage(const IObjtoolsMessage& message)
{
- m_Messages.emplace_back(dynamic_cast<IObjtoolsMessage*>(message.Clone()));
+ m_Messages.emplace_back(message.Clone());
return true;
}
-/* $Id: psg_client.cpp 605160 2020-04-07 18:06:40Z ivanov $
+/* $Id: psg_client.cpp 612393 2020-07-21 13:51:24Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
os << ioc.GetClientId();
+ if (const auto hops = user_request->m_Hops) os << "&hops=" << hops;
return os.str();
}
return m_Impl->Empty();
}
+/// Return the API lock object; this simply forwards to SImpl::GetApiLock().
+CPSG_Queue::TApiLock CPSG_Queue::GetApiLock()
+{
+ return SImpl::GetApiLock();
+}
+
END_NCBI_SCOPE
#ifndef OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_IMPL_HPP
#define OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_IMPL_HPP
-/* $Id: psg_client_impl.hpp 598004 2019-12-02 22:13:17Z sadyrovr $
+/* $Id: psg_client_impl.hpp 612393 2020-07-21 13:51:24Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
bool SendRequest(shared_ptr<const CPSG_Request> request, const CDeadline& deadline);
+ static TApiLock GetApiLock() { return CService::GetMap(); }
+
private:
class CService
{
using TMap = unordered_map<string, unique_ptr<SPSG_IoCoordinator>>;
SPSG_IoCoordinator& GetIoC(const string& service);
- static shared_ptr<TMap> GetMap();
shared_ptr<TMap> m_Map;
static pair<mutex, weak_ptr<TMap>> sm_Instance;
SPSG_IoCoordinator& ioc;
CService(const string& service) : m_Map(GetMap()), ioc(GetIoC(service)) {}
+
+ static shared_ptr<TMap> GetMap();
};
string x_GetAbsPathRef(shared_ptr<const CPSG_Request> user_request);
-/* $Id: psg_client_transport.cpp 608076 2020-05-11 17:59:21Z ivanov $
+/* $Id: psg_client_transport.cpp 609548 2020-06-03 17:22:06Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
#define __STDC_FORMAT_MACROS
#include <nghttp2/nghttp2.h>
+#include <corelib/version.hpp>
#include <corelib/request_status.hpp>
#include "psg_client_transport.hpp"
-/* $Id: aln_reader.cpp 602230 2020-02-19 15:48:48Z foleyjp $
+/* $Id: aln_reader.cpp 610753 2020-06-23 18:10:35Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CAlnReader::CAlnReader(CNcbiIstream& is, FValidateIds fValidateIds) :
m_fValidateIds(fValidateIds),
- m_IS(is), m_ReadDone(false), m_ReadSucceeded(false),
m_AlignFormat(EAlignFormat::UNKNOWN),
+ m_IS(is), m_ReadDone(false), m_ReadSucceeded(false),
m_UseNexusInfo(true)
{
m_Errors.clear();
}
-static void
-sReportError(
- ILineErrorListener* pEC,
- EDiagSev severity,
- const string& seqId,
- int lineNumber,
- const string& message,
- ILineError::EProblem problemType=ILineError::eProblem_GeneralParsingError)
-{
- sReportError(pEC, severity, eReader_Alignment, 0, seqId, lineNumber, message, problemType);
-}
-
void CAlnReader::Read(
TReadFlags readFlags,
ncbi::objects::ILineErrorListener* pErrorListener)
"Only one sequence was detected in the alignment file. An alignment file must contain more than one sequence.");
}
- const auto numSequences = alignmentInfo.NumSequences();
m_Seqs.assign(alignmentInfo.mSequences.begin(), alignmentInfo.mSequences.end());
m_Entry = new CSeq_entry();
CRef<CSeq_align> seq_align = GetSeqAlign(fasta_flags, pErrorListener);
- const CDense_seg& denseg = seq_align->GetSegs().GetDenseg();
- _ASSERT(denseg.GetIds().size() == m_Dim);
-
CRef<CSeq_annot> seq_annot (new CSeq_annot);
seq_annot->SetData().SetAlign().push_back(seq_align);
// seq-id(s)
auto& ids = pSubEntry->SetSeq().SetId();
- //ids.push_back(denseg.GetIds()[row_i]);
ids = m_Ids[row_i];
// mol
/*
- * $Id: aln_scanner_clustal.cpp 589468 2019-07-11 14:51:16Z kornbluh $
+ * $Id: aln_scanner_clustal.cpp 610753 2020-06-23 18:10:35Z ivanov $
*
* ===========================================================================
*
bool inBlock = false;
int blockLineLength = 0;
int blockCount = 0;
- bool firstBlock = true;
int numSeqs = 0;
int seqCount = 0;
- int maxSeqCount = 0;
string line;
int lineCount = 0;
/*
- * $Id: aln_scanner_nexus.cpp 599135 2019-12-19 16:40:05Z foleyjp $
+ * $Id: aln_scanner_nexus.cpp 610753 2020-06-23 18:10:35Z ivanov $
*
* ===========================================================================
*
}
string seqData = NStr::Join(tokens.begin()+1, tokens.end(), "");
- auto dataSize = seqData.size();
+ const int dataSize = seqData.size();
// ----------------------------------------------------------------------------
-int
+size_t
CAlnScannerNexus::sFindCharOutsideComment(
char c,
const string& line,
size_t startPos)
// ----------------------------------------------------------------------------
{
- for (int index=startPos; index<line.size(); ++index) {
+ for (auto index=startPos; index<line.size(); ++index) {
if (line[index] == '[') {
++numUnmatchedLeftBrackets;
}
}
if (!commandTokens.empty()) {
- auto commandStartLine = commandTokens.front().mNumLine;
string description =
"Terminating semicolon missing from command. Commands in a Nexus file must end with a semicolon.";
throw SShowStopper(
return;
}
- list<pair<int, int>> commentLimits;
- int index=0;
- int start=0;
- int stop;
+ list<pair<size_t, size_t>> commentLimits;
+ size_t index=0;
+ size_t start=0;
+ size_t stop;
while (index < line.size()) {
const auto& c = line[index];
if (c == '[') {
return;
}
- list<pair<int, int>> commentLimits;
- int start=0;
- int stop;
+ list<pair<size_t,size_t>> commentLimits;
+ size_t start=0;
+ size_t stop;
if (!inCommand &&
(numUnmatchedLeftBrackets == 0) &&
const auto len = line.size();
- for (int index=0; index<len; ++index) {
+ for (size_t index=0; index<len; ++index) {
const auto& c = line[index];
if (inCommand) {
#define _ALN_SCANNER_NEXUS_HPP_
/*
- * $Id: aln_scanner_nexus.hpp 585192 2019-04-24 19:38:23Z foleyjp $
+ * $Id: aln_scanner_nexus.hpp 610753 2020-06-23 18:10:35Z ivanov $
*
* ===========================================================================
*
// ============================================================================
{
public:
- CAlnScannerNexus():
- mGapChar(0), mMissingChar(0), mMatchChar(0) {};
+// CAlnScannerNexus():
+// mGapChar(0), mMissingChar(0), mMatchChar(0) {};
~CAlnScannerNexus() {};
TDeflines& SetDeflines(void) { return mDeflines; }
int &numUnmatchedLeftBrackets,
bool &inCommand);
- static int sFindCharOutsideComment(
+ static size_t sFindCharOutsideComment(
char c,
const string& line,
int &numUnmatchedLeftBrackets,
int mNumSequences = 0;
int mSequenceSize = 0;
- char mMatchChar;
- char mMissingChar;
- char mGapChar;
+ char mMatchChar=0;
+ char mMissingChar=0;
+ char mGapChar=0;
bool mInBlock=false;
string mCurrentBlock;
int mBlockStartLine;
-/* $Id: descr_mod_apply.cpp 601793 2020-02-13 16:02:42Z foleyjp $
+/* $Id: descr_mod_apply.cpp 610757 2020-06-23 18:10:59Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
const auto& value = x_GetModValue(mod_entry);
m_pDescrCache->SetBioSource().SetOrg().SetTaxname(value);
if (!preserve_taxid &&
- m_pDescrCache->SetBioSource().GetOrg().GetTaxId()) {
+ m_pDescrCache->SetBioSource().GetOrg().GetTaxId() != ZERO_ENTREZ_ID) {
// clear taxid if it does not occur in this modifier set
- m_pDescrCache->SetBioSource().SetOrg().SetTaxId(0);
+ m_pDescrCache->SetBioSource().SetOrg().SetTaxId(ZERO_ENTREZ_ID);
}
return true;
}
if (name == "taxid") {
const auto& value = x_GetModValue(mod_entry);
- int taxid;
+ TTaxId taxid;
try {
- taxid = NStr::StringToInt(value);
+ taxid = NStr::StringToNumeric<TTaxId>(value);
}
catch (...) {
x_ReportInvalidValue(mod_entry.second.front(), "Integer value expected.");
for (const auto& mod : mod_entry.second)
{
const auto& value = mod.GetValue();
- int pmid;
+ TEntrezId pmid;
try {
- pmid = NStr::StringToInt(value);
+ pmid = NStr::StringToNumeric<TEntrezId>(value);
}
catch(...) {
x_ReportInvalidValue(mod_entry.second.front(), "Expected integer value.");
CUser_object& CDescrCache::SetFileTrack()
{
return x_SetDescriptor(eFileTrack,
- [this](const CSeqdesc& desc) {
+ [](const CSeqdesc& desc) {
return (desc.IsUser() && s_IsUserType(desc.GetUser(), "FileTrack"));
},
[this]() {
CUser_object& CDescrCache::SetTpaAssembly()
{
return x_SetDescriptor(eTpa,
- [this](const CSeqdesc& desc) {
+ [](const CSeqdesc& desc) {
return (desc.IsUser() && s_IsUserType(desc.GetUser(), "TpaAssembly"));
},
[this]() {
CUser_object& CDescrCache::SetGenomeProjects()
{
return x_SetDescriptor(eGenomeProjects,
- [this](const CSeqdesc& desc) {
+ [](const CSeqdesc& desc) {
return (desc.IsUser() && s_IsUserType(desc.GetUser(), "GenomeProjectsDB"));
},
[this]() {
-/* $Id: fasta.cpp 600608 2020-01-23 17:32:17Z foleyjp $
+/* $Id: fasta.cpp 612524 2020-07-23 11:37:59Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
CFastaReader::CFastaReader(ILineReader& reader, TFlags flags, FIdCheck f_idcheck)
: m_LineReader(&reader), m_MaskVec(0),
- m_IDGenerator(new CSeqIdGenerator()),
m_gapNmin(0), m_gap_Unknown_length(0),
m_MaxIDLength(kMax_UI4),
m_fIdCheck(f_idcheck)
CFastaReader::CFastaReader(CReaderBase::TReaderFlags fBaseFlags, TFlags flags, FIdCheck f_idcheck)
: CReaderBase(fBaseFlags), m_MaskVec(0),
- m_IDGenerator(new CSeqIdGenerator),
m_gapNmin(0), m_gap_Unknown_length(0),
m_MaxIDLength(kMax_UI4),
m_fIdCheck(f_idcheck)
ParseDefLine(">", pMessageListener);
need_defline = false;
} else {
+ const auto lineNum = LineNumber();
GetLineReader().UngetLine();
NCBI_THROW2(CObjReaderParseException, eNoDefline,
"CFastaReader: Input doesn't start with"
- " a defline or comment around line " + NStr::NumericToString(LineNumber()),
- LineNumber() );
+ " a defline or comment around line " + NStr::NumericToString(lineNum),
+ lineNum);
}
}
CFastaDeflineReader::s_MaxLocalIDLength =
CFastaDeflineReader::s_MaxGeneralTagLength =
CFastaDeflineReader::s_MaxAccessionLength = m_MaxIDLength = max_len;
+ m_bModifiedMaxIdLength=true;
}
void CFastaReader::ParseDefLine(const TStr& s, ILineErrorListener * pMessageListener)
{
SDefLineParseInfo parseInfo;
- parseInfo.fBaseFlags = m_iFlags;
- parseInfo.fFastaFlags = GetFlags();
- parseInfo.maxIdLength = m_MaxIDLength;
- parseInfo.lineNumber = LineNumber();
+ x_SetDeflineParseInfo(parseInfo);
CFastaDeflineReader::SDeflineData data;
CFastaDeflineReader::ParseDefline(s, parseInfo, data, pMessageListener, m_fIdCheck);
bool CFastaReader::ParseIDs(
const TStr& s, ILineErrorListener * pMessageListener)
{
-
SDefLineParseInfo info;
+ x_SetDeflineParseInfo(info);
+
+ return CFastaDeflineReader::ParseIDs(s, info, m_ignorable, SetIDs(), pMessageListener);
+}
+
+
+void CFastaReader::x_SetDeflineParseInfo(SDefLineParseInfo& info)
+{
info.fBaseFlags = m_iFlags;
info.fFastaFlags = GetFlags();
- info.maxIdLength = m_MaxIDLength;
+ info.maxIdLength = m_bModifiedMaxIdLength ?
+ m_MaxIDLength :
+ 0;
info.lineNumber = LineNumber();
-
- return CFastaDeflineReader::ParseIDs(s, info, m_ignorable, SetIDs(), pMessageListener);
}
void CFastaReader::SetGapLinkageEvidences(CSeq_gap::EType type, const set<int>& evidences)
{
- if (type == -1)
- m_gap_type.Release();
- else
- m_gap_type.Reset(new SGap::TGapTypeObj(type));
-
-
+ m_gap_type.Reset(new SGap::TGapTypeObj(type));
+
m_DefaultLinkageEvidence.clear();
for (const auto& evidence : evidences) {
m_DefaultLinkageEvidence.insert(static_cast<CLinkage_evidence::EType>(evidence));
-/* $Id: fasta_exception.cpp 407174 2013-07-18 16:27:25Z gouriano $
+/* $Id: fasta_exception.cpp 610176 2020-06-11 19:24:49Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
pos_prefix = ", ";
}
- if( rangesFound.size() > maxRanges ) {
+ if (iRangesFound > maxRanges) {
out << ", and more";
return;
}
-/* $Id: fasta_reader_utils.cpp 599582 2020-01-02 20:02:39Z foleyjp $
+/* $Id: fasta_reader_utils.cpp 612524 2020-07-23 11:37:59Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
return TSeqPos(s.length() - pos);
}
-static bool s_ASCII_IsUnAmbigNuc(unsigned char c)
-{
- switch( c ) {
- case 'A':
- case 'C':
- case 'G':
- case 'T':
- case 'a':
- case 'c':
- case 'g':
- case 't':
- return true;
- default:
- return false;
- }
-}
-
class CIdErrorReporter
{
return true;
}
- TSeqPos num_ids = 0;
// be generous overall, and give raw local IDs the benefit of the
// doubt for now
CSeq_id::TParseFlags flags
for (auto& ch : local_copy)
if (ch == ',')
ch = '_';
- num_ids = CSeq_id::ParseIDs(ids, local_copy, flags);
+
+ CSeq_id::ParseIDs(ids, local_copy, flags);
const string errMessage =
"Near line " + NStr::NumericToString(info.lineNumber)
}
else
{
- num_ids = CSeq_id::ParseIDs(ids, s, flags);
+ CSeq_id::ParseIDs(ids, s, flags);
}
} catch (CSeqIdException&) {
// swap(ids, old_ids);
CFastaIdValidate idValidate(info.fFastaFlags);
- idValidate.SetMaxLocalIDLength(info.maxIdLength);
- idValidate.SetMaxGeneralTagLength(info.maxIdLength);
- idValidate.SetMaxAccessionLength(info.maxIdLength);
+ if (info.maxIdLength) {
+ idValidate.SetMaxLocalIDLength(info.maxIdLength);
+ idValidate.SetMaxGeneralTagLength(info.maxIdLength);
+ idValidate.SetMaxAccessionLength(info.maxIdLength);
+ }
idValidate(ids, info.lineNumber, CIdErrorReporter(pMessageListener, ignoreGeneralParsingError));
return true;
}
CFastaIdValidate s_IdValidate(info.fFastaFlags);
+ if (info.maxIdLength) {
+ s_IdValidate.SetMaxLocalIDLength(info.maxIdLength);
+ s_IdValidate.SetMaxGeneralTagLength(info.maxIdLength);
+ s_IdValidate.SetMaxAccessionLength(info.maxIdLength);
+ }
s_IdValidate(ids, info.lineNumber, CIdErrorReporter(listener));
}
-/* $Id: gff2_data.cpp 607807 2020-05-07 18:58:43Z ivanov $
+/* $Id: gff2_data.cpp 610645 2020-06-22 11:31:02Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
it = attrs_left.find("partial");
if (it != attrs_left.end()) {
- pFeature->SetPartial(true);
+ if (!(flags & CGff2Reader::fGenbankMode)) {
+ pFeature->AddQualifier("partial", it->second);
+ }
attrs_left.erase(it);
}
-/* $Id: gff2_reader.cpp 603569 2020-03-12 18:23:57Z ivanov $
+/* $Id: gff2_reader.cpp 610837 2020-06-24 15:29:29Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
}
+// ----------------------------------------------------------------------------
+// Fetch the next unit of input: readerData is cleared and then receives at
+// most one {line number, text} entry. readerData is left empty when no line
+// could be read, when the line belongs to a new Seq-annot, when a track line
+// or track terminator ends the current batch, or when the line is not of the
+// current data type.
+void
+CGff2Reader::xGetData(
+ ILineReader& lr,
+ TReaderData& readerData)
+// ----------------------------------------------------------------------------
+{
+ readerData.clear();
+ string line;
+ if (xGetLine(lr, line)) {
+ if (xNeedsNewSeqAnnot(line)) { // line was stashed by the callee; hand back an empty batch
+ return;
+ }
+ if (xIsTrackLine(line)) {
+ if (!mCurrentFeatureCount) { // no features counted yet: consume the track line and move on
+ xParseTrackLine(line);
+ xGetData(lr, readerData);
+ return;
+ }
+ m_PendingLine = line; // features already counted: keep the track line for later
+ return;
+ }
+ if (xIsTrackTerminator(line)) {
+ if (!mCurrentFeatureCount) { // no features counted yet: parse a bare "track" line, then read on
+ xParseTrackLine("track");
+ xGetData(lr, readerData);
+ }
+ return;
+ }
+ if (!xIsCurrentDataType(line)) { // different kind of data: push the line back and stop
+ xUngetLine(lr);
+ return;
+ }
+ readerData.push_back(TReaderLine{m_uLineNumber, line});
+ }
+ ++m_uDataCount; // reached for an accepted line and also when xGetLine() returned false; skipped by the early returns above
+}
+
// ----------------------------------------------------------------------------
void CGff2Reader::xAssignAnnotId(
CSeq_annot& annot,
return false;
}
+// ---------------------------------------------------------------------------
+//  Decide whether `line` belongs to a different sequence than the one
+//  currently being collected, i.e. whether a new Seq-annot must be started.
+//
+//  Only active in Genbank mode; otherwise always returns false.
+//  The sequence ID is taken from the first blank/tab separated column.
+//  Side effects: m_CurrentSeqId is updated whenever the ID changes, and the
+//  line is stored in m_PendingLine when true is returned.
+// ---------------------------------------------------------------------------
+bool
+CGff2Reader::xNeedsNewSeqAnnot(
+    const string& line)
+// ---------------------------------------------------------------------------
+{
+    if (!IsInGenbankMode()) {
+        return false;
+    }
+
+    vector<string> columns;
+    NStr::Split(line, "\t ", columns, NStr::eMergeDelims);
+    if (columns.empty()) {
+        // no column to take a sequence ID from; indexing columns[0] here
+        // would read past the end of an empty vector
+        return false;
+    }
+
+    const string& seqId = columns[0];
+    if (m_CurrentSeqId == seqId) {
+        return false;
+    }
+    m_CurrentSeqId = seqId;
+    if (mCurrentFeatureCount == 0) {
+        // nothing collected yet, so the current annot can take this line
+        return false;
+    }
+    m_PendingLine = line;
+    return true;
+}
+
+// ----------------------------------------------------------------------------
+//  True when the fGenbankMode bit is set in the reader's flags.
+// ----------------------------------------------------------------------------
+bool CGff2Reader::IsInGenbankMode() const
+// ----------------------------------------------------------------------------
+{
+    const bool genbankMode = ((m_iFlags & CGff2Reader::fGenbankMode) != 0);
+    return genbankMode;
+}
+
+
+
END_objects_SCOPE
END_NCBI_SCOPE
-/* $Id: gff3_reader.cpp 607807 2020-05-07 18:58:43Z ivanov $
+/* $Id: gff3_reader.cpp 610837 2020-06-24 15:29:29Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
}
-// ----------------------------------------------------------------------------
-bool CGff3Reader::IsInGenbankMode() const
-// ----------------------------------------------------------------------------
-{
- return (m_iFlags & CGff3Reader::fGenbankMode);
-}
-
-// ----------------------------------------------------------------------------
-void
-CGff3Reader::xGetData(
- ILineReader& lr,
- TReaderData& readerData)
-// ----------------------------------------------------------------------------
-{
- readerData.clear();
- string line;
- if (xGetLine(lr, line)) {
- if (xNeedsNewSeqAnnot(line)) {
- lr.UngetLine();
- return;
- }
- if (xIsTrackLine(line)) {
- if (!mCurrentFeatureCount) {
- xParseTrackLine(line);
- xGetData(lr, readerData);
- return;
- }
- m_PendingLine = line;
- return;
- }
- if (xIsTrackTerminator(line)) {
- if (!mCurrentFeatureCount) {
- xParseTrackLine("track");
- xGetData(lr, readerData);
- }
- return;
- }
- if (!xIsCurrentDataType(line)) {
- xUngetLine(lr);
- return;
- }
- readerData.push_back(TReaderLine{m_uLineNumber, line});
- }
- ++m_uDataCount;
-}
-
// ----------------------------------------------------------------------------
void CGff3Reader::xProcessAlignmentData(
CSeq_annot& annot)
return CGff2Reader::xPostProcessAnnot(annot);
}
-// ---------------------------------------------------------------------------
-bool
-CGff3Reader::xNeedsNewSeqAnnot(
- const string& line)
-// ---------------------------------------------------------------------------
-{
- if (IsInGenbankMode()) {
- vector<string> columns;
- NStr::Split(line, "\t ", columns, NStr::eMergeDelims);
- string seqId = columns[0];
- if (m_CurrentSeqId == seqId) {
- return false;
- }
- m_CurrentSeqId = seqId;
- if (mCurrentFeatureCount == 0) {
- return false;
- }
- m_PendingLine = line;
- return true;
- }
- return false;
-}
-
-
END_objects_SCOPE
END_NCBI_SCOPE
-/* $Id: gtf_reader.cpp 603569 2020-03-12 18:23:57Z ivanov $
+/* $Id: gtf_reader.cpp 610936 2020-06-25 16:26:53Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
ILineErrorListener* pEC )
// ----------------------------------------------------------------------------
{
+ mCurrentFeatureCount = 0;
return CReaderBase::ReadSeqAnnot(lineReader, pEC);
}
{
for (const auto& lineData: readerData) {
const auto& line = lineData.mData;
+ if (xIsTrackTerminator(line)) {
+ continue;
+ }
if (xParseStructuredComment(line)) {
continue;
}
return false;
}
}
-
- if ( xCdsIsPartial( gff ) ) {
- CRef<CSeq_feat> pParent = xFindParentMrna(gff);
- if (pParent) {
- CSeq_loc& loc = pCds->SetLocation();
- size_t uCdsStart = gff.SeqStart();
- size_t uMrnaStart = pParent->GetLocation().GetStart( eExtreme_Positional );
- if ( uCdsStart == uMrnaStart ) {
- loc.SetPartialStart( true, eExtreme_Positional );
- }
- size_t uCdsStop = gff.SeqStop();
- size_t uMrnaStop = pParent->GetLocation().GetStop( eExtreme_Positional );
- if ( uCdsStop == uMrnaStop && gff.Type() != "stop_codon" ) {
- loc.SetPartialStop( true, eExtreme_Positional );
- }
- }
- }
return true;
}
return true;
}
if ( 0 == NStr::CompareNocase(key, "partial")) {
- feature.SetPartial( true );
- return true;
+ // RW-1108 - ignore partial attribute in Genbank mode
+ if (m_iFlags & CGtfReader::fGenbankMode) {
+ return true;
+ }
}
return false;
}
-/* $Id: line_error.cpp 580916 2019-02-22 16:30:37Z foleyjp $
+/* $Id: line_error.cpp 610758 2020-06-23 18:11:06Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
: m_eProblem(eProblem), m_eSeverity( eSeverity ),
m_Code(code), m_Subcode(subcode),
m_strSeqId(strSeqId), m_uLine( uLine ),
- m_strErrorMessage(strErrorMessage),
m_strFeatureName(strFeatureName), m_strQualifierName(strQualifierName),
m_strQualifierValue(strQualifierValue),
+ m_strErrorMessage(strErrorMessage),
m_vecOfOtherLines(vecOfOtherLines)
{ }
-/* $Id: mod_reader.cpp 600608 2020-01-23 17:32:17Z foleyjp $
+/* $Id: mod_reader.cpp 610749 2020-06-23 18:10:01Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
bool CTitleParser::x_FindBrackets(const CTempString& line, size_t& start, size_t& stop, size_t& eq_pos)
{ // Copied from CSourceModParser
size_t i = start;
- bool found = false;
eq_pos = CTempString::npos;
const char* s = line.data() + start;
typedef CStaticPairArrayMap <const char*, COrgMod::ESubtype, PCase_CStr> TOrgModMap;
DEFINE_STATIC_ARRAY_MAP(TOrgModMap, sm_OrgModKeys, orgmod_key_to_subtype);
-
-typedef SStaticPair<const char *, int> TTrnaKey;
-
-static const TTrnaKey trna_key_to_subtype [] = {
+static const map<const char*, int, PNocase_CStr> sm_TrnaKeys
+{
{ "Ala", 'A' },
{ "Alanine", 'A' },
{ "Arg", 'R' },
{ "Valine", 'V' },
{ "Xle", 'J' },
{ "Xxx", 'X' },
+ { "Undet", 'X' },
{ "fMet", 'M' },
{ "iMet", 'M' }
};
-typedef CStaticPairArrayMap <const char*, int, PCase_CStr> TTrnaMap;
-DEFINE_STATIC_ARRAY_MAP(TTrnaMap, sm_TrnaKeys, trna_key_to_subtype);
-
static
set<const char*, PCase_CStr>
seq_start - (aa_start+3);
string abbrev = pos_str.substr (aa_start + 3, aa_length);
- TTrnaMap::const_iterator t_iter = sm_TrnaKeys.find (abbrev.c_str ());
+ //TTrnaMap::const_iterator
+ auto t_iter = sm_TrnaKeys.find (abbrev.c_str ());
if (t_iter == sm_TrnaKeys.end ()) {
// unable to parse
return false;
case CSeqFeatData::e_Pub:
if( qtype == eQual_PubMed ) {
CRef<CPub> new_pub( new CPub );
- new_pub->SetPmid( CPubMedId( x_StringToLongNoThrow(val, feat_name, qual) ) );
+ new_pub->SetPmid( CPubMedId( ENTREZ_ID_FROM(long, x_StringToLongNoThrow(val, feat_name, qual)) ) );
sfdata.SetPub().SetPub().Set().push_back( new_pub );
return true;
}
{
if (featType == CSeqFeatData::e_Rna &&
sfdata.GetRna().GetType() == CRNA_ref::eType_mRNA) {
+ CBioseq::TId ids;
try {
- CBioseq::TId ids;
CSeq_id::ParseIDs(ids, val,
- CSeq_id::fParse_ValidLocal
- | CSeq_id::fParse_PartialOK);
- for (const auto& id : ids) {
- auto id_string = id->GetSeqIdString(true);
- auto res = m_ProcessedTranscriptIds.insert(id_string);
- if (res.second == false) { // Insertion failed because Seq-id already encountered
- x_ProcessMsg(
- ILineError::eProblem_DuplicateIDs, eDiag_Error,
- feat_name, qual, val,
- "Transcript ID " + id_string + " appears on multiple mRNA features"
- );
- }
- }
+ CSeq_id::fParse_ValidLocal
+ | CSeq_id::fParse_PartialOK);
}
- catch (CException&) {
- return false;
+ catch (CSeqIdException& e)
+ {
+ x_ProcessMsg(
+ ILineError::eProblem_QualifierBadValue, eDiag_Error,
+ feat_name, qual, val,
+ "Invalid transcript_id : " + val);
+ return true;
+ }
+
+ for (const auto& id : ids) {
+ auto id_string = id->GetSeqIdString(true);
+ auto res = m_ProcessedTranscriptIds.insert(id_string);
+ if (res.second == false) { // Insertion failed because Seq-id already encountered
+ x_ProcessMsg(
+ ILineError::eProblem_DuplicateIDs, eDiag_Error,
+ feat_name, qual, val,
+ "Transcript ID " + id_string + " appears on multiple mRNA features"
+ );
+ }
}
}
x_AddGBQualToFeature(sfp, qual, val);
(featType == CSeqFeatData::e_Prot &&
sfdata.GetProt().IsSetProcessed() &&
sfdata.GetProt().GetProcessed() == CProt_ref::eProcessed_mature))
- try {
+ {
CBioseq::TId ids;
- CSeq_id::ParseIDs(ids, val,
- CSeq_id::fParse_ValidLocal |
- CSeq_id::fParse_PartialOK);
- if (!ids.empty()) {
- if (featType == CSeqFeatData::e_Cdregion) {
- for (const auto& id : ids) {
- auto id_string = id->GetSeqIdString(true);
- auto res = m_ProcessedProteinIds.insert(id_string);
- if (res.second == false) { // Insertion failed because Seq-id already encountered
- x_ProcessMsg(
- ILineError::eProblem_DuplicateIDs, eDiag_Error,
- feat_name, qual, val,
- "Protein ID " + id_string + " appears on multiple CDS features"
- );
- }
+ try {
+ CSeq_id::ParseIDs(ids, val,
+ CSeq_id::fParse_ValidLocal |
+ CSeq_id::fParse_PartialOK);
+ }
+ catch (CSeqIdException& e)
+ {
+ x_ProcessMsg(
+ ILineError::eProblem_QualifierBadValue, eDiag_Error,
+ feat_name, qual, val,
+ "Invalid protein_id : " + val);
+ return true;
+ }
+
+ if (featType == CSeqFeatData::e_Cdregion) {
+ for (const auto& id : ids) {
+ auto id_string = id->GetSeqIdString(true);
+ auto res = m_ProcessedProteinIds.insert(id_string);
+ if (res.second == false) { // Insertion failed because Seq-id already encountered
+ x_ProcessMsg(
+ ILineError::eProblem_DuplicateIDs, eDiag_Error,
+ feat_name, qual, val,
+ "Protein ID " + id_string + " appears on multiple CDS features"
+ );
}
}
+ }
- if (featType != CSeqFeatData::e_Rna) {
- auto pBestId = GetBestId(ids);
- if (pBestId) {
- sfp->SetProduct().SetWhole(*pBestId);
- }
- }
-
- if (featType != CSeqFeatData::e_Prot) {
- x_AddGBQualToFeature(sfp, qual, val);
+ if (featType != CSeqFeatData::e_Rna) { // mRNA only has a protein_id qualifier
+ auto pBestId = GetBestId(ids);
+ if (pBestId) {
+ sfp->SetProduct().SetWhole(*pBestId);
}
}
- return true;
- } catch( CSeqIdException & ) {
- return false;
}
+
+ if (featType != CSeqFeatData::e_Prot) { // Mat-peptide has an instantiated product, but no qualifier
+ x_AddGBQualToFeature(sfp, qual, val);
+ }
+ return true;
case eQual_regulatory_class:
// This should've been handled up in x_AddQualifierToImp
// so it's always a bad value to be here
while ( !m_reader->AtEOF() ) {
- // since reader's UngetLine doesn't actually push back
- // into the reader's underlying stream, we try to
- // be careful to detect the most common case of
- // "there's another feature next"
- if( m_reader->PeekChar() == '>' ) {
- break;
- }
-
CTempString line = *++(*m_reader);
if( m_reader->GetLineNumber() % 10000 == 0 &&
}
} else if (x_ParseFeatureTableLine (line, loc_info, feat, qual, qual_value, offset)) {
- // } else if (x_ParseFeatureTableLine (line, &start, &stop, &partial5, &partial3,
- // &ispoint, &isminus, feat, qual, qual_value, offset)) {
-/*
- SFeatLocInfo loc_info;
- loc_info.start_pos = start;
- loc_info.stop_pos = stop;
- loc_info.is_5p_partial = partial5;
- loc_info.is_3p_partial = partial3;
- loc_info.is_point = ispoint;
- loc_info.is_minus_strand = isminus;
- */
// process line in feature table
replace( qual_value.begin(), qual_value.end(), '\"', '\'' );
// and add first interval
x_AddIntervalToFeature (curr_feat_name, sfp, loc_info);
- // x_AddIntervalToFeature (curr_feat_name, sfp,
- // start, stop, partial5, partial3, ispoint, isminus);
-
ignore_until_next_feature_key = false;
curr_feat_name = feat;
{
x_CreateGenesFromCDSs(sap, choiceToFeatMap, flags);
}
-
return sap;
}
-/* $Id: rm_reader.cpp 601856 2020-02-14 14:44:09Z mozese2 $
+/* $Id: rm_reader.cpp 610834 2020-06-24 15:29:06Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
continue;
}
++record_counter;
+ //if (record_counter == 91555) {
+ // cerr << "";
+ //}
SRepeatRegion mask_data;
if ( ! ParseRecord( line, mask_data ) ) {
// fields position 12 and 14 flip depending on the strand value.
string rpt_left;
if (mask_data.IsReverseStrand()) {
- mask_data.rpt_pos_begin = NStr::StringToUInt( field14 );
+ mask_data.rpt_pos_begin = NStr::StringToInt( field14 );
rpt_left = field12;
} else {
- mask_data.rpt_pos_begin = NStr::StringToUInt( field12 );
+ mask_data.rpt_pos_begin = NStr::StringToInt( field12 );
rpt_left = field14;
}
StripParens(rpt_left);
- mask_data.rpt_left = NStr::StringToUInt(rpt_left);
+ mask_data.rpt_left = NStr::StringToInt(rpt_left);
// 15: "ID"
++it;
-/* $Id: source_mod_parser.cpp 571491 2018-09-27 16:13:08Z foleyjp $
+/* $Id: source_mod_parser.cpp 610750 2020-06-23 18:10:12Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
if ((mod = FindMod(s_Mod_taxid)) != NULL) {
- bsrc->SetOrg().SetTaxId( NStr::StringToInt(mod->value, NStr::fConvErr_NoThrow) );
+ bsrc->SetOrg().SetTaxId( NStr::StringToNumeric<TEntrezId>(mod->value, NStr::fConvErr_NoThrow) );
}
else
- if (reset_taxid && bsrc->IsSetOrgname() && bsrc->GetOrg().GetTaxId() != 0) {
- bsrc->SetOrg().SetTaxId(0);
+ if (reset_taxid && bsrc->IsSetOrgname() && bsrc->GetOrg().GetTaxId() != ZERO_ENTREZ_ID) {
+ bsrc->SetOrg().SetTaxId(ZERO_ENTREZ_ID);
}
}
{
for (CSourceModParser::TModsCI it = range.first;
it != range.second; ++it) {
- TIntId pmid = NStr::StringToNumeric<TIntId>(it->value, NStr::fConvErr_NoThrow);
+ TEntrezId pmid = NStr::StringToNumeric<TEntrezId>(it->value, NStr::fConvErr_NoThrow);
CRef<CPub> pub(new CPub);
pub->SetPmid().Set(pmid);
CRef<CSeqdesc> pubdesc(new CSeqdesc);
-/* $Id: grpc_integration.cpp 606576 2020-04-23 17:12:06Z ivanov $
+/* $Id: grpc_integration.cpp 608310 2020-05-14 12:35:38Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
/// (in order of priority):
/// - Config file entry "[section] variable"
/// - Environment variables: env_var_name (if not empty/NULL);
-/// then "NCBI_CONFIG__<section>__<name>"; then "grpc_proxy"
+/// then "NCBI_CONFIG__<section>__<name>"; then "GRPC_PROXY"
/// - The hard-coded NCBI default "linkerd:4142"
string g_NCBI_GRPC_GetAddress(const char* section,
const char* variable,
- const char* env_var_name)
+ const char* env_var_name,
+ int* value_source)
{
- auto addr = g_GetConfigString(section, variable, env_var_name, nullptr);
+ auto addr = g_GetConfigString(section, variable, env_var_name, nullptr, value_source);
if ( addr.empty() ) {
- addr = g_GetConfigString(nullptr, nullptr, "grpc_proxy", "linkerd:4142");
+ addr = g_GetConfigString(nullptr, nullptr, "GRPC_PROXY", "linkerd:4142", value_source);
}
return addr;
}
-/* $Id: rpcbase.cpp 604211 2020-03-24 16:03:08Z ivanov $
+/* $Id: rpcbase.cpp 615799 2020-09-03 18:56:59Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
BEGIN_NCBI_SCOPE
+/// Look up an RPC client setting for a service.
+///
+/// Sources, in order:
+///  - environment variable "<service>__RPC_CLIENT__<variable>", upper-cased
+///    (used only when set and non-empty);
+///  - entry "<variable>" in section "<service>.rpc_client" of the application
+///    configuration, if an application instance exists and has loaded one.
+/// @return the value found, or an empty string (also when either argument
+///         is empty).
+static string s_GetConfigString(const string& service,
+                                const string& variable)
+{
+    if (service.empty()  ||  variable.empty()) {
+        return kEmptyStr;
+    }
+
+    string env_name = service + "__RPC_CLIENT__" + variable;
+    NStr::ToUpper(env_name);
+    const TXChar* env_value = NcbiSys_getenv(_T_XCSTRING(env_name.c_str()));
+    if (env_value  &&  *env_value) {
+        return _T_CSTRING(env_value);
+    }
+
+    CNcbiApplicationGuard app = CNcbiApplication::InstanceGuard();
+    if (app  &&  app->HasLoadedConfig()) {
+        return app->GetConfig().Get(service + ".rpc_client", variable);
+    }
+    return kEmptyStr;
+}
+
+
+/// Maximum number of tries configured for a service ("max_try" setting).
+/// Falls back to 3 when the setting is absent, zero, or cannot be parsed
+/// (the latter is reported as a warning).
+static unsigned int s_GetTryLimit(const string& service)
+{
+    const unsigned int kDefaultTryLimit = 3;
+
+    const string value = s_GetConfigString(service, "max_try");
+    if (value.empty()) {
+        return kDefaultTryLimit;
+    }
+    try {
+        const unsigned int limit = NStr::StringToNumeric<unsigned int>(value);
+        if (limit > 0) {
+            return limit;
+        }
+    }
+    catch (...) {
+        ERR_POST(Warning << "Bad " << service << "/max_try value: " << value);
+    }
+    return kDefaultTryLimit;
+}
+
+
+/// Retry delay configured for a service ("retry_delay" setting, parsed as a
+/// floating-point number and passed to CTimeSpan; non-positive values become
+/// zero). Returns a default-constructed CTimeSpan when the setting is absent
+/// or cannot be parsed (the latter is reported as a warning).
+static CTimeSpan s_GetRetryDelay(const string& service)
+{
+    const string value = s_GetConfigString(service, "retry_delay");
+    if (!value.empty()) {
+        try {
+            const double seconds = NStr::StringToNumeric<double>(value);
+            return CTimeSpan(seconds > 0 ? seconds : 0);
+        }
+        catch (...) {
+            ERR_POST(Warning << "Bad " << service << "/retry_delay value: " << value);
+        }
+    }
+    return CTimeSpan();
+}
+
+
+CRPCClient_Base::CRPCClient_Base(const string& service, // ctor without an explicit try limit
+ ESerialDataFormat format)
+ : m_Format(format),
+ m_RetryDelay(s_GetRetryDelay(service)), // from the service's "retry_delay" setting
+ m_TryCount(0), // counters start at zero
+ m_RecursionCount(0),
+ m_Service(service),
+ m_TryLimit(s_GetTryLimit(service)) // from the service's "max_try" setting, 3 if unset or invalid
+{
+}
+
+
CRPCClient_Base::CRPCClient_Base(const string& service,
- ESerialDataFormat format,
- unsigned int retry_limit)
+ ESerialDataFormat format,
+ unsigned int try_limit)
: m_Format(format),
- m_RetryCount(0),
+ m_RetryDelay(s_GetRetryDelay(service)),
+ m_TryCount(0),
m_RecursionCount(0),
m_Service(service),
- m_RetryLimit(retry_limit)
+ m_TryLimit(try_limit > 0 ? try_limit : 3)
{
}
{
if (m_Affinity != affinity) {
if (m_RecursionCount > 1) {
- ERR_POST("Affinity can not be changed on a recursive request");
+ ERR_POST("Affinity cannot be changed on a recursive request");
return;
}
Disconnect();
{
CMutexGuard LOCK(m_Mutex);
if (m_RecursionCount == 0) {
- m_RetryCount = 0;
+ m_TryCount = 0;
}
// Recursion counter needs to be decremented on both success and failure.
CCounterGuard recursion_guard(&m_RecursionCount);
- const string& request_name = ( request.GetThisTypeInfo() != NULL
- ? ("("+request.GetThisTypeInfo()->GetName()+")") : "(no_request_type)");
+ const string& request_name = request.GetThisTypeInfo() != NULL
+ ? ("("+request.GetThisTypeInfo()->GetName()+")")
+ : "(no_request_type)";
// Reset headers from previous requests if any.
m_RetryCtx.Reset();
- double max_span = m_RetryDelay.GetAsDouble()*m_RetryLimit;
+ double max_span = m_RetryDelay.GetAsDouble()*m_TryLimit;
double span = max_span;
bool limit_by_time = !m_RetryDelay.IsEmpty();
// Retry context can be either the default one (m_RetryCtx), or provided
// through an exception.
for (;;) {
if ( IsCanceled() ) {
- NCBI_THROW(CRPCClientException, eFailed, "Request canceled "+request_name);
+ NCBI_THROW(CRPCClientException, eFailed,
+ "Request canceled " + request_name);
}
try {
SetAffinity(x_GetAffinity(request));
}
m_Stream->peek(); // send data, read response headers
if (!m_Stream->good() && !m_Stream->eof()) {
- NCBI_THROW(CRPCClientException, eFailed, "Connection stream is in bad state "+request_name);
+ NCBI_THROW(CRPCClientException, eFailed,
+ "Connection stream is in bad state " + request_name);
}
if (m_RetryCtx.IsSetContentOverride() &&
m_RetryCtx.GetContentOverride() == CHttpRetryContext::eFromResponse) {
// proceed to retry
}
else if ( !dynamic_cast<CSerialException*>(&e)
- && !dynamic_cast<CIOException*>(&e) ) {
+ && !dynamic_cast<CIOException*>(&e) ) {
// Not a retry related exception, abort.
throw;
}
// If using time limit, allow to make more than m_RetryLimit attempts
// if the server has set shorter delay.
- if ((!limit_by_time && ++m_RetryCount >= m_RetryLimit) ||
- !x_ShouldRetry(m_RetryCount)) {
+ if ((!limit_by_time && ++m_TryCount >= m_TryLimit) ||
+ !x_ShouldRetry(m_TryCount)) {
NCBI_THROW(CRPCClientException, eFailed,
- "Failed to receive reply after " +
- NStr::NumericToString(m_RetryCount) +
- (m_RetryCount == 1 ? " try" : " tries") +
- " " + request_name );
+ "Failed to receive reply after "
+ + NStr::NumericToString(m_TryCount)
+ + (m_TryCount == 1 ? " try " : " tries ")
+ + request_name );
}
if ( m_RetryCtx.IsSetStop() ) {
NCBI_THROW(CRPCClientException, eFailed,
- "Retrying request stopped by the server: " +
- m_RetryCtx.GetStopReason() + " " + request_name);
+ "Retrying request stopped by the server: "
+ + m_RetryCtx.GetStopReason() + ' ' + request_name);
}
CTimeSpan delay = x_GetRetryDelay(span);
if ( !delay.IsEmpty() ) {
span -= delay.GetAsDouble();
if (limit_by_time && span <= 0) {
NCBI_THROW(CRPCClientException, eFailed,
- "Failed to receive reply in " +
- CTimeSpan(max_span).AsSmartString() +
- " " + request_name);
+ "Failed to receive reply in "
+ + CTimeSpan(max_span).AsSmartString()
+ + ' ' + request_name);
}
}
// Always reconnect on retry.
if ( IsCanceled() ) {
- NCBI_THROW(CRPCClientException, eFailed, "Request canceled "+request_name);
+ NCBI_THROW(CRPCClientException, eFailed,
+ "Request canceled " + request_name);
}
try {
Reset();
- } STD_CATCH_ALL_XX(Serial_RPCClient, 1 ,"CRPCClient_Base::Reset()"+request_name);
+ } STD_CATCH_ALL_XX(Serial_RPCClient, 1,
+ "CRPCClient_Base::Reset() " + request_name);
}
// Reset retry context when done.
m_RetryCtx.Reset();
// If there were any retries, force disconnect to prevent using old
// retry url, args etc. with the next request.
- if ( m_RetryCount > 0 && m_RecursionCount <= 1 ) {
+ if ( m_TryCount > 0 && m_RecursionCount <= 1 ) {
Disconnect();
}
}
bool CRPCClient_Base::x_ShouldRetry(unsigned int tries) /* NCBI_FAKE_WARNING */
{
_TRACE("CRPCClient_Base::x_ShouldRetry: retrying after " << tries
- << " failures");
+ << " failure(s)");
return true;
}
-/* $Id: format_guess.cpp 600741 2020-01-27 15:56:56Z foleyjp $
+/* $Id: format_guess.cpp 612523 2020-07-23 11:23:30Z ivanov $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
}
-// Must list all EFormats except eUnknown and eFormat_max.
+// Must list all *supported* EFormats except eUnknown and eFormat_max.
// Will cause assertion if violated!
-int CFormatGuess::s_CheckOrder[] =
+vector<int> CFormatGuess::sm_CheckOrder =
{
eBam, // must precede eGZip!
eZip,
eHgvs,
eDistanceMatrix,
eFlatFileSequence,
+ eFlatFileUniProt,
+ eFlatFileEna,
+ eFlatFileGenbank,
eFiveColFeatureTable,
eSnpMarkers,
eFasta,
eTextASN,
eAlignment,
eTaxplot,
- ePhrapAce,
eTable,
eBinaryASN,
+ ePhrapAce,
eUCSCRegion,
- eJSON
+ eJSON,
};
// This array must stay in sync with enum EFormat, but that's not
// supposed to change in the middle anyway, so the explicit size
// should suffice to avoid accidental skew.
-const char* const CFormatGuess::sm_FormatNames[CFormatGuess::eFormat_max] =
-{
- "unknown",
- "binary ASN.1",
- "RepeatMasker",
- "GFF/GTF Poisoned",
- "Glimmer3",
- "AGP",
- "XML",
- "WIGGLE",
- "BED",
- "BED15",
- "Newick",
- "alignment",
- "distance matrix",
- "flat-file sequence",
- "five-column feature table",
- "SNP Markers",
- "FASTA",
- "text ASN.1",
- "Taxplot",
- "Phrap ACE",
- "table",
- "GTF",
- "GFF3",
- "GFF2",
- "HGVS",
- "GVF",
- "zip",
- "gzip",
- "bzip2",
- "lzo",
- "SRA",
- "BAM",
- "VCF",
- "UCSC Region",
- "GFF Augustus",
- "JSON",
- "PSL",
+// Display name for every EFormat value; GetFormatName() looks formats up here.
+// Initialize() asserts that the number of entries equals eFormat_max, so a new
+// EFormat value needs a new entry in this map.
+const CFormatGuess::NAME_MAP CFormatGuess::sm_FormatNames = {
+    {eUnknown, "unknown"},
+    {eBinaryASN, "binary ASN.1"},
+    {eRmo, "RepeatMasker"},
+    {eGtf_POISENED, "GFF/GTF Poisoned"},
+    {eGlimmer3, "Glimmer3"},
+    {eAgp, "AGP"},
+    {eXml, "XML"},
+    {eWiggle, "WIGGLE"},
+    {eBed, "BED"},
+    {eBed15, "BED15"},
+    {eNewick, "Newick"},
+    {eAlignment, "alignment"},
+    {eDistanceMatrix, "distance matrix"},
+    {eFlatFileSequence, "flat-file sequence"},
+    {eFiveColFeatureTable, "five-column feature table"},
+    {eSnpMarkers, "SNP Markers"},
+    {eFasta, "FASTA"},
+    {eTextASN, "text ASN.1"},
+    {eTaxplot, "Taxplot"},
+    {ePhrapAce, "Phrap ACE"},
+    {eTable, "table"},
+    {eGtf, "GTF"},
+    {eGff3, "GFF3"},
+    {eGff2, "GFF2"},
+    {eHgvs, "HGVS"},
+    {eGvf, "GVF"},
+    {eZip, "zip"},
+    {eGZip, "gzip"},
+    {eBZip2, "bzip2"},
+    {eLzo, "lzo"},
+    {eSra, "SRA"},
+    {eBam, "BAM"},
+    {eVcf, "VCF"},
+    {eUCSCRegion, "UCSC Region"},
+    {eGffAugustus, "GFF Augustus"},
+    {eJSON, "JSON"},
+    {ePsl, "PSL"},
+    {eAltGraphX, "altGraphX"},
+    {eBed5FloatScore, "BED5 float score"},
+    {eBedGraph, "BED graph"},
+    {eBedRnaElements, "BED Rna elements"},
+    {eBigBarChart, "bigBarChart"},
+    {eBigBed, "BigBED"},
+    {eBigPsl, "BigPSL"},
+    {eBigChain, "BigChain"},
+    {eBigMaf, "BigMaf"},
+    {eBigWig, "BigWig"},
+    {eBroadPeak, "BroadPeak"},
+    {eChain, "Chain"},
+    {eClonePos, "ClonePos"},
+    {eColoredExon, "ColoredExon"},
+    {eCtgPos, "CtgPos"},
+    {eDownloadsOnly, "DownloadsOnly"},
+    {eEncodeFiveC, "EncodeFiveC"},
+    {eExpRatio, "ExpRatio"},
+    {eFactorSource, "FactorSource"},
+    {eGenePred, "GenePred"},
+    {eLd2, "Ld2"},
+    {eNarrowPeak, "NarrowPeak"},
+    {eNetAlign, "NetAlign"},
+    {ePeptideMapping, "PeptideMapping"},
+    {eRmsk, "Rmsk"},
+    {eSnake, "Snake"},
+    {eVcfTabix, "VcfTabix"},
+    {eWigMaf, "WigMaf"},
+    {eFlatFileGenbank, "Genbank FlatFile"},
+    {eFlatFileEna, "ENA FlatFile"},
+    {eFlatFileUniProt, "UniProt FlatFile"},
+};
const char*
CFormatGuess::GetFormatName(EFormat format)
{
- unsigned int i = static_cast<unsigned int>(format);
- if (i >= static_cast <unsigned int>(eFormat_max)) {
+ auto formatIt = sm_FormatNames.find(format);
+ if (formatIt == sm_FormatNames.end()) {
NCBI_THROW(CUtilException, eWrongData,
"CFormatGuess::GetFormatName: out-of-range format value "
- + NStr::IntToString(i));
+ + NStr::IntToString(format));
}
- return sm_FormatNames[i];
+ return formatIt->second;
}
}
}
+// ----------------------------------------------------------------------------
+//  A format counts as supported when it is listed in sm_CheckOrder.
+// ----------------------------------------------------------------------------
+bool
+CFormatGuess::IsSupportedFormat(EFormat format)
+{
+    for (const auto& candidate : sm_CheckOrder) {
+        if (candidate == format) {
+            return true;
+        }
+    }
+    return false;
+}
+
// ----------------------------------------------------------------------------
CFormatGuess::EFormat
CFormatGuess::GuessFormat( EMode )
}
EMode mode = eQuick;
- size_t uFormatCount = ArraySize(s_CheckOrder);
+ size_t uFormatCount = sm_CheckOrder.size();
// First, try to use hints
if ( !m_Hints.IsEmpty() ) {
for (size_t f = 0; f < uFormatCount; ++f) {
- EFormat fmt = EFormat( s_CheckOrder[f] );
+ EFormat fmt = EFormat( sm_CheckOrder[f] );
if (m_Hints.IsPreferred(fmt) && x_TestFormat(fmt, mode)) {
return fmt;
}
// Check other formats, skip the ones that are disabled through hints
for (size_t f = 0; f < uFormatCount; ++f) {
- EFormat fmt = EFormat( s_CheckOrder[f] );
+ EFormat fmt = EFormat( sm_CheckOrder[f] );
if ( ! m_Hints.IsDisabled(fmt) && x_TestFormat(fmt, mode) ) {
return fmt;
}
return TestFormatAugustus( mode );
case eJSON:
return TestFormatJson( mode );
+ case eFlatFileGenbank:
+ return TestFormatFlatFileGenbank( mode );
+ case eFlatFileEna:
+ return TestFormatFlatFileEna( mode );
+ case eFlatFileUniProt:
+ return TestFormatFlatFileUniProt( mode );
default:
NCBI_THROW( CCoreException, eInvalidArg,
"CFormatGuess::x_TestFormat(): Unsupported format ID (" +
void
CFormatGuess::Initialize()
{
- NCBI_ASSERT(eFormat_max-2 == sizeof( s_CheckOrder ) / sizeof( int ),
- "Indices in s_CheckOrder do not match format count ---"
- "update s_CheckOrder to list all formats"
- );
- NCBI_ASSERT(eFormat_max == sizeof(sm_FormatNames) / sizeof(const char*)
- && sm_FormatNames[eFormat_max - 1] != NULL,
- "sm_FormatNames does not list all possible formats");
+ NCBI_ASSERT(eFormat_max == sm_FormatNames.size(),
+ "sm_FormatNames does not list all possible formats");
m_pTestBuffer = 0;
m_bStatsAreValid = false;
IsInputRepeatMaskerWithoutHeader();
}
+
+// ----------------------------------------------------------------------------
+
+/// Heuristic check whether a line looks like nucleotide sequence data.
+///
+/// @param line       the text line to examine
+/// @param minLength  lines shorter than this are rejected outright
+/// @return true when the line consists of letters and whitespace only and
+///         more than 90% of its characters are letters flagged
+///         fDNA_Main_Alphabet in symbol_type_table.
+static bool s_LooksLikeNucSeqData(const string& line, size_t minLength=10) {
+    if (line.size() < minLength) {
+        return false;
+    }
+
+    size_t nucCount = 0;
+    for (const char c : line) {
+        // <cctype> classifiers are only defined for unsigned char values
+        // (or EOF), and a possibly negative plain char must not be used as
+        // a table index either.
+        const auto uc = static_cast<unsigned char>(c);
+        if (isalpha(uc)) {
+            if (symbol_type_table[uc] & fDNA_Main_Alphabet) {
+                ++nucCount;
+            }
+            continue;
+        }
+
+        if (!isspace(uc)) {
+            return false;
+        }
+    }
+
+    // Compare in floating point: the integer quotient nucCount/line.size()
+    // is 0 unless every single character is a nucleotide letter, which
+    // defeats the 90% threshold. This form is also safe for an empty line.
+    return nucCount > 0.9 * line.size();
+}
+
+
// ----------------------------------------------------------------------------
bool
CFormatGuess::TestFormatPhrapAce(
return false;
}
- ITERATE( list<string>, it, m_TestLines ) {
- if ( IsLinePhrapId( *it ) ) {
- return true;
+ if (memchr(m_pTestBuffer, 0, m_iTestDataSize)) { // Cannot contain NuLL bytes
+ return false; // RW-1102
+ }
+
+ bool foundId = false;
+ for (const auto& line : m_TestLines) {
+ if (foundId) {
+ if (s_LooksLikeNucSeqData(line)) {
+ return true;
+ }
+ }
+ else if (IsLinePhrapId(line)) {
+ foundId = true;
}
}
return false;
return (uPslLineCount != 0);
}
+// ----------------------------------------------------------------------------
+//  Advance lineIt to the next line that starts in column 0, split it at the
+//  first run of blanks into keyword and data, and step past it.
+//
+//  Indented lines in between are skipped as long as their indent is one of
+//  2, 3, 5, 12, 21, or their first character that is neither blank nor digit
+//  sits at offset 10 (presumably numbered sequence lines -- TODO confirm).
+//  Returns false if the input is exhausted, the starting line is longer than
+//  79 characters, an indented line fits neither pattern, or splitting throws.
+//  Note that lineIt may have been advanced even when false is returned.
+// ----------------------------------------------------------------------------
+bool
+GenbankGetKeywordLine(
+    list<string>::iterator& lineIt,
+    list<string>::iterator endIt,
+    string& keyword,
+    string& data)
+// ----------------------------------------------------------------------------
+{
+    if (lineIt == endIt  ||  lineIt->size() > 79) {
+        return false;
+    }
+
+    for (;;) {
+        const size_t indent = lineIt->find_first_not_of(" ");
+        if (indent == 0) {
+            break;  // found a line starting in the first column
+        }
+        switch (indent) {
+        case 2:
+        case 3:
+        case 5:
+        case 12:
+        case 21:
+            // recognized indent, skip the line
+            break;
+        default:
+            // also covers blank lines (no non-blank character at all)
+            if (lineIt->find_first_not_of(" 1234567890") != 10) {
+                return false;
+            }
+            break;
+        }
+        ++lineIt;
+        if (lineIt == endIt) {
+            return false;
+        }
+    }
+
+    try {
+        NStr::SplitInTwo(
+            *lineIt, " ", keyword, data, NStr::fSplit_MergeDelimiters);
+    }
+    catch (CException&) {
+        return false;
+    }
+    ++lineIt;
+    return true;
+}
+
+// ----------------------------------------------------------------------------
+//  Test whether the sampled input starts like a Genbank flat file: the
+//  leading keyword records must appear in the order
+//    LOCUS, DEFINITION+, ACCESSION+, [NID], VERSION, [NID], PROJECT*,
+//    DBLINK*, KEYWORDS
+//  Nothing past the first KEYWORDS record is examined.
+// ----------------------------------------------------------------------------
+bool CFormatGuess::TestFormatFlatFileGenbank(
+    EMode /*unused*/)
+{
+    // see ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
+
+    if (!EnsureStats()  ||  !EnsureSplitLines()) {
+        return false;
+    }
+
+    // smell test:
+    // note: sample size at least 8000 characters, line length soft limited to
+    //  80 characters
+    if (m_TestLines.size() < 9) { // number of required records
+        return false;
+    }
+
+    string keyword, data;
+    auto recordIt = m_TestLines.begin();
+    auto endIt = m_TestLines.end();
+
+    // move on to the next keyword record; false when there is none
+    auto nextRecord = [&]() -> bool {
+        return GenbankGetKeywordLine(recordIt, endIt, keyword, data);
+    };
+    // consume consecutive records carrying the given keyword; false as soon
+    // as moving on fails
+    auto skipAll = [&](const char* name) -> bool {
+        while (keyword == name) {
+            if (!nextRecord()) {
+                return false;
+            }
+        }
+        return true;
+    };
+
+    // LOCUS: exactly one, and it must be the very first line
+    NStr::SplitInTwo(
+        *recordIt, " ", keyword, data, NStr::fSplit_MergeDelimiters);
+    if (keyword != "LOCUS") {
+        return false;
+    }
+    ++recordIt;
+    if (!nextRecord()) {
+        return false;
+    }
+
+    // DEFINITION: one or more
+    if (keyword != "DEFINITION"  ||  !skipAll("DEFINITION")) {
+        return false;
+    }
+
+    // ACCESSION: one or more
+    if (keyword != "ACCESSION"  ||  !skipAll("ACCESSION")) {
+        return false;
+    }
+
+    // NID: zero or one, can come before or after VERSION
+    const bool nidSeen = (keyword == "NID");
+    if (nidSeen  &&  !nextRecord()) {
+        return false;
+    }
+
+    // VERSION: exactly one
+    if (keyword != "VERSION"  ||  !nextRecord()) {
+        return false;
+    }
+
+    // NID after VERSION, only if it did not precede it
+    if (!nidSeen  &&  keyword == "NID"  &&  !nextRecord()) {
+        return false;
+    }
+
+    // PROJECT: zero or more
+    if (!skipAll("PROJECT")) {
+        return false;
+    }
+
+    // DBLINK: zero or more
+    if (!skipAll("DBLINK")) {
+        return false;
+    }
+
+    // KEYWORDS: one or more. Seeing the first one is convincing enough:
+    // there may be flaws farther down but this input definitely wants to be
+    // a Genbank flat file.
+    return keyword == "KEYWORDS";
+}
+
+// ----------------------------------------------------------------------------
+//  Skip lines starting with "XX", then split the next line at the first run
+//  of blanks into lineCode and lineData and step past it.
+//  Returns false when no line is left. If splitting throws, the whole line
+//  becomes the line code and the data is left empty.
+// ----------------------------------------------------------------------------
+bool
+EnaGetLineData(
+    list<string>::iterator& lineIt,
+    list<string>::iterator endIt,
+    string& lineCode,
+    string& lineData)
+// ----------------------------------------------------------------------------
+{
+    for ( ;  lineIt != endIt;  ++lineIt) {
+        if (!NStr::StartsWith(*lineIt, "XX")) {
+            break;
+        }
+    }
+    if (lineIt == endIt) {
+        return false;
+    }
+
+    try {
+        NStr::SplitInTwo(
+            *lineIt, " ", lineCode, lineData, NStr::fSplit_MergeDelimiters);
+    }
+    catch(CException&) {
+        lineCode = *lineIt;
+        lineData = "";
+    }
+    ++lineIt;
+    return true;
+}
+
+// ----------------------------------------------------------------------------
+// Guesses whether the sampled input is meant to be an ENA flat file.
+//
+// The leading line codes of the sample must appear in this order:
+//   ID (exactly one), AC (one or more), PR (zero or more), DT (two),
+//   DE, KW, OS, OC (one or more each).
+// Lines are read through EnaGetLineData, so "XX" lines in between are ignored.
+// Running out of sample lines up to and including the DT lines rejects the
+// input; running out once the DE lines have been reached accepts it.
+//
+// see: ftp://ftp.ebi.ac.uk/pub/databases/ena/sequence/release/doc/usrman.txt
+bool CFormatGuess::TestFormatFlatFileEna(
+    EMode /*unused*/)
+{
+    if ( !(EnsureStats() && EnsureSplitLines()) ) {
+        return false;
+    }
+
+    // smell test:
+    // note: sample size at least 8000 characters, line length soft limited to
+    //  78 characters
+    if (m_TestLines.size() < 19) { // number of required records
+        return false;
+    }
+
+    string code, payload;
+    auto cursor = m_TestLines.begin();
+    const auto sampleEnd = m_TestLines.end();
+
+    // Loads the next non-"XX" line into code/payload; false once the sample
+    // is exhausted.
+    const auto fetchNext = [&]() -> bool {
+        return EnaGetLineData(cursor, sampleEnd, code, payload);
+    };
+    // Reads on for as long as the current line carries the given code; false
+    // if the sample is exhausted while doing so.
+    const auto consumeRun = [&](const char* wanted) -> bool {
+        while (code == wanted) {
+            if (!fetchNext()) {
+                return false;
+            }
+        }
+        return true;
+    };
+
+    // ID: exactly one, and it has to be the very first line
+    NStr::SplitInTwo(
+        *cursor, " ", code, payload, NStr::fSplit_MergeDelimiters);
+    if (code != "ID") {
+        return false;
+    }
+    ++cursor;
+
+    // AC: one or more
+    if (!fetchNext()  ||  code != "AC"  ||  !consumeRun("AC")) {
+        return false;
+    }
+
+    // PR: zero or more
+    if (!consumeRun("PR")) {
+        return false;
+    }
+
+    // DT: two (first hard difference from UniProt)
+    for (int remaining = 2; remaining > 0; --remaining) {
+        if (code != "DT"  ||  !fetchNext()) {
+            return false;
+        }
+    }
+
+    // DE, KW, OS, OC: one or more of each, in this order. From here on an
+    // exhausted sample counts as a match.
+    const char* const trailingCodes[] = { "DE", "KW", "OS", "OC" };
+    for (const char* wanted : trailingCodes) {
+        if (code != wanted) {
+            return false;
+        }
+        if (!consumeRun(wanted)) {
+            return true;
+        }
+    }
+
+    // once here it's Ena or someone is messing with me
+    return true;
+}
+
+// ----------------------------------------------------------------------------
+// Fetches the next line from a list of UniProt flat-file sample lines.
+//
+// The line at lineIt is handed to NStr::SplitInTwo with " " as the delimiter
+// (fSplit_MergeDelimiters), filling lineCode and lineData. If that call raises
+// a CException, lineCode receives the entire line and lineData is emptied.
+// On success lineIt ends up one past the line that was consumed.
+//
+// Returns false when lineIt already equals endIt (the output strings are not
+// touched in that case), true otherwise.
+bool
+UniProtGetLineData(
+    list<string>::iterator& lineIt,
+    list<string>::iterator endIt,
+    string& lineCode,
+    string& lineData)
+// ----------------------------------------------------------------------------
+{
+    const bool exhausted = (lineIt == endIt);
+    if (exhausted) {
+        return false;
+    }
+
+    const string& current = *lineIt;
+    try {
+        NStr::SplitInTwo(
+            current, " ", lineCode, lineData, NStr::fSplit_MergeDelimiters);
+    }
+    catch (const CException&) {
+        // could not split: report the whole line as the code, with no data
+        lineCode = current;
+        lineData.clear();
+    }
+    ++lineIt;
+    return true;
+}
+
+// ----------------------------------------------------------------------------
+// Guesses whether the sampled input is meant to be a UniProt flat file.
+//
+// The leading line codes of the sample must appear in this order:
+//   ID (exactly one), AC (one or more), DT (three), DE (one or more),
+//   followed by either a GN or an OS line.
+// Running out of sample lines up to and including the DT lines rejects the
+// input; running out while reading DE lines accepts it.
+//
+// see: https://web.expasy.org/docs/userman.html#genstruc
+bool CFormatGuess::TestFormatFlatFileUniProt(
+    EMode /*unused*/)
+{
+    if ( !(EnsureStats() && EnsureSplitLines()) ) {
+        return false;
+    }
+
+    // smell test:
+    // note: sample size at least 8000 characters, line length soft limited to
+    //  75 characters
+    if (m_TestLines.size() < 15) { // number of required records
+        return false;
+    }
+
+    // note:
+    // the only goal is to establish that the input is *meant* to be UniProt.
+    // Validation is not our business - it should happen downstream, with
+    // better error messages than we could possibly provide here.
+    string code, payload;
+    auto cursor = m_TestLines.begin();
+    const auto sampleEnd = m_TestLines.end();
+
+    // Loads the next line into code/payload; false once the sample is
+    // exhausted.
+    const auto fetchNext = [&]() -> bool {
+        return UniProtGetLineData(cursor, sampleEnd, code, payload);
+    };
+
+    // ID: exactly one, and it has to be the very first line
+    NStr::SplitInTwo(
+        *cursor, " ", code, payload, NStr::fSplit_MergeDelimiters);
+    if (code != "ID") {
+        return false;
+    }
+    ++cursor;
+
+    // AC: one or more
+    if (!fetchNext()  ||  code != "AC") {
+        return false;
+    }
+    do {
+        if (!fetchNext()) {
+            return false;
+        }
+    } while (code == "AC");
+
+    // DT: three (first hard difference from ENA, whose test in this file
+    // expects two)
+    for (int remaining = 3; remaining > 0; --remaining) {
+        if (code != "DT"  ||  !fetchNext()) {
+            return false;
+        }
+    }
+
+    // DE: one or more; an exhausted sample counts as a match from here on
+    if (code != "DE") {
+        return false;
+    }
+    do {
+        if (!fetchNext()) {
+            return true;
+        }
+    } while (code == "DE");
+
+    // next must be the optional "GN" line or the first "OS" line;
+    // once past that it's UniProt or someone is messing with me
+    return code == "GN"  ||  code == "OS";
+}
+
// ----------------------------------------------------------------------------
bool CFormatGuess::TestFormatVcf(
EMode)