From 39547d166536d044cc7f46c4c443e75f84d1f1e8 Mon Sep 17 00:00:00 2001 From: Andreas Tille Date: Wed, 4 Jul 2018 23:40:50 +0200 Subject: [PATCH] Try to take over patches from libSeqLib which needs separate bfc.h Last-Update: Thu, 02 Feb 2017 10:57:56 +0100 FIXME: Please check the FIXME at the end. I think I did things wrong to put a static declaration into header file but I had no better idea to make ec1buf_init known in libSeqLib Gbp-Pq: Name bcf_seqlib.patch --- bfc.c | 134 +++++--------------------------------------------- bfc.h | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 121 deletions(-) create mode 100644 bfc.h diff --git a/bfc.c b/bfc.c index ea74523..084467d 100644 --- a/bfc.c +++ b/bfc.c @@ -7,24 +7,12 @@ #include "kmer.h" #include "internal.h" #include "fml.h" +#include "bfc.h" /******************* *** BFC options *** *******************/ -typedef struct { - int n_threads, q, k, l_pre; - int min_cov; // a k-mer is considered solid if the count is no less than this - - int max_end_ext; - int win_multi_ec; - float min_trim_frac; - - // these ec options cannot be changed on the command line - int w_ec, w_ec_high, w_absent, w_absent_high; - int max_path_diff, max_heap; -} bfc_opt_t; - void bfc_opt_init(bfc_opt_t *opt) { memset(opt, 0, sizeof(bfc_opt_t)); @@ -46,26 +34,6 @@ void bfc_opt_init(bfc_opt_t *opt) opt->max_heap = 100; } -/********************** - *** K-mer counting *** - **********************/ - -#define CNT_BUF_SIZE 256 - -typedef struct { // cache to reduce locking - uint64_t y[2]; - int is_high; -} insbuf_t; - -typedef struct { - int k, q; - int n_seqs; - const fml_seq1_t *seqs; - bfc_ch_t *ch; - int *n_buf; - insbuf_t **buf; -} cnt_step_t; - bfc_kmer_t bfc_kmer_null = {{0,0,0,0}}; static int bfc_kmer_bufclear(cnt_step_t *cs, int forced, int tid) @@ -127,34 +95,6 @@ struct bfc_ch_s *fml_count(int n, const fml_seq1_t *seq, int k, int q, int l_pre return cs.ch; } -/*************** - *** 
Correct *** - ***************/ - -#define BFC_MAX_KMER 63 -#define BFC_MAX_BF_SHIFT 37 - -#define BFC_MAX_PATHS 4 -#define BFC_EC_HIST 5 -#define BFC_EC_HIST_HIGH 2 - -#define BFC_EC_MIN_COV_COEF .1 - -/************************** - * Sequence struct for ec * - **************************/ - -#include "kvec.h" - -typedef struct { // NOTE: unaligned memory - uint8_t b:3, q:1, ob:3, oq:1; - uint8_t dummy; - uint16_t lcov:6, hcov:6, solid_end:1, high_end:1, ec:1, absent:1; - int i; -} ecbase_t; - -typedef kvec_t(ecbase_t) ecseq_t; - static int bfc_seq_conv(const char *s, const char *q, int qthres, ecseq_t *seq) { int i, l; @@ -264,53 +204,6 @@ uint64_t bfc_ec_best_island(int k, const ecseq_t *s) return max > 0? (uint64_t)(max_i - max - k + 1) << 32 | max_i : 0; } -/******************** - * Correct one read * - ********************/ - -#include "ksort.h" - -#define ECCODE_MISC 1 -#define ECCODE_MANY_N 2 -#define ECCODE_NO_SOLID 3 -#define ECCODE_UNCORR_N 4 -#define ECCODE_MANY_FAIL 5 - -typedef struct { - uint32_t ec_code:3, brute:1, n_ec:14, n_ec_high:14; - uint32_t n_absent:24, max_heap:8; -} ecstat_t; - -typedef struct { - uint8_t ec:1, ec_high:1, absent:1, absent_high:1, b:4; -} bfc_penalty_t; - -typedef struct { - int tot_pen; - int i; // base position - int k; // position in the stack - int32_t ecpos_high[BFC_EC_HIST_HIGH]; - int32_t ecpos[BFC_EC_HIST]; - bfc_kmer_t x; -} echeap1_t; - -typedef struct { - int parent, i, tot_pen; - uint8_t b; - bfc_penalty_t pen; - uint16_t cnt; -} ecstack1_t; - -typedef struct { - const bfc_opt_t *opt; - const bfc_ch_t *ch; - kvec_t(echeap1_t) heap; - kvec_t(ecstack1_t) stack; - ecseq_t seq, tmp, ec[2]; - int mode; - ecstat_t ori_st; -} bfc_ec1buf_t; - #define heap_lt(a, b) ((a).tot_pen > (b).tot_pen) KSORT_INIT(ec, echeap1_t, heap_lt) @@ -567,19 +460,6 @@ ecstat_t bfc_ec1(bfc_ec1buf_t *e, char *seq, char *qual) return s; } -/******************** - * Error correction * - ********************/ - -typedef struct { - const bfc_opt_t 
*opt; - const bfc_ch_t *ch; - bfc_ec1buf_t **e; - int64_t n_processed; - int n_seqs, flt_uniq; - fml_seq1_t *seqs; -} ec_step_t; - static uint64_t max_streak(int k, const bfc_ch_t *ch, const fml_seq1_t *s) { int i, l; @@ -663,6 +543,18 @@ float fml_correct_core(const fml_opt_t *opt, int flt_uniq, int n, fml_seq1_t *se return kcov; } +// Added by jwala for use in libSeqLib +void kmer_correct(ec_step_t * es, int mode, bfc_ch_t * ch) { + int i = 0; + es->e = (bfc_ec1buf_t**)calloc(es->opt->n_threads, sizeof(void*)); //jwala added cast + for (i = 0; i < es->opt->n_threads; ++i) + es->e[i] = ec1buf_init(es->opt, ch), es->e[i]->mode = mode; + kt_for(es->opt->n_threads, worker_ec, es, es->n_seqs); + for (i = 0; i < es->opt->n_threads; ++i) + ec1buf_destroy(es->e[i]); + free(es->e); +} + float fml_correct(const fml_opt_t *opt, int n, fml_seq1_t *seq) { return fml_correct_core(opt, 0, n, seq); diff --git a/bfc.h b/bfc.h new file mode 100644 index 0000000..05c2db3 --- /dev/null +++ b/bfc.h @@ -0,0 +1,153 @@ +#ifndef AC_BFC_H__ +#define AC_BFC_H__ + +#include +#include +#include +#include +#include +#include "htab.h" +/* #include "kmer.h" ... 
this is actually included by htab.h */ +#include "internal.h" +#include "fml.h" +#include "khash.h" + +/* Andreas Tille : It's not clear where jwala took this from and what its purpose might be - commenting out for the moment +#define _cnt_eq(a, b) ((a)>>14 == (b)>>14) +#define _cnt_hash(a) ((a)>>14) +KHASH_INIT(cnt, uint64_t, char, 0, _cnt_hash, _cnt_eq) +typedef khash_t(cnt) cnthash_t; + +struct bfc_ch_s { + int k; + cnthash_t **h; + // private + int l_pre; +}; +*/ + +typedef struct { + int n_threads, q, k, l_pre; + int min_cov; // a k-mer is considered solid if the count is no less than this + + int max_end_ext; + int win_multi_ec; + float min_trim_frac; + + // these ec options cannot be changed on the command line + int w_ec, w_ec_high, w_absent, w_absent_high; + int max_path_diff, max_heap; +} bfc_opt_t; + +/********************** + *** K-mer counting *** + **********************/ + +#define CNT_BUF_SIZE 256 + +typedef struct { // cache to reduce locking + uint64_t y[2]; + int is_high; +} insbuf_t; + +typedef struct { + int k, q; + int n_seqs; + const fml_seq1_t *seqs; + bfc_ch_t *ch; + int *n_buf; + insbuf_t **buf; +} cnt_step_t; + +/*************** + *** Correct *** + ***************/ + +#define BFC_MAX_KMER 63 +#define BFC_MAX_BF_SHIFT 37 + +#define BFC_MAX_PATHS 4 +#define BFC_EC_HIST 5 +#define BFC_EC_HIST_HIGH 2 + +#define BFC_EC_MIN_COV_COEF .1 + +/************************** + * Sequence struct for ec * + **************************/ + +#include "kvec.h" + +typedef struct { // NOTE: unaligned memory + uint8_t b:3, q:1, ob:3, oq:1; + uint8_t dummy; + uint16_t lcov:6, hcov:6, solid_end:1, high_end:1, ec:1, absent:1; + int i; +} ecbase_t; + +typedef kvec_t(ecbase_t) ecseq_t; + +/******************** + * Correct one read * + ********************/ + +#include "ksort.h" + +#define ECCODE_MISC 1 +#define ECCODE_MANY_N 2 +#define ECCODE_NO_SOLID 3 +#define ECCODE_UNCORR_N 4 +#define ECCODE_MANY_FAIL 5 + +typedef struct { + uint32_t ec_code:3, brute:1, n_ec:14, 
n_ec_high:14; + uint32_t n_absent:24, max_heap:8; +} ecstat_t; + +typedef struct { + uint8_t ec:1, ec_high:1, absent:1, absent_high:1, b:4; +} bfc_penalty_t; + +typedef struct { + int tot_pen; + int i; // base position + int k; // position in the stack + int32_t ecpos_high[BFC_EC_HIST_HIGH]; + int32_t ecpos[BFC_EC_HIST]; + bfc_kmer_t x; +} echeap1_t; + +typedef struct { + int parent, i, tot_pen; + uint8_t b; + bfc_penalty_t pen; + uint16_t cnt; +} ecstack1_t; + +typedef struct { + const bfc_opt_t *opt; + const bfc_ch_t *ch; + kvec_t(echeap1_t) heap; + kvec_t(ecstack1_t) stack; + ecseq_t seq, tmp, ec[2]; + int mode; + ecstat_t ori_st; +} bfc_ec1buf_t; + +/******************** + * Error correction * + ********************/ + +typedef struct { + const bfc_opt_t *opt; + const bfc_ch_t *ch; + bfc_ec1buf_t **e; + int64_t n_processed; + int n_seqs, flt_uniq; + fml_seq1_t *seqs; +} ec_step_t; + +void kmer_correct(ec_step_t * es, int mode, bfc_ch_t * ch); +void bfc_opt_init(bfc_opt_t *opt); + +#endif -- 2.30.2