Imported Upstream version 0.9.1+ds

author Afif Elghraoui <afif@ghraoui.name>

Sun, 19 Jun 2016 21:17:37 +0000 (14:17 -0700)

committer Afif Elghraoui <afif@ghraoui.name>

Sun, 19 Jun 2016 21:17:37 +0000 (14:17 -0700)
author Afif Elghraoui <afif@ghraoui.name>
Sun, 19 Jun 2016 21:17:37 +0000 (14:17 -0700)
committer Afif Elghraoui <afif@ghraoui.name>
Sun, 19 Jun 2016 21:17:37 +0000 (14:17 -0700)
diff --git a/INSTALL b/INSTALL

index 30fe77097e52688b68304f6868db642a24ee1f7d..5ddff7f649075536a36a8e547687c35e41396bf2 100644 (file)
--- a/INSTALL
+++ b/INSTALL
@@ -15,7 +15,7 @@ manually modifying one line in Makefile.
  
  curl
  
-Pysam requires Python (2.6 or greater) and Cython (0.22 or greater).
+Pysam requires Python (2.7 or greater) and Cython (0.22 or greater).
  It has not been tested on many other platforms.
  
  Compilation
diff --git a/bcftools/bcftools.h b/bcftools/bcftools.h

index 6f22272b992f7a51af6bcd1b01a18e388a2935b3..d4e856da73b61be66cf5c5bb59387a54220ee5e1 100644 (file)
--- a/bcftools/bcftools.h
+++ b/bcftools/bcftools.h
@@ -26,6 +26,7 @@ THE SOFTWARE.  */
  #define BCFTOOLS_H
  
  #include <stdarg.h>
+#include <htslib/hts_defs.h>
  #include <htslib/vcf.h>
  #include <math.h>
  
@@ -37,7 +38,7 @@ THE SOFTWARE.  */
  #define FT_STDIN (1<<3)
  
  char *bcftools_version(void);
-void error(const char *format, ...);
+void error(const char *format, ...) HTS_NORETURN;
  void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *cmd);
  const char *hts_bcf_wmode(int file_type);
  
diff --git a/bcftools/consensus.c b/bcftools/consensus.c

index 7a615fef0e100fee6012fea89f1551c2da221238..051f3536cb6b2c0e08a81d31ef7516dd97a11dd8 100644 (file)
--- a/bcftools/consensus.c
+++ b/bcftools/consensus.c
@@ -623,7 +623,7 @@ int main_consensus(int argc, char *argv[])
          {"chain",1,0,'c'},
          {0,0,0,0}
      };
-    char c;
+    int c;
      while ((c = getopt_long(argc, argv, "h?s:1iH:f:o:m:c:",loptions,NULL)) >= 0) 
      {
          switch (c) 
diff --git a/bcftools/consensus.c.pysam.c b/bcftools/consensus.c.pysam.c

index 7765d6b2290e30829f30167d9c0621722d3f2b34..91aa5ae0dffc68e37fb816d0a5448a8b41d400c2 100644 (file)
--- a/bcftools/consensus.c.pysam.c
+++ b/bcftools/consensus.c.pysam.c
@@ -87,7 +87,7 @@ args_t;
  
  static chain_t* init_chain(chain_t *chain, int ref_ori_pos)
  {
-//     fprintf(pysamerr, "init_chain(*chain, ref_ori_pos=%d)\n", ref_ori_pos);
+//     fprintf(pysam_stderr, "init_chain(*chain, ref_ori_pos=%d)\n", ref_ori_pos);
      chain = (chain_t*) calloc(1,sizeof(chain_t));
      chain->num = 0;
      chain->block_lengths = NULL;
@@ -157,7 +157,7 @@ static void print_chain(args_t *args)
  
  static void push_chain_gap(chain_t *chain, int ref_start, int ref_len, int alt_start, int alt_len)
  {
-//     fprintf(pysamerr, "push_chain_gap(*chain, ref_start=%d, ref_len=%d, alt_start=%d, alt_len=%d)\n", ref_start, ref_len, alt_start, alt_len);
+//     fprintf(pysam_stderr, "push_chain_gap(*chain, ref_start=%d, ref_len=%d, alt_start=%d, alt_len=%d)\n", ref_start, ref_len, alt_start, alt_len);
      int num = chain->num;
  
      if (ref_start <= chain->ref_last_block_ori) {
@@ -218,7 +218,7 @@ static void init_data(args_t *args)
          args->fp_out = fopen(args->output_fname,"w");
          if ( ! args->fp_out ) error("Failed to create %s: %s\n", args->output_fname, strerror(errno));
      }
-    else args->fp_out = stdout;
+    else args->fp_out = pysam_stdout;
  }
  
  static void destroy_data(args_t *args)
@@ -257,7 +257,7 @@ static void init_region(args_t *args, char *line)
          }
      }
      args->rid = bcf_hdr_name2id(args->hdr,line);
-    if ( args->rid<0 ) fprintf(pysamerr,"Warning: Sequence \"%s\" not in %s\n", line,args->fname);
+    if ( args->rid<0 ) fprintf(pysam_stderr,"Warning: Sequence \"%s\" not in %s\n", line,args->fname);
      args->fa_buf.l = 0;
      args->fa_length = 0;
      args->fa_end_pos = to;
@@ -342,7 +342,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
  
      if ( rec->pos <= args->fa_frz_pos )
      {
-        fprintf(pysamerr,"The site %s:%d overlaps with another variant, skipping...\n", bcf_seqname(args->hdr,rec),rec->pos+1);
+        fprintf(pysam_stderr,"The site %s:%d overlaps with another variant, skipping...\n", bcf_seqname(args->hdr,rec),rec->pos+1);
          return;
      }
      if ( args->mask )
@@ -428,7 +428,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
      }
      else if ( strncasecmp(rec->d.allele[0],args->fa_buf.s+idx,rec->rlen) )
      {
-        // fprintf(pysamerr,"%d .. [%s], idx=%d ori=%d off=%d\n",args->fa_ori_pos,args->fa_buf.s,idx,args->fa_ori_pos,args->fa_mod_off);
+        // fprintf(pysam_stderr,"%d .. [%s], idx=%d ori=%d off=%d\n",args->fa_ori_pos,args->fa_buf.s,idx,args->fa_ori_pos,args->fa_mod_off);
          char tmp = 0;
          if ( args->fa_buf.l - idx > rec->rlen ) 
          { 
@@ -589,23 +589,23 @@ static void consensus(args_t *args)
  
  static void usage(args_t *args)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Create consensus sequence by applying VCF variants to a reference\n");
-    fprintf(pysamerr, "         fasta file.\n");
-    fprintf(pysamerr, "Usage:   bcftools consensus [OPTIONS] <file.vcf>\n");
-    fprintf(pysamerr, "Options:\n");
-    fprintf(pysamerr, "    -f, --fasta-ref <file>     reference sequence in fasta format\n");
-    fprintf(pysamerr, "    -H, --haplotype <1|2>      apply variants for the given haplotype\n");
-    fprintf(pysamerr, "    -i, --iupac-codes          output variants in the form of IUPAC ambiguity codes\n");
-    fprintf(pysamerr, "    -m, --mask <file>          replace regions with N\n");
-    fprintf(pysamerr, "    -o, --output <file>        write output to a file [standard output]\n");
-    fprintf(pysamerr, "    -c, --chain <file>         write a chain file for liftover\n");
-    fprintf(pysamerr, "    -s, --sample <name>        apply variants of the given sample\n");
-    fprintf(pysamerr, "Examples:\n");
-    fprintf(pysamerr, "   # Get the consensus for one region. The fasta header lines are then expected\n");
-    fprintf(pysamerr, "   # in the form \">chr:from-to\".\n");
-    fprintf(pysamerr, "   samtools faidx ref.fa 8:11870-11890 | bcftools consensus in.vcf.gz > out.fa\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Create consensus sequence by applying VCF variants to a reference\n");
+    fprintf(pysam_stderr, "         fasta file.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools consensus [OPTIONS] <file.vcf>\n");
+    fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "    -f, --fasta-ref <file>     reference sequence in fasta format\n");
+    fprintf(pysam_stderr, "    -H, --haplotype <1|2>      apply variants for the given haplotype\n");
+    fprintf(pysam_stderr, "    -i, --iupac-codes          output variants in the form of IUPAC ambiguity codes\n");
+    fprintf(pysam_stderr, "    -m, --mask <file>          replace regions with N\n");
+    fprintf(pysam_stderr, "    -o, --output <file>        write output to a file [standard output]\n");
+    fprintf(pysam_stderr, "    -c, --chain <file>         write a chain file for liftover\n");
+    fprintf(pysam_stderr, "    -s, --sample <name>        apply variants of the given sample\n");
+    fprintf(pysam_stderr, "Examples:\n");
+    fprintf(pysam_stderr, "   # Get the consensus for one region. The fasta header lines are then expected\n");
+    fprintf(pysam_stderr, "   # in the form \">chr:from-to\".\n");
+    fprintf(pysam_stderr, "   samtools faidx ref.fa 8:11870-11890 | bcftools consensus in.vcf.gz > out.fa\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -625,7 +625,7 @@ int main_consensus(int argc, char *argv[])
          {"chain",1,0,'c'},
          {0,0,0,0}
      };
-    char c;
+    int c;
      while ((c = getopt_long(argc, argv, "h?s:1iH:f:o:m:c:",loptions,NULL)) >= 0) 
      {
          switch (c) 
diff --git a/bcftools/convert.c.pysam.c b/bcftools/convert.c.pysam.c

index ee27882a5f52fc8ebd5e6f37d07b4979e1fe694e..084ef50ca96f3c2d30bf94030b37e6683e3665ea 100644 (file)
--- a/bcftools/convert.c.pysam.c
+++ b/bcftools/convert.c.pysam.c
@@ -197,7 +197,7 @@ static void process_info(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
              case BCF_BT_INT32: if ( info->v1.i==bcf_int32_missing ) kputc('.', str); else kputw(info->v1.i, str); break;
              case BCF_BT_FLOAT: if ( bcf_float_is_missing(info->v1.f) ) kputc('.', str); else ksprintf(str, "%g", info->v1.f); break;
              case BCF_BT_CHAR:  kputc(info->v1.i, str); break;
-            default: fprintf(pysamerr,"todo: type %d\n", info->type); exit(1); break;
+            default: fprintf(pysam_stderr,"todo: type %d\n", info->type); exit(1); break;
          }
      }
      else if ( fmt->subscript >=0 )
@@ -218,7 +218,7 @@ static void process_info(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
              case BCF_BT_INT16: BRANCH(int16_t, val==bcf_int16_missing, val==bcf_int16_vector_end, kputw(val, str)); break;
              case BCF_BT_INT32: BRANCH(int32_t, val==bcf_int32_missing, val==bcf_int32_vector_end, kputw(val, str)); break;
              case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(val), bcf_float_is_vector_end(val), ksprintf(str, "%g", val)); break;
-            default: fprintf(pysamerr,"todo: type %d\n", info->type); exit(1); break;
+            default: fprintf(pysam_stderr,"todo: type %d\n", info->type); exit(1); break;
          }
          #undef BRANCH
      }
@@ -730,7 +730,7 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
              else if ( id>=0 && bcf_hdr_idinfo_exists(convert->header,BCF_HL_INFO,id) )
              {
                  fmt->type = T_INFO;
-                fprintf(pysamerr,"Warning: Assuming INFO/%s\n", key);
+                fprintf(pysam_stderr,"Warning: Assuming INFO/%s\n", key);
              }
          }
      }
@@ -896,7 +896,7 @@ convert_t *convert_init(bcf_hdr_t *hdr, int *samples, int nsamples, const char *
      char *p = convert->format_str;
      while ( *p )
      {
-        //fprintf(pysamerr,"<%s>\n", p);
+        //fprintf(pysam_stderr,"<%s>\n", p);
          switch (*p)
          {
              case '[': is_gtf = 1; p++; break;
diff --git a/bcftools/em.c.pysam.c b/bcftools/em.c.pysam.c

index 758d919ec269eb9dca9f0ffd7e1696145597f537..81091528ee1c15b6912903fe94ed8a3ea518fd6b 100644 (file)
--- a/bcftools/em.c.pysam.c
+++ b/bcftools/em.c.pysam.c
@@ -74,7 +74,7 @@ static double prob1(double f, void *data)
      minaux1_t *a = (minaux1_t*)data;
      double p = 1., l = 0., f3[3];
      int i;
-//  printf("brent %lg\n", f);
+//  fprintf(pysam_stdout, "brent %lg\n", f);
      if (f < 0 || f > 1) return 1e300;
      f3[0] = (1.-f)*(1.-f); f3[1] = 2.*f*(1.-f); f3[2] = f*f;
      for (i = a->beg; i < a->end; ++i) {
@@ -90,7 +90,7 @@ static double freq_iter(double *f, const double *_pdg, int beg, int end)
  {
      double f0 = *f, f3[3], err;
      int i;
-//  printf("em %lg\n", *f);
+//  fprintf(pysam_stdout, "em %lg\n", *f);
      f3[0] = (1.-f0)*(1.-f0); f3[1] = 2.*f0*(1.-f0); f3[2] = f0*f0;
      for (i = beg, f0 = 0.; i < end; ++i) {
          const double *pdg = _pdg + i * 3;
@@ -128,7 +128,7 @@ static double g3_iter(double g[3], const double *_pdg, int beg, int end)
      double err, gg[3];
      int i;
      gg[0] = gg[1] = gg[2] = 0.;
-//  printf("%lg,%lg,%lg\n", g[0], g[1], g[2]);
+//  fprintf(pysam_stdout, "%lg,%lg,%lg\n", g[0], g[1], g[2]);
      for (i = beg; i < end; ++i) {
          double sum, tmp[3];
          const double *pdg = _pdg + i * 3;
@@ -237,7 +237,7 @@ static int pair_freq_iter(int n, double *pdg[2], double f[4])
  {
      double ff[4];
      int i, k, h;
-//  printf("%lf,%lf,%lf,%lf\n", f[0], f[1], f[2], f[3]);
+//  fprintf(pysam_stdout, "%lf,%lf,%lf,%lf\n", f[0], f[1], f[2], f[3]);
      memset(ff, 0, 4 * sizeof(double));
      for (i = 0; i < n; ++i) {
          double *p[2], sum, tmp;
diff --git a/bcftools/filter.c.pysam.c b/bcftools/filter.c.pysam.c

index 7520106dc59c60c5907f3359e14164fa93d52869..531339e22e420bf4b3605c15b7db0910a822c144 100644 (file)
--- a/bcftools/filter.c.pysam.c
+++ b/bcftools/filter.c.pysam.c
@@ -360,7 +360,7 @@ static int bcf_get_info_value(bcf1_t *line, int info_id, int ivec, void *value)
          case BCF_BT_INT16: BRANCH(int16_t, p[j]==bcf_int16_missing, p[j]==bcf_int16_vector_end, int); break;
          case BCF_BT_INT32: BRANCH(int32_t, p[j]==bcf_int32_missing, p[j]==bcf_int32_vector_end, int); break;
          case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(p[j]), bcf_float_is_vector_end(p[j]), float); break;
-        default: fprintf(pysamerr,"todo: type %d\n", info->type); exit(1); break;
+        default: fprintf(pysam_stderr,"todo: type %d\n", info->type); exit(1); break;
      }
      #undef BRANCH
      return -1;  // this shouldn't happen
@@ -586,7 +586,7 @@ gt_length_too_big:
              case BCF_BT_INT8:  BRANCH(int8_t); break;
              case BCF_BT_INT16: BRANCH(int16_t); break;
              case BCF_BT_INT32: BRANCH(int32_t); break;
-            default: fprintf(pysamerr,"FIXME: type %d in bcf_format_gt?\n", fmt->type); abort(); break;
+            default: fprintf(pysam_stderr,"FIXME: type %d in bcf_format_gt?\n", fmt->type); abort(); break;
          }
          #undef BRANCH
  
@@ -1045,7 +1045,7 @@ static int vector_logic_or(token_t *atok, token_t *btok, int or_type)
          { \
              if ( (atok)->values[0] CMP_OP (btok)->values[0] ) { pass_site = 1; } \
          } \
-        /*fprintf(pysamerr,"pass=%d\n", pass_site);*/ \
+        /*fprintf(pysam_stderr,"pass=%d\n", pass_site);*/ \
          (ret) = pass_site; \
      } \
  }
@@ -1394,16 +1394,16 @@ static void filter_debug_print(token_t *toks, token_t **tok_ptrs, int ntoks)
          if ( tok->tok_type==TOK_VAL )
          {
              if ( tok->key )
-                fprintf(pysamerr,"%s", tok->key);
+                fprintf(pysam_stderr,"%s", tok->key);
              else if ( tok->tag )
-                fprintf(pysamerr,"%s", tok->tag);
+                fprintf(pysam_stderr,"%s", tok->tag);
              else
-                fprintf(pysamerr,"%e", tok->threshold);
+                fprintf(pysam_stderr,"%e", tok->threshold);
          }
          else
-            fprintf(pysamerr,"%c", TOKEN_STRING[tok->tok_type]);
-        if ( tok->setter ) fprintf(pysamerr,"\t[setter %p]", tok->setter);
-        fprintf(pysamerr,"\n");
+            fprintf(pysam_stderr,"%c", TOKEN_STRING[tok->tok_type]);
+        if ( tok->setter ) fprintf(pysam_stderr,"\t[setter %p]", tok->setter);
+        fprintf(pysam_stderr,"\n");
      }
  }
  
@@ -1427,8 +1427,8 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
          ret = filters_next_token(&tmp, &len);
          if ( ret==-1 ) error("Missing quotes in: %s\n", str);
  
-        //fprintf(pysamerr,"token=[%c] .. [%s] %d\n", TOKEN_STRING[ret], tmp, len);
-        //int i; for (i=0; i<nops; i++) fprintf(pysamerr," .%c.", TOKEN_STRING[ops[i]]); fprintf(pysamerr,"\n");
+        //fprintf(pysam_stderr,"token=[%c] .. [%s] %d\n", TOKEN_STRING[ret], tmp, len);
+        //int i; for (i=0; i<nops; i++) fprintf(pysam_stderr," .%c.", TOKEN_STRING[ops[i]]); fprintf(pysam_stderr,"\n");
  
          if ( ret==TOK_LFT )         // left bracket
          {
diff --git a/bcftools/khash_str2str.h b/bcftools/khash_str2str.h

index ecf4e0b46150334706c58a53820944468951d132..4a5bd12789196a1160eedce189f1b1506ffeb7bf 100644 (file)
--- a/bcftools/khash_str2str.h
+++ b/bcftools/khash_str2str.h
@@ -1,6 +1,6 @@
  /*  khash_str2str.h -- C-string to C-string hash table.
  
-    Copyright (C) 2014 Genome Research Ltd.
+    Copyright (C) 2014,2016 Genome Research Ltd.
  
      Author: Petr Danecek <pd3@sanger.ac.uk>
  
@@ -60,6 +60,23 @@ static inline void khash_str2str_destroy_free(void *_hash)
      kh_destroy(str2str, hash);
  }
  
+/*
+ *  Destroys the hash structure, the keys and the values
+ */
+static inline void khash_str2str_destroy_free_all(void *_hash)
+{
+    khash_t(str2str) *hash = (khash_t(str2str)*)_hash;
+    khint_t k;
+    if (hash == 0) return;
+    for (k = 0; k < kh_end(hash); ++k)
+        if (kh_exist(hash, k))
+        {
+            free((char*)kh_key(hash, k));
+            free((char*)kh_val(hash, k));
+        }
+    kh_destroy(str2str, hash);
+}
+
  /*
   *  Returns value if key exists or NULL if not
   */
diff --git a/bcftools/main.c b/bcftools/main.c

index f08b5c7e1b6d4866914a7c6820d866a10465e9b5..1892c1de59ba855cbef80d504a16df8582c18900 100644 (file)
--- a/bcftools/main.c
+++ b/bcftools/main.c
@@ -1,6 +1,6 @@
  /*  main.c -- main bcftools command front-end.
  
-    Copyright (C) 2012-2015 Genome Research Ltd.
+    Copyright (C) 2012-2016 Genome Research Ltd.
  
      Author: Petr Danecek <pd3@sanger.ac.uk>
  
@@ -219,7 +219,7 @@ int main(int argc, char *argv[])
      if (argc < 2) { usage(stderr); return 1; }
  
      if (strcmp(argv[1], "version") == 0 || strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-v") == 0) {
-        printf("bcftools %s\nUsing htslib %s\nCopyright (C) 2015 Genome Research Ltd.\n", bcftools_version(), hts_version());
+        printf("bcftools %s\nUsing htslib %s\nCopyright (C) 2016 Genome Research Ltd.\n", bcftools_version(), hts_version());
  #if USE_GPL
          printf("License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n");
  #else
diff --git a/bcftools/main.c.pysam.c b/bcftools/main.c.pysam.c

index f180e5683db6c20f22d81262fb70dcface4e7132..f578442729a1ada4592cecf6736b76a5cd89d774 100644 (file)
--- a/bcftools/main.c.pysam.c
+++ b/bcftools/main.c.pysam.c
@@ -2,7 +2,7 @@
  
  /*  main.c -- main bcftools command front-end.
  
-    Copyright (C) 2012-2015 Genome Research Ltd.
+    Copyright (C) 2012-2016 Genome Research Ltd.
  
      Author: Petr Danecek <pd3@sanger.ac.uk>
  
@@ -218,24 +218,24 @@ static void usage(FILE *fp)
  
  int bcftools_main(int argc, char *argv[])
  {
-    if (argc < 2) { usage(pysamerr); return 1; }
+    if (argc < 2) { usage(pysam_stderr); return 1; }
  
      if (strcmp(argv[1], "version") == 0 || strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-v") == 0) {
-        printf("bcftools %s\nUsing htslib %s\nCopyright (C) 2015 Genome Research Ltd.\n", bcftools_version(), hts_version());
+        fprintf(pysam_stdout, "bcftools %s\nUsing htslib %s\nCopyright (C) 2016 Genome Research Ltd.\n", bcftools_version(), hts_version());
  #if USE_GPL
-        printf("License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n");
+        fprintf(pysam_stdout, "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n");
  #else
-        printf("License Expat: The MIT/Expat license\n");
+        fprintf(pysam_stdout, "License Expat: The MIT/Expat license\n");
  #endif
-        printf("This is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n");
+        fprintf(pysam_stdout, "This is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n");
          return 0;
      }
      else if (strcmp(argv[1], "--version-only") == 0) {
-        printf("%s+htslib-%s\n", bcftools_version(), hts_version());
+        fprintf(pysam_stdout, "%s+htslib-%s\n", bcftools_version(), hts_version());
          return 0;
      }
      else if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) {
-        if (argc == 2) { usage(stdout); return 0; }
+        if (argc == 2) { usage(pysam_stdout); return 0; }
          // Otherwise change "bcftools help COMMAND [...]" to "bcftools COMMAND";
          // main_xyz() functions by convention display the subcommand's usage
          // when invoked without any arguments.
@@ -260,7 +260,7 @@ int bcftools_main(int argc, char *argv[])
          }
          i++;
      }
-    fprintf(pysamerr, "[E::%s] unrecognized command '%s'\n", __func__, argv[1]);
+    fprintf(pysam_stderr, "[E::%s] unrecognized command '%s'\n", __func__, argv[1]);
      return 1;
  }
  
diff --git a/bcftools/mcall.c.pysam.c b/bcftools/mcall.c.pysam.c

index b4c4a99c9a4f1caf321c37906ecf978fb90e23bd..29ed79976a96b72f6870787a26bd01fb40b21f9c 100644 (file)
--- a/bcftools/mcall.c.pysam.c
+++ b/bcftools/mcall.c.pysam.c
@@ -288,7 +288,7 @@ void mcall_init(call_t *call)
          call->theta *= aM;
          if ( call->theta >= 1 )
          {
-            fprintf(pysamerr,"The prior is too big (theta*aM=%.2f), going with 0.99\n", call->theta);
+            fprintf(pysam_stderr,"The prior is too big (theta*aM=%.2f), going with 0.99\n", call->theta);
              call->theta = 0.99;
          }
          call->theta = log(call->theta);
@@ -516,13 +516,13 @@ float calc_ICB(int nref, int nalt, int nhets, int ndiploid)
      double q = 2*fref*falt;                 // probability of a het, assuming HWE
      double mean = q*ndiploid;
  
-    //fprintf(pysamerr,"\np=%e N=%d k=%d  .. nref=%d nalt=%d nhets=%d ndiploid=%d\n", q,ndiploid,nhets, nref,nalt,nhets,ndiploid);
+    //fprintf(pysam_stderr,"\np=%e N=%d k=%d  .. nref=%d nalt=%d nhets=%d ndiploid=%d\n", q,ndiploid,nhets, nref,nalt,nhets,ndiploid);
  
      // Can we use normal approximation? The second condition is for performance only
      // and is not well justified.
      if ( (mean>10 && (1-q)*ndiploid>10 ) || ndiploid>200 )
      {
-        //fprintf(pysamerr,"out: mean=%e  p=%e\n", mean,exp(-0.5*(nhets-mean)*(nhets-mean)/(mean*(1-q))));
+        //fprintf(pysam_stderr,"out: mean=%e  p=%e\n", mean,exp(-0.5*(nhets-mean)*(nhets-mean)/(mean*(1-q))));
          return exp(-0.5*(nhets-mean)*(nhets-mean)/(mean*(1-q)));
      }
  
@@ -1032,12 +1032,12 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
                  if ( igt==GT_SKIP ) continue;
                  lk += gl[igt];
                  npresent++;
-                // fprintf(pysamerr," %e", gl[igt]);
+                // fprintf(pysam_stderr," %e", gl[igt]);
              }
-            // fprintf(pysamerr,"\t\t");
+            // fprintf(pysam_stderr,"\t\t");
              double Pkij = npresent==3 ? (double)2/(trio[itr]>>12) : 1;  // with missing genotypes Pkij's are different
              lk += log(1 - trio_Pm * (1 - Pkij));
-            // fprintf(pysamerr,"%d%d%d\t%e\t%.2f\n", trio[itr]>>8&0xf,trio[itr]>>4&0xf,trio[itr]&0xf, lk, Pkij);
+            // fprintf(pysam_stderr,"%d%d%d\t%e\t%.2f\n", trio[itr]>>8&0xf,trio[itr]>>4&0xf,trio[itr]&0xf, lk, Pkij);
              if ( c_lk < lk ) { c_lk = lk; c_itr = trio[itr]; }
              if ( uc_itr==trio[itr] ) uc_is_mendelian = 1;
          }
@@ -1045,10 +1045,10 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
          if ( !uc_is_mendelian )
          {
              uc_lk += log(1 - trio_Pm);
-            // fprintf(pysamerr,"c_lk=%e uc_lk=%e c_itr=%d%d%d uc_itr=%d%d%d\n", c_lk,uc_lk,c_itr>>8&0xf,c_itr>>4&0xf,c_itr&0xf,uc_itr>>8&0xf,uc_itr>>4&0xf,uc_itr&0xf);
+            // fprintf(pysam_stderr,"c_lk=%e uc_lk=%e c_itr=%d%d%d uc_itr=%d%d%d\n", c_lk,uc_lk,c_itr>>8&0xf,c_itr>>4&0xf,c_itr&0xf,uc_itr>>8&0xf,uc_itr>>4&0xf,uc_itr&0xf);
              if ( c_lk < uc_lk ) { c_lk = uc_lk; c_itr = uc_itr; }
          }
-        // fprintf(pysamerr,"best_lk=%e best_itr=%d%d%d uc_itr=%d%d%d\n", c_lk,c_itr>>8&0xf,c_itr>>4&0xf,c_itr&0xf,uc_itr>>8&0xf,uc_itr>>4&0xf,uc_itr&0xf);
+        // fprintf(pysam_stderr,"best_lk=%e best_itr=%d%d%d uc_itr=%d%d%d\n", c_lk,c_itr>>8&0xf,c_itr>>4&0xf,c_itr&0xf,uc_itr>>8&0xf,uc_itr>>4&0xf,uc_itr&0xf);
  
          // Set genotypes for father, mother, child and calculate genotype qualities
          for (i=0; i<3; i++)
@@ -1429,7 +1429,7 @@ int mcall(call_t *call, bcf1_t *rec)
      int out_als, nout;
      if ( nals > 8*sizeof(out_als) )
      { 
-        fprintf(pysamerr,"Too many alleles at %s:%d, skipping.\n", bcf_seqname(call->hdr,rec),rec->pos+1); 
+        fprintf(pysam_stderr,"Too many alleles at %s:%d, skipping.\n", bcf_seqname(call->hdr,rec),rec->pos+1); 
          return 0; 
      }
      nout = mcall_find_best_alleles(call, nals, &out_als);
@@ -1473,7 +1473,7 @@ int mcall(call_t *call, bcf1_t *rec)
          {
              if ( nout>4 ) 
              { 
-                fprintf(pysamerr,"Too many alleles at %s:%d, skipping.\n", bcf_seqname(call->hdr,rec),rec->pos+1); 
+                fprintf(pysam_stderr,"Too many alleles at %s:%d, skipping.\n", bcf_seqname(call->hdr,rec),rec->pos+1); 
                  return 0; 
              }
              mcall_call_trio_genotypes(call, rec, nals,nout,out_als);
diff --git a/bcftools/ploidy.c b/bcftools/ploidy.c

index 160bc3eaea0f8ee05af26cd66dcd42d8b3f9d138..719e1753c327cd2fd64483657ddfd0af6ee4291f 100644 (file)
--- a/bcftools/ploidy.c
+++ b/bcftools/ploidy.c
@@ -1,5 +1,5 @@
-/* 
-    Copyright (C) 2014 Genome Research Ltd.
+/*
+    Copyright (C) 2014-2016 Genome Research Ltd.
  
      Author: Petr Danecek <pd3@sanger.ac.uk>
  
@@ -98,7 +98,7 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
          ploidy->id2sex[ploidy->nsex-1] = strdup(ploidy->tmp_str.s);
          sp->sex = khash_str2int_inc(ploidy->sex2id, ploidy->id2sex[ploidy->nsex-1]);
          ploidy->sex2dflt = (int*) realloc(ploidy->sex2dflt,sizeof(int)*ploidy->nsex);
-        ploidy->sex2dflt[ploidy->nsex-1] = ploidy->dflt;
+        ploidy->sex2dflt[ploidy->nsex-1] = -1;
      }
  
      ss = se;
@@ -106,8 +106,8 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
      if ( !*se ) error("Could not parse: %s\n", line);
      sp->ploidy = strtol(ss,&se,10);
      if ( ss==se ) error("Could not parse: %s\n", line);
-    if ( sp->ploidy < ploidy->min ) ploidy->min = sp->ploidy;
-    if ( sp->ploidy > ploidy->max ) ploidy->max = sp->ploidy;
+    if ( ploidy->min<0 || sp->ploidy < ploidy->min ) ploidy->min = sp->ploidy;
+    if ( ploidy->max<0 || sp->ploidy > ploidy->max ) ploidy->max = sp->ploidy;
  
      // Special case, chr="*" stands for a default value
      if ( default_ploidy_def )
@@ -119,19 +119,32 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
      return 0;
  }
  
+static void _set_defaults(ploidy_t *ploidy, int dflt)
+{
+    int i;
+    if ( khash_str2int_get(ploidy->sex2id, "*", &i) == 0 ) dflt = ploidy->sex2dflt[i];
+    for (i=0; i<ploidy->nsex; i++)
+        if ( ploidy->sex2dflt[i]==-1 ) ploidy->sex2dflt[i] = dflt;
+
+    ploidy->dflt = dflt;
+    if ( ploidy->min<0 || dflt < ploidy->min ) ploidy->min = dflt;
+    if ( ploidy->max<0 || dflt > ploidy->max ) ploidy->max = dflt;
+}
+
  ploidy_t *ploidy_init(const char *fname, int dflt)
  {
      ploidy_t *pld = (ploidy_t*) calloc(1,sizeof(ploidy_t));
      if ( !pld ) return NULL;
  
-    pld->dflt = pld->min = pld->max = dflt;
+    pld->min = pld->max = -1;
      pld->sex2id = khash_str2int_init();
      pld->idx = regidx_init(fname,ploidy_parse,NULL,sizeof(sex_ploidy_t),pld);
      if ( !pld->idx )
      {
          ploidy_destroy(pld);
-        pld = NULL;
+        return NULL;
      }
+    _set_defaults(pld,dflt);
      return pld;
  }
  
@@ -140,7 +153,7 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
      ploidy_t *pld = (ploidy_t*) calloc(1,sizeof(ploidy_t));
      if ( !pld ) return NULL;
  
-    pld->dflt = pld->min = pld->max = dflt;
+    pld->min = pld->max = -1;
      pld->sex2id = khash_str2int_init();
      pld->idx = regidx_init(NULL,ploidy_parse,NULL,sizeof(sex_ploidy_t),pld);
  
@@ -160,6 +173,7 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
      regidx_insert(pld->idx,NULL);
      free(tmp.s);
  
+    _set_defaults(pld,dflt);
      return pld;
  }
  
diff --git a/bcftools/ploidy.c.pysam.c b/bcftools/ploidy.c.pysam.c

index 4f567a37117592a337dbe5b260214a6227c1e65f..d0468b9f920d039982fd708f6ce886ac02c1e0e3 100644 (file)
--- a/bcftools/ploidy.c.pysam.c
+++ b/bcftools/ploidy.c.pysam.c
@@ -1,7 +1,7 @@
  #include "pysam.h"
  
-/* 
-    Copyright (C) 2014 Genome Research Ltd.
+/*
+    Copyright (C) 2014-2016 Genome Research Ltd.
  
      Author: Petr Danecek <pd3@sanger.ac.uk>
  
@@ -100,7 +100,7 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
          ploidy->id2sex[ploidy->nsex-1] = strdup(ploidy->tmp_str.s);
          sp->sex = khash_str2int_inc(ploidy->sex2id, ploidy->id2sex[ploidy->nsex-1]);
          ploidy->sex2dflt = (int*) realloc(ploidy->sex2dflt,sizeof(int)*ploidy->nsex);
-        ploidy->sex2dflt[ploidy->nsex-1] = ploidy->dflt;
+        ploidy->sex2dflt[ploidy->nsex-1] = -1;
      }
  
      ss = se;
@@ -108,8 +108,8 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
      if ( !*se ) error("Could not parse: %s\n", line);
      sp->ploidy = strtol(ss,&se,10);
      if ( ss==se ) error("Could not parse: %s\n", line);
-    if ( sp->ploidy < ploidy->min ) ploidy->min = sp->ploidy;
-    if ( sp->ploidy > ploidy->max ) ploidy->max = sp->ploidy;
+    if ( ploidy->min<0 || sp->ploidy < ploidy->min ) ploidy->min = sp->ploidy;
+    if ( ploidy->max<0 || sp->ploidy > ploidy->max ) ploidy->max = sp->ploidy;
  
      // Special case, chr="*" stands for a default value
      if ( default_ploidy_def )
@@ -121,19 +121,32 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
      return 0;
  }
  
+static void _set_defaults(ploidy_t *ploidy, int dflt)
+{
+    int i;
+    if ( khash_str2int_get(ploidy->sex2id, "*", &i) == 0 ) dflt = ploidy->sex2dflt[i];
+    for (i=0; i<ploidy->nsex; i++)
+        if ( ploidy->sex2dflt[i]==-1 ) ploidy->sex2dflt[i] = dflt;
+
+    ploidy->dflt = dflt;
+    if ( ploidy->min<0 || dflt < ploidy->min ) ploidy->min = dflt;
+    if ( ploidy->max<0 || dflt > ploidy->max ) ploidy->max = dflt;
+}
+
  ploidy_t *ploidy_init(const char *fname, int dflt)
  {
      ploidy_t *pld = (ploidy_t*) calloc(1,sizeof(ploidy_t));
      if ( !pld ) return NULL;
  
-    pld->dflt = pld->min = pld->max = dflt;
+    pld->min = pld->max = -1;
      pld->sex2id = khash_str2int_init();
      pld->idx = regidx_init(fname,ploidy_parse,NULL,sizeof(sex_ploidy_t),pld);
      if ( !pld->idx )
      {
          ploidy_destroy(pld);
-        pld = NULL;
+        return NULL;
      }
+    _set_defaults(pld,dflt);
      return pld;
  }
  
@@ -142,7 +155,7 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
      ploidy_t *pld = (ploidy_t*) calloc(1,sizeof(ploidy_t));
      if ( !pld ) return NULL;
  
-    pld->dflt = pld->min = pld->max = dflt;
+    pld->min = pld->max = -1;
      pld->sex2id = khash_str2int_init();
      pld->idx = regidx_init(NULL,ploidy_parse,NULL,sizeof(sex_ploidy_t),pld);
  
@@ -162,6 +175,7 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
      regidx_insert(pld->idx,NULL);
      free(tmp.s);
  
+    _set_defaults(pld,dflt);
      return pld;
  }
  
diff --git a/bcftools/prob1.c.pysam.c b/bcftools/prob1.c.pysam.c

index bad2478d7a769ccb7e22a5a35cde534b29ce4019..a59ec44c39aa5e000d6eb7126ebb657fe92132bf 100644 (file)
--- a/bcftools/prob1.c.pysam.c
+++ b/bcftools/prob1.c.pysam.c
@@ -128,7 +128,7 @@ int bcf_p1_set_n1(bcf_p1aux_t *b, int n1)
  {
      if (n1 == 0 || n1 >= b->n) return -1;
      if (b->M != b->n * 2) {
-        fprintf(pysamerr, "[%s] unable to set `n1' when there are haploid samples.\n", __func__);
+        fprintf(pysam_stderr, "[%s] unable to set `n1' when there are haploid samples.\n", __func__);
          return -1;
      }
      b->n1 = n1;
@@ -523,9 +523,9 @@ int bcf_p1_cal(call_t *call, bcf1_t *b, int do_contrast, bcf_p1aux_t *ma, bcf_p1
  void bcf_p1_dump_afs(bcf_p1aux_t *ma)
  {
      int k;
-    fprintf(pysamerr, "[afs]");
+    fprintf(pysam_stderr, "[afs]");
      for (k = 0; k <= ma->M; ++k)
-        fprintf(pysamerr, " %d:%.3lf", k, ma->afs[ma->M - k]);
-    fprintf(pysamerr, "\n");
+        fprintf(pysam_stderr, " %d:%.3lf", k, ma->afs[ma->M - k]);
+    fprintf(pysam_stderr, "\n");
      memset(ma->afs, 0, sizeof(double) * (ma->M + 1));
  }
diff --git a/bcftools/pysam.h b/bcftools/pysam.h

index 008cbbd9ee9e0b3ed79c1c4660a3066c70732770..b0fc4fb565eabbd084b3507fb049a87ad821d106 100644 (file)
--- a/bcftools/pysam.h
+++ b/bcftools/pysam.h
@@ -1,5 +1,7 @@
  #ifndef PYSAM_H
  #define PYSAM_H
  #include "stdio.h"
-extern FILE * pysamerr;
+extern FILE * pysam_stderr;
+extern FILE * pysam_stdout;
+extern const char * pysam_stdout_fn;
  #endif
diff --git a/bcftools/tabix.c.pysam.c b/bcftools/tabix.c.pysam.c

index 0eb328fb1164ba5176f6b9d8375df47f3d3bf35c..afa361920699715c94c280bc0468d5e980452f0b 100644 (file)
--- a/bcftools/tabix.c.pysam.c
+++ b/bcftools/tabix.c.pysam.c
@@ -52,24 +52,24 @@ int main_tabix(int argc, char *argv[])
              else if (strcmp(optarg, "sam") == 0) conf_ptr = &tbx_conf_sam;
              else if (strcmp(optarg, "vcf") == 0) conf_ptr = &tbx_conf_vcf;
              else {
-                fprintf(pysamerr, "The type '%s' not recognised\n", optarg);
+                fprintf(pysam_stderr, "The type '%s' not recognised\n", optarg);
                  return 1;
              }
  
          }
      if (optind == argc) {
-        fprintf(pysamerr, "\nUsage: bcftools tabix [options] <in.gz> [reg1 [...]]\n\n");
-        fprintf(pysamerr, "Options: -p STR    preset: gff, bed, sam or vcf [gff]\n");
-        fprintf(pysamerr, "         -s INT    column number for sequence names (suppressed by -p) [1]\n");
-        fprintf(pysamerr, "         -b INT    column number for region start [4]\n");
-        fprintf(pysamerr, "         -e INT    column number for region end (if no end, set INT to -b) [5]\n");
-        fprintf(pysamerr, "         -0        specify coordinates are zero-based\n");
-        fprintf(pysamerr, "         -S INT    skip first INT lines [0]\n");
-        fprintf(pysamerr, "         -c CHAR   skip lines starting with CHAR [null]\n");
-        fprintf(pysamerr, "         -a        print all records\n");
-        fprintf(pysamerr, "         -f        force to overwrite existing index\n");
-        fprintf(pysamerr, "         -m INT    set the minimal interval size to 1<<INT; 0 for the old tabix index [0]\n");
-        fprintf(pysamerr, "\n");
+        fprintf(pysam_stderr, "\nUsage: bcftools tabix [options] <in.gz> [reg1 [...]]\n\n");
+        fprintf(pysam_stderr, "Options: -p STR    preset: gff, bed, sam or vcf [gff]\n");
+        fprintf(pysam_stderr, "         -s INT    column number for sequence names (suppressed by -p) [1]\n");
+        fprintf(pysam_stderr, "         -b INT    column number for region start [4]\n");
+        fprintf(pysam_stderr, "         -e INT    column number for region end (if no end, set INT to -b) [5]\n");
+        fprintf(pysam_stderr, "         -0        specify coordinates are zero-based\n");
+        fprintf(pysam_stderr, "         -S INT    skip first INT lines [0]\n");
+        fprintf(pysam_stderr, "         -c CHAR   skip lines starting with CHAR [null]\n");
+        fprintf(pysam_stderr, "         -a        print all records\n");
+        fprintf(pysam_stderr, "         -f        force to overwrite existing index\n");
+        fprintf(pysam_stderr, "         -m INT    set the minimal interval size to 1<<INT; 0 for the old tabix index [0]\n");
+        fprintf(pysam_stderr, "\n");
          return 1;
      }
      if (is_all) { // read without random access
@@ -77,7 +77,7 @@ int main_tabix(int argc, char *argv[])
          BGZF *fp;
          s.l = s.m = 0; s.s = 0;
          fp = bgzf_open(argv[optind], "r");
-        while (bgzf_getline(fp, '\n', &s) >= 0) puts(s.s);
+        while (bgzf_getline(fp, '\n', &s) >= 0) fputs(s.s, pysam_stdout) & fputc('\n', pysam_stdout);
          bgzf_close(fp);
          free(s.s);
      } else if (optind + 2 > argc) { // create index
@@ -100,13 +100,13 @@ int main_tabix(int argc, char *argv[])
              strcat(strcpy(fn, argv[optind]), min_shift <= 0? ".tbi" : ".csi");
              if ((fp = fopen(fn, "rb")) != 0) {
                  fclose(fp);
-                fprintf(pysamerr, "[E::%s] the index file exists; use option '-f' to overwrite\n", __func__);
+                fprintf(pysam_stderr, "[E::%s] the index file exists; use option '-f' to overwrite\n", __func__);
                  return 1;
              }
          }
          if ( tbx_index_build(argv[optind], min_shift, &conf) )
          {
-            fprintf(pysamerr,"tbx_index_build failed: Is the file bgzip-compressed? Was wrong -p [type] option used?\n");
+            fprintf(pysam_stderr,"tbx_index_build failed: Is the file bgzip-compressed? Was wrong -p [type] option used?\n");
              return 1;
          }
      } else { // read with random access
@@ -120,7 +120,7 @@ int main_tabix(int argc, char *argv[])
          for (i = optind + 1; i < argc; ++i) {
              hts_itr_t *itr;
              if ((itr = tbx_itr_querys(tbx, argv[i])) == 0) continue;
-            while (tbx_bgzf_itr_next(fp, tbx, itr, &s) >= 0) puts(s.s);
+            while (tbx_bgzf_itr_next(fp, tbx, itr, &s) >= 0) fputs(s.s, pysam_stdout) & fputc('\n', pysam_stdout);
              tbx_itr_destroy(itr);
          }
          free(s.s);
diff --git a/bcftools/vcfannotate.c b/bcftools/vcfannotate.c

index 96a1649c5a9e3b76de35b065db9017cedb777d2e..d5164f3332aae6adbb523da8a127928fd35e1c5a 100644 (file)
--- a/bcftools/vcfannotate.c
+++ b/bcftools/vcfannotate.c
@@ -1,6 +1,6 @@
  /*  vcfannotate.c -- Annotate and edit VCF/BCF files.
  
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2016 Genome Research Ltd.
  
      Author: Petr Danecek <pd3@sanger.ac.uk>
  
@@ -120,7 +120,7 @@ typedef struct _args_t
  
      char **argv, *output_fname, *targets_fname, *regions_list, *header_fname;
      char *remove_annots, *columns, *rename_chrs, *sample_names, *mark_sites;
-    int argc, drop_header, tgts_is_vcf, mark_sites_logic;
+    int argc, drop_header, record_cmd_line, tgts_is_vcf, mark_sites_logic;
  }
  args_t;
  
@@ -809,6 +809,135 @@ static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, vo
          return bcf_update_genotypes(args->hdr_out,line,args->tmpi3,nsrc*bcf_hdr_nsamples(args->hdr_out));
      }
  }
+static int count_vals(annot_line_t *tab, int icol_beg, int icol_end)
+{
+    int i, nmax = 0;
+    for (i=icol_beg; i<icol_end; i++)
+    {
+        char *str = tab->cols[i], *end = str;
+        if ( str[0]=='.' && !str[1] ) 
+        {
+            // missing value
+            if ( !nmax ) nmax = 1;
+            continue;
+        }
+        int n = 1;
+        while ( *end )
+        {
+            if ( *end==',' ) n++;
+            end++;
+        }
+        if ( nmax<n ) nmax = n;
+    }
+    return nmax;
+}
+static int setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    annot_line_t *tab = (annot_line_t*) data;
+    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
+    assert( col->icol+nsmpl <= tab->ncols );
+    int nvals = count_vals(tab,col->icol,col->icol+nsmpl);
+    assert( nvals>0 );
+    hts_expand(int32_t,nvals*nsmpl,args->mtmpi,args->tmpi);
+
+    int icol = col->icol, ismpl;
+    for (ismpl=0; ismpl<nsmpl; ismpl++)
+    {
+        int32_t *ptr = args->tmpi + ismpl*nvals;
+        int ival = 0;
+
+        char *str = tab->cols[icol];
+        while ( *str )
+        {
+            if ( str[0]=='.' && (!str[1] || str[1]==',') )  // missing value
+            {
+                ptr[ival++] = bcf_int32_missing;
+                str += str[1] ? 2 : 1;
+                continue;
+            }
+
+            char *end = str;
+            ptr[ival] = strtol(str, &end, 10); 
+            if ( end==str )
+                error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+
+            ival++;
+            str = *end ? end+1 : end;
+        }
+        while ( ival<nvals ) ptr[ival++] = bcf_int32_vector_end;
+        icol++;
+    }
+    return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi,nsmpl*nvals);
+}
+static int setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    annot_line_t *tab = (annot_line_t*) data;
+    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
+    assert( col->icol+nsmpl <= tab->ncols );
+    int nvals = count_vals(tab,col->icol,col->icol+nsmpl);
+    assert( nvals>0 );
+    hts_expand(float,nvals*nsmpl,args->mtmpf,args->tmpf);
+
+    int icol = col->icol, ismpl;
+    for (ismpl=0; ismpl<nsmpl; ismpl++)
+    {
+        float *ptr = args->tmpf + ismpl*nvals;
+        int ival = 0;
+
+        char *str = tab->cols[icol];
+        while ( *str )
+        {
+            if ( str[0]=='.' && (!str[1] || str[1]==',') )  // missing value
+            {
+                bcf_float_set_missing(ptr[ival]); 
+                ival++;
+                str += str[1] ? 2 : 1;
+                continue;
+            }
+
+            char *end = str;
+            ptr[ival] = strtod(str, &end); 
+            if ( end==str )
+                error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+
+            ival++;
+            str = *end ? end+1 : end;
+        }
+        while ( ival<nvals ) { bcf_float_set_vector_end(ptr[ival]); ival++; }
+        icol++;
+    }
+    return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf,nsmpl*nvals);
+}
+static int setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    annot_line_t *tab = (annot_line_t*) data;
+    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
+    assert( col->icol+nsmpl <= tab->ncols );
+
+    int i, max_len = 0;
+    for (i=col->icol; i<col->icol+nsmpl; i++)
+    {
+        int len = strlen(tab->cols[i]);
+        if ( max_len < len ) max_len = len;
+    }
+    hts_expand(char,max_len*nsmpl,args->mtmps,args->tmps);
+
+    int icol = col->icol, ismpl;
+    for (ismpl=0; ismpl<nsmpl; ismpl++)
+    {
+        char *ptr = args->tmps + ismpl*max_len;
+        char *str = tab->cols[icol];
+        i = 0;
+        while ( str[i] )
+        {
+            ptr[i] = str[i];
+            i++;
+        }
+        while ( i<max_len ) ptr[i++] = 0;
+        icol++;
+    }
+    return bcf_update_format_char(args->hdr_out,line,col->hdr_key,args->tmps,nsmpl*max_len);
+}
  static int vcf_setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
  {
      bcf1_t *rec = (bcf1_t*) data;
@@ -1127,7 +1256,7 @@ static void init_columns(args_t *args)
      kstring_t str = {0,0,0}, tmp = {0,0,0};
      char *ss = args->columns, *se = ss;
      args->ncols = 0;
-    int i = -1, has_fmt_str = 0, force_samples = -1;
+    int icol = -1, has_fmt_str = 0, force_samples = -1;
      while ( *ss )
      {
          if ( *se && *se!=',' ) { se++; continue; }
@@ -1135,22 +1264,22 @@ static void init_columns(args_t *args)
          if ( *ss=='+' ) { replace = REPLACE_MISSING; ss++; }
          else if ( *ss=='-' ) { replace = REPLACE_EXISTING; ss++; }
          else if ( *ss=='=' ) { replace = SET_OR_APPEND; ss++; }
-        i++;
+        icol++;
          str.l = 0;
          kputsn(ss, se-ss, &str);
          if ( !str.s[0] || !strcasecmp("-",str.s) ) ;
-        else if ( !strcasecmp("CHROM",str.s) ) args->chr_idx = i;
-        else if ( !strcasecmp("POS",str.s) ) args->from_idx = i;
-        else if ( !strcasecmp("FROM",str.s) ) args->from_idx = i;
-        else if ( !strcasecmp("TO",str.s) ) args->to_idx = i;
-        else if ( !strcasecmp("REF",str.s) ) args->ref_idx = i;
-        else if ( !strcasecmp("ALT",str.s) ) args->alt_idx = i;
+        else if ( !strcasecmp("CHROM",str.s) ) args->chr_idx = icol;
+        else if ( !strcasecmp("POS",str.s) ) args->from_idx = icol;
+        else if ( !strcasecmp("FROM",str.s) ) args->from_idx = icol;
+        else if ( !strcasecmp("TO",str.s) ) args->to_idx = icol;
+        else if ( !strcasecmp("REF",str.s) ) args->ref_idx = icol;
+        else if ( !strcasecmp("ALT",str.s) ) args->alt_idx = icol;
          else if ( !strcasecmp("ID",str.s) )
          {
              if ( replace==REPLACE_EXISTING ) error("Apologies, the -ID feature has not been implemented yet.\n");
              args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
              annot_col_t *col = &args->cols[args->ncols-1];
-            col->icol = i;
+            col->icol = icol;
              col->replace = replace;
              col->setter = args->tgts_is_vcf ? vcf_setter_id : setter_id;
              col->hdr_key = strdup(str.s);
@@ -1160,7 +1289,7 @@ static void init_columns(args_t *args)
              if ( replace==REPLACE_EXISTING ) error("Apologies, the -FILTER feature has not been implemented yet.\n");
              args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
              annot_col_t *col = &args->cols[args->ncols-1];
-            col->icol = i;
+            col->icol = icol;
              col->replace = replace;
              col->setter = args->tgts_is_vcf ? vcf_setter_filter : setter_filter;
              col->hdr_key = strdup(str.s);
@@ -1187,7 +1316,7 @@ static void init_columns(args_t *args)
              if ( replace==SET_OR_APPEND ) error("Apologies, the =QUAL feature has not been implemented yet.\n");
              args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
              annot_col_t *col = &args->cols[args->ncols-1];
-            col->icol = i;
+            col->icol = icol;
              col->replace = replace;
              col->setter = args->tgts_is_vcf ? vcf_setter_qual : setter_qual;
              col->hdr_key = strdup(str.s);
@@ -1262,30 +1391,38 @@ static void init_columns(args_t *args)
          }
          else if ( !strncasecmp("FORMAT/",str.s, 7) || !strncasecmp("FMT/",str.s,4) )
          {
-            if ( !args->tgts_is_vcf )
-                error("Error: FORMAT fields can be carried over from a VCF file only.\n");
-
              char *key = str.s + (!strncasecmp("FMT/",str.s,4) ? 4 : 7);
              if ( force_samples<0 ) force_samples = replace;
-            if ( force_samples>=0 && replace!=REPLACE_ALL ) force_samples = replace;;
-            bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key, NULL);
-            tmp.l = 0;
-            bcf_hrec_format(hrec, &tmp);
-            bcf_hdr_append(args->hdr_out, tmp.s);
-            bcf_hdr_sync(args->hdr_out);
+            if ( force_samples>=0 && replace!=REPLACE_ALL ) force_samples = replace;
+            if ( args->tgts_is_vcf )
+            {
+                bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key, NULL);
+                tmp.l = 0;
+                bcf_hrec_format(hrec, &tmp);
+                bcf_hdr_append(args->hdr_out, tmp.s);
+                bcf_hdr_sync(args->hdr_out);
+            }
              int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key);
+            if ( !bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_FMT,hdr_id) )
+                error("The tag \"%s\" is not defined in %s\n", str.s, args->targets_fname);
              args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
              annot_col_t *col = &args->cols[args->ncols-1];
-            col->icol = -1;
+            if ( !args->tgts_is_vcf )
+            {
+                col->icol = icol;
+                icol += bcf_hdr_nsamples(args->hdr_out) - 1;
+            }
+            else
+                col->icol = -1;
              col->replace = replace;
              col->hdr_key = strdup(key);
              if ( !strcasecmp("GT",key) ) col->setter = vcf_setter_format_gt;
              else
                  switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id) )
                  {
-                    case BCF_HT_INT:    col->setter = vcf_setter_format_int; break;
-                    case BCF_HT_REAL:   col->setter = vcf_setter_format_real; break;
-                    case BCF_HT_STR:    col->setter = vcf_setter_format_str; has_fmt_str = 1; break;
+                    case BCF_HT_INT:    col->setter = args->tgts_is_vcf ? vcf_setter_format_int  : setter_format_int; break;
+                    case BCF_HT_REAL:   col->setter = args->tgts_is_vcf ? vcf_setter_format_real : setter_format_real; break;
+                    case BCF_HT_STR:    col->setter = args->tgts_is_vcf ? vcf_setter_format_str  : setter_format_str; has_fmt_str = 1; break;
                      default: error("The type of %s not recognised (%d)\n", str.s,bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id));
                  }
          }
@@ -1314,7 +1451,7 @@ static void init_columns(args_t *args)
  
              args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
              annot_col_t *col = &args->cols[args->ncols-1];
-            col->icol = i;
+            col->icol = icol;
              col->replace = replace;
              col->hdr_key = strdup(str.s);
              col->number  = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id);
@@ -1338,11 +1475,12 @@ static void init_columns(args_t *args)
      if ( skip_fmt ) khash_str2int_destroy_free(skip_fmt);
      if ( has_fmt_str )
      {
-        int n = bcf_hdr_nsamples(args->hdr_out) > bcf_hdr_nsamples(args->files->readers[1].header) ? bcf_hdr_nsamples(args->hdr_out) : bcf_hdr_nsamples(args->files->readers[1].header);
+        int n = bcf_hdr_nsamples(args->hdr_out);
+        if ( args->tgts_is_vcf && n<bcf_hdr_nsamples(args->files->readers[1].header) ) n = bcf_hdr_nsamples(args->files->readers[1].header);
          args->tmpp  = (char**)malloc(sizeof(char*)*n);
          args->tmpp2 = (char**)malloc(sizeof(char*)*n);
      }
-    if ( force_samples>=0 )
+    if ( force_samples>=0 && args->tgts_is_vcf )
          set_samples(args, args->files->readers[1].header, args->hdr, force_samples==REPLACE_ALL ? 0 : 1);
  }
  
@@ -1419,7 +1557,7 @@ static void init_data(args_t *args)
              args->mark_sites,args->mark_sites_logic==MARK_LISTED?"":"not ",args->mark_sites);
      }
  
-    bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_annotate");
+     if (args->record_cmd_line) bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_annotate");
      if ( !args->drop_header )
      {
          if ( args->rename_chrs ) rename_chrs(args, args->rename_chrs);
@@ -1517,8 +1655,10 @@ static void buffer_annot_lines(args_t *args, bcf1_t *line, int start_pos, int en
          }
          if ( args->ref_idx != -1 )
          {
-            assert( args->ref_idx < tmp->ncols );
-            assert( args->alt_idx < tmp->ncols );
+            if ( args->ref_idx >= tmp->ncols ) 
+                error("Could not parse the line, expected %d+ columns, found %d:\n\t%s\n",args->ref_idx+1,tmp->ncols,args->tgts->line.s);
+            if ( args->alt_idx >= tmp->ncols )
+                error("Could not parse the line, expected %d+ columns, found %d:\n\t%s\n",args->alt_idx+1,tmp->ncols,args->tgts->line.s);
              tmp->nals = 2;
              hts_expand(char*,tmp->nals,tmp->mals,tmp->als);
              tmp->als[0] = tmp->cols[args->ref_idx];
@@ -1624,9 +1764,10 @@ static void usage(args_t *args)
      fprintf(stderr, "   -c, --columns <list>           list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details\n");
      fprintf(stderr, "   -e, --exclude <expr>           exclude sites for which the expression is true (see man page for details)\n");
      fprintf(stderr, "   -h, --header-lines <file>      lines which should be appended to the VCF header\n");
-    fprintf(stderr, "   -I, --set-id [+]<format>       set ID column, see man pagee for details\n");
-    fprintf(stderr, "   -i, --include <expr>           select sites for which the expression is true (see man pagee for details)\n");
+    fprintf(stderr, "   -I, --set-id [+]<format>       set ID column, see man page for details\n");
+    fprintf(stderr, "   -i, --include <expr>           select sites for which the expression is true (see man page for details)\n");
      fprintf(stderr, "   -m, --mark-sites [+-]<tag>     add INFO/tag flag to sites which are (\"+\") or are not (\"-\") listed in the -a file\n");
+    fprintf(stderr, "       --no-version               do not append version and command line to the header\n");
      fprintf(stderr, "   -o, --output <file>            write output to a file [standard output]\n");
      fprintf(stderr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
      fprintf(stderr, "   -r, --regions <region>         restrict to comma-separated list of regions\n");
@@ -1649,6 +1790,7 @@ int main_vcfannotate(int argc, char *argv[])
      args->output_fname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      args->ref_idx = args->alt_idx = args->chr_idx = args->from_idx = args->to_idx = -1;
      args->set_ids_replace = 1;
      int regions_is_file = 0;
@@ -1671,6 +1813,7 @@ int main_vcfannotate(int argc, char *argv[])
          {"header-lines",required_argument,NULL,'h'},
          {"samples",required_argument,NULL,'s'},
          {"samples-file",required_argument,NULL,'S'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      while ((c = getopt_long(argc, argv, "h:?o:O:r:R:a:x:c:i:e:S:s:I:m:",loptions,NULL)) >= 0)
@@ -1705,6 +1848,7 @@ int main_vcfannotate(int argc, char *argv[])
              case 'h': args->header_fname = optarg; break;
              case  1 : args->rename_chrs = optarg; break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case '?': usage(args); break;
              default: error("Unknown argument: %s\n", optarg);
          }
diff --git a/bcftools/vcfannotate.c.pysam.c b/bcftools/vcfannotate.c.pysam.c

index 1d86dbe1befc8153d5ab7c84a74ac1c40c4eafbe..ea8398c6f068330bcde006f5bd12f04c2ff7a4cf 100644 (file)
--- a/bcftools/vcfannotate.c.pysam.c
+++ b/bcftools/vcfannotate.c.pysam.c
@@ -2,7 +2,7 @@
  
  /*  vcfannotate.c -- Annotate and edit VCF/BCF files.
  
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2016 Genome Research Ltd.
  
      Author: Petr Danecek <pd3@sanger.ac.uk>
  
@@ -122,7 +122,7 @@ typedef struct _args_t
  
      char **argv, *output_fname, *targets_fname, *regions_list, *header_fname;
      char *remove_annots, *columns, *rename_chrs, *sample_names, *mark_sites;
-    int argc, drop_header, tgts_is_vcf, mark_sites_logic;
+    int argc, drop_header, record_cmd_line, tgts_is_vcf, mark_sites_logic;
  }
  args_t;
  
@@ -265,7 +265,7 @@ static void init_remove_annots(args_t *args)
              int id = bcf_hdr_id2int(args->hdr,BCF_DT_ID,str.s);
              if ( !bcf_hdr_idinfo_exists(args->hdr,type,id) )
              {
-                fprintf(pysamerr,"Warning: The tag \"%s\" not defined in the header\n", str.s);
+                fprintf(pysam_stderr,"Warning: The tag \"%s\" not defined in the header\n", str.s);
                  args->nrm--;
              }
              else if ( (type==BCF_HL_FMT && keep_fmt) || (type==BCF_HL_INFO && keep_info) )
@@ -811,6 +811,135 @@ static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, vo
          return bcf_update_genotypes(args->hdr_out,line,args->tmpi3,nsrc*bcf_hdr_nsamples(args->hdr_out));
      }
  }
+static int count_vals(annot_line_t *tab, int icol_beg, int icol_end)
+{
+    int i, nmax = 0;
+    for (i=icol_beg; i<icol_end; i++)
+    {
+        char *str = tab->cols[i], *end = str;
+        if ( str[0]=='.' && !str[1] ) 
+        {
+            // missing value
+            if ( !nmax ) nmax = 1;
+            continue;
+        }
+        int n = 1;
+        while ( *end )
+        {
+            if ( *end==',' ) n++;
+            end++;
+        }
+        if ( nmax<n ) nmax = n;
+    }
+    return nmax;
+}
+static int setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    annot_line_t *tab = (annot_line_t*) data;
+    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
+    assert( col->icol+nsmpl <= tab->ncols );
+    int nvals = count_vals(tab,col->icol,col->icol+nsmpl);
+    assert( nvals>0 );
+    hts_expand(int32_t,nvals*nsmpl,args->mtmpi,args->tmpi);
+
+    int icol = col->icol, ismpl;
+    for (ismpl=0; ismpl<nsmpl; ismpl++)
+    {
+        int32_t *ptr = args->tmpi + ismpl*nvals;
+        int ival = 0;
+
+        char *str = tab->cols[icol];
+        while ( *str )
+        {
+            if ( str[0]=='.' && (!str[1] || str[1]==',') )  // missing value
+            {
+                ptr[ival++] = bcf_int32_missing;
+                str += str[1] ? 2 : 1;
+                continue;
+            }
+
+            char *end = str;
+            ptr[ival] = strtol(str, &end, 10); 
+            if ( end==str )
+                error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+
+            ival++;
+            str = *end ? end+1 : end;
+        }
+        while ( ival<nvals ) ptr[ival++] = bcf_int32_vector_end;
+        icol++;
+    }
+    return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi,nsmpl*nvals);
+}
+static int setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    annot_line_t *tab = (annot_line_t*) data;
+    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
+    assert( col->icol+nsmpl <= tab->ncols );
+    int nvals = count_vals(tab,col->icol,col->icol+nsmpl);
+    assert( nvals>0 );
+    hts_expand(float,nvals*nsmpl,args->mtmpf,args->tmpf);
+
+    int icol = col->icol, ismpl;
+    for (ismpl=0; ismpl<nsmpl; ismpl++)
+    {
+        float *ptr = args->tmpf + ismpl*nvals;
+        int ival = 0;
+
+        char *str = tab->cols[icol];
+        while ( *str )
+        {
+            if ( str[0]=='.' && (!str[1] || str[1]==',') )  // missing value
+            {
+                bcf_float_set_missing(ptr[ival]); 
+                ival++;
+                str += str[1] ? 2 : 1;
+                continue;
+            }
+
+            char *end = str;
+            ptr[ival] = strtod(str, &end); 
+            if ( end==str )
+                error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+
+            ival++;
+            str = *end ? end+1 : end;
+        }
+        while ( ival<nvals ) { bcf_float_set_vector_end(ptr[ival]); ival++; }
+        icol++;
+    }
+    return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf,nsmpl*nvals);
+}
+static int setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+    annot_line_t *tab = (annot_line_t*) data;
+    int nsmpl = bcf_hdr_nsamples(args->hdr_out);
+    assert( col->icol+nsmpl <= tab->ncols );
+
+    int i, max_len = 0;
+    for (i=col->icol; i<col->icol+nsmpl; i++)
+    {
+        int len = strlen(tab->cols[i]);
+        if ( max_len < len ) max_len = len;
+    }
+    hts_expand(char,max_len*nsmpl,args->mtmps,args->tmps);
+
+    int icol = col->icol, ismpl;
+    for (ismpl=0; ismpl<nsmpl; ismpl++)
+    {
+        char *ptr = args->tmps + ismpl*max_len;
+        char *str = tab->cols[icol];
+        i = 0;
+        while ( str[i] )
+        {
+            ptr[i] = str[i];
+            i++;
+        }
+        while ( i<max_len ) ptr[i++] = 0;
+        icol++;
+    }
+    return bcf_update_format_char(args->hdr_out,line,col->hdr_key,args->tmps,nsmpl*max_len);
+}
  static int vcf_setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
  {
      bcf1_t *rec = (bcf1_t*) data;
@@ -1010,7 +1139,7 @@ static void set_samples(args_t *args, bcf_hdr_t *src, bcf_hdr_t *dst, int need_s
              return;    // the same samples in both files
  
          if ( !nmatch ) error("No matching samples found in the source and the destination file\n");
-        if ( nmatch!=bcf_hdr_nsamples(src) || nmatch!=bcf_hdr_nsamples(dst) ) fprintf(pysamerr,"%d sample(s) in common\n", nmatch);
+        if ( nmatch!=bcf_hdr_nsamples(src) || nmatch!=bcf_hdr_nsamples(dst) ) fprintf(pysam_stderr,"%d sample(s) in common\n", nmatch);
  
          args->nsample_map = bcf_hdr_nsamples(dst);
          args->sample_map  = (int*) malloc(sizeof(int)*args->nsample_map);
@@ -1129,7 +1258,7 @@ static void init_columns(args_t *args)
      kstring_t str = {0,0,0}, tmp = {0,0,0};
      char *ss = args->columns, *se = ss;
      args->ncols = 0;
-    int i = -1, has_fmt_str = 0, force_samples = -1;
+    int icol = -1, has_fmt_str = 0, force_samples = -1;
      while ( *ss )
      {
          if ( *se && *se!=',' ) { se++; continue; }
@@ -1137,22 +1266,22 @@ static void init_columns(args_t *args)
          if ( *ss=='+' ) { replace = REPLACE_MISSING; ss++; }
          else if ( *ss=='-' ) { replace = REPLACE_EXISTING; ss++; }
          else if ( *ss=='=' ) { replace = SET_OR_APPEND; ss++; }
-        i++;
+        icol++;
          str.l = 0;
          kputsn(ss, se-ss, &str);
          if ( !str.s[0] || !strcasecmp("-",str.s) ) ;
-        else if ( !strcasecmp("CHROM",str.s) ) args->chr_idx = i;
-        else if ( !strcasecmp("POS",str.s) ) args->from_idx = i;
-        else if ( !strcasecmp("FROM",str.s) ) args->from_idx = i;
-        else if ( !strcasecmp("TO",str.s) ) args->to_idx = i;
-        else if ( !strcasecmp("REF",str.s) ) args->ref_idx = i;
-        else if ( !strcasecmp("ALT",str.s) ) args->alt_idx = i;
+        else if ( !strcasecmp("CHROM",str.s) ) args->chr_idx = icol;
+        else if ( !strcasecmp("POS",str.s) ) args->from_idx = icol;
+        else if ( !strcasecmp("FROM",str.s) ) args->from_idx = icol;
+        else if ( !strcasecmp("TO",str.s) ) args->to_idx = icol;
+        else if ( !strcasecmp("REF",str.s) ) args->ref_idx = icol;
+        else if ( !strcasecmp("ALT",str.s) ) args->alt_idx = icol;
          else if ( !strcasecmp("ID",str.s) )
          {
              if ( replace==REPLACE_EXISTING ) error("Apologies, the -ID feature has not been implemented yet.\n");
              args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
              annot_col_t *col = &args->cols[args->ncols-1];
-            col->icol = i;
+            col->icol = icol;
              col->replace = replace;
              col->setter = args->tgts_is_vcf ? vcf_setter_id : setter_id;
              col->hdr_key = strdup(str.s);
@@ -1162,7 +1291,7 @@ static void init_columns(args_t *args)
              if ( replace==REPLACE_EXISTING ) error("Apologies, the -FILTER feature has not been implemented yet.\n");
              args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
              annot_col_t *col = &args->cols[args->ncols-1];
-            col->icol = i;
+            col->icol = icol;
              col->replace = replace;
              col->setter = args->tgts_is_vcf ? vcf_setter_filter : setter_filter;
              col->hdr_key = strdup(str.s);
@@ -1189,7 +1318,7 @@ static void init_columns(args_t *args)
              if ( replace==SET_OR_APPEND ) error("Apologies, the =QUAL feature has not been implemented yet.\n");
              args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
              annot_col_t *col = &args->cols[args->ncols-1];
-            col->icol = i;
+            col->icol = icol;
              col->replace = replace;
              col->setter = args->tgts_is_vcf ? vcf_setter_qual : setter_qual;
              col->hdr_key = strdup(str.s);
@@ -1264,30 +1393,38 @@ static void init_columns(args_t *args)
          }
          else if ( !strncasecmp("FORMAT/",str.s, 7) || !strncasecmp("FMT/",str.s,4) )
          {
-            if ( !args->tgts_is_vcf )
-                error("Error: FORMAT fields can be carried over from a VCF file only.\n");
-
              char *key = str.s + (!strncasecmp("FMT/",str.s,4) ? 4 : 7);
              if ( force_samples<0 ) force_samples = replace;
-            if ( force_samples>=0 && replace!=REPLACE_ALL ) force_samples = replace;;
-            bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key, NULL);
-            tmp.l = 0;
-            bcf_hrec_format(hrec, &tmp);
-            bcf_hdr_append(args->hdr_out, tmp.s);
-            bcf_hdr_sync(args->hdr_out);
+            if ( force_samples>=0 && replace!=REPLACE_ALL ) force_samples = replace;
+            if ( args->tgts_is_vcf )
+            {
+                bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key, NULL);
+                tmp.l = 0;
+                bcf_hrec_format(hrec, &tmp);
+                bcf_hdr_append(args->hdr_out, tmp.s);
+                bcf_hdr_sync(args->hdr_out);
+            }
              int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key);
+            if ( !bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_FMT,hdr_id) )
+                error("The tag \"%s\" is not defined in %s\n", str.s, args->targets_fname);
              args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
              annot_col_t *col = &args->cols[args->ncols-1];
-            col->icol = -1;
+            if ( !args->tgts_is_vcf )
+            {
+                col->icol = icol;
+                icol += bcf_hdr_nsamples(args->hdr_out) - 1;
+            }
+            else
+                col->icol = -1;
              col->replace = replace;
              col->hdr_key = strdup(key);
              if ( !strcasecmp("GT",key) ) col->setter = vcf_setter_format_gt;
              else
                  switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id) )
                  {
-                    case BCF_HT_INT:    col->setter = vcf_setter_format_int; break;
-                    case BCF_HT_REAL:   col->setter = vcf_setter_format_real; break;
-                    case BCF_HT_STR:    col->setter = vcf_setter_format_str; has_fmt_str = 1; break;
+                    case BCF_HT_INT:    col->setter = args->tgts_is_vcf ? vcf_setter_format_int  : setter_format_int; break;
+                    case BCF_HT_REAL:   col->setter = args->tgts_is_vcf ? vcf_setter_format_real : setter_format_real; break;
+                    case BCF_HT_STR:    col->setter = args->tgts_is_vcf ? vcf_setter_format_str  : setter_format_str; has_fmt_str = 1; break;
                      default: error("The type of %s not recognised (%d)\n", str.s,bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id));
                  }
          }
@@ -1316,7 +1453,7 @@ static void init_columns(args_t *args)
  
              args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
              annot_col_t *col = &args->cols[args->ncols-1];
-            col->icol = i;
+            col->icol = icol;
              col->replace = replace;
              col->hdr_key = strdup(str.s);
              col->number  = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id);
@@ -1340,11 +1477,12 @@ static void init_columns(args_t *args)
      if ( skip_fmt ) khash_str2int_destroy_free(skip_fmt);
      if ( has_fmt_str )
      {
-        int n = bcf_hdr_nsamples(args->hdr_out) > bcf_hdr_nsamples(args->files->readers[1].header) ? bcf_hdr_nsamples(args->hdr_out) : bcf_hdr_nsamples(args->files->readers[1].header);
+        int n = bcf_hdr_nsamples(args->hdr_out);
+        if ( args->tgts_is_vcf && n<bcf_hdr_nsamples(args->files->readers[1].header) ) n = bcf_hdr_nsamples(args->files->readers[1].header);
          args->tmpp  = (char**)malloc(sizeof(char*)*n);
          args->tmpp2 = (char**)malloc(sizeof(char*)*n);
      }
-    if ( force_samples>=0 )
+    if ( force_samples>=0 && args->tgts_is_vcf )
          set_samples(args, args->files->readers[1].header, args->hdr, force_samples==REPLACE_ALL ? 0 : 1);
  }
  
@@ -1421,7 +1559,7 @@ static void init_data(args_t *args)
              args->mark_sites,args->mark_sites_logic==MARK_LISTED?"":"not ",args->mark_sites);
      }
  
-    bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_annotate");
+     if (args->record_cmd_line) bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_annotate");
      if ( !args->drop_header )
      {
          if ( args->rename_chrs ) rename_chrs(args, args->rename_chrs);
@@ -1519,8 +1657,10 @@ static void buffer_annot_lines(args_t *args, bcf1_t *line, int start_pos, int en
          }
          if ( args->ref_idx != -1 )
          {
-            assert( args->ref_idx < tmp->ncols );
-            assert( args->alt_idx < tmp->ncols );
+            if ( args->ref_idx >= tmp->ncols ) 
+                error("Could not parse the line, expected %d+ columns, found %d:\n\t%s\n",args->ref_idx+1,tmp->ncols,args->tgts->line.s);
+            if ( args->alt_idx >= tmp->ncols )
+                error("Could not parse the line, expected %d+ columns, found %d:\n\t%s\n",args->alt_idx+1,tmp->ncols,args->tgts->line.s);
              tmp->nals = 2;
              hts_expand(char*,tmp->nals,tmp->mals,tmp->als);
              tmp->als[0] = tmp->cols[args->ref_idx];
@@ -1617,28 +1757,29 @@ static void annotate(args_t *args, bcf1_t *line)
  
  static void usage(args_t *args)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Annotate and edit VCF/BCF files.\n");
-    fprintf(pysamerr, "Usage:   bcftools annotate [options] <in.vcf.gz>\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Options:\n");
-    fprintf(pysamerr, "   -a, --annotations <file>       VCF file or tabix-indexed file with annotations: CHR\\tPOS[\\tVALUE]+\n");
-    fprintf(pysamerr, "   -c, --columns <list>           list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details\n");
-    fprintf(pysamerr, "   -e, --exclude <expr>           exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(pysamerr, "   -h, --header-lines <file>      lines which should be appended to the VCF header\n");
-    fprintf(pysamerr, "   -I, --set-id [+]<format>       set ID column, see man pagee for details\n");
-    fprintf(pysamerr, "   -i, --include <expr>           select sites for which the expression is true (see man pagee for details)\n");
-    fprintf(pysamerr, "   -m, --mark-sites [+-]<tag>     add INFO/tag flag to sites which are (\"+\") or are not (\"-\") listed in the -a file\n");
-    fprintf(pysamerr, "   -o, --output <file>            write output to a file [standard output]\n");
-    fprintf(pysamerr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
-    fprintf(pysamerr, "   -r, --regions <region>         restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "   -R, --regions-file <file>      restrict to regions listed in a file\n");
-    fprintf(pysamerr, "       --rename-chrs <file>       rename sequences according to map file: from\\tto\n");
-    fprintf(pysamerr, "   -s, --samples [^]<list>        comma separated list of samples to annotate (or exclude with \"^\" prefix)\n");
-    fprintf(pysamerr, "   -S, --samples-file [^]<file>   file of samples to annotate (or exclude with \"^\" prefix)\n");
-    fprintf(pysamerr, "   -x, --remove <list>            list of annotations to remove (e.g. ID,INFO/DP,FORMAT/DP,FILTER). See man page for details\n");
-    fprintf(pysamerr, "       --threads <int>            number of extra output compression threads [0]\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Annotate and edit VCF/BCF files.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools annotate [options] <in.vcf.gz>\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "   -a, --annotations <file>       VCF file or tabix-indexed file with annotations: CHR\\tPOS[\\tVALUE]+\n");
+    fprintf(pysam_stderr, "   -c, --columns <list>           list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details\n");
+    fprintf(pysam_stderr, "   -e, --exclude <expr>           exclude sites for which the expression is true (see man page for details)\n");
+    fprintf(pysam_stderr, "   -h, --header-lines <file>      lines which should be appended to the VCF header\n");
+    fprintf(pysam_stderr, "   -I, --set-id [+]<format>       set ID column, see man page for details\n");
+    fprintf(pysam_stderr, "   -i, --include <expr>           select sites for which the expression is true (see man page for details)\n");
+    fprintf(pysam_stderr, "   -m, --mark-sites [+-]<tag>     add INFO/tag flag to sites which are (\"+\") or are not (\"-\") listed in the -a file\n");
+    fprintf(pysam_stderr, "       --no-version               do not append version and command line to the header\n");
+    fprintf(pysam_stderr, "   -o, --output <file>            write output to a file [standard output]\n");
+    fprintf(pysam_stderr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+    fprintf(pysam_stderr, "   -r, --regions <region>         restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "   -R, --regions-file <file>      restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "       --rename-chrs <file>       rename sequences according to map file: from\\tto\n");
+    fprintf(pysam_stderr, "   -s, --samples [^]<list>        comma separated list of samples to annotate (or exclude with \"^\" prefix)\n");
+    fprintf(pysam_stderr, "   -S, --samples-file [^]<file>   file of samples to annotate (or exclude with \"^\" prefix)\n");
+    fprintf(pysam_stderr, "   -x, --remove <list>            list of annotations to remove (e.g. ID,INFO/DP,FORMAT/DP,FILTER). See man page for details\n");
+    fprintf(pysam_stderr, "       --threads <int>            number of extra output compression threads [0]\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -1651,6 +1792,7 @@ int main_vcfannotate(int argc, char *argv[])
      args->output_fname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      args->ref_idx = args->alt_idx = args->chr_idx = args->from_idx = args->to_idx = -1;
      args->set_ids_replace = 1;
      int regions_is_file = 0;
@@ -1673,6 +1815,7 @@ int main_vcfannotate(int argc, char *argv[])
          {"header-lines",required_argument,NULL,'h'},
          {"samples",required_argument,NULL,'s'},
          {"samples-file",required_argument,NULL,'S'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      while ((c = getopt_long(argc, argv, "h:?o:O:r:R:a:x:c:i:e:S:s:I:m:",loptions,NULL)) >= 0)
@@ -1707,6 +1850,7 @@ int main_vcfannotate(int argc, char *argv[])
              case 'h': args->header_fname = optarg; break;
              case  1 : args->rename_chrs = optarg; break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case '?': usage(args); break;
              default: error("Unknown argument: %s\n", optarg);
          }
diff --git a/bcftools/vcfcall.c b/bcftools/vcfcall.c

index a28caee6afc9e6af9c17df6038184fa6d8e89a11..e5bbf11674a3e321cf58fa08dda9f16346851d5b 100644 (file)
--- a/bcftools/vcfcall.c
+++ b/bcftools/vcfcall.c
@@ -1,6 +1,6 @@
  /*  vcfcall.c -- SNP/indel variant calling from VCF/BCF.
  
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2016 Genome Research Ltd.
  
      Author: Petr Danecek <pd3@sanger.ac.uk>
  
@@ -68,7 +68,7 @@ void error(const char *format, ...);
  typedef struct
  {
      int flag;   // combination of CF_* flags above
-    int output_type, n_threads;
+    int output_type, n_threads, record_cmd_line;
      htsFile *bcf_in, *out_fh;
      char *bcf_fname, *output_fname;
      char **samples;             // for subsampling and ploidy
@@ -175,6 +175,11 @@ static ploidy_predef_t ploidy_predefs[] =
            "*  * *     M 1\n"
            "*  * *     F 0\n"
      },
+    { .alias  = "1",
+      .about  = "Treat all samples as haploid",
+      .ploidy =
+          "*  * *     * 1\n"
+    },
      {
          .alias  = NULL,
          .about  = NULL,
@@ -381,7 +386,7 @@ static void init_data(args_t *args)
      if ( args->regions )
      {
          if ( bcf_sr_set_regions(args->aux.srs, args->regions, args->regions_is_file)<0 )
-            error("Failed to read the targets: %s\n", args->regions);
+            error("Failed to read the regions: %s\n", args->regions);
      }
  
      if ( !bcf_sr_add_reader(args->aux.srs, args->bcf_fname) ) error("Failed to open %s: %s\n", args->bcf_fname,bcf_sr_strerror(args->aux.srs->errnum));
@@ -396,9 +401,21 @@ static void init_data(args_t *args)
              if ( 3*args->aux.nfams!=args->nsamples ) error("Expected only trios in %s, sorry!\n", args->samples_fname);
              fprintf(stderr,"Detected %d samples in %d trio families\n", args->nsamples,args->aux.nfams);
          }
+    }
+    if ( args->ploidy  )
+    {
          args->nsex = ploidy_nsex(args->ploidy);
          args->sex2ploidy = (int*) calloc(args->nsex,sizeof(int));
          args->sex2ploidy_prev = (int*) calloc(args->nsex,sizeof(int));
+        if ( !args->nsamples )
+        {
+            args->nsamples = bcf_hdr_nsamples(args->aux.hdr);
+            args->sample2sex = (int*) malloc(sizeof(int)*args->nsamples);
+            for (i=0; i<args->nsamples; i++) args->sample2sex[i] = 0;
+        }
+    }
+    if ( args->nsamples )
+    {
          args->aux.ploidy = (uint8_t*) malloc(args->nsamples);
          for (i=0; i<args->nsamples; i++) args->aux.ploidy[i] = 2;
          for (i=0; i<args->nsex; i++) args->sex2ploidy_prev[i] = 2;
@@ -418,9 +435,12 @@ static void init_data(args_t *args)
      else
      {
          args->aux.hdr = bcf_hdr_dup(bcf_sr_get_header(args->aux.srs,0));
-        for (i=0; i<args->nsamples; i++)
-            if ( bcf_hdr_id2int(args->aux.hdr,BCF_DT_SAMPLE,args->samples[i])<0 )
-                error("No such sample: %s\n", args->samples[i]);
+        if ( args->samples )
+        {
+            for (i=0; i<args->nsamples; i++)
+                if ( bcf_hdr_id2int(args->aux.hdr,BCF_DT_SAMPLE,args->samples[i])<0 )
+                    error("No such sample: %s\n", args->samples[i]);
+        }
      }
  
      args->out_fh = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
@@ -439,7 +459,7 @@ static void init_data(args_t *args)
      bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "QS");
      bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "I16");
  
-    bcf_hdr_append_version(args->aux.hdr, args->argc, args->argv, "bcftools_call");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->aux.hdr, args->argc, args->argv, "bcftools_call");
      bcf_hdr_write(args->out_fh, args->aux.hdr);
  
      if ( args->flag&CF_INS_MISSED ) init_missed_line(args);
@@ -451,7 +471,10 @@ static void destroy_data(args_t *args)
      else if ( args->flag & CF_MCALL ) mcall_destroy(&args->aux);
      else if ( args->flag & CF_QCALL ) qcall_destroy(&args->aux);
      int i;
-    for (i=0; i<args->nsamples; i++) free(args->samples[i]);
+    if ( args->samples )
+    {
+        for (i=0; i<args->nsamples; i++) free(args->samples[i]);
+    }
      if ( args->aux.fams )
      {
          for (i=0; i<args->aux.nfams; i++) free(args->aux.fams[i].name);
@@ -579,6 +602,7 @@ static void usage(args_t *args)
      fprintf(stderr, "Usage:   bcftools call [options] <in.vcf.gz>\n");
      fprintf(stderr, "\n");
      fprintf(stderr, "File format options:\n");
+    fprintf(stderr, "       --no-version                do not append version and command line to the header\n");
      fprintf(stderr, "   -o, --output <file>             write output to a file [standard output]\n");
      fprintf(stderr, "   -O, --output-type <b|u|z|v>     output type: 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
      fprintf(stderr, "       --ploidy <assembly>[?]      predefined ploidy, 'list' to print available settings, append '?' for details\n");
@@ -634,6 +658,7 @@ int main_vcfcall(int argc, char *argv[])
      args.output_fname   = "-";
      args.output_type    = FT_VCF;
      args.n_threads = 0;
+    args.record_cmd_line = 1;
      args.aux.trio_Pm_SNPs = 1 - 1e-8;
      args.aux.trio_Pm_ins  = args.aux.trio_Pm_del  = 1 - 1e-9;
  
@@ -668,6 +693,7 @@ int main_vcfcall(int argc, char *argv[])
          {"ploidy-file",required_argument,NULL,2},
          {"chromosome-X",no_argument,NULL,'X'},
          {"chromosome-Y",no_argument,NULL,'Y'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
  
@@ -727,6 +753,7 @@ int main_vcfcall(int argc, char *argv[])
              case 's': args.samples_fname = optarg; break;
              case 'S': args.samples_fname = optarg; args.samples_is_file = 1; break;
              case  9 : args.n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args.record_cmd_line = 0; break;
              default: usage(&args);
          }
      }
diff --git a/bcftools/vcfcall.c.pysam.c b/bcftools/vcfcall.c.pysam.c

index 9e8c1bb98a48627ac42dd87b5db585262baf70af..8e59fd98531edd36dddb3c9fd68ed20c0e81dad3 100644 (file)
--- a/bcftools/vcfcall.c.pysam.c
+++ b/bcftools/vcfcall.c.pysam.c
@@ -2,7 +2,7 @@
  
  /*  vcfcall.c -- SNP/indel variant calling from VCF/BCF.
  
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2016 Genome Research Ltd.
  
      Author: Petr Danecek <pd3@sanger.ac.uk>
  
@@ -70,7 +70,7 @@ void error(const char *format, ...);
  typedef struct
  {
      int flag;   // combination of CF_* flags above
-    int output_type, n_threads;
+    int output_type, n_threads, record_cmd_line;
      htsFile *bcf_in, *out_fh;
      char *bcf_fname, *output_fname;
      char **samples;             // for subsampling and ploidy
@@ -177,6 +177,11 @@ static ploidy_predef_t ploidy_predefs[] =
            "*  * *     M 1\n"
            "*  * *     F 0\n"
      },
+    { .alias  = "1",
+      .about  = "Treat all samples as haploid",
+      .ploidy =
+          "*  * *     * 1\n"
+    },
      {
          .alias  = NULL,
          .about  = NULL,
@@ -290,7 +295,7 @@ static void set_samples(args_t *args, const char *fn, int is_file)
          char x = *se, *xptr = se; *se = 0;
  
          int ismpl = bcf_hdr_id2int(args->aux.hdr, BCF_DT_SAMPLE, ss);
-        if ( ismpl < 0 ) { fprintf(pysamerr,"Warning: No such sample in the VCF: %s\n",ss); continue; }
+        if ( ismpl < 0 ) { fprintf(pysam_stderr,"Warning: No such sample in the VCF: %s\n",ss); continue; }
  
          ss = se+1;
          while ( *ss && isspace(*ss) ) ss++;
@@ -383,7 +388,7 @@ static void init_data(args_t *args)
      if ( args->regions )
      {
          if ( bcf_sr_set_regions(args->aux.srs, args->regions, args->regions_is_file)<0 )
-            error("Failed to read the targets: %s\n", args->regions);
+            error("Failed to read the regions: %s\n", args->regions);
      }
  
      if ( !bcf_sr_add_reader(args->aux.srs, args->bcf_fname) ) error("Failed to open %s: %s\n", args->bcf_fname,bcf_sr_strerror(args->aux.srs->errnum));
@@ -396,11 +401,23 @@ static void init_data(args_t *args)
          if ( args->aux.flag&CALL_CONSTR_TRIO )
          {
              if ( 3*args->aux.nfams!=args->nsamples ) error("Expected only trios in %s, sorry!\n", args->samples_fname);
-            fprintf(pysamerr,"Detected %d samples in %d trio families\n", args->nsamples,args->aux.nfams);
+            fprintf(pysam_stderr,"Detected %d samples in %d trio families\n", args->nsamples,args->aux.nfams);
          }
+    }
+    if ( args->ploidy  )
+    {
          args->nsex = ploidy_nsex(args->ploidy);
          args->sex2ploidy = (int*) calloc(args->nsex,sizeof(int));
          args->sex2ploidy_prev = (int*) calloc(args->nsex,sizeof(int));
+        if ( !args->nsamples )
+        {
+            args->nsamples = bcf_hdr_nsamples(args->aux.hdr);
+            args->sample2sex = (int*) malloc(sizeof(int)*args->nsamples);
+            for (i=0; i<args->nsamples; i++) args->sample2sex[i] = 0;
+        }
+    }
+    if ( args->nsamples )
+    {
          args->aux.ploidy = (uint8_t*) malloc(args->nsamples);
          for (i=0; i<args->nsamples; i++) args->aux.ploidy[i] = 2;
          for (i=0; i<args->nsex; i++) args->sex2ploidy_prev[i] = 2;
@@ -420,9 +437,12 @@ static void init_data(args_t *args)
      else
      {
          args->aux.hdr = bcf_hdr_dup(bcf_sr_get_header(args->aux.srs,0));
-        for (i=0; i<args->nsamples; i++)
-            if ( bcf_hdr_id2int(args->aux.hdr,BCF_DT_SAMPLE,args->samples[i])<0 )
-                error("No such sample: %s\n", args->samples[i]);
+        if ( args->samples )
+        {
+            for (i=0; i<args->nsamples; i++)
+                if ( bcf_hdr_id2int(args->aux.hdr,BCF_DT_SAMPLE,args->samples[i])<0 )
+                    error("No such sample: %s\n", args->samples[i]);
+        }
      }
  
      args->out_fh = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
@@ -441,7 +461,7 @@ static void init_data(args_t *args)
      bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "QS");
      bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "I16");
  
-    bcf_hdr_append_version(args->aux.hdr, args->argc, args->argv, "bcftools_call");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->aux.hdr, args->argc, args->argv, "bcftools_call");
      bcf_hdr_write(args->out_fh, args->aux.hdr);
  
      if ( args->flag&CF_INS_MISSED ) init_missed_line(args);
@@ -453,7 +473,10 @@ static void destroy_data(args_t *args)
      else if ( args->flag & CF_MCALL ) mcall_destroy(&args->aux);
      else if ( args->flag & CF_QCALL ) qcall_destroy(&args->aux);
      int i;
-    for (i=0; i<args->nsamples; i++) free(args->samples[i]);
+    if ( args->samples )
+    {
+        for (i=0; i<args->nsamples; i++) free(args->samples[i]);
+    }
      if ( args->aux.fams )
      {
          for (i=0; i<args->aux.nfams; i++) free(args->aux.fams[i].name);
@@ -507,7 +530,7 @@ static int parse_format_flag(const char *str)
          else if ( !strncasecmp(ss,"GP",se-ss) ) flag |= CALL_FMT_GP;
          else
          {
-            fprintf(pysamerr,"Could not parse \"%s\"\n", str);
+            fprintf(pysam_stderr,"Could not parse \"%s\"\n", str);
              exit(1);
          }
          if ( !*se ) break;
@@ -548,23 +571,23 @@ ploidy_t *init_ploidy(char *alias)
  
      if ( !pld->alias )
      {
-        fprintf(pysamerr,"Predefined ploidies:\n");
+        fprintf(pysam_stderr,"Predefined ploidies:\n");
          pld = ploidy_predefs;
          while ( pld->alias )
          {
-            fprintf(pysamerr,"%s\n   .. %s\n\n", pld->alias,pld->about);
+            fprintf(pysam_stderr,"%s\n   .. %s\n\n", pld->alias,pld->about);
              if ( detailed )
-                fprintf(pysamerr,"%s\n", pld->ploidy);
+                fprintf(pysam_stderr,"%s\n", pld->ploidy);
              pld++;
          }
-        fprintf(pysamerr,"Run as --ploidy <alias> (e.g. --ploidy GRCh37).\n");
-        fprintf(pysamerr,"To see the detailed ploidy definition, append a question mark (e.g. --ploidy GRCh37?).\n");
-        fprintf(pysamerr,"\n");
+        fprintf(pysam_stderr,"Run as --ploidy <alias> (e.g. --ploidy GRCh37).\n");
+        fprintf(pysam_stderr,"To see the detailed ploidy definition, append a question mark (e.g. --ploidy GRCh37?).\n");
+        fprintf(pysam_stderr,"\n");
          exit(-1);
      }
      else if ( detailed )
      {
-        fprintf(pysamerr,"%s", pld->ploidy);
+        fprintf(pysam_stderr,"%s", pld->ploidy);
          exit(-1);
      }
      return ploidy_init_string(pld->ploidy,2);
@@ -572,51 +595,52 @@ ploidy_t *init_ploidy(char *alias)
  
  static void usage(args_t *args)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   SNP/indel variant calling from VCF/BCF. To be used in conjunction with samtools mpileup.\n");
-    fprintf(pysamerr, "         This command replaces the former \"bcftools view\" caller. Some of the original\n");
-    fprintf(pysamerr, "         functionality has been temporarily lost in the process of transition to htslib,\n");
-    fprintf(pysamerr, "         but will be added back on popular demand. The original calling model can be\n");
-    fprintf(pysamerr, "         invoked with the -c option.\n");
-    fprintf(pysamerr, "Usage:   bcftools call [options] <in.vcf.gz>\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "File format options:\n");
-    fprintf(pysamerr, "   -o, --output <file>             write output to a file [standard output]\n");
-    fprintf(pysamerr, "   -O, --output-type <b|u|z|v>     output type: 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
-    fprintf(pysamerr, "       --ploidy <assembly>[?]      predefined ploidy, 'list' to print available settings, append '?' for details\n");
-    fprintf(pysamerr, "       --ploidy-file <file>        space/tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY\n");
-    fprintf(pysamerr, "   -r, --regions <region>          restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "   -R, --regions-file <file>       restrict to regions listed in a file\n");
-    fprintf(pysamerr, "   -s, --samples <list>            list of samples to include [all samples]\n");
-    fprintf(pysamerr, "   -S, --samples-file <file>       PED file or a file with an optional column with sex (see man page for details) [all samples]\n");
-    fprintf(pysamerr, "   -t, --targets <region>          similar to -r but streams rather than index-jumps\n");
-    fprintf(pysamerr, "   -T, --targets-file <file>       similar to -R but streams rather than index-jumps\n");
-    fprintf(pysamerr, "       --threads <int>             number of extra output compression threads [0]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Input/output options:\n");
-    fprintf(pysamerr, "   -A, --keep-alts                 keep all possible alternate alleles at variant sites\n");
-    fprintf(pysamerr, "   -f, --format-fields <list>      output format fields: GQ,GP (lowercase allowed) []\n");
-    fprintf(pysamerr, "   -g, --gvcf <int>,[...]          group non-variant sites into gVCF blocks by minimum per-sample DP\n");
-    fprintf(pysamerr, "   -i, --insert-missed             output also sites missed by mpileup but present in -T\n");
-    fprintf(pysamerr, "   -M, --keep-masked-ref           keep sites with masked reference allele (REF=N)\n");
-    fprintf(pysamerr, "   -V, --skip-variants <type>      skip indels/snps\n");
-    fprintf(pysamerr, "   -v, --variants-only             output variant sites only\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Consensus/variant calling options:\n");
-    fprintf(pysamerr, "   -c, --consensus-caller          the original calling method (conflicts with -m)\n");
-    fprintf(pysamerr, "   -C, --constrain <str>           one of: alleles, trio (see manual)\n");
-    fprintf(pysamerr, "   -m, --multiallelic-caller       alternative model for multiallelic and rare-variant calling (conflicts with -c)\n");
-    fprintf(pysamerr, "   -n, --novel-rate <float>,[...]  likelihood of novel mutation for constrained trio calling, see man page for details [1e-8,1e-9,1e-9]\n");
-    fprintf(pysamerr, "   -p, --pval-threshold <float>    variant if P(ref|D)<FLOAT with -c [0.5]\n");
-    fprintf(pysamerr, "   -P, --prior <float>             mutation rate (use bigger for greater sensitivity) [1.1e-3]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   SNP/indel variant calling from VCF/BCF. To be used in conjunction with samtools mpileup.\n");
+    fprintf(pysam_stderr, "         This command replaces the former \"bcftools view\" caller. Some of the original\n");
+    fprintf(pysam_stderr, "         functionality has been temporarily lost in the process of transition to htslib,\n");
+    fprintf(pysam_stderr, "         but will be added back on popular demand. The original calling model can be\n");
+    fprintf(pysam_stderr, "         invoked with the -c option.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools call [options] <in.vcf.gz>\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "File format options:\n");
+    fprintf(pysam_stderr, "       --no-version                do not append version and command line to the header\n");
+    fprintf(pysam_stderr, "   -o, --output <file>             write output to a file [standard output]\n");
+    fprintf(pysam_stderr, "   -O, --output-type <b|u|z|v>     output type: 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
+    fprintf(pysam_stderr, "       --ploidy <assembly>[?]      predefined ploidy, 'list' to print available settings, append '?' for details\n");
+    fprintf(pysam_stderr, "       --ploidy-file <file>        space/tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY\n");
+    fprintf(pysam_stderr, "   -r, --regions <region>          restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "   -R, --regions-file <file>       restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "   -s, --samples <list>            list of samples to include [all samples]\n");
+    fprintf(pysam_stderr, "   -S, --samples-file <file>       PED file or a file with an optional column with sex (see man page for details) [all samples]\n");
+    fprintf(pysam_stderr, "   -t, --targets <region>          similar to -r but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "   -T, --targets-file <file>       similar to -R but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "       --threads <int>             number of extra output compression threads [0]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Input/output options:\n");
+    fprintf(pysam_stderr, "   -A, --keep-alts                 keep all possible alternate alleles at variant sites\n");
+    fprintf(pysam_stderr, "   -f, --format-fields <list>      output format fields: GQ,GP (lowercase allowed) []\n");
+    fprintf(pysam_stderr, "   -g, --gvcf <int>,[...]          group non-variant sites into gVCF blocks by minimum per-sample DP\n");
+    fprintf(pysam_stderr, "   -i, --insert-missed             output also sites missed by mpileup but present in -T\n");
+    fprintf(pysam_stderr, "   -M, --keep-masked-ref           keep sites with masked reference allele (REF=N)\n");
+    fprintf(pysam_stderr, "   -V, --skip-variants <type>      skip indels/snps\n");
+    fprintf(pysam_stderr, "   -v, --variants-only             output variant sites only\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Consensus/variant calling options:\n");
+    fprintf(pysam_stderr, "   -c, --consensus-caller          the original calling method (conflicts with -m)\n");
+    fprintf(pysam_stderr, "   -C, --constrain <str>           one of: alleles, trio (see manual)\n");
+    fprintf(pysam_stderr, "   -m, --multiallelic-caller       alternative model for multiallelic and rare-variant calling (conflicts with -c)\n");
+    fprintf(pysam_stderr, "   -n, --novel-rate <float>,[...]  likelihood of novel mutation for constrained trio calling, see man page for details [1e-8,1e-9,1e-9]\n");
+    fprintf(pysam_stderr, "   -p, --pval-threshold <float>    variant if P(ref|D)<FLOAT with -c [0.5]\n");
+    fprintf(pysam_stderr, "   -P, --prior <float>             mutation rate (use bigger for greater sensitivity) [1.1e-3]\n");
  
      // todo (and more)
-    // fprintf(pysamerr, "\nContrast calling and association test options:\n");
-    // fprintf(pysamerr, "       -1 INT    number of group-1 samples [0]\n");
-    // fprintf(pysamerr, "       -C FLOAT  posterior constrast for LRT<FLOAT and P(ref|D)<0.5 [%g]\n", args->aux.min_lrt);
-    // fprintf(pysamerr, "       -U INT    number of permutations for association testing (effective with -1) [0]\n");
-    // fprintf(pysamerr, "       -X FLOAT  only perform permutations for P(chi^2)<FLOAT [%g]\n", args->aux.min_perm_p);
-    fprintf(pysamerr, "\n");
+    // fprintf(pysam_stderr, "\nContrast calling and association test options:\n");
+    // fprintf(pysam_stderr, "       -1 INT    number of group-1 samples [0]\n");
+    // fprintf(pysam_stderr, "       -C FLOAT  posterior constrast for LRT<FLOAT and P(ref|D)<0.5 [%g]\n", args->aux.min_lrt);
+    // fprintf(pysam_stderr, "       -U INT    number of permutations for association testing (effective with -1) [0]\n");
+    // fprintf(pysam_stderr, "       -X FLOAT  only perform permutations for P(chi^2)<FLOAT [%g]\n", args->aux.min_perm_p);
+    fprintf(pysam_stderr, "\n");
      exit(-1);
  }
  
@@ -636,6 +660,7 @@ int main_vcfcall(int argc, char *argv[])
      args.output_fname   = "-";
      args.output_type    = FT_VCF;
      args.n_threads = 0;
+    args.record_cmd_line = 1;
      args.aux.trio_Pm_SNPs = 1 - 1e-8;
      args.aux.trio_Pm_ins  = args.aux.trio_Pm_del  = 1 - 1e-9;
  
@@ -670,6 +695,7 @@ int main_vcfcall(int argc, char *argv[])
          {"ploidy-file",required_argument,NULL,2},
          {"chromosome-X",no_argument,NULL,'X'},
          {"chromosome-Y",no_argument,NULL,'Y'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
  
@@ -680,8 +706,8 @@ int main_vcfcall(int argc, char *argv[])
          {
              case  2 : ploidy_fname = optarg; break;
              case  1 : ploidy = optarg; break;
-            case 'X': ploidy = "X"; fprintf(pysamerr,"Warning: -X will be deprecated, please use --ploidy instead.\n"); break;
-            case 'Y': ploidy = "Y"; fprintf(pysamerr,"Warning: -Y will be deprecated, please use --ploidy instead.\n"); break;
+            case 'X': ploidy = "X"; fprintf(pysam_stderr,"Warning: -X will be deprecated, please use --ploidy instead.\n"); break;
+            case 'Y': ploidy = "Y"; fprintf(pysam_stderr,"Warning: -Y will be deprecated, please use --ploidy instead.\n"); break;
              case 'f': args.aux.output_tags |= parse_format_flag(optarg); break;
              case 'M': args.flag &= ~CF_ACGT_ONLY; break;     // keep sites where REF is N
              case 'N': args.flag |= CF_ACGT_ONLY; break;      // omit sites where first base in REF is N (the new default)
@@ -729,6 +755,7 @@ int main_vcfcall(int argc, char *argv[])
              case 's': args.samples_fname = optarg; break;
              case 'S': args.samples_fname = optarg; args.samples_is_file = 1; break;
              case  9 : args.n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args.record_cmd_line = 0; break;
              default: usage(&args);
          }
      }
@@ -745,7 +772,7 @@ int main_vcfcall(int argc, char *argv[])
  
      if ( !ploidy_fname && !ploidy )
      {
-        fprintf(pysamerr,"Note: Neither --ploidy nor --ploidy-file given, assuming all sites are diploid\n");
+        fprintf(pysam_stderr,"Note: Neither --ploidy nor --ploidy-file given, assuming all sites are diploid\n");
          args.ploidy = ploidy_init_string("",2);
      }
  
diff --git a/bcftools/vcfcnv.c.pysam.c b/bcftools/vcfcnv.c.pysam.c

index d8a1ca51dee19f837ca667763e61f0e5ec91d031..10a00b9b97692ac6302c8425ac371d815cb8c7f9 100644 (file)
--- a/bcftools/vcfcnv.c.pysam.c
+++ b/bcftools/vcfcnv.c.pysam.c
@@ -267,7 +267,7 @@ static void init_data(args_t *args)
      args->hmm = hmm_init(args->nstates, args->tprob, 10000);
      hmm_init_states(args->hmm, args->iprobs);
  
-    args->summary_fh = stdout;
+    args->summary_fh = pysam_stdout;
      if ( args->output_dir )
      {
          init_sample_files(&args->query_sample, args->output_dir);
@@ -306,7 +306,7 @@ static void py_plot_cnv(char *script, float th)
  
      char *cmd = msprintf("python %s -p %f", script, th);
      int ret = system(cmd);
-    if ( ret) fprintf(pysamerr, "The command returned non-zero status %d: %s\n", ret, cmd);
+    if ( ret) fprintf(pysam_stderr, "The command returned non-zero status %d: %s\n", ret, cmd);
      free(cmd);
  }
  
@@ -641,7 +641,7 @@ static int set_observed_prob(args_t *args, sample_t *smpl, int isite)
      cn3_baf /= norm;
  
      #if DBG0
-    if ( args->verbose ) fprintf(pysamerr,"%f\t%f %f %f\n", baf,cn1_baf,cn2_baf,cn3_baf);
+    if ( args->verbose ) fprintf(pysam_stderr,"%f\t%f %f %f\n", baf,cn1_baf,cn2_baf,cn3_baf);
      #endif
  
      double cn1_lrr = exp(-(lrr + 0.45)*(lrr + 0.45)/smpl->lrr_dev2);
@@ -866,7 +866,7 @@ static int update_sample_args(args_t *args, sample_t *smpl, int ismpl)
      baf_AA_dev2 /= norm_baf_AA_dev2;
      if ( baf_dev2 < baf_AA_dev2 )  baf_dev2 = baf_AA_dev2;
      double max_mean_cn3 = 0.5 - sqrt(baf_dev2)*1.644854;    // R: qnorm(0.95)=1.644854
-    //fprintf(pysamerr,"dev=%f  AA_dev=%f  max_mean_cn3=%f  mean_cn3=%f\n", baf_dev2,baf_AA_dev2,max_mean_cn3,mean_cn3);
+    //fprintf(pysam_stderr,"dev=%f  AA_dev=%f  max_mean_cn3=%f  mean_cn3=%f\n", baf_dev2,baf_AA_dev2,max_mean_cn3,mean_cn3);
      assert( max_mean_cn3>0 );
  
      double new_frac = 1./mean_cn3 - 2;
@@ -936,13 +936,13 @@ static void cnv_flush_viterbi(args_t *args)
      if ( args->optimize_frac )
      {
          int niter = 0;
-        fprintf(pysamerr,"Attempting to estimate the fraction of aberrant cells (chr %s):\n", bcf_hdr_id2name(args->hdr,args->prev_rid));
+        fprintf(pysam_stderr,"Attempting to estimate the fraction of aberrant cells (chr %s):\n", bcf_hdr_id2name(args->hdr,args->prev_rid));
          do
          {
-            fprintf(pysamerr,"\t.. %f %f", args->query_sample.cell_frac,args->query_sample.baf_dev2);
+            fprintf(pysam_stderr,"\t.. %f %f", args->query_sample.cell_frac,args->query_sample.baf_dev2);
              if ( args->control_sample.name )
-                fprintf(pysamerr,"\t.. %f %f", args->control_sample.cell_frac,args->control_sample.baf_dev2);
-            fprintf(pysamerr,"\n");
+                fprintf(pysam_stderr,"\t.. %f %f", args->control_sample.cell_frac,args->control_sample.baf_dev2);
+            fprintf(pysam_stderr,"\n");
              set_emission_probs(args);
              hmm_run_fwd_bwd(hmm, args->nsites, args->eprob, args->sites);
          }
@@ -958,10 +958,10 @@ static void cnv_flush_viterbi(args_t *args)
              if ( args->control_sample.name ) set_gauss_params(args, &args->control_sample);
          }
  
-        fprintf(pysamerr,"\t.. %f %f", args->query_sample.cell_frac,args->query_sample.baf_dev2);
+        fprintf(pysam_stderr,"\t.. %f %f", args->query_sample.cell_frac,args->query_sample.baf_dev2);
          if ( args->control_sample.name )
-            fprintf(pysamerr,"\t.. %f %f", args->control_sample.cell_frac,args->control_sample.baf_dev2);
-        fprintf(pysamerr,"\n");
+            fprintf(pysam_stderr,"\t.. %f %f", args->control_sample.cell_frac,args->control_sample.baf_dev2);
+        fprintf(pysam_stderr,"\n");
      }
      set_emission_probs(args);
  
@@ -971,7 +971,7 @@ static void cnv_flush_viterbi(args_t *args)
          double ori_ii = avg_ii_prob(nstates,hmm_get_tprob(hmm));
          hmm_run_baum_welch(hmm, args->nsites, args->eprob, args->sites);
          double new_ii = avg_ii_prob(nstates,hmm_get_tprob(hmm));
-        fprintf(pysamerr,"%e\t%e\t%e\n", ori_ii,new_ii,new_ii-ori_ii);
+        fprintf(pysam_stderr,"%e\t%e\t%e\n", ori_ii,new_ii,new_ii-ori_ii);
          double *tprob = init_tprob_matrix(nstates, 1-new_ii, args->same_prob);
          hmm_set_tprob(args->hmm, tprob, 10000);
          double *tprob_arr = hmm_get_tprob(hmm);
@@ -983,9 +983,9 @@ static void cnv_flush_viterbi(args_t *args)
              {
                  for (j=0; j<nstates; j++)
                  {
-                    printf(" %.15f", MAT(tprob_arr,nstates,j,i));
+                    fprintf(pysam_stdout, " %.15f", MAT(tprob_arr,nstates,j,i));
                  }
-                printf("\n");
+                fprintf(pysam_stdout, "\n");
              }
              break;
          }
@@ -1176,33 +1176,33 @@ static void cnv_next_line(args_t *args, bcf1_t *line)
  
  static void usage(args_t *args)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Copy number variation caller, requires Illumina's B-allele frequency (BAF) and Log R\n");
-    fprintf(pysamerr, "         Ratio intensity (LRR). The HMM considers the following copy number states: CN 2\n");
-    fprintf(pysamerr, "         (normal), 1 (single-copy loss), 0 (complete loss), 3 (single-copy gain)\n");
-    fprintf(pysamerr, "Usage:   bcftools cnv [OPTIONS] <file.vcf>\n");
-    fprintf(pysamerr, "General Options:\n");
-    fprintf(pysamerr, "    -c, --control-sample <string>      optional control sample name to highlight differences\n");
-    fprintf(pysamerr, "    -f, --AF-file <file>               read allele frequencies from file (CHR\\tPOS\\tREF,ALT\\tAF)\n");
-    fprintf(pysamerr, "    -o, --output-dir <path>            \n");
-    fprintf(pysamerr, "    -p, --plot-threshold <float>       plot aberrant chromosomes with quality at least 'float'\n");
-    fprintf(pysamerr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
-    fprintf(pysamerr, "    -s, --query-sample <string>        query samply name\n");
-    fprintf(pysamerr, "    -t, --targets <region>             similar to -r but streams rather than index-jumps\n");
-    fprintf(pysamerr, "    -T, --targets-file <file>          similar to -R but streams rather than index-jumps\n");
-    fprintf(pysamerr, "HMM Options:\n");
-    fprintf(pysamerr, "    -a, --aberrant <float[,float]>     fraction of aberrant cells in query and control [1.0,1.0]\n");
-    fprintf(pysamerr, "    -b, --BAF-weight <float>           relative contribution from BAF [1]\n");
-    fprintf(pysamerr, "    -d, --BAF-dev <float[,float]>      expected BAF deviation in query and control [0.04,0.04]\n"); // experimental
-    fprintf(pysamerr, "    -e, --err-prob <float>             uniform error probability [1e-4]\n");
-    fprintf(pysamerr, "    -k, --LRR-dev <float[,float]>      expected LRR deviation [0.2,0.2]\n"); // experimental
-    fprintf(pysamerr, "    -l, --LRR-weight <float>           relative contribution from LRR [0.2]\n");
-    fprintf(pysamerr, "    -L, --LRR-smooth-win <int>         window of LRR moving average smoothing [10]\n");
-    fprintf(pysamerr, "    -O, --optimize <float>             estimate fraction of aberrant cells down to <float> [1.0]\n");
-    fprintf(pysamerr, "    -P, --same-prob <float>            prior probability of -s/-c being the same [0.5]\n");
-    fprintf(pysamerr, "    -x, --xy-prob <float>              P(x|y) transition probability [1e-9]\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Copy number variation caller, requires Illumina's B-allele frequency (BAF) and Log R\n");
+    fprintf(pysam_stderr, "         Ratio intensity (LRR). The HMM considers the following copy number states: CN 2\n");
+    fprintf(pysam_stderr, "         (normal), 1 (single-copy loss), 0 (complete loss), 3 (single-copy gain)\n");
+    fprintf(pysam_stderr, "Usage:   bcftools cnv [OPTIONS] <file.vcf>\n");
+    fprintf(pysam_stderr, "General Options:\n");
+    fprintf(pysam_stderr, "    -c, --control-sample <string>      optional control sample name to highlight differences\n");
+    fprintf(pysam_stderr, "    -f, --AF-file <file>               read allele frequencies from file (CHR\\tPOS\\tREF,ALT\\tAF)\n");
+    fprintf(pysam_stderr, "    -o, --output-dir <path>            \n");
+    fprintf(pysam_stderr, "    -p, --plot-threshold <float>       plot aberrant chromosomes with quality at least 'float'\n");
+    fprintf(pysam_stderr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "    -s, --query-sample <string>        query samply name\n");
+    fprintf(pysam_stderr, "    -t, --targets <region>             similar to -r but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "    -T, --targets-file <file>          similar to -R but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "HMM Options:\n");
+    fprintf(pysam_stderr, "    -a, --aberrant <float[,float]>     fraction of aberrant cells in query and control [1.0,1.0]\n");
+    fprintf(pysam_stderr, "    -b, --BAF-weight <float>           relative contribution from BAF [1]\n");
+    fprintf(pysam_stderr, "    -d, --BAF-dev <float[,float]>      expected BAF deviation in query and control [0.04,0.04]\n"); // experimental
+    fprintf(pysam_stderr, "    -e, --err-prob <float>             uniform error probability [1e-4]\n");
+    fprintf(pysam_stderr, "    -k, --LRR-dev <float[,float]>      expected LRR deviation [0.2,0.2]\n"); // experimental
+    fprintf(pysam_stderr, "    -l, --LRR-weight <float>           relative contribution from LRR [0.2]\n");
+    fprintf(pysam_stderr, "    -L, --LRR-smooth-win <int>         window of LRR moving average smoothing [10]\n");
+    fprintf(pysam_stderr, "    -O, --optimize <float>             estimate fraction of aberrant cells down to <float> [1.0]\n");
+    fprintf(pysam_stderr, "    -P, --same-prob <float>            prior probability of -s/-c being the same [0.5]\n");
+    fprintf(pysam_stderr, "    -x, --xy-prob <float>              P(x|y) transition probability [1e-9]\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -1379,7 +1379,7 @@ int main_vcfcnv(int argc, char *argv[])
      }
      cnv_next_line(args, NULL);
      create_plots(args);
-    fprintf(pysamerr,"Number of lines: total/processed: %d/%d\n", args->ntot,args->nused);
+    fprintf(pysam_stderr,"Number of lines: total/processed: %d/%d\n", args->ntot,args->nused);
      destroy_data(args);
      free(args);
      return 0;
diff --git a/bcftools/vcfconcat.c b/bcftools/vcfconcat.c

index cfec7c01addeff4d4035dc21404a7b8c2645a764..bd6a00a25d22280927ede55dc7bfdb04362d88f2 100644 (file)
--- a/bcftools/vcfconcat.c
+++ b/bcftools/vcfconcat.c
@@ -31,13 +31,15 @@ THE SOFTWARE.  */
  #include <htslib/vcf.h>
  #include <htslib/synced_bcf_reader.h>
  #include <htslib/kseq.h>
+#include <htslib/bgzf.h>
+#include <htslib/tbx.h> // for hts_get_bgzfp()
  #include "bcftools.h"
  
  typedef struct _args_t
  {
      bcf_srs_t *files;
      htsFile *out_fh;
-    int output_type, n_threads;
+    int output_type, n_threads, record_cmd_line;
      bcf_hdr_t *out_hdr;
      int *seen_seq;
  
@@ -50,7 +52,7 @@ typedef struct _args_t
  
      char **argv, *output_fname, *file_list, **fnames, *remove_dups, *regions_list;
      int argc, nfnames, allow_overlaps, phased_concat, regions_is_file;
-    int compact_PS, phase_set_changed;
+    int compact_PS, phase_set_changed, naive_concat;
  }
  args_t;
  
@@ -106,7 +108,7 @@ static void init_data(args_t *args)
          bcf_hdr_append(args->out_hdr,"##FORMAT=<ID=PQ,Number=1,Type=Integer,Description=\"Phasing Quality (bigger is better)\">");
          bcf_hdr_append(args->out_hdr,"##FORMAT=<ID=PS,Number=1,Type=Integer,Description=\"Phase Set\">");
      }
-    bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_concat");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_concat");
      args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
      if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
      if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
@@ -176,8 +178,11 @@ static void destroy_data(args_t *args)
      for (i=0; i<args->nfnames; i++) free(args->fnames[i]);
      free(args->fnames);
      if ( args->files ) bcf_sr_destroy(args->files);
-    if ( hts_close(args->out_fh)!=0 ) error("hts_close error\n");
-    bcf_hdr_destroy(args->out_hdr);
+    if ( args->out_fh )
+    {
+        if ( hts_close(args->out_fh)!=0 ) error("hts_close error\n");
+    }
+    if ( args->out_hdr ) bcf_hdr_destroy(args->out_hdr);
      free(args->seen_seq);
      free(args->start_pos);
      free(args->swap_phase);
@@ -550,6 +555,108 @@ static void concat(args_t *args)
      }
  }
  
+// Concatenate BGZF-compressed BCFs by copying their raw compressed blocks, without
+// recompression. Only the first file's header is written and per-file BGZF EOF markers
+// are dropped. Every failure is reported through error(), which does not return.
+static void naive_concat(args_t *args)
+{
+    // only compressed BCF atm
+    BGZF *bgzf_out = bgzf_open(args->output_fname,"w");
+    if ( !bgzf_out ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
+    const size_t page_size = 32768;
+    char *buf = (char*) malloc(page_size);
+    if ( !buf ) error("Failed to allocate %d bytes\n", (int)page_size);
+    kstring_t tmp = {0,0,0};
+    int i;
+    for (i=0; i<args->nfnames; i++)
+    {
+        htsFile *hts_fp = hts_open(args->fnames[i],"r");
+        if ( !hts_fp ) error("Failed to open: %s\n", args->fnames[i]);
+        htsFormat type = *hts_get_format(hts_fp);
+        if ( type.format==vcf || type.compression!=bgzf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
+
+        BGZF *fp = hts_get_bgzfp(hts_fp);
+        if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
+            error("Failed to read %s: %s\n", args->fnames[i], strerror(errno));
+
+        uint8_t magic[5], hlen[4];
+        if ( bgzf_read(fp, magic, 5) != 5 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+        if (strncmp((char*)magic, "BCF\2\2", 5) != 0) error("Invalid BCF magic string in %s\n", args->fnames[i]);
+        // The header length is stored as a 32-bit little-endian integer; decode it bytewise
+        // instead of reading four bytes into the wider, host-endian size_t tmp.l
+        if ( bgzf_read(fp, hlen, 4) != 4 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+        tmp.l = (size_t)hlen[0] | (size_t)hlen[1]<<8 | (size_t)hlen[2]<<16 | (size_t)hlen[3]<<24;
+        hts_expand(char,tmp.l,tmp.m,tmp.s);
+        if ( bgzf_read(fp, tmp.s, tmp.l) != (ssize_t)tmp.l ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+
+        // write only the first header
+        if ( i==0 )
+        {
+            if ( bgzf_write(bgzf_out, "BCF\2\2", 5) !=5 ) error("Failed to write %d bytes to %s\n", 5,args->output_fname);
+            if ( bgzf_write(bgzf_out, hlen, 4) !=4 ) error("Failed to write %d bytes to %s\n", 4,args->output_fname);
+            if ( bgzf_write(bgzf_out, tmp.s, tmp.l) != (ssize_t)tmp.l) error("Failed to write %d bytes to %s\n", (int)tmp.l,args->output_fname);
+        }
+
+        // Output all non-header data that were read together with the header block
+        int nskip = fp->block_offset;
+        if ( fp->block_length - nskip > 0 )
+        {
+            if ( bgzf_write(bgzf_out, fp->uncompressed_block+nskip, fp->block_length-nskip)<0 ) error("Error: %d\n",bgzf_out->errcode);
+        }
+        if ( bgzf_flush(bgzf_out)<0 ) error("Error: %d\n",bgzf_out->errcode);
+
+        // Stream the rest of the file as it is, without recompressing, but remove BGZF EOF blocks
+        ssize_t nread, ncached = 0, nwr;
+        const int neof = 28;
+        char cached[neof];
+        while (1)
+        {
+            nread = bgzf_raw_read(fp, buf, page_size);
+            // page_size boundary may occur in the middle of the EOF block, so we need to cache the blocks' ends
+            if ( nread<=0 ) break;
+            if ( nread<=neof )      // last block
+            {
+                if ( ncached )
+                {
+                    // flush the part of the cache that won't be needed
+                    nwr = bgzf_raw_write(bgzf_out, cached, nread);
+                    if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)nread);
+                    // make space in the cache so that we can append to the end
+                    if ( nread!=neof ) memmove(cached,cached+nread,neof-nread);
+                }
+
+                // fill the cache and check for eof outside this loop
+                memcpy(cached+neof-nread,buf,nread);
+                break;
+            }
+
+            // not the last block, flush the cache if full
+            if ( ncached )
+            {
+                nwr = bgzf_raw_write(bgzf_out, cached, ncached);
+                if (nwr != ncached) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)ncached);
+                ncached = 0;
+            }
+
+            // fill the cache
+            nread -= neof;
+            memcpy(cached,buf+nread,neof);
+            ncached = neof;
+            nwr = bgzf_raw_write(bgzf_out, buf, nread);
+            if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)nread);
+        }
+        if ( ncached && memcmp(cached,"\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0",neof) )
+        {
+            nwr = bgzf_raw_write(bgzf_out, cached, neof);
+            if (nwr != neof) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)neof);
+        }
+        if (hts_close(hts_fp)) error("Close failed: %s\n",args->fnames[i]);
+    }
+    free(buf);
+    free(tmp.s);
+    if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
+}
+
  static void usage(args_t *args)
  {
      fprintf(stderr, "\n");
@@ -558,7 +665,9 @@ static void usage(args_t *args)
      fprintf(stderr, "         concatenate chromosome VCFs into one VCF, or combine a SNP VCF and an indel\n");
      fprintf(stderr, "         VCF into one. The input files must be sorted by chr and position. The files\n");
      fprintf(stderr, "         must be given in the correct order to produce sorted VCF on output unless\n");
-    fprintf(stderr, "         the -a, --allow-overlaps option is specified.\n");
+    fprintf(stderr, "         the -a, --allow-overlaps option is specified. With the --naive option, the files\n");
+    fprintf(stderr, "         are concatenated without being recompressed, which is very fast but dangerous\n");
+    fprintf(stderr, "         if the BCF headers differ.\n");
      fprintf(stderr, "Usage:   bcftools concat [options] <A.vcf.gz> [<B.vcf.gz> [...]]\n");
      fprintf(stderr, "\n");
      fprintf(stderr, "Options:\n");
@@ -568,6 +677,8 @@ static void usage(args_t *args)
      fprintf(stderr, "   -D, --remove-duplicates        Alias for -d none\n");
      fprintf(stderr, "   -f, --file-list <file>         Read the list of files from a file.\n");
      fprintf(stderr, "   -l, --ligate                   Ligate phased VCFs by matching phase at overlapping haplotypes\n");
+    fprintf(stderr, "       --no-version               do not append version and command line to the header\n");
+    fprintf(stderr, "   -n, --naive                    Concatenate BCF files without recompression (dangerous, use with caution)\n");
      fprintf(stderr, "   -o, --output <file>            Write output to a file [standard output]\n");
      fprintf(stderr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
      fprintf(stderr, "   -q, --min-PQ <int>             Break phase set if phasing quality is lower than <int> [30]\n");
@@ -586,10 +697,12 @@ int main_vcfconcat(int argc, char *argv[])
      args->output_fname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      args->min_PQ  = 30;
  
      static struct option loptions[] =
      {
+        {"naive",no_argument,NULL,'n'},
          {"compact-PS",no_argument,NULL,'c'},
          {"regions",required_argument,NULL,'r'},
          {"regions-file",required_argument,NULL,'R'},
@@ -602,10 +715,11 @@ int main_vcfconcat(int argc, char *argv[])
          {"threads",required_argument,NULL,9},
          {"file-list",required_argument,NULL,'f'},
          {"min-PQ",required_argument,NULL,'q'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      char *tmp;
-    while ((c = getopt_long(argc, argv, "h:?o:O:f:alq:Dd:r:R:c",loptions,NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "h:?o:O:f:alq:Dd:r:R:cn",loptions,NULL)) >= 0)
      {
          switch (c) {
              case 'c': args->compact_PS = 1; break;
@@ -617,6 +731,7 @@ int main_vcfconcat(int argc, char *argv[])
                  args->min_PQ = strtol(optarg,&tmp,10);
                  if ( *tmp ) error("Could not parse argument: --min-PQ %s\n", optarg);
                  break;
+            case 'n': args->naive_concat = 1; break;
              case 'a': args->allow_overlaps = 1; break;
              case 'l': args->phased_concat = 1; break;
              case 'f': args->file_list = optarg; break;
@@ -631,6 +746,7 @@ int main_vcfconcat(int argc, char *argv[])
                  };
                  break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case 'h':
              case '?': usage(args); break;
              default: error("Unknown argument: %s\n", optarg);
@@ -654,6 +770,15 @@ int main_vcfconcat(int argc, char *argv[])
      if ( !args->nfnames ) usage(args);
      if ( args->remove_dups && !args->allow_overlaps ) error("The -D option is supported only with -a\n");
      if ( args->regions_list && !args->allow_overlaps ) error("The -r/-R option is supported only with -a\n");
+    if ( args->naive_concat )
+    {
+        if ( args->allow_overlaps ) error("The option --naive cannot be combined with --allow-overlaps\n");
+        if ( args->phased_concat ) error("The option --naive cannot be combined with --ligate\n");
+        naive_concat(args);
+        destroy_data(args);
+        free(args);
+        return 0;
+    }
      init_data(args);
      concat(args);
      destroy_data(args);
diff --git a/bcftools/vcfconcat.c.pysam.c b/bcftools/vcfconcat.c.pysam.c

index 40db3f77beb1aa5342e68a9446c21c5b70c6128e..be2d6a2e703c1de8dcd4c189ba0622b01cd04d92 100644 (file)
--- a/bcftools/vcfconcat.c.pysam.c
+++ b/bcftools/vcfconcat.c.pysam.c
@@ -33,13 +33,15 @@ THE SOFTWARE.  */
  #include <htslib/vcf.h>
  #include <htslib/synced_bcf_reader.h>
  #include <htslib/kseq.h>
+#include <htslib/bgzf.h>
+#include <htslib/tbx.h> // for hts_get_bgzfp()
  #include "bcftools.h"
  
  typedef struct _args_t
  {
      bcf_srs_t *files;
      htsFile *out_fh;
-    int output_type, n_threads;
+    int output_type, n_threads, record_cmd_line;
      bcf_hdr_t *out_hdr;
      int *seen_seq;
  
@@ -52,7 +54,7 @@ typedef struct _args_t
  
      char **argv, *output_fname, *file_list, **fnames, *remove_dups, *regions_list;
      int argc, nfnames, allow_overlaps, phased_concat, regions_is_file;
-    int compact_PS, phase_set_changed;
+    int compact_PS, phase_set_changed, naive_concat;
  }
  args_t;
  
@@ -108,7 +110,7 @@ static void init_data(args_t *args)
          bcf_hdr_append(args->out_hdr,"##FORMAT=<ID=PQ,Number=1,Type=Integer,Description=\"Phasing Quality (bigger is better)\">");
          bcf_hdr_append(args->out_hdr,"##FORMAT=<ID=PS,Number=1,Type=Integer,Description=\"Phase Set\">");
      }
-    bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_concat");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_concat");
      args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
      if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
      if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
@@ -178,8 +180,11 @@ static void destroy_data(args_t *args)
      for (i=0; i<args->nfnames; i++) free(args->fnames[i]);
      free(args->fnames);
      if ( args->files ) bcf_sr_destroy(args->files);
-    if ( hts_close(args->out_fh)!=0 ) error("hts_close error\n");
-    bcf_hdr_destroy(args->out_hdr);
+    if ( args->out_fh )
+    {
+        if ( hts_close(args->out_fh)!=0 ) error("hts_close error\n");
+    }
+    if ( args->out_hdr ) bcf_hdr_destroy(args->out_hdr);
      free(args->seen_seq);
      free(args->start_pos);
      free(args->swap_phase);
@@ -231,7 +236,7 @@ static void phased_flush(args_t *args)
          {
              if ( !gt_absent_warned )
              {
-                fprintf(pysamerr,"GT is not present at %s:%d. (This warning is printed only once.)\n", bcf_seqname(ahdr,arec), arec->pos+1);
+                fprintf(pysam_stderr,"GT is not present at %s:%d. (This warning is printed only once.)\n", bcf_seqname(ahdr,arec), arec->pos+1);
                  gt_absent_warned = 1;
              }
              continue;
@@ -242,7 +247,7 @@ static void phased_flush(args_t *args)
          {
              if ( !gt_absent_warned )
              {
-                fprintf(pysamerr,"GT is not present at %s:%d. (This warning is printed only once.)\n", bcf_seqname(bhdr,brec), brec->pos+1);
+                fprintf(pysam_stderr,"GT is not present at %s:%d. (This warning is printed only once.)\n", bcf_seqname(bhdr,brec), brec->pos+1);
                  gt_absent_warned = 1;
              }
              continue;
@@ -552,31 +557,137 @@ static void concat(args_t *args)
      }
  }
  
+// Concatenate BGZF-compressed BCFs by copying their raw compressed blocks, without
+// recompression. Only the first file's header is written and per-file BGZF EOF markers
+// are dropped. Every failure is reported through error(), which does not return.
+static void naive_concat(args_t *args)
+{
+    // only compressed BCF atm
+    BGZF *bgzf_out = bgzf_open(args->output_fname,"w");
+    if ( !bgzf_out ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
+    const size_t page_size = 32768;
+    char *buf = (char*) malloc(page_size);
+    if ( !buf ) error("Failed to allocate %d bytes\n", (int)page_size);
+    kstring_t tmp = {0,0,0};
+    int i;
+    for (i=0; i<args->nfnames; i++)
+    {
+        htsFile *hts_fp = hts_open(args->fnames[i],"r");
+        if ( !hts_fp ) error("Failed to open: %s\n", args->fnames[i]);
+        htsFormat type = *hts_get_format(hts_fp);
+        if ( type.format==vcf || type.compression!=bgzf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
+
+        BGZF *fp = hts_get_bgzfp(hts_fp);
+        if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
+            error("Failed to read %s: %s\n", args->fnames[i], strerror(errno));
+
+        uint8_t magic[5], hlen[4];
+        if ( bgzf_read(fp, magic, 5) != 5 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+        if (strncmp((char*)magic, "BCF\2\2", 5) != 0) error("Invalid BCF magic string in %s\n", args->fnames[i]);
+        // The header length is stored as a 32-bit little-endian integer; decode it bytewise
+        // instead of reading four bytes into the wider, host-endian size_t tmp.l
+        if ( bgzf_read(fp, hlen, 4) != 4 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+        tmp.l = (size_t)hlen[0] | (size_t)hlen[1]<<8 | (size_t)hlen[2]<<16 | (size_t)hlen[3]<<24;
+        hts_expand(char,tmp.l,tmp.m,tmp.s);
+        if ( bgzf_read(fp, tmp.s, tmp.l) != (ssize_t)tmp.l ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+
+        // write only the first header
+        if ( i==0 )
+        {
+            if ( bgzf_write(bgzf_out, "BCF\2\2", 5) !=5 ) error("Failed to write %d bytes to %s\n", 5,args->output_fname);
+            if ( bgzf_write(bgzf_out, hlen, 4) !=4 ) error("Failed to write %d bytes to %s\n", 4,args->output_fname);
+            if ( bgzf_write(bgzf_out, tmp.s, tmp.l) != (ssize_t)tmp.l) error("Failed to write %d bytes to %s\n", (int)tmp.l,args->output_fname);
+        }
+
+        // Output all non-header data that were read together with the header block
+        int nskip = fp->block_offset;
+        if ( fp->block_length - nskip > 0 )
+        {
+            if ( bgzf_write(bgzf_out, fp->uncompressed_block+nskip, fp->block_length-nskip)<0 ) error("Error: %d\n",bgzf_out->errcode);
+        }
+        if ( bgzf_flush(bgzf_out)<0 ) error("Error: %d\n",bgzf_out->errcode);
+
+        // Stream the rest of the file as it is, without recompressing, but remove BGZF EOF blocks
+        ssize_t nread, ncached = 0, nwr;
+        const int neof = 28;
+        char cached[neof];
+        while (1)
+        {
+            nread = bgzf_raw_read(fp, buf, page_size);
+            // page_size boundary may occur in the middle of the EOF block, so we need to cache the blocks' ends
+            if ( nread<=0 ) break;
+            if ( nread<=neof )      // last block
+            {
+                if ( ncached )
+                {
+                    // flush the part of the cache that won't be needed
+                    nwr = bgzf_raw_write(bgzf_out, cached, nread);
+                    if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)nread);
+                    // make space in the cache so that we can append to the end
+                    if ( nread!=neof ) memmove(cached,cached+nread,neof-nread);
+                }
+
+                // fill the cache and check for eof outside this loop
+                memcpy(cached+neof-nread,buf,nread);
+                break;
+            }
+
+            // not the last block, flush the cache if full
+            if ( ncached )
+            {
+                nwr = bgzf_raw_write(bgzf_out, cached, ncached);
+                if (nwr != ncached) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)ncached);
+                ncached = 0;
+            }
+
+            // fill the cache
+            nread -= neof;
+            memcpy(cached,buf+nread,neof);
+            ncached = neof;
+            nwr = bgzf_raw_write(bgzf_out, buf, nread);
+            if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)nread);
+        }
+        if ( ncached && memcmp(cached,"\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0",neof) )
+        {
+            nwr = bgzf_raw_write(bgzf_out, cached, neof);
+            if (nwr != neof) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)neof);
+        }
+        if (hts_close(hts_fp)) error("Close failed: %s\n",args->fnames[i]);
+    }
+    free(buf);
+    free(tmp.s);
+    if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
+}
+
  static void usage(args_t *args)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Concatenate or combine VCF/BCF files. All source files must have the same sample\n");
-    fprintf(pysamerr, "         columns appearing in the same order. The program can be used, for example, to\n");
-    fprintf(pysamerr, "         concatenate chromosome VCFs into one VCF, or combine a SNP VCF and an indel\n");
-    fprintf(pysamerr, "         VCF into one. The input files must be sorted by chr and position. The files\n");
-    fprintf(pysamerr, "         must be given in the correct order to produce sorted VCF on output unless\n");
-    fprintf(pysamerr, "         the -a, --allow-overlaps option is specified.\n");
-    fprintf(pysamerr, "Usage:   bcftools concat [options] <A.vcf.gz> [<B.vcf.gz> [...]]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Options:\n");
-    fprintf(pysamerr, "   -a, --allow-overlaps           First coordinate of the next file can precede last record of the current file.\n");
-    fprintf(pysamerr, "   -c, --compact-PS               Do not output PS tag at each site, only at the start of a new phase set block.\n");
-    fprintf(pysamerr, "   -d, --rm-dups <string>         Output duplicate records present in multiple files only once: <snps|indels|both|all|none>\n");
-    fprintf(pysamerr, "   -D, --remove-duplicates        Alias for -d none\n");
-    fprintf(pysamerr, "   -f, --file-list <file>         Read the list of files from a file.\n");
-    fprintf(pysamerr, "   -l, --ligate                   Ligate phased VCFs by matching phase at overlapping haplotypes\n");
-    fprintf(pysamerr, "   -o, --output <file>            Write output to a file [standard output]\n");
-    fprintf(pysamerr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
-    fprintf(pysamerr, "   -q, --min-PQ <int>             Break phase set if phasing quality is lower than <int> [30]\n");
-    fprintf(pysamerr, "   -r, --regions <region>         Restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "   -R, --regions-file <file>      Restrict to regions listed in a file\n");
-    fprintf(pysamerr, "       --threads <int>            Number of extra output compression threads [0]\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Concatenate or combine VCF/BCF files. All source files must have the same sample\n");
+    fprintf(pysam_stderr, "         columns appearing in the same order. The program can be used, for example, to\n");
+    fprintf(pysam_stderr, "         concatenate chromosome VCFs into one VCF, or combine a SNP VCF and an indel\n");
+    fprintf(pysam_stderr, "         VCF into one. The input files must be sorted by chr and position. The files\n");
+    fprintf(pysam_stderr, "         must be given in the correct order to produce sorted VCF on output unless\n");
+    fprintf(pysam_stderr, "         the -a, --allow-overlaps option is specified. With the --naive option, the files\n");
+    fprintf(pysam_stderr, "         are concatenated without being recompressed, which is very fast but dangerous\n");
+    fprintf(pysam_stderr, "         if the BCF headers differ.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools concat [options] <A.vcf.gz> [<B.vcf.gz> [...]]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "   -a, --allow-overlaps           First coordinate of the next file can precede last record of the current file.\n");
+    fprintf(pysam_stderr, "   -c, --compact-PS               Do not output PS tag at each site, only at the start of a new phase set block.\n");
+    fprintf(pysam_stderr, "   -d, --rm-dups <string>         Output duplicate records present in multiple files only once: <snps|indels|both|all|none>\n");
+    fprintf(pysam_stderr, "   -D, --remove-duplicates        Alias for -d none\n");
+    fprintf(pysam_stderr, "   -f, --file-list <file>         Read the list of files from a file.\n");
+    fprintf(pysam_stderr, "   -l, --ligate                   Ligate phased VCFs by matching phase at overlapping haplotypes\n");
+    fprintf(pysam_stderr, "       --no-version               do not append version and command line to the header\n");
+    fprintf(pysam_stderr, "   -n, --naive                    Concatenate BCF files without recompression (dangerous, use with caution)\n");
+    fprintf(pysam_stderr, "   -o, --output <file>            Write output to a file [standard output]\n");
+    fprintf(pysam_stderr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+    fprintf(pysam_stderr, "   -q, --min-PQ <int>             Break phase set if phasing quality is lower than <int> [30]\n");
+    fprintf(pysam_stderr, "   -r, --regions <region>         Restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "   -R, --regions-file <file>      Restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "       --threads <int>            Number of extra output compression threads [0]\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -588,10 +699,12 @@ int main_vcfconcat(int argc, char *argv[])
      args->output_fname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      args->min_PQ  = 30;
  
      static struct option loptions[] =
      {
+        {"naive",no_argument,NULL,'n'},
          {"compact-PS",no_argument,NULL,'c'},
          {"regions",required_argument,NULL,'r'},
          {"regions-file",required_argument,NULL,'R'},
@@ -604,10 +717,11 @@ int main_vcfconcat(int argc, char *argv[])
          {"threads",required_argument,NULL,9},
          {"file-list",required_argument,NULL,'f'},
          {"min-PQ",required_argument,NULL,'q'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      char *tmp;
-    while ((c = getopt_long(argc, argv, "h:?o:O:f:alq:Dd:r:R:c",loptions,NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "h:?o:O:f:alq:Dd:r:R:cn",loptions,NULL)) >= 0)
      {
          switch (c) {
              case 'c': args->compact_PS = 1; break;
@@ -619,6 +733,7 @@ int main_vcfconcat(int argc, char *argv[])
                  args->min_PQ = strtol(optarg,&tmp,10);
                  if ( *tmp ) error("Could not parse argument: --min-PQ %s\n", optarg);
                  break;
+            case 'n': args->naive_concat = 1; break;
              case 'a': args->allow_overlaps = 1; break;
              case 'l': args->phased_concat = 1; break;
              case 'f': args->file_list = optarg; break;
@@ -633,6 +748,7 @@ int main_vcfconcat(int argc, char *argv[])
                  };
                  break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case 'h':
              case '?': usage(args); break;
              default: error("Unknown argument: %s\n", optarg);
@@ -656,6 +772,15 @@ int main_vcfconcat(int argc, char *argv[])
      if ( !args->nfnames ) usage(args);
      if ( args->remove_dups && !args->allow_overlaps ) error("The -D option is supported only with -a\n");
      if ( args->regions_list && !args->allow_overlaps ) error("The -r/-R option is supported only with -a\n");
+    if ( args->naive_concat )
+    {
+        if ( args->allow_overlaps ) error("The option --naive cannot be combined with --allow-overlaps\n");
+        if ( args->phased_concat ) error("The option --naive cannot be combined with --ligate\n");
+        naive_concat(args);
+        destroy_data(args);
+        free(args);
+        return 0;
+    }
      init_data(args);
      concat(args);
      destroy_data(args);
diff --git a/bcftools/vcfconvert.c b/bcftools/vcfconvert.c

index 26166dfad39cdc4f4b9bbfb6cb64a45144e41119..1e60d30a9b14ba7c0c7d330ccdd30804b47cb8d6 100644 (file)
--- a/bcftools/vcfconvert.c
+++ b/bcftools/vcfconvert.c
@@ -66,7 +66,7 @@ struct _args_t
      int nsamples, *samples, sample_is_file, targets_is_file, regions_is_file, output_type;
      char **argv, *sample_list, *targets_list, *regions_list, *tag, *columns;
      char *outfname, *infname, *ref_fname;
-    int argc, n_threads;
+    int argc, n_threads, record_cmd_line;
  };
  
  static void destroy_data(args_t *args)
@@ -369,7 +369,7 @@ static void gensample_to_vcf(args_t *args)
      bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
      bcf_hdr_append(args->header, "##FORMAT=<ID=GP,Number=G,Type=Float,Description=\"Genotype Probabilities\">");
      bcf_hdr_printf(args->header, "##contig=<ID=%s,length=%d>", args->str.s,0x7fffffff);   // MAX_CSI_COOR
-    bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
  
      int i, nsamples;
      char **samples = hts_readlist(sample_fname, 1, &nsamples);
@@ -489,7 +489,7 @@ static void haplegendsample_to_vcf(args_t *args)
      bcf_hdr_append(args->header, "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
      bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
      bcf_hdr_printf(args->header, "##contig=<ID=%s,length=%d>", args->str.s,0x7fffffff);   // MAX_CSI_COOR
-    bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
  
      int i, nrows, nsamples;
      char **samples = hts_readlist(sample_fname, 1, &nrows);
@@ -606,7 +606,7 @@ static void hapsample_to_vcf(args_t *args)
      bcf_hdr_append(args->header, "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
      bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
      bcf_hdr_printf(args->header, "##contig=<ID=%s,length=%d>", args->str.s,0x7fffffff);   // MAX_CSI_COOR
-    bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
  
      int i, nsamples;
      char **samples = hts_readlist(sample_fname, 1, &nsamples);
@@ -1143,7 +1143,7 @@ static void tsv_to_vcf(args_t *args)
      args->header = bcf_hdr_init("w");
      bcf_hdr_set_chrs(args->header, args->ref);
      bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
-    bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
  
      int i, n;
      char **smpls = hts_readlist(args->sample_list, args->sample_is_file, &n);
@@ -1241,7 +1241,7 @@ static void gvcf_to_vcf(args_t *args)
      if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
  
      bcf_hdr_t *hdr = bcf_sr_get_header(args->files,0);
-    bcf_hdr_append_version(hdr, args->argc, args->argv, "bcftools_convert");
+    if (args->record_cmd_line) bcf_hdr_append_version(hdr, args->argc, args->argv, "bcftools_convert");
      bcf_hdr_write(out_fh,hdr);
  
      int32_t *itmp = NULL, nitmp = 0;
@@ -1304,11 +1304,12 @@ static void usage(void)
      fprintf(stderr, "   -S, --samples-file <file>   file of samples to include\n");
      fprintf(stderr, "   -t, --targets <region>      similar to -r but streams rather than index-jumps\n");
      fprintf(stderr, "   -T, --targets-file <file>   similar to -R but streams rather than index-jumps\n");
-    fprintf(stderr, "       --threads <int>         number of extra output compression threads [0]\n");
      fprintf(stderr, "\n");
      fprintf(stderr, "VCF output options:\n");
+    fprintf(stderr, "       --no-version               do not append version and command line to the header\n");
      fprintf(stderr, "   -o, --output <file>            output file name [stdout]\n");
      fprintf(stderr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+    fprintf(stderr, "       --threads <int>            number of extra output compression threads [0]\n");
      fprintf(stderr, "\n");
      fprintf(stderr, "GEN/SAMPLE conversion (input/output from IMPUTE2):\n");
      fprintf(stderr, "   -G, --gensample2vcf <...>   <prefix>|<gen-file>,<sample-file>\n");
@@ -1359,6 +1360,7 @@ int main_vcfconvert(int argc, char *argv[])
      args->outfname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
  
      static struct option loptions[] =
      {
@@ -1387,6 +1389,7 @@ int main_vcfconvert(int argc, char *argv[])
          {"haplegendsample2vcf",required_argument,NULL,'H'},
          {"columns",required_argument,NULL,'c'},
          {"fasta-ref",required_argument,NULL,'f'},
+        {"no-version",no_argument,NULL,10},
          {NULL,0,NULL,0}
      };
      while ((c = getopt_long(argc, argv, "?h:r:R:s:S:t:T:i:e:g:G:o:O:c:f:H:",loptions,NULL)) >= 0) {
@@ -1424,6 +1427,7 @@ int main_vcfconvert(int argc, char *argv[])
                  break;
              case 'h': args->convert_func = vcf_to_haplegendsample; args->outfname = optarg; break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case 10 : args->record_cmd_line = 0; break;
              case '?': usage();
              default: error("Unknown argument: %s\n", optarg);
          }
diff --git a/bcftools/vcfconvert.c.pysam.c b/bcftools/vcfconvert.c.pysam.c

index 03b24b48e9b157d0d46d735035d14da348b44466..12333cc5e3ae0f38fc82987d04dc311a053e9ca8 100644 (file)
--- a/bcftools/vcfconvert.c.pysam.c
+++ b/bcftools/vcfconvert.c.pysam.c
@@ -68,7 +68,7 @@ struct _args_t
      int nsamples, *samples, sample_is_file, targets_is_file, regions_is_file, output_type;
      char **argv, *sample_list, *targets_list, *regions_list, *tag, *columns;
      char *outfname, *infname, *ref_fname;
-    int argc, n_threads;
+    int argc, n_threads, record_cmd_line;
  };
  
  static void destroy_data(args_t *args)
@@ -211,13 +211,13 @@ static int tsv_setter_gt_gp(tsv_t *tsv, bcf1_t *rec, void *usr)
      {
          float aa,ab,bb;
          aa = strtod(tsv->ss, &tsv->se);
-        if ( tsv->ss==tsv->se ) { fprintf(pysamerr,"Could not parse first value of %d-th sample\n", i+1); return -1; }
+        if ( tsv->ss==tsv->se ) { fprintf(pysam_stderr,"Could not parse first value of %d-th sample\n", i+1); return -1; }
          tsv->ss = tsv->se+1;
          ab = strtod(tsv->ss, &tsv->se);
-        if ( tsv->ss==tsv->se ) { fprintf(pysamerr,"Could not parse second value of %d-th sample\n", i+1); return -1; }
+        if ( tsv->ss==tsv->se ) { fprintf(pysam_stderr,"Could not parse second value of %d-th sample\n", i+1); return -1; }
          tsv->ss = tsv->se+1;
          bb = strtod(tsv->ss, &tsv->se);
-        if ( tsv->ss==tsv->se ) { fprintf(pysamerr,"Could not parse third value of %d-th sample\n", i+1); return -1; }
+        if ( tsv->ss==tsv->se ) { fprintf(pysam_stderr,"Could not parse third value of %d-th sample\n", i+1); return -1; }
          tsv->ss = tsv->se+1;
  
          if ( args->rev_als ) { float tmp = bb; bb = aa; aa = tmp; }
@@ -263,7 +263,7 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
              if ( !ss[0] || !ss[1] || !ss[2] ||
                   (up && (!ss[3] || !ss[4]) ) )
              {
-                fprintf(pysamerr,"Wrong number of fields at %d-th sample ([%c][%c][%c]). ",i+1,ss[0],ss[1],ss[2]);
+                fprintf(pysam_stderr,"Wrong number of fields at %d-th sample ([%c][%c][%c]). ",i+1,ss[0],ss[1],ss[2]);
                  return -1;
              }
  
@@ -282,7 +282,7 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
                  args->gts[2*i+all] = bcf_int32_vector_end;
                  break;
              default :
-                fprintf(pysamerr,"Could not parse: [%c][%s]\n", ss[all*2+up],tsv->ss);
+                fprintf(pysam_stderr,"Could not parse: [%c][%s]\n", ss[all*2+up],tsv->ss);
                  return -1; 
              }
              if( ss[all*2+up+1]=='*' ) up = up + 1;
@@ -290,7 +290,7 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
          
          if(up && up != 2)
          {
-            fprintf(pysamerr,"Missing unphased marker '*': [%c][%s]", ss[2+up], tsv->ss);
+            fprintf(pysam_stderr,"Missing unphased marker '*': [%c][%s]", ss[2+up], tsv->ss);
              return -1;
          }
  
@@ -304,8 +304,8 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
      }
      if ( tsv->ss[(nsamples-1)*4+3+nup] )
      {
-        fprintf(pysamerr,"nup: %d", nup);
-        fprintf(pysamerr,"Wrong number of fields (%d-th column = [%c]). ", nsamples*2,tsv->ss[(nsamples-1)*4+nup]);
+        fprintf(pysam_stderr,"nup: %d", nup);
+        fprintf(pysam_stderr,"Wrong number of fields (%d-th column = [%c]). ", nsamples*2,tsv->ss[(nsamples-1)*4+nup]);
          return -1;
      }
  
@@ -371,7 +371,7 @@ static void gensample_to_vcf(args_t *args)
      bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
      bcf_hdr_append(args->header, "##FORMAT=<ID=GP,Number=G,Type=Float,Description=\"Genotype Probabilities\">");
      bcf_hdr_printf(args->header, "##contig=<ID=%s,length=%d>", args->str.s,0x7fffffff);   // MAX_CSI_COOR
-    bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
  
      int i, nsamples;
      char **samples = hts_readlist(sample_fname, 1, &nsamples);
@@ -417,7 +417,7 @@ static void gensample_to_vcf(args_t *args)
      free(args->flt);
      tsv_destroy(tsv);
  
-    fprintf(pysamerr,"Number of processed rows: \t%d\n", args->n.total);
+    fprintf(pysam_stderr,"Number of processed rows: \t%d\n", args->n.total);
  }
  
  static void haplegendsample_to_vcf(args_t *args)
@@ -491,7 +491,7 @@ static void haplegendsample_to_vcf(args_t *args)
      bcf_hdr_append(args->header, "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
      bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
      bcf_hdr_printf(args->header, "##contig=<ID=%s,length=%d>", args->str.s,0x7fffffff);   // MAX_CSI_COOR
-    bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
  
      int i, nrows, nsamples;
      char **samples = hts_readlist(sample_fname, 1, &nrows);
@@ -554,7 +554,7 @@ static void haplegendsample_to_vcf(args_t *args)
      tsv_destroy(hap_tsv);
      tsv_destroy(leg_tsv);
  
-    fprintf(pysamerr,"Number of processed rows: \t%d\n", args->n.total);
+    fprintf(pysam_stderr,"Number of processed rows: \t%d\n", args->n.total);
  }
  
  static void hapsample_to_vcf(args_t *args)
@@ -608,7 +608,7 @@ static void hapsample_to_vcf(args_t *args)
      bcf_hdr_append(args->header, "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
      bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
      bcf_hdr_printf(args->header, "##contig=<ID=%s,length=%d>", args->str.s,0x7fffffff);   // MAX_CSI_COOR
-    bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
  
      int i, nsamples;
      char **samples = hts_readlist(sample_fname, 1, &nsamples);
@@ -653,7 +653,7 @@ static void hapsample_to_vcf(args_t *args)
      free(args->gts);
      tsv_destroy(tsv);
  
-    fprintf(pysamerr,"Number of processed rows: \t%d\n", args->n.total);
+    fprintf(pysam_stderr,"Number of processed rows: \t%d\n", args->n.total);
  }
  
  static void vcf_to_gensample(args_t *args)
@@ -710,8 +710,8 @@ static void vcf_to_gensample(args_t *args)
      if ( gen_fname && (strlen(gen_fname)<3 || strcasecmp(".gz",gen_fname+strlen(gen_fname)-3)) ) gen_compressed = 0;
      if ( sample_fname && strlen(sample_fname)>3 && strcasecmp(".gz",sample_fname+strlen(sample_fname)-3)==0 ) sample_compressed = 0;
  
-    if (gen_fname) fprintf(pysamerr, "Gen file: %s\n", gen_fname);
-    if (sample_fname) fprintf(pysamerr, "Sample file: %s\n", sample_fname);
+    if (gen_fname) fprintf(pysam_stderr, "Gen file: %s\n", gen_fname);
+    if (sample_fname) fprintf(pysam_stderr, "Sample file: %s\n", sample_fname);
  
      // write samples file
      if (sample_fname) {
@@ -755,7 +755,7 @@ static void vcf_to_gensample(args_t *args)
          // biallelic required
          if ( line->n_allele>2 ) {
              if (!non_biallelic)
-                fprintf(pysamerr, "Warning: non-biallelic records are skipped. Consider splitting multi-allelic records into biallelic records using 'bcftools norm -m-'.\n");
+                fprintf(pysam_stderr, "Warning: non-biallelic records are skipped. Consider splitting multi-allelic records into biallelic records using 'bcftools norm -m-'.\n");
              non_biallelic++;
              continue;
          }
@@ -774,7 +774,7 @@ static void vcf_to_gensample(args_t *args)
              nok++;
          }
      }
-    fprintf(pysamerr, "%d records written, %d skipped: %d/%d/%d/%d no-ALT/non-biallelic/filtered/duplicated\n", 
+    fprintf(pysam_stderr, "%d records written, %d skipped: %d/%d/%d/%d no-ALT/non-biallelic/filtered/duplicated\n", 
          nok, no_alt+non_biallelic+filtered+ndup, no_alt, non_biallelic, filtered, ndup);
  
      if ( str.m ) free(str.s);
@@ -826,9 +826,9 @@ static void vcf_to_haplegendsample(args_t *args)
      if ( legend_fname && (strlen(legend_fname)<3 || strcasecmp(".gz",legend_fname+strlen(legend_fname)-3)) ) legend_compressed = 0;
      if ( sample_fname && strlen(sample_fname)>3 && strcasecmp(".gz",sample_fname+strlen(sample_fname)-3)==0 ) sample_compressed = 0;
  
-    if (hap_fname) fprintf(pysamerr, "Haps file: %s\n", hap_fname);
-    if (legend_fname) fprintf(pysamerr, "Legend file: %s\n", legend_fname);
-    if (sample_fname) fprintf(pysamerr, "Sample file: %s\n", sample_fname);
+    if (hap_fname) fprintf(pysam_stderr, "Haps file: %s\n", hap_fname);
+    if (legend_fname) fprintf(pysam_stderr, "Legend file: %s\n", legend_fname);
+    if (sample_fname) fprintf(pysam_stderr, "Sample file: %s\n", sample_fname);
  
      // write samples file
      if (sample_fname) {
@@ -879,7 +879,7 @@ static void vcf_to_haplegendsample(args_t *args)
          // biallelic required
          if ( line->n_allele>2 ) {
              if (!non_biallelic)
-                fprintf(pysamerr, "Warning: non-biallelic records are skipped. Consider splitting multi-allelic records into biallelic records using 'bcftools norm -m-'.\n");
+                fprintf(pysam_stderr, "Warning: non-biallelic records are skipped. Consider splitting multi-allelic records into biallelic records using 'bcftools norm -m-'.\n");
              non_biallelic++;
              continue;
          }
@@ -906,7 +906,7 @@ static void vcf_to_haplegendsample(args_t *args)
          }
          nok++;
      }
-    fprintf(pysamerr, "%d records written, %d skipped: %d/%d/%d no-ALT/non-biallelic/filtered\n", nok,no_alt+non_biallelic+filtered, no_alt, non_biallelic, filtered);
+    fprintf(pysam_stderr, "%d records written, %d skipped: %d/%d/%d no-ALT/non-biallelic/filtered\n", nok,no_alt+non_biallelic+filtered, no_alt, non_biallelic, filtered);
      if ( str.m ) free(str.s);
      if ( hout && bgzf_close(hout)!=0 ) error("Error closing %s: %s\n", hap_fname, strerror(errno));
      if ( lout && bgzf_close(lout)!=0 ) error("Error closing %s: %s\n", legend_fname, strerror(errno));
@@ -968,8 +968,8 @@ static void vcf_to_hapsample(args_t *args)
      if ( hap_fname && (strlen(hap_fname)<3 || strcasecmp(".gz",hap_fname+strlen(hap_fname)-3)) ) hap_compressed = 0;
      if ( sample_fname && strlen(sample_fname)>3 && strcasecmp(".gz",sample_fname+strlen(sample_fname)-3)==0 ) sample_compressed = 0;
  
-    if (hap_fname) fprintf(pysamerr, "Haps file: %s\n", hap_fname);
-    if (sample_fname) fprintf(pysamerr, "Sample file: %s\n", sample_fname);
+    if (hap_fname) fprintf(pysam_stderr, "Haps file: %s\n", hap_fname);
+    if (sample_fname) fprintf(pysam_stderr, "Sample file: %s\n", sample_fname);
  
      // write samples file
      if (sample_fname) {
@@ -1013,7 +1013,7 @@ static void vcf_to_hapsample(args_t *args)
          // biallelic required
          if ( line->n_allele>2 ) {
              if (!non_biallelic)
-                fprintf(pysamerr, "Warning: non-biallelic records are skipped. Consider splitting multi-allelic records into biallelic records using 'bcftools norm -m-'.\n");
+                fprintf(pysam_stderr, "Warning: non-biallelic records are skipped. Consider splitting multi-allelic records into biallelic records using 'bcftools norm -m-'.\n");
              non_biallelic++;
              continue;
          }
@@ -1029,7 +1029,7 @@ static void vcf_to_hapsample(args_t *args)
          }
          nok++;
      }
-    fprintf(pysamerr, "%d records written, %d skipped: %d/%d/%d no-ALT/non-biallelic/filtered\n", nok, no_alt+non_biallelic+filtered, no_alt, non_biallelic, filtered);
+    fprintf(pysam_stderr, "%d records written, %d skipped: %d/%d/%d no-ALT/non-biallelic/filtered\n", nok, no_alt+non_biallelic+filtered, no_alt, non_biallelic, filtered);
      if ( str.m ) free(str.s);
      if ( hout && bgzf_close(hout)!=0 ) error("Error closing %s: %s\n", hap_fname, strerror(errno));
      if (hap_fname) free(hap_fname);
@@ -1145,7 +1145,7 @@ static void tsv_to_vcf(args_t *args)
      args->header = bcf_hdr_init("w");
      bcf_hdr_set_chrs(args->header, args->ref);
      bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
-    bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
  
      int i, n;
      char **smpls = hts_readlist(args->sample_list, args->sample_is_file, &n);
@@ -1197,13 +1197,13 @@ static void tsv_to_vcf(args_t *args)
      free(args->str.s);
      free(args->gts);
  
-    fprintf(pysamerr,"Rows total: \t%d\n", args->n.total);
-    fprintf(pysamerr,"Rows skipped: \t%d\n", args->n.skipped);
-    fprintf(pysamerr,"Missing GTs: \t%d\n", args->n.missing);
-    fprintf(pysamerr,"Hom RR: \t%d\n", args->n.hom_rr);
-    fprintf(pysamerr,"Het RA: \t%d\n", args->n.het_ra);
-    fprintf(pysamerr,"Hom AA: \t%d\n", args->n.hom_aa);
-    fprintf(pysamerr,"Het AA: \t%d\n", args->n.het_aa);
+    fprintf(pysam_stderr,"Rows total: \t%d\n", args->n.total);
+    fprintf(pysam_stderr,"Rows skipped: \t%d\n", args->n.skipped);
+    fprintf(pysam_stderr,"Missing GTs: \t%d\n", args->n.missing);
+    fprintf(pysam_stderr,"Hom RR: \t%d\n", args->n.hom_rr);
+    fprintf(pysam_stderr,"Het RA: \t%d\n", args->n.het_ra);
+    fprintf(pysam_stderr,"Hom AA: \t%d\n", args->n.hom_aa);
+    fprintf(pysam_stderr,"Het AA: \t%d\n", args->n.het_aa);
  }
  
  static void vcf_to_vcf(args_t *args)
@@ -1243,7 +1243,7 @@ static void gvcf_to_vcf(args_t *args)
      if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
  
      bcf_hdr_t *hdr = bcf_sr_get_header(args->files,0);
-    bcf_hdr_append_version(hdr, args->argc, args->argv, "bcftools_convert");
+    if (args->record_cmd_line) bcf_hdr_append_version(hdr, args->argc, args->argv, "bcftools_convert");
      bcf_hdr_write(out_fh,hdr);
  
      int32_t *itmp = NULL, nitmp = 0;
@@ -1291,65 +1291,66 @@ static void gvcf_to_vcf(args_t *args)
  
  static void usage(void)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Converts VCF/BCF to other formats and back. See man page for file\n");
-    fprintf(pysamerr, "         formats details. When specifying output files explicitly instead\n");
-    fprintf(pysamerr, "         of with <prefix>, one can use '-' for stdout and '.' to suppress.\n");
-    fprintf(pysamerr, "Usage:   bcftools convert [OPTIONS] <input_file>\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "VCF input options:\n");
-    fprintf(pysamerr, "   -e, --exclude <expr>        exclude sites for which the expression is true\n");
-    fprintf(pysamerr, "   -i, --include <expr>        select sites for which the expression is true\n");
-    fprintf(pysamerr, "   -r, --regions <region>      restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "   -R, --regions-file <file>   restrict to regions listed in a file\n");
-    fprintf(pysamerr, "   -s, --samples <list>        list of samples to include\n");
-    fprintf(pysamerr, "   -S, --samples-file <file>   file of samples to include\n");
-    fprintf(pysamerr, "   -t, --targets <region>      similar to -r but streams rather than index-jumps\n");
-    fprintf(pysamerr, "   -T, --targets-file <file>   similar to -R but streams rather than index-jumps\n");
-    fprintf(pysamerr, "       --threads <int>         number of extra output compression threads [0]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "VCF output options:\n");
-    fprintf(pysamerr, "   -o, --output <file>            output file name [stdout]\n");
-    fprintf(pysamerr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "GEN/SAMPLE conversion (input/output from IMPUTE2):\n");
-    fprintf(pysamerr, "   -G, --gensample2vcf <...>   <prefix>|<gen-file>,<sample-file>\n");
-    fprintf(pysamerr, "   -g, --gensample <...>       <prefix>|<gen-file>,<sample-file>\n");
-    fprintf(pysamerr, "       --tag <string>          tag to take values for .gen file: GT,PL,GL,GP [GT]\n");
-    fprintf(pysamerr, "       --chrom                 output chromosome in first column instead of CHROM:POS_REF_ALT\n");
-    fprintf(pysamerr, "       --vcf-ids               output VCF IDs in second column instead of CHROM:POS_REF_ALT\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "gVCF conversion:\n");
-    fprintf(pysamerr, "       --gvcf2vcf              expand gVCF reference blocks\n");
-    fprintf(pysamerr, "   -f, --fasta-ref <file>      reference sequence in fasta format\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "HAP/SAMPLE conversion (output from SHAPEIT):\n");
-    fprintf(pysamerr, "       --hapsample2vcf <...>   <prefix>|<haps-file>,<sample-file>\n");
-    fprintf(pysamerr, "       --hapsample <...>       <prefix>|<haps-file>,<sample-file>\n");
-    fprintf(pysamerr, "       --haploid2diploid       convert haploid genotypes to diploid homozygotes\n");
-    fprintf(pysamerr, "       --vcf-ids               output VCF IDs instead of CHROM:POS_REF_ALT\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "HAP/LEGEND/SAMPLE conversion:\n");
-    fprintf(pysamerr, "   -H, --haplegendsample2vcf <...>  <prefix>|<hap-file>,<legend-file>,<sample-file>\n");
-    fprintf(pysamerr, "   -h, --haplegendsample <...>      <prefix>|<hap-file>,<legend-file>,<sample-file>\n");
-    fprintf(pysamerr, "       --haploid2diploid            convert haploid genotypes to diploid homozygotes\n");
-    fprintf(pysamerr, "       --vcf-ids                    output VCF IDs instead of CHROM:POS_REF_ALT\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "TSV conversion:\n");
-    fprintf(pysamerr, "       --tsv2vcf <file>        \n");
-    fprintf(pysamerr, "   -c, --columns <string>      columns of the input tsv file [ID,CHROM,POS,AA]\n");
-    fprintf(pysamerr, "   -f, --fasta-ref <file>      reference sequence in fasta format\n");
-    fprintf(pysamerr, "   -s, --samples <list>        list of sample names\n");
-    fprintf(pysamerr, "   -S, --samples-file <file>   file of sample names\n");
-    fprintf(pysamerr, "\n");
-    // fprintf(pysamerr, "PLINK options:\n");
-    // fprintf(pysamerr, "   -p, --plink <prefix>|<ped>,<map>,<fam>|<bed>,<bim>,<fam>|<tped>,<tfam>\n");
-    // fprintf(pysamerr, "       --tped              make tped file instead\n");
-    // fprintf(pysamerr, "       --bin               make binary bed/fam/bim files\n");
-    // fprintf(pysamerr, "\n");
-    // fprintf(pysamerr, "PBWT options:\n");
-    // fprintf(pysamerr, "   -b, --pbwt          <prefix> or <pbwt>,<sites>,<sample>,<missing>\n");
-    // fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Converts VCF/BCF to other formats and back. See man page for file\n");
+    fprintf(pysam_stderr, "         formats details. When specifying output files explicitly instead\n");
+    fprintf(pysam_stderr, "         of with <prefix>, one can use '-' for pysam_stdout and '.' to suppress.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools convert [OPTIONS] <input_file>\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "VCF input options:\n");
+    fprintf(pysam_stderr, "   -e, --exclude <expr>        exclude sites for which the expression is true\n");
+    fprintf(pysam_stderr, "   -i, --include <expr>        select sites for which the expression is true\n");
+    fprintf(pysam_stderr, "   -r, --regions <region>      restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "   -R, --regions-file <file>   restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "   -s, --samples <list>        list of samples to include\n");
+    fprintf(pysam_stderr, "   -S, --samples-file <file>   file of samples to include\n");
+    fprintf(pysam_stderr, "   -t, --targets <region>      similar to -r but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "   -T, --targets-file <file>   similar to -R but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "VCF output options:\n");
+    fprintf(pysam_stderr, "       --no-version               do not append version and command line to the header\n");
+    fprintf(pysam_stderr, "   -o, --output <file>            output file name [pysam_stdout]\n");
+    fprintf(pysam_stderr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+    fprintf(pysam_stderr, "       --threads <int>            number of extra output compression threads [0]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "GEN/SAMPLE conversion (input/output from IMPUTE2):\n");
+    fprintf(pysam_stderr, "   -G, --gensample2vcf <...>   <prefix>|<gen-file>,<sample-file>\n");
+    fprintf(pysam_stderr, "   -g, --gensample <...>       <prefix>|<gen-file>,<sample-file>\n");
+    fprintf(pysam_stderr, "       --tag <string>          tag to take values for .gen file: GT,PL,GL,GP [GT]\n");
+    fprintf(pysam_stderr, "       --chrom                 output chromosome in first column instead of CHROM:POS_REF_ALT\n");
+    fprintf(pysam_stderr, "       --vcf-ids               output VCF IDs in second column instead of CHROM:POS_REF_ALT\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "gVCF conversion:\n");
+    fprintf(pysam_stderr, "       --gvcf2vcf              expand gVCF reference blocks\n");
+    fprintf(pysam_stderr, "   -f, --fasta-ref <file>      reference sequence in fasta format\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "HAP/SAMPLE conversion (output from SHAPEIT):\n");
+    fprintf(pysam_stderr, "       --hapsample2vcf <...>   <prefix>|<haps-file>,<sample-file>\n");
+    fprintf(pysam_stderr, "       --hapsample <...>       <prefix>|<haps-file>,<sample-file>\n");
+    fprintf(pysam_stderr, "       --haploid2diploid       convert haploid genotypes to diploid homozygotes\n");
+    fprintf(pysam_stderr, "       --vcf-ids               output VCF IDs instead of CHROM:POS_REF_ALT\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "HAP/LEGEND/SAMPLE conversion:\n");
+    fprintf(pysam_stderr, "   -H, --haplegendsample2vcf <...>  <prefix>|<hap-file>,<legend-file>,<sample-file>\n");
+    fprintf(pysam_stderr, "   -h, --haplegendsample <...>      <prefix>|<hap-file>,<legend-file>,<sample-file>\n");
+    fprintf(pysam_stderr, "       --haploid2diploid            convert haploid genotypes to diploid homozygotes\n");
+    fprintf(pysam_stderr, "       --vcf-ids                    output VCF IDs instead of CHROM:POS_REF_ALT\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "TSV conversion:\n");
+    fprintf(pysam_stderr, "       --tsv2vcf <file>        \n");
+    fprintf(pysam_stderr, "   -c, --columns <string>      columns of the input tsv file [ID,CHROM,POS,AA]\n");
+    fprintf(pysam_stderr, "   -f, --fasta-ref <file>      reference sequence in fasta format\n");
+    fprintf(pysam_stderr, "   -s, --samples <list>        list of sample names\n");
+    fprintf(pysam_stderr, "   -S, --samples-file <file>   file of sample names\n");
+    fprintf(pysam_stderr, "\n");
+    // fprintf(pysam_stderr, "PLINK options:\n");
+    // fprintf(pysam_stderr, "   -p, --plink <prefix>|<ped>,<map>,<fam>|<bed>,<bim>,<fam>|<tped>,<tfam>\n");
+    // fprintf(pysam_stderr, "       --tped              make tped file instead\n");
+    // fprintf(pysam_stderr, "       --bin               make binary bed/fam/bim files\n");
+    // fprintf(pysam_stderr, "\n");
+    // fprintf(pysam_stderr, "PBWT options:\n");
+    // fprintf(pysam_stderr, "   -b, --pbwt          <prefix> or <pbwt>,<sites>,<sample>,<missing>\n");
+    // fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -1361,6 +1362,7 @@ int main_vcfconvert(int argc, char *argv[])
      args->outfname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
  
      static struct option loptions[] =
      {
@@ -1389,6 +1391,7 @@ int main_vcfconvert(int argc, char *argv[])
          {"haplegendsample2vcf",required_argument,NULL,'H'},
          {"columns",required_argument,NULL,'c'},
          {"fasta-ref",required_argument,NULL,'f'},
+        {"no-version",no_argument,NULL,10},
          {NULL,0,NULL,0}
      };
      while ((c = getopt_long(argc, argv, "?h:r:R:s:S:t:T:i:e:g:G:o:O:c:f:H:",loptions,NULL)) >= 0) {
@@ -1426,6 +1429,7 @@ int main_vcfconvert(int argc, char *argv[])
                  break;
              case 'h': args->convert_func = vcf_to_haplegendsample; args->outfname = optarg; break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case 10 : args->record_cmd_line = 0; break;
              case '?': usage();
              default: error("Unknown argument: %s\n", optarg);
          }
diff --git a/bcftools/vcffilter.c b/bcftools/vcffilter.c

index ac4c3a32e0c94ed0b98bd1b13a4ca31ab9764e70..f979d77f344d629fa0c86de55ed747bdf1f7abd4 100644 (file)
--- a/bcftools/vcffilter.c
+++ b/bcftools/vcffilter.c
@@ -71,7 +71,7 @@ typedef struct _args_t
      int output_type, n_threads;
  
      char **argv, *output_fname, *targets_list, *regions_list;
-    int argc;
+    int argc, record_cmd_line;
  }
  args_t;
  
@@ -149,7 +149,7 @@ static void init_data(args_t *args)
          }
      }
  
-    bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_filter");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_filter");
  
      if ( args->filter_str )
          args->filter = filter_init(args->hdr, args->filter_str);
@@ -408,6 +408,7 @@ static void usage(args_t *args)
      fprintf(stderr, "    -G, --IndelGap <int>          filter clusters of indels separated by <int> or fewer base pairs allowing only one to pass\n");
      fprintf(stderr, "    -i, --include <expr>          include only sites for which the expression is true (see man page for details\n");
      fprintf(stderr, "    -m, --mode [+x]               \"+\": do not replace but add to existing FILTER; \"x\": reset filters at sites which pass\n");
+    fprintf(stderr, "        --no-version              do not append version and command line to the header\n");
      fprintf(stderr, "    -o, --output <file>           write output to a file [standard output]\n");
      fprintf(stderr, "    -O, --output-type <b|u|z|v>   b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
      fprintf(stderr, "    -r, --regions <region>        restrict to comma-separated list of regions\n");
@@ -430,6 +431,7 @@ int main_vcffilter(int argc, char *argv[])
      args->output_fname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      int regions_is_file = 0, targets_is_file = 0;
  
      static struct option loptions[] =
@@ -448,6 +450,7 @@ int main_vcffilter(int argc, char *argv[])
          {"threads",required_argument,NULL,9},
          {"SnpGap",required_argument,NULL,'g'},
          {"IndelGap",required_argument,NULL,'G'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      char *tmp;
@@ -488,6 +491,7 @@ int main_vcffilter(int argc, char *argv[])
                  else error("The argument to -S not recognised: %s\n", optarg);
                  break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case 'h':
              case '?': usage(args);
              default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcffilter.c.pysam.c b/bcftools/vcffilter.c.pysam.c

index c731ba31713fd2a59b182d62e4e196026bcadd53..58193daf1c408c2955786225ef74d4ffd2b517cd 100644 (file)
--- a/bcftools/vcffilter.c.pysam.c
+++ b/bcftools/vcffilter.c.pysam.c
@@ -73,7 +73,7 @@ typedef struct _args_t
      int output_type, n_threads;
  
      char **argv, *output_fname, *targets_list, *regions_list;
-    int argc;
+    int argc, record_cmd_line;
  }
  args_t;
  
@@ -131,7 +131,7 @@ static void init_data(args_t *args)
                  if ( tmp.s ) kputs(" and ", &tmp);
                  kputs("\"IndelGap\"", &tmp);
              }
-            fprintf(pysamerr,"Warning: using %s filter name instead of \"%s\"\n", tmp.s,args->soft_filter);
+            fprintf(pysam_stderr,"Warning: using %s filter name instead of \"%s\"\n", tmp.s,args->soft_filter);
              free(tmp.s);
          }
  
@@ -151,7 +151,7 @@ static void init_data(args_t *args)
          }
      }
  
-    bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_filter");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_filter");
  
      if ( args->filter_str )
          args->filter = filter_init(args->hdr, args->filter_str);
@@ -400,26 +400,27 @@ static void set_genotypes(args_t *args, bcf1_t *line, int pass_site)
  
  static void usage(args_t *args)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Apply fixed-threshold filters.\n");
-    fprintf(pysamerr, "Usage:   bcftools filter [options] <in.vcf.gz>\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Options:\n");
-    fprintf(pysamerr, "    -e, --exclude <expr>          exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(pysamerr, "    -g, --SnpGap <int>            filter SNPs within <int> base pairs of an indel\n");
-    fprintf(pysamerr, "    -G, --IndelGap <int>          filter clusters of indels separated by <int> or fewer base pairs allowing only one to pass\n");
-    fprintf(pysamerr, "    -i, --include <expr>          include only sites for which the expression is true (see man page for details\n");
-    fprintf(pysamerr, "    -m, --mode [+x]               \"+\": do not replace but add to existing FILTER; \"x\": reset filters at sites which pass\n");
-    fprintf(pysamerr, "    -o, --output <file>           write output to a file [standard output]\n");
-    fprintf(pysamerr, "    -O, --output-type <b|u|z|v>   b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
-    fprintf(pysamerr, "    -r, --regions <region>        restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "    -R, --regions-file <file>     restrict to regions listed in a file\n");
-    fprintf(pysamerr, "    -s, --soft-filter <string>    annotate FILTER column with <string> or unique filter name (\"Filter%%d\") made up by the program (\"+\")\n");
-    fprintf(pysamerr, "    -S, --set-GTs <.|0>           set genotypes of failed samples to missing (.) or ref (0)\n");
-    fprintf(pysamerr, "    -t, --targets <region>        similar to -r but streams rather than index-jumps\n");
-    fprintf(pysamerr, "    -T, --targets-file <file>     similar to -R but streams rather than index-jumps\n");
-    fprintf(pysamerr, "        --threads <int>           number of extra output compression threads [0]\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Apply fixed-threshold filters.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools filter [options] <in.vcf.gz>\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "    -e, --exclude <expr>          exclude sites for which the expression is true (see man page for details)\n");
+    fprintf(pysam_stderr, "    -g, --SnpGap <int>            filter SNPs within <int> base pairs of an indel\n");
+    fprintf(pysam_stderr, "    -G, --IndelGap <int>          filter clusters of indels separated by <int> or fewer base pairs allowing only one to pass\n");
+    fprintf(pysam_stderr, "    -i, --include <expr>          include only sites for which the expression is true (see man page for details\n");
+    fprintf(pysam_stderr, "    -m, --mode [+x]               \"+\": do not replace but add to existing FILTER; \"x\": reset filters at sites which pass\n");
+    fprintf(pysam_stderr, "        --no-version              do not append version and command line to the header\n");
+    fprintf(pysam_stderr, "    -o, --output <file>           write output to a file [standard output]\n");
+    fprintf(pysam_stderr, "    -O, --output-type <b|u|z|v>   b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+    fprintf(pysam_stderr, "    -r, --regions <region>        restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "    -R, --regions-file <file>     restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "    -s, --soft-filter <string>    annotate FILTER column with <string> or unique filter name (\"Filter%%d\") made up by the program (\"+\")\n");
+    fprintf(pysam_stderr, "    -S, --set-GTs <.|0>           set genotypes of failed samples to missing (.) or ref (0)\n");
+    fprintf(pysam_stderr, "    -t, --targets <region>        similar to -r but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "    -T, --targets-file <file>     similar to -R but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "        --threads <int>           number of extra output compression threads [0]\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -432,6 +433,7 @@ int main_vcffilter(int argc, char *argv[])
      args->output_fname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      int regions_is_file = 0, targets_is_file = 0;
  
      static struct option loptions[] =
@@ -450,6 +452,7 @@ int main_vcffilter(int argc, char *argv[])
          {"threads",required_argument,NULL,9},
          {"SnpGap",required_argument,NULL,'g'},
          {"IndelGap",required_argument,NULL,'G'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      char *tmp;
@@ -490,6 +493,7 @@ int main_vcffilter(int argc, char *argv[])
                  else error("The argument to -S not recognised: %s\n", optarg);
                  break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case 'h':
              case '?': usage(args);
              default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfgtcheck.c.pysam.c b/bcftools/vcfgtcheck.c.pysam.c

index 161ca3c8c3e864bab970d205d18860fc349066bd..2f0a2884748889289be409fc13c057684173d24c 100644 (file)
--- a/bcftools/vcfgtcheck.c.pysam.c
+++ b/bcftools/vcfgtcheck.c.pysam.c
@@ -62,7 +62,7 @@ void py_plot(char *script)
      int len = strlen(script);
      char *cmd = !strcmp(".py",script+len-3) ? msprintf("python %s", script) : msprintf("python %s.py", script);
      int ret = system(cmd);
-    if ( ret ) fprintf(pysamerr, "The command returned non-zero status %d: %s\n", ret, cmd);
+    if ( ret ) fprintf(pysam_stderr, "The command returned non-zero status %d: %s\n", ret, cmd);
      free(cmd);
  }
  
@@ -272,7 +272,7 @@ static int init_gt2ipl(args_t *args, bcf1_t *gt_line, bcf1_t *sm_line, int *gt2i
              gt2ipl[ bcf_ij2G(j,i) ] = k<=l ? bcf_ij2G(k,l) : bcf_ij2G(l,k);
          }
      }
-    //for (i=0; i<n_gt2ipl; i++) printf("%d .. %d\n", i,gt2ipl[i]);
+    //for (i=0; i<n_gt2ipl; i++) fprintf(pysam_stdout, "%d .. %d\n", i,gt2ipl[i]);
      return 1;
  }
  
@@ -353,11 +353,11 @@ static void check_gt(args_t *args)
          if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "GT")<0 )
              error("[E::%s] Neither PL nor GT present in the header of %s\n", __func__, args->files->readers[0].fname);
          if ( !args->no_PLs )
-            fprintf(pysamerr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
+            fprintf(pysam_stderr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
          fake_pls = 1;
      }
  
-    FILE *fp = args->plot ? open_file(NULL, "w", "%s.tab", args->plot) : stdout;
+    FILE *fp = args->plot ? open_file(NULL, "w", "%s.tab", args->plot) : pysam_stdout;
      print_header(args, fp);
  
      int tgt_isample = -1, query_isample = 0;
@@ -370,7 +370,7 @@ static void check_gt(args_t *args)
      {
          if ( tgt_isample==-1 )
          {
-            fprintf(pysamerr,"No target sample selected for comparison, using the first sample in %s: %s\n", args->gt_fname,args->gt_hdr->samples[0]);
+            fprintf(pysam_stderr,"No target sample selected for comparison, using the first sample in %s: %s\n", args->gt_fname,args->gt_hdr->samples[0]);
              tgt_isample = 0;
          }
      }
@@ -556,12 +556,12 @@ static void cross_check_gts(args_t *args)
          if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "GT")<0 )
              error("[E::%s] Neither PL nor GT present in the header of %s\n", __func__, args->files->readers[0].fname);
          if ( !args->no_PLs )
-            fprintf(pysamerr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
+            fprintf(pysam_stderr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
          fake_pls = 1;
      }
      if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "DP")<0 ) ignore_dp = 1;
  
-    FILE *fp = args->plot ? open_file(NULL, "w", "%s.tab", args->plot) : stdout;
+    FILE *fp = args->plot ? open_file(NULL, "w", "%s.tab", args->plot) : pysam_stdout;
      print_header(args, fp);
      if ( args->all_sites ) fprintf(fp,"# [1]SD, Average Site Discordance\t[2]Chromosome\t[3]Position\t[4]Number of available pairs\t[5]Average discordance\n");
  
@@ -640,8 +640,8 @@ static void cross_check_gts(args_t *args)
      if ( args->tmp_arr ) free(args->tmp_arr);
      if ( is_hom ) free(is_hom);
  
-    if ( pl_warned ) fprintf(pysamerr, "[W::%s] PL was not found at %d site(s)\n", __func__, pl_warned);
-    if ( dp_warned ) fprintf(pysamerr, "[W::%s] DP was not found at %d site(s)\n", __func__, dp_warned);
+    if ( pl_warned ) fprintf(pysam_stderr, "[W::%s] PL was not found at %d site(s)\n", __func__, pl_warned);
+    if ( dp_warned ) fprintf(pysam_stderr, "[W::%s] DP was not found at %d site(s)\n", __func__, dp_warned);
  
      // Output samples sorted by average discordance
      double *score  = (double*) calloc(nsamples,sizeof(double));
@@ -709,23 +709,23 @@ static char *init_prefix(char *prefix)
  
  static void usage(void)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Check sample identity. With no -g BCF given, multi-sample cross-check is performed.\n");
-    fprintf(pysamerr, "Usage:   bcftools gtcheck [options] [-g <genotypes.vcf.gz>] <query.vcf.gz>\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Options:\n");
-    fprintf(pysamerr, "    -a, --all-sites                 output comparison for all sites\n");
-    fprintf(pysamerr, "    -g, --genotypes <file>          genotypes to compare against\n");
-    fprintf(pysamerr, "    -G, --GTs-only <int>            use GTs, ignore PLs, using <int> for unseen genotypes [99]\n");
-    fprintf(pysamerr, "    -H, --homs-only                 homozygous genotypes only (useful for low coverage data)\n");
-    fprintf(pysamerr, "    -p, --plot <prefix>             plot\n");
-    fprintf(pysamerr, "    -r, --regions <region>          restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "    -R, --regions-file <file>       restrict to regions listed in a file\n");
-    fprintf(pysamerr, "    -s, --query-sample <string>     query sample (by default the first sample is checked)\n");
-    fprintf(pysamerr, "    -S, --target-sample <string>    target sample in the -g file (used only for plotting)\n");
-    fprintf(pysamerr, "    -t, --targets <region>          similar to -r but streams rather than index-jumps\n");
-    fprintf(pysamerr, "    -T, --targets-file <file>       similar to -R but streams rather than index-jumps\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Check sample identity. With no -g BCF given, multi-sample cross-check is performed.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools gtcheck [options] [-g <genotypes.vcf.gz>] <query.vcf.gz>\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "    -a, --all-sites                 output comparison for all sites\n");
+    fprintf(pysam_stderr, "    -g, --genotypes <file>          genotypes to compare against\n");
+    fprintf(pysam_stderr, "    -G, --GTs-only <int>            use GTs, ignore PLs, using <int> for unseen genotypes [99]\n");
+    fprintf(pysam_stderr, "    -H, --homs-only                 homozygous genotypes only (useful for low coverage data)\n");
+    fprintf(pysam_stderr, "    -p, --plot <prefix>             plot\n");
+    fprintf(pysam_stderr, "    -r, --regions <region>          restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "    -R, --regions-file <file>       restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "    -s, --query-sample <string>     query sample (by default the first sample is checked)\n");
+    fprintf(pysam_stderr, "    -S, --target-sample <string>    target sample in the -g file (used only for plotting)\n");
+    fprintf(pysam_stderr, "    -t, --targets <region>          similar to -r but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "    -T, --targets-file <file>       similar to -R but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
diff --git a/bcftools/vcfindex.c b/bcftools/vcfindex.c

index e40fab52caa690e3d0ea47b9fbb5d6d4f75cb4fe..d1e9179e4e0cb93c25b28ff766d2e9a054b6d889 100644 (file)
--- a/bcftools/vcfindex.c
+++ b/bcftools/vcfindex.c
@@ -1,7 +1,7 @@
  
  /*  vcfindex.c -- Index bgzip compressed VCF/BCF files for random access.
  
-    Copyright (C) 2014 Genome Research Ltd.
+    Copyright (C) 2014-2016 Genome Research Ltd.
  
      Author: Shane McCarthy <sm15@sanger.ac.uk>
  
@@ -177,6 +177,7 @@ int main_vcfindex(int argc, char *argv[])
      if (stats) return vcf_index_stats(fname, stats);
  
      htsFile *fp = hts_open(fname,"r"); 
+    if ( !fp ) error("Failed to read %s\n", fname);
      htsFormat type = *hts_get_format(fp);
      hts_close(fp);
  
diff --git a/bcftools/vcfindex.c.pysam.c b/bcftools/vcfindex.c.pysam.c

index 1cfde16a966c42f6ae9b7d594afaa3b4027f6232..479fc575f80f2b48b7d4d714d1915547fd8028d1 100644 (file)
--- a/bcftools/vcfindex.c.pysam.c
+++ b/bcftools/vcfindex.c.pysam.c
@@ -3,7 +3,7 @@
  
  /*  vcfindex.c -- Index bgzip compressed VCF/BCF files for random access.
  
-    Copyright (C) 2014 Genome Research Ltd.
+    Copyright (C) 2014-2016 Genome Research Ltd.
  
      Author: Shane McCarthy <sm15@sanger.ac.uk>
  
@@ -40,20 +40,20 @@ DEALINGS IN THE SOFTWARE.  */
  
  static void usage(void)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Index bgzip compressed VCF/BCF files for random access.\n");
-    fprintf(pysamerr, "Usage:   bcftools index [options] <in.bcf>|<in.vcf.gz>\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Indexing options:\n");
-    fprintf(pysamerr, "    -c, --csi            generate CSI-format index for VCF/BCF files [default]\n");
-    fprintf(pysamerr, "    -f, --force          overwrite index if it already exists\n");
-    fprintf(pysamerr, "    -m, --min-shift INT  set minimal interval size for CSI indices to 2^INT [14]\n");
-    fprintf(pysamerr, "    -t, --tbi            generate TBI-format index for VCF files\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Stats options:\n");
-    fprintf(pysamerr, "    -n, --nrecords       print number of records based on existing index file\n");
-    fprintf(pysamerr, "    -s, --stats   print per contig stats based on existing index file\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Index bgzip compressed VCF/BCF files for random access.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools index [options] <in.bcf>|<in.vcf.gz>\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Indexing options:\n");
+    fprintf(pysam_stderr, "    -c, --csi            generate CSI-format index for VCF/BCF files [default]\n");
+    fprintf(pysam_stderr, "    -f, --force          overwrite index if it already exists\n");
+    fprintf(pysam_stderr, "    -m, --min-shift INT  set minimal interval size for CSI indices to 2^INT [14]\n");
+    fprintf(pysam_stderr, "    -t, --tbi            generate TBI-format index for VCF files\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Stats options:\n");
+    fprintf(pysam_stderr, "    -n, --nrecords       print number of records based on existing index file\n");
+    fprintf(pysam_stderr, "    -s, --stats   print per contig stats based on existing index file\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -61,7 +61,7 @@ int vcf_index_stats(char *fname, int stats)
  {
      char *fn_out = NULL;
      FILE *out;
-    out = fn_out ? fopen(fn_out, "w") : stdout;
+    out = fn_out ? fopen(fn_out, "w") : pysam_stdout;
  
      const char **seq;
      int i, nseq;
@@ -69,23 +69,23 @@ int vcf_index_stats(char *fname, int stats)
      hts_idx_t *idx = NULL;
  
      htsFile *fp = hts_open(fname,"r");
-    if ( !fp ) { fprintf(pysamerr,"Could not read %s\n", fname); return 1; }
+    if ( !fp ) { fprintf(pysam_stderr,"Could not read %s\n", fname); return 1; }
      bcf_hdr_t *hdr = bcf_hdr_read(fp);
-    if ( !hdr ) { fprintf(pysamerr,"Could not read the header: %s\n", fname); return 1; }
+    if ( !hdr ) { fprintf(pysam_stderr,"Could not read the header: %s\n", fname); return 1; }
  
      if ( hts_get_format(fp)->format==vcf )
      {
          tbx = tbx_index_load(fname);
-        if ( !tbx ) { fprintf(pysamerr,"Could not load TBI index: %s\n", fname); return 1; }
+        if ( !tbx ) { fprintf(pysam_stderr,"Could not load TBI index: %s\n", fname); return 1; }
      }
      else if ( hts_get_format(fp)->format==bcf )
      {
          idx = bcf_index_load(fname);
-        if ( !idx ) { fprintf(pysamerr,"Could not load CSI index: %s\n", fname); return 1; }
+        if ( !idx ) { fprintf(pysam_stderr,"Could not load CSI index: %s\n", fname); return 1; }
      }
      else
      {
-        fprintf(pysamerr,"Could not detect the file type as VCF or BCF: %s\n", fname);
+        fprintf(pysam_stderr,"Could not detect the file type as VCF or BCF: %s\n", fname);
          return 1;
      }
  
@@ -108,7 +108,7 @@ int vcf_index_stats(char *fname, int stats)
          bcf1_t *rec = bcf_init1();
          if (bcf_read1(fp, hdr, rec) >= 0)
          {
-            fprintf(pysamerr,"%s index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", tbx ? "TBI" : "CSI", fname);
+            fprintf(pysam_stderr,"%s index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", tbx ? "TBI" : "CSI", fname);
              return 1;
          }
          bcf_destroy1(rec);
@@ -161,17 +161,17 @@ int main_vcfindex(int argc, char *argv[])
      if ( optind==argc ) usage();
      if (stats>2)
      {
-        fprintf(pysamerr, "[E::%s] expected only one of --stats or --nrecords options\n", __func__);
+        fprintf(pysam_stderr, "[E::%s] expected only one of --stats or --nrecords options\n", __func__);
          return 1;
      }
      if (tbi && min_shift>0)
      {
-        fprintf(pysamerr, "[E::%s] min-shift option only expected for CSI indices \n", __func__);
+        fprintf(pysam_stderr, "[E::%s] min-shift option only expected for CSI indices \n", __func__);
          return 1;
      }
      if (min_shift < 0 || min_shift > 30)
      {
-        fprintf(pysamerr, "[E::%s] expected min_shift in range [0,30] (%d)\n", __func__, min_shift);
+        fprintf(pysam_stderr, "[E::%s] expected min_shift in range [0,30] (%d)\n", __func__, min_shift);
          return 1;
      }
  
@@ -179,29 +179,30 @@ int main_vcfindex(int argc, char *argv[])
      if (stats) return vcf_index_stats(fname, stats);
  
      htsFile *fp = hts_open(fname,"r"); 
+    if ( !fp ) error("Failed to read %s\n", fname);
      htsFormat type = *hts_get_format(fp);
      hts_close(fp);
  
      if ( (type.format!=bcf && type.format!=vcf) || type.compression!=bgzf )
      {
-        fprintf(pysamerr, "[E::%s] unknown filetype; expected bgzip compressed VCF or BCF\n", __func__);
+        fprintf(pysam_stderr, "[E::%s] unknown filetype; expected bgzip compressed VCF or BCF\n", __func__);
          if ( type.compression!=bgzf )
-            fprintf(pysamerr, "[E::%s] was the VCF/BCF compressed with bgzip?\n", __func__);
+            fprintf(pysam_stderr, "[E::%s] was the VCF/BCF compressed with bgzip?\n", __func__);
          return 1;
      }
      if (tbi && type.format==bcf)
      {
-        fprintf(pysamerr, "[Warning] TBI-index does not work for BCF files. Generating CSI instead.\n");
+        fprintf(pysam_stderr, "[Warning] TBI-index does not work for BCF files. Generating CSI instead.\n");
          tbi = 0; min_shift = BCF_LIDX_SHIFT;
      }
      if (min_shift == 0 && type.format==bcf)
      {
-        fprintf(pysamerr, "[E::%s] Require min_shift>0 for BCF files.\n", __func__);
+        fprintf(pysam_stderr, "[E::%s] Require min_shift>0 for BCF files.\n", __func__);
          return 1;
      }
      if (!tbi && type.format==vcf && min_shift == 0)
      {
-        fprintf(pysamerr, "[Warning] min-shift set to 0 for VCF file. Generating TBI file.\n");
+        fprintf(pysam_stderr, "[Warning] min-shift set to 0 for VCF file. Generating TBI file.\n");
          tbi = 1;
      }
  
@@ -216,7 +217,7 @@ int main_vcfindex(int argc, char *argv[])
              stat(fname, &stat_file);
              if ( stat_file.st_mtime <= stat_tbi.st_mtime )
              {
-                fprintf(pysamerr,"[E::%s] the index file exists. Please use '-f' to overwrite.\n", __func__);
+                fprintf(pysam_stderr,"[E::%s] the index file exists. Please use '-f' to overwrite.\n", __func__);
                  return 1;
              }
          }
@@ -226,7 +227,7 @@ int main_vcfindex(int argc, char *argv[])
      {
          if ( bcf_index_build(fname, min_shift) != 0 )
          {
-            fprintf(pysamerr,"[E::%s] bcf_index_build failed for %s\n", __func__, fname);
+            fprintf(pysam_stderr,"[E::%s] bcf_index_build failed for %s\n", __func__, fname);
              return 1;
          }
      }
@@ -234,7 +235,7 @@ int main_vcfindex(int argc, char *argv[])
      {
          if ( tbx_index_build(fname, min_shift, &tbx_conf_vcf) != 0 )
          {
-            fprintf(pysamerr,"[E::%s] tbx_index_build failed for %s\n", __func__, fname);
+            fprintf(pysam_stderr,"[E::%s] tbx_index_build failed for %s\n", __func__, fname);
              return 1;
          }
      }
diff --git a/bcftools/vcfisec.c b/bcftools/vcfisec.c

index 6115146856b0b402dbca2e9ba8506648a5ceeb3f..9afe620095cbf3300d95d1f52b406bfcf570f027 100644 (file)
--- a/bcftools/vcfisec.c
+++ b/bcftools/vcfisec.c
@@ -58,7 +58,7 @@ typedef struct
      htsFile **fh_out;
      char **argv, *prefix, *output_fname, **fnames, *write_files, *targets_list, *regions_list;
      char *isec_exact;
-    int argc;
+    int argc, record_cmd_line;
  }
  args_t;
  
@@ -143,7 +143,7 @@ void isec_vcf(args_t *args)
          out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
          if ( out_fh == NULL ) error("Can't write to %s: %s\n", args->output_fname? args->output_fname : "standard output", strerror(errno));
          if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
-        bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
+        if (args->record_cmd_line) bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
          bcf_hdr_write(out_fh, files->readers[args->iwrite].header);
      }
      if ( !args->nwrite && !out_std && !args->prefix )
@@ -351,7 +351,7 @@ static void init_data(args_t *args)
                  args->fh_out[i] = hts_open(args->fnames[i], hts_bcf_wmode(args->output_type));  \
                  if ( !args->fh_out[i] ) error("Could not open %s\n", args->fnames[i]); \
                  if ( args->n_threads ) hts_set_threads(args->fh_out[i], args->n_threads); \
-                bcf_hdr_append_version(args->files->readers[j].header,args->argc,args->argv,"bcftools_isec"); \
+                if (args->record_cmd_line) bcf_hdr_append_version(args->files->readers[j].header,args->argc,args->argv,"bcftools_isec"); \
                  bcf_hdr_write(args->fh_out[i], args->files->readers[j].header); \
              }
              if ( !args->nwrite || args->write[0] )
@@ -456,6 +456,7 @@ static void usage(void)
      fprintf(stderr, "    -e, --exclude <expr>          exclude sites for which the expression is true\n");
      fprintf(stderr, "    -f, --apply-filters <list>    require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
      fprintf(stderr, "    -i, --include <expr>          include only sites for which the expression is true\n");
+    fprintf(stderr, "        --no-version                  do not append version and command line to the header\n");
      fprintf(stderr, "    -n, --nfiles [+-=~]<int>      output positions present in this many (=), this many or more (+), this many or fewer (-), the exact (~) files\n");
      fprintf(stderr, "    -o, --output <file>           write output to a file [standard output]\n");
      fprintf(stderr, "    -O, --output-type <b|u|z|v>   b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
@@ -464,8 +465,8 @@ static void usage(void)
      fprintf(stderr, "    -R, --regions-file <file>     restrict to regions listed in a file\n");
      fprintf(stderr, "    -t, --targets <region>        similar to -r but streams rather than index-jumps\n");
      fprintf(stderr, "    -T, --targets-file <file>     similar to -R but streams rather than index-jumps\n");
-    fprintf(stderr, "    -w, --write <list>            list of files to write with -p given as 1-based indexes. By default, all files are written\n");
      fprintf(stderr, "        --threads <int>           number of extra output compression threads [0]\n");
+    fprintf(stderr, "    -w, --write <list>            list of files to write with -p given as 1-based indexes. By default, all files are written\n");
      fprintf(stderr, "\n");
      fprintf(stderr, "Examples:\n");
      fprintf(stderr, "   # Create intersection and complements of two sets saving the output in dir/*\n");
@@ -492,6 +493,7 @@ int main_vcfisec(int argc, char *argv[])
      args->output_fname = NULL;
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      int targets_is_file = 0, regions_is_file = 0;
  
      static struct option loptions[] =
@@ -512,6 +514,7 @@ int main_vcfisec(int argc, char *argv[])
          {"output",required_argument,NULL,'o'},
          {"output-type",required_argument,NULL,'O'},
          {"threads",required_argument,NULL,9},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      while ((c = getopt_long(argc, argv, "hc:r:R:p:n:w:t:T:Cf:o:O:i:e:",loptions,NULL)) >= 0) {
@@ -560,6 +563,7 @@ int main_vcfisec(int argc, char *argv[])
                  }
                  break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case 'h':
              case '?': usage();
              default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfisec.c.pysam.c b/bcftools/vcfisec.c.pysam.c

index 24188956f8ec25bf103ade9e5096077730d3fc98..758d4759043635d3e034018e62a824aa6f3895d3 100644 (file)
--- a/bcftools/vcfisec.c.pysam.c
+++ b/bcftools/vcfisec.c.pysam.c
@@ -60,7 +60,7 @@ typedef struct
      htsFile **fh_out;
      char **argv, *prefix, *output_fname, **fnames, *write_files, *targets_list, *regions_list;
      char *isec_exact;
-    int argc;
+    int argc, record_cmd_line;
  }
  args_t;
  
@@ -136,7 +136,7 @@ void isec_vcf(args_t *args)
      kstring_t str = {0,0,0};
      htsFile *out_fh = NULL;
  
-    // When only one VCF is output, print VCF to stdout or -o file
+    // When only one VCF is output, print VCF to pysam_stdout or -o file
      int out_std = 0;
      if ( args->nwrite==1 && !args->prefix ) out_std = 1;
      if ( args->targets_list && files->nreaders==1 ) out_std = 1;
@@ -145,11 +145,11 @@ void isec_vcf(args_t *args)
          out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
          if ( out_fh == NULL ) error("Can't write to %s: %s\n", args->output_fname? args->output_fname : "standard output", strerror(errno));
          if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
-        bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
+        if (args->record_cmd_line) bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
          bcf_hdr_write(out_fh, files->readers[args->iwrite].header);
      }
      if ( !args->nwrite && !out_std && !args->prefix )
-        fprintf(pysamerr,"Note: -w option not given, printing list of sites...\n");
+        fprintf(pysam_stderr,"Note: -w option not given, printing list of sites...\n");
  
      int n;
      while ( (n=bcf_sr_next_line(files)) )
@@ -353,7 +353,7 @@ static void init_data(args_t *args)
                  args->fh_out[i] = hts_open(args->fnames[i], hts_bcf_wmode(args->output_type));  \
                  if ( !args->fh_out[i] ) error("Could not open %s\n", args->fnames[i]); \
                  if ( args->n_threads ) hts_set_threads(args->fh_out[i], args->n_threads); \
-                bcf_hdr_append_version(args->files->readers[j].header,args->argc,args->argv,"bcftools_isec"); \
+                if (args->record_cmd_line) bcf_hdr_append_version(args->files->readers[j].header,args->argc,args->argv,"bcftools_isec"); \
                  bcf_hdr_write(args->fh_out[i], args->files->readers[j].header); \
              }
              if ( !args->nwrite || args->write[0] )
@@ -402,7 +402,7 @@ static void init_data(args_t *args)
              if ( args->fh_sites == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
          }
          else
-            args->fh_sites = stdout;
+            args->fh_sites = pysam_stdout;
      }
  }
  
@@ -448,40 +448,41 @@ static void destroy_data(args_t *args)
  
  static void usage(void)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Create intersections, unions and complements of VCF files.\n");
-    fprintf(pysamerr, "Usage:   bcftools isec [options] <A.vcf.gz> <B.vcf.gz> [...]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Options:\n");
-    fprintf(pysamerr, "    -c, --collapse <string>       treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
-    fprintf(pysamerr, "    -C, --complement              output positions present only in the first file but missing in the others\n");
-    fprintf(pysamerr, "    -e, --exclude <expr>          exclude sites for which the expression is true\n");
-    fprintf(pysamerr, "    -f, --apply-filters <list>    require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
-    fprintf(pysamerr, "    -i, --include <expr>          include only sites for which the expression is true\n");
-    fprintf(pysamerr, "    -n, --nfiles [+-=~]<int>      output positions present in this many (=), this many or more (+), this many or fewer (-), the exact (~) files\n");
-    fprintf(pysamerr, "    -o, --output <file>           write output to a file [standard output]\n");
-    fprintf(pysamerr, "    -O, --output-type <b|u|z|v>   b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
-    fprintf(pysamerr, "    -p, --prefix <dir>            if given, subset each of the input files accordingly, see also -w\n");
-    fprintf(pysamerr, "    -r, --regions <region>        restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "    -R, --regions-file <file>     restrict to regions listed in a file\n");
-    fprintf(pysamerr, "    -t, --targets <region>        similar to -r but streams rather than index-jumps\n");
-    fprintf(pysamerr, "    -T, --targets-file <file>     similar to -R but streams rather than index-jumps\n");
-    fprintf(pysamerr, "    -w, --write <list>            list of files to write with -p given as 1-based indexes. By default, all files are written\n");
-    fprintf(pysamerr, "        --threads <int>           number of extra output compression threads [0]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Examples:\n");
-    fprintf(pysamerr, "   # Create intersection and complements of two sets saving the output in dir/*\n");
-    fprintf(pysamerr, "   bcftools isec A.vcf.gz B.vcf.gz -p dir\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "   # Filter sites in A and B (but not in C) and create intersection\n");
-    fprintf(pysamerr, "   bcftools isec -e'MAF<0.01' -i'dbSNP=1' -e - A.vcf.gz B.vcf.gz C.vcf.gz -p dir\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "   # Extract and write records from A shared by both A and B using exact allele match\n");
-    fprintf(pysamerr, "   bcftools isec A.vcf.gz B.vcf.gz -p dir -n =2 -w 1\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "   # Extract records private to A or B comparing by position only\n");
-    fprintf(pysamerr, "   bcftools isec A.vcf.gz B.vcf.gz -p dir -n -1 -c all\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Create intersections, unions and complements of VCF files.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools isec [options] <A.vcf.gz> <B.vcf.gz> [...]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "    -c, --collapse <string>       treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
+    fprintf(pysam_stderr, "    -C, --complement              output positions present only in the first file but missing in the others\n");
+    fprintf(pysam_stderr, "    -e, --exclude <expr>          exclude sites for which the expression is true\n");
+    fprintf(pysam_stderr, "    -f, --apply-filters <list>    require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
+    fprintf(pysam_stderr, "    -i, --include <expr>          include only sites for which the expression is true\n");
+    fprintf(pysam_stderr, "        --no-version                  do not append version and command line to the header\n");
+    fprintf(pysam_stderr, "    -n, --nfiles [+-=~]<int>      output positions present in this many (=), this many or more (+), this many or fewer (-), the exact (~) files\n");
+    fprintf(pysam_stderr, "    -o, --output <file>           write output to a file [standard output]\n");
+    fprintf(pysam_stderr, "    -O, --output-type <b|u|z|v>   b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+    fprintf(pysam_stderr, "    -p, --prefix <dir>            if given, subset each of the input files accordingly, see also -w\n");
+    fprintf(pysam_stderr, "    -r, --regions <region>        restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "    -R, --regions-file <file>     restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "    -t, --targets <region>        similar to -r but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "    -T, --targets-file <file>     similar to -R but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "        --threads <int>           number of extra output compression threads [0]\n");
+    fprintf(pysam_stderr, "    -w, --write <list>            list of files to write with -p given as 1-based indexes. By default, all files are written\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Examples:\n");
+    fprintf(pysam_stderr, "   # Create intersection and complements of two sets saving the output in dir/*\n");
+    fprintf(pysam_stderr, "   bcftools isec A.vcf.gz B.vcf.gz -p dir\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "   # Filter sites in A and B (but not in C) and create intersection\n");
+    fprintf(pysam_stderr, "   bcftools isec -e'MAF<0.01' -i'dbSNP=1' -e - A.vcf.gz B.vcf.gz C.vcf.gz -p dir\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "   # Extract and write records from A shared by both A and B using exact allele match\n");
+    fprintf(pysam_stderr, "   bcftools isec A.vcf.gz B.vcf.gz -p dir -n =2 -w 1\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "   # Extract records private to A or B comparing by position only\n");
+    fprintf(pysam_stderr, "   bcftools isec A.vcf.gz B.vcf.gz -p dir -n -1 -c all\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -494,6 +495,7 @@ int main_vcfisec(int argc, char *argv[])
      args->output_fname = NULL;
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      int targets_is_file = 0, regions_is_file = 0;
  
      static struct option loptions[] =
@@ -514,6 +516,7 @@ int main_vcfisec(int argc, char *argv[])
          {"output",required_argument,NULL,'o'},
          {"output-type",required_argument,NULL,'O'},
          {"threads",required_argument,NULL,9},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      while ((c = getopt_long(argc, argv, "hc:r:R:p:n:w:t:T:Cf:o:O:i:e:",loptions,NULL)) >= 0) {
@@ -562,6 +565,7 @@ int main_vcfisec(int argc, char *argv[])
                  }
                  break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case 'h':
              case '?': usage();
              default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfmerge.c b/bcftools/vcfmerge.c

index 0517bd56c1b80d6838b1b5eb95926fc2745ebe10..02fac6bcc5698042d083a04cdbdf7b83ca89b42e 100644 (file)
--- a/bcftools/vcfmerge.c
+++ b/bcftools/vcfmerge.c
@@ -118,7 +118,7 @@ typedef struct
      htsFile *out_fh;
      bcf_hdr_t *out_hdr;
      char **argv;
-    int argc, n_threads;
+    int argc, n_threads, record_cmd_line;
  }
  args_t;
  
@@ -858,7 +858,7 @@ int copy_string_field(char *src, int isrc, int src_len, kstring_t *dst, int idst
      }
      if ( ith_src!=isrc ) return -1; // requested field not found
      int end_src = start_src;
-    while ( end_src<src_len && src[end_src]!=',' ) end_src++;
+    while ( end_src<src_len && src[end_src] && src[end_src]!=',' ) end_src++;
  
      int nsrc_cpy = end_src - start_src;
      if ( nsrc_cpy==1 && src[start_src]=='.' ) return 0;   // don't write missing values, dst is already initialized
@@ -1913,7 +1913,7 @@ void merge_vcf(args_t *args)
              char buf[10]; snprintf(buf,10,"%d",i+1);
              merge_headers(args->out_hdr, args->files->readers[i].header,buf,args->force_samples);
          }
-        bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_merge");
+        if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_merge");
          bcf_hdr_sync(args->out_hdr);
      }
      info_rules_init(args);
@@ -1962,6 +1962,7 @@ static void usage(void)
      fprintf(stderr, "    -i, --info-rules <tag:method,..>   rules for merging INFO fields (method is one of sum,avg,min,max,join) or \"-\" to turn off the default [DP:sum,DP4:sum]\n");
      fprintf(stderr, "    -l, --file-list <file>             read file names from the file\n");
      fprintf(stderr, "    -m, --merge <string>               allow multiallelic records for <snps|indels|both|all|none|id>, see man page for details [both]\n");
+    fprintf(stderr, "        --no-version                   do not append version and command line to the header\n");
      fprintf(stderr, "    -o, --output <file>                write output to a file [standard output]\n");
      fprintf(stderr, "    -O, --output-type <b|u|z|v>        'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
      fprintf(stderr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
@@ -1980,6 +1981,7 @@ int main_vcfmerge(int argc, char *argv[])
      args->output_fname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      args->collapse = COLLAPSE_BOTH;
      int regions_is_file = 0;
  
@@ -1998,6 +2000,7 @@ int main_vcfmerge(int argc, char *argv[])
          {"regions",required_argument,NULL,'r'},
          {"regions-file",required_argument,NULL,'R'},
          {"info-rules",required_argument,NULL,'i'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:",loptions,NULL)) >= 0) {
@@ -2032,6 +2035,7 @@ int main_vcfmerge(int argc, char *argv[])
              case  2 : args->header_only = 1; break;
              case  3 : args->force_samples = 1; break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case 'h':
              case '?': usage();
              default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfmerge.c.pysam.c b/bcftools/vcfmerge.c.pysam.c

index 94b5252891a03acb2fdfb77714dde40a50a16b64..daac45838e10261077cb90d7a4b73852f0582264 100644 (file)
--- a/bcftools/vcfmerge.c.pysam.c
+++ b/bcftools/vcfmerge.c.pysam.c
@@ -120,7 +120,7 @@ typedef struct
      htsFile *out_fh;
      bcf_hdr_t *out_hdr;
      char **argv;
-    int argc, n_threads;
+    int argc, n_threads, record_cmd_line;
  }
  args_t;
  
@@ -451,8 +451,8 @@ void merge_headers(bcf_hdr_t *hw, const bcf_hdr_t *hr, const char *clash_prefix,
  
  void debug_als(char **als, int nals)
  {
-    int k; for (k=0; k<nals; k++) fprintf(pysamerr,"%s ", als[k]);
-    fprintf(pysamerr,"\n");
+    int k; for (k=0; k<nals; k++) fprintf(pysam_stderr,"%s ", als[k]);
+    fprintf(pysam_stderr,"\n");
  }
  
  /**
@@ -534,7 +534,7 @@ char **merge_alleles(char **a, int na, int *map, char **b, int *nb, int *mb)
      {
          if ( strncasecmp(a[0],b[0],rla<rlb?rla:rlb) )
          {
-            fprintf(pysamerr, "The REF prefixes differ: %s vs %s (%d,%d)\n", a[0],b[0],rla,rlb);
+            fprintf(pysam_stderr, "The REF prefixes differ: %s vs %s (%d,%d)\n", a[0],b[0],rla,rlb);
              return NULL;
          }
          // Different case, change to uppercase
@@ -657,13 +657,13 @@ void maux_reset(maux_t *ma)
  }
  void maux_debug(maux_t *ma, int ir, int ib)
  {
-    printf("[%d,%d]\t", ir,ib);
+    fprintf(pysam_stdout, "[%d,%d]\t", ir,ib);
      int i;
      for (i=0; i<ma->nals; i++)
      {
-        printf(" %s [%d]", ma->als[i], ma->cnt[i]);
+        fprintf(pysam_stdout, " %s [%d]", ma->als[i], ma->cnt[i]);
      }
-    printf("\n");
+    fprintf(pysam_stdout, "\n");
  }
  
  void merge_chrom2qual(args_t *args, bcf1_t *out)
@@ -860,7 +860,7 @@ int copy_string_field(char *src, int isrc, int src_len, kstring_t *dst, int idst
      }
      if ( ith_src!=isrc ) return -1; // requested field not found
      int end_src = start_src;
-    while ( end_src<src_len && src[end_src]!=',' ) end_src++;
+    while ( end_src<src_len && src[end_src] && src[end_src]!=',' ) end_src++;
  
      int nsrc_cpy = end_src - start_src;
      if ( nsrc_cpy==1 && src[start_src]=='.' ) return 0;   // don't write missing values, dst is already initialized
@@ -946,7 +946,7 @@ static void merge_AGR_info_tag(bcf_hdr_t *hdr, bcf1_t *line, bcf_info_t *info, i
                  case BCF_BT_INT16: BRANCH(int16_t, *src==bcf_int16_missing, *src==bcf_int16_vector_end, int); break;
                  case BCF_BT_INT32: BRANCH(int32_t, *src==bcf_int32_missing, *src==bcf_int32_vector_end, int); break;
                  case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(*src), bcf_float_is_vector_end(*src), float); break;
-                default: fprintf(pysamerr,"TODO: %s:%d .. info->type=%d\n", __FILE__,__LINE__, info->type); exit(1);
+                default: fprintf(pysam_stderr,"TODO: %s:%d .. info->type=%d\n", __FILE__,__LINE__, info->type); exit(1);
              }
              #undef BRANCH
          }
@@ -976,7 +976,7 @@ static void merge_AGR_info_tag(bcf_hdr_t *hdr, bcf1_t *line, bcf_info_t *info, i
                  case BCF_BT_INT16: BRANCH(int16_t, src[kori]==bcf_int16_missing, src[kori]==bcf_int16_vector_end, int); break;
                  case BCF_BT_INT32: BRANCH(int32_t, src[kori]==bcf_int32_missing, src[kori]==bcf_int32_vector_end, int); break;
                  case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(src[kori]), bcf_float_is_vector_end(src[kori]), float); break;
-                default: fprintf(pysamerr,"TODO: %s:%d .. info->type=%d\n", __FILE__,__LINE__, info->type); exit(1);
+                default: fprintf(pysam_stderr,"TODO: %s:%d .. info->type=%d\n", __FILE__,__LINE__, info->type); exit(1);
              }
              #undef BRANCH
          }
@@ -1556,7 +1556,7 @@ void shake_buffer(maux_t *maux, int ir, int pos)
      if ( !reader->buffer ) return;
  
      int i;
-    // FILE *fp = stdout;
+    // FILE *fp = pysam_stdout;
      // fprintf(fp,"<going to shake> nbuf=%d\t", reader->nbuffer); for (i=0; i<reader->nbuffer; i++) fprintf(fp," %d", skip[i]); fprintf(fp,"\n");
      // debug_buffer(fp,reader);
      // fprintf(fp,"--\n");
@@ -1641,43 +1641,43 @@ void debug_maux(args_t *args, int pos, int var_type)
      maux_t *maux = args->maux;
      int j,k,l;
  
-    fprintf(pysamerr,"Alleles to merge at %d\n", pos+1);
+    fprintf(pysam_stderr,"Alleles to merge at %d\n", pos+1);
      for (j=0; j<files->nreaders; j++)
      {
          bcf_sr_t *reader = &files->readers[j];
-        fprintf(pysamerr," reader %d: ", j);
+        fprintf(pysam_stderr," reader %d: ", j);
          for (k=0; k<=reader->nbuffer; k++)
          {
              if ( maux->d[j][k].skip==SKIP_DONE ) continue;
              bcf1_t *line = reader->buffer[k];
              if ( line->pos!=pos ) continue;
-            fprintf(pysamerr,"\t");
-            if ( maux->d[j][k].skip ) fprintf(pysamerr,"[");  // this record will not be merged in this round
+            fprintf(pysam_stderr,"\t");
+            if ( maux->d[j][k].skip ) fprintf(pysam_stderr,"[");  // this record will not be merged in this round
              for (l=0; l<line->n_allele; l++)
-                fprintf(pysamerr,"%s%s", l==0?"":",", line->d.allele[l]);
-            if ( maux->d[j][k].skip ) fprintf(pysamerr,"]");
+                fprintf(pysam_stderr,"%s%s", l==0?"":",", line->d.allele[l]);
+            if ( maux->d[j][k].skip ) fprintf(pysam_stderr,"]");
          }
-        fprintf(pysamerr,"\n");
+        fprintf(pysam_stderr,"\n");
      }
-    fprintf(pysamerr," counts: ");
-    for (j=0; j<maux->nals; j++) fprintf(pysamerr,"%s   %dx %s", j==0?"":",",maux->cnt[j], maux->als[j]); fprintf(pysamerr,"\n");
+    fprintf(pysam_stderr," counts: ");
+    for (j=0; j<maux->nals; j++) fprintf(pysam_stderr,"%s   %dx %s", j==0?"":",",maux->cnt[j], maux->als[j]); fprintf(pysam_stderr,"\n");
      for (j=0; j<files->nreaders; j++)
      {
          bcf_sr_t *reader = &files->readers[j];
-        fprintf(pysamerr," out %d: ", j);
+        fprintf(pysam_stderr," out %d: ", j);
          for (k=0; k<=reader->nbuffer; k++)
          {
              if ( maux->d[j][k].skip==SKIP_DONE ) continue;
              bcf1_t *line = reader->buffer[k];
              if ( line->pos!=pos ) continue;
              if ( maux->d[j][k].skip ) continue;
-            fprintf(pysamerr,"\t");
+            fprintf(pysam_stderr,"\t");
              for (l=0; l<line->n_allele; l++)
-                fprintf(pysamerr,"%s%s", l==0?"":",", maux->als[maux->d[j][k].map[l]]);
+                fprintf(pysam_stderr,"%s%s", l==0?"":",", maux->als[maux->d[j][k].map[l]]);
          }
-        fprintf(pysamerr,"\n");
+        fprintf(pysam_stderr,"\n");
      }
-    fprintf(pysamerr,"\n");
+    fprintf(pysam_stderr,"\n");
  }
  
  // Determine which line should be merged from which reader: go through all
@@ -1915,7 +1915,7 @@ void merge_vcf(args_t *args)
              char buf[10]; snprintf(buf,10,"%d",i+1);
              merge_headers(args->out_hdr, args->files->readers[i].header,buf,args->force_samples);
          }
-        bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_merge");
+        if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_merge");
          bcf_hdr_sync(args->out_hdr);
      }
      info_rules_init(args);
@@ -1950,26 +1950,27 @@ void merge_vcf(args_t *args)
  
  static void usage(void)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Merge multiple VCF/BCF files from non-overlapping sample sets to create one multi-sample file.\n");
-    fprintf(pysamerr, "         Note that only records from different files can be merged, never from the same file. For\n");
-    fprintf(pysamerr, "         \"vertical\" merge take a look at \"bcftools norm\" instead.\n");
-    fprintf(pysamerr, "Usage:   bcftools merge [options] <A.vcf.gz> <B.vcf.gz> [...]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Options:\n");
-    fprintf(pysamerr, "        --force-samples                resolve duplicate sample names\n");
-    fprintf(pysamerr, "        --print-header                 print only the merged header and exit\n");
-    fprintf(pysamerr, "        --use-header <file>            use the provided header\n");
-    fprintf(pysamerr, "    -f, --apply-filters <list>         require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
-    fprintf(pysamerr, "    -i, --info-rules <tag:method,..>   rules for merging INFO fields (method is one of sum,avg,min,max,join) or \"-\" to turn off the default [DP:sum,DP4:sum]\n");
-    fprintf(pysamerr, "    -l, --file-list <file>             read file names from the file\n");
-    fprintf(pysamerr, "    -m, --merge <string>               allow multiallelic records for <snps|indels|both|all|none|id>, see man page for details [both]\n");
-    fprintf(pysamerr, "    -o, --output <file>                write output to a file [standard output]\n");
-    fprintf(pysamerr, "    -O, --output-type <b|u|z|v>        'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
-    fprintf(pysamerr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
-    fprintf(pysamerr, "        --threads <int>                number of extra output compression threads [0]\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Merge multiple VCF/BCF files from non-overlapping sample sets to create one multi-sample file.\n");
+    fprintf(pysam_stderr, "         Note that only records from different files can be merged, never from the same file. For\n");
+    fprintf(pysam_stderr, "         \"vertical\" merge take a look at \"bcftools norm\" instead.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools merge [options] <A.vcf.gz> <B.vcf.gz> [...]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "        --force-samples                resolve duplicate sample names\n");
+    fprintf(pysam_stderr, "        --print-header                 print only the merged header and exit\n");
+    fprintf(pysam_stderr, "        --use-header <file>            use the provided header\n");
+    fprintf(pysam_stderr, "    -f, --apply-filters <list>         require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
+    fprintf(pysam_stderr, "    -i, --info-rules <tag:method,..>   rules for merging INFO fields (method is one of sum,avg,min,max,join) or \"-\" to turn off the default [DP:sum,DP4:sum]\n");
+    fprintf(pysam_stderr, "    -l, --file-list <file>             read file names from the file\n");
+    fprintf(pysam_stderr, "    -m, --merge <string>               allow multiallelic records for <snps|indels|both|all|none|id>, see man page for details [both]\n");
+    fprintf(pysam_stderr, "        --no-version                   do not append version and command line to the header\n");
+    fprintf(pysam_stderr, "    -o, --output <file>                write output to a file [standard output]\n");
+    fprintf(pysam_stderr, "    -O, --output-type <b|u|z|v>        'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
+    fprintf(pysam_stderr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "        --threads <int>                number of extra output compression threads [0]\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -1982,6 +1983,7 @@ int main_vcfmerge(int argc, char *argv[])
      args->output_fname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      args->collapse = COLLAPSE_BOTH;
      int regions_is_file = 0;
  
@@ -2000,6 +2002,7 @@ int main_vcfmerge(int argc, char *argv[])
          {"regions",required_argument,NULL,'r'},
          {"regions-file",required_argument,NULL,'R'},
          {"info-rules",required_argument,NULL,'i'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:",loptions,NULL)) >= 0) {
@@ -2034,6 +2037,7 @@ int main_vcfmerge(int argc, char *argv[])
              case  2 : args->header_only = 1; break;
              case  3 : args->force_samples = 1; break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case 'h':
              case '?': usage();
              default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfnorm.c b/bcftools/vcfnorm.c

index 732eca9033b230a73f2d245dfc576a61550a8455..781833ceb6d5e6969efbd3bd742d1d6b886b4e57 100644 (file)
--- a/bcftools/vcfnorm.c
+++ b/bcftools/vcfnorm.c
@@ -1,6 +1,6 @@
  /*  vcfnorm.c -- Left-align and normalize indels.
  
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2016 Genome Research Ltd.
  
      Author: Petr Danecek <pd3@sanger.ac.uk>
  
@@ -76,6 +76,7 @@ typedef struct
      char **argv, *output_fname, *ref_fname, *vcf_fname, *region, *targets;
      int argc, rmdup, output_type, n_threads, check_ref, strict_filter, do_indels;
      int nchanged, nskipped, nsplit, ntotal, mrows_op, mrows_collapse, parsimonious;
+    int record_cmd_line;
  }
  args_t;
  
@@ -295,17 +296,19 @@ static int realign(args_t *args, bcf1_t *line)
          if ( i>0 && als[i].l==als[0].l && !strcasecmp(als[0].s,als[i].s) ) return ERR_DUP_ALLELE;
      }
  
-
      // trim from right
      int ori_pos = line->pos;
      while (1)
      {
          // is the rightmost base identical in all alleles?
+        int min_len = als[0].l;
          for (i=1; i<line->n_allele; i++)
          {
              if ( als[0].s[ als[0].l-1 ]!=als[i].s[ als[i].l-1 ] ) break;
+            if ( als[i].l < min_len ) min_len = als[i].l;
          }
          if ( i!=line->n_allele ) break; // there are differences, cannot be trimmed
+        if ( min_len<=1 && line->pos==0 ) break;
  
          int pad_from_left = 0;
          for (i=0; i<line->n_allele; i++) // trim all alleles
@@ -343,7 +346,7 @@ static int realign(args_t *args, bcf1_t *line)
              if ( als[0].s[ntrim_left]!=als[i].s[ntrim_left] ) break;
              if ( min_len > als[i].l - ntrim_left ) min_len = als[i].l - ntrim_left;
          }
-        if ( i!=line->n_allele || min_len==1 ) break; // there are differences, cannot be trimmed
+        if ( i!=line->n_allele || min_len<=1 ) break; // there are differences, cannot be trimmed
          ntrim_left++;
      }
      if ( ntrim_left )
@@ -1287,7 +1290,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
      {
          kstring_t *tmp = &args->tmp_str[i];
          kputsn(tmp->s,tmp->l,&str);
-        for (j=tmp->l; j<max_len; j++) kputc(0,tmp);
+        for (j=tmp->l; j<max_len; j++) kputc('\0',&str);
      }
      args->ntmp_arr2 = str.m;
      args->tmp_arr2  = (uint8_t*)str.s;
@@ -1581,7 +1584,7 @@ static void normalize_vcf(args_t *args)
      htsFile *out = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
      if ( out == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
      if ( args->n_threads ) hts_set_threads(out, args->n_threads);
-    bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_norm");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_norm");
      bcf_hdr_write(out, args->hdr);
  
      int prev_rid = -1, prev_pos = -1, prev_type = 0;
@@ -1641,7 +1644,6 @@ static void normalize_vcf(args_t *args)
              if ( args->lines[ilast]->pos - args->lines[i]->pos < args->buf_win ) break;
              j++;
          }
-        if ( args->rbuf.n==args->rbuf.m ) j = 1;
          if ( j>0 ) flush_buffer(args, out, j);
      }
      flush_buffer(args, out, args->rbuf.n);
@@ -1666,6 +1668,7 @@ static void usage(void)
      fprintf(stderr, "    -d, --rm-dup <type>               remove duplicate snps|indels|both|any\n");
      fprintf(stderr, "    -f, --fasta-ref <file>            reference sequence\n");
      fprintf(stderr, "    -m, --multiallelics <-|+>[type]   split multiallelics (-) or join biallelics (+), type: snps|indels|both|any [both]\n");
+    fprintf(stderr, "        --no-version                  do not append version and command line to the header\n");
      fprintf(stderr, "    -N, --do-not-normalize            do not normalize indels (with -m or -c s)\n");
      fprintf(stderr, "    -o, --output <file>               write output to a file [standard output]\n");
      fprintf(stderr, "    -O, --output-type <type>          'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
@@ -1674,8 +1677,8 @@ static void usage(void)
      fprintf(stderr, "    -s, --strict-filter               when merging (-m+), merged site is PASS only if all sites being merged PASS\n");
      fprintf(stderr, "    -t, --targets <region>            similar to -r but streams rather than index-jumps\n");
      fprintf(stderr, "    -T, --targets-file <file>         similar to -R but streams rather than index-jumps\n");
-    fprintf(stderr, "    -w, --site-win <int>              buffer for sorting lines which changed position during realignment [1000]\n");
      fprintf(stderr, "        --threads <int>               number of extra output compression threads [0]\n");
+    fprintf(stderr, "    -w, --site-win <int>              buffer for sorting lines which changed position during realignment [1000]\n");
      fprintf(stderr, "\n");
      exit(1);
  }
@@ -1689,6 +1692,7 @@ int main_vcfnorm(int argc, char *argv[])
      args->output_fname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      args->aln_win = 100;
      args->buf_win = 1000;
      args->mrows_collapse = COLLAPSE_BOTH;
@@ -1714,6 +1718,7 @@ int main_vcfnorm(int argc, char *argv[])
          {"threads",required_argument,NULL,9},
          {"check-ref",required_argument,NULL,'c'},
          {"strict-filter",no_argument,NULL,'s'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      char *tmp;
@@ -1771,6 +1776,7 @@ int main_vcfnorm(int argc, char *argv[])
                  if ( *tmp ) error("Could not parse argument: --site-win %s\n", optarg);
                  break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case 'h':
              case '?': usage();
              default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfnorm.c.pysam.c b/bcftools/vcfnorm.c.pysam.c

index 2cdf3994e43723cdc6895af98ec9b64ab32163d5..200ce79935480852ed05bbad9f049fea01440d31 100644 (file)
--- a/bcftools/vcfnorm.c.pysam.c
+++ b/bcftools/vcfnorm.c.pysam.c
@@ -2,7 +2,7 @@
  
  /*  vcfnorm.c -- Left-align and normalize indels.
  
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2016 Genome Research Ltd.
  
      Author: Petr Danecek <pd3@sanger.ac.uk>
  
@@ -78,6 +78,7 @@ typedef struct
      char **argv, *output_fname, *ref_fname, *vcf_fname, *region, *targets;
      int argc, rmdup, output_type, n_threads, check_ref, strict_filter, do_indels;
      int nchanged, nskipped, nsplit, ntotal, mrows_op, mrows_collapse, parsimonious;
+    int record_cmd_line;
  }
  args_t;
  
@@ -275,7 +276,7 @@ static int realign(args_t *args, bcf1_t *line)
          if ( args->check_ref==CHECK_REF_EXIT )
              error("Reference allele mismatch at %s:%d .. REF_SEQ:'%s' vs VCF:'%s'\n", bcf_seqname(args->hdr,line),line->pos+1,ref,line->d.allele[0]);
          if ( args->check_ref & CHECK_REF_WARN )
-            fprintf(pysamerr,"REF_MISMATCH\t%s\t%d\t%s\n", bcf_seqname(args->hdr,line),line->pos+1,line->d.allele[0]);
+            fprintf(pysam_stderr,"REF_MISMATCH\t%s\t%d\t%s\n", bcf_seqname(args->hdr,line),line->pos+1,line->d.allele[0]);
          free(ref);
          return ERR_REF_MISMATCH;
      }
@@ -297,17 +298,19 @@ static int realign(args_t *args, bcf1_t *line)
          if ( i>0 && als[i].l==als[0].l && !strcasecmp(als[0].s,als[i].s) ) return ERR_DUP_ALLELE;
      }
  
-
      // trim from right
      int ori_pos = line->pos;
      while (1)
      {
          // is the rightmost base identical in all alleles?
+        int min_len = als[0].l;
          for (i=1; i<line->n_allele; i++)
          {
              if ( als[0].s[ als[0].l-1 ]!=als[i].s[ als[i].l-1 ] ) break;
+            if ( als[i].l < min_len ) min_len = als[i].l;
          }
          if ( i!=line->n_allele ) break; // there are differences, cannot be trimmed
+        if ( min_len<=1 && line->pos==0 ) break;
  
          int pad_from_left = 0;
          for (i=0; i<line->n_allele; i++) // trim all alleles
@@ -345,7 +348,7 @@ static int realign(args_t *args, bcf1_t *line)
              if ( als[0].s[ntrim_left]!=als[i].s[ntrim_left] ) break;
              if ( min_len > als[i].l - ntrim_left ) min_len = als[i].l - ntrim_left;
          }
-        if ( i!=line->n_allele || min_len==1 ) break; // there are differences, cannot be trimmed
+        if ( i!=line->n_allele || min_len<=1 ) break; // there are differences, cannot be trimmed
          ntrim_left++;
      }
      if ( ntrim_left )
@@ -855,7 +858,7 @@ static void merge_info_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_inf
          { \
              /* expecting diploid gt in INFO */ \
              if (nvals_ori!=lines[0]->n_allele*(lines[0]->n_allele+1)/2) { \
-                fprintf(pysamerr, "todo: merge Number=G INFO fields for haploid sites\n"); \
+                fprintf(pysam_stderr, "todo: merge Number=G INFO fields for haploid sites\n"); \
                  error("vcfnorm: number of fields in first record at position %s:%d for INFO tag %s not as expected [found: %d vs expected:%d]\n", bcf_seqname(args->hdr,lines[0]),lines[0]->pos+1, tag, nvals_ori, lines[0]->n_allele*(lines[0]->n_allele+1)/2); \
              } \
              int nvals = dst->n_allele*(dst->n_allele+1)/2; \
@@ -1289,7 +1292,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
      {
          kstring_t *tmp = &args->tmp_str[i];
          kputsn(tmp->s,tmp->l,&str);
-        for (j=tmp->l; j<max_len; j++) kputc(0,tmp);
+        for (j=tmp->l; j<max_len; j++) kputc('\0',&str);
      }
      args->ntmp_arr2 = str.m;
      args->tmp_arr2  = (uint8_t*)str.s;
@@ -1560,7 +1563,7 @@ static void normalize_line(args_t *args, bcf1_t **line_ptr)
                  else if ( args->check_ref==CHECK_REF_EXIT )
                      error("Duplicate alleles at %s:%d; run with -cw to turn the error into warning or with -cs to fix.\n", bcf_seqname(args->hdr,line),line->pos+1);
                  else if ( args->check_ref & CHECK_REF_WARN )
-                    fprintf(pysamerr,"ALT_DUP\t%s\t%d\n", bcf_seqname(args->hdr,line),line->pos+1);
+                    fprintf(pysam_stderr,"ALT_DUP\t%s\t%d\n", bcf_seqname(args->hdr,line),line->pos+1);
              }
          }
      }
@@ -1583,7 +1586,7 @@ static void normalize_vcf(args_t *args)
      htsFile *out = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
      if ( out == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
      if ( args->n_threads ) hts_set_threads(out, args->n_threads);
-    bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_norm");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_norm");
      bcf_hdr_write(out, args->hdr);
  
      int prev_rid = -1, prev_pos = -1, prev_type = 0;
@@ -1643,42 +1646,42 @@ static void normalize_vcf(args_t *args)
              if ( args->lines[ilast]->pos - args->lines[i]->pos < args->buf_win ) break;
              j++;
          }
-        if ( args->rbuf.n==args->rbuf.m ) j = 1;
          if ( j>0 ) flush_buffer(args, out, j);
      }
      flush_buffer(args, out, args->rbuf.n);
      hts_close(out);
  
-    fprintf(pysamerr,"Lines   total/split/realigned/skipped:\t%d/%d/%d/%d\n", args->ntotal,args->nsplit,args->nchanged,args->nskipped);
+    fprintf(pysam_stderr,"Lines   total/split/realigned/skipped:\t%d/%d/%d/%d\n", args->ntotal,args->nsplit,args->nchanged,args->nskipped);
      if ( args->check_ref & CHECK_REF_FIX )
-        fprintf(pysamerr,"REF/ALT total/modified/added:  \t%d/%d/%d\n", args->nref.tot,args->nref.swap,args->nref.set);
+        fprintf(pysam_stderr,"REF/ALT total/modified/added:  \t%d/%d/%d\n", args->nref.tot,args->nref.swap,args->nref.set);
  }
  
  static void usage(void)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Left-align and normalize indels; check if REF alleles match the reference;\n");
-    fprintf(pysamerr, "         split multiallelic sites into multiple rows; recover multiallelics from\n");
-    fprintf(pysamerr, "         multiple rows.\n");
-    fprintf(pysamerr, "Usage:   bcftools norm [options] <in.vcf.gz>\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Options:\n");
-    fprintf(pysamerr, "    -c, --check-ref <e|w|x|s>         check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites [e]\n");
-    fprintf(pysamerr, "    -D, --remove-duplicates           remove duplicate lines of the same type.\n");
-    fprintf(pysamerr, "    -d, --rm-dup <type>               remove duplicate snps|indels|both|any\n");
-    fprintf(pysamerr, "    -f, --fasta-ref <file>            reference sequence\n");
-    fprintf(pysamerr, "    -m, --multiallelics <-|+>[type]   split multiallelics (-) or join biallelics (+), type: snps|indels|both|any [both]\n");
-    fprintf(pysamerr, "    -N, --do-not-normalize            do not normalize indels (with -m or -c s)\n");
-    fprintf(pysamerr, "    -o, --output <file>               write output to a file [standard output]\n");
-    fprintf(pysamerr, "    -O, --output-type <type>          'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
-    fprintf(pysamerr, "    -r, --regions <region>            restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "    -R, --regions-file <file>         restrict to regions listed in a file\n");
-    fprintf(pysamerr, "    -s, --strict-filter               when merging (-m+), merged site is PASS only if all sites being merged PASS\n");
-    fprintf(pysamerr, "    -t, --targets <region>            similar to -r but streams rather than index-jumps\n");
-    fprintf(pysamerr, "    -T, --targets-file <file>         similar to -R but streams rather than index-jumps\n");
-    fprintf(pysamerr, "    -w, --site-win <int>              buffer for sorting lines which changed position during realignment [1000]\n");
-    fprintf(pysamerr, "        --threads <int>               number of extra output compression threads [0]\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Left-align and normalize indels; check if REF alleles match the reference;\n");
+    fprintf(pysam_stderr, "         split multiallelic sites into multiple rows; recover multiallelics from\n");
+    fprintf(pysam_stderr, "         multiple rows.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools norm [options] <in.vcf.gz>\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "    -c, --check-ref <e|w|x|s>         check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites [e]\n");
+    fprintf(pysam_stderr, "    -D, --remove-duplicates           remove duplicate lines of the same type.\n");
+    fprintf(pysam_stderr, "    -d, --rm-dup <type>               remove duplicate snps|indels|both|any\n");
+    fprintf(pysam_stderr, "    -f, --fasta-ref <file>            reference sequence\n");
+    fprintf(pysam_stderr, "    -m, --multiallelics <-|+>[type]   split multiallelics (-) or join biallelics (+), type: snps|indels|both|any [both]\n");
+    fprintf(pysam_stderr, "        --no-version                  do not append version and command line to the header\n");
+    fprintf(pysam_stderr, "    -N, --do-not-normalize            do not normalize indels (with -m or -c s)\n");
+    fprintf(pysam_stderr, "    -o, --output <file>               write output to a file [standard output]\n");
+    fprintf(pysam_stderr, "    -O, --output-type <type>          'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
+    fprintf(pysam_stderr, "    -r, --regions <region>            restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "    -R, --regions-file <file>         restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "    -s, --strict-filter               when merging (-m+), merged site is PASS only if all sites being merged PASS\n");
+    fprintf(pysam_stderr, "    -t, --targets <region>            similar to -r but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "    -T, --targets-file <file>         similar to -R but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "        --threads <int>               number of extra output compression threads [0]\n");
+    fprintf(pysam_stderr, "    -w, --site-win <int>              buffer for sorting lines which changed position during realignment [1000]\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -1691,6 +1694,7 @@ int main_vcfnorm(int argc, char *argv[])
      args->output_fname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      args->aln_win = 100;
      args->buf_win = 1000;
      args->mrows_collapse = COLLAPSE_BOTH;
@@ -1716,6 +1720,7 @@ int main_vcfnorm(int argc, char *argv[])
          {"threads",required_argument,NULL,9},
          {"check-ref",required_argument,NULL,'c'},
          {"strict-filter",no_argument,NULL,'s'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      char *tmp;
@@ -1759,7 +1764,7 @@ int main_vcfnorm(int argc, char *argv[])
                  break;
              case 'o': args->output_fname = optarg; break;
              case 'D':
-                fprintf(pysamerr,"Warning: `-D` is functional but deprecated, replaced by `-d both`.\n"); 
+                fprintf(pysam_stderr,"Warning: `-D` is functional but deprecated, replaced by `-d both`.\n"); 
                  args->rmdup = COLLAPSE_NONE<<1;
                  break;
              case 's': args->strict_filter = 1; break;
@@ -1773,6 +1778,7 @@ int main_vcfnorm(int argc, char *argv[])
                  if ( *tmp ) error("Could not parse argument: --site-win %s\n", optarg);
                  break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case 'h':
              case '?': usage();
              default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfplugin.c b/bcftools/vcfplugin.c

index e2ca04a8e67606256928565568ec550b1c58f654..87a773f5070a0f16c970a1eadcfd79b961753144 100644 (file)
--- a/bcftools/vcfplugin.c
+++ b/bcftools/vcfplugin.c
@@ -140,7 +140,7 @@ typedef struct _args_t
      char **plugin_paths;
  
      char **argv, *output_fname, *regions_list, *targets_list;
-    int argc, drop_header, verbose;
+    int argc, drop_header, verbose, record_cmd_line;
  }
  args_t;
  
@@ -239,13 +239,6 @@ static void print_plugin_usage_hint(void)
          fprintf(stderr,
                  " in\n\tBCFTOOLS_PLUGINS=\"%s\".\n\n"
                  "- Is the plugin path correct?\n\n"
-                "- Are all shared libraries, namely libhts.so, accessible? Verify with\n"
-                "   on Mac OS X: `otool -L your/plugin.so` and set DYLD_LIBRARY_PATH if they are not\n"
-                "   on Linux:    `ldd your/plugin.so` and set LD_LIBRARY_PATH if they are not\n"
-                "\n"
-                "- If not installed systemwide, set the environment variable LD_LIBRARY_PATH (linux) or\n"
-                "DYLD_LIBRARY_PATH (mac) to include directory where *libhts.so* is located.\n"
-                "\n"
                  "- Run \"bcftools plugin -lv\" for more detailed error output.\n"
                  "\n",
                  getenv("BCFTOOLS_PLUGINS")
@@ -418,7 +411,7 @@ static void init_data(args_t *args)
      if ( args->filter_str )
          args->filter = filter_init(args->hdr, args->filter_str);
  
-    bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_plugin");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_plugin");
      if ( !args->drop_header )
      {
          args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
@@ -460,6 +453,7 @@ static void usage(args_t *args)
      fprintf(stderr, "   -t, --targets <region>      similar to -r but streams rather than index-jumps\n");
      fprintf(stderr, "   -T, --targets-file <file>   similar to -R but streams rather than index-jumps\n");
      fprintf(stderr, "VCF output options:\n");
+    fprintf(stderr, "       --no-version            do not append version and command line to the header\n");
      fprintf(stderr, "   -o, --output <file>         write output to a file [standard output]\n");
      fprintf(stderr, "   -O, --output-type <type>    'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
      fprintf(stderr, "       --threads <int>         number of extra output compression threads [0]\n");
@@ -480,12 +474,27 @@ int main_plugin(int argc, char *argv[])
      args->output_fname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      args->nplugin_paths = -1;
      int regions_is_file = 0, targets_is_file = 0, plist_only = 0, usage_only = 0, version_only = 0;
  
      if ( argc==1 ) usage(args);
+
      char *plugin_name = NULL;
-    if ( argv[1][0]!='-' ) { plugin_name = argv[1]; argc--; argv++; }
+    if ( argv[1][0]!='-' )
+    {
+        plugin_name = argv[1]; 
+        argc--; 
+        argv++; 
+        load_plugin(args, plugin_name, 1, &args->plugin);
+        if ( args->plugin.run )
+        {
+            int ret = args->plugin.run(argc, argv);
+            destroy_data(args);
+            free(args);
+            return ret;
+        }
+    }
  
      static struct option loptions[] =
      {
@@ -502,6 +511,7 @@ int main_plugin(int argc, char *argv[])
          {"regions-file",required_argument,NULL,'R'},
          {"targets",required_argument,NULL,'t'},
          {"targets-file",required_argument,NULL,'T'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      while ((c = getopt_long(argc, argv, "h?o:O:r:R:t:T:li:e:vV",loptions,NULL)) >= 0)
@@ -527,6 +537,7 @@ int main_plugin(int argc, char *argv[])
              case 'T': args->targets_list = optarg; targets_is_file = 1; break;
              case 'l': plist_only = 1; break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case '?':
              case 'h': usage_only = 1; break;
              default: error("Unknown argument: %s\n", optarg);
@@ -535,7 +546,6 @@ int main_plugin(int argc, char *argv[])
      if ( plist_only )  return list_plugins(args);
      if ( usage_only && ! plugin_name ) usage(args);
  
-    load_plugin(args, plugin_name, 1, &args->plugin);
      if ( version_only )
      {
          const char *bver, *hver;
@@ -554,15 +564,6 @@ int main_plugin(int argc, char *argv[])
          return 0;
      }
  
-    if ( args->plugin.run )
-    {
-        int iopt = optind; optind = 0;
-        int ret = args->plugin.run(argc-iopt, argv+iopt);
-        destroy_data(args);
-        free(args);
-        return ret;
-    }
-
      char *fname = NULL;
      if ( optind>=argc || argv[optind][0]=='-' )
      {
diff --git a/bcftools/vcfplugin.c.pysam.c b/bcftools/vcfplugin.c.pysam.c

index 5c29993ad0e05f5ffbdb310c1443905367ec0409..8365f7e5ff5b2f065cd92e371fb5ad18a533b700 100644 (file)
--- a/bcftools/vcfplugin.c.pysam.c
+++ b/bcftools/vcfplugin.c.pysam.c
@@ -142,7 +142,7 @@ typedef struct _args_t
      char **plugin_paths;
  
      char **argv, *output_fname, *regions_list, *targets_list;
-    int argc, drop_header, verbose;
+    int argc, drop_header, verbose, record_cmd_line;
  }
  args_t;
  
@@ -172,11 +172,11 @@ static void add_plugin_paths(args_t *args, const char *path)
                  args->plugin_paths = (char**) realloc(args->plugin_paths,sizeof(char*)*(args->nplugin_paths+1));
                  args->plugin_paths[args->nplugin_paths] = dir;
                  args->nplugin_paths++;
-                if ( args->verbose ) fprintf(pysamerr, "plugin directory %s .. ok\n", dir);
+                if ( args->verbose ) fprintf(pysam_stderr, "plugin directory %s .. ok\n", dir);
              }
              else
              {
-                if ( args->verbose ) fprintf(pysamerr, "plugin directory %s .. %s\n", dir, strerror(errno));
+                if ( args->verbose ) fprintf(pysam_stderr, "plugin directory %s .. %s\n", dir, strerror(errno));
                  free(dir);
              }
  
@@ -214,8 +214,8 @@ static void *dlopen_plugin(args_t *args, const char *fname)
              handle = dlopen(tmp, RTLD_NOW); // valgrind complains about unfreed memory, not our problem though
              if ( args->verbose )
              {
-                if ( !handle ) fprintf(pysamerr,"%s:\n\tdlopen   .. %s\n", tmp,dlerror());
-                else fprintf(pysamerr,"%s:\n\tdlopen   .. ok\n", tmp);
+                if ( !handle ) fprintf(pysam_stderr,"%s:\n\tdlopen   .. %s\n", tmp,dlerror());
+                else fprintf(pysam_stderr,"%s:\n\tdlopen   .. ok\n", tmp);
              }
              free(tmp);
              if ( handle ) return handle;
@@ -225,8 +225,8 @@ static void *dlopen_plugin(args_t *args, const char *fname)
      handle = dlopen(fname, RTLD_NOW);
      if ( args->verbose )
      {
-        if ( !handle ) fprintf(pysamerr,"%s:\n\tdlopen   .. %s\n", fname,dlerror());
-        else fprintf(pysamerr,"%s:\n\tdlopen   .. ok\n", fname);
+        if ( !handle ) fprintf(pysam_stderr,"%s:\n\tdlopen   .. %s\n", fname,dlerror());
+        else fprintf(pysam_stderr,"%s:\n\tdlopen   .. ok\n", fname);
      }
  
      return handle;
@@ -234,20 +234,13 @@ static void *dlopen_plugin(args_t *args, const char *fname)
  
  static void print_plugin_usage_hint(void)
  {
-    fprintf(pysamerr, "\nNo functional bcftools plugins were found");
+    fprintf(pysam_stderr, "\nNo functional bcftools plugins were found");
      if ( !getenv("BCFTOOLS_PLUGINS") )
-        fprintf(pysamerr,". The environment variable BCFTOOLS_PLUGINS is not set.\n\n");
+        fprintf(pysam_stderr,". The environment variable BCFTOOLS_PLUGINS is not set.\n\n");
      else
-        fprintf(pysamerr,
+        fprintf(pysam_stderr,
                  " in\n\tBCFTOOLS_PLUGINS=\"%s\".\n\n"
                  "- Is the plugin path correct?\n\n"
-                "- Are all shared libraries, namely libhts.so, accessible? Verify with\n"
-                "   on Mac OS X: `otool -L your/plugin.so` and set DYLD_LIBRARY_PATH if they are not\n"
-                "   on Linux:    `ldd your/plugin.so` and set LD_LIBRARY_PATH if they are not\n"
-                "\n"
-                "- If not installed systemwide, set the environment variable LD_LIBRARY_PATH (linux) or\n"
-                "DYLD_LIBRARY_PATH (mac) to include directory where *libhts.so* is located.\n"
-                "\n"
                  "- Run \"bcftools plugin -lv\" for more detailed error output.\n"
                  "\n",
                  getenv("BCFTOOLS_PLUGINS")
@@ -275,19 +268,19 @@ static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugi
      if ( ret )
          plugin->init = NULL;
      else
-        if ( args->verbose ) fprintf(pysamerr,"\tinit     .. ok\n");
+        if ( args->verbose ) fprintf(pysam_stderr,"\tinit     .. ok\n");
  
      plugin->run = (dl_run_f) dlsym(plugin->handle, "run");
      ret = dlerror();
      if ( ret )
          plugin->run = NULL;
      else
-        if ( args->verbose ) fprintf(pysamerr,"\trun      .. ok\n");
+        if ( args->verbose ) fprintf(pysam_stderr,"\trun      .. ok\n");
  
      if ( !plugin->init && !plugin->run )
      {
          if ( exit_on_error ) error("Could not initialize %s, neither run or init found \n", plugin->name);
-        else if ( args->verbose ) fprintf(pysamerr,"\tinit/run .. not found\n");
+        else if ( args->verbose ) fprintf(pysam_stderr,"\tinit/run .. not found\n");
          return -1;
      }
  
@@ -296,7 +289,7 @@ static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugi
      if ( ret )
      {
          if ( exit_on_error ) error("Could not initialize %s, version string not found\n", plugin->name);
-        else if ( args->verbose ) fprintf(pysamerr,"\tversion  .. not found\n");
+        else if ( args->verbose ) fprintf(pysam_stderr,"\tversion  .. not found\n");
          return -1;
      }
  
@@ -344,12 +337,12 @@ static void init_plugin(args_t *args)
      args->plugin.version(&bver, &hver);
      if ( strcmp(bver,bcftools_version()) && !warned_bcftools )
      {
-        fprintf(pysamerr,"WARNING: bcftools version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n", bcftools_version(),args->plugin.name,bver);
+        fprintf(pysam_stderr,"WARNING: bcftools version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n", bcftools_version(),args->plugin.name,bver);
          warned_bcftools = 1;
      }
      if ( strcmp(hver,hts_version()) && !warned_htslib )
      {
-        fprintf(pysamerr,"WARNING: htslib version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n", hts_version(),args->plugin.name,hver);
+        fprintf(pysam_stderr,"WARNING: htslib version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n", hts_version(),args->plugin.name,hver);
          warned_htslib = 1;
      }
      args->drop_header += ret;
@@ -401,8 +394,8 @@ static int list_plugins(args_t *args)
          qsort(plugins, nplugins, sizeof(plugins[0]), cmp_plugin_name);
  
          for (i=0; i<nplugins; i++)
-            printf("\n-- %s --\n%s", plugins[i].name, plugins[i].about());
-        printf("\n");
+            fprintf(pysam_stdout, "\n-- %s --\n%s", plugins[i].name, plugins[i].about());
+        fprintf(pysam_stdout, "\n");
      }
      else
          print_plugin_usage_hint();
@@ -420,7 +413,7 @@ static void init_data(args_t *args)
      if ( args->filter_str )
          args->filter = filter_init(args->hdr, args->filter_str);
  
-    bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_plugin");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_plugin");
      if ( !args->drop_header )
      {
          args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
@@ -449,28 +442,29 @@ static void destroy_data(args_t *args)
  
  static void usage(args_t *args)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Run user defined plugin\n");
-    fprintf(pysamerr, "Usage:   bcftools plugin <name> [OPTIONS] <file> [-- PLUGIN_OPTIONS]\n");
-    fprintf(pysamerr, "         bcftools +name [OPTIONS] <file>  [-- PLUGIN_OPTIONS]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "VCF input options:\n");
-    fprintf(pysamerr, "   -e, --exclude <expr>        exclude sites for which the expression is true\n");
-    fprintf(pysamerr, "   -i, --include <expr>        select sites for which the expression is true\n");
-    fprintf(pysamerr, "   -r, --regions <region>      restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "   -R, --regions-file <file>   restrict to regions listed in a file\n");
-    fprintf(pysamerr, "   -t, --targets <region>      similar to -r but streams rather than index-jumps\n");
-    fprintf(pysamerr, "   -T, --targets-file <file>   similar to -R but streams rather than index-jumps\n");
-    fprintf(pysamerr, "VCF output options:\n");
-    fprintf(pysamerr, "   -o, --output <file>         write output to a file [standard output]\n");
-    fprintf(pysamerr, "   -O, --output-type <type>    'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
-    fprintf(pysamerr, "       --threads <int>         number of extra output compression threads [0]\n");
-    fprintf(pysamerr, "Plugin options:\n");
-    fprintf(pysamerr, "   -h, --help                  list plugin's options\n");
-    fprintf(pysamerr, "   -l, --list-plugins          list available plugins. See BCFTOOLS_PLUGINS environment variable and man page for details\n");
-    fprintf(pysamerr, "   -v, --verbose               print debugging information on plugin failure\n");
-    fprintf(pysamerr, "   -V, --version               print version string and exit\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Run user defined plugin\n");
+    fprintf(pysam_stderr, "Usage:   bcftools plugin <name> [OPTIONS] <file> [-- PLUGIN_OPTIONS]\n");
+    fprintf(pysam_stderr, "         bcftools +name [OPTIONS] <file>  [-- PLUGIN_OPTIONS]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "VCF input options:\n");
+    fprintf(pysam_stderr, "   -e, --exclude <expr>        exclude sites for which the expression is true\n");
+    fprintf(pysam_stderr, "   -i, --include <expr>        select sites for which the expression is true\n");
+    fprintf(pysam_stderr, "   -r, --regions <region>      restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "   -R, --regions-file <file>   restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "   -t, --targets <region>      similar to -r but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "   -T, --targets-file <file>   similar to -R but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "VCF output options:\n");
+    fprintf(pysam_stderr, "       --no-version            do not append version and command line to the header\n");
+    fprintf(pysam_stderr, "   -o, --output <file>         write output to a file [standard output]\n");
+    fprintf(pysam_stderr, "   -O, --output-type <type>    'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
+    fprintf(pysam_stderr, "       --threads <int>         number of extra output compression threads [0]\n");
+    fprintf(pysam_stderr, "Plugin options:\n");
+    fprintf(pysam_stderr, "   -h, --help                  list plugin's options\n");
+    fprintf(pysam_stderr, "   -l, --list-plugins          list available plugins. See BCFTOOLS_PLUGINS environment variable and man page for details\n");
+    fprintf(pysam_stderr, "   -v, --verbose               print debugging information on plugin failure\n");
+    fprintf(pysam_stderr, "   -V, --version               print version string and exit\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -482,12 +476,27 @@ int main_plugin(int argc, char *argv[])
      args->output_fname = "-";
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      args->nplugin_paths = -1;
      int regions_is_file = 0, targets_is_file = 0, plist_only = 0, usage_only = 0, version_only = 0;
  
      if ( argc==1 ) usage(args);
+
      char *plugin_name = NULL;
-    if ( argv[1][0]!='-' ) { plugin_name = argv[1]; argc--; argv++; }
+    if ( argv[1][0]!='-' )
+    {
+        plugin_name = argv[1]; 
+        argc--; 
+        argv++; 
+        load_plugin(args, plugin_name, 1, &args->plugin);
+        if ( args->plugin.run )
+        {
+            int ret = args->plugin.run(argc, argv);
+            destroy_data(args);
+            free(args);
+            return ret;
+        }
+    }
  
      static struct option loptions[] =
      {
@@ -504,6 +513,7 @@ int main_plugin(int argc, char *argv[])
          {"regions-file",required_argument,NULL,'R'},
          {"targets",required_argument,NULL,'t'},
          {"targets-file",required_argument,NULL,'T'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      while ((c = getopt_long(argc, argv, "h?o:O:r:R:t:T:li:e:vV",loptions,NULL)) >= 0)
@@ -529,6 +539,7 @@ int main_plugin(int argc, char *argv[])
              case 'T': args->targets_list = optarg; targets_is_file = 1; break;
              case 'l': plist_only = 1; break;
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case '?':
              case 'h': usage_only = 1; break;
              default: error("Unknown argument: %s\n", optarg);
@@ -537,34 +548,24 @@ int main_plugin(int argc, char *argv[])
      if ( plist_only )  return list_plugins(args);
      if ( usage_only && ! plugin_name ) usage(args);
  
-    load_plugin(args, plugin_name, 1, &args->plugin);
      if ( version_only )
      {
          const char *bver, *hver;
          args->plugin.version(&bver, &hver);
-        printf("bcftools  %s using htslib %s\n", bcftools_version(), hts_version());
-        printf("plugin at %s using htslib %s\n\n", bver, hver);
+        fprintf(pysam_stdout, "bcftools  %s using htslib %s\n", bcftools_version(), hts_version());
+        fprintf(pysam_stdout, "plugin at %s using htslib %s\n\n", bver, hver);
          return 0;
      }
  
      if ( usage_only )
      {
          if ( args->plugin.usage )
-            fprintf(pysamerr,"%s",args->plugin.usage());
+            fprintf(pysam_stderr,"%s",args->plugin.usage());
          else
-            fprintf(pysamerr,"Usage: bcftools +%s [General Options] -- [Plugin Options]\n",plugin_name);
+            fprintf(pysam_stderr,"Usage: bcftools +%s [General Options] -- [Plugin Options]\n",plugin_name);
          return 0;
      }
  
-    if ( args->plugin.run )
-    {
-        int iopt = optind; optind = 0;
-        int ret = args->plugin.run(argc-iopt, argv+iopt);
-        destroy_data(args);
-        free(args);
-        return ret;
-    }
-
      char *fname = NULL;
      if ( optind>=argc || argv[optind][0]=='-' )
      {
diff --git a/bcftools/vcfquery.c.pysam.c b/bcftools/vcfquery.c.pysam.c

index 1265b57786afebb766e8ba89391136452f1848e1..10f56f157dc9c1c0e779882038dabba4542bad51 100644 (file)
--- a/bcftools/vcfquery.c.pysam.c
+++ b/bcftools/vcfquery.c.pysam.c
@@ -156,7 +156,7 @@ static void list_columns(args_t *args)
      int i;
      bcf_sr_t *reader = &args->files->readers[0];
      for (i=0; i<bcf_hdr_nsamples(reader->header); i++)
-        printf("%s\n", reader->header->samples[i]);
+        fprintf(pysam_stdout, "%s\n", reader->header->samples[i]);
  }
  
  static char **copy_header(bcf_hdr_t *hdr, char **src, int nsrc)
@@ -178,30 +178,30 @@ static int compare_header(bcf_hdr_t *hdr, char **a, int na, char **b, int nb)
  
  static void usage(void)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Extracts fields from VCF/BCF file and prints them in user-defined format\n");
-    fprintf(pysamerr, "Usage:   bcftools query [options] <A.vcf.gz> [<B.vcf.gz> [...]]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Options:\n");
-    fprintf(pysamerr, "    -c, --collapse <string>           collapse lines with duplicate positions for <snps|indels|both|all|some|none>, see man page [none]\n");
-    fprintf(pysamerr, "    -e, --exclude <expr>              exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(pysamerr, "    -f, --format <string>             see man page for details\n");
-    fprintf(pysamerr, "    -H, --print-header                print header\n");
-    fprintf(pysamerr, "    -i, --include <expr>              select sites for which the expression is true (see man page for details)\n");
-    fprintf(pysamerr, "    -l, --list-samples                print the list of samples and exit\n");
-    fprintf(pysamerr, "    -o, --output-file <file>          output file name [stdout]\n");
-    fprintf(pysamerr, "    -r, --regions <region>            restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "    -R, --regions-file <file>         restrict to regions listed in a file\n");
-    fprintf(pysamerr, "    -s, --samples <list>              list of samples to include\n");
-    fprintf(pysamerr, "    -S, --samples-file <file>         file of samples to include\n");
-    fprintf(pysamerr, "    -t, --targets <region>            similar to -r but streams rather than index-jumps\n");
-    fprintf(pysamerr, "    -T, --targets-file <file>         similar to -R but streams rather than index-jumps\n");
-    fprintf(pysamerr, "    -u, --allow-undef-tags            print \".\" for undefined tags\n");
-    fprintf(pysamerr, "    -v, --vcf-list <file>             process multiple VCFs listed in the file\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Examples:\n");
-    fprintf(pysamerr, "\tbcftools query -f '%%CHROM\\t%%POS\\t%%REF\\t%%ALT[\\t%%SAMPLE=%%GT]\\n' file.vcf.gz\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Extracts fields from VCF/BCF file and prints them in user-defined format\n");
+    fprintf(pysam_stderr, "Usage:   bcftools query [options] <A.vcf.gz> [<B.vcf.gz> [...]]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "    -c, --collapse <string>           collapse lines with duplicate positions for <snps|indels|both|all|some|none>, see man page [none]\n");
+    fprintf(pysam_stderr, "    -e, --exclude <expr>              exclude sites for which the expression is true (see man page for details)\n");
+    fprintf(pysam_stderr, "    -f, --format <string>             see man page for details\n");
+    fprintf(pysam_stderr, "    -H, --print-header                print header\n");
+    fprintf(pysam_stderr, "    -i, --include <expr>              select sites for which the expression is true (see man page for details)\n");
+    fprintf(pysam_stderr, "    -l, --list-samples                print the list of samples and exit\n");
+    fprintf(pysam_stderr, "    -o, --output-file <file>          output file name [stdout]\n");
+    fprintf(pysam_stderr, "    -r, --regions <region>            restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "    -R, --regions-file <file>         restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "    -s, --samples <list>              list of samples to include\n");
+    fprintf(pysam_stderr, "    -S, --samples-file <file>         file of samples to include\n");
+    fprintf(pysam_stderr, "    -t, --targets <region>            similar to -r but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "    -T, --targets-file <file>         similar to -R but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "    -u, --allow-undef-tags            print \".\" for undefined tags\n");
+    fprintf(pysam_stderr, "    -v, --vcf-list <file>             process multiple VCFs listed in the file\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Examples:\n");
+    fprintf(pysam_stderr, "\tbcftools query -f '%%CHROM\\t%%POS\\t%%REF\\t%%ALT[\\t%%SAMPLE=%%GT]\\n' file.vcf.gz\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -300,7 +300,7 @@ int main_vcfquery(int argc, char *argv[])
      }
  
      if ( !args->format_str ) usage();
-    args->out = args->fn_out ? fopen(args->fn_out, "w") : stdout;
+    args->out = args->fn_out ? fopen(args->fn_out, "w") : pysam_stdout;
      if ( !args->out ) error("%s: %s\n", args->fn_out,strerror(errno));
  
      if ( !args->vcf_list )
diff --git a/bcftools/vcfroh.c b/bcftools/vcfroh.c

index fa64b798e99994177ed16e456be425fdf1350b46..95605592f78e0695cf3df2e097613628aef503d5 100644 (file)
--- a/bcftools/vcfroh.c
+++ b/bcftools/vcfroh.c
@@ -368,14 +368,31 @@ static void flush_viterbi(args_t *args)
              }
          }
  
-        // update the transition matrix tprob
+        // update the transition matrix
+        int n = 1;
          for (i=0; i<2; i++)
          {
-            int n = 0;
              for (j=0; j<2; j++) n += MAT(tcounts,2,i,j);
-            if ( !n) error("fixme: state %d not observed\n", i+1);
-            for (j=0; j<2; j++) MAT(tcounts,2,i,j) /= n;
          }
+        for (i=0; i<2; i++)
+        {
+            for (j=0; j<2; j++)
+            {
+                // no transition to i-th state was observed, set to a small number
+                if ( !MAT(tcounts,2,i,j) ) MAT(tcounts,2,i,j) = 0.1/n;
+                else MAT(tcounts,2,i,j) /= n;
+            }
+        }
+
+        // normalize
+        for (i=0; i<2; i++)
+        {
+            double norm = 0;
+            for (j=0; j<2; j++) norm += MAT(tcounts,2,j,i);
+            assert( norm!=0 );
+            for (j=0; j<2; j++) MAT(tcounts,2,j,i) /= norm;
+        }
+
          if ( args->genmap_fname || args->rec_rate > 0 )
              hmm_set_tprob(args->hmm, tcounts, 0);
          else
@@ -385,14 +402,16 @@ static void flush_viterbi(args_t *args)
          deltaz = fabs(MAT(tprob_arr,2,1,0)-t2az_prev);
          delthw = fabs(MAT(tprob_arr,2,0,1)-t2hw_prev);
          niter++;
-
-        fprintf(stderr,"%d: %f %f\n", niter,deltaz,delthw);
+        fprintf(stderr,"Viterbi training, iteration %d: dAZ=%e dHW=%e\tP(HW|HW)=%e  P(AZ|HW)=%e  P(AZ|AZ)=%e  P(HW|AZ)=%e\n", 
+            niter,deltaz,delthw,
+            MAT(tprob_arr,2,STATE_HW,STATE_HW),MAT(tprob_arr,2,STATE_AZ,STATE_HW),
+            MAT(tprob_arr,2,STATE_AZ,STATE_AZ),MAT(tprob_arr,2,STATE_HW,STATE_AZ));
      }
      while ( deltaz > 0.0 || delthw > 0.0 );
-    fprintf(stderr, "Viterbi training converged in %d iterations to", niter);
      double *tprob_arr = hmm_get_tprob(args->hmm);
-    for (i=0; i<2; i++) for (j=0; j<2; j++) fprintf(stderr, " %f", MAT(tprob_arr,2,i,j));
-    fprintf(stderr, "\n");
+    fprintf(stderr, "Viterbi training converged in %d iterations to P(HW|HW)=%e  P(AZ|HW)=%e  P(AZ|AZ)=%e  P(HW|AZ)=%e\n", niter,
+            MAT(tprob_arr,2,STATE_HW,STATE_HW),MAT(tprob_arr,2,STATE_AZ,STATE_HW),
+            MAT(tprob_arr,2,STATE_AZ,STATE_AZ),MAT(tprob_arr,2,STATE_HW,STATE_AZ));
      
      // output the results
      for (i=0; i<args->nrids; i++)
@@ -400,12 +419,16 @@ static void flush_viterbi(args_t *args)
          int ioff = args->rid_offs[i];
          int nsites = (i+1==args->nrids ? args->nsites : args->rid_offs[i+1]) - ioff;
          hmm_run_viterbi(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
+        hmm_run_fwd_bwd(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
          uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
+        double  *fwd   = hmm_get_fwd_bwd_prob(args->hmm);
  
          const char *chr = bcf_hdr_id2name(args->hdr,args->rids[i]);
          for (j=0; j<nsites; j++)
          {
-            printf("%s\t%d\t%d\t..\n", chr,args->sites[ioff+j]+1,vpath[j*2]==STATE_AZ ? 1 : 0);
+            int state = vpath[j*2];
+            double pval = fwd[j*2 + state];
+            printf("%s\t%d\t%d\t%e\n", chr,args->sites[ioff+j]+1,state==STATE_AZ ? 1 : 0, pval);
          }
      }
  }
diff --git a/bcftools/vcfroh.c.pysam.c b/bcftools/vcfroh.c.pysam.c

index 92a9a4f1b0f290211b56a18a0d25ed4001e42cb6..66ddc170116396965d19db537ecefc0a140fb1ed 100644 (file)
--- a/bcftools/vcfroh.c.pysam.c
+++ b/bcftools/vcfroh.c.pysam.c
@@ -167,12 +167,12 @@ static void init_data(args_t *args)
          args->hmm = hmm_init(2, tprob, 10000);
  
      // print header
-    printf("# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
-    printf("# The command line was:\tbcftools %s", args->argv[0]);
+    fprintf(pysam_stdout, "# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
+    fprintf(pysam_stdout, "# The command line was:\tbcftools %s", args->argv[0]);
      for (i=1; i<args->argc; i++)
-        printf(" %s",args->argv[i]);
-    printf("\n#\n");
-    printf("# [1]Chromosome\t[2]Position\t[3]State (0:HW, 1:AZ)\t[4]Quality\n");
+        fprintf(pysam_stdout, " %s",args->argv[i]);
+    fprintf(pysam_stdout, "\n#\n");
+    fprintf(pysam_stdout, "# [1]Chromosome\t[2]Position\t[3]State (0:HW, 1:AZ)\t[4]Quality\n");
  }
  
  static void destroy_data(args_t *args)
@@ -336,7 +336,7 @@ static void flush_viterbi(args_t *args)
          {
              int state = vpath[i*2]==STATE_AZ ? 1 : 0;
              double *pval = fwd + i*2;
-            printf("%s\t%d\t%d\t%.1f\n", chr,args->sites[i]+1, state, phred_score(1.0-pval[state]));
+            fprintf(pysam_stdout, "%s\t%d\t%d\t%.1f\n", chr,args->sites[i]+1, state, phred_score(1.0-pval[state]));
          }
          return;
      }
@@ -370,14 +370,31 @@ static void flush_viterbi(args_t *args)
              }
          }
  
-        // update the transition matrix tprob
+        // update the transition matrix
+        int n = 1;
          for (i=0; i<2; i++)
          {
-            int n = 0;
              for (j=0; j<2; j++) n += MAT(tcounts,2,i,j);
-            if ( !n) error("fixme: state %d not observed\n", i+1);
-            for (j=0; j<2; j++) MAT(tcounts,2,i,j) /= n;
          }
+        for (i=0; i<2; i++)
+        {
+            for (j=0; j<2; j++)
+            {
+                // no transition to i-th state was observed, set to a small number
+                if ( !MAT(tcounts,2,i,j) ) MAT(tcounts,2,i,j) = 0.1/n;
+                else MAT(tcounts,2,i,j) /= n;
+            }
+        }
+
+        // normalize
+        for (i=0; i<2; i++)
+        {
+            double norm = 0;
+            for (j=0; j<2; j++) norm += MAT(tcounts,2,j,i);
+            assert( norm!=0 );
+            for (j=0; j<2; j++) MAT(tcounts,2,j,i) /= norm;
+        }
+
          if ( args->genmap_fname || args->rec_rate > 0 )
              hmm_set_tprob(args->hmm, tcounts, 0);
          else
@@ -387,14 +404,16 @@ static void flush_viterbi(args_t *args)
          deltaz = fabs(MAT(tprob_arr,2,1,0)-t2az_prev);
          delthw = fabs(MAT(tprob_arr,2,0,1)-t2hw_prev);
          niter++;
-
-        fprintf(pysamerr,"%d: %f %f\n", niter,deltaz,delthw);
+        fprintf(pysam_stderr,"Viterbi training, iteration %d: dAZ=%e dHW=%e\tP(HW|HW)=%e  P(AZ|HW)=%e  P(AZ|AZ)=%e  P(HW|AZ)=%e\n", 
+            niter,deltaz,delthw,
+            MAT(tprob_arr,2,STATE_HW,STATE_HW),MAT(tprob_arr,2,STATE_AZ,STATE_HW),
+            MAT(tprob_arr,2,STATE_AZ,STATE_AZ),MAT(tprob_arr,2,STATE_HW,STATE_AZ));
      }
      while ( deltaz > 0.0 || delthw > 0.0 );
-    fprintf(pysamerr, "Viterbi training converged in %d iterations to", niter);
      double *tprob_arr = hmm_get_tprob(args->hmm);
-    for (i=0; i<2; i++) for (j=0; j<2; j++) fprintf(pysamerr, " %f", MAT(tprob_arr,2,i,j));
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "Viterbi training converged in %d iterations to P(HW|HW)=%e  P(AZ|HW)=%e  P(AZ|AZ)=%e  P(HW|AZ)=%e\n", niter,
+            MAT(tprob_arr,2,STATE_HW,STATE_HW),MAT(tprob_arr,2,STATE_AZ,STATE_HW),
+            MAT(tprob_arr,2,STATE_AZ,STATE_AZ),MAT(tprob_arr,2,STATE_HW,STATE_AZ));
      
      // output the results
      for (i=0; i<args->nrids; i++)
@@ -402,12 +421,16 @@ static void flush_viterbi(args_t *args)
          int ioff = args->rid_offs[i];
          int nsites = (i+1==args->nrids ? args->nsites : args->rid_offs[i+1]) - ioff;
          hmm_run_viterbi(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
+        hmm_run_fwd_bwd(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
          uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
+        double  *fwd   = hmm_get_fwd_bwd_prob(args->hmm);
  
          const char *chr = bcf_hdr_id2name(args->hdr,args->rids[i]);
          for (j=0; j<nsites; j++)
          {
-            printf("%s\t%d\t%d\t..\n", chr,args->sites[ioff+j]+1,vpath[j*2]==STATE_AZ ? 1 : 0);
+            int state = vpath[j*2];
+            double pval = fwd[j*2 + state];
+            fprintf(pysam_stdout, "%s\t%d\t%d\t%e\n", chr,args->sites[ioff+j]+1,state==STATE_AZ ? 1 : 0, pval);
          }
      }
  }
@@ -624,7 +647,7 @@ static void vcfroh(args_t *args, bcf1_t *line)
  
      if ( skip_rid )
      {
-        fprintf(pysamerr,"Skipping the sequence, no genmap for %s\n", bcf_seqname(args->hdr,line));
+        fprintf(pysam_stderr,"Skipping the sequence, no genmap for %s\n", bcf_seqname(args->hdr,line));
          args->skip_rid = line->rid;
          return;
      }
@@ -657,30 +680,30 @@ static void vcfroh(args_t *args, bcf1_t *line)
  
  static void usage(args_t *args)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   HMM model for detecting runs of autozygosity.\n");
-    fprintf(pysamerr, "Usage:   bcftools roh [options] <in.vcf.gz>\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "General Options:\n");
-    fprintf(pysamerr, "        --AF-dflt <float>              if AF is not known, use this allele frequency [skip]\n");
-    fprintf(pysamerr, "        --AF-tag <TAG>                 use TAG for allele frequency\n");
-    fprintf(pysamerr, "        --AF-file <file>               read allele frequencies from file (CHR\\tPOS\\tREF,ALT\\tAF)\n");
-    fprintf(pysamerr, "    -e, --estimate-AF <file>           calculate AC,AN counts on the fly, using either all samples (\"-\") or samples listed in <file>\n");
-    fprintf(pysamerr, "    -G, --GTs-only <float>             use GTs, ignore PLs, use <float> for PL of unseen genotypes. Safe value to use is 30 to account for GT errors.\n");
-    fprintf(pysamerr, "    -I, --skip-indels                  skip indels as their genotypes are enriched for errors\n");
-    fprintf(pysamerr, "    -m, --genetic-map <file>           genetic map in IMPUTE2 format, single file or mask, where string \"{CHROM}\" is replaced with chromosome name\n");
-    fprintf(pysamerr, "    -M, --rec-rate <float>             constant recombination rate per bp\n");
-    fprintf(pysamerr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
-    fprintf(pysamerr, "    -s, --sample <sample>              sample to analyze\n");
-    fprintf(pysamerr, "    -t, --targets <region>             similar to -r but streams rather than index-jumps\n");
-    fprintf(pysamerr, "    -T, --targets-file <file>          similar to -R but streams rather than index-jumps\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "HMM Options:\n");
-    fprintf(pysamerr, "    -a, --hw-to-az <float>             P(AZ|HW) transition probability from HW (Hardy-Weinberg) to AZ (autozygous) state [6.7e-8]\n");
-    fprintf(pysamerr, "    -H, --az-to-hw <float>             P(HW|AZ) transition probability from AZ to HW state [5e-9]\n");
-    fprintf(pysamerr, "    -V, --viterbi-training             perform Viterbi training to estimate transition probabilities\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   HMM model for detecting runs of autozygosity.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools roh [options] <in.vcf.gz>\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "General Options:\n");
+    fprintf(pysam_stderr, "        --AF-dflt <float>              if AF is not known, use this allele frequency [skip]\n");
+    fprintf(pysam_stderr, "        --AF-tag <TAG>                 use TAG for allele frequency\n");
+    fprintf(pysam_stderr, "        --AF-file <file>               read allele frequencies from file (CHR\\tPOS\\tREF,ALT\\tAF)\n");
+    fprintf(pysam_stderr, "    -e, --estimate-AF <file>           calculate AC,AN counts on the fly, using either all samples (\"-\") or samples listed in <file>\n");
+    fprintf(pysam_stderr, "    -G, --GTs-only <float>             use GTs, ignore PLs, use <float> for PL of unseen genotypes. Safe value to use is 30 to account for GT errors.\n");
+    fprintf(pysam_stderr, "    -I, --skip-indels                  skip indels as their genotypes are enriched for errors\n");
+    fprintf(pysam_stderr, "    -m, --genetic-map <file>           genetic map in IMPUTE2 format, single file or mask, where string \"{CHROM}\" is replaced with chromosome name\n");
+    fprintf(pysam_stderr, "    -M, --rec-rate <float>             constant recombination rate per bp\n");
+    fprintf(pysam_stderr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "    -s, --sample <sample>              sample to analyze\n");
+    fprintf(pysam_stderr, "    -t, --targets <region>             similar to -r but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "    -T, --targets-file <file>          similar to -R but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "HMM Options:\n");
+    fprintf(pysam_stderr, "    -a, --hw-to-az <float>             P(AZ|HW) transition probability from HW (Hardy-Weinberg) to AZ (autozygous) state [6.7e-8]\n");
+    fprintf(pysam_stderr, "    -H, --az-to-hw <float>             P(HW|AZ) transition probability from AZ to HW state [5e-9]\n");
+    fprintf(pysam_stderr, "    -V, --viterbi-training             perform Viterbi training to estimate transition probabilities\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -787,7 +810,7 @@ int main_vcfroh(int argc, char *argv[])
          vcfroh(args, args->files->readers[0].buffer[0]);
      }
      vcfroh(args, NULL);
-    fprintf(pysamerr,"Number of lines: total/processed: %d/%d\n", args->ntot,args->nused);
+    fprintf(pysam_stderr,"Number of lines: total/processed: %d/%d\n", args->ntot,args->nused);
      destroy_data(args);
      free(args);
      return 0;
diff --git a/bcftools/vcfsom.c.pysam.c b/bcftools/vcfsom.c.pysam.c

index 32e721307fbcc63be47bf31f9f2d130d24c40bdc..58875f664215108bd06acd44acafdec028ec88f7 100644 (file)
--- a/bcftools/vcfsom.c.pysam.c
+++ b/bcftools/vcfsom.c.pysam.c
@@ -104,7 +104,7 @@ char *msprintf(const char *fmt, ...)
  /*
   *  char *t, *p = str;
   *  t = column_next(p, '\t');
- *  if ( strlen("<something>")==t-p && !strncmp(p,"<something>",t-p) ) printf("found!\n");
+ *  if ( strlen("<something>")==t-p && !strncmp(p,"<something>",t-p) ) fprintf(pysam_stdout, "found!\n");
   *
   *  char *t;
   *  t = column_next(str, '\t'); if ( !*t ) error("expected field\n", str);
@@ -574,7 +574,7 @@ static void do_train(args_t *args)
              fprintf(fp,"%e\t%f\t%f\n", prev_score, (float)igood/ngood, (float)ibad/nbad);
          if ( !printed && (float)igood/ngood > 0.9 )
          {
-            printf("%.2f\t%.2f\t%e\t# %% of bad [1] and good [2] sites at a cutoff [3]\n", 100.*ibad/nbad,100.*igood/ngood,prev_score);
+            fprintf(pysam_stdout, "%.2f\t%.2f\t%e\t# %% of bad [1] and good [2] sites at a cutoff [3]\n", 100.*ibad/nbad,100.*igood/ngood,prev_score);
              printed = 1;
          }
  
@@ -582,7 +582,7 @@ static void do_train(args_t *args)
          else if ( igood<ngood ) prev_score = good[igood];
          else prev_score = bad[ibad];
      }
-    if ( !printed ) printf("%.2f\t%.2f\t%e\t# %% of bad [1] and good [2] sites at a cutoff [3]\n", 100.*ibad/nbad,100.*igood/ngood,prev_score);
+    if ( !printed ) fprintf(pysam_stdout, "%.2f\t%.2f\t%e\t# %% of bad [1] and good [2] sites at a cutoff [3]\n", 100.*ibad/nbad,100.*igood/ngood,prev_score);
      if ( fp )
      {
          if ( fclose(fp) ) error("%s.eval: fclose failed: %s\n",args->prefix,strerror(errno));
@@ -607,36 +607,36 @@ static void do_classify(args_t *args)
              case MERGE_MAX: score = get_max_score(args, -1); break;
              case MERGE_AVG: score = get_avg_score(args, -1); break;
          }
-        printf("%e\n", 1.0 - score/max_score);
+        fprintf(pysam_stdout, "%e\n", 1.0 - score/max_score);
      }
      annots_reader_close(args);
  }
  
  static void usage(void)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   SOM (Self-Organizing Map) filtering.\n");
-    fprintf(pysamerr, "Usage:   bcftools som --train    [options] <annots.tab.gz>\n");
-    fprintf(pysamerr, "         bcftools som --classify [options]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Model training options:\n");
-    fprintf(pysamerr, "    -f, --nfold <int>                  n-fold cross-validation (number of maps) [5]\n");
-    fprintf(pysamerr, "    -p, --prefix <string>              prefix of output files\n");
-    fprintf(pysamerr, "    -s, --size <int>                   map size [20]\n");
-    fprintf(pysamerr, "    -t, --train                        \n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Classifying options:\n");
-    fprintf(pysamerr, "    -c, --classify                     \n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Experimental training options (no reason to change):\n");
-    fprintf(pysamerr, "    -b, --bmu-threshold <float>        threshold for selection of best-matching unit [0.9]\n");
-    fprintf(pysamerr, "    -d, --som-dimension <int>          SOM dimension [2]\n");
-    fprintf(pysamerr, "    -e, --exclude-bad                  exclude bad sites from training, use for evaluation only\n");
-    fprintf(pysamerr, "    -l, --learning-rate <float>        learning rate [1.0]\n");
-    fprintf(pysamerr, "    -m, --merge <min|max|avg>          -f merge algorithm [avg]\n");
-    fprintf(pysamerr, "    -n, --ntrain-sites <int>           effective number of training sites [number of good sites]\n");
-    fprintf(pysamerr, "    -r, --random-seed <int>            random seed, 0 for time() [1]\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   SOM (Self-Organizing Map) filtering.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools som --train    [options] <annots.tab.gz>\n");
+    fprintf(pysam_stderr, "         bcftools som --classify [options]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Model training options:\n");
+    fprintf(pysam_stderr, "    -f, --nfold <int>                  n-fold cross-validation (number of maps) [5]\n");
+    fprintf(pysam_stderr, "    -p, --prefix <string>              prefix of output files\n");
+    fprintf(pysam_stderr, "    -s, --size <int>                   map size [20]\n");
+    fprintf(pysam_stderr, "    -t, --train                        \n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Classifying options:\n");
+    fprintf(pysam_stderr, "    -c, --classify                     \n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Experimental training options (no reason to change):\n");
+    fprintf(pysam_stderr, "    -b, --bmu-threshold <float>        threshold for selection of best-matching unit [0.9]\n");
+    fprintf(pysam_stderr, "    -d, --som-dimension <int>          SOM dimension [2]\n");
+    fprintf(pysam_stderr, "    -e, --exclude-bad                  exclude bad sites from training, use for evaluation only\n");
+    fprintf(pysam_stderr, "    -l, --learning-rate <float>        learning rate [1.0]\n");
+    fprintf(pysam_stderr, "    -m, --merge <min|max|avg>          -f merge algorithm [avg]\n");
+    fprintf(pysam_stderr, "    -n, --ntrain-sites <int>           effective number of training sites [number of good sites]\n");
+    fprintf(pysam_stderr, "    -r, --random-seed <int>            random seed, 0 for time() [1]\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -692,7 +692,7 @@ int main_vcfsom(int argc, char *argv[])
              case 'd':
                  args->ndim = atoi(optarg);
                  if ( args->ndim<2 ) error("Expected -d >=2, got %d\n", args->ndim);
-                if ( args->ndim>3 ) fprintf(pysamerr,"Warning: This will take a long time and is not going to make the results better: -d %d\n", args->ndim);
+                if ( args->ndim>3 ) fprintf(pysam_stderr,"Warning: This will take a long time and is not going to make the results better: -d %d\n", args->ndim);
                  break;
              case 't': args->action = SOM_TRAIN; break;
              case 'c': args->action = SOM_CLASSIFY; break;
diff --git a/bcftools/vcfstats.c.pysam.c b/bcftools/vcfstats.c.pysam.c

index fcbc15b120b797a5d103d632d07e37f4e1334c53..5653760ae0694b6cd3a1a1777d79de93d0b99cf9 100644 (file)
--- a/bcftools/vcfstats.c.pysam.c
+++ b/bcftools/vcfstats.c.pysam.c
@@ -195,17 +195,17 @@ static inline int idist_i2bin(idist_t *d, int i)
  static void _indel_ctx_print1(_idc1_t *idc)
  {
      int i;
-    fprintf(stdout, "%d\t", idc->cnt);
+    fprintf(pysam_stdout, "%d\t", idc->cnt);
      for (i=0; i<idc->len; i++)
-        fputc(idc->seq[i], stdout);
-    fputc('\n', stdout);
+        fputc(idc->seq[i], pysam_stdout);
+    fputc('\n', pysam_stdout);
  }
  static void _indel_ctx_print(indel_ctx_t *ctx)
  {
      int i;
      for (i=0; i<ctx->ndat; i++)
          _indel_ctx_print1(&ctx->dat[i]);
-    fputc('\n',stdout);
+    fputc('\n',pysam_stdout);
  }
  #endif
  static int _indel_ctx_lookup(indel_ctx_t *ctx, char *seq, int seq_len, int *hit)
@@ -317,9 +317,9 @@ int indel_ctx_type(indel_ctx_t *ctx, char *chr, int pos, char *ref, char *alt, i
      }
  
      #if IC_DBG
-    fprintf(stdout,"ref: %s\n", ref);
-    fprintf(stdout,"alt: %s\n", alt);
-    fprintf(stdout,"ctx: %s\n", fai_ref);
+    fprintf(pysam_stdout,"ref: %s\n", ref);
+    fprintf(pysam_stdout,"alt: %s\n", alt);
+    fprintf(pysam_stdout,"ctx: %s\n", fai_ref);
      _indel_ctx_print(ctx);
      #endif
  
@@ -900,7 +900,7 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
              case BCF_BT_INT8:  BRANCH_INT(int8_t,  bcf_int8_missing, bcf_int8_vector_end); break;
              case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;
              case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;
-            default: fprintf(pysamerr, "[E::%s] todo: %d\n", __func__, fmt_ptr->type); exit(1); break;
+            default: fprintf(pysam_stderr, "[E::%s] todo: %d\n", __func__, fmt_ptr->type); exit(1); break;
          }
          #undef BRANCH_INT
      }
@@ -1010,7 +1010,7 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
                  {
                      nmm++;
                      bcf_sr_t *reader = &files->readers[0];
-                    printf("DBG\t%s\t%d\t%s\t%d\t%d\n",reader->header->id[BCF_DT_CTG][reader->buffer[0]->rid].key,reader->buffer[0]->pos+1,files->samples[is],gt,gt2);
+                    fprintf(pysam_stdout, "DBG\t%s\t%d\t%s\t%d\t%d\n",reader->header->id[BCF_DT_CTG][reader->buffer[0]->rid].key,reader->buffer[0]->pos+1,files->samples[is],gt,gt2);
                  }
                  else
                  {
@@ -1019,7 +1019,7 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
                  }
              }
              float nrd = nrefm+nmm ? 100.*nmm/(nrefm+nmm) : 0;
-            printf("PSD\t%s\t%d\t%d\t%d\t%f\n", reader->header->id[BCF_DT_CTG][reader->buffer[0]->rid].key,reader->buffer[0]->pos+1,nm,nmm,nrd);
+            fprintf(pysam_stdout, "PSD\t%s\t%d\t%d\t%d\t%f\n", reader->header->id[BCF_DT_CTG][reader->buffer[0]->rid].key,reader->buffer[0]->pos+1,nm,nmm,nrd);
          }
      }
  }
@@ -1089,38 +1089,38 @@ static void do_vcf_stats(args_t *args)
  static void print_header(args_t *args)
  {
      int i;
-    printf("# This file was produced by bcftools stats (%s+htslib-%s) and can be plotted using plot-vcfstats.\n", bcftools_version(),hts_version());
-    printf("# The command line was:\tbcftools %s ", args->argv[0]);
+    fprintf(pysam_stdout, "# This file was produced by bcftools stats (%s+htslib-%s) and can be plotted using plot-vcfstats.\n", bcftools_version(),hts_version());
+    fprintf(pysam_stdout, "# The command line was:\tbcftools %s ", args->argv[0]);
      for (i=1; i<args->argc; i++)
-        printf(" %s",args->argv[i]);
-    printf("\n#\n");
+        fprintf(pysam_stdout, " %s",args->argv[i]);
+    fprintf(pysam_stdout, "\n#\n");
  
-    printf("# Definition of sets:\n# ID\t[2]id\t[3]tab-separated file names\n");
+    fprintf(pysam_stdout, "# Definition of sets:\n# ID\t[2]id\t[3]tab-separated file names\n");
      if ( args->files->nreaders==1 )
      {
          const char *fname = strcmp("-",args->files->readers[0].fname) ? args->files->readers[0].fname : "<STDIN>";
          if ( args->split_by_id )
          {
-            printf("ID\t0\t%s:known (sites with ID different from \".\")\n", fname);
-            printf("ID\t1\t%s:novel (sites where ID column is \".\")\n", fname);
+            fprintf(pysam_stdout, "ID\t0\t%s:known (sites with ID different from \".\")\n", fname);
+            fprintf(pysam_stdout, "ID\t1\t%s:novel (sites where ID column is \".\")\n", fname);
          }
          else
-            printf("ID\t0\t%s\n", fname);
+            fprintf(pysam_stdout, "ID\t0\t%s\n", fname);
      }
      else
      {
          const char *fname0 = strcmp("-",args->files->readers[0].fname) ? args->files->readers[0].fname : "<STDIN>";
          const char *fname1 = strcmp("-",args->files->readers[1].fname) ? args->files->readers[1].fname : "<STDIN>";
-        printf("ID\t0\t%s\n", fname0);
-        printf("ID\t1\t%s\n", fname1);
-        printf("ID\t2\t%s\t%s\n", fname0,fname1);
+        fprintf(pysam_stdout, "ID\t0\t%s\n", fname0);
+        fprintf(pysam_stdout, "ID\t1\t%s\n", fname1);
+        fprintf(pysam_stdout, "ID\t2\t%s\t%s\n", fname0,fname1);
  
          if ( args->verbose_sites )
          {
-            printf(
+            fprintf(pysam_stdout, 
                      "# Verbose per-site discordance output.\n"
                      "# PSD\t[2]CHROM\t[3]POS\t[4]Number of matches\t[5]Number of mismatches\t[6]NRD\n");
-            printf(
+            fprintf(pysam_stdout, 
                      "# Verbose per-site and per-sample output. Genotype codes: %d:HomRefRef, %d:HomAltAlt, %d:HetAltRef, %d:HetAltAlt, %d:haploidRef, %d:haploidAlt\n"
                      "# DBG\t[2]CHROM\t[3]POS\t[4]Sample\t[5]GT in %s\t[6]GT in %s\n",
                      GT_HOM_RR, GT_HOM_AA, GT_HET_RA, GT_HET_AA, GT_HAPL_R, GT_HAPL_A, fname0,fname1);
@@ -1132,42 +1132,42 @@ static void print_header(args_t *args)
  static void print_stats(args_t *args)
  {
      int i, id;
-    printf("# SN, Summary numbers:\n# SN\t[2]id\t[3]key\t[4]value\n");
+    fprintf(pysam_stdout, "# SN, Summary numbers:\n# SN\t[2]id\t[3]key\t[4]value\n");
      for (id=0; id<args->files->nreaders; id++)
-        printf("SN\t%d\tnumber of samples:\t%d\n", id, bcf_hdr_nsamples(args->files->readers[id].header));
+        fprintf(pysam_stdout, "SN\t%d\tnumber of samples:\t%d\n", id, bcf_hdr_nsamples(args->files->readers[id].header));
      for (id=0; id<args->nstats; id++)
      {
          stats_t *stats = &args->stats[id];
-        printf("SN\t%d\tnumber of records:\t%d\n", id, stats->n_records);
-        printf("SN\t%d\tnumber of no-ALTs:\t%d\n", id, stats->n_noalts);
-        printf("SN\t%d\tnumber of SNPs:\t%d\n", id, stats->n_snps);
-        printf("SN\t%d\tnumber of MNPs:\t%d\n", id, stats->n_mnps);
-        printf("SN\t%d\tnumber of indels:\t%d\n", id, stats->n_indels);
-        printf("SN\t%d\tnumber of others:\t%d\n", id, stats->n_others);
-        printf("SN\t%d\tnumber of multiallelic sites:\t%d\n", id, stats->n_mals);
-        printf("SN\t%d\tnumber of multiallelic SNP sites:\t%d\n", id, stats->n_snp_mals);
+        fprintf(pysam_stdout, "SN\t%d\tnumber of records:\t%d\n", id, stats->n_records);
+        fprintf(pysam_stdout, "SN\t%d\tnumber of no-ALTs:\t%d\n", id, stats->n_noalts);
+        fprintf(pysam_stdout, "SN\t%d\tnumber of SNPs:\t%d\n", id, stats->n_snps);
+        fprintf(pysam_stdout, "SN\t%d\tnumber of MNPs:\t%d\n", id, stats->n_mnps);
+        fprintf(pysam_stdout, "SN\t%d\tnumber of indels:\t%d\n", id, stats->n_indels);
+        fprintf(pysam_stdout, "SN\t%d\tnumber of others:\t%d\n", id, stats->n_others);
+        fprintf(pysam_stdout, "SN\t%d\tnumber of multiallelic sites:\t%d\n", id, stats->n_mals);
+        fprintf(pysam_stdout, "SN\t%d\tnumber of multiallelic SNP sites:\t%d\n", id, stats->n_snp_mals);
      }
-    printf("# TSTV, transitions/transversions:\n# TSTV\t[2]id\t[3]ts\t[4]tv\t[5]ts/tv\t[6]ts (1st ALT)\t[7]tv (1st ALT)\t[8]ts/tv (1st ALT)\n");
+    fprintf(pysam_stdout, "# TSTV, transitions/transversions:\n# TSTV\t[2]id\t[3]ts\t[4]tv\t[5]ts/tv\t[6]ts (1st ALT)\t[7]tv (1st ALT)\t[8]ts/tv (1st ALT)\n");
      for (id=0; id<args->nstats; id++)
      {
          stats_t *stats = &args->stats[id];
          int ts=0,tv=0;
          for (i=0; i<args->m_af; i++) { ts += stats->af_ts[i]; tv += stats->af_tv[i];  }
-        printf("TSTV\t%d\t%d\t%d\t%.2f\t%d\t%d\t%.2f\n", id,ts,tv,tv?(float)ts/tv:0, stats->ts_alt1,stats->tv_alt1,stats->tv_alt1?(float)stats->ts_alt1/stats->tv_alt1:0);
+        fprintf(pysam_stdout, "TSTV\t%d\t%d\t%d\t%.2f\t%d\t%d\t%.2f\n", id,ts,tv,tv?(float)ts/tv:0, stats->ts_alt1,stats->tv_alt1,stats->tv_alt1?(float)stats->ts_alt1/stats->tv_alt1:0);
      }
      if ( args->exons_fname )
      {
-        printf("# FS, Indel frameshifts:\n# FS\t[2]id\t[3]in-frame\t[4]out-frame\t[5]not applicable\t[6]out/(in+out) ratio\t[7]in-frame (1st ALT)\t[8]out-frame (1st ALT)\t[9]not applicable (1st ALT)\t[10]out/(in+out) ratio (1st ALT)\n");
+        fprintf(pysam_stdout, "# FS, Indel frameshifts:\n# FS\t[2]id\t[3]in-frame\t[4]out-frame\t[5]not applicable\t[6]out/(in+out) ratio\t[7]in-frame (1st ALT)\t[8]out-frame (1st ALT)\t[9]not applicable (1st ALT)\t[10]out/(in+out) ratio (1st ALT)\n");
          for (id=0; id<args->nstats; id++)
          {
              int in=args->stats[id].in_frame, out=args->stats[id].out_frame, na=args->stats[id].na_frame;
              int in1=args->stats[id].in_frame_alt1, out1=args->stats[id].out_frame_alt1, na1=args->stats[id].na_frame_alt1;
-            printf("FS\t%d\t%d\t%d\t%d\t%.2f\t%d\t%d\t%d\t%.2f\n", id, in,out,na,out?(float)out/(in+out):0,in1,out1,na1,out1?(float)out1/(in1+out1):0);
+            fprintf(pysam_stdout, "FS\t%d\t%d\t%d\t%d\t%.2f\t%d\t%d\t%d\t%.2f\n", id, in,out,na,out?(float)out/(in+out):0,in1,out1,na1,out1?(float)out1/(in1+out1):0);
          }
      }
      if ( args->indel_ctx )
      {
-        printf("# ICS, Indel context summary:\n# ICS\t[2]id\t[3]repeat-consistent\t[4]repeat-inconsistent\t[5]not applicable\t[6]c/(c+i) ratio\n");
+        fprintf(pysam_stdout, "# ICS, Indel context summary:\n# ICS\t[2]id\t[3]repeat-consistent\t[4]repeat-inconsistent\t[5]not applicable\t[6]c/(c+i) ratio\n");
          for (id=0; id<args->nstats; id++)
          {
              int nc = 0, ni = 0, na = args->stats[id].n_repeat_na;
@@ -1176,25 +1176,25 @@ static void print_stats(args_t *args)
                  nc += args->stats[id].n_repeat[i][0] + args->stats[id].n_repeat[i][2];
                  ni += args->stats[id].n_repeat[i][1] + args->stats[id].n_repeat[i][3];
              }
-            printf("ICS\t%d\t%d\t%d\t%d\t%.4f\n", id, nc,ni,na,nc+ni ? (float)nc/(nc+ni) : 0.0);
+            fprintf(pysam_stdout, "ICS\t%d\t%d\t%d\t%d\t%.4f\n", id, nc,ni,na,nc+ni ? (float)nc/(nc+ni) : 0.0);
          }
-        printf("# ICL, Indel context by length:\n# ICL\t[2]id\t[3]length of repeat element\t[4]repeat-consistent deletions)\t[5]repeat-inconsistent deletions\t[6]consistent insertions\t[7]inconsistent insertions\t[8]c/(c+i) ratio\n");
+        fprintf(pysam_stdout, "# ICL, Indel context by length:\n# ICL\t[2]id\t[3]length of repeat element\t[4]repeat-consistent deletions)\t[5]repeat-inconsistent deletions\t[6]consistent insertions\t[7]inconsistent insertions\t[8]c/(c+i) ratio\n");
          for (id=0; id<args->nstats; id++)
          {
              for (i=1; i<IRC_RLEN; i++)
              {
                  int nc = args->stats[id].n_repeat[i][0]+args->stats[id].n_repeat[i][2], ni = args->stats[id].n_repeat[i][1]+args->stats[id].n_repeat[i][3];
-                printf("ICL\t%d\t%d\t%d\t%d\t%d\t%d\t%.4f\n", id, i+1,
+                fprintf(pysam_stdout, "ICL\t%d\t%d\t%d\t%d\t%d\t%d\t%.4f\n", id, i+1,
                      args->stats[id].n_repeat[i][0],args->stats[id].n_repeat[i][1],args->stats[id].n_repeat[i][2],args->stats[id].n_repeat[i][3],
                      nc+ni ? (float)nc/(nc+ni) : 0.0);
              }
          }
      }
-    printf("# SiS, Singleton stats:\n# SiS\t[2]id\t[3]allele count\t[4]number of SNPs\t[5]number of transitions\t[6]number of transversions\t[7]number of indels\t[8]repeat-consistent\t[9]repeat-inconsistent\t[10]not applicable\n");
+    fprintf(pysam_stdout, "# SiS, Singleton stats:\n# SiS\t[2]id\t[3]allele count\t[4]number of SNPs\t[5]number of transitions\t[6]number of transversions\t[7]number of indels\t[8]repeat-consistent\t[9]repeat-inconsistent\t[10]not applicable\n");
      for (id=0; id<args->nstats; id++)
      {
          stats_t *stats = &args->stats[id];
-        printf("SiS\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id,1,stats->af_snps[0],stats->af_ts[0],stats->af_tv[0],
+        fprintf(pysam_stdout, "SiS\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id,1,stats->af_snps[0],stats->af_ts[0],stats->af_tv[0],
              stats->af_repeats[0][0]+stats->af_repeats[1][0]+stats->af_repeats[2][0],stats->af_repeats[0][0],stats->af_repeats[1][0],stats->af_repeats[2][0]);
          // put the singletons stats into the first AF bin, note that not all of the stats is transferred (i.e. nrd mismatches)
          stats->af_snps[1]       += stats->af_snps[0];
@@ -1204,32 +1204,32 @@ static void print_stats(args_t *args)
          stats->af_repeats[1][1] += stats->af_repeats[1][0];
          stats->af_repeats[2][1] += stats->af_repeats[2][0];
      }
-    printf("# AF, Stats by non-reference allele frequency:\n# AF\t[2]id\t[3]allele frequency\t[4]number of SNPs\t[5]number of transitions\t[6]number of transversions\t[7]number of indels\t[8]repeat-consistent\t[9]repeat-inconsistent\t[10]not applicable\n");
+    fprintf(pysam_stdout, "# AF, Stats by non-reference allele frequency:\n# AF\t[2]id\t[3]allele frequency\t[4]number of SNPs\t[5]number of transitions\t[6]number of transversions\t[7]number of indels\t[8]repeat-consistent\t[9]repeat-inconsistent\t[10]not applicable\n");
      for (id=0; id<args->nstats; id++)
      {
          stats_t *stats = &args->stats[id];
          for (i=1; i<args->m_af; i++) // note that af[1] now contains also af[0], see SiS stats output above
          {
              if ( stats->af_snps[i]+stats->af_ts[i]+stats->af_tv[i]+stats->af_repeats[0][i]+stats->af_repeats[1][i]+stats->af_repeats[2][i] == 0  ) continue;
-            printf("AF\t%d\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id,100.*(i-1)/(args->m_af-1),stats->af_snps[i],stats->af_ts[i],stats->af_tv[i],
+            fprintf(pysam_stdout, "AF\t%d\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id,100.*(i-1)/(args->m_af-1),stats->af_snps[i],stats->af_ts[i],stats->af_tv[i],
                  stats->af_repeats[0][i]+stats->af_repeats[1][i]+stats->af_repeats[2][i],stats->af_repeats[0][i],stats->af_repeats[1][i],stats->af_repeats[2][i]);
          }
      }
      #if QUAL_STATS
-        printf("# QUAL, Stats by quality:\n# QUAL\t[2]id\t[3]Quality\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\t[7]number of indels\n");
+        fprintf(pysam_stdout, "# QUAL, Stats by quality:\n# QUAL\t[2]id\t[3]Quality\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\t[7]number of indels\n");
          for (id=0; id<args->nstats; id++)
          {
              stats_t *stats = &args->stats[id];
              for (i=0; i<args->m_qual; i++)
              {
                  if ( stats->qual_snps[i]+stats->qual_ts[i]+stats->qual_tv[i]+stats->qual_indels[i] == 0  ) continue;
-                printf("QUAL\t%d\t%d\t%d\t%d\t%d\t%d\n", id,i,stats->qual_snps[i],stats->qual_ts[i],stats->qual_tv[i],stats->qual_indels[i]);
+                fprintf(pysam_stdout, "QUAL\t%d\t%d\t%d\t%d\t%d\t%d\n", id,i,stats->qual_snps[i],stats->qual_ts[i],stats->qual_tv[i],stats->qual_indels[i]);
              }
          }
      #endif
      for (i=0; i<args->nusr; i++)
      {
-        printf("# USR:%s, Stats by %s:\n# USR:%s\t[2]id\t[3]%s\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\n",
+        fprintf(pysam_stdout, "# USR:%s, Stats by %s:\n# USR:%s\t[2]id\t[3]%s\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\n",
              args->usr[i].tag,args->usr[i].tag,args->usr[i].tag,args->usr[i].tag);
          for (id=0; id<args->nstats; id++)
          {
@@ -1240,32 +1240,32 @@ static void print_stats(args_t *args)
                  if ( usr->vals_ts[j]+usr->vals_tv[j] == 0 ) continue;   // skip empty bins
                  float val = usr->min + (usr->max - usr->min)*j/(usr->nbins-1);
                  const char *fmt = usr->type==BCF_HT_REAL ? "USR:%s\t%d\t%e\t%d\t%d\t%d\n" : "USR:%s\t%d\t%.0f\t%d\t%d\t%d\n";
-                printf(fmt,usr->tag,id,val,usr->vals_ts[j]+usr->vals_tv[j],usr->vals_ts[j],usr->vals_tv[j]);
+                fprintf(pysam_stdout, fmt,usr->tag,id,val,usr->vals_ts[j]+usr->vals_tv[j],usr->vals_ts[j],usr->vals_tv[j]);
              }
          }
      }
-    printf("# IDD, InDel distribution:\n# IDD\t[2]id\t[3]length (deletions negative)\t[4]count\n");
+    fprintf(pysam_stdout, "# IDD, InDel distribution:\n# IDD\t[2]id\t[3]length (deletions negative)\t[4]count\n");
      for (id=0; id<args->nstats; id++)
      {
          stats_t *stats = &args->stats[id];
          for (i=stats->m_indel-1; i>=0; i--)
-            if ( stats->deletions[i] ) printf("IDD\t%d\t%d\t%d\n", id,-i-1,stats->deletions[i]);
+            if ( stats->deletions[i] ) fprintf(pysam_stdout, "IDD\t%d\t%d\t%d\n", id,-i-1,stats->deletions[i]);
          for (i=0; i<stats->m_indel; i++)
-            if ( stats->insertions[i] ) printf("IDD\t%d\t%d\t%d\n", id,i+1,stats->insertions[i]);
+            if ( stats->insertions[i] ) fprintf(pysam_stdout, "IDD\t%d\t%d\t%d\n", id,i+1,stats->insertions[i]);
      }
-    printf("# ST, Substitution types:\n# ST\t[2]id\t[3]type\t[4]count\n");
+    fprintf(pysam_stdout, "# ST, Substitution types:\n# ST\t[2]id\t[3]type\t[4]count\n");
      for (id=0; id<args->nstats; id++)
      {
          int t;
          for (t=0; t<15; t++)
          {
              if ( t>>2 == (t&3) ) continue;
-            printf("ST\t%d\t%c>%c\t%d\n", id, bcf_int2acgt(t>>2),bcf_int2acgt(t&3),args->stats[id].subst[t]);
+            fprintf(pysam_stdout, "ST\t%d\t%c>%c\t%d\n", id, bcf_int2acgt(t>>2),bcf_int2acgt(t&3),args->stats[id].subst[t]);
          }
      }
      if ( args->files->nreaders>1 && args->files->n_smpl )
      {
-        printf("SN\t%d\tnumber of samples:\t%d\n", 2, args->files->n_smpl);
+        fprintf(pysam_stdout, "SN\t%d\tnumber of samples:\t%d\n", 2, args->files->n_smpl);
  
          int x;
          for (x=0; x<2; x++)
@@ -1273,12 +1273,12 @@ static void print_stats(args_t *args)
              gtcmp_t *stats;
              if ( x==0 )
              {
-                printf("# GCsAF, Genotype concordance by non-reference allele frequency (SNPs)\n# GCsAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of sites\n");
+                fprintf(pysam_stdout, "# GCsAF, Genotype concordance by non-reference allele frequency (SNPs)\n# GCsAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of sites\n");
                  stats = args->af_gts_snps;
              }
              else
              {
-                printf("# GCiAF, Genotype concordance by non-reference allele frequency (indels)\n# GCiAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of sites\n");
+                fprintf(pysam_stdout, "# GCiAF, Genotype concordance by non-reference allele frequency (indels)\n# GCiAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of sites\n");
                  stats = args->af_gts_indels;
              }
              uint64_t nrd_m[3] = {0,0,0}, nrd_mm[3] = {0,0,0};
@@ -1292,28 +1292,28 @@ static void print_stats(args_t *args)
                      nrd_mm[j] += stats[i].mm[j];
                  }
                  if ( !i || !n ) continue;   // skip singleton stats and empty bins
-                printf("GC%cAF\t2\t%f", x==0 ? 's' : 'i', 100.*(i-1)/(args->m_af-1));
-                printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].m[T2S(GT_HOM_RR)],stats[i].m[T2S(GT_HET_RA)],stats[i].m[T2S(GT_HOM_AA)]);
-                printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].mm[T2S(GT_HOM_RR)],stats[i].mm[T2S(GT_HET_RA)],stats[i].mm[T2S(GT_HOM_AA)]);
-                printf("\t%f\t%"PRId32"\n", stats[i].r2n ? stats[i].r2sum/stats[i].r2n : -1.0, stats[i].r2n);
+                fprintf(pysam_stdout, "GC%cAF\t2\t%f", x==0 ? 's' : 'i', 100.*(i-1)/(args->m_af-1));
+                fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].m[T2S(GT_HOM_RR)],stats[i].m[T2S(GT_HET_RA)],stats[i].m[T2S(GT_HOM_AA)]);
+                fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].mm[T2S(GT_HOM_RR)],stats[i].mm[T2S(GT_HET_RA)],stats[i].mm[T2S(GT_HOM_AA)]);
+                fprintf(pysam_stdout, "\t%f\t%"PRId32"\n", stats[i].r2n ? stats[i].r2sum/stats[i].r2n : -1.0, stats[i].r2n);
              }
  
              if ( x==0 )
              {
-                printf("# NRD and discordance is calculated as follows:\n");
-                printf("#   m .. number of matches\n");
-                printf("#   x .. number of mismatches\n");
-                printf("#   NRD = (xRR + xRA + xAA) / (xRR + xRA + xAA + mRA + mAA)\n");
-                printf("#   RR discordance = xRR / (xRR + mRR)\n");
-                printf("#   RA discordance = xRA / (xRA + mRA)\n");
-                printf("#   AA discordance = xAA / (xAA + mAA)\n");
-                printf("# Non-Reference Discordance (NRD), SNPs\n# NRDs\t[2]id\t[3]NRD\t[4]Ref/Ref discordance\t[5]Ref/Alt discordance\t[6]Alt/Alt discordance\n");
+                fprintf(pysam_stdout, "# NRD and discordance is calculated as follows:\n");
+                fprintf(pysam_stdout, "#   m .. number of matches\n");
+                fprintf(pysam_stdout, "#   x .. number of mismatches\n");
+                fprintf(pysam_stdout, "#   NRD = (xRR + xRA + xAA) / (xRR + xRA + xAA + mRA + mAA)\n");
+                fprintf(pysam_stdout, "#   RR discordance = xRR / (xRR + mRR)\n");
+                fprintf(pysam_stdout, "#   RA discordance = xRA / (xRA + mRA)\n");
+                fprintf(pysam_stdout, "#   AA discordance = xAA / (xAA + mAA)\n");
+                fprintf(pysam_stdout, "# Non-Reference Discordance (NRD), SNPs\n# NRDs\t[2]id\t[3]NRD\t[4]Ref/Ref discordance\t[5]Ref/Alt discordance\t[6]Alt/Alt discordance\n");
              }
              else
-                printf("# Non-Reference Discordance (NRD), indels\n# NRDi\t[2]id\t[3]NRD\t[4]Ref/Ref discordance\t[5]Ref/Alt discordance\t[6]Alt/Alt discordance\n");
+                fprintf(pysam_stdout, "# Non-Reference Discordance (NRD), indels\n# NRDi\t[2]id\t[3]NRD\t[4]Ref/Ref discordance\t[5]Ref/Alt discordance\t[6]Alt/Alt discordance\n");
              uint64_t m  = nrd_m[T2S(GT_HET_RA)] + nrd_m[T2S(GT_HOM_AA)];
              uint64_t mm = nrd_mm[T2S(GT_HOM_RR)] + nrd_mm[T2S(GT_HET_RA)] + nrd_mm[T2S(GT_HOM_AA)];
-            printf("NRD%c\t2\t%f\t%f\t%f\t%f\n", x==0 ? 's' : 'i',
+            fprintf(pysam_stdout, "NRD%c\t2\t%f\t%f\t%f\t%f\n", x==0 ? 's' : 'i',
                      m+mm ? mm*100.0/(m+mm) : 0,
                      nrd_m[T2S(GT_HOM_RR)]+nrd_mm[T2S(GT_HOM_RR)] ? nrd_mm[T2S(GT_HOM_RR)]*100.0/(nrd_m[T2S(GT_HOM_RR)]+nrd_mm[T2S(GT_HOM_RR)]) : 0,
                      nrd_m[T2S(GT_HET_RA)]+nrd_mm[T2S(GT_HET_RA)] ? nrd_mm[T2S(GT_HET_RA)]*100.0/(nrd_m[T2S(GT_HET_RA)]+nrd_mm[T2S(GT_HET_RA)]) : 0,
@@ -1327,13 +1327,13 @@ static void print_stats(args_t *args)
              smpl_r_t *smpl_r_array;
              if ( x==0 )
              {
-                printf("# GCsS, Genotype concordance by sample (SNPs)\n# GCsS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
+                fprintf(pysam_stdout, "# GCsS, Genotype concordance by sample (SNPs)\n# GCsS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
                  stats = args->smpl_gts_snps;
                  smpl_r_array = args->smpl_r_snps;
              }
              else
              {
-                printf("# GCiS, Genotype concordance by sample (indels)\n# GCiS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
+                fprintf(pysam_stdout, "# GCiS, Genotype concordance by sample (indels)\n# GCiS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
                  stats = args->smpl_gts_indels;
                  smpl_r_array = args->smpl_r_indels;
              }
@@ -1350,16 +1350,16 @@ static void print_stats(args_t *args)
                      double y2_yy = smpl_r->y2-(smpl_r->y*smpl_r->y)/smpl_r->n;
                      r = (sum_crossprod)/sqrt(x2_xx*y2_yy);
                  }
-                printf("GC%cS\t2\t%s\t%.3f",  x==0 ? 's' : 'i', args->files->samples[i], m+mm ? mm*100.0/(m+mm) : 0);
-                printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].m[T2S(GT_HOM_RR)],stats[i].m[T2S(GT_HET_RA)],stats[i].m[T2S(GT_HOM_AA)]);
-                printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].mm[T2S(GT_HOM_RR)],stats[i].mm[T2S(GT_HET_RA)],stats[i].mm[T2S(GT_HOM_AA)]);
-                if (smpl_r->n && !isnan(r)) printf("\t%f\n", r*r);
-                else printf("\t"NA_STRING"\n");
+                fprintf(pysam_stdout, "GC%cS\t2\t%s\t%.3f",  x==0 ? 's' : 'i', args->files->samples[i], m+mm ? mm*100.0/(m+mm) : 0);
+                fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].m[T2S(GT_HOM_RR)],stats[i].m[T2S(GT_HET_RA)],stats[i].m[T2S(GT_HOM_AA)]);
+                fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].mm[T2S(GT_HOM_RR)],stats[i].mm[T2S(GT_HET_RA)],stats[i].mm[T2S(GT_HOM_AA)]);
+                if (smpl_r->n && !isnan(r)) fprintf(pysam_stdout, "\t%f\n", r*r);
+                else fprintf(pysam_stdout, "\t"NA_STRING"\n");
              }
          }
      }
  
-    printf("# DP, Depth distribution\n# DP\t[2]id\t[3]bin\t[4]number of genotypes\t[5]fraction of genotypes (%%)\t[6]number of sites\t[7]fraction of sites (%%)\n");
+    fprintf(pysam_stdout, "# DP, Depth distribution\n# DP\t[2]id\t[3]bin\t[4]number of genotypes\t[5]fraction of genotypes (%%)\t[6]number of sites\t[7]fraction of sites (%%)\n");
      for (id=0; id<args->nstats; id++)
      {
          stats_t *stats = &args->stats[id];
@@ -1368,32 +1368,32 @@ static void print_stats(args_t *args)
          for (i=0; i<stats->dp.m_vals; i++)
          {
              if ( stats->dp.vals[i]==0 && stats->dp_sites.vals[i]==0 ) continue;
-            printf("DP\t%d\t", id);
-            if ( i==0 ) printf("<%d", stats->dp.min);
-            else if ( i+1==stats->dp.m_vals ) printf(">%d", stats->dp.max);
-            else printf("%d", idist_i2bin(&stats->dp,i));
-            printf("\t%"PRId64"\t%f", stats->dp.vals[i], sum ? stats->dp.vals[i]*100./sum : 0);
-            printf("\t%"PRId64"\t%f\n", stats->dp_sites.vals[i], sum_sites ? stats->dp_sites.vals[i]*100./sum_sites : 0);
+            fprintf(pysam_stdout, "DP\t%d\t", id);
+            if ( i==0 ) fprintf(pysam_stdout, "<%d", stats->dp.min);
+            else if ( i+1==stats->dp.m_vals ) fprintf(pysam_stdout, ">%d", stats->dp.max);
+            else fprintf(pysam_stdout, "%d", idist_i2bin(&stats->dp,i));
+            fprintf(pysam_stdout, "\t%"PRId64"\t%f", stats->dp.vals[i], sum ? stats->dp.vals[i]*100./sum : 0);
+            fprintf(pysam_stdout, "\t%"PRId64"\t%f\n", stats->dp_sites.vals[i], sum_sites ? stats->dp_sites.vals[i]*100./sum_sites : 0);
          }
      }
  
      if ( args->files->n_smpl )
      {
-        printf("# PSC, Per-sample counts\n# PSC\t[2]id\t[3]sample\t[4]nRefHom\t[5]nNonRefHom\t[6]nHets\t[7]nTransitions\t[8]nTransversions\t[9]nIndels\t[10]average depth\t[11]nSingletons\n");
+        fprintf(pysam_stdout, "# PSC, Per-sample counts\n# PSC\t[2]id\t[3]sample\t[4]nRefHom\t[5]nNonRefHom\t[6]nHets\t[7]nTransitions\t[8]nTransversions\t[9]nIndels\t[10]average depth\t[11]nSingletons\n");
          for (id=0; id<args->nstats; id++)
          {
              stats_t *stats = &args->stats[id];
              for (i=0; i<args->files->n_smpl; i++)
              {
                  float dp = stats->smpl_ndp[i] ? stats->smpl_dp[i]/(float)stats->smpl_ndp[i] : 0;
-                printf("PSC\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%.1f\t%d\n", id,args->files->samples[i],
+                fprintf(pysam_stdout, "PSC\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%.1f\t%d\n", id,args->files->samples[i],
                      stats->smpl_homRR[i], stats->smpl_homAA[i], stats->smpl_hets[i], stats->smpl_ts[i],
                      stats->smpl_tv[i], stats->smpl_indels[i],dp, stats->smpl_sngl[i]);
              }
          }
  
  
-        printf("# PSI, Per-Sample Indels\n# PSI\t[2]id\t[3]sample\t[4]in-frame\t[5]out-frame\t[6]not applicable\t[7]out/(in+out) ratio\t[8]nHets\t[9]nAA\n");
+        fprintf(pysam_stdout, "# PSI, Per-Sample Indels\n# PSI\t[2]id\t[3]sample\t[4]in-frame\t[5]out-frame\t[6]not applicable\t[7]out/(in+out) ratio\t[8]nHets\t[9]nAA\n");
          for (id=0; id<args->nstats; id++)
          {
              stats_t *stats = &args->stats[id];
@@ -1408,12 +1408,12 @@ static void print_stats(args_t *args)
                  }
                  int nhom = stats->smpl_indel_homs[i];
                  int nhet = stats->smpl_indel_hets[i];
-                printf("PSI\t%d\t%s\t%d\t%d\t%d\t%.2f\t%d\t%d\n", id,args->files->samples[i], in,out,na,in+out?1.0*out/(in+out):0,nhet,nhom);
+                fprintf(pysam_stdout, "PSI\t%d\t%s\t%d\t%d\t%d\t%.2f\t%d\t%d\n", id,args->files->samples[i], in,out,na,in+out?1.0*out/(in+out):0,nhet,nhom);
              }
          }
  
          #ifdef HWE_STATS
-        printf("# HWE\n# HWE\t[2]id\t[3]1st ALT allele frequency\t[4]Number of observations\t[5]25th percentile\t[6]median\t[7]75th percentile\n");
+        fprintf(pysam_stdout, "# HWE\n# HWE\t[2]id\t[3]1st ALT allele frequency\t[4]Number of observations\t[5]25th percentile\t[6]median\t[7]75th percentile\n");
          for (id=0; id<args->nstats; id++)
          {
              stats_t *stats = &args->stats[id];
@@ -1426,28 +1426,28 @@ static void print_stats(args_t *args)
                  if ( !sum_tot ) continue;
  
                  int nprn = 3;
-                printf("HWE\t%d\t%f\t%d",id,100.*(i-1)/(args->m_af-1),sum_tot);
+                fprintf(pysam_stdout, "HWE\t%d\t%f\t%d",id,100.*(i-1)/(args->m_af-1),sum_tot);
                  for (j=0; j<args->naf_hwe; j++)
                  {
                      sum_tmp += ptr[j];
                      float frac = (float)sum_tmp/sum_tot;
                      if ( frac >= 0.75 )
                      {
-                        while (nprn>0) { printf("\t%f", (float)j/args->naf_hwe); nprn--; }
+                        while (nprn>0) { fprintf(pysam_stdout, "\t%f", (float)j/args->naf_hwe); nprn--; }
                          break;
                      }
                      if ( frac >= 0.5 )
                      {
-                        while (nprn>1) { printf("\t%f", (float)j/args->naf_hwe); nprn--; }
+                        while (nprn>1) { fprintf(pysam_stdout, "\t%f", (float)j/args->naf_hwe); nprn--; }
                          continue;
                      }
                      if ( frac >= 0.25 )
                      {
-                        while (nprn>2) { printf("\t%f", (float)j/args->naf_hwe); nprn--; }
+                        while (nprn>2) { fprintf(pysam_stdout, "\t%f", (float)j/args->naf_hwe); nprn--; }
                      }
                  }
                  assert(nprn==0);
-                printf("\n");
+                fprintf(pysam_stdout, "\n");
              }
          }
          #endif
@@ -1456,32 +1456,32 @@ static void print_stats(args_t *args)
  
  static void usage(void)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Parses VCF or BCF and produces stats which can be plotted using plot-vcfstats.\n");
-    fprintf(pysamerr, "         When two files are given, the program generates separate stats for intersection\n");
-    fprintf(pysamerr, "         and the complements. By default only sites are compared, -s/-S must given to include\n");
-    fprintf(pysamerr, "         also sample columns.\n");
-    fprintf(pysamerr, "Usage:   bcftools stats [options] <A.vcf.gz> [<B.vcf.gz>]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Options:\n");
-    fprintf(pysamerr, "    -1, --1st-allele-only              include only 1st allele at multiallelic sites\n");
-    fprintf(pysamerr, "    -c, --collapse <string>            treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
-    fprintf(pysamerr, "    -d, --depth <int,int,int>          depth distribution: min,max,bin size [0,500,1]\n");
-    fprintf(pysamerr, "    -e, --exclude <expr>               exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(pysamerr, "    -E, --exons <file.gz>              tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)\n");
-    fprintf(pysamerr, "    -f, --apply-filters <list>         require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
-    fprintf(pysamerr, "    -F, --fasta-ref <file>             faidx indexed reference sequence file to determine INDEL context\n");
-    fprintf(pysamerr, "    -i, --include <expr>               select sites for which the expression is true (see man page for details)\n");
-    fprintf(pysamerr, "    -I, --split-by-ID                  collect stats for sites with ID separately (known vs novel)\n");
-    fprintf(pysamerr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
-    fprintf(pysamerr, "    -s, --samples <list>               list of samples for sample stats, \"-\" to include all samples\n");
-    fprintf(pysamerr, "    -S, --samples-file <file>          file of samples to include\n");
-    fprintf(pysamerr, "    -t, --targets <region>             similar to -r but streams rather than index-jumps\n");
-    fprintf(pysamerr, "    -T, --targets-file <file>          similar to -R but streams rather than index-jumps\n");
-    fprintf(pysamerr, "    -u, --user-tstv <TAG[:min:max:n]>  collect Ts/Tv stats for any tag using the given binning [0:1:100]\n");
-    fprintf(pysamerr, "    -v, --verbose                      produce verbose per-site and per-sample output\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Parses VCF or BCF and produces stats which can be plotted using plot-vcfstats.\n");
+    fprintf(pysam_stderr, "         When two files are given, the program generates separate stats for intersection\n");
+    fprintf(pysam_stderr, "         and the complements. By default only sites are compared, -s/-S must given to include\n");
+    fprintf(pysam_stderr, "         also sample columns.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools stats [options] <A.vcf.gz> [<B.vcf.gz>]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "    -1, --1st-allele-only              include only 1st allele at multiallelic sites\n");
+    fprintf(pysam_stderr, "    -c, --collapse <string>            treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
+    fprintf(pysam_stderr, "    -d, --depth <int,int,int>          depth distribution: min,max,bin size [0,500,1]\n");
+    fprintf(pysam_stderr, "    -e, --exclude <expr>               exclude sites for which the expression is true (see man page for details)\n");
+    fprintf(pysam_stderr, "    -E, --exons <file.gz>              tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)\n");
+    fprintf(pysam_stderr, "    -f, --apply-filters <list>         require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
+    fprintf(pysam_stderr, "    -F, --fasta-ref <file>             faidx indexed reference sequence file to determine INDEL context\n");
+    fprintf(pysam_stderr, "    -i, --include <expr>               select sites for which the expression is true (see man page for details)\n");
+    fprintf(pysam_stderr, "    -I, --split-by-ID                  collect stats for sites with ID separately (known vs novel)\n");
+    fprintf(pysam_stderr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "    -s, --samples <list>               list of samples for sample stats, \"-\" to include all samples\n");
+    fprintf(pysam_stderr, "    -S, --samples-file <file>          file of samples to include\n");
+    fprintf(pysam_stderr, "    -t, --targets <region>             similar to -r but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "    -T, --targets-file <file>          similar to -R but streams rather than index-jumps\n");
+    fprintf(pysam_stderr, "    -u, --user-tstv <TAG[:min:max:n]>  collect Ts/Tv stats for any tag using the given binning [0:1:100]\n");
+    fprintf(pysam_stderr, "    -v, --verbose                      produce verbose per-site and per-sample output\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
diff --git a/bcftools/vcfview.c b/bcftools/vcfview.c

index ed415950b3977116052efbb33659257bd955cc3f..c14075d638cfd0434701e4cce691b1aac3fb5006 100644 (file)
--- a/bcftools/vcfview.c
+++ b/bcftools/vcfview.c
@@ -72,6 +72,7 @@ typedef struct _args_t
      int sample_is_file, force_samples;
      char *include_types, *exclude_types;
      int include, exclude;
+    int record_cmd_line;
      htsFile *out;
  }
  args_t;
@@ -86,7 +87,8 @@ static void init_data(args_t *args)
          bcf_hdr_append(args->hdr,"##INFO=<ID=AC,Number=A,Type=Integer,Description=\"Allele count in genotypes\">");
          bcf_hdr_append(args->hdr,"##INFO=<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">");
      }
-    bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_view");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_view");
+    else bcf_hdr_sync(args->hdr);
  
      // setup sample data
      if (args->sample_names)
@@ -485,6 +487,7 @@ static void usage(args_t *args)
      fprintf(stderr, "    -G,   --drop-genotypes              drop individual genotype information (after subsetting if -s option set)\n");
      fprintf(stderr, "    -h/H, --header-only/--no-header     print the header only/suppress the header in VCF output\n");
      fprintf(stderr, "    -l,   --compression-level [0-9]     compression level: 0 uncompressed, 1 best speed, 9 best compression [%d]\n", args->clevel);
+    fprintf(stderr, "          --no-version                  do not append version and command line to the header\n");
      fprintf(stderr, "    -o,   --output-file <file>          output file name [stdout]\n");
      fprintf(stderr, "    -O,   --output-type <b|u|z|v>       b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
      fprintf(stderr, "    -r, --regions <region>              restrict to comma-separated list of regions\n");
@@ -529,6 +532,7 @@ int main_vcfview(int argc, char *argv[])
      args->update_info = 1;
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      int targets_is_file = 0, regions_is_file = 0;
  
      static struct option loptions[] =
@@ -569,6 +573,7 @@ int main_vcfview(int argc, char *argv[])
          {"max-af",required_argument,NULL,'Q'},
          {"phased",no_argument,NULL,'p'},
          {"exclude-phased",no_argument,NULL,'P'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      char *tmp;
@@ -678,6 +683,7 @@ int main_vcfview(int argc, char *argv[])
                  break;
              }
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case '?': usage(args);
              default: error("Unknown argument: %s\n", optarg);
          }
diff --git a/bcftools/vcfview.c.pysam.c b/bcftools/vcfview.c.pysam.c

index a6a0cc002e4390779f3461576b8e3fdbf0432fc9..53b7c53304a71617e9cced47cd1a24b49f5bed87 100644 (file)
--- a/bcftools/vcfview.c.pysam.c
+++ b/bcftools/vcfview.c.pysam.c
@@ -74,6 +74,7 @@ typedef struct _args_t
      int sample_is_file, force_samples;
      char *include_types, *exclude_types;
      int include, exclude;
+    int record_cmd_line;
      htsFile *out;
  }
  args_t;
@@ -88,7 +89,8 @@ static void init_data(args_t *args)
          bcf_hdr_append(args->hdr,"##INFO=<ID=AC,Number=A,Type=Integer,Description=\"Allele count in genotypes\">");
          bcf_hdr_append(args->hdr,"##INFO=<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">");
      }
-    bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_view");
+    if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_view");
+    else bcf_hdr_sync(args->hdr);
  
      // setup sample data
      if (args->sample_names)
@@ -112,7 +114,7 @@ static void init_data(args_t *args)
              for (i=0; i<nsmpl; i++) {
                  if (!khash_str2int_has_key(hdr_samples,smpl[i])) {
                      if (args->force_samples) {
-                        fprintf(pysamerr, "Warn: exclude called for sample that does not exist in header: \"%s\"... skipping\n", smpl[i]);
+                        fprintf(pysam_stderr, "Warn: exclude called for sample that does not exist in header: \"%s\"... skipping\n", smpl[i]);
                      } else {
                          error("Error: exclude called for sample that does not exist in header: \"%s\". Use \"--force-samples\" to ignore this error.\n", smpl[i]);
                      }
@@ -133,7 +135,7 @@ static void init_data(args_t *args)
              for (i=0; i<nsmpl; i++) {
                  if (!khash_str2int_has_key(hdr_samples,smpl[i])) {
                      if (args->force_samples) {
-                        fprintf(pysamerr, "Warn: subset called for sample that does not exist in header: \"%s\"... skipping\n", smpl[i]);
+                        fprintf(pysam_stderr, "Warn: subset called for sample that does not exist in header: \"%s\"... skipping\n", smpl[i]);
                          continue;
                      } else {
                          error("Error: subset called for sample that does not exist in header: \"%s\". Use \"--force-samples\" to ignore this error.\n", smpl[i]);
@@ -147,7 +149,7 @@ static void init_data(args_t *args)
          free(smpl);
          khash_str2int_destroy(hdr_samples);
          if (args->n_samples == 0) {
-            fprintf(pysamerr, "Warn: subsetting has removed all samples\n");
+            fprintf(pysam_stderr, "Warn: subsetting has removed all samples\n");
              args->sites_only = 1;
          }
      }
@@ -158,7 +160,7 @@ static void init_data(args_t *args)
      // determine variant types to include/exclude
      if (args->include_types || args->exclude_types) {
          if (args->include_types && args->exclude_types) {
-            fprintf(pysamerr, "Error: only supply one of --include-types, --exclude-types options\n");
+            fprintf(pysam_stderr, "Error: only supply one of --include-types, --exclude-types options\n");
              exit(1);
          }
          char **type_list = 0;
@@ -186,8 +188,8 @@ static void init_data(args_t *args)
                  else if (strcmp(type_list[i], "mnps") == 0) args->include |= VCF_MNP;
                  else if (strcmp(type_list[i], "other") == 0) args->include |= VCF_OTHER;
                  else {
-                    fprintf(pysamerr, "[E::%s] unknown type\n", type_list[i]);
-                    fprintf(pysamerr, "Accepted types are snps, indels, mnps, other\n");
+                    fprintf(pysam_stderr, "[E::%s] unknown type\n", type_list[i]);
+                    fprintf(pysam_stderr, "Accepted types are snps, indels, mnps, other\n");
                      exit(1);
                  }
              }
@@ -200,8 +202,8 @@ static void init_data(args_t *args)
                  else if (strcmp(type_list[i], "mnps") == 0) args->exclude |= VCF_MNP;
                  else if (strcmp(type_list[i], "other") == 0) args->exclude |= VCF_OTHER;
                  else {
-                    fprintf(pysamerr, "[E::%s] unknown type\n", type_list[i]);
-                    fprintf(pysamerr, "Accepted types are snps, indels, mnps, other\n");
+                    fprintf(pysam_stderr, "[E::%s] unknown type\n", type_list[i]);
+                    fprintf(pysam_stderr, "Accepted types are snps, indels, mnps, other\n");
                      exit(1);
                  }
              }
@@ -290,7 +292,7 @@ int bcf_all_phased(const bcf_hdr_t *header, bcf1_t *line)
                  case BCF_BT_INT8:  BRANCH_INT(int8_t,  bcf_int8_vector_end); break;
                  case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_vector_end); break;
                  case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_vector_end); break;
-                default: fprintf(pysamerr, "[E::%s] todo: fmt_type %d\n", __func__, fmt_ptr->type); exit(1); break;
+                default: fprintf(pysam_stderr, "[E::%s] todo: fmt_type %d\n", __func__, fmt_ptr->type); exit(1); break;
              }
              #undef BRANCH_INT
              if (!sample_phased) {
@@ -479,44 +481,45 @@ void set_allele_type (int *atype, char *atype_string)
  
  static void usage(args_t *args)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   VCF/BCF conversion, view, subset and filter VCF/BCF files.\n");
-    fprintf(pysamerr, "Usage:   bcftools view [options] <in.vcf.gz> [region1 [...]]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Output options:\n");
-    fprintf(pysamerr, "    -G,   --drop-genotypes              drop individual genotype information (after subsetting if -s option set)\n");
-    fprintf(pysamerr, "    -h/H, --header-only/--no-header     print the header only/suppress the header in VCF output\n");
-    fprintf(pysamerr, "    -l,   --compression-level [0-9]     compression level: 0 uncompressed, 1 best speed, 9 best compression [%d]\n", args->clevel);
-    fprintf(pysamerr, "    -o,   --output-file <file>          output file name [stdout]\n");
-    fprintf(pysamerr, "    -O,   --output-type <b|u|z|v>       b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
-    fprintf(pysamerr, "    -r, --regions <region>              restrict to comma-separated list of regions\n");
-    fprintf(pysamerr, "    -R, --regions-file <file>           restrict to regions listed in a file\n");
-    fprintf(pysamerr, "    -t, --targets [^]<region>           similar to -r but streams rather than index-jumps. Exclude regions with \"^\" prefix\n");
-    fprintf(pysamerr, "    -T, --targets-file [^]<file>        similar to -R but streams rather than index-jumps. Exclude regions with \"^\" prefix\n");
-    fprintf(pysamerr, "        --threads <int>                 number of extra output compression threads [0]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Subset options:\n");
-    fprintf(pysamerr, "    -a, --trim-alt-alleles        trim alternate alleles not seen in the subset\n");
-    fprintf(pysamerr, "    -I, --no-update               do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)\n");
-    fprintf(pysamerr, "    -s, --samples [^]<list>       comma separated list of samples to include (or exclude with \"^\" prefix)\n");
-    fprintf(pysamerr, "    -S, --samples-file [^]<file>  file of samples to include (or exclude with \"^\" prefix)\n");
-    fprintf(pysamerr, "        --force-samples           only warn about unknown subset samples\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Filter options:\n");
-    fprintf(pysamerr, "    -c/C, --min-ac/--max-ac <int>[:<type>]      minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent\n");
-    fprintf(pysamerr, "                                                   (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]\n");
-    fprintf(pysamerr, "    -f,   --apply-filters <list>                require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
-    fprintf(pysamerr, "    -g,   --genotype [^]<hom|het|miss>          require one or more hom/het/missing genotype or, if prefixed with \"^\", exclude sites with hom/het/missing genotypes\n");
-    fprintf(pysamerr, "    -i/e, --include/--exclude <expr>            select/exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(pysamerr, "    -k/n, --known/--novel                       select known/novel sites only (ID is not/is '.')\n");
-    fprintf(pysamerr, "    -m/M, --min-alleles/--max-alleles <int>     minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites)\n");
-    fprintf(pysamerr, "    -p/P, --phased/--exclude-phased             select/exclude sites where all samples are phased\n");
-    fprintf(pysamerr, "    -q/Q, --min-af/--max-af <float>[:<type>]    minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent\n");
-    fprintf(pysamerr, "                                                   (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]\n");
-    fprintf(pysamerr, "    -u/U, --uncalled/--exclude-uncalled         select/exclude sites without a called genotype\n");
-    fprintf(pysamerr, "    -v/V, --types/--exclude-types <list>        select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]\n");
-    fprintf(pysamerr, "    -x/X, --private/--exclude-private           select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   VCF/BCF conversion, view, subset and filter VCF/BCF files.\n");
+    fprintf(pysam_stderr, "Usage:   bcftools view [options] <in.vcf.gz> [region1 [...]]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Output options:\n");
+    fprintf(pysam_stderr, "    -G,   --drop-genotypes              drop individual genotype information (after subsetting if -s option set)\n");
+    fprintf(pysam_stderr, "    -h/H, --header-only/--no-header     print the header only/suppress the header in VCF output\n");
+    fprintf(pysam_stderr, "    -l,   --compression-level [0-9]     compression level: 0 uncompressed, 1 best speed, 9 best compression [%d]\n", args->clevel);
+    fprintf(pysam_stderr, "          --no-version                  do not append version and command line to the header\n");
+    fprintf(pysam_stderr, "    -o,   --output-file <file>          output file name [stdout]\n");
+    fprintf(pysam_stderr, "    -O,   --output-type <b|u|z|v>       b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+    fprintf(pysam_stderr, "    -r, --regions <region>              restrict to comma-separated list of regions\n");
+    fprintf(pysam_stderr, "    -R, --regions-file <file>           restrict to regions listed in a file\n");
+    fprintf(pysam_stderr, "    -t, --targets [^]<region>           similar to -r but streams rather than index-jumps. Exclude regions with \"^\" prefix\n");
+    fprintf(pysam_stderr, "    -T, --targets-file [^]<file>        similar to -R but streams rather than index-jumps. Exclude regions with \"^\" prefix\n");
+    fprintf(pysam_stderr, "        --threads <int>                 number of extra output compression threads [0]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Subset options:\n");
+    fprintf(pysam_stderr, "    -a, --trim-alt-alleles        trim alternate alleles not seen in the subset\n");
+    fprintf(pysam_stderr, "    -I, --no-update               do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)\n");
+    fprintf(pysam_stderr, "    -s, --samples [^]<list>       comma separated list of samples to include (or exclude with \"^\" prefix)\n");
+    fprintf(pysam_stderr, "    -S, --samples-file [^]<file>  file of samples to include (or exclude with \"^\" prefix)\n");
+    fprintf(pysam_stderr, "        --force-samples           only warn about unknown subset samples\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Filter options:\n");
+    fprintf(pysam_stderr, "    -c/C, --min-ac/--max-ac <int>[:<type>]      minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent\n");
+    fprintf(pysam_stderr, "                                                   (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]\n");
+    fprintf(pysam_stderr, "    -f,   --apply-filters <list>                require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
+    fprintf(pysam_stderr, "    -g,   --genotype [^]<hom|het|miss>          require one or more hom/het/missing genotype or, if prefixed with \"^\", exclude sites with hom/het/missing genotypes\n");
+    fprintf(pysam_stderr, "    -i/e, --include/--exclude <expr>            select/exclude sites for which the expression is true (see man page for details)\n");
+    fprintf(pysam_stderr, "    -k/n, --known/--novel                       select known/novel sites only (ID is not/is '.')\n");
+    fprintf(pysam_stderr, "    -m/M, --min-alleles/--max-alleles <int>     minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites)\n");
+    fprintf(pysam_stderr, "    -p/P, --phased/--exclude-phased             select/exclude sites where all samples are phased\n");
+    fprintf(pysam_stderr, "    -q/Q, --min-af/--max-af <float>[:<type>]    minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent\n");
+    fprintf(pysam_stderr, "                                                   (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]\n");
+    fprintf(pysam_stderr, "    -u/U, --uncalled/--exclude-uncalled         select/exclude sites without a called genotype\n");
+    fprintf(pysam_stderr, "    -v/V, --types/--exclude-types <list>        select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]\n");
+    fprintf(pysam_stderr, "    -x/X, --private/--exclude-private           select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples\n");
+    fprintf(pysam_stderr, "\n");
      exit(1);
  }
  
@@ -531,6 +534,7 @@ int main_vcfview(int argc, char *argv[])
      args->update_info = 1;
      args->output_type = FT_VCF;
      args->n_threads = 0;
+    args->record_cmd_line = 1;
      int targets_is_file = 0, regions_is_file = 0;
  
      static struct option loptions[] =
@@ -571,6 +575,7 @@ int main_vcfview(int argc, char *argv[])
          {"max-af",required_argument,NULL,'Q'},
          {"phased",no_argument,NULL,'p'},
          {"exclude-phased",no_argument,NULL,'P'},
+        {"no-version",no_argument,NULL,8},
          {NULL,0,NULL,0}
      };
      char *tmp;
@@ -680,6 +685,7 @@ int main_vcfview(int argc, char *argv[])
                  break;
              }
              case  9 : args->n_threads = strtol(optarg, 0, 0); break;
+            case  8 : args->record_cmd_line = 0; break;
              case '?': usage(args);
              default: error("Unknown argument: %s\n", optarg);
          }
diff --git a/bcftools/version.c.pysam.c b/bcftools/version.c.pysam.c

index 1fd0d4e31b05e70530b6581c72f9a7d763266e1c..af5453276c44161470f3e29c4c2a6cc1c86dd4bb 100644 (file)
--- a/bcftools/version.c.pysam.c
+++ b/bcftools/version.c.pysam.c
@@ -41,7 +41,7 @@ void error(const char *format, ...)
  {
      va_list ap;
      va_start(ap, format);
-    vfprintf(pysamerr, format, ap);
+    vfprintf(pysam_stderr, format, ap);
      va_end(ap);
      exit(-1);
  }
diff --git a/bcftools/version.h b/bcftools/version.h

index 70d4f9327649bdf7c52f541ab1b5ab37b307ec73..05929f550019c4df79f09df042455a3173c4483b 100644 (file)
--- a/bcftools/version.h
+++ b/bcftools/version.h
@@ -1 +1 @@
-#define BCFTOOLS_VERSION "1.3"
+#define BCFTOOLS_VERSION "1.3.1"
diff --git a/doc/faq.rst b/doc/faq.rst

index 1f45981453a479dd3450453bdfbc08e07883f4a8..d5d84c40c8a5dfad2587daa314d5cef1cbc6a45c 100644 (file)
--- a/doc/faq.rst
+++ b/doc/faq.rst
@@ -10,7 +10,7 @@ use the github URL: https://github.com/pysam-developers/pysam.
  As pysam is a wrapper around htslib and the samtools package, I
  suggest cite `Li et al (2009) <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`.
  
-Is pysam thread-save?
+Is pysam thread-safe?
  =====================
  
  Pysam is a mix of python and C code. Instructions within python are
diff --git a/doc/glossary.rst b/doc/glossary.rst

index f40bcfbe0d7298f9d70003bb88a32fb24814a6a9..e35a537116dc3b057085b28963478e1990f5215e 100644 (file)
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -81,7 +81,8 @@ Glossary
  
        In alignments with soft clipping part of the query sequence
        are not aligned. The unaligned query sequence is still part
-      of the alignment record. This is in difference to hard clipped reads.
+      of the alignment record. This is in contrast to
+      :term:`hard clipped` reads.
  
     hard clipping
     hard clipped
diff --git a/doc/installation.rst b/doc/installation.rst

index a3fa2a2c88f79b257c2ba3bcd71313ff9b05aad9..2dbf2a472c0a90f4ff2d987cf414f3ef5bc953bf 100644 (file)
--- a/doc/installation.rst
+++ b/doc/installation.rst
@@ -58,8 +58,3 @@ python 2.7 contains pre-built C-files and cython needs not be present
  during installation. However, when installing the source tarball on
  python 3 or building from the repository, these pre-built C-files are
  not present and cython needs to be installed beforehand.
-
-
-
-
-
diff --git a/doc/release.rst b/doc/release.rst

index 802c6e55fd363e16b6b1a469fb68169b5b530958..f49b8f05d61a5c8c127a1adbe3e7ae09b1332afb 100644 (file)
--- a/doc/release.rst
+++ b/doc/release.rst
@@ -2,6 +2,26 @@
  Release notes
  =============
  
+Release 0.9.1
+=============
+
+This is a bugfix release addressing some installation problems
+in pysam 0.9.0, in particular:
+
+* patch included htslib to work with older libcurl versions, fixes #262.
+* do not require cython for python 3 install, fixes #260
+* FastaFile does not accept filepath_index any more, see #270
+* add AlignedSegment.get_cigar_stats method.
+* py3 bugfix in VariantFile.subset_samples, fixes #272
+* add missing sysconfig import, fixes #278
+* do not redirect stdout, but instead write to a separately
+  created file. This should resolve issues when pysam is used
+  in notebooks or other environments that redirect stdout.
+* wrap htslib-1.3.1, samtools-1.3.1 and bcftools-1.3.1
+* use bgzf throughout instead of gzip
+* allow specifying a fasta reference for CRAM file when opening
+  for both read and write, fixes #280
+
  Release 0.9.0
  =============
  
diff --git a/import.py b/import.py

index 40186984fe845aceaba2582f87dc46da7a557ed1..12d20162ea182776cc5bd4e6062a28fbb58cfa23 100644 (file)
--- a/import.py
+++ b/import.py
@@ -12,16 +12,49 @@
  # For samtools, type:
  # rm -rf samtools
  # python import.py samtools download/samtools
+#
  # Manually, then:
  # modify config.h to set compatibility flags
-# change bamtk.c.pysam.c/main to bamtk.c.pysam.c/samtools_main
  #
  # For bcftools, type:
  # rm -rf bedtools
  # python import.py bedtools download/bedtools
+# rm -rf bedtools/test bedtools/plugins
+
+import fnmatch
  import os
+import re
+import shutil
  import sys
-import fnmatch
+import hashlib
+
+
+EXCLUDE = {
+    "samtools": (
+        "razip.c", "bgzip.c", "main.c",
+        "calDepth.c", "bam2bed.c", "wgsim.c",
+        "md5fa.c", "md5sum-lite.c", "maq2sam.c",
+        "bamcheck.c", "chk_indel.c", "vcf-miniview.c",
+        "htslib-1.3",   # do not import twice
+        "hfile_irods.c",  # requires irods library
+    ),
+    "bcftools": (
+        "test", "plugins", "peakfit.c",
+        "peakfit.h",
+        # needs to be renamed, name conflict with samtools reheader
+        "reheader.c",
+        "polysomy.c"),
+    "htslib": (
+        'htslib/tabix.c', 'htslib/bgzip.c',
+        'htslib/htsfile.c', 'htslib/hfile_irods.c'),
+}
+
+
+MAIN = {
+    "samtools": "bamtk",
+    "bcftools": "main"
+}
+
  
  
  def locate(pattern, root=os.curdir):
@@ -35,20 +68,57 @@ def locate(pattern, root=os.curdir):
  
  def _update_pysam_files(cf, destdir):
      '''update pysam files applying redirection of ouput'''
+    basename = os.path.basename(destdir)
      for filename in cf:
          if not filename:
              continue
          dest = filename + ".pysam.c"
          with open(filename) as infile:
+            lines = "".join(infile.readlines())
              with open(dest, "w") as outfile:
                  outfile.write('#include "pysam.h"\n\n')
-                outfile.write(
-                    re.sub("stderr", "pysamerr", "".join(infile.readlines())))
+                subname, _ = os.path.splitext(os.path.basename(filename))
+                if subname in MAIN.get(basename, []):
+                    lines = re.sub("int main\(", "int {}_main(".format(
+                        basename), lines)
+                else:
+                    lines = re.sub("int main\(", "int {}_{}_main(".format(
+                        basename, subname), lines)
+                lines = re.sub("stderr", "pysam_stderr", lines)
+                lines = re.sub("stdout", "pysam_stdout", lines)
+                lines = re.sub(" printf\(", " fprintf(pysam_stdout, ", lines)
+                lines = re.sub("([^kf])puts\(([^)]+)\)",
+                               r"\1fputs(\2, pysam_stdout) & fputc('\\n', pysam_stdout)",
+                               lines)
+                lines = re.sub("putchar\(([^)]+)\)",
+                               r"fputc(\1, pysam_stdout)", lines)
+
+                fn = os.path.basename(filename)
+                # some specific fixes:
+                SPECIFIC_SUBSTITUTIONS = {
+                    "bam_md.c": (
+                        'sam_open_format("-", mode_w',
+                        'sam_open_format(pysam_stdout_fn, mode_w'),
+                    "phase.c": (
+                        'putc("ACGT"[f->seq[j] == 1? (c&3, pysam_stdout) : (c>>16&3)]);',
+                        'putc("ACGT"[f->seq[j] == 1? (c&3) : (c>>16&3)], pysam_stdout);'),
+                    "cut_target.c": (
+                        'putc(33 + (cns[j]>>8>>2, pysam_stdout));',
+                        'putc(33 + (cns[j]>>8>>2), pysam_stdout);')
+                    }
+                if fn in SPECIFIC_SUBSTITUTIONS:
+                    lines = lines.replace(
+                        SPECIFIC_SUBSTITUTIONS[fn][0],
+                        SPECIFIC_SUBSTITUTIONS[fn][1])
+                outfile.write(lines)
+
              with open(os.path.join(destdir, "pysam.h"), "w")as outfile:
                  outfile.write("""#ifndef PYSAM_H
  #define PYSAM_H
  #include "stdio.h"
-extern FILE * pysamerr;
+extern FILE * pysam_stderr;
+extern FILE * pysam_stdout;
+extern const char * pysam_stdout_fn;
  #endif
  """)
  
@@ -57,7 +127,7 @@ if len(sys.argv) >= 1:
      if len(sys.argv) != 3:
          raise ValueError("import requires dest src")
  
-    dest, srcdir = sys.argv[2:4]
+    dest, srcdir = sys.argv[1:3]
      if dest not in EXCLUDE:
          raise ValueError("import expected one of %s" %
                           ",".join(EXCLUDE.keys()))
diff --git a/pysam/__init__.py b/pysam/__init__.py

index cd32bf57c1f0b72075305bd2a9f1a0debdd70ec8..d1b5d410c80239e2b1169dc6f21435fae4142720 100644 (file)
--- a/pysam/__init__.py
+++ b/pysam/__init__.py
@@ -1,5 +1,6 @@
  import os
  import sys
+import sysconfig
  
  from pysam.libchtslib import *
  from pysam.cutils import *
@@ -23,6 +24,7 @@ import pysam.Pileup as Pileup
  from pysam.samtools import *
  import pysam.config
  
+
  # export all the symbols from separate modules
  __all__ = \
      libchtslib.__all__ +\
diff --git a/pysam/calignedsegment.pyx b/pysam/calignedsegment.pyx

index 0a2b94f8692b0ed5777c5e8ab699ad7792e85d80..f4e0750665832698e30e173da1e8a827a91f371f 100644 (file)
--- a/pysam/calignedsegment.pyx
+++ b/pysam/calignedsegment.pyx
@@ -63,6 +63,7 @@ from cpython cimport array as c_array
  from cpython.version cimport PY_MAJOR_VERSION
  from cpython cimport PyErr_SetString, PyBytes_FromStringAndSize
  from libc.string cimport strchr
+from cpython cimport array as c_array
  
  from pysam.cutils cimport force_bytes, force_str, \
      charptr_to_str, charptr_to_bytes
@@ -76,14 +77,15 @@ cdef char * parray_types = 'bBhHiIf'
  # translation tables
  
  # cigar code to character and vice versa
-cdef char* CODE2CIGAR= "MIDNSHP=X"
+cdef char* CODE2CIGAR= "MIDNSHP=XB"
+cdef int NCIGAR_CODES = 10
  
  if PY_MAJOR_VERSION >= 3:
      CIGAR2CODE = dict([y, x] for x, y in enumerate(CODE2CIGAR))
  else:
      CIGAR2CODE = dict([ord(y), x] for x, y in enumerate(CODE2CIGAR))
  
-CIGAR_REGEX = re.compile("(\d+)([MIDNSHP=X])")
+CIGAR_REGEX = re.compile("(\d+)([MIDNSHP=XB])")
  
  #####################################################################
  # typecode guessing
@@ -93,16 +95,16 @@ cdef inline char map_typecode_htslib_to_python(uint8_t s):
  
      # map type from htslib to python array
      cdef char * f = strchr(htslib_types, s)
+
      if f == NULL:
-        raise ValueError("unknown htslib tag typecode '%s'" % chr(s))
+        return 0
      return parray_types[f - htslib_types]
  
  cdef inline uint8_t map_typecode_python_to_htslib(char s):
      """determine value type from type code of array"""
      cdef char * f = strchr(parray_types, s)
      if f == NULL:
-        raise ValueError(
-            "unknown conversion for array typecode '%s'" % s)
+        return 0
      return htslib_types[f - parray_types]
  
  # optional tag data manipulation
@@ -229,6 +231,8 @@ cdef inline packTags(tags):
      """
      fmts, args = ["<"], []
      
+    cdef char array_typecode
+
      datatype2format = {
          b'c': ('b', 1),
          b'C': ('B', 1),
@@ -273,9 +277,14 @@ cdef inline packTags(tags):
          elif isinstance(value, array.array):
              # binary tags from arrays
              if valuetype is None:
-                valuetype = force_bytes(chr(
-                    map_typecode_python_to_htslib(ord(value.typecode))))
+                array_typecode = map_typecode_python_to_htslib(ord(value.typecode))
+
+                if array_typecode == 0:
+                    raise ValueError("unsupported type code '{}'"
+                                     .format(value.typecode))
  
+                valuetype = force_bytes(chr(array_typecode))
+                    
              if valuetype not in datatype2format:
                  raise ValueError("invalid value type '%s' (%s)" %
                                   (valuetype, type(valuetype)))
@@ -501,6 +510,13 @@ cdef inline bytes build_alignment_sequence(bam1_t * src):
      with the cigar string to reconstitute the query or the reference
      sequence.
  
+    Positions corresponding to `N` (skipped region from the reference)
+    in the CIGAR string will not appear in the returned sequence. The
+    MD should correspondingly not contain these. Thus proper tags are::
+    
+       Deletion from the reference:   cigar=5M1D5M    MD=5^C5
+       Skipped region from reference: cigar=5M1N5M    MD=10
+
      Returns
      -------
  
@@ -542,10 +558,12 @@ cdef inline bytes build_alignment_sequence(bam1_t * src):
                  s[s_idx] = read_sequence[r_idx] 
                  r_idx += 1
                  s_idx += 1
-        elif op == BAM_CDEL or op == BAM_CREF_SKIP:
+        elif op == BAM_CDEL:
              for i from 0 <= i < l:
                  s[s_idx] = '-'
                  s_idx += 1
+        elif op == BAM_CREF_SKIP:
+            pass
          elif op == BAM_CINS:
              for i from 0 <= i < l:
                  # encode insertions into reference as lowercase
@@ -1409,10 +1427,12 @@ cdef class AlignedSegment:
                  for i from 0 <= i < l:
                      result.append(ref_seq[r_idx])
                      r_idx += 1
-            elif op == BAM_CDEL or op == BAM_CREF_SKIP:
+            elif op == BAM_CDEL:
                  for i from 0 <= i < l:
                      result.append(ref_seq[r_idx])
                      r_idx += 1
+            elif op == BAM_CREF_SKIP:
+                pass
              elif op == BAM_CINS:
                  r_idx += l
              elif op == BAM_CSOFT_CLIP:
@@ -1426,7 +1446,6 @@ cdef class AlignedSegment:
  
          return "".join(result)
  
-
      def get_aligned_pairs(self, matches_only=False, with_seq=False):
          """a list of aligned read (query) and reference positions.
  
@@ -1505,7 +1524,7 @@ cdef class AlignedSegment:
                  else:
                      qpos += l
  
-            elif op == BAM_CDEL or op == BAM_CREF_SKIP:
+            elif op == BAM_CDEL:
                  if not _matches_only:
                      if _with_seq:
                          for i from pos <= i < pos + l:
@@ -1519,6 +1538,17 @@ cdef class AlignedSegment:
              elif op == BAM_CHARD_CLIP:
                  pass # advances neither
  
+            elif op == BAM_CREF_SKIP:
+                if not _matches_only:
+                    if _with_seq:
+                        for i from pos <= i < pos + l:
+                            result.append((None, i, None))
+                    else:
+                        for i from pos <= i < pos + l:
+                            result.append((None, i))
+
+                pos += l
+
              elif op == BAM_CPAD:
                  raise NotImplementedError(
                      "Padding (BAM_CPAD, 6) is currently not supported. "
@@ -1597,6 +1627,84 @@ cdef class AlignedSegment:
  
          return overlap
  
+    def get_cigar_stats(self):
+        """summary of operations in cigar string.
+
+        The output order in the array is "MIDNSHP=XB" followed by a
+        field for the NM tag. If the NM tag is not present, this
+        field will always be 0.
+
+        +-----+--------------+-----+
+        |M    |BAM_CMATCH    |0    |
+        +-----+--------------+-----+
+        |I    |BAM_CINS      |1    |
+        +-----+--------------+-----+
+        |D    |BAM_CDEL      |2    |
+        +-----+--------------+-----+
+        |N    |BAM_CREF_SKIP |3    |
+        +-----+--------------+-----+
+        |S    |BAM_CSOFT_CLIP|4    |
+        +-----+--------------+-----+
+        |H    |BAM_CHARD_CLIP|5    |
+        +-----+--------------+-----+
+        |P    |BAM_CPAD      |6    |
+        +-----+--------------+-----+
+        |=    |BAM_CEQUAL    |7    |
+        +-----+--------------+-----+
+        |X    |BAM_CDIFF     |8    |
+        +-----+--------------+-----+
+        |B    |BAM_CBACK     |9    |
+        +-----+--------------+-----+
+        |NM   |NM tag        |10   |
+        +-----+--------------+-----+
+
+        If the cigar string contains no operations, all counts are 0.
+        If the cigar data is unavailable (NULL), None is returned.
+
+        Parameters
+        ----------
+
+        Returns
+        -------
+
+        arrays : two arrays. The first contains the nucleotide counts within
+           each cigar operation, the second contains the number of blocks for
+           each cigar operation.
+
+        """
+        
+        cdef int nfields = NCIGAR_CODES + 1
+
+        cdef c_array.array base_counts = array.array(
+            "I",
+            [0] * nfields)
+        cdef uint32_t [:] base_view = base_counts
+        cdef c_array.array block_counts = array.array(
+            "I",
+            [0] * nfields)
+        cdef uint32_t [:] block_view = block_counts
+
+        cdef bam1_t * src = self._delegate
+        cdef int op
+        cdef uint32_t l
+        cdef int32_t k
+        cdef uint32_t * cigar_p = pysam_bam_get_cigar(src)
+
+        if cigar_p == NULL:
+            return None
+
+        for k from 0 <= k < pysam_get_n_cigar(src):
+            op = cigar_p[k] & BAM_CIGAR_MASK
+            l = cigar_p[k] >> BAM_CIGAR_SHIFT
+            base_view[op] += l
+            block_view[op] += 1
+
+        cdef uint8_t * v = bam_aux_get(src, 'NM')
+        if v != NULL:
+            base_view[nfields - 1] = <int32_t>bam_aux2i(v)
+
+        return base_counts, block_counts
+
      #####################################################
      ## Unsorted as yet
      # TODO: capture in CIGAR object
diff --git a/pysam/calignmentfile.pxd b/pysam/calignmentfile.pxd

index a7e956d1661bb60612d5fc5bead8e8934086d64c..3384e7efd6ecd1087983e91cd032666efe71e33f 100644 (file)
--- a/pysam/calignmentfile.pxd
+++ b/pysam/calignmentfile.pxd
@@ -39,6 +39,7 @@ ctypedef struct __iterdata:
  cdef class AlignmentFile:
  
      cdef object _filename
+    cdef object _reference_filename
  
      # pointer to htsFile structure
      cdef htsFile * htsfile
diff --git a/pysam/calignmentfile.pyx b/pysam/calignmentfile.pyx

index f258a66e31d051fa3ecbbb9b70eab65526878af6..6473220bae5b648a2847763b186fbd91b488e10b 100644 (file)
--- a/pysam/calignmentfile.pyx
+++ b/pysam/calignmentfile.pyx
@@ -112,7 +112,8 @@ VALID_HEADER_ORDER = {"HD" : ("VN", "SO", "GO"),
                                 "UR", "SP"),
                        "RG" : ("ID", "SM", "LB", "DS", 
                                "PU", "PI", "CN", "DT",
-                              "PL", "FO", "KS", "PG"),
+                              "PL", "FO", "KS", "PG",
+                              "PM"),
                        "PG" : ("PN", "ID", "VN", "CL", 
                                "PP"),}
  
@@ -218,7 +219,7 @@ cdef class AlignmentFile:
      """AlignmentFile(filepath_or_object, mode=None, template=None,
      reference_names=None, reference_lengths=None, text=NULL,
      header=None, add_sq_text=False, check_header=True, check_sq=True,
-    filename=None)
+    reference_filename=None, filename=None)
  
      A :term:`SAM`/:term:`BAM` formatted file. 
  
@@ -248,8 +249,8 @@ cdef class AlignmentFile:
          4. The names (`reference_names`) and lengths
             (`reference_lengths`) are supplied directly as lists.
  
-    For writing a CRAM file, the filename of the reference can be 
-    added through a fasta formatted file (`reference_filename`)
+    When reading or writing a CRAM file, the filename of a FASTA-formatted
+    reference can be specified with `reference_filename`.
  
      By default, if a file is opened in mode 'r', it is checked
      for a valid header (`check_header` = True) and a definition of
@@ -311,6 +312,12 @@ cdef class AlignmentFile:
          when reading, check if SQ entries are present in header
          (default=True)
  
+    reference_filename : string
+        Path to a FASTA-formatted reference file. Valid only for CRAM files.
+        When reading a CRAM file, this overrides both ``$REF_PATH`` and the URL
+        specified in the header (``UR`` tag), which are normally used to find
+        the reference.
+
      filename : string
          Alternative to filepath_or_object. Filename of the file
          to be opened.
@@ -390,6 +397,7 @@ cdef class AlignmentFile:
          will be closed and a new file will be opened.
          '''
          cdef char *cfilename
+        cdef char *creference_filename
          cdef char *cindexname
          cdef char *cmode
  
@@ -433,6 +441,8 @@ cdef class AlignmentFile:
  
          cdef bytes bmode = mode.encode('ascii')
          self._filename = filename = encode_filename(filename)
+        self._reference_filename = reference_filename = encode_filename(
+            reference_filename)
  
          # FIXME: Use htsFormat when it is available
          self.is_stream = filename == b"-"
@@ -515,10 +525,8 @@ cdef class AlignmentFile:
              # is given, the CRAM reference arrays will be built from
              # the @SQ header in the header
              if self.is_cram and reference_filename:
-                # note that fn_aux takes ownership, so create
-                # a copy
-                fn = encode_filename(reference_filename)
-                self.htsfile.fn_aux = strdup(fn)
+                # note that fn_aux takes ownership, so create a copy
+                self.htsfile.fn_aux = strdup(self._reference_filename)
  
              # write header to htsfile
              if self.is_bam or self.is_cram or "h" in mode:
@@ -570,6 +578,13 @@ cdef class AlignmentFile:
                              "- is it SAM format?" % mode )
                      # self.header.ignore_sam_err = True
  
+            # set filename with reference sequences
+            if self.is_cram and reference_filename:
+                creference_filename = self._reference_filename
+                hts_set_opt(self.htsfile,
+                            CRAM_OPT_REFERENCE,
+                            creference_filename)
+
              if check_sq and self.header.n_targets == 0:
                  raise ValueError(
                      ("file has no sequences defined (mode='%s') - "
@@ -854,7 +869,7 @@ cdef class AlignmentFile:
  
          multiple_iterators : bool
             
-           If `multiple_iterators` is True (default) multiple
+           If `multiple_iterators` is True, multiple
             iterators on the same file can be used at the same time. The
             iterator returned will receive its own copy of a filehandle to
             the file effectively re-opening the file. Re-opening a file
@@ -1665,6 +1680,7 @@ cdef class IteratorRow:
  
      def __init__(self, AlignmentFile samfile, int multiple_iterators=False):
          cdef char *cfilename
+        cdef char *creference_filename
          
          if not samfile.is_open():
              raise ValueError("I/O operation on closed file")
@@ -1686,6 +1702,13 @@ cdef class IteratorRow:
                  self.header = sam_hdr_read(self.htsfile)
              assert self.header != NULL
              self.owns_samfile = True
+            # options specific to CRAM files
+            if samfile.is_cram and samfile._reference_filename:
+                creference_filename = samfile._reference_filename
+                hts_set_opt(self.htsfile,
+                            CRAM_OPT_REFERENCE,
+                            creference_filename)
+
          else:
              self.htsfile = self.samfile.htsfile
              self.owns_samfile = False
diff --git a/pysam/cbcf.pyx b/pysam/cbcf.pyx

index 2a1985038b869a5cea6c20a4e85d736ec3e38a32..41fd44f3bc341774f4f54251079421febd65cc6b 100644 (file)
--- a/pysam/cbcf.pyx
+++ b/pysam/cbcf.pyx
@@ -1807,7 +1807,7 @@ cdef class VariantHeader(object):
                  'missing {:d} requested samples'.format(
                      len(missing_samples)))
  
-        keep_samples = force_bytes(b','.join(keep_samples))
+        keep_samples = force_bytes(','.join(keep_samples))
          cdef char *keep = <char *>keep_samples if keep_samples else NULL
          cdef ret = bcf_hdr_set_samples(self.ptr, keep, 0)
  
diff --git a/pysam/cfaidx.pxd b/pysam/cfaidx.pxd

index d3aff0988b1a690c72577b41f890cb754401765b..774927432f3bb9b8ca585e235fc91cc592ff54a3 100644 (file)
--- a/pysam/cfaidx.pxd
+++ b/pysam/cfaidx.pxd
@@ -6,7 +6,7 @@ from libc.stdio cimport FILE, printf
  cimport cython
  
  from cpython cimport array
-from pysam.chtslib cimport faidx_t, gzFile, kstring_t
+from pysam.chtslib cimport faidx_t, kstring_t, BGZF
  
  # These functions are put here and not in chtslib.pxd in order
  # to avoid warnings for unused functions.
@@ -21,13 +21,10 @@ cdef extern from "pysam_stream.h" nogil:
          kstring_t seq
          kstring_t qual
  
-    gzFile gzopen(char *, char *)
-    kseq_t *kseq_init(gzFile)
+    kseq_t *kseq_init(BGZF *)
      int kseq_read(kseq_t *)
      void kseq_destroy(kseq_t *)
-    int gzclose(gzFile)
-
-    kstream_t *ks_init(gzFile)
+    kstream_t *ks_init(BGZF *)
      void ks_destroy(kstream_t *)
  
      # Retrieve characters from stream until delimiter
@@ -62,9 +59,10 @@ cdef class PersistentFastqProxy:
  
  cdef class FastxFile:
      cdef object _filename
-    cdef gzFile fastqfile
+    cdef BGZF * fastqfile
      cdef kseq_t * entry
      cdef bint persist
+    cdef bint is_remote
  
      cdef kseq_t * getCurrent(self)
      cdef int cnext(self)
diff --git a/pysam/cfaidx.pyx b/pysam/cfaidx.pyx

index 4db754e16bfd9ecb573964cef6fe224e08bea36e..78f9aac1133181e2a2a0575585e1a7786f093ead 100644 (file)
--- a/pysam/cfaidx.pyx
+++ b/pysam/cfaidx.pyx
@@ -60,7 +60,8 @@ from cpython.version cimport PY_MAJOR_VERSION
  from pysam.chtslib cimport \
      faidx_nseq, fai_load, fai_destroy, fai_fetch, \
      faidx_seq_len, \
-    faidx_fetch_seq, gzopen, gzclose, hisremote
+    faidx_fetch_seq, hisremote, \
+    bgzf_open, bgzf_close
  
  from pysam.cutils cimport force_bytes, force_str, charptr_to_str
  from pysam.cutils cimport encode_filename, from_string_and_size
@@ -136,6 +137,11 @@ cdef class FastaFile:
          cdef char *cfilename = self._filename
          self.is_remote = hisremote(cfilename)
  
+        if filepath_index is not None:
+            raise NotImplementedError(
+                "setting an explicit path for the index "
+                "is not implemented")
+
          # open file for reading
          if (self._filename != b"-"
              and not self.is_remote
@@ -171,7 +177,9 @@ cdef class FastaFile:
              self.fastafile = NULL
  
      def __dealloc__(self):
-        self.close()
+        if self.fastafile != NULL:
+            fai_destroy(self.fastafile)
+            self.fastafile = NULL
  
      # context manager interface
      def __enter__(self):
@@ -464,30 +472,40 @@ cdef class FastxFile:
              on the file continues.
  
          '''
-        self.close()
+        if self.fastqfile != NULL:
+            self.close()
  
-        if not os.path.exists(filename):
-            raise IOError("no such file or directory: %s" % filename)
+        self._filename = encode_filename(filename)
+        cdef char *cfilename = self._filename
+        self.is_remote = hisremote(cfilename)
+
+        # open file for reading
+        if (self._filename != b"-"
+            and not self.is_remote
+            and not os.path.exists(filename)):
+            raise IOError("file `%s` not found" % filename)
  
          self.persist = persist
  
-        self._filename = encode_filename(filename)
-        cdef char *cfilename = self._filename
          with nogil:
-            self.fastqfile = gzopen(cfilename, "r")
+            self.fastqfile = bgzf_open(cfilename, "r")
              self.entry = kseq_init(self.fastqfile)
          self._filename = filename
  
      def close(self):
          '''close the file.'''
+        if self.fastqfile != NULL:
+            bgzf_close(self.fastqfile)
+            self.fastqfile = NULL
          if self.entry != NULL:
-            gzclose(self.fastqfile)
-            if self.entry:
-                kseq_destroy(self.entry)
-                self.entry = NULL
+            kseq_destroy(self.entry)
+            self.entry = NULL
  
      def __dealloc__(self):
-        self.close()
+        if self.fastqfile != NULL:
+            bgzf_close(self.fastqfile)
+        if self.entry:
+            kseq_destroy(self.entry)
  
      # context manager interface
      def __enter__(self):
diff --git a/pysam/chtslib.pxd b/pysam/chtslib.pxd

index 0cee07552f00dfcbf3c0cc654a75f932b72754ae..33c1559f0e36edc364a93e1c42e17f92c85de747 100644 (file)
--- a/pysam/chtslib.pxd
+++ b/pysam/chtslib.pxd
@@ -9,20 +9,6 @@ cdef extern from "Python.h":
     FILE* PyFile_AsFile(object)
  
  
-cdef extern from "zlib.h" nogil:
-  ctypedef void * gzFile
-  ctypedef int64_t z_off_t
-
-  int gzclose(gzFile fp)
-  int gzread(gzFile fp, void *buf, unsigned int n)
-  char *gzerror(gzFile fp, int *errnum)
-
-  gzFile gzopen( char *path, char *mode)
-  gzFile gzdopen (int fd, char *mode)
-  char * gzgets(gzFile file, char *buf, int len)
-  int gzeof(gzFile file)
-
-
  cdef extern from "htslib/kstring.h" nogil:
      ctypedef struct kstring_t:
          size_t l, m
@@ -398,6 +384,29 @@ cdef extern from "htslib/hts.h" nogil:
          no_compression, gzip, bgzf, custom
          compression_maximum
  
+    enum hts_fmt_option:
+        CRAM_OPT_DECODE_MD,
+        CRAM_OPT_PREFIX,
+        CRAM_OPT_VERBOSITY,
+        CRAM_OPT_SEQS_PER_SLICE,
+        CRAM_OPT_SLICES_PER_CONTAINER,
+        CRAM_OPT_RANGE,
+        CRAM_OPT_VERSION,
+        CRAM_OPT_EMBED_REF,
+        CRAM_OPT_IGNORE_MD5,
+        CRAM_OPT_REFERENCE,
+        CRAM_OPT_MULTI_SEQ_PER_SLICE,
+        CRAM_OPT_NO_REF,
+        CRAM_OPT_USE_BZIP2,
+        CRAM_OPT_SHARED_REF,
+        CRAM_OPT_NTHREADS,
+        CRAM_OPT_THREAD_POOL,
+        CRAM_OPT_USE_LZMA,
+        CRAM_OPT_USE_RANS,
+        CRAM_OPT_REQUIRED_FIELDS,
+        HTS_OPT_COMPRESSION_LEVEL,
+        HTS_OPT_NTHREADS,
+
      ctypedef struct htsVersion:
          short major, minor
  
@@ -519,7 +528,7 @@ cdef extern from "htslib/hts.h" nogil:
      # @param opt The CRAM_OPT_* option.
      # @param ... Optional arguments, dependent on the option used.
      # @return    0 for success, or negative if an error occurred.
-    #int hts_set_opt(htsFile *fp, hts_fmt_option opt, ...)
+    int hts_set_opt(htsFile *fp, hts_fmt_option opt, ...)
  
      int hts_getline(htsFile *fp, int delimiter, kstring_t *str)
      char **hts_readlines(const char *fn, int *_n)
diff --git a/pysam/ctabix.pxd b/pysam/ctabix.pxd

index 39eed7755ac80b0b3101574b46a7a1f9f210de18..028090efc6baf79980ced8b353d927a02cd44d81 100644 (file)
--- a/pysam/ctabix.pxd
+++ b/pysam/ctabix.pxd
@@ -14,7 +14,7 @@ cdef extern from "unistd.h" nogil:
      int close(int fd)
  
  from pysam.chtslib cimport hts_idx_t, hts_itr_t, htsFile, \
-    gzFile, tbx_t, kstring_t
+    tbx_t, kstring_t, BGZF
  
  # These functions are put here and not in chtslib.pxd in order
  # to avoid warnings for unused functions.
@@ -29,13 +29,10 @@ cdef extern from "pysam_stream.h" nogil:
          kstring_t seq
          kstring_t qual
  
-    gzFile gzopen(char *, char *)
-    kseq_t *kseq_init(gzFile)
+    kseq_t *kseq_init(BGZF *)
      int kseq_read(kseq_t *)
      void kseq_destroy(kseq_t *)
-    int gzclose(gzFile)
-
-    kstream_t *ks_init(gzFile)
+    kstream_t *ks_init(BGZF *)
      void ks_destroy(kstream_t *)
  
      # Retrieve characters from stream until delimiter
@@ -47,7 +44,7 @@ cdef extern from "pysam_stream.h" nogil:
  
  
  cdef class tabix_file_iterator:
-    cdef gzFile fh
+    cdef BGZF * fh
      cdef kstream_t * kstream
      cdef kstring_t buffer
      cdef size_t size
@@ -104,7 +101,7 @@ cdef class TabixIteratorParsed(TabixIterator):
  
  cdef class GZIterator:
      cdef object _filename
-    cdef gzFile gzipfile
+    cdef BGZF * gzipfile
      cdef kstream_t * kstream
      cdef kstring_t buffer
      cdef int __cnext__(self)
diff --git a/pysam/ctabix.pyx b/pysam/ctabix.pyx

index 0bb1284a391e6808fdf0398f5ae8646befe2c278..a23fa87574235b8352fb90fad529f6b97bd6a45c 100644 (file)
--- a/pysam/ctabix.pyx
+++ b/pysam/ctabix.pyx
@@ -69,10 +69,10 @@ from cpython.version cimport PY_MAJOR_VERSION
  cimport pysam.ctabixproxies as ctabixproxies
  
  from pysam.chtslib cimport htsFile, hts_open, hts_close, HTS_IDX_START,\
-    BGZF, bgzf_open, bgzf_close, bgzf_write, gzFile, \
+    BGZF, bgzf_open, bgzf_dopen, bgzf_close, bgzf_write, \
      tbx_index_build, tbx_index_load, tbx_itr_queryi, tbx_itr_querys, \
      tbx_conf_t, tbx_seqnames, tbx_itr_next, tbx_itr_destroy, \
-    tbx_destroy, gzopen, gzclose, gzerror, gzdopen, hisremote
+    tbx_destroy, hisremote
  
  from pysam.cutils cimport force_bytes, force_str, charptr_to_str
  from pysam.cutils cimport encode_filename, from_string_and_size
@@ -532,7 +532,7 @@ cdef class TabixFile:
              cdef int x
              result = []
              for x from 0 <= x < nsequences:
-                result.append(sequences[x])
+                result.append(force_str(sequences[x]))
              
              # htslib instructions:
              # only free container, not the sequences themselves
@@ -681,7 +681,7 @@ cdef class GZIterator:
          filename = encode_filename(filename)
          cdef char *cfilename = filename
          with nogil:
-            self.gzipfile = gzopen(cfilename, "r")
+            self.gzipfile = bgzf_open(cfilename, "r")
          self._filename = filename
          self.kstream = ks_init(self.gzipfile)
          self.encoding = encoding
@@ -693,11 +693,12 @@ cdef class GZIterator:
      def __dealloc__(self):
          '''close file.'''
          if self.gzipfile != NULL:
-            gzclose(self.gzipfile)
+            bgzf_close(self.gzipfile)
              self.gzipfile = NULL
          if self.buffer.s != NULL:
              free(self.buffer.s)
-        ks_destroy(self.kstream)
+        if self.kstream != NULL:
+            ks_destroy(self.kstream)
  
      def __iter__(self):
          return self
@@ -1003,10 +1004,10 @@ def tabix_index( filename,
  #########################################################
  ## Iterators for parsing through unindexed files.
  #########################################################
-cdef buildGzipError(void *gzfp):
-    cdef int errnum = 0
-    cdef char *s = gzerror(gzfp, &errnum)
-    return "error (%d): %s (%d: %s)" % (errno, strerror(errno), errnum, s)
+# cdef buildGzipError(void *gzfp):
+#     cdef int errnum = 0
+#     cdef char *s = gzerror(gzfp, &errnum)
+#     return "error (%d): %s (%d: %s)" % (errno, strerror(errno), errnum, s)
  
  
  cdef class tabix_file_iterator:
@@ -1034,7 +1035,7 @@ cdef class tabix_file_iterator:
          # in this case gzread will directly read from the file without decompression. 
          # When reading, this will be detected automatically by looking 
          # for the magic two-byte gzip header. 
-        self.fh = gzdopen(self.duplicated_fd, 'r')
+        self.fh = bgzf_dopen(self.duplicated_fd, 'r')
  
          if self.fh == NULL: 
              raise IOError('%s' % strerror(errno))
@@ -1076,14 +1077,14 @@ cdef class tabix_file_iterator:
              # gzgets terminates at \n, no need to test
  
              # parser creates a copy
-            return self.parser.parse( b, self.buffer.l)
+            return self.parser.parse(b, self.buffer.l)
  
          raise StopIteration
  
      def __dealloc__(self):
          free(self.buffer.s)
          ks_destroy(self.kstream)
-        gzclose(self.fh)
+        bgzf_close(self.fh)
          
      def __next__(self):
          return self.__cnext__()
diff --git a/pysam/ctabixproxies.pyx b/pysam/ctabixproxies.pyx

index d72f0821a5d4e4308fe3d8c2379b9f785ac7565f..f5288cc61ade00df81e802525198ba1c37fe997a 100644 (file)
--- a/pysam/ctabixproxies.pyx
+++ b/pysam/ctabixproxies.pyx
@@ -8,6 +8,8 @@ from libc.stdlib cimport atoi, atol, atof
  from pysam.cutils cimport force_bytes, force_str, charptr_to_str
  from pysam.cutils cimport encode_filename, from_string_and_size
  
+import collections
+
  cdef char *StrOrEmpty(char * buffer):
       if buffer == NULL:
           return ""
@@ -88,7 +90,8 @@ cdef class TupleProxy:
          elif op == 3:  # != operator
              return self.compare(other) != 0
          else:
-            return NotImplemented
+            err_msg = "op {0} isn't implemented yet".format(op)
+            raise NotImplementedError(err_msg)
  
      cdef take(self, char * buffer, size_t nbytes):
          '''start presenting buffer.
@@ -390,6 +393,8 @@ cdef class GTFProxy(TupleProxy):
          def __get__(self):
              return self._getindex(1)
          def __set__(self, value):
+            if value is None:
+                value = "."
              self._setindex(1, value)
  
      property feature:
@@ -397,6 +402,8 @@ cdef class GTFProxy(TupleProxy):
          def __get__(self):
              return self._getindex(2)
          def __set__(self, value):
+            if value is None:
+                value = "."
              self._setindex(2, value)
  
      property start:
@@ -423,29 +430,40 @@ cdef class GTFProxy(TupleProxy):
                  return float(v)
  
          def __set__(self, value):
-            self._setindex(5, value)
+            if value is None:
+                value = "."
+            self._setindex(5, str(value))
  
      property strand:
          '''feature strand.'''
-        def __get__(self ):
-            return self._getindex(6)
+        def __get__(self):
+           return self._getindex(6)
          def __set__(self, value ):
+            if value is None:
+                value = "."
              self._setindex(6, value)
  
      property frame:
         '''feature frame.'''
         def __get__(self):
-           return self._getindex(7)
+            v = self._getindex(7)
+            if v == "" or v[0] == '.':
+                return v
+            else:
+                return int(v)
+
         def __set__(self, value):
-           self._setindex(7, value)
+            if value is None:
+                value = "."
+            self._setindex(7, str(value))
  
      property attributes:
          '''feature attributes (as a string).'''
          def __get__(self): 
              if self.hasOwnAttributes:
-                return self._attributes
+                return force_str(self._attributes)
              else:
-                return self._getindex(8)
+                return force_str(self._getindex(8))
          def __set__( self, value): 
              if self.hasOwnAttributes:
                  free(self._attributes)
@@ -481,7 +499,7 @@ cdef class GTFProxy(TupleProxy):
          # Remove white space to prevent a last empty field.
          fields = [x.strip() for x in attributes.strip().split("; ")]
          
-        result = {}
+        result = collections.OrderedDict()
  
          for f in fields:
  
@@ -529,7 +547,7 @@ cdef class GTFProxy(TupleProxy):
              else:
                  aa.append( '%s %s' % (k,str(v)) )
  
-        a = "; ".join( aa ) + ";"
+        a = force_bytes("; ".join(aa) + ";")
          p = a
          l = len(a)
          self._attributes = <char *>calloc(l + 1, sizeof(char))
@@ -552,9 +570,9 @@ cdef class GTFProxy(TupleProxy):
                   str(self.start+1),
                   str(self.end),
                   toDot(self.score),
-                 self.strand,
-                 self.frame,
-                 self.attributes ) )
+                 toDot(self.strand),
+                 toDot(self.frame),
+                 self.attributes))
          else: 
              return TupleProxy.__str__(self)
  
@@ -638,6 +656,26 @@ cdef class GTFProxy(TupleProxy):
          r[name] = value
          self.fromDict(r)
  
+    def __cmp__(self, other):
+        return (self.contig, self.strand, self.start) < \
+            (other.contig, other.strand, other.start)
+
+    # python 3 compatibility
+    def __richcmp__(GTFProxy self, GTFProxy other, int op):
+        if op == 0:
+            return (self.contig, self.strand, self.start) < \
+                (other.contig, other.strand, other.start)
+        elif op == 1:
+            return (self.contig, self.strand, self.start) <= \
+                (other.contig, other.strand, other.start)
+        elif op == 2:
+            return self.compare(other) == 0
+        elif op == 3:
+            return self.compare(other) != 0
+        else:
+            err_msg = "op {0} isn't implemented yet".format(op)
+            raise NotImplementedError(err_msg)
+
  
  cdef class NamedTupleProxy(TupleProxy):
  
@@ -705,8 +743,8 @@ cdef class BedProxy(NamedTupleProxy):
  
          # do automatic conversion
          self.contig = self.fields[0]
-        self.start = atoi( self.fields[1] ) 
-        self.end = atoi( self.fields[2] )
+        self.start = atoi(self.fields[1]) 
+        self.end = atoi(self.fields[2])
  
      # __setattr__ in base class seems to take precedence
      # hence implement setters in __setattr__
diff --git a/pysam/cutils.pxd b/pysam/cutils.pxd

index 36fe5549facaae9d8d2a39930b5c39431a378f6e..81e544a306d6d443d1348d5ad967bf9383ab5d43 100644 (file)
--- a/pysam/cutils.pxd
+++ b/pysam/cutils.pxd
@@ -32,4 +32,7 @@ cdef extern from "pysam_util.h":
      int bcftools_main(int argc, char *argv[])
      void pysam_set_stderr(int fd)
      void pysam_unset_stderr()
+    void pysam_set_stdout(int fd)
+    void pysam_set_stdout_fn(const char *)
+    void pysam_unset_stdout()
      void set_optind(int)
diff --git a/pysam/cutils.pyx b/pysam/cutils.pyx

index 482db894e44fe315dded63a760add75d59893741..751072713ad3aae32e39ad6434d04f0fc3ab5d8d 100644 (file)
--- a/pysam/cutils.pyx
+++ b/pysam/cutils.pyx
@@ -14,6 +14,7 @@ from libc.stdlib cimport calloc, free
  from libc.string cimport strncpy
  from libc.stdio cimport fprintf, stderr, fflush
  from libc.stdio cimport stdout as c_stdout
+from posix.fcntl cimport open as c_open, O_WRONLY
  
  #####################################################################
  # hard-coded constants
@@ -227,129 +228,75 @@ cpdef parse_region(reference=None,
      return force_bytes(reference), rstart, rend
  
  
-@contextmanager
-def stdout_redirector(to=os.devnull):
-    '''
-    import os
-
-    with stdout_redirected(to=filename):
-        print("from Python")
-        os.system("echo non-Python applications are also supported")
-
-    see http://stackoverflow.com/questions/5081657/how-do-i-prevent-a-c-shared-library-to-print-on-stdout-in-python/17954769#17954769
-    '''
-    fd = sys.stdout.fileno()
-
-    def _redirect_stdout(to):
-        # flush C-level stdout
-        try:
-            fflush(c_stdout)
-            sys.stdout.close()
-        except (OSError, IOError):
-            # some tools close stdout
-            # Py3: OSError
-            # Py2: IOError
-            pass
-
-        # fd writes to 'to' file
-        os.dup2(to.fileno(), fd)
-        # Python writes to fd
-        if IS_PYTHON3:
-            sys.stdout = io.TextIOWrapper(
-                os.fdopen(fd, 'wb'))
-        else:
-            sys.stdout = os.fdopen(fd, 'w')
-        
-    with os.fdopen(os.dup(fd), 'w') as old_stdout:
-        _redirect_stdout(to)
-        try:
-            yield # allow code to be run with the redirected stdout
-        finally:
-            _redirect_stdout(old_stdout)
-            # restore stdout.
-            # buffering and flags may be different
-
-# def stdout_redirector(stream):
-#     """
-#     See discussion in:
-
-#     http://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
-#     """
-
-#     # The original fd stdout points to. Usually 1 on POSIX systems.
-#     original_stdout_fd = sys.stdout.fileno()
-#     print ("original_fd=", original_stdout_fd)
-#     def _redirect_stdout(to_fd):
-#         """Redirect stdout to the given file descriptor."""
-#         # Flush the C-level buffer stdout
-#         fflush(c_stdout)
-#         # Flush and close sys.stdout - also closes the file descriptor
-#         # (fd)
-#         sys.stdout.close()
-#         # Make original_stdout_fd point to the same file as to_fd
-#         os.dup2(to_fd, original_stdout_fd)
-#         # Create a new sys.stdout that points to the redirected fd
-#         if IS_PYTHON3:
-#             sys.stdout = io.TextIOWrapper(
-#                 os.fdopen(original_stdout_fd, 'wb'))
-
-#     # Save a copy of the original stdout fd in saved_stdout_fd
-#     saved_stdout_fd = os.dup(original_stdout_fd)
-#     try:
-#         # Create a temporary file and redirect stdout to it
-#         tfile = tempfile.TemporaryFile(mode='w+b')
-#         _redirect_stdout(tfile.fileno())
-#         # Yield to caller, then redirect stdout back to the saved fd
-#         yield
-#         _redirect_stdout(saved_stdout_fd)
-#         # Copy contents of temporary file to the given stream
-#         tfile.flush()
-#         tfile.seek(0, io.SEEK_SET)
-#         stream.write(tfile.read())
-#     finally:
-#         tfile.close()
-#         os.close(saved_stdout_fd)
-
-
  def _pysam_dispatch(collection,
                      method,
-                    args=(),
-                    catch_stdout=True):
+                    args=None,
+                    catch_stdout=True,
+                    save_stdout=None):
      '''call ``method`` in samtools/bcftools providing arguments in args.
      
-    .. note:: 
-       This method redirects stdout to capture it 
-       from samtools. If for some reason stdout disappears
-       the reason might be in this method.
-
-    .. note::
-       This method captures stdout and stderr using temporary files,
-       which are then read into memory in their entirety. This method
-       is slow and might cause large memory overhead.
-
-    Catching of stdout can be turned of by setting *catch_stdout* to
+    Catching of stdout can be turned off by setting *catch_stdout* to
      False.
  
-    See http://bytes.com/topic/c/answers/487231-how-capture-stdout-temporarily
-    on the topic of redirecting stderr/stdout.
-
      '''
  
-    # note that debugging this module can be a problem
-    # as stdout/stderr will not appear on the terminal
-    # some special cases
      if method == "index":
          if not os.path.exists(args[0]):
              raise IOError("No such file or directory: '%s'" % args[0])
+            
+    if args is None:
+        args = []
+    else:
+        args = list(args)
  
-    # redirect stderr and stdout to file
+    # redirect stderr to file
      stderr_h, stderr_f = tempfile.mkstemp()
      pysam_set_stderr(stderr_h)
  
+    # redirect stdout to file
+    if save_stdout:
+        stdout_f = save_stdout
+        stdout_h = c_open(force_bytes(stdout_f),
+                          O_WRONLY)
+        if stdout_h == -1:
+            raise OSError("error while opening {} for writing".format(stdout_f))
+
+        pysam_set_stdout_fn(force_bytes(stdout_f))
+        pysam_set_stdout(stdout_h)
+    elif catch_stdout:
+        stdout_h, stdout_f = tempfile.mkstemp()
+
+        MAP_STDOUT_OPTIONS = {
+            "samtools": {
+                "view": "-o {}",
+                "mpileup": "-o {}",
+                "depad": "-o {}",
+                "calmd": "",  # uses pysam_stdout_fn
+            },
+            "bcftools": {}
+        }
+
+        stdout_option = None
+        if collection == "bcftools":
+            # in bcftools, most methods accept -o, the exceptions
+            # are below:
+            if method not in ("index", "roh", "stats"):
+                stdout_option = "-o {}"
+        elif method in MAP_STDOUT_OPTIONS[collection]:
+            stdout_option = MAP_STDOUT_OPTIONS[collection][method]
+
+        if stdout_option is not None:
+            os.close(stdout_h)
+            pysam_set_stdout_fn(force_bytes(stdout_f))
+            args.extend(stdout_option.format(stdout_f).split(" "))
+        else:
+            pysam_set_stdout(stdout_h)
+    else:
+        pysam_set_stdout_fn("-")
+
      # setup the function call to samtools/bcftools main
      cdef char ** cargs
      cdef int i, n, retval, l
-
      n = len(args)
      method = force_bytes(method)
      collection = force_bytes(collection)
@@ -381,41 +328,40 @@ def _pysam_dispatch(collection,
          set_optind(0)
  
      # call samtools/bcftools
-    if catch_stdout:
-        with tempfile.TemporaryFile(mode='w+b') as tfile:
-            with stdout_redirector(tfile):
-                if collection == b"samtools":
-                    retval = samtools_main(n + 2, cargs)
-                elif collection == b"bcftools":
-                    retval = bcftools_main(n + 2, cargs)
-            tfile.flush()
-            tfile.seek(0)
-            # do not force str, as output might be binary,
-            # for example BAM, VCF.gz, etc.
-            out_stdout = tfile.read()
-    else:
-        if collection == b"samtools":
-            retval = samtools_main(n + 2, cargs)
-        elif collection == b"bcftools":
-            retval = bcftools_main(n + 2, cargs)
-        out_stdout = None
+    if collection == b"samtools":
+        retval = samtools_main(n + 2, cargs)
+    elif collection == b"bcftools":
+        retval = bcftools_main(n + 2, cargs)
  
      for i from 0 <= i < n:
          free(cargs[i + 2])
      free(cargs)
  
      # get error messages
+    def _collect(fn):
+        out = []
+        try:
+            with open(fn, "r") as inf:
+                out = inf.read()
+        except UnicodeDecodeError:
+            with open(fn, "rb") as inf:
+                # read binary output
+                out = inf.read()
+        finally:
+            os.remove(fn)
+        return out
+
      pysam_unset_stderr()
-    out_stderr = []
-    try:
-        with open(stderr_f, "r") as inf:
-            out_stderr = inf.readlines()
-    except UnicodeDecodeError:
-        with open( stderr_f, "rb") as inf:
-            # read binary output
-            out_stderr = inf.read()
-    finally:
-        os.remove(stderr_f)
+    out_stderr = _collect(stderr_f)
+
+    if save_stdout:
+        pysam_unset_stdout()
+        out_stdout = None
+    elif catch_stdout:
+        pysam_unset_stdout()
+        out_stdout = _collect(stdout_f)
+    else:
+        out_stdout = None
  
      return retval, out_stderr, out_stdout
  
diff --git a/pysam/cvcf.pyx b/pysam/cvcf.pyx

index 83d3663def52f73250ed160e5474d6120351be39..5e2fda2f2bdbaa83ea365a7955641e70c98f10ca 100644 (file)
--- a/pysam/cvcf.pyx
+++ b/pysam/cvcf.pyx
@@ -114,6 +114,7 @@ cdef class VCFRecord( ctabixproxies.TupleProxy):
      def __init__(self, vcf):
          self.vcf = vcf
          self.encoding = vcf.encoding
+
          # if len(data) != len(self.vcf._samples):
          #     self.vcf.error(str(data),
          #                self.BAD_NUMBER_OF_COLUMNS,
@@ -133,7 +134,7 @@ cdef class VCFRecord( ctabixproxies.TupleProxy):
      def error(self, line, error, opt=None):
          '''raise error.'''
          # pass to vcf file for error handling
-        return self.vcf.error( line, error, opt )
+        return self.vcf.error(line, error, opt)
  
      cdef update(self, char * buffer, size_t nbytes):
          '''update internal data.
@@ -349,6 +350,7 @@ class VCF(object):
          if leftalign: self._leftalign = leftalign
          self._lines = lines
          self.encoding = "ascii"
+        self.tabixfile = None
  
      def error(self,line,error,opt=None):
          if error in self._ignored_errors: return
@@ -1047,6 +1049,15 @@ class VCF(object):
          self.tabixfile = pysam.Tabixfile(filename, encoding=encoding)
          self._parse_header(self.tabixfile.header)
  
+    def __del__(self):
+        self.close()
+        self.tabixfile = None
+
+    def close(self):
+        if self.tabixfile:
+            self.tabixfile.close()
+            self.tabixfile = None
+
      def fetch(self,
                reference=None,
                start=None,
diff --git a/pysam/pysam_stream.h b/pysam/pysam_stream.h

index 3e93e2985f4fdb50f35548213e7da417111a0e1d..3a4eb16cfa06af29a7c6680fa642b886ed4230be 100644 (file)
--- a/pysam/pysam_stream.h
+++ b/pysam/pysam_stream.h
@@ -5,7 +5,8 @@
  
  // #######################################################
  // fastq parsing
-KSEQ_INIT(gzFile, gzread)
+// KSEQ_INIT(gzFile, gzread)
+KSEQ_INIT(BGZF *, bgzf_read)
  
  //KSTREAM_INIT( gzFile, gzread, 16384)
  
diff --git a/pysam/pysam_util.c b/pysam/pysam_util.c

index e669e1dac69cddbb8eef6638fd2457b9668e550f..94717c846371b45c7a82b21ba3c418c689a3b744 100644 (file)
--- a/pysam/pysam_util.c
+++ b/pysam/pysam_util.c
@@ -1,6 +1,7 @@
  #include <ctype.h>
  #include <assert.h>
  #include <unistd.h>
+#include <stdio.h>
  #include "bam.h"
  #include "bam_endian.h"
  #include "htslib/khash.h"
@@ -8,23 +9,52 @@
  #include "htslib/knetfile.h"
  #include "pysam_util.h"
  
-// Definition of pysamerr
-#include "stdio.h"
-FILE * pysamerr = NULL;
+
+FILE * pysam_stderr = NULL;
+FILE * pysam_stdout = NULL;
+const char * pysam_stdout_fn = NULL;
+int PYSAM_STDOUT_FILENO = STDOUT_FILENO;
+
  
  FILE * pysam_set_stderr(int fd)
  {
-  if (pysamerr != NULL)
-    fclose(pysamerr);
-  pysamerr = fdopen(fd, "w");
-  return pysamerr;
+  if (pysam_stderr != NULL)
+    fclose(pysam_stderr);
+  pysam_stderr = fdopen(fd, "w");
+  return pysam_stderr;
  }
  
  void pysam_unset_stderr(void)
  {
-  if (pysamerr != NULL)
-    fclose(pysamerr);
-  pysamerr = fopen("/dev/null", "w");
+  if (pysam_stderr != NULL)
+    fclose(pysam_stderr);
+  pysam_stderr = fopen("/dev/null", "w");
+}
+
+FILE * pysam_set_stdout(int fd)
+{
+  if (pysam_stdout != NULL)
+    fclose(pysam_stdout);
+  pysam_stdout = fdopen(fd, "w");
+  if (pysam_stdout == NULL)
+    {
+      fprintf(pysam_stderr, "could not set stdout to fd %i", fd);
+    }
+  PYSAM_STDOUT_FILENO = fd;
+  return pysam_stdout;
+}
+
+void pysam_set_stdout_fn(const char *fn)
+{
+  pysam_stdout_fn = fn;
+}
+
+void pysam_unset_stdout(void)
+{
+  if (pysam_stdout != NULL)
+    fclose(pysam_stdout);
+  pysam_stdout = fopen("/dev/null", "w");
+  PYSAM_STDOUT_FILENO = STDOUT_FILENO;
  }
  
  void set_optind(int val)
diff --git a/pysam/pysam_util.h b/pysam/pysam_util.h

index 5f2359f97a5f0caa007133c8732c97ac405d97d5..a30808f6c8bac7c77ad603de1acab8a4fc02b490 100644 (file)
--- a/pysam/pysam_util.h
+++ b/pysam/pysam_util.h
@@ -1,20 +1,35 @@
  #ifndef PYSAM_UTIL_H
  #define PYSAM_UTIL_H
  
-//////////////////////////////////////////////////////////////////
  /*! set pysam standard error to point to file descriptor
  
    Setting the stderr will close the previous stderr.
   */
  FILE * pysam_set_stderr(int fd);
  
-//////////////////////////////////////////////////////////////////
+/*! set pysam standard output to point to file descriptor
+
+  Setting the stdout will close the previous stdout.
+ */
+FILE * pysam_set_stdout(int fd);
+
+/*! set pysam standard output to point to filename
+
+ */
+void pysam_set_stdout_fn(const char * fn);
+
  /*! set pysam standard error to /dev/null.
    
    Unsetting the stderr will close the previous stderr.
   */
  void pysam_unset_stderr(void);
  
+/*! set pysam standard output to /dev/null.
+  
+  Unsetting the stdout will close the previous stdout.
+ */
+void pysam_unset_stdout(void);
+
  int pysam_dispatch(int argc, char *argv[]);
  
  void set_optind(int);
diff --git a/pysam/tabix_util.c b/pysam/tabix_util.c

index f94b09d4a741abe2eb3d7672a1f8d6e0434825d6..bff140e6b0591dd91d736d08b87cea5486278405 100644 (file)
--- a/pysam/tabix_util.c
+++ b/pysam/tabix_util.c
@@ -1,8 +1,6 @@
-// Definition of pysamerr
  #include <stdio.h>
  #include <unistd.h>
  #include <stdlib.h>
-FILE * pysamerr = NULL;
  
  #if !(_POSIX_C_SOURCE >= 200809L || _XOPEN_SOURCE >= 700)
  /*
diff --git a/pysam/utils.py b/pysam/utils.py

index 0e49d54def888a7e392bac58597bea2a387273de..c5bb5393b213cece56392749bf821856c25dc11d 100644 (file)
--- a/pysam/utils.py
+++ b/pysam/utils.py
@@ -46,14 +46,24 @@ class PysamDispatcher(object):
          '''execute a samtools command.
  
          Keyword arguments:
-        catch_stdout -- redirect stdout from the samtools command and return as variable (default True)
+        catch_stdout -- redirect stdout from the samtools command and
+            return as variable (default True)
+        save_stdout -- redirect stdout to a filename.
          raw -- ignore any parsers associated with this samtools command.
+        split_lines -- return stdout (if catch_stdout is True) and stderr
+                       as a list of strings.
          '''
          retval, stderr, stdout = _pysam_dispatch(
              self.collection,
              self.dispatch,
              args,
-            catch_stdout=kwargs.get("catch_stdout", True))
+            catch_stdout=kwargs.get("catch_stdout", True),
+            save_stdout=kwargs.get("save_stdout", None))
+
+        if kwargs.get("split_lines", False):
+            stdout = stdout.splitlines()
+            if stderr:
+                stderr = stderr.splitlines()
  
          if retval:
              raise SamtoolsError(
@@ -61,8 +71,8 @@ class PysamDispatcher(object):
                  "stdout=%s, stderr=%s" %
                  (self.collection,
                   retval, 
-                 "\n".join(stdout),
-                 "\n".join(stderr)))
+                 stdout,
+                 stderr))
  
          self.stderr = stderr
  
@@ -84,5 +94,5 @@ class PysamDispatcher(object):
          '''return the samtools usage information for this command'''
          retval, stderr, stdout = csamtools._samtools_dispatch(
              self.dispatch)
-        return "".join(stderr)
+        return stderr
  
diff --git a/pysam/version.py b/pysam/version.py

index 815e4b9d89c4d21ea8de45d802d47d16c5784f0d..15cefc49fe18d3cec8b2c86c852e8b9e44d0405f 100644 (file)
--- a/pysam/version.py
+++ b/pysam/version.py
@@ -1,7 +1,7 @@
  # pysam versioning information
  
-__version__ = "0.9.0"
+__version__ = "0.9.1"
  
-__samtools_version__ = "1.3"
+__samtools_version__ = "1.3.1"
  
-__htslib_version__ = "1.3"
+__htslib_version__ = "1.3.1"
diff --git a/run_tests_travis.sh b/run_tests_travis.sh

index d2d9988b499449913ab17771ca7011f233bc7e6c..f1fcdcece7958b57288d41546cacb6dbb744c2c6 100755 (executable)
--- a/run_tests_travis.sh
+++ b/run_tests_travis.sh
@@ -34,21 +34,21 @@ mkdir -p $WORKDIR/external-tools
  
  # install htslib
  cd $WORKDIR/external-tools
-curl -L https://github.com/samtools/htslib/releases/download/1.3/htslib-1.3.tar.bz2 > htslib-1.3.tar.bz2
-tar xjvf htslib-1.3.tar.bz2
-cd htslib-1.3
+curl -L https://github.com/samtools/htslib/releases/download/1.3.1/htslib-1.3.1.tar.bz2 > htslib-1.3.1.tar.bz2
+tar xjvf htslib-1.3.1.tar.bz2
+cd htslib-1.3.1
  make
-PATH=$PATH:$WORKDIR/external-tools/htslib-1.3
-LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$WORKDIR/external-tools/htslib-1.3
+PATH=$PATH:$WORKDIR/external-tools/htslib-1.3.1
+LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$WORKDIR/external-tools/htslib-1.3.1
  
  # install samtools, compile against htslib
  cd $WORKDIR/external-tools
-curl -L http://downloads.sourceforge.net/project/samtools/samtools/1.3/samtools-1.3.tar.bz2 > samtools-1.3.tar.bz2
-tar xjvf samtools-1.3.tar.bz2
-cd samtools-1.3
-./configure --with-htslib=../htslib-1.3
+curl -L http://downloads.sourceforge.net/project/samtools/samtools/1.3.1/samtools-1.3.1.tar.bz2 > samtools-1.3.1.tar.bz2
+tar xjvf samtools-1.3.1.tar.bz2
+cd samtools-1.3.1
+./configure --with-htslib=../htslib-1.3.1
  make
-PATH=$PATH:$WORKDIR/external-tools/samtools-1.3
+PATH=$PATH:$WORKDIR/external-tools/samtools-1.3.1
  
  echo "installed samtools"
  samtools --version
@@ -59,12 +59,12 @@ fi
  
  # install bcftools
  cd $WORKDIR/external-tools
-curl -L https://github.com/samtools/bcftools/releases/download/1.3/bcftools-1.3.tar.bz2 > bcftools-1.3.tar.bz2
-tar xjf bcftools-1.3.tar.bz2
-cd bcftools-1.3
-./configure --with-htslib=../htslib-1.3
+curl -L https://github.com/samtools/bcftools/releases/download/1.3.1/bcftools-1.3.1.tar.bz2 > bcftools-1.3.1.tar.bz2
+tar xjf bcftools-1.3.1.tar.bz2
+cd bcftools-1.3.1
+./configure --with-htslib=../htslib-1.3.1
  make
-PATH=$PATH:$WORKDIR/external-tools/bcftools-1.3
+PATH=$PATH:$WORKDIR/external-tools/bcftools-1.3.1
  
  echo "installed bcftools"
  bcftools --version
diff --git a/samtools/bam.c b/samtools/bam.c

index afab6683975bc6f4d1d1d74f1f08b02a4640e4e4..4965e2445cd6721fde7503aa9c1e09216dfe5386 100644 (file)
--- a/samtools/bam.c
+++ b/samtools/bam.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <ctype.h>
  #include <errno.h>
@@ -34,15 +36,22 @@ char *bam_format1(const bam_header_t *header, const bam1_t *b)
  {
      kstring_t str;
      str.l = str.m = 0; str.s = NULL;
-    sam_format1(header, b, &str);
+    if (sam_format1(header, b, &str) < 0) {
+        free(str.s);
+        str.s = NULL;
+        return NULL;
+    }
      return str.s;
  }
  
-void bam_view1(const bam_header_t *header, const bam1_t *b)
+int bam_view1(const bam_header_t *header, const bam1_t *b)
  {
      char *s = bam_format1(header, b);
-    puts(s);
+    int ret = -1;
+    if (!s) return -1;
+    if (puts(s) != EOF) ret = 0;
      free(s);
+    return ret;
  }
  
  int bam_validate1(const bam_header_t *header, const bam1_t *b)
@@ -103,6 +112,9 @@ const char *bam_get_library(bam_header_t *h, const bam1_t *b)
              last = *cp++;
          }
  
+        if (!ID || !LB)
+            continue;
+
          // Check it's the correct ID
          if (strncmp(rg, ID, strlen(rg)) != 0 || ID[strlen(rg)] != '\t')
              continue;
diff --git a/samtools/bam.c.pysam.c b/samtools/bam.c.pysam.c

index a9da5b9ea00c10ac62f0098f4bd1d2d487687b29..188fe8c189524ed1844f360ba6e497416c5272e3 100644 (file)
--- a/samtools/bam.c.pysam.c
+++ b/samtools/bam.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <ctype.h>
  #include <errno.h>
@@ -36,15 +38,22 @@ char *bam_format1(const bam_header_t *header, const bam1_t *b)
  {
      kstring_t str;
      str.l = str.m = 0; str.s = NULL;
-    sam_format1(header, b, &str);
+    if (sam_format1(header, b, &str) < 0) {
+        free(str.s);
+        str.s = NULL;
+        return NULL;
+    }
      return str.s;
  }
  
-void bam_view1(const bam_header_t *header, const bam1_t *b)
+int bam_view1(const bam_header_t *header, const bam1_t *b)
  {
      char *s = bam_format1(header, b);
-    puts(s);
+    int ret = -1;
+    if (!s) return -1;
+    if (fputs(s, pysam_stdout) & fputc('\n', pysam_stdout) != EOF) ret = 0;
      free(s);
+    return ret;
  }
  
  int bam_validate1(const bam_header_t *header, const bam1_t *b)
@@ -105,6 +114,9 @@ const char *bam_get_library(bam_header_t *h, const bam1_t *b)
              last = *cp++;
          }
  
+        if (!ID || !LB)
+            continue;
+
          // Check it's the correct ID
          if (strncmp(rg, ID, strlen(rg)) != 0 || ID[strlen(rg)] != '\t')
              continue;
diff --git a/samtools/bam.h b/samtools/bam.h

index 57aa04499454fd0e8f1afcdebcefbcd13fe04854..e928ce43631c43bcc8183e2a145bf189eb7a97e4 100644 (file)
--- a/samtools/bam.h
+++ b/samtools/bam.h
@@ -38,7 +38,7 @@ DEALINGS IN THE SOFTWARE.  */
    @copyright Genome Research Ltd.
   */
  
-#define BAM_VERSION "1.3"
+#define BAM_VERSION "1.3.1"
  
  #include <stdint.h>
  #include <stdlib.h>
@@ -322,8 +322,11 @@ extern "C" {
       */
      char *bam_format1(const bam_header_t *header, const bam1_t *b);
  
-    /*! @abstract     Formats a BAM record and writes it and \n to stdout */
-    void bam_view1(const bam_header_t *header, const bam1_t *b);
+    /*!
+      @abstract     Formats a BAM record and writes it and \n to stdout
+      @return       0 if successful, -1 on error
+    */
+    int bam_view1(const bam_header_t *header, const bam1_t *b);
  
      /*!
        @abstract       Check whether a BAM record is plausibly valid
diff --git a/samtools/bam2bcf.c b/samtools/bam2bcf.c

index ed433b174b4f419de3d6510d1eee912212725e56..85ce30788bd0e79897c32f5e25d21d16293cd4ee 100644 (file)
--- a/samtools/bam2bcf.c
+++ b/samtools/bam2bcf.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <math.h>
  #include <stdint.h>
  #include <assert.h>
diff --git a/samtools/bam2bcf.c.pysam.c b/samtools/bam2bcf.c.pysam.c

index be3876d987bde6b8a2096de7af66a968197d2a76..6938ec08122fdfb0c0d2480212efd95d66186bf0 100644 (file)
--- a/samtools/bam2bcf.c.pysam.c
+++ b/samtools/bam2bcf.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <math.h>
  #include <stdint.h>
  #include <assert.h>
@@ -108,7 +110,7 @@ static int get_position(const bam_pileup1_t *p, int *len)
          if ( cig==BAM_CHARD_CLIP ) continue;
          if ( cig==BAM_CPAD ) continue;
          if ( cig==BAM_CREF_SKIP ) continue;
-        fprintf(pysamerr,"todo: cigar %d\n", cig);
+        fprintf(pysam_stderr,"todo: cigar %d\n", cig);
          assert(0);
      }
      *len = n_tot_bases;
@@ -479,7 +481,7 @@ void calc_SegBias(const bcf_callret1_t *bcr, bcf_call_t *call)
      double sum = 0;
      const double log2 = log(2.0);
  
-    // fprintf(pysamerr,"M=%.1f  p=%e q=%e f=%f  dp=%d\n",M,p,q,f,avg_dp);
+    // fprintf(pysam_stderr,"M=%.1f  p=%e q=%e f=%f  dp=%d\n",M,p,q,f,avg_dp);
      int i;
      for (i=0; i<call->n; i++)
      {
@@ -494,7 +496,7 @@ void calc_SegBias(const bcf_callret1_t *bcr, bcf_call_t *call)
          else
              tmp = log(2*f*(1-f)*exp(-q) + f*f*exp(-2*q) + (1-f)*(1-f)) + p;
          sum += tmp;
-        // fprintf(pysamerr,"oi=%d %e\n", oi,tmp);
+        // fprintf(pysam_stderr,"oi=%d %e\n", oi,tmp);
      }
      call->seg_bias = sum;
  }
@@ -658,7 +660,7 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
              }
          }
  
-//      if (ref_base < 0) fprintf(pysamerr, "%d,%d,%f,%d\n", call->n_alleles, x, sum_min, call->unseen);
+//      if (ref_base < 0) fprintf(pysam_stderr, "%d,%d,%f,%d\n", call->n_alleles, x, sum_min, call->unseen);
          call->shift = (int)(sum_min + .499);
      }
      // combine annotations
diff --git a/samtools/bam2bcf_indel.c b/samtools/bam2bcf_indel.c

index e1c45c4d6b87d6a1a2e3a934276cce205f7f2f37..5b353fc7c4dabc0d758d65210aa6a80ceac43f42 100644 (file)
--- a/samtools/bam2bcf_indel.c
+++ b/samtools/bam2bcf_indel.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <assert.h>
  #include <ctype.h>
  #include <string.h>
diff --git a/samtools/bam2bcf_indel.c.pysam.c b/samtools/bam2bcf_indel.c.pysam.c

index 45e110169e05e8bd3de5eb0e47cbd5034c083258..21cbb039235d7f52e53c13b78e6fb5c06ddbd9be 100644 (file)
--- a/samtools/bam2bcf_indel.c.pysam.c
+++ b/samtools/bam2bcf_indel.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <assert.h>
  #include <ctype.h>
  #include <string.h>
@@ -225,7 +227,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
              free(aux);
              // TODO revisit how/whether to control printing this warning
              if (hts_verbose >= 2)
-                fprintf(pysamerr, "[%s] excessive INDEL alleles at position %d. Skip the position.\n", __func__, pos + 1);
+                fprintf(pysam_stderr, "[%s] excessive INDEL alleles at position %d. Skip the position.\n", __func__, pos + 1);
              return -1;
          }
          types = (int*)calloc(n_types, sizeof(int));
@@ -298,7 +300,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
              if ((double)(max2&0xffff) / ((max2&0xffff) + (max2>>16)) >= 0.7) max2_i = -1;
              if (max_i >= 0) r[max_i] = 15;
              if (max2_i >= 0) r[max2_i] = 15;
-            //for (i = 0; i < right - left; ++i) fputc("=ACMGRSVTWYHKDBN"[(int)r[i]], pysamerr); fputc('\n', pysamerr);
+            //for (i = 0; i < right - left; ++i) fputc("=ACMGRSVTWYHKDBN"[(int)r[i]], pysam_stderr); fputc('\n', pysam_stderr);
          }
          free(ref0); free(cns);
      }
@@ -366,7 +368,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
          else if (types[t] > 0) ir = est_indelreg(pos, ref, types[t], &inscns[t*max_ins]);
          else ir = est_indelreg(pos, ref, -types[t], 0);
          if (ir > bca->indelreg) bca->indelreg = ir;
-//      fprintf(pysamerr, "%d, %d, %d\n", pos, types[t], ir);
+//      fprintf(pysam_stderr, "%d, %d, %d\n", pos, types[t], ir);
          // realignment
          for (s = K = 0; s < n; ++s) {
              // write ref2
@@ -428,11 +430,11 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                  }
  /*
                  for (l = 0; l < tend - tbeg + abs(types[t]); ++l)
-                    fputc("ACGTN"[(int)ref2[tbeg-left+l]], pysamerr);
-                fputc('\n', pysamerr);
-                for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[(int)query[l]], pysamerr);
-                fputc('\n', pysamerr);
-                fprintf(pysamerr, "pos=%d type=%d read=%d:%d name=%s qbeg=%d tbeg=%d score=%d\n", pos, types[t], s, i, bam1_qname(p->b), qbeg, tbeg, sc);
+                    fputc("ACGTN"[(int)ref2[tbeg-left+l]], pysam_stderr);
+                fputc('\n', pysam_stderr);
+                for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[(int)query[l]], pysam_stderr);
+                fputc('\n', pysam_stderr);
+                fprintf(pysam_stderr, "pos=%d type=%d read=%d:%d name=%s qbeg=%d tbeg=%d score=%d\n", pos, types[t], s, i, bam1_qname(p->b), qbeg, tbeg, sc);
  */
              }
          }
@@ -488,7 +490,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                  if (seqQ > 255) seqQ = 255;
                  p->aux = (sc[0]&0x3f)<<16 | seqQ<<8 | indelQ; // use 22 bits in total
                  sumq[sc[0]&0x3f] += indelQ < seqQ? indelQ : seqQ;
-//              fprintf(pysamerr, "pos=%d read=%d:%d name=%s call=%d indelQ=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ, seqQ);
+//              fprintf(pysam_stderr, "pos=%d read=%d:%d name=%s call=%d indelQ=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ, seqQ);
              }
          }
          // determine bca->indel_types[] and bca->inscns
@@ -520,7 +522,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                      if (x == bca->indel_types[j]) break;
                  p->aux = j<<16 | (j == 4? 0 : (p->aux&0xffff));
                  if ((p->aux>>16&0x3f) > 0) ++n_alt;
-                //fprintf(pysamerr, "X pos=%d read=%d:%d name=%s call=%d type=%d seqQ=%d indelQ=%d\n", pos, s, i, bam1_qname(p->b), (p->aux>>16)&0x3f, bca->indel_types[(p->aux>>16)&0x3f], (p->aux>>8)&0xff, p->aux&0xff);
+                //fprintf(pysam_stderr, "X pos=%d read=%d:%d name=%s call=%d type=%d seqQ=%d indelQ=%d\n", pos, s, i, bam1_qname(p->b), (p->aux>>16)&0x3f, bca->indel_types[(p->aux>>16)&0x3f], (p->aux>>8)&0xff, p->aux&0xff);
              }
          }
      }
diff --git a/samtools/bam2depth.c b/samtools/bam2depth.c

index f1094477411d2795bb076783c12280f618c3c5ad..21220f15ea4fe0b485ff53ad74c9570331b30afb 100644 (file)
--- a/samtools/bam2depth.c
+++ b/samtools/bam2depth.c
@@ -30,6 +30,8 @@ DEALINGS IN THE SOFTWARE.  */
   *   gcc -g -O2 -Wall -o bam2depth -D_MAIN_BAM2DEPTH bam2depth.c -lhts -lz
   */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <string.h>
  #include <stdio.h>
diff --git a/samtools/bam2depth.c.pysam.c b/samtools/bam2depth.c.pysam.c

index 654994938d4fe7e5522ce80a33fe9e4ebfed109f..9d9dc406ca1cf9b1eb790c95861ee2b49694ff6b 100644 (file)
--- a/samtools/bam2depth.c.pysam.c
+++ b/samtools/bam2depth.c.pysam.c
@@ -32,6 +32,8 @@ DEALINGS IN THE SOFTWARE.  */
   *   gcc -g -O2 -Wall -o bam2depth -D_MAIN_BAM2DEPTH bam2depth.c -lhts -lz
   */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <string.h>
  #include <stdio.h>
@@ -73,26 +75,26 @@ static int read_bam(void *data, bam1_t *b) // read level filters better go here
  int read_file_list(const char *file_list,int *n,char **argv[]);
  
  static int usage() {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n");
-    fprintf(pysamerr, "Options:\n");
-    fprintf(pysamerr, "   -a                  output all positions (including zero depth)\n");
-    fprintf(pysamerr, "   -a -a (or -aa)      output absolutely all positions, including unused ref. sequences\n");
-    fprintf(pysamerr, "   -b <bed>            list of positions or regions\n");
-    fprintf(pysamerr, "   -f <list>           list of input BAM filenames, one per line [null]\n");
-    fprintf(pysamerr, "   -l <int>            read length threshold (ignore reads shorter than <int>)\n");
-    fprintf(pysamerr, "   -d/-m <int>         maximum coverage depth [8000]\n");  // the htslib's default
-    fprintf(pysamerr, "   -q <int>            base quality threshold\n");
-    fprintf(pysamerr, "   -Q <int>            mapping quality threshold\n");
-    fprintf(pysamerr, "   -r <chr:from-to>    region\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n");
+    fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "   -a                  output all positions (including zero depth)\n");
+    fprintf(pysam_stderr, "   -a -a (or -aa)      output absolutely all positions, including unused ref. sequences\n");
+    fprintf(pysam_stderr, "   -b <bed>            list of positions or regions\n");
+    fprintf(pysam_stderr, "   -f <list>           list of input BAM filenames, one per line [null]\n");
+    fprintf(pysam_stderr, "   -l <int>            read length threshold (ignore reads shorter than <int>)\n");
+    fprintf(pysam_stderr, "   -d/-m <int>         maximum coverage depth [8000]\n");  // the htslib's default
+    fprintf(pysam_stderr, "   -q <int>            base quality threshold\n");
+    fprintf(pysam_stderr, "   -Q <int>            mapping quality threshold\n");
+    fprintf(pysam_stderr, "   -r <chr:from-to>    region\n");
  
-    sam_global_opt_help(pysamerr, "-.--.");
+    sam_global_opt_help(pysam_stderr, "-.--.");
  
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "The output is a simple tab-separated table with three columns: reference name,\n");
-    fprintf(pysamerr, "position, and coverage depth.  Note that positions with zero coverage may be\n");
-    fprintf(pysamerr, "omitted by default; see the -a option.\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "The output is a simple tab-separated table with three columns: reference name,\n");
+    fprintf(pysam_stderr, "position, and coverage depth.  Note that positions with zero coverage may be\n");
+    fprintf(pysam_stderr, "omitted by default; see the -a option.\n");
+    fprintf(pysam_stderr, "\n");
  
      return 1;
  }
@@ -162,18 +164,18 @@ int main_depth(int argc, char *argv[])
          rf = SAM_FLAG | SAM_RNAME | SAM_POS | SAM_MAPQ | SAM_CIGAR | SAM_SEQ;
          if (baseQ) rf |= SAM_QUAL;
          if (hts_set_opt(data[i]->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) {
-            fprintf(pysamerr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
+            fprintf(pysam_stderr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
              return 1;
          }
          if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
-            fprintf(pysamerr, "Failed to set CRAM_OPT_DECODE_MD value\n");
+            fprintf(pysam_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
              return 1;
          }
          data[i]->min_mapQ = mapQ;                    // set the mapQ filter
          data[i]->min_len  = min_len;                 // set the qlen filter
          data[i]->hdr = sam_hdr_read(data[i]->fp);    // read the BAM header
          if (data[i]->hdr == NULL) {
-            fprintf(pysamerr, "Couldn't read header for \"%s\"\n",
+            fprintf(pysam_stderr, "Couldn't read header for \"%s\"\n",
                      argv[optind+i]);
              status = EXIT_FAILURE;
              goto depth_end;
@@ -218,10 +220,10 @@ int main_depth(int argc, char *argv[])
                      while (++last_pos < h->target_len[last_tid]) {
                          if (bed && bed_overlap(bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
                              continue;
-                        fputs(h->target_name[last_tid], stdout); printf("\t%d", last_pos+1);
+                        fputs(h->target_name[last_tid], pysam_stdout); fprintf(pysam_stdout, "\t%d", last_pos+1);
                          for (i = 0; i < n; i++)
-                            putchar('\t'), putchar('0');
-                        putchar('\n');
+                            fputc('\t', pysam_stdout), fputc('0', pysam_stdout);
+                        fputc('\n', pysam_stdout);
                      }
                  }
                  last_tid++;
@@ -233,16 +235,16 @@ int main_depth(int argc, char *argv[])
                  if (last_pos < beg) continue; // out of range; skip
                  if (bed && bed_overlap(bed, h->target_name[tid], last_pos, last_pos + 1) == 0)
                      continue;
-                fputs(h->target_name[tid], stdout); printf("\t%d", last_pos+1);
+                fputs(h->target_name[tid], pysam_stdout); fprintf(pysam_stdout, "\t%d", last_pos+1);
                  for (i = 0; i < n; i++)
-                    putchar('\t'), putchar('0');
-                putchar('\n');
+                    fputc('\t', pysam_stdout), fputc('0', pysam_stdout);
+                fputc('\n', pysam_stdout);
              }
  
              last_tid = tid;
              last_pos = pos;
          }
-        fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster
+        fputs(h->target_name[tid], pysam_stdout); fprintf(pysam_stdout, "\t%d", pos+1); // a customized fprintf(pysam_stdout, ) would be faster
          for (i = 0; i < n; ++i) { // base level filters have to go here
              int j, m = 0;
              for (j = 0; j < n_plp[i]; ++j) {
@@ -250,9 +252,9 @@ int main_depth(int argc, char *argv[])
                  if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos
                  else if (bam_get_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality
              }
-            printf("\t%d", n_plp[i] - m); // this the depth to output
+            fprintf(pysam_stdout, "\t%d", n_plp[i] - m); // this the depth to output
          }
-        putchar('\n');
+        fputc('\n', pysam_stdout);
      }
      if (ret < 0) status = EXIT_FAILURE;
      free(n_plp); free(plp);
@@ -265,10 +267,10 @@ int main_depth(int argc, char *argv[])
                  if (last_pos >= end) break;
                  if (bed && bed_overlap(bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
                      continue;
-                fputs(h->target_name[last_tid], stdout); printf("\t%d", last_pos+1);
+                fputs(h->target_name[last_tid], pysam_stdout); fprintf(pysam_stdout, "\t%d", last_pos+1);
                  for (i = 0; i < n; i++)
-                    putchar('\t'), putchar('0');
-                putchar('\n');
+                    fputc('\t', pysam_stdout), fputc('0', pysam_stdout);
+                fputc('\n', pysam_stdout);
              }
              last_tid++;
              last_pos = -1;
@@ -296,7 +298,7 @@ depth_end:
  }
  
  #ifdef _MAIN_BAM2DEPTH
-int main(int argc, char *argv[])
+int samtools_bam2depth_main(int argc, char *argv[])
  {
      return main_depth(argc, argv);
  }
diff --git a/samtools/bam_addrprg.c b/samtools/bam_addrprg.c

index 2b4939f75ccdba2124a4cd6d72629e4284d84732..f7bbfab0d74a2171bc70cfcd39b4c19091742b16 100644 (file)
--- a/samtools/bam_addrprg.c
+++ b/samtools/bam_addrprg.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <htslib/sam.h>
  #include <htslib/kstring.h>
  #include "samtools.h"
diff --git a/samtools/bam_addrprg.c.pysam.c b/samtools/bam_addrprg.c.pysam.c

index 91fa9cdf2c195a1d9b897c4593dee9e2dad34500..2ddd1b16d30ee663e9b12d42f9e704fcb91bcb40 100644 (file)
--- a/samtools/bam_addrprg.c.pysam.c
+++ b/samtools/bam_addrprg.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <htslib/sam.h>
  #include <htslib/kstring.h>
  #include "samtools.h"
@@ -95,7 +97,7 @@ static char* basic_unescape(const char* in)
          if (*in == '\\') {
              ++in;
              if (*in == '\0') {
-                fprintf(pysamerr, "[%s] Unterminated escape sequence.\n", __func__);
+                fprintf(pysam_stderr, "[%s] Unterminated escape sequence.\n", __func__);
                  free(out);
                  return NULL;
              }
@@ -107,11 +109,11 @@ static char* basic_unescape(const char* in)
                  *ptr = '\t';
                  break;
              case 'n':
-                fprintf(pysamerr, "[%s] \\n in escape sequence is not supported.\n", __func__);
+                fprintf(pysam_stderr, "[%s] \\n in escape sequence is not supported.\n", __func__);
                  free(out);
                  return NULL;
              default:
-                fprintf(pysamerr, "[%s] Unsupported escape sequence.\n", __func__);
+                fprintf(pysam_stderr, "[%s] Unsupported escape sequence.\n", __func__);
                  free(out);
                  return NULL;
              }
@@ -226,7 +228,7 @@ static void usage(FILE *fp)
              "\n"
              "Options:\n"
              "  -m MODE   Set the mode of operation from one of overwrite_all, orphan_only [overwrite_all]\n"
-            "  -o FILE   Where to write output to [stdout]\n"
+            "  -o FILE   Where to write output to [pysam_stdout]\n"
              "  -r STRING @RG line text\n"
              "  -R STRING ID of @RG line in existing header to use\n"
              );
@@ -238,11 +240,11 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
      *opts = NULL;
      int n;
  
-    if (argc == 1) { usage(stdout); return true; }
+    if (argc == 1) { usage(pysam_stdout); return true; }
  
      parsed_opts_t* retval = calloc(1, sizeof(parsed_opts_t));
      if (! retval ) {
-        fprintf(pysamerr, "[%s] Out of memory allocating parsed_opts_t\n", __func__);
+        fprintf(pysam_stderr, "[%s] Out of memory allocating parsed_opts_t\n", __func__);
          return false;
      }
      // Set defaults
@@ -276,7 +278,7 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
                  } else if (strcmp(optarg, "orphan_only") == 0) {
                      retval->mode = orphan_only;
                  } else {
-                    usage(pysamerr);
+                    usage(pysam_stderr);
                      return false;
                  }
                  break;
@@ -285,17 +287,17 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
                  retval->output_name = strdup(optarg);
                  break;
              case 'h':
-                usage(stdout);
+                usage(pysam_stdout);
                  free(retval);
                  return true;
              case '?':
-                usage(pysamerr);
+                usage(pysam_stderr);
                  free(retval);
                  return false;
              case 'O':
              default:
                  if (parse_sam_global_opt(n, optarg, lopts, &retval->ga) == 0) break;
-                usage(pysamerr);
+                usage(pysam_stderr);
                  free(retval);
                  return false;
          }
@@ -303,13 +305,13 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
      retval->rg_line = ks_release(&rg_line);
  
      if (argc-optind < 1) {
-        fprintf(pysamerr, "You must specify an input file.\n");
-        usage(pysamerr);
+        fprintf(pysam_stderr, "You must specify an input file.\n");
+        usage(pysam_stderr);
          cleanup_opts(retval);
          return false;
      }
      if (retval->rg_id && retval->rg_line) {
-        fprintf(pysamerr, "The options -r and -R are mutually exclusive.\n");
+        fprintf(pysam_stderr, "The options -r and -R are mutually exclusive.\n");
          cleanup_opts(retval);
          return false;
      }
@@ -319,7 +321,7 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
          char* tmp = basic_unescape(retval->rg_line);
  
          if ((retval->rg_id = get_rg_id(tmp)) == NULL) {
-            fprintf(pysamerr, "[%s] The supplied RG line lacks an ID tag.\n", __func__);
+            fprintf(pysam_stderr, "[%s] The supplied RG line lacks an ID tag.\n", __func__);
              free(tmp);
              cleanup_opts(retval);
              return false;
@@ -361,7 +363,7 @@ static void orphan_only_func(const state_t* state, bam1_t* file_read)
  static bool init(const parsed_opts_t* opts, state_t** state_out) {
      state_t* retval = (state_t*) calloc(1, sizeof(state_t));
      if (retval == NULL) {
-        fprintf(pysamerr, "[init] Out of memory allocating state struct.\n");
+        fprintf(pysam_stderr, "[init] Out of memory allocating state struct.\n");
          return false;
      }
      *state_out = retval;
@@ -369,7 +371,7 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
      // Open files
      retval->input_file = sam_open_format(opts->input_name, "r", &opts->ga.in);
      if (retval->input_file == NULL) {
-        fprintf(pysamerr, "[init] Could not open input file: %s\n", opts->input_name);
+        fprintf(pysam_stderr, "[init] Could not open input file: %s\n", opts->input_name);
          return false;
      }
      retval->input_header = sam_hdr_read(retval->input_file);
@@ -386,14 +388,14 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
          // Append new RG line to header.
          // Check does not already exist
          if ( confirm_rg(retval->output_header, opts->rg_id) ) {
-            fprintf(pysamerr, "[init] ID of new RG line specified conflicts with that of an existing header RG line. Overwrite not yet implemented.\n");
+            fprintf(pysam_stderr, "[init] ID of new RG line specified conflicts with that of an existing header RG line. Overwrite not yet implemented.\n");
              return false;
          }
          retval->rg_id = strdup(opts->rg_id);
          size_t new_len = strlen( retval->output_header->text ) + strlen( opts->rg_line ) + 2;
          char* new_header = malloc(new_len);
          if (!new_header) {
-            fprintf(pysamerr, "[init] Out of memory whilst writing new header.\n");
+            fprintf(pysam_stderr, "[init] Out of memory whilst writing new header.\n");
              return false;
          }
          sprintf(new_header,"%s%s\n", retval->output_header->text, opts->rg_line);
@@ -404,13 +406,13 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
          if (opts->rg_id) {
              // Confirm what has been supplied exists
              if ( !confirm_rg(retval->output_header, opts->rg_id) ) {
-                fprintf(pysamerr, "RG ID supplied does not exist in header. Supply full @RG line with -r instead?\n");
+                fprintf(pysam_stderr, "RG ID supplied does not exist in header. Supply full @RG line with -r instead?\n");
                  return false;
              }
              retval->rg_id = strdup(opts->rg_id);
          } else {
              if ((retval->rg_id = get_first_rgid(retval->output_header)) == NULL ) {
-                fprintf(pysamerr, "No RG specified on command line or in existing header.\n");
+                fprintf(pysam_stderr, "No RG specified on command line or in existing header.\n");
                  return false;
              }
          }
diff --git a/samtools/bam_aux.c b/samtools/bam_aux.c

index 7a67de899f821664854f9f171cfc21e61107ee48..d90b4a86eca508a4a327c8169ac8e90278aad43f 100644 (file)
--- a/samtools/bam_aux.c
+++ b/samtools/bam_aux.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <ctype.h>
  #include <limits.h>
  #include "bam.h"
diff --git a/samtools/bam_aux.c.pysam.c b/samtools/bam_aux.c.pysam.c

index 475c7726f8aef5ab150e00824966f9ad61f99f04..c6bd0aab3c81db78db1bf30c3de5fa010d4a16d5 100644 (file)
--- a/samtools/bam_aux.c.pysam.c
+++ b/samtools/bam_aux.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <ctype.h>
  #include <limits.h>
  #include "bam.h"
diff --git a/samtools/bam_cat.c b/samtools/bam_cat.c

index 83cc0fbc0f6c4248a8f479497283f5e574d66b8c..5c303d1ed27db2ad0a6422564376145458c225ec 100644 (file)
--- a/samtools/bam_cat.c
+++ b/samtools/bam_cat.c
@@ -1,6 +1,6 @@
  /*  bam_cat.c -- efficiently concatenates bam files.
  
-    Copyright (C) 2008-2009, 2011-2013 Genome Research Ltd.
+    Copyright (C) 2008-2009, 2011-2013, 2015-2016 Genome Research Ltd.
      Modified SAMtools work copyright (C) 2010 Illumina, Inc.
  
  Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -34,6 +34,8 @@ and modified to perform concatenation by Chris Saunders on behalf of
  Illumina.
  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
@@ -43,6 +45,7 @@ Illumina.
  #include "htslib/sam.h"
  #include "htslib/cram.h"
  #include "htslib/khash.h"
+#include "samtools.h"
  
  KHASH_MAP_INIT_STR(s2i, int)
  
@@ -195,7 +198,7 @@ static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t
  
          in = sam_open(fn[i], "rc");
          if (in == 0) {
-            fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
+            print_error_errno("cat", "fail to open file '%s'", fn[i]);
              return NULL;
          }
          in_c = in->fp.cram;
@@ -302,15 +305,18 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
      sprintf(vers, "%d.%d", vers_maj, vers_min);
      out = sam_open(outcram, "wc");
      if (out == 0) {
-        fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outcram);
-        return 1;
+        print_error_errno("cat", "fail to open output file '%s'", outcram);
+        return -1;
      }
      out_c = out->fp.cram;
      cram_set_option(out_c, CRAM_OPT_VERSION, vers);
      //fprintf(stderr, "Creating cram vers %s\n", vers);
  
      cram_fd_set_header(out_c, sam_hdr_parse_(new_h->text,  new_h->l_text)); // needed?
-    sam_hdr_write(out, new_h);
+    if (sam_hdr_write(out, new_h) < 0) {
+        print_error_errno("cat", "Couldn't write header");
+        return -1;
+    }
  
      for (i = 0; i < nfn; ++i) {
          samFile *in;
@@ -321,7 +327,7 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
  
          in = sam_open(fn[i], "rc");
          if (in == 0) {
-            fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
+            print_error_errno("cat", "fail to open file '%s'", fn[i]);
              return -1;
          }
          in_c = in->fp.cram;
@@ -414,29 +420,37 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
  
  int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
  {
-    BGZF *fp;
-    uint8_t *buf;
+    BGZF *fp, *in = NULL;
+    uint8_t *buf = NULL;
      uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
      const int es=BGZF_EMPTY_BLOCK_SIZE;
      int i;
  
      fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w");
      if (fp == 0) {
-        fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam);
-        return 1;
+        print_error_errno("cat", "fail to open output file '%s'", outbam);
+        return -1;
+    }
+    if (h) {
+        if (bam_hdr_write(fp, h) < 0) {
+            print_error_errno("cat", "Couldn't write header");
+            goto fail;
+        }
      }
-    if (h) bam_hdr_write(fp, h);
  
      buf = (uint8_t*) malloc(BUF_SIZE);
+    if (!buf) {
+        fprintf(stderr, "[%s] Couldn't allocate buffer\n", __func__);
+        goto fail;
+    }
      for(i = 0; i < nfn; ++i){
-        BGZF *in;
          bam_hdr_t *old;
          int len,j;
  
          in = strcmp(fn[i], "-")? bgzf_open(fn[i], "r") : bgzf_fdopen(fileno(stdin), "r");
          if (in == 0) {
-            fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
-            return -1;
+            print_error_errno("cat", "fail to open file '%s'", fn[i]);
+            goto fail;
          }
          if (in->is_write) return -1;
  
@@ -444,14 +458,18 @@ int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
          if (old == NULL) {
              fprintf(stderr, "[%s] ERROR: couldn't read header for '%s'.\n",
                      __func__, fn[i]);
-            bgzf_close(in);
-            return -1;
+            goto fail;
+        }
+        if (h == 0 && i == 0) {
+            if (bam_hdr_write(fp, old) < 0) {
+                print_error_errno("cat", "Couldn't write header");
+                goto fail;
+            }
          }
-        if (h == 0 && i == 0) bam_hdr_write(fp, old);
  
          if (in->block_offset < in->block_length) {
-            bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
-            bgzf_flush(fp);
+            if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
+            if (bgzf_flush(fp) != 0) goto write_fail;
          }
  
          j=0;
@@ -460,16 +478,19 @@ int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
                  int diff=es-len;
                  if(j==0) {
                      fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
-                    return -1;
+                    goto fail;
                  }
-                bgzf_raw_write(fp, ebuf, len);
+                if (bgzf_raw_write(fp, ebuf, len) < 0) goto write_fail;
+
                  memcpy(ebuf,ebuf+len,diff);
                  memcpy(ebuf+diff,buf,len);
              } else {
-                if(j!=0) bgzf_raw_write(fp, ebuf, es);
+                if(j!=0) {
+                    if (bgzf_raw_write(fp, ebuf, es) < 0) goto write_fail;
+                }
                  len-= es;
                  memcpy(ebuf,buf+len,es);
-                bgzf_raw_write(fp, buf, len);
+                if (bgzf_raw_write(fp, buf, len) < 0) goto write_fail;
              }
              j=1;
          }
@@ -482,15 +503,27 @@ int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
              if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
                  fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
                  fprintf(stderr, " Possible output corruption.\n");
-                bgzf_raw_write(fp, ebuf, es);
+                if (bgzf_raw_write(fp, ebuf, es) < 0) goto write_fail;
              }
          }
          bam_hdr_destroy(old);
          bgzf_close(in);
+        in = NULL;
      }
      free(buf);
-    bgzf_close(fp);
+    if (bgzf_close(fp) < 0) {
+        fprintf(stderr, "[%s] Error on closing '%s'.\n", __func__, outbam);
+        return -1;
+    }
      return 0;
+
+ write_fail:
+    fprintf(stderr, "[%s] Error writing to '%s'.\n", __func__, outbam);
+ fail:
+    if (in) bgzf_close(in);
+    if (fp) bgzf_close(fp);
+    free(buf);
+    return -1;
  }
  
  
@@ -498,7 +531,7 @@ int main_cat(int argc, char *argv[])
  {
      bam_hdr_t *h = 0;
      char *outfn = 0;
-    int c, ret;
+    int c, ret = 0;
      samFile *in;
  
      while ((c = getopt(argc, argv, "h:o:")) >= 0) {
@@ -529,19 +562,21 @@ int main_cat(int argc, char *argv[])
  
      in = sam_open(argv[optind], "r");
      if (!in) {
-        fprintf(stderr, "[%s] ERROR: failed to open file '%s'.\n", __func__, argv[optind]);
+        print_error_errno("cat", "failed to open file '%s'", argv[optind]);
          return 1;
      }
  
      switch (hts_get_format(in)->format) {
      case bam:
          sam_close(in);
-        ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
+        if (bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
+            ret = 1;
          break;
  
      case cram:
          sam_close(in);
-        ret = cram_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
+        if (cram_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
+            ret = 1;
          break;
  
      default:
diff --git a/samtools/bam_cat.c.pysam.c b/samtools/bam_cat.c.pysam.c

index 004911aa756d29462461b99e1a4700e1f5460348..daa0454f50fb4a2089ffeebb6e12921a773e25c1 100644 (file)
--- a/samtools/bam_cat.c.pysam.c
+++ b/samtools/bam_cat.c.pysam.c
@@ -2,7 +2,7 @@
  
  /*  bam_cat.c -- efficiently concatenates bam files.
  
-    Copyright (C) 2008-2009, 2011-2013 Genome Research Ltd.
+    Copyright (C) 2008-2009, 2011-2013, 2015-2016 Genome Research Ltd.
      Modified SAMtools work copyright (C) 2010 Illumina, Inc.
  
  Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -36,6 +36,8 @@ and modified to perform concatenation by Chris Saunders on behalf of
  Illumina.
  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
@@ -45,6 +47,7 @@ Illumina.
  #include "htslib/sam.h"
  #include "htslib/cram.h"
  #include "htslib/khash.h"
+#include "samtools.h"
  
  KHASH_MAP_INIT_STR(s2i, int)
  
@@ -197,7 +200,7 @@ static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t
  
          in = sam_open(fn[i], "rc");
          if (in == 0) {
-            fprintf(pysamerr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
+            print_error_errno("cat", "fail to open file '%s'", fn[i]);
              return NULL;
          }
          in_c = in->fp.cram;
@@ -206,7 +209,7 @@ static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t
          int vmin = cram_minor_vers(in_c);
          if ((vers_maj != -1 && vers_maj != vmaj) ||
              (vers_min != -1 && vers_min != vmin)) {
-            fprintf(pysamerr, "[%s] ERROR: input files have differing version numbers.\n",
+            fprintf(pysam_stderr, "[%s] ERROR: input files have differing version numbers.\n",
                      __func__);
              return NULL;
          }
@@ -226,7 +229,7 @@ static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t
              int added;
  
              new_rg = hash_s2i_inc(*rg2id, rg2id_in->id[ki], rg2id_in->line[ki], &added);
-            //fprintf(pysamerr, "RG %s: #%d -> #%d\n",
+            //fprintf(pysam_stderr, "RG %s: #%d -> #%d\n",
              //        rg2id_in->id[ki], ki, new_rg);
  
              if (added) {
@@ -242,7 +245,7 @@ static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t
              }
  
              if (new_rg != ki && rg2id_in->n_id > 1) {
-                fprintf(pysamerr, "[%s] ERROR: Same size @RG lists but differing order / contents\n",
+                fprintf(pysam_stderr, "[%s] ERROR: Same size @RG lists but differing order / contents\n",
                          __func__);
                  return NULL;
              }
@@ -304,15 +307,18 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
      sprintf(vers, "%d.%d", vers_maj, vers_min);
      out = sam_open(outcram, "wc");
      if (out == 0) {
-        fprintf(pysamerr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outcram);
-        return 1;
+        print_error_errno("cat", "fail to open output file '%s'", outcram);
+        return -1;
      }
      out_c = out->fp.cram;
      cram_set_option(out_c, CRAM_OPT_VERSION, vers);
-    //fprintf(pysamerr, "Creating cram vers %s\n", vers);
+    //fprintf(pysam_stderr, "Creating cram vers %s\n", vers);
  
      cram_fd_set_header(out_c, sam_hdr_parse_(new_h->text,  new_h->l_text)); // needed?
-    sam_hdr_write(out, new_h);
+    if (sam_hdr_write(out, new_h) < 0) {
+        print_error_errno("cat", "Couldn't write header");
+        return -1;
+    }
  
      for (i = 0; i < nfn; ++i) {
          samFile *in;
@@ -323,7 +329,7 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
  
          in = sam_open(fn[i], "rc");
          if (in == 0) {
-            fprintf(pysamerr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
+            print_error_errno("cat", "fail to open file '%s'", fn[i]);
              return -1;
          }
          in_c = in->fp.cram;
@@ -367,7 +373,7 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
              // we need to edit the compression header. IF WE CAN.
              if (new_rg) {
                  int zero = 0;
-                //fprintf(pysamerr, "Transcode RG %d to %d\n", 0, new_rg);
+                //fprintf(pysam_stderr, "Transcode RG %d to %d\n", 0, new_rg);
                  cram_transcode_rg(in_c, out_c, c, 1, &zero, &new_rg);
              } else {
                  int32_t num_slices;
@@ -416,44 +422,56 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
  
  int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
  {
-    BGZF *fp;
-    uint8_t *buf;
+    BGZF *fp, *in = NULL;
+    uint8_t *buf = NULL;
      uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
      const int es=BGZF_EMPTY_BLOCK_SIZE;
      int i;
  
-    fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w");
+    fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(pysam_stdout), "w");
      if (fp == 0) {
-        fprintf(pysamerr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam);
-        return 1;
+        print_error_errno("cat", "fail to open output file '%s'", outbam);
+        return -1;
+    }
+    if (h) {
+        if (bam_hdr_write(fp, h) < 0) {
+            print_error_errno("cat", "Couldn't write header");
+            goto fail;
+        }
      }
-    if (h) bam_hdr_write(fp, h);
  
      buf = (uint8_t*) malloc(BUF_SIZE);
+    if (!buf) {
+        fprintf(pysam_stderr, "[%s] Couldn't allocate buffer\n", __func__);
+        goto fail;
+    }
      for(i = 0; i < nfn; ++i){
-        BGZF *in;
          bam_hdr_t *old;
          int len,j;
  
          in = strcmp(fn[i], "-")? bgzf_open(fn[i], "r") : bgzf_fdopen(fileno(stdin), "r");
          if (in == 0) {
-            fprintf(pysamerr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
-            return -1;
+            print_error_errno("cat", "fail to open file '%s'", fn[i]);
+            goto fail;
          }
          if (in->is_write) return -1;
  
          old = bam_hdr_read(in);
          if (old == NULL) {
-            fprintf(pysamerr, "[%s] ERROR: couldn't read header for '%s'.\n",
+            fprintf(pysam_stderr, "[%s] ERROR: couldn't read header for '%s'.\n",
                      __func__, fn[i]);
-            bgzf_close(in);
-            return -1;
+            goto fail;
+        }
+        if (h == 0 && i == 0) {
+            if (bam_hdr_write(fp, old) < 0) {
+                print_error_errno("cat", "Couldn't write header");
+                goto fail;
+            }
          }
-        if (h == 0 && i == 0) bam_hdr_write(fp, old);
  
          if (in->block_offset < in->block_length) {
-            bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
-            bgzf_flush(fp);
+            if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
+            if (bgzf_flush(fp) != 0) goto write_fail;
          }
  
          j=0;
@@ -461,17 +479,20 @@ int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
              if(len<es){
                  int diff=es-len;
                  if(j==0) {
-                    fprintf(pysamerr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
-                    return -1;
+                    fprintf(pysam_stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
+                    goto fail;
                  }
-                bgzf_raw_write(fp, ebuf, len);
+                if (bgzf_raw_write(fp, ebuf, len) < 0) goto write_fail;
+
                  memcpy(ebuf,ebuf+len,diff);
                  memcpy(ebuf+diff,buf,len);
              } else {
-                if(j!=0) bgzf_raw_write(fp, ebuf, es);
+                if(j!=0) {
+                    if (bgzf_raw_write(fp, ebuf, es) < 0) goto write_fail;
+                }
                  len-= es;
                  memcpy(ebuf,buf+len,es);
-                bgzf_raw_write(fp, buf, len);
+                if (bgzf_raw_write(fp, buf, len) < 0) goto write_fail;
              }
              j=1;
          }
@@ -482,17 +503,29 @@ int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
              const uint8_t gzip2=ebuf[1];
              const uint32_t isize=*((uint32_t*)(ebuf+es-4));
              if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
-                fprintf(pysamerr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
-                fprintf(pysamerr, " Possible output corruption.\n");
-                bgzf_raw_write(fp, ebuf, es);
+                fprintf(pysam_stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
+                fprintf(pysam_stderr, " Possible output corruption.\n");
+                if (bgzf_raw_write(fp, ebuf, es) < 0) goto write_fail;
              }
          }
          bam_hdr_destroy(old);
          bgzf_close(in);
+        in = NULL;
      }
      free(buf);
-    bgzf_close(fp);
+    if (bgzf_close(fp) < 0) {
+        fprintf(pysam_stderr, "[%s] Error on closing '%s'.\n", __func__, outbam);
+        return -1;
+    }
      return 0;
+
+ write_fail:
+    fprintf(pysam_stderr, "[%s] Error writing to '%s'.\n", __func__, outbam);
+ fail:
+    if (in) bgzf_close(in);
+    if (fp) bgzf_close(fp);
+    free(buf);
+    return -1;
  }
  
  
@@ -500,7 +533,7 @@ int main_cat(int argc, char *argv[])
  {
      bam_hdr_t *h = 0;
      char *outfn = 0;
-    int c, ret;
+    int c, ret = 0;
      samFile *in;
  
      while ((c = getopt(argc, argv, "h:o:")) >= 0) {
@@ -508,12 +541,12 @@ int main_cat(int argc, char *argv[])
              case 'h': {
                  samFile *fph = sam_open(optarg, "r");
                  if (fph == 0) {
-                    fprintf(pysamerr, "[%s] ERROR: fail to read the header from '%s'.\n", __func__, argv[1]);
+                    fprintf(pysam_stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __func__, argv[1]);
                      return 1;
                  }
                  h = sam_hdr_read(fph);
                  if (h == NULL) {
-                    fprintf(pysamerr,
+                    fprintf(pysam_stderr,
                              "[%s] ERROR: failed to read the header for '%s'.\n",
                              __func__, argv[1]);
                      return 1;
@@ -525,30 +558,32 @@ int main_cat(int argc, char *argv[])
          }
      }
      if (argc - optind < 1) {
-        fprintf(pysamerr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> [...]\n");
+        fprintf(pysam_stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> [...]\n");
          return 1;
      }
  
      in = sam_open(argv[optind], "r");
      if (!in) {
-        fprintf(pysamerr, "[%s] ERROR: failed to open file '%s'.\n", __func__, argv[optind]);
+        print_error_errno("cat", "failed to open file '%s'", argv[optind]);
          return 1;
      }
  
      switch (hts_get_format(in)->format) {
      case bam:
          sam_close(in);
-        ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
+        if (bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
+            ret = 1;
          break;
  
      case cram:
          sam_close(in);
-        ret = cram_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
+        if (cram_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
+            ret = 1;
          break;
  
      default:
          sam_close(in);
-        fprintf(pysamerr, "[%s] ERROR: input is not BAM or CRAM\n", __func__);
+        fprintf(pysam_stderr, "[%s] ERROR: input is not BAM or CRAM\n", __func__);
          return 1;
      }
      free(outfn);
diff --git a/samtools/bam_color.c b/samtools/bam_color.c

index 3983c442a872692e9c55a9723f47a309bbfafb52..bee19b9da4a45cd1fef1672d557567bec40f9ca7 100644 (file)
--- a/samtools/bam_color.c
+++ b/samtools/bam_color.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <ctype.h>
  #include "bam.h"
  
diff --git a/samtools/bam_color.c.pysam.c b/samtools/bam_color.c.pysam.c

index 78d8510feee7864c088be5ff914a8035f35cda75..6bd12c4fe00e8bf143b71319b6e828e78a6ea5b9 100644 (file)
--- a/samtools/bam_color.c.pysam.c
+++ b/samtools/bam_color.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <ctype.h>
  #include "bam.h"
  
diff --git a/samtools/bam_flags.c b/samtools/bam_flags.c

index ddc7b11820163e954b29d56791614aa82af17be6..11a82b6eb377ba28c8dd6170fb60ac732b841db3 100644 (file)
--- a/samtools/bam_flags.c
+++ b/samtools/bam_flags.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <ctype.h>
  #include <string.h>
  #include <stdlib.h>
diff --git a/samtools/bam_flags.c.pysam.c b/samtools/bam_flags.c.pysam.c

index f4df05769f3c98a2a91a9a702615e4320d0113ae..4895f9a00def9761f68c518dfa181da259d1485c 100644 (file)
--- a/samtools/bam_flags.c.pysam.c
+++ b/samtools/bam_flags.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <ctype.h>
  #include <string.h>
  #include <stdlib.h>
@@ -35,24 +37,24 @@ DEALINGS IN THE SOFTWARE.  */
  
  static void usage(void)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About: Convert between textual and numeric flag representation\n");
-    fprintf(pysamerr, "Usage: samtools flags INT|STR[,...]\n");
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Flags:\n");
-    fprintf(pysamerr, "\t0x%x\tPAIRED        .. paired-end (or multiple-segment) sequencing technology\n", BAM_FPAIRED);
-    fprintf(pysamerr, "\t0x%x\tPROPER_PAIR   .. each segment properly aligned according to the aligner\n", BAM_FPROPER_PAIR);
-    fprintf(pysamerr, "\t0x%x\tUNMAP         .. segment unmapped\n", BAM_FUNMAP);
-    fprintf(pysamerr, "\t0x%x\tMUNMAP        .. next segment in the template unmapped\n", BAM_FMUNMAP);
-    fprintf(pysamerr, "\t0x%x\tREVERSE       .. SEQ is reverse complemented\n", BAM_FREVERSE);
-    fprintf(pysamerr, "\t0x%x\tMREVERSE      .. SEQ of the next segment in the template is reversed\n", BAM_FMREVERSE);
-    fprintf(pysamerr, "\t0x%x\tREAD1         .. the first segment in the template\n", BAM_FREAD1);
-    fprintf(pysamerr, "\t0x%x\tREAD2         .. the last segment in the template\n", BAM_FREAD2);
-    fprintf(pysamerr, "\t0x%x\tSECONDARY     .. secondary alignment\n", BAM_FSECONDARY);
-    fprintf(pysamerr, "\t0x%x\tQCFAIL        .. not passing quality controls\n", BAM_FQCFAIL);
-    fprintf(pysamerr, "\t0x%x\tDUP           .. PCR or optical duplicate\n", BAM_FDUP);
-    fprintf(pysamerr, "\t0x%x\tSUPPLEMENTARY .. supplementary alignment\n", BAM_FSUPPLEMENTARY);
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About: Convert between textual and numeric flag representation\n");
+    fprintf(pysam_stderr, "Usage: samtools flags INT|STR[,...]\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Flags:\n");
+    fprintf(pysam_stderr, "\t0x%x\tPAIRED        .. paired-end (or multiple-segment) sequencing technology\n", BAM_FPAIRED);
+    fprintf(pysam_stderr, "\t0x%x\tPROPER_PAIR   .. each segment properly aligned according to the aligner\n", BAM_FPROPER_PAIR);
+    fprintf(pysam_stderr, "\t0x%x\tUNMAP         .. segment unmapped\n", BAM_FUNMAP);
+    fprintf(pysam_stderr, "\t0x%x\tMUNMAP        .. next segment in the template unmapped\n", BAM_FMUNMAP);
+    fprintf(pysam_stderr, "\t0x%x\tREVERSE       .. SEQ is reverse complemented\n", BAM_FREVERSE);
+    fprintf(pysam_stderr, "\t0x%x\tMREVERSE      .. SEQ of the next segment in the template is reversed\n", BAM_FMREVERSE);
+    fprintf(pysam_stderr, "\t0x%x\tREAD1         .. the first segment in the template\n", BAM_FREAD1);
+    fprintf(pysam_stderr, "\t0x%x\tREAD2         .. the last segment in the template\n", BAM_FREAD2);
+    fprintf(pysam_stderr, "\t0x%x\tSECONDARY     .. secondary alignment\n", BAM_FSECONDARY);
+    fprintf(pysam_stderr, "\t0x%x\tQCFAIL        .. not passing quality controls\n", BAM_FQCFAIL);
+    fprintf(pysam_stderr, "\t0x%x\tDUP           .. PCR or optical duplicate\n", BAM_FDUP);
+    fprintf(pysam_stderr, "\t0x%x\tSUPPLEMENTARY .. supplementary alignment\n", BAM_FSUPPLEMENTARY);
+    fprintf(pysam_stderr, "\n");
  }
  
  
@@ -62,8 +64,8 @@ int main_flags(int argc, char *argv[])
      else
      {
          int mask = bam_str2flag(argv[1]);
-        if ( mask<0 ) { fprintf(pysamerr,"Error: Could not parse \"%s\"\n", argv[1]); usage(); return 1; }
-        printf("0x%x\t%d\t%s\n", mask, mask, bam_flag2str(mask));
+        if ( mask<0 ) { fprintf(pysam_stderr,"Error: Could not parse \"%s\"\n", argv[1]); usage(); return 1; }
+        fprintf(pysam_stdout, "0x%x\t%d\t%s\n", mask, mask, bam_flag2str(mask));
      }
      return 0;
  }
diff --git a/samtools/bam_import.c b/samtools/bam_import.c

index d959d0ec0d1747f50f8d0d25d14c6c7e9062f63c..96f81581b3257cfcb57aabdea55d0297915dacb5 100644 (file)
--- a/samtools/bam_import.c
+++ b/samtools/bam_import.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <zlib.h>
  #include <stdio.h>
  #include <string.h>
diff --git a/samtools/bam_import.c.pysam.c b/samtools/bam_import.c.pysam.c

index c2854f4470ecf3cecb44837ed26098fc118f9378..3b5dd4a51b1e17337730da1f1edfba6932ad3f44 100644 (file)
--- a/samtools/bam_import.c.pysam.c
+++ b/samtools/bam_import.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <zlib.h>
  #include <stdio.h>
  #include <string.h>
@@ -60,6 +62,6 @@ bam_header_t *sam_header_read2(const char *fn)
      free(str->s); free(str);
      header = sam_hdr_parse(samstr.l, samstr.s? samstr.s : "");
      free(samstr.s);
-    fprintf(pysamerr, "[sam_header_read2] %d sequences loaded.\n", n_targets);
+    fprintf(pysam_stderr, "[sam_header_read2] %d sequences loaded.\n", n_targets);
      return header;
  }
diff --git a/samtools/bam_index.c b/samtools/bam_index.c

index 83a855d8976578744a31fc6a1d99bfd1ba12a5ab..3a5acf6f8e1e933ec1f9173f9d458f7e138db014 100644 (file)
--- a/samtools/bam_index.c
+++ b/samtools/bam_index.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <htslib/hts.h>
  #include <htslib/sam.h>
  #include <htslib/khash.h>
diff --git a/samtools/bam_index.c.pysam.c b/samtools/bam_index.c.pysam.c

index ed902c54878091d1911f8b660c8fcb61d1850a5f..6c0efdcf056ad5891cf22405d5beccaa6408a273 100644 (file)
--- a/samtools/bam_index.c.pysam.c
+++ b/samtools/bam_index.c.pysam.c
@@ -26,6 +26,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <htslib/hts.h>
  #include <htslib/sam.h>
  #include <htslib/khash.h>
@@ -61,12 +63,12 @@ int bam_index(int argc, char *argv[])
          case 'c': csi = 1; break;
          case 'm': csi = 1; min_shift = atoi(optarg); break;
          default:
-            index_usage(pysamerr);
+            index_usage(pysam_stderr);
              return 1;
          }
  
      if (optind == argc) {
-        index_usage(stdout);
+        index_usage(pysam_stdout);
          return 1;
      }
  
@@ -91,31 +93,31 @@ int bam_idxstats(int argc, char *argv[])
      samFile* fp;
  
      if (argc < 2) {
-        fprintf(pysamerr, "Usage: samtools idxstats <in.bam>\n");
+        fprintf(pysam_stderr, "Usage: samtools idxstats <in.bam>\n");
          return 1;
      }
      fp = sam_open(argv[1], "r");
-    if (fp == NULL) { fprintf(pysamerr, "[%s] fail to open BAM.\n", __func__); return 1; }
+    if (fp == NULL) { fprintf(pysam_stderr, "[%s] fail to open BAM.\n", __func__); return 1; }
      header = sam_hdr_read(fp);
      if (header == NULL) {
-        fprintf(pysamerr, "[%s] failed to read header for '%s'.\n",
+        fprintf(pysam_stderr, "[%s] failed to read header for '%s'.\n",
                  __func__, argv[1]);
          return 1;
      }
      idx = sam_index_load(fp, argv[1]);
-    if (idx == NULL) { fprintf(pysamerr, "[%s] fail to load the index.\n", __func__); return 1; }
+    if (idx == NULL) { fprintf(pysam_stderr, "[%s] fail to load the index.\n", __func__); return 1; }
  
      int i;
      for (i = 0; i < header->n_targets; ++i) {
          // Print out contig name and length
-        printf("%s\t%d", header->target_name[i], header->target_len[i]);
+        fprintf(pysam_stdout, "%s\t%d", header->target_name[i], header->target_len[i]);
          // Now fetch info about it from the meta bin
          uint64_t u, v;
          hts_idx_get_stat(idx, i, &u, &v);
-        printf("\t%" PRIu64 "\t%" PRIu64 "\n", u, v);
+        fprintf(pysam_stdout, "\t%" PRIu64 "\t%" PRIu64 "\n", u, v);
      }
      // Dump information about unmapped reads
-    printf("*\t0\t0\t%" PRIu64 "\n", hts_idx_get_n_no_coor(idx));
+    fprintf(pysam_stdout, "*\t0\t0\t%" PRIu64 "\n", hts_idx_get_n_no_coor(idx));
      bam_hdr_destroy(header);
      hts_idx_destroy(idx);
      sam_close(fp);
diff --git a/samtools/bam_lpileup.c b/samtools/bam_lpileup.c

index 0cee701e19a840b971637830337eb0c8ea4ffa75..e20cc92a5eb3ee97d5a1792bfddef495e6886512 100644 (file)
--- a/samtools/bam_lpileup.c
+++ b/samtools/bam_lpileup.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <stdio.h>
  #include <assert.h>
diff --git a/samtools/bam_lpileup.c.pysam.c b/samtools/bam_lpileup.c.pysam.c

index bdf434870b5d66962463d5a52c39331d3fb40265..9f7f06355ae48820e00dc05f9e5aed791930745d 100644 (file)
--- a/samtools/bam_lpileup.c.pysam.c
+++ b/samtools/bam_lpileup.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <stdio.h>
  #include <assert.h>
@@ -179,14 +181,14 @@ static int tview_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl
      }
      tv->n_pre = l;
  /*
-    fprintf(pysamerr, "%d\t", pos+1);
+    fprintf(pysam_stderr, "%d\t", pos+1);
      for (i = 0; i < n; ++i) {
          const bam_pileup1_t *p = pl + i;
-        if (p->is_head) fprintf(pysamerr, "^");
-        if (p->is_tail) fprintf(pysamerr, "$");
-        fprintf(pysamerr, "%d,", p->level);
+        if (p->is_head) fprintf(pysam_stderr, "^");
+        if (p->is_tail) fprintf(pysam_stderr, "$");
+        fprintf(pysam_stderr, "%d,", p->level);
      }
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
  */
      return 0;
  }
diff --git a/samtools/bam_mate.c b/samtools/bam_mate.c

index 54c3ed327c5950e5990899e87075d706883ce58f..5b13b2e348ca2f7123eca90b0d0fa85482046931 100644 (file)
--- a/samtools/bam_mate.c
+++ b/samtools/bam_mate.c
@@ -1,6 +1,6 @@
  /*  bam_mate.c -- fix mate pairing information and clean up flags.
  
-    Copyright (C) 2009, 2011-2014 Genome Research Ltd.
+    Copyright (C) 2009, 2011-2016 Genome Research Ltd.
      Portions copyright (C) 2011 Broad Institute.
      Portions copyright (C) 2012 Peter Cock, The James Hutton Institute.
  
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <assert.h>
  #include <stdbool.h>
  #include <stdlib.h>
@@ -32,6 +34,7 @@ DEALINGS IN THE SOFTWARE.  */
  #include "sam_opts.h"
  #include "htslib/kstring.h"
  #include "htslib/sam.h"
+#include "samtools.h"
  
  /*
   * This function calculates ct tag for two bams, it assumes they are from the same template and
@@ -177,10 +180,10 @@ static void sync_mate(bam1_t* a, bam1_t* b)
  }
  
  // currently, this function ONLY works if each read has one hit
-static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int proper_pair_check, int add_ct)
+static int bam_mating_core(samFile* in, samFile* out, int remove_reads, int proper_pair_check, int add_ct)
  {
      bam_hdr_t *header;
-    bam1_t *b[2];
+    bam1_t *b[2] = { NULL, NULL };
      int curr, has_prev, pre_end = 0, cur_end = 0;
      kstring_t str;
  
@@ -188,7 +191,7 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
      header = sam_hdr_read(in);
      if (header == NULL) {
          fprintf(stderr, "[bam_mating_core] ERROR: Couldn't read header\n");
-        exit(1);
+        return 1;
      }
      // Accept unknown, unsorted, or queryname sort order, but error on coordinate sorted.
      if ((header->l_text > 3) && (strncmp(header->text, "@HD", 3) == 0)) {
@@ -199,10 +202,10 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
          // (e.g. must ignore in a @CO comment line later in header)
          if ((p != 0) && (p < q)) {
              fprintf(stderr, "[bam_mating_core] ERROR: Coordinate sorted, require grouped/sorted by queryname.\n");
-            exit(1);
+            goto fail;
          }
      }
-    sam_hdr_write(out, header);
+    if (sam_hdr_write(out, header) < 0) goto write_fail;
  
      b[0] = bam_init1();
      b[1] = bam_init1();
@@ -211,12 +214,14 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
          bam1_t *cur = b[curr], *pre = b[1-curr];
          if (cur->core.flag & BAM_FSECONDARY)
          {
-            if ( !remove_reads ) sam_write1(out, header, cur);
+            if ( !remove_reads ) {
+                if (sam_write1(out, header, cur) < 0) goto write_fail;
+            }
              continue; // skip secondary alignments
          }
          if (cur->core.flag & BAM_FSUPPLEMENTARY)
          {
-            sam_write1(out, header, cur);
+            if (sam_write1(out, header, cur) < 0) goto write_fail;
              continue; // pass supplementary alignments through unchanged (TODO:make them match read they came from)
          }
          if (cur->core.tid < 0 || cur->core.pos < 0) // If unmapped set the flag
@@ -253,14 +258,18 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
  
                  // Write out result
                  if ( !remove_reads ) {
-                    sam_write1(out, header, pre);
-                    sam_write1(out, header, cur);
+                    if (sam_write1(out, header, pre) < 0) goto write_fail;
+                    if (sam_write1(out, header, cur) < 0) goto write_fail;
                  } else {
                      // If we have to remove reads make sure we do it in a way that doesn't create orphans with bad flags
                      if(pre->core.flag&BAM_FUNMAP) cur->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
                      if(cur->core.flag&BAM_FUNMAP) pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
-                    if(!(pre->core.flag&BAM_FUNMAP)) sam_write1(out, header, pre);
-                    if(!(cur->core.flag&BAM_FUNMAP)) sam_write1(out, header, cur);
+                    if(!(pre->core.flag&BAM_FUNMAP)) {
+                        if (sam_write1(out, header, pre) < 0) goto write_fail;
+                    }
+                    if(!(cur->core.flag&BAM_FUNMAP)) {
+                        if (sam_write1(out, header, cur) < 0) goto write_fail;
+                    }
                  }
                  has_prev = 0;
              } else { // unpaired?  clear bad info and write it out
@@ -271,7 +280,9 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
                  }
                  pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
                  pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
-                if ( !remove_reads || !(pre->core.flag&BAM_FUNMAP) ) sam_write1(out, header, pre);
+                if ( !remove_reads || !(pre->core.flag&BAM_FUNMAP) ) {
+                    if (sam_write1(out, header, pre) < 0) goto write_fail;
+                }
              }
          } else has_prev = 1;
          curr = 1 - curr;
@@ -287,12 +298,21 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
          pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
          pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
  
-        sam_write1(out, header, pre);
+        if (sam_write1(out, header, pre) < 0) goto write_fail;
      }
      bam_hdr_destroy(header);
      bam_destroy1(b[0]);
      bam_destroy1(b[1]);
      free(str.s);
+    return 0;
+
+ write_fail:
+    print_error_errno("fixmate", "Couldn't write to output file");
+ fail:
+    bam_hdr_destroy(header);
+    bam_destroy1(b[0]);
+    bam_destroy1(b[1]);
+    return 1;
  }
  
  void usage(FILE* where)
@@ -315,8 +335,8 @@ void usage(FILE* where)
  
  int bam_mating(int argc, char *argv[])
  {
-    samFile *in, *out;
-    int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0;
+    samFile *in = NULL, *out = NULL;
+    int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0, res = 1;
      sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
      char wmode[3] = {'w', 'b', 0};
      static const struct option lopts[] = {
@@ -333,30 +353,40 @@ int bam_mating(int argc, char *argv[])
              case 'c': add_ct = 1; break;
              default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
                        /* else fall-through */
-            case '?': usage(stderr); return 1;
+            case '?': usage(stderr); goto fail;
          }
      }
-    if (optind+1 >= argc) { usage(stderr); return 1; }
+    if (optind+1 >= argc) { usage(stderr); goto fail; }
  
      // init
      if ((in = sam_open_format(argv[optind], "rb", &ga.in)) == NULL) {
-        fprintf(stderr, "[bam_mating] cannot open input file\n");
-        return 1;
+        print_error_errno("fixmate", "cannot open input file");
+        goto fail;
      }
      sam_open_mode(wmode+1, argv[optind+1], NULL);
      if ((out = sam_open_format(argv[optind+1], wmode, &ga.out)) == NULL) {
-        fprintf(stderr, "[bam_mating] cannot open output file\n");
-        return 1;
+        print_error_errno("fixmate", "cannot open output file");
+        goto fail;
      }
  
      // run
-    bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct);
+    res = bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct);
  
      // cleanup
-    sam_close(in); sam_close(out);
+    sam_close(in);
+    if (sam_close(out) < 0) {
+        fprintf(stderr, "[bam_mating] error while closing output file\n");
+        res = 1;
+    }
+
      sam_global_args_free(&ga);
+    return res;
  
-    return 0;
+ fail:
+    if (in) sam_close(in);
+    if (out) sam_close(out);
+    sam_global_args_free(&ga);
+    return 1;
  }
  
  
diff --git a/samtools/bam_mate.c.pysam.c b/samtools/bam_mate.c.pysam.c

index c7900a1a537242052d95c3990d1d8a1fcaff81f0..a416d071aed36077845d6bbd2dcab4925e78bad2 100644 (file)
--- a/samtools/bam_mate.c.pysam.c
+++ b/samtools/bam_mate.c.pysam.c
@@ -2,7 +2,7 @@
  
  /*  bam_mate.c -- fix mate pairing information and clean up flags.
  
-    Copyright (C) 2009, 2011-2014 Genome Research Ltd.
+    Copyright (C) 2009, 2011-2016 Genome Research Ltd.
      Portions copyright (C) 2011 Broad Institute.
      Portions copyright (C) 2012 Peter Cock, The James Hutton Institute.
  
@@ -26,6 +26,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <assert.h>
  #include <stdbool.h>
  #include <stdlib.h>
@@ -34,6 +36,7 @@ DEALINGS IN THE SOFTWARE.  */
  #include "sam_opts.h"
  #include "htslib/kstring.h"
  #include "htslib/sam.h"
+#include "samtools.h"
  
  /*
   * This function calculates ct tag for two bams, it assumes they are from the same template and
@@ -179,18 +182,18 @@ static void sync_mate(bam1_t* a, bam1_t* b)
  }
  
  // currently, this function ONLY works if each read has one hit
-static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int proper_pair_check, int add_ct)
+static int bam_mating_core(samFile* in, samFile* out, int remove_reads, int proper_pair_check, int add_ct)
  {
      bam_hdr_t *header;
-    bam1_t *b[2];
+    bam1_t *b[2] = { NULL, NULL };
      int curr, has_prev, pre_end = 0, cur_end = 0;
      kstring_t str;
  
      str.l = str.m = 0; str.s = 0;
      header = sam_hdr_read(in);
      if (header == NULL) {
-        fprintf(pysamerr, "[bam_mating_core] ERROR: Couldn't read header\n");
-        exit(1);
+        fprintf(pysam_stderr, "[bam_mating_core] ERROR: Couldn't read header\n");
+        return 1;
      }
      // Accept unknown, unsorted, or queryname sort order, but error on coordinate sorted.
      if ((header->l_text > 3) && (strncmp(header->text, "@HD", 3) == 0)) {
@@ -200,11 +203,11 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
          // Looking for SO:coordinate within the @HD line only
          // (e.g. must ignore in a @CO comment line later in header)
          if ((p != 0) && (p < q)) {
-            fprintf(pysamerr, "[bam_mating_core] ERROR: Coordinate sorted, require grouped/sorted by queryname.\n");
-            exit(1);
+            fprintf(pysam_stderr, "[bam_mating_core] ERROR: Coordinate sorted, require grouped/sorted by queryname.\n");
+            goto fail;
          }
      }
-    sam_hdr_write(out, header);
+    if (sam_hdr_write(out, header) < 0) goto write_fail;
  
      b[0] = bam_init1();
      b[1] = bam_init1();
@@ -213,12 +216,14 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
          bam1_t *cur = b[curr], *pre = b[1-curr];
          if (cur->core.flag & BAM_FSECONDARY)
          {
-            if ( !remove_reads ) sam_write1(out, header, cur);
+            if ( !remove_reads ) {
+                if (sam_write1(out, header, cur) < 0) goto write_fail;
+            }
              continue; // skip secondary alignments
          }
          if (cur->core.flag & BAM_FSUPPLEMENTARY)
          {
-            sam_write1(out, header, cur);
+            if (sam_write1(out, header, cur) < 0) goto write_fail;
              continue; // pass supplementary alignments through unchanged (TODO:make them match read they came from)
          }
          if (cur->core.tid < 0 || cur->core.pos < 0) // If unmapped set the flag
@@ -255,14 +260,18 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
  
                  // Write out result
                  if ( !remove_reads ) {
-                    sam_write1(out, header, pre);
-                    sam_write1(out, header, cur);
+                    if (sam_write1(out, header, pre) < 0) goto write_fail;
+                    if (sam_write1(out, header, cur) < 0) goto write_fail;
                  } else {
                      // If we have to remove reads make sure we do it in a way that doesn't create orphans with bad flags
                      if(pre->core.flag&BAM_FUNMAP) cur->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
                      if(cur->core.flag&BAM_FUNMAP) pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
-                    if(!(pre->core.flag&BAM_FUNMAP)) sam_write1(out, header, pre);
-                    if(!(cur->core.flag&BAM_FUNMAP)) sam_write1(out, header, cur);
+                    if(!(pre->core.flag&BAM_FUNMAP)) {
+                        if (sam_write1(out, header, pre) < 0) goto write_fail;
+                    }
+                    if(!(cur->core.flag&BAM_FUNMAP)) {
+                        if (sam_write1(out, header, cur) < 0) goto write_fail;
+                    }
                  }
                  has_prev = 0;
              } else { // unpaired?  clear bad info and write it out
@@ -273,7 +282,9 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
                  }
                  pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
                  pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
-                if ( !remove_reads || !(pre->core.flag&BAM_FUNMAP) ) sam_write1(out, header, pre);
+                if ( !remove_reads || !(pre->core.flag&BAM_FUNMAP) ) {
+                    if (sam_write1(out, header, pre) < 0) goto write_fail;
+                }
              }
          } else has_prev = 1;
          curr = 1 - curr;
@@ -289,12 +300,21 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
          pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
          pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
  
-        sam_write1(out, header, pre);
+        if (sam_write1(out, header, pre) < 0) goto write_fail;
      }
      bam_hdr_destroy(header);
      bam_destroy1(b[0]);
      bam_destroy1(b[1]);
      free(str.s);
+    return 0;
+
+ write_fail:
+    print_error_errno("fixmate", "Couldn't write to output file");
+ fail:
+    bam_hdr_destroy(header);
+    bam_destroy1(b[0]);
+    bam_destroy1(b[1]);
+    return 1;
  }
  
  void usage(FILE* where)
@@ -310,15 +330,15 @@ void usage(FILE* where)
  
      fprintf(where,
  "\n"
-"As elsewhere in samtools, use '-' as the filename for stdin/stdout. The input\n"
+"As elsewhere in samtools, use '-' as the filename for stdin/pysam_stdout. The input\n"
  "file must be grouped by read name (e.g. sorted by name). Coordinated sorted\n"
  "input is not accepted.\n");
  }
  
  int bam_mating(int argc, char *argv[])
  {
-    samFile *in, *out;
-    int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0;
+    samFile *in = NULL, *out = NULL;
+    int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0, res = 1;
      sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
      char wmode[3] = {'w', 'b', 0};
      static const struct option lopts[] = {
@@ -327,7 +347,7 @@ int bam_mating(int argc, char *argv[])
      };
  
      // parse args
-    if (argc == 1) { usage(stdout); return 0; }
+    if (argc == 1) { usage(pysam_stdout); return 0; }
      while ((c = getopt_long(argc, argv, "rpcO:", lopts, NULL)) >= 0) {
          switch (c) {
              case 'r': remove_reads = 1; break;
@@ -335,30 +355,40 @@ int bam_mating(int argc, char *argv[])
              case 'c': add_ct = 1; break;
              default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
                        /* else fall-through */
-            case '?': usage(pysamerr); return 1;
+            case '?': usage(pysam_stderr); goto fail;
          }
      }
-    if (optind+1 >= argc) { usage(pysamerr); return 1; }
+    if (optind+1 >= argc) { usage(pysam_stderr); goto fail; }
  
      // init
      if ((in = sam_open_format(argv[optind], "rb", &ga.in)) == NULL) {
-        fprintf(pysamerr, "[bam_mating] cannot open input file\n");
-        return 1;
+        print_error_errno("fixmate", "cannot open input file");
+        goto fail;
      }
      sam_open_mode(wmode+1, argv[optind+1], NULL);
      if ((out = sam_open_format(argv[optind+1], wmode, &ga.out)) == NULL) {
-        fprintf(pysamerr, "[bam_mating] cannot open output file\n");
-        return 1;
+        print_error_errno("fixmate", "cannot open output file");
+        goto fail;
      }
  
      // run
-    bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct);
+    res = bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct);
  
      // cleanup
-    sam_close(in); sam_close(out);
+    sam_close(in);
+    if (sam_close(out) < 0) {
+        fprintf(pysam_stderr, "[bam_mating] error while closing output file\n");
+        res = 1;
+    }
+
      sam_global_args_free(&ga);
+    return res;
  
-    return 0;
+ fail:
+    if (in) sam_close(in);
+    if (out) sam_close(out);
+    sam_global_args_free(&ga);
+    return 1;
  }
  
  
diff --git a/samtools/bam_md.c b/samtools/bam_md.c

index 30f3243d5c9a8cf79e4ae3dca29e74445f829c5b..71206cda7da804fde382f8ffcf7c581593e3ad83 100644 (file)
--- a/samtools/bam_md.c
+++ b/samtools/bam_md.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <unistd.h>
  #include <string.h>
  #include <ctype.h>
@@ -33,6 +35,7 @@ DEALINGS IN THE SOFTWARE.  */
  #include "htslib/kstring.h"
  #include "kprobaln.h"
  #include "sam_opts.h"
+#include "samtools.h"
  
  #define USE_EQUAL 1
  #define DROP_TAG  2
@@ -349,11 +352,11 @@ int calmd_usage() {
  int bam_fillmd(int argc, char *argv[])
  {
      int c, flt_flag, tid = -2, ret, len, is_bam_out, is_uncompressed, max_nm, is_realn, capQ, baq_flag;
-    samFile *fp, *fpout = 0;
-    bam_hdr_t *header;
-    faidx_t *fai;
-    char *ref = 0, mode_w[8], *ref_file;
-    bam1_t *b;
+    samFile *fp = NULL, *fpout = NULL;
+    bam_hdr_t *header = NULL;
+    faidx_t *fai = NULL;
+    char *ref = NULL, mode_w[8], *ref_file;
+    bam1_t *b = NULL;
      sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
  
      static const struct option lopts[] = {
@@ -391,35 +394,51 @@ int bam_fillmd(int argc, char *argv[])
      if (optind + (ga.reference == NULL) >= argc)
          return calmd_usage();
      fp = sam_open_format(argv[optind], "r", &ga.in);
-    if (fp == 0) return 1;
+    if (fp == NULL) {
+        print_error_errno("calmd", "Failed to open input file '%s'", argv[optind]);
+        return 1;
+    }
  
      header = sam_hdr_read(fp);
      if (header == NULL || header->n_targets == 0) {
          fprintf(stderr, "[bam_fillmd] input SAM does not have header. Abort!\n");
-        return 1;
+        goto fail;
      }
  
      fpout = sam_open_format("-", mode_w, &ga.out);
-    sam_hdr_write(fpout, header);
+    if (fpout == NULL) {
+        print_error_errno("calmd", "Failed to open output");
+        goto fail;
+    }
+    if (sam_hdr_write(fpout, header) < 0) {
+        print_error_errno("calmd", "Failed to write sam header");
+        goto fail;
+    }
  
      ref_file = argc > optind + 1 ? argv[optind+1] : ga.reference;
      fai = fai_load(ref_file);
  
      if (!fai) {
-        perror(ref_file);
-        return 1;
+        print_error_errno("calmd", "Failed to open reference file '%s'", ref_file);
+        goto fail;
      }
  
      b = bam_init1();
+    if (!b) {
+        fprintf(stderr, "[bam_fillmd] Failed to allocate bam struct\n");
+        goto fail;
+    }
      while ((ret = sam_read1(fp, header, b)) >= 0) {
          if (b->core.tid >= 0) {
              if (tid != b->core.tid) {
                  free(ref);
                  ref = fai_fetch(fai, header->target_name[b->core.tid], &len);
                  tid = b->core.tid;
-                if (ref == 0)
+                if (ref == 0) { // FIXME: Should this always be fatal?
                      fprintf(stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
                              header->target_name[tid]);
+                    if (is_realn || capQ > 10) goto fail; // Would otherwise crash
+                }
              }
              if (is_realn) bam_prob_realn_core(b, ref, len, baq_flag);
              if (capQ > 10) {
@@ -428,7 +447,14 @@ int bam_fillmd(int argc, char *argv[])
              }
              if (ref) bam_fillmd1_core(b, ref, len, flt_flag, max_nm);
          }
-        sam_write1(fpout, header, b);
+        if (sam_write1(fpout, header, b) < 0) {
+            print_error_errno("calmd", "failed to write to output file");
+            goto fail;
+        }
+    }
+    if (ret < -1) {
+        fprintf(stderr, "[bam_fillmd] Error reading input.\n");
+        goto fail;
      }
      bam_destroy1(b);
      bam_hdr_destroy(header);
@@ -436,6 +462,18 @@ int bam_fillmd(int argc, char *argv[])
      free(ref);
      fai_destroy(fai);
      sam_close(fp);
-    sam_close(fpout);
+    if (sam_close(fpout) < 0) {
+        fprintf(stderr, "[bam_fillmd] error when closing output file\n");
+        return 1;
+    }
      return 0;
+
+ fail:
+    free(ref);
+    if (b) bam_destroy1(b);
+    if (header) bam_hdr_destroy(header);
+    if (fai) fai_destroy(fai);
+    if (fp) sam_close(fp);
+    if (fpout) sam_close(fpout);
+    return 1;
  }
diff --git a/samtools/bam_md.c.pysam.c b/samtools/bam_md.c.pysam.c

index 070f9cdb9e6d56fbf623e9eeb797df17e8e28f76..d00c01d066bd83d43ca8eb33a3346c77c12f68e4 100644 (file)
--- a/samtools/bam_md.c.pysam.c
+++ b/samtools/bam_md.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <unistd.h>
  #include <string.h>
  #include <ctype.h>
@@ -35,6 +37,7 @@ DEALINGS IN THE SOFTWARE.  */
  #include "htslib/kstring.h"
  #include "kprobaln.h"
  #include "sam_opts.h"
+#include "samtools.h"
  
  #define USE_EQUAL 1
  #define DROP_TAG  2
@@ -115,7 +118,7 @@ void bam_fillmd1_core(bam1_t *b, char *ref, int ref_len, int flag, int max_nm)
          if (old_nm) old_nm_i = bam_aux2i(old_nm);
          if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
          else if (nm != old_nm_i) {
-            fprintf(pysamerr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam_get_qname(b), old_nm_i, nm);
+            fprintf(pysam_stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam_get_qname(b), old_nm_i, nm);
              bam_aux_del(b, old_nm);
              bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
          }
@@ -133,7 +136,7 @@ void bam_fillmd1_core(bam1_t *b, char *ref, int ref_len, int flag, int max_nm)
                  if (i < str->l) is_diff = 1;
              } else is_diff = 1;
              if (is_diff) {
-                fprintf(pysamerr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam_get_qname(b), old_md+1, str->s);
+                fprintf(pysam_stderr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam_get_qname(b), old_md+1, str->s);
                  bam_aux_del(b, old_md);
                  bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
              }
@@ -207,7 +210,7 @@ int bam_cap_mapQ(bam1_t *b, char *ref, int ref_len, int thres)
      if (t > thres) return -1;
      if (t < 0) t = 0;
      t = sqrt((thres - t) / thres) * thres;
-//  fprintf(pysamerr, "%s %lf %d\n", bam_get_qname(b), t, q);
+//  fprintf(pysam_stderr, "%s %lf %d\n", bam_get_qname(b), t, q);
      return (int)(t + .499);
  }
  
@@ -333,7 +336,7 @@ int bam_prob_realn(bam1_t *b, const char *ref)
  }
  
  int calmd_usage() {
-    fprintf(pysamerr,
+    fprintf(pysam_stderr,
  "Usage: samtools calmd [-eubrAES] <aln.bam> <ref.fasta>\n"
  "Options:\n"
  "  -e       change identical bases to '='\n"
@@ -344,18 +347,18 @@ int calmd_usage() {
  "  -r       compute the BQ tag (without -A) or cap baseQ by BAQ (with -A)\n"
  "  -E       extended BAQ for better sensitivity but lower specificity\n");
  
-    sam_global_opt_help(pysamerr, "-....");
+    sam_global_opt_help(pysam_stderr, "-....");
      return 1;
  }
  
  int bam_fillmd(int argc, char *argv[])
  {
      int c, flt_flag, tid = -2, ret, len, is_bam_out, is_uncompressed, max_nm, is_realn, capQ, baq_flag;
-    samFile *fp, *fpout = 0;
-    bam_hdr_t *header;
-    faidx_t *fai;
-    char *ref = 0, mode_w[8], *ref_file;
-    bam1_t *b;
+    samFile *fp = NULL, *fpout = NULL;
+    bam_hdr_t *header = NULL;
+    faidx_t *fai = NULL;
+    char *ref = NULL, mode_w[8], *ref_file;
+    bam1_t *b = NULL;
      sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
  
      static const struct option lopts[] = {
@@ -382,7 +385,7 @@ int bam_fillmd(int argc, char *argv[])
          case 'A': baq_flag |= 1; break;
          case 'E': baq_flag |= 2; break;
          default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
-            fprintf(pysamerr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
+            fprintf(pysam_stderr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
              /* else fall-through */
          case '?': return calmd_usage();
          }
@@ -393,35 +396,51 @@ int bam_fillmd(int argc, char *argv[])
      if (optind + (ga.reference == NULL) >= argc)
          return calmd_usage();
      fp = sam_open_format(argv[optind], "r", &ga.in);
-    if (fp == 0) return 1;
+    if (fp == NULL) {
+        print_error_errno("calmd", "Failed to open input file '%s'", argv[optind]);
+        return 1;
+    }
  
      header = sam_hdr_read(fp);
      if (header == NULL || header->n_targets == 0) {
-        fprintf(pysamerr, "[bam_fillmd] input SAM does not have header. Abort!\n");
-        return 1;
+        fprintf(pysam_stderr, "[bam_fillmd] input SAM does not have header. Abort!\n");
+        goto fail;
+    }
+    
+    fpout = sam_open_format(pysam_stdout_fn, mode_w, &ga.out);
+    if (fpout == NULL) {
+        print_error_errno("calmd", "Failed to open output");
+        goto fail;
+    }
+    if (sam_hdr_write(fpout, header) < 0) {
+        print_error_errno("calmd", "Failed to write sam header");
+        goto fail;
      }
-
-    fpout = sam_open_format("-", mode_w, &ga.out);
-    sam_hdr_write(fpout, header);
  
      ref_file = argc > optind + 1 ? argv[optind+1] : ga.reference;
      fai = fai_load(ref_file);
  
      if (!fai) {
-        perror(ref_file);
-        return 1;
+        print_error_errno("calmd", "Failed to open reference file '%s'", ref_file);
+        goto fail;
      }
  
      b = bam_init1();
+    if (!b) {
+        fprintf(pysam_stderr, "[bam_fillmd] Failed to allocate bam struct\n");
+        goto fail;
+    }
      while ((ret = sam_read1(fp, header, b)) >= 0) {
          if (b->core.tid >= 0) {
              if (tid != b->core.tid) {
                  free(ref);
                  ref = fai_fetch(fai, header->target_name[b->core.tid], &len);
                  tid = b->core.tid;
-                if (ref == 0)
-                    fprintf(pysamerr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
+                if (ref == 0) { // FIXME: Should this always be fatal?
+                    fprintf(pysam_stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
                              header->target_name[tid]);
+                    if (is_realn || capQ > 10) goto fail; // Would otherwise crash
+                }
              }
              if (is_realn) bam_prob_realn_core(b, ref, len, baq_flag);
              if (capQ > 10) {
@@ -430,7 +449,14 @@ int bam_fillmd(int argc, char *argv[])
              }
              if (ref) bam_fillmd1_core(b, ref, len, flt_flag, max_nm);
          }
-        sam_write1(fpout, header, b);
+        if (sam_write1(fpout, header, b) < 0) {
+            print_error_errno("calmd", "failed to write to output file");
+            goto fail;
+        }
+    }
+    if (ret < -1) {
+        fprintf(pysam_stderr, "[bam_fillmd] Error reading input.\n");
+        goto fail;
      }
      bam_destroy1(b);
      bam_hdr_destroy(header);
@@ -438,6 +464,18 @@ int bam_fillmd(int argc, char *argv[])
      free(ref);
      fai_destroy(fai);
      sam_close(fp);
-    sam_close(fpout);
+    if (sam_close(fpout) < 0) {
+        fprintf(pysam_stderr, "[bam_fillmd] error when closing output file\n");
+        return 1;
+    }
      return 0;
+
+ fail:
+    free(ref);
+    if (b) bam_destroy1(b);
+    if (header) bam_hdr_destroy(header);
+    if (fai) fai_destroy(fai);
+    if (fp) sam_close(fp);
+    if (fpout) sam_close(fpout);
+    return 1;
  }
diff --git a/samtools/bam_plbuf.c b/samtools/bam_plbuf.c

index a579b77cd12c1fb95ed7783463c8dc0ae756feb0..12ea25037274227b86e1810d9c92661d90495a11 100644 (file)
--- a/samtools/bam_plbuf.c
+++ b/samtools/bam_plbuf.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <ctype.h>
diff --git a/samtools/bam_plbuf.c.pysam.c b/samtools/bam_plbuf.c.pysam.c

index 5b8dda04be87a7ec9f156a938802a9ad6461fa75..76c1ac1813956186c2020a250f423c062ad7b06b 100644 (file)
--- a/samtools/bam_plbuf.c.pysam.c
+++ b/samtools/bam_plbuf.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <ctype.h>
diff --git a/samtools/bam_plcmd.c b/samtools/bam_plcmd.c

index 9e008368c23e021f30b825367d37fefe9f1b4481..dc12bf3a034da8f7325b4432bba229cb8c197c4f 100644 (file)
--- a/samtools/bam_plcmd.c
+++ b/samtools/bam_plcmd.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <math.h>
  #include <stdio.h>
  #include <stdlib.h>
@@ -785,7 +787,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
  "  -b, --bam-list FILE     list of input BAM filenames, one per line\n"
  "  -B, --no-BAQ            disable BAQ (per-Base Alignment Quality)\n"
  "  -C, --adjust-MQ INT     adjust mapping quality; recommended:50, disable:0 [0]\n"
-"  -d, --max-depth INT     max per-BAM depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
+"  -d, --max-depth INT     max per-file depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
      fprintf(fp,
  "  -E, --redo-BAQ          recalculate BAQ on the fly, ignore existing BQs\n"
  "  -f, --fasta-ref FILE    faidx indexed reference sequence file\n"
@@ -826,7 +828,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
  "  -h, --tandem-qual INT   coefficient for homopolymer errors [%d]\n", mplp->tandemQ);
      fprintf(fp,
  "  -I, --skip-indels       do not perform indel calling\n"
-"  -L, --max-idepth INT    maximum per-sample depth for INDEL calling [%d]\n", mplp->max_indel_depth);
+"  -L, --max-idepth INT    maximum per-file depth for INDEL calling [%d]\n", mplp->max_indel_depth);
      fprintf(fp,
  "  -m, --min-ireads INT    minimum number gapped reads for indel candidates [%d]\n", mplp->min_support);
      fprintf(fp,
diff --git a/samtools/bam_plcmd.c.pysam.c b/samtools/bam_plcmd.c.pysam.c

index bafbb928e7dd5ce55034b1df7de4f59fa6e00d9e..650e818969363baf78309f5e1f28481a0f8b3938 100644 (file)
--- a/samtools/bam_plcmd.c.pysam.c
+++ b/samtools/bam_plcmd.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <math.h>
  #include <stdio.h>
  #include <stdlib.h>
@@ -248,7 +250,7 @@ static int mplp_func(void *data, bam1_t *b)
          if (ma->conf->fai && b->core.tid >= 0) {
              has_ref = mplp_get_ref(ma, b->core.tid, &ref, &ref_len);
              if (has_ref && ref_len <= b->core.pos) { // exclude reads outside of the reference sequence
-                fprintf(pysamerr,"[%s] Skipping because %d is outside of %d [ref:%d]\n",
+                fprintf(pysam_stderr,"[%s] Skipping because %d is outside of %d [ref:%d]\n",
                          __func__, b->core.pos, ref_len, b->core.tid);
                  skip = 1;
                  continue;
@@ -285,7 +287,7 @@ static void group_smpl(mplp_pileup_t *m, bam_sample_t *sm, kstring_t *buf,
              if (id < 0) id = bam_smpl_rg2smid(sm, fn[i], 0, buf);
              if (id < 0 || id >= m->n) {
                  assert(q); // otherwise a bug
-                fprintf(pysamerr, "[%s] Read group %s used in file %s but absent from the header or an alignment missing read group.\n", __func__, (char*)q+1, fn[i]);
+                fprintf(pysam_stderr, "[%s] Read group %s used in file %s but absent from the header or an alignment missing read group.\n", __func__, (char*)q+1, fn[i]);
                  exit(EXIT_FAILURE);
              }
              if (m->n_plp[id] == m->m_plp[id]) {
@@ -336,7 +338,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
      sm = bam_smpl_init();
  
      if (n == 0) {
-        fprintf(pysamerr,"[%s] no input file/data given\n", __func__);
+        fprintf(pysam_stderr,"[%s] no input file/data given\n", __func__);
          exit(EXIT_FAILURE);
      }
  
@@ -347,15 +349,15 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
          data[i]->fp = sam_open_format(fn[i], "rb", &conf->ga.in);
          if ( !data[i]->fp )
          {
-            fprintf(pysamerr, "[%s] failed to open %s: %s\n", __func__, fn[i], strerror(errno));
+            fprintf(pysam_stderr, "[%s] failed to open %s: %s\n", __func__, fn[i], strerror(errno));
              exit(EXIT_FAILURE);
          }
          if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
-            fprintf(pysamerr, "Failed to set CRAM_OPT_DECODE_MD value\n");
+            fprintf(pysam_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
              exit(EXIT_FAILURE);
          }
          if (conf->fai_fname && hts_set_fai_filename(data[i]->fp, conf->fai_fname) != 0) {
-            fprintf(pysamerr, "[%s] failed to process %s: %s\n",
+            fprintf(pysam_stderr, "[%s] failed to process %s: %s\n",
                      __func__, conf->fai_fname, strerror(errno));
              exit(EXIT_FAILURE);
          }
@@ -363,7 +365,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
          data[i]->ref = &mp_ref;
          h_tmp = sam_hdr_read(data[i]->fp);
          if ( !h_tmp ) {
-            fprintf(pysamerr,"[%s] fail to read the header of %s\n", __func__, fn[i]);
+            fprintf(pysam_stderr,"[%s] fail to read the header of %s\n", __func__, fn[i]);
              exit(EXIT_FAILURE);
          }
          bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 0 : h_tmp->text);
@@ -372,11 +374,11 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
          if (conf->reg) {
              hts_idx_t *idx = sam_index_load(data[i]->fp, fn[i]);
              if (idx == NULL) {
-                fprintf(pysamerr, "[%s] fail to load index for %s\n", __func__, fn[i]);
+                fprintf(pysam_stderr, "[%s] fail to load index for %s\n", __func__, fn[i]);
                  exit(EXIT_FAILURE);
              }
              if ( (data[i]->iter=sam_itr_querys(idx, h_tmp, conf->reg)) == 0) {
-                fprintf(pysamerr, "[E::%s] fail to parse region '%s' with %s\n", __func__, conf->reg, fn[i]);
+                fprintf(pysam_stderr, "[E::%s] fail to parse region '%s' with %s\n", __func__, conf->reg, fn[i]);
                  exit(EXIT_FAILURE);
              }
              if (i == 0) beg0 = data[i]->iter->beg, end0 = data[i]->iter->end;
@@ -401,7 +403,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
      gplp.m_plp = calloc(sm->n, sizeof(int));
      gplp.plp = calloc(sm->n, sizeof(bam_pileup1_t*));
  
-    fprintf(pysamerr, "[%s] %d samples in %d input files\n", __func__, sm->n, n);
+    fprintf(pysam_stderr, "[%s] %d samples in %d input files\n", __func__, sm->n, n);
      // write the VCF header
      if (conf->flag & MPLP_BCF)
      {
@@ -413,7 +415,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
  
          bcf_fp = bcf_open(conf->output_fname? conf->output_fname : "-", mode);
          if (bcf_fp == NULL) {
-            fprintf(pysamerr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
+            fprintf(pysam_stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
              exit(EXIT_FAILURE);
          }
  
@@ -529,10 +531,10 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
          }
      }
      else {
-        pileup_fp = conf->output_fname? fopen(conf->output_fname, "w") : stdout;
+        pileup_fp = conf->output_fname? fopen(conf->output_fname, "w") : pysam_stdout;
  
          if (pileup_fp == NULL) {
-            fprintf(pysamerr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname, strerror(errno));
+            fprintf(pysam_stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname, strerror(errno));
              exit(EXIT_FAILURE);
          }
      }
@@ -542,10 +544,10 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
      if ( conf->flag & MPLP_SMART_OVERLAPS ) bam_mplp_init_overlaps(iter);
      max_depth = conf->max_depth;
      if (max_depth * sm->n > 1<<20)
-        fprintf(pysamerr, "(%s) Max depth is above 1M. Potential memory hog!\n", __func__);
+        fprintf(pysam_stderr, "(%s) Max depth is above 1M. Potential memory hog!\n", __func__);
      if (max_depth * sm->n < 8000) {
          max_depth = 8000 / sm->n;
-        fprintf(pysamerr, "<%s> Set max per-file depth to %d\n", __func__, max_depth);
+        fprintf(pysam_stderr, "<%s> Set max per-file depth to %d\n", __func__, max_depth);
      }
      max_indel_depth = conf->max_indel_depth * sm->n;
      bam_mplp_set_maxcnt(iter, max_depth);
@@ -639,7 +641,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                              if ( c < conf->min_baseQ ) continue;
  
                              if (last++) putc(',', pileup_fp);
-                            fprintf(pileup_fp, "%d", plp[i][j].qpos + 1); // FIXME: printf() is very slow...
+                            fprintf(pileup_fp, "%d", plp[i][j].qpos + 1); // FIXME: fprintf(pysam_stdout, ) is very slow...
                          }
                      }
                  }
@@ -695,7 +697,7 @@ int read_file_list(const char *file_list,int *n,char **argv[])
      FILE *fh = fopen(file_list,"r");
      if ( !fh )
      {
-        fprintf(pysamerr,"%s: %s\n", file_list,strerror(errno));
+        fprintf(pysam_stderr,"%s: %s\n", file_list,strerror(errno));
          return 1;
      }
  
@@ -717,9 +719,9 @@ int read_file_list(const char *file_list,int *n,char **argv[])
              for (i=0; i<len; i++)
                  if (!isprint(buf[i])) { safe_to_print = 0; break; }
              if ( safe_to_print )
-                fprintf(pysamerr,"The file list \"%s\" appears broken, could not locate: %s\n", file_list,buf);
+                fprintf(pysam_stderr,"The file list \"%s\" appears broken, could not locate: %s\n", file_list,buf);
              else
-                fprintf(pysamerr,"Does the file \"%s\" really contain a list of files and do all exist?\n", file_list);
+                fprintf(pysam_stderr,"Does the file \"%s\" really contain a list of files and do all exist?\n", file_list);
              return 1;
          }
  
@@ -730,7 +732,7 @@ int read_file_list(const char *file_list,int *n,char **argv[])
      fclose(fh);
      if ( !nfiles )
      {
-        fprintf(pysamerr,"No files read from %s\n", file_list);
+        fprintf(pysam_stderr,"No files read from %s\n", file_list);
          return 1;
      }
      *argv = files;
@@ -746,11 +748,11 @@ int parse_format_flag(const char *str)
      for(i=0; i<n_tags; i++)
      {
          if ( !strcasecmp(tags[i],"DP") ) flag |= B2B_FMT_DP;
-        else if ( !strcasecmp(tags[i],"DV") ) { flag |= B2B_FMT_DV; fprintf(pysamerr, "[warning] tag DV functional, but deprecated. Please switch to `AD` in future.\n"); }
+        else if ( !strcasecmp(tags[i],"DV") ) { flag |= B2B_FMT_DV; fprintf(pysam_stderr, "[warning] tag DV functional, but deprecated. Please switch to `AD` in future.\n"); }
          else if ( !strcasecmp(tags[i],"SP") ) flag |= B2B_FMT_SP;
-        else if ( !strcasecmp(tags[i],"DP4") ) { flag |= B2B_FMT_DP4; fprintf(pysamerr, "[warning] tag DP4 functional, but deprecated. Please switch to `ADF` and `ADR` in future.\n"); }
-        else if ( !strcasecmp(tags[i],"DPR") ) { flag |= B2B_FMT_DPR; fprintf(pysamerr, "[warning] tag DPR functional, but deprecated. Please switch to `AD` in future.\n"); }
-        else if ( !strcasecmp(tags[i],"INFO/DPR") ) { flag |= B2B_INFO_DPR; fprintf(pysamerr, "[warning] tag INFO/DPR functional, but deprecated. Please switch to `INFO/AD` in future.\n"); }
+        else if ( !strcasecmp(tags[i],"DP4") ) { flag |= B2B_FMT_DP4; fprintf(pysam_stderr, "[warning] tag DP4 functional, but deprecated. Please switch to `ADF` and `ADR` in future.\n"); }
+        else if ( !strcasecmp(tags[i],"DPR") ) { flag |= B2B_FMT_DPR; fprintf(pysam_stderr, "[warning] tag DPR functional, but deprecated. Please switch to `AD` in future.\n"); }
+        else if ( !strcasecmp(tags[i],"INFO/DPR") ) { flag |= B2B_INFO_DPR; fprintf(pysam_stderr, "[warning] tag INFO/DPR functional, but deprecated. Please switch to `INFO/AD` in future.\n"); }
          else if ( !strcasecmp(tags[i],"AD") ) flag |= B2B_FMT_AD;
          else if ( !strcasecmp(tags[i],"ADF") ) flag |= B2B_FMT_ADF;
          else if ( !strcasecmp(tags[i],"ADR") ) flag |= B2B_FMT_ADR;
@@ -759,7 +761,7 @@ int parse_format_flag(const char *str)
          else if ( !strcasecmp(tags[i],"INFO/ADR") ) flag |= B2B_INFO_ADR;
          else
          {
-            fprintf(pysamerr,"Could not parse tag \"%s\" in \"%s\"\n", tags[i], str);
+            fprintf(pysam_stderr,"Could not parse tag \"%s\" in \"%s\"\n", tags[i], str);
              exit(EXIT_FAILURE);
          }
          free(tags[i]);
@@ -787,7 +789,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
  "  -b, --bam-list FILE     list of input BAM filenames, one per line\n"
  "  -B, --no-BAQ            disable BAQ (per-Base Alignment Quality)\n"
  "  -C, --adjust-MQ INT     adjust mapping quality; recommended:50, disable:0 [0]\n"
-"  -d, --max-depth INT     max per-BAM depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
+"  -d, --max-depth INT     max per-file depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
      fprintf(fp,
  "  -E, --redo-BAQ          recalculate BAQ on the fly, ignore existing BQs\n"
  "  -f, --fasta-ref FILE    faidx indexed reference sequence file\n"
@@ -828,7 +830,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
  "  -h, --tandem-qual INT   coefficient for homopolymer errors [%d]\n", mplp->tandemQ);
      fprintf(fp,
  "  -I, --skip-indels       do not perform indel calling\n"
-"  -L, --max-idepth INT    maximum per-sample depth for INDEL calling [%d]\n", mplp->max_indel_depth);
+"  -L, --max-idepth INT    maximum per-file depth for INDEL calling [%d]\n", mplp->max_indel_depth);
      fprintf(fp,
  "  -m, --min-ireads INT    minimum number gapped reads for indel candidates [%d]\n", mplp->min_support);
      fprintf(fp,
@@ -921,11 +923,11 @@ int bam_mpileup(int argc, char *argv[])
          case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
          case  1 :
              mplp.rflag_require = bam_str2flag(optarg);
-            if ( mplp.rflag_require<0 ) { fprintf(pysamerr,"Could not parse --rf %s\n", optarg); return 1; }
+            if ( mplp.rflag_require<0 ) { fprintf(pysam_stderr,"Could not parse --rf %s\n", optarg); return 1; }
              break;
          case  2 :
              mplp.rflag_filter = bam_str2flag(optarg);
-            if ( mplp.rflag_filter<0 ) { fprintf(pysamerr,"Could not parse --ff %s\n", optarg); return 1; }
+            if ( mplp.rflag_filter<0 ) { fprintf(pysam_stderr,"Could not parse --ff %s\n", optarg); return 1; }
              break;
          case  3 : mplp.output_fname = optarg; break;
          case  4 : mplp.openQ = atoi(optarg); break;
@@ -949,9 +951,9 @@ int bam_mpileup(int argc, char *argv[])
          case 'v': mplp.flag |= MPLP_BCF | MPLP_VCF; break;
          case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_BCF; break;
          case 'B': mplp.flag &= ~MPLP_REALN; break;
-        case 'D': mplp.fmt_flag |= B2B_FMT_DP; fprintf(pysamerr, "[warning] samtools mpileup option `-D` is functional, but deprecated. Please switch to `-t DP` in future.\n"); break;
-        case 'S': mplp.fmt_flag |= B2B_FMT_SP; fprintf(pysamerr, "[warning] samtools mpileup option `-S` is functional, but deprecated. Please switch to `-t SP` in future.\n"); break;
-        case 'V': mplp.fmt_flag |= B2B_FMT_DV; fprintf(pysamerr, "[warning] samtools mpileup option `-V` is functional, but deprecated. Please switch to `-t DV` in future.\n"); break;
+        case 'D': mplp.fmt_flag |= B2B_FMT_DP; fprintf(pysam_stderr, "[warning] samtools mpileup option `-D` is functional, but deprecated. Please switch to `-t DP` in future.\n"); break;
+        case 'S': mplp.fmt_flag |= B2B_FMT_SP; fprintf(pysam_stderr, "[warning] samtools mpileup option `-S` is functional, but deprecated. Please switch to `-t SP` in future.\n"); break;
+        case 'V': mplp.fmt_flag |= B2B_FMT_DV; fprintf(pysam_stderr, "[warning] samtools mpileup option `-V` is functional, but deprecated. Please switch to `-t DV` in future.\n"); break;
          case 'I': mplp.flag |= MPLP_NO_INDEL; break;
          case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
          case '6': mplp.flag |= MPLP_ILLUMINA13; break;
@@ -981,7 +983,7 @@ int bam_mpileup(int argc, char *argv[])
                  char buf[1024];
                  mplp.rghash = khash_str2int_init();
                  if ((fp_rg = fopen(optarg, "r")) == NULL)
-                    fprintf(pysamerr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg);
+                    fprintf(pysam_stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg);
                  while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me...
                      khash_str2int_inc(mplp.rghash, strdup(buf));
                  fclose(fp_rg);
@@ -992,7 +994,7 @@ int bam_mpileup(int argc, char *argv[])
              if (parse_sam_global_opt(c, optarg, lopts, &mplp.ga) == 0) break;
              /* else fall-through */
          case '?':
-            print_usage(pysamerr, &mplp);
+            print_usage(pysam_stderr, &mplp);
              return 1;
          }
      }
@@ -1004,13 +1006,13 @@ int bam_mpileup(int argc, char *argv[])
  
      if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ )
      {
-        fprintf(pysamerr,"Error: The -B option cannot be combined with -E\n");
+        fprintf(pysam_stderr,"Error: The -B option cannot be combined with -E\n");
          return 1;
      }
      if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
      if (argc == 1)
      {
-        print_usage(pysamerr, &mplp);
+        print_usage(pysam_stderr, &mplp);
          return 1;
      }
      int ret;
diff --git a/samtools/bam_quickcheck.c b/samtools/bam_quickcheck.c

index 8d1e7ef9ad86357de41022c3e0240369861b9ce3..6c3c66452737959f7b6ccdbfd24513522c6b6c97 100644 (file)
--- a/samtools/bam_quickcheck.c
+++ b/samtools/bam_quickcheck.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <htslib/hts.h>
  #include <htslib/sam.h>
  #include <htslib/bgzf.h>
@@ -36,6 +38,21 @@ static void usage_quickcheck(FILE *write_to)
  "Options:\n"
  "  -v              verbose output (repeat for more verbosity)\n"
  "\n"
+"Notes:\n"
+"\n"
+"1. In order to use this command effectively, you should check its exit status;\n"
+"   without any -v options it will NOT print any output, even when some files\n"
+"   fail the check. One way to use quickcheck might be as a check that all\n"
+"   BAM files in a directory are okay:\n"
+"\n"
+"\tsamtools quickcheck *.bam && echo 'all ok' \\\n"
+"\t   || echo 'fail!'\n"
+"\n"
+"   To also determine which files have failed, use the -v option:\n"
+"\n"
+"\tsamtools quickcheck -v *.bam > bad_bams.fofn \\\n"
+"\t   && echo 'all ok' \\\n"
+"\t   || echo 'some files failed check, see bad_bams.fofn'\n"
      );
  }
  
@@ -121,7 +138,10 @@ int main_quickcheck(int argc, char** argv)
                  }
              }
  
-            hts_close(hts_fp);
+            if (hts_close(hts_fp) < 0) {
+                file_state |= 32;
+                if (verbose >= 2) fprintf(stderr, "%s did not close cleanly\n", fn);
+            }
          }
  
          if (file_state > 0 && verbose >= 1) {
diff --git a/samtools/bam_quickcheck.c.pysam.c b/samtools/bam_quickcheck.c.pysam.c

index b589d46c0740a472621fbefa543341b5d59cce00..26dbeb9ab153e593705c3f698671aab8e326f1f2 100644 (file)
--- a/samtools/bam_quickcheck.c.pysam.c
+++ b/samtools/bam_quickcheck.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <htslib/hts.h>
  #include <htslib/sam.h>
  #include <htslib/bgzf.h>
@@ -38,6 +40,21 @@ static void usage_quickcheck(FILE *write_to)
  "Options:\n"
  "  -v              verbose output (repeat for more verbosity)\n"
  "\n"
+"Notes:\n"
+"\n"
+"1. In order to use this command effectively, you should check its exit status;\n"
+"   without any -v options it will NOT print any output, even when some files\n"
+"   fail the check. One way to use quickcheck might be as a check that all\n"
+"   BAM files in a directory are okay:\n"
+"\n"
+"\tsamtools quickcheck *.bam && echo 'all ok' \\\n"
+"\t   || echo 'fail!'\n"
+"\n"
+"   To also determine which files have failed, use the -v option:\n"
+"\n"
+"\tsamtools quickcheck -v *.bam > bad_bams.fofn \\\n"
+"\t   && echo 'all ok' \\\n"
+"\t   || echo 'some files failed check, see bad_bams.fofn'\n"
      );
  }
  
@@ -54,7 +71,7 @@ int main_quickcheck(int argc, char** argv)
              verbose++;
              break;
          default:
-            usage_quickcheck(pysamerr);
+            usage_quickcheck(pysam_stderr);
              return 1;
          }
      }
@@ -63,12 +80,12 @@ int main_quickcheck(int argc, char** argv)
      argv += optind;
  
      if (argc < 1) {
-        usage_quickcheck(stdout);
+        usage_quickcheck(pysam_stdout);
          return 1;
      }
  
      if (verbose >= 2) {
-        fprintf(pysamerr, "verbosity set to %d\n", verbose);
+        fprintf(pysam_stderr, "verbosity set to %d\n", verbose);
      }
  
      if (verbose >= 4) {
@@ -82,52 +99,55 @@ int main_quickcheck(int argc, char** argv)
          char* fn = argv[i];
          int file_state = 0;
  
-        if (verbose >= 3) fprintf(pysamerr, "checking %s\n", fn);
+        if (verbose >= 3) fprintf(pysam_stderr, "checking %s\n", fn);
  
          // attempt to open
          htsFile *hts_fp = hts_open(fn, "r");
          if (hts_fp == NULL) {
-            if (verbose >= 2) fprintf(pysamerr, "%s could not be opened for reading\n", fn);
+            if (verbose >= 2) fprintf(pysam_stderr, "%s could not be opened for reading\n", fn);
              file_state |= 2;
          }
          else {
-            if (verbose >= 3) fprintf(pysamerr, "opened %s\n", fn);
+            if (verbose >= 3) fprintf(pysam_stderr, "opened %s\n", fn);
              // make sure we have sequence data
              const htsFormat *fmt = hts_get_format(hts_fp);
              if (fmt->category != sequence_data ) {
-                if (verbose >= 2) fprintf(pysamerr, "%s was not identified as sequence data\n", fn);
+                if (verbose >= 2) fprintf(pysam_stderr, "%s was not identified as sequence data\n", fn);
                  file_state |= 4;
              }
              else {
-                if (verbose >= 3) fprintf(pysamerr, "%s is sequence data\n", fn);
+                if (verbose >= 3) fprintf(pysam_stderr, "%s is sequence data\n", fn);
                  // check header
                  bam_hdr_t *header = sam_hdr_read(hts_fp);
                  if (header->n_targets <= 0) {
-                    if (verbose >= 2) fprintf(pysamerr, "%s had no targets in header\n", fn);
+                    if (verbose >= 2) fprintf(pysam_stderr, "%s had no targets in header\n", fn);
                      file_state |= 8;
                  }
                  else {
-                    if (verbose >= 3) fprintf(pysamerr, "%s has %d targets in header\n", fn, header->n_targets);
+                    if (verbose >= 3) fprintf(pysam_stderr, "%s has %d targets in header\n", fn, header->n_targets);
                  }
  
                  // only check EOF on BAM for now
                  // TODO implement and use hts_check_EOF() to include CRAM support
                  if (fmt->format == bam) {
                      if (bgzf_check_EOF(hts_fp->fp.bgzf) <= 0) {
-                        if (verbose >= 2) fprintf(pysamerr, "%s was missing EOF block\n", fn);
+                        if (verbose >= 2) fprintf(pysam_stderr, "%s was missing EOF block\n", fn);
                          file_state |= 16;
                      }
                      else {
-                        if (verbose >= 3) fprintf(pysamerr, "%s has good EOF block\n", fn);
+                        if (verbose >= 3) fprintf(pysam_stderr, "%s has good EOF block\n", fn);
                      }
                  }
              }
  
-            hts_close(hts_fp);
+            if (hts_close(hts_fp) < 0) {
+                file_state |= 32;
+                if (verbose >= 2) fprintf(pysam_stderr, "%s did not close cleanly\n", fn);
+            }
          }
  
          if (file_state > 0 && verbose >= 1) {
-            fprintf(stdout, "%s\n", fn);
+            fprintf(pysam_stdout, "%s\n", fn);
          }
          ret |= file_state;
      }
diff --git a/samtools/bam_reheader.c b/samtools/bam_reheader.c

index dc4380701c9caf4d7b69af548a7e83ac77652abf..0469c06715dc75125cfdb169a5ba9d69fcc9900f 100644 (file)
--- a/samtools/bam_reheader.c
+++ b/samtools/bam_reheader.c
@@ -1,7 +1,7 @@
  /*  bam_reheader.c -- reheader subcommand.
  
      Copyright (C) 2010 Broad Institute.
-    Copyright (C) 2012, 2013 Genome Research Ltd.
+    Copyright (C) 2012-2015 Genome Research Ltd.
  
      Author: Heng Li <lh3@sanger.ac.uk>
  
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <assert.h>
@@ -43,47 +45,77 @@ DEALINGS IN THE SOFTWARE.  */
  int bam_reheader(BGZF *in, bam_hdr_t *h, int fd,
                   const char *arg_list, int add_PG)
  {
-    BGZF *fp;
+    BGZF *fp = NULL;
      ssize_t len;
-    uint8_t *buf;
+    uint8_t *buf = NULL;
+    SAM_hdr *sh = NULL;
      if (in->is_write) return -1;
      buf = malloc(BUF_SIZE);
+    if (!buf) {
+        fprintf(stderr, "Out of memory\n");
+        return -1;
+    }
      if (bam_hdr_read(in) == NULL) {
          fprintf(stderr, "Couldn't read header\n");
-        free(buf);
-        return -1;
+        goto fail;
      }
      fp = bgzf_fdopen(fd, "w");
+    if (!fp) {
+        print_error_errno("reheader", "Couldn't open output file");
+        goto fail;
+    }
  
      if (add_PG) {
          // Around the houses, but it'll do until we can manipulate bam_hdr_t natively.
-        SAM_hdr *sh = sam_hdr_parse_(h->text, h->l_text);
+        sh = sam_hdr_parse_(h->text, h->l_text);
+        if (!sh)
+            goto fail;
          if (sam_hdr_add_PG(sh, "samtools",
                             "VN", samtools_version(),
                             arg_list ? "CL": NULL,
                             arg_list ? arg_list : NULL,
                             NULL) != 0)
-            return -1;
+            goto fail;
  
          free(h->text);
          h->text = strdup(sam_hdr_str(sh));
          h->l_text = sam_hdr_length(sh);
          if (!h->text)
-            return -1;
+            goto fail;
          sam_hdr_free(sh);
+        sh = NULL;
      }
  
-    bam_hdr_write(fp, h);
+    if (bam_hdr_write(fp, h) < 0) {
+        print_error_errno("reheader", "Couldn't write header");
+        goto fail;
+    }
      if (in->block_offset < in->block_length) {
-        bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
-        bgzf_flush(fp);
+        if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
+        if (bgzf_flush(fp) < 0) goto write_fail;
+    }
+    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
+        if (bgzf_raw_write(fp, buf, len) < 0) goto write_fail;
+    }
+    if (len < 0) {
+        fprintf(stderr, "[%s] Error reading input file\n", __func__);
+        goto fail;
      }
-    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0)
-        bgzf_raw_write(fp, buf, len);
      free(buf);
      fp->block_offset = in->block_offset = 0;
-    bgzf_close(fp);
+    if (bgzf_close(fp) < 0) {
+        fprintf(stderr, "[%s] Error closing output file\n", __func__);
+        return -1;
+    }
      return 0;
+
+ write_fail:
+    print_error_errno("reheader", "Error writing to output file");
+ fail:
+    bgzf_close(fp);
+    free(buf);
+    sam_hdr_free(sh);
+    return -1;
  }
  
  /*
@@ -445,7 +477,7 @@ int main_reheader(int argc, char *argv[])
      { // read the header
          samFile *fph = sam_open(argv[optind], "r");
          if (fph == 0) {
-            fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[optind]);
+            print_error_errno("reheader", "fail to read the header from '%s'", argv[optind]);
              return 1;
          }
          h = sam_hdr_read(fph);
@@ -458,7 +490,7 @@ int main_reheader(int argc, char *argv[])
      }
      in = sam_open(argv[optind+1], inplace?"r+":"r");
      if (in == 0) {
-        fprintf(stderr, "[%s] fail to open file %s.\n", __func__, argv[optind+1]);
+        print_error_errno("reheader", "fail to open file '%s'", argv[optind+1]);
          return 1;
      }
      if (hts_get_format(in)->format == bam) {
diff --git a/samtools/bam_reheader.c.pysam.c b/samtools/bam_reheader.c.pysam.c

index 0519137d7a4a771a2740793c83a3972e1f900de8..16990e69e74522a30e8307d235113c5739ce6fa0 100644 (file)
--- a/samtools/bam_reheader.c.pysam.c
+++ b/samtools/bam_reheader.c.pysam.c
@@ -3,7 +3,7 @@
  /*  bam_reheader.c -- reheader subcommand.
  
      Copyright (C) 2010 Broad Institute.
-    Copyright (C) 2012, 2013 Genome Research Ltd.
+    Copyright (C) 2012-2015 Genome Research Ltd.
  
      Author: Heng Li <lh3@sanger.ac.uk>
  
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <assert.h>
@@ -45,51 +47,81 @@ DEALINGS IN THE SOFTWARE.  */
  int bam_reheader(BGZF *in, bam_hdr_t *h, int fd,
                   const char *arg_list, int add_PG)
  {
-    BGZF *fp;
+    BGZF *fp = NULL;
      ssize_t len;
-    uint8_t *buf;
+    uint8_t *buf = NULL;
+    SAM_hdr *sh = NULL;
      if (in->is_write) return -1;
      buf = malloc(BUF_SIZE);
-    if (bam_hdr_read(in) == NULL) {
-        fprintf(pysamerr, "Couldn't read header\n");
-        free(buf);
+    if (!buf) {
+        fprintf(pysam_stderr, "Out of memory\n");
          return -1;
      }
+    if (bam_hdr_read(in) == NULL) {
+        fprintf(pysam_stderr, "Couldn't read header\n");
+        goto fail;
+    }
      fp = bgzf_fdopen(fd, "w");
+    if (!fp) {
+        print_error_errno("reheader", "Couldn't open output file");
+        goto fail;
+    }
  
      if (add_PG) {
          // Around the houses, but it'll do until we can manipulate bam_hdr_t natively.
-        SAM_hdr *sh = sam_hdr_parse_(h->text, h->l_text);
+        sh = sam_hdr_parse_(h->text, h->l_text);
+        if (!sh)
+            goto fail;
          if (sam_hdr_add_PG(sh, "samtools",
                             "VN", samtools_version(),
                             arg_list ? "CL": NULL,
                             arg_list ? arg_list : NULL,
                             NULL) != 0)
-            return -1;
+            goto fail;
  
          free(h->text);
          h->text = strdup(sam_hdr_str(sh));
          h->l_text = sam_hdr_length(sh);
          if (!h->text)
-            return -1;
+            goto fail;
          sam_hdr_free(sh);
+        sh = NULL;
      }
  
-    bam_hdr_write(fp, h);
+    if (bam_hdr_write(fp, h) < 0) {
+        print_error_errno("reheader", "Couldn't write header");
+        goto fail;
+    }
      if (in->block_offset < in->block_length) {
-        bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
-        bgzf_flush(fp);
+        if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
+        if (bgzf_flush(fp) < 0) goto write_fail;
+    }
+    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
+        if (bgzf_raw_write(fp, buf, len) < 0) goto write_fail;
+    }
+    if (len < 0) {
+        fprintf(pysam_stderr, "[%s] Error reading input file\n", __func__);
+        goto fail;
      }
-    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0)
-        bgzf_raw_write(fp, buf, len);
      free(buf);
      fp->block_offset = in->block_offset = 0;
-    bgzf_close(fp);
+    if (bgzf_close(fp) < 0) {
+        fprintf(pysam_stderr, "[%s] Error closing output file\n", __func__);
+        return -1;
+    }
      return 0;
+
+ write_fail:
+    print_error_errno("reheader", "Error writing to output file");
+ fail:
+    bgzf_close(fp);
+    free(buf);
+    sam_hdr_free(sh);
+    return -1;
  }
  
  /*
- * Reads a file and outputs a new CRAM file to stdout with 'h'
+ * Reads a file and outputs a new CRAM file to pysam_stdout with 'h'
   * replaced as the header.  No checks are made to the validity.
   *
   * FIXME: error checking
@@ -173,7 +205,7 @@ int cram_reheader_inplace2(cram_fd *fd, const bam_hdr_t *h, const char *arg_list
  
      if (cram_major_vers(fd) < 2 ||
          cram_major_vers(fd) > 3) {
-        fprintf(pysamerr, "[%s] unsupported CRAM version %d\n", __func__,
+        fprintf(pysam_stderr, "[%s] unsupported CRAM version %d\n", __func__,
                  cram_major_vers(fd));
          goto err;
      }
@@ -206,7 +238,7 @@ int cram_reheader_inplace2(cram_fd *fd, const bam_hdr_t *h, const char *arg_list
          goto err;
  
      if (cram_block_get_uncomp_size(b) < header_len+4) {
-        fprintf(pysamerr, "New header will not fit. Use non-inplace version (%d > %d)\n",
+        fprintf(pysam_stderr, "New header will not fit. Use non-inplace version (%d > %d)\n",
                  header_len+4, cram_block_get_uncomp_size(b));
          ret = -2;
          goto err;
@@ -269,7 +301,7 @@ int cram_reheader_inplace3(cram_fd *fd, const bam_hdr_t *h, const char *arg_list
  
      if (cram_major_vers(fd) < 2 ||
          cram_major_vers(fd) > 3) {
-        fprintf(pysamerr, "[%s] unsupported CRAM version %d\n", __func__,
+        fprintf(pysam_stderr, "[%s] unsupported CRAM version %d\n", __func__,
                  cram_major_vers(fd));
          goto err;
      }
@@ -341,7 +373,7 @@ int cram_reheader_inplace3(cram_fd *fd, const bam_hdr_t *h, const char *arg_list
          goto err;
  
      if (old_container_sz != container_sz) {
-        fprintf(pysamerr, "Quirk of fate makes this troublesome! "
+        fprintf(pysam_stderr, "Quirk of fate makes this troublesome! "
                  "Please use non-inplace version.\n");
          goto err;
      }
@@ -360,7 +392,7 @@ int cram_reheader_inplace3(cram_fd *fd, const bam_hdr_t *h, const char *arg_list
          goto err;
  
      if (cram_block_size(b) > cram_container_get_length(c)) {
-        fprintf(pysamerr, "New header will not fit. Use non-inplace version"
+        fprintf(pysam_stderr, "New header will not fit. Use non-inplace version"
                  " (%d > %d)\n",
                  (int)cram_block_size(b), cram_container_get_length(c));
          ret = -2;
@@ -398,7 +430,7 @@ int cram_reheader_inplace(cram_fd *fd, const bam_hdr_t *h, const char *arg_list,
      case 2: return cram_reheader_inplace2(fd, h, arg_list, add_PG);
      case 3: return cram_reheader_inplace3(fd, h, arg_list, add_PG);
      default:
-        fprintf(pysamerr, "[%s] unsupported CRAM version %d\n", __func__,
+        fprintf(pysam_stderr, "[%s] unsupported CRAM version %d\n", __func__,
                  cram_major_vers(fd));
          return -1;
      }
@@ -412,7 +444,7 @@ static void usage(FILE *fp, int ret) {
             "Options:\n"
             "    -P, --no-PG      Do not generate an @PG header line.\n"
             "    -i, --in-place   Modify the bam/cram file directly.\n"
-           "                     (Defaults to outputting to stdout.)\n");
+           "                     (Defaults to outputting to pysam_stdout.)\n");
      exit(ret);
  }
  
@@ -431,41 +463,40 @@ int main_reheader(int argc, char *argv[])
      };
  
      while ((c = getopt_long(argc, argv, "hiP", lopts, NULL)) >= 0) {
-        fprintf(stderr, " %i %c %s\n", optind, c, argv[optind-1]);
          switch (c) {
          case 'P': add_PG = 0; break;
          case 'i': inplace = 1; break;
-        case 'h': usage(stdout, 0); break;
+        case 'h': usage(pysam_stdout, 0); break;
          default:
-            fprintf(pysamerr, "Invalid option '%c'\n", c);
-            usage(pysamerr, 1);
+            fprintf(pysam_stderr, "Invalid option '%c'\n", c);
+            usage(pysam_stderr, 1);
          }
      }
  
      if (argc - optind != 2)
-        usage(pysamerr, 1);
+        usage(pysam_stderr, 1);
  
      { // read the header
          samFile *fph = sam_open(argv[optind], "r");
          if (fph == 0) {
-            fprintf(pysamerr, "[%s] fail to read the header from %s.\n", __func__, argv[optind]);
+            print_error_errno("reheader", "fail to read the header from '%s'", argv[optind]);
              return 1;
          }
          h = sam_hdr_read(fph);
          sam_close(fph);
          if (h == NULL) {
-            fprintf(pysamerr, "[%s] failed to read the header for '%s'.\n",
+            fprintf(pysam_stderr, "[%s] failed to read the header for '%s'.\n",
                      __func__, argv[1]);
              return 1;
          }
      }
      in = sam_open(argv[optind+1], inplace?"r+":"r");
      if (in == 0) {
-        fprintf(pysamerr, "[%s] fail to open file %s.\n", __func__, argv[optind+1]);
+        print_error_errno("reheader", "fail to open file '%s'", argv[optind+1]);
          return 1;
      }
      if (hts_get_format(in)->format == bam) {
-        r = bam_reheader(in->fp.bgzf, h, fileno(stdout), arg_list, add_PG);
+        r = bam_reheader(in->fp.bgzf, h, fileno(pysam_stdout), arg_list, add_PG);
      } else {
          if (inplace)
              r = cram_reheader_inplace(in->fp.cram, h, arg_list, add_PG);
diff --git a/samtools/bam_rmdup.c b/samtools/bam_rmdup.c

index cdca87826de96eaa13bc71b8ca33de9edcbadab9..57612b40acef81089647ad0fa497ec41ff543b9d 100644 (file)
--- a/samtools/bam_rmdup.c
+++ b/samtools/bam_rmdup.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <string.h>
  #include <stdio.h>
@@ -30,6 +32,7 @@ DEALINGS IN THE SOFTWARE.  */
  #include <unistd.h>
  #include "htslib/sam.h"
  #include "sam_opts.h"
+#include "samtools.h"
  #include "bam.h" // for bam_get_library
  
  typedef bam1_t *bam1_p;
@@ -60,14 +63,24 @@ static inline void stack_insert(tmp_stack_t *stack, bam1_t *b)
      stack->a[stack->n++] = b;
  }
  
-static inline void dump_best(tmp_stack_t *stack, samFile *out, bam_hdr_t *hdr)
+static inline int dump_best(tmp_stack_t *stack, samFile *out, bam_hdr_t *hdr)
  {
      int i;
      for (i = 0; i != stack->n; ++i) {
-        sam_write1(out, hdr, stack->a[i]);
+        if (sam_write1(out, hdr, stack->a[i]) < 0) return -1;
          bam_destroy1(stack->a[i]);
+        stack->a[i] = NULL;
      }
      stack->n = 0;
+    return 0;
+}
+
+static inline void clear_stack(tmp_stack_t *stack) {
+    int i;
+    if (!stack->a) return;
+    for (i = 0; i != stack->n; ++i) {
+        bam_destroy1(stack->a[i]);
+    }
  }
  
  static void clear_del_set(khash_t(name) *del_set)
@@ -114,25 +127,29 @@ static inline int sum_qual(const bam1_t *b)
      return q;
  }
  
-void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
+int bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
  {
-    bam1_t *b;
-    int last_tid = -1, last_pos = -1;
+    bam1_t *b = NULL;
+    int last_tid = -1, last_pos = -1, r;
      tmp_stack_t stack;
      khint_t k;
-    khash_t(lib) *aux;
-    khash_t(name) *del_set;
+    khash_t(lib) *aux = NULL;
+    khash_t(name) *del_set = NULL;
  
+    memset(&stack, 0, sizeof(tmp_stack_t));
      aux = kh_init(lib);
      del_set = kh_init(name);
      b = bam_init1();
-    memset(&stack, 0, sizeof(tmp_stack_t));
+    if (!aux || !del_set || !b) {
+        perror(__func__);
+        goto fail;
+    }
  
      kh_resize(name, del_set, 4 * BUFFER_SIZE);
-    while (sam_read1(in, hdr, b) >= 0) {
+    while ((r = sam_read1(in, hdr, b)) >= 0) {
          bam1_core_t *c = &b->core;
          if (c->tid != last_tid || last_pos != c->pos) {
-            dump_best(&stack, out, hdr); // write the result
+            if (dump_best(&stack, out, hdr) < 0) goto write_fail; // write the result
              clear_best(aux, BUFFER_SIZE);
              if (c->tid != last_tid) {
                  clear_best(aux, 0);
@@ -141,8 +158,10 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
                      clear_del_set(del_set);
                  }
                  if ((int)c->tid == -1) { // append unmapped reads
-                    sam_write1(out, hdr, b);
-                    while (sam_read1(in, hdr, b) >= 0) sam_write1(out, hdr, b);
+                    if (sam_write1(out, hdr, b) < 0) goto write_fail;
+                    while ((r = sam_read1(in, hdr, b)) >= 0) {
+                        if (sam_write1(out, hdr, b) < 0) goto write_fail;
+                    }
                      break;
                  }
                  last_tid = c->tid;
@@ -150,7 +169,7 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
              }
          }
          if (!(c->flag&BAM_FPAIRED) || (c->flag&(BAM_FUNMAP|BAM_FMUNMAP)) || (c->mtid >= 0 && c->tid != c->mtid)) {
-            sam_write1(out, hdr, b);
+            if (sam_write1(out, hdr, b) < 0) goto write_fail;
          } else if (c->isize > 0) { // paired, head
              uint64_t key = (uint64_t)c->pos<<32 | c->isize;
              const char *lib;
@@ -178,19 +197,26 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
              if (k != kh_end(del_set)) {
                  free((char*)kh_key(del_set, k));
                  kh_del(name, del_set, k);
-            } else sam_write1(out, hdr, b);
+            } else {
+                if (sam_write1(out, hdr, b) < 0) goto write_fail;
+            }
          }
          last_pos = c->pos;
      }
+    if (r < -1) {
+        fprintf(stderr, "[%s] failed to read input file\n", __func__);
+        goto fail;
+    }
  
      for (k = kh_begin(aux); k != kh_end(aux); ++k) {
          if (kh_exist(aux, k)) {
              lib_aux_t *q = &kh_val(aux, k);
-            dump_best(&stack, out, hdr);
+            if (dump_best(&stack, out, hdr) < 0) goto write_fail;
              fprintf(stderr, "[bam_rmdup_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
                      (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
              kh_destroy(pos, q->best_hash);
              free((char*)kh_key(aux, k));
+            kh_del(lib, aux, k);
          }
      }
      kh_destroy(lib, aux);
@@ -199,9 +225,32 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
      kh_destroy(name, del_set);
      free(stack.a);
      bam_destroy1(b);
+    return 0;
+
+ write_fail:
+    print_error_errno("rmdup", "failed to write record");
+ fail:
+    clear_stack(&stack);
+    free(stack.a);
+    if (aux) {
+        for (k = kh_begin(aux); k != kh_end(aux); ++k) {
+            if (kh_exist(aux, k)) {
+                lib_aux_t *q = &kh_val(aux, k);
+                kh_destroy(pos, q->best_hash);
+                free((char*)kh_key(aux, k));
+            }
+        }
+        kh_destroy(lib, aux);
+    }
+    if (del_set) {
+        clear_del_set(del_set);
+        kh_destroy(name, del_set);
+    }
+    bam_destroy1(b);
+    return 1;
  }
  
-void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se);
+int bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se);
  
  static int rmdup_usage(void) {
      fprintf(stderr, "\n");
@@ -215,7 +264,7 @@ static int rmdup_usage(void) {
  
  int bam_rmdup(int argc, char *argv[])
  {
-    int c, is_se = 0, force_se = 0;
+    int c, ret, is_se = 0, force_se = 0;
      samFile *in, *out;
      bam_hdr_t *header;
      char wmode[3] = {'w', 'b', 0};
@@ -239,6 +288,10 @@ int bam_rmdup(int argc, char *argv[])
          return rmdup_usage();
  
      in = sam_open_format(argv[optind], "r", &ga.in);
+    if (!in) {
+        print_error_errno("rmdup", "failed to open \"%s\" for input", argv[optind]);
+        return 1;
+    }
      header = sam_hdr_read(in);
      if (header == NULL || header->n_targets == 0) {
          fprintf(stderr, "[bam_rmdup] input SAM does not have header. Abort!\n");
@@ -247,15 +300,23 @@ int bam_rmdup(int argc, char *argv[])
  
      sam_open_mode(wmode+1, argv[optind+1], NULL);
      out = sam_open_format(argv[optind+1], wmode, &ga.out);
-    if (in == 0 || out == 0) {
-        fprintf(stderr, "[bam_rmdup] fail to read/write input files\n");
+    if (!out) {
+        print_error_errno("rmdup", "failed to open \"%s\" for output", argv[optind+1]);
+        return 1;
+    }
+    if (sam_hdr_write(out, header) < 0) {
+        print_error_errno("rmdup", "failed to write header");
          return 1;
      }
-    sam_hdr_write(out, header);
  
-    if (is_se) bam_rmdupse_core(in, header, out, force_se);
-    else bam_rmdup_core(in, header, out);
+    if (is_se) ret = bam_rmdupse_core(in, header, out, force_se);
+    else ret = bam_rmdup_core(in, header, out);
+
      bam_hdr_destroy(header);
-    sam_close(in); sam_close(out);
-    return 0;
+    sam_close(in);
+    if (sam_close(out) < 0) {
+        fprintf(stderr, "[bam_rmdup] error closing output file\n");
+        ret = 1;
+    }
+    return ret;
  }
diff --git a/samtools/bam_rmdup.c.pysam.c b/samtools/bam_rmdup.c.pysam.c

index 4ece6f2ba8cb0517a98a2450f1bba9c1deb2413a..3c160259ec67b36e0c7157d11f50792121aa2e00 100644 (file)
--- a/samtools/bam_rmdup.c.pysam.c
+++ b/samtools/bam_rmdup.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <string.h>
  #include <stdio.h>
@@ -32,6 +34,7 @@ DEALINGS IN THE SOFTWARE.  */
  #include <unistd.h>
  #include "htslib/sam.h"
  #include "sam_opts.h"
+#include "samtools.h"
  #include "bam.h" // for bam_get_library
  
  typedef bam1_t *bam1_p;
@@ -62,14 +65,24 @@ static inline void stack_insert(tmp_stack_t *stack, bam1_t *b)
      stack->a[stack->n++] = b;
  }
  
-static inline void dump_best(tmp_stack_t *stack, samFile *out, bam_hdr_t *hdr)
+static inline int dump_best(tmp_stack_t *stack, samFile *out, bam_hdr_t *hdr)
  {
      int i;
      for (i = 0; i != stack->n; ++i) {
-        sam_write1(out, hdr, stack->a[i]);
+        if (sam_write1(out, hdr, stack->a[i]) < 0) return -1;
          bam_destroy1(stack->a[i]);
+        stack->a[i] = NULL;
      }
      stack->n = 0;
+    return 0;
+}
+
+static inline void clear_stack(tmp_stack_t *stack) {
+    int i;
+    if (!stack->a) return;
+    for (i = 0; i != stack->n; ++i) {
+        bam_destroy1(stack->a[i]);
+    }
  }
  
  static void clear_del_set(khash_t(name) *del_set)
@@ -116,43 +129,49 @@ static inline int sum_qual(const bam1_t *b)
      return q;
  }
  
-void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
+int bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
  {
-    bam1_t *b;
-    int last_tid = -1, last_pos = -1;
+    bam1_t *b = NULL;
+    int last_tid = -1, last_pos = -1, r;
      tmp_stack_t stack;
      khint_t k;
-    khash_t(lib) *aux;
-    khash_t(name) *del_set;
+    khash_t(lib) *aux = NULL;
+    khash_t(name) *del_set = NULL;
  
+    memset(&stack, 0, sizeof(tmp_stack_t));
      aux = kh_init(lib);
      del_set = kh_init(name);
      b = bam_init1();
-    memset(&stack, 0, sizeof(tmp_stack_t));
+    if (!aux || !del_set || !b) {
+        perror(__func__);
+        goto fail;
+    }
  
      kh_resize(name, del_set, 4 * BUFFER_SIZE);
-    while (sam_read1(in, hdr, b) >= 0) {
+    while ((r = sam_read1(in, hdr, b)) >= 0) {
          bam1_core_t *c = &b->core;
          if (c->tid != last_tid || last_pos != c->pos) {
-            dump_best(&stack, out, hdr); // write the result
+            if (dump_best(&stack, out, hdr) < 0) goto write_fail; // write the result
              clear_best(aux, BUFFER_SIZE);
              if (c->tid != last_tid) {
                  clear_best(aux, 0);
                  if (kh_size(del_set)) { // check
-                    fprintf(pysamerr, "[bam_rmdup_core] %llu unmatched pairs\n", (long long)kh_size(del_set));
+                    fprintf(pysam_stderr, "[bam_rmdup_core] %llu unmatched pairs\n", (long long)kh_size(del_set));
                      clear_del_set(del_set);
                  }
                  if ((int)c->tid == -1) { // append unmapped reads
-                    sam_write1(out, hdr, b);
-                    while (sam_read1(in, hdr, b) >= 0) sam_write1(out, hdr, b);
+                    if (sam_write1(out, hdr, b) < 0) goto write_fail;
+                    while ((r = sam_read1(in, hdr, b)) >= 0) {
+                        if (sam_write1(out, hdr, b) < 0) goto write_fail;
+                    }
                      break;
                  }
                  last_tid = c->tid;
-                fprintf(pysamerr, "[bam_rmdup_core] processing reference %s...\n", hdr->target_name[c->tid]);
+                fprintf(pysam_stderr, "[bam_rmdup_core] processing reference %s...\n", hdr->target_name[c->tid]);
              }
          }
          if (!(c->flag&BAM_FPAIRED) || (c->flag&(BAM_FUNMAP|BAM_FMUNMAP)) || (c->mtid >= 0 && c->tid != c->mtid)) {
-            sam_write1(out, hdr, b);
+            if (sam_write1(out, hdr, b) < 0) goto write_fail;
          } else if (c->isize > 0) { // paired, head
              uint64_t key = (uint64_t)c->pos<<32 | c->isize;
              const char *lib;
@@ -170,7 +189,7 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
                      bam_copy1(p, b); // replaced as b
                  } else kh_put(name, del_set, strdup(bam_get_qname(b)), &ret); // b will be removed
                  if (ret == 0)
-                    fprintf(pysamerr, "[bam_rmdup_core] inconsistent BAM file for pair '%s'. Continue anyway.\n", bam_get_qname(b));
+                    fprintf(pysam_stderr, "[bam_rmdup_core] inconsistent BAM file for pair '%s'. Continue anyway.\n", bam_get_qname(b));
              } else { // not found in best_hash
                  kh_val(q->best_hash, k) = bam_dup1(b);
                  stack_insert(&stack, kh_val(q->best_hash, k));
@@ -180,19 +199,26 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
              if (k != kh_end(del_set)) {
                  free((char*)kh_key(del_set, k));
                  kh_del(name, del_set, k);
-            } else sam_write1(out, hdr, b);
+            } else {
+                if (sam_write1(out, hdr, b) < 0) goto write_fail;
+            }
          }
          last_pos = c->pos;
      }
+    if (r < -1) {
+        fprintf(pysam_stderr, "[%s] failed to read input file\n", __func__);
+        goto fail;
+    }
  
      for (k = kh_begin(aux); k != kh_end(aux); ++k) {
          if (kh_exist(aux, k)) {
              lib_aux_t *q = &kh_val(aux, k);
-            dump_best(&stack, out, hdr);
-            fprintf(pysamerr, "[bam_rmdup_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
+            if (dump_best(&stack, out, hdr) < 0) goto write_fail;
+            fprintf(pysam_stderr, "[bam_rmdup_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
                      (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
              kh_destroy(pos, q->best_hash);
              free((char*)kh_key(aux, k));
+            kh_del(lib, aux, k);
          }
      }
      kh_destroy(lib, aux);
@@ -201,23 +227,46 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
      kh_destroy(name, del_set);
      free(stack.a);
      bam_destroy1(b);
+    return 0;
+
+ write_fail:
+    print_error_errno("rmdup", "failed to write record");
+ fail:
+    clear_stack(&stack);
+    free(stack.a);
+    if (aux) {
+        for (k = kh_begin(aux); k != kh_end(aux); ++k) {
+            if (kh_exist(aux, k)) {
+                lib_aux_t *q = &kh_val(aux, k);
+                kh_destroy(pos, q->best_hash);
+                free((char*)kh_key(aux, k));
+            }
+        }
+        kh_destroy(lib, aux);
+    }
+    if (del_set) {
+        clear_del_set(del_set);
+        kh_destroy(name, del_set);
+    }
+    bam_destroy1(b);
+    return 1;
  }
  
-void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se);
+int bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se);
  
  static int rmdup_usage(void) {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Usage:  samtools rmdup [-sS] <input.srt.bam> <output.bam>\n\n");
-    fprintf(pysamerr, "Option: -s    rmdup for SE reads\n");
-    fprintf(pysamerr, "        -S    treat PE reads as SE in rmdup (force -s)\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Usage:  samtools rmdup [-sS] <input.srt.bam> <output.bam>\n\n");
+    fprintf(pysam_stderr, "Option: -s    rmdup for SE reads\n");
+    fprintf(pysam_stderr, "        -S    treat PE reads as SE in rmdup (force -s)\n");
  
-    sam_global_opt_help(pysamerr, "-....");
+    sam_global_opt_help(pysam_stderr, "-....");
      return 1;
  }
  
  int bam_rmdup(int argc, char *argv[])
  {
-    int c, is_se = 0, force_se = 0;
+    int c, ret, is_se = 0, force_se = 0;
      samFile *in, *out;
      bam_hdr_t *header;
      char wmode[3] = {'w', 'b', 0};
@@ -241,23 +290,35 @@ int bam_rmdup(int argc, char *argv[])
          return rmdup_usage();
  
      in = sam_open_format(argv[optind], "r", &ga.in);
+    if (!in) {
+        print_error_errno("rmdup", "failed to open \"%s\" for input", argv[optind]);
+        return 1;
+    }
      header = sam_hdr_read(in);
      if (header == NULL || header->n_targets == 0) {
-        fprintf(pysamerr, "[bam_rmdup] input SAM does not have header. Abort!\n");
+        fprintf(pysam_stderr, "[bam_rmdup] input SAM does not have header. Abort!\n");
          return 1;
      }
  
      sam_open_mode(wmode+1, argv[optind+1], NULL);
      out = sam_open_format(argv[optind+1], wmode, &ga.out);
-    if (in == 0 || out == 0) {
-        fprintf(pysamerr, "[bam_rmdup] fail to read/write input files\n");
+    if (!out) {
+        print_error_errno("rmdup", "failed to open \"%s\" for output", argv[optind+1]);
+        return 1;
+    }
+    if (sam_hdr_write(out, header) < 0) {
+        print_error_errno("rmdup", "failed to write header");
          return 1;
      }
-    sam_hdr_write(out, header);
  
-    if (is_se) bam_rmdupse_core(in, header, out, force_se);
-    else bam_rmdup_core(in, header, out);
+    if (is_se) ret = bam_rmdupse_core(in, header, out, force_se);
+    else ret = bam_rmdup_core(in, header, out);
+
      bam_hdr_destroy(header);
-    sam_close(in); sam_close(out);
-    return 0;
+    sam_close(in);
+    if (sam_close(out) < 0) {
+        fprintf(pysam_stderr, "[bam_rmdup] error closing output file\n");
+        ret = 1;
+    }
+    return ret;
  }
diff --git a/samtools/bam_rmdupse.c b/samtools/bam_rmdupse.c

index d17f6f5dd2d646ce1625602cfe9ab1ab680b2385..f6baef0ada17dd73f29f89baf0649d063cbf10d1 100644 (file)
--- a/samtools/bam_rmdupse.c
+++ b/samtools/bam_rmdupse.c
@@ -23,12 +23,15 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <math.h>
  #include <stdio.h>
  #include "bam.h" // for bam_get_library
  #include "htslib/sam.h"
  #include "htslib/khash.h"
  #include "htslib/klist.h"
+#include "samtools.h"
  
  #define QUEUE_CLEAR_SIZE 0x100000
  #define MAX_POS 0x7fffffff
@@ -93,8 +96,8 @@ static void clear_besthash(besthash_t *h, int32_t pos)
              kh_del(best, h, k);
  }
  
-static void dump_alignment(samFile *out, bam_hdr_t *hdr,
-                           queue_t *queue, int32_t pos, khash_t(lib) *h)
+static int dump_alignment(samFile *out, bam_hdr_t *hdr,
+                          queue_t *queue, int32_t pos, khash_t(lib) *h)
  {
      if (queue->size > QUEUE_CLEAR_SIZE || pos == MAX_POS) {
          khint_t k;
@@ -108,7 +111,7 @@ static void dump_alignment(samFile *out, bam_hdr_t *hdr,
                  continue;
              }
              if ((q->b->core.flag&BAM_FREVERSE) && q->endpos > pos) break;
-            sam_write1(out, hdr, q->b);
+            if (sam_write1(out, hdr, q->b) < 0) return -1;
              q->b->l_data = 0;
              kl_shift(q, queue, 0);
          }
@@ -119,28 +122,40 @@ static void dump_alignment(samFile *out, bam_hdr_t *hdr,
              }
          }
      }
+    return 0;
  }
  
-void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
+int bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
  {
-    bam1_t *b;
-    queue_t *queue;
+    bam1_t *b = NULL;
+    queue_t *queue = NULL;
      khint_t k;
-    int last_tid = -2;
-    khash_t(lib) *aux;
+    int last_tid = -2, r;
+    khash_t(lib) *aux = NULL;
  
      aux = kh_init(lib);
      b = bam_init1();
      queue = kl_init(q);
-    while (sam_read1(in, hdr, b) >= 0) {
+    if (!aux || !b || !queue) {
+        perror(__func__);
+        goto fail;
+    }
+
+    while ((r = sam_read1(in, hdr, b)) >= 0) {
          bam1_core_t *c = &b->core;
          int endpos = bam_endpos(b);
          int score = sum_qual(b);
  
          if (last_tid != c->tid) {
-            if (last_tid >= 0) dump_alignment(out, hdr, queue, MAX_POS, aux);
+            if (last_tid >= 0) {
+                if (dump_alignment(out, hdr, queue, MAX_POS, aux) < 0)
+                    goto write_fail;
+            }
              last_tid = c->tid;
-        } else dump_alignment(out, hdr, queue, c->pos, aux);
+        } else {
+            if (dump_alignment(out, hdr, queue, c->pos, aux) < 0)
+                goto write_fail;
+        }
          if ((c->flag&BAM_FUNMAP) || ((c->flag&BAM_FPAIRED) && !force_se)) {
              push_queue(queue, b, endpos, score);
          } else {
@@ -170,7 +185,12 @@ void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
              } else kh_val(h, k) = push_queue(queue, b, endpos, score);
          }
      }
-    dump_alignment(out, hdr, queue, MAX_POS, aux);
+    if (r < -1) {
+        fprintf(stderr, "[%s] error reading input file\n", __func__);
+        goto fail;
+    }
+
+    if (dump_alignment(out, hdr, queue, MAX_POS, aux) < 0) goto write_fail;
  
      for (k = kh_begin(aux); k != kh_end(aux); ++k) {
          if (kh_exist(aux, k)) {
@@ -179,9 +199,29 @@ void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
                      (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
              kh_destroy(best, q->left); kh_destroy(best, q->rght);
              free((char*)kh_key(aux, k));
+            kh_del(lib, aux, k);
          }
      }
      kh_destroy(lib, aux);
      bam_destroy1(b);
      kl_destroy(q, queue);
+    return 0;
+
+ write_fail:
+    print_error_errno("rmdup", "failed to write record");
+ fail:
+    if (aux) {
+        for (k = kh_begin(aux); k != kh_end(aux); ++k) {
+            if (kh_exist(aux, k)) {
+                lib_aux_t *q = &kh_val(aux, k);
+                kh_destroy(best, q->left);
+                kh_destroy(best, q->rght);
+                free((char*)kh_key(aux, k));
+            }
+        }
+        kh_destroy(lib, aux);
+    }
+    bam_destroy1(b);
+    kl_destroy(q, queue);
+    return 1;
  }
diff --git a/samtools/bam_rmdupse.c.pysam.c b/samtools/bam_rmdupse.c.pysam.c

index 06895a825e1077e5f3a7dbc1f083b7325fc108aa..3a3d0d03b3be8435e93bcf5e41b477cc1fdbb405 100644 (file)
--- a/samtools/bam_rmdupse.c.pysam.c
+++ b/samtools/bam_rmdupse.c.pysam.c
@@ -25,12 +25,15 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <math.h>
  #include <stdio.h>
  #include "bam.h" // for bam_get_library
  #include "htslib/sam.h"
  #include "htslib/khash.h"
  #include "htslib/klist.h"
+#include "samtools.h"
  
  #define QUEUE_CLEAR_SIZE 0x100000
  #define MAX_POS 0x7fffffff
@@ -95,8 +98,8 @@ static void clear_besthash(besthash_t *h, int32_t pos)
              kh_del(best, h, k);
  }
  
-static void dump_alignment(samFile *out, bam_hdr_t *hdr,
-                           queue_t *queue, int32_t pos, khash_t(lib) *h)
+static int dump_alignment(samFile *out, bam_hdr_t *hdr,
+                          queue_t *queue, int32_t pos, khash_t(lib) *h)
  {
      if (queue->size > QUEUE_CLEAR_SIZE || pos == MAX_POS) {
          khint_t k;
@@ -110,7 +113,7 @@ static void dump_alignment(samFile *out, bam_hdr_t *hdr,
                  continue;
              }
              if ((q->b->core.flag&BAM_FREVERSE) && q->endpos > pos) break;
-            sam_write1(out, hdr, q->b);
+            if (sam_write1(out, hdr, q->b) < 0) return -1;
              q->b->l_data = 0;
              kl_shift(q, queue, 0);
          }
@@ -121,28 +124,40 @@ static void dump_alignment(samFile *out, bam_hdr_t *hdr,
              }
          }
      }
+    return 0;
  }
  
-void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
+int bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
  {
-    bam1_t *b;
-    queue_t *queue;
+    bam1_t *b = NULL;
+    queue_t *queue = NULL;
      khint_t k;
-    int last_tid = -2;
-    khash_t(lib) *aux;
+    int last_tid = -2, r;
+    khash_t(lib) *aux = NULL;
  
      aux = kh_init(lib);
      b = bam_init1();
      queue = kl_init(q);
-    while (sam_read1(in, hdr, b) >= 0) {
+    if (!aux || !b || !queue) {
+        perror(__func__);
+        goto fail;
+    }
+
+    while ((r = sam_read1(in, hdr, b)) >= 0) {
          bam1_core_t *c = &b->core;
          int endpos = bam_endpos(b);
          int score = sum_qual(b);
  
          if (last_tid != c->tid) {
-            if (last_tid >= 0) dump_alignment(out, hdr, queue, MAX_POS, aux);
+            if (last_tid >= 0) {
+                if (dump_alignment(out, hdr, queue, MAX_POS, aux) < 0)
+                    goto write_fail;
+            }
              last_tid = c->tid;
-        } else dump_alignment(out, hdr, queue, c->pos, aux);
+        } else {
+            if (dump_alignment(out, hdr, queue, c->pos, aux) < 0)
+                goto write_fail;
+        }
          if ((c->flag&BAM_FUNMAP) || ((c->flag&BAM_FPAIRED) && !force_se)) {
              push_queue(queue, b, endpos, score);
          } else {
@@ -172,18 +187,43 @@ void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
              } else kh_val(h, k) = push_queue(queue, b, endpos, score);
          }
      }
-    dump_alignment(out, hdr, queue, MAX_POS, aux);
+    if (r < -1) {
+        fprintf(pysam_stderr, "[%s] error reading input file\n", __func__);
+        goto fail;
+    }
+
+    if (dump_alignment(out, hdr, queue, MAX_POS, aux) < 0) goto write_fail;
  
      for (k = kh_begin(aux); k != kh_end(aux); ++k) {
          if (kh_exist(aux, k)) {
              lib_aux_t *q = &kh_val(aux, k);
-            fprintf(pysamerr, "[bam_rmdupse_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
+            fprintf(pysam_stderr, "[bam_rmdupse_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
                      (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
              kh_destroy(best, q->left); kh_destroy(best, q->rght);
              free((char*)kh_key(aux, k));
+            kh_del(lib, aux, k);
          }
      }
      kh_destroy(lib, aux);
      bam_destroy1(b);
      kl_destroy(q, queue);
+    return 0;
+
+ write_fail:
+    print_error_errno("rmdup", "failed to write record");
+ fail:
+    if (aux) {
+        for (k = kh_begin(aux); k != kh_end(aux); ++k) {
+            if (kh_exist(aux, k)) {
+                lib_aux_t *q = &kh_val(aux, k);
+                kh_destroy(best, q->left);
+                kh_destroy(best, q->rght);
+                free((char*)kh_key(aux, k));
+            }
+        }
+        kh_destroy(lib, aux);
+    }
+    bam_destroy1(b);
+    kl_destroy(q, queue);
+    return 1;
  }
diff --git a/samtools/bam_sort.c b/samtools/bam_sort.c

index 7a441ae4efe584c2b654930ea1e4a61da49294ca..4955dccd06d1debf5992645b1dcc552d17231fc8 100644 (file)
--- a/samtools/bam_sort.c
+++ b/samtools/bam_sort.c
@@ -1,6 +1,6 @@
  /*  bam_sort.c -- sorting and merging.
  
-    Copyright (C) 2008-2015 Genome Research Ltd.
+    Copyright (C) 2008-2016 Genome Research Ltd.
      Portions copyright (C) 2009-2012 Broad Institute.
  
      Author: Heng Li <lh3@sanger.ac.uk>
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdbool.h>
  #include <stdlib.h>
  #include <ctype.h>
@@ -31,6 +33,7 @@ DEALINGS IN THE SOFTWARE.  */
  #include <stdio.h>
  #include <string.h>
  #include <time.h>
+#include <sys/stat.h>
  #include <unistd.h>
  #include <getopt.h>
  #include <assert.h>
@@ -404,7 +407,7 @@ static int trans_tbl_add_sq(merged_header_t* merged_hdr, bam_hdr_t *translate,
      hdr_match_t *new_sq_matches = NULL;
      char *text;
      hdr_match_t matches[2];
-    int32_t i, missing;
+    int32_t i;
      int32_t old_n_targets = merged_hdr->n_targets;
      khiter_t iter;
      int min_tid = -1;
@@ -502,20 +505,20 @@ static int trans_tbl_add_sq(merged_header_t* merged_hdr, bam_hdr_t *translate,
          text += matches[0].rm_eo;
      }
  
-    // Check if any new targets have been missed
-    missing = 0;
+    // Copy the @SQ headers found and recreate any missing from binary header.
      for (i = 0; i < merged_hdr->n_targets - old_n_targets; i++) {
          if (new_sq_matches[i].rm_so >= 0) {
              if (match_to_ks(translate->text, &new_sq_matches[i], out_text))
                  goto memfail;
              if (kputc('\n', out_text) == EOF) goto memfail;
          } else {
-            fprintf(stderr, "[E::%s] @SQ SN (%s) found in binary header but not text header.\n",
-                    __func__, merged_hdr->target_name[i + old_n_targets]);
-            missing++;
+            if (kputs("@SQ\tSN:", out_text) == EOF ||
+                kputs(merged_hdr->target_name[i + old_n_targets], out_text) == EOF ||
+                kputs("\tLN:", out_text) == EOF ||
+                kputuw(merged_hdr->target_len[i + old_n_targets], out_text) == EOF ||
+                kputc('\n', out_text) == EOF) goto memfail;
          }
      }
-    if (missing) goto fail;
  
      free(new_sq_matches);
      return 0;
@@ -775,7 +778,7 @@ static int finish_rg_pg(bool is_rg, klist_t(hdrln) *hdr_lines,
  
  static int trans_tbl_init(merged_header_t* merged_hdr, bam_hdr_t* translate,
                            trans_tbl_t* tbl, bool merge_rg, bool merge_pg,
-                          char* rg_override)
+                          bool copy_co, char* rg_override)
  {
      klist_t(hdrln) *rg_list = NULL;
      klist_t(hdrln) *pg_list = NULL;
@@ -817,20 +820,22 @@ static int trans_tbl_init(merged_header_t* merged_hdr, bam_hdr_t* translate,
      kl_destroy(hdrln, rg_list); rg_list = NULL;
      kl_destroy(hdrln, pg_list); pg_list = NULL;
  
-    // Just append @CO headers without translation
-    const char *line, *end_pointer;
-    for (line = translate->text; *line; line = end_pointer + 1) {
-        end_pointer = strchr(line, '\n');
-        if (strncmp(line, "@CO", 3) == 0) {
-            if (end_pointer) {
-                if (kputsn(line, end_pointer - line + 1, &merged_hdr->out_co) == EOF)
-                    goto memfail;
-            } else { // Last line with no trailing '\n'
-                if (kputs(line, &merged_hdr->out_co) == EOF) goto memfail;
-                if (kputc('\n', &merged_hdr->out_co) == EOF) goto memfail;
+    if (copy_co) {
+        // Just append @CO headers without translation
+        const char *line, *end_pointer;
+        for (line = translate->text; *line; line = end_pointer + 1) {
+            end_pointer = strchr(line, '\n');
+            if (strncmp(line, "@CO", 3) == 0) {
+                if (end_pointer) {
+                    if (kputsn(line, end_pointer - line + 1, &merged_hdr->out_co) == EOF)
+                        goto memfail;
+                } else { // Last line with no trailing '\n'
+                    if (kputs(line, &merged_hdr->out_co) == EOF) goto memfail;
+                    if (kputc('\n', &merged_hdr->out_co) == EOF) goto memfail;
+                }
              }
+            if (end_pointer == NULL) break;
          }
-        if (end_pointer == NULL) break;
      }
  
      return 0;
@@ -1036,6 +1041,7 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
      // Create reverse translation table for tids
      int* rtrans = (int*)malloc(sizeof(int32_t)*n*n_targets);
      const int32_t NOTID = INT32_MIN;
+    if (!rtrans) return NULL;
      memset_pattern4((void*)rtrans, &NOTID, sizeof(int32_t)*n*n_targets);
      int i;
      for (i = 0; i < n; ++i) {
@@ -1056,6 +1062,7 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
  #define MERGE_FORCE       8 // Overwrite output BAM if it exists
  #define MERGE_COMBINE_RG 16 // Combine RG tags frather than redefining them
  #define MERGE_COMBINE_PG 32 // Combine PG tags frather than redefining them
+#define MERGE_FIRST_CO   64 // Use only first file's @CO headers (sort cmd only)
  
  /*
   * How merging is handled
@@ -1101,8 +1108,8 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
                      const char *reg, int n_threads,
                      const htsFormat *in_fmt, const htsFormat *out_fmt)
  {
-    samFile *fpout, **fp;
-    heap1_t *heap;
+    samFile *fpout, **fp = NULL;
+    heap1_t *heap = NULL;
      bam_hdr_t *hout = NULL;
      bam_hdr_t *hin  = NULL;
      int i, j, *RG_len = NULL;
@@ -1111,6 +1118,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
      hts_itr_t **iter = NULL;
      bam_hdr_t **hdr = NULL;
      trans_tbl_t *translation_tbl = NULL;
+    int *rtrans = NULL;
      merged_header_t *merged_hdr = init_merged_header();
      if (!merged_hdr) return -1;
  
@@ -1127,20 +1135,36 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
          if (hin == NULL) {
              fprintf(stderr, "[bam_merge_core] couldn't read headers for '%s'\n",
                      headers);
-            return -1;
+            goto mem_fail;
+        }
+    } else  {
+        hout = bam_hdr_init();
+        if (!hout) {
+            fprintf(stderr, "[bam_merge_core] couldn't allocate bam header\n");
+            goto mem_fail;
          }
+        hout->text = strdup("");
+        if (!hout->text) goto mem_fail;
      }
  
      g_is_by_qname = by_qname;
      fp = (samFile**)calloc(n, sizeof(samFile*));
+    if (!fp) goto mem_fail;
      heap = (heap1_t*)calloc(n, sizeof(heap1_t));
+    if (!heap) goto mem_fail;
      iter = (hts_itr_t**)calloc(n, sizeof(hts_itr_t*));
+    if (!iter) goto mem_fail;
      hdr = (bam_hdr_t**)calloc(n, sizeof(bam_hdr_t*));
+    if (!hdr) goto mem_fail;
      translation_tbl = (trans_tbl_t*)calloc(n, sizeof(trans_tbl_t));
+    if (!translation_tbl) goto mem_fail;
      RG = (char**)calloc(n, sizeof(char*));
+    if (!RG) goto mem_fail;
+
      // prepare RG tag from file names
      if (flag & MERGE_RG) {
          RG_len = (int*)calloc(n, sizeof(int));
+        if (!RG_len) goto mem_fail;
          for (i = 0; i != n; ++i) {
              int l = strlen(fn[i]);
              const char *s = fn[i];
@@ -1149,6 +1173,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
              for (j = l - 1; j >= 0; --j) if (s[j] == '/') break;
              ++j; l -= j;
              RG[i] = (char*)calloc(l + 1, 1);
+            if (!RG[i]) goto mem_fail;
              RG_len[i] = l;
              strncpy(RG[i], s + j, l);
          }
@@ -1159,7 +1184,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
          trans_tbl_t dummy;
          int res;
          res = trans_tbl_init(merged_hdr, hin, &dummy, flag & MERGE_COMBINE_RG,
-                             flag & MERGE_COMBINE_PG, NULL);
+                             flag & MERGE_COMBINE_PG, true, NULL);
          trans_tbl_destroy(&dummy);
          if (res) return -1; // FIXME: memory leak
      }
@@ -1169,31 +1194,19 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
          bam_hdr_t *hin;
          fp[i] = sam_open_format(fn[i], "r", in_fmt);
          if (fp[i] == NULL) {
-            int j;
              fprintf(stderr, "[bam_merge_core] fail to open file %s\n", fn[i]);
-            for (j = 0; j < i; ++j) {
-                bam_hdr_destroy(hdr[i]);
-                sam_close(fp[j]);
-            }
-            free(fp); free(heap);
-            // FIXME: possible memory leak
-            return -1;
+            goto fail;
          }
          hin = sam_hdr_read(fp[i]);
          if (hin == NULL) {
              fprintf(stderr, "[bam_merge_core] failed to read header for '%s'\n",
                      fn[i]);
-            for (j = 0; j < i; ++j) {
-                bam_hdr_destroy(hdr[i]);
-                sam_close(fp[j]);
-            }
-            free(fp); free(heap);
-            // FIXME: possible memory leak
-            return -1;
+            goto fail;
          }
  
          if (trans_tbl_init(merged_hdr, hin, translation_tbl+i,
                             flag & MERGE_COMBINE_RG, flag & MERGE_COMBINE_PG,
+                           (flag & MERGE_FIRST_CO)? (i == 0) : true,
                             RG[i]))
              return -1; // FIXME: memory leak
  
@@ -1224,12 +1237,16 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
  
      // If we're only merging a specified region move our iters to start at that point
      if (reg) {
-        int* rtrans = rtrans_build(n, hout->n_targets, translation_tbl);
-
          int tid, beg, end;
-        const char *name_lim = hts_parse_reg(reg, &beg, &end);
+        const char *name_lim;
+
+        rtrans = rtrans_build(n, hout->n_targets, translation_tbl);
+        if (!rtrans) goto mem_fail;
+
+        name_lim = hts_parse_reg(reg, &beg, &end);
          if (name_lim) {
              char *name = malloc(name_lim - reg + 1);
+            if (!name) goto mem_fail;
              memcpy(name, reg, name_lim - reg);
              name[name_lim - reg] = '\0';
              tid = bam_name2id(hout, name);
@@ -1244,7 +1261,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
          if (tid < 0) {
              if (name_lim) fprintf(stderr, "[%s] Region \"%s\" specifies an unknown reference name\n", __func__, reg);
              else fprintf(stderr, "[%s] Badly formatted region: \"%s\"\n", __func__, reg);
-            return -1;
+            goto fail;
          }
          for (i = 0; i < n; ++i) {
              hts_idx_t *idx = sam_index_load(fp[i], fn[i]);
@@ -1253,7 +1270,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
              if (idx == NULL) {
                  fprintf(stderr, "[%s] failed to load index for %s.  Random alignment retrieval only works for indexed BAM or CRAM files.\n",
                          __func__, fn[i]);
-                return -1;
+                goto fail;
              }
              if (mapped_tid != INT32_MIN) {
                  iter[i] = sam_itr_queryi(idx, mapped_tid, beg, end);
@@ -1261,47 +1278,70 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
                  iter[i] = sam_itr_queryi(idx, HTS_IDX_NONE, 0, 0);
              }
              hts_idx_destroy(idx);
-            if (iter[i] == NULL) break;
+            if (iter[i] == NULL) {
+                if (mapped_tid != INT32_MIN) {
+                    fprintf(stderr,
+                            "[%s] failed to get iterator over "
+                            "{%s, %d, %d, %d}\n",
+                            __func__, fn[i], mapped_tid, beg, end);
+                } else {
+                    fprintf(stderr,
+                            "[%s] failed to get iterator over "
+                            "{%s, HTS_IDX_NONE, 0, 0}\n",
+                            __func__, fn[i]);
+                }
+                goto fail;
+            }
          }
          free(rtrans);
+        rtrans = NULL;
      } else {
          for (i = 0; i < n; ++i) {
              if (hdr[i] == NULL) {
                  iter[i] = sam_itr_queryi(NULL, HTS_IDX_REST, 0, 0);
-                if (iter[i] == NULL) break;
+                if (iter[i] == NULL) {
+                    fprintf(stderr, "[%s] failed to get iterator\n", __func__);
+                    goto fail;
+                }
              }
              else iter[i] = NULL;
          }
      }
  
-    if (i < n) {
-        fprintf(stderr, "[%s] Memory allocation failed\n", __func__);
-        return -1;
-    }
-
      // Load the first read from each file into the heap
      for (i = 0; i < n; ++i) {
          heap1_t *h = heap + i;
+        int res;
          h->i = i;
          h->b = bam_init1();
-        if ((iter[i]? sam_itr_next(fp[i], iter[i], h->b) : sam_read1(fp[i], hdr[i], h->b)) >= 0) {
+        if (!h->b) goto mem_fail;
+        res = iter[i] ? sam_itr_next(fp[i], iter[i], h->b) : sam_read1(fp[i], hdr[i], h->b);
+        if (res >= 0) {
              bam_translate(h->b, translation_tbl + i);
              h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)((int32_t)h->b->core.pos+1)<<1 | bam_is_rev(h->b);
              h->idx = idx++;
          }
-        else {
+        else if (res == -1 && (!iter[i] || iter[i]->finished)) {
              h->pos = HEAP_EMPTY;
              bam_destroy1(h->b);
              h->b = NULL;
+        } else {
+            fprintf(stderr, "[%s] failed to read first record from %s\n",
+                    __func__, fn[i]);
+            goto fail;
          }
      }
  
      // Open output file and write header
      if ((fpout = sam_open_format(out, mode, out_fmt)) == 0) {
-        fprintf(stderr, "[%s] fail to create the output file.\n", __func__);
+        fprintf(stderr, "[%s] failed to create \"%s\": %s\n", __func__, out, strerror(errno));
+        return -1;
+    }
+    if (sam_hdr_write(fpout, hout) != 0) {
+        fprintf(stderr, "[%s] failed to write header.\n", __func__);
+        sam_close(fpout);
          return -1;
      }
-    sam_hdr_write(fpout, hout);
      if (!(flag & MERGE_UNCOMP)) hts_set_threads(fpout, n_threads);
  
      // Begin the actual merge
@@ -1313,16 +1353,24 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
              if (rg) bam_aux_del(b, rg);
              bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]);
          }
-        sam_write1(fpout, hout, b);
+        if (sam_write1(fpout, hout, b) < 0) {
+            fprintf(stderr, "[%s] failed to write to output file.\n", __func__);
+            sam_close(fpout);
+            return -1;
+        }
          if ((j = (iter[heap->i]? sam_itr_next(fp[heap->i], iter[heap->i], b) : sam_read1(fp[heap->i], hdr[heap->i], b))) >= 0) {
              bam_translate(b, translation_tbl + heap->i);
              heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)((int)b->core.pos+1)<<1 | bam_is_rev(b);
              heap->idx = idx++;
-        } else if (j == -1) {
+        } else if (j == -1 && (!iter[heap->i] || iter[heap->i]->finished)) {
              heap->pos = HEAP_EMPTY;
              bam_destroy1(heap->b);
              heap->b = NULL;
-        } else fprintf(stderr, "[bam_merge_core] '%s' is truncated. Continue anyway.\n", fn[heap->i]);
+        } else {
+            fprintf(stderr, "[bam_merge_core] error: '%s' is truncated.\n",
+                    fn[heap->i]);
+            goto fail;
+        }
          ks_heapadjust(heap, 0, n, heap);
      }
  
@@ -1340,9 +1388,39 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
      bam_hdr_destroy(hin);
      bam_hdr_destroy(hout);
      free_merged_header(merged_hdr);
-    sam_close(fpout);
      free(RG); free(translation_tbl); free(fp); free(heap); free(iter); free(hdr);
+    if (sam_close(fpout) < 0) {
+        fprintf(stderr, "[bam_merge_core] error closing output file\n");
+        return -1;
+    }
      return 0;
+
+ mem_fail:
+    fprintf(stderr, "[bam_merge_core] Out of memory\n");
+
+ fail:
+    if (flag & MERGE_RG) {
+        if (RG) {
+            for (i = 0; i != n; ++i) free(RG[i]);
+        }
+        free(RG_len);
+    }
+    for (i = 0; i < n; ++i) {
+        if (translation_tbl && translation_tbl[i].tid_trans) trans_tbl_destroy(translation_tbl + i);
+        if (iter && iter[i]) hts_itr_destroy(iter[i]);
+        if (hdr && hdr[i]) bam_hdr_destroy(hdr[i]);
+        if (fp && fp[i]) sam_close(fp[i]);
+        if (heap && heap[i].b) bam_destroy1(heap[i].b);
+    }
+    if (hout) bam_hdr_destroy(hout);
+    free(RG);
+    free(translation_tbl);
+    free(hdr);
+    free(iter);
+    free(heap);
+    free(fp);
+    free(rtrans);
+    return -1;
  }
  
  // Unused here but may be used by legacy samtools-using third-party code
@@ -1361,7 +1439,7 @@ static void merge_usage(FILE *to)
  "Usage: samtools merge [-nurlf] [-h inh.sam] [-b <bamlist.fofn>] <out.bam> <in1.bam> [<in2.bam> ... <inN.bam>]\n"
  "\n"
  "Options:\n"
-"  -n         Sort by read names\n"
+"  -n         Input files are sorted by read name\n"
  "  -r         Attach RG tag (inferred from file names)\n"
  "  -u         Uncompressed BAM output\n"
  "  -f         Overwrite the output BAM if exist\n"
@@ -1541,29 +1619,40 @@ typedef struct {
      bam1_p *buf;
      const bam_hdr_t *h;
      int index;
+    int error;
  } worker_t;
  
-static void write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_hdr_t *h, int n_threads, const htsFormat *fmt)
+// Returns 0 for success
+//        -1 for failure
+static int write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_hdr_t *h, int n_threads, const htsFormat *fmt)
  {
      size_t i;
      samFile* fp;
      fp = sam_open_format(fn, mode, fmt);
-    if (fp == NULL) return;
-    sam_hdr_write(fp, h);
+    if (fp == NULL) return -1;
+    if (sam_hdr_write(fp, h) != 0) goto fail;
      if (n_threads > 1) hts_set_threads(fp, n_threads);
-    for (i = 0; i < l; ++i)
-        sam_write1(fp, h, buf[i]);
+    for (i = 0; i < l; ++i) {
+        if (sam_write1(fp, h, buf[i]) < 0) goto fail;
+    }
+    if (sam_close(fp) < 0) return -1;
+    return 0;
+ fail:
      sam_close(fp);
+    return -1;
  }
  
  static void *worker(void *data)
  {
      worker_t *w = (worker_t*)data;
      char *name;
+    w->error = 0;
      ks_mergesort(sort, w->buf_len, w->buf, 0);
      name = (char*)calloc(strlen(w->prefix) + 20, 1);
+    if (!name) { w->error = errno; return 0; }
      sprintf(name, "%s.%.4d.bam", w->prefix, w->index);
-    write_buffer(name, "wb1", w->buf_len, w->buf, w->h, 0, NULL);
+    if (write_buffer(name, "wbx1", w->buf_len, w->buf, w->h, 0, NULL) < 0)
+        w->error = errno;
  
  // Consider using CRAM temporary files if the final output is CRAM.
  // Typically it is comparable speed while being smaller.
@@ -1572,7 +1661,8 @@ static void *worker(void *data)
  //        {"no_ref",      CRAM_OPT_NO_REF,  {1},     NULL}
  //    };
  //    opt[0].next = &opt[1];
-//    write_buffer(name, "wc1", w->buf_len, w->buf, w->h, 0, opt);
+//    if (write_buffer(name, "wc1", w->buf_len, w->buf, w->h, 0, opt) < 0)
+//        w->error = errno;
  
      free(name);
      return 0;
@@ -1586,6 +1676,7 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
      pthread_t *tid;
      pthread_attr_t attr;
      worker_t *w;
+    int n_failed = 0;
  
      if (n_threads < 1) n_threads = 1;
      if (k < n_threads * 64) n_threads = 1; // use a single thread if we only sort a small batch of records
@@ -1603,9 +1694,15 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
          b += w[i].buf_len; rest -= w[i].buf_len;
          pthread_create(&tid[i], &attr, worker, &w[i]);
      }
-    for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0);
+    for (i = 0; i < n_threads; ++i) {
+        pthread_join(tid[i], 0);
+        if (w[i].error != 0) {
+            fprintf(stderr, "[bam_sort_core] failed to create temporary file \"%s.%.4d.bam\": %s\n", prefix, w[i].index, strerror(w[i].error));
+            n_failed++;
+        }
+    }
      free(tid); free(w);
-    return n_files + n_threads;
+    return (n_failed == 0)? n_files + n_threads : -1;
  }
  
  /*!
@@ -1675,6 +1772,10 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
          ++k;
          if (mem >= max_mem) {
              n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
+            if (n_files < 0) {
+                ret = -1;
+                goto err;
+            }
              mem = k = 0;
          }
      }
@@ -1687,10 +1788,18 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
      // write the final output
      if (n_files == 0) { // a single block
          ks_mergesort(sort, k, buf, 0);
-        write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt);
+        if (write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt) != 0) {
+            fprintf(stderr, "[bam_sort_core] failed to create \"%s\": %s\n", fnout, strerror(errno));
+            ret = -1;
+            goto err;
+        }
      } else { // then merge
          char **fns;
          n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
+        if (n_files == -1) {
+            ret = -1;
+            goto err;
+        }
          fprintf(stderr, "[bam_sort_core] merging from %d files...\n", n_files);
          fns = (char**)calloc(n_files, sizeof(char*));
          for (i = 0; i < n_files; ++i) {
@@ -1698,8 +1807,8 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
              sprintf(fns[i], "%s.%.4d.bam", prefix, i);
          }
          if (bam_merge_core2(is_by_qname, fnout, modeout, NULL, n_files, fns,
-                            MERGE_COMBINE_RG|MERGE_COMBINE_PG, NULL, n_threads,
-                            in_fmt, out_fmt) < 0) {
+                            MERGE_COMBINE_RG|MERGE_COMBINE_PG|MERGE_FIRST_CO,
+                            NULL, n_threads, in_fmt, out_fmt) < 0) {
              // Propagate bam_merge_core2() failure; it has already emitted a
              // message explaining the failure, so no further message is needed.
              goto err;
@@ -1754,6 +1863,7 @@ int bam_sort(int argc, char *argv[])
      int c, nargs, is_by_qname = 0, ret, o_seen = 0, n_threads = 0, level = -1;
      char *fnout = "-", modeout[12];
      kstring_t tmpprefix = { 0, 0, NULL };
+    struct stat st;
      sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
  
      static const struct option lopts[] = {
@@ -1804,8 +1914,15 @@ int bam_sort(int argc, char *argv[])
      sam_open_mode(modeout+1, fnout, NULL);
      if (level >= 0) sprintf(strchr(modeout, '\0'), "%d", level < 9? level : 9);
  
-    if (tmpprefix.l == 0)
-        ksprintf(&tmpprefix, "%s.tmp", (nargs > 0)? argv[optind] : "STDIN");
+    if (tmpprefix.l == 0) {
+        if (strcmp(fnout, "-") != 0) ksprintf(&tmpprefix, "%s.tmp", fnout);
+        else kputc('.', &tmpprefix);
+    }
+    if (stat(tmpprefix.s, &st) == 0 && S_ISDIR(st.st_mode)) {
+        unsigned t = ((unsigned) time(NULL)) ^ ((unsigned) clock());
+        if (tmpprefix.s[tmpprefix.l-1] != '/') kputc('/', &tmpprefix);
+        ksprintf(&tmpprefix, "samtools.%d.%u.tmp", (int) getpid(), t % 10000);
+    }
  
      ret = bam_sort_core_ext(is_by_qname, (nargs > 0)? argv[optind] : "-",
                              tmpprefix.s, fnout, modeout, max_mem, n_threads,
diff --git a/samtools/bam_sort.c.pysam.c b/samtools/bam_sort.c.pysam.c

index d486beb8f5c551ecd6b345918560a831a1d630c2..b2b625d359f1b19784ebf7abdcd65831546dab09 100644 (file)
--- a/samtools/bam_sort.c.pysam.c
+++ b/samtools/bam_sort.c.pysam.c
@@ -2,7 +2,7 @@
  
  /*  bam_sort.c -- sorting and merging.
  
-    Copyright (C) 2008-2015 Genome Research Ltd.
+    Copyright (C) 2008-2016 Genome Research Ltd.
      Portions copyright (C) 2009-2012 Broad Institute.
  
      Author: Heng Li <lh3@sanger.ac.uk>
@@ -26,6 +26,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdbool.h>
  #include <stdlib.h>
  #include <ctype.h>
@@ -33,6 +35,7 @@ DEALINGS IN THE SOFTWARE.  */
  #include <stdio.h>
  #include <string.h>
  #include <time.h>
+#include <sys/stat.h>
  #include <unistd.h>
  #include <getopt.h>
  #include <assert.h>
@@ -406,7 +409,7 @@ static int trans_tbl_add_sq(merged_header_t* merged_hdr, bam_hdr_t *translate,
      hdr_match_t *new_sq_matches = NULL;
      char *text;
      hdr_match_t matches[2];
-    int32_t i, missing;
+    int32_t i;
      int32_t old_n_targets = merged_hdr->n_targets;
      khiter_t iter;
      int min_tid = -1;
@@ -483,7 +486,7 @@ static int trans_tbl_add_sq(merged_header_t* merged_hdr, bam_hdr_t *translate,
  
          if (iter == kh_end(sq_tids)) {
              // Warn about this, but it's not really fatal.
-            fprintf(pysamerr, "[W::%s] @SQ SN (%.*s) found in text header but not binary header.\n",
+            fprintf(pysam_stderr, "[W::%s] @SQ SN (%.*s) found in text header but not binary header.\n",
                      __func__,
                      (int) (matches[1].rm_eo - matches[1].rm_so),
                      text + matches[1].rm_so);
@@ -504,20 +507,20 @@ static int trans_tbl_add_sq(merged_header_t* merged_hdr, bam_hdr_t *translate,
          text += matches[0].rm_eo;
      }
  
-    // Check if any new targets have been missed
-    missing = 0;
+    // Copy the @SQ headers found and recreate any missing from binary header.
      for (i = 0; i < merged_hdr->n_targets - old_n_targets; i++) {
          if (new_sq_matches[i].rm_so >= 0) {
              if (match_to_ks(translate->text, &new_sq_matches[i], out_text))
                  goto memfail;
              if (kputc('\n', out_text) == EOF) goto memfail;
          } else {
-            fprintf(pysamerr, "[E::%s] @SQ SN (%s) found in binary header but not text header.\n",
-                    __func__, merged_hdr->target_name[i + old_n_targets]);
-            missing++;
+            if (kputs("@SQ\tSN:", out_text) == EOF ||
+                kputs(merged_hdr->target_name[i + old_n_targets], out_text) == EOF ||
+                kputs("\tLN:", out_text) == EOF ||
+                kputuw(merged_hdr->target_len[i + old_n_targets], out_text) == EOF ||
+                kputc('\n', out_text) == EOF) goto memfail;
          }
      }
-    if (missing) goto fail;
  
      free(new_sq_matches);
      return 0;
@@ -720,7 +723,7 @@ static int finish_rg_pg(bool is_rg, klist_t(hdrln) *hdr_lines,
              idx = kh_get(c2c, pg_map, id);
              if (idx == kh_end(pg_map)) {
                  // Not found, warn.
-                fprintf(pysamerr, "[W::%s] Tag %s%s not found in @PG records\n",
+                fprintf(pysam_stderr, "[W::%s] Tag %s%s not found in @PG records\n",
                          __func__, search + 1, id);
              } else {
                  // Remember new id and splice points on original string
@@ -777,7 +780,7 @@ static int finish_rg_pg(bool is_rg, klist_t(hdrln) *hdr_lines,
  
  static int trans_tbl_init(merged_header_t* merged_hdr, bam_hdr_t* translate,
                            trans_tbl_t* tbl, bool merge_rg, bool merge_pg,
-                          char* rg_override)
+                          bool copy_co, char* rg_override)
  {
      klist_t(hdrln) *rg_list = NULL;
      klist_t(hdrln) *pg_list = NULL;
@@ -819,20 +822,22 @@ static int trans_tbl_init(merged_header_t* merged_hdr, bam_hdr_t* translate,
      kl_destroy(hdrln, rg_list); rg_list = NULL;
      kl_destroy(hdrln, pg_list); pg_list = NULL;
  
-    // Just append @CO headers without translation
-    const char *line, *end_pointer;
-    for (line = translate->text; *line; line = end_pointer + 1) {
-        end_pointer = strchr(line, '\n');
-        if (strncmp(line, "@CO", 3) == 0) {
-            if (end_pointer) {
-                if (kputsn(line, end_pointer - line + 1, &merged_hdr->out_co) == EOF)
-                    goto memfail;
-            } else { // Last line with no trailing '\n'
-                if (kputs(line, &merged_hdr->out_co) == EOF) goto memfail;
-                if (kputc('\n', &merged_hdr->out_co) == EOF) goto memfail;
+    if (copy_co) {
+        // Just append @CO headers without translation
+        const char *line, *end_pointer;
+        for (line = translate->text; *line; line = end_pointer + 1) {
+            end_pointer = strchr(line, '\n');
+            if (strncmp(line, "@CO", 3) == 0) {
+                if (end_pointer) {
+                    if (kputsn(line, end_pointer - line + 1, &merged_hdr->out_co) == EOF)
+                        goto memfail;
+                } else { // Last line with no trailing '\n'
+                    if (kputs(line, &merged_hdr->out_co) == EOF) goto memfail;
+                    if (kputc('\n', &merged_hdr->out_co) == EOF) goto memfail;
+                }
              }
+            if (end_pointer == NULL) break;
          }
-        if (end_pointer == NULL) break;
      }
  
      return 0;
@@ -869,7 +874,7 @@ static bam_hdr_t * finish_merged_header(merged_header_t *merged_hdr) {
                + ks_len(&merged_hdr->out_pg)
                + ks_len(&merged_hdr->out_co));
      if (txt_sz >= INT32_MAX) {
-        fprintf(pysamerr, "[%s] Output header text too long\n", __func__);
+        fprintf(pysam_stderr, "[%s] Output header text too long\n", __func__);
          return NULL;
      }
  
@@ -986,7 +991,7 @@ static void bam_translate(bam1_t* b, trans_tbl_t* tbl)
              }
          } else {
              char *tmp = strdup(decoded_rg);
-            fprintf(pysamerr,
+            fprintf(pysam_stderr,
                      "[bam_translate] RG tag \"%s\" on read \"%s\" encountered "
                      "with no corresponding entry in header, tag lost. "
                      "Unknown tags are only reported once per input file for "
@@ -1016,7 +1021,7 @@ static void bam_translate(bam1_t* b, trans_tbl_t* tbl)
              }
          } else {
              char *tmp = strdup(decoded_pg);
-            fprintf(pysamerr,
+            fprintf(pysam_stderr,
                      "[bam_translate] PG tag \"%s\" on read \"%s\" encountered "
                      "with no corresponding entry in header, tag lost. "
                      "Unknown tags are only reported once per input file for "
@@ -1038,6 +1043,7 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
      // Create reverse translation table for tids
      int* rtrans = (int*)malloc(sizeof(int32_t)*n*n_targets);
      const int32_t NOTID = INT32_MIN;
+    if (!rtrans) return NULL;
      memset_pattern4((void*)rtrans, &NOTID, sizeof(int32_t)*n*n_targets);
      int i;
      for (i = 0; i < n; ++i) {
@@ -1058,6 +1064,7 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
  #define MERGE_FORCE       8 // Overwrite output BAM if it exists
  #define MERGE_COMBINE_RG 16 // Combine RG tags frather than redefining them
  #define MERGE_COMBINE_PG 32 // Combine PG tags frather than redefining them
+#define MERGE_FIRST_CO   64 // Use only first file's @CO headers (sort cmd only)
  
  /*
   * How merging is handled
@@ -1103,8 +1110,8 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
                      const char *reg, int n_threads,
                      const htsFormat *in_fmt, const htsFormat *out_fmt)
  {
-    samFile *fpout, **fp;
-    heap1_t *heap;
+    samFile *fpout, **fp = NULL;
+    heap1_t *heap = NULL;
      bam_hdr_t *hout = NULL;
      bam_hdr_t *hin  = NULL;
      int i, j, *RG_len = NULL;
@@ -1113,6 +1120,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
      hts_itr_t **iter = NULL;
      bam_hdr_t **hdr = NULL;
      trans_tbl_t *translation_tbl = NULL;
+    int *rtrans = NULL;
      merged_header_t *merged_hdr = init_merged_header();
      if (!merged_hdr) return -1;
  
@@ -1121,28 +1129,44 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
          samFile* fpheaders = sam_open(headers, "r");
          if (fpheaders == NULL) {
              const char *message = strerror(errno);
-            fprintf(pysamerr, "[bam_merge_core] cannot open '%s': %s\n", headers, message);
+            fprintf(pysam_stderr, "[bam_merge_core] cannot open '%s': %s\n", headers, message);
              return -1;
          }
          hin = sam_hdr_read(fpheaders);
          sam_close(fpheaders);
          if (hin == NULL) {
-            fprintf(pysamerr, "[bam_merge_core] couldn't read headers for '%s'\n",
+            fprintf(pysam_stderr, "[bam_merge_core] couldn't read headers for '%s'\n",
                      headers);
-            return -1;
+            goto mem_fail;
+        }
+    } else  {
+        hout = bam_hdr_init();
+        if (!hout) {
+            fprintf(pysam_stderr, "[bam_merge_core] couldn't allocate bam header\n");
+            goto mem_fail;
          }
+        hout->text = strdup("");
+        if (!hout->text) goto mem_fail;
      }
  
      g_is_by_qname = by_qname;
      fp = (samFile**)calloc(n, sizeof(samFile*));
+    if (!fp) goto mem_fail;
      heap = (heap1_t*)calloc(n, sizeof(heap1_t));
+    if (!heap) goto mem_fail;
      iter = (hts_itr_t**)calloc(n, sizeof(hts_itr_t*));
+    if (!iter) goto mem_fail;
      hdr = (bam_hdr_t**)calloc(n, sizeof(bam_hdr_t*));
+    if (!hdr) goto mem_fail;
      translation_tbl = (trans_tbl_t*)calloc(n, sizeof(trans_tbl_t));
+    if (!translation_tbl) goto mem_fail;
      RG = (char**)calloc(n, sizeof(char*));
+    if (!RG) goto mem_fail;
+
      // prepare RG tag from file names
      if (flag & MERGE_RG) {
          RG_len = (int*)calloc(n, sizeof(int));
+        if (!RG_len) goto mem_fail;
          for (i = 0; i != n; ++i) {
              int l = strlen(fn[i]);
              const char *s = fn[i];
@@ -1151,6 +1175,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
              for (j = l - 1; j >= 0; --j) if (s[j] == '/') break;
              ++j; l -= j;
              RG[i] = (char*)calloc(l + 1, 1);
+            if (!RG[i]) goto mem_fail;
              RG_len[i] = l;
              strncpy(RG[i], s + j, l);
          }
@@ -1161,7 +1186,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
          trans_tbl_t dummy;
          int res;
          res = trans_tbl_init(merged_hdr, hin, &dummy, flag & MERGE_COMBINE_RG,
-                             flag & MERGE_COMBINE_PG, NULL);
+                             flag & MERGE_COMBINE_PG, true, NULL);
          trans_tbl_destroy(&dummy);
          if (res) return -1; // FIXME: memory leak
      }
@@ -1171,31 +1196,19 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
          bam_hdr_t *hin;
          fp[i] = sam_open_format(fn[i], "r", in_fmt);
          if (fp[i] == NULL) {
-            int j;
-            fprintf(pysamerr, "[bam_merge_core] fail to open file %s\n", fn[i]);
-            for (j = 0; j < i; ++j) {
-                bam_hdr_destroy(hdr[i]);
-                sam_close(fp[j]);
-            }
-            free(fp); free(heap);
-            // FIXME: possible memory leak
-            return -1;
+            fprintf(pysam_stderr, "[bam_merge_core] fail to open file %s\n", fn[i]);
+            goto fail;
          }
          hin = sam_hdr_read(fp[i]);
          if (hin == NULL) {
-            fprintf(pysamerr, "[bam_merge_core] failed to read header for '%s'\n",
+            fprintf(pysam_stderr, "[bam_merge_core] failed to read header for '%s'\n",
                      fn[i]);
-            for (j = 0; j < i; ++j) {
-                bam_hdr_destroy(hdr[i]);
-                sam_close(fp[j]);
-            }
-            free(fp); free(heap);
-            // FIXME: possible memory leak
-            return -1;
+            goto fail;
          }
  
          if (trans_tbl_init(merged_hdr, hin, translation_tbl+i,
                             flag & MERGE_COMBINE_RG, flag & MERGE_COMBINE_PG,
+                           (flag & MERGE_FIRST_CO)? (i == 0) : true,
                             RG[i]))
              return -1; // FIXME: memory leak
  
@@ -1205,13 +1218,13 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
          else { bam_hdr_destroy(hin); hdr[i] = NULL; }
  
          if ((translation_tbl+i)->lost_coord_sort && !by_qname) {
-            fprintf(pysamerr, "[bam_merge_core] Order of targets in file %s caused coordinate sort to be lost\n", fn[i]);
+            fprintf(pysam_stderr, "[bam_merge_core] Order of targets in file %s caused coordinate sort to be lost\n", fn[i]);
          }
      }
  
      // Did we get an @HD line?
      if (!merged_hdr->have_hd) {
-        fprintf(pysamerr, "[W::%s] No @HD tag found.\n", __func__);
+        fprintf(pysam_stderr, "[W::%s] No @HD tag found.\n", __func__);
          /* FIXME:  Should we add an @HD line here, and if so what should
             we put in it? Ideally we want a way of getting htslib to tell
             us the SAM version number to assume given no @HD line.  Is
@@ -1226,12 +1239,16 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
  
      // If we're only merging a specified region move our iters to start at that point
      if (reg) {
-        int* rtrans = rtrans_build(n, hout->n_targets, translation_tbl);
-
          int tid, beg, end;
-        const char *name_lim = hts_parse_reg(reg, &beg, &end);
+        const char *name_lim;
+
+        rtrans = rtrans_build(n, hout->n_targets, translation_tbl);
+        if (!rtrans) goto mem_fail;
+
+        name_lim = hts_parse_reg(reg, &beg, &end);
          if (name_lim) {
              char *name = malloc(name_lim - reg + 1);
+            if (!name) goto mem_fail;
              memcpy(name, reg, name_lim - reg);
              name[name_lim - reg] = '\0';
              tid = bam_name2id(hout, name);
@@ -1244,18 +1261,18 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
              end = INT_MAX;
          }
          if (tid < 0) {
-            if (name_lim) fprintf(pysamerr, "[%s] Region \"%s\" specifies an unknown reference name\n", __func__, reg);
-            else fprintf(pysamerr, "[%s] Badly formatted region: \"%s\"\n", __func__, reg);
-            return -1;
+            if (name_lim) fprintf(pysam_stderr, "[%s] Region \"%s\" specifies an unknown reference name\n", __func__, reg);
+            else fprintf(pysam_stderr, "[%s] Badly formatted region: \"%s\"\n", __func__, reg);
+            goto fail;
          }
          for (i = 0; i < n; ++i) {
              hts_idx_t *idx = sam_index_load(fp[i], fn[i]);
              // (rtrans[i*n+tid]) Look up what hout tid translates to in input tid space
              int mapped_tid = rtrans[i*hout->n_targets+tid];
              if (idx == NULL) {
-                fprintf(pysamerr, "[%s] failed to load index for %s.  Random alignment retrieval only works for indexed BAM or CRAM files.\n",
+                fprintf(pysam_stderr, "[%s] failed to load index for %s.  Random alignment retrieval only works for indexed BAM or CRAM files.\n",
                          __func__, fn[i]);
-                return -1;
+                goto fail;
              }
              if (mapped_tid != INT32_MIN) {
                  iter[i] = sam_itr_queryi(idx, mapped_tid, beg, end);
@@ -1263,47 +1280,70 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
                  iter[i] = sam_itr_queryi(idx, HTS_IDX_NONE, 0, 0);
              }
              hts_idx_destroy(idx);
-            if (iter[i] == NULL) break;
+            if (iter[i] == NULL) {
+                if (mapped_tid != INT32_MIN) {
+                    fprintf(pysam_stderr,
+                            "[%s] failed to get iterator over "
+                            "{%s, %d, %d, %d}\n",
+                            __func__, fn[i], mapped_tid, beg, end);
+                } else {
+                    fprintf(pysam_stderr,
+                            "[%s] failed to get iterator over "
+                            "{%s, HTS_IDX_NONE, 0, 0}\n",
+                            __func__, fn[i]);
+                }
+                goto fail;
+            }
          }
          free(rtrans);
+        rtrans = NULL;
      } else {
          for (i = 0; i < n; ++i) {
              if (hdr[i] == NULL) {
                  iter[i] = sam_itr_queryi(NULL, HTS_IDX_REST, 0, 0);
-                if (iter[i] == NULL) break;
+                if (iter[i] == NULL) {
+                    fprintf(pysam_stderr, "[%s] failed to get iterator\n", __func__);
+                    goto fail;
+                }
              }
              else iter[i] = NULL;
          }
      }
  
-    if (i < n) {
-        fprintf(pysamerr, "[%s] Memory allocation failed\n", __func__);
-        return -1;
-    }
-
      // Load the first read from each file into the heap
      for (i = 0; i < n; ++i) {
          heap1_t *h = heap + i;
+        int res;
          h->i = i;
          h->b = bam_init1();
-        if ((iter[i]? sam_itr_next(fp[i], iter[i], h->b) : sam_read1(fp[i], hdr[i], h->b)) >= 0) {
+        if (!h->b) goto mem_fail;
+        res = iter[i] ? sam_itr_next(fp[i], iter[i], h->b) : sam_read1(fp[i], hdr[i], h->b);
+        if (res >= 0) {
              bam_translate(h->b, translation_tbl + i);
              h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)((int32_t)h->b->core.pos+1)<<1 | bam_is_rev(h->b);
              h->idx = idx++;
          }
-        else {
+        else if (res == -1 && (!iter[i] || iter[i]->finished)) {
              h->pos = HEAP_EMPTY;
              bam_destroy1(h->b);
              h->b = NULL;
+        } else {
+            fprintf(pysam_stderr, "[%s] failed to read first record from %s\n",
+                    __func__, fn[i]);
+            goto fail;
          }
      }
  
      // Open output file and write header
      if ((fpout = sam_open_format(out, mode, out_fmt)) == 0) {
-        fprintf(pysamerr, "[%s] fail to create the output file.\n", __func__);
+        fprintf(pysam_stderr, "[%s] failed to create \"%s\": %s\n", __func__, out, strerror(errno));
+        return -1;
+    }
+    if (sam_hdr_write(fpout, hout) != 0) {
+        fprintf(pysam_stderr, "[%s] failed to write header.\n", __func__);
+        sam_close(fpout);
          return -1;
      }
-    sam_hdr_write(fpout, hout);
      if (!(flag & MERGE_UNCOMP)) hts_set_threads(fpout, n_threads);
  
      // Begin the actual merge
@@ -1315,16 +1355,24 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
              if (rg) bam_aux_del(b, rg);
              bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]);
          }
-        sam_write1(fpout, hout, b);
+        if (sam_write1(fpout, hout, b) < 0) {
+            fprintf(pysam_stderr, "[%s] failed to write to output file.\n", __func__);
+            sam_close(fpout);
+            return -1;
+        }
          if ((j = (iter[heap->i]? sam_itr_next(fp[heap->i], iter[heap->i], b) : sam_read1(fp[heap->i], hdr[heap->i], b))) >= 0) {
              bam_translate(b, translation_tbl + heap->i);
              heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)((int)b->core.pos+1)<<1 | bam_is_rev(b);
              heap->idx = idx++;
-        } else if (j == -1) {
+        } else if (j == -1 && (!iter[heap->i] || iter[heap->i]->finished)) {
              heap->pos = HEAP_EMPTY;
              bam_destroy1(heap->b);
              heap->b = NULL;
-        } else fprintf(pysamerr, "[bam_merge_core] '%s' is truncated. Continue anyway.\n", fn[heap->i]);
+        } else {
+            fprintf(pysam_stderr, "[bam_merge_core] error: '%s' is truncated.\n",
+                    fn[heap->i]);
+            goto fail;
+        }
          ks_heapadjust(heap, 0, n, heap);
      }
  
@@ -1342,9 +1390,39 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
      bam_hdr_destroy(hin);
      bam_hdr_destroy(hout);
      free_merged_header(merged_hdr);
-    sam_close(fpout);
      free(RG); free(translation_tbl); free(fp); free(heap); free(iter); free(hdr);
+    if (sam_close(fpout) < 0) {
+        fprintf(pysam_stderr, "[bam_merge_core] error closing output file\n");
+        return -1;
+    }
      return 0;
+
+ mem_fail:
+    fprintf(pysam_stderr, "[bam_merge_core] Out of memory\n");
+
+ fail:
+    if (flag & MERGE_RG) {
+        if (RG) {
+            for (i = 0; i != n; ++i) free(RG[i]);
+        }
+        free(RG_len);
+    }
+    for (i = 0; i < n; ++i) {
+        if (translation_tbl && translation_tbl[i].tid_trans) trans_tbl_destroy(translation_tbl + i);
+        if (iter && iter[i]) hts_itr_destroy(iter[i]);
+        if (hdr && hdr[i]) bam_hdr_destroy(hdr[i]);
+        if (fp && fp[i]) sam_close(fp[i]);
+        if (heap && heap[i].b) bam_destroy1(heap[i].b);
+    }
+    if (hout) bam_hdr_destroy(hout);
+    free(RG);
+    free(translation_tbl);
+    free(hdr);
+    free(iter);
+    free(heap);
+    free(fp);
+    free(rtrans);
+    return -1;
  }
  
  // Unused here but may be used by legacy samtools-using third-party code
@@ -1363,7 +1441,7 @@ static void merge_usage(FILE *to)
  "Usage: samtools merge [-nurlf] [-h inh.sam] [-b <bamlist.fofn>] <out.bam> <in1.bam> [<in2.bam> ... <inN.bam>]\n"
  "\n"
  "Options:\n"
-"  -n         Sort by read names\n"
+"  -n         Input files are sorted by read name\n"
  "  -r         Attach RG tag (inferred from file names)\n"
  "  -u         Uncompressed BAM output\n"
  "  -f         Overwrite the output BAM if exist\n"
@@ -1396,7 +1474,7 @@ int bam_merge(int argc, char *argv[])
      };
  
      if (argc == 1) {
-        merge_usage(stdout);
+        merge_usage(pysam_stdout);
          return 0;
      }
  
@@ -1426,7 +1504,7 @@ int bam_merge(int argc, char *argv[])
                  fn_size += nfiles;
              }
              else {
-                fprintf(pysamerr, "[%s] Invalid file list \"%s\"\n", __func__, optarg);
+                fprintf(pysam_stderr, "[%s] Invalid file list \"%s\"\n", __func__, optarg);
                  ret = 1;
              }
              break;
@@ -1434,12 +1512,12 @@ int bam_merge(int argc, char *argv[])
  
          default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
                    /* else fall-through */
-        case '?': merge_usage(pysamerr); return 1;
+        case '?': merge_usage(pysam_stderr); return 1;
          }
      }
      if ( argc - optind < 1 ) {
-        fprintf(pysamerr, "You must at least specify the output file.\n");
-        merge_usage(pysamerr);
+        fprintf(pysam_stderr, "You must at least specify the output file.\n");
+        merge_usage(pysam_stderr);
          return 1;
      }
  
@@ -1448,7 +1526,7 @@ int bam_merge(int argc, char *argv[])
          FILE *fp = fopen(argv[optind], "rb");
          if (fp != NULL) {
              fclose(fp);
-            fprintf(pysamerr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, argv[optind]);
+            fprintf(pysam_stderr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, argv[optind]);
              return 1;
          }
      }
@@ -1461,8 +1539,8 @@ int bam_merge(int argc, char *argv[])
          memcpy(fn+fn_size, argv + (optind+1), nargcfiles * sizeof(char*));
      }
      if (fn_size+nargcfiles < 1) {
-        fprintf(pysamerr, "You must specify at least one (and usually two or more) input files.\n");
-        merge_usage(pysamerr);
+        fprintf(pysam_stderr, "You must specify at least one (and usually two or more) input files.\n");
+        merge_usage(pysam_stderr);
          return 1;
      }
      strcpy(mode, "wb");
@@ -1543,29 +1621,40 @@ typedef struct {
      bam1_p *buf;
      const bam_hdr_t *h;
      int index;
+    int error;
  } worker_t;
  
-static void write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_hdr_t *h, int n_threads, const htsFormat *fmt)
+// Returns 0 for success
+//        -1 for failure
+static int write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_hdr_t *h, int n_threads, const htsFormat *fmt)
  {
      size_t i;
      samFile* fp;
      fp = sam_open_format(fn, mode, fmt);
-    if (fp == NULL) return;
-    sam_hdr_write(fp, h);
+    if (fp == NULL) return -1;
+    if (sam_hdr_write(fp, h) != 0) goto fail;
      if (n_threads > 1) hts_set_threads(fp, n_threads);
-    for (i = 0; i < l; ++i)
-        sam_write1(fp, h, buf[i]);
+    for (i = 0; i < l; ++i) {
+        if (sam_write1(fp, h, buf[i]) < 0) goto fail;
+    }
+    if (sam_close(fp) < 0) return -1;
+    return 0;
+ fail:
      sam_close(fp);
+    return -1;
  }
  
  static void *worker(void *data)
  {
      worker_t *w = (worker_t*)data;
      char *name;
+    w->error = 0;
      ks_mergesort(sort, w->buf_len, w->buf, 0);
      name = (char*)calloc(strlen(w->prefix) + 20, 1);
+    if (!name) { w->error = errno; return 0; }
      sprintf(name, "%s.%.4d.bam", w->prefix, w->index);
-    write_buffer(name, "wb1", w->buf_len, w->buf, w->h, 0, NULL);
+    if (write_buffer(name, "wbx1", w->buf_len, w->buf, w->h, 0, NULL) < 0)
+        w->error = errno;
  
  // Consider using CRAM temporary files if the final output is CRAM.
  // Typically it is comparable speed while being smaller.
@@ -1574,7 +1663,8 @@ static void *worker(void *data)
  //        {"no_ref",      CRAM_OPT_NO_REF,  {1},     NULL}
  //    };
  //    opt[0].next = &opt[1];
-//    write_buffer(name, "wc1", w->buf_len, w->buf, w->h, 0, opt);
+//    if (write_buffer(name, "wc1", w->buf_len, w->buf, w->h, 0, opt) < 0)
+//        w->error = errno;
  
      free(name);
      return 0;
@@ -1588,6 +1678,7 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
      pthread_t *tid;
      pthread_attr_t attr;
      worker_t *w;
+    int n_failed = 0;
  
      if (n_threads < 1) n_threads = 1;
      if (k < n_threads * 64) n_threads = 1; // use a single thread if we only sort a small batch of records
@@ -1605,9 +1696,15 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
          b += w[i].buf_len; rest -= w[i].buf_len;
          pthread_create(&tid[i], &attr, worker, &w[i]);
      }
-    for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0);
+    for (i = 0; i < n_threads; ++i) {
+        pthread_join(tid[i], 0);
+        if (w[i].error != 0) {
+            fprintf(pysam_stderr, "[bam_sort_core] failed to create temporary file \"%s.%.4d.bam\": %s\n", prefix, w[i].index, strerror(w[i].error));
+            n_failed++;
+        }
+    }
      free(tid); free(w);
-    return n_files + n_threads;
+    return (n_failed == 0)? n_files + n_threads : -1;
  }
  
  /*!
@@ -1647,12 +1744,12 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
      fp = sam_open_format(fn, "r", in_fmt);
      if (fp == NULL) {
          const char *message = strerror(errno);
-        fprintf(pysamerr, "[bam_sort_core] fail to open '%s': %s\n", fn, message);
+        fprintf(pysam_stderr, "[bam_sort_core] fail to open '%s': %s\n", fn, message);
          return -2;
      }
      header = sam_hdr_read(fp);
      if (header == NULL) {
-        fprintf(pysamerr, "[bam_sort_core] failed to read header for '%s'\n", fn);
+        fprintf(pysam_stderr, "[bam_sort_core] failed to read header for '%s'\n", fn);
          goto err;
      }
      if (is_by_qname) change_SO(header, "queryname");
@@ -1677,11 +1774,15 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
          ++k;
          if (mem >= max_mem) {
              n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
+            if (n_files < 0) {
+                ret = -1;
+                goto err;
+            }
              mem = k = 0;
          }
      }
      if (ret != -1) {
-        fprintf(pysamerr, "[bam_sort_core] truncated file. Aborting.\n");
+        fprintf(pysam_stderr, "[bam_sort_core] truncated file. Aborting.\n");
          ret = -1;
          goto err;
      }
@@ -1689,19 +1790,27 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
      // write the final output
      if (n_files == 0) { // a single block
          ks_mergesort(sort, k, buf, 0);
-        write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt);
+        if (write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt) != 0) {
+            fprintf(pysam_stderr, "[bam_sort_core] failed to create \"%s\": %s\n", fnout, strerror(errno));
+            ret = -1;
+            goto err;
+        }
      } else { // then merge
          char **fns;
          n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
-        fprintf(pysamerr, "[bam_sort_core] merging from %d files...\n", n_files);
+        if (n_files == -1) {
+            ret = -1;
+            goto err;
+        }
+        fprintf(pysam_stderr, "[bam_sort_core] merging from %d files...\n", n_files);
          fns = (char**)calloc(n_files, sizeof(char*));
          for (i = 0; i < n_files; ++i) {
              fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
              sprintf(fns[i], "%s.%.4d.bam", prefix, i);
          }
          if (bam_merge_core2(is_by_qname, fnout, modeout, NULL, n_files, fns,
-                            MERGE_COMBINE_RG|MERGE_COMBINE_PG, NULL, n_threads,
-                            in_fmt, out_fmt) < 0) {
+                            MERGE_COMBINE_RG|MERGE_COMBINE_PG|MERGE_FIRST_CO,
+                            NULL, n_threads, in_fmt, out_fmt) < 0) {
              // Propagate bam_merge_core2() failure; it has already emitted a
              // message explaining the failure, so no further message is needed.
              goto err;
@@ -1756,6 +1865,7 @@ int bam_sort(int argc, char *argv[])
      int c, nargs, is_by_qname = 0, ret, o_seen = 0, n_threads = 0, level = -1;
      char *fnout = "-", modeout[12];
      kstring_t tmpprefix = { 0, 0, NULL };
+    struct stat st;
      sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
  
      static const struct option lopts[] = {
@@ -1782,22 +1892,22 @@ int bam_sort(int argc, char *argv[])
  
          default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
                    /* else fall-through */
-        case '?': sort_usage(pysamerr); ret = EXIT_FAILURE; goto sort_end;
+        case '?': sort_usage(pysam_stderr); ret = EXIT_FAILURE; goto sort_end;
          }
      }
  
      nargs = argc - optind;
      if (nargs == 0 && isatty(STDIN_FILENO)) {
-        sort_usage(stdout);
+        sort_usage(pysam_stdout);
          ret = EXIT_SUCCESS;
          goto sort_end;
      }
      else if (nargs >= 2) {
          // If exactly two, user probably tried to specify legacy <out.prefix>
          if (nargs == 2)
-            fprintf(pysamerr, "[bam_sort] Use -T PREFIX / -o FILE to specify temporary and final output files\n");
+            fprintf(pysam_stderr, "[bam_sort] Use -T PREFIX / -o FILE to specify temporary and final output files\n");
  
-        sort_usage(pysamerr);
+        sort_usage(pysam_stderr);
          ret = EXIT_FAILURE;
          goto sort_end;
      }
@@ -1806,8 +1916,15 @@ int bam_sort(int argc, char *argv[])
      sam_open_mode(modeout+1, fnout, NULL);
      if (level >= 0) sprintf(strchr(modeout, '\0'), "%d", level < 9? level : 9);
  
-    if (tmpprefix.l == 0)
-        ksprintf(&tmpprefix, "%s.tmp", (nargs > 0)? argv[optind] : "STDIN");
+    if (tmpprefix.l == 0) {
+        if (strcmp(fnout, "-") != 0) ksprintf(&tmpprefix, "%s.tmp", fnout);
+        else kputc('.', &tmpprefix);
+    }
+    if (stat(tmpprefix.s, &st) == 0 && S_ISDIR(st.st_mode)) {
+        unsigned t = ((unsigned) time(NULL)) ^ ((unsigned) clock());
+        if (tmpprefix.s[tmpprefix.l-1] != '/') kputc('/', &tmpprefix);
+        ksprintf(&tmpprefix, "samtools.%d.%u.tmp", (int) getpid(), t % 10000);
+    }
  
      ret = bam_sort_core_ext(is_by_qname, (nargs > 0)? argv[optind] : "-",
                              tmpprefix.s, fnout, modeout, max_mem, n_threads,
@@ -1819,7 +1936,7 @@ int bam_sort(int argc, char *argv[])
          // If we failed on opening the input file & it has no .bam/.cram/etc
          // extension, the user probably tried legacy -o <infile> <out.prefix>
          if (ret == -2 && o_seen && nargs > 0 && sam_open_mode(dummy, argv[optind], NULL) < 0)
-            fprintf(pysamerr, "[bam_sort] Note the <out.prefix> argument has been replaced by -T/-o options\n");
+            fprintf(pysam_stderr, "[bam_sort] Note the <out.prefix> argument has been replaced by -T/-o options\n");
  
          ret = EXIT_FAILURE;
      }
diff --git a/samtools/bam_split.c b/samtools/bam_split.c

index e44acc0491abc2b162ba49b635252909ab9546d2..9a2998aded3adc7e86abd4877c67e911333d5fcb 100644 (file)
--- a/samtools/bam_split.c
+++ b/samtools/bam_split.c
@@ -1,6 +1,6 @@
  /*  bam_split.c -- split subcommand.
  
-    Copyright (C) 2013, 2014 Genome Research Ltd.
+    Copyright (C) 2013-2015 Genome Research Ltd.
  
      Author: Martin Pollard <mp15@sanger.ac.uk>
  
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <htslib/sam.h>
  #include <string.h>
  #include <stdio.h>
@@ -55,6 +57,7 @@ struct state {
      bam_hdr_t* unaccounted_header;
      size_t output_count;
      char** rg_id;
+    char **rg_output_file_name;
      samFile** rg_output_file;
      bam_hdr_t** rg_output_header;
      kh_c2i_t* rg_hash;
@@ -62,7 +65,7 @@ struct state {
  
  typedef struct state state_t;
  
-static int cleanup_state(state_t* status);
+static int cleanup_state(state_t* status, bool check_close);
  static void cleanup_opts(parsed_opts_t* opts);
  
  static void usage(FILE *write_to)
@@ -334,7 +337,7 @@ static state_t* init(parsed_opts_t* opts)
      if (retval->merged_input_header == NULL) {
          fprintf(stderr, "Could not read header for file '%s'\n",
                  opts->merged_input_name);
-        cleanup_state(retval);
+        cleanup_state(retval, false);
          return NULL;
      }
  
@@ -343,14 +346,14 @@ static state_t* init(parsed_opts_t* opts)
              samFile* hdr_load = sam_open_format(opts->unaccounted_header_name, "r", &opts->ga.in);
              if (!hdr_load) {
                  fprintf(stderr, "Could not open unaccounted header file (%s)\n", opts->unaccounted_header_name);
-                cleanup_state(retval);
+                cleanup_state(retval, false);
                  return NULL;
              }
              retval->unaccounted_header = sam_hdr_read(hdr_load);
              if (retval->unaccounted_header == NULL) {
                  fprintf(stderr, "Could not read header for file '%s'\n",
                          opts->unaccounted_header_name);
-                cleanup_state(retval);
+                cleanup_state(retval, false);
                  return NULL;
              }
              sam_close(hdr_load);
@@ -361,7 +364,7 @@ static state_t* init(parsed_opts_t* opts)
          retval->unaccounted_file = sam_open_format(opts->unaccounted_name, "wb", &opts->ga.out);
          if (retval->unaccounted_file == NULL) {
              fprintf(stderr, "Could not open unaccounted output file: %s\n", opts->unaccounted_name);
-            cleanup_state(retval);
+            cleanup_state(retval, false);
              return NULL;
          }
      }
@@ -370,12 +373,13 @@ static state_t* init(parsed_opts_t* opts)
      if (!count_RG(retval->merged_input_header, &retval->output_count, &retval->rg_id)) return NULL;
      if (opts->verbose) fprintf(stderr, "@RG's found %zu\n",retval->output_count);
  
+    retval->rg_output_file_name = (char **)calloc(retval->output_count, sizeof(char *));
      retval->rg_output_file = (samFile**)calloc(retval->output_count, sizeof(samFile*));
      retval->rg_output_header = (bam_hdr_t**)calloc(retval->output_count, sizeof(bam_hdr_t*));
      retval->rg_hash = kh_init_c2i();
-    if (!retval->rg_output_file || !retval->rg_output_header) {
+    if (!retval->rg_output_file_name || !retval->rg_output_file || !retval->rg_output_header || !retval->rg_hash) {
          fprintf(stderr, "Could not allocate memory for output file array. Out of memory?");
-        cleanup_state(retval);
+        cleanup_state(retval, false);
          return NULL;
      }
  
@@ -383,7 +387,7 @@ static state_t* init(parsed_opts_t* opts)
      char* input_base_name = strdup(dirsep? dirsep+1 : opts->merged_input_name);
      if (!input_base_name) {
          fprintf(stderr, "Out of memory\n");
-        cleanup_state(retval);
+        cleanup_state(retval, false);
          return NULL;
      }
      char* extension = strrchr(input_base_name, '.');
@@ -399,16 +403,17 @@ static state_t* init(parsed_opts_t* opts)
                                                 &opts->ga.out);
  
          if ( output_filename == NULL ) {
-            fprintf(stderr, "Error expanding output filename format string.\r\n");
-            cleanup_state(retval);
+            fprintf(stderr, "Error expanding output filename format string.\n");
+            cleanup_state(retval, false);
              free(input_base_name);
              return NULL;
          }
  
+        retval->rg_output_file_name[i] = output_filename;
          retval->rg_output_file[i] = sam_open_format(output_filename, "wb", &opts->ga.out);
          if (retval->rg_output_file[i] == NULL) {
-            fprintf(stderr, "Could not open output file: %s\r\n", output_filename);
-            cleanup_state(retval);
+            fprintf(stderr, "Could not open output file: %s\n", output_filename);
+            cleanup_state(retval, false);
              free(input_base_name);
              return NULL;
          }
@@ -421,13 +426,11 @@ static state_t* init(parsed_opts_t* opts)
          // Set and edit header
          retval->rg_output_header[i] = bam_hdr_dup(retval->merged_input_header);
          if ( !filter_header_rg(retval->rg_output_header[i], retval->rg_id[i]) ) {
-            fprintf(stderr, "Could not rewrite header for file: %s\r\n", output_filename);
-            cleanup_state(retval);
-            free(output_filename);
+            fprintf(stderr, "Could not rewrite header for file: %s\n", output_filename);
+            cleanup_state(retval, false);
              free(input_base_name);
              return NULL;
          }
-        free(output_filename);
      }
  
      free(input_base_name);
@@ -444,7 +447,8 @@ static bool split(state_t* state)
      size_t i;
      for (i = 0; i < state->output_count; i++) {
          if (sam_hdr_write(state->rg_output_file[i], state->rg_output_header[i]) != 0) {
-            fprintf(stderr, "Could not write output file header\n");
+            fprintf(stderr, "Could not write output file header for '%s'\n",
+                    state->rg_output_file_name[i]);
              return false;
          }
      }
@@ -457,7 +461,7 @@ static bool split(state_t* state)
          bam_destroy1(file_read);
          file_read = NULL;
          if (r < -1) {
-            fprintf(stderr, "Could not write read sequence\n");
+            fprintf(stderr, "Could not read first input record\n");
              return false;
          }
      }
@@ -478,7 +482,9 @@ static bool split(state_t* state)
              // if found write to the appropriate untangled bam
              int i = kh_val(state->rg_hash,iter);
              if (sam_write1(state->rg_output_file[i], state->rg_output_header[i], file_read) < 0) {
-                fprintf(stderr, "Could not write sequence\n");
+                fprintf(stderr, "Could not write to output file '%s'\n",
+                        state->rg_output_file_name[i]);
+                bam_destroy1(file_read);
                  return false;
              }
          } else {
@@ -493,7 +499,8 @@ static bool split(state_t* state)
                  return false;
              } else {
                  if (sam_write1(state->unaccounted_file, state->unaccounted_header, file_read) < 0) {
-                    fprintf(stderr, "Could not write sequence\n");
+                    fprintf(stderr, "Could not write to unaccounted output file\n");
+                    bam_destroy1(file_read);
                      return false;
                  }
              }
@@ -505,7 +512,7 @@ static bool split(state_t* state)
              bam_destroy1(file_read);
              file_read = NULL;
              if (r < -1) {
-                fprintf(stderr, "Could not write read sequence\n");
+                fprintf(stderr, "Could not read input record\n");
                  return false;
              }
          }
@@ -514,23 +521,38 @@ static bool split(state_t* state)
      return true;
  }
  
-static int cleanup_state(state_t* status)
+static int cleanup_state(state_t* status, bool check_close)
  {
      int ret = 0;
  
      if (!status) return 0;
      if (status->unaccounted_header) bam_hdr_destroy(status->unaccounted_header);
-    if (status->unaccounted_file) ret |= sam_close(status->unaccounted_file);
+    if (status->unaccounted_file) {
+        if (sam_close(status->unaccounted_file) < 0 && check_close) {
+            fprintf(stderr, "Error on closing unaccounted file\n");
+            ret = -1;
+        }
+    }
      sam_close(status->merged_input_file);
      size_t i;
      for (i = 0; i < status->output_count; i++) {
-        bam_hdr_destroy(status->rg_output_header[i]);
-        ret |= sam_close(status->rg_output_file[i]);
-        free(status->rg_id[i]);
+        if (status->rg_output_header && status->rg_output_header[i])
+            bam_hdr_destroy(status->rg_output_header[i]);
+        if (status->rg_output_file && status->rg_output_file[i]) {
+            if (sam_close(status->rg_output_file[i]) < 0 && check_close) {
+                fprintf(stderr, "Error on closing output file '%s'\n",
+                        status->rg_output_file_name[i]);
+                ret = -1;
+            }
+        }
+        if (status->rg_id) free(status->rg_id[i]);
+        if (status->rg_output_file_name) free(status->rg_output_file_name[i]);
      }
-    bam_hdr_destroy(status->merged_input_header);
+    if (status->merged_input_header)
+        bam_hdr_destroy(status->merged_input_header);
      free(status->rg_output_header);
      free(status->rg_output_file);
+    free(status->rg_output_file_name);
      kh_destroy_c2i(status->rg_hash);
      free(status->rg_id);
      free(status);
@@ -553,13 +575,17 @@ int main_split(int argc, char** argv)
  {
      int ret = 1;
      parsed_opts_t* opts = parse_args(argc, argv);
-    if (!opts ) goto cleanup_opts;
+    if (!opts) goto cleanup_opts;
      state_t* status = init(opts);
      if (!status) goto cleanup_opts;
  
-    if (split(status)) ret = 0;
+    if (!split(status)) {
+        cleanup_state(status, false);
+        goto cleanup_opts;
+    }
+
+    ret = cleanup_state(status, true);
  
-    ret |= (cleanup_state(status) != 0);
  cleanup_opts:
      cleanup_opts(opts);
  
diff --git a/samtools/bam_split.c.pysam.c b/samtools/bam_split.c.pysam.c

index 329556fba6e1d236c0cb1036c38fe55c484168d2..2348f48769676ea05112dc2579d191ba22d18bc0 100644 (file)
--- a/samtools/bam_split.c.pysam.c
+++ b/samtools/bam_split.c.pysam.c
@@ -2,7 +2,7 @@
  
  /*  bam_split.c -- split subcommand.
  
-    Copyright (C) 2013, 2014 Genome Research Ltd.
+    Copyright (C) 2013-2015 Genome Research Ltd.
  
      Author: Martin Pollard <mp15@sanger.ac.uk>
  
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <htslib/sam.h>
  #include <string.h>
  #include <stdio.h>
@@ -57,6 +59,7 @@ struct state {
      bam_hdr_t* unaccounted_header;
      size_t output_count;
      char** rg_id;
+    char **rg_output_file_name;
      samFile** rg_output_file;
      bam_hdr_t** rg_output_header;
      kh_c2i_t* rg_hash;
@@ -64,7 +67,7 @@ struct state {
  
  typedef struct state state_t;
  
-static int cleanup_state(state_t* status);
+static int cleanup_state(state_t* status, bool check_close);
  static void cleanup_opts(parsed_opts_t* opts);
  
  static void usage(FILE *write_to)
@@ -92,7 +95,7 @@ static void usage(FILE *write_to)
  // Takes the command line options and turns them into something we can understand
  static parsed_opts_t* parse_args(int argc, char** argv)
  {
-    if (argc == 1) { usage(stdout); return NULL; }
+    if (argc == 1) { usage(pysam_stdout); return NULL; }
  
      const char* optstring = "vf:u:";
      char* delim;
@@ -130,7 +133,7 @@ static parsed_opts_t* parse_args(int argc, char** argv)
              if (parse_sam_global_opt(opt, optarg, lopts, &retval->ga) == 0) break;
              /* else fall-through */
          case '?':
-            usage(stdout);
+            usage(pysam_stdout);
              free(retval);
              return NULL;
          }
@@ -142,8 +145,8 @@ static parsed_opts_t* parse_args(int argc, char** argv)
      argv += optind;
  
      if (argc != 1) {
-        fprintf(pysamerr, "Invalid number of arguments: %d\n", argc);
-        usage(pysamerr);
+        fprintf(pysam_stderr, "Invalid number of arguments: %d\n", argc);
+        usage(pysam_stderr);
          free(retval);
          return NULL;
      }
@@ -184,11 +187,11 @@ static char* expand_format_string(const char* format_string, const char* basenam
                      kputs("bam", &str);
                  break;
              case '\0':
-                // Error is: fprintf(pysamerr, "bad format string, trailing %%\n");
+                // Error is: fprintf(pysam_stderr, "bad format string, trailing %%\n");
                  free(str.s);
                  return NULL;
              default:
-                // Error is: fprintf(pysamerr, "bad format string, unknown format specifier\n");
+                // Error is: fprintf(pysam_stderr, "bad format string, unknown format specifier\n");
                  free(str.s);
                  return NULL;
          }
@@ -322,21 +325,21 @@ static state_t* init(parsed_opts_t* opts)
  {
      state_t* retval = calloc(sizeof(state_t), 1);
      if (!retval) {
-        fprintf(pysamerr, "Out of memory");
+        fprintf(pysam_stderr, "Out of memory");
          return NULL;
      }
  
      retval->merged_input_file = sam_open_format(opts->merged_input_name, "rb", &opts->ga.in);
      if (!retval->merged_input_file) {
-        fprintf(pysamerr, "Could not open input file (%s)\n", opts->merged_input_name);
+        fprintf(pysam_stderr, "Could not open input file (%s)\n", opts->merged_input_name);
          free(retval);
          return NULL;
      }
      retval->merged_input_header = sam_hdr_read(retval->merged_input_file);
      if (retval->merged_input_header == NULL) {
-        fprintf(pysamerr, "Could not read header for file '%s'\n",
+        fprintf(pysam_stderr, "Could not read header for file '%s'\n",
                  opts->merged_input_name);
-        cleanup_state(retval);
+        cleanup_state(retval, false);
          return NULL;
      }
  
@@ -344,15 +347,15 @@ static state_t* init(parsed_opts_t* opts)
          if (opts->unaccounted_header_name) {
              samFile* hdr_load = sam_open_format(opts->unaccounted_header_name, "r", &opts->ga.in);
              if (!hdr_load) {
-                fprintf(pysamerr, "Could not open unaccounted header file (%s)\n", opts->unaccounted_header_name);
-                cleanup_state(retval);
+                fprintf(pysam_stderr, "Could not open unaccounted header file (%s)\n", opts->unaccounted_header_name);
+                cleanup_state(retval, false);
                  return NULL;
              }
              retval->unaccounted_header = sam_hdr_read(hdr_load);
              if (retval->unaccounted_header == NULL) {
-                fprintf(pysamerr, "Could not read header for file '%s'\n",
+                fprintf(pysam_stderr, "Could not read header for file '%s'\n",
                          opts->unaccounted_header_name);
-                cleanup_state(retval);
+                cleanup_state(retval, false);
                  return NULL;
              }
              sam_close(hdr_load);
@@ -362,30 +365,31 @@ static state_t* init(parsed_opts_t* opts)
  
          retval->unaccounted_file = sam_open_format(opts->unaccounted_name, "wb", &opts->ga.out);
          if (retval->unaccounted_file == NULL) {
-            fprintf(pysamerr, "Could not open unaccounted output file: %s\n", opts->unaccounted_name);
-            cleanup_state(retval);
+            fprintf(pysam_stderr, "Could not open unaccounted output file: %s\n", opts->unaccounted_name);
+            cleanup_state(retval, false);
              return NULL;
          }
      }
  
      // Open output files for RGs
      if (!count_RG(retval->merged_input_header, &retval->output_count, &retval->rg_id)) return NULL;
-    if (opts->verbose) fprintf(pysamerr, "@RG's found %zu\n",retval->output_count);
+    if (opts->verbose) fprintf(pysam_stderr, "@RG's found %zu\n",retval->output_count);
  
+    retval->rg_output_file_name = (char **)calloc(retval->output_count, sizeof(char *));
      retval->rg_output_file = (samFile**)calloc(retval->output_count, sizeof(samFile*));
      retval->rg_output_header = (bam_hdr_t**)calloc(retval->output_count, sizeof(bam_hdr_t*));
      retval->rg_hash = kh_init_c2i();
-    if (!retval->rg_output_file || !retval->rg_output_header) {
-        fprintf(pysamerr, "Could not allocate memory for output file array. Out of memory?");
-        cleanup_state(retval);
+    if (!retval->rg_output_file_name || !retval->rg_output_file || !retval->rg_output_header || !retval->rg_hash) {
+        fprintf(pysam_stderr, "Could not allocate memory for output file array. Out of memory?");
+        cleanup_state(retval, false);
          return NULL;
      }
  
      char* dirsep = strrchr(opts->merged_input_name, '/');
      char* input_base_name = strdup(dirsep? dirsep+1 : opts->merged_input_name);
      if (!input_base_name) {
-        fprintf(pysamerr, "Out of memory\n");
-        cleanup_state(retval);
+        fprintf(pysam_stderr, "Out of memory\n");
+        cleanup_state(retval, false);
          return NULL;
      }
      char* extension = strrchr(input_base_name, '.');
@@ -401,16 +405,17 @@ static state_t* init(parsed_opts_t* opts)
                                                 &opts->ga.out);
  
          if ( output_filename == NULL ) {
-            fprintf(pysamerr, "Error expanding output filename format string.\r\n");
-            cleanup_state(retval);
+            fprintf(pysam_stderr, "Error expanding output filename format string.\n");
+            cleanup_state(retval, false);
              free(input_base_name);
              return NULL;
          }
  
+        retval->rg_output_file_name[i] = output_filename;
          retval->rg_output_file[i] = sam_open_format(output_filename, "wb", &opts->ga.out);
          if (retval->rg_output_file[i] == NULL) {
-            fprintf(pysamerr, "Could not open output file: %s\r\n", output_filename);
-            cleanup_state(retval);
+            fprintf(pysam_stderr, "Could not open output file: %s\n", output_filename);
+            cleanup_state(retval, false);
              free(input_base_name);
              return NULL;
          }
@@ -423,13 +428,11 @@ static state_t* init(parsed_opts_t* opts)
          // Set and edit header
          retval->rg_output_header[i] = bam_hdr_dup(retval->merged_input_header);
          if ( !filter_header_rg(retval->rg_output_header[i], retval->rg_id[i]) ) {
-            fprintf(pysamerr, "Could not rewrite header for file: %s\r\n", output_filename);
-            cleanup_state(retval);
-            free(output_filename);
+            fprintf(pysam_stderr, "Could not rewrite header for file: %s\n", output_filename);
+            cleanup_state(retval, false);
              free(input_base_name);
              return NULL;
          }
-        free(output_filename);
      }
  
      free(input_base_name);
@@ -440,13 +443,14 @@ static state_t* init(parsed_opts_t* opts)
  static bool split(state_t* state)
  {
      if (state->unaccounted_file && sam_hdr_write(state->unaccounted_file, state->unaccounted_header) != 0) {
-        fprintf(pysamerr, "Could not write output file header\n");
+        fprintf(pysam_stderr, "Could not write output file header\n");
          return false;
      }
      size_t i;
      for (i = 0; i < state->output_count; i++) {
          if (sam_hdr_write(state->rg_output_file[i], state->rg_output_header[i]) != 0) {
-            fprintf(pysamerr, "Could not write output file header\n");
+            fprintf(pysam_stderr, "Could not write output file header for '%s'\n",
+                    state->rg_output_file_name[i]);
              return false;
          }
      }
@@ -459,7 +463,7 @@ static bool split(state_t* state)
          bam_destroy1(file_read);
          file_read = NULL;
          if (r < -1) {
-            fprintf(pysamerr, "Could not write read sequence\n");
+            fprintf(pysam_stderr, "Could not read first input record\n");
              return false;
          }
      }
@@ -480,22 +484,25 @@ static bool split(state_t* state)
              // if found write to the appropriate untangled bam
              int i = kh_val(state->rg_hash,iter);
              if (sam_write1(state->rg_output_file[i], state->rg_output_header[i], file_read) < 0) {
-                fprintf(pysamerr, "Could not write sequence\n");
+                fprintf(pysam_stderr, "Could not write to output file '%s'\n",
+                        state->rg_output_file_name[i]);
+                bam_destroy1(file_read);
                  return false;
              }
          } else {
              // otherwise write to the unaccounted bam if there is one or fail
              if (state->unaccounted_file == NULL) {
                  if (tag) {
-                    fprintf(pysamerr, "Read \"%s\" with unaccounted for tag \"%s\".\n", bam_get_qname(file_read), bam_aux2Z(tag));
+                    fprintf(pysam_stderr, "Read \"%s\" with unaccounted for tag \"%s\".\n", bam_get_qname(file_read), bam_aux2Z(tag));
                  } else {
-                    fprintf(pysamerr, "Read \"%s\" has no RG tag.\n", bam_get_qname(file_read));
+                    fprintf(pysam_stderr, "Read \"%s\" has no RG tag.\n", bam_get_qname(file_read));
                  }
                  bam_destroy1(file_read);
                  return false;
              } else {
                  if (sam_write1(state->unaccounted_file, state->unaccounted_header, file_read) < 0) {
-                    fprintf(pysamerr, "Could not write sequence\n");
+                    fprintf(pysam_stderr, "Could not write to unaccounted output file\n");
+                    bam_destroy1(file_read);
                      return false;
                  }
              }
@@ -507,7 +514,7 @@ static bool split(state_t* state)
              bam_destroy1(file_read);
              file_read = NULL;
              if (r < -1) {
-                fprintf(pysamerr, "Could not write read sequence\n");
+                fprintf(pysam_stderr, "Could not read input record\n");
                  return false;
              }
          }
@@ -516,23 +523,38 @@ static bool split(state_t* state)
      return true;
  }
  
-static int cleanup_state(state_t* status)
+static int cleanup_state(state_t* status, bool check_close)
  {
      int ret = 0;
  
      if (!status) return 0;
      if (status->unaccounted_header) bam_hdr_destroy(status->unaccounted_header);
-    if (status->unaccounted_file) ret |= sam_close(status->unaccounted_file);
+    if (status->unaccounted_file) {
+        if (sam_close(status->unaccounted_file) < 0 && check_close) {
+            fprintf(pysam_stderr, "Error on closing unaccounted file\n");
+            ret = -1;
+        }
+    }
      sam_close(status->merged_input_file);
      size_t i;
      for (i = 0; i < status->output_count; i++) {
-        bam_hdr_destroy(status->rg_output_header[i]);
-        ret |= sam_close(status->rg_output_file[i]);
-        free(status->rg_id[i]);
+        if (status->rg_output_header && status->rg_output_header[i])
+            bam_hdr_destroy(status->rg_output_header[i]);
+        if (status->rg_output_file && status->rg_output_file[i]) {
+            if (sam_close(status->rg_output_file[i]) < 0 && check_close) {
+                fprintf(pysam_stderr, "Error on closing output file '%s'\n",
+                        status->rg_output_file_name[i]);
+                ret = -1;
+            }
+        }
+        if (status->rg_id) free(status->rg_id[i]);
+        if (status->rg_output_file_name) free(status->rg_output_file_name[i]);
      }
-    bam_hdr_destroy(status->merged_input_header);
+    if (status->merged_input_header)
+        bam_hdr_destroy(status->merged_input_header);
      free(status->rg_output_header);
      free(status->rg_output_file);
+    free(status->rg_output_file_name);
      kh_destroy_c2i(status->rg_hash);
      free(status->rg_id);
      free(status);
@@ -555,13 +577,17 @@ int main_split(int argc, char** argv)
  {
      int ret = 1;
      parsed_opts_t* opts = parse_args(argc, argv);
-    if (!opts ) goto cleanup_opts;
+    if (!opts) goto cleanup_opts;
      state_t* status = init(opts);
      if (!status) goto cleanup_opts;
  
-    if (split(status)) ret = 0;
+    if (!split(status)) {
+        cleanup_state(status, false);
+        goto cleanup_opts;
+    }
+
+    ret = cleanup_state(status, true);
  
-    ret |= (cleanup_state(status) != 0);
  cleanup_opts:
      cleanup_opts(opts);
  
diff --git a/samtools/bam_stat.c b/samtools/bam_stat.c

index 5cb3235f0bd4690af7d99d90939fe4746b3d3e6b..f6cf1d5c4b9cf2cea978cd7d45aaa47c98cd87ac 100644 (file)
--- a/samtools/bam_stat.c
+++ b/samtools/bam_stat.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <unistd.h>
  #include <stdint.h>
  #include <stdlib.h>
diff --git a/samtools/bam_stat.c.pysam.c b/samtools/bam_stat.c.pysam.c

index a519312fafef206d3c99fafd8a58f668bf573a57..cdca4dd7d3b328daebfd219cc249b3569ec8cf63 100644 (file)
--- a/samtools/bam_stat.c.pysam.c
+++ b/samtools/bam_stat.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <unistd.h>
  #include <stdint.h>
  #include <stdlib.h>
@@ -81,7 +83,7 @@ bam_flagstat_t *bam_flagstat_core(samFile *fp, bam_hdr_t *h)
          flagstat_loop(s, c);
      bam_destroy1(b);
      if (ret != -1)
-        fprintf(pysamerr, "[bam_flagstat_core] Truncated file? Continue anyway.\n");
+        fprintf(pysam_stderr, "[bam_flagstat_core] Truncated file? Continue anyway.\n");
      return s;
  }
  
@@ -120,16 +122,16 @@ int bam_flagstat(int argc, char *argv[])
          switch (c) {
          case INPUT_FMT_OPTION:
              if (hts_opt_add(&in_opts, optarg) < 0)
-                usage_exit(pysamerr, EXIT_FAILURE);
+                usage_exit(pysam_stderr, EXIT_FAILURE);
              break;
          default:
-            usage_exit(pysamerr, EXIT_FAILURE);
+            usage_exit(pysam_stderr, EXIT_FAILURE);
          }
      }
  
      if (argc != optind+1) {
-        if (argc == optind) usage_exit(stdout, EXIT_SUCCESS);
-        else usage_exit(pysamerr, EXIT_FAILURE);
+        if (argc == optind) usage_exit(pysam_stdout, EXIT_SUCCESS);
+        else usage_exit(pysam_stderr, EXIT_FAILURE);
      }
      fp = sam_open(argv[optind], "r");
      if (fp == NULL) {
@@ -137,40 +139,40 @@ int bam_flagstat(int argc, char *argv[])
          return 1;
      }
      if (hts_opt_apply(fp, in_opts)) {
-        fprintf(pysamerr, "Failed to apply input-fmt-options\n");
+        fprintf(pysam_stderr, "Failed to apply input-fmt-options\n");
          return 1;
      }
  
      if (hts_set_opt(fp, CRAM_OPT_REQUIRED_FIELDS,
                      SAM_FLAG | SAM_MAPQ | SAM_RNEXT)) {
-        fprintf(pysamerr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
+        fprintf(pysam_stderr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
          return 1;
      }
  
      if (hts_set_opt(fp, CRAM_OPT_DECODE_MD, 0)) {
-        fprintf(pysamerr, "Failed to set CRAM_OPT_DECODE_MD value\n");
+        fprintf(pysam_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
          return 1;
      }
  
      header = sam_hdr_read(fp);
      if (header == NULL) {
-        fprintf(pysamerr, "Failed to read header for \"%s\"\n", argv[optind]);
+        fprintf(pysam_stderr, "Failed to read header for \"%s\"\n", argv[optind]);
          return 1;
      }
      s = bam_flagstat_core(fp, header);
-    printf("%lld + %lld in total (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]);
-    printf("%lld + %lld secondary\n", s->n_secondary[0], s->n_secondary[1]);
-    printf("%lld + %lld supplementary\n", s->n_supp[0], s->n_supp[1]);
-    printf("%lld + %lld duplicates\n", s->n_dup[0], s->n_dup[1]);
-    printf("%lld + %lld mapped (%s : %s)\n", s->n_mapped[0], s->n_mapped[1], percent(b0, s->n_mapped[0], s->n_reads[0]), percent(b1, s->n_mapped[1], s->n_reads[1]));
-    printf("%lld + %lld paired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]);
-    printf("%lld + %lld read1\n", s->n_read1[0], s->n_read1[1]);
-    printf("%lld + %lld read2\n", s->n_read2[0], s->n_read2[1]);
-    printf("%lld + %lld properly paired (%s : %s)\n", s->n_pair_good[0], s->n_pair_good[1], percent(b0, s->n_pair_good[0], s->n_pair_all[0]), percent(b1, s->n_pair_good[1], s->n_pair_all[1]));
-    printf("%lld + %lld with itself and mate mapped\n", s->n_pair_map[0], s->n_pair_map[1]);
-    printf("%lld + %lld singletons (%s : %s)\n", s->n_sgltn[0], s->n_sgltn[1], percent(b0, s->n_sgltn[0], s->n_pair_all[0]), percent(b1, s->n_sgltn[1], s->n_pair_all[1]));
-    printf("%lld + %lld with mate mapped to a different chr\n", s->n_diffchr[0], s->n_diffchr[1]);
-    printf("%lld + %lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh[0], s->n_diffhigh[1]);
+    fprintf(pysam_stdout, "%lld + %lld in total (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]);
+    fprintf(pysam_stdout, "%lld + %lld secondary\n", s->n_secondary[0], s->n_secondary[1]);
+    fprintf(pysam_stdout, "%lld + %lld supplementary\n", s->n_supp[0], s->n_supp[1]);
+    fprintf(pysam_stdout, "%lld + %lld duplicates\n", s->n_dup[0], s->n_dup[1]);
+    fprintf(pysam_stdout, "%lld + %lld mapped (%s : %s)\n", s->n_mapped[0], s->n_mapped[1], percent(b0, s->n_mapped[0], s->n_reads[0]), percent(b1, s->n_mapped[1], s->n_reads[1]));
+    fprintf(pysam_stdout, "%lld + %lld paired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]);
+    fprintf(pysam_stdout, "%lld + %lld read1\n", s->n_read1[0], s->n_read1[1]);
+    fprintf(pysam_stdout, "%lld + %lld read2\n", s->n_read2[0], s->n_read2[1]);
+    fprintf(pysam_stdout, "%lld + %lld properly paired (%s : %s)\n", s->n_pair_good[0], s->n_pair_good[1], percent(b0, s->n_pair_good[0], s->n_pair_all[0]), percent(b1, s->n_pair_good[1], s->n_pair_all[1]));
+    fprintf(pysam_stdout, "%lld + %lld with itself and mate mapped\n", s->n_pair_map[0], s->n_pair_map[1]);
+    fprintf(pysam_stdout, "%lld + %lld singletons (%s : %s)\n", s->n_sgltn[0], s->n_sgltn[1], percent(b0, s->n_sgltn[0], s->n_pair_all[0]), percent(b1, s->n_sgltn[1], s->n_pair_all[1]));
+    fprintf(pysam_stdout, "%lld + %lld with mate mapped to a different chr\n", s->n_diffchr[0], s->n_diffchr[1]);
+    fprintf(pysam_stdout, "%lld + %lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh[0], s->n_diffhigh[1]);
      free(s);
      bam_hdr_destroy(header);
      sam_close(fp);
diff --git a/samtools/bam_tview.c b/samtools/bam_tview.c

index f86ae435b7b34f627041e1d392519e7ea1dc7aec..f1f0cc77ae1f0c4aa6a9eea522b981ecb7fa5e2b 100644 (file)
--- a/samtools/bam_tview.c
+++ b/samtools/bam_tview.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <regex.h>
  #include <assert.h>
  #include "bam_tview.h"
diff --git a/samtools/bam_tview.c.pysam.c b/samtools/bam_tview.c.pysam.c

index 736b588fd4adb244803e1e5be1c90796b23c9d82..a47bced147b4509e24a68022387fd4f64f16ea8e 100644 (file)
--- a/samtools/bam_tview.c.pysam.c
+++ b/samtools/bam_tview.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <regex.h>
  #include <assert.h>
  #include "bam_tview.h"
@@ -68,7 +70,7 @@ int base_tv_init(tview_t* tv, const char *fn, const char *fn_fa,
      tv->fp = sam_open_format(fn, "r", fmt);
      if(tv->fp == NULL)
      {
-        fprintf(pysamerr,"sam_open %s. %s\n", fn,fn_fa);
+        fprintf(pysam_stderr,"sam_open %s. %s\n", fn,fn_fa);
          exit(EXIT_FAILURE);
      }
      // TODO bgzf_set_cache_size(tv->fp->fp.bgzf, 8 * 1024 *1024);
@@ -77,13 +79,13 @@ int base_tv_init(tview_t* tv, const char *fn, const char *fn_fa,
      tv->header = sam_hdr_read(tv->fp);
      if(tv->header == NULL)
      {
-        fprintf(pysamerr,"Cannot read '%s'.\n", fn);
+        fprintf(pysam_stderr,"Cannot read '%s'.\n", fn);
          exit(EXIT_FAILURE);
      }
      tv->idx = sam_index_load(tv->fp, fn);
      if (tv->idx == NULL)
      {
-        fprintf(pysamerr,"Cannot read index for '%s'.\n", fn);
+        fprintf(pysam_stderr,"Cannot read index for '%s'.\n", fn);
          exit(EXIT_FAILURE);
      }
      tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv);
@@ -297,7 +299,7 @@ int base_draw_aln(tview_t *tv, int tid, int pos)
          free(str);
          if ( !tv->ref )
          {
-            fprintf(pysamerr,"Could not read the reference sequence. Is it seekable (plain text or compressed + .gzi indexed with bgzip)?\n");
+            fprintf(pysam_stderr,"Could not read the reference sequence. Is it seekable (plain text or compressed + .gzi indexed with bgzip)?\n");
              exit(1);
          }
      }
@@ -326,19 +328,19 @@ static void error(const char *format, ...)
  {
      if ( !format )
      {
-        fprintf(pysamerr,
+        fprintf(pysam_stderr,
  "Usage: samtools tview [options] <aln.bam> [ref.fasta]\n"
  "Options:\n"
  "   -d display      output as (H)tml or (C)urses or (T)ext \n"
  "   -p chr:pos      go directly to this position\n"
  "   -s STR          display only reads from this sample or group\n");
-        sam_global_opt_help(pysamerr, "-.--.");
+        sam_global_opt_help(pysam_stderr, "-.--.");
      }
      else
      {
          va_list ap;
          va_start(ap, format);
-        vfprintf(pysamerr, format, ap);
+        vfprintf(pysam_stderr, format, ap);
          va_end(ap);
      }
      exit(-1);
@@ -428,7 +430,7 @@ int bam_tview_main(int argc, char *argv[])
          }
          if ( i==tv->header->n_targets )
          {
-            fprintf(pysamerr,"None of the BAM sequence names present in the fasta file\n");
+            fprintf(pysam_stderr,"None of the BAM sequence names present in the fasta file\n");
              exit(EXIT_FAILURE);
          }
          tv->curr_tid = i;
diff --git a/samtools/bam_tview_curses.c.pysam.c b/samtools/bam_tview_curses.c.pysam.c

index bbeedf89b4e124d61af23fa5dc46292599f055f7..90a83353daa59c2d7d02863ca104c8eeac1d3294 100644 (file)
--- a/samtools/bam_tview_curses.c.pysam.c
+++ b/samtools/bam_tview_curses.c.pysam.c
@@ -304,7 +304,7 @@ tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples,
      tview_t* base=(tview_t*)tv;
      if(tv==0)
          {
-        fprintf(pysamerr,"Calloc failed\n");
+        fprintf(pysam_stderr,"Calloc failed\n");
          return 0;
          }
  
diff --git a/samtools/bam_tview_html.c b/samtools/bam_tview_html.c

index 9db8fcea4b098dadf08bdcbcaf02de7b6f8c2df1..e3aecda3003fc84632375749ecb4d128b478b629 100644 (file)
--- a/samtools/bam_tview_html.c
+++ b/samtools/bam_tview_html.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <unistd.h>
  #include "bam_tview.h"
  
diff --git a/samtools/bam_tview_html.c.pysam.c b/samtools/bam_tview_html.c.pysam.c

index b42c7371020129817d427827f000599078aca387..164e33da8f19bae7a9c06cfbb386ec5064b82488 100644 (file)
--- a/samtools/bam_tview_html.c.pysam.c
+++ b/samtools/bam_tview_html.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <unistd.h>
  #include "bam_tview.h"
  
@@ -183,7 +185,7 @@ static int html_drawaln(struct AbstractTview* tv, int tid, int pos)
              fprintf(ptr->out,"<span");
                  while(css<32)
                      {
-                    //if(y>1) fprintf(pysamerr,"css=%d pow2=%d vs %d\n",css,(1 << (css)),ptr->screen[y][x].attributes);
+                    //if(y>1) fprintf(pysam_stderr,"css=%d pow2=%d vs %d\n",css,(1 << (css)),ptr->screen[y][x].attributes);
                      if(( (ptr->screen[y][x].attributes) & (1 << (css)))!=0)
                          {
  
@@ -322,12 +324,12 @@ tview_t* html_tv_init(const char *fn, const char *fn_fa, const char *samples,
      tview_t* base=(tview_t*)tv;
      if(tv==0)
          {
-        fprintf(pysamerr,"Calloc failed\n");
+        fprintf(pysam_stderr,"Calloc failed\n");
          return 0;
          }
      tv->row_count=0;
      tv->screen=NULL;
-    tv->out=stdout;
+    tv->out=pysam_stdout;
      tv->attributes=0;
      base_tv_init(base,fn,fn_fa,samples,fmt);
      /* initialize callbacks */
diff --git a/samtools/bamshuf.c b/samtools/bamshuf.c

index ac97bb84dfe3ef87e3fd215ab3553c8593174f4e..044bc4e96a40fc3795db5c0cb06adb55a998523f 100644 (file)
--- a/samtools/bamshuf.c
+++ b/samtools/bamshuf.c
@@ -1,7 +1,7 @@
  /*  bamshuf.c -- collate subcommand.
  
      Copyright (C) 2012 Broad Institute.
-    Copyright (C) 2013 Genome Research Ltd.
+    Copyright (C) 2013, 2015 Genome Research Ltd.
  
      Author: Heng Li <lh3@sanger.ac.uk>
  
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <unistd.h>
  #include <stdio.h>
  #include <stdlib.h>
@@ -77,14 +79,16 @@ KSORT_INIT(bamshuf, elem_t, elem_lt)
  static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
                     int is_stdout, sam_global_args *ga)
  {
-    samFile *fp, *fpw, **fpt;
-    char **fnt, modew[8];
-    bam1_t *b;
-    int i, l;
-    bam_hdr_t *h;
-    int64_t *cnt;
+    samFile *fp, *fpw = NULL, **fpt = NULL;
+    char **fnt = NULL, modew[8];
+    bam1_t *b = NULL;
+    int i, l, r;
+    bam_hdr_t *h = NULL;
+    int64_t j, max_cnt = 0, *cnt = NULL;
+    elem_t *a = NULL;
  
-    // split
+    // Read input, distribute reads pseudo-randomly into n_files temporary
+    // files.
      fp = sam_open_format(fn, "r", &ga->in);
      if (fp == NULL) {
          print_error_errno("collate", "Cannot open input file \"%s\"", fn);
@@ -94,39 +98,69 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
      h = sam_hdr_read(fp);
      if (h == NULL) {
          fprintf(stderr, "Couldn't read header for '%s'\n", fn);
-        return 1;
+        goto fail;
      }
      fnt = (char**)calloc(n_files, sizeof(char*));
+    if (!fnt) goto mem_fail;
      fpt = (samFile**)calloc(n_files, sizeof(samFile*));
+    if (!fpt) goto mem_fail;
      cnt = (int64_t*)calloc(n_files, 8);
+    if (!cnt) goto mem_fail;
+
      l = strlen(pre);
  
      for (i = 0; i < n_files; ++i) {
          fnt[i] = (char*)calloc(l + 10, 1);
+        if (!fnt[i]) goto mem_fail;
          sprintf(fnt[i], "%s.%.4d.bam", pre, i);
          fpt[i] = sam_open(fnt[i], "wb1");
          if (fpt[i] == NULL) {
              print_error_errno("collate", "Cannot open intermediate file \"%s\"", fnt[i]);
-            return 1;
+            goto fail;
+        }
+        if (sam_hdr_write(fpt[i], h) < 0) {
+            print_error_errno("collate", "Couldn't write header to intermediate file \"%s\"", fnt[i]);
+            goto fail;
          }
-        sam_hdr_write(fpt[i], h);
      }
      b = bam_init1();
-    while (sam_read1(fp, h, b) >= 0) {
+    if (!b) goto mem_fail;
+    while ((r = sam_read1(fp, h, b)) >= 0) {
          uint32_t x;
          x = hash_X31_Wang(bam_get_qname(b)) % n_files;
-        sam_write1(fpt[x], h, b);
+        if (sam_write1(fpt[x], h, b) < 0) {
+            print_error_errno("collate", "Couldn't write to intermediate file \"%s\"", fnt[x]);
+            goto fail;
+        }
          ++cnt[x];
      }
      bam_destroy1(b);
-    for (i = 0; i < n_files; ++i) sam_close(fpt[i]);
+    b = NULL;
+    if (r < -1) {
+        fprintf(stderr, "Error reading input file\n");
+        goto fail;
+    }
+    for (i = 0; i < n_files; ++i) {
+        // Close split output
+        r = sam_close(fpt[i]);
+        fpt[i] = NULL;
+        if (r < 0) {
+            fprintf(stderr, "Error on closing '%s'\n", fnt[i]);
+            return 1;
+        }
+
+        // Find biggest count
+        if (max_cnt < cnt[i]) max_cnt = cnt[i];
+    }
      free(fpt);
+    fpt = NULL;
      sam_close(fp);
-
+    fp = NULL;
      // merge
      sprintf(modew, "wb%d", (clevel >= 0 && clevel <= 9)? clevel : DEF_CLEVEL);
      if (!is_stdout) { // output to a file
          char *fnw = (char*)calloc(l + 5, 1);
+        if (!fnw) goto mem_fail;
          if (ga->out.format == unknown_format)
              sprintf(fnw, "%s.bam", pre); // "wb" above makes BAM the default
          else
@@ -137,37 +171,86 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
      if (fpw == NULL) {
          if (is_stdout) print_error_errno("collate", "Cannot open standard output");
          else print_error_errno("collate", "Cannot open output file \"%s.bam\"", pre);
-        return 1;
+        goto fail;
+    }
+
+    if (sam_hdr_write(fpw, h) < 0) {
+        print_error_errno("collate", "Couldn't write header");
+        goto fail;
+    }
+
+    a = malloc(max_cnt * sizeof(elem_t));
+    if (!a) goto mem_fail;
+    for (j = 0; j < max_cnt; ++j) {
+        a[j].b = bam_init1();
+        if (!a[j].b) { max_cnt = j; goto mem_fail; }
      }
  
-    sam_hdr_write(fpw, h);
      for (i = 0; i < n_files; ++i) {
-        int64_t j, c = cnt[i];
-        elem_t *a;
+        int64_t c = cnt[i];
          fp = sam_open_format(fnt[i], "r", &ga->in);
-        bam_hdr_destroy(sam_hdr_read(fp));
-        a = (elem_t*)calloc(c, sizeof(elem_t));
+        if (NULL == fp) {
+            print_error_errno("collate", "Couldn't open \"%s\"", fnt[i]);
+            goto fail;
+        }
+        bam_hdr_destroy(sam_hdr_read(fp)); // Skip over header
+
+        // Slurp in one of the split files
          for (j = 0; j < c; ++j) {
-            a[j].b = bam_init1();
-            sam_read1(fp, h, a[j].b);
+            if (sam_read1(fp, h, a[j].b) < 0) {
+                fprintf(stderr, "Error reading '%s'\n", fnt[i]);
+                goto fail;
+            }
              a[j].key = hash_X31_Wang(bam_get_qname(a[j].b));
          }
          sam_close(fp);
          unlink(fnt[i]);
          free(fnt[i]);
-        ks_introsort(bamshuf, c, a);
+        fnt[i] = NULL;
+
+        ks_introsort(bamshuf, c, a); // Shuffle all the reads
+
+        // Write them out again
          for (j = 0; j < c; ++j) {
-            sam_write1(fpw, h, a[j].b);
-            bam_destroy1(a[j].b);
+            if (sam_write1(fpw, h, a[j].b) < 0) {
+                print_error_errno("collate", "Error writing to output");
+                goto fail;
+            }
          }
-        free(a);
      }
-    sam_close(fpw);
+
      bam_hdr_destroy(h);
-    free(fnt); free(cnt);
+    for (j = 0; j < max_cnt; ++j) bam_destroy1(a[j].b);
+    free(a); free(fnt); free(cnt);
      sam_global_args_free(ga);
+    if (sam_close(fpw) < 0) {
+        fprintf(stderr, "Error on closing output\n");
+        return 1;
+    }
  
      return 0;
+
+ mem_fail:
+    fprintf(stderr, "Out of memory\n");
+
+ fail:
+    if (fp) sam_close(fp);
+    if (fpw) sam_close(fpw);
+    if (h) bam_hdr_destroy(h);
+    if (b) bam_destroy1(b);
+    for (i = 0; i < n_files; ++i) {
+        if (fnt) free(fnt[i]);
+        if (fpt && fpt[i]) sam_close(fpt[i]);
+    }
+    if (a) {
+        for (j = 0; j < max_cnt; ++j) bam_destroy1(a[j].b);
+        free(a);
+    }
+    free(fnt);
+    free(fpt);
+    free(cnt);
+    sam_global_args_free(ga);
+    return 1;
  }
  
  static int usage(FILE *fp, int n_files) {
diff --git a/samtools/bamshuf.c.pysam.c b/samtools/bamshuf.c.pysam.c

index d17cf9bba8369973e53ed32bad38e28717798b25..fb1a5ac705fd5af36b0b33bed3450205849b1f85 100644 (file)
--- a/samtools/bamshuf.c.pysam.c
+++ b/samtools/bamshuf.c.pysam.c
@@ -3,7 +3,7 @@
  /*  bamshuf.c -- collate subcommand.
  
      Copyright (C) 2012 Broad Institute.
-    Copyright (C) 2013 Genome Research Ltd.
+    Copyright (C) 2013, 2015 Genome Research Ltd.
  
      Author: Heng Li <lh3@sanger.ac.uk>
  
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <unistd.h>
  #include <stdio.h>
  #include <stdlib.h>
@@ -77,16 +79,18 @@ static inline int elem_lt(elem_t x, elem_t y)
  KSORT_INIT(bamshuf, elem_t, elem_lt)
  
  static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
-                   int is_stdout, sam_global_args *ga)
+                   int is_pysam_stdout, sam_global_args *ga)
  {
-    samFile *fp, *fpw, **fpt;
-    char **fnt, modew[8];
-    bam1_t *b;
-    int i, l;
-    bam_hdr_t *h;
-    int64_t *cnt;
+    samFile *fp, *fpw = NULL, **fpt = NULL;
+    char **fnt = NULL, modew[8];
+    bam1_t *b = NULL;
+    int i, l, r;
+    bam_hdr_t *h = NULL;
+    int64_t j, max_cnt = 0, *cnt = NULL;
+    elem_t *a = NULL;
  
-    // split
+    // Read input, distribute reads pseudo-randomly into n_files temporary
+    // files.
      fp = sam_open_format(fn, "r", &ga->in);
      if (fp == NULL) {
          print_error_errno("collate", "Cannot open input file \"%s\"", fn);
@@ -95,88 +99,167 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
  
      h = sam_hdr_read(fp);
      if (h == NULL) {
-        fprintf(pysamerr, "Couldn't read header for '%s'\n", fn);
-        return 1;
+        fprintf(pysam_stderr, "Couldn't read header for '%s'\n", fn);
+        goto fail;
      }
      fnt = (char**)calloc(n_files, sizeof(char*));
+    if (!fnt) goto mem_fail;
      fpt = (samFile**)calloc(n_files, sizeof(samFile*));
+    if (!fpt) goto mem_fail;
      cnt = (int64_t*)calloc(n_files, 8);
+    if (!cnt) goto mem_fail;
+
      l = strlen(pre);
  
      for (i = 0; i < n_files; ++i) {
          fnt[i] = (char*)calloc(l + 10, 1);
+        if (!fnt[i]) goto mem_fail;
          sprintf(fnt[i], "%s.%.4d.bam", pre, i);
          fpt[i] = sam_open(fnt[i], "wb1");
          if (fpt[i] == NULL) {
              print_error_errno("collate", "Cannot open intermediate file \"%s\"", fnt[i]);
-            return 1;
+            goto fail;
+        }
+        if (sam_hdr_write(fpt[i], h) < 0) {
+            print_error_errno("collate", "Couldn't write header to intermediate file \"%s\"", fnt[i]);
+            goto fail;
          }
-        sam_hdr_write(fpt[i], h);
      }
      b = bam_init1();
-    while (sam_read1(fp, h, b) >= 0) {
+    if (!b) goto mem_fail;
+    while ((r = sam_read1(fp, h, b)) >= 0) {
          uint32_t x;
          x = hash_X31_Wang(bam_get_qname(b)) % n_files;
-        sam_write1(fpt[x], h, b);
+        if (sam_write1(fpt[x], h, b) < 0) {
+            print_error_errno("collate", "Couldn't write to intermediate file \"%s\"", fnt[x]);
+            goto fail;
+        }
          ++cnt[x];
      }
      bam_destroy1(b);
-    for (i = 0; i < n_files; ++i) sam_close(fpt[i]);
+    b = NULL;
+    if (r < -1) {
+        fprintf(pysam_stderr, "Error reading input file\n");
+        goto fail;
+    }
+    for (i = 0; i < n_files; ++i) {
+        // Close split output
+        r = sam_close(fpt[i]);
+        fpt[i] = NULL;
+        if (r < 0) {
+            fprintf(pysam_stderr, "Error on closing '%s'\n", fnt[i]);
+            return 1;
+        }
+
+        // Find biggest count
+        if (max_cnt < cnt[i]) max_cnt = cnt[i];
+    }
      free(fpt);
+    fpt = NULL;
      sam_close(fp);
-
+    fp = NULL;
      // merge
      sprintf(modew, "wb%d", (clevel >= 0 && clevel <= 9)? clevel : DEF_CLEVEL);
-    if (!is_stdout) { // output to a file
+    if (!is_pysam_stdout) { // output to a file
          char *fnw = (char*)calloc(l + 5, 1);
+        if (!fnw) goto mem_fail;
          if (ga->out.format == unknown_format)
              sprintf(fnw, "%s.bam", pre); // "wb" above makes BAM the default
          else
              sprintf(fnw, "%s.%s", pre,  hts_format_file_extension(&ga->out));
          fpw = sam_open_format(fnw, modew, &ga->out);
          free(fnw);
-    } else fpw = sam_open_format("-", modew, &ga->out); // output to stdout
+    } else fpw = sam_open_format("-", modew, &ga->out); // output to pysam_stdout
      if (fpw == NULL) {
-        if (is_stdout) print_error_errno("collate", "Cannot open standard output");
+        if (is_pysam_stdout) print_error_errno("collate", "Cannot open standard output");
          else print_error_errno("collate", "Cannot open output file \"%s.bam\"", pre);
-        return 1;
+        goto fail;
+    }
+
+    if (sam_hdr_write(fpw, h) < 0) {
+        print_error_errno("collate", "Couldn't write header");
+        goto fail;
+    }
+
+    a = malloc(max_cnt * sizeof(elem_t));
+    if (!a) goto mem_fail;
+    for (j = 0; j < max_cnt; ++j) {
+        a[j].b = bam_init1();
+        if (!a[j].b) { max_cnt = j; goto mem_fail; }
      }
  
-    sam_hdr_write(fpw, h);
      for (i = 0; i < n_files; ++i) {
-        int64_t j, c = cnt[i];
-        elem_t *a;
+        int64_t c = cnt[i];
          fp = sam_open_format(fnt[i], "r", &ga->in);
-        bam_hdr_destroy(sam_hdr_read(fp));
-        a = (elem_t*)calloc(c, sizeof(elem_t));
+        if (NULL == fp) {
+            print_error_errno("collate", "Couldn't open \"%s\"", fnt[i]);
+            goto fail;
+        }
+        bam_hdr_destroy(sam_hdr_read(fp)); // Skip over header
+
+        // Slurp in one of the split files
          for (j = 0; j < c; ++j) {
-            a[j].b = bam_init1();
-            sam_read1(fp, h, a[j].b);
+            if (sam_read1(fp, h, a[j].b) < 0) {
+                fprintf(pysam_stderr, "Error reading '%s'\n", fnt[i]);
+                goto fail;
+            }
              a[j].key = hash_X31_Wang(bam_get_qname(a[j].b));
          }
          sam_close(fp);
          unlink(fnt[i]);
          free(fnt[i]);
-        ks_introsort(bamshuf, c, a);
+        fnt[i] = NULL;
+
+        ks_introsort(bamshuf, c, a); // Shuffle all the reads
+
+        // Write them out again
          for (j = 0; j < c; ++j) {
-            sam_write1(fpw, h, a[j].b);
-            bam_destroy1(a[j].b);
+            if (sam_write1(fpw, h, a[j].b) < 0) {
+                print_error_errno("collate", "Error writing to output");
+                goto fail;
+            }
          }
-        free(a);
      }
-    sam_close(fpw);
+
      bam_hdr_destroy(h);
-    free(fnt); free(cnt);
+    for (j = 0; j < max_cnt; ++j) bam_destroy1(a[j].b);
+    free(a); free(fnt); free(cnt);
      sam_global_args_free(ga);
+    if (sam_close(fpw) < 0) {
+        fprintf(pysam_stderr, "Error on closing output\n");
+        return 1;
+    }
  
      return 0;
+
+ mem_fail:
+    fprintf(pysam_stderr, "Out of memory\n");
+
+ fail:
+    if (fp) sam_close(fp);
+    if (fpw) sam_close(fpw);
+    if (h) bam_hdr_destroy(h);
+    if (b) bam_destroy1(b);
+    for (i = 0; i < n_files; ++i) {
+        if (fnt) free(fnt[i]);
+        if (fpt && fpt[i]) sam_close(fpt[i]);
+    }
+    if (a) {
+        for (j = 0; j < max_cnt; ++j) bam_destroy1(a[j].b);
+        free(a);
+    }
+    free(fnt);
+    free(fpt);
+    free(cnt);
+    sam_global_args_free(ga);
+    return 1;
  }
  
  static int usage(FILE *fp, int n_files) {
      fprintf(fp,
              "Usage:   samtools collate [-Ou] [-n nFiles] [-c cLevel] <in.bam> <out.prefix>\n\n"
              "Options:\n"
-            "      -O       output to stdout\n"
+            "      -O       output to pysam_stdout\n"
              "      -u       uncompressed BAM output\n"
              "      -l INT   compression level [%d]\n" // DEF_CLEVEL
              "      -n INT   number of temporary files [%d]\n", // n_files
@@ -189,7 +272,7 @@ static int usage(FILE *fp, int n_files) {
  
  int main_bamshuf(int argc, char *argv[])
  {
-    int c, n_files = 64, clevel = DEF_CLEVEL, is_stdout = 0, is_un = 0;
+    int c, n_files = 64, clevel = DEF_CLEVEL, is_pysam_stdout = 0, is_un = 0;
      sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
      static const struct option lopts[] = {
          SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
@@ -201,15 +284,15 @@ int main_bamshuf(int argc, char *argv[])
          case 'n': n_files = atoi(optarg); break;
          case 'l': clevel = atoi(optarg); break;
          case 'u': is_un = 1; break;
-        case 'O': is_stdout = 1; break;
+        case 'O': is_pysam_stdout = 1; break;
          default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
                    /* else fall-through */
-        case '?': return usage(pysamerr, n_files);
+        case '?': return usage(pysam_stderr, n_files);
          }
      }
      if (is_un) clevel = 0;
      if (optind + 2 > argc)
-        return usage(pysamerr, n_files);
+        return usage(pysam_stderr, n_files);
  
-    return bamshuf(argv[optind], n_files, argv[optind+1], clevel, is_stdout, &ga);
+    return bamshuf(argv[optind], n_files, argv[optind+1], clevel, is_pysam_stdout, &ga);
  }
diff --git a/samtools/bamtk.c b/samtools/bamtk.c

index 4b4df770bbb2975efddfcadd7494a0f2745d7e54..5c1c60d7332d772640000bbdfc2aa58f427b0351 100644 (file)
--- a/samtools/bamtk.c
+++ b/samtools/bamtk.c
@@ -1,6 +1,6 @@
  /*  bamtk.c -- main samtools command front-end.
  
-    Copyright (C) 2008-2015 Genome Research Ltd.
+    Copyright (C) 2008-2016 Genome Research Ltd.
  
      Author: Heng Li <lh3@sanger.ac.uk>
  
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <unistd.h>
  #include <fcntl.h>
@@ -213,7 +215,7 @@ int main(int argc, char *argv[])
          printf(
  "samtools %s\n"
  "Using htslib %s\n"
-"Copyright (C) 2015 Genome Research Ltd.\n",
+"Copyright (C) 2016 Genome Research Ltd.\n",
                 samtools_version(), hts_version());
      }
      else if (strcmp(argv[1], "--version-only") == 0) {
diff --git a/samtools/bamtk.c.pysam.c b/samtools/bamtk.c.pysam.c

index a3698101ba3764352d900cac5ca8b42977f5d1b8..1f3d93843e5105c6ce9f615ea67c19e3505fcfb5 100644 (file)
--- a/samtools/bamtk.c.pysam.c
+++ b/samtools/bamtk.c.pysam.c
@@ -2,7 +2,7 @@
  
  /*  bamtk.c -- main samtools command front-end.
  
-    Copyright (C) 2008-2015 Genome Research Ltd.
+    Copyright (C) 2008-2016 Genome Research Ltd.
  
      Author: Heng Li <lh3@sanger.ac.uk>
  
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <unistd.h>
  #include <fcntl.h>
@@ -71,13 +73,13 @@ const char *samtools_version()
  
  static void vprint_error_core(const char *subcommand, const char *format, va_list args, const char *extra)
  {
-    fflush(stdout);
-    if (subcommand && *subcommand) fprintf(pysamerr, "samtools %s: ", subcommand);
-    else fprintf(pysamerr, "samtools: ");
-    vfprintf(pysamerr, format, args);
-    if (extra) fprintf(pysamerr, ": %s\n", extra);
-    else fprintf(pysamerr, "\n");
-    fflush(pysamerr);
+    fflush(pysam_stdout);
+    if (subcommand && *subcommand) fprintf(pysam_stderr, "samtools %s: ", subcommand);
+    else fprintf(pysam_stderr, "samtools: ");
+    vfprintf(pysam_stderr, format, args);
+    if (extra) fprintf(pysam_stderr, ": %s\n", extra);
+    else fprintf(pysam_stderr, "\n");
+    fflush(pysam_stderr);
  }
  
  void print_error(const char *subcommand, const char *format, ...)
@@ -158,14 +160,13 @@ static void usage(FILE *fp)
  int samtools_main(int argc, char *argv[])
  {
  #ifdef _WIN32
-    setmode(fileno(stdout), O_BINARY);
+    setmode(fileno(pysam_stdout), O_BINARY);
      setmode(fileno(stdin),  O_BINARY);
  #endif
+    if (argc < 2) { usage(pysam_stderr); return 1; }
  
-    if (argc < 2) { usage(pysamerr); return 1; }
-    
      if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0) {
-        if (argc == 2) { usage(stdout); return 0; }
+        if (argc == 2) { usage(pysam_stdout); return 0; }
  
          // Otherwise change "samtools help COMMAND [...]" to "samtools COMMAND";
          // main_xyz() functions by convention display the subcommand's usage
@@ -173,6 +174,7 @@ int samtools_main(int argc, char *argv[])
          argv++;
          argc = 2;
      }
+
      int ret = 0;
      if (strcmp(argv[1], "view") == 0)           ret = main_samview(argc-1, argv+1);
      else if (strcmp(argv[1], "import") == 0)    ret = main_import(argc-1, argv+1);
@@ -207,22 +209,22 @@ int samtools_main(int argc, char *argv[])
      else if (strcmp(argv[1], "quickcheck") == 0)  ret = main_quickcheck(argc-1, argv+1);
      else if (strcmp(argv[1], "addreplacerg") == 0) ret = main_addreplacerg(argc-1, argv+1);
      else if (strcmp(argv[1], "pileup") == 0) {
-        fprintf(pysamerr, "[main] The `pileup' command has been removed. Please use `mpileup' instead.\n");
+        fprintf(pysam_stderr, "[main] The `pileup' command has been removed. Please use `mpileup' instead.\n");
          return 1;
      }
      else if (strcmp(argv[1], "tview") == 0)   ret = bam_tview_main(argc-1, argv+1);
      else if (strcmp(argv[1], "--version") == 0) {
-        printf(
+        fprintf(pysam_stdout, 
  "samtools %s\n"
  "Using htslib %s\n"
-"Copyright (C) 2015 Genome Research Ltd.\n",
+"Copyright (C) 2016 Genome Research Ltd.\n",
                 samtools_version(), hts_version());
      }
      else if (strcmp(argv[1], "--version-only") == 0) {
-        printf("%s+htslib-%s\n", samtools_version(), hts_version());
+        fprintf(pysam_stdout, "%s+htslib-%s\n", samtools_version(), hts_version());
      }
      else {
-        fprintf(pysamerr, "[main] unrecognized command '%s'\n", argv[1]);
+        fprintf(pysam_stderr, "[main] unrecognized command '%s'\n", argv[1]);
          return 1;
      }
      return ret;
diff --git a/samtools/bedcov.c b/samtools/bedcov.c

index e2f0db8520a7359dc872ef1354cbf905e5a924d7..d4dceeee136e56c4f15e9a45e61874d8d36e27fd 100644 (file)
--- a/samtools/bedcov.c
+++ b/samtools/bedcov.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <zlib.h>
  #include <stdio.h>
  #include <ctype.h>
diff --git a/samtools/bedcov.c.pysam.c b/samtools/bedcov.c.pysam.c

index 6faa7bfbb85e23a851af78bcca58c86bcaeb027f..25fdffcd8f3fb37e97c8b6d8d8719823416e3f93 100644 (file)
--- a/samtools/bedcov.c.pysam.c
+++ b/samtools/bedcov.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <zlib.h>
  #include <stdio.h>
  #include <ctype.h>
@@ -88,9 +90,9 @@ int main_bedcov(int argc, char *argv[])
          if (usage) break;
      }
      if (usage || optind + 2 > argc) {
-        fprintf(pysamerr, "Usage: samtools bedcov [options] <in.bed> <in1.bam> [...]\n\n");
-        fprintf(pysamerr, "  -Q INT       Only count bases of at least INT quality [0]\n");
-        sam_global_opt_help(pysamerr, "-.--.");
+        fprintf(pysam_stderr, "Usage: samtools bedcov [options] <in.bed> <in1.bam> [...]\n\n");
+        fprintf(pysam_stderr, "  -Q INT       Only count bases of at least INT quality [0]\n");
+        sam_global_opt_help(pysam_stderr, "-.--.");
          return 1;
      }
      memset(&str, 0, sizeof(kstring_t));
@@ -104,13 +106,13 @@ int main_bedcov(int argc, char *argv[])
          if (aux[i]->fp)
              idx[i] = sam_index_load(aux[i]->fp, argv[i+optind+1]);
          if (aux[i]->fp == 0 || idx[i] == 0) {
-            fprintf(pysamerr, "ERROR: fail to open index BAM file '%s'\n", argv[i+optind+1]);
+            fprintf(pysam_stderr, "ERROR: fail to open index BAM file '%s'\n", argv[i+optind+1]);
              return 2;
          }
          // TODO bgzf_set_cache_size(aux[i]->fp, 20);
          aux[i]->header = sam_hdr_read(aux[i]->fp);
          if (aux[i]->header == NULL) {
-            fprintf(pysamerr, "ERROR: failed to read header for '%s'\n",
+            fprintf(pysam_stderr, "ERROR: failed to read header for '%s'\n",
                      argv[i+optind+1]);
              return 2;
          }
@@ -153,12 +155,12 @@ int main_bedcov(int argc, char *argv[])
              kputc('\t', &str);
              kputl(cnt[i], &str);
          }
-        puts(str.s);
+        fputs(str.s, pysam_stdout) & fputc('\n', pysam_stdout);
          bam_mplp_destroy(mplp);
          continue;
  
  bed_error:
-        fprintf(pysamerr, "Errors in BED line '%s'\n", str.s);
+        fprintf(pysam_stderr, "Errors in BED line '%s'\n", str.s);
      }
      free(n_plp); free(plp);
      ks_destroy(ks);
diff --git a/samtools/bedidx.c b/samtools/bedidx.c

index 627783eac16c1f51597fb2bea0920d680c66928e..c1954ad79f64fd567fd43e6c786ca8e2b509f092 100644 (file)
--- a/samtools/bedidx.c
+++ b/samtools/bedidx.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <stdint.h>
  #include <string.h>
diff --git a/samtools/bedidx.c.pysam.c b/samtools/bedidx.c.pysam.c

index 716aee5c8c868f0d12388377867bd2665139eafe..5b7df0c31124c17016c6295cead2c8065b9bed89 100644 (file)
--- a/samtools/bedidx.c.pysam.c
+++ b/samtools/bedidx.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <stdint.h>
  #include <string.h>
@@ -199,7 +201,7 @@ void *bed_read(const char *fn)
              // has called their reference "browser" or "track".
              if (0 == strcmp(ref, "browser")) continue;
              if (0 == strcmp(ref, "track")) continue;
-            fprintf(pysamerr, "[bed_read] Parse error reading %s at line %u\n",
+            fprintf(pysam_stderr, "[bed_read] Parse error reading %s at line %u\n",
                      fn, line);
              goto fail_no_msg;
          }
@@ -236,7 +238,7 @@ void *bed_read(const char *fn)
      bed_index(h);
      return h;
   fail:
-    fprintf(pysamerr, "[bed_read] Error reading %s : %s\n", fn, strerror(errno));
+    fprintf(pysam_stderr, "[bed_read] Error reading %s : %s\n", fn, strerror(errno));
   fail_no_msg:
      if (ks) ks_destroy(ks);
      if (fp) gzclose(fp);
diff --git a/samtools/cut_target.c b/samtools/cut_target.c

index 56ec9f925ae562811725bf805e9f9062be336b3f..71a6c85387038eadbb083e5c0975d54792717049 100644 (file)
--- a/samtools/cut_target.c
+++ b/samtools/cut_target.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <unistd.h>
  #include <stdlib.h>
  #include <string.h>
diff --git a/samtools/cut_target.c.pysam.c b/samtools/cut_target.c.pysam.c

index 92b15a0da8304c091ebc6e377df9209dfa644c48..82a4c4c04c789715f85e7e9a38665a20b8204cab 100644 (file)
--- a/samtools/cut_target.c.pysam.c
+++ b/samtools/cut_target.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <unistd.h>
  #include <stdlib.h>
  #include <string.h>
@@ -126,18 +128,18 @@ static void process_cns(bam_hdr_t *h, int tid, int l, uint16_t *cns)
          if (i == l || ((b[i]>>2&3) == 0 && s >= 0)) {
              if (s >= 0) {
                  int j;
-                printf("%s:%d-%d\t0\t%s\t%d\t60\t%dM\t*\t0\t0\t", h->target_name[tid], s+1, i, h->target_name[tid], s+1, i-s);
+                fprintf(pysam_stdout, "%s:%d-%d\t0\t%s\t%d\t60\t%dM\t*\t0\t0\t", h->target_name[tid], s+1, i, h->target_name[tid], s+1, i-s);
                  for (j = s; j < i; ++j) {
                      int c = cns[j]>>8;
-                    if (c == 0) putchar('N');
-                    else putchar("ACGT"[c&3]);
+                    if (c == 0) fputc('N', pysam_stdout);
+                    else fputc("ACGT"[c&3], pysam_stdout);
                  }
-                putchar('\t');
+                fputc('\t', pysam_stdout);
                  for (j = s; j < i; ++j)
-                    putchar(33 + (cns[j]>>8>>2));
-                putchar('\n');
+                    fputc(33 + (cns[j]>>8>>2), pysam_stdout);
+                fputc('\n', pysam_stdout);
              }
-            //if (s >= 0) printf("%s\t%d\t%d\t%d\n", h->target_name[tid], s, i, i - s);
+            //if (s >= 0) fprintf(pysam_stdout, "%s\t%d\t%d\t%d\n", h->target_name[tid], s, i, i - s);
              s = -1;
          } else if ((b[i]>>2&3) && s < 0) s = i;
      }
@@ -197,18 +199,18 @@ int main_cut_target(int argc, char *argv[])
      }
      if (ga.reference) {
          g.fai = fai_load(ga.reference);
-        if (g.fai == 0) fprintf(pysamerr, "[%s] fail to load the fasta index.\n", __func__);
+        if (g.fai == 0) fprintf(pysam_stderr, "[%s] fail to load the fasta index.\n", __func__);
      }
      if (usage || argc == optind) {
-        fprintf(pysamerr, "Usage: samtools targetcut [-Q minQ] [-i inPen] [-0 em0] [-1 em1] [-2 em2] <in.bam>\n");
-        sam_global_opt_help(pysamerr, "-.--f");
+        fprintf(pysam_stderr, "Usage: samtools targetcut [-Q minQ] [-i inPen] [-0 em0] [-1 em1] [-2 em2] <in.bam>\n");
+        sam_global_opt_help(pysam_stderr, "-.--f");
          return 1;
      }
      l = max_l = 0; cns = 0;
      g.fp = sam_open_format(argv[optind], "r", &ga.in);
      g.h = sam_hdr_read(g.fp);
      if (g.h == NULL) {
-        fprintf(pysamerr, "Couldn't read header for '%s'\n", argv[optind]);
+        fprintf(pysam_stderr, "Couldn't read header for '%s'\n", argv[optind]);
          sam_close(g.fp);
          return 1;
      }
diff --git a/samtools/dict.c b/samtools/dict.c

index 241d11969d0c3a1f26961c11125d3abe4614b52c..fa64a161cc1d92779b72adb9cd4cca9f4c9d2c20 100644 (file)
--- a/samtools/dict.c
+++ b/samtools/dict.c
@@ -22,7 +22,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
+#include <unistd.h>
  #include <zlib.h>
  #include <getopt.h>
  #include "htslib/kseq.h"
@@ -140,7 +143,7 @@ int dict_main(int argc, char *argv[])
      char *fname = NULL;
      if ( optind>=argc )
      {
-        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
+        if ( !isatty(STDIN_FILENO) ) fname = "-";  // reading from stdin
          else return dict_usage();
      }
      else fname = argv[optind];
diff --git a/samtools/dict.c.pysam.c b/samtools/dict.c.pysam.c

index 6b4a25a5c1af170d466c71d096ddb8eaf71d7ec6..53688518dd2f6c7e3300b22dbbab93a59ff7270a 100644 (file)
--- a/samtools/dict.c.pysam.c
+++ b/samtools/dict.c.pysam.c
@@ -24,7 +24,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
+#include <unistd.h>
  #include <zlib.h>
  #include <getopt.h>
  #include "htslib/kseq.h"
@@ -51,14 +54,14 @@ static void write_dict(const char *fn, args_t *args)
  
      fp = strcmp(fn, "-") ? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
      if (fp == 0) {
-        fprintf(pysamerr, "dict: %s: No such file or directory\n", fn);
+        fprintf(pysam_stderr, "dict: %s: No such file or directory\n", fn);
          exit(1);
      }
-    FILE *out = stdout;
+    FILE *out = pysam_stdout;
      if (args->output_fname) {
          out = fopen(args->output_fname, "w");
          if (out == NULL) {
-          fprintf(pysamerr, "dict: %s: Cannot open file for writing\n", args->output_fname);
+          fprintf(pysam_stderr, "dict: %s: Cannot open file for writing\n", args->output_fname);
            exit(1);
          }
      }
@@ -97,15 +100,15 @@ static void write_dict(const char *fn, args_t *args)
  
  static int dict_usage(void)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "About:   Create a sequence dictionary file from a fasta file\n");
-    fprintf(pysamerr, "Usage:   samtools dict [options] <file.fa|file.fa.gz>\n\n");
-    fprintf(pysamerr, "Options: -a, --assembly STR    assembly\n");
-    fprintf(pysamerr, "         -H, --no-header       do not print @HD line\n");
-    fprintf(pysamerr, "         -o, --output STR      file to write out dict file [stdout]\n");
-    fprintf(pysamerr, "         -s, --species STR     species\n");
-    fprintf(pysamerr, "         -u, --uri STR         URI [file:///abs/path/to/file.fa]\n");
-    fprintf(pysamerr, "\n");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "About:   Create a sequence dictionary file from a fasta file\n");
+    fprintf(pysam_stderr, "Usage:   samtools dict [options] <file.fa|file.fa.gz>\n\n");
+    fprintf(pysam_stderr, "Options: -a, --assembly STR    assembly\n");
+    fprintf(pysam_stderr, "         -H, --no-header       do not print @HD line\n");
+    fprintf(pysam_stderr, "         -o, --output STR      file to write out dict file [pysam_stdout]\n");
+    fprintf(pysam_stderr, "         -s, --species STR     species\n");
+    fprintf(pysam_stderr, "         -u, --uri STR         URI [file:///abs/path/to/file.fa]\n");
+    fprintf(pysam_stderr, "\n");
      return 1;
  }
  
@@ -142,7 +145,7 @@ int dict_main(int argc, char *argv[])
      char *fname = NULL;
      if ( optind>=argc )
      {
-        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
+        if ( !isatty(STDIN_FILENO) ) fname = "-";  // reading from stdin
          else return dict_usage();
      }
      else fname = argv[optind];
diff --git a/samtools/errmod.c b/samtools/errmod.c

index f8b5aa7d9a4adf25d576b35ddad2deafbb82c532..c37c6d1cc31a03219facf74a5cb75c89b7f81762 100644 (file)
--- a/samtools/errmod.c
+++ b/samtools/errmod.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <math.h>
  #include "errmod.h"
  #include "htslib/ksort.h"
diff --git a/samtools/errmod.c.pysam.c b/samtools/errmod.c.pysam.c

index fce3042f852d6c6bbe753acbffe247cf0e890f94..12176cf78e75c15f3ff7ad86deb4fb5643366f2c 100644 (file)
--- a/samtools/errmod.c.pysam.c
+++ b/samtools/errmod.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <math.h>
  #include "errmod.h"
  #include "htslib/ksort.h"
diff --git a/samtools/faidx.c b/samtools/faidx.c

index dcc10419bdeb070d51fa3226526653075c43b3ec..336bde5eb590eacaa475737e70b2906a21802cdb 100644 (file)
--- a/samtools/faidx.c
+++ b/samtools/faidx.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <ctype.h>
  #include <string.h>
  #include <stdlib.h>
@@ -67,7 +69,9 @@ int faidx_main(int argc, char *argv[])
          error(NULL);
      if ( argc==2 )
      {
-        fai_build(argv[optind]);
+        if (fai_build(argv[optind]) != 0) {
+            error("Could not build fai index %s.fai\n", argv[optind]);
+        }
          return 0;
      }
  
diff --git a/samtools/faidx.c.pysam.c b/samtools/faidx.c.pysam.c

index 971db3b5120884c8646abd1cf06d9b12d689e9dc..ac066473f68db1f4e16695b643982cf898a2a854 100644 (file)
--- a/samtools/faidx.c.pysam.c
+++ b/samtools/faidx.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <ctype.h>
  #include <string.h>
  #include <stdlib.h>
@@ -40,14 +42,14 @@ static void error(const char *format, ...)
      {
          va_list ap;
          va_start(ap, format);
-        vfprintf(pysamerr, format, ap);
+        vfprintf(pysam_stderr, format, ap);
          va_end(ap);
      }
      else
      {
-        fprintf(pysamerr, "\n");
-        fprintf(pysamerr, "Usage:   samtools faidx <file.fa|file.fa.gz> [<reg> [...]]\n");
-        fprintf(pysamerr, "\n");
+        fprintf(pysam_stderr, "\n");
+        fprintf(pysam_stderr, "Usage:   samtools faidx <file.fa|file.fa.gz> [<reg> [...]]\n");
+        fprintf(pysam_stderr, "\n");
      }
      exit(-1);
  }
@@ -69,7 +71,9 @@ int faidx_main(int argc, char *argv[])
          error(NULL);
      if ( argc==2 )
      {
-        fai_build(argv[optind]);
+        if (fai_build(argv[optind]) != 0) {
+            error("Could not build fai index %s.fai\n", argv[optind]);
+        }
          return 0;
      }
  
@@ -78,15 +82,15 @@ int faidx_main(int argc, char *argv[])
  
      while ( ++optind<argc )
      {
-        printf(">%s\n", argv[optind]);
+        fprintf(pysam_stdout, ">%s\n", argv[optind]);
          int i, j, seq_len;
          char *seq = fai_fetch(fai, argv[optind], &seq_len);
          if ( seq_len < 0 ) error("Failed to fetch sequence in %s\n", argv[optind]);
          for (i=0; i<seq_len; i+=60)
          {
              for (j=0; j<60 && i+j<seq_len; j++)
-                putchar(seq[i+j]);
-            putchar('\n');
+                fputc(seq[i+j], pysam_stdout);
+            fputc('\n', pysam_stdout);
          }
          free(seq);
      }
diff --git a/samtools/kprobaln.c b/samtools/kprobaln.c

index c7468034896bd79882082bdc3aaaf390878420a0..e31970813f96e808234cd1a3d4b7eaf7748fd9e0 100644 (file)
--- a/samtools/kprobaln.c
+++ b/samtools/kprobaln.c
@@ -23,6 +23,8 @@
     SOFTWARE.
  */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <stdio.h>
  #include <string.h>
diff --git a/samtools/kprobaln.c.pysam.c b/samtools/kprobaln.c.pysam.c

index 63dad4cbed307fd6077398955de769923e2d8edb..630b730e1dda332de8fb11a3786e7d29492b5c2e 100644 (file)
--- a/samtools/kprobaln.c.pysam.c
+++ b/samtools/kprobaln.c.pysam.c
@@ -25,6 +25,8 @@
     SOFTWARE.
  */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <stdio.h>
  #include <string.h>
@@ -144,7 +146,7 @@ int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_quer
                         fi[u+1] = EI * (m[1] * fi1[v10+0] + m[4] * fi1[v10+1]);
                         fi[u+2] = m[2] * fi[v01+0] + m[8] * fi[v01+2];
                         sum += fi[u] + fi[u+1] + fi[u+2];
-//                     fprintf(pysamerr, "F (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, fi[u], fi[u+1], fi[u+2]); // DEBUG
+//                     fprintf(pysam_stderr, "F (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, fi[u], fi[u+1], fi[u+2]); // DEBUG
                 }
                 // rescale
                 s[i] = sum;
@@ -199,7 +201,7 @@ int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_quer
                         bi[u+0] = e * m[0] + EI * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been foled into e.
                         bi[u+1] = e * m[3] + EI * m[4] * bi1[v10+1];
                         bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y;
-//                     fprintf(pysamerr, "B (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, bi[u], bi[u+1], bi[u+2]); // DEBUG
+//                     fprintf(pysam_stderr, "B (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, bi[u], bi[u+1], bi[u+2]); // DEBUG
                 }
                 // rescale
                 set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2;
@@ -236,7 +238,7 @@ int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_quer
                 if (state) state[i-1] = max_k;
                 if (q) k = (int)(-4.343 * log(1. - max) + .499), q[i-1] = k > 100? 99 : k;
  #ifdef _MAIN
-               fprintf(pysamerr, "(%.10lg,%.10lg) (%d,%d:%c,%c:%d) %lg\n", pb, sum, i-1, max_k>>2,
+               fprintf(pysam_stderr, "(%.10lg,%.10lg) (%d,%d:%c,%c:%d) %lg\n", pb, sum, i-1, max_k>>2,
                                 "ACGT"[query[i]], "ACGT"[ref[(max_k>>2)+1]], max_k&3, max); // DEBUG
  #endif
         }
@@ -250,7 +252,7 @@ int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_quer
  
  #ifdef _MAIN
  #include <unistd.h>
-int main(int argc, char *argv[])
+int samtools_kprobaln_main(int argc, char *argv[])
  {
         uint8_t conv[256], *iqual, *ref, *query;
         int c, l_ref, l_query, i, q = 30, b = 10, P;
@@ -261,7 +263,7 @@ int main(int argc, char *argv[])
                 }
         }
         if (optind + 2 > argc) {
-               fprintf(pysamerr, "Usage: %s [-q %d] [-b %d] <ref> <query>\n", argv[0], q, b); // example: acttc attc
+               fprintf(pysam_stderr, "Usage: %s [-q %d] [-b %d] <ref> <query>\n", argv[0], q, b); // example: acttc attc
                 return 1;
         }
         memset(conv, 4, 256);
@@ -275,7 +277,7 @@ int main(int argc, char *argv[])
         memset(iqual, q, l_query);
         kpa_par_def.bw = b;
         P = kpa_glocal(ref, l_ref, query, l_query, iqual, &kpa_par_alt, 0, 0);
-       fprintf(pysamerr, "%d\n", P);
+       fprintf(pysam_stderr, "%d\n", P);
         free(iqual);
         return 0;
  }
diff --git a/samtools/misc/ace2sam.c b/samtools/misc/ace2sam.c

index 24b6933860c3eb45d2c5ec1d3c1c7b6411f00180..77b9993cc64cf142a16e6bc1f3863e1c0eb6c573 100644 (file)
--- a/samtools/misc/ace2sam.c
+++ b/samtools/misc/ace2sam.c
@@ -23,6 +23,8 @@
     SOFTWARE.
  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
diff --git a/samtools/misc/ace2sam.c.pysam.c b/samtools/misc/ace2sam.c.pysam.c

index a7f92e260f55b0851db33552ed97022c43e9adf5..a663399c250436b787928316ce16912aa1f83214 100644 (file)
--- a/samtools/misc/ace2sam.c.pysam.c
+++ b/samtools/misc/ace2sam.c.pysam.c
@@ -25,6 +25,8 @@
     SOFTWARE.
  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
@@ -49,7 +51,7 @@ KSTREAM_INIT(gzFile, gzread, 16384)
  // a fatal error
  static void fatal(const char *msg)
  {
-    fprintf(pysamerr, "E %s\n", msg);
+    fprintf(pysam_stderr, "E %s\n", msg);
      exit(1);
  }
  // remove pads
@@ -64,7 +66,7 @@ static void remove_pads(const kstring_t *src, kstring_t *dst)
      dst->l = j;
  }
  
-int main(int argc, char *argv[])
+int samtools_ace2sam_main(int argc, char *argv[])
  {
      gzFile fp;
      kstream_t *ks;
@@ -80,13 +82,13 @@ int main(int argc, char *argv[])
          }
      }
      if (argc == optind) {
-        fprintf(pysamerr, "\nUsage:   ace2sam [-pc] <in.ace>\n\n");
-        fprintf(pysamerr, "Options: -p     output padded SAM\n");
-        fprintf(pysamerr, "         -c     write the contig sequence in SAM\n\n");
-        fprintf(pysamerr, "Notes: 1. Fields must appear in the following order: (CO->[BQ]->(AF)->(RD->QA))\n");
-        fprintf(pysamerr, "       2. The order of reads in AF and in RD must be identical\n");
-        fprintf(pysamerr, "       3. Except in BQ, words and numbers must be separated by a single SPACE or TAB\n");
-        fprintf(pysamerr, "       4. This program writes the headerless SAM to stdout and header to pysamerr\n\n");
+        fprintf(pysam_stderr, "\nUsage:   ace2sam [-pc] <in.ace>\n\n");
+        fprintf(pysam_stderr, "Options: -p     output padded SAM\n");
+        fprintf(pysam_stderr, "         -c     write the contig sequence in SAM\n\n");
+        fprintf(pysam_stderr, "Notes: 1. Fields must appear in the following order: (CO->[BQ]->(AF)->(RD->QA))\n");
+        fprintf(pysam_stderr, "       2. The order of reads in AF and in RD must be identical\n");
+        fprintf(pysam_stderr, "       3. Except in BQ, words and numbers must be separated by a single SPACE or TAB\n");
+        fprintf(pysam_stderr, "       4. This program writes the headerless SAM to pysam_stdout and header to pysam_stderr\n\n");
          return 1;
      }
  
@@ -111,14 +113,14 @@ int main(int argc, char *argv[])
                  if (t[1].s[i] != '*') ++k;
              }
              // write out the SAM header and contig sequences
-            fprintf(pysamerr, "H @SQ\tSN:%s\tLN:%llu\n", t[0].s, (unsigned long long)(t[is_padded?1:2].l)); // The SAM header line
+            fprintf(pysam_stderr, "H @SQ\tSN:%s\tLN:%llu\n", t[0].s, (unsigned long long)(t[is_padded?1:2].l)); // The SAM header line
              cns = &t[is_padded?1:2];
-            fprintf(pysamerr, "S >%s\n", t[0].s);
+            fprintf(pysam_stderr, "S >%s\n", t[0].s);
              for (i = 0; i < cns->l; i += LINE_LEN) {
-                fputs("S ", pysamerr);
+                fputs("S ", pysam_stderr);
                  for (k = 0; k < LINE_LEN && i + k < cns->l; ++k)
-                    fputc(cns->s[i + k], pysamerr);
-                fputc('\n', pysamerr);
+                    fputc(cns->s[i + k], pysam_stderr);
+                fputc('\n', pysam_stderr);
              }
  
  #define __padded2cigar(sp) do { \
@@ -152,7 +154,7 @@ int main(int argc, char *argv[])
              if (write_cns) t[4].s[--t[4].l] = 0; // remove the trailing "*"
              for (i = 0; i < t[2].l; ++i) { // read the consensus quality
                  int q;
-                if (ks_getuntil(ks, 0, &s, &dret) < 0) fprintf(pysamerr, "E truncated contig quality\n");
+                if (ks_getuntil(ks, 0, &s, &dret) < 0) fprintf(pysam_stderr, "E truncated contig quality\n");
                  if (s.l) {
                      q = atoi(s.s) + 33;
                      if (q > 126) q = 126;
@@ -161,12 +163,12 @@ int main(int argc, char *argv[])
              }
              if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
              ks_getuntil(ks, '\n', &s, &dret); // skip the empty line
-            if (write_cns) puts(t[4].s); t[4].l = 0;
+            if (write_cns) fputs(t[4].s, pysam_stdout) & fputc('\n', pysam_stdout); t[4].l = 0;
          } else if (strcmp(s.s, "AF") == 0) { // padded read position
              int reversed, neg, pos;
              if (t[0].l == 0) fatal("come to 'AF' before reading 'CO'");
              if (write_cns) {
-                if (t[4].l) puts(t[4].s);
+                if (t[4].l) fputs(t[4].s, pysam_stdout) & fputc('\n', pysam_stdout);
                  t[4].l = 0;
              }
              ks_getuntil(ks, 0, &s, &dret); // read name
@@ -239,7 +241,7 @@ int main(int argc, char *argv[])
              kputs("\t*\t0\t0\t", &t[4]); // empty MRNM, MPOS and TLEN
              kputsn(t[3].s, t[3].l, &t[4]); // unpadded SEQ
              kputs("\t*", &t[4]); // QUAL
-            puts(t[4].s); // print to stdout
+            fputs(t[4].s, pysam_stdout) & fputc('\n', pysam_stdout); // print to pysam_stdout
              ++af_i;
          } else if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
      }
diff --git a/samtools/padding.c b/samtools/padding.c

index 436d7161681fbcfaba4085b67c42e796c747dd4b..cea79cfa68c0126b7afef2184a67c640d04c3810 100644 (file)
--- a/samtools/padding.c
+++ b/samtools/padding.c
@@ -1,7 +1,7 @@
  /*  padding.c -- depad subcommand.
  
      Copyright (C) 2011, 2012 Broad Institute.
-    Copyright (C) 2014, 2015 Genome Research Ltd.
+    Copyright (C) 2014-2016 Genome Research Ltd.
      Portions copyright (C) 2012, 2013 Peter Cock, The James Hutton Institute.
  
      Author: Heng Li <lh3@sanger.ac.uk>
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <string.h>
  #include <assert.h>
  #include <unistd.h>
@@ -32,6 +34,7 @@ DEALINGS IN THE SOFTWARE.  */
  #include <htslib/faidx.h>
  #include "sam_header.h"
  #include "sam_opts.h"
+#include "samtools.h"
  
  #define bam_reg2bin(b,e) hts_reg2bin((b),(e), 14, 5)
  
@@ -191,6 +194,10 @@ int bam_pad2unpad(samFile *in, samFile *out,  bam_hdr_t *h, faidx_t *fai)
      int ret = 0, n2 = 0, m2 = 0, *posmap = 0;
  
      b = bam_init1();
+    if (!b) {
+        fprintf(stderr, "[depad] Couldn't allocate bam struct\n");
+        return -1;
+    }
      r.l = r.m = q.l = q.m = 0; r.s = q.s = 0;
      int read_ret;
      while ((read_ret = sam_read1(in, h, b)) >= 0) { // read one alignment from `in'
@@ -357,7 +364,10 @@ int bam_pad2unpad(samFile *in, samFile *out,  bam_hdr_t *h, faidx_t *fai)
          b->core.bin = bam_reg2bin(b->core.pos, bam_endpos(b));
  
      next_seq:
-        sam_write1(out, h, b);
+        if (sam_write1(out, h, b) < 0) {
+            print_error_errno("depad", "error writing to output");
+            return -1;
+        }
      }
      if (read_ret < -1) {
          fprintf(stderr, "[depad] truncated file.\n");
@@ -525,7 +535,7 @@ int main_pad2unpad(int argc, char *argv[])
      }
      // open file handlers
      if ((in = sam_open_format(argv[optind], in_mode, &ga.in)) == 0) {
-        fprintf(stderr, "[depad] failed to open \"%s\" for reading.\n", argv[optind]);
+        print_error_errno("depad", "failed to open \"%s\" for reading", argv[optind]);
          ret = 1;
          goto depad_end;
      }
@@ -548,7 +558,7 @@ int main_pad2unpad(int argc, char *argv[])
      char wmode[2];
      strcat(out_mode, sam_open_mode(wmode, fn_out, NULL)==0 ? wmode : "b");
      if ((out = sam_open_format(fn_out? fn_out : "-", out_mode, &ga.out)) == 0) {
-        fprintf(stderr, "[depad] failed to open \"%s\" for writing.\n", fn_out? fn_out : "standard output");
+        print_error_errno("depad", "failed to open \"%s\" for writing", fn_out? fn_out : "standard output");
          ret = 1;
          goto depad_end;
      }
@@ -565,14 +575,17 @@ int main_pad2unpad(int argc, char *argv[])
      }
  
      // Do the depad
-    ret = bam_pad2unpad(in, out, h, fai);
+    if (bam_pad2unpad(in, out, h, fai) != 0) ret = 1;
  
  depad_end:
      // close files, free and return
      if (fai) fai_destroy(fai);
      if (h) bam_hdr_destroy(h);
-    sam_close(in);
-    sam_close(out);
+    if (in) sam_close(in);
+    if (out && sam_close(out) < 0) {
+        fprintf(stderr, "[depad] error on closing output file.\n");
+        ret = 1;
+    }
      free(fn_list); free(fn_out);
      return ret;
  }
@@ -593,12 +606,13 @@ static int usage(int is_long_help)
      sam_global_opt_help(stderr, "-...-");
  
      if (is_long_help)
-        fprintf(stderr, "Notes:\n\
-\n\
-  1. Requires embedded reference sequences (before the reads for that reference),\n\
-     or ideally a FASTA file of the padded reference sequences (via the -T argument).\n\
-\n\
-  2. The input padded alignment read's CIGAR strings must not use P or I operators.\n\
-\n");
+        fprintf(stderr,
+"Notes:\n"
+"\n"
+"1. Requires embedded reference sequences (before the reads for that reference),\n"
+"   or ideally a FASTA file of the padded reference sequences (via a -T option).\n"
+"\n"
+"2. Input padded alignment reads' CIGAR strings must not use P or I operators.\n"
+"\n");
      return 1;
  }
diff --git a/samtools/padding.c.pysam.c b/samtools/padding.c.pysam.c

index fd889f3d098b5359a842ac6436ed698fc015ca2e..9f85c9580460a7b20ca5bbffb1aa9a68ee072205 100644 (file)
--- a/samtools/padding.c.pysam.c
+++ b/samtools/padding.c.pysam.c
@@ -3,7 +3,7 @@
  /*  padding.c -- depad subcommand.
  
      Copyright (C) 2011, 2012 Broad Institute.
-    Copyright (C) 2014, 2015 Genome Research Ltd.
+    Copyright (C) 2014-2016 Genome Research Ltd.
      Portions copyright (C) 2012, 2013 Peter Cock, The James Hutton Institute.
  
      Author: Heng Li <lh3@sanger.ac.uk>
@@ -26,6 +26,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <string.h>
  #include <assert.h>
  #include <unistd.h>
@@ -34,6 +36,7 @@ DEALINGS IN THE SOFTWARE.  */
  #include <htslib/faidx.h>
  #include "sam_header.h"
  #include "sam_opts.h"
+#include "samtools.h"
  
  #define bam_reg2bin(b,e) hts_reg2bin((b),(e), 14, 5)
  
@@ -96,10 +99,10 @@ static int unpad_seq(bam1_t *b, kstring_t *s)
              for (i = 0; i < ol; ++i) s->s[s->l++] = 0;
              if (0 == cigar_n_warning) {
                  cigar_n_warning = -1;
-                fprintf(pysamerr, "[depad] WARNING: CIGAR op N treated as op D in read %s\n", bam_get_qname(b));
+                fprintf(pysam_stderr, "[depad] WARNING: CIGAR op N treated as op D in read %s\n", bam_get_qname(b));
              }
          } else {
-            fprintf(pysamerr, "[depad] ERROR: Didn't expect CIGAR op %c in read %s\n", BAM_CIGAR_STR[op], bam_get_qname(b));
+            fprintf(pysam_stderr, "[depad] ERROR: Didn't expect CIGAR op %c in read %s\n", BAM_CIGAR_STR[op], bam_get_qname(b));
              return -1;
          }
      }
@@ -114,7 +117,7 @@ int load_unpadded_ref(faidx_t *fai, char *ref_name, int ref_len, kstring_t *seq)
  
      fai_ref = fai_fetch(fai, ref_name, &fai_ref_len);
      if (fai_ref_len != ref_len) {
-        fprintf(pysamerr, "[depad] ERROR: FASTA sequence %s length %i, expected %i\n", ref_name, fai_ref_len, ref_len);
+        fprintf(pysam_stderr, "[depad] ERROR: FASTA sequence %s length %i, expected %i\n", ref_name, fai_ref_len, ref_len);
          free(fai_ref);
          return -1;
      }
@@ -128,7 +131,7 @@ int load_unpadded_ref(faidx_t *fai, char *ref_name, int ref_len, kstring_t *seq)
          } else {
              int i = seq_nt16_table[(int)base];
              if (i == 0 || i==16) { // Equals maps to 0, anything unexpected to 16
-                fprintf(pysamerr, "[depad] ERROR: Invalid character %c (ASCII %i) in FASTA sequence %s\n", base, (int)base, ref_name);
+                fprintf(pysam_stderr, "[depad] ERROR: Invalid character %c (ASCII %i) in FASTA sequence %s\n", base, (int)base, ref_name);
                  free(fai_ref);
                  return -1;
              }
@@ -149,19 +152,19 @@ int get_unpadded_len(faidx_t *fai, char *ref_name, int padded_len)
  
      fai_ref = fai_fetch(fai, ref_name, &fai_ref_len);
      if (fai_ref_len != padded_len) {
-        fprintf(pysamerr, "[depad] ERROR: FASTA sequence '%s' length %i, expected %i\n", ref_name, fai_ref_len, padded_len);
+        fprintf(pysam_stderr, "[depad] ERROR: FASTA sequence '%s' length %i, expected %i\n", ref_name, fai_ref_len, padded_len);
          free(fai_ref);
          return -1;
      }
      for (k = 0; k < padded_len; ++k) {
-        //fprintf(pysamerr, "[depad] checking base %i of %i or %i\n", k+1, ref_len, strlen(fai_ref));
+        //fprintf(pysam_stderr, "[depad] checking base %i of %i or %i\n", k+1, ref_len, strlen(fai_ref));
          base = fai_ref[k];
          if (base == '-' || base == '*') {
              gaps += 1;
          } else {
              int i = seq_nt16_table[(int)base];
              if (i == 0 || i==16) { // Equals maps to 0, anything unexpected to 16
-                fprintf(pysamerr, "[depad] ERROR: Invalid character %c (ASCII %i) in FASTA sequence '%s'\n", base, (int)base, ref_name);
+                fprintf(pysam_stderr, "[depad] ERROR: Invalid character %c (ASCII %i) in FASTA sequence '%s'\n", base, (int)base, ref_name);
                  free(fai_ref);
                  return -1;
              }
@@ -193,6 +196,10 @@ int bam_pad2unpad(samFile *in, samFile *out,  bam_hdr_t *h, faidx_t *fai)
      int ret = 0, n2 = 0, m2 = 0, *posmap = 0;
  
      b = bam_init1();
+    if (!b) {
+        fprintf(pysam_stderr, "[depad] Couldn't allocate bam struct\n");
+        return -1;
+    }
      r.l = r.m = q.l = q.m = 0; r.s = q.s = 0;
      int read_ret;
      while ((read_ret = sam_read1(in, h, b)) >= 0) { // read one alignment from `in'
@@ -203,20 +210,20 @@ int bam_pad2unpad(samFile *in, samFile *out,  bam_hdr_t *h, faidx_t *fai)
          uint32_t *cigar = bam_get_cigar(b);
          n2 = 0;
          if (b->core.pos == 0 && b->core.tid >= 0 && strcmp(bam_get_qname(b), h->target_name[b->core.tid]) == 0) {
-            // fprintf(pysamerr, "[depad] Found embedded reference '%s'\n", bam_get_qname(b));
+            // fprintf(pysam_stderr, "[depad] Found embedded reference '%s'\n", bam_get_qname(b));
              r_tid = b->core.tid;
              if (0!=unpad_seq(b, &r)) {
-                fprintf(pysamerr, "[depad] ERROR: Problem parsing SEQ and/or CIGAR in reference %s\n", bam_get_qname(b));
+                fprintf(pysam_stderr, "[depad] ERROR: Problem parsing SEQ and/or CIGAR in reference %s\n", bam_get_qname(b));
                  return -1;
              };
              if (h->target_len[r_tid] != r.l) {
-                fprintf(pysamerr, "[depad] ERROR: (Padded) length of '%s' is %u in BAM header, but %llu in embedded reference\n", bam_get_qname(b), h->target_len[r_tid], (unsigned long long)(r.l));
+                fprintf(pysam_stderr, "[depad] ERROR: (Padded) length of '%s' is %u in BAM header, but %llu in embedded reference\n", bam_get_qname(b), h->target_len[r_tid], (unsigned long long)(r.l));
                  return -1;
              }
              if (fai) {
                  // Check the embedded reference matches the FASTA file
                  if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &q)) {
-                    fprintf(pysamerr, "[depad] ERROR: Failed to load embedded reference '%s' from FASTA\n", h->target_name[b->core.tid]);
+                    fprintf(pysam_stderr, "[depad] ERROR: Failed to load embedded reference '%s' from FASTA\n", h->target_name[b->core.tid]);
                      return -1;
                  }
                  assert(r.l == q.l);
@@ -224,7 +231,7 @@ int bam_pad2unpad(samFile *in, samFile *out,  bam_hdr_t *h, faidx_t *fai)
                  for (i = 0; i < r.l; ++i) {
                      if (r.s[i] != q.s[i]) {
                          // Show gaps as ASCII 45
-                        fprintf(pysamerr, "[depad] ERROR: Embedded sequence and reference FASTA don't match for %s base %i, '%c' vs '%c'\n",
+                        fprintf(pysam_stderr, "[depad] ERROR: Embedded sequence and reference FASTA don't match for %s base %i, '%c' vs '%c'\n",
                              h->target_name[b->core.tid], i+1,
                              r.s[i] ? seq_nt16_str[(int)r.s[i]] : 45,
                              q.s[i] ? seq_nt16_str[(int)q.s[i]] : 45);
@@ -238,25 +245,25 @@ int bam_pad2unpad(samFile *in, samFile *out,  bam_hdr_t *h, faidx_t *fai)
          } else if (b->core.n_cigar > 0) {
              int i, k, op;
              if (b->core.tid < 0) {
-                fprintf(pysamerr, "[depad] ERROR: Read '%s' has CIGAR but no RNAME\n", bam_get_qname(b));
+                fprintf(pysam_stderr, "[depad] ERROR: Read '%s' has CIGAR but no RNAME\n", bam_get_qname(b));
                  return -1;
              } else if (b->core.tid == r_tid) {
                  ; // good case, reference available
-                //fprintf(pysamerr, "[depad] Have ref '%s' for read '%s'\n", h->target_name[b->core.tid], bam_get_qname(b));
+                //fprintf(pysam_stderr, "[depad] Have ref '%s' for read '%s'\n", h->target_name[b->core.tid], bam_get_qname(b));
              } else if (fai) {
                  if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &r)) {
-                    fprintf(pysamerr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]);
+                    fprintf(pysam_stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]);
                      return -1;
                  }
                  posmap = update_posmap(posmap, r);
                  r_tid = b->core.tid;
-                // fprintf(pysamerr, "[depad] Loaded %s from FASTA file\n", h->target_name[b->core.tid]);
+                // fprintf(pysam_stderr, "[depad] Loaded %s from FASTA file\n", h->target_name[b->core.tid]);
              } else {
-                fprintf(pysamerr, "[depad] ERROR: Missing %s embedded reference sequence (and no FASTA file)\n", h->target_name[b->core.tid]);
+                fprintf(pysam_stderr, "[depad] ERROR: Missing %s embedded reference sequence (and no FASTA file)\n", h->target_name[b->core.tid]);
                  return -1;
              }
              if (0!=unpad_seq(b, &q)) {
-                fprintf(pysamerr, "[depad] ERROR: Problem parsing SEQ and/or CIGAR in read %s\n", bam_get_qname(b));
+                fprintf(pysam_stderr, "[depad] ERROR: Problem parsing SEQ and/or CIGAR in read %s\n", bam_get_qname(b));
                  return -1;
              };
              if (bam_cigar_op(cigar[0]) == BAM_CSOFT_CLIP) {
@@ -325,32 +332,32 @@ int bam_pad2unpad(samFile *in, samFile *out,  bam_hdr_t *h, faidx_t *fai)
          if (b->core.pos != -1) b->core.pos = posmap[b->core.pos];
          if (b->core.mtid < 0 || b->core.mpos < 0) {
              /* Nice case, no mate to worry about*/
-            // fprintf(pysamerr, "[depad] Read '%s' mate not mapped\n", bam_get_qname(b));
+            // fprintf(pysam_stderr, "[depad] Read '%s' mate not mapped\n", bam_get_qname(b));
              /* TODO - Warning if FLAG says mate should be mapped? */
              /* Clean up funny input where mate position is given but mate reference is missing: */
              b->core.mtid = -1;
              b->core.mpos = -1;
          } else if (b->core.mtid == b->core.tid) {
              /* Nice case, same reference */
-            // fprintf(pysamerr, "[depad] Read '%s' mate mapped to same ref\n", bam_get_qname(b));
+            // fprintf(pysam_stderr, "[depad] Read '%s' mate mapped to same ref\n", bam_get_qname(b));
              b->core.mpos = posmap[b->core.mpos];
          } else {
              /* Nasty case, Must load alternative posmap */
-            // fprintf(pysamerr, "[depad] Loading reference '%s' temporarily\n", h->target_name[b->core.mtid]);
+            // fprintf(pysam_stderr, "[depad] Loading reference '%s' temporarily\n", h->target_name[b->core.mtid]);
              if (!fai) {
-                fprintf(pysamerr, "[depad] ERROR: Needed reference %s sequence for mate (and no FASTA file)\n", h->target_name[b->core.mtid]);
+                fprintf(pysam_stderr, "[depad] ERROR: Needed reference %s sequence for mate (and no FASTA file)\n", h->target_name[b->core.mtid]);
                  return -1;
              }
              /* Temporarily load the other reference sequence */
              if (load_unpadded_ref(fai, h->target_name[b->core.mtid], h->target_len[b->core.mtid], &r)) {
-                fprintf(pysamerr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.mtid]);
+                fprintf(pysam_stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.mtid]);
                  return -1;
              }
              posmap = update_posmap(posmap, r);
              b->core.mpos = posmap[b->core.mpos];
              /* Restore the reference and posmap*/
              if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &r)) {
-                fprintf(pysamerr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]);
+                fprintf(pysam_stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]);
                  return -1;
              }
              posmap = update_posmap(posmap, r);
@@ -359,10 +366,13 @@ int bam_pad2unpad(samFile *in, samFile *out,  bam_hdr_t *h, faidx_t *fai)
          b->core.bin = bam_reg2bin(b->core.pos, bam_endpos(b));
  
      next_seq:
-        sam_write1(out, h, b);
+        if (sam_write1(out, h, b) < 0) {
+            print_error_errno("depad", "error writing to output");
+            return -1;
+        }
      }
      if (read_ret < -1) {
-        fprintf(pysamerr, "[depad] truncated file.\n");
+        fprintf(pysam_stderr, "[depad] truncated file.\n");
          ret = 1;
      }
      free(r.s); free(q.s); free(posmap);
@@ -379,10 +389,10 @@ bam_hdr_t * fix_header(bam_hdr_t *old, faidx_t *fai)
      for (i = 0; i < old->n_targets; ++i) {
          unpadded_len = get_unpadded_len(fai, old->target_name[i], old->target_len[i]);
          if (unpadded_len < 0) {
-            fprintf(pysamerr, "[depad] ERROR getting unpadded length of '%s', padded length %i\n", old->target_name[i], old->target_len[i]);
+            fprintf(pysam_stderr, "[depad] ERROR getting unpadded length of '%s', padded length %i\n", old->target_name[i], old->target_len[i]);
          } else {
              header->target_len[i] = unpadded_len;
-            //fprintf(pysamerr, "[depad] Recalculating '%s' length %i -> %i\n", old->target_name[i], old->target_len[i], header->target_len[i]);
+            //fprintf(pysam_stderr, "[depad] Recalculating '%s' length %i -> %i\n", old->target_name[i], old->target_len[i], header->target_len[i]);
          }
      }
      /* Duplicating the header allocated new buffer for header string */
@@ -404,7 +414,7 @@ bam_hdr_t * fix_header(bam_hdr_t *old, faidx_t *fai)
              char *name = strstr(text, "\tSN:");
              char *name_end;
              if (!name) {
-                fprintf(pysamerr, "Unable to find SN: header field\n");
+                fprintf(pysam_stderr, "Unable to find SN: header field\n");
                  return NULL;
              }
              name += 4;
@@ -458,7 +468,7 @@ bam_hdr_t * fix_header(bam_hdr_t *old, faidx_t *fai)
      /* Check we didn't overflow the buffer */
      assert (strlen(header->text) <= strlen(old->text));
      if (strlen(header->text) < header->l_text) {
-        //fprintf(pysamerr, "[depad] Reallocating header buffer\n");
+        //fprintf(pysam_stderr, "[depad] Reallocating header buffer\n");
          assert (newtext == header->text);
          newtext = malloc(strlen(header->text) + 1);
          strcpy(newtext, header->text);
@@ -466,7 +476,7 @@ bam_hdr_t * fix_header(bam_hdr_t *old, faidx_t *fai)
          header->text = newtext;
          header->l_text = strlen(newtext);
      }
-    //fprintf(pysamerr, "[depad] Here is the new header (pending @SQ lines),\n\n%s\n(end)\n", header->text);
+    //fprintf(pysam_stderr, "[depad] Here is the new header (pending @SQ lines),\n\n%s\n(end)\n", header->text);
      return header;
  }
  
@@ -507,7 +517,7 @@ int main_pad2unpad(int argc, char *argv[])
              break;
          case '?': is_long_help = 1; break;
          default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
-            fprintf(pysamerr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
+            fprintf(pysam_stderr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
              return usage(is_long_help);
          }
      }
@@ -527,30 +537,30 @@ int main_pad2unpad(int argc, char *argv[])
      }
      // open file handlers
      if ((in = sam_open_format(argv[optind], in_mode, &ga.in)) == 0) {
-        fprintf(pysamerr, "[depad] failed to open \"%s\" for reading.\n", argv[optind]);
+        print_error_errno("depad", "failed to open \"%s\" for reading", argv[optind]);
          ret = 1;
          goto depad_end;
      }
      if (fn_list && hts_set_fai_filename(in, fn_list) != 0) {
-        fprintf(pysamerr, "[depad] failed to load reference file \"%s\".\n", fn_list);
+        fprintf(pysam_stderr, "[depad] failed to load reference file \"%s\".\n", fn_list);
          ret = 1;
          goto depad_end;
      }
      if ((h = sam_hdr_read(in)) == 0) {
-        fprintf(pysamerr, "[depad] failed to read the header from \"%s\".\n", argv[optind]);
+        fprintf(pysam_stderr, "[depad] failed to read the header from \"%s\".\n", argv[optind]);
          ret = 1;
          goto depad_end;
      }
      if (fai) {
          h_fix = fix_header(h, fai);
      } else {
-        fprintf(pysamerr, "[depad] Warning - reference lengths will not be corrected without FASTA reference\n");
+        fprintf(pysam_stderr, "[depad] Warning - reference lengths will not be corrected without FASTA reference\n");
          h_fix = h;
      }
      char wmode[2];
      strcat(out_mode, sam_open_mode(wmode, fn_out, NULL)==0 ? wmode : "b");
      if ((out = sam_open_format(fn_out? fn_out : "-", out_mode, &ga.out)) == 0) {
-        fprintf(pysamerr, "[depad] failed to open \"%s\" for writing.\n", fn_out? fn_out : "standard output");
+        print_error_errno("depad", "failed to open \"%s\" for writing", fn_out? fn_out : "standard output");
          ret = 1;
          goto depad_end;
      }
@@ -561,46 +571,50 @@ int main_pad2unpad(int argc, char *argv[])
          hts_set_opt(out, CRAM_OPT_NO_REF, 1);
  
      if (sam_hdr_write(out, h_fix) != 0) {
-        fprintf(pysamerr, "[depad] failed to write header.\n");
+        fprintf(pysam_stderr, "[depad] failed to write header.\n");
          ret = 1;
          goto depad_end;
      }
  
      // Do the depad
-    ret = bam_pad2unpad(in, out, h, fai);
+    if (bam_pad2unpad(in, out, h, fai) != 0) ret = 1;
  
  depad_end:
      // close files, free and return
      if (fai) fai_destroy(fai);
      if (h) bam_hdr_destroy(h);
-    sam_close(in);
-    sam_close(out);
+    if (in) sam_close(in);
+    if (out && sam_close(out) < 0) {
+        fprintf(pysam_stderr, "[depad] error on closing output file.\n");
+        ret = 1;
+    }
      free(fn_list); free(fn_out);
      return ret;
  }
  
  static int usage(int is_long_help)
  {
-    fprintf(pysamerr, "\n");
-    fprintf(pysamerr, "Usage:   samtools depad <in.bam>\n\n");
-    fprintf(pysamerr, "Options:\n");
-    fprintf(pysamerr, "  -s           Output is SAM (default is BAM)\n");
-    fprintf(pysamerr, "  -S           Input is SAM (default is BAM)\n");
-    fprintf(pysamerr, "  -u           Uncompressed BAM output (can't use with -s)\n");
-    fprintf(pysamerr, "  -1           Fast compression BAM output (can't use with -s)\n");
-    fprintf(pysamerr, "  -T, --reference FILE\n");
-    fprintf(pysamerr, "               Padded reference sequence file [null]\n");
-    fprintf(pysamerr, "  -o FILE      Output file name [stdout]\n");
-    fprintf(pysamerr, "  -?           Longer help\n");
-    sam_global_opt_help(pysamerr, "-...-");
+    fprintf(pysam_stderr, "\n");
+    fprintf(pysam_stderr, "Usage:   samtools depad <in.bam>\n\n");
+    fprintf(pysam_stderr, "Options:\n");
+    fprintf(pysam_stderr, "  -s           Output is SAM (default is BAM)\n");
+    fprintf(pysam_stderr, "  -S           Input is SAM (default is BAM)\n");
+    fprintf(pysam_stderr, "  -u           Uncompressed BAM output (can't use with -s)\n");
+    fprintf(pysam_stderr, "  -1           Fast compression BAM output (can't use with -s)\n");
+    fprintf(pysam_stderr, "  -T, --reference FILE\n");
+    fprintf(pysam_stderr, "               Padded reference sequence file [null]\n");
+    fprintf(pysam_stderr, "  -o FILE      Output file name [pysam_stdout]\n");
+    fprintf(pysam_stderr, "  -?           Longer help\n");
+    sam_global_opt_help(pysam_stderr, "-...-");
  
      if (is_long_help)
-        fprintf(pysamerr, "Notes:\n\
-\n\
-  1. Requires embedded reference sequences (before the reads for that reference),\n\
-     or ideally a FASTA file of the padded reference sequences (via the -T argument).\n\
-\n\
-  2. The input padded alignment read's CIGAR strings must not use P or I operators.\n\
-\n");
+        fprintf(pysam_stderr,
+"Notes:\n"
+"\n"
+"1. Requires embedded reference sequences (before the reads for that reference),\n"
+"   or ideally a FASTA file of the padded reference sequences (via a -T option).\n"
+"\n"
+"2. Input padded alignment reads' CIGAR strings must not use P or I operators.\n"
+"\n");
      return 1;
  }
diff --git a/samtools/phase.c b/samtools/phase.c

index 0667ea5078967956a59a397756705a99e08385cc..69099125c0d38587033e08eda5b5c0f97631e856 100644 (file)
--- a/samtools/phase.c
+++ b/samtools/phase.c
@@ -1,7 +1,7 @@
  /*  phase.c -- phase subcommand.
  
      Copyright (C) 2011 Broad Institute.
-    Copyright (C) 2013, 2014 Genome Research Ltd.
+    Copyright (C) 2013-2016 Genome Research Ltd.
  
      Author: Heng Li <lh3@sanger.ac.uk>
  
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
@@ -30,8 +32,10 @@ DEALINGS IN THE SOFTWARE.  */
  #include <math.h>
  #include <zlib.h>
  #include "htslib/sam.h"
+#include "htslib/kstring.h"
  #include "errmod.h"
  #include "sam_opts.h"
+#include "samtools.h"
  
  #include "htslib/kseq.h"
  KSTREAM_INIT(gzFile, gzread, 16384)
@@ -53,6 +57,7 @@ typedef struct {
      samFile* fp;
      bam_hdr_t* fp_hdr;
      char *pre;
+    char *out_name[3];
      samFile* out[3];
      bam_hdr_t* out_hdr[3];
      // alignment queue
@@ -333,7 +338,7 @@ static int clean_seqs(int vpos, nseq_t *hash)
      return ret;
  }
  
-static void dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
+static int dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
  {
      int i, is_flip, drop_ambi;
      drop_ambi = g->flag & FLAG_DROP_AMBI;
@@ -361,12 +366,16 @@ static void dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
              if (which < 2 && is_flip) which = 1 - which; // increase the randomness
          }
          if (which == 3) which = (drand48() < 0.5);
-        sam_write1(g->out[which], g->out_hdr[which], b);
+        if (sam_write1(g->out[which], g->out_hdr[which], b) < 0) {
+            print_error_errno("phase", "error writing to '%s'", g->out_name[which]);
+            return -1;
+        }
          bam_destroy1(b);
          g->b[i] = 0;
      }
      memmove(g->b, g->b + i, (g->n - i) * sizeof(void*));
      g->n -= i;
+    return 0;
  }
  
  static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *hash)
@@ -393,7 +402,7 @@ static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *
                  else f->phased = 1, f->phase = f->seq[0] - 1;
              }
          }
-        dump_aln(g, min_pos, hash);
+        if (dump_aln(g, min_pos, hash) < 0) return -1;
          ++g->vpos_shift;
          return 1;
      }
@@ -451,7 +460,7 @@ static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *
      printf("//\n");
      fflush(stdout);
      g->vpos_shift += vpos;
-    dump_aln(g, min_pos, hash);
+    if (dump_aln(g, min_pos, hash) < 0) return -1;
      return vpos;
  }
  
@@ -536,6 +545,26 @@ static int gl2cns(float q[16])
      return (min_ij>>2&3) == (min_ij&3)? 0 : 1<<18 | (min_ij>>2&3)<<16 | (min_ij&3) | (int)(min2 - min + .499) << 2;
  }
  
+static int start_output(phaseg_t *g, int c, const char *middle, const htsFormat *fmt)
+{
+    kstring_t s = { 0, 0, NULL };
+    ksprintf(&s, "%s.%s.%s", g->pre, middle, hts_format_file_extension(fmt));
+    g->out_name[c] = ks_release(&s);
+    g->out[c] = sam_open_format(g->out_name[c], "wb", fmt);
+    if (! g->out[c]) {
+        print_error_errno("phase", "Failed to open output file '%s'", g->out_name[c]);
+        return -1;
+    }
+
+    g->out_hdr[c] = bam_hdr_dup(g->fp_hdr);
+    if (sam_hdr_write(g->out[c], g->out_hdr[c]) < 0) {
+        print_error_errno("phase", "Failed to write header for '%s'", g->out_name[c]);
+        return -1;
+    }
+
+    return 0;
+}
+
  int main_phase(int argc, char *argv[])
  {
      int c, tid, pos, vpos = 0, n, lasttid = -1, max_vpos = 0, usage = 0;
@@ -555,6 +584,8 @@ int main_phase(int argc, char *argv[])
          { NULL, 0, NULL, 0 }
      };
  
+    // FIXME Leaks galore in the case of error returns
+
      memset(&g, 0, sizeof(phaseg_t));
      g.flag = FLAG_FIX_CHIMERA;
      g.min_varLOD = 37; g.k = 13; g.min_baseQ = 13; g.max_depth = 256;
@@ -594,9 +625,14 @@ int main_phase(int argc, char *argv[])
          return 1;
      }
      g.fp = sam_open_format(argv[optind], "r", &ga.in);
+    if (!g.fp) {
+        print_error_errno("phase", "Couldn't open '%s'", argv[optind]);
+        return 1;
+    }
      g.fp_hdr = sam_hdr_read(g.fp);
      if (g.fp_hdr == NULL) {
-        fprintf(stderr, "Failed to read header for '%s'\n", argv[optind]);
+        fprintf(stderr, "[%s] Failed to read header for '%s'\n",
+                __func__, argv[optind]);
          return 1;
      }
      if (fn_list) { // read the list of sites to phase
@@ -604,20 +640,13 @@ int main_phase(int argc, char *argv[])
          free(fn_list);
      } else g.flag &= ~FLAG_LIST_EXCL;
      if (g.pre) { // open BAMs to write
-        char *s = (char*)malloc(strlen(g.pre) + 20);
          if (ga.out.format == unknown_format)
              ga.out.format = bam; // default via "wb".
-        strcpy(s, g.pre); strcat(s, ".0."); strcat(s, hts_format_file_extension(&ga.out));
-        g.out[0] = sam_open_format(s, "wb", &ga.out);
-        strcpy(s, g.pre); strcat(s, ".1."); strcat(s, hts_format_file_extension(&ga.out));
-        g.out[1] = sam_open_format(s, "wb", &ga.out);
-        strcpy(s, g.pre); strcat(s, ".chimera."); strcat(s, hts_format_file_extension(&ga.out));
-        g.out[2] = sam_open_format(s, "wb", &ga.out);
-        for (c = 0; c <= 2; ++c) {
-            g.out_hdr[c] = bam_hdr_dup(g.fp_hdr);
-            sam_hdr_write(g.out[c], g.out_hdr[c]);
-        }
-        free(s);
+
+        // Open each output file g.out[0..2], dupping and writing the header
+        if (start_output(&g, 0, "0", &ga.out) < 0 ||
+            start_output(&g, 1, "1", &ga.out) < 0 ||
+            start_output(&g, 2, "chimera", &ga.out) < 0) return 1;
      }
  
      iter = bam_plp_init(readaln, &g);
@@ -647,7 +676,10 @@ int main_phase(int argc, char *argv[])
              g.vpos_shift = 0;
              if (lasttid >= 0) {
                  seqs = shrink_hash(seqs);
-                phase(&g, g.fp_hdr->target_name[lasttid], vpos, cns, seqs);
+                if (phase(&g, g.fp_hdr->target_name[lasttid],
+                          vpos, cns, seqs) < 0) {
+                    return 1;
+                }
                  update_vpos(0x7fffffff, seqs);
              }
              lasttid = tid;
@@ -716,14 +748,20 @@ int main_phase(int argc, char *argv[])
          }
          if (dophase) {
              seqs = shrink_hash(seqs);
-            phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs);
+            if (phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs) < 0) {
+                return 1;
+            }
              update_vpos(vpos, seqs);
              cns[0] = cns[vpos];
              vpos = 0;
          }
          ++vpos;
      }
-    if (tid >= 0) phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs);
+    if (tid >= 0) {
+        if (phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs) < 0) {
+            return 1;
+        }
+    }
      bam_hdr_destroy(g.fp_hdr);
      bam_plp_destroy(iter);
      sam_close(g.fp);
@@ -733,11 +771,18 @@ int main_phase(int argc, char *argv[])
      errmod_destroy(em);
      free(bases);
      if (g.pre) {
+        int res = 0;
          for (c = 0; c <= 2; ++c) {
-            sam_close(g.out[c]);
+            if (sam_close(g.out[c]) < 0) {
+                fprintf(stderr, "[%s] error on closing '%s'\n",
+                        __func__, g.out_name[c]);
+                res = 1;
+            }
              bam_hdr_destroy(g.out_hdr[c]);
+            free(g.out_name[c]);
          }
          free(g.pre); free(g.b);
+        if (res) return 1;
      }
      sam_global_args_free(&ga);
      return 0;
diff --git a/samtools/phase.c.pysam.c b/samtools/phase.c.pysam.c

index bc1d4558f875c35704f43ededb16a4d75996da80..3babd3704349f09938a782912a7be1023f4a5b6a 100644 (file)
--- a/samtools/phase.c.pysam.c
+++ b/samtools/phase.c.pysam.c
@@ -3,7 +3,7 @@
  /*  phase.c -- phase subcommand.
  
      Copyright (C) 2011 Broad Institute.
-    Copyright (C) 2013, 2014 Genome Research Ltd.
+    Copyright (C) 2013-2016 Genome Research Ltd.
  
      Author: Heng Li <lh3@sanger.ac.uk>
  
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
@@ -32,8 +34,10 @@ DEALINGS IN THE SOFTWARE.  */
  #include <math.h>
  #include <zlib.h>
  #include "htslib/sam.h"
+#include "htslib/kstring.h"
  #include "errmod.h"
  #include "sam_opts.h"
+#include "samtools.h"
  
  #include "htslib/kseq.h"
  KSTREAM_INIT(gzFile, gzread, 16384)
@@ -55,6 +59,7 @@ typedef struct {
      samFile* fp;
      bam_hdr_t* fp_hdr;
      char *pre;
+    char *out_name[3];
      samFile* out[3];
      bam_hdr_t* out_hdr[3];
      // alignment queue
@@ -335,7 +340,7 @@ static int clean_seqs(int vpos, nseq_t *hash)
      return ret;
  }
  
-static void dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
+static int dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
  {
      int i, is_flip, drop_ambi;
      drop_ambi = g->flag & FLAG_DROP_AMBI;
@@ -363,12 +368,16 @@ static void dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
              if (which < 2 && is_flip) which = 1 - which; // increase the randomness
          }
          if (which == 3) which = (drand48() < 0.5);
-        sam_write1(g->out[which], g->out_hdr[which], b);
+        if (sam_write1(g->out[which], g->out_hdr[which], b) < 0) {
+            print_error_errno("phase", "error writing to '%s'", g->out_name[which]);
+            return -1;
+        }
          bam_destroy1(b);
          g->b[i] = 0;
      }
      memmove(g->b, g->b + i, (g->n - i) * sizeof(void*));
      g->n -= i;
+    return 0;
  }
  
  static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *hash)
@@ -383,8 +392,8 @@ static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *
      i = clean_seqs(vpos, hash); // i is true if hash has an element with its vpos >= vpos
      min_pos = i? cns[vpos]>>32 : 0x7fffffff;
      if (vpos == 1) {
-        printf("PS\t%s\t%d\t%d\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[0]>>32) + 1);
-        printf("M0\t%s\t%d\t%d\t%c\t%c\t%d\t0\t0\t0\t0\n//\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[0]>>32) + 1,
+        fprintf(pysam_stdout, "PS\t%s\t%d\t%d\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[0]>>32) + 1);
+        fprintf(pysam_stdout, "M0\t%s\t%d\t%d\t%c\t%c\t%d\t0\t0\t0\t0\n//\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[0]>>32) + 1,
              "ACGTX"[cns[0]&3], "ACGTX"[cns[0]>>16&3], g->vpos_shift + 1);
          for (k = 0; k < kh_end(hash); ++k) {
              if (kh_exist(hash, k)) {
@@ -395,14 +404,14 @@ static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *
                  else f->phased = 1, f->phase = f->seq[0] - 1;
              }
          }
-        dump_aln(g, min_pos, hash);
+        if (dump_aln(g, min_pos, hash) < 0) return -1;
          ++g->vpos_shift;
          return 1;
      }
      { // phase
          int **cnt;
          uint64_t *mask;
-        printf("PS\t%s\t%d\t%d\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[vpos-1]>>32) + 1);
+        fprintf(pysam_stdout, "PS\t%s\t%d\t%d\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[vpos-1]>>32) + 1);
          sitemask = calloc(vpos, 1);
          cnt = count_all(g->k, vpos, hash);
          path = dynaprog(g->k, vpos, cnt);
@@ -423,13 +432,13 @@ static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *
          }
      }
      for (i = 0; i < n_masked; ++i)
-        printf("FL\t%s\t%d\t%d\n", chr, (int)(regmask[i]>>32) + 1, (int)regmask[i] + 1);
+        fprintf(pysam_stdout, "FL\t%s\t%d\t%d\n", chr, (int)(regmask[i]>>32) + 1, (int)regmask[i] + 1);
      for (i = 0; i < vpos; ++i) {
          uint64_t x = pcnt[i];
          int8_t c[2];
          c[0] = (cns[i]&0xffff)>>2 == 0? 4 : (cns[i]&3);
          c[1] = (cns[i]>>16&0xffff)>>2 == 0? 4 : (cns[i]>>16&3);
-        printf("M%d\t%s\t%d\t%d\t%c\t%c\t%d\t%d\t%d\t%d\t%d\n", sitemask[i]+1, chr, (int)(cns[0]>>32) + 1, (int)(cns[i]>>32) + 1, "ACGTX"[c[path[i]]], "ACGTX"[c[1-path[i]]],
+        fprintf(pysam_stdout, "M%d\t%s\t%d\t%d\t%c\t%c\t%d\t%d\t%d\t%d\t%d\n", sitemask[i]+1, chr, (int)(cns[0]>>32) + 1, (int)(cns[i]>>32) + 1, "ACGTX"[c[path[i]]], "ACGTX"[c[1-path[i]]],
              i + g->vpos_shift + 1, (int)(x&0xffff), (int)(x>>16&0xffff), (int)(x>>32&0xffff), (int)(x>>48&0xffff));
      }
      free(path); free(pcnt); free(regmask); free(sitemask);
@@ -441,19 +450,19 @@ static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *
      ks_introsort_rseq(n_seqs, seqs);
      for (i = 0; i < n_seqs; ++i) {
          frag_t *f = seqs[i];
-        printf("EV\t0\t%s\t%d\t40\t%dM\t*\t0\t0\t", chr, f->vpos + 1 + g->vpos_shift, f->vlen);
+        fprintf(pysam_stdout, "EV\t0\t%s\t%d\t40\t%dM\t*\t0\t0\t", chr, f->vpos + 1 + g->vpos_shift, f->vlen);
          for (j = 0; j < f->vlen; ++j) {
              uint32_t c = cns[f->vpos + j];
-            if (f->seq[j] == 0) putchar('N');
-            else putchar("ACGT"[f->seq[j] == 1? (c&3) : (c>>16&3)]);
+            if (f->seq[j] == 0) fputc('N', pysam_stdout);
+            else fputc("ACGT"[f->seq[j] == 1? (c&3) : (c>>16&3)], pysam_stdout);
          }
-        printf("\t*\tYP:i:%d\tYF:i:%d\tYI:i:%d\tYO:i:%d\tYS:i:%d\n", f->phase, f->flip, f->in, f->out, f->beg+1);
+        fprintf(pysam_stdout, "\t*\tYP:i:%d\tYF:i:%d\tYI:i:%d\tYO:i:%d\tYS:i:%d\n", f->phase, f->flip, f->in, f->out, f->beg+1);
      }
      free(seqs);
-    printf("//\n");
-    fflush(stdout);
+    fprintf(pysam_stdout, "//\n");
+    fflush(pysam_stdout);
      g->vpos_shift += vpos;
-    dump_aln(g, min_pos, hash);
+    if (dump_aln(g, min_pos, hash) < 0) return -1;
      return vpos;
  }
  
@@ -538,6 +547,26 @@ static int gl2cns(float q[16])
      return (min_ij>>2&3) == (min_ij&3)? 0 : 1<<18 | (min_ij>>2&3)<<16 | (min_ij&3) | (int)(min2 - min + .499) << 2;
  }
  
+static int start_output(phaseg_t *g, int c, const char *middle, const htsFormat *fmt)
+{
+    kstring_t s = { 0, 0, NULL };
+    ksprintf(&s, "%s.%s.%s", g->pre, middle, hts_format_file_extension(fmt));
+    g->out_name[c] = ks_release(&s);
+    g->out[c] = sam_open_format(g->out_name[c], "wb", fmt);
+    if (! g->out[c]) {
+        print_error_errno("phase", "Failed to open output file '%s'", g->out_name[c]);
+        return -1;
+    }
+
+    g->out_hdr[c] = bam_hdr_dup(g->fp_hdr);
+    if (sam_hdr_write(g->out[c], g->out_hdr[c]) < 0) {
+        print_error_errno("phase", "Failed to write header for '%s'", g->out_name[c]);
+        return -1;
+    }
+
+    return 0;
+}
+
  int main_phase(int argc, char *argv[])
  {
      int c, tid, pos, vpos = 0, n, lasttid = -1, max_vpos = 0, usage = 0;
@@ -557,6 +586,8 @@ int main_phase(int argc, char *argv[])
          { NULL, 0, NULL, 0 }
      };
  
+    // FIXME Leaks galore in the case of error returns
+
      memset(&g, 0, sizeof(phaseg_t));
      g.flag = FLAG_FIX_CHIMERA;
      g.min_varLOD = 37; g.k = 13; g.min_baseQ = 13; g.max_depth = 256;
@@ -578,27 +609,32 @@ int main_phase(int argc, char *argv[])
          if (usage) break;
      }
      if (usage || argc == optind) {
-        fprintf(pysamerr, "\n");
-        fprintf(pysamerr, "Usage:   samtools phase [options] <in.bam>\n\n");
-        fprintf(pysamerr, "Options: -k INT    block length [%d]\n", g.k);
-        fprintf(pysamerr, "         -b STR    prefix of BAMs to output [null]\n");
-        fprintf(pysamerr, "         -q INT    min het phred-LOD [%d]\n", g.min_varLOD);
-        fprintf(pysamerr, "         -Q INT    min base quality in het calling [%d]\n", g.min_baseQ);
-        fprintf(pysamerr, "         -D INT    max read depth [%d]\n", g.max_depth);
-//      fprintf(pysamerr, "         -l FILE   list of sites to phase [null]\n");
-        fprintf(pysamerr, "         -F        do not attempt to fix chimeras\n");
-        fprintf(pysamerr, "         -A        drop reads with ambiguous phase\n");
-//      fprintf(pysamerr, "         -e        do not discover SNPs (effective with -l)\n");
-        fprintf(pysamerr, "\n");
-
-        sam_global_opt_help(pysamerr, "-....");
+        fprintf(pysam_stderr, "\n");
+        fprintf(pysam_stderr, "Usage:   samtools phase [options] <in.bam>\n\n");
+        fprintf(pysam_stderr, "Options: -k INT    block length [%d]\n", g.k);
+        fprintf(pysam_stderr, "         -b STR    prefix of BAMs to output [null]\n");
+        fprintf(pysam_stderr, "         -q INT    min het phred-LOD [%d]\n", g.min_varLOD);
+        fprintf(pysam_stderr, "         -Q INT    min base quality in het calling [%d]\n", g.min_baseQ);
+        fprintf(pysam_stderr, "         -D INT    max read depth [%d]\n", g.max_depth);
+//      fprintf(pysam_stderr, "         -l FILE   list of sites to phase [null]\n");
+        fprintf(pysam_stderr, "         -F        do not attempt to fix chimeras\n");
+        fprintf(pysam_stderr, "         -A        drop reads with ambiguous phase\n");
+//      fprintf(pysam_stderr, "         -e        do not discover SNPs (effective with -l)\n");
+        fprintf(pysam_stderr, "\n");
+
+        sam_global_opt_help(pysam_stderr, "-....");
  
          return 1;
      }
      g.fp = sam_open_format(argv[optind], "r", &ga.in);
+    if (!g.fp) {
+        print_error_errno("phase", "Couldn't open '%s'", argv[optind]);
+        return 1;
+    }
      g.fp_hdr = sam_hdr_read(g.fp);
      if (g.fp_hdr == NULL) {
-        fprintf(pysamerr, "Failed to read header for '%s'\n", argv[optind]);
+        fprintf(pysam_stderr, "[%s] Failed to read header for '%s'\n",
+                __func__, argv[optind]);
          return 1;
      }
      if (fn_list) { // read the list of sites to phase
@@ -606,20 +642,13 @@ int main_phase(int argc, char *argv[])
          free(fn_list);
      } else g.flag &= ~FLAG_LIST_EXCL;
      if (g.pre) { // open BAMs to write
-        char *s = (char*)malloc(strlen(g.pre) + 20);
          if (ga.out.format == unknown_format)
              ga.out.format = bam; // default via "wb".
-        strcpy(s, g.pre); strcat(s, ".0."); strcat(s, hts_format_file_extension(&ga.out));
-        g.out[0] = sam_open_format(s, "wb", &ga.out);
-        strcpy(s, g.pre); strcat(s, ".1."); strcat(s, hts_format_file_extension(&ga.out));
-        g.out[1] = sam_open_format(s, "wb", &ga.out);
-        strcpy(s, g.pre); strcat(s, ".chimera."); strcat(s, hts_format_file_extension(&ga.out));
-        g.out[2] = sam_open_format(s, "wb", &ga.out);
-        for (c = 0; c <= 2; ++c) {
-            g.out_hdr[c] = bam_hdr_dup(g.fp_hdr);
-            sam_hdr_write(g.out[c], g.out_hdr[c]);
-        }
-        free(s);
+
+        // Open each output file g.out[0..2], dupping and writing the header
+        if (start_output(&g, 0, "0", &ga.out) < 0 ||
+            start_output(&g, 1, "1", &ga.out) < 0 ||
+            start_output(&g, 2, "chimera", &ga.out) < 0) return 1;
      }
  
      iter = bam_plp_init(readaln, &g);
@@ -627,20 +656,20 @@ int main_phase(int argc, char *argv[])
      seqs = kh_init(64);
      em = errmod_init(1. - 0.83);
      bases = calloc(g.max_depth, 2);
-    printf("CC\n");
-    printf("CC\tDescriptions:\nCC\n");
-    printf("CC\t  CC      comments\n");
-    printf("CC\t  PS      start of a phase set\n");
-    printf("CC\t  FL      filtered region\n");
-    printf("CC\t  M[012]  markers; 0 for singletons, 1 for phased and 2 for filtered\n");
-    printf("CC\t  EV      supporting reads; SAM format\n");
-    printf("CC\t  //      end of a phase set\nCC\n");
-    printf("CC\tFormats of PS, FL and M[012] lines (1-based coordinates):\nCC\n");
-    printf("CC\t  PS  chr  phaseSetStart  phaseSetEnd\n");
-    printf("CC\t  FL  chr  filterStart    filterEnd\n");
-    printf("CC\t  M?  chr  PS  pos  allele0  allele1  hetIndex  #supports0  #errors0  #supp1  #err1\n");
-    printf("CC\nCC\n");
-    fflush(stdout);
+    fprintf(pysam_stdout, "CC\n");
+    fprintf(pysam_stdout, "CC\tDescriptions:\nCC\n");
+    fprintf(pysam_stdout, "CC\t  CC      comments\n");
+    fprintf(pysam_stdout, "CC\t  PS      start of a phase set\n");
+    fprintf(pysam_stdout, "CC\t  FL      filtered region\n");
+    fprintf(pysam_stdout, "CC\t  M[012]  markers; 0 for singletons, 1 for phased and 2 for filtered\n");
+    fprintf(pysam_stdout, "CC\t  EV      supporting reads; SAM format\n");
+    fprintf(pysam_stdout, "CC\t  //      end of a phase set\nCC\n");
+    fprintf(pysam_stdout, "CC\tFormats of PS, FL and M[012] lines (1-based coordinates):\nCC\n");
+    fprintf(pysam_stdout, "CC\t  PS  chr  phaseSetStart  phaseSetEnd\n");
+    fprintf(pysam_stdout, "CC\t  FL  chr  filterStart    filterEnd\n");
+    fprintf(pysam_stdout, "CC\t  M?  chr  PS  pos  allele0  allele1  hetIndex  #supports0  #errors0  #supp1  #err1\n");
+    fprintf(pysam_stdout, "CC\nCC\n");
+    fflush(pysam_stdout);
      while ((plp = bam_plp_auto(iter, &tid, &pos, &n)) != 0) {
          int i, k, c, tmp, dophase = 1, in_set = 0;
          float q[16];
@@ -649,7 +678,10 @@ int main_phase(int argc, char *argv[])
              g.vpos_shift = 0;
              if (lasttid >= 0) {
                  seqs = shrink_hash(seqs);
-                phase(&g, g.fp_hdr->target_name[lasttid], vpos, cns, seqs);
+                if (phase(&g, g.fp_hdr->target_name[lasttid],
+                          vpos, cns, seqs) < 0) {
+                    return 1;
+                }
                  update_vpos(0x7fffffff, seqs);
              }
              lasttid = tid;
@@ -718,14 +750,20 @@ int main_phase(int argc, char *argv[])
          }
          if (dophase) {
              seqs = shrink_hash(seqs);
-            phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs);
+            if (phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs) < 0) {
+                return 1;
+            }
              update_vpos(vpos, seqs);
              cns[0] = cns[vpos];
              vpos = 0;
          }
          ++vpos;
      }
-    if (tid >= 0) phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs);
+    if (tid >= 0) {
+        if (phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs) < 0) {
+            return 1;
+        }
+    }
      bam_hdr_destroy(g.fp_hdr);
      bam_plp_destroy(iter);
      sam_close(g.fp);
@@ -735,11 +773,18 @@ int main_phase(int argc, char *argv[])
      errmod_destroy(em);
      free(bases);
      if (g.pre) {
+        int res = 0;
          for (c = 0; c <= 2; ++c) {
-            sam_close(g.out[c]);
+            if (sam_close(g.out[c]) < 0) {
+                fprintf(pysam_stderr, "[%s] error on closing '%s'\n",
+                        __func__, g.out_name[c]);
+                res = 1;
+            }
              bam_hdr_destroy(g.out_hdr[c]);
+            free(g.out_name[c]);
          }
          free(g.pre); free(g.b);
+        if (res) return 1;
      }
      sam_global_args_free(&ga);
      return 0;
diff --git a/samtools/pysam.h b/samtools/pysam.h

index 008cbbd9ee9e0b3ed79c1c4660a3066c70732770..b0fc4fb565eabbd084b3507fb049a87ad821d106 100644 (file)
--- a/samtools/pysam.h
+++ b/samtools/pysam.h
@@ -1,5 +1,7 @@
  #ifndef PYSAM_H
  #define PYSAM_H
  #include "stdio.h"
-extern FILE * pysamerr;
+extern FILE * pysam_stderr;
+extern FILE * pysam_stdout;
+extern const char * pysam_stdout_fn;
  #endif
diff --git a/samtools/sam.c b/samtools/sam.c

index d6cc9f6f4202c991c78ffc98f7d19102a4a22405..237c3e88cc3ab8a29980c0a67eff2281dacb9d13 100644 (file)
--- a/samtools/sam.c
+++ b/samtools/sam.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <string.h>
  #include <unistd.h>
  #include "htslib/faidx.h"
@@ -31,7 +33,7 @@ DEALINGS IN THE SOFTWARE.  */
  int samthreads(samfile_t *fp, int n_threads, int n_sub_blks)
  {
      if (hts_get_format(fp->file)->format != bam || !fp->is_write) return -1;
-    bgzf_mt(fp->x.bam, n_threads, n_sub_blks);
+    if (bgzf_mt(fp->x.bam, n_threads, n_sub_blks) < 0) return -1;
      return 0;
  }
  
@@ -42,6 +44,10 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
      if (hts_fp == NULL)  return NULL;
  
      samfile_t *fp = malloc(sizeof (samfile_t));
+    if (!fp) {
+        sam_close(hts_fp);
+        return NULL;
+    }
      fp->file = hts_fp;
      fp->x.bam = hts_fp->fp.bgzf;
      if (strchr(mode, 'r')) {
@@ -66,7 +72,15 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
          enum htsExactFormat fmt = hts_get_format(fp->file)->format;
          fp->header = (bam_hdr_t *)aux;  // For writing, we won't free it
          fp->is_write = 1;
-        if (!(fmt == text_format || fmt == sam) || strchr(mode, 'h')) sam_hdr_write(fp->file, fp->header);
+        if (!(fmt == text_format || fmt == sam) || strchr(mode, 'h')) {
+            if (sam_hdr_write(fp->file, fp->header) < 0) {
+                if (bam_verbose >= 1)
+                    fprintf(stderr, "[samopen] Couldn't write header\n");
+                sam_close(hts_fp);
+                free(fp);
+                return NULL;
+            }
+        }
      }
  
      return fp;
diff --git a/samtools/sam.c.pysam.c b/samtools/sam.c.pysam.c

index e7c4cac20213d1aa022ec425677295b87ee23d8d..f7db82090083cfdfd6aeda4db9b75d0335d73a4f 100644 (file)
--- a/samtools/sam.c.pysam.c
+++ b/samtools/sam.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <string.h>
  #include <unistd.h>
  #include "htslib/faidx.h"
@@ -33,7 +35,7 @@ DEALINGS IN THE SOFTWARE.  */
  int samthreads(samfile_t *fp, int n_threads, int n_sub_blks)
  {
      if (hts_get_format(fp->file)->format != bam || !fp->is_write) return -1;
-    bgzf_mt(fp->x.bam, n_threads, n_sub_blks);
+    if (bgzf_mt(fp->x.bam, n_threads, n_sub_blks) < 0) return -1;
      return 0;
  }
  
@@ -44,6 +46,10 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
      if (hts_fp == NULL)  return NULL;
  
      samfile_t *fp = malloc(sizeof (samfile_t));
+    if (!fp) {
+        sam_close(hts_fp);
+        return NULL;
+    }
      fp->file = hts_fp;
      fp->x.bam = hts_fp->fp.bgzf;
      if (strchr(mode, 'r')) {
@@ -62,13 +68,21 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
          }
          fp->is_write = 0;
          if (fp->header->n_targets == 0 && bam_verbose >= 1)
-            fprintf(pysamerr, "[samopen] no @SQ lines in the header.\n");
+            fprintf(pysam_stderr, "[samopen] no @SQ lines in the header.\n");
      }
      else {
          enum htsExactFormat fmt = hts_get_format(fp->file)->format;
          fp->header = (bam_hdr_t *)aux;  // For writing, we won't free it
          fp->is_write = 1;
-        if (!(fmt == text_format || fmt == sam) || strchr(mode, 'h')) sam_hdr_write(fp->file, fp->header);
+        if (!(fmt == text_format || fmt == sam) || strchr(mode, 'h')) {
+            if (sam_hdr_write(fp->file, fp->header) < 0) {
+                if (bam_verbose >= 1)
+                    fprintf(pysam_stderr, "[samopen] Couldn't write header\n");
+                sam_close(hts_fp);
+                free(fp);
+                return NULL;
+            }
+        }
      }
  
      return fp;
@@ -122,11 +136,11 @@ char *samfaipath(const char *fn_ref)
      strcat(strcpy(fn_list, fn_ref), ".fai");
      if (access(fn_list, R_OK) == -1) { // fn_list is unreadable
          if (access(fn_ref, R_OK) == -1) {
-            fprintf(pysamerr, "[samfaipath] fail to read file %s.\n", fn_ref);
+            fprintf(pysam_stderr, "[samfaipath] fail to read file %s.\n", fn_ref);
          } else {
-            if (bam_verbose >= 3) fprintf(pysamerr, "[samfaipath] build FASTA index...\n");
+            if (bam_verbose >= 3) fprintf(pysam_stderr, "[samfaipath] build FASTA index...\n");
              if (fai_build(fn_ref) == -1) {
-                fprintf(pysamerr, "[samfaipath] fail to build FASTA index.\n");
+                fprintf(pysam_stderr, "[samfaipath] fail to build FASTA index.\n");
                  free(fn_list); fn_list = 0;
              }
          }
diff --git a/samtools/sam_header.c b/samtools/sam_header.c

index 75ca724ff40973396bd508a1c4feedebfe36a055..64da68f3716122902c84c28c8fda223110cedb6c 100644 (file)
--- a/samtools/sam_header.c
+++ b/samtools/sam_header.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "sam_header.h"
  #include <stdio.h>
  #include <string.h>
diff --git a/samtools/sam_header.c.pysam.c b/samtools/sam_header.c.pysam.c

index ecf937c50e48134e2b950de883457a9e5644ca93..e39807d15aa234b1516fd24cf41f8d91240b2895 100644 (file)
--- a/samtools/sam_header.c.pysam.c
+++ b/samtools/sam_header.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "sam_header.h"
  #include <stdio.h>
  #include <string.h>
@@ -81,7 +83,7 @@ static void debug(const char *format, ...)
  {
      va_list ap;
      va_start(ap, format);
-    vfprintf(pysamerr, format, ap);
+    vfprintf(pysam_stderr, format, ap);
      va_end(ap);
  }
  
@@ -775,8 +777,8 @@ void *sam_header_merge(int n, const void **_dicts)
  
                  if ( status==2 )
                  {
-                    print_header_line(pysamerr,tmpl_hlines->data);
-                    print_header_line(pysamerr,out_hlines->data);
+                    print_header_line(pysam_stderr,tmpl_hlines->data);
+                    print_header_line(pysam_stderr,out_hlines->data);
                      debug("Conflicting lines, cannot merge the headers.\n");
                      return 0;
                  }
diff --git a/samtools/sam_opts.c b/samtools/sam_opts.c

index 0ed197e23c107a9b9a2c384ace71e24d77a82181..936914562bada35a7fee3fc42fab492989822f58 100644 (file)
--- a/samtools/sam_opts.c
+++ b/samtools/sam_opts.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
diff --git a/samtools/sam_opts.c.pysam.c b/samtools/sam_opts.c.pysam.c

index c9764382e0ba22d7ed800aa5b737bb302e7e410a..d0b56a32e28a1db9a99d745d051ade284353dc71 100644 (file)
--- a/samtools/sam_opts.c.pysam.c
+++ b/samtools/sam_opts.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
@@ -79,7 +81,7 @@ int parse_sam_global_opt(int c, const char *optarg, const struct option *lopt,
      }
  
      if (!lopt->name) {
-        fprintf(pysamerr, "Unexpected global option: %s\n", lopt->name);
+        fprintf(pysam_stderr, "Unexpected global option: %s\n", lopt->name);
          return -1;
      }
  
diff --git a/samtools/sam_view.c b/samtools/sam_view.c

index 4358a1c0786e43d16391cc41a5c70b737bed2938..402e1d30fc47f328dd357c8ba49835cefe5ed514 100644 (file)
--- a/samtools/sam_view.c
+++ b/samtools/sam_view.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <string.h>
  #include <stdio.h>
@@ -96,7 +98,7 @@ static int process_aln(const bam_hdr_t *h, bam1_t *b, samview_settings_t* settin
      }
      if (settings->library) {
          const char *p = bam_get_library((bam_hdr_t*)h, b);
-        if (p && strcmp(p, settings->library) != 0) return 1;
+        if (!p || strcmp(p, settings->library) != 0) return 1;
      }
      if (settings->remove_aux_len) {
          size_t i;
@@ -400,18 +402,18 @@ int main_samview(int argc, char *argv[])
              }
          }
          if (fn_un_out) {
-                if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) {
+            if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) {
                  print_error_errno("view", "failed to open \"%s\" for writing", fn_un_out);
                  ret = 1;
                  goto view_end;
              }
-                if (fn_list) {
-                    if (hts_set_fai_filename(un_out, fn_list) != 0) {
-                        fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
-                        ret = 1;
-                        goto view_end;
-                    }
+            if (fn_list) {
+                if (hts_set_fai_filename(un_out, fn_list) != 0) {
+                    fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+                    ret = 1;
+                    goto view_end;
                  }
+            }
              if (*out_format || is_header ||
                  out_un_mode[1] == 'b' || out_un_mode[1] == 'c' ||
                  (ga.out.format != sam && ga.out.format != unknown_format))  {
@@ -556,35 +558,37 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
          fprintf(fp,
  "Notes:\n"
  "\n"
-"  1. This command now auto-detects the input format (BAM/CRAM/SAM).\n"
-"     Further control over the CRAM format can be specified by using the\n"
-"     --output-fmt-option, e.g. to specify the number of sequences per slice\n"
-"     and to use avoid reference based compression:\n"
-"     `samtools view -C --output-fmt-option seqs_per_slice=5000 \\\n"
-"         --output-fmt-option no_ref -o out.cram in.bam'\n"
+"1. This command now auto-detects the input format (BAM/CRAM/SAM).\n"
+"   Further control over the CRAM format can be specified by using the\n"
+"   --output-fmt-option, e.g. to specify the number of sequences per slice\n"
+"   and to use avoid reference based compression:\n"
  "\n"
-"     Options can also be specified as a comma separated list within the\n"
-"     --output-fmt value too.  For example this is equivalent to the above\n"
-"     `samtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n"
-"         -o out.cram in.bam'\n"
+"\tsamtools view -C --output-fmt-option seqs_per_slice=5000 \\\n"
+"\t   --output-fmt-option no_ref -o out.cram in.bam\n"
  "\n"
-"  2. The file supplied with `-t' is SPACE/TAB delimited with the first\n"
-"     two fields of each line consisting of the reference name and the\n"
-"     corresponding sequence length. The `.fai' file generated by \n"
-"     `samtools faidx' is suitable for use as this file. This may be an\n"
-"     empty file if reads are unaligned.\n"
+"   Options can also be specified as a comma separated list within the\n"
+"   --output-fmt value too.  For example this is equivalent to the above\n"
  "\n"
-"  3. SAM->BAM conversion: `samtools view -bT ref.fa in.sam.gz'.\n"
+"\tsamtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n"
+"\t   -o out.cram in.bam\n"
  "\n"
-"  4. BAM->SAM conversion: `samtools view -h in.bam'.\n"
+"2. The file supplied with `-t' is SPACE/TAB delimited with the first\n"
+"   two fields of each line consisting of the reference name and the\n"
+"   corresponding sequence length. The `.fai' file generated by \n"
+"   `samtools faidx' is suitable for use as this file. This may be an\n"
+"   empty file if reads are unaligned.\n"
  "\n"
-"  5. A region should be presented in one of the following formats:\n"
-"     `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n"
-"     specified, the input alignment file must be a sorted and indexed\n"
-"     alignment (BAM/CRAM) file.\n"
+"3. SAM->BAM conversion:  samtools view -bT ref.fa in.sam.gz\n"
  "\n"
-"  6. Option `-u' is preferred over `-b' when the output is piped to\n"
-"     another samtools command.\n"
+"4. BAM->SAM conversion:  samtools view -h in.bam\n"
+"\n"
+"5. A region should be presented in one of the following formats:\n"
+"   `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n"
+"   specified, the input alignment file must be a sorted and indexed\n"
+"   alignment (BAM/CRAM) file.\n"
+"\n"
+"6. Option `-u' is preferred over `-b' when the output is piped to\n"
+"   another samtools command.\n"
  "\n");
  
      return exit_status;
@@ -611,6 +615,7 @@ static const char *copied_tags[] = { "RG", "BC", "QT", NULL };
  
  static void bam2fq_usage(FILE *to, const char *command)
  {
+    int fq = strcasecmp("fastq", command) == 0 || strcasecmp("bam2fq", command) == 0;
      fprintf(to,
  "Usage: samtools %s [options...] <in.bam>\n", command);
      fprintf(to,
@@ -620,10 +625,14 @@ static void bam2fq_usage(FILE *to, const char *command)
  "  -2 FILE   write paired reads flagged READ2 to FILE\n"
  "  -f INT    only include reads with all bits set in INT set in FLAG [0]\n"
  "  -F INT    only include reads with none of the bits set in INT set in FLAG [0]\n"
-"  -n        don't append /1 and /2 to the read name\n"
-"  -O        output quality in the OQ tag if present\n"
+"  -n        don't append /1 and /2 to the read name\n");
+    if (fq) fprintf(to,
+"  -O        output quality in the OQ tag if present\n");
+    fprintf(to,
  "  -s FILE   write singleton reads to FILE [assume single-end]\n"
-"  -t        copy RG, BC and QT tags to the FASTQ header line\n"
+"  -t        copy RG, BC and QT tags to the %s header line\n",
+    fq ? "FASTQ" : "FASTA");
+    if (fq) fprintf(to,
  "  -v INT    default quality score if not given in file [1]\n");
      sam_global_opt_help(to, "-.--.");
  }
@@ -673,7 +682,10 @@ static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t
      uint8_t *seq;
      uint8_t *qual = bam_get_qual(b);
      const uint8_t *oq = NULL;
-    if (state->use_oq) oq = bam_aux_get(b, "OQ") + 1;
+    if (state->use_oq) {
+        oq = bam_aux_get(b, "OQ");
+        if (oq) oq++; // skip tag type
+    }
      bool has_qual = (qual[0] != 0xff || (state->use_oq && oq)); // test if there is quality
  
      linebuf->l = 0;
@@ -921,7 +933,7 @@ static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
  
      bool valid = true;
      while (true) {
-        at_eof = sam_read1(state->fp, state->h, b);
+        at_eof = sam_read1(state->fp, state->h, b) < 0;
  
          if (!at_eof && filter_it_out(b, state)) continue;
          if (!at_eof) ++n_reads;
diff --git a/samtools/sam_view.c.pysam.c b/samtools/sam_view.c.pysam.c

index dfc806542f13cf2712a2e9f44a36e472dae659c9..3d5ffa56a3cbdf8fb820e4244e736a7b72e8a7a0 100644 (file)
--- a/samtools/sam_view.c.pysam.c
+++ b/samtools/sam_view.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <string.h>
  #include <stdio.h>
@@ -98,7 +100,7 @@ static int process_aln(const bam_hdr_t *h, bam1_t *b, samview_settings_t* settin
      }
      if (settings->library) {
          const char *p = bam_get_library((bam_hdr_t*)h, b);
-        if (p && strcmp(p, settings->library) != 0) return 1;
+        if (!p || strcmp(p, settings->library) != 0) return 1;
      }
      if (settings->remove_aux_len) {
          size_t i;
@@ -317,8 +319,8 @@ int main_samview(int argc, char *argv[])
          case 'x':
              {
                  if (strlen(optarg) != 2) {
-                    fprintf(pysamerr, "main_samview: Error parsing -x auxiliary tags should be exactly two characters long.\n");
-                    return usage(pysamerr, EXIT_FAILURE, is_long_help);
+                    fprintf(pysam_stderr, "main_samview: Error parsing -x auxiliary tags should be exactly two characters long.\n");
+                    return usage(pysam_stderr, EXIT_FAILURE, is_long_help);
                  }
                  settings.remove_aux = (char**)realloc(settings.remove_aux, sizeof(char*) * (++settings.remove_aux_len));
                  settings.remove_aux[settings.remove_aux_len-1] = optarg;
@@ -327,7 +329,7 @@ int main_samview(int argc, char *argv[])
  
          default:
              if (parse_sam_global_opt(c, optarg, lopts, &ga) != 0)
-                return usage(pysamerr, EXIT_FAILURE, is_long_help);
+                return usage(pysam_stderr, EXIT_FAILURE, is_long_help);
              break;
          }
      }
@@ -347,7 +349,7 @@ int main_samview(int argc, char *argv[])
          strcat(out_mode, tmp);
          strcat(out_un_mode, tmp);
      }
-    if (argc == optind && isatty(STDIN_FILENO)) return usage(stdout, EXIT_SUCCESS, is_long_help); // potential memory leak...
+    if (argc == optind && isatty(STDIN_FILENO)) return usage(pysam_stdout, EXIT_SUCCESS, is_long_help); // potential memory leak...
  
      fn_in = (optind < argc)? argv[optind] : "-";
      // generate the fn_list if necessary
@@ -361,13 +363,13 @@ int main_samview(int argc, char *argv[])
  
      if (fn_list) {
          if (hts_set_fai_filename(in, fn_list) != 0) {
-            fprintf(pysamerr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+            fprintf(pysam_stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
              ret = 1;
              goto view_end;
          }
      }
      if ((header = sam_hdr_read(in)) == 0) {
-        fprintf(pysamerr, "[main_samview] fail to read the header from \"%s\".\n", fn_in);
+        fprintf(pysam_stderr, "[main_samview] fail to read the header from \"%s\".\n", fn_in);
          ret = 1;
          goto view_end;
      }
@@ -387,7 +389,7 @@ int main_samview(int argc, char *argv[])
          }
          if (fn_list) {
              if (hts_set_fai_filename(out, fn_list) != 0) {
-                fprintf(pysamerr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+                fprintf(pysam_stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
                  ret = 1;
                  goto view_end;
              }
@@ -396,29 +398,29 @@ int main_samview(int argc, char *argv[])
              out_mode[1] == 'b' || out_mode[1] == 'c' ||
              (ga.out.format != sam && ga.out.format != unknown_format))  {
              if (sam_hdr_write(out, header) != 0) {
-                fprintf(pysamerr, "[main_samview] failed to write the SAM header\n");
+                fprintf(pysam_stderr, "[main_samview] failed to write the SAM header\n");
                  ret = 1;
                  goto view_end;
              }
          }
          if (fn_un_out) {
-                if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) {
+            if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) {
                  print_error_errno("view", "failed to open \"%s\" for writing", fn_un_out);
                  ret = 1;
                  goto view_end;
              }
-                if (fn_list) {
-                    if (hts_set_fai_filename(un_out, fn_list) != 0) {
-                        fprintf(pysamerr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
-                        ret = 1;
-                        goto view_end;
-                    }
+            if (fn_list) {
+                if (hts_set_fai_filename(un_out, fn_list) != 0) {
+                    fprintf(pysam_stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+                    ret = 1;
+                    goto view_end;
                  }
+            }
              if (*out_format || is_header ||
                  out_un_mode[1] == 'b' || out_un_mode[1] == 'c' ||
                  (ga.out.format != sam && ga.out.format != unknown_format))  {
                  if (sam_hdr_write(un_out, header) != 0) {
-                    fprintf(pysamerr, "[main_samview] failed to write the SAM header\n");
+                    fprintf(pysam_stderr, "[main_samview] failed to write the SAM header\n");
                      ret = 1;
                      goto view_end;
                  }
@@ -441,7 +443,7 @@ int main_samview(int argc, char *argv[])
              }
          }
          if (r < -1) {
-            fprintf(pysamerr, "[main_samview] truncated file.\n");
+            fprintf(pysam_stderr, "[main_samview] truncated file.\n");
              ret = 1;
          }
          bam_destroy1(b);
@@ -450,7 +452,7 @@ int main_samview(int argc, char *argv[])
          bam1_t *b;
          hts_idx_t *idx = sam_index_load(in, fn_in); // load index
          if (idx == 0) { // index is unavailable
-            fprintf(pysamerr, "[main_samview] random alignment retrieval only works for indexed BAM or CRAM files.\n");
+            fprintf(pysam_stderr, "[main_samview] random alignment retrieval only works for indexed BAM or CRAM files.\n");
              ret = 1;
              goto view_end;
          }
@@ -461,9 +463,9 @@ int main_samview(int argc, char *argv[])
              if (iter == NULL) { // region invalid or reference name not found
                  int beg, end;
                  if (hts_parse_reg(argv[i], &beg, &end))
-                    fprintf(pysamerr, "[main_samview] region \"%s\" specifies an unknown reference name. Continue anyway.\n", argv[i]);
+                    fprintf(pysam_stderr, "[main_samview] region \"%s\" specifies an unknown reference name. Continue anyway.\n", argv[i]);
                  else
-                    fprintf(pysamerr, "[main_samview] region \"%s\" could not be parsed. Continue anyway.\n", argv[i]);
+                    fprintf(pysam_stderr, "[main_samview] region \"%s\" could not be parsed. Continue anyway.\n", argv[i]);
                  continue;
              }
              // fetch alignments
@@ -477,7 +479,7 @@ int main_samview(int argc, char *argv[])
              }
              hts_itr_destroy(iter);
              if (result < -1) {
-                fprintf(pysamerr, "[main_samview] retrieval of region \"%s\" failed due to truncated file or corrupt BAM index file\n", argv[i]);
+                fprintf(pysam_stderr, "[main_samview] retrieval of region \"%s\" failed due to truncated file or corrupt BAM index file\n", argv[i]);
                  ret = 1;
                  break;
              }
@@ -488,7 +490,7 @@ int main_samview(int argc, char *argv[])
  
  view_end:
      if (is_count && ret == 0)
-        printf("%" PRId64 "\n", count);
+        fprintf(pysam_stdout, "%" PRId64 "\n", count);
  
      // close files, free and return
      if (in) check_sam_close("view", in, fn_in, "standard input", &ret);
@@ -526,7 +528,7 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
  "  -h       include header in SAM output\n"
  "  -H       print SAM header only (no alignments)\n"
  "  -c       print only the count of matching records\n"
-"  -o FILE  output file name [stdout]\n"
+"  -o FILE  output file name [pysam_stdout]\n"
  "  -U FILE  output reads not selected by filters to FILE [null]\n"
  // extra input
  "  -t FILE  FILE listing reference names and lengths (see long help) [null]\n"
@@ -558,35 +560,37 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
          fprintf(fp,
  "Notes:\n"
  "\n"
-"  1. This command now auto-detects the input format (BAM/CRAM/SAM).\n"
-"     Further control over the CRAM format can be specified by using the\n"
-"     --output-fmt-option, e.g. to specify the number of sequences per slice\n"
-"     and to use avoid reference based compression:\n"
-"     `samtools view -C --output-fmt-option seqs_per_slice=5000 \\\n"
-"         --output-fmt-option no_ref -o out.cram in.bam'\n"
+"1. This command now auto-detects the input format (BAM/CRAM/SAM).\n"
+"   Further control over the CRAM format can be specified by using the\n"
+"   --output-fmt-option, e.g. to specify the number of sequences per slice\n"
+"   and to use avoid reference based compression:\n"
  "\n"
-"     Options can also be specified as a comma separated list within the\n"
-"     --output-fmt value too.  For example this is equivalent to the above\n"
-"     `samtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n"
-"         -o out.cram in.bam'\n"
+"\tsamtools view -C --output-fmt-option seqs_per_slice=5000 \\\n"
+"\t   --output-fmt-option no_ref -o out.cram in.bam\n"
  "\n"
-"  2. The file supplied with `-t' is SPACE/TAB delimited with the first\n"
-"     two fields of each line consisting of the reference name and the\n"
-"     corresponding sequence length. The `.fai' file generated by \n"
-"     `samtools faidx' is suitable for use as this file. This may be an\n"
-"     empty file if reads are unaligned.\n"
+"   Options can also be specified as a comma separated list within the\n"
+"   --output-fmt value too.  For example this is equivalent to the above\n"
  "\n"
-"  3. SAM->BAM conversion: `samtools view -bT ref.fa in.sam.gz'.\n"
+"\tsamtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n"
+"\t   -o out.cram in.bam\n"
  "\n"
-"  4. BAM->SAM conversion: `samtools view -h in.bam'.\n"
+"2. The file supplied with `-t' is SPACE/TAB delimited with the first\n"
+"   two fields of each line consisting of the reference name and the\n"
+"   corresponding sequence length. The `.fai' file generated by \n"
+"   `samtools faidx' is suitable for use as this file. This may be an\n"
+"   empty file if reads are unaligned.\n"
  "\n"
-"  5. A region should be presented in one of the following formats:\n"
-"     `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n"
-"     specified, the input alignment file must be a sorted and indexed\n"
-"     alignment (BAM/CRAM) file.\n"
+"3. SAM->BAM conversion:  samtools view -bT ref.fa in.sam.gz\n"
  "\n"
-"  6. Option `-u' is preferred over `-b' when the output is piped to\n"
-"     another samtools command.\n"
+"4. BAM->SAM conversion:  samtools view -h in.bam\n"
+"\n"
+"5. A region should be presented in one of the following formats:\n"
+"   `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n"
+"   specified, the input alignment file must be a sorted and indexed\n"
+"   alignment (BAM/CRAM) file.\n"
+"\n"
+"6. Option `-u' is preferred over `-b' when the output is piped to\n"
+"   another samtools command.\n"
  "\n");
  
      return exit_status;
@@ -597,7 +601,7 @@ int main_import(int argc, char *argv[])
      int argc2, ret;
      char **argv2;
      if (argc != 4) {
-        fprintf(pysamerr, "Usage: samtools import <in.ref_list> <in.sam> <out.bam>\n");
+        fprintf(pysam_stderr, "Usage: samtools import <in.ref_list> <in.sam> <out.bam>\n");
          return 1;
      }
      argc2 = 6;
@@ -613,6 +617,7 @@ static const char *copied_tags[] = { "RG", "BC", "QT", NULL };
  
  static void bam2fq_usage(FILE *to, const char *command)
  {
+    int fq = strcasecmp("fastq", command) == 0 || strcasecmp("bam2fq", command) == 0;
      fprintf(to,
  "Usage: samtools %s [options...] <in.bam>\n", command);
      fprintf(to,
@@ -622,10 +627,14 @@ static void bam2fq_usage(FILE *to, const char *command)
  "  -2 FILE   write paired reads flagged READ2 to FILE\n"
  "  -f INT    only include reads with all bits set in INT set in FLAG [0]\n"
  "  -F INT    only include reads with none of the bits set in INT set in FLAG [0]\n"
-"  -n        don't append /1 and /2 to the read name\n"
-"  -O        output quality in the OQ tag if present\n"
+"  -n        don't append /1 and /2 to the read name\n");
+    if (fq) fprintf(to,
+"  -O        output quality in the OQ tag if present\n");
+    fprintf(to,
  "  -s FILE   write singleton reads to FILE [assume single-end]\n"
-"  -t        copy RG, BC and QT tags to the FASTQ header line\n"
+"  -t        copy RG, BC and QT tags to the %s header line\n",
+    fq ? "FASTQ" : "FASTA");
+    if (fq) fprintf(to,
  "  -v INT    default quality score if not given in file [1]\n");
      sam_global_opt_help(to, "-.--.");
  }
@@ -675,7 +684,10 @@ static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t
      uint8_t *seq;
      uint8_t *qual = bam_get_qual(b);
      const uint8_t *oq = NULL;
-    if (state->use_oq) oq = bam_aux_get(b, "OQ") + 1;
+    if (state->use_oq) {
+        oq = bam_aux_get(b, "OQ");
+        if (oq) oq++; // skip tag type
+    }
      bool has_qual = (qual[0] != 0xff || (state->use_oq && oq)); // test if there is quality
  
      linebuf->l = 0;
@@ -776,10 +788,10 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
              case 's': opts->fnse = optarg; break;
              case 't': opts->copy_tags = true; break;
              case 'v': opts->def_qual = atoi(optarg); break;
-            case '?': bam2fq_usage(pysamerr, argv[0]); free(opts); return false;
+            case '?': bam2fq_usage(pysam_stderr, argv[0]); free(opts); return false;
              default:
                  if (parse_sam_global_opt(c, optarg, lopts, &opts->ga) != 0) {
-                    bam2fq_usage(pysamerr, argv[0]); free(opts); return false;
+                    bam2fq_usage(pysam_stderr, argv[0]); free(opts); return false;
                  }
                  break;
          }
@@ -788,8 +800,8 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
      if (opts->fnr[1] || opts->fnr[2]) opts->has12 = false;
  
      if (opts->def_qual < 0 || 93 < opts->def_qual) {
-        fprintf(pysamerr, "Invalid -v default quality %i, allowed range 0 to 93\n", opts->def_qual);
-        bam2fq_usage(pysamerr, argv[0]);
+        fprintf(pysam_stderr, "Invalid -v default quality %i, allowed range 0 to 93\n", opts->def_qual);
+        bam2fq_usage(pysam_stderr, argv[0]);
          free(opts);
          return true;
      }
@@ -801,20 +813,20 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
          opts->filetype = FASTA;
      } else {
          print_error("bam2fq", "Unrecognised type call \"%s\", this should be impossible... but you managed it!", type_str);
-        bam2fq_usage(pysamerr, argv[0]);
+        bam2fq_usage(pysam_stderr, argv[0]);
          free(opts);
          return false;
      }
  
      if ((argc - (optind)) == 0) {
-        bam2fq_usage(stdout, argv[0]);
+        bam2fq_usage(pysam_stdout, argv[0]);
          free(opts);
          return false;
      }
  
      if ((argc - (optind)) != 1) {
-        fprintf(pysamerr, "Too many arguments.\n");
-        bam2fq_usage(pysamerr, argv[0]);
+        fprintf(pysam_stderr, "Too many arguments.\n");
+        bam2fq_usage(pysam_stderr, argv[0]);
          free(opts);
          return false;
      }
@@ -843,12 +855,12 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
      uint32_t rf = SAM_QNAME | SAM_FLAG | SAM_SEQ | SAM_QUAL;
      if (opts->use_oq) rf |= SAM_AUX;
      if (hts_set_opt(state->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) {
-        fprintf(pysamerr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
+        fprintf(pysam_stderr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
          free(state);
          return false;
      }
      if (hts_set_opt(state->fp, CRAM_OPT_DECODE_MD, 0)) {
-        fprintf(pysamerr, "Failed to set CRAM_OPT_DECODE_MD value\n");
+        fprintf(pysam_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
          free(state);
          return false;
      }
@@ -871,13 +883,13 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
                  return false;
              }
          } else {
-            state->fpr[i] = stdout;
+            state->fpr[i] = pysam_stdout;
          }
      }
  
      state->h = sam_hdr_read(state->fp);
      if (state->h == NULL) {
-        fprintf(pysamerr, "Failed to read header for \"%s\"\n", opts->fn_input);
+        fprintf(pysam_stderr, "Failed to read header for \"%s\"\n", opts->fn_input);
          free(state);
          return false;
      }
@@ -894,7 +906,7 @@ static bool destroy_state(const bam2fq_opts_t *opts, bam2fq_state_t *state, int*
      if (state->fpse && fclose(state->fpse)) { print_error_errno("bam2fq", "Error closing singleton file \"%s\"", opts->fnse); valid = false; }
      int i;
      for (i = 0; i < 3; ++i) {
-        if (state->fpr[i] != stdout && fclose(state->fpr[i])) { print_error_errno("bam2fq", "Error closing r%d file \"%s\"", i, opts->fnr[i]); valid = false; }
+        if (state->fpr[i] != pysam_stdout && fclose(state->fpr[i])) { print_error_errno("bam2fq", "Error closing r%d file \"%s\"", i, opts->fnr[i]); valid = false; }
      }
      free(state);
      return valid;
@@ -923,7 +935,7 @@ static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
  
      bool valid = true;
      while (true) {
-        at_eof = sam_read1(state->fp, state->h, b);
+        at_eof = sam_read1(state->fp, state->h, b) < 0;
  
          if (!at_eof && filter_it_out(b, state)) continue;
          if (!at_eof) ++n_reads;
@@ -960,7 +972,7 @@ static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
          int b_score = bam_get_qual(b)[0] != 0xff? 2 : 1;
          if (b_score > score[which_readpart(b)]) {
              if(!bam1_to_fq(b, &linebuf[which_readpart(b)], state)) {
-                fprintf(pysamerr, "[%s] Error converting read to FASTA/Q\n", __func__);
+                fprintf(pysam_stderr, "[%s] Error converting read to FASTA/Q\n", __func__);
                  return false;
              }
              score[which_readpart(b)] = b_score;
@@ -975,8 +987,8 @@ static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
      free(linebuf[0].s);
      free(linebuf[1].s);
      free(linebuf[2].s);
-    fprintf(pysamerr, "[M::%s] discarded %" PRId64 " singletons\n", __func__, n_singletons);
-    fprintf(pysamerr, "[M::%s] processed %" PRId64 " reads\n", __func__, n_reads);
+    fprintf(pysam_stderr, "[M::%s] discarded %" PRId64 " singletons\n", __func__, n_singletons);
+    fprintf(pysam_stderr, "[M::%s] processed %" PRId64 " reads\n", __func__, n_reads);
  
      return valid;
  }
@@ -1003,7 +1015,7 @@ static bool bam2fq_mainloop(bam2fq_state_t *state)
      free(linebuf.s);
      bam_destroy1(b);
  
-    fprintf(pysamerr, "[M::%s] processed %" PRId64 " reads\n", __func__, n_reads);
+    fprintf(pysam_stderr, "[M::%s] processed %" PRId64 " reads\n", __func__, n_reads);
      return true;
  }
  
diff --git a/samtools/sample.c b/samtools/sample.c

index aa3813200ab3d7045fac6f2c25971f28a70e413b..4cc89ce9b5fc7556900c398d89f6a06f1ddca26b 100644 (file)
--- a/samtools/sample.c
+++ b/samtools/sample.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <string.h>
  #include "sample.h"
diff --git a/samtools/sample.c.pysam.c b/samtools/sample.c.pysam.c

index 73ec01fbca0664c3f8180b018f317c3f7fc222a5..dff818825ad988a2e092e13f53bf43a476da63ed 100644 (file)
--- a/samtools/sample.c.pysam.c
+++ b/samtools/sample.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdlib.h>
  #include <string.h>
  #include "sample.h"
diff --git a/samtools/stats.c b/samtools/stats.c

index 512df1d67d3557d7d3abdef63e2addedf8b50f4c..eb6bb525dc6c881ad3effebef9dd396620cbd270 100644 (file)
--- a/samtools/stats.c
+++ b/samtools/stats.c
@@ -37,6 +37,8 @@ DEALINGS IN THE SOFTWARE.  */
  
  */
  
+#include <config.h>
+
  #include <unistd.h> // for isatty()
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/samtools/stats.c.pysam.c b/samtools/stats.c.pysam.c

index e30b2ad14aabfd8d95987f946004c5d8d14dc425..da187ac4b5e460f9e95c4c2bdfe2c43d30d1b4dd 100644 (file)
--- a/samtools/stats.c.pysam.c
+++ b/samtools/stats.c.pysam.c
@@ -39,6 +39,8 @@ DEALINGS IN THE SOFTWARE.  */
  
  */
  
+#include <config.h>
+
  #include <unistd.h> // for isatty()
  #include <stdio.h>
  #include <stdlib.h>
@@ -1240,7 +1242,7 @@ void init_regions(stats_t *stats, const char *file)
          if ( tid < 0 )
          {
              if ( !warned )
-                fprintf(pysamerr,"Warning: Some sequences not present in the BAM, e.g. \"%s\". This message is printed only once.\n", line.s);
+                fprintf(pysam_stderr,"Warning: Some sequences not present in the BAM, e.g. \"%s\". This message is printed only once.\n", line.s);
              warned = 1;
              continue;
          }
@@ -1334,7 +1336,7 @@ void init_group_id(stats_t *stats, const char *id)
          {
              khiter_t k = kh_get(kh_rg, stats->rg_hash, key);
              if ( k != kh_end(stats->rg_hash) )
-                fprintf(pysamerr, "[init_group_id] The group ID not unique: \"%s\"\n", key);
+                fprintf(pysam_stderr, "[init_group_id] The group ID not unique: \"%s\"\n", key);
              int ret;
              k = kh_put(kh_rg, stats->rg_hash, key, &ret);
              kh_value(stats->rg_hash, k) = val;
@@ -1344,7 +1346,7 @@ void init_group_id(stats_t *stats, const char *id)
      if ( !n )
          error("The sample or read group \"%s\" not present.\n", id);
  #else
-    fprintf(pysamerr, "Samtools-htslib: init_group_id() header parsing not yet implemented\n");
+    fprintf(pysam_stderr, "Samtools-htslib: init_group_id() header parsing not yet implemented\n");
      abort();
  #endif
  }
@@ -1354,35 +1356,35 @@ static void error(const char *format, ...)
  {
      if ( !format )
      {
-        printf("About: The program collects statistics from BAM files. The output can be visualized using plot-bamstats.\n");
-        printf("Usage: samtools stats [OPTIONS] file.bam\n");
-        printf("       samtools stats [OPTIONS] file.bam chr:from-to\n");
-        printf("Options:\n");
-        printf("    -c, --coverage <int>,<int>,<int>    Coverage distribution min,max,step [1,1000,1]\n");
-        printf("    -d, --remove-dups                   Exclude from statistics reads marked as duplicates\n");
-        printf("    -f, --required-flag  <str|int>      Required flag, 0 for unset. See also `samtools flags` [0]\n");
-        printf("    -F, --filtering-flag <str|int>      Filtering flag, 0 for unset. See also `samtools flags` [0]\n");
-        printf("        --GC-depth <float>              the size of GC-depth bins (decreasing bin size increases memory requirement) [2e4]\n");
-        printf("    -h, --help                          This help message\n");
-        printf("    -i, --insert-size <int>             Maximum insert size [8000]\n");
-        printf("    -I, --id <string>                   Include only listed read group or sample name\n");
-        printf("    -l, --read-length <int>             Include in the statistics only reads with the given read length []\n");
-        printf("    -m, --most-inserts <float>          Report only the main part of inserts [0.99]\n");
-        printf("    -P, --split-prefix <str>            Path or string prefix for filepaths output by -S (default is input filename)\n");
-        printf("    -q, --trim-quality <int>            The BWA trimming parameter [0]\n");
-        printf("    -r, --ref-seq <file>                Reference sequence (required for GC-depth and mismatches-per-cycle calculation).\n");
-        printf("    -s, --sam                           Ignored (input format is auto-detected).\n");
-        printf("    -S, --split <tag>                   Also write statistics to separate files split by tagged field.\n");
-        printf("    -t, --target-regions <file>         Do stats in these regions only. Tab-delimited file chr,from,to, 1-based, inclusive.\n");
-        printf("    -x, --sparse                        Suppress outputting IS rows where there are no insertions.\n");
-        sam_global_opt_help(stdout, "-.--.");
-        printf("\n");
+        fprintf(pysam_stdout, "About: The program collects statistics from BAM files. The output can be visualized using plot-bamstats.\n");
+        fprintf(pysam_stdout, "Usage: samtools stats [OPTIONS] file.bam\n");
+        fprintf(pysam_stdout, "       samtools stats [OPTIONS] file.bam chr:from-to\n");
+        fprintf(pysam_stdout, "Options:\n");
+        fprintf(pysam_stdout, "    -c, --coverage <int>,<int>,<int>    Coverage distribution min,max,step [1,1000,1]\n");
+        fprintf(pysam_stdout, "    -d, --remove-dups                   Exclude from statistics reads marked as duplicates\n");
+        fprintf(pysam_stdout, "    -f, --required-flag  <str|int>      Required flag, 0 for unset. See also `samtools flags` [0]\n");
+        fprintf(pysam_stdout, "    -F, --filtering-flag <str|int>      Filtering flag, 0 for unset. See also `samtools flags` [0]\n");
+        fprintf(pysam_stdout, "        --GC-depth <float>              the size of GC-depth bins (decreasing bin size increases memory requirement) [2e4]\n");
+        fprintf(pysam_stdout, "    -h, --help                          This help message\n");
+        fprintf(pysam_stdout, "    -i, --insert-size <int>             Maximum insert size [8000]\n");
+        fprintf(pysam_stdout, "    -I, --id <string>                   Include only listed read group or sample name\n");
+        fprintf(pysam_stdout, "    -l, --read-length <int>             Include in the statistics only reads with the given read length []\n");
+        fprintf(pysam_stdout, "    -m, --most-inserts <float>          Report only the main part of inserts [0.99]\n");
+        fprintf(pysam_stdout, "    -P, --split-prefix <str>            Path or string prefix for filepaths output by -S (default is input filename)\n");
+        fprintf(pysam_stdout, "    -q, --trim-quality <int>            The BWA trimming parameter [0]\n");
+        fprintf(pysam_stdout, "    -r, --ref-seq <file>                Reference sequence (required for GC-depth and mismatches-per-cycle calculation).\n");
+        fprintf(pysam_stdout, "    -s, --sam                           Ignored (input format is auto-detected).\n");
+        fprintf(pysam_stdout, "    -S, --split <tag>                   Also write statistics to separate files split by tagged field.\n");
+        fprintf(pysam_stdout, "    -t, --target-regions <file>         Do stats in these regions only. Tab-delimited file chr,from,to, 1-based, inclusive.\n");
+        fprintf(pysam_stdout, "    -x, --sparse                        Suppress outputting IS rows where there are no insertions.\n");
+        sam_global_opt_help(pysam_stdout, "-.--.");
+        fprintf(pysam_stdout, "\n");
      }
      else
      {
          va_list ap;
          va_start(ap, format);
-        vfprintf(pysamerr, format, ap);
+        vfprintf(pysam_stderr, format, ap);
          va_end(ap);
      }
      exit(1);
@@ -1708,13 +1710,13 @@ int main_stats(int argc, char *argv[])
          }
  
          if (ret < -1) {
-            fprintf(pysamerr, "Failure while decoding file\n");
+            fprintf(pysam_stderr, "Failure while decoding file\n");
              return 1;
          }
      }
  
      round_buffer_flush(all_stats, -1);
-    output_stats(stdout, all_stats, sparse);
+    output_stats(pysam_stdout, all_stats, sparse);
      if (info->split_tag)
          output_split_stats(split_hash, bam_fname, sparse);
  
diff --git a/samtools/stats_isize.c b/samtools/stats_isize.c

index e6b9dc1d7ea39d0ee4abef7a716ef5c03b341f19..3aa9c205f2c0ff11edfc51e93e6d176b13ac1b97 100644 (file)
--- a/samtools/stats_isize.c
+++ b/samtools/stats_isize.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include "stats_isize.h"
  #include <htslib/khash.h>
diff --git a/samtools/stats_isize.c.pysam.c b/samtools/stats_isize.c.pysam.c

index a25e4d7b0fe5001d370551625db650549564ccd5..6ae908886c57b23f2ea4ba96e72d0ffb68a7ccdd 100644 (file)
--- a/samtools/stats_isize.c.pysam.c
+++ b/samtools/stats_isize.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <stdio.h>
  #include "stats_isize.h"
  #include <htslib/khash.h>
@@ -94,7 +96,7 @@ static void sparse_set_f(isize_data_t data, int at, isize_insert_t field, uint64
              kh_value(h, it) = rec;
              a->max = max(at, a->max);
          } else {
-            fprintf(pysamerr, "%s\n", "Failed to allocate memory for isize_sparse_record_t");
+            fprintf(pysam_stderr, "%s\n", "Failed to allocate memory for isize_sparse_record_t");
              exit(11);
          }
      } else {
diff --git a/samtools/test/merge/test_bam_translate.c b/samtools/test/merge/test_bam_translate.c

index 854779be5eeffbb5ac8e93576c1710437dc66227..6ed561e4f1bdd7087091c000c303693ce7dac99b 100644 (file)
--- a/samtools/test/merge/test_bam_translate.c
+++ b/samtools/test/merge/test_bam_translate.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_sort.c"
  #include "../test.h"
  #include <stdio.h>
diff --git a/samtools/test/merge/test_bam_translate.c.pysam.c b/samtools/test/merge/test_bam_translate.c.pysam.c

index d11fbf88ac3f9b9448eb6d975c80f3cd449c1c67..193954d5b219c0f36fe437bef47aff5525363d85 100644 (file)
--- a/samtools/test/merge/test_bam_translate.c.pysam.c
+++ b/samtools/test/merge/test_bam_translate.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_sort.c"
  #include "../test.h"
  #include <stdio.h>
@@ -33,40 +35,40 @@ DEALINGS IN THE SOFTWARE.  */
  #include <unistd.h>
  
  void dump_read(bam1_t* b) {
-    printf("->core.tid:(%d)\n", b->core.tid);
-    printf("->core.pos:(%d)\n", b->core.pos);
-    printf("->core.bin:(%d)\n", b->core.bin);
-    printf("->core.qual:(%d)\n", b->core.qual);
-    printf("->core.l_qname:(%d)\n", b->core.l_qname);
-    printf("->core.flag:(%d)\n", b->core.flag);
-    printf("->core.n_cigar:(%d)\n", b->core.n_cigar);
-    printf("->core.l_qseq:(%d)\n", b->core.l_qseq);
-    printf("->core.mtid:(%d)\n", b->core.mtid);
-    printf("->core.mpos:(%d)\n", b->core.mpos);
-    printf("->core.isize:(%d)\n", b->core.isize);
+    fprintf(pysam_stdout, "->core.tid:(%d)\n", b->core.tid);
+    fprintf(pysam_stdout, "->core.pos:(%d)\n", b->core.pos);
+    fprintf(pysam_stdout, "->core.bin:(%d)\n", b->core.bin);
+    fprintf(pysam_stdout, "->core.qual:(%d)\n", b->core.qual);
+    fprintf(pysam_stdout, "->core.l_qname:(%d)\n", b->core.l_qname);
+    fprintf(pysam_stdout, "->core.flag:(%d)\n", b->core.flag);
+    fprintf(pysam_stdout, "->core.n_cigar:(%d)\n", b->core.n_cigar);
+    fprintf(pysam_stdout, "->core.l_qseq:(%d)\n", b->core.l_qseq);
+    fprintf(pysam_stdout, "->core.mtid:(%d)\n", b->core.mtid);
+    fprintf(pysam_stdout, "->core.mpos:(%d)\n", b->core.mpos);
+    fprintf(pysam_stdout, "->core.isize:(%d)\n", b->core.isize);
      if (b->data) {
-        printf("->data:");
+        fprintf(pysam_stdout, "->data:");
          int i;
          for (i = 0; i < b->l_data; ++i) {
-            printf("%x ", b->data[i]);
+            fprintf(pysam_stdout, "%x ", b->data[i]);
          }
-        printf("\n");
+        fprintf(pysam_stdout, "\n");
      }
      if (b->core.l_qname) {
-        printf("qname: %s\n",bam_get_qname(b));
+        fprintf(pysam_stdout, "qname: %s\n",bam_get_qname(b));
      }
      if (b->core.l_qseq) {
-        printf("qseq:");
+        fprintf(pysam_stdout, "qseq:");
          int i;
          for (i = 0; i < b->core.l_qseq; ++i) {
-            printf("%c",seq_nt16_str[seq_nt16_table[bam_seqi(bam_get_seq(b),i)]]);
+            fprintf(pysam_stdout, "%c",seq_nt16_str[seq_nt16_table[bam_seqi(bam_get_seq(b),i)]]);
          }
-        printf("\n");
-        printf("qual:");
+        fprintf(pysam_stdout, "\n");
+        fprintf(pysam_stdout, "qual:");
          for (i = 0; i < b->core.l_qseq; ++i) {
-            printf("%c",bam_get_qual(b)[i]);
+            fprintf(pysam_stdout, "%c",bam_get_qual(b)[i]);
          }
-        printf("\n");
+        fprintf(pysam_stdout, "\n");
  
      }
  
@@ -75,18 +77,18 @@ void dump_read(bam1_t* b) {
          uint8_t* aux = bam_get_aux(b);
  
          while (i < bam_get_l_aux(b)) {
-            printf("%.2s:%c:",aux+i,*(aux+i+2));
+            fprintf(pysam_stdout, "%.2s:%c:",aux+i,*(aux+i+2));
              i += 2;
              switch (*(aux+i)) {
                  case 'Z':
-                    while (*(aux+1+i) != '\0') { putc(*(aux+1+i), stdout); ++i; }
+                    while (*(aux+1+i) != '\0') { putc(*(aux+1+i), pysam_stdout); ++i; }
                      break;
              }
-            putc('\n',stdout);
+            putc('\n',pysam_stdout);
              ++i;++i;
          }
      }
-    printf("\n");
+    fprintf(pysam_stdout, "\n");
  }
  
  void trans_tbl_test_init(trans_tbl_t* tbl, int32_t n_targets)
@@ -334,7 +336,7 @@ void setup_test_6(bam1_t** b_in, trans_tbl_t* tbl) {
  }
  
  
-int main(int argc, char**argv)
+int samtools_test_bam_translate_main(int argc, char**argv)
  {
      // test state
      const int NUM_TESTS = 6;
@@ -355,30 +357,30 @@ int main(int argc, char**argv)
  
      bam1_t* b;
  
-    // Setup pysamerr redirect
+    // Setup pysam_stderr redirect
      kstring_t res = { 0, 0, NULL };
-    FILE* orig_pysamerr = fdopen(dup(STDERR_FILENO), "a"); // Save pysamerr
+    FILE* orig_pysam_stderr = fdopen(dup(STDERR_FILENO), "a"); // Save pysam_stderr
      char* tempfname = (optind < argc)? argv[optind] : "test_bam_translate.tmp";
      FILE* check = NULL;
  
      // setup
-    if (verbose) printf("BEGIN test 1\n");  // TID test
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 1\n");  // TID test
      trans_tbl_t tbl1;
      setup_test_1(&b,&tbl1);
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_read(b);
      }
-    if (verbose) printf("RUN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "RUN test 1\n");
  
      // test
-    xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+    xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
      bam_translate(b, &tbl1);
-    fclose(pysamerr);
+    fclose(pysam_stderr);
  
-    if (verbose) printf("END RUN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_read(b);
      }
  
@@ -390,33 +392,33 @@ int main(int argc, char**argv)
          ++success;
      } else {
          ++failure;
-        if (verbose) printf("FAIL test 1\n");
+        if (verbose) fprintf(pysam_stdout, "FAIL test 1\n");
      }
      fclose(check);
  
      // teardown
      bam_destroy1(b);
      trans_tbl_destroy(&tbl1);
-    if (verbose) printf("END test 1\n");
+    if (verbose) fprintf(pysam_stdout, "END test 1\n");
  
      // setup
-    if (verbose) printf("BEGIN test 2\n");  // RG exists and translate test
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 2\n");  // RG exists and translate test
      trans_tbl_t tbl2;
      setup_test_2(&b,&tbl2);
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_read(b);
      }
-    if (verbose) printf("RUN test 2\n");
+    if (verbose) fprintf(pysam_stdout, "RUN test 2\n");
  
      // test
-    xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+    xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
      bam_translate(b, &tbl2);
-    fclose(pysamerr);
+    fclose(pysam_stderr);
  
-    if (verbose) printf("END RUN test 2\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 2\n");
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_read(b);
      }
  
@@ -428,33 +430,33 @@ int main(int argc, char**argv)
          ++success;
      } else {
          ++failure;
-        if (verbose) printf("FAIL test 2\n");
+        if (verbose) fprintf(pysam_stdout, "FAIL test 2\n");
      }
      fclose(check);
  
      // teardown
      bam_destroy1(b);
      trans_tbl_destroy(&tbl2);
-    if (verbose) printf("END test 2\n");
+    if (verbose) fprintf(pysam_stdout, "END test 2\n");
  
-    if (verbose) printf("BEGIN test 3\n");  // PG exists and translate  test
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 3\n");  // PG exists and translate  test
      // setup
      trans_tbl_t tbl3;
      setup_test_3(&b,&tbl3);
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_read(b);
      }
-    if (verbose) printf("RUN test 3\n");
+    if (verbose) fprintf(pysam_stdout, "RUN test 3\n");
  
      // test
-    xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+    xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
      bam_translate(b, &tbl3);
-    fclose(pysamerr);
+    fclose(pysam_stderr);
  
-    if (verbose) printf("END RUN test 3\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 3\n");
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_read(b);
      }
  
@@ -466,33 +468,33 @@ int main(int argc, char**argv)
          ++success;
      } else {
          ++failure;
-        if (verbose) printf("FAIL test 3\n");
+        if (verbose) fprintf(pysam_stdout, "FAIL test 3\n");
      }
      fclose(check);
  
      // teardown
      bam_destroy1(b);
      trans_tbl_destroy(&tbl3);
-    if (verbose) printf("END test 3\n");
+    if (verbose) fprintf(pysam_stdout, "END test 3\n");
  
-    if (verbose) printf("BEGIN test 4\n");  // RG test non-existent
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 4\n");  // RG test non-existent
      // setup
      trans_tbl_t tbl4;
      setup_test_4(&b,&tbl4);
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_read(b);
      }
-    if (verbose) printf("RUN test 4\n");
+    if (verbose) fprintf(pysam_stdout, "RUN test 4\n");
  
      // test
-    xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+    xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
      bam_translate(b, &tbl4);
-    fclose(pysamerr);
+    fclose(pysam_stderr);
  
-    if (verbose) printf("END RUN test 4\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 4\n");
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_read(b);
      }
      // check result
@@ -503,32 +505,32 @@ int main(int argc, char**argv)
          ++success;
      } else {
          ++failure;
-        if (verbose) printf("FAIL test 4\n");
+        if (verbose) fprintf(pysam_stdout, "FAIL test 4\n");
      }
      fclose(check);
  
      // teardown
      bam_destroy1(b);
      trans_tbl_destroy(&tbl4);
-    if (verbose) printf("END test 4\n");
+    if (verbose) fprintf(pysam_stdout, "END test 4\n");
  
-    if (verbose) printf("BEGIN test 5\n");  // PG test non-existent
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 5\n");  // PG test non-existent
      // setup
      trans_tbl_t tbl5;
      setup_test_5(&b,&tbl5);
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_read(b);
-        printf("RUN test 5\n");
+        fprintf(pysam_stdout, "RUN test 5\n");
      }
      // test
-    xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+    xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
      bam_translate(b, &tbl5);
-    fclose(pysamerr);
+    fclose(pysam_stderr);
  
-    if (verbose) printf("END RUN test 5\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 5\n");
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_read(b);
      }
  
@@ -540,33 +542,33 @@ int main(int argc, char**argv)
          ++success;
      } else {
          ++failure;
-        if (verbose) printf("FAIL test 5\n");
+        if (verbose) fprintf(pysam_stdout, "FAIL test 5\n");
      }
      fclose(check);
  
      // teardown
      bam_destroy1(b);
      trans_tbl_destroy(&tbl5);
-    if (verbose) printf("END test 5\n");
+    if (verbose) fprintf(pysam_stdout, "END test 5\n");
  
-    if (verbose) printf("BEGIN test 6\n");  // RG and PG exists and translate test
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 6\n");  // RG and PG exists and translate test
      // setup
      trans_tbl_t tbl6;
      setup_test_6(&b,&tbl6);
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_read(b);
      }
-    if (verbose) printf("RUN test 6\n");
+    if (verbose) fprintf(pysam_stdout, "RUN test 6\n");
  
      // test
-    xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+    xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
      bam_translate(b, &tbl6);
-    fclose(pysamerr);
+    fclose(pysam_stderr);
  
-    if (verbose) printf("END RUN test 6\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 6\n");
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_read(b);
      }
  
@@ -578,21 +580,21 @@ int main(int argc, char**argv)
          ++success;
      } else {
          ++failure;
-        if (verbose) printf("FAIL test 6\n");
+        if (verbose) fprintf(pysam_stdout, "FAIL test 6\n");
      }
      fclose(check);
  
      // teardown
      bam_destroy1(b);
      trans_tbl_destroy(&tbl6);
-    if (verbose) printf("END test 6\n");
+    if (verbose) fprintf(pysam_stdout, "END test 6\n");
  
      // Cleanup
      free(res.s);
      remove(tempfname);
      if (failure > 0)
-        fprintf(orig_pysamerr, "%d failures %d successes\n", failure, success);
-    fclose(orig_pysamerr);
+        fprintf(orig_pysam_stderr, "%d failures %d successes\n", failure, success);
+    fclose(orig_pysam_stderr);
  
      return (success == NUM_TESTS)? EXIT_SUCCESS : EXIT_FAILURE;
  }
diff --git a/samtools/test/merge/test_rtrans_build.c b/samtools/test/merge/test_rtrans_build.c

index df50921598139445679e094f050d9666fb15c576..0f23b48d5739b93bdda37d66e7decc7d28932e78 100644 (file)
--- a/samtools/test/merge/test_rtrans_build.c
+++ b/samtools/test/merge/test_rtrans_build.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_sort.c"
  
  void dump_rtrans(int* rtrans, int n, int n_targets) {
diff --git a/samtools/test/merge/test_rtrans_build.c.pysam.c b/samtools/test/merge/test_rtrans_build.c.pysam.c

index fcbc4585b43d2ebfb9bb29fc022bb7c2da1022f3..0ac136762b1449c76f7dbbf5b757346c7906856c 100644 (file)
--- a/samtools/test/merge/test_rtrans_build.c.pysam.c
+++ b/samtools/test/merge/test_rtrans_build.c.pysam.c
@@ -24,16 +24,18 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_sort.c"
  
  void dump_rtrans(int* rtrans, int n, int n_targets) {
-    printf("->n_targets:(%d)\n", n_targets);
+    fprintf(pysam_stdout, "->n_targets:(%d)\n", n_targets);
      int i, j;
      for (i = 0; i < n; ++i) {
-        fprintf(pysamerr, "%d",rtrans[i*n_targets+0]);
+        fprintf(pysam_stderr, "%d",rtrans[i*n_targets+0]);
          for (j = 1; j < n_targets; ++j)
-            fprintf(pysamerr, "\t%d",rtrans[i*n_targets+j]);
-        fprintf(pysamerr, "\n");
+            fprintf(pysam_stderr, "\t%d",rtrans[i*n_targets+j]);
+        fprintf(pysam_stderr, "\n");
      }
  }
  
@@ -62,7 +64,7 @@ bool check_test_1(trans_tbl_t* tbl, int* rtrans) {
  }
  
  
-int main(int argc, char**argv)
+int samtools_test_rtrans_build_main(int argc, char**argv)
  {
      const int NUM_TESTS = 1;
      int verbose = 0;
@@ -81,7 +83,7 @@ int main(int argc, char**argv)
      const long GIMMICK_SEED = 0x1234330e;
      srand48(GIMMICK_SEED);
  
-    if (verbose) printf("BEGIN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 1\n");
      // setup
      trans_tbl_t tbl_1[2];
      int n_targets_1 = 3;
@@ -92,29 +94,29 @@ int main(int argc, char**argv)
      if (verbose > 1) {
          // dump_trans_tid
      }
-    if (verbose) printf("RUN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "RUN test 1\n");
      rtrans_1 = rtrans_build(n_1, n_targets_1, &tbl_1[0]);
-    if (verbose) printf("END RUN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
      if (verbose > 1) {
-        printf("rtrans\n");
+        fprintf(pysam_stdout, "rtrans\n");
          dump_rtrans(rtrans_1, n_1, n_targets_1);
      }
      if (check_test_1(&tbl_1[0], rtrans_1)) {
          ++success;
      } else {
          ++failure;
-        if (verbose) printf("FAIL test 1\n");
+        if (verbose) fprintf(pysam_stdout, "FAIL test 1\n");
      }
      // teardown
      trans_tbl_destroy(&tbl_1[0]);
      trans_tbl_destroy(&tbl_1[1]);
      free(rtrans_1);
-    if (verbose) printf("END test 1\n");
+    if (verbose) fprintf(pysam_stdout, "END test 1\n");
  
      if (success == NUM_TESTS) {
          return 0;
      } else {
-        fprintf(pysamerr, "%d failures %d successes\n", failure, success);
+        fprintf(pysam_stderr, "%d failures %d successes\n", failure, success);
          return 1;
      }
  }
diff --git a/samtools/test/merge/test_trans_tbl_init.c b/samtools/test/merge/test_trans_tbl_init.c

index b1164a3aca73463758a0b5c5fc1a03ef7342122c..d557932fc72fd5db730855a9df238b160719ca0d 100644 (file)
--- a/samtools/test/merge/test_trans_tbl_init.c
+++ b/samtools/test/merge/test_trans_tbl_init.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_sort.c"
  #include <assert.h>
  #include <regex.h>
@@ -47,7 +49,7 @@ void dump_header(bam_hdr_t* hdr) {
  static int populate_merged_header(bam_hdr_t *hdr, merged_header_t *merged_hdr) {
      trans_tbl_t dummy;
      int res;
-    res = trans_tbl_init(merged_hdr, hdr, &dummy, 0, 0, NULL);
+    res = trans_tbl_init(merged_hdr, hdr, &dummy, 0, 0, 1, NULL);
      trans_tbl_destroy(&dummy);
      return res;
  }
@@ -359,7 +361,7 @@ int main(int argc, char**argv)
          dump_header(translate);
      }
      if (verbose) printf("RUN test 1\n");
-    trans_tbl_init(merged_hdr, translate, &tbl_1, false, false, NULL);
+    trans_tbl_init(merged_hdr, translate, &tbl_1, false, false, true, NULL);
      out = finish_merged_header(merged_hdr);
      free_merged_header(merged_hdr);
      if (verbose) printf("END RUN test 1\n");
@@ -396,7 +398,7 @@ int main(int argc, char**argv)
          dump_header(translate);
      }
      if (verbose) printf("RUN test 2\n");
-    trans_tbl_init(merged_hdr, translate, &tbl_2, false, false, NULL);
+    trans_tbl_init(merged_hdr, translate, &tbl_2, false, false, true, NULL);
      out = finish_merged_header(merged_hdr);
      free_merged_header(merged_hdr);
      if (verbose) printf("END RUN test 2\n");
@@ -432,7 +434,7 @@ int main(int argc, char**argv)
          dump_header(translate);
       }
      if (verbose) printf("RUN test 3\n");
-    trans_tbl_init(merged_hdr, translate, &tbl_3, false, false, NULL);
+    trans_tbl_init(merged_hdr, translate, &tbl_3, false, false, true, NULL);
      out = finish_merged_header(merged_hdr);
      free_merged_header(merged_hdr);
      if (verbose) printf("END RUN test 3\n");
@@ -468,7 +470,7 @@ int main(int argc, char**argv)
          dump_header(translate);
      }
      if (verbose) printf("RUN test 4\n");
-    trans_tbl_init(merged_hdr, translate, &tbl_4, false, false, NULL);
+    trans_tbl_init(merged_hdr, translate, &tbl_4, false, false, true, NULL);
      out = finish_merged_header(merged_hdr);
      free_merged_header(merged_hdr);
      if (verbose) printf("END RUN test 4\n");
@@ -505,7 +507,7 @@ int main(int argc, char**argv)
          dump_header(translate);
      }
      if (verbose) printf("RUN test 5\n");
-    trans_tbl_init(merged_hdr, translate, &tbl_5, false, false, NULL);
+    trans_tbl_init(merged_hdr, translate, &tbl_5, false, false, true, NULL);
      out = finish_merged_header(merged_hdr);
      free_merged_header(merged_hdr);
      if (verbose) printf("END RUN test 5\n");
@@ -541,7 +543,7 @@ int main(int argc, char**argv)
          dump_header(translate);
      }
      if (verbose) printf("RUN test 6\n");
-    trans_tbl_init(merged_hdr, translate, &tbl_6, false, false, "filename");
+    trans_tbl_init(merged_hdr, translate, &tbl_6, false, false, true, "filename");
      out = finish_merged_header(merged_hdr);
      free_merged_header(merged_hdr);
      if (verbose) printf("END RUN test 6\n");
diff --git a/samtools/test/merge/test_trans_tbl_init.c.pysam.c b/samtools/test/merge/test_trans_tbl_init.c.pysam.c

index 0f54989a6b6ca3dbc0fb42b067d4d776cf9410ef..af8af43375e0ffe3817d0c95fc7e6fe69583cf52 100644 (file)
--- a/samtools/test/merge/test_trans_tbl_init.c.pysam.c
+++ b/samtools/test/merge/test_trans_tbl_init.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_sort.c"
  #include <assert.h>
  #include <regex.h>
@@ -34,22 +36,22 @@ typedef struct refseq_info {
  } refseq_info_t;
  
  void dump_header(bam_hdr_t* hdr) {
-    printf("->n_targets:(%d)\n", hdr->n_targets);
+    fprintf(pysam_stdout, "->n_targets:(%d)\n", hdr->n_targets);
      int i;
      for (i = 0; i < hdr->n_targets; ++i) {
-        printf("->target_name[%d]:(%s)\n",i,hdr->target_name[i]);
-        printf("->target_len[%d]:(%d)\n",i,hdr->target_len[i]);
+        fprintf(pysam_stdout, "->target_name[%d]:(%s)\n",i,hdr->target_name[i]);
+        fprintf(pysam_stdout, "->target_len[%d]:(%d)\n",i,hdr->target_len[i]);
      }
  
-    printf("->text:(");
-    fwrite((void*)hdr->text, (size_t) hdr->l_text, 1, stdout);
-    printf(")\n");
+    fprintf(pysam_stdout, "->text:(");
+    fwrite((void*)hdr->text, (size_t) hdr->l_text, 1, pysam_stdout);
+    fprintf(pysam_stdout, ")\n");
  }
  
  static int populate_merged_header(bam_hdr_t *hdr, merged_header_t *merged_hdr) {
      trans_tbl_t dummy;
      int res;
-    res = trans_tbl_init(merged_hdr, hdr, &dummy, 0, 0, NULL);
+    res = trans_tbl_init(merged_hdr, hdr, &dummy, 0, 0, 1, NULL);
      trans_tbl_destroy(&dummy);
      return res;
  }
@@ -325,7 +327,7 @@ bool check_test_6(bam_hdr_t* translate, bam_hdr_t* out, trans_tbl_t* tbl) {
      return true;
  }
  
-int main(int argc, char**argv)
+int samtools_test_trans_tbl_init_main(int argc, char**argv)
  {
      const int NUM_TESTS = 6;
      int verbose = 0;
@@ -349,7 +351,7 @@ int main(int argc, char**argv)
      bam_hdr_t* out;
      bam_hdr_t* translate;
  
-    if (verbose) printf("BEGIN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 1\n");
      // setup
      trans_tbl_t tbl_1;
      merged_header_t *merged_hdr = init_merged_header();
@@ -357,36 +359,36 @@ int main(int argc, char**argv)
      assert(translate);
      // test
      if (verbose > 1) {
-        printf("translate\n");
+        fprintf(pysam_stdout, "translate\n");
          dump_header(translate);
      }
-    if (verbose) printf("RUN test 1\n");
-    trans_tbl_init(merged_hdr, translate, &tbl_1, false, false, NULL);
+    if (verbose) fprintf(pysam_stdout, "RUN test 1\n");
+    trans_tbl_init(merged_hdr, translate, &tbl_1, false, false, true, NULL);
      out = finish_merged_header(merged_hdr);
      free_merged_header(merged_hdr);
-    if (verbose) printf("END RUN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
      if (verbose > 1) {
-        printf("translate\n");
+        fprintf(pysam_stdout, "translate\n");
          dump_header(translate);
-        printf("out\n");
+        fprintf(pysam_stdout, "out\n");
          dump_header(out);
      }
      if (check_test_1(translate, out, &tbl_1)) {
-        if (verbose) printf("Test 1 : PASS\n");
+        if (verbose) fprintf(pysam_stdout, "Test 1 : PASS\n");
          ++success;
      } else {
-        if (verbose) printf("Test 1 : FAIL\n");
-        fprintf(pysamerr, "Test 1 : FAIL\n");
+        if (verbose) fprintf(pysam_stdout, "Test 1 : FAIL\n");
+        fprintf(pysam_stderr, "Test 1 : FAIL\n");
          ++failure;
      }
      // teardown
      bam_hdr_destroy(translate);
      bam_hdr_destroy(out);
      trans_tbl_destroy(&tbl_1);
-    if (verbose) printf("END test 1\n");
+    if (verbose) fprintf(pysam_stdout, "END test 1\n");
  
      // test
-    if (verbose) printf("BEGIN test 2\n");
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 2\n");
      // reinit
      trans_tbl_t tbl_2;
  
@@ -394,108 +396,108 @@ int main(int argc, char**argv)
      translate = setup_test_2(merged_hdr);
      assert(translate);
      if (verbose > 1) {
-        printf("translate\n");
+        fprintf(pysam_stdout, "translate\n");
          dump_header(translate);
      }
-    if (verbose) printf("RUN test 2\n");
-    trans_tbl_init(merged_hdr, translate, &tbl_2, false, false, NULL);
+    if (verbose) fprintf(pysam_stdout, "RUN test 2\n");
+    trans_tbl_init(merged_hdr, translate, &tbl_2, false, false, true, NULL);
      out = finish_merged_header(merged_hdr);
      free_merged_header(merged_hdr);
-    if (verbose) printf("END RUN test 2\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 2\n");
      if (verbose > 1) {
-        printf("translate\n");
+        fprintf(pysam_stdout, "translate\n");
          dump_header(translate);
-        printf("out\n");
+        fprintf(pysam_stdout, "out\n");
          dump_header(out);
      }
      if (check_test_2(translate, out, &tbl_2)) {
-        if (verbose) printf("Test 2 : PASS\n");
+        if (verbose) fprintf(pysam_stdout, "Test 2 : PASS\n");
          ++success;
      } else {
-        if (verbose) printf("Test 2 : FAIL\n");
-        fprintf(pysamerr, "Test 2 : FAIL\n");
+        if (verbose) fprintf(pysam_stdout, "Test 2 : FAIL\n");
+        fprintf(pysam_stderr, "Test 2 : FAIL\n");
          ++failure;
      }
      // teardown
      bam_hdr_destroy(translate);
      bam_hdr_destroy(out);
      trans_tbl_destroy(&tbl_2);
-    if (verbose) printf("END test 2\n");
+    if (verbose) fprintf(pysam_stdout, "END test 2\n");
  
      // test
-    if (verbose) printf("BEGIN test 3\n");
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 3\n");
      // reinit
      trans_tbl_t tbl_3;
      merged_hdr = init_merged_header();
      translate = setup_test_3(merged_hdr);
      assert(translate);
      if (verbose > 1) {
-        printf("translate\n");
+        fprintf(pysam_stdout, "translate\n");
          dump_header(translate);
       }
-    if (verbose) printf("RUN test 3\n");
-    trans_tbl_init(merged_hdr, translate, &tbl_3, false, false, NULL);
+    if (verbose) fprintf(pysam_stdout, "RUN test 3\n");
+    trans_tbl_init(merged_hdr, translate, &tbl_3, false, false, true, NULL);
      out = finish_merged_header(merged_hdr);
      free_merged_header(merged_hdr);
-    if (verbose) printf("END RUN test 3\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 3\n");
      if (verbose > 1) {
-        printf("translate\n");
+        fprintf(pysam_stdout, "translate\n");
          dump_header(translate);
-        printf("out\n");
+        fprintf(pysam_stdout, "out\n");
          dump_header(out);
      }
      if (check_test_3(translate, out, &tbl_3)) {
-        if (verbose) printf("Test 3 : PASS\n");
+        if (verbose) fprintf(pysam_stdout, "Test 3 : PASS\n");
          ++success;
      } else {
-        if (verbose) printf("Test 3 : FAIL\n");
-        fprintf(pysamerr, "Test 3 : FAIL\n");
+        if (verbose) fprintf(pysam_stdout, "Test 3 : FAIL\n");
+        fprintf(pysam_stderr, "Test 3 : FAIL\n");
          ++failure;
      }
      // teardown
      bam_hdr_destroy(translate);
      bam_hdr_destroy(out);
      trans_tbl_destroy(&tbl_3);
-    if (verbose) printf("END test 3\n");
+    if (verbose) fprintf(pysam_stdout, "END test 3\n");
  
      // test
-    if (verbose) printf("BEGIN test 4\n");
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 4\n");
      // reinit
      trans_tbl_t tbl_4;
      merged_hdr = init_merged_header();
      translate = setup_test_4(merged_hdr);
      assert(translate);
      if (verbose > 1) {
-        printf("translate\n");
+        fprintf(pysam_stdout, "translate\n");
          dump_header(translate);
      }
-    if (verbose) printf("RUN test 4\n");
-    trans_tbl_init(merged_hdr, translate, &tbl_4, false, false, NULL);
+    if (verbose) fprintf(pysam_stdout, "RUN test 4\n");
+    trans_tbl_init(merged_hdr, translate, &tbl_4, false, false, true, NULL);
      out = finish_merged_header(merged_hdr);
      free_merged_header(merged_hdr);
-    if (verbose) printf("END RUN test 4\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 4\n");
      if (verbose > 1) {
-        printf("translate\n");
+        fprintf(pysam_stdout, "translate\n");
          dump_header(translate);
-        printf("out\n");
+        fprintf(pysam_stdout, "out\n");
          dump_header(out);
      }
      if (check_test_4(translate, out, &tbl_4)) {
-        if (verbose) printf("Test 4 : PASS\n");
+        if (verbose) fprintf(pysam_stdout, "Test 4 : PASS\n");
          ++success;
      } else {
-        if (verbose) printf("Test 4 : FAIL\n");
-        fprintf(pysamerr, "Test 4 : FAIL\n");
+        if (verbose) fprintf(pysam_stdout, "Test 4 : FAIL\n");
+        fprintf(pysam_stderr, "Test 4 : FAIL\n");
          ++failure;
      }
      // teardown
      bam_hdr_destroy(translate);
      bam_hdr_destroy(out);
      trans_tbl_destroy(&tbl_4);
-    if (verbose) printf("END test 4\n");
+    if (verbose) fprintf(pysam_stdout, "END test 4\n");
  
      // test
-    if (verbose) printf("BEGIN test 5\n");
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 5\n");
      // reinit
      trans_tbl_t tbl_5;
      merged_hdr = init_merged_header();
@@ -503,74 +505,74 @@ int main(int argc, char**argv)
      assert(translate);
      if (verbose > 1) {
  
-        printf("translate\n");
+        fprintf(pysam_stdout, "translate\n");
          dump_header(translate);
      }
-    if (verbose) printf("RUN test 5\n");
-    trans_tbl_init(merged_hdr, translate, &tbl_5, false, false, NULL);
+    if (verbose) fprintf(pysam_stdout, "RUN test 5\n");
+    trans_tbl_init(merged_hdr, translate, &tbl_5, false, false, true, NULL);
      out = finish_merged_header(merged_hdr);
      free_merged_header(merged_hdr);
-    if (verbose) printf("END RUN test 5\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 5\n");
      if (verbose > 1) {
-        printf("translate\n");
+        fprintf(pysam_stdout, "translate\n");
          dump_header(translate);
-        printf("out\n");
+        fprintf(pysam_stdout, "out\n");
          dump_header(out);
      }
      if (check_test_5(translate, out, &tbl_5)) {
-        if (verbose) printf("Test 5 : PASS\n");
+        if (verbose) fprintf(pysam_stdout, "Test 5 : PASS\n");
          ++success;
      } else {
-        if (verbose) printf("Test 5 : FAIL\n");
-        fprintf(pysamerr, "Test 5 : FAIL\n");
+        if (verbose) fprintf(pysam_stdout, "Test 5 : FAIL\n");
+        fprintf(pysam_stderr, "Test 5 : FAIL\n");
          ++failure;
      }
      // teardown
      bam_hdr_destroy(translate);
      bam_hdr_destroy(out);
      trans_tbl_destroy(&tbl_5);
-    if (verbose) printf("END test 5\n");
+    if (verbose) fprintf(pysam_stdout, "END test 5\n");
  
      // test
-    if (verbose) printf("BEGIN test 6\n");
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 6\n");
      // reinit
      trans_tbl_t tbl_6;
      merged_hdr = init_merged_header();
      translate = setup_test_6(merged_hdr);
      assert(translate);
      if (verbose > 1) {
-        printf("translate\n");
+        fprintf(pysam_stdout, "translate\n");
          dump_header(translate);
      }
-    if (verbose) printf("RUN test 6\n");
-    trans_tbl_init(merged_hdr, translate, &tbl_6, false, false, "filename");
+    if (verbose) fprintf(pysam_stdout, "RUN test 6\n");
+    trans_tbl_init(merged_hdr, translate, &tbl_6, false, false, true, "filename");
      out = finish_merged_header(merged_hdr);
      free_merged_header(merged_hdr);
-    if (verbose) printf("END RUN test 6\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 6\n");
      if (verbose > 1) {
-        printf("translate\n");
+        fprintf(pysam_stdout, "translate\n");
          dump_header(translate);
-        printf("out\n");
+        fprintf(pysam_stdout, "out\n");
          dump_header(out);
      }
      if (check_test_6(translate, out, &tbl_6)) {
-        if (verbose) printf("Test 6 : PASS\n");
+        if (verbose) fprintf(pysam_stdout, "Test 6 : PASS\n");
          ++success;
      } else {
-        if (verbose) printf("Test 6 : FAIL\n");
-        fprintf(pysamerr, "Test 6 : FAIL\n");
+        if (verbose) fprintf(pysam_stdout, "Test 6 : FAIL\n");
+        fprintf(pysam_stderr, "Test 6 : FAIL\n");
          ++failure;
      }
      // teardown
      bam_hdr_destroy(translate);
      bam_hdr_destroy(out);
      trans_tbl_destroy(&tbl_6);
-    if (verbose) printf("END test 6\n");
+    if (verbose) fprintf(pysam_stdout, "END test 6\n");
  
      if (success == NUM_TESTS) {
          return 0;
      } else {
-        fprintf(pysamerr, "%d failures %d successes\n", failure, success);
+        fprintf(pysam_stderr, "%d failures %d successes\n", failure, success);
          return 1;
      }
  }
diff --git a/samtools/test/split/test_count_rg.c b/samtools/test/split/test_count_rg.c

index 97512a8776c27e63964cfcf612cd02bb6795564a..4038f97002d64cb5a54ba431d17118070ff179f5 100644 (file)
--- a/samtools/test/split/test_count_rg.c
+++ b/samtools/test/split/test_count_rg.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_split.c"
  #include "../test.h"
  #include <stdlib.h>
diff --git a/samtools/test/split/test_count_rg.c.pysam.c b/samtools/test/split/test_count_rg.c.pysam.c

index eda8abb82381ea09e6b9613dc00d1c7e0b056a94..25131a84f19cd40cb300c75332558a1885b88d8c 100644 (file)
--- a/samtools/test/split/test_count_rg.c.pysam.c
+++ b/samtools/test/split/test_count_rg.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_split.c"
  #include "../test.h"
  #include <stdlib.h>
@@ -40,7 +42,7 @@ void setup_test_1(bam_hdr_t** hdr_in)
      (*hdr_in)->l_text = strlen(test1);
  }
  
-int main(int argc, char**argv)
+int samtools_test_count_rg_main(int argc, char**argv)
  {
      // test state
      const int NUM_TESTS = 1;
@@ -55,7 +57,7 @@ int main(int argc, char**argv)
                  ++verbose;
                  break;
              default:
-                printf(
+                fprintf(pysam_stdout, 
                         "usage: test_count_rg [-v]\n\n"
                         " -v verbose output\n"
                         );
@@ -64,32 +66,32 @@ int main(int argc, char**argv)
      }
  
  
-    // Setup pysamerr redirect
+    // Setup pysam_stderr redirect
      kstring_t res = { 0, 0, NULL };
-    FILE* orig_pysamerr = fdopen(dup(STDERR_FILENO), "a"); // Save pysamerr
+    FILE* orig_pysam_stderr = fdopen(dup(STDERR_FILENO), "a"); // Save pysam_stderr
      char* tempfname = (optind < argc)? argv[optind] : "test_count_rg.tmp";
      FILE* check = NULL;
  
      // setup
-    if (verbose) printf("BEGIN test 1\n");  // TID test
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 1\n");  // TID test
      bam_hdr_t* hdr1;
      size_t count;
      char** output;
      setup_test_1(&hdr1);
      if (verbose > 1) {
-        printf("hdr1\n");
+        fprintf(pysam_stdout, "hdr1\n");
          dump_hdr(hdr1);
      }
-    if (verbose) printf("RUN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "RUN test 1\n");
  
      // test
-    xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+    xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
      bool result_1 = count_RG(hdr1, &count, &output);
-    fclose(pysamerr);
+    fclose(pysam_stderr);
  
-    if (verbose) printf("END RUN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
      if (verbose > 1) {
-        printf("b\n");
+        fprintf(pysam_stdout, "b\n");
          dump_hdr(hdr1);
      }
  
@@ -101,7 +103,7 @@ int main(int argc, char**argv)
          ++success;
      } else {
          ++failure;
-        if (verbose) printf("FAIL test 1\n");
+        if (verbose) fprintf(pysam_stdout, "FAIL test 1\n");
      }
      fclose(check);
  
@@ -112,14 +114,14 @@ int main(int argc, char**argv)
      }
      free(output);
      bam_hdr_destroy(hdr1);
-    if (verbose) printf("END test 1\n");
+    if (verbose) fprintf(pysam_stdout, "END test 1\n");
  
      // Cleanup
      free(res.s);
      remove(tempfname);
      if (failure > 0)
-        fprintf(orig_pysamerr, "%d failures %d successes\n", failure, success);
-    fclose(orig_pysamerr);
+        fprintf(orig_pysam_stderr, "%d failures %d successes\n", failure, success);
+    fclose(orig_pysam_stderr);
  
      return (success == NUM_TESTS)? EXIT_SUCCESS : EXIT_FAILURE;
  }
diff --git a/samtools/test/split/test_expand_format_string.c b/samtools/test/split/test_expand_format_string.c

index ede7586f45d5c4345bc9b3a0a79c30e4ed01a75e..7c90b6293395bdff9f290333d0a003b53443e81b 100644 (file)
--- a/samtools/test/split/test_expand_format_string.c
+++ b/samtools/test/split/test_expand_format_string.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_split.c"
  #include "../test.h"
  #include <stdlib.h>
diff --git a/samtools/test/split/test_expand_format_string.c.pysam.c b/samtools/test/split/test_expand_format_string.c.pysam.c

index 94e7732c3f90e690556972ece7db184129d113c2..fe9a426c9bf9acefe1a4e4eed3ec36e3240a57c3 100644 (file)
--- a/samtools/test/split/test_expand_format_string.c.pysam.c
+++ b/samtools/test/split/test_expand_format_string.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_split.c"
  #include "../test.h"
  #include <stdlib.h>
@@ -40,7 +42,7 @@ void setup_test_1(bam_hdr_t** hdr_in)
      (*hdr_in)->l_text = strlen(test1);
  }
  
-int main(int argc, char**argv)
+int samtools_test_expand_format_string_main(int argc, char**argv)
  {
      // test state
      const int NUM_TESTS = 1;
@@ -55,7 +57,7 @@ int main(int argc, char**argv)
                  ++verbose;
                  break;
              default:
-                printf(
+                fprintf(pysam_stdout, 
                         "usage: test_expand_format_string [-v]\n\n"
                         " -v verbose output\n"
                         );
@@ -64,34 +66,34 @@ int main(int argc, char**argv)
      }
  
  
-    // Setup pysamerr redirect
+    // Setup pysam_stderr redirect
      kstring_t res = { 0, 0, NULL };
-    FILE* orig_pysamerr = fdopen(dup(STDERR_FILENO), "a"); // Save pysamerr
+    FILE* orig_pysam_stderr = fdopen(dup(STDERR_FILENO), "a"); // Save pysam_stderr
      char* tempfname = (optind < argc)? argv[optind] : "test_expand_format_string.tmp";
      FILE* check = NULL;
  
      // setup
-    if (verbose) printf("BEGIN test 1\n");  // default format string test
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 1\n");  // default format string test
      const char* format_string_1 = "%*_%#.bam";
      const char* basename_1 = "basename";
      const char* rg_id_1 = "1#2.3";
      const int rg_idx_1 = 4;
      if (verbose > 1) {
-        printf("format_string:%s\n"
+        fprintf(pysam_stdout, "format_string:%s\n"
                 "basename:%s\n"
                 "rg_id:%s\n"
                 "rg_idx:%d\n", format_string_1, basename_1, rg_id_1, rg_idx_1);
      }
-    if (verbose) printf("RUN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "RUN test 1\n");
  
      // test
-    xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+    xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
      char* output_1 = expand_format_string(format_string_1, basename_1, rg_id_1, rg_idx_1, NULL);
-    fclose(pysamerr);
+    fclose(pysam_stderr);
  
-    if (verbose) printf("END RUN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
      if (verbose > 1) {
-        printf("format_string:%s\n"
+        fprintf(pysam_stdout, "format_string:%s\n"
                 "basename:%s\n"
                 "rg_id:%s\n"
                 "rg_idx:%d\n", format_string_1, basename_1, rg_id_1, rg_idx_1);
@@ -106,20 +108,20 @@ int main(int argc, char**argv)
          ++success;
      } else {
          ++failure;
-        if (verbose) printf("FAIL test 1\n");
+        if (verbose) fprintf(pysam_stdout, "FAIL test 1\n");
      }
      fclose(check);
  
      // teardown
      free(output_1);
-    if (verbose) printf("END test 1\n");
+    if (verbose) fprintf(pysam_stdout, "END test 1\n");
  
      // Cleanup test harness
      free(res.s);
      remove(tempfname);
      if (failure > 0)
-        fprintf(orig_pysamerr, "%d failures %d successes\n", failure, success);
-    fclose(orig_pysamerr);
+        fprintf(orig_pysam_stderr, "%d failures %d successes\n", failure, success);
+    fclose(orig_pysam_stderr);
  
      return (success == NUM_TESTS)? EXIT_SUCCESS : EXIT_FAILURE;
  }
diff --git a/samtools/test/split/test_filter_header_rg.c b/samtools/test/split/test_filter_header_rg.c

index f4e1266f7458493f18d77e98529aa168b09f0fb8..d9505d67e5ad1b385510b961bad1415bef670309 100644 (file)
--- a/samtools/test/split/test_filter_header_rg.c
+++ b/samtools/test/split/test_filter_header_rg.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_split.c"
  #include "../test.h"
  #include <unistd.h>
diff --git a/samtools/test/split/test_filter_header_rg.c.pysam.c b/samtools/test/split/test_filter_header_rg.c.pysam.c

index 4a5b6d570774a02242a8e0a2a084567e65d323d5..97b3573075af28b5645875700d13d77d919b1f32 100644 (file)
--- a/samtools/test/split/test_filter_header_rg.c.pysam.c
+++ b/samtools/test/split/test_filter_header_rg.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_split.c"
  #include "../test.h"
  #include <unistd.h>
@@ -73,7 +75,7 @@ bool check_test_2(const bam_hdr_t* hdr) {
      return true;
  }
  
-int main(int argc, char**argv)
+int samtools_test_filter_header_rg_main(int argc, char**argv)
  {
      // test state
      const int NUM_TESTS = 2;
@@ -88,7 +90,7 @@ int main(int argc, char**argv)
                  ++verbose;
                  break;
              default:
-                printf(
+                fprintf(pysam_stdout, 
                         "usage: test_filter_header_rg [-v]\n\n"
                         " -v verbose output\n"
                         );
@@ -97,31 +99,31 @@ int main(int argc, char**argv)
      }
  
  
-    // Setup pysamerr redirect
+    // Setup pysam_stderr redirect
      kstring_t res = { 0, 0, NULL };
-    FILE* orig_pysamerr = fdopen(dup(STDERR_FILENO), "a"); // Save pysamerr
+    FILE* orig_pysam_stderr = fdopen(dup(STDERR_FILENO), "a"); // Save pysam_stderr
      char* tempfname = (optind < argc)? argv[optind] : "test_count_rg.tmp";
      FILE* check = NULL;
  
      // setup
-    if (verbose) printf("BEGIN test 1\n");  // test eliminating a tag that isn't there
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 1\n");  // test eliminating a tag that isn't there
      bam_hdr_t* hdr1;
      const char* id_to_keep_1 = "1#2.3";
      setup_test_1(&hdr1);
      if (verbose > 1) {
-        printf("hdr1\n");
+        fprintf(pysam_stdout, "hdr1\n");
          dump_hdr(hdr1);
      }
-    if (verbose) printf("RUN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "RUN test 1\n");
  
      // test
-    xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+    xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
      bool result_1 = filter_header_rg(hdr1, id_to_keep_1);
-    fclose(pysamerr);
+    fclose(pysam_stderr);
  
-    if (verbose) printf("END RUN test 1\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
      if (verbose > 1) {
-        printf("hdr1\n");
+        fprintf(pysam_stdout, "hdr1\n");
          dump_hdr(hdr1);
      }
  
@@ -135,32 +137,32 @@ int main(int argc, char**argv)
          ++success;
      } else {
          ++failure;
-        if (verbose) printf("FAIL test 1\n");
+        if (verbose) fprintf(pysam_stdout, "FAIL test 1\n");
      }
      fclose(check);
  
      // teardown
      bam_hdr_destroy(hdr1);
-    if (verbose) printf("END test 1\n");
+    if (verbose) fprintf(pysam_stdout, "END test 1\n");
  
-    if (verbose) printf("BEGIN test 2\n");  // test eliminating a tag that is there
+    if (verbose) fprintf(pysam_stdout, "BEGIN test 2\n");  // test eliminating a tag that is there
      bam_hdr_t* hdr2;
      const char* id_to_keep_2 = "fish";
      setup_test_2(&hdr2);
      if (verbose > 1) {
-        printf("hdr2\n");
+        fprintf(pysam_stdout, "hdr2\n");
          dump_hdr(hdr2);
      }
-    if (verbose) printf("RUN test 2\n");
+    if (verbose) fprintf(pysam_stdout, "RUN test 2\n");
  
      // test
-    xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+    xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
      bool result_2 = filter_header_rg(hdr2, id_to_keep_2);
-    fclose(pysamerr);
+    fclose(pysam_stderr);
  
-    if (verbose) printf("END RUN test 2\n");
+    if (verbose) fprintf(pysam_stdout, "END RUN test 2\n");
      if (verbose > 1) {
-        printf("hdr2\n");
+        fprintf(pysam_stdout, "hdr2\n");
          dump_hdr(hdr2);
      }
  
@@ -174,21 +176,21 @@ int main(int argc, char**argv)
          ++success;
      } else {
          ++failure;
-        if (verbose) printf("FAIL test 2\n");
+        if (verbose) fprintf(pysam_stdout, "FAIL test 2\n");
      }
      fclose(check);
  
      // teardown
      bam_hdr_destroy(hdr2);
-    if (verbose) printf("END test 2\n");
+    if (verbose) fprintf(pysam_stdout, "END test 2\n");
  
  
      // Cleanup
      free(res.s);
      remove(tempfname);
      if (failure > 0)
-        fprintf(orig_pysamerr, "%d failures %d successes\n", failure, success);
-    fclose(orig_pysamerr);
+        fprintf(orig_pysam_stderr, "%d failures %d successes\n", failure, success);
+    fclose(orig_pysam_stderr);
  
      return (success == NUM_TESTS)? EXIT_SUCCESS : EXIT_FAILURE;
  }
diff --git a/samtools/test/split/test_parse_args.c b/samtools/test/split/test_parse_args.c

index 66c7c88e97e9d27d9f757118e2f2ff36fdc657a1..85a196ac83626487444733d081e3820db76efdde 100644 (file)
--- a/samtools/test/split/test_parse_args.c
+++ b/samtools/test/split/test_parse_args.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_split.c"
  #include "../test.h"
  #include <stdlib.h>
diff --git a/samtools/test/split/test_parse_args.c.pysam.c b/samtools/test/split/test_parse_args.c.pysam.c

index 608ec7c915857a0365eb01337add15687d16b813..2c3e749e1af56d153b4fe96b6e69cd20212febca 100644 (file)
--- a/samtools/test/split/test_parse_args.c.pysam.c
+++ b/samtools/test/split/test_parse_args.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_split.c"
  #include "../test.h"
  #include <stdlib.h>
@@ -65,7 +67,7 @@ bool check_test_2(const parsed_opts_t* opts) {
      return true;
  }
  
-int main(int argc, char**argv)
+int samtools_test_parse_args_main(int argc, char**argv)
  {
      // test state
      const int NUM_TESTS = 2;
@@ -80,7 +82,7 @@ int main(int argc, char**argv)
                  ++verbose;
                  break;
              default:
-                printf(
+                fprintf(pysam_stdout, 
                         "usage: test_parse_args [-v]\n\n"
                         " -v verbose output\n"
                         );
@@ -88,58 +90,58 @@ int main(int argc, char**argv)
          }
      }
  
-    // Setup stdout and pysamerr redirect
-    kstring_t res_stdout = { 0, 0, NULL };
-    kstring_t res_pysamerr = { 0, 0, NULL };
-    FILE* orig_stdout = fdopen(dup(STDOUT_FILENO), "a"); // Save pysamerr
-    FILE* orig_pysamerr = fdopen(dup(STDERR_FILENO), "a"); // Save pysamerr
-    char* tempfname_stdout = (optind < argc)? argv[optind] : "test_parse_args.tmp.o";
-    char* tempfname_pysamerr = (optind < argc)? argv[optind] : "test_parse_args.tmp.e";
-    FILE* check_stdout = NULL;
-    FILE* check_pysamerr = NULL;
+    // Setup pysam_stdout and pysam_stderr redirect
+    kstring_t res_pysam_stdout = { 0, 0, NULL };
+    kstring_t res_pysam_stderr = { 0, 0, NULL };
+    FILE* orig_pysam_stdout = fdopen(dup(STDOUT_FILENO), "a"); // Save pysam_stderr
+    FILE* orig_pysam_stderr = fdopen(dup(STDERR_FILENO), "a"); // Save pysam_stderr
+    char* tempfname_pysam_stdout = (optind < argc)? argv[optind] : "test_parse_args.tmp.o";
+    char* tempfname_pysam_stderr = (optind < argc)? argv[optind] : "test_parse_args.tmp.e";
+    FILE* check_pysam_stdout = NULL;
+    FILE* check_pysam_stderr = NULL;
  
      // Cleanup getopt
      optind = 1;
  
      // setup
-    if (verbose) fprintf(orig_stdout,"BEGIN test 1\n");  // test eliminating a tag that isn't there
+    if (verbose) fprintf(orig_pysam_stdout,"BEGIN test 1\n");  // test eliminating a tag that isn't there
      int argc_1;
      char** argv_1;
      setup_test_1(&argc_1, &argv_1);
      if (verbose > 1) {
-        fprintf(orig_stdout, "argc: %d\n", argc_1);
+        fprintf(orig_pysam_stdout, "argc: %d\n", argc_1);
      }
-    if (verbose) fprintf(orig_stdout,"RUN test 1\n");
+    if (verbose) fprintf(orig_pysam_stdout,"RUN test 1\n");
  
      // test
-    xfreopen(tempfname_stdout, "w", stdout); // Redirect stdout to pipe
-    xfreopen(tempfname_pysamerr, "w", pysamerr); // Redirect pysamerr to pipe
+    xfreopen(tempfname_pysam_stdout, "w", pysam_stdout); // Redirect pysam_stdout to pipe
+    xfreopen(tempfname_pysam_stderr, "w", pysam_stderr); // Redirect pysam_stderr to pipe
      parsed_opts_t* result_1 = parse_args(argc_1, argv_1);
-    fclose(stdout);
-    fclose(pysamerr);
+    fclose(pysam_stdout);
+    fclose(pysam_stderr);
  
-    if (verbose) fprintf(orig_stdout, "END RUN test 1\n");
+    if (verbose) fprintf(orig_pysam_stdout, "END RUN test 1\n");
      if (verbose > 1) {
-        fprintf(orig_stdout, "argc: %d\n", argc_1);
+        fprintf(orig_pysam_stdout, "argc: %d\n", argc_1);
      }
  
      // check result
-    res_stdout.l = res_pysamerr.l = 0;
-    check_stdout = fopen(tempfname_stdout, "r");
-    check_pysamerr = fopen(tempfname_pysamerr, "r");
+    res_pysam_stdout.l = res_pysam_stderr.l = 0;
+    check_pysam_stdout = fopen(tempfname_pysam_stdout, "r");
+    check_pysam_stderr = fopen(tempfname_pysam_stderr, "r");
      if ( !result_1
-        && kgetline(&res_stdout, (kgets_func *)fgets, check_stdout) >= 0
-        && !feof(check_stdout)
-        && res_stdout.l > 0
-        && kgetline(&res_pysamerr, (kgets_func *)fgets, check_pysamerr) < 0
-        && (feof(check_pysamerr) || res_pysamerr.l == 0)) {
+        && kgetline(&res_pysam_stdout, (kgets_func *)fgets, check_pysam_stdout) >= 0
+        && !feof(check_pysam_stdout)
+        && res_pysam_stdout.l > 0
+        && kgetline(&res_pysam_stderr, (kgets_func *)fgets, check_pysam_stderr) < 0
+        && (feof(check_pysam_stderr) || res_pysam_stderr.l == 0)) {
          ++success;
      } else {
          ++failure;
-        if (verbose) fprintf(orig_stdout, "FAIL test 1\n");
+        if (verbose) fprintf(orig_pysam_stdout, "FAIL test 1\n");
      }
-    fclose(check_pysamerr);
-    fclose(check_stdout);
+    fclose(check_pysam_stderr);
+    fclose(check_pysam_stdout);
  
      // teardown
      cleanup_opts(result_1);
@@ -148,49 +150,49 @@ int main(int argc, char**argv)
          free(argv_1[i]);
      }
      free(argv_1);
-    if (verbose) fprintf(orig_stdout, "END test 1\n");
+    if (verbose) fprintf(orig_pysam_stdout, "END test 1\n");
  
      // Cleanup getopt
      optind = 1;
  
-    if (verbose) fprintf(orig_stdout, "BEGIN test 2\n");  // test eliminating a tag that is there
+    if (verbose) fprintf(orig_pysam_stdout, "BEGIN test 2\n");  // test eliminating a tag that is there
      int argc_2;
      char** argv_2;
      setup_test_2(&argc_2, &argv_2);
      if (verbose > 1) {
-        fprintf(orig_stdout, "argc: %d\n", argc_2);
+        fprintf(orig_pysam_stdout, "argc: %d\n", argc_2);
      }
-    if (verbose) fprintf(orig_stdout, "RUN test 2\n");
+    if (verbose) fprintf(orig_pysam_stdout, "RUN test 2\n");
  
      // test
-    xfreopen(tempfname_stdout, "w", stdout); // Redirect stdout to pipe
-    xfreopen(tempfname_pysamerr, "w", pysamerr); // Redirect pysamerr to pipe
+    xfreopen(tempfname_pysam_stdout, "w", pysam_stdout); // Redirect pysam_stdout to pipe
+    xfreopen(tempfname_pysam_stderr, "w", pysam_stderr); // Redirect pysam_stderr to pipe
      parsed_opts_t* result_2 = parse_args(argc_2, argv_2);
-    fclose(stdout);
-    fclose(pysamerr);
+    fclose(pysam_stdout);
+    fclose(pysam_stderr);
  
-    if (verbose) fprintf(orig_stdout, "END RUN test 2\n");
+    if (verbose) fprintf(orig_pysam_stdout, "END RUN test 2\n");
      if (verbose > 1) {
-        fprintf(orig_stdout, "argc: %d\n", argc_2);
+        fprintf(orig_pysam_stdout, "argc: %d\n", argc_2);
      }
  
      // check result
-    res_stdout.l = res_pysamerr.l = 0;
-    check_stdout = fopen(tempfname_stdout, "r");
-    check_pysamerr = fopen(tempfname_pysamerr, "r");
+    res_pysam_stdout.l = res_pysam_stderr.l = 0;
+    check_pysam_stdout = fopen(tempfname_pysam_stdout, "r");
+    check_pysam_stderr = fopen(tempfname_pysam_stderr, "r");
      if ( result_2
          && check_test_2(result_2)
-        && kgetline(&res_stdout, (kgets_func *)fgets, check_stdout) < 0
-        && (feof(check_stdout) || res_stdout.l == 0)
-        && kgetline(&res_pysamerr, (kgets_func *)fgets, check_pysamerr) < 0
-        && (feof(check_pysamerr) || res_pysamerr.l == 0)) {
+        && kgetline(&res_pysam_stdout, (kgets_func *)fgets, check_pysam_stdout) < 0
+        && (feof(check_pysam_stdout) || res_pysam_stdout.l == 0)
+        && kgetline(&res_pysam_stderr, (kgets_func *)fgets, check_pysam_stderr) < 0
+        && (feof(check_pysam_stderr) || res_pysam_stderr.l == 0)) {
          ++success;
      } else {
          ++failure;
-        if (verbose) fprintf(orig_stdout, "FAIL test 2\n");
+        if (verbose) fprintf(orig_pysam_stdout, "FAIL test 2\n");
      }
-    fclose(check_stdout);
-    fclose(check_pysamerr);
+    fclose(check_pysam_stdout);
+    fclose(check_pysam_stderr);
  
      // teardown
      cleanup_opts(result_2);
@@ -200,18 +202,18 @@ int main(int argc, char**argv)
      }
      free(argv_2);
  
-    if (verbose) fprintf(orig_stdout, "END test 2\n");
+    if (verbose) fprintf(orig_pysam_stdout, "END test 2\n");
  
  
      // Cleanup
-    free(res_stdout.s);
-    free(res_pysamerr.s);
-    remove(tempfname_stdout);
-    remove(tempfname_pysamerr);
-    fclose(orig_stdout);
+    free(res_pysam_stdout.s);
+    free(res_pysam_stderr.s);
+    remove(tempfname_pysam_stdout);
+    remove(tempfname_pysam_stderr);
+    fclose(orig_pysam_stdout);
      if (failure > 0)
-        fprintf(orig_pysamerr, "%d failures %d successes\n", failure, success);
-    fclose(orig_pysamerr);
+        fprintf(orig_pysam_stderr, "%d failures %d successes\n", failure, success);
+    fclose(orig_pysam_stderr);
  
      return (success == NUM_TESTS)? EXIT_SUCCESS : EXIT_FAILURE;
  }
diff --git a/samtools/test/test.c b/samtools/test/test.c

index ef1d1f9c5ac24cc81ef8e182656b649c8a0634e9..7ab38afbec54f8dc6918a87d8060184533d899d4 100644 (file)
--- a/samtools/test/test.c
+++ b/samtools/test/test.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <errno.h>
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/samtools/test/test.c.pysam.c b/samtools/test/test.c.pysam.c

index 735eb7b912b2225306f9c59ae13baa865b801a37..a8295b5610324b289d69cb5bca2b577f3e28317d 100644 (file)
--- a/samtools/test/test.c.pysam.c
+++ b/samtools/test/test.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include <errno.h>
  #include <stdio.h>
  #include <stdlib.h>
@@ -35,7 +37,7 @@ DEALINGS IN THE SOFTWARE.  */
  void xfreopen(const char *path, const char *mode, FILE *stream)
  {
      if (freopen(path, mode, stream) == NULL) {
-        fprintf(pysamerr, __FILE__": error reopening %s: %s\n",
+        fprintf(pysam_stderr, __FILE__": error reopening %s: %s\n",
                  path, strerror(errno));
          exit(2);
      }
@@ -43,13 +45,13 @@ void xfreopen(const char *path, const char *mode, FILE *stream)
  
  void dump_hdr(const bam_hdr_t* hdr)
  {
-    printf("n_targets: %d\n", hdr->n_targets);
-    printf("ignore_sam_err: %d\n", hdr->ignore_sam_err);
-    printf("l_text: %u\n", hdr->l_text);
-    printf("idx\ttarget_len\ttarget_name:\n");
+    fprintf(pysam_stdout, "n_targets: %d\n", hdr->n_targets);
+    fprintf(pysam_stdout, "ignore_sam_err: %d\n", hdr->ignore_sam_err);
+    fprintf(pysam_stdout, "l_text: %u\n", hdr->l_text);
+    fprintf(pysam_stdout, "idx\ttarget_len\ttarget_name:\n");
      int32_t target;
      for (target = 0; target < hdr->n_targets; ++target) {
-        printf("%d\t%u\t\"%s\"\n", target, hdr->target_len[target], hdr->target_name[target]);
+        fprintf(pysam_stdout, "%d\t%u\t\"%s\"\n", target, hdr->target_len[target], hdr->target_name[target]);
      }
-    printf("text: \"%s\"\n", hdr->text);
+    fprintf(pysam_stdout, "text: \"%s\"\n", hdr->text);
  }
diff --git a/samtools/test/tview/test_get_rg_sample.c b/samtools/test/tview/test_get_rg_sample.c

index c22ba9da1b1c47bc151e15e421f320b1a25ffa44..3db9da2a223fe62ab12d4a02928b4fe22f5ee90a 100644 (file)
--- a/samtools/test/tview/test_get_rg_sample.c
+++ b/samtools/test/tview/test_get_rg_sample.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_tview.c"
  #include <stdbool.h>
  
diff --git a/samtools/test/tview/test_get_rg_sample.c.pysam.c b/samtools/test/tview/test_get_rg_sample.c.pysam.c

index 99a217fef456ca220ffc39b771350483a2e353a4..8c441f9434c996c30e88ba0f2ac37ba74db37038 100644 (file)
--- a/samtools/test/tview/test_get_rg_sample.c.pysam.c
+++ b/samtools/test/tview/test_get_rg_sample.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.  */
  
+#include <config.h>
+
  #include "../../bam_tview.c"
  #include <stdbool.h>
  
@@ -59,7 +61,7 @@ void teardown_1(khash_t(kh_rg)* test_result, char* header)
      free(header);
  }
  
-int main(int argc, char** argv)
+int samtools_test_get_rg_sample_main(int argc, char** argv)
  {
      const int NUM_TESTS = 1;
      int success = 0;
@@ -77,7 +79,7 @@ int main(int argc, char** argv)
      if (success == NUM_TESTS) {
          return 0;
      } else {
-        fprintf(pysamerr, "%d failures %d successes\n", failure, success);
+        fprintf(pysam_stderr, "%d failures %d successes\n", failure, success);
          return 1;
      }
  }
diff --git a/samtools/version.h b/samtools/version.h

index abe052ce2f648b703111ae131b6b7ce0b7dca1bf..ec46e67f1fc3deda3995f021d914c83a51f3be4f 100644 (file)
--- a/samtools/version.h
+++ b/samtools/version.h
@@ -1 +1 @@
-#define SAMTOOLS_VERSION "1.3"
+#define SAMTOOLS_VERSION "1.3.1"
diff --git a/setup.py b/setup.py

index 7b59b6983ac96082074467f555c626b18a7fb85b..080bc247e8c52bfe5b8d02059af209abf95cc333 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@ This module provides a low-level wrapper around the htslib C-API as
  using cython and a high-level API for convenient access to the data
  within standard genomic file formats.
  
-The current version wraps htslib-1.3, samtools-1.3 and bcftools-1.3.
+The current version wraps htslib-1.3.1, samtools-1.3.1 and bcftools-1.3.1.
  
  See:
  http://www.htslib.org
@@ -155,7 +155,7 @@ if HTSLIB_MODE in ['shared', 'separate']:
              outf.write(
                  "/* empty config.h created by pysam */\n")
              outf.write(
-                "/* conservative compilation options */")
+                "/* conservative compilation options */\n")
  
  if HTSLIB_LIBRARY_DIR:
      # linking against a shared, externally installed htslib version, no
@@ -259,6 +259,16 @@ if HTSLIB_SOURCE == "builtin":
                 "adding shared libcurl and libcrypto")
          external_htslib_libraries.extend(["curl", "crypto"])
  
+# create empty config.h files if they have not been created automatically
+# or created by the user:
+for fn in "samtools/config.h", "htslib/config.h":
+    if not os.path.exists(fn):
+        with open(fn, "w") as outf:
+            outf.write(
+                "/* empty config.h created by pysam */\n")
+            outf.write(
+                "/* conservative compilation options */\n")
+
  parts = ["samtools",
           "bcftools",
           "htslib",
@@ -271,15 +281,6 @@ parts = ["samtools",
           "vcf",
           "bcf"]
  
-# remove existing files to recompute
-# necessary to be both compatible for python 2.7 and 3.3
-if IS_PYTHON3:
-    for part in parts:
-        try:
-            os.unlink("pysam/c%s.c" % part)
-        except:
-            pass
-
  # Exit if there are no pre-compiled files and no cython available
  fn = source_pattern % "htslib"
  if not os.path.exists(fn):
@@ -449,7 +450,7 @@ ctabixproxies = Extension(
      "pysam.ctabixproxies",
      [source_pattern % "tabixproxies"] +
      os_c_files,
-    library_dirs=[],
+    library_dirs=htslib_library_dirs,
      include_dirs=include_os,
      libraries=external_htslib_libraries + internal_htslib_libraries,
      language="c",
@@ -461,7 +462,7 @@ cvcf = Extension(
      "pysam.cvcf",
      [source_pattern % "vcf"] +
      os_c_files,
-    library_dirs=[],
+    library_dirs=htslib_library_dirs,
      include_dirs=["htslib", "."] + include_os + htslib_include_dirs,
      libraries=external_htslib_libraries + internal_htslib_libraries,
      language="c",
diff --git a/tests/AlignedSegment_test.py b/tests/AlignedSegment_test.py

index 5995faa42e7c31071a0021a575ab329759e55042..94b2eb361b08471c2f8e67d910e36125e524e20a 100644 (file)
--- a/tests/AlignedSegment_test.py
+++ b/tests/AlignedSegment_test.py
@@ -3,6 +3,7 @@ import pysam
  import unittest
  import collections
  import copy
+import array
  
  from TestUtils import checkFieldEqual
  
@@ -319,7 +320,7 @@ class TestAlignedSegment(ReadTest):
               (None, 25, 'T'), (None, 26, 'T'),
               (5, 27, 'A'), (6, 28, 'A'), (7, 29, 'A'), (8, 30, 'A')]
              )
-        
+
          a.cigarstring = "5M2D2I2M"
          a.set_tag("MD", "4C^TT2")
          self.assertEqual(
@@ -331,6 +332,34 @@ class TestAlignedSegment(ReadTest):
               (7, 27, 'A'), (8, 28, 'A')]
              )
  
+    def test_get_aligned_pairs_skip_reference(self):
+        a = self.buildRead()
+        a.query_sequence = "A" * 10
+        a.cigarstring = "5M1N5M"
+        a.set_tag("MD", "10")
+
+        self.assertEqual(
+            a.get_aligned_pairs(with_seq=True),
+            [(0, 20, 'A'), (1, 21, 'A'), (2, 22, 'A'),
+             (3, 23, 'A'), (4, 24, 'A'), (None, 25, None),
+             (5, 26, 'A'), (6, 27, 'A'), (7, 28, 'A'),
+             (8, 29, 'A'), (9, 30, 'A')])
+
+        self.assertEqual(
+            a.get_aligned_pairs(with_seq=False),
+            [(0, 20), (1, 21), (2, 22),
+             (3, 23), (4, 24), (None, 25),
+             (5, 26), (6, 27), (7, 28),
+             (8, 29), (9, 30)])
+
+        self.assertEqual(
+            a.get_aligned_pairs(matches_only=True, with_seq=False),
+            [(0, 20), (1, 21),
+             (2, 22), (3, 23),
+             (4, 24), (5, 26),
+             (6, 27), (7, 28),
+             (8, 29), (9, 30)])
+
      def testNoSequence(self):
          '''issue 176: retrieving length without query sequence
          with soft-clipping.
@@ -347,13 +376,60 @@ class TestAlignedSegment(ReadTest):
          self.assertEqual(a.query_alignment_length, 20)
  
  
+class TestCigarStats(ReadTest):
+    
+    def testStats(self):
+        
+        a = self.buildRead()
+
+        a.cigarstring = None
+        self.assertEqual(
+            [list(x) for x in a.get_cigar_stats()],
+            [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+             [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
+
+        a.cigarstring = "10M"
+        self.assertEqual(
+            [list(x) for x in a.get_cigar_stats()],
+            [[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
+
+        a.cigarstring = "10M2I2M"
+        self.assertEqual(
+            [list(x) for x in a.get_cigar_stats()],
+            [[12, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+             [2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
+
+        for i, x in enumerate("MIDNSHP=X"):
+            a.cigarstring = "2{}".format(x)
+            expected = [[0] * 11, [0] * 11]
+            expected[0][i] = 2
+            expected[1][i] = 1
+            self.assertEqual(
+                [list(x) for x in a.get_cigar_stats()],
+                expected)
+
+        a.cigarstring = "10M"
+        a.set_tag("NM", 5)
+        self.assertEqual(
+            [list(x) for x in a.get_cigar_stats()],
+            [[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
+             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
+
+        a.cigarstring = None
+        self.assertEqual(
+            [list(x) for x in a.get_cigar_stats()],
+            [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
+             [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
+
+
  class TestAlignedPairs(unittest.TestCase):
      filename = os.path.join(DATADIR, "example_aligned_pairs.bam")
  
      def testReferenceBases(self):
          """reference bases should always be the same nucleotide
          """
-        reference_bases = collections.defaultdict(list)        
+        reference_bases = collections.defaultdict(list)
          with pysam.AlignmentFile(self.filename) as inf:
              for c in inf.pileup():
                  for r in c.pileups:
@@ -389,7 +465,25 @@ class TestTags(ReadTest):
          self.assertEqual(False, a.has_tag("NM"))
          # check if deleting a non-existing tag is fine
          a.set_tag("NM", None)
+        a.set_tag("NM", None)
  
+    def testArrayTags(self):
+        read = self.buildRead()
+        supported_dtypes = "bhBHf"
+        unsupported_dtypes = "lLd"
+
+        for dtype in supported_dtypes:
+            key = "F" + dtype
+            read.set_tag(key, array.array(dtype, range(10)))
+            ary = read.get_tag(key)
+
+        for dtype in unsupported_dtypes:
+            key = "F" + dtype
+            self.assertRaises(ValueError,
+                              read.set_tag,
+                              key,
+                              array.array(dtype, range(10)))
+        
      def testAddTagsType(self):
          a = self.buildRead()
          a.tags = None
@@ -551,6 +645,23 @@ class TestTags(ReadTest):
              "A" * 5 + "C" * 3 + "A" * 5,
              a.get_reference_sequence())
  
+    def testMDTagRefSkipping(self):
+        a = self.buildRead()
+
+        a.cigarstring = "5M1N5M"
+        a.query_sequence = "A" * 10
+        a.set_tag('MD', "10")
+        self.assertEqual(
+            "A" * 10,
+            a.get_reference_sequence())
+
+        a.cigarstring = "5M3N5M"
+        a.query_sequence = "A" * 10
+        a.set_tag('MD', "10")
+        self.assertEqual(
+            "A" * 10,
+            a.get_reference_sequence())
+
      def testMDTagSoftClipping(self):
          a = self.buildRead()
  
@@ -561,7 +672,7 @@ class TestTags(ReadTest):
          self.assertEqual(
              "A" * 5 + "C" + "A" * 5,
              a.get_reference_sequence())
-        
+
          # all together
          a.cigarstring = "5S5M1D5M1I5M5S"
          a.query_sequence = "G" * 5 + "A" * 16 + "G" * 5
@@ -579,7 +690,7 @@ class TestTags(ReadTest):
          self.assertEqual(
              "AAcAATCAAAAA",
              a.get_reference_sequence())
-        
+
          a.cigarstring = "5S5M2D1I5M5S"
          a.query_sequence = "G" * 5 + "A" * 11 + "G" * 5
          a.set_tag('MD', "2C2^TC5")
@@ -606,7 +717,7 @@ class TestTags(ReadTest):
  
  
  class TestCopy(ReadTest):
-    
+
      def testCopy(self):
          a = self.buildRead()
          b = copy.copy(a)
diff --git a/tests/AlignmentFile_test.py b/tests/AlignmentFile_test.py

index c03e23480aa2ebb9a6984cccb741475b72195d66..9a3372232e5e606ff2f27af07b60427df585a5a0 100644 (file)
--- a/tests/AlignmentFile_test.py
+++ b/tests/AlignmentFile_test.py
@@ -23,7 +23,7 @@ from functools import partial
  import pysam
  import pysam.samtools
  from TestUtils import checkBinaryEqual, checkURL, \
-    checkSamtoolsViewEqual, checkFieldEqual, force_str
+    check_samtools_view_equal, checkFieldEqual, force_str
  
  
  DATADIR = "pysam_data"
@@ -49,6 +49,9 @@ class BasicTestBAMFromFetch(unittest.TestCase):
              "rb")
          self.reads = list(self.samfile.fetch())
  
+    def tearDown(self):
+        self.samfile.close()
+
      def testARqname(self):
          self.assertEqual(
              self.reads[0].query_name,
@@ -261,9 +264,6 @@ class BasicTestBAMFromFetch(unittest.TestCase):
          self.assertEqual(self.reads[0].opt("XT"), "U")
          self.assertEqual(self.reads[1].opt("XT"), "R")
  
-    def tearDown(self):
-        self.samfile.close()
-
  
  class BasicTestSAMFromFetch(BasicTestBAMFromFetch):
  
@@ -426,42 +426,42 @@ class TestIO(unittest.TestCase):
          The *checkf* is used to determine if the files are
          equal.
          '''
-        infile = pysam.AlignmentFile(
-            os.path.join(DATADIR, input_filename),
-            input_mode)
-
-        if "b" in input_mode:
-            self.assertTrue(infile.is_bam)
-            self.assertFalse(infile.is_cram)
-        elif "c" in input_mode:
-            self.assertFalse(infile.is_bam)
-            self.assertTrue(infile.is_cram)
-        else:
-            self.assertFalse(infile.is_cram)
-            self.assertFalse(infile.is_bam)
-
-        if use_template:
-            outfile = pysam.AlignmentFile(
-                output_filename,
-                output_mode,
-                reference_filename=sequence_filename,
-                template=infile)
-        else:
-            outfile = pysam.AlignmentFile(
-                output_filename,
-                output_mode,
-                reference_names=infile.references,
-                reference_lengths=infile.lengths,
-                reference_filename=sequence_filename,
-                add_sq_text=False)
  
-        iter = infile.fetch()
+        with pysam.AlignmentFile(
+                os.path.join(DATADIR, input_filename),
+                input_mode) as infile:
+
+            if "b" in input_mode:
+                self.assertTrue(infile.is_bam)
+                self.assertFalse(infile.is_cram)
+            elif "c" in input_mode:
+                self.assertFalse(infile.is_bam)
+                self.assertTrue(infile.is_cram)
+            else:
+                self.assertFalse(infile.is_cram)
+                self.assertFalse(infile.is_bam)
+
+            if use_template:
+                outfile = pysam.AlignmentFile(
+                    output_filename,
+                    output_mode,
+                    reference_filename=sequence_filename,
+                    template=infile)
+            else:
+                outfile = pysam.AlignmentFile(
+                    output_filename,
+                    output_mode,
+                    reference_names=infile.references,
+                    reference_lengths=infile.lengths,
+                    reference_filename=sequence_filename,
+                    add_sq_text=False)
  
-        for x in iter:
-            outfile.write(x)
+            iter = infile.fetch()
  
-        infile.close()
-        outfile.close()
+            for x in iter:
+                outfile.write(x)
+
+            outfile.close()
  
          self.assertTrue(checkf(
              os.path.join(DATADIR, reference_filename),
@@ -490,7 +490,7 @@ class TestIO(unittest.TestCase):
                         "tmp_ex2.cram",
                         "rc", "wc",
                         sequence_filename="pysam_data/ex1.fa",
-                       checkf=checkSamtoolsViewEqual)
+                       checkf=check_samtools_view_equal)
  
      def testSAM2BAM(self):
          self.checkEcho("ex2.sam",
@@ -512,7 +512,7 @@ class TestIO(unittest.TestCase):
                         "rb", "wc",
                         sequence_filename="pysam_data/ex1.fa",
                         checkf=partial(
-                           checkSamtoolsViewEqual,
+                           check_samtools_view_equal,
                             without_header=True))
  
      def testCRAM2BAM(self):
@@ -523,7 +523,7 @@ class TestIO(unittest.TestCase):
                         "rc", "wb",
                         sequence_filename="pysam_data/ex1.fa",
                         checkf=partial(
-                           checkSamtoolsViewEqual,
+                           check_samtools_view_equal,
                             without_header=True))
  
      def testSAM2CRAM(self):
@@ -533,7 +533,7 @@ class TestIO(unittest.TestCase):
                         "r", "wc",
                         sequence_filename="pysam_data/ex1.fa",
                         checkf=partial(
-                           checkSamtoolsViewEqual,
+                           check_samtools_view_equal,
                             without_header=True))
  
      def testCRAM2SAM(self):
@@ -543,7 +543,7 @@ class TestIO(unittest.TestCase):
                         "rc", "wh",
                         sequence_filename="pysam_data/ex1.fa",
                         checkf=partial(
-                           checkSamtoolsViewEqual,
+                           check_samtools_view_equal,
                             without_header=True))
  
      # Disabled - should work, files are not binary equal, but are
@@ -858,12 +858,18 @@ class TestIteratorRowBAM(unittest.TestCase):
  
      filename = os.path.join(DATADIR, "ex2.bam")
      mode = "rb"
+    reference_filename = None
  
      def setUp(self):
          self.samfile = pysam.AlignmentFile(
-            self.filename, self.mode,
+            self.filename,
+            self.mode,
+            reference_filename=self.reference_filename,
          )
  
+    def tearDown(self):
+        self.samfile.close()
+
      def checkRange(self, rnge):
          '''compare results from iterator with those from samtools.'''
          ps = list(self.samfile.fetch(region=rnge))
@@ -911,9 +917,6 @@ class TestIteratorRowBAM(unittest.TestCase):
                  self.checkRange("%s:%i-%i" %
                                  (contig, start, start + 90))
  
-    def tearDown(self):
-        self.samfile.close()
-
  
  class TestIteratorRowAllBAM(unittest.TestCase):
  
@@ -1034,9 +1037,9 @@ class TestIteratorRowCRAM(TestIteratorRowBAM):
      mode = "rc"
  
  
-class TestIteratorRowCRAM(TestIteratorRowBAM):
-    filename = os.path.join(DATADIR, "ex2.cram")
-    mode = "rc"
+class TestIteratorRowCRAMWithReferenceFilename(TestIteratorRowCRAM):
+    reference_filename = os.path.join(DATADIR, "ex1.fa")
+
  
  ##########################################################
  ##########################################################
@@ -1840,40 +1843,54 @@ class TestBTagBam(TestBTagSam):
      filename = os.path.join(DATADIR, 'example_btag.bam')
  
  
-class TestDoubleFetch(unittest.TestCase):
-
+class TestDoubleFetchBAM(unittest.TestCase):
      '''check if two iterators on the same bamfile are independent.'''
  
      filename = os.path.join(DATADIR, 'ex1.bam')
+    mode = "rb"
  
      def testDoubleFetch(self):
  
-        samfile1 = pysam.AlignmentFile(self.filename, 'rb')
-
-        for a, b in zip(samfile1.fetch(multiple_iterators=True),
-                        samfile1.fetch(multiple_iterators=True)):
-            self.assertEqual(a.compare(b), 0)
+        with pysam.AlignmentFile(self.filename, self.mode) as samfile1:
+            for a, b in zip(samfile1.fetch(multiple_iterators=True),
+                            samfile1.fetch(multiple_iterators=True)):
+                self.assertEqual(a.compare(b), 0)
  
      def testDoubleFetchWithRegion(self):
  
-        samfile1 = pysam.AlignmentFile(self.filename, 'rb')
-        chr, start, stop = 'chr1', 200, 3000000
-        # just making sure the test has something to catch
-        self.assertTrue(len(list(samfile1.fetch(chr, start, stop))) > 0)
+        with pysam.AlignmentFile(self.filename, self.mode) as samfile1:
+            contig, start, stop = 'chr1', 200, 3000000
+            # just making sure the test has something to catch
+            self.assertTrue(len(list(samfile1.fetch(contig, start, stop))) > 0)
  
-        for a, b in zip(samfile1.fetch(chr, start, stop),
-                        samfile1.fetch(chr, start, stop,
-                                       multiple_iterators=True)):
-            self.assertEqual(a.compare(b), 0)
+            # see Issue #293
+            # The following fails for CRAM files, but works for BAM
+            # files when the first is multiple_iterators=False:
+            for a, b in zip(samfile1.fetch(contig, start, stop,
+                                           multiple_iterators=True),
+                            samfile1.fetch(contig, start, stop,
+                                           multiple_iterators=True)):
+                self.assertEqual(a.compare(b), 0)
  
      def testDoubleFetchUntilEOF(self):
  
-        samfile1 = pysam.AlignmentFile(self.filename, 'rb')
+        with pysam.AlignmentFile(self.filename, self.mode) as samfile1:
+
+            for a, b in zip(samfile1.fetch(until_eof=True),
+                            samfile1.fetch(until_eof=True,
+                                           multiple_iterators=True)):
+                self.assertEqual(a.compare(b), 0)
+
+
+class TestDoubleFetchCRAM(TestDoubleFetchBAM):
+    filename = os.path.join(DATADIR, 'ex2.cram')
+    mode = "rc"
+
  
-        for a, b in zip(samfile1.fetch(until_eof=True),
-                        samfile1.fetch(until_eof=True,
-                                       multiple_iterators=True)):
-            self.assertEqual(a.compare(b), 0)
+class TestDoubleFetchCRAMWithReference(TestDoubleFetchBAM):
+    filename = os.path.join(DATADIR, 'ex2.cram')
+    mode = "rc"
+    reference_filename = os.path.join(DATADIR, 'ex1.fa')
  
  
  class TestRemoteFileFTP(unittest.TestCase):
@@ -1926,10 +1943,11 @@ class TestRemoteFileHTTP(unittest.TestCase):
          if not checkURL(self.url):
              return
  
-        samfile = pysam.AlignmentFile(self.url, "rb")
-        result = list(samfile.fetch(region=self.region))
-        samfile_local = pysam.AlignmentFile(self.local, "rb")
-        ref = list(samfile_local.fetch(region=self.region))
+        with pysam.AlignmentFile(self.url, "rb") as samfile:
+            result = list(samfile.fetch(region=self.region))
+
+        with pysam.AlignmentFile(self.local, "rb") as samfile_local:
+            ref = list(samfile_local.fetch(region=self.region))
  
          self.assertEqual(len(ref), len(result))
          for x, y in zip(result, ref):
@@ -1939,10 +1957,11 @@ class TestRemoteFileHTTP(unittest.TestCase):
          if not checkURL(self.url):
              return
  
-        samfile = pysam.AlignmentFile(self.url, "rb")
-        result = list(samfile.fetch())
-        samfile_local = pysam.AlignmentFile(self.local, "rb")
-        ref = list(samfile_local.fetch())
+        with pysam.AlignmentFile(self.url, "rb") as samfile:
+            result = list(samfile.fetch())
+
+        with pysam.AlignmentFile(self.local, "rb") as samfile_local:
+            ref = list(samfile_local.fetch())
  
          self.assertEqual(len(ref), len(result))
          for x, y in zip(result, ref):
@@ -2009,6 +2028,10 @@ class TestPileup(unittest.TestCase):
          self.samfile = pysam.AlignmentFile(self.samfilename)
          self.fastafile = pysam.Fastafile(self.fastafilename)
  
+    def tearDown(self):
+        self.samfile.close()
+        self.fastafile.close()
+
      def checkEqual(self, references, iterator):
  
          for x, column in enumerate(iterator):
@@ -2070,6 +2093,10 @@ class TestCountCoverage(unittest.TestCase):
          samfile.close()
          pysam.samtools.index("test_count_coverage_read_all.bam")
  
+    def tearDown(self):
+        self.samfile.close()
+        self.fastafile.close()
+
      def count_coverage_python(self, bam, chrom, start, stop,
                                read_callback,
                                quality_threshold=15):
@@ -2161,23 +2188,26 @@ class TestCountCoverage(unittest.TestCase):
          self.assertEqual(fast_counts[3], manual_counts[3])
  
      def test_count_coverage_read_all(self):
-        samfile = pysam.AlignmentFile("test_count_coverage_read_all.bam")
+
          chrom = 'chr1'
          start = 0
          stop = 2000
  
          def filter(read):
              return not (read.flag & (0x4 | 0x100 | 0x200 | 0x400))
-        fast_counts = samfile.count_coverage(
-            chrom, start, stop,
-            read_callback='all',
-            #read_callback = lambda read: ~(read.flag & (0x4 | 0x100 | 0x200 | 0x400)),
-            quality_threshold=0)
-        manual_counts = samfile.count_coverage(
-            chrom, start, stop,
-            read_callback=lambda read: not(
-                read.flag & (0x4 | 0x100 | 0x200 | 0x400)),
-            quality_threshold=0)
+
+        with pysam.AlignmentFile("test_count_coverage_read_all.bam") as samfile:
+
+            fast_counts = samfile.count_coverage(
+                chrom, start, stop,
+                read_callback='all',
+                #read_callback = lambda read: ~(read.flag & (0x4 | 0x100 | 0x200 | 0x400)),
+                quality_threshold=0)
+            manual_counts = samfile.count_coverage(
+                chrom, start, stop,
+                read_callback=lambda read: not(
+                    read.flag & (0x4 | 0x100 | 0x200 | 0x400)),
+                quality_threshold=0)
  
          os.unlink("test_count_coverage_read_all.bam")
          os.unlink("test_count_coverage_read_all.bam.bai")
@@ -2202,18 +2232,20 @@ class TestCountCoverage(unittest.TestCase):
              samfile.write(read)
          samfile.close()
          pysam.samtools.index("test_count_coverage_nofilter.bam")
-        samfile = pysam.AlignmentFile("test_count_coverage_nofilter.bam")
          chr = 'chr1'
          start = 0
          stop = 2000
-        fast_counts = samfile.count_coverage(chr, start, stop,
-                                             read_callback='nofilter',
-                                             quality_threshold=0)
  
-        manual_counts = self.count_coverage_python(samfile, chr, start, stop,
-                                                   read_callback=lambda x: True,
-                                                   quality_threshold=0)
-        samfile.close()
+        with pysam.AlignmentFile("test_count_coverage_nofilter.bam") as samfile:
+
+            fast_counts = samfile.count_coverage(chr, start, stop,
+                                                 read_callback='nofilter',
+                                                 quality_threshold=0)
+
+            manual_counts = self.count_coverage_python(samfile, chr, start, stop,
+                                                       read_callback=lambda x: True,
+                                                       quality_threshold=0)
+
          os.unlink("test_count_coverage_nofilter.bam")
          os.unlink("test_count_coverage_nofilter.bam.bai")
          self.assertEqual(fast_counts[0], manual_counts[0])
@@ -2223,7 +2255,7 @@ class TestCountCoverage(unittest.TestCase):
  
  
  class TestPileupQueryPosition(unittest.TestCase):
-    
+
      filename = "test_query_position.bam"
  
      def testPileup(self):
@@ -2260,8 +2292,8 @@ class TestLogging(unittest.TestCase):
              log_hand.setFormatter(formatter)
              logger.addHandler(log_hand)
  
-        bam = pysam.AlignmentFile(bamfile, 'rb')
-        cols = bam.pileup()
+        with pysam.AlignmentFile(bamfile, 'rb') as bam:
+            cols = bam.pileup()
          self.assertTrue(True)
  
      def testFail1(self):
@@ -2292,40 +2324,41 @@ class TestAlignmentFileUtilityFunctions(unittest.TestCase):
  
      def testCount(self):
  
-        samfile = pysam.AlignmentFile(os.path.join(DATADIR, "ex1.bam"),
-                                      "rb")
+        with pysam.AlignmentFile(
+                os.path.join(DATADIR, "ex1.bam"),
+                "rb") as samfile:
  
-        for contig in ("chr1", "chr2"):
-            for start in range(0, 2000, 100):
-                end = start + 1
-                self.assertEqual(
-                    len(list(samfile.fetch(contig, start, end))),
-                    samfile.count(contig, start, end),
-                    'number mismatch for %s:%i-%i %i != %i' % (
-                        contig, start, end,
+            for contig in ("chr1", "chr2"):
+                for start in range(0, 2000, 100):
+                    end = start + 1
+                    self.assertEqual(
                          len(list(samfile.fetch(contig, start, end))),
-                        samfile.count(contig, start, end)))
+                        samfile.count(contig, start, end),
+                        'number mismatch for %s:%i-%i %i != %i' % (
+                            contig, start, end,
+                            len(list(samfile.fetch(contig, start, end))),
+                            samfile.count(contig, start, end)))
  
-                # test empty intervals
-                self.assertEqual(
-                    len(list(samfile.fetch(contig, start, start))),
-                    samfile.count(contig, start, start),
-                    'number mismatch for %s:%i-%i %i != %i' % (
-                        contig, start, start,
+                    # test empty intervals
+                    self.assertEqual(
                          len(list(samfile.fetch(contig, start, start))),
-                        samfile.count(contig, start, start)))
+                        samfile.count(contig, start, start),
+                        'number mismatch for %s:%i-%i %i != %i' % (
+                            contig, start, start,
+                            len(list(samfile.fetch(contig, start, start))),
+                            samfile.count(contig, start, start)))
  
-                # test half empty intervals
-                self.assertEqual(len(list(samfile.fetch(contig, start))),
-                                 samfile.count(contig, start))
+                    # test half empty intervals
+                    self.assertEqual(len(list(samfile.fetch(contig, start))),
+                                     samfile.count(contig, start))
  
-                self.assertEqual(
-                    len(list(samfile.fetch(contig, start))),
-                    samfile.count(contig, start),
-                    'number mismatch for %s:%i %i != %i' % (
-                        contig, start,
+                    self.assertEqual(
                          len(list(samfile.fetch(contig, start))),
-                        samfile.count(contig, start)))
+                        samfile.count(contig, start),
+                        'number mismatch for %s:%i %i != %i' % (
+                            contig, start,
+                            len(list(samfile.fetch(contig, start))),
+                            samfile.count(contig, start)))
  
      def testMate(self):
          '''test mate access.'''
@@ -2339,35 +2372,35 @@ class TestAlignmentFileUtilityFunctions(unittest.TestCase):
          for x in readnames:
              counts[x] += 1
  
-        samfile = pysam.AlignmentFile(os.path.join(DATADIR, "ex1.bam"),
-                                      "rb")
+        with pysam.AlignmentFile(os.path.join(DATADIR, "ex1.bam"),
+                                 "rb") as samfile:
  
-        for read in samfile.fetch():
-            if not read.is_paired:
-                self.assertRaises(ValueError, samfile.mate, read)
-            elif read.mate_is_unmapped:
-                self.assertRaises(ValueError, samfile.mate, read)
-            else:
-                if counts[read.query_name] == 1:
+            for read in samfile.fetch():
+                if not read.is_paired:
+                    self.assertRaises(ValueError, samfile.mate, read)
+                elif read.mate_is_unmapped:
                      self.assertRaises(ValueError, samfile.mate, read)
                  else:
-                    mate = samfile.mate(read)
-                    self.assertEqual(read.query_name, mate.query_name)
-                    self.assertEqual(read.is_read1, mate.is_read2)
-                    self.assertEqual(read.is_read2, mate.is_read1)
-                    self.assertEqual(
-                        read.reference_start, mate.next_reference_start)
-                    self.assertEqual(
-                        read.next_reference_start, mate.reference_start)
+                    if counts[read.query_name] == 1:
+                        self.assertRaises(ValueError, samfile.mate, read)
+                    else:
+                        mate = samfile.mate(read)
+                        self.assertEqual(read.query_name, mate.query_name)
+                        self.assertEqual(read.is_read1, mate.is_read2)
+                        self.assertEqual(read.is_read2, mate.is_read1)
+                        self.assertEqual(
+                            read.reference_start, mate.next_reference_start)
+                        self.assertEqual(
+                            read.next_reference_start, mate.reference_start)
  
      def testIndexStats(self):
          '''test if total number of mapped/unmapped reads is correct.'''
  
-        samfile = pysam.AlignmentFile(os.path.join(DATADIR, "ex1.bam"),
-                                      "rb")
-        self.assertEqual(samfile.mapped, 3235)
-        self.assertEqual(samfile.unmapped, 35)
-        self.assertEqual(samfile.nocoordinate, 0)
+        with pysam.AlignmentFile(os.path.join(DATADIR, "ex1.bam"),
+                                 "rb") as samfile:
+            self.assertEqual(samfile.mapped, 3235)
+            self.assertEqual(samfile.unmapped, 35)
+            self.assertEqual(samfile.nocoordinate, 0)
  
  
  class TestMappedUnmapped(unittest.TestCase):
@@ -2452,26 +2485,29 @@ class TestAlignmentFileIndex(unittest.TestCase):
  class TestExplicitIndex(unittest.TestCase):
  
      def testExplicitIndexBAM(self):
-        samfile = pysam.AlignmentFile(
-            os.path.join(DATADIR, "explicit_index.bam"),
-            "rb",
-            filepath_index=os.path.join(DATADIR, 'ex1.bam.bai'))
-                                      
-        samfile.fetch("chr1")
+        with pysam.AlignmentFile(
+                os.path.join(DATADIR, "explicit_index.bam"),
+                "rb",
+                filepath_index=os.path.join(DATADIR, 'ex1.bam.bai')) as samfile:
+            samfile.fetch("chr1")
  
      def testExplicitIndexCRAM(self):
-        samfile = pysam.AlignmentFile(
-            os.path.join(DATADIR, "explicit_index.cram"),
-            "rc",
-            filepath_index=os.path.join(DATADIR, 'ex1.cram.crai'))
+        with pysam.AlignmentFile(
+                os.path.join(DATADIR, "explicit_index.cram"),
+                "rc",
+                filepath_index=os.path.join(DATADIR, 'ex1.cram.crai')) as samfile:
+            samfile.fetch("chr1")
  
      def testRemoteExplicitIndexBAM(self):
-        samfile = pysam.AlignmentFile(
-            "http://genserv.anat.ox.ac.uk/downloads/pysam/test/noindex.bam",
-            "rb",
-            filepath_index=os.path.join(DATADIR, 'ex1.bam.bai'))
+        if not checkURL(
+                "http://genserv.anat.ox.ac.uk/downloads/pysam/test/noindex.bam"):
+            return
  
-        samfile.fetch("chr1")
+        with pysam.AlignmentFile(
+                "http://genserv.anat.ox.ac.uk/downloads/pysam/test/noindex.bam",
+                "rb",
+                filepath_index=os.path.join(DATADIR, 'ex1.bam.bai')) as samfile:
+            samfile.fetch("chr1")
  
  
  class TestVerbosity(unittest.TestCase):
diff --git a/tests/TestUtils.py b/tests/TestUtils.py

index efb2333cb93dbcae936c8938b8076357bf9d5a80..71ab22a69f7309f6b535ba3c05034f8162f616fb 100644 (file)
--- a/tests/TestUtils.py
+++ b/tests/TestUtils.py
@@ -18,15 +18,28 @@ else:
  
  if IS_PYTHON3:
      def force_str(s):
-        return s.decode('ascii')
+        try:
+            return s.decode('ascii')
+        except AttributeError:
+            return s
+    def force_bytes(s):
+        try:
+            return s.encode('ascii')
+        except AttributeError:
+            return s
  else:
      def force_str(s):
          return s
+    def force_bytes(s):
+        return s
  
  
  def openfile(fn):
      if fn.endswith(".gz"):
-        return gzip.open(fn)
+        try:
+            return gzip.open(fn, "rt", encoding="utf-8")
+        except TypeError:
+            return gzip.open(fn, "r")
      else:
          return open(fn)
  
@@ -59,8 +72,9 @@ def checkBinaryEqual(filename1, filename2):
      return found
  
  
-def checkSamtoolsViewEqual(filename1, filename2,
-                           without_header=False):
+def check_samtools_view_equal(
+        filename1, filename2,
+        without_header=False):
      '''return true if the two files are equal in their
      content through samtools view.
      '''
@@ -139,7 +153,7 @@ def checkFieldEqual(cls, read1, read2, exclude=[]):
                          (n, getattr(read1, n), getattr(read2, n)))
  
  
-def check_lines_equal(cls, a, b, sort=False, filter_f=None):
+def check_lines_equal(cls, a, b, sort=False, filter_f=None, msg=None):
      """check if contents of two files are equal comparing line-wise.
  
      sort: bool
@@ -147,17 +161,17 @@ def check_lines_equal(cls, a, b, sort=False, filter_f=None):
      filter_f:
         remover lines in both a and b where expression is True
      """
-
      aa = openfile(a).readlines()
      bb = openfile(b).readlines()
  
      if filter_f is not None:
-        aa = [x for x in aa if not filter_f]
-        bb = [x for x in bb if not filter_f]
+        aa = [x for x in aa if not filter_f(x)]
+        bb = [x for x in bb if not filter_f(x)]
+
      if sort:
-        cls.assertEqual(sorted(aa), sorted(bb))
+        cls.assertEqual(sorted(aa), sorted(bb), msg)
      else:
-        cls.assertEqual(aa, bb)
+        cls.assertEqual(aa, bb, msg)
  
  
  def get_temp_filename(suffix=""):
diff --git a/tests/VariantFile_test.py b/tests/VariantFile_test.py

index a7e54acf34b9d69c6644c200d342d5eeccefe749..ef21245418b7f1c5d9b208a3ed5e31bb18abd288 100644 (file)
--- a/tests/VariantFile_test.py
+++ b/tests/VariantFile_test.py
@@ -2,6 +2,7 @@ import os
  import unittest
  import pysam
  import gzip
+import subprocess
  from TestUtils import get_temp_filename, check_lines_equal
  
  DATADIR="cbcf_data"
@@ -9,7 +10,6 @@ from tabix_test import loadAndConvert
  
  
  def read_header(filename):
-
      data = []
      if filename.endswith(".gz"):
          for line in gzip.open(filename):
@@ -21,6 +21,7 @@ def read_header(filename):
              for line in f:
                  if line.startswith("#"):
                      data.append(line)
+
      return data
  
  
@@ -135,8 +136,9 @@ class TestOpening(unittest.TestCase):
              self.assertEqual(len(list(inf.fetch())), 5)
  
      def testDetectBCF(self):
-        with pysam.VariantFile(os.path.join(DATADIR,
-            "example_vcf40.bcf")) as inf:
+        with pysam.VariantFile(os.path.join(
+                DATADIR,
+                "example_vcf40.bcf")) as inf:
              self.assertEqual(inf.category, 'VARIANTS')
              self.assertEqual(inf.format, 'BCF')
              self.assertEqual(inf.compression, 'BGZF')
@@ -333,7 +335,7 @@ class TestConstructionVCFWithContigs(unittest.TestCase):
  
          check_lines_equal(
              self, fn_in, fn_out, sort=True,
-            filter_f=lambda x: not x.startswith("##contig"))
+            filter_f=lambda x: x.startswith("##contig"))
          os.unlink(fn_out)
  
      def testConstructionWithRecords(self):
@@ -413,6 +415,43 @@ class TestConstructionVCFGZWithoutContigs(TestConstructionVCFWithContigs):
      filename = "example_vcf42.vcf.gz"
  
  
+class TestSettingRecordValues(unittest.TestCase):
+
+    filename = "example_vcf40.vcf"
+
+    def testSetQual(self):
+        with pysam.VariantFile(os.path.join(DATADIR, self.filename)) as inf:
+            record = next(inf)
+            self.assertEqual(record.qual, 47)
+            record.qual = record.qual
+            self.assertEqual(record.qual, 47)
+            record.qual = 10
+            self.assertEqual(record.qual, 10)
+            self.assertEqual(str(record).split("\t")[5], "10")
+
+    def testGenotype(self):
+        with pysam.VariantFile(os.path.join(DATADIR, self.filename)) as inf:
+            record = next(inf)
+            sample = record.samples["NA00001"]
+            print (sample["GT"])
+            self.assertEqual(sample["GT"], (0, 0))
+#      Fails with TypeError
+#            sample["GT"] = sample["GT"]
+
+class TestSubsetting(unittest.TestCase):
+    
+    filename = "example_vcf42.vcf.gz"
+    
+    def testSubsetting(self):
+        with pysam.VariantFile(os.path.join(DATADIR,
+                                            self.filename)) as inf:
+            inf.subset_samples(["NA00001"])
+
  
  if __name__ == "__main__":
+    # build data files
+    print ("building data files")
+    subprocess.call("make -C %s" % DATADIR, shell=True)
+    print ("starting tests")
      unittest.main()
+    print ("completed tests")
diff --git a/tests/cbcf_data/example_vcf42.vcf b/tests/cbcf_data/example_vcf42.vcf

index c6c703077a5d7311b146993134251afdfb29a278..f103e1f8f65f70deccef7cc32a3d50faeaa982fa 100644 (file)
--- a/tests/cbcf_data/example_vcf42.vcf
+++ b/tests/cbcf_data/example_vcf42.vcf
@@ -17,8 +17,8 @@
  ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
  ##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
  #CHROM POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA00001 NA00002 NA00003
-M      1230237 .       T       .       47      PASS    NS=3;DP=13;AA=T GT:GQ:DP:HQ     0|0:54:7:56,60  0|0:48:4:51,51  0/0:61:2
+M      1230237 .       T       .       47      PASS    NS=3;DP=13;AA=T GT:GQ:DP:HQ     0|0:54:7:56,60  0|0:48:4:51,51  0/0:61:2:.
  17     14370   rs6054257       G       A       29      PASS    NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ     0|0:48:1:51,51  1|0:48:8:51,51  1/1:43:5:.,.
-20     17330   .       T       A       3       q10     NS=3;DP=11;AF=0.017     GT:GQ:DP:HQ     0|0:49:3:58,50  0|1:3:5:65,3    0/0:41:3
-20     1110696 rs6040355       A       G,T     67      PASS    NS=2;DP=10;AF=0.333,0.667;AA=T;DB       GT:GQ:DP:HQ     1|2:21:6:23,27  2|1:2:0:18,2    2/2:35:4
+20     17330   .       T       A       3       q10     NS=3;DP=11;AF=0.017     GT:GQ:DP:HQ     0|0:49:3:58,50  0|1:3:5:65,3    0/0:41:3:.
+20     1110696 rs6040355       A       G,T     67      PASS    NS=2;DP=10;AF=0.333,0.667;AA=T;DB       GT:GQ:DP:HQ     1|2:21:6:23,27  2|1:2:0:18,2    2/2:35:4:.
  20     1234567 microsat1       GTCT    G,GTACT 50      PASS    NS=3;DP=9;AA=G  GT:GQ:DP        0/1:35:4        0/2:17:2        1/1:40:3
diff --git a/tests/faidx_test.py b/tests/faidx_test.py

index f3e6cc4bfd15fd453e087e61e71702209ae860c3..a12355046122a330790ae9fddcf1d487c6f3313b 100644 (file)
--- a/tests/faidx_test.py
+++ b/tests/faidx_test.py
@@ -2,6 +2,7 @@ import pysam
  import unittest
  import os
  import gzip
+import shutil
  
  from TestUtils import checkURL
  
@@ -56,6 +57,53 @@ class TestFastaFile(unittest.TestCase):
          self.file.close()
  
  
+class TestFastaFilePathIndex(unittest.TestCase):
+
+    filename = os.path.join(DATADIR, "ex1.fa")
+
+    def testGarbageIndex(self):
+        self.assertRaises(NotImplementedError,
+                          pysam.FastaFile,
+                          self.filename,
+                          filepath_index="garbage.fa.fai")
+        return
+
+        self.assertRaises(ValueError,
+                          pysam.FastaFile,
+                          self.filename,
+                          filepath_index="garbage.fa.fai")
+
+    def testOpenWithoutIndex(self):
+        faidx = pysam.FastaFile(self.filename)
+        faidx.close()
+
+    def testOpenWithStandardIndex(self):
+        self.assertRaises(NotImplementedError,
+                          pysam.FastaFile,
+                          self.filename,
+                          filepath_index=self.filename + ".fai")
+        return
+
+        faidx = pysam.FastaFile(self.filename,
+                                filepath_index=self.filename + ".fai")
+        faidx.close()
+
+    def testOpenWithOtherIndex(self):
+        return
+        tmpfilename = "tmp_" + os.path.basename(self.filename)
+        shutil.copyfile(self.filename, tmpfilename)
+        faidx = pysam.FastaFile(tmpfilename,
+                                filepath_index=self.filename + ".fai")
+        faidx.close()
+        # index should not be auto-generated
+        self.assertFalse(os.path.exists(tmpfilename + ".fai"))
+        os.unlink(tmpfilename)
+
+class TestFastaFilePathIndexCompressed(TestFastaFilePathIndex):
+    
+    filename = os.path.join(DATADIR, "ex1.fa.gz")
+
+
  class TestFastxFileFastq(unittest.TestCase):
  
      filetype = pysam.FastxFile
@@ -67,6 +115,9 @@ class TestFastxFileFastq(unittest.TestCase):
                                    persist=self.persist)
          self.has_quality = self.filename.endswith('.fq')
  
+    def tearDown(self):
+        self.file.close()
+
      def checkFirst(self, s):
          # test first entry
          self.assertEqual(s.sequence, "GGGAACAGGGGGGTGCACTAATGCGCTCCACGCCC")
@@ -160,8 +211,8 @@ class TestFastxFileWithEmptySequence(unittest.TestCase):
          with gzip.open(fn) as inf:
              ref_num = len(list(inf)) / 4
  
-        f = self.filetype(fn)
-        l = len(list(f))
+        with self.filetype(fn) as f:
+            l = len(list(f))
          self.assertEqual(ref_num, l)
  
  
@@ -175,10 +226,10 @@ class TestRemoteFileFTP(unittest.TestCase):
      def testFTPView(self):
          if not checkURL(self.url):
              return
-        f = pysam.Fastafile(self.url)
-        self.assertEqual(
-            len(f.fetch("chr1", 0, 1000)),
-            1000)
+        with pysam.Fastafile(self.url) as f:
+            self.assertEqual(
+                len(f.fetch("chr1", 0, 1000)),
+                1000)
  
  
  if __name__ == "__main__":
diff --git a/tests/pysam_data/Makefile b/tests/pysam_data/Makefile

index aed77b5db94d813bbeb75136619023c2eaf2b295..89a4a0c7e2402152756fc953566542e7cfe441d2 100644 (file)
--- a/tests/pysam_data/Makefile
+++ b/tests/pysam_data/Makefile
@@ -17,7 +17,8 @@ all: ex1.pileup.gz \
         ex2_truncated.bam \
         empty.bam empty.bam.bai \
         explicit_index.bam explicit_index.cram \
-       faidx_empty_seq.fq.gz
+       faidx_empty_seq.fq.gz \
+       ex1.fa.gz ex1.fa.gz.fai
  
  # ex2.sam - as ex1.sam, but with header
  ex2.sam.gz: ex1.bam ex1.bam.bai
@@ -82,3 +83,9 @@ clean:
  
  %.fq.gz: %.fq
         gzip < $< > $@
+
+%.fa.gz: %.fa
+       bgzip < $< > $@
+
+%.fa.gz.fai: %.fa.gz
+       samtools faidx $<
diff --git a/tests/samtools_test.py b/tests/samtools_test.py

index e5fd8b912c1df552cc223afc1355f6cd3df62de3..d5b27913a23a0903bcf67d49f30b9497d89ffac1 100644 (file)
--- a/tests/samtools_test.py
+++ b/tests/samtools_test.py
@@ -15,7 +15,8 @@ import glob
  import sys
  import subprocess
  import shutil
-from TestUtils import checkBinaryEqual
+from TestUtils import checkBinaryEqual, check_lines_equal, \
+    check_samtools_view_equal, get_temp_filename, force_bytes
  
  IS_PYTHON3 = sys.version_info[0] >= 3
  
@@ -80,6 +81,8 @@ class SamtoolsTest(unittest.TestCase):
          "idxstats ex1.bam > %(out)s_ex1.idxstats",
          "fixmate ex1.bam %(out)s_ex1.fixmate.bam",
          "flagstat ex1.bam > %(out)s_ex1.flagstat",
+        # Fails python 3.3 on linux, passes on OsX and when
+        # run locally
          "calmd ex1.bam ex1.fa > %(out)s_ex1.calmd.bam",
          # use -s option, otherwise the following error in samtools 1.2:
          # Samtools-htslib-API: bam_get_library() not yet implemented
@@ -132,7 +135,7 @@ class SamtoolsTest(unittest.TestCase):
                   samtools_version))
  
      def setUp(self):
-        '''setup tests. 
+        '''setup tests.
  
          For setup, all commands will be run before the first test is
          executed. Individual tests will then just compare the output
@@ -146,7 +149,7 @@ class SamtoolsTest(unittest.TestCase):
              os.makedirs(WORKDIR)
  
          for f in self.requisites:
-            shutil.copy(os.path.join(DATADIR, f), 
+            shutil.copy(os.path.join(DATADIR, f),
                          os.path.join(WORKDIR, f))
  
          self.savedir = os.getcwd()
@@ -184,13 +187,11 @@ class SamtoolsTest(unittest.TestCase):
          output = pysam_method(*pysam_parts,
                                raw=True,
                                catch_stdout=True)
-        
          # sys.stdout.write(" pysam ok\n")
-
          if ">" in statement:
              with open(pysam_targets[-1], "wb") as outfile:
                  if output is not None:
-                    outfile = outfile.write(output)
+                    outfile.write(force_bytes(output))
  
          for samtools_target, pysam_target in zip(samtools_targets,
                                                   pysam_targets):
@@ -204,17 +205,32 @@ class SamtoolsTest(unittest.TestCase):
              else:
                  samtools_files = [samtools_target]
                  pysam_files = [pysam_target]
-                
+
              for s, p in zip(samtools_files, pysam_files):
-                self.assertTrue(
-                    checkBinaryEqual(s, p),
-                    "%s failed: files %s and %s are not the same" %
-                    (command, s, p))
+                binary_equal = checkBinaryEqual(s, p)
+                error_msg = "%s failed: files %s and %s are not the same" % (command, s, p)
+                if binary_equal:
+                    continue
+                if s.endswith(".bam"):
+                    self.assertTrue(
+                        check_samtools_view_equal(
+                            s, p, without_header=True),
+                        error_msg)
+                check_lines_equal(
+                    self, s, p,
+                    filter_f=lambda x: x.startswith("#"),
+                    msg=error_msg)
  
      def testStatements(self):
          for statement in self.statements:
+            if (statement.startswith("calmd") and 
+                list(sys.version_info[:2]) == [3, 3]):
+                # skip calmd test, fails only on python 3.3.5
+                # in linux (empty output). Works in OsX and passes
+                # for 3.4 and 3.5, see issue #293
+                continue
              self.check_statement(statement)
-        
+
      def tearDown(self):
          if os.path.exists(WORKDIR):
              shutil.rmtree(WORKDIR)
@@ -227,6 +243,28 @@ class EmptyIndexTest(unittest.TestCase):
          self.assertRaises(IOError, pysam.samtools.index,
                            "exdoesntexist.bam")
  
+class TestReturnType(unittest.TestCase):
+    
+    def testReturnValueString(self):
+        retval = pysam.idxstats(os.path.join(DATADIR, "ex1.bam"))
+        if IS_PYTHON3:
+            self.assertFalse(isinstance(retval, bytes))
+            self.assertTrue(isinstance(retval, str))
+        else:
+            self.assertTrue(isinstance(retval, bytes))
+            self.assertTrue(isinstance(retval, basestring))
+
+    def testReturnValueData(self):
+        args = "-O BAM {}".format(os.path.join(DATADIR, "ex1.bam")).split(" ")
+        retval = pysam.view(*args)
+
+        if IS_PYTHON3:
+            self.assertTrue(isinstance(retval, bytes))
+            self.assertFalse(isinstance(retval, str))
+        else:
+            self.assertTrue(isinstance(retval, bytes))
+            self.assertTrue(isinstance(retval, basestring))
+
  
  class StdoutTest(unittest.TestCase):
      '''test if stdout can be redirected.'''
@@ -242,11 +280,29 @@ class StdoutTest(unittest.TestCase):
              catch_stdout=False)
          self.assertEqual(r, None)
  
+    def testDoubleCalling(self):
+        # The following would fail if there is an
+        # issue with stdout being improperly caught.
+        retvals = pysam.idxstats(
+            os.path.join(DATADIR, "ex1.bam"))
+        retvals = pysam.idxstats(
+            os.path.join(DATADIR, "ex1.bam"))
+
+    def testSaveStdout(self):
+        outfile = get_temp_filename(suffix=".tsv")
+        r = pysam.samtools.flagstat(
+            os.path.join(DATADIR, "ex1.bam"),
+            save_stdout=outfile)
+        self.assertEqual(r, None)
+        with open(outfile) as inf:
+            r = inf.read()
+        self.assertTrue(len(r) > 0)
+
  
  class PysamTest(SamtoolsTest):
      """check access to samtools command in the pysam 
      main package.
-    
+
      This is for backwards capability.
      """
  
diff --git a/tests/tabix_test.py b/tests/tabix_test.py

index f09ba8cba75cd4678aa580cc4dbee5e93caa7962..ec1e37e3c4ffc7ce1a4faa7f444e0237b5800e72 100644 (file)
--- a/tests/tabix_test.py
+++ b/tests/tabix_test.py
@@ -270,6 +270,9 @@ class TestIterationWithoutComments(IterationTest):
          IterationTest.setUp(self)
          self.tabix = pysam.TabixFile(self.filename)
  
+    def tearDown(self):
+        self.tabix.close()
+
      def testRegionStrings(self):
          """test if access with various region strings
          works"""
@@ -351,7 +354,7 @@ class TestIterationWithoutComments(IterationTest):
          self.tabix.fetch("chr1", 100, 100)
  
      def testGetContigs(self):
-        self.assertEqual(sorted(self.tabix.contigs), [b"chr1", b"chr2"])
+        self.assertEqual(sorted(self.tabix.contigs), ["chr1", "chr2"])
          # check that contigs is read-only
          self.assertRaises(
              AttributeError, setattr, self.tabix, "contigs", ["chr1", "chr2"])
@@ -374,13 +377,10 @@ class TestIterationWithoutComments(IterationTest):
              # opens any tabix file
              with pysam.TabixFile(self.filename) as inf:
                  pass
-            
+
          for i in range(1000):
              func1()
  
-    def tearDown(self):
-        self.tabix.close()
-
  
  class TestIterationWithComments(TestIterationWithoutComments):
  
@@ -405,6 +405,9 @@ class TestParser(unittest.TestCase):
          self.tabix = pysam.TabixFile(self.filename)
          self.compare = loadAndConvert(self.filename)
  
+    def tearDown(self):
+        self.tabix.close()
+
      def testRead(self):
  
          for x, r in enumerate(self.tabix.fetch(parser=pysam.asTuple())):
@@ -505,6 +508,36 @@ class TestParser(unittest.TestCase):
          self.assertEqual(a, b)
  
  
+class TestGTF(TestParser):
+
+    def testRead(self):
+
+        for x, r in enumerate(self.tabix.fetch(parser=pysam.asGTF())):
+            c = self.compare[x]
+            self.assertEqual(len(c), len(r))
+            self.assertEqual(list(c), list(r))
+            self.assertEqual(c, str(r).split("\t"))
+            self.assertTrue(r.gene_id.startswith("ENSG"))
+            if r.feature != 'gene':
+                self.assertTrue(r.transcript_id.startswith("ENST"))
+            self.assertEqual(c[0], r.contig)
+            self.assertEqual("\t".join(map(str, c)),
+                             str(r))
+
+    def testSetting(self):
+
+        for r in self.tabix.fetch(parser=pysam.asGTF()):
+            r.contig = r.contig + "_test"          
+            r.source = r.source + "_test"
+            r.feature = r.feature + "_test"
+            r.start += 10
+            r.end += 10
+            r.score = 20
+            r.strand = "+"
+            r.frame = 0
+            r.attributes = 'gene_id "0001";'
+
+
  class TestIterators(unittest.TestCase):
  
      filename = os.path.join(DATADIR, "example.gtf.gz")
@@ -522,6 +555,10 @@ class TestIterators(unittest.TestCase):
               open(self.tmpfilename_uncompressed, "wb") as outfile:
              outfile.write(infile.read())
  
+    def tearDown(self):
+        self.tabix.close()
+        os.unlink(self.tmpfilename_uncompressed)
+
      def open(self):
  
          if self.is_compressed:
@@ -566,9 +603,6 @@ class TestIterators(unittest.TestCase):
          # Not implemented
          # self.assertRaises(ValueError, i.next)
  
-    def tearUp(self):
-        os.unlink(self.tmpfilename_uncompressed)
-
  
  class TestIteratorsGenericCompressed(TestIterators):
      is_compressed = True
@@ -584,23 +618,6 @@ class TestIteratorsFileUncompressed(TestIterators):
      is_compressed = False
  
  
-class TestGTF(TestParser):
-
-    def testRead(self):
-
-        for x, r in enumerate(self.tabix.fetch(parser=pysam.asGTF())):
-            c = self.compare[x]
-            self.assertEqual(len(c), len(r))
-            self.assertEqual(list(c), list(r))
-            self.assertEqual(c, str(r).split("\t"))
-            self.assertTrue(r.gene_id.startswith("ENSG"))
-            if r.feature != 'gene':
-                self.assertTrue(r.transcript_id.startswith("ENST"))
-            self.assertEqual(c[0], r.contig)
-            self.assertEqual("\t".join(map(str, c)),
-                             str(r))
-
-
  class TestIterationMalformattedGTFFiles(unittest.TestCase):
  
      '''test reading from malformatted gtf files.'''
@@ -638,6 +655,9 @@ class TestBed(unittest.TestCase):
          self.tabix = pysam.TabixFile(self.filename)
          self.compare = loadAndConvert(self.filename)
  
+    def tearDown(self):
+        self.tabix.close()
+
      def testRead(self):
  
          for x, r in enumerate(self.tabix.fetch(parser=pysam.asBed())):
@@ -670,9 +690,6 @@ class TestBed(unittest.TestCase):
              self.assertEqual(int(c[2]) + 1, r.end)
              self.assertEqual(str(int(c[2]) + 1), r[2])
  
-    def tearDown(self):
-        self.tabix.close()
-
  
  class TestVCF(unittest.TestCase):
  
@@ -736,6 +753,9 @@ class TestVCFFromTabix(TestVCF):
          self.tabix = pysam.TabixFile(self.tmpfilename + ".gz")
          self.compare = loadAndConvert(self.filename)
  
+    def tearDown(self):
+        self.tabix.close()
+
      def testRead(self):
  
          ncolumns = len(self.columns)
@@ -804,9 +824,6 @@ class TestVCFFromTabix(TestVCF):
                  c[ncolumns + y] = "test_%i" % y
                  r[y] = "test_%i" % y
                  self.assertEqual(c[ncolumns + y], r[y])
-                
-    def tearDown(self):
-        self.tabix.close()
  
  
  class TestVCFFromVCF(TestVCF):
@@ -843,6 +860,9 @@ class TestVCFFromVCF(TestVCF):
          self.vcf = pysam.VCF()
          self.compare = loadAndConvert(self.filename, encode=False)
  
+    def tearDown(self):
+        self.vcf.close()
+
      def testConnecting(self):
  
          fn = os.path.basename(self.filename)
@@ -856,15 +876,25 @@ class TestVCFFromVCF(TestVCF):
  
      def get_iterator(self):
  
-        f = open(self.filename)
-        fn = os.path.basename(self.filename)
+        with open(self.filename) as f:
+            fn = os.path.basename(self.filename)
  
-        for x, msg in self.fail_on_opening:
-            if "%i.vcf" % x == fn:
-                self.assertRaises(ValueError, self.vcf.parse, f)
-                return
+            for x, msg in self.fail_on_opening:
+                if "%i.vcf" % x == fn:
+                    self.assertRaises(ValueError, self.vcf.parse, f)
+                    return
  
-        return self.vcf.parse(f)
+            for vcf_code, msg in self.fail_on_parsing:
+                if "%i.vcf" % vcf_code == fn:
+                    self.assertRaises((ValueError,
+                                       AssertionError),
+                                      list, self.vcf.parse(f))
+                    return
+                # python 2.7
+                # self.assertRaisesRegexp(
+                # ValueError, re.compile(msg), self.vcf.parse, f)
+
+            return list(self.vcf.parse(f))
  
      def get_field_value(self, record, field):
          return record[field]
@@ -1063,6 +1093,8 @@ class TestVCFFromVariantFile(TestVCFFromVCF):
      missing_value = None
      missing_quality = None
  
+    vcf = None
+
      def filter2value(self, r, v):
          if r == "PASS":
              return ["PASS"], list(v)
@@ -1104,9 +1136,14 @@ class TestVCFFromVariantFile(TestVCFFromVCF):
          TestVCF.setUp(self)
          self.compare = loadAndConvert(self.filename, encode=False)
  
+    def tearDown(self):
+        if self.vcf:
+            self.vcf.close()
+        self.vcf = None
+
      def get_iterator(self):
-        vcf = pysam.VariantFile(self.filename)
-        return vcf.fetch()
+        self.vcf = pysam.VariantFile(self.filename)
+        return self.vcf.fetch()
  
      def get_field_value(self, record, field):
          return getattr(record, field)
@@ -1124,11 +1161,22 @@ class TestRemoteFileHTTP(unittest.TestCase):
      local = os.path.join(DATADIR, "example.gtf.gz")
  
      def setUp(self):
+        if not checkURL(self.url):
+            self.remote_file = None
+            return
+
          self.remote_file = pysam.TabixFile(self.url, "r")
          self.local_file = pysam.TabixFile(self.local, "r")
  
+    def tearDown(self):
+        if self.remote_file is None:
+            return
+
+        self.remote_file.close()
+        self.local_file.close()
+
      def testFetchAll(self):
-        if not checkURL(self.url):
+        if self.remote_file is None:
              return
  
          remote_result = list(self.remote_file.fetch())
@@ -1139,16 +1187,15 @@ class TestRemoteFileHTTP(unittest.TestCase):
              self.assertEqual(x, y)
  
      def testHeader(self):
+        if self.remote_file is None:
+            return
+
          self.assertEqual(list(self.local_file.header), [])
          self.assertRaises(AttributeError,
                            getattr,
                            self.remote_file,
                            "header")
  
-    def tearDown(self):
-        self.remote_file.close()
-        self.local_file.close()
-
  
  class TestIndexArgument(unittest.TestCase):
  
@@ -1163,13 +1210,11 @@ class TestIndexArgument(unittest.TestCase):
          shutil.copyfile(self.index_src, self.index_dst)
  
          with pysam.TabixFile(
-            self.filename_src, "r", index=self.index_src) as \
-            same_basename_file:
+                self.filename_src, "r", index=self.index_src) as same_basename_file:
              same_basename_results = list(same_basename_file.fetch())
  
          with pysam.TabixFile(
-            self.filename_dst, "r", index=self.index_dst) as \
-            diff_index_file:
+                self.filename_dst, "r", index=self.index_dst) as diff_index_file:
              diff_index_result = list(diff_index_file.fetch())
  
          self.assertEqual(len(same_basename_results), len(diff_index_result))
@@ -1263,7 +1308,7 @@ class TestMultipleIterators(unittest.TestCase):
  
      def testDoubleFetch(self):
  
-        with pysam.TabixFile(self.filename) as f: 
+        with pysam.TabixFile(self.filename) as f:
  
              for a, b in zip(f.fetch(multiple_iterators=True),
                              f.fetch(multiple_iterators=True)):
diff --git a/tests/test_samtools_python.py b/tests/test_samtools_python.py

new file mode 100644 (file)

index 0000000..1b915fd
--- /dev/null
+++ b/tests/test_samtools_python.py
@@ -0,0 +1,35 @@
+import pysam
+
+def test_idxstats_parse_split_lines():
+    bam_filename = "./pysam_data/ex2.bam"
+    lines = pysam.idxstats(bam_filename, split_lines=True)  # Test pysam 0.8.X style output, which returns a list of lines
+    for line in lines:
+        _seqname, _seqlen, nmapped, _nunmapped = line.split()
+
+
+def test_bedcov_split_lines():
+    bam_filename = "./pysam_data/ex1.bam"
+    bed_filename = "./pysam_data/ex1.bed"
+    lines = pysam.bedcov(bed_filename, bam_filename, split_lines=True)  # Test pysam 0.8.X style output, which returns a list of lines
+    for line in lines:
+        fields = line.split('\t')
+        assert len(fields) in [4, 5], "bedcov should give tab delimited output with 4 or 5 fields.  Split line (%s) gives %d fields." % (fields, len(fields))
+
+
+def test_idxstats_parse():
+    bam_filename = "./pysam_data/ex2.bam"
+    idxstats_string = pysam.idxstats(bam_filename, split_lines=False)  # Test pysam 0.9.X style output, which returns a string that needs to be split by \n
+    lines = idxstats_string.splitlines()
+    for line in lines:
+        splt = line.split("\t")
+        _seqname, _seqlen, nmapped, _nunmapped = splt
+
+
+def test_bedcov():
+    bam_filename = "./pysam_data/ex1.bam"
+    bed_filename = "./pysam_data/ex1.bed"
+    bedcov_string = pysam.bedcov(bed_filename, bam_filename, split_lines=False)  # Test pysam 0.9.X style output, which returns a string that needs to be split by \n
+    lines = bedcov_string.splitlines()
+    for line in lines:
+        fields = line.split('\t')
+        assert len(fields) in [4, 5], "bedcov should give tab delimited output with 4 or 5 fields.  Split line (%s) gives %d fields." % (fields, len(fields))
author	Afif Elghraoui <afif@ghraoui.name>
	Sun, 19 Jun 2016 21:17:37 +0000 (14:17 -0700)
committer	Afif Elghraoui <afif@ghraoui.name>
	Sun, 19 Jun 2016 21:17:37 +0000 (14:17 -0700)
INSTALL		patch \| blob \| history
bcftools/bcftools.h		patch \| blob \| history
bcftools/consensus.c		patch \| blob \| history
bcftools/consensus.c.pysam.c		patch \| blob \| history
bcftools/convert.c.pysam.c		patch \| blob \| history
bcftools/em.c.pysam.c		patch \| blob \| history
bcftools/filter.c.pysam.c		patch \| blob \| history
bcftools/khash_str2str.h		patch \| blob \| history
bcftools/main.c		patch \| blob \| history
bcftools/main.c.pysam.c		patch \| blob \| history
bcftools/mcall.c.pysam.c		patch \| blob \| history
bcftools/ploidy.c		patch \| blob \| history
bcftools/ploidy.c.pysam.c		patch \| blob \| history
bcftools/prob1.c.pysam.c		patch \| blob \| history
bcftools/pysam.h		patch \| blob \| history
bcftools/tabix.c.pysam.c		patch \| blob \| history
bcftools/vcfannotate.c		patch \| blob \| history
bcftools/vcfannotate.c.pysam.c		patch \| blob \| history
bcftools/vcfcall.c		patch \| blob \| history
bcftools/vcfcall.c.pysam.c		patch \| blob \| history
bcftools/vcfcnv.c.pysam.c		patch \| blob \| history
bcftools/vcfconcat.c		patch \| blob \| history
bcftools/vcfconcat.c.pysam.c		patch \| blob \| history
bcftools/vcfconvert.c		patch \| blob \| history
bcftools/vcfconvert.c.pysam.c		patch \| blob \| history
bcftools/vcffilter.c		patch \| blob \| history
bcftools/vcffilter.c.pysam.c		patch \| blob \| history
bcftools/vcfgtcheck.c.pysam.c		patch \| blob \| history
bcftools/vcfindex.c		patch \| blob \| history
bcftools/vcfindex.c.pysam.c		patch \| blob \| history
bcftools/vcfisec.c		patch \| blob \| history
bcftools/vcfisec.c.pysam.c		patch \| blob \| history
bcftools/vcfmerge.c		patch \| blob \| history
bcftools/vcfmerge.c.pysam.c		patch \| blob \| history
bcftools/vcfnorm.c		patch \| blob \| history
bcftools/vcfnorm.c.pysam.c		patch \| blob \| history
bcftools/vcfplugin.c		patch \| blob \| history
bcftools/vcfplugin.c.pysam.c		patch \| blob \| history
bcftools/vcfquery.c.pysam.c		patch \| blob \| history
bcftools/vcfroh.c		patch \| blob \| history
bcftools/vcfroh.c.pysam.c		patch \| blob \| history
bcftools/vcfsom.c.pysam.c		patch \| blob \| history
bcftools/vcfstats.c.pysam.c		patch \| blob \| history
bcftools/vcfview.c		patch \| blob \| history
bcftools/vcfview.c.pysam.c		patch \| blob \| history
bcftools/version.c.pysam.c		patch \| blob \| history
bcftools/version.h		patch \| blob \| history
doc/faq.rst		patch \| blob \| history
doc/glossary.rst		patch \| blob \| history
doc/installation.rst		patch \| blob \| history
doc/release.rst		patch \| blob \| history
import.py		patch \| blob \| history
pysam/__init__.py		patch \| blob \| history
pysam/calignedsegment.pyx		patch \| blob \| history
pysam/calignmentfile.pxd		patch \| blob \| history
pysam/calignmentfile.pyx		patch \| blob \| history
pysam/cbcf.pyx		patch \| blob \| history
pysam/cfaidx.pxd		patch \| blob \| history
pysam/cfaidx.pyx		patch \| blob \| history
pysam/chtslib.pxd		patch \| blob \| history
pysam/ctabix.pxd		patch \| blob \| history
pysam/ctabix.pyx		patch \| blob \| history
pysam/ctabixproxies.pyx		patch \| blob \| history
pysam/cutils.pxd		patch \| blob \| history
pysam/cutils.pyx		patch \| blob \| history
pysam/cvcf.pyx		patch \| blob \| history
pysam/pysam_stream.h		patch \| blob \| history
pysam/pysam_util.c		patch \| blob \| history
pysam/pysam_util.h		patch \| blob \| history
pysam/tabix_util.c		patch \| blob \| history
pysam/utils.py		patch \| blob \| history
pysam/version.py		patch \| blob \| history
run_tests_travis.sh		patch \| blob \| history
samtools/bam.c		patch \| blob \| history
samtools/bam.c.pysam.c		patch \| blob \| history
samtools/bam.h		patch \| blob \| history
samtools/bam2bcf.c		patch \| blob \| history
samtools/bam2bcf.c.pysam.c		patch \| blob \| history
samtools/bam2bcf_indel.c		patch \| blob \| history
samtools/bam2bcf_indel.c.pysam.c		patch \| blob \| history
samtools/bam2depth.c		patch \| blob \| history
samtools/bam2depth.c.pysam.c		patch \| blob \| history
samtools/bam_addrprg.c		patch \| blob \| history
samtools/bam_addrprg.c.pysam.c		patch \| blob \| history
samtools/bam_aux.c		patch \| blob \| history
samtools/bam_aux.c.pysam.c		patch \| blob \| history
samtools/bam_cat.c		patch \| blob \| history
samtools/bam_cat.c.pysam.c		patch \| blob \| history
samtools/bam_color.c		patch \| blob \| history
samtools/bam_color.c.pysam.c		patch \| blob \| history
samtools/bam_flags.c		patch \| blob \| history
samtools/bam_flags.c.pysam.c		patch \| blob \| history
samtools/bam_import.c		patch \| blob \| history
samtools/bam_import.c.pysam.c		patch \| blob \| history
samtools/bam_index.c		patch \| blob \| history
samtools/bam_index.c.pysam.c		patch \| blob \| history
samtools/bam_lpileup.c		patch \| blob \| history
samtools/bam_lpileup.c.pysam.c		patch \| blob \| history
samtools/bam_mate.c		patch \| blob \| history
samtools/bam_mate.c.pysam.c		patch \| blob \| history
samtools/bam_md.c		patch \| blob \| history
samtools/bam_md.c.pysam.c		patch \| blob \| history
samtools/bam_plbuf.c		patch \| blob \| history
samtools/bam_plbuf.c.pysam.c		patch \| blob \| history
samtools/bam_plcmd.c		patch \| blob \| history
samtools/bam_plcmd.c.pysam.c		patch \| blob \| history
samtools/bam_quickcheck.c		patch \| blob \| history
samtools/bam_quickcheck.c.pysam.c		patch \| blob \| history
samtools/bam_reheader.c		patch \| blob \| history
samtools/bam_reheader.c.pysam.c		patch \| blob \| history
samtools/bam_rmdup.c		patch \| blob \| history
samtools/bam_rmdup.c.pysam.c		patch \| blob \| history
samtools/bam_rmdupse.c		patch \| blob \| history
samtools/bam_rmdupse.c.pysam.c		patch \| blob \| history
samtools/bam_sort.c		patch \| blob \| history
samtools/bam_sort.c.pysam.c		patch \| blob \| history
samtools/bam_split.c		patch \| blob \| history
samtools/bam_split.c.pysam.c		patch \| blob \| history
samtools/bam_stat.c		patch \| blob \| history
samtools/bam_stat.c.pysam.c		patch \| blob \| history
samtools/bam_tview.c		patch \| blob \| history
samtools/bam_tview.c.pysam.c		patch \| blob \| history
samtools/bam_tview_curses.c.pysam.c		patch \| blob \| history
samtools/bam_tview_html.c		patch \| blob \| history
samtools/bam_tview_html.c.pysam.c		patch \| blob \| history
samtools/bamshuf.c		patch \| blob \| history
samtools/bamshuf.c.pysam.c		patch \| blob \| history
samtools/bamtk.c		patch \| blob \| history
samtools/bamtk.c.pysam.c		patch \| blob \| history
samtools/bedcov.c		patch \| blob \| history
samtools/bedcov.c.pysam.c		patch \| blob \| history
samtools/bedidx.c		patch \| blob \| history
samtools/bedidx.c.pysam.c		patch \| blob \| history
samtools/cut_target.c		patch \| blob \| history
samtools/cut_target.c.pysam.c		patch \| blob \| history
samtools/dict.c		patch \| blob \| history
samtools/dict.c.pysam.c		patch \| blob \| history
samtools/errmod.c		patch \| blob \| history
samtools/errmod.c.pysam.c		patch \| blob \| history
samtools/faidx.c		patch \| blob \| history
samtools/faidx.c.pysam.c		patch \| blob \| history
samtools/kprobaln.c		patch \| blob \| history
samtools/kprobaln.c.pysam.c		patch \| blob \| history
samtools/misc/ace2sam.c		patch \| blob \| history
samtools/misc/ace2sam.c.pysam.c		patch \| blob \| history
samtools/padding.c		patch \| blob \| history
samtools/padding.c.pysam.c		patch \| blob \| history
samtools/phase.c		patch \| blob \| history
samtools/phase.c.pysam.c		patch \| blob \| history
samtools/pysam.h		patch \| blob \| history
samtools/sam.c		patch \| blob \| history
samtools/sam.c.pysam.c		patch \| blob \| history
samtools/sam_header.c		patch \| blob \| history
samtools/sam_header.c.pysam.c		patch \| blob \| history
samtools/sam_opts.c		patch \| blob \| history
samtools/sam_opts.c.pysam.c		patch \| blob \| history
samtools/sam_view.c		patch \| blob \| history
samtools/sam_view.c.pysam.c		patch \| blob \| history
samtools/sample.c		patch \| blob \| history
samtools/sample.c.pysam.c		patch \| blob \| history
samtools/stats.c		patch \| blob \| history
samtools/stats.c.pysam.c		patch \| blob \| history
samtools/stats_isize.c		patch \| blob \| history
samtools/stats_isize.c.pysam.c		patch \| blob \| history
samtools/test/merge/test_bam_translate.c		patch \| blob \| history
samtools/test/merge/test_bam_translate.c.pysam.c		patch \| blob \| history
samtools/test/merge/test_rtrans_build.c		patch \| blob \| history
samtools/test/merge/test_rtrans_build.c.pysam.c		patch \| blob \| history
samtools/test/merge/test_trans_tbl_init.c		patch \| blob \| history
samtools/test/merge/test_trans_tbl_init.c.pysam.c		patch \| blob \| history
samtools/test/split/test_count_rg.c		patch \| blob \| history
samtools/test/split/test_count_rg.c.pysam.c		patch \| blob \| history
samtools/test/split/test_expand_format_string.c		patch \| blob \| history
samtools/test/split/test_expand_format_string.c.pysam.c		patch \| blob \| history
samtools/test/split/test_filter_header_rg.c		patch \| blob \| history
samtools/test/split/test_filter_header_rg.c.pysam.c		patch \| blob \| history
samtools/test/split/test_parse_args.c		patch \| blob \| history
samtools/test/split/test_parse_args.c.pysam.c		patch \| blob \| history
samtools/test/test.c		patch \| blob \| history
samtools/test/test.c.pysam.c		patch \| blob \| history
samtools/test/tview/test_get_rg_sample.c		patch \| blob \| history
samtools/test/tview/test_get_rg_sample.c.pysam.c		patch \| blob \| history
samtools/version.h		patch \| blob \| history
setup.py		patch \| blob \| history
tests/AlignedSegment_test.py		patch \| blob \| history
tests/AlignmentFile_test.py		patch \| blob \| history
tests/TestUtils.py		patch \| blob \| history
tests/VariantFile_test.py		patch \| blob \| history
tests/cbcf_data/example_vcf42.vcf		patch \| blob \| history
tests/faidx_test.py		patch \| blob \| history
tests/pysam_data/Makefile		patch \| blob \| history
tests/samtools_test.py		patch \| blob \| history
tests/tabix_test.py		patch \| blob \| history
tests/test_samtools_python.py	[new file with mode: 0644]	patch \| blob