From: Michael R. Crusoe <michael.crusoe@gmail.com>
Date: Tue, 6 Aug 2019 17:37:29 +0000 (+0200)
Subject: New upstream version 0.15.3+ds
X-Git-Tag: archive/raspbian/0.22.0+ds-1+rpi1~1^2^2~12^2~7
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=169a628899d883a30944ece2535f066881671cf4;p=python-pysam.git

New upstream version 0.15.3+ds
---

diff --git a/.gitignore b/.gitignore
index fd28d6d..b07a532 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,11 +17,13 @@ tests/cbcf_data
 tests/tabix_data
 
 samtools/config.h
-bcftools/config.g
+bcftools/config.h
 htslib/config.status
 htslib/config.h
 htslib/config.log
 htslib/config.mk
+htslib/htslib.pc.tmp
+htslib/htslib-uninstalled.pc
 pysam/config.py
 
 # linking tests
diff --git a/.travis.yml b/.travis.yml
index 3642281..b30d9b8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,5 @@
 os:
-  - linx
+  - linux
   - osx
 
 language: c
@@ -7,11 +7,58 @@ language: c
 env:
   matrix:
     - CONDA_PY=2.7
-    - CONDA_PY=3.4
-    - CONDA_PY=3.5
     - CONDA_PY=3.6
+    - CONDA_PY=3.7
   global:
     - PYSAM_LINKING_TEST=1
+    - TWINE_USERNAME=grepall
+    - secure: 'OcwwP8/o21+SGW0UVAnnCQwllhGSCq2HJzpI9EhX3kh6J9RTkyx/+drkg45bx1Z5u8zymuAFappEYzlpzqZE886XezkjOYGVa/u+Coqr1oT/BEJHFCkCA4o26yESp7Zy8aNj/juhB7Rfa77pIDXBayqTzbALz/AURMtZapasB18='
+
+_deploy_common: &deploy_common
+  if: tag IS present
+  install:
+    - python3 -m pip install cibuildwheel twine
+
+matrix:
+  include:
+    - stage: deploy
+      os: linux
+      language: python
+      python: '3.5'
+      services:
+        - docker
+      env:
+        - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && pip install -r requirements.txt"
+        - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
+      addons:
+        apt:
+          packages:
+            - gcc
+            - g++
+            - libcurl4-openssl-dev  # for libcurl support in sdist
+            - libssl-dev  # for s3 support in sdist
+      <<: *deploy_common
+      script:
+        - set -e
+        - cibuildwheel --output-dir dist
+        - python3 -m pip install Cython
+        - python3 setup.py build_ext --inplace
+        - python3 setup.py sdist
+        - twine check dist/*
+        - twine upload --skip-existing dist/*
+    - stage: deploy
+      os: osx
+      language: generic
+      env:
+        - CIBW_BEFORE_BUILD="pip install -r requirements.txt"
+        - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
+      addons: {}
+      <<: *deploy_common
+      script:
+        - set -e
+        - cibuildwheel --output-dir dist
+        - twine check dist/*
+        - twine upload --skip-existing dist/*
 
 addons:
   apt:
diff --git a/NEWS b/NEWS
index 4861555..49ce485 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,26 @@ http://pysam.readthedocs.io/en/latest/release.html
 Release notes
 =============
 
+Release 0.15.3
+==============
+
+Bugfix release.
+
+* [#824] allow reading of UTF-8 encoded text in VCF/BCF files.
+* [#780] close all filehandles before opening new ones in pysam_dispatch
+* [#773] do not cache VariantRecord.id to avoid memory leak
+* [#781] default of multiple_iterators=True is changed to False for
+  CRAM files.
+* [#825] fix collections.abc import
+* [#825] use bcf_hdr_format instead of bcf_hdr_fmt_text, fix memcpy
+  bug when setting FORMAT fields.
+* [#804] Use HTSlib's kstring_t, which reallocates and enlarges its
+  memory as needed, rather than a fixed-size char buffer.
+* [#814] Build wheels and upload them to PyPI
+* [#755] Allow passing flags and arguments to index methods
+* [#763] Strip \0 in header check
+* [#761] Test Tabix index contents, not the compression
+
 Release 0.15.2
 ==============
 
diff --git a/README.rst b/README.rst
index 2005eb6..f8a641d 100644
--- a/README.rst
+++ b/README.rst
@@ -25,6 +25,8 @@ as it resolves non-python dependencies and uses pre-configured
 compilation options. Especially for OS X this will potentially save a
 lot of trouble.
 
+The current version of pysam wraps 3rd-party code from htslib-1.9, samtools-1.9, and bcftools-1.9.
+
 Pysam is available through `pypi
 <https://pypi.python.org/pypi/pysam>`_. To install, type::
 
diff --git a/bcftools/bcftools.pysam.c b/bcftools/bcftools.pysam.c
index e3e251e..de8739d 100644
--- a/bcftools/bcftools.pysam.c
+++ b/bcftools/bcftools.pysam.c
@@ -10,7 +10,6 @@
 FILE * bcftools_stderr = NULL;
 FILE * bcftools_stdout = NULL;
 const char * bcftools_stdout_fn = NULL;
-int bcftools_stdout_fileno = STDOUT_FILENO;
 
 
 FILE * bcftools_set_stderr(int fd)
@@ -21,11 +20,10 @@ FILE * bcftools_set_stderr(int fd)
   return bcftools_stderr;
 }
 
-void bcftools_unset_stderr(void)
+void bcftools_close_stderr(void)
 {
-  if (bcftools_stderr != NULL)
-    fclose(bcftools_stderr);
-  bcftools_stderr = fopen("/dev/null", "w");
+  fclose(bcftools_stderr);
+  bcftools_stderr = NULL;
 }
 
 FILE * bcftools_set_stdout(int fd)
@@ -37,7 +35,6 @@ FILE * bcftools_set_stdout(int fd)
     {
       fprintf(bcftools_stderr, "could not set stdout to fd %i", fd);
     }
-  bcftools_stdout_fileno = fd;
   return bcftools_stdout;
 }
 
@@ -46,12 +43,10 @@ void bcftools_set_stdout_fn(const char *fn)
   bcftools_stdout_fn = fn;
 }
 
-void bcftools_unset_stdout(void)
+void bcftools_close_stdout(void)
 {
-  if (bcftools_stdout != NULL)
-    fclose(bcftools_stdout);
-  bcftools_stdout = fopen("/dev/null", "w");
-  bcftools_stdout_fileno = STDOUT_FILENO;
+  fclose(bcftools_stdout);
+  bcftools_stdout = NULL;
 }
 
 int bcftools_puts(const char *s)
diff --git a/bcftools/bcftools.pysam.h b/bcftools/bcftools.pysam.h
index 92ab370..453567a 100644
--- a/bcftools/bcftools.pysam.h
+++ b/bcftools/bcftools.pysam.h
@@ -1,7 +1,7 @@
-#ifndef PYSAM_H
-#define PYSAM_H
+#ifndef bcftools_PYSAM_H
+#define bcftools_PYSAM_H
 
-#include "stdio.h"
+#include <stdio.h>
 
 extern FILE * bcftools_stderr;
 
@@ -17,7 +17,7 @@ FILE * bcftools_set_stderr(int fd);
 
 /*! set pysam standard output to point to file descriptor
 
-  Setting the stderr will close the previous stdout.
+  Setting the stdout will close the previous stdout.
  */
 FILE * bcftools_set_stdout(int fd);
 
@@ -26,17 +26,15 @@ FILE * bcftools_set_stdout(int fd);
  */
 void bcftools_set_stdout_fn(const char * fn);
 
-/*! set pysam standard error to /dev/null.
+/*! close pysam standard error and set to NULL
   
-  Unsetting the stderr will close the previous stderr.
  */
-void bcftools_unset_stderr(void);
+void bcftools_close_stderr(void);
 
-/*! set pysam standard error to /dev/null.
+/*! close pysam standard output and set to NULL
   
-  Unsetting the stderr will close the previous stderr.
  */
-void bcftools_unset_stdout(void);
+void bcftools_close_stdout(void);
 
 int bcftools_puts(const char *s);
 
diff --git a/devtools/buildwheels.sh b/devtools/buildwheels.sh
index ae0d953..bf71bf9 100755
--- a/devtools/buildwheels.sh
+++ b/devtools/buildwheels.sh
@@ -5,6 +5,10 @@
 #
 # It is best to run this in a fresh clone of the repository!
 #
+# Before running, make sure to update image:
+#
+#   docker pull quay.io/pypa/manylinux1_x86_64
+#
 # Run this within the repository root:
 #   docker run --rm -v $(pwd):/io quay.io/pypa/manylinux1_x86_64 /io/buildwheels.sh
 #
@@ -24,13 +28,6 @@ fi
 
 yum install -y zlib-devel bzip2-devel xz-devel
 
-# Python 2.6 is not supported
-rm -r /opt/python/cp26*
-
-# Python 3.3 builds fail with:
-#  /opt/rh/devtoolset-2/root/usr/libexec/gcc/x86_64-CentOS-linux/4.8.2/ld: cannot find -lchtslib
-rm -r /opt/python/cp33*
-
 # Without libcurl support, htslib can open files from HTTP and FTP URLs.
 # With libcurl support, it also supports HTTPS and S3 URLs, but libcurl needs a
 # current version of OpenSSL, and we do not want to be responsible for
diff --git a/devtools/import.py b/devtools/import.py
index 44e606a..7430cc5 100644
--- a/devtools/import.py
+++ b/devtools/import.py
@@ -94,16 +94,16 @@ def _update_pysam_files(cf, destdir):
                 outfile.write('#include "{}.pysam.h"\n\n'.format(basename))
                 subname, _ = os.path.splitext(os.path.basename(filename))
                 if subname in MAIN.get(basename, []):
-                    lines = re.sub("int main\(", "int {}_main(".format(
+                    lines = re.sub(r"int main\(", "int {}_main(".format(
                         basename), lines)
                 else:
-                    lines = re.sub("int main\(", "int {}_{}_main(".format(
+                    lines = re.sub(r"int main\(", "int {}_{}_main(".format(
                         basename, subname), lines)
                 lines = re.sub("stderr", "{}_stderr".format(basename), lines)
                 lines = re.sub("stdout", "{}_stdout".format(basename), lines)
-                lines = re.sub(" printf\(", " fprintf({}_stdout, ".format(basename), lines)
-                lines = re.sub("([^kf])puts\(", r"\1{}_puts(".format(basename), lines)
-                lines = re.sub("putchar\(([^)]+)\)",
+                lines = re.sub(r" printf\(", " fprintf({}_stdout, ".format(basename), lines)
+                lines = re.sub(r"([^kf])puts\(", r"\1{}_puts(".format(basename), lines)
+                lines = re.sub(r"putchar\(([^)]+)\)",
                                r"fputc(\1, {}_stdout)".format(basename), lines)
 
                 fn = os.path.basename(filename)
diff --git a/devtools/run_tests_travis.sh b/devtools/run_tests_travis.sh
index 89988bb..b20c8a0 100755
--- a/devtools/run_tests_travis.sh
+++ b/devtools/run_tests_travis.sh
@@ -31,30 +31,33 @@ bash Miniconda3.sh -b
 # activate testenv environment
 source ~/miniconda3/bin/activate testenv
 
-conda config --add channels r
 conda config --add channels defaults
-conda config --add channels conda-forge
 conda config --add channels bioconda
+conda config --add channels conda-forge
 
 # pin versions, so that tests do not fail when pysam/htslib out of step
 # add htslib dependencies
-conda install -y "samtools=1.7" "bcftools=1.6" "htslib=1.7" xz curl bzip2
+# NB: we force conda-forge:ncurses due to bioconda/bioconda-recipes#13488
+conda install -y "samtools=1.9" "bcftools=1.9" "htslib=1.9" xz curl bzip2 conda-forge:ncurses
 
 # Need to make C compiler and linker use the anaconda includes and libraries:
 export PREFIX=~/miniconda3/
 export CFLAGS="-I${PREFIX}/include -L${PREFIX}/lib"
-export HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl --disable-lzma"
+export HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"
 
+echo "show samtools, htslib, and bcftools versions"
 samtools --version
-htslib --version
+htsfile --version
 bcftools --version
 
 # Try building conda recipe first
 ~/miniconda3/bin/conda-build devtools/conda-recipe/ --python=$CONDA_PY
 
 # install code from the repository via setup.py
-echo "installing via setup.py from repository"
-python setup.py install
+echo
+echo "============ installing via setup.py from repository ============"
+echo
+python setup.py install || exit
 
 # create auxilliary data
 echo
diff --git a/doc/api.rst b/doc/api.rst
index d2a9a80..3f2c042 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -142,6 +142,9 @@ BAM/SAM formatted files.
 .. autoclass:: pysam.AlignmentFile
    :members:
 
+.. autoclass:: pysam.AlignmentHeader
+   :members:
+
 An :class:`~pysam.AlignedSegment` represents an aligned segment within
 a SAM/BAM file.
 
diff --git a/doc/glossary.rst b/doc/glossary.rst
index de3e032..4e9fa57 100644
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -24,7 +24,7 @@ Glossary
       are 0-based, half-open intervals, i.e., the position 10,000 is
       part of the interval, but 20,000 is not. An exception are
       :term:`samtools` compatible region strings such as
-      'chr1:10000:20000', which are closed, i.e., both positions 10,000
+      'chr1:10000-20000', which are closed, i.e., both positions 10,000
       and 20,000 are part of the interval.
 
    column
diff --git a/doc/release.rst b/doc/release.rst
index 5adc437..ee1875b 100644
--- a/doc/release.rst
+++ b/doc/release.rst
@@ -2,7 +2,27 @@
 Release notes
 =============
 
-Release 0.15.1
+Release 0.15.3
+==============
+
+Bugfix release.
+
+* [#824] allow reading of UTF-8 encoded text in VCF/BCF files.
+* [#780] close all filehandles before opening new ones in pysam_dispatch
+* [#773] do not cache VariantRecord.id to avoid memory leak
+* [#781] default of multiple_iterators=True is changed to False for
+  CRAM files.
+* [#825] fix collections.abc import
+* [#825] use bcf_hdr_format instead of bcf_hdr_fmt_text, fix memcpy
+  bug when setting FORMAT fields.
+* [#804] Use HTSlib's kstring_t, which reallocates and enlarges its
+  memory as needed, rather than a fixed-size char buffer.
+* [#814] Build wheels and upload them to PyPI
+* [#755] Allow passing flags and arguments to index methods
+* [#763] Strip \0 in header check
+* [#761] Test Tabix index contents, not the compression
+
+Release 0.15.2
 ==============
 
 Bugfix release.
diff --git a/doc/usage.rst b/doc/usage.rst
index 6172329..f4b7498 100644
--- a/doc/usage.rst
+++ b/doc/usage.rst
@@ -189,7 +189,7 @@ region can be retrieved by calling :meth:`~pysam.TabixFile.fetch()`::
          print (str(row))
 
 This will return a tuple-like data structure in which columns can
-be retrieved by numeric index:
+be retrieved by numeric index::
 
     for row in tbx.fetch("chr1", 1000, 2000):
          print ("chromosome is", row[0])
@@ -385,7 +385,7 @@ flagstat command and consists of three files:
 2. The cython implementation :file:`_pysam_flagstat.pyx`. This script
    imports the pysam API via::
 
-      from pysam.calignmentfile cimport AlignmentFile, AlignedSegment
+      from pysam.libcalignmentfile cimport AlignmentFile, AlignedSegment
 
    This statement imports, amongst others, :class:`AlignedSegment`
    into the namespace. Speed can be gained from declaring
diff --git a/import/pysam.c b/import/pysam.c
index d08cf50..5692622 100644
--- a/import/pysam.c
+++ b/import/pysam.c
@@ -10,7 +10,6 @@
 FILE * @pysam@_stderr = NULL;
 FILE * @pysam@_stdout = NULL;
 const char * @pysam@_stdout_fn = NULL;
-int @pysam@_stdout_fileno = STDOUT_FILENO;
 
 
 FILE * @pysam@_set_stderr(int fd)
@@ -21,11 +20,10 @@ FILE * @pysam@_set_stderr(int fd)
   return @pysam@_stderr;
 }
 
-void @pysam@_unset_stderr(void)
+void @pysam@_close_stderr(void)
 {
-  if (@pysam@_stderr != NULL)
-    fclose(@pysam@_stderr);
-  @pysam@_stderr = fopen("/dev/null", "w");
+  fclose(@pysam@_stderr);
+  @pysam@_stderr = NULL;
 }
 
 FILE * @pysam@_set_stdout(int fd)
@@ -37,7 +35,6 @@ FILE * @pysam@_set_stdout(int fd)
     {
       fprintf(@pysam@_stderr, "could not set stdout to fd %i", fd);
     }
-  @pysam@_stdout_fileno = fd;
   return @pysam@_stdout;
 }
 
@@ -46,12 +43,10 @@ void @pysam@_set_stdout_fn(const char *fn)
   @pysam@_stdout_fn = fn;
 }
 
-void @pysam@_unset_stdout(void)
+void @pysam@_close_stdout(void)
 {
-  if (@pysam@_stdout != NULL)
-    fclose(@pysam@_stdout);
-  @pysam@_stdout = fopen("/dev/null", "w");
-  @pysam@_stdout_fileno = STDOUT_FILENO;
+  fclose(@pysam@_stdout);
+  @pysam@_stdout = NULL;
 }
 
 int @pysam@_puts(const char *s)
diff --git a/import/pysam.h b/import/pysam.h
index c3cc623..6abb884 100644
--- a/import/pysam.h
+++ b/import/pysam.h
@@ -1,7 +1,7 @@
-#ifndef PYSAM_H
-#define PYSAM_H
+#ifndef @pysam@_PYSAM_H
+#define @pysam@_PYSAM_H
 
-#include "stdio.h"
+#include <stdio.h>
 
 extern FILE * @pysam@_stderr;
 
@@ -17,7 +17,7 @@ FILE * @pysam@_set_stderr(int fd);
 
 /*! set pysam standard output to point to file descriptor
 
-  Setting the stderr will close the previous stdout.
+  Setting the stdout will close the previous stdout.
  */
 FILE * @pysam@_set_stdout(int fd);
 
@@ -26,17 +26,15 @@ FILE * @pysam@_set_stdout(int fd);
  */
 void @pysam@_set_stdout_fn(const char * fn);
 
-/*! set pysam standard error to /dev/null.
+/*! close pysam standard error and set to NULL
   
-  Unsetting the stderr will close the previous stderr.
  */
-void @pysam@_unset_stderr(void);
+void @pysam@_close_stderr(void);
 
-/*! set pysam standard error to /dev/null.
+/*! close pysam standard output and set to NULL
   
-  Unsetting the stderr will close the previous stderr.
  */
-void @pysam@_unset_stdout(void);
+void @pysam@_close_stdout(void);
 
 int @pysam@_puts(const char *s);
 
diff --git a/pysam/htslib_util.h b/pysam/htslib_util.h
index 25bd3e4..33c46c9 100644
--- a/pysam/htslib_util.h
+++ b/pysam/htslib_util.h
@@ -18,20 +18,6 @@ typedef khash_t(vdict) vdict_t;
 KHASH_DECLARE(s2i, kh_cstr_t, int64_t)
 typedef khash_t(s2i) s2i_t;
 		     
-//////////////////////////////////////////////////////////////////
-/*! set pysam standard error to point to file descriptor
-
-  Setting the stderr will close the previous stderr.
- */
-// FILE * pysam_set_stderr(int fd);
-
-//////////////////////////////////////////////////////////////////
-/*! set pysam standard error to /dev/null.
-  
-  Unsetting the stderr will close the previous stderr.
- */
-// void pysam_unset_stderr();
-
 //////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////
diff --git a/pysam/libcalignedsegment.pxd b/pysam/libcalignedsegment.pxd
index 48ca93f..c964160 100644
--- a/pysam/libcalignedsegment.pxd
+++ b/pysam/libcalignedsegment.pxd
@@ -70,7 +70,7 @@ cdef class PileupColumn:
     cdef int n_pu
     cdef AlignmentHeader header
     cdef uint32_t min_base_quality
-    cdef uint8_t * buf
+    cdef kstring_t buf
     cdef char * reference_sequence
 
 cdef class PileupRead:
diff --git a/pysam/libcalignedsegment.pyx b/pysam/libcalignedsegment.pyx
index 7ad61ac..02ef5b6 100644
--- a/pysam/libcalignedsegment.pyx
+++ b/pysam/libcalignedsegment.pyx
@@ -69,7 +69,6 @@ from cpython cimport array as c_array
 from libc.stdint cimport INT8_MIN, INT16_MIN, INT32_MIN, \
     INT8_MAX, INT16_MAX, INT32_MAX, \
     UINT8_MAX, UINT16_MAX, UINT32_MAX
-from libc.stdio cimport snprintf
 
 from pysam.libchtslib cimport HTS_IDX_NOCOOR
 from pysam.libcutils cimport force_bytes, force_str, \
@@ -89,9 +88,6 @@ cdef bint IS_PYTHON3 = PY_MAJOR_VERSION >= 3
 cdef char* CODE2CIGAR= "MIDNSHP=XB"
 cdef int NCIGAR_CODES = 10
 
-# dimensioned for 8000 pileup limit (+ insertions/deletions)
-cdef uint32_t MAX_PILEUP_BUFFER_SIZE = 10000
-
 if IS_PYTHON3:
     CIGAR2CODE = dict([y, x] for x, y in enumerate(CODE2CIGAR))
     maketrans = str.maketrans
@@ -632,9 +628,8 @@ cdef PileupColumn makePileupColumn(bam_pileup1_t ** plp,
     dest.n_pu = n_pu
     dest.min_base_quality = min_base_quality
     dest.reference_sequence = reference_sequence
-    dest.buf = <uint8_t *>calloc(MAX_PILEUP_BUFFER_SIZE, sizeof(uint8_t))
-    if dest.buf == NULL:
-        raise MemoryError("could not allocate pileup buffer")
+    dest.buf.l = dest.buf.m = 0
+    dest.buf.s = NULL
 
     return dest
 
@@ -1853,10 +1848,9 @@ cdef class AlignedSegment:
         return s
 
     def get_forward_qualities(self):
-        """return the original read sequence.
+        """return base qualities of the read sequence.
         
-        Reads mapping to the reverse strand will be reverse
-        complemented.
+        Reads mapping to the reverse strand will be reversed.
         """
         if self.is_reverse:
             return self.query_qualities[::-1]
@@ -2799,8 +2793,7 @@ cdef class PileupColumn:
             "\n".join(map(str, self.pileups))
 
     def __dealloc__(self):
-        if self.buf is not NULL:
-            free(self.buf)
+        free(self.buf.s)
 
     def set_min_base_quality(self, min_base_quality):
         """set the minimum base quality for this pileup column.
@@ -2971,15 +2964,16 @@ cdef class PileupColumn:
         cdef uint32_t x = 0
         cdef uint32_t j = 0
         cdef uint32_t c = 0
-        cdef uint32_t n = 0
         cdef uint8_t cc = 0
         cdef uint8_t rb = 0
-        cdef uint8_t * buf = self.buf
+        cdef kstring_t * buf = &self.buf
         cdef bam_pileup1_t * p = NULL
         
         if self.plp == NULL or self.plp[0] == NULL:
             raise ValueError("PileupColumn accessed after iterator finished")
 
+        buf.l = 0
+
         # todo: reference sequence to count matches/mismatches
         # todo: convert assertions to exceptions
         for x from 0 <= x < self.n_pu:
@@ -2992,16 +2986,12 @@ cdef class PileupColumn:
                 continue
             # see samtools pileup_seq
             if mark_ends and p.is_head:
-                buf[n] = '^'
-                n += 1
-                assert n < MAX_PILEUP_BUFFER_SIZE
+                kputc('^', buf)
                 
                 if p.b.core.qual > 93:
-                    buf[n] = 126
+                    kputc(126, buf)
                 else:
-                    buf[n] = p.b.core.qual + 33
-                n += 1
-                assert n < MAX_PILEUP_BUFFER_SIZE
+                    kputc(p.b.core.qual + 33, buf)
             if not p.is_del:
                 if p.qpos < p.b.core.l_qseq:
                     cc = <uint8_t>seq_nt16_str[bam_seqi(bam_get_seq(p.b), p.qpos)]
@@ -3012,68 +3002,44 @@ cdef class PileupColumn:
                     rb = self.reference_sequence[self.reference_pos]
                     if seq_nt16_table[cc] == seq_nt16_table[rb]:
                         cc = "="
-                buf[n] = strand_mark_char(cc, p.b)
-                n += 1
-                assert n < MAX_PILEUP_BUFFER_SIZE
+                kputc(strand_mark_char(cc, p.b), buf)
             elif add_indels:
                 if p.is_refskip:
                     if bam_is_rev(p.b):
-                        buf[n] = '<'
+                        kputc('<', buf)
                     else:
-                        buf[n] = '>'
+                        kputc('>', buf)
                 else:
-                    buf[n] = '*'
-                n += 1
-                assert n < MAX_PILEUP_BUFFER_SIZE
+                    kputc('*', buf)
             if add_indels:
                 if p.indel > 0:
-                    buf[n] = '+'
-                    n += 1
-                    assert n < MAX_PILEUP_BUFFER_SIZE
-                    n += snprintf(<char *>&(buf[n]),
-                                  MAX_PILEUP_BUFFER_SIZE - n,
-                                  "%i",
-                                  p.indel)
-                    assert n < MAX_PILEUP_BUFFER_SIZE
+                    kputc('+', buf)
+                    kputw(p.indel, buf)
                     for j from 1 <= j <= p.indel:
                         cc = seq_nt16_str[bam_seqi(bam_get_seq(p.b), p.qpos + j)]
-                        buf[n] = strand_mark_char(cc, p.b)
-                        n += 1
-                        assert n < MAX_PILEUP_BUFFER_SIZE
+                        kputc(strand_mark_char(cc, p.b), buf)
                 elif p.indel < 0:
-                    buf[n] = '-'
-                    n += 1
-                    assert n < MAX_PILEUP_BUFFER_SIZE
-                    n += snprintf(<char *>&(buf[n]),
-                                  MAX_PILEUP_BUFFER_SIZE - n,
-                                  "%i",
-                                  -p.indel)
-                    assert n < MAX_PILEUP_BUFFER_SIZE
+                    kputc('-', buf)
+                    kputw(-p.indel, buf)
                     for j from 1 <= j <= -p.indel:
                         # TODO: out-of-range check here?
                         if self.reference_sequence == NULL:
                             cc = 'N'
                         else:
                             cc = self.reference_sequence[self.reference_pos + j]
-                        buf[n] = strand_mark_char(cc, p.b)
-                        n += 1
-                        assert n < MAX_PILEUP_BUFFER_SIZE
+                        kputc(strand_mark_char(cc, p.b), buf)
             if mark_ends and p.is_tail:
-                buf[n] = '$'
-                n += 1
-                assert n < MAX_PILEUP_BUFFER_SIZE
+                kputc('$', buf)
 
-            buf[n] = ':'
-            n += 1
-            assert n < MAX_PILEUP_BUFFER_SIZE
+            kputc(':', buf)
 
-        if n == 0:
+        if buf.l == 0:
             # could be zero if all qualities are too low
             return ""
         else:
             # quicker to ensemble all and split than to encode all separately.
             # ignore last ":"
-            return force_str(PyBytes_FromStringAndSize(<char*>buf, n-1)).split(":")
+            return force_str(PyBytes_FromStringAndSize(buf.s, buf.l-1)).split(":")
 
     def get_query_qualities(self):
         """query base quality scores at pileup column position.
diff --git a/pysam/libcalignmentfile.pyx b/pysam/libcalignmentfile.pyx
index 4030868..d35b0db 100644
--- a/pysam/libcalignmentfile.pyx
+++ b/pysam/libcalignmentfile.pyx
@@ -56,6 +56,10 @@
 ########################################################
 import os
 import collections
+try:
+    from collections.abc import Sequence, Mapping  # noqa
+except ImportError:
+    from collections import Sequence, Mapping  # noqa
 import re
 import warnings
 import array
@@ -99,11 +103,11 @@ IndexStats = collections.namedtuple("IndexStats",
 cdef int MAX_POS = (1 << 31) - 1
 
 # valid types for SAM headers
-VALID_HEADER_TYPES = {"HD" : collections.Mapping,
-                      "SQ" : collections.Sequence,
-                      "RG" : collections.Sequence,
-                      "PG" : collections.Sequence,
-                      "CO" : collections.Sequence}
+VALID_HEADER_TYPES = {"HD" : Mapping,
+                      "SQ" : Sequence,
+                      "RG" : Sequence,
+                      "PG" : Sequence,
+                      "CO" : Sequence}
 
 # order of records within SAM headers
 VALID_HEADERS = ("HD", "SQ", "RG", "PG", "CO")
@@ -293,7 +297,7 @@ cdef class AlignmentHeader(object):
                     raise ValueError(
                         "invalid type for record %s: %s, expected %s".format(
                             record, type(data), VALID_HEADER_TYPES[record]))
-                if isinstance(data, collections.Mapping):
+                if isinstance(data, Mapping):
                     lines.append(build_header_line(data, record))
                 else:
                     for fields in header_dict[record]:
@@ -303,7 +307,7 @@ cdef class AlignmentHeader(object):
         for record, data in sorted(header_dict.items()):
             if record in VALID_HEADERS:
                 continue
-            if isinstance(data, collections.Mapping):
+            if isinstance(data, Mapping):
                 lines.append(build_header_line(data, record))
             else:
                 for fields in header_dict[record]:
@@ -346,7 +350,7 @@ cdef class AlignmentHeader(object):
         return makeAlignmentHeader(bam_hdr_dup(self.ptr))
 
     property nreferences:
-        """"int with the number of :term:`reference` sequences in the file.
+        """int with the number of :term:`reference` sequences in the file.
 
         This is a read-only attribute."""
         def __get__(self):
@@ -418,7 +422,8 @@ cdef class AlignmentHeader(object):
         # convert to python string
         t = self.__str__()
         for line in t.split("\n"):
-            if not line.strip():
+            line = line.strip(' \0')
+            if not line:
                 continue
             assert line.startswith("@"), \
                 "header line without '@': '%s'" % line
@@ -454,13 +459,13 @@ cdef class AlignmentHeader(object):
                 # interpret type of known header record tags, default to str
                 x[key] = KNOWN_HEADER_FIELDS[record].get(key, str)(value)
 
-            if VALID_HEADER_TYPES[record] == collections.Mapping:
+            if VALID_HEADER_TYPES[record] == Mapping:
                 if record in result:
                     raise ValueError(
                         "multiple '%s' lines are not permitted" % record)
 
                 result[record] = x
-            elif VALID_HEADER_TYPES[record] == collections.Sequence:
+            elif VALID_HEADER_TYPES[record] == Sequence:
                 if record not in result: result[record] = []
                 result[record].append(x)
 
@@ -889,7 +894,7 @@ cdef class AlignmentFile(HTSFile):
                 self.header = template.header.copy()
             elif isinstance(header, AlignmentHeader):
                 self.header = header.copy()
-            elif isinstance(header, collections.Mapping):
+            elif isinstance(header, Mapping):
                 self.header = AlignmentHeader.from_dict(header)
             elif reference_names and reference_lengths:
                 self.header = AlignmentHeader.from_references(
@@ -1604,7 +1609,6 @@ cdef class AlignmentFile(HTSFile):
         Or it can be a generator filtering such reads. Example
         samfile.find_introns((read for read in samfile.fetch(...) if read.is_reverse)
         """
-        import collections
         res = collections.Counter()
         for r in read_iterator:
             if 'N' in r.cigarstring:
@@ -1635,7 +1639,6 @@ cdef class AlignmentFile(HTSFile):
             AlignedSegment r
             int BAM_CREF_SKIP = 3 #BAM_CREF_SKIP
 
-        import collections
         res = collections.Counter()
 
         match_or_deletion = {0, 2, 7, 8} # only M/=/X (0/7/8) and D (2) are related to genome position
@@ -1898,7 +1901,7 @@ cdef class AlignmentFile(HTSFile):
         return self.header.get_reference_length(reference)
     
     property nreferences:
-        """"int with the number of :term:`reference` sequences in the file.
+        """int with the number of :term:`reference` sequences in the file.
         This is a read-only attribute."""
         def __get__(self):
             if self.header:
@@ -2610,10 +2613,16 @@ cdef class IteratorColumnRegion(IteratorColumn):
                   int start = 0,
                   int stop = MAX_POS,
                   int truncate = False,
+                  int multiple_iterators = True,
                   **kwargs ):
 
-        # initialize iterator
-        self._setup_iterator(tid, start, stop, 1)
+        # initialize iterator. Multiple iterators not available
+        # for CRAM.
+        if multiple_iterators and samfile.is_cram:
+            warnings.warn("multiple_iterators not implemented for CRAM")
+            multiple_iterators = False
+
+        self._setup_iterator(tid, start, stop, multiple_iterators)
         self.start = start
         self.stop = stop
         self.truncate = truncate
diff --git a/pysam/libcbcf.pyx b/pysam/libcbcf.pyx
index 9ac58dc..e40a801 100644
--- a/pysam/libcbcf.pyx
+++ b/pysam/libcbcf.pyx
@@ -95,7 +95,7 @@ from cpython.ref     cimport Py_INCREF
 from cpython.dict    cimport PyDict_GetItemString, PyDict_SetItemString
 from cpython.tuple   cimport PyTuple_New, PyTuple_SET_ITEM
 from cpython.bytes   cimport PyBytes_FromStringAndSize
-from cpython.unicode cimport PyUnicode_DecodeASCII
+from cpython.unicode cimport PyUnicode_DecodeUTF8
 from cpython.version cimport PY_MAJOR_VERSION
 
 from pysam.libchtslib cimport HTSFile, hisremote
@@ -151,7 +151,7 @@ cdef inline bcf_str_cache_get_charptr(const char* s):
     if PY_MAJOR_VERSION < 3:
         val = s
     else:
-        val = PyUnicode_DecodeASCII(s, strlen(s), NULL)
+        val = PyUnicode_DecodeUTF8(s, strlen(s), NULL)
 
     PyDict_SetItemString(bcf_str_cache, s, val)
 
@@ -282,7 +282,7 @@ cdef bcf_array_to_object(void *data, int type, ssize_t n, ssize_t count, int sca
             else:
                 # Otherwise, copy the entire block
                 b = datac[:n]
-            value = tuple(v.decode('ascii') if v and v != bcf_str_missing else None for v in b.split(b','))
+            value = tuple(v.decode('utf-8') if v and v != bcf_str_missing else None for v in b.split(b','))
     else:
         value = []
         if type == BCF_BT_INT8:
@@ -412,9 +412,13 @@ cdef bcf_empty_array(int type, ssize_t n, int vlen):
     return value
 
 
-cdef bcf_copy_expand_array(void *src_data, int src_type, ssize_t src_values,
-                           void *dst_data, int dst_type, ssize_t dst_values,
+cdef bcf_copy_expand_array(void *src_data, int src_type, size_t src_values,
+                           void *dst_data, int dst_type, size_t dst_values,
                            int vlen):
+    """copy data from src to dest where the size of the elements (src_type/dst_type) differ
+    as well as the number of elements (src_values/dst_values).
+    """
+    
     cdef char    *src_datac
     cdef char    *dst_datac
     cdef int8_t  *src_datai8
@@ -432,7 +436,7 @@ cdef bcf_copy_expand_array(void *src_data, int src_type, ssize_t src_values,
     if src_type == dst_type == BCF_BT_CHAR:
         src_datac = <char *>src_data
         dst_datac = <char *>dst_data
-        memcpy(src_datac, dst_datac, src_values)
+        memcpy(dst_datac, src_datac, src_values)
         for i in range(src_values, dst_values):
             dst_datac[i] = 0
     elif src_type == BCF_BT_INT8 and dst_type == BCF_BT_INT32:
@@ -574,7 +578,7 @@ cdef object bcf_check_values(VariantRecord record, value, int sample,
     cdef int given = len(values)
     if value_count[0] != -1 and value_count[0] != given:
         if scalar[0]:
-            raise TypeError('value expected to be scalar, given len={}'.format(value_count[0], given))
+            raise TypeError('value expected to be scalar, given len={}'.format(given))
         else:
             raise TypeError('values expected to be {}-tuple, given len={}'.format(value_count[0], given))
 
@@ -813,14 +817,14 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
     if key == 'phased':
         sample.phased = bool(value)
         return
-
+    
     cdef bcf_hdr_t *hdr = sample.record.header.ptr
     cdef bcf1_t *r = sample.record.ptr
     cdef int fmt_id
     cdef vdict_t *d
     cdef khiter_t k
     cdef int fmt_type, scalar, realloc, dst_type, vlen = 0
-    cdef ssize_t i, n, value_count, alloc_size, alloc_len, dst_size
+    cdef ssize_t i, nsamples, value_count, alloc_size, alloc_len, dst_size
 
     if bcf_unpack(r, BCF_UN_ALL) < 0:
         raise ValueError('Error unpacking VariantRecord')
@@ -857,11 +861,10 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
                               fmt.type if fmt else -1,
                               fmt.n    if fmt else -1,
                               &value_count, &scalar, &realloc)
-
     vlen = value_count < 0
     value_count = len(values)
-
-    # If we can, write updated values to existing allocated storage
+    
+    # If we can, write updated values to existing allocated storage.
     if fmt and not realloc:
         r.d.indiv_dirty = 1
         bcf_object_to_array(values, fmt.p + sample.index * fmt.size, fmt.type, fmt.n, vlen)
@@ -871,9 +874,9 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
     if fmt and fmt.n > alloc_len:
         alloc_len = fmt.n
 
-    n = r.n_sample
-    new_values = bcf_empty_array(fmt_type, n*alloc_len, vlen)
-    cdef char *valp = <char *>new_values
+    nsamples = r.n_sample
+    new_values = bcf_empty_array(fmt_type, nsamples * alloc_len, vlen)
+    cdef char *new_values_p = <char *>new_values
 
     if fmt_type == BCF_HT_INT:
         dst_type = BCF_BT_INT32
@@ -887,15 +890,15 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
     else:
         raise ValueError('Unsupported FORMAT type')
 
-    if fmt and n > 1:
-        for i in range(n):
-            bcf_copy_expand_array(fmt.p + i*fmt.size, fmt.type, fmt.n,
-                                  valp  + i*dst_size, dst_type, alloc_len,
+    if fmt and nsamples > 1:
+        for i in range(nsamples):
+            bcf_copy_expand_array(fmt.p + i * fmt.size, fmt.type, fmt.n,
+                                  new_values_p  + i * dst_size, dst_type, alloc_len,
                                   vlen)
 
-    bcf_object_to_array(values, valp + sample.index*dst_size, dst_type, alloc_len, vlen)
+    bcf_object_to_array(values, new_values_p + sample.index * dst_size, dst_type, alloc_len, vlen)
 
-    if bcf_update_format(hdr, r, bkey, valp, <int>(n*alloc_len), fmt_type) < 0:
+    if bcf_update_format(hdr, r, bkey, new_values_p, <int>(nsamples * alloc_len), fmt_type) < 0:
         raise ValueError('Unable to update format values')
 
 
@@ -1160,7 +1163,7 @@ cdef inline bcf_sync_end(VariantRecord record):
     cdef bcf_hdr_t *hdr = record.header.ptr
     cdef bcf_info_t *info
     cdef int end_id = bcf_header_get_info_id(record.header.ptr, b'END')
-    cdef int ref_len 
+    cdef int ref_len
 
     # allow missing ref when instantiating a new record
     if record.ref is not None:
@@ -2036,12 +2039,20 @@ cdef class VariantHeader(object):
 
     def __str__(self):
         cdef int hlen
-        cdef char *hstr = bcf_hdr_fmt_text(self.ptr, 0, &hlen)
+        cdef kstring_t line
+        line.l = line.m = 0
+        line.s = NULL
 
-        try:
-            return charptr_to_str_w_len(hstr, hlen)
-        finally:
-            free(hstr)
+        if bcf_hdr_format(self.ptr, 0, &line) < 0:
+            if line.m:
+                free(line.s)
+            raise ValueError('bcf_hdr_format failed')
+
+        ret = charptr_to_str_w_len(line.s, line.l)
+
+        if line.m:
+            free(line.s)
+        return ret
 
     def new_record(self, contig=None, start=0, stop=0, alleles=None,
                          id=None, qual=None, filter=None, info=None, samples=None,
@@ -3121,7 +3132,9 @@ cdef class VariantRecord(object):
         cdef bcf1_t *r = self.ptr
         if bcf_unpack(r, BCF_UN_STR) < 0:
             raise ValueError('Error unpacking VariantRecord')
-        return bcf_str_cache_get_charptr(r.d.id) if r.d.id != b'.' else None
+        # causes a memory leak https://github.com/pysam-developers/pysam/issues/773
+        # return bcf_str_cache_get_charptr(r.d.id) if r.d.id != b'.' else None
+        return charptr_to_str(r.d.id) if r.d.id != b'.' else None
 
     @id.setter
     def id(self, value):
@@ -3178,7 +3191,7 @@ cdef class VariantRecord(object):
     @alleles.setter
     def alleles(self, values):
         cdef bcf1_t *r = self.ptr
-        
+
         # Cache rlen of symbolic alleles before call to bcf_update_alleles_str
         cdef int rlen = r.rlen
 
diff --git a/pysam/libcbcftools.pxd b/pysam/libcbcftools.pxd
index f1c8d76..62a6f3d 100644
--- a/pysam/libcbcftools.pxd
+++ b/pysam/libcbcftools.pxd
@@ -2,8 +2,8 @@ cdef extern from "bcftools.pysam.h":
 
     int bcftools_main(int argc, char *argv[])
     void bcftools_set_stderr(int fd)
-    void bcftools_unset_stderr()
+    void bcftools_close_stderr()
     void bcftools_set_stdout(int fd)
     void bcftools_set_stdout_fn(const char *)
-    void bcftools_unset_stdout()
+    void bcftools_close_stdout()
     void bcftools_set_optind(int)
diff --git a/pysam/libcfaidx.pyx b/pysam/libcfaidx.pyx
index cf783b9..40d8430 100644
--- a/pysam/libcfaidx.pyx
+++ b/pysam/libcfaidx.pyx
@@ -214,7 +214,7 @@ cdef class FastaFile:
         return False
 
     property closed:
-        """"bool indicating the current state of the file object.
+        """bool indicating the current state of the file object.
         This is a read-only attribute; the close() method changes the value.
         """
         def __get__(self):
@@ -231,7 +231,7 @@ cdef class FastaFile:
             return self._references
 
     property nreferences:
-        """"int with the number of :term:`reference` sequences in the file.
+        """int with the number of :term:`reference` sequences in the file.
         This is a read-only attribute."""
         def __get__(self):
             return len(self._references) if self.references else None
@@ -610,7 +610,7 @@ cdef class FastxFile:
         return False
 
     property closed:
-        """"bool indicating the current state of the file object.
+        """bool indicating the current state of the file object.
         This is a read-only attribute; the close() method changes the value.
         """
         def __get__(self):
diff --git a/pysam/libchtslib.pxd b/pysam/libchtslib.pxd
index 8bcf399..9dafee3 100644
--- a/pysam/libchtslib.pxd
+++ b/pysam/libchtslib.pxd
@@ -20,6 +20,11 @@ cdef extern from "htslib/kstring.h" nogil:
         size_t l, m
         char *s
 
+    int kputc(int c, kstring_t *s)
+    int kputw(int c, kstring_t *s)
+    int kputl(long c, kstring_t *s)
+    int ksprintf(kstring_t *s, const char *fmt, ...)
+
 
 cdef extern from "htslib_util.h" nogil:
     int hts_set_verbosity(int verbosity)
@@ -1703,6 +1708,12 @@ cdef extern from "htslib/vcf.h" nogil:
     # Read VCF header from a file and update the header
     int bcf_hdr_set(bcf_hdr_t *hdr, const char *fname)
 
+    # Appends formatted header text to _str_.
+    # If _is_bcf_ is zero, `IDX` fields are discarded.
+    #  @return 0 if successful, or negative if an error occurred
+    #  @since 1.4
+    int bcf_hdr_format(const bcf_hdr_t *hdr, int is_bcf, kstring_t *str);
+    
     # Returns formatted header (newly allocated string) and its length,
     # excluding the terminating \0. If is_bcf parameter is unset, IDX
     # fields are discarded.
diff --git a/pysam/libcsamtools.pxd b/pysam/libcsamtools.pxd
index ff797f8..70fda60 100644
--- a/pysam/libcsamtools.pxd
+++ b/pysam/libcsamtools.pxd
@@ -2,8 +2,8 @@ cdef extern from "samtools.pysam.h":
 
     int samtools_main(int argc, char *argv[])
     void samtools_set_stderr(int fd)
-    void samtools_unset_stderr()
+    void samtools_close_stderr()
     void samtools_set_stdout(int fd)
     void samtools_set_stdout_fn(const char *)
-    void samtools_unset_stdout()
+    void samtools_close_stdout()
     void samtools_set_optind(int)
diff --git a/pysam/libcutils.pyx b/pysam/libcutils.pyx
index 994a240..ab8e9a6 100644
--- a/pysam/libcutils.pyx
+++ b/pysam/libcutils.pyx
@@ -18,10 +18,10 @@ from libc.stdio cimport stdout as c_stdout
 from posix.fcntl cimport open as c_open, O_WRONLY
 
 from libcsamtools cimport samtools_main, samtools_set_stdout, samtools_set_stderr, \
-    samtools_unset_stderr, samtools_unset_stdout, samtools_set_stdout_fn, samtools_set_optind
+    samtools_close_stdout, samtools_close_stderr, samtools_set_stdout_fn, samtools_set_optind
 
 from libcbcftools cimport bcftools_main, bcftools_set_stdout, bcftools_set_stderr, \
-    bcftools_unset_stderr, bcftools_unset_stdout, bcftools_set_stdout_fn, bcftools_set_optind
+    bcftools_close_stdout, bcftools_close_stderr, bcftools_set_stdout_fn, bcftools_set_optind
 
 #####################################################################
 # hard-coded constants
@@ -91,14 +91,14 @@ cdef bint IS_PYTHON3 = PY_MAJOR_VERSION >= 3
 
 cdef from_string_and_size(const char* s, size_t length):
     if IS_PYTHON3:
-        return s[:length].decode("ascii")
+        return s[:length].decode("utf8")
     else:
         return s[:length]
 
 
 # filename encoding (adapted from lxml.etree.pyx)
 cdef str FILENAME_ENCODING = sys.getfilesystemencoding() or sys.getdefaultencoding() or 'ascii'
-
+cdef str TEXT_ENCODING = 'utf-8'
 
 cdef bytes encode_filename(object filename):
     """Make sure a filename is 8-bit encoded (or None)."""
@@ -115,9 +115,9 @@ cdef bytes encode_filename(object filename):
         raise TypeError("Argument must be string or unicode.")
 
 
-cdef bytes force_bytes(object s, encoding="ascii"):
+cdef bytes force_bytes(object s, encoding=TEXT_ENCODING):
     """convert string or unicode object to bytes, assuming
-    ascii encoding.
+    utf8 encoding.
     """
     if s is None:
         return None
@@ -129,7 +129,7 @@ cdef bytes force_bytes(object s, encoding="ascii"):
         raise TypeError("Argument must be string, bytes or unicode.")
 
 
-cdef charptr_to_str(const char* s, encoding="ascii"):
+cdef charptr_to_str(const char* s, encoding=TEXT_ENCODING):
     if s == NULL:
         return None
     if PY_MAJOR_VERSION < 3:
@@ -138,7 +138,7 @@ cdef charptr_to_str(const char* s, encoding="ascii"):
         return s.decode(encoding)
 
 
-cdef charptr_to_str_w_len(const char* s, size_t n, encoding="ascii"):
+cdef charptr_to_str_w_len(const char* s, size_t n, encoding=TEXT_ENCODING):
     if s == NULL:
         return None
     if PY_MAJOR_VERSION < 3:
@@ -147,14 +147,14 @@ cdef charptr_to_str_w_len(const char* s, size_t n, encoding="ascii"):
         return s[:n].decode(encoding)
 
 
-cdef bytes charptr_to_bytes(const char* s, encoding="ascii"):
+cdef bytes charptr_to_bytes(const char* s, encoding=TEXT_ENCODING):
     if s == NULL:
         return None
     else:
         return s
 
 
-cdef force_str(object s, encoding="ascii"):
+cdef force_str(object s, encoding=TEXT_ENCODING):
     """Return s converted to str type of current Python
     (bytes in Py2, unicode in Py3)"""
     if s is None:
@@ -274,9 +274,25 @@ def _pysam_dispatch(collection,
     False.
     '''
 
-    if method == "index":
-        if args and not os.path.exists(args[0]):
-            raise IOError("No such file or directory: '%s'" % args[0])
+    if method == "index" and args:
+        # We make sure that at least 1 input file exists,
+        # and if it doesn't we raise an IOError.
+        SIMPLE_FLAGS = ['-c', '--csi', '-f', '--force', '-t', '--tbi', '-n', '--nstats', '-s', '--stats']
+        ARGUMENTS = ['-m', '--min-shift', '-o', '--output-file', '--threads', '-@']
+        skip_next = False
+        for arg in args:
+            if skip_next:
+                skip_next = False
+                continue
+            if arg in SIMPLE_FLAGS or (len(arg) > 2 and arg.startswith('-@')):
+                continue
+            if arg in ARGUMENTS:
+                skip_next = True
+                continue
+            if not os.path.exists(arg):
+                raise IOError("No such file or directory: '%s'" % arg)
+            else:
+                break
             
     if args is None:
         args = []
@@ -285,8 +301,6 @@ def _pysam_dispatch(collection,
 
     # redirect stderr to file
     stderr_h, stderr_f = tempfile.mkstemp()
-    samtools_set_stderr(stderr_h)
-    bcftools_set_stderr(stderr_h)
         
     # redirect stdout to file
     if save_stdout:
@@ -297,9 +311,7 @@ def _pysam_dispatch(collection,
             raise IOError("error while opening {} for writing".format(stdout_f))
 
         samtools_set_stdout_fn(force_bytes(stdout_f))
-        samtools_set_stdout(stdout_h)
         bcftools_set_stdout_fn(force_bytes(stdout_f))
-        bcftools_set_stdout(stdout_h)
             
     elif catch_stdout:
         stdout_h, stdout_f = tempfile.mkstemp()
@@ -329,12 +341,11 @@ def _pysam_dispatch(collection,
             samtools_set_stdout_fn(force_bytes(stdout_f))
             bcftools_set_stdout_fn(force_bytes(stdout_f))
             args.extend(stdout_option.format(stdout_f).split(" "))
-        else:
-            samtools_set_stdout(stdout_h)
-            bcftools_set_stdout(stdout_h)
+            stdout_h = c_open(b"/dev/null", O_WRONLY)
     else:
         samtools_set_stdout_fn("-")
         bcftools_set_stdout_fn("-")
+        stdout_h = c_open(b"/dev/null", O_WRONLY)
 
     # setup the function call to samtools/bcftools main
     cdef char ** cargs
@@ -373,9 +384,17 @@ def _pysam_dispatch(collection,
 
     # call samtools/bcftools
     if collection == b"samtools":
+        samtools_set_stdout(stdout_h)
+        samtools_set_stderr(stderr_h)
         retval = samtools_main(n + 2, cargs)
+        samtools_close_stdout()
+        samtools_close_stderr()
     elif collection == b"bcftools":
+        bcftools_set_stdout(stdout_h)
+        bcftools_set_stderr(stderr_h)
         retval = bcftools_main(n + 2, cargs)
+        bcftools_close_stdout()
+        bcftools_close_stderr()
 
     for i from 0 <= i < n:
         free(cargs[i + 2])
@@ -395,13 +414,6 @@ def _pysam_dispatch(collection,
             os.remove(fn)
         return out
 
-    samtools_unset_stderr()
-    bcftools_unset_stderr()
-
-    if save_stdout or catch_stdout:
-        samtools_unset_stdout()
-        bcftools_unset_stdout()
-
     out_stderr = _collect(stderr_f)
     if save_stdout:
         out_stdout = None
diff --git a/pysam/version.py b/pysam/version.py
index d4ec161..49afa17 100644
--- a/pysam/version.py
+++ b/pysam/version.py
@@ -1,5 +1,5 @@
 # pysam versioning information
-__version__ = "0.15.2"
+__version__ = "0.15.3"
 
 # TODO: upgrade number
 __samtools_version__ = "1.9"
diff --git a/samtools/samtools.pysam.c b/samtools/samtools.pysam.c
index edbd9a2..b26f892 100644
--- a/samtools/samtools.pysam.c
+++ b/samtools/samtools.pysam.c
@@ -10,7 +10,6 @@
 FILE * samtools_stderr = NULL;
 FILE * samtools_stdout = NULL;
 const char * samtools_stdout_fn = NULL;
-int samtools_stdout_fileno = STDOUT_FILENO;
 
 
 FILE * samtools_set_stderr(int fd)
@@ -21,11 +20,10 @@ FILE * samtools_set_stderr(int fd)
   return samtools_stderr;
 }
 
-void samtools_unset_stderr(void)
+void samtools_close_stderr(void)
 {
-  if (samtools_stderr != NULL)
-    fclose(samtools_stderr);
-  samtools_stderr = fopen("/dev/null", "w");
+  fclose(samtools_stderr);
+  samtools_stderr = NULL;
 }
 
 FILE * samtools_set_stdout(int fd)
@@ -37,7 +35,6 @@ FILE * samtools_set_stdout(int fd)
     {
       fprintf(samtools_stderr, "could not set stdout to fd %i", fd);
     }
-  samtools_stdout_fileno = fd;
   return samtools_stdout;
 }
 
@@ -46,12 +43,10 @@ void samtools_set_stdout_fn(const char *fn)
   samtools_stdout_fn = fn;
 }
 
-void samtools_unset_stdout(void)
+void samtools_close_stdout(void)
 {
-  if (samtools_stdout != NULL)
-    fclose(samtools_stdout);
-  samtools_stdout = fopen("/dev/null", "w");
-  samtools_stdout_fileno = STDOUT_FILENO;
+  fclose(samtools_stdout);
+  samtools_stdout = NULL;
 }
 
 int samtools_puts(const char *s)
diff --git a/samtools/samtools.pysam.h b/samtools/samtools.pysam.h
index d97ee25..df8fd01 100644
--- a/samtools/samtools.pysam.h
+++ b/samtools/samtools.pysam.h
@@ -1,7 +1,7 @@
-#ifndef PYSAM_H
-#define PYSAM_H
+#ifndef samtools_PYSAM_H
+#define samtools_PYSAM_H
 
-#include "stdio.h"
+#include <stdio.h>
 
 extern FILE * samtools_stderr;
 
@@ -17,7 +17,7 @@ FILE * samtools_set_stderr(int fd);
 
 /*! set pysam standard output to point to file descriptor
 
-  Setting the stderr will close the previous stdout.
+  Setting the stdout will close the previous stdout.
  */
 FILE * samtools_set_stdout(int fd);
 
@@ -26,17 +26,15 @@ FILE * samtools_set_stdout(int fd);
  */
 void samtools_set_stdout_fn(const char * fn);
 
-/*! set pysam standard error to /dev/null.
+/*! close pysam standard error and set to NULL
   
-  Unsetting the stderr will close the previous stderr.
  */
-void samtools_unset_stderr(void);
+void samtools_close_stderr(void);
 
-/*! set pysam standard error to /dev/null.
+/*! close pysam standard output and set to NULL
   
-  Unsetting the stderr will close the previous stderr.
  */
-void samtools_unset_stdout(void);
+void samtools_close_stdout(void);
 
 int samtools_puts(const char *s);
 
diff --git a/setup.py b/setup.py
index 49261f9..4c97e87 100644
--- a/setup.py
+++ b/setup.py
@@ -13,8 +13,6 @@ This module provides a low-level wrapper around the htslib C-API as
 using cython and a high-level API for convenient access to the data
 within standard genomic file formats.
 
-The current version wraps htslib-1.7, samtools-1.7 and bcftools-1.6.
-
 See:
 http://www.htslib.org
 https://github.com/pysam-developers/pysam
@@ -53,6 +51,7 @@ def changedir(path):
 
 
 def run_configure(option):
+    sys.stdout.flush()
     try:
         retcode = subprocess.call(
             " ".join(("./configure", option)),
@@ -80,6 +79,25 @@ def run_make_print_config():
     return make_print_config
 
 
+@contextmanager
+def set_compiler_envvars():
+    tmp_vars = []
+    for var in ['CC', 'CFLAGS', 'LDFLAGS']:
+        if var in os.environ:
+            print ("# pysam: (env) {}={}".format(var, os.environ[var]))
+        elif var in sysconfig.get_config_vars():
+            value = sysconfig.get_config_var(var)
+            print ("# pysam: (sysconfig) {}={}".format(var, value))
+            os.environ[var] = value
+            tmp_vars += [var]
+
+    try:
+        yield
+    finally:
+        for var in tmp_vars:
+            del os.environ[var]
+
+
 def configure_library(library_dir, env_options=None, options=[]):
 
     configure_script = os.path.join(library_dir, "configure")
@@ -93,7 +111,7 @@ def configure_library(library_dir, env_options=None, options=[]):
         raise ValueError(
             "configure script {} does not exist".format(configure_script))
 
-    with changedir(library_dir):
+    with changedir(library_dir), set_compiler_envvars():
         if env_options is not None:
             if run_configure(env_options):
                 return env_options
@@ -120,7 +138,7 @@ def get_pysam_version():
     sys.path.insert(0, "pysam")
     import version
     return version.__version__
-    
+
 
 # How to link against HTSLIB
 # shared:   build shared chtslib from builtin htslib code.
@@ -412,7 +430,7 @@ Operating System :: POSIX
 Operating System :: Unix
 Operating System :: MacOS
 """
-    
+
 metadata = {
     'name': "pysam",
     'version': get_pysam_version(),
diff --git a/tests/AlignmentFile_test.py b/tests/AlignmentFile_test.py
index ec0f5e1..e0be5ef 100644
--- a/tests/AlignmentFile_test.py
+++ b/tests/AlignmentFile_test.py
@@ -22,7 +22,7 @@ from functools import partial
 
 import pysam
 import pysam.samtools
-from TestUtils import checkBinaryEqual, check_url, \
+from TestUtils import checkBinaryEqual, checkGZBinaryEqual, check_url, \
     check_samtools_view_equal, checkFieldEqual, force_str, \
     get_temp_filename, BAM_DATADIR
 
@@ -470,7 +470,8 @@ class TestIO(unittest.TestCase):
         self.checkEcho("ex2.bam",
                        "ex2.bam",
                        "tmp_ex2.bam",
-                       "rb", "wb")
+                       "rb", "wb",
+                       checkf=checkGZBinaryEqual)
 
     def testCRAM2CRAM(self):
         # in some systems different reference sequence paths might be
@@ -489,7 +490,8 @@ class TestIO(unittest.TestCase):
         self.checkEcho("ex2.sam",
                        "ex2.bam",
                        "tmp_ex2.bam",
-                       "r", "wb")
+                       "r", "wb",
+                       checkf=checkGZBinaryEqual)
 
     def testBAM2SAM(self):
         self.checkEcho("ex2.bam",
@@ -665,7 +667,8 @@ class TestIO(unittest.TestCase):
         self.checkEcho("ex1.bam",
                        "ex1.bam",
                        "tmp_ex1.bam",
-                       "rb", "wb")
+                       "rb", "wb",
+                       checkf=checkGZBinaryEqual)
 
         f = open(os.path.join(BAM_DATADIR, "ex1.bam"))
         samfile = pysam.AlignmentFile(f, "rb")
@@ -1372,7 +1375,7 @@ class TestDeNovoConstruction(unittest.TestCase):
         outfile.close()
 
         self.assertTrue(
-            checkBinaryEqual(tmpfilename, self.bamfile),
+            checkGZBinaryEqual(tmpfilename, self.bamfile),
             "mismatch when construction BAM file, see %s %s" %
             (tmpfilename, self.bamfile))
 
@@ -1407,6 +1410,11 @@ class TestEmptyHeader(unittest.TestCase):
         self.assertTrue("SQ" in s.header.to_dict())
         self.assertTrue("@SQ" in str(s.header))
 
+    def test_bam_without_seq_with_null_bytes_in_header(self):
+        s = pysam.AlignmentFile(os.path.join(BAM_DATADIR, "example_no_seq_in_header_null_bytes.bam"))
+        self.assertTrue("SQ" in s.header.to_dict())
+        self.assertTrue("@SQ" in str(s.header))
+
 
 class TestMismatchingHeader(unittest.TestCase):
     '''see issue 716.'''
diff --git a/tests/TestUtils.py b/tests/TestUtils.py
index 7811b9e..f33761e 100644
--- a/tests/TestUtils.py
+++ b/tests/TestUtils.py
@@ -96,6 +96,19 @@ def checkBinaryEqual(filename1, filename2):
     return found
 
 
+def checkGZBinaryEqual(filename1, filename2):
+    '''return true if the decompressed contents of the two files
+    are binary equal.
+    '''
+    with gzip.open(filename1, "rb") as infile1:
+        d1 = infile1.read()
+        with gzip.open(filename2, "rb") as infile2:
+            d2 = infile2.read()
+        if d1 == d2:
+            return True
+    return False
+
+
 def check_samtools_view_equal(
         filename1, filename2,
         without_header=False):
diff --git a/tests/VariantFile_test.py b/tests/VariantFile_test.py
index 88bd34f..4458d1f 100644
--- a/tests/VariantFile_test.py
+++ b/tests/VariantFile_test.py
@@ -1,4 +1,7 @@
+# -*- coding: utf-8 -*-
+
 import os
+import glob
 import sys
 import unittest
 import pysam
@@ -633,6 +636,37 @@ class TestSubsetting(unittest.TestCase):
             inf.subset_samples(["NA00001"])
 
 
+class TestVCFVersions(unittest.TestCase):
+
+    def setUp(self):
+        self.files_to_test = (glob.glob(os.path.join(CBCF_DATADIR, "example_v*.vcf.gz")) +
+                              glob.glob(os.path.join(CBCF_DATADIR, "example_v*.vcf")) +
+                              glob.glob(os.path.join(CBCF_DATADIR, "example_v*.bcf")))
+    
+    def test_all_records_can_be_fetched(self):
+
+        for fn in self.files_to_test:
+            with pysam.VariantFile(fn) as inf:
+                records = list(inf.fetch())
+
+
+class TestUnicode(unittest.TestCase):
+    filename = "example_vcf43_with_utf8.vcf.gz"
+
+    def test_record_with_unicode(self):
+        with pysam.VariantFile(os.path.join(CBCF_DATADIR,
+                                            self.filename)) as inf:
+            records = list(inf.fetch("20", 2345677, 2345678))
+        self.assertEqual(len(records), 1)
+        record = records.pop()
+        self.assertEqual(
+            record.info["CLNVI"],
+            ('Institute_of_Human_Genetics', u'Friedrich-Alexander-UniversitÃ¤t_Erlangen-NÃ¼rnberg'))
+        self.assertEqual(record.samples[0]["AN"], "alpha")
+        self.assertEqual(record.samples[1]["AN"], u"Ã¤")
+        self.assertEqual(record.samples[2]["AN"], u"Ã¼")
+                
+
 if __name__ == "__main__":
     # build data files
     print("building data files")
diff --git a/tests/VariantRecord_test.py b/tests/VariantRecord_test.py
new file mode 100644
index 0000000..fd80a80
--- /dev/null
+++ b/tests/VariantRecord_test.py
@@ -0,0 +1,72 @@
+import os
+import glob
+import sys
+import unittest
+import pysam
+import shutil
+import gzip
+import subprocess
+import pytest
+
+try:
+    from pathlib import Path
+except ImportError:
+    Path = None
+
+from TestUtils import get_temp_filename, check_lines_equal, load_and_convert, CBCF_DATADIR, get_temp_context
+
+
+@pytest.fixture
+def vcf_header():
+    vcf_header = pysam.VariantHeader()
+    vcf_header.add_sample("sample1")
+    vcf_header.add_sample("sample2")
+    vcf_header.contigs.add("1")
+    return vcf_header
+
+## segfault without coordinates
+
+def test_ascii_annotation_can_be_added(vcf_header):
+    vcf_header.formats.add("AN", 1, "String", "An annotation")
+    record = vcf_header.new_record(
+        contig="1",
+        start=12,
+        stop=13,
+        samples=[
+            {"AN": "anno1"},
+            {"AN": "anno2"}])
+    assert str(record)[:-1].split("\t")[-2:] == ["anno1", "anno2"]
+
+
+def test_ascii_annotation_with_variable_length_can_be_added(vcf_header):
+    vcf_header.formats.add("AN", 1, "String", "An annotation")
+    record = vcf_header.new_record(
+        contig="1",
+        start=12,
+        stop=13,
+        samples=[
+            {"AN": "anno1b"},
+            {"AN": "anno1"}])
+    assert str(record)[:-1].split("\t")[-2:] == ["anno1b", "anno1"]
+    record = vcf_header.new_record(
+        contig="1",
+        start=12,
+        stop=13,
+        samples=[
+            {"AN": "anno2"},
+            {"AN": "anno2b"}])
+    assert str(record)[:-1].split("\t")[-2:] == ["anno2", "anno2b"]
+    
+
+def test_unicode_annotation_can_be_added(vcf_header):
+    vcf_header.formats.add("AN", 1, "String", "An annotation")
+    record = vcf_header.new_record(
+        contig="1",
+        start=12,
+        stop=13,
+        samples=[
+            {"AN": "anno1"},
+            {"AN": "Friedrich-Alexander-Universit\u00E4t_Erlangen-N\u00FCrnberg"}])
+    assert str(record)[:-1].split("\t")[-2:] == [
+        "anno1",
+        "Friedrich-Alexander-Universit\u00E4t_Erlangen-N\u00FCrnberg"]
diff --git a/tests/cbcf_data/example_vcf43.vcf b/tests/cbcf_data/example_vcf43.vcf
new file mode 100644
index 0000000..9010401
--- /dev/null
+++ b/tests/cbcf_data/example_vcf43.vcf
@@ -0,0 +1,24 @@
+##fileformat=VCFv4.3
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
+##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+20	14370	rs6054257	G	A	29	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	1/1:43:5:.,.
+20	17330	.	T	A	3	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3
+20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4
+20	1230237	.	T	.	47	PASS	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:7:56,60	0|0:48:4:51,51	0/0:61:2
+20	1234567	microsat1	GTC	G,GTCT	50	PASS	NS=3;DP=9;AA=G	GT:GQ:DP	0/1:35:4	0/2:17:2	1/1:40:3
diff --git a/tests/cbcf_data/example_vcf43_with_utf8.vcf b/tests/cbcf_data/example_vcf43_with_utf8.vcf
new file mode 100644
index 0000000..4826fc4
--- /dev/null
+++ b/tests/cbcf_data/example_vcf43_with_utf8.vcf
@@ -0,0 +1,27 @@
+##fileformat=VCFv4.3
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
+##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##INFO=<ID=CLNVI,Number=.,Type=String,Description="the variant's clinical sources reported as tag-value pairs of database and variant identifier">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+##FORMAT=<ID=AN,Number=1,Type=String,Description="Some annotation">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+20	14370	rs6054257	G	A	29	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	1/1:43:5:.,.
+20	17330	.	T	A	3	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3
+20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4
+20	1230237	.	T	.	47	PASS	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:7:56,60	0|0:48:4:51,51	0/0:61:2
+20	1234567	microsat1	GTC	G,GTCT	50	PASS	NS=3;DP=9;AA=G	GT:GQ:DP	0/1:35:4	0/2:17:2	1/1:40:3
+20	2345678	rs123	G	A	29	PASS	NS=3;DP=14;AF=0.5;CLNVI=Institute_of_Human_Genetics,Friedrich-Alexander-UniversitÃ¤t_Erlangen-NÃ¼rnberg;DB;H2	GT:GQ:DP:HQ:AN	0|0:48:1:51,51:alpha	1|0:48:8:51,51:Ã¤	1/1:43:5:.,.:Ã¼
diff --git a/tests/pysam_data/example_no_seq_in_header_null_bytes.bam b/tests/pysam_data/example_no_seq_in_header_null_bytes.bam
new file mode 100644
index 0000000..aaf4b84
Binary files /dev/null and b/tests/pysam_data/example_no_seq_in_header_null_bytes.bam differ
diff --git a/tests/samtools_test.py b/tests/samtools_test.py
index 40d84c0..b20a869 100644
--- a/tests/samtools_test.py
+++ b/tests/samtools_test.py
@@ -45,7 +45,7 @@ def get_version(executable):
     if IS_PYTHON3:
         lines = lines.decode('ascii')
     try:
-        x = re.search("Version:\s+(\S+)", lines).groups()[0]
+        x = re.search(r"Version:\s+(\S+)", lines).groups()[0]
     except AttributeError:
         raise ValueError("could not get version from %s" % lines)
     return x
@@ -82,6 +82,7 @@ class SamtoolsTest(unittest.TestCase):
         # TODO: issues with file naming
         # "faidx ex1.fa; %(out)s_ex1.fa.fai",
         "index ex1.bam %(out)s_ex1.bam.fai",
+        "index -@2 ex1.bam %(out)s_ex1.bam.fai",
         "idxstats ex1.bam > %(out)s_ex1.idxstats",
         "fixmate ex1.bam %(out)s_ex1.fixmate.bam",
         "flagstat ex1.bam > %(out)s_ex1.flagstat",
@@ -189,7 +190,7 @@ class SamtoolsTest(unittest.TestCase):
         pysam_method = getattr(self.module, command)
 
         # run samtools
-        full_statement = re.sub("%\(out\)s", self.executable, statement)
+        full_statement = re.sub(r"%\(out\)s", self.executable, statement)
         run_command(" ".join((self.executable, full_statement)))
         # sys.stdout.write("%s %s ok" % (command, self.executable))
 
@@ -199,7 +200,7 @@ class SamtoolsTest(unittest.TestCase):
             parts = parts[:-2]
 
         # avoid interpolation to preserve string quoting, tab chars, etc.
-        pysam_parts = [re.sub("%\(out\)s", "pysam", x) for x in parts[1:]]
+        pysam_parts = [re.sub(r"%\(out\)s", "pysam", x) for x in parts[1:]]
         output = pysam_method(*pysam_parts,
                               raw=True,
                               catch_stdout=True)
@@ -272,7 +273,7 @@ class SamtoolsTest(unittest.TestCase):
             mapped_command = self.get_command(statement, map_to_internal=True)
             pysam_method = getattr(self.module, mapped_command)
             usage_msg = pysam_method.usage()
-            expected = "Usage:\s+{} {}".format(self.executable, command)
+            expected = r"Usage:\s+{} {}".format(self.executable, command)
             self.assertTrue(re.search(expected, usage_msg) is not None)
 
     def tearDown(self):
@@ -287,6 +288,18 @@ class EmptyIndexTest(unittest.TestCase):
         self.assertRaises(IOError, pysam.samtools.index,
                           "exdoesntexist.bam")
 
+    def testEmptyIndexWithExtraFlag(self):
+        self.assertRaises(IOError, pysam.samtools.index,
+                          "-c",
+                          "exdoesntexist.bam")
+
+    def testEmptyIndexWithExtraArg(self):
+        self.assertRaises(IOError, pysam.samtools.index,
+                          "-c",
+                          "-m",
+                          "14",
+                          "exdoesntexist.bam")
+
 
 if sys.platform != "darwin":
     # fails with segfault with htslib 1.5 on Osx, an issue with flockfile
diff --git a/tests/tabix_test.py b/tests/tabix_test.py
index fdd39b0..c17f7ff 100644
--- a/tests/tabix_test.py
+++ b/tests/tabix_test.py
@@ -14,9 +14,8 @@ import unittest
 import subprocess
 import glob
 import re
-import copy
-import tempfile
-from TestUtils import check_url, load_and_convert, TABIX_DATADIR, get_temp_filename
+from TestUtils import checkBinaryEqual, checkGZBinaryEqual, check_url, \
+    load_and_convert, TABIX_DATADIR, get_temp_filename
 
 IS_PYTHON3 = sys.version_info[0] >= 3
 
@@ -40,30 +39,6 @@ def splitToBytes(s):
     return [x.encode("ascii") for x in s.split("\t")]
 
 
-def checkBinaryEqual(filename1, filename2):
-    '''return true if the two files are binary equal.'''
-    if os.path.getsize(filename1) != os.path.getsize(filename2):
-        return False
-
-    with open(filename1, "rb") as infile:
-        d1 = infile.read()
-
-    with open(filename2, "rb") as infile:
-        d2 = infile.read()
-
-    if len(d1) != len(d2):
-        return False
-
-    found = False
-    for c1, c2 in zip(d1, d2):
-        if c1 != c2:
-            break
-    else:
-        found = True
-
-    return found
-
-
 class TestIndexing(unittest.TestCase):
     filename = os.path.join(TABIX_DATADIR, "example.gtf.gz")
     filename_idx = os.path.join(TABIX_DATADIR, "example.gtf.gz.tbi")
@@ -77,7 +52,7 @@ class TestIndexing(unittest.TestCase):
         '''test indexing via preset.'''
 
         pysam.tabix_index(self.tmpfilename, preset="gff")
-        self.assertTrue(checkBinaryEqual(
+        self.assertTrue(checkGZBinaryEqual(
             self.tmpfilename + ".tbi", self.filename_idx))
 
     def test_indexing_to_custom_location_works(self):
@@ -86,7 +61,7 @@ class TestIndexing(unittest.TestCase):
         index_path = get_temp_filename(suffix='custom.tbi')
         pysam.tabix_index(self.tmpfilename, preset="gff",
                           index=index_path, force=True)
-        self.assertTrue(checkBinaryEqual(index_path, self.filename_idx))
+        self.assertTrue(checkGZBinaryEqual(index_path, self.filename_idx))
         os.unlink(index_path)
 
     def test_indexing_with_explict_columns_works(self):
@@ -98,7 +73,7 @@ class TestIndexing(unittest.TestCase):
                           end_col=4,
                           line_skip=0,
                           zerobased=False)
-        self.assertTrue(checkBinaryEqual(
+        self.assertTrue(checkGZBinaryEqual(
             self.tmpfilename + ".tbi", self.filename_idx))
 
     def test_indexing_with_lineskipping_works(self):
@@ -109,7 +84,7 @@ class TestIndexing(unittest.TestCase):
                           end_col=4,
                           line_skip=1,
                           zerobased=False)
-        self.assertFalse(checkBinaryEqual(
+        self.assertFalse(checkGZBinaryEqual(
             self.tmpfilename + ".tbi", self.filename_idx))
 
     def tearDown(self):
diff --git a/tests/tabixproxies_test.py b/tests/tabixproxies_test.py
index 32f8e42..7ad7db0 100644
--- a/tests/tabixproxies_test.py
+++ b/tests/tabixproxies_test.py
@@ -128,7 +128,7 @@ class TestGTF(TestParser):
         """build attribute string from dictionary d"""
         s = "; ".join(["{} \"{}\"".format(x, y) for (x, y) in d.items()]) + ";"
         # remove quotes around numeric values
-        s = re.sub("\"(\d+)\"", r"\1", s)
+        s = re.sub(r'"(\d+)"', r'\1', s)
         return s
 
     def testRead(self):